CMLAB-GPU-Mailer/main.py
Ting-Jun Wang f0e94303cc
feat: first commit
- the filter detects multi-card usage on the same server, but only
  retrieves the first user in the list.
- get the Gmail private code from the environment variables.
2024-03-22 15:50:43 +08:00

185 lines
5.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import requests
import json
import smtplib
import time
from bs4 import BeautifulSoup
from email.mime.text import MIMEText
import os
# Status page that get_server_gpu_status() downloads and parses.
URL = 'https://www.cmlab.csie.ntu.edu.tw/status-gpu/'
# Per-user GPU limit on a single server (the mail text below states the same limit of 2).
GPU_LIMIT = 2
# Secret passed to smtp.login() in mail_notify(); read from the environment so it
# is not stored in the source. A missing GOOGLE_CODE variable raises KeyError
# as soon as this module is loaded.
GOOGLE_CODE = os.environ['GOOGLE_CODE']
# Cooldown, in hours, before the same user is mailed again (see mail_notify()).
MAIL_CD_HOUR = 6
# Body template of the reminder mail. mail_notify() fills the four positional
# `{}` placeholders, in order, with: username, server name, number of GPUs in
# use, GPU model name.
# NOTE(review): the ASCII-art signature contains backslash sequences such as
# `\/` and `\_` inside a non-raw string; recent Python versions emit a warning
# for unrecognized escape sequences. Escaping them (`\\`) would keep the
# rendered text the same; confirm before changing.
MAIL_MESSAGE = '''
Hi, {}
提醒您,您目前在 {} 伺服器上已經使用了 {}{}
依照 CMLab 規定,每人在每台 CML workstation 上至多使用 2 顆 GPU詳細規定請參閱 https://www.cmlab.csie.ntu.edu.tw/wiki/doku.php?id=workstation:rule
為了公平起見,建議您降低 GPU 使用量!
雖然我們不會直接處理,但若有人檢舉,我們會停止您運行的程式!
若有其他問題歡迎來信討論!謝謝您的配合!
* 信件由網管自動化工具寄出,若為錯誤寄送請忽略此信件。
Best,
CMLab Unix Manager, Ting-Jun Wang
CMLab, National Taiwan University
Email: unix_manager@cmlab.csie.ntu.edu.tw
__ __ _
/ / / /__ (_)_ __
__ ___/ /_/ / _ \/ /\ \ /
/ |/ /\____/_//_/_//_\_\____ ____
/ /|_/ / _ `/ _ \/ _ `/ _ `/ -_) __/
/_/ /_/\_,_/_//_/\_,_/\_, /\__/_/
/___/
'''
def get_server_gpu_status() -> list:
    '''
    Download the CMLab GPU status page and list who is using each server's GPUs.

    The page is expected to hold its status table in a <pre> element whose
    text is split into blank-line-separated "boxes", one box per server.
    Inside a box, the server name is read from the first <span class="f7">,
    the GPU model from the first <span class="f4">, and every following line
    may carry one <span class="f0"> holding the name of the user on that GPU.

    Input:
        None
    Output:
        servers (list):
            [
                {
                    'name': 'cml5',
                    'users': ['snsd0805', 'snsd0805', 'timmy'],
                    'gpu': 'V100'
                },
                ...
            ]
            A username appears once per GPU it occupies, so in this example
            'snsd0805' is using 2 GPUs and 'timmy' is using 1 GPU on 'cml5'.
            The user 'gdm' is never reported.
    Raises:
        requests.RequestException: the page could not be fetched (network
            error, timeout, or a non-2xx HTTP status).
        ValueError: the page does not contain a <pre> status table.
    '''
    # Without a timeout an unattended run could block forever on a stalled
    # connection; raise_for_status() keeps an HTTP error page from being
    # parsed as if it were the status table.
    response = requests.get(URL, timeout=30)
    response.raise_for_status()

    page = BeautifulSoup(response.text, 'html.parser')

    # find the table which shows the GPU status
    table = page.find('pre')
    if table is None:
        raise ValueError(f'no <pre> status table found at {URL}')

    servers = []
    # Retrieve all servers: one box means one server. The chunk after the
    # final blank line is not a server box and is dropped.
    for box in table.prettify().split('\n\n')[:-1]:
        box_markup = BeautifulSoup(box, 'html.parser')
        name_span = box_markup.find('span', class_='f7')
        gpu_span = box_markup.find('span', class_='f4')
        if name_span is None or gpu_span is None:
            # Not a recognisable server box; skip it instead of crashing on
            # a missing element.
            continue

        users = []
        # The first line of a box is its header; the remaining lines are
        # scanned one by one for a user span.
        for line in box.split('\n')[1:]:
            user_span = BeautifulSoup(line, 'html.parser').find('span', class_='f0')
            if user_span is None:
                continue
            username = user_span.text
            if username != 'gdm':
                users.append(username)

        servers.append({
            'name': name_span.text.replace(' ', ''),
            'users': users,
            'gpu': gpu_span.text.replace(' ', ''),
        })
    return servers
def filter(server_status: dict, limit: int = 2) -> list:
    '''
    Report the users of one server who occupy more GPUs than allowed.

    Current rule (2024/03/11): each person may use at most `limit` cards on
    each server. Further rules can be added in this function later.

    Input:
        server_status (dict): one server entry, e.g. {'name': 'cml5', ...};
            only its 'users' list is read, where a username appears once per
            GPU it occupies.
        limit (int): GPU limit (default: 2)
    Output:
        violators (list): one dict per user whose GPU count is strictly
            greater than `limit`, in order of first appearance, e.g.
            [{'user': 'snsd0805', 'gpu': 3}] means 'snsd0805' is using 3 GPUs.
    '''
    # Tally how many GPUs each username occupies.
    usage = {}
    for name in server_status['users']:
        usage[name] = usage.get(name, 0) + 1

    return [
        {'user': name, 'gpu': count}
        for name, count in usage.items()
        if count > limit
    ]
def mail_notify(server_name: str, gpu_name: str, violators: list) -> None:
    '''
    Mail a reminder to every violator who has not been mailed recently.

    The time of the last reminder per username is kept in 'send_log.json'
    (in the current working directory). A user is mailed again only when at
    least MAIL_CD_HOUR hours have passed since that time. The log entry is
    written only after the SMTP server has accepted the mail for the
    violator's address, so a failed send is retried on the next run.

    Input:
        server_name (str): server on which the limit was exceeded.
        gpu_name (str): GPU model of that server.
        violators (list): dicts with a 'user' (username) and a 'gpu'
            (number of GPUs in use) key, as returned by filter().
    Output:
        None
    Raises:
        smtplib.SMTPException / OSError: connecting, logging in or sending
            failed; the connection is closed before the error propagates.
    '''
    log_path = 'send_log.json'
    cooldown_seconds = MAIL_CD_HOUR * 60 * 60

    def check_send(log: dict, username: str) -> bool:
        # Send when the user was never mailed, or the cooldown has elapsed.
        return username not in log or time.time() - log[username] >= cooldown_seconds

    # get last send time; a missing log (e.g. the very first run) simply
    # means nobody has been mailed yet
    try:
        with open(log_path) as fp:
            send_log = json.load(fp)
    except FileNotFoundError:
        send_log = {}

    for violator in violators:
        username = violator['user']
        print(f' {username}')
        if not check_send(send_log, username):
            continue

        usage = violator['gpu']
        recipient = f'{username}@cmlab.csie.ntu.edu.tw'
        body = MAIL_MESSAGE.format(username, server_name, usage, gpu_name)
        msg = MIMEText(body, 'plain')  # mail body
        msg['Subject'] = f'[網管通知] 提醒您已經超過 CMLab GPU 使用限制!({username})'
        msg['From'] = 'unix_manager@cmlab.csie.ntu.edu.tw'
        msg['To'] = recipient
        msg['Cc'] = 'unix_manager@cmlab.csie.ntu.edu.tw'

        # The context manager closes the connection even when login or
        # sending raises.
        with smtplib.SMTP('smtp.gmail.com', 587) as smtp:
            smtp.ehlo()
            smtp.starttls()
            smtp.login('snsd0805@cmlab.csie.ntu.edu.tw', GOOGLE_CODE)
            # send_message() returns a dict of the recipients that were refused.
            refused = smtp.send_message(msg)

        if recipient not in refused:
            # update log only now that the violator's address was accepted
            send_log[username] = time.time()
            with open(log_path, 'w') as fp:
                json.dump(send_log, fp)

        if refused == {}:
            print(f' {username}, 郵件傳送成功!')
        else:
            print(f' {username}, 郵件傳送失敗...')
if __name__ == '__main__':
    # Entry point: fetch the status of every server, find the users who
    # exceed the GPU limit on each one, and mail them a reminder.
    for server in get_server_gpu_status():
        print(server['name'], server['gpu'])
        # Use the module-level limit instead of repeating the literal 2, so
        # the rule is changed in one place.
        violators = filter(server, GPU_LIMIT)
        print(violators)
        mail_notify(server['name'], server['gpu'], violators)
        print("=" * 20)