"""Notify CMLab users who exceed the per-server GPU limit.

The script scrapes the CMLab GPU status page, counts how many GPUs each user
occupies on every server, and e-mails every user above the limit. A user is
mailed at most once every ``MAIL_CD_HOUR`` hours; the time of the last mail is
kept in ``send_log.json`` (a JSON object mapping username -> Unix timestamp).

Provenance: recovered from the patch "feat: first commit"
(f0e94303ccaf124d592ce5948c0ca978b915c1a6, Ting-Jun Wang, 2024-03-22), which
adds ``main.py`` and a ``send_log.json`` containing ``{}``. Commit notes:
- the filter detects multiple cards used by one user on the same server;
- the Gmail app password is read from the ``GOOGLE_CODE`` environment variable.
"""
import json
import os
import smtplib
import time
from collections import Counter
from email.mime.text import MIMEText

URL = 'https://www.cmlab.csie.ntu.edu.tw/status-gpu/'
GPU_LIMIT = 2
# Read lazily-validated: a missing variable no longer breaks `import`; it is
# rejected with a clear error right before it is needed (see mail_notify/main).
GOOGLE_CODE = os.environ.get('GOOGLE_CODE')
MAIL_CD_HOUR = 6
SEND_LOG_PATH = 'send_log.json'
REQUEST_TIMEOUT = 10  # seconds; keeps an unattended run from hanging forever

# Raw string: the ASCII-art signature contains backslashes that are not valid
# escape sequences, which a normal string literal warns about.
# NOTE(review): the spacing of the ASCII art was reconstructed - confirm it
# against the original template.
MAIL_MESSAGE = r'''
Hi, {}

提醒您,您目前在 {} 伺服器上已經使用了 {} 張 {},
依照 CMLab 規定,每人在每台 CML workstation 上至多使用 2 顆 GPU,詳細規定請參閱 https://www.cmlab.csie.ntu.edu.tw/wiki/doku.php?id=workstation:rule

為了公平起見,建議您降低 GPU 使用量!
雖然我們不會直接處理,但若有人檢舉,我們會停止您運行的程式!
若有其他問題歡迎來信討論!謝謝您的配合!

* 信件由網管自動化工具寄出,若為錯誤寄送請忽略此信件。

Best,
CMLab Unix Manager, Ting-Jun Wang
CMLab, National Taiwan University
Email: unix_manager@cmlab.csie.ntu.edu.tw

            __  __     _
           / / / /__  (_)_ __
   __  ___/ /_/ / _ \/ /\ \ /
  /  |/  /\____/_//_/_//_\_\____ ____
 / /|_/ / _ `/ _ \/ _ `/ _ `/ -_) __/
/_/  /_/\_,_/_//_/\_,_/\_, /\__/_/
                      /___/
'''


def get_server_gpu_status() -> list:
    '''
        Download the CMLab GPU status page and extract, for every server,
        its name, its GPU model and the users currently occupying its GPUs.

        Input:
            None
        Output:
            servers(list):
                [
                    {
                        'name': 'cml5',
                        'users': [
                            'snsd0805', 'snsd0805', 'timmy'
                        ],
                        'gpu': 'V100'
                    },
                    ...
                ]

                means that on the 'cml5' server (GPU model 'V100') the user
                'snsd0805' is using 2 GPUs and the user 'timmy' is using 1.
        Raises:
            requests.RequestException: the page could not be fetched
                (network error, timeout or HTTP error status).
            RuntimeError: the page has no <pre> status table.
    '''
    # Imported here so the stdlib-only helpers (filter, mail_notify) can be
    # imported without the scraping dependencies being installed.
    import requests
    from bs4 import BeautifulSoup

    # get HTML file
    response = requests.get(URL, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    page = BeautifulSoup(response.text, 'html.parser')

    # find the table which shows the GPU status
    table = page.find('pre')
    if table is None:
        raise RuntimeError(f'no <pre> GPU status table found at {URL}')
    boxes = table.prettify().split('\n\n')

    servers = []
    # one box means one server; the chunk after the last blank line is
    # dropped, exactly as before
    for box in boxes[:-1]:
        box_soup = BeautifulSoup(box, 'html.parser')
        name_tag = box_soup.find('span', class_='f7')  # server name
        gpu_tag = box_soup.find('span', class_='f4')   # GPU name
        if name_tag is None or gpu_tag is None:
            # Not a server box (or the page layout changed): skip it instead
            # of aborting the whole run with an AttributeError.
            continue

        users = []
        # the first line of a box is its header; the remaining lines are
        # scanned for a user span each
        for line in box.split('\n')[1:]:
            user_tag = BeautifulSoup(line, 'html.parser').find('span', class_='f0')
            # 'gdm' entries are ignored (presumably the display-manager
            # account rather than a real user - confirm)
            if user_tag is not None and user_tag.text != 'gdm':
                users.append(user_tag.text)

        servers.append({
            'name': name_tag.text.replace(' ', ''),
            'users': users,
            'gpu': gpu_tag.text.replace(' ', ''),
        })
    return servers


def filter(server_status: dict, limit: int = GPU_LIMIT) -> list:
    '''
        Return the users of one server that exceed the GPU limit.

        Rules now (2024/03/11):
            - each user may use at most 2 GPUs per server

        This is a separate function so that more rules can be added here
        easily later on.

        Input:
            server_status (dict): one entry of get_server_gpu_status(),
                e.g. {'name': 'cml5', 'users': [...], 'gpu': 'V100'}
            limit (int): GPU limit (default: GPU_LIMIT, i.e. 2)
        Output:
            violators (list): e.g. [{'user': 'snsd0805', 'gpu': 3}] means that
                the violator 'snsd0805' is using 3 GPUs. Users appear in the
                order of their first occurrence in server_status['users'].
    '''
    usage = Counter(server_status['users'])
    return [
        {'user': user, 'gpu': count}
        for user, count in usage.items()
        if count > limit
    ]


def _load_send_log() -> dict:
    '''Read the username -> last-mail-timestamp log; a missing file is an empty log.'''
    try:
        with open(SEND_LOG_PATH, encoding='utf-8') as fp:
            return json.load(fp)
    except FileNotFoundError:
        return {}


def _save_send_log(send_log: dict) -> None:
    '''Write the username -> last-mail-timestamp log back to disk.'''
    with open(SEND_LOG_PATH, 'w', encoding='utf-8') as fp:
        json.dump(send_log, fp)


def _cooldown_elapsed(send_log: dict, username: str) -> bool:
    '''Return True if the user was never mailed or the cooldown has passed.'''
    last_sent = send_log.get(username)
    return last_sent is None or time.time() - last_sent >= MAIL_CD_HOUR * 60 * 60


def mail_notify(server_name: str, gpu_name: str, violators: list) -> None:
    '''
        E-mail every violator that has not been mailed within the last
        MAIL_CD_HOUR hours and record the send time in the send log.

        The log is keyed by username only, so the cooldown is shared across
        servers.

        Input:
            server_name (str): server the violation happened on, e.g. 'cml5'
            gpu_name (str): GPU model of that server, e.g. 'V100'
            violators (list): output of filter(), e.g. [{'user': 'snsd0805', 'gpu': 3}]
        Output:
            None
        Raises:
            RuntimeError: a mail has to be sent but GOOGLE_CODE is not set.
            smtplib.SMTPException / OSError: connecting, logging in or
                sending failed. The user's log entry is NOT updated in that
                case, so the next run retries.
    '''
    send_log = _load_send_log()

    for violator in violators:
        username = violator['user']
        print(f' {username}')

        if not _cooldown_elapsed(send_log, username):
            continue
        if GOOGLE_CODE is None:
            raise RuntimeError('GOOGLE_CODE environment variable is not set')

        usage = violator['gpu']
        body = MAIL_MESSAGE.format(username, server_name, usage, gpu_name)
        msg = MIMEText(body, 'plain')  # mail body
        msg['Subject'] = f'[網管通知] 提醒您已經超過 CMLab GPU 使用限制!({username})'
        msg['From'] = 'unix_manager@cmlab.csie.ntu.edu.tw'
        msg['To'] = f'{username}@cmlab.csie.ntu.edu.tw'
        msg['Cc'] = 'unix_manager@cmlab.csie.ntu.edu.tw'

        # The context manager closes the connection even when login or
        # sending raises.
        with smtplib.SMTP('smtp.gmail.com', 587) as smtp:
            smtp.ehlo()
            smtp.starttls()
            smtp.login('snsd0805@cmlab.csie.ntu.edu.tw', GOOGLE_CODE)
            status = smtp.send_message(msg)

        # Record the send only after the server accepted the message. Writing
        # the log first would silence the reminder for a whole cooldown
        # window even when the mail never left.
        send_log[username] = time.time()
        _save_send_log(send_log)

        # send_message returns the refused recipients; {} means none refused
        if status == {}:
            print(f' {username}, 郵件傳送成功!')
        else:
            print(f' {username}, 郵件傳送失敗...')


def main() -> None:
    '''Check every server once and notify its violators.'''
    if GOOGLE_CODE is None:
        # fail before scraping, like the original import-time lookup did
        raise SystemExit('GOOGLE_CODE environment variable is not set')

    for server in get_server_gpu_status():
        print(server['name'], server['gpu'])
        violators = filter(server, GPU_LIMIT)
        print(violators)
        mail_notify(server['name'], server['gpu'], violators)
        print("=" * 20)


if __name__ == '__main__':
    main()