"""Notify CMLab users who occupy more GPUs than the lab policy allows.

The script scrapes the GPU status page at ``URL``, counts how many GPUs each
user currently occupies across all servers, and e-mails every user above the
limit. A JSON log (``SEND_LOG_PATH``) remembers when each user was last
notified so that a user is mailed at most once per ``MAIL_CD_HOUR`` hours.
"""

import json
import os
import smtplib
import time
from email.mime.text import MIMEText

import requests
from bs4 import BeautifulSoup

from mailer import Mailer

DEBUG_MODE = False
URL = 'https://www.cmlab.csie.ntu.edu.tw/status-gpu/'
GPU_LIMIT = 2
GOOGLE_OWNER = "snsd0805@cmlab.csie.ntu.edu.tw"
GOOGLE_CODE = os.environ['GOOGLE_CODE']
MAIL_CD_HOUR = 12

# Seconds to wait for the status page before giving up instead of hanging.
REQUEST_TIMEOUT_SEC = 30
# JSON file mapping username -> {'time': <epoch seconds>, 'week_count': <int>}.
# The path is relative to the current working directory.
SEND_LOG_PATH = 'send_log.json'

# Raw string: the ASCII-art signature contains backslashes that are not valid
# escape sequences. The text is filled in with str.format(username, usage).
MAIL_MESSAGE = r''' Hi, {}

提醒您,您目前{},
依照 CMLab 規定,每人在 CML workstation 上至多使用 2 顆 GPU

為了公平起見,建議您降低 GPU 使用量!
雖然我們不會直接處理,但若有人檢舉,我們會停止您運行的程式!
若有其他特殊需求請來信說明!謝謝您的配合!

* 信件由網管自動化工具寄出,若為錯誤寄送請忽略此信件。

Best,
CMLab Unix Manager, Ting-Jun Wang
CMLab, National Taiwan University
Email: unix_manager@cmlab.csie.ntu.edu.tw
            __  __     _             
           / / / /__  (_)_ __        
   __  ___/ /_/ / _ \/ /\ \ /        
  /  |/  /\____/_//_/_//_\_\____ ____
 / /|_/ / _ `/ _ \/ _ `/ _ `/ -_) __/
/_/  /_/\_,_/_//_/\_,_/\_, /\__/_/   
                      /___/          
'''

mailer = Mailer(GOOGLE_OWNER, GOOGLE_CODE)


def get_server_gpu_status() -> list:
    """Scrape the CMLab status page and list the GPU users of every server.

    Input:
        None

    Output:
        servers (list): one dict per server, e.g.
            [
                {
                    'name': 'cml5',
                    'users': [
                        ['snsd0805', 'timmy'],
                        ['timmy']
                    ],
                    'gpus': ['V100', 'V100']
                },
                ...
            ]
            means that 'cml5' has 2 'V100' GPUs, 'snsd0805' is using 1 GPU
            and 'timmy' is using 2 GPUs. 'users' and 'gpus' are parallel
            lists indexed by GPU; each per-GPU user list is sorted.

    Raises:
        requests.RequestException: the page could not be fetched, the request
            timed out, or the server answered with an HTTP error status.
        RuntimeError: the page contains no <pre> element.
    """
    response = requests.get(URL, timeout=REQUEST_TIMEOUT_SEC)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # The GPU status table is rendered inside a <pre> element.
    table = soup.find('pre')
    if table is None:
        raise RuntimeError(f'No <pre> status table found at {URL}')

    # One blank-line-separated box per server; the last chunk is skipped.
    boxes = table.prettify().split('\n\n')

    servers = []
    for box in boxes[:-1]:
        # The server name is in a span with class 'f7'.
        server = BeautifulSoup(box, 'html.parser').find('span', class_='f7')
        if server is None:
            continue
        server_name = server.text.replace(' ', '')

        gpu_names = []
        users = []
        for line in box.split('\n'):
            # A line mentioning 'f6' describes a single GPU.
            if 'f6' not in line:
                continue
            line_soup = BeautifulSoup(line, 'html.parser')

            # The GPU model name is in a span with class 'f4'. Without it
            # the line is not a usable GPU row, so skip it rather than crash.
            gpu_span = line_soup.find('span', class_='f4')
            if gpu_span is None:
                continue

            # Usernames are in spans with class 'f0'; the system accounts
            # 'gdm' and 'root' are not counted.
            users_on_gpu = {
                span.text
                for span in line_soup.find_all('span', class_='f0')
                if span.text not in ('gdm', 'root')
            }

            gpu_names.append(gpu_span.text.replace(' ', ''))
            # Sorted so the output does not depend on set iteration order.
            users.append(sorted(users_on_gpu))

        servers.append({
            'name': server_name,
            'users': users,
            'gpus': gpu_names
        })

    return servers


def filter(server_status: list, limit: int = 2) -> list:
    """Return the users who occupy more than `limit` GPUs in total.

    Rules now (2024/03/22):
        - Each user may use at most 2 GPUs.

    This is a separate function so that more filter rules can be added here
    easily. It also prints the per-server GPU occupancy while counting.

    Input:
        server_status (list): output of get_server_gpu_status()
        limit (int): GPU limit (default: 2)

    Output:
        violators (list): one dict per violating user, e.g.
            [
                {
                    'username': 'snsd0805',
                    'usage': [
                        {'server': 'cml5', 'gpu': 'V100'},
                        {'server': 'cml5', 'gpu': 'V100'},
                        {'server': 'cml6', 'gpu': 'V100'}
                    ]
                }
            ]
            means that 'snsd0805' occupies 3 GPUs; 'usage' has one entry per
            occupied GPU.
    """
    usage = {}
    for server in server_status:
        print(server['name'])
        gpu_rows = zip(server['gpus'], server['users'])
        for gpu_index, (gpu_name, gpu_users) in enumerate(gpu_rows):
            print(" ", gpu_index, gpu_name, gpu_users)
            for user in gpu_users:
                usage.setdefault(user, []).append(
                    {'server': server['name'], 'gpu': gpu_name}
                )
        print('-')

    return [
        {'username': user, 'usage': state}
        for user, state in usage.items()
        if len(state) > limit
    ]


def _load_send_log(path: str = SEND_LOG_PATH) -> dict:
    """Read the notification log; a missing file means nobody was notified."""
    try:
        with open(path, encoding='utf-8') as fp:
            return json.load(fp)
    except FileNotFoundError:
        return {}


def _save_send_log(send_log: dict, path: str = SEND_LOG_PATH) -> None:
    """Write the notification log back to disk as JSON."""
    with open(path, 'w', encoding='utf-8') as fp:
        json.dump(send_log, fp)


def _should_send(log: dict, username: str) -> bool:
    """Tell whether `username` may be mailed now (cooldown elapsed or unseen)."""
    if DEBUG_MODE or username not in log:
        return True
    # Remind each user at most once every MAIL_CD_HOUR hours.
    return time.time() - log[username]['time'] >= MAIL_CD_HOUR * 60 * 60


def _format_usage(usage: list) -> str:
    """Summarise per-GPU usage entries as a comma-separated sentence fragment."""
    # server name -> {gpu model -> number of GPUs occupied}, in first-seen order
    server_usage = {}
    for gpu in usage:
        per_server = server_usage.setdefault(gpu['server'], {})
        per_server[gpu['gpu']] = per_server.get(gpu['gpu'], 0) + 1

    return ", ".join(
        f"在 {server} 上使用 {gpu} * {count}"
        for server, gpus in server_usage.items()
        for gpu, count in gpus.items()
    )


def mail_notify(violators: list) -> None:
    """E-mail every violator whose notification cooldown has elapsed.

    Uses the module-level `mailer` to send MAIL_MESSAGE. In DEBUG_MODE the
    cooldown is ignored and mail goes to a fixed debug address instead of the
    violator. A send is treated as successful when `mailer.send` returns an
    empty dict.

    Input:
        violators (list): output of filter()

    Output:
        None. Progress is printed, and the notification log is rewritten
        after each successful send.
    """
    print(" ===== 寄送 ===== ")

    # Last notification time per user.
    send_log = _load_send_log()

    for violator in violators:
        username = violator['username']
        if not _should_send(send_log, username):
            continue

        usage_msg = _format_usage(violator['usage'])
        print(f' {username} {usage_msg}')

        msg = MAIL_MESSAGE.format(username, usage_msg)
        if DEBUG_MODE:
            recipient = 'snsd0805@cmlab.csie.ntu.edu.tw'
        else:
            recipient = f'{username}@cmlab.csie.ntu.edu.tw'
        status = mailer.send(
            'unix_manager@cmlab.csie.ntu.edu.tw',
            recipient,
            f'[網管通知] 提醒您已經超過 CMLab GPU 使用限制!({username})',
            msg,
        )

        if status == {}:
            print(f' {username}, 郵件傳送成功!')
            # Start the cooldown only after the mail actually went out, so a
            # failed delivery is retried on the next run.
            # week_count goes up by one per recorded notification; nothing in
            # this file resets it.
            if username in send_log:
                week_count = send_log[username]['week_count'] + 1
            else:
                week_count = 1
            send_log[username] = {
                'time': time.time(),
                'week_count': week_count
            }
            _save_send_log(send_log)
        else:
            print(f' {username}, 郵件傳送失敗...')


if __name__ == '__main__':
    server_status = get_server_gpu_status()
    violators = filter(server_status, GPU_LIMIT)
    mail_notify(violators)