diff --git a/main.py b/main.py
index 48c7f11..fcfa5da 100644
--- a/main.py
+++ b/main.py
@@ -19,8 +19,11 @@ MAIL_MESSAGE = '''
Hi, {}
提醒您,您目前{},
-依照 CMLab 規定,每人在 CML workstation 上至多使用 2 顆 GPU
+依照 CMLab 規定,每人在 CML workstation 使用的 GPU 之上限 VRAM 的總和不得超過 50GB
+詳細請參考 wiki 工作站規定
+
+
為了公平起見,建議您降低 GPU 使用量!
雖然我們不會直接處理,但若有人檢舉,我們會停止您運行的程式!
若有其他特殊需求請來信說明!謝謝您的配合!
@@ -84,7 +87,7 @@ def get_server_gpu_status() -> list:
server = soup.find('span', class_='f7')
if server:
server_name = server.text.replace(' ', '')
- gpu_names = []
+ gpu_infos = []
users = []
gpus = box.split('\n')
@@ -92,6 +95,9 @@ def get_server_gpu_status() -> list:
if 'f6' in i: # if this line is for a single GPU informations
soup = BeautifulSoup(i, 'html.parser')
+ # parse this GPU's maximum VRAM in MB from the second 'f3' span
+ max_VRAM = int(soup.findAll('span', class_='f3')[1].text)
+
# get all users who is using this server's GPU resources.
user_objs = soup.findAll('span', class_='f0')
user_in_this_gpu = set()
@@ -105,15 +111,16 @@ def get_server_gpu_status() -> list:
# get gpu names
gpu_name = soup.find('span', class_='f4').text.replace(' ', '')
+ print(gpu_name, max_VRAM)
# log gpu name & users on this GPU
- gpu_names.append(gpu_name)
+ gpu_infos.append({'name': gpu_name, 'vram': max_VRAM})
users.append(list(user_in_this_gpu))
servers.append({
'name': server_name,
'users': users,
- 'gpus': gpu_names
+ 'gpus': gpu_infos,
})
return servers
@@ -136,20 +143,25 @@ def filter(server_status: list, limit: int = 2) -> list:
for server in server_status:
print(server['name'])
- for gpu_index, gpu_name in enumerate(server['gpus']):
- print(" ", gpu_index, gpu_name, server['users'][gpu_index])
+ for gpu_index, gpu_info in enumerate(server['gpus']):
+ print(" ", gpu_index, gpu_info['name'], server['users'][gpu_index])
for user in server['users'][gpu_index]:
if user not in usage:
- usage[user] = [{'server': server['name'], 'gpu': gpu_name}]
+ usage[user] = [{'server': server['name'], 'gpu': gpu_info['name'], 'vram': gpu_info['vram']}]
else:
- usage[user].append({'server': server['name'], 'gpu': gpu_name})
+ usage[user].append({'server': server['name'], 'gpu': gpu_info['name'], 'vram': gpu_info['vram']})
print('-')
violators = []
for user, state in usage.items():
- if len(state) > limit:
- violators.append({'username': user, 'usage': state})
+ # if len(state) > limit:
+ # violators.append({'username': user, 'usage': state})
+ vrams = 0
+ for i in state:
+ vrams += i['vram']
+ if vrams >= 50000:
+ violators.append({'username': user, 'usage': state})
return violators
@@ -169,7 +181,9 @@ def mail_notify(violators: list) -> None:
def get_usage_msg(usage: list) -> str:
server_usage = {}
ans = ""
+ vrams = 0
for gpu in usage:
+ vrams += gpu['vram']
if gpu['server'] not in server_usage:
server_usage[gpu['server']] = { gpu['gpu']: 1 }
else:
@@ -181,7 +195,9 @@ def mail_notify(violators: list) -> None:
for server, gpus in server_usage.items():
for gpu, count in gpus.items():
ans += f"在 {server} 上使用 {gpu} * {count}, "
- return ans[:-2]
+ ans = ans[:-2]
+ ans += f',GPU VRAM 上限之總和為 {vrams} MB VRAM。'
+ return ans
print(" ===== 寄送 ===== ")
@@ -208,7 +224,9 @@ def mail_notify(violators: list) -> None:
status = mailer.send('unix_manager@cmlab.csie.ntu.edu.tw', f'{username}@cmlab.csie.ntu.edu.tw' if not DEBUG_MODE else 'snsd0805@cmlab.csie.ntu.edu.tw', \
f'[網管通知] 提醒您已經超過 CMLab GPU 使用限制!({username})', msg)
- if status == {}:
+ print(status)
+
+ if status:
print(f' {username}, 郵件傳送成功!')
else:
print(f' {username}, 郵件傳送失敗...')