feat: change to new rules (limit each user's total GPU VRAM to 50GB instead of a 2-GPU cap)
This commit is contained in:
parent
3073c248fe
commit
26137b8a15
42
main.py
42
main.py
@ -19,8 +19,11 @@ MAIL_MESSAGE = '''
|
|||||||
Hi, {}<br>
|
Hi, {}<br>
|
||||||
<br>
|
<br>
|
||||||
提醒您,您目前{},<br>
|
提醒您,您目前{},<br>
|
||||||
依照 CMLab 規定,每人在 CML workstation 上至多使用 2 顆 GPU <br>
|
依照 CMLab 規定,每人在 CML workstation 使用的 GPU 之上限 VRAM 的總和不得超過 50GB<br>
|
||||||
|
詳細請參考 <a href='https://www.cmlab.csie.ntu.edu.tw/wiki/doku.php?id=workstation:rule#rule_1_%E5%9F%BA%E6%9C%AC%E8%A6%8F%E5%89%87'>wiki 工作站規定</a>
|
||||||
<br>
|
<br>
|
||||||
|
<br>
|
||||||
|
|
||||||
為了公平起見,建議您降低 GPU 使用量!<br>
|
為了公平起見,建議您降低 GPU 使用量!<br>
|
||||||
雖然我們不會直接處理,但若有人檢舉,我們會停止您運行的程式!<br>
|
雖然我們不會直接處理,但若有人檢舉,我們會停止您運行的程式!<br>
|
||||||
若有其他特殊需求請來信說明!謝謝您的配合!<br>
|
若有其他特殊需求請來信說明!謝謝您的配合!<br>
|
||||||
@ -84,7 +87,7 @@ def get_server_gpu_status() -> list:
|
|||||||
server = soup.find('span', class_='f7')
|
server = soup.find('span', class_='f7')
|
||||||
if server:
|
if server:
|
||||||
server_name = server.text.replace(' ', '')
|
server_name = server.text.replace(' ', '')
|
||||||
gpu_names = []
|
gpu_infos = []
|
||||||
|
|
||||||
users = []
|
users = []
|
||||||
gpus = box.split('\n')
|
gpus = box.split('\n')
|
||||||
@ -92,6 +95,9 @@ def get_server_gpu_status() -> list:
|
|||||||
if 'f6' in i: # if this line is for a single GPU informations
|
if 'f6' in i: # if this line is for a single GPU informations
|
||||||
soup = BeautifulSoup(i, 'html.parser')
|
soup = BeautifulSoup(i, 'html.parser')
|
||||||
|
|
||||||
|
# get the max VRAM of this GPU (in MB)
|
||||||
|
max_VRAM = int(soup.findAll('span', class_='f3')[1].text)
|
||||||
|
|
||||||
# get all users who are using this server's GPU resources.
|
# get all users who are using this server's GPU resources.
|
||||||
user_objs = soup.findAll('span', class_='f0')
|
user_objs = soup.findAll('span', class_='f0')
|
||||||
user_in_this_gpu = set()
|
user_in_this_gpu = set()
|
||||||
@ -105,15 +111,16 @@ def get_server_gpu_status() -> list:
|
|||||||
|
|
||||||
# get gpu names
|
# get gpu names
|
||||||
gpu_name = soup.find('span', class_='f4').text.replace(' ', '')
|
gpu_name = soup.find('span', class_='f4').text.replace(' ', '')
|
||||||
|
print(gpu_name, max_VRAM)
|
||||||
|
|
||||||
# log gpu name & users on this GPU
|
# log gpu info (name & max VRAM) & users on this GPU
|
||||||
gpu_names.append(gpu_name)
|
gpu_infos.append({'name': gpu_name, 'vram': max_VRAM})
|
||||||
users.append(list(user_in_this_gpu))
|
users.append(list(user_in_this_gpu))
|
||||||
|
|
||||||
servers.append({
|
servers.append({
|
||||||
'name': server_name,
|
'name': server_name,
|
||||||
'users': users,
|
'users': users,
|
||||||
'gpus': gpu_names
|
'gpus': gpu_infos,
|
||||||
})
|
})
|
||||||
return servers
|
return servers
|
||||||
|
|
||||||
@ -136,20 +143,25 @@ def filter(server_status: list, limit: int = 2) -> list:
|
|||||||
|
|
||||||
for server in server_status:
|
for server in server_status:
|
||||||
print(server['name'])
|
print(server['name'])
|
||||||
for gpu_index, gpu_name in enumerate(server['gpus']):
|
for gpu_index, gpu_info in enumerate(server['gpus']):
|
||||||
print(" ", gpu_index, gpu_name, server['users'][gpu_index])
|
print(" ", gpu_index, gpu_info['name'], server['users'][gpu_index])
|
||||||
for user in server['users'][gpu_index]:
|
for user in server['users'][gpu_index]:
|
||||||
if user not in usage:
|
if user not in usage:
|
||||||
usage[user] = [{'server': server['name'], 'gpu': gpu_name}]
|
usage[user] = [{'server': server['name'], 'gpu': gpu_info['name'], 'vram': gpu_info['vram']}]
|
||||||
else:
|
else:
|
||||||
usage[user].append({'server': server['name'], 'gpu': gpu_name})
|
usage[user].append({'server': server['name'], 'gpu': gpu_info['name'], 'vram': gpu_info['vram']})
|
||||||
|
|
||||||
print('-')
|
print('-')
|
||||||
|
|
||||||
violators = []
|
violators = []
|
||||||
for user, state in usage.items():
|
for user, state in usage.items():
|
||||||
if len(state) > limit:
|
# if len(state) > limit:
|
||||||
violators.append({'username': user, 'usage': state})
|
# violators.append({'username': user, 'usage': state})
|
||||||
|
vrams = 0
|
||||||
|
for i in state:
|
||||||
|
vrams += i['vram']
|
||||||
|
if vrams >= 50000:
|
||||||
|
violators.append({'username': user, 'usage': state})
|
||||||
|
|
||||||
return violators
|
return violators
|
||||||
|
|
||||||
@ -169,7 +181,9 @@ def mail_notify(violators: list) -> None:
|
|||||||
def get_usage_msg(usage: list) -> str:
|
def get_usage_msg(usage: list) -> str:
|
||||||
server_usage = {}
|
server_usage = {}
|
||||||
ans = ""
|
ans = ""
|
||||||
|
vrams = 0
|
||||||
for gpu in usage:
|
for gpu in usage:
|
||||||
|
vrams += gpu['vram']
|
||||||
if gpu['server'] not in server_usage:
|
if gpu['server'] not in server_usage:
|
||||||
server_usage[gpu['server']] = { gpu['gpu']: 1 }
|
server_usage[gpu['server']] = { gpu['gpu']: 1 }
|
||||||
else:
|
else:
|
||||||
@ -181,7 +195,9 @@ def mail_notify(violators: list) -> None:
|
|||||||
for server, gpus in server_usage.items():
|
for server, gpus in server_usage.items():
|
||||||
for gpu, count in gpus.items():
|
for gpu, count in gpus.items():
|
||||||
ans += f"在 {server} 上使用 {gpu} * {count}, "
|
ans += f"在 {server} 上使用 {gpu} * {count}, "
|
||||||
return ans[:-2]
|
ans = ans[:-2]
|
||||||
|
ans += f',GPU VRAM 上限之總和為 {vrams} MB VRAM。'
|
||||||
|
return ans
|
||||||
|
|
||||||
print(" ===== 寄送 ===== ")
|
print(" ===== 寄送 ===== ")
|
||||||
|
|
||||||
@ -208,7 +224,9 @@ def mail_notify(violators: list) -> None:
|
|||||||
status = mailer.send('unix_manager@cmlab.csie.ntu.edu.tw', f'{username}@cmlab.csie.ntu.edu.tw' if not DEBUG_MODE else 'snsd0805@cmlab.csie.ntu.edu.tw', \
|
status = mailer.send('unix_manager@cmlab.csie.ntu.edu.tw', f'{username}@cmlab.csie.ntu.edu.tw' if not DEBUG_MODE else 'snsd0805@cmlab.csie.ntu.edu.tw', \
|
||||||
f'[網管通知] 提醒您已經超過 CMLab GPU 使用限制!({username})', msg)
|
f'[網管通知] 提醒您已經超過 CMLab GPU 使用限制!({username})', msg)
|
||||||
|
|
||||||
if status == {}:
|
print(status)
|
||||||
|
|
||||||
|
if status:
|
||||||
print(f' {username}, 郵件傳送成功!')
|
print(f' {username}, 郵件傳送成功!')
|
||||||
else:
|
else:
|
||||||
print(f' {username}, 郵件傳送失敗...')
|
print(f' {username}, 郵件傳送失敗...')
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user