feat: first commit
- the filter detects multiple cards in use on the same server, but only retrieves the first user in the list. - get the Gmail private code from the environment variables.
This commit is contained in:
commit
f0e94303cc
184
main.py
Normal file
184
main.py
Normal file
@ -0,0 +1,184 @@
|
||||
import requests
|
||||
import json
|
||||
import smtplib
|
||||
import time
|
||||
from bs4 import BeautifulSoup
|
||||
from email.mime.text import MIMEText
|
||||
import os
|
||||
|
||||
# Web page requested by get_server_gpu_status() to read the GPU status.
URL = 'https://www.cmlab.csie.ntu.edu.tw/status-gpu/'
|
||||
# GPU limit per user per server (the lab rule quoted in MAIL_MESSAGE allows at most 2).
GPU_LIMIT = 2
|
||||
# Secret passed to smtp.login() in mail_notify(). It is read from the
# GOOGLE_CODE environment variable, so loading this module raises KeyError
# when that variable is not set.
GOOGLE_CODE = os.environ['GOOGLE_CODE']
|
||||
# Cooldown, in hours, before mail_notify() mails the same username again.
MAIL_CD_HOUR = 6
|
||||
# Body template of the notification mail. mail_notify() fills the four
# positional '{}' fields with str.format, in this order: username, server
# name, number of GPUs in use, GPU name.
MAIL_MESSAGE = '''
|
||||
Hi, {}
|
||||
|
||||
提醒您,您目前在 {} 伺服器上已經使用了 {} 張 {},
|
||||
依照 CMLab 規定,每人在每台 CML workstation 上至多使用 2 顆 GPU,詳細規定請參閱 https://www.cmlab.csie.ntu.edu.tw/wiki/doku.php?id=workstation:rule
|
||||
|
||||
為了公平起見,建議您降低 GPU 使用量!
|
||||
雖然我們不會直接處理,但若有人檢舉,我們會停止您運行的程式!
|
||||
若有其他問題歡迎來信討論!謝謝您的配合!
|
||||
|
||||
* 信件由網管自動化工具寄出,若為錯誤寄送請忽略此信件。
|
||||
|
||||
Best,
|
||||
CMLab Unix Manager, Ting-Jun Wang
|
||||
CMLab, National Taiwan University
|
||||
Email: unix_manager@cmlab.csie.ntu.edu.tw
|
||||
|
||||
__ __ _
|
||||
/ / / /__ (_)_ __
|
||||
__ ___/ /_/ / _ \/ /\ \ /
|
||||
/ |/ /\____/_//_/_//_\_\____ ____
|
||||
/ /|_/ / _ `/ _ \/ _ `/ _ `/ -_) __/
|
||||
/_/ /_/\_,_/_//_/\_,_/\_, /\__/_/
|
||||
/___/
|
||||
'''
|
||||
|
||||
def get_server_gpu_status(timeout: float = 10.0) -> list:
    '''
    Download the CMLab GPU status page and list the GPU users of every server.

    The first <pre> element of the page is split on blank lines into "boxes",
    one box per server; the final chunk after the last blank line is
    discarded. Inside a box the first <span class="f7"> gives the server name
    and the first <span class="f4"> gives the GPU name (spaces removed from
    both). Every line after the first one contributes the text of its
    <span class="f0">, if it has one, as a user entry. The 'gdm' account is
    skipped.

    Input:
        timeout (float): seconds to wait for the HTTP request (default: 10)

    Output:
        servers (list):
            [
                {
                    'name': 'cml5',
                    'users': [
                        'snsd0805', 'snsd0805', 'timmy'
                    ],
                    'gpu': 'V100'
                },
                ...
            ]

        means that on the 'cml5' server (GPU 'V100') the user 'snsd0805' is
        using 2 GPUs and the user 'timmy' is using 1 GPU now.

    Raises:
        requests.RequestException: the request failed, timed out or returned
            an HTTP error status.
        RuntimeError: the page does not contain a <pre> element.
    '''
    # A timeout keeps an unattended run from hanging forever on a dead server,
    # and raise_for_status() stops us from parsing an HTTP error page.
    response = requests.get(URL, timeout=timeout)
    response.raise_for_status()
    page = BeautifulSoup(response.text, 'html.parser')

    # find the table which shows the GPU status
    table = page.find('pre')
    if table is None:
        raise RuntimeError(f'no <pre> GPU status table found at {URL}')
    boxes = table.prettify().split('\n\n')

    servers = []

    # retrieve all servers, one box means one server
    for box in boxes[:-1]:
        box_soup = BeautifulSoup(box, 'html.parser')
        name_span = box_soup.find('span', class_='f7')
        gpu_span = box_soup.find('span', class_='f4')
        if name_span is None or gpu_span is None:
            # Not a server box (unexpected markup): skip it instead of
            # aborting the scan of the remaining servers.
            continue

        server_name = name_span.text.replace(' ', '')
        gpu_name = gpu_span.text.replace(' ', '')

        # get all users who are using this server's GPU resources;
        # the first line of the box is the header, so start at the second.
        users = []
        for line in box.split('\n')[1:]:
            user_span = BeautifulSoup(line, 'html.parser').find('span', class_='f0')
            if user_span is None:
                continue
            username = user_span.text
            if username != 'gdm':
                users.append(username)

        servers.append({
            'name': server_name,
            'users': users,
            'gpu': gpu_name
        })
    return servers
|
||||
|
||||
def filter(server_status: dict, limit: int = 2) -> list:
    '''
    Report the users of one server that occupy more GPUs than allowed.

    This is the single place where usage rules live, so further rules can be
    added here later.

    Rules now (2024/03/11):
        - each user may use at most 2 cards per server

    Input:
        server_status (dict): one server entry, e.g. {'name': 'cml5', ...};
            only its 'users' list is read, one entry per GPU in use.
        limit (int): GPU limit (default: 2)

    Output:
        violators (list): one dict per user whose count is strictly greater
            than the limit, in order of first appearance,
            e.g. [{'user': 'snsd0805', 'gpu': 3}] means that 'snsd0805' is
            using 3 GPUs.
    '''
    # Tally how many times each username appears in the server's user list.
    usage = {}
    for name in server_status['users']:
        usage[name] = usage.get(name, 0) + 1

    return [
        {'user': name, 'gpu': count}
        for name, count in usage.items()
        if count > limit
    ]
|
||||
|
||||
def mail_notify(server_name: str, gpu_name: str, violators: list) -> None:
    '''
    Mail every violator of one server, honouring a per-user cooldown.

    For each violator the last send time is looked up in 'send_log.json'
    (a JSON object mapping username -> UNIX timestamp). If the user was never
    mailed, or at least MAIL_CD_HOUR hours have passed, a mail built from
    MAIL_MESSAGE is sent through smtp.gmail.com:587 (STARTTLS) to
    '<username>@cmlab.csie.ntu.edu.tw' with a Cc to the unix manager address.

    The log entry is written only after the SMTP server has accepted the
    message, so a failed connection/login/send does not silence the user for
    the whole cooldown period.

    NOTE: the log is keyed by username only, so a user who exceeds the limit
    on several servers is mailed for just one of them per cooldown period.

    Input:
        server_name (str): server the violators were found on
        gpu_name (str): GPU name of that server
        violators (list): e.g. [{'user': 'snsd0805', 'gpu': 3}]

    Output:
        None

    Raises:
        smtplib.SMTPException / OSError: connecting, logging in or sending
            failed; remaining violators are not processed.
    '''
    def check_send(log: dict, username: str) -> bool:
        # True when the user was never mailed or the cooldown has elapsed.
        if username not in log:
            return True
        return time.time() - log[username] >= (MAIL_CD_HOUR*60*60)

    # get last send time; a missing log just means nobody was mailed yet
    try:
        with open('send_log.json') as fp:
            send_log = json.load(fp)
    except FileNotFoundError:
        send_log = {}

    for violator in violators:
        username = violator['user']
        print(f' {username}')

        if not check_send(send_log, username):
            continue

        usage = violator['gpu']
        body = MAIL_MESSAGE.format(username, server_name, usage, gpu_name)
        msg = MIMEText(body, 'plain')
        msg['Subject'] = f'[網管通知] 提醒您已經超過 CMLab GPU 使用限制!({username})'
        msg['From'] = 'unix_manager@cmlab.csie.ntu.edu.tw'
        msg['To'] = f'{username}@cmlab.csie.ntu.edu.tw'
        msg['Cc'] = 'unix_manager@cmlab.csie.ntu.edu.tw'

        # The context manager closes the connection even when login or
        # sending raises.
        with smtplib.SMTP('smtp.gmail.com', 587) as smtp:
            smtp.ehlo()
            smtp.starttls()
            smtp.login('snsd0805@cmlab.csie.ntu.edu.tw', GOOGLE_CODE)
            # send_message returns a dict of refused recipients ({} = none).
            status = smtp.send_message(msg)

        # update log: reached only when the server accepted the message
        send_log[username] = time.time()
        with open('send_log.json', 'w') as fp:
            json.dump(send_log, fp)

        if status == {}:
            print(f' {username}, 郵件傳送成功!')
        else:
            print(f' {username}, 郵件傳送失敗...')
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Scan every server once: print its name and GPU, print the users over
    # the limit, then mail them.
    server_status = get_server_gpu_status()
    for server in server_status:
        print(server['name'], server['gpu'])
        # Use the module-level limit instead of repeating the literal 2.
        violators = filter(server, GPU_LIMIT)
        print(violators)
        mail_notify(server['name'], server['gpu'], violators)
        print("=" * 20)
|
||||
1
send_log.json
Normal file
1
send_log.json
Normal file
@ -0,0 +1 @@
|
||||
{}
|
||||
Loading…
Reference in New Issue
Block a user