From f0e94303ccaf124d592ce5948c0ca978b915c1a6 Mon Sep 17 00:00:00 2001
From: Ting-Jun Wang <levi900227@gmail.com>
Date: Fri, 22 Mar 2024 15:50:43 +0800
Subject: [PATCH] feat: first commit

- the filter detects multi-card usage on the same server, but only
  retrieves the first user in the list.
- get the Gmail private code from the environment variables.
---
 main.py       | 184 ++++++++++++++++++++++++++++++++++++++++++++++++++
 send_log.json |   1 +
 2 files changed, 185 insertions(+)
 create mode 100644 main.py
 create mode 100644 send_log.json

diff --git a/main.py b/main.py
new file mode 100644
index 0000000..c3b032f
--- /dev/null
+++ b/main.py
@@ -0,0 +1,184 @@
+import requests
+import json
+import smtplib
+import time
+from bs4 import BeautifulSoup
+from email.mime.text import MIMEText
+import os
+
+URL = 'https://www.cmlab.csie.ntu.edu.tw/status-gpu/'  # GPU status page fetched by get_server_gpu_status()
+GPU_LIMIT = 2  # per-user GPU quota on one server (the rule quoted in MAIL_MESSAGE)
+GOOGLE_CODE = os.environ['GOOGLE_CODE']  # SMTP login secret; KeyError at import time when unset
+MAIL_CD_HOUR = 6  # hours to wait before mailing the same user again
+MAIL_MESSAGE = r'''
+Hi, {}
+
+提醒您，您目前在 {} 伺服器上已經使用了 {} 張 {}，
+依照 CMLab 規定，每人在每台 CML workstation 上至多使用 2 顆 GPU，詳細規定請參閱 https://www.cmlab.csie.ntu.edu.tw/wiki/doku.php?id=workstation:rule
+
+為了公平起見，建議您降低 GPU 使用量！
+雖然我們不會直接處理，但若有人檢舉，我們會停止您運行的程式！
+若有其他問題歡迎來信討論！謝謝您的配合！
+
+* 信件由網管自動化工具寄出，若為錯誤寄送請忽略此信件。
+
+Best,
+CMLab Unix Manager, Ting-Jun Wang
+CMLab, National Taiwan University
+Email: unix_manager@cmlab.csie.ntu.edu.tw
+
+            __  __     _
+           / / / /__  (_)_ __
+   __  ___/ /_/ / _ \/ /\ \ /
+  /  |/  /\____/_//_/_//_\_\____ ____
+ / /|_/ / _ `/ _ \/ _ `/ _ `/ -_) __/
+/_/  /_/\_,_/_//_/\_,_/\_, /\__/_/
+                      /___/
+'''  # raw string: the backslashes of the ASCII art are literal; {} = user, server, GPU count, GPU name
+
+def get_server_gpu_status() -> list:
+    '''
+        Download the CMLab GPU status page and extract, for every server,
+        its name, its GPU model and the user of each GPU that is in use.
+
+        Input:
+            None
+        Output:
+            servers(list):
+                [
+                    {
+                        'name': 'cml5',
+                        'users': [
+                            'snsd0805', 'snsd0805', 'timmy'
+                        ],
+                        'gpu': 'V100'
+                    },
+                    ...
+                ]
+
+            A username is listed once per GPU it occupies: here 'snsd0805' is
+            using 2 of cml5's 'V100' cards and 'timmy' is using 1.
+            The 'gdm' account is never listed.
+        Raises:
+            requests.RequestException: the page could not be fetched within
+                the timeout, or the server answered with an HTTP error status.
+    '''
+    servers = []
+
+    # get HTML file
+    # The timeout keeps an unresponsive web server from hanging the script,
+    # and raise_for_status() keeps us from parsing an HTTP error page.
+    response = requests.get(URL, timeout=30)
+    response.raise_for_status()
+    page = BeautifulSoup(response.text, 'html.parser')
+
+    # find the <pre> table which shows the GPU status; one blank-line
+    # separated box means one server, and the last box is dropped
+    table = page.find('pre')
+    boxes = table.prettify().split('\n\n')
+
+    for box in boxes[:-1]:
+        header = BeautifulSoup(box, 'html.parser')
+        # span.f7 holds the server name, span.f4 the GPU name
+        server_name = header.find('span', class_='f7').text.replace(' ', '')
+        gpu_name = header.find('span', class_='f4').text.replace(' ', '')
+
+        users = []
+        # every line after the first is searched on its own for a span.f0 user
+        for line in box.split('\n')[1:]:
+            user_obj = BeautifulSoup(line, 'html.parser').find('span', class_='f0')
+            if user_obj is not None and user_obj.text != 'gdm':
+                users.append(user_obj.text)
+
+        servers.append({
+            'name': server_name,
+            'users': users,
+            'gpu': gpu_name
+        })
+    return servers
+
+def filter(server_status: dict, limit: int = 2) -> list:
+    '''
+        Return the users of one server who occupy more GPUs than allowed.
+
+        Rules now (2024/03/11):
+            - each user may use at most 2 cards per server
+
+        Kept as its own function so more rules can be added here later.
+        NOTE: at module level this name shadows the built-in filter().
+
+        Input:
+            server_status (dict): one entry of get_server_gpu_status(),
+                e.g. {'name': 'cml5', 'users': [...], 'gpu': 'V100'};
+                only 'users' is read, one username per occupied GPU.
+            limit (int): GPU limit (default: 2)
+        Output:
+            violators (list): one dict per user whose GPU count is strictly
+                greater than limit, in order of first appearance, e.g.
+                [{'user': 'snsd0805', 'gpu': 3}] means that the violator
+                'snsd0805' is using 3 GPU.
+    '''
+    # function-scope import: the module's import block stays untouched
+    from collections import Counter
+
+    # Counter preserves first-seen order, so violators come out in that order.
+    usage = Counter(server_status['users'])
+    return [
+        {'user': user, 'gpu': count}
+        for user, count in usage.items()
+        if count > limit
+    ]
+
+def mail_notify(server_name: str, gpu_name: str, violators: list) -> None:
+    '''
+        Mail a reminder to each violator not mailed within the last MAIL_CD_HOUR hours.
+        send_log.json (username -> time of last mail) is rewritten after each mail is sent.
+
+        Input:
+            server_name (str), gpu_name (str): server and GPU name quoted in the mail
+            violators (list): output of filter(), e.g. [{'user': 'snsd0805', 'gpu': 3}]
+    '''
+    def check_send(log: dict, username: str) -> bool:
+        # True when the user was never mailed or the cool-down has elapsed
+        return username not in log or time.time() - log[username] >= MAIL_CD_HOUR * 60 * 60
+
+    # get last send time
+    with open('send_log.json') as fp:
+        send_log = json.load(fp)
+
+    for violator in violators:
+        username = violator['user']
+        print(f'    {username}')
+        if not check_send(send_log, username):
+            continue
+
+        body = MAIL_MESSAGE.format(username, server_name, violator['gpu'], gpu_name)
+        msg = MIMEText(body, 'plain')  # mail body
+        msg['Subject'] = f'[網管通知] 提醒您已經超過 CMLab GPU 使用限制！({username})'
+        msg['From'] = 'unix_manager@cmlab.csie.ntu.edu.tw'
+        msg['To'] = f'{username}@cmlab.csie.ntu.edu.tw'
+        msg['Cc'] = 'unix_manager@cmlab.csie.ntu.edu.tw'
+        # the with-block closes the connection even when login or sending raises
+        with smtplib.SMTP('smtp.gmail.com', 587) as smtp:
+            smtp.ehlo()
+            smtp.starttls()
+            smtp.login('snsd0805@cmlab.csie.ntu.edu.tw', GOOGLE_CODE)
+            status = smtp.send_message(msg)
+        if status == {}:
+            print(f'    {username}, 郵件傳送成功！')
+        else:
+            print(f'    {username}, 郵件傳送失敗...')
+        # start the cool-down only after sending, so an SMTP error is retried on the next run
+        send_log[username] = time.time()
+        with open('send_log.json', 'w') as fp:
+            json.dump(send_log, fp)
+
+
+if __name__ == '__main__':
+    # Check every server against the configured GPU_LIMIT and mail its violators.
+    for server in get_server_gpu_status():
+        print(server['name'], server['gpu'])
+        violators = filter(server, GPU_LIMIT)
+        print(violators)
+        mail_notify(server['name'], server['gpu'], violators)
+        print("=" * 20)
diff --git a/send_log.json b/send_log.json
new file mode 100644
index 0000000..0967ef4
--- /dev/null
+++ b/send_log.json
@@ -0,0 +1 @@
+{}