fix: get all users on the same GPU
Problem:
Sometimes multiple users run different programs on the same
GPU if there is enough VRAM.
But the original program only gets the first username on the webpage,
so we may miss some users.
Solution:
For each server, I get every GPU's name and all usernames on that GPU's line
on the webpage.
So I return a dict containing all GPU names and the usernames on every
GPU.
This commit is contained in:
parent
834bed6eb0
commit
e907d42043
51
main.py
51
main.py
@ -77,30 +77,40 @@ def get_server_gpu_status() -> list:
|
|||||||
soup = BeautifulSoup(box, 'html.parser')
|
soup = BeautifulSoup(box, 'html.parser')
|
||||||
# get server name
|
# get server name
|
||||||
server_name = soup.find('span', class_='f7').text.replace(' ', '')
|
server_name = soup.find('span', class_='f7').text.replace(' ', '')
|
||||||
|
gpu_names = []
|
||||||
# get GPU name
|
|
||||||
gpu_name = soup.find('span', class_='f4').text.replace(' ', '')
|
|
||||||
|
|
||||||
users = []
|
users = []
|
||||||
gpus = box.split('\n')
|
gpus = box.split('\n')
|
||||||
for i in gpus[1:]:
|
for i in gpus[:]:
|
||||||
soup = BeautifulSoup(i, 'html.parser')
|
if 'f6' in i: # if this line is for a single GPU informations
|
||||||
|
soup = BeautifulSoup(i, 'html.parser')
|
||||||
|
|
||||||
# get all users who is using this server's GPU resources.
|
# get all users who is using this server's GPU resources.
|
||||||
user_obj = soup.find('span', class_='f0')
|
user_objs = soup.findAll('span', class_='f0')
|
||||||
if user_obj != None:
|
user_in_this_gpu = set()
|
||||||
username = user_obj.text
|
|
||||||
if username != 'gdm':
|
# get users on this GPU
|
||||||
users.append(username)
|
for user_obj in user_objs:
|
||||||
|
if user_obj != None:
|
||||||
|
username = user_obj.text
|
||||||
|
if username != 'gdm':
|
||||||
|
user_in_this_gpu.add(username)
|
||||||
|
|
||||||
|
# get gpu names
|
||||||
|
gpu_name = soup.find('span', class_='f4').text.replace(' ', '')
|
||||||
|
|
||||||
|
# log gpu name & users on this GPU
|
||||||
|
gpu_names.append(gpu_name)
|
||||||
|
users.append(list(user_in_this_gpu))
|
||||||
|
|
||||||
servers.append({
|
servers.append({
|
||||||
'name': server_name,
|
'name': server_name,
|
||||||
'users': users,
|
'users': users,
|
||||||
'gpu': gpu_name
|
'gpu': gpu_names
|
||||||
})
|
})
|
||||||
return servers
|
return servers
|
||||||
|
|
||||||
def filter(server_status: dict, limit: int = 2) -> list:
|
def filter(server_status: list, limit: int = 2) -> list:
|
||||||
'''
|
'''
|
||||||
You can set some rule here.
|
You can set some rule here.
|
||||||
it will return the a list which contains the username that exceed the limit.
|
it will return the a list which contains the username that exceed the limit.
|
||||||
@ -110,14 +120,18 @@ def filter(server_status: dict, limit: int = 2) -> list:
|
|||||||
I add this function beacause that we may need more filter rules. we can add some rules in this function easily
|
I add this function beacause that we may need more filter rules. we can add some rules in this function easily
|
||||||
|
|
||||||
Input:
|
Input:
|
||||||
server_status (dict): e.g. {'name': 'cml5', ...}
|
server_status (list): from get_server_gpu_status()
|
||||||
limit (int): GPU limit (default: 2)
|
limit (int): GPU limit (default: 2)
|
||||||
Output:
|
Output:
|
||||||
violators (list): a violator list(set) e.g. [ {'user': 'snsd0805', 'gpu': 3}] means that the violator 'snsd0805' is using 3 GPU.
|
violators (list): a violator list(set) e.g. [ {'user': 'snsd0805', 'usage': [{'server': 'cml5', 'gpu': 'NVDIA3090', 'number': 3}...] }] means that the violator 'snsd0805' is using 3 GPU.
|
||||||
'''
|
'''
|
||||||
violators = []
|
violators = []
|
||||||
|
|
||||||
counter = {}
|
counter = {}
|
||||||
|
|
||||||
|
for server in server_status:
|
||||||
|
print(server)
|
||||||
|
'''
|
||||||
for user in server_status['users']:
|
for user in server_status['users']:
|
||||||
if user not in counter:
|
if user not in counter:
|
||||||
counter[user] = 1
|
counter[user] = 1
|
||||||
@ -130,6 +144,7 @@ def filter(server_status: dict, limit: int = 2) -> list:
|
|||||||
'user': k,
|
'user': k,
|
||||||
'gpu': v,
|
'gpu': v,
|
||||||
})
|
})
|
||||||
|
'''
|
||||||
return violators
|
return violators
|
||||||
|
|
||||||
def mail_notify(server_name: str, gpu_name: str, violators: list) -> None:
|
def mail_notify(server_name: str, gpu_name: str, violators: list) -> None:
|
||||||
@ -181,9 +196,15 @@ def mail_notify(server_name: str, gpu_name: str, violators: list) -> None:
|
|||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
server_status = get_server_gpu_status()
|
server_status = get_server_gpu_status()
|
||||||
|
for i in server_status:
|
||||||
|
print(i)
|
||||||
|
|
||||||
|
violators = filter(server_status, 2)
|
||||||
|
'''
|
||||||
for server in server_status:
|
for server in server_status:
|
||||||
print(server['name'], server['gpu'])
|
print(server['name'], server['gpu'])
|
||||||
violators = filter(server, 2)
|
violators = filter(server, 2)
|
||||||
print(violators)
|
print(violators)
|
||||||
mail_notify(server['name'], server['gpu'], violators)
|
mail_notify(server['name'], server['gpu'], violators)
|
||||||
print("=" * 20)
|
print("=" * 20)
|
||||||
|
'''
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user