Compare commits

..

No commits in common. "b7372420bc937f3ae9b40fedfcca429dd95f8863" and "68123809bbd9e5588130d9674c37d36b2e4e0a8b" have entirely different histories.

2 changed files with 4 additions and 11 deletions

View File

@@ -1,5 +1,6 @@
import socket import socket
import json import json
from variables import actions
class ClusterCommunicationModule(): class ClusterCommunicationModule():
def __init__(self, host, port, node_manager): def __init__(self, host, port, node_manager):

View File

@@ -1,8 +1,8 @@
import threading import threading
from src.communication import ServiceExplorationModule, ClusterCommunicationModule from src.communication import ServiceExplorationModule, ClusterCommunicationModule
import torch
import time import time
class NodeManager(): class NodeManager():
def __init__(self, host, port): def __init__(self, host, port):
self.status = 'none' self.status = 'none'
@@ -10,8 +10,8 @@ class NodeManager():
{'explanation': 'Add another node into our cluster', 'function': 'add_node'}, {'explanation': 'Add another node into our cluster', 'function': 'add_node'},
{'explanation': 'Exit', 'function': 'exit'}, {'explanation': 'Exit', 'function': 'exit'},
] ]
self.get_GPU_info() self.GPU = 'RTX 4090'
print(f"You have {self.GPU} * {self.GPU_num}") self.GPU_num = 1
# start Cluster Communication Module # start Cluster Communication Module
# let the nodes in the cluster can communicate # let the nodes in the cluster can communicate
@@ -22,14 +22,6 @@ class NodeManager():
self.service_exploration_module = ServiceExplorationModule(host, port+1, self) self.service_exploration_module = ServiceExplorationModule(host, port+1, self)
time.sleep(2) time.sleep(2)
def get_GPU_info(self):
    """Detect the local CUDA GPUs and record them on this object.

    Stores ``torch.cuda.device_count()`` in ``self.GPU_num`` and the name
    reported for device 0 in ``self.GPU``.

    Raises:
        AssertionError: If no CUDA device is visible, or if the visible
            devices do not all report the same name as device 0.
    """
    self.GPU_num = torch.cuda.device_count()
    # Raise explicitly instead of using `assert`: assert statements are
    # removed under `python -O`, which would silently skip this validation.
    # The exception type and messages are kept the same for existing callers.
    if self.GPU_num <= 0:
        raise AssertionError("Your computer doesn't have GPU resource")
    self.GPU = torch.cuda.get_device_name(0)
    # Device 0 is the reference name, so only the remaining devices need
    # to be compared against it.
    for index in range(1, self.GPU_num):
        if torch.cuda.get_device_name(index) != self.GPU:
            raise AssertionError("Please provide same type of GPUs.")
def start_service(self): def start_service(self):
communication_thread = threading.Thread(target=self.cluster_communication_module.listen) communication_thread = threading.Thread(target=self.cluster_communication_module.listen)