Compare commits
No commits in common. "24d26c863578dedfe1f00adafa9977aa666e6ae9" and "7bf0f39d3936d2cd87ea7dbaa50bcad4cdf922c6" have entirely different histories.
24d26c8635
...
7bf0f39d39
@ -1,6 +1,5 @@
|
|||||||
import socket
|
import socket
|
||||||
import json
|
import json
|
||||||
import docker
|
|
||||||
|
|
||||||
class ClusterCommunicationModule():
|
class ClusterCommunicationModule():
|
||||||
def __init__(self, host, port, node_manager):
|
def __init__(self, host, port, node_manager):
|
||||||
@ -52,10 +51,6 @@ class ClusterCommunicationModule():
|
|||||||
self.client_sock.connect((addr[0], self.port))
|
self.client_sock.connect((addr[0], self.port))
|
||||||
self.client_sock.send('[CHECK]'.encode())
|
self.client_sock.send('[CHECK]'.encode())
|
||||||
|
|
||||||
# join docker swarm cluster
|
|
||||||
docker_token = self.client_sock.recv(1024).decode().split()[-1]
|
|
||||||
print("Receive Docker Swarm Join_Token=", docker_token)
|
|
||||||
|
|
||||||
# remove 'add node'
|
# remove 'add node'
|
||||||
self.node_manager.actions = self.node_manager.actions[1:]
|
self.node_manager.actions = self.node_manager.actions[1:]
|
||||||
|
|
||||||
@ -81,26 +76,18 @@ class ClusterCommunicationModule():
|
|||||||
data = self.client_sock.recv(1024)
|
data = self.client_sock.recv(1024)
|
||||||
data = data.decode()
|
data = data.decode()
|
||||||
|
|
||||||
if data == '[REJECT]':
|
|
||||||
print(f"{host} reject.")
|
|
||||||
status = False
|
|
||||||
elif data == '[ACCEPT]':
|
|
||||||
self.node_manager.status = 'master'
|
|
||||||
print(f"{host} accept.")
|
|
||||||
|
|
||||||
# build docker swarm
|
|
||||||
self.node_manager.docker_client.init(advertise_addr="eth0", listen_addr=f"{self.host}:2377", force_new_cluster=True)
|
|
||||||
token = self.node_manager.docker_client.attrs['JoinTokens']['Worker']
|
|
||||||
self.client_sock.send(f'[DOCKER_TOKEN] {token}'.encode())
|
|
||||||
|
|
||||||
status = True
|
|
||||||
|
|
||||||
# close client_sock, and waiting for the worker connect to our self.sock
|
# close client_sock, and waiting for the worker connect to our self.sock
|
||||||
self.client_sock.close()
|
self.client_sock.close()
|
||||||
self.client_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
self.client_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||||
|
|
||||||
|
|
||||||
return status
|
if data == '[REJECT]':
|
||||||
|
print(f"{host} reject.")
|
||||||
|
return False
|
||||||
|
elif data == '[ACCEPT]':
|
||||||
|
self.node_manager.status = 'master'
|
||||||
|
|
||||||
|
print(f"{host} accept.")
|
||||||
|
return True
|
||||||
|
|
||||||
def cluster_info(self):
|
def cluster_info(self):
|
||||||
ans = []
|
ans = []
|
||||||
|
|||||||
@ -2,7 +2,6 @@ import threading
|
|||||||
from src.communication import ServiceExplorationModule, ClusterCommunicationModule
|
from src.communication import ServiceExplorationModule, ClusterCommunicationModule
|
||||||
import torch
|
import torch
|
||||||
import time
|
import time
|
||||||
import docker
|
|
||||||
|
|
||||||
class NodeManager():
|
class NodeManager():
|
||||||
def __init__(self, host, port):
|
def __init__(self, host, port):
|
||||||
@ -22,9 +21,6 @@ class NodeManager():
|
|||||||
# let all client can know which IP address has our service so that it can link to.
|
# let all client can know which IP address has our service so that it can link to.
|
||||||
self.service_exploration_module = ServiceExplorationModule(host, port+1, self)
|
self.service_exploration_module = ServiceExplorationModule(host, port+1, self)
|
||||||
|
|
||||||
# docker client
|
|
||||||
self.docker_client = docker.from_env()
|
|
||||||
|
|
||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
|
|
||||||
def get_GPU_info(self):
|
def get_GPU_info(self):
|
||||||
|
|||||||
@ -2,7 +2,7 @@ import argparse
|
|||||||
|
|
||||||
def get_args():
|
def get_args():
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument('host', type=str, help="server's IPv4 address")
|
parser.add_argument('--host', type=str, help="server's IPv4 address", default='0.0.0.0')
|
||||||
parser.add_argument('--port', type=int, help="server's listening port", default=8888)
|
parser.add_argument('--port', type=int, help="server's listening port", default=8888)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user