feat: leave docker swarm cluster on exit

This commit is contained in:
Ting-Jun Wang 2024-05-30 16:49:10 +08:00
parent 755a929aeb
commit 5b8b3e7c39
Signed by: snsd0805
GPG Key ID: 48D331A3D6160354

View File

@ -88,6 +88,10 @@ class ClusterCommunicationModule():
if command == '[INFO]': if command == '[INFO]':
data = {'host': self.host, 'GPU': self.node_manager.GPU, 'GPU_num': self.node_manager.GPU_num} data = {'host': self.host, 'GPU': self.node_manager.GPU, 'GPU_num': self.node_manager.GPU_num}
self.client_sock.send(json.dumps(data).encode()) self.client_sock.send(json.dumps(data).encode())
elif command == '[STOP]':
self.node_manager.docker_client.swarm.leave()
data = {'host': self.host}
self.client_sock.send(f'[STOP_CHECK] {json.dumps(data)}'.encode())
return True return True
@ -104,6 +108,7 @@ class ClusterCommunicationModule():
if data == '[REJECT]': if data == '[REJECT]':
print(f"{host} reject.") print(f"{host} reject.")
return False return False
elif data == '[ACCEPT]': elif data == '[ACCEPT]':
self.node_manager.status = 'master' self.node_manager.status = 'master'
print(f"{host} accept.") print(f"{host} accept.")
@ -120,9 +125,20 @@ class ClusterCommunicationModule():
ans.append(data) ans.append(data)
except: except:
self.worker_conns.remove(conn) self.worker_conns.remove(conn)
print("1 worker disconnnected.")
return ans return ans
def exit(self): def exit(self):
if self.node_manager.status == 'master':
for conn in self.worker_conns:
conn.send('[STOP] {}'.encode())
check, args = conn.recv(1024).decode().split()
print(f'{args} has stopped.')
self.node_manager.docker_client.swarm.leave(force=True)
if self.node_manager.status == 'worker':
self.node_manager.docker_client.swarm.leave()
self.sock.close() self.sock.close()
self.client_sock.close() self.client_sock.close()
for conn in self.worker_conns: for conn in self.worker_conns: