feat: leave docker swarm cluster on exit

This commit is contained in:
Ting-Jun Wang 2024-05-30 16:49:10 +08:00
parent 755a929aeb
commit 5b8b3e7c39
Signed by: snsd0805
GPG Key ID: 48D331A3D6160354

View File

@ -88,7 +88,11 @@ class ClusterCommunicationModule():
if command == '[INFO]':
data = {'host': self.host, 'GPU': self.node_manager.GPU, 'GPU_num': self.node_manager.GPU_num}
self.client_sock.send(json.dumps(data).encode())
elif command == '[STOP]':
self.node_manager.docker_client.swarm.leave()
data = {'host': self.host}
self.client_sock.send(f'[STOP_CHECK] {json.dumps(data)}'.encode())
return True
def request(self, host): # master side
@ -104,6 +108,7 @@ class ClusterCommunicationModule():
if data == '[REJECT]':
print(f"{host} reject.")
return False
elif data == '[ACCEPT]':
self.node_manager.status = 'master'
print(f"{host} accept.")
@ -120,9 +125,20 @@ class ClusterCommunicationModule():
ans.append(data)
except:
self.worker_conns.remove(conn)
print("1 worker disconnnected.")
return ans
def exit(self):
if self.node_manager.status == 'master':
for conn in self.worker_conns:
conn.send('[STOP] {}'.encode())
check, args = conn.recv(1024).decode().split()
print(f'{args} has stopped.')
self.node_manager.docker_client.swarm.leave(force=True)
if self.node_manager.status == 'worker':
self.node_manager.docker_client.swarm.leave()
self.sock.close()
self.client_sock.close()
for conn in self.worker_conns: