77 lines
2.2 KiB
Python
77 lines
2.2 KiB
Python
import random
|
|
import pandas as pd
|
|
import numpy as np
|
|
|
|
class Enviroment():
    """Tic-tac-toe board on which the user plays against a random bot.

    The board is a flat list of nine cells in row-major order (cell index
    ``3 * row + col``). A cell holds 0 when empty, ``user_symbol`` (1) when
    taken by the user, or ``bot_symbol`` (2) when taken by the bot.
    """

    # Every row, column and diagonal, expressed as flat board indices.
    _WINNING_LINES = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),
        (0, 3, 6), (1, 4, 7), (2, 5, 8),
        (0, 4, 8), (2, 4, 6),
    )

    def __init__(self):
        """Create an empty board; the bot does not move until ``action``."""
        self.board = [0] * 9

        self.bot_symbol = 2
        self.user_symbol = 1

    def reset(self):
        """Clear every cell so a new game can start."""
        self.board = [0] * 9

    def show(self):
        """Print the board as a grid: 0 -> blank, 1 -> ○, anything else -> ✕."""
        border = "┼───┼───┼───┼"
        print(border)
        for row in range(3):
            print("│ ", end='')
            for col in range(3):
                cell = self.board[3 * row + col]
                if cell == 0:
                    glyph = " "
                elif cell == 1:
                    glyph = "○"
                else:
                    glyph = "✕"
                print(glyph, end=' │ ')
            print()
            print(border)
        print()

    def get_available_actions(self):
        """Return the indices of all empty cells, in ascending order."""
        return [idx for idx, cell in enumerate(self.board) if cell == 0]

    def get_winner(self):
        """Return the symbol that owns a complete line, or 0 if none does.

        Lines made only of empty cells are skipped: three equal zeros are not
        a win, and returning early on them would hide a real winning line
        that appears later in the list.
        """
        for a, b, c in self._WINNING_LINES:
            mark = self.board[a]
            if mark != 0 and mark == self.board[b] == self.board[c]:
                return mark
        return 0

    def state_hash(self):
        """Encode the board as an integer, reading cell ``i`` as base-3 digit ``i``."""
        return sum(cell * (3 ** idx) for idx, cell in enumerate(self.board))

    def bot_action(self):
        """Place the bot's symbol on a random empty cell, if there is one."""
        free_cells = self.get_available_actions()
        if free_cells:
            self.board[random.choice(free_cells)] = self.bot_symbol

    def action(self, loc):
        """Play the user's move at ``loc``, then let the bot answer.

        The bot only replies when the user's move did not already win, and
        the winner is re-evaluated after the bot's reply so a bot win is
        reported on the turn it happens.

        Args:
            loc: Index (0-8) of an empty cell.

        Returns:
            A ``(state, reward, winner)`` tuple: ``state`` is ``state_hash()``
            of the board after the turn, ``reward`` is 1 if the user won,
            -1 if the bot won and 0 otherwise, and ``winner`` is the winning
            symbol or 0 when nobody has won (including a full-board draw).

        Raises:
            AssertionError: If ``loc`` is not an empty cell.
        """
        if loc not in self.get_available_actions():
            # Raised explicitly instead of via ``assert`` so the check is
            # still enforced when Python runs with -O.
            raise AssertionError("It's a wrong action")
        self.board[loc] = self.user_symbol

        winner = self.get_winner()  # if != 0: stop
        if winner == 0:
            # Game still open: the bot replies (no-op on a full board).
            self.bot_action()
            winner = self.get_winner()

        if winner == self.user_symbol:
            reward = 1
        elif winner == self.bot_symbol:
            reward = -1
        else:
            reward = 0

        state = self.state_hash()
        return state, reward, winner