#include #include #include #include #include #include #include #include #define BOT_SYMBOL 1 #define OPPONENT_SYMBOL 2 #define EPSILON 0.9 // Epsilon-greedy #define LR 0.1 // learning rate #define LAMBDA 0.9 // discount factor #define STATE_NUM 19683 #define ACTION_NUM 9 #define EPISODE_NUM 100000 #define FIRST true short PATHS[8][3] = { {0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {0, 3, 6}, {1, 4, 7}, {2, 5, 8}, {0, 4, 8}, {2, 4, 6} }; struct action{ short player; short loc; }; /* Reset the game, clear the chessboard. Args: - short *board (array's address): chessboard's status Results: - None, set all blocks on the chessboard to zero. */ void reset(short* board){ for (short i=0; i<9; i++) board[i] = 0; } /* Print the chessboard on the console. Args: - short *board (array's address): chessboard's status Results: - None. Only printing. */ void show(short *board){ short loc; printf("┼───┼───┼───┼\n"); for (short i=0; i<3; i++){ printf("│ "); for (short j=0; j<3; j++){ loc = 3*i+j; if (board[loc] == 0) printf(" │ "); else if (board[loc] == BOT_SYMBOL) printf("○ │ "); else printf("✕ │ "); } printf("\n"); printf("┼───┼───┼───┼\n"); } printf("\n\n"); } /* Save all available actions into the "result" array. Args: - short *board (array's address): chessboard's status - short *result (array's address): To save all available actions. - short *length (integer's pointer): To save the number of available actions. Results: - None. All available actions are saved into "result" and the number of actions is saved in "length" */ void get_available_actions(short *board, short *result, short *length){ short index = 0; for (int i=0; i<9; i++) if (board[i] == 0) result[index++] = i; *length = index; } /* Return winner's number; Args: - short *board (array's address): chessboard's status Results: - short winner_number(integer): winner's number, 0 for no winner now, 1 for Bot, 2 for opponent */ short get_winner(short *board){ int a, b, c; for (int i=0; i<8; i++){ a = PATHS[i][0]; b = PATHS[i][1]; c = PATHS[i][2]; if ((board[a] == board[b]) && (board[b] == board[c]) && (board[a] != 0)){ return board[a]; } } return 0; } /* Hash chesstable's status into hash. Args: - short *board (array's address): chessboard's status Results: - int hash (integer): chessboard's status in i-th block * pow(3, i) */ int state_hash(short *board){ int base, hash = 0; for (int i=0; i<9; i++){ base = pow(3, i); hash += (base * board[i]); } return hash; } /* Act on the chessboard. Args: - short *board (array's address): chessboards' status - struct action *a (a action's pointer): include player & choose loc - int *state (pointer): for return. To save the chessboard's state hash which after doing this action - float *reward (pointer): for return. To save the number of rewards which the player gets after doing this action. - float *opponent_reward (pointer): for return. To save the number of rewards which the opponents gets after the player doing this action. - short *winner (pointer): for return. To save the winner in this action. If haven't finish, it will be zero. Results: - None. Save in state & reward & winner */ void act(short *board, struct action *a, int *state, float *reward, float *opponent_reward, short *winner){ // printf("Act( player=%d, action=%d )\n", a->player, a->loc); assert(board[a->loc] == 0); board[a->loc] = a->player; *winner = get_winner(board); *state = state_hash(board); if (*winner == a->player){ *reward = 1.0; *opponent_reward = -1.0; } else if(*winner != 0){ *reward = -1.0; *opponent_reward = 1.0; } else{ *reward = 0; *opponent_reward = 0; } } /* Return the index with the max value in the array Args: - short *arr (array's address) - short length (integer): array's length Results: - short index (integer): the index with the max value */ short short_argmax(short *arr, short length){ short ans = -1, max = SHRT_MIN; for (short i=0; i max){ max = arr[i]; ans = i; } } return ans; } /* Return the index with the max value in the array Args: - float *arr (array's address) - short length (integer): array's length Results: - short index (integer): the index with the max value */ short float_argmax(float *arr, short length){ float ans = -1, max = -FLT_MAX; for (short i=0; i max){ max = arr[i]; ans = i; } } return ans; } /* Choose the next action with Epsilon-Greedy. EPSILON means the probability to choose the best action in this state from Q-Table. (1-EPSILON) to random an action to do. Args: - short *table (array's address): state table for Q-Learning - short *board (array's address): chessboards' status - int state (integer, state hash): hash for board's status Results: - short best_choice */ short bot_choose_action(float *table, short *board, int state){ // get available actions for choosing short available_actions[9]; short available_actions_length; get_available_actions(board, available_actions, &available_actions_length); // use argmax() to find the best choise, // first we should build an available_actions_state array for saving the state for all available choise. float available_actions_state[9]; short available_actions_state_index[9]; short available_actions_state_length, index = 0; short temp_index, best_choice; bool zeros = true; for (short i=0; i EPSILON -> random a action // If random number < EPSILON -> choose the best action in this state. double random_num = (double) rand() / (RAND_MAX + 1.0); if ((random_num > EPSILON) || zeros){ best_choice = available_actions_state_index[ rand() % index ]; } return best_choice; } /* Opponent random choose a action to do. Args: - short *table (array's address): state table for Q-Learning - short *board (array's address): chessboards' status - int state (integer, state hash): hash for board's status Results: - short choice (integer): random, -1 means no available action to choose */ short opponent_random_action(float *table, short *board, int state){ // get available actions for choosing short available_actions[9]; short available_action_length; get_available_actions(board, available_actions, &available_action_length); if (available_action_length == 0){ return -1; } // random short choice; choice = (short)( rand() % available_action_length ); choice = available_actions[choice]; return choice; } /* Inilialize the Q-Table Args: - float *table (two-dim array's start address) Results: - None. */ void init_table(float *table){ for (int i=0; i