#include #include #include #include #include #include "constant.h" #include "enviroment.h" /* Return the index with the max value in the array Args: - float *arr (array's address) - short length (integer): array's length Results: - short index (integer): the index with the max value */ short float_argmax(float *arr, short length){ float ans = -1, max = -FLT_MAX; for (short i=0; i max){ max = arr[i]; ans = i; } } return ans; } /* Choose the next action with Epsilon-Greedy. EPSILON means the probability to choose the best action in this state from Q-Table. (1-EPSILON) to random an action to do. Args: - short *table (array's address): state table for Q-Learning - short *board (array's address): chessboards' status - int state (integer, state hash): hash for board's status Results: - short best_choice */ short bot_choose_action(float *table, short *board, int state){ // get available actions for choosing short available_actions[9]; short available_actions_length; get_available_actions(board, available_actions, &available_actions_length); // use argmax() to find the best choise, // first we should build an available_actions_state array for saving the state for all available choise. float available_actions_state[9]; short available_actions_state_index[9]; short available_actions_state_length, index = 0; short temp_index, best_choice; bool zeros = true; for (short i=0; i EPSILON -> random a action // If random number < EPSILON -> choose the best action in this state. double random_num = (double) rand() / (RAND_MAX + 1.0); if ((random_num > EPSILON) || zeros){ best_choice = available_actions_state_index[ rand() % index ]; } return best_choice; } /* Opponent random choose a action to do. Args: - short *table (array's address): state table for Q-Learning - short *board (array's address): chessboards' status - int state (integer, state hash): hash for board's status Results: - short choice (integer): random, -1 means no available action to choose */ short opponent_random_action(float *table, short *board, int state){ // get available actions for choosing short available_actions[9]; short available_action_length; get_available_actions(board, available_actions, &available_action_length); if (available_action_length == 0){ return -1; } // random short choice; choice = (short)( rand() % available_action_length ); choice = available_actions[choice]; return choice; } /* Inilialize the Q-Table Args: - float *table (two-dim array's start address) Results: - None. */ void init_table(float *table){ for (int i=0; i