#include #include #include #include #include #include "constant.h" #include "enviroment.h" #include "hash-table.h" /* Return the index with the max value in the array Args: - float *arr (array's address) - short length (integer): array's length Results: - short index (integer): the index with the max value */ short float_argmax(float *arr, short length){ float ans = -1, max = -FLT_MAX; for (short i=0; i max){ max = arr[i]; ans = i; } } return ans; } /* Choose the next action with Epsilon-Greedy. EPSILON means the probability to choose the best action in this state from Q-Table. (1-EPSILON) to random an action to do. Args: - short *table (array's address): state table for Q-Learning - short *board (array's address): chessboards' status - char *state (string, state hash): hash for board's status Results: - short best_choice */ short bot_choose_action(struct Node **map, short *board, char *state){ // get available actions for choosing short available_actions[ACTION_NUM]; short available_actions_length; get_available_actions(board, available_actions, &available_actions_length); // use argmax() to find the best choise, // first we should build an available_actions_state array for saving the state for all available choise. float available_actions_state[ACTION_NUM]; short available_actions_state_index[ACTION_NUM]; short available_actions_state_length, index = 0; short temp_index, best_choice; bool zeros = true; bool find; float state_weights[ACTION_NUM]; // find weights in the hash table search(map, state, &find, state_weights); if (!find) { for (short i=0; i EPSILON -> random a action // If random number < EPSILON -> choose the best action in this state. double random_num = (double) rand() / (RAND_MAX + 1.0); if ((random_num > EPSILON) || zeros){ best_choice = available_actions_state_index[ rand() % index ]; } return best_choice; } /* Opponent random choose a action to do. Args: - short *board (array's address): chessboards' status Results: - short choice (integer): random, -1 means no available action to choose */ short opponent_random_action(short *board){ // get available actions for choosing short available_actions[ACTION_NUM]; short available_action_length; get_available_actions(board, available_actions, &available_action_length); if (available_action_length == 0){ return -1; } // random short choice; choice = (short)( rand() % available_action_length ); choice = available_actions[choice]; return choice; } // Use Hash Table, so we needn't initilize Q-Table // // /* // Inilialize the Q-Table // Args: // - float *table (two-dim array's start address) // Results: // - None. // */ // void init_table(float *table){ // for (int i=0; i