From 7a68a06c8691b071960a215a6fdbbd2f643e5a7b Mon Sep 17 00:00:00 2001 From: eeeXun Date: Wed, 31 May 2023 11:31:15 +0800 Subject: [PATCH] style(format): run clang-format --- constant.h | 6 +- enviroment.c | 205 ++++++++++++++++++++++++++------------------------- enviroment.h | 16 ++-- main.c | 17 +++-- q-learning.c | 185 ++++++++++++++++++++++++---------------------- q-learning.h | 12 +-- 6 files changed, 227 insertions(+), 214 deletions(-) diff --git a/constant.h b/constant.h index 3c0bd69..b8d2ce4 100644 --- a/constant.h +++ b/constant.h @@ -1,9 +1,9 @@ #define BOT_SYMBOL 1 #define OPPONENT_SYMBOL 2 -#define EPSILON 0.9 // Epsilon-greedy -#define LR 0.1 // learning rate -#define LAMBDA 0.9 // discount factor +#define EPSILON 0.9 // Epsilon-greedy +#define LR 0.1 // learning rate +#define LAMBDA 0.9 // discount factor #define STATE_NUM 19683 #define ACTION_NUM 9 diff --git a/enviroment.c b/enviroment.c index 6f5b721..f94e3cb 100644 --- a/enviroment.c +++ b/enviroment.c @@ -1,146 +1,151 @@ -#include -#include -#include -#include "constant.h" #include "enviroment.h" +#include "constant.h" +#include +#include +#include short PATHS[8][3] = { - {0, 1, 2}, {3, 4, 5}, {6, 7, 8}, - {0, 3, 6}, {1, 4, 7}, {2, 5, 8}, - {0, 4, 8}, {2, 4, 6} + { 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 }, + { 0, 3, 6 }, { 1, 4, 7 }, { 2, 5, 8 }, + { 0, 4, 8 }, { 2, 4, 6 } }; /* - Reset the game, clear the chessboard. + Reset the game, clear the chessboard. - Args: - - short *board (array's address): chessboard's status + Args: + - short *board (array's address): chessboard's status - Results: - - None, set all blocks on the chessboard to zero. + Results: + - None, set all blocks on the chessboard to zero. */ -void reset(short* board){ - for (short i=0; i<9; i++) - board[i] = 0; +void reset(short* board) +{ + for (short i = 0; i < 9; i++) + board[i] = 0; } /* - Print the chessboard on the console. + Print the chessboard on the console. - Args: - - short *board (array's address): chessboard's status + Args: + - short *board (array's address): chessboard's status - Results: - - None. Only printing. + Results: + - None. Only printing. */ -void show(short *board){ - short loc; - printf("┼───┼───┼───┼\n"); - for (short i=0; i<3; i++){ - printf("│ "); - for (short j=0; j<3; j++){ - loc = 3*i+j; - if (board[loc] == 0) - printf(" │ "); - else if (board[loc] == BOT_SYMBOL) - printf("○ │ "); - else - printf("✕ │ "); - } - printf("\n"); - printf("┼───┼───┼───┼\n"); - } - printf("\n\n"); +void show(short* board) +{ + short loc; + printf("┼───┼───┼───┼\n"); + for (short i = 0; i < 3; i++) { + printf("│ "); + for (short j = 0; j < 3; j++) { + loc = 3 * i + j; + if (board[loc] == 0) + printf(" │ "); + else if (board[loc] == BOT_SYMBOL) + printf("○ │ "); + else + printf("✕ │ "); + } + printf("\n"); + printf("┼───┼───┼───┼\n"); + } + printf("\n\n"); } /* - Save all available actions into the "result" array. + Save all available actions into the "result" array. - Args: - - short *board (array's address): chessboard's status - - short *result (array's address): To save all available actions. - - short *length (integer's pointer): To save the number of available actions. + Args: + - short *board (array's address): chessboard's status + - short *result (array's address): To save all available actions. + - short *length (integer's pointer): To save the number of available actions. - Results: - - None. All available actions are saved into "result" and the number of actions is saved in "length" + Results: + - None. All available actions are saved into "result" and the number of actions is saved in "length" */ -void get_available_actions(short *board, short *result, short *length){ - short index = 0; - for (int i=0; i<9; i++) - if (board[i] == 0) - result[index++] = i; - *length = index; +void get_available_actions(short* board, short* result, short* length) +{ + short index = 0; + for (int i = 0; i < 9; i++) + if (board[i] == 0) + result[index++] = i; + *length = index; } /* - Return winner's number; + Return winner's number; - Args: - - short *board (array's address): chessboard's status + Args: + - short *board (array's address): chessboard's status - Results: - - short winner_number(integer): winner's number, 0 for no winner now, 1 for Bot, 2 for opponent + Results: + - short winner_number(integer): winner's number, 0 for no winner now, 1 for Bot, 2 for opponent */ -short get_winner(short *board){ - int a, b, c; - for (int i=0; i<8; i++){ - a = PATHS[i][0]; b = PATHS[i][1]; c = PATHS[i][2]; - if ((board[a] == board[b]) && (board[b] == board[c]) && (board[a] != 0)){ - return board[a]; - } - } - return 0; +short get_winner(short* board) +{ + int a, b, c; + for (int i = 0; i < 8; i++) { + a = PATHS[i][0]; + b = PATHS[i][1]; + c = PATHS[i][2]; + if ((board[a] == board[b]) && (board[b] == board[c]) && (board[a] != 0)) { + return board[a]; + } + } + return 0; } /* - Hash chesstable's status into hash. + Hash chesstable's status into hash. - Args: - - short *board (array's address): chessboard's status + Args: + - short *board (array's address): chessboard's status - Results: - - int hash (integer): chessboard's status in i-th block * pow(3, i) + Results: + - int hash (integer): chessboard's status in i-th block * pow(3, i) */ -int state_hash(short *board){ - int base, hash = 0; - for (int i=0; i<9; i++){ - base = pow(3, i); - hash += (base * board[i]); - } - return hash; +int state_hash(short* board) +{ + int base, hash = 0; + for (int i = 0; i < 9; i++) { + base = pow(3, i); + hash += (base * board[i]); + } + return hash; } - /* - Act on the chessboard. + Act on the chessboard. - Args: - - short *board (array's address): chessboards' status - - struct action *a (a action's pointer): include player & choose loc - - int *state (pointer): for return. To save the chessboard's state hash which after doing this action - - float *reward (pointer): for return. To save the number of rewards which the player gets after doing this action. - - float *opponent_reward (pointer): for return. To save the number of rewards which the opponents gets after the player doing this action. - - short *winner (pointer): for return. To save the winner in this action. If haven't finish, it will be zero. + Args: + - short *board (array's address): chessboards' status + - struct action *a (a action's pointer): include player & choose loc + - int *state (pointer): for return. To save the chessboard's state hash which after doing this action + - float *reward (pointer): for return. To save the number of rewards which the player gets after doing this action. + - float *opponent_reward (pointer): for return. To save the number of rewards which the opponents gets after the player doing this action. + - short *winner (pointer): for return. To save the winner in this action. If haven't finish, it will be zero. - Results: - - None. Save in state & reward & winner + Results: + - None. Save in state & reward & winner */ -void act(short *board, struct action *a, int *state, float *reward, float *opponent_reward, short *winner){ +void act(short* board, struct action* a, int* state, float* reward, float* opponent_reward, short* winner) +{ // printf("Act( player=%d, action=%d )\n", a->player, a->loc); assert(board[a->loc] == 0); - board[a->loc] = a->player; - *winner = get_winner(board); - *state = state_hash(board); - if (*winner == a->player){ - *reward = 1.0; + board[a->loc] = a->player; + *winner = get_winner(board); + *state = state_hash(board); + if (*winner == a->player) { + *reward = 1.0; *opponent_reward = -1.0; - } - else if(*winner != 0){ - *reward = -1.0; + } else if (*winner != 0) { + *reward = -1.0; *opponent_reward = 1.0; - } - else{ - *reward = 0; + } else { + *reward = 0; *opponent_reward = 0; } } diff --git a/enviroment.h b/enviroment.h index 4a27df6..9b05f10 100644 --- a/enviroment.h +++ b/enviroment.h @@ -1,11 +1,11 @@ -struct action{ - short player; - short loc; +struct action { + short player; + short loc; }; void reset(short* board); -void show(short *board); -void get_available_actions(short *board, short *result, short *length); -short get_winner(short *board); -int state_hash(short *board); -void act(short *board, struct action *a, int *state, float *reward, float *opponent_reward, short *winner); +void show(short* board); +void get_available_actions(short* board, short* result, short* length); +short get_winner(short* board); +int state_hash(short* board); +void act(short* board, struct action* a, int* state, float* reward, float* opponent_reward, short* winner); diff --git a/main.c b/main.c index 202c16c..5f3d854 100644 --- a/main.c +++ b/main.c @@ -1,16 +1,17 @@ -#include -#include -#include -#include #include "constant.h" #include "enviroment.h" #include "q-learning.h" +#include +#include +#include +#include -int main(){ - short board[9]= {0}; // tic tac toe's chessboard - float table[STATE_NUM][ACTION_NUM]; // q-learning table +int main() +{ + short board[9] = { 0 }; // tic tac toe's chessboard + float table[STATE_NUM][ACTION_NUM]; // q-learning table - srand(time(NULL)); + srand(time(NULL)); init_table(&table[0][0]); run(&table[0][0], board, false, 10000, false); diff --git a/q-learning.c b/q-learning.c index 3470e23..2061f39 100644 --- a/q-learning.c +++ b/q-learning.c @@ -1,80 +1,81 @@ -#include #include -#include #include +#include +#include #include #include "constant.h" #include "enviroment.h" /* - Return the index with the max value in the array + Return the index with the max value in the array - Args: - - float *arr (array's address) - - short length (integer): array's length + Args: + - float *arr (array's address) + - short length (integer): array's length - Results: - - short index (integer): the index with the max value + Results: + - short index (integer): the index with the max value */ -short float_argmax(float *arr, short length){ - float ans = -1, max = -FLT_MAX; - for (short i=0; i max){ - max = arr[i]; - ans = i; - } - } - return ans; +short float_argmax(float* arr, short length) +{ + float ans = -1, max = -FLT_MAX; + for (short i = 0; i < length; i++) { + if (arr[i] > max) { + max = arr[i]; + ans = i; + } + } + return ans; } - /* - Choose the next action with Epsilon-Greedy. - EPSILON means the probability to choose the best action in this state from Q-Table. - (1-EPSILON) to random an action to do. + Choose the next action with Epsilon-Greedy. + EPSILON means the probability to choose the best action in this state from Q-Table. + (1-EPSILON) to random an action to do. - Args: - - short *table (array's address): state table for Q-Learning - - short *board (array's address): chessboards' status - - int state (integer, state hash): hash for board's status + Args: + - short *table (array's address): state table for Q-Learning + - short *board (array's address): chessboards' status + - int state (integer, state hash): hash for board's status - Results: - - short best_choice + Results: + - short best_choice */ -short bot_choose_action(float *table, short *board, int state){ +short bot_choose_action(float* table, short* board, int state) +{ - // get available actions for choosing - short available_actions[9]; - short available_actions_length; - get_available_actions(board, available_actions, &available_actions_length); + // get available actions for choosing + short available_actions[9]; + short available_actions_length; + get_available_actions(board, available_actions, &available_actions_length); - // use argmax() to find the best choise, - // first we should build an available_actions_state array for saving the state for all available choise. - float available_actions_state[9]; - short available_actions_state_index[9]; - short available_actions_state_length, index = 0; - short temp_index, best_choice; - bool zeros = true; - for (short i=0; i EPSILON -> random a action - // If random number < EPSILON -> choose the best action in this state. - double random_num = (double) rand() / (RAND_MAX + 1.0); - if ((random_num > EPSILON) || zeros){ - best_choice = available_actions_state_index[ rand() % index ]; - } + // Epsilon-Greedy + // If random number > EPSILON -> random a action + // If random number < EPSILON -> choose the best action in this state. + double random_num = (double)rand() / (RAND_MAX + 1.0); + if ((random_num > EPSILON) || zeros) { + best_choice = available_actions_state_index[rand() % index]; + } return best_choice; } @@ -83,27 +84,28 @@ short bot_choose_action(float *table, short *board, int state){ Opponent random choose a action to do. Args: - - short *table (array's address): state table for Q-Learning - - short *board (array's address): chessboards' status - - int state (integer, state hash): hash for board's status + - short *table (array's address): state table for Q-Learning + - short *board (array's address): chessboards' status + - int state (integer, state hash): hash for board's status - Results: - - short choice (integer): random, -1 means no available action to choose + Results: + - short choice (integer): random, -1 means no available action to choose */ -short opponent_random_action(float *table, short *board, int state){ +short opponent_random_action(float* table, short* board, int state) +{ // get available actions for choosing short available_actions[9]; short available_action_length; get_available_actions(board, available_actions, &available_action_length); - if (available_action_length == 0){ + if (available_action_length == 0) { return -1; } // random short choice; - choice = (short)( rand() % available_action_length ); + choice = (short)(rand() % available_action_length); choice = available_actions[choice]; return choice; @@ -118,9 +120,10 @@ short opponent_random_action(float *table, short *board, int state){ Results: - None. */ -void init_table(float *table){ - for (int i=0; i