style(format): run clang-format

2023-05-31 11:31:15 +08:00 · 2023-05-31 11:31:15 +08:00 · 7a68a06c86
commit 7a68a06c86
parent 7ba9db7f83
6 changed files with 227 additions and 214 deletions
--- a/enviroment.c
+++ b/enviroment.c
@ -1,13 +1,13 @@
 #include <stdio.h>
 #include <math.h>
 #include <assert.h>
 #include "constant.h"
 #include "enviroment.h"
 #include "constant.h"
 #include <assert.h>
 #include <math.h>
 #include <stdio.h>
 /* The eight three-cell lines scanned by get_winner(), stored as board indices
    (show() maps row i, column j to index 3 * i + j). */
 short PATHS[8][3] = {
-	{0, 1, 2}, {3, 4, 5}, {6, 7, 8},
+    { 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 }, /* the three rows */
-	{0, 3, 6}, {1, 4, 7}, {2, 5, 8},
+    { 0, 3, 6 }, { 1, 4, 7 }, { 2, 5, 8 }, /* the three columns */
-	{0, 4, 8}, {2, 4, 6}
+    { 0, 4, 8 }, { 2, 4, 6 } /* the two diagonals */
 };
 /*
@ -19,8 +19,9 @@ short PATHS[8][3] = {
    Results:
        - None, set all blocks on the chessboard to zero.
 */
-void reset(short* board){
+void reset(short* board)
-	for (short i=0; i<9; i++)
+{
    /* Zero all 9 cells: 0 is the value get_available_actions() reports as
       playable and act() asserts before placing a symbol. */
    for (short i = 0; i < 9; i++)
        board[i] = 0;
 }
@ -33,13 +34,14 @@ void reset(short* board){
    Results:
        - None. Only printing.
 */
-void show(short *board){
+void show(short* board)
 {
    short loc;
    printf("┼───┼───┼───┼\n");
-	for (short i=0; i<3; i++){
+    for (short i = 0; i < 3; i++) {
        printf("│ ");
-		for (short j=0; j<3; j++){
+        for (short j = 0; j < 3; j++) {
-			loc = 3*i+j;
+            loc = 3 * i + j;
            if (board[loc] == 0)
                printf("  │ ");
            else if (board[loc] == BOT_SYMBOL)
@ -64,9 +66,10 @@ void show(short *board){
    Results:
        - None. All available actions are saved into "result" and the number of actions is saved in "length"
 */
-void get_available_actions(short *board, short *result, short *length){
+void get_available_actions(short* board, short* result, short* length)
 {
    short index = 0;
-	for (int i=0; i<9; i++)
+    for (int i = 0; i < 9; i++)
        if (board[i] == 0)
            result[index++] = i;
    *length = index;
@ -81,11 +84,14 @@ void get_available_actions(short *board, short *result, short *length){
    Results:
        - short winner_number(integer): winner's number, 0 for no winner now, 1 for Bot, 2 for opponent
 */
-short get_winner(short *board){
+short get_winner(short* board)
 {
    int a, b, c;
-	for (int i=0; i<8; i++){
+    for (int i = 0; i < 8; i++) {
-		a = PATHS[i][0]; b = PATHS[i][1]; c = PATHS[i][2];
+        a = PATHS[i][0];
-		if ((board[a] == board[b]) && (board[b] == board[c]) && (board[a] != 0)){
+        b = PATHS[i][1];
        c = PATHS[i][2];
        if ((board[a] == board[b]) && (board[b] == board[c]) && (board[a] != 0)) {
            return board[a];
        }
    }
@ -101,16 +107,16 @@ short get_winner(short *board){
    Results:
        - int hash (integer): sum over all 9 blocks of (value of the i-th block * 3^i)
 */
/*
    Encode the board as a single integer by reading its 9 cells as base-3
    digits, with cell 0 as the least-significant digit:

        hash = sum over i in [0, 9) of board[i] * 3^i

    The power of three is carried as an exact integer. The previous version
    called pow(3, i) and stored the double in an int; that conversion
    truncates toward zero, so a libm returning a value just below the exact
    power would produce a wrong hash.

    Arguments:
        - short* board: 9 cells; only read, never modified.
    Results:
        - int hash: 0 when every cell is 0, 19682 (3^9 - 1) when every cell is 2.
*/
int state_hash(short* board)
{
    int hash = 0;
    int weight = 1; /* 3^i for the current cell */
    for (int i = 0; i < 9; i++) {
        hash += weight * board[i];
        weight *= 3; /* ends at 3^9 = 19683, well inside int range */
    }
    return hash;
}
 /*
    Act on the chessboard.
@ -125,21 +131,20 @@ int state_hash(short *board){
    Results:
        - None. Save in state & reward & winner
 */
-void act(short *board, struct action *a, int *state, float *reward, float *opponent_reward, short *winner){
+void act(short* board, struct action* a, int* state, float* reward, float* opponent_reward, short* winner)
 {
    // printf("Act( player=%d, action=%d )\n", a->player, a->loc);
    assert(board[a->loc] == 0);
    board[a->loc] = a->player;
    *winner = get_winner(board);
    *state = state_hash(board);
-	if (*winner == a->player){
+    if (*winner == a->player) {
        *reward = 1.0;
        *opponent_reward = -1.0;
-    }
+    } else if (*winner != 0) {
 	else if(*winner != 0){
        *reward = -1.0;
        *opponent_reward = 1.0;
-    }
+    } else {
 	else{
        *reward = 0;
        *opponent_reward = 0;
    }
--- a/enviroment.h
+++ b/enviroment.h
@ -1,11 +1,11 @@
-struct action{
+struct action {
    /* One move, as consumed by act(). */
    short player; /* symbol act() stores into the chosen cell and compares with the winner */
    short loc; /* board index to play; act() asserts that this cell is 0 beforehand */
 };
 void reset(short* board);
-void show(short *board);
+void show(short* board);
-void get_available_actions(short *board, short *result, short *length);
+void get_available_actions(short* board, short* result, short* length);
-short get_winner(short *board);
+short get_winner(short* board);
-int state_hash(short *board);
+int state_hash(short* board);
-void act(short *board, struct action *a, int *state, float *reward, float *opponent_reward, short *winner);
+void act(short* board, struct action* a, int* state, float* reward, float* opponent_reward, short* winner);
--- a/main.c
+++ b/main.c
@ -1,13 +1,14 @@
 #include <stdio.h>
 #include <time.h>
 #include <stdlib.h>
 #include <stdbool.h>
 #include "constant.h"
 #include "enviroment.h"
 #include "q-learning.h"
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <time.h>
-int main(){
+int main()
-	short board[9]= {0};			        // tic tac toe's chessboard
+{
    short board[9] = { 0 }; // tic tac toe's chessboard
    float table[STATE_NUM][ACTION_NUM]; // q-learning table
    srand(time(NULL));
--- a/q-learning.c
+++ b/q-learning.c
@ -1,7 +1,7 @@
 #include <stdio.h>
 #include <float.h>
 #include <stdbool.h>
 #include <limits.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include "constant.h"
@ -17,10 +17,11 @@
    Results:
        - short index (integer): the index with the max value
 */
-short float_argmax(float *arr, short length){
+short float_argmax(float* arr, short length)
 {
    float ans = -1, max = -FLT_MAX;
-	for (short i=0; i<length; i++){
+    for (short i = 0; i < length; i++) {
-		if (arr[i] > max){
+        if (arr[i] > max) {
            max = arr[i];
            ans = i;
        }
@ -28,7 +29,6 @@ short float_argmax(float *arr, short length){
    return ans;
 }
 /*
    Choose the next action with Epsilon-Greedy.
    EPSILON is the probability of choosing the best-valued action for this state from the Q-table.
@ -42,7 +42,8 @@ short float_argmax(float *arr, short length){
    Results:
        - short best_choice
 */
-short bot_choose_action(float *table, short *board, int state){
+short bot_choose_action(float* table, short* board, int state)
 {
    // get available actions for choosing
    short available_actions[9];
@ -56,10 +57,10 @@ short bot_choose_action(float *table, short *board, int state){
    short available_actions_state_length, index = 0;
    short temp_index, best_choice;
    bool zeros = true;
-	for (short i=0; i<available_actions_length; i++){
+    for (short i = 0; i < available_actions_length; i++) {
        temp_index = available_actions[i];
        available_actions_state[index] = *(table + state * ACTION_NUM + temp_index);
-		if (available_actions_state[index] != 0.0){
+        if (available_actions_state[index] != 0.0) {
            zeros = false;
        }
        available_actions_state_index[index] = temp_index;
@ -71,9 +72,9 @@ short bot_choose_action(float *table, short *board, int state){
    // Epsilon-Greedy
    // If random number > EPSILON   ->   random a action
    // If random number < EPSILON   ->   choose the best action in this state.
-	double random_num = (double) rand() / (RAND_MAX + 1.0);
+    double random_num = (double)rand() / (RAND_MAX + 1.0);
-	if ((random_num > EPSILON) || zeros){
+    if ((random_num > EPSILON) || zeros) {
-		best_choice = available_actions_state_index[ rand() % index ];
+        best_choice = available_actions_state_index[rand() % index];
    }
    return best_choice;
@ -90,20 +91,21 @@ short bot_choose_action(float *table, short *board, int state){
    Results:
        - short choice (integer): random, -1 means no available action to choose
 */
-short opponent_random_action(float *table, short *board, int state){
+short opponent_random_action(float* table, short* board, int state)
 {
    // get available actions for choosing
    short available_actions[9];
    short available_action_length;
    get_available_actions(board, available_actions, &available_action_length);
-    if (available_action_length == 0){
+    if (available_action_length == 0) {
        return -1;
    }
    // random
    short choice;
-    choice = (short)( rand() % available_action_length );
+    choice = (short)(rand() % available_action_length);
    choice = available_actions[choice];
    return choice;
@ -118,9 +120,10 @@ short opponent_random_action(float *table, short *board, int state){
    Results:
        - None.
 */
-void init_table(float *table){
+void init_table(float* table)
-    for (int i=0; i<STATE_NUM; i++){
+{
-        for (int j=0; j<ACTION_NUM; j++){
+    for (int i = 0; i < STATE_NUM; i++) {
        for (int j = 0; j < ACTION_NUM; j++) {
            *(table + i * ACTION_NUM + j) = 0;
        }
    }
@ -137,13 +140,14 @@ void init_table(float *table){
    Results:
        - float max_reward
 */
-float get_estimate_reward(float *table, short *board, int state){
+float get_estimate_reward(float* table, short* board, int state)
 {
    short available_actions[9];
    short available_action_length;
    get_available_actions(board, available_actions, &available_action_length);
    float available_actions_state[9];
-    for (short i=0; i<available_action_length; i++){
+    for (short i = 0; i < available_action_length; i++) {
        available_actions_state[i] = *(table + state * ACTION_NUM + available_actions[i]); // table[state][available_actions[i]]
    }
@ -165,7 +169,8 @@ float get_estimate_reward(float *table, short *board, int state){
    Results:
        - None
 */
-void run(float *table, short *board, bool train, int times, bool plot){
+void run(float* table, short* board, bool train, int times, bool plot)
 {
    short available_actions[9];
    short available_actions_length;
    short winner;
@ -176,10 +181,10 @@ void run(float *table, short *board, bool train, int times, bool plot){
    int win = 0;
-    for (int episode=0; episode<times; episode++){
+    for (int episode = 0; episode < times; episode++) {
        reset(board);
        state = state_hash(board);
-        while (1){
+        while (1) {
            // bot choose the action
            choice = bot_choose_action(table, board, state);
            a.loc = choice;
@ -187,22 +192,24 @@ void run(float *table, short *board, bool train, int times, bool plot){
            estimate_r = *(table + state * ACTION_NUM + choice);
            act(board, &a, &_state, &r, &opponent_r, &winner);
-            if (plot) show(board);
+            if (plot)
                show(board);
            // opponent random
-            if (winner == 0){
+            if (winner == 0) {
                opponent_choice = opponent_random_action(table, board, state_hash(board));
-                if (opponent_choice != -1){
+                if (opponent_choice != -1) {
                    a.loc = opponent_choice;
                    a.player = OPPONENT_SYMBOL;
                    act(board, &a, &_state, &opponent_r, &r, &winner);
-                    if (plot) show(board);
+                    if (plot)
                        show(board);
                }
            }
            get_available_actions(board, available_actions, &available_actions_length);
-            if ((winner != 0) || (available_actions_length == 0)){
+            if ((winner != 0) || (available_actions_length == 0)) {
-                if (plot){
+                if (plot) {
                    printf("winner: %d, reward: %f, oppo reward: %f\n", winner, r, opponent_r);
                    printf("==========================================================\n");
                }
@ -211,15 +218,15 @@ void run(float *table, short *board, bool train, int times, bool plot){
                estimate_r_ = get_estimate_reward(table, board, _state);
                real_r = r + LAMBDA * estimate_r_;
            }
-            if (train){
+            if (train) {
                // printf("update");
-                *(table + state * ACTION_NUM + choice) += ( LR * (real_r - estimate_r) );       // table[state][choice] += LR * (real_r - estimate_r)
+                *(table + state * ACTION_NUM + choice) += (LR * (real_r - estimate_r)); // table[state][choice] += LR * (real_r - estimate_r)
            }
            state = _state;
-            if ((winner != 0) || (available_actions_length == 0)){
+            if ((winner != 0) || (available_actions_length == 0)) {
                // printf("break\n");
-                if (winner == 1){
+                if (winner == 1) {
                    win += 1;
                }
                break;
@ -228,5 +235,5 @@ void run(float *table, short *board, bool train, int times, bool plot){
    }
    if (!train)
-        printf("%d/%d, %f\%\n", win, 10000, (float)win/10000);
+        printf("%d/%d, %f\%\n", win, 10000, (float)win / 10000);
 }
--- a/q-learning.h
+++ b/q-learning.h
@ -1,6 +1,6 @@
-short float_argmax(float *arr, short length);
+short float_argmax(float* arr, short length);
-short bot_choose_action(float *table, short *board, int state);
+short bot_choose_action(float* table, short* board, int state);
-short opponent_random_action(float *table, short *board, int state);
+short opponent_random_action(float* table, short* board, int state);
-void init_table(float *table);
+void init_table(float* table);
-float get_estimate_reward(float *table, short *board, int state);
+float get_estimate_reward(float* table, short* board, int state);
-void run(float *table, short *board, bool train, int times, bool plot);
+void run(float* table, short* board, bool train, int times, bool plot);