From 605d9f6dd9f959d51a11d134dc6be09ba10b05c8 Mon Sep 17 00:00:00 2001 From: snsd0805 Date: Fri, 2 Jun 2023 03:27:55 +0800 Subject: [PATCH 1/6] feat: Chaining hash table --- hash-table.c | 134 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 hash-table.c diff --git a/hash-table.c b/hash-table.c new file mode 100644 index 0000000..6ba4f70 --- /dev/null +++ b/hash-table.c @@ -0,0 +1,134 @@ +#include +#include +#include +#include +#include + +#define TABLE_SIZE 10 + +struct Node { + char key[48]; + int value; + struct Node *next; +}; + +long long hash_function(char *key) { + long long hash = 0; + for (int i=0; ikey, key); + node->value = value; + node->next = NULL; + + if (table[hash] == NULL){ + table[hash] = node; + printf("Create.\n"); + } else { + printf("Add.\n"); + temp = table[hash]; + past = NULL; + while(temp != NULL){ + assert(temp->key != key); + printf("%s -> ", temp->key); + past = temp; + temp = temp->next; + } + printf("\n"); + past->next = node; + } +} + +void long_to_str(long long num, char *s, int length) { + int temp; + for (int i=length-1; i>=0; i--) { + temp = num % 10; + num /= 10; + s[i] = (char)(temp + 48); + } +} + +int search(struct Node **table, char *key) { + long long hash = hash_function(key); + struct Node *temp, *past; + + if (table[hash] == NULL){ + return -1; + } else { + temp = table[hash]; + past = NULL; + + while(temp != NULL){ + // printf("%s - %s\n", temp->key, key); + if (strcmp(temp->key, key) == 0){ + return temp->value; + } + past = temp; + temp = temp->next; + } + return -1; + } +} + +void update(struct Node **table, char *key, int value) { + long long hash = hash_function(key); + struct Node *temp, *past; + + temp = table[hash]; + past = NULL; + while(temp != NULL){ + if (strcmp(temp->key, key) == 0){ + temp->value = value; + break; + } + past = temp; + temp = temp->next; + } +} + +int main(){ + struct Node ** table; // pointer to pointer + int size; + srand(time(NULL)); + + table = malloc(TABLE_SIZE * sizeof(struct Node*)); + for (int i=0; i "); + scanf("%lli", &a); + printf("HERE\n"); + long_to_str(a, s, 20); + printf("HERE\n"); + + update(table, s, 100); + ans = search(table, s); + printf("%d\n\n", ans); + } + // long long a = hash_function("9999999999999"); + // printf("%lli\n", a); +} \ No newline at end of file From 821bc5727f6e1457c8c8995c19df172c21ca5273 Mon Sep 17 00:00:00 2001 From: snsd0805 Date: Fri, 2 Jun 2023 15:47:02 +0800 Subject: [PATCH 2/6] feat: set up 'four in a row' enviroment --- constant.h | 3 ++ enviroment.c | 145 ++++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 119 insertions(+), 29 deletions(-) diff --git a/constant.h b/constant.h index 3c0bd69..fdd0e00 100644 --- a/constant.h +++ b/constant.h @@ -9,3 +9,6 @@ #define ACTION_NUM 9 #define EPISODE_NUM 100000 #define FIRST true + +#define ROW_NUM 6 +#define COL_NUM 7 \ No newline at end of file diff --git a/enviroment.c b/enviroment.c index 6f5b721..83c984d 100644 --- a/enviroment.c +++ b/enviroment.c @@ -14,13 +14,13 @@ short PATHS[8][3] = { Reset the game, clear the chessboard. Args: - - short *board (array's address): chessboard's status + - short *board (array's start address): chessboard's status Results: - None, set all blocks on the chessboard to zero. */ void reset(short* board){ - for (short i=0; i<9; i++) + for (short i=0; i<(ROW_NUM*COL_NUM); i++) board[i] = 0; } @@ -35,22 +35,23 @@ void reset(short* board){ */ void show(short *board){ short loc; - printf("┼───┼───┼───┼\n"); - for (short i=0; i<3; i++){ - printf("│ "); - for (short j=0; j<3; j++){ - loc = 3*i+j; - if (board[loc] == 0) - printf(" │ "); - else if (board[loc] == BOT_SYMBOL) - printf("○ │ "); - else - printf("✕ │ "); - } - printf("\n"); - printf("┼───┼───┼───┼\n"); - } - printf("\n\n"); + for (short i=0; i=0; i--){ + if (board[i] == BOT_SYMBOL) { + printf("● "); + } else if(board[i] == OPPONENT_SYMBOL) { + printf("◴ "); + } else { + printf("◌ "); + } + if (i%COL_NUM == 0){ + printf("\n"); + } + } + printf("\n\n"); } /* @@ -66,12 +67,33 @@ void show(short *board){ */ void get_available_actions(short *board, short *result, short *length){ short index = 0; - for (int i=0; i<9; i++) - if (board[i] == 0) + for (int i=0; i= ROW_NUM) || (row < 0)) { + return -1; + } + if ((col >= COL_NUM) || (col < 0)) { + return -1; + } + return board[row*COL_NUM+col]; +} + /* Return winner's number; @@ -80,14 +102,58 @@ void get_available_actions(short *board, short *result, short *length){ Results: - short winner_number(integer): winner's number, 0 for no winner now, 1 for Bot, 2 for opponent + + board's coodinate diagram + ^ + | 5 + | 4 + | 3 + | 2 + | 1 + | 0 + <----------------------------- + 6 5 4 3 2 1 0 | */ short get_winner(short *board){ - int a, b, c; - for (int i=0; i<8; i++){ - a = PATHS[i][0]; b = PATHS[i][1]; c = PATHS[i][2]; - if ((board[a] == board[b]) && (board[b] == board[c]) && (board[a] != 0)){ - return board[a]; - } + short a, b, c, d; + for (short i=0; iloc)); + while (*ptr == 0) { + // printf("%d ", *ptr); + ptr -= COL_NUM; + } + *(ptr+COL_NUM) = a->player; +} /* Act on the chessboard. @@ -127,10 +213,11 @@ int state_hash(short *board){ */ void act(short *board, struct action *a, int *state, float *reward, float *opponent_reward, short *winner){ // printf("Act( player=%d, action=%d )\n", a->player, a->loc); - assert(board[a->loc] == 0); - board[a->loc] = a->player; + assert(board[(ROW_NUM*COL_NUM-1)-(a->loc)] == 0); + + fall(board, a); *winner = get_winner(board); - *state = state_hash(board); + // *state = state_hash(board); if (*winner == a->player){ *reward = 1.0; *opponent_reward = -1.0; From 7fcadce548fb63afb0ed6e2b4b08a77004527107 Mon Sep 17 00:00:00 2001 From: snsd0805 Date: Fri, 2 Jun 2023 16:52:59 +0800 Subject: [PATCH 3/6] feat: Big num for state representation --- bignum.c | 41 +++++++++++++++++++++++++++++++++++++++++ bignum.h | 7 +++++++ 2 files changed, 48 insertions(+) create mode 100644 bignum.c create mode 100644 bignum.h diff --git a/bignum.c b/bignum.c new file mode 100644 index 0000000..7d63a45 --- /dev/null +++ b/bignum.c @@ -0,0 +1,41 @@ +#include +#include +#include "bignum.h" + +struct BigNum long_to_BigNum(long long num) { + struct BigNum ans; + int temp; + for (int i=BIGNUM_LEN-1; i>=0; i--) { + temp = num % 10; + num /= 10; + ans.num[i] = (char)(temp + 48); + } + return ans; +} + +struct BigNum add(struct BigNum a, struct BigNum b) { + struct BigNum ans; + bool carry; + short s; + for (short i=BIGNUM_LEN-1; i>=0; i--) { + s = (a.num[i]-48) + (b.num[i]-48) + carry; + carry = s / 10; + s %= 10; + ans.num[i] = (char)(s+48); + } + return ans; +} + +struct BigNum mul(struct BigNum a, int b) { + struct BigNum ans; + short s, carry; + + for (short i=BIGNUM_LEN-1; i>=0; i--) { + s = (a.num[i]-48) * b + carry; + carry = s / 10; + s %= 10; + ans.num[i] = (char)(s+48); + } + + return ans; +} \ No newline at end of file diff --git a/bignum.h b/bignum.h new file mode 100644 index 0000000..5e894e8 --- /dev/null +++ b/bignum.h @@ -0,0 +1,7 @@ +#include "constant.h" +struct BigNum { + char num[BIGNUM_LEN]; +}; +struct BigNum long_to_BigNum(long long num); +struct BigNum add(struct BigNum a, struct BigNum b); +struct BigNum mul(struct BigNum a, int b); \ No newline at end of file From b024eec8e4a3150acae1342cdd5fe67031d04527 Mon Sep 17 00:00:00 2001 From: snsd0805 Date: Fri, 2 Jun 2023 20:19:46 +0800 Subject: [PATCH 4/6] feat: calculate state hash --- bignum.c | 10 ++++---- bignum.h | 3 ++- constant.h | 4 +++- enviroment.c | 64 +++++++++++++++++++++++++++++++++++----------------- enviroment.h | 4 ++-- 5 files changed, 56 insertions(+), 29 deletions(-) diff --git a/bignum.c b/bignum.c index 7d63a45..5f65ed0 100644 --- a/bignum.c +++ b/bignum.c @@ -1,6 +1,7 @@ #include #include #include "bignum.h" +#include "constant.h" struct BigNum long_to_BigNum(long long num) { struct BigNum ans; @@ -15,8 +16,8 @@ struct BigNum long_to_BigNum(long long num) { struct BigNum add(struct BigNum a, struct BigNum b) { struct BigNum ans; - bool carry; - short s; + short s, carry=0; + for (short i=BIGNUM_LEN-1; i>=0; i--) { s = (a.num[i]-48) + (b.num[i]-48) + carry; carry = s / 10; @@ -28,14 +29,15 @@ struct BigNum add(struct BigNum a, struct BigNum b) { struct BigNum mul(struct BigNum a, int b) { struct BigNum ans; - short s, carry; + short s, carry=0; for (short i=BIGNUM_LEN-1; i>=0; i--) { s = (a.num[i]-48) * b + carry; carry = s / 10; s %= 10; ans.num[i] = (char)(s+48); + // printf("index(%hd): %c\n", i, (char)(s+48)); } return ans; -} \ No newline at end of file +} diff --git a/bignum.h b/bignum.h index 5e894e8..afd4c60 100644 --- a/bignum.h +++ b/bignum.h @@ -1,6 +1,7 @@ #include "constant.h" + struct BigNum { - char num[BIGNUM_LEN]; + char num[BIGNUM_LEN+1]; }; struct BigNum long_to_BigNum(long long num); struct BigNum add(struct BigNum a, struct BigNum b); diff --git a/constant.h b/constant.h index fdd0e00..bf73899 100644 --- a/constant.h +++ b/constant.h @@ -11,4 +11,6 @@ #define FIRST true #define ROW_NUM 6 -#define COL_NUM 7 \ No newline at end of file +#define COL_NUM 7 + +#define BIGNUM_LEN 22 \ No newline at end of file diff --git a/enviroment.c b/enviroment.c index 83c984d..19a4321 100644 --- a/enviroment.c +++ b/enviroment.c @@ -3,11 +3,18 @@ #include #include "constant.h" #include "enviroment.h" +#include "bignum.h" -short PATHS[8][3] = { - {0, 1, 2}, {3, 4, 5}, {6, 7, 8}, - {0, 3, 6}, {1, 4, 7}, {2, 5, 8}, - {0, 4, 8}, {2, 4, 6} +struct BigNum POWs[42] = { + "0000000000000000000001", "0000000000000000000003", "0000000000000000000009", "0000000000000000000027", "0000000000000000000081", + "0000000000000000000243", "0000000000000000000729", "0000000000000000002187", "0000000000000000006561", "0000000000000000019683", + "0000000000000000059049", "0000000000000000177147", "0000000000000000531441", "0000000000000001594323", "0000000000000004782969", + "0000000000000014348907", "0000000000000043046721", "0000000000000129140163", "0000000000000387420489", "0000000000001162261467", + "0000000000003486784401", "0000000000010460353203", "0000000000031381059609", "0000000000094143178827", "0000000000282429536481", + "0000000000847288609443", "0000000002541865828329", "0000000007625597484987", "0000000022876792454961", "0000000068630377364883", + "0000000205891132094649", "0000000617673396283947", "0000001853020188851841", "0000005559060566555523", "0000016677181699666569", + "0000050031545098999707", "0000150094635296999121", "0000450283905890997363", "0001350851717672992089", "0004052555153018976267", + "0012157665459056928801", "0036472996377170786403" }; /* @@ -123,7 +130,7 @@ short get_winner(short *board){ b = get_loc_status(board, i, j+1); c = get_loc_status(board, i, j+2); d = get_loc_status(board, i, j+3); - if ((a == b) && (b == c) && (c == d)) { + if ((a == b) && (b == c) && (c == d) && (a!=0)) { return a; } @@ -132,7 +139,7 @@ short get_winner(short *board){ b = get_loc_status(board, i+1, j); c = get_loc_status(board, i+2, j); d = get_loc_status(board, i+3, j); - if ((a == b) && (b == c) && (c == d)) { + if ((a == b) && (b == c) && (c == d) && (a!=0)) { return a; } @@ -141,7 +148,7 @@ short get_winner(short *board){ b = get_loc_status(board, i+1, j-1); c = get_loc_status(board, i+2, j-2); d = get_loc_status(board, i+3, j-3); - if ((a == b) && (b == c) && (c == d)) { + if ((a == b) && (b == c) && (c == d) && (a!=0)) { return a; } @@ -150,7 +157,7 @@ short get_winner(short *board){ b = get_loc_status(board, i+1, j+1); c = get_loc_status(board, i+2, j+2); d = get_loc_status(board, i+3, j+3); - if ((a == b) && (b == c) && (c == d)) { + if ((a == b) && (b == c) && (c == d) && (a!=0)) { return a; } } @@ -163,19 +170,34 @@ short get_winner(short *board){ Args: - short *board (array's address): chessboard's status + - char *hash (a string): size is BIGNUM_LEN, the hash will be wrote here Results: - - int hash (integer): chessboard's status in i-th block * pow(3, i) - - =========================================== Use big number ==================================================== + - None. */ -int state_hash(short *board){ - int base, hash = 0; - for (int i=0; i<9; i++){ - base = pow(3, i); - hash += (base * board[i]); +void state_hash(short *board, char *hash){ + struct BigNum sum, temp; + for (short i=0; iloc)); - while (*ptr == 0) { + while ((*ptr == 0) && (ptr>=board)) { // printf("%d ", *ptr); ptr -= COL_NUM; } @@ -203,7 +225,7 @@ void fall(short *board, struct action *a) { Args: - short *board (array's address): chessboards' status - struct action *a (a action's pointer): include player & choose loc - - int *state (pointer): for return. To save the chessboard's state hash which after doing this action + - char *state (a string): for return. To save the chessboard's state hash which after doing this action - float *reward (pointer): for return. To save the number of rewards which the player gets after doing this action. - float *opponent_reward (pointer): for return. To save the number of rewards which the opponents gets after the player doing this action. - short *winner (pointer): for return. To save the winner in this action. If haven't finish, it will be zero. @@ -211,13 +233,13 @@ void fall(short *board, struct action *a) { Results: - None. Save in state & reward & winner */ -void act(short *board, struct action *a, int *state, float *reward, float *opponent_reward, short *winner){ +void act(short *board, struct action *a, char *state, float *reward, float *opponent_reward, short *winner){ // printf("Act( player=%d, action=%d )\n", a->player, a->loc); assert(board[(ROW_NUM*COL_NUM-1)-(a->loc)] == 0); fall(board, a); *winner = get_winner(board); - // *state = state_hash(board); + state_hash(board, state); if (*winner == a->player){ *reward = 1.0; *opponent_reward = -1.0; diff --git a/enviroment.h b/enviroment.h index 4a27df6..d915e97 100644 --- a/enviroment.h +++ b/enviroment.h @@ -7,5 +7,5 @@ void reset(short* board); void show(short *board); void get_available_actions(short *board, short *result, short *length); short get_winner(short *board); -int state_hash(short *board); -void act(short *board, struct action *a, int *state, float *reward, float *opponent_reward, short *winner); +void state_hash(short *board, char *hash); +void act(short *board, struct action *a, char *state, float *reward, float *opponent_reward, short *winner); From 0fb79b3e1ee0fae460a54f0fa616e78142ea1ab2 Mon Sep 17 00:00:00 2001 From: snsd0805 Date: Fri, 2 Jun 2023 23:34:37 +0800 Subject: [PATCH 5/6] feat: change q-learning method to fit 'on 4 in a row' --- constant.h | 7 +-- hash-table.c | 90 +++++++------------------------ hash-table.h | 13 +++++ main.c | 22 +++++--- q-learning.c | 146 ++++++++++++++++++++++++++++++++------------------- q-learning.h | 11 ++-- 6 files changed, 148 insertions(+), 141 deletions(-) create mode 100644 hash-table.h diff --git a/constant.h b/constant.h index bf73899..5452100 100644 --- a/constant.h +++ b/constant.h @@ -6,11 +6,12 @@ #define LAMBDA 0.9 // discount factor #define STATE_NUM 19683 -#define ACTION_NUM 9 -#define EPISODE_NUM 100000 +#define ACTION_NUM 7 +#define EPISODE_NUM 1000000 #define FIRST true #define ROW_NUM 6 #define COL_NUM 7 -#define BIGNUM_LEN 22 \ No newline at end of file +#define BIGNUM_LEN 22 +#define TABLE_SIZE 1000000000 diff --git a/hash-table.c b/hash-table.c index 6ba4f70..1045882 100644 --- a/hash-table.c +++ b/hash-table.c @@ -3,14 +3,8 @@ #include #include #include - -#define TABLE_SIZE 10 - -struct Node { - char key[48]; - int value; - struct Node *next; -}; +#include "hash-table.h" +#include "constant.h" long long hash_function(char *key) { long long hash = 0; @@ -20,115 +14,69 @@ long long hash_function(char *key) { return hash ; } -void insert(struct Node **table, char *key, int value) { +void insert(struct Node **table, char *key) { long long hash = hash_function(key); - printf("Hash: %lli\n", hash); struct Node *node = malloc(sizeof(struct Node)); struct Node *temp, *past; strcpy(node->key, key); - node->value = value; + // init + for (short i=0; ivalue[i] = 0.0; + } node->next = NULL; if (table[hash] == NULL){ table[hash] = node; - printf("Create.\n"); } else { - printf("Add.\n"); temp = table[hash]; past = NULL; while(temp != NULL){ - assert(temp->key != key); - printf("%s -> ", temp->key); + assert(strcmp(temp->key, key)!=0); past = temp; temp = temp->next; } - printf("\n"); past->next = node; } } -void long_to_str(long long num, char *s, int length) { - int temp; - for (int i=length-1; i>=0; i--) { - temp = num % 10; - num /= 10; - s[i] = (char)(temp + 48); - } -} - -int search(struct Node **table, char *key) { +void search(struct Node **table, char *key, bool *find, float *ans) { long long hash = hash_function(key); struct Node *temp, *past; + *find = false; - if (table[hash] == NULL){ - return -1; - } else { + if (table[hash] != NULL){ temp = table[hash]; past = NULL; while(temp != NULL){ - // printf("%s - %s\n", temp->key, key); if (strcmp(temp->key, key) == 0){ - return temp->value; + *find = true; + for (short i=0; ivalue[i]; + } + break; } past = temp; temp = temp->next; } - return -1; } } -void update(struct Node **table, char *key, int value) { +void update(struct Node **table, char *key, short action, float value) { long long hash = hash_function(key); struct Node *temp, *past; + assert(table[hash]!=NULL); temp = table[hash]; past = NULL; while(temp != NULL){ if (strcmp(temp->key, key) == 0){ - temp->value = value; + temp->value[action] = value; break; } past = temp; temp = temp->next; } -} - -int main(){ - struct Node ** table; // pointer to pointer - int size; - srand(time(NULL)); - - table = malloc(TABLE_SIZE * sizeof(struct Node*)); - for (int i=0; i "); - scanf("%lli", &a); - printf("HERE\n"); - long_to_str(a, s, 20); - printf("HERE\n"); - - update(table, s, 100); - ans = search(table, s); - printf("%d\n\n", ans); - } - // long long a = hash_function("9999999999999"); - // printf("%lli\n", a); } \ No newline at end of file diff --git a/hash-table.h b/hash-table.h new file mode 100644 index 0000000..e47e916 --- /dev/null +++ b/hash-table.h @@ -0,0 +1,13 @@ +#include "constant.h" +#include + +struct Node { + char key[BIGNUM_LEN+1]; + float value[ACTION_NUM]; + struct Node *next; +}; + +long long hash_function(char *key); +void insert(struct Node **table, char *key); +void search(struct Node **table, char *key, bool *find, float *ans); +void update(struct Node **table, char *key, short action, float value); \ No newline at end of file diff --git a/main.c b/main.c index 202c16c..9fea68c 100644 --- a/main.c +++ b/main.c @@ -7,13 +7,21 @@ #include "q-learning.h" int main(){ - short board[9]= {0}; // tic tac toe's chessboard - float table[STATE_NUM][ACTION_NUM]; // q-learning table + short board[ROW_NUM][COL_NUM]= {0}; + short winner; + struct Node ** map; // pointer to pointer, hash table + bool find; + float state[ACTION_NUM]; - srand(time(NULL)); - init_table(&table[0][0]); + srand(time(NULL)); - run(&table[0][0], board, false, 10000, false); - run(&table[0][0], board, true, EPISODE_NUM, false); - run(&table[0][0], board, false, 10000, false); + // init hash table + map = malloc(TABLE_SIZE * sizeof(struct Node*)); + for (int i=0; i EPSILON -> random a action @@ -83,17 +97,15 @@ short bot_choose_action(float *table, short *board, int state){ Opponent random choose a action to do. Args: - - short *table (array's address): state table for Q-Learning - short *board (array's address): chessboards' status - - int state (integer, state hash): hash for board's status Results: - short choice (integer): random, -1 means no available action to choose */ -short opponent_random_action(float *table, short *board, int state){ +short opponent_random_action(short *board){ // get available actions for choosing - short available_actions[9]; + short available_actions[ACTION_NUM]; short available_action_length; get_available_actions(board, available_actions, &available_action_length); @@ -109,22 +121,24 @@ short opponent_random_action(float *table, short *board, int state){ return choice; } -/* - Inilialize the Q-Table +// Use Hash Table, so we needn't initilize Q-Table +// +// /* +// Inilialize the Q-Table - Args: - - float *table (two-dim array's start address) +// Args: +// - float *table (two-dim array's start address) - Results: - - None. -*/ -void init_table(float *table){ - for (int i=0; i Date: Fri, 2 Jun 2023 23:36:06 +0800 Subject: [PATCH 6/6] fix: update Makefile --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 5f679e7..fb9d2e7 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ all: a.out -a.out: main.c enviroment.c enviroment.h q-learning.c q-learning.h constant.h - gcc main.c enviroment.c q-learning.c -lm +a.out: main.c enviroment.c enviroment.h q-learning.c q-learning.h bignum.c bignum.h hash-table.c hash-table.h constant.h + gcc main.c enviroment.c q-learning.c bignum.c constant.h hash-table.c -lm run: ./a.out