diff --git a/Makefile b/Makefile index 5f679e7..fb9d2e7 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ all: a.out -a.out: main.c enviroment.c enviroment.h q-learning.c q-learning.h constant.h - gcc main.c enviroment.c q-learning.c -lm +a.out: main.c enviroment.c enviroment.h q-learning.c q-learning.h bignum.c bignum.h hash-table.c hash-table.h constant.h + gcc main.c enviroment.c q-learning.c bignum.c constant.h hash-table.c -lm run: ./a.out diff --git a/bignum.c b/bignum.c new file mode 100644 index 0000000..5f65ed0 --- /dev/null +++ b/bignum.c @@ -0,0 +1,43 @@ +#include +#include +#include "bignum.h" +#include "constant.h" + +struct BigNum long_to_BigNum(long long num) { + struct BigNum ans; + int temp; + for (int i=BIGNUM_LEN-1; i>=0; i--) { + temp = num % 10; + num /= 10; + ans.num[i] = (char)(temp + 48); + } + return ans; +} + +struct BigNum add(struct BigNum a, struct BigNum b) { + struct BigNum ans; + short s, carry=0; + + for (short i=BIGNUM_LEN-1; i>=0; i--) { + s = (a.num[i]-48) + (b.num[i]-48) + carry; + carry = s / 10; + s %= 10; + ans.num[i] = (char)(s+48); + } + return ans; +} + +struct BigNum mul(struct BigNum a, int b) { + struct BigNum ans; + short s, carry=0; + + for (short i=BIGNUM_LEN-1; i>=0; i--) { + s = (a.num[i]-48) * b + carry; + carry = s / 10; + s %= 10; + ans.num[i] = (char)(s+48); + // printf("index(%hd): %c\n", i, (char)(s+48)); + } + + return ans; +} diff --git a/bignum.h b/bignum.h new file mode 100644 index 0000000..afd4c60 --- /dev/null +++ b/bignum.h @@ -0,0 +1,8 @@ +#include "constant.h" + +struct BigNum { + char num[BIGNUM_LEN+1]; +}; +struct BigNum long_to_BigNum(long long num); +struct BigNum add(struct BigNum a, struct BigNum b); +struct BigNum mul(struct BigNum a, int b); \ No newline at end of file diff --git a/constant.h b/constant.h index b8d2ce4..3476d4e 100644 --- a/constant.h +++ b/constant.h @@ -6,6 +6,12 @@ #define LAMBDA 0.9 // discount factor #define STATE_NUM 19683 -#define ACTION_NUM 9 -#define EPISODE_NUM 100000 +#define ACTION_NUM 7 +#define EPISODE_NUM 1000000 #define FIRST true + +#define ROW_NUM 6 +#define COL_NUM 7 + +#define BIGNUM_LEN 22 +#define TABLE_SIZE 1000000000 diff --git a/enviroment.c b/enviroment.c index f94e3cb..67d532d 100644 --- a/enviroment.c +++ b/enviroment.c @@ -1,28 +1,34 @@ -#include "enviroment.h" -#include "constant.h" -#include -#include #include +#include +#include +#include "constant.h" +#include "enviroment.h" +#include "bignum.h" -short PATHS[8][3] = { - { 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 }, - { 0, 3, 6 }, { 1, 4, 7 }, { 2, 5, 8 }, - { 0, 4, 8 }, { 2, 4, 6 } +struct BigNum POWs[42] = { + "0000000000000000000001", "0000000000000000000003", "0000000000000000000009", "0000000000000000000027", "0000000000000000000081", + "0000000000000000000243", "0000000000000000000729", "0000000000000000002187", "0000000000000000006561", "0000000000000000019683", + "0000000000000000059049", "0000000000000000177147", "0000000000000000531441", "0000000000000001594323", "0000000000000004782969", + "0000000000000014348907", "0000000000000043046721", "0000000000000129140163", "0000000000000387420489", "0000000000001162261467", + "0000000000003486784401", "0000000000010460353203", "0000000000031381059609", "0000000000094143178827", "0000000000282429536481", + "0000000000847288609443", "0000000002541865828329", "0000000007625597484987", "0000000022876792454961", "0000000068630377364883", + "0000000205891132094649", "0000000617673396283947", "0000001853020188851841", "0000005559060566555523", "0000016677181699666569", + "0000050031545098999707", "0000150094635296999121", "0000450283905890997363", "0001350851717672992089", "0004052555153018976267", + "0012157665459056928801", "0036472996377170786403" }; /* Reset the game, clear the chessboard. - Args: - - short *board (array's address): chessboard's status + Args: + - short *board (array's start address): chessboard's status Results: - None, set all blocks on the chessboard to zero. */ -void reset(short* board) -{ - for (short i = 0; i < 9; i++) - board[i] = 0; +void reset(short* board){ + for (short i=0; i<(ROW_NUM*COL_NUM); i++) + board[i] = 0; } /* @@ -34,23 +40,23 @@ void reset(short* board) Results: - None. Only printing. */ -void show(short* board) -{ - short loc; - printf("┼───┼───┼───┼\n"); - for (short i = 0; i < 3; i++) { - printf("│ "); - for (short j = 0; j < 3; j++) { - loc = 3 * i + j; - if (board[loc] == 0) - printf(" │ "); - else if (board[loc] == BOT_SYMBOL) - printf("○ │ "); - else - printf("✕ │ "); +void show(short *board){ + short loc; + for (short i=0; i=0; i--){ + if (board[i] == BOT_SYMBOL) { + printf("● "); + } else if(board[i] == OPPONENT_SYMBOL) { + printf("◴ "); + } else { + printf("◌ "); + } + if (i%COL_NUM == 0){ + printf("\n"); } - printf("\n"); - printf("┼───┼───┼───┼\n"); } printf("\n\n"); } @@ -66,80 +72,177 @@ void show(short* board) Results: - None. All available actions are saved into "result" and the number of actions is saved in "length" */ -void get_available_actions(short* board, short* result, short* length) -{ - short index = 0; - for (int i = 0; i < 9; i++) - if (board[i] == 0) - result[index++] = i; - *length = index; + +void get_available_actions(short *board, short *result, short *length){ + short index = 0; + for (int i=0; i= ROW_NUM) || (row < 0)) { + return -1; + } + if ((col >= COL_NUM) || (col < 0)) { + return -1; + } + return board[row*COL_NUM+col]; +} + +/* + Return winner's number; Args: - short *board (array's address): chessboard's status - Results: - - short winner_number(integer): winner's number, 0 for no winner now, 1 for Bot, 2 for opponent + Results: + - short winner_number(integer): winner's number, 0 for no winner now, 1 for Bot, 2 for opponent + + board's coodinate diagram + ^ + | 5 + | 4 + | 3 + | 2 + | 1 + | 0 + <----------------------------- + 6 5 4 3 2 1 0 | */ -short get_winner(short* board) -{ - int a, b, c; - for (int i = 0; i < 8; i++) { - a = PATHS[i][0]; - b = PATHS[i][1]; - c = PATHS[i][2]; - if ((board[a] == board[b]) && (board[b] == board[c]) && (board[a] != 0)) { - return board[a]; +short get_winner(short *board){ + short a, b, c, d; + for (short i=0; iloc)); + while ((*ptr == 0) && (ptr>=board)) { + // printf("%d ", *ptr); + ptr -= COL_NUM; } - return hash; + *(ptr+COL_NUM) = a->player; } /* Act on the chessboard. - Args: - - short *board (array's address): chessboards' status - - struct action *a (a action's pointer): include player & choose loc - - int *state (pointer): for return. To save the chessboard's state hash which after doing this action - - float *reward (pointer): for return. To save the number of rewards which the player gets after doing this action. - - float *opponent_reward (pointer): for return. To save the number of rewards which the opponents gets after the player doing this action. - - short *winner (pointer): for return. To save the winner in this action. If haven't finish, it will be zero. + Args: + - short *board (array's address): chessboards' status + - struct action *a (a action's pointer): include player & choose loc + - char *state (a string): for return. To save the chessboard's state hash which after doing this action + - float *reward (pointer): for return. To save the number of rewards which the player gets after doing this action. + - float *opponent_reward (pointer): for return. To save the number of rewards which the opponents gets after the player doing this action. + - short *winner (pointer): for return. To save the winner in this action. If haven't finish, it will be zero. Results: - None. Save in state & reward & winner */ -void act(short* board, struct action* a, int* state, float* reward, float* opponent_reward, short* winner) -{ +void act(short *board, struct action *a, char *state, float *reward, float *opponent_reward, short *winner){ // printf("Act( player=%d, action=%d )\n", a->player, a->loc); - assert(board[a->loc] == 0); - board[a->loc] = a->player; - *winner = get_winner(board); - *state = state_hash(board); - if (*winner == a->player) { - *reward = 1.0; + assert(board[(ROW_NUM*COL_NUM-1)-(a->loc)] == 0); + + fall(board, a); + *winner = get_winner(board); + state_hash(board, state); + if (*winner == a->player){ + *reward = 1.0; *opponent_reward = -1.0; } else if (*winner != 0) { *reward = -1.0; diff --git a/enviroment.h b/enviroment.h index 9b05f10..7335226 100644 --- a/enviroment.h +++ b/enviroment.h @@ -4,8 +4,8 @@ struct action { }; void reset(short* board); -void show(short* board); -void get_available_actions(short* board, short* result, short* length); -short get_winner(short* board); -int state_hash(short* board); -void act(short* board, struct action* a, int* state, float* reward, float* opponent_reward, short* winner); +void show(short *board); +void get_available_actions(short *board, short *result, short *length); +short get_winner(short *board); +void state_hash(short *board, char *hash); +void act(short *board, struct action *a, char *state, float *reward, float *opponent_reward, short *winner); diff --git a/hash-table.c b/hash-table.c new file mode 100644 index 0000000..1045882 --- /dev/null +++ b/hash-table.c @@ -0,0 +1,82 @@ +#include +#include +#include +#include +#include +#include "hash-table.h" +#include "constant.h" + +long long hash_function(char *key) { + long long hash = 0; + for (int i=0; ikey, key); + // init + for (short i=0; ivalue[i] = 0.0; + } + node->next = NULL; + + if (table[hash] == NULL){ + table[hash] = node; + } else { + temp = table[hash]; + past = NULL; + while(temp != NULL){ + assert(strcmp(temp->key, key)!=0); + past = temp; + temp = temp->next; + } + past->next = node; + } +} + +void search(struct Node **table, char *key, bool *find, float *ans) { + long long hash = hash_function(key); + struct Node *temp, *past; + *find = false; + + if (table[hash] != NULL){ + temp = table[hash]; + past = NULL; + + while(temp != NULL){ + if (strcmp(temp->key, key) == 0){ + *find = true; + for (short i=0; ivalue[i]; + } + break; + } + past = temp; + temp = temp->next; + } + } +} + +void update(struct Node **table, char *key, short action, float value) { + long long hash = hash_function(key); + struct Node *temp, *past; + assert(table[hash]!=NULL); + + temp = table[hash]; + past = NULL; + while(temp != NULL){ + if (strcmp(temp->key, key) == 0){ + temp->value[action] = value; + break; + } + past = temp; + temp = temp->next; + } +} \ No newline at end of file diff --git a/hash-table.h b/hash-table.h new file mode 100644 index 0000000..e47e916 --- /dev/null +++ b/hash-table.h @@ -0,0 +1,13 @@ +#include "constant.h" +#include + +struct Node { + char key[BIGNUM_LEN+1]; + float value[ACTION_NUM]; + struct Node *next; +}; + +long long hash_function(char *key); +void insert(struct Node **table, char *key); +void search(struct Node **table, char *key, bool *find, float *ans); +void update(struct Node **table, char *key, short action, float value); \ No newline at end of file diff --git a/main.c b/main.c index 5f3d854..f0aaf80 100644 --- a/main.c +++ b/main.c @@ -6,15 +6,22 @@ #include #include -int main() -{ - short board[9] = { 0 }; // tic tac toe's chessboard - float table[STATE_NUM][ACTION_NUM]; // q-learning table +int main(){ + short board[ROW_NUM][COL_NUM]= {0}; + short winner; + struct Node ** map; // pointer to pointer, hash table + bool find; + float state[ACTION_NUM]; srand(time(NULL)); - init_table(&table[0][0]); - run(&table[0][0], board, false, 10000, false); - run(&table[0][0], board, true, EPISODE_NUM, false); - run(&table[0][0], board, false, 10000, false); + // init hash table + map = malloc(TABLE_SIZE * sizeof(struct Node*)); + for (int i=0; i