feat: calculate state hash

This commit is contained in:
snsd0805 2023-06-02 20:19:46 +08:00
parent 7fcadce548
commit b024eec8e4
Signed by: snsd0805
GPG Key ID: 569349933C77A854
5 changed files with 56 additions and 29 deletions

View File

@ -1,6 +1,7 @@
#include <stdio.h> #include <stdio.h>
#include <stdbool.h> #include <stdbool.h>
#include "bignum.h" #include "bignum.h"
#include "constant.h"
struct BigNum long_to_BigNum(long long num) { struct BigNum long_to_BigNum(long long num) {
struct BigNum ans; struct BigNum ans;
@ -15,8 +16,8 @@ struct BigNum long_to_BigNum(long long num) {
struct BigNum add(struct BigNum a, struct BigNum b) { struct BigNum add(struct BigNum a, struct BigNum b) {
struct BigNum ans; struct BigNum ans;
bool carry; short s, carry=0;
short s;
for (short i=BIGNUM_LEN-1; i>=0; i--) { for (short i=BIGNUM_LEN-1; i>=0; i--) {
s = (a.num[i]-48) + (b.num[i]-48) + carry; s = (a.num[i]-48) + (b.num[i]-48) + carry;
carry = s / 10; carry = s / 10;
@ -28,13 +29,14 @@ struct BigNum add(struct BigNum a, struct BigNum b) {
struct BigNum mul(struct BigNum a, int b) { struct BigNum mul(struct BigNum a, int b) {
struct BigNum ans; struct BigNum ans;
short s, carry; short s, carry=0;
for (short i=BIGNUM_LEN-1; i>=0; i--) { for (short i=BIGNUM_LEN-1; i>=0; i--) {
s = (a.num[i]-48) * b + carry; s = (a.num[i]-48) * b + carry;
carry = s / 10; carry = s / 10;
s %= 10; s %= 10;
ans.num[i] = (char)(s+48); ans.num[i] = (char)(s+48);
// printf("index(%hd): %c\n", i, (char)(s+48));
} }
return ans; return ans;

View File

@ -1,6 +1,7 @@
#include "constant.h" #include "constant.h"
struct BigNum { struct BigNum {
char num[BIGNUM_LEN]; char num[BIGNUM_LEN+1];
}; };
struct BigNum long_to_BigNum(long long num); struct BigNum long_to_BigNum(long long num);
struct BigNum add(struct BigNum a, struct BigNum b); struct BigNum add(struct BigNum a, struct BigNum b);

View File

@ -12,3 +12,5 @@
#define ROW_NUM 6 #define ROW_NUM 6
#define COL_NUM 7 #define COL_NUM 7
#define BIGNUM_LEN 22

View File

@ -3,11 +3,18 @@
#include <assert.h> #include <assert.h>
#include "constant.h" #include "constant.h"
#include "enviroment.h" #include "enviroment.h"
#include "bignum.h"
short PATHS[8][3] = { struct BigNum POWs[42] = {
{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, "0000000000000000000001", "0000000000000000000003", "0000000000000000000009", "0000000000000000000027", "0000000000000000000081",
{0, 3, 6}, {1, 4, 7}, {2, 5, 8}, "0000000000000000000243", "0000000000000000000729", "0000000000000000002187", "0000000000000000006561", "0000000000000000019683",
{0, 4, 8}, {2, 4, 6} "0000000000000000059049", "0000000000000000177147", "0000000000000000531441", "0000000000000001594323", "0000000000000004782969",
"0000000000000014348907", "0000000000000043046721", "0000000000000129140163", "0000000000000387420489", "0000000000001162261467",
"0000000000003486784401", "0000000000010460353203", "0000000000031381059609", "0000000000094143178827", "0000000000282429536481",
"0000000000847288609443", "0000000002541865828329", "0000000007625597484987", "0000000022876792454961", "0000000068630377364883",
"0000000205891132094649", "0000000617673396283947", "0000001853020188851841", "0000005559060566555523", "0000016677181699666569",
"0000050031545098999707", "0000150094635296999121", "0000450283905890997363", "0001350851717672992089", "0004052555153018976267",
"0012157665459056928801", "0036472996377170786403"
}; };
/* /*
@ -123,7 +130,7 @@ short get_winner(short *board){
b = get_loc_status(board, i, j+1); b = get_loc_status(board, i, j+1);
c = get_loc_status(board, i, j+2); c = get_loc_status(board, i, j+2);
d = get_loc_status(board, i, j+3); d = get_loc_status(board, i, j+3);
if ((a == b) && (b == c) && (c == d)) { if ((a == b) && (b == c) && (c == d) && (a!=0)) {
return a; return a;
} }
@ -132,7 +139,7 @@ short get_winner(short *board){
b = get_loc_status(board, i+1, j); b = get_loc_status(board, i+1, j);
c = get_loc_status(board, i+2, j); c = get_loc_status(board, i+2, j);
d = get_loc_status(board, i+3, j); d = get_loc_status(board, i+3, j);
if ((a == b) && (b == c) && (c == d)) { if ((a == b) && (b == c) && (c == d) && (a!=0)) {
return a; return a;
} }
@ -141,7 +148,7 @@ short get_winner(short *board){
b = get_loc_status(board, i+1, j-1); b = get_loc_status(board, i+1, j-1);
c = get_loc_status(board, i+2, j-2); c = get_loc_status(board, i+2, j-2);
d = get_loc_status(board, i+3, j-3); d = get_loc_status(board, i+3, j-3);
if ((a == b) && (b == c) && (c == d)) { if ((a == b) && (b == c) && (c == d) && (a!=0)) {
return a; return a;
} }
@ -150,7 +157,7 @@ short get_winner(short *board){
b = get_loc_status(board, i+1, j+1); b = get_loc_status(board, i+1, j+1);
c = get_loc_status(board, i+2, j+2); c = get_loc_status(board, i+2, j+2);
d = get_loc_status(board, i+3, j+3); d = get_loc_status(board, i+3, j+3);
if ((a == b) && (b == c) && (c == d)) { if ((a == b) && (b == c) && (c == d) && (a!=0)) {
return a; return a;
} }
} }
@ -163,19 +170,34 @@ short get_winner(short *board){
Args: Args:
- short *board (array's address): chessboard's status - short *board (array's address): chessboard's status
- char *hash (a string): size is BIGNUM_LEN, the hash will be written here
Results: Results:
- int hash (integer): chessboard's status in i-th block * pow(3, i) - None.
=========================================== Use big number ====================================================
*/ */
int state_hash(short *board){ void state_hash(short *board, char *hash){
int base, hash = 0; struct BigNum sum, temp;
for (int i=0; i<9; i++){ for (short i=0; i<BIGNUM_LEN; i++){
base = pow(3, i); sum.num[i] = '0';
hash += (base * board[i]); }
for (short i=0; i<(ROW_NUM*COL_NUM); i++) {
// printf("MUL:\n");
// printf("%s\n", POWs[i].num);
temp = mul(POWs[i], board[i]);
// printf("%s\n\n", temp.num);
// printf("ADD:\n");
// printf("%s\n", sum.num);
// printf("%s\n", temp.num);
sum = add(sum, temp);
// printf("%s\n\n", sum.num);
}
for (int i=0; i<BIGNUM_LEN; i++){
hash[i] = sum.num[i];
} }
return hash;
} }
/* /*
@ -190,7 +212,7 @@ int state_hash(short *board){
*/ */
void fall(short *board, struct action *a) { void fall(short *board, struct action *a) {
short *ptr = (board + ROW_NUM * COL_NUM - 1 - (a->loc)); short *ptr = (board + ROW_NUM * COL_NUM - 1 - (a->loc));
while (*ptr == 0) { while ((*ptr == 0) && (ptr>=board)) {
// printf("%d ", *ptr); // printf("%d ", *ptr);
ptr -= COL_NUM; ptr -= COL_NUM;
} }
@ -203,7 +225,7 @@ void fall(short *board, struct action *a) {
Args: Args:
- short *board (array's address): chessboards' status - short *board (array's address): chessboards' status
- struct action *a (a action's pointer): include player & choose loc - struct action *a (a action's pointer): include player & choose loc
- int *state (pointer): for return. To save the chessboard's state hash after doing this action - char *state (a string): for return. To save the chessboard's state hash after doing this action
- float *reward (pointer): for return. To save the number of rewards which the player gets after doing this action. - float *reward (pointer): for return. To save the number of rewards which the player gets after doing this action.
- float *opponent_reward (pointer): for return. To save the number of rewards which the opponents gets after the player doing this action. - float *opponent_reward (pointer): for return. To save the number of rewards which the opponents gets after the player doing this action.
- short *winner (pointer): for return. To save the winner in this action. If the game has not finished, it will be zero. - short *winner (pointer): for return. To save the winner in this action. If the game has not finished, it will be zero.
@ -211,13 +233,13 @@ void fall(short *board, struct action *a) {
Results: Results:
- None. Save in state & reward & winner - None. Save in state & reward & winner
*/ */
void act(short *board, struct action *a, int *state, float *reward, float *opponent_reward, short *winner){ void act(short *board, struct action *a, char *state, float *reward, float *opponent_reward, short *winner){
// printf("Act( player=%d, action=%d )\n", a->player, a->loc); // printf("Act( player=%d, action=%d )\n", a->player, a->loc);
assert(board[(ROW_NUM*COL_NUM-1)-(a->loc)] == 0); assert(board[(ROW_NUM*COL_NUM-1)-(a->loc)] == 0);
fall(board, a); fall(board, a);
*winner = get_winner(board); *winner = get_winner(board);
// *state = state_hash(board); state_hash(board, state);
if (*winner == a->player){ if (*winner == a->player){
*reward = 1.0; *reward = 1.0;
*opponent_reward = -1.0; *opponent_reward = -1.0;

View File

@ -7,5 +7,5 @@ void reset(short* board);
void show(short *board); void show(short *board);
void get_available_actions(short *board, short *result, short *length); void get_available_actions(short *board, short *result, short *length);
short get_winner(short *board); short get_winner(short *board);
int state_hash(short *board); void state_hash(short *board, char *hash);
void act(short *board, struct action *a, int *state, float *reward, float *opponent_reward, short *winner); void act(short *board, struct action *a, char *state, float *reward, float *opponent_reward, short *winner);