docs: update README

Merge: replace 'tic-tac-toe' with '4 in a row'
'tic-tac-toe' version is located on the other branch.
2023-06-02 23:57:46 +08:00 · 2023-06-02 23:48:55 +08:00 · 2023-06-02 23:36:06 +08:00 · 2023-06-02 23:34:37 +08:00 · 2023-06-02 20:19:46 +08:00 · 2023-06-02 16:52:59 +08:00
14 changed files with 747 additions and 191 deletions
--- a/.clang-format
+++ b/.clang-format
@ -0,0 +1,225 @@
+---
+Language:        Cpp
+# BasedOnStyle:  WebKit
+AccessModifierOffset: -4
+AlignAfterOpenBracket: DontAlign
+AlignArrayOfStructures: None
+AlignConsecutiveAssignments:
+  Enabled:         false
+  AcrossEmptyLines: false
+  AcrossComments:  false
+  AlignCompound:   false
+  PadOperators:    true
+AlignConsecutiveBitFields:
+  Enabled:         false
+  AcrossEmptyLines: false
+  AcrossComments:  false
+  AlignCompound:   false
+  PadOperators:    false
+AlignConsecutiveDeclarations:
+  Enabled:         false
+  AcrossEmptyLines: false
+  AcrossComments:  false
+  AlignCompound:   false
+  PadOperators:    false
+AlignConsecutiveMacros:
+  Enabled:         false
+  AcrossEmptyLines: false
+  AcrossComments:  false
+  AlignCompound:   false
+  PadOperators:    false
+AlignEscapedNewlines: Right
+AlignOperands:   DontAlign
+AlignTrailingComments:
+  Kind:            Never
+  OverEmptyLines:  0
+AllowAllArgumentsOnNextLine: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortBlocksOnASingleLine: Empty
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortEnumsOnASingleLine: true
+AllowShortFunctionsOnASingleLine: All
+AllowShortIfStatementsOnASingleLine: Never
+AllowShortLambdasOnASingleLine: All
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: MultiLine
+AttributeMacros:
+  - __capability
+BinPackArguments: true
+BinPackParameters: true
+BitFieldColonSpacing: Both
+BraceWrapping:
+  AfterCaseLabel:  false
+  AfterClass:      false
+  AfterControlStatement: Never
+  AfterEnum:       false
+  AfterExternBlock: false
+  AfterFunction:   true
+  AfterNamespace:  false
+  AfterObjCDeclaration: false
+  AfterStruct:     false
+  AfterUnion:      false
+  BeforeCatch:     false
+  BeforeElse:      false
+  BeforeLambdaBody: false
+  BeforeWhile:     false
+  IndentBraces:    false
+  SplitEmptyFunction: true
+  SplitEmptyRecord: true
+  SplitEmptyNamespace: true
+BreakAfterAttributes: Never
+BreakAfterJavaFieldAnnotations: false
+BreakArrays:     true
+BreakBeforeBinaryOperators: All
+BreakBeforeConceptDeclarations: Always
+BreakBeforeBraces: WebKit
+BreakBeforeInlineASMColon: OnlyMultiline
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializers: BeforeComma
+BreakInheritanceList: BeforeColon
+BreakStringLiterals: true
+ColumnLimit:     0
+CommentPragmas:  '^ IWYU pragma:'
+CompactNamespaces: false
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: false
+DerivePointerAlignment: false
+DisableFormat:   false
+EmptyLineAfterAccessModifier: Never
+EmptyLineBeforeAccessModifier: LogicalBlock
+ExperimentalAutoDetectBinPacking: false
+FixNamespaceComments: false
+ForEachMacros:
+  - foreach
+  - Q_FOREACH
+  - BOOST_FOREACH
+IfMacros:
+  - KJ_IF_MAYBE
+IncludeBlocks:   Preserve
+IncludeCategories:
+  - Regex:           '^"(llvm|llvm-c|clang|clang-c)/'
+    Priority:        2
+    SortPriority:    0
+    CaseSensitive:   false
+  - Regex:           '^(<|"(gtest|gmock|isl|json)/)'
+    Priority:        3
+    SortPriority:    0
+    CaseSensitive:   false
+  - Regex:           '.*'
+    Priority:        1
+    SortPriority:    0
+    CaseSensitive:   false
+IncludeIsMainRegex: '(Test)?$'
+IncludeIsMainSourceRegex: ''
+IndentAccessModifiers: false
+IndentCaseBlocks: false
+IndentCaseLabels: false
+IndentExternBlock: AfterExternBlock
+IndentGotoLabels: true
+IndentPPDirectives: None
+IndentRequiresClause: true
+IndentWidth:     4
+IndentWrappedFunctionNames: false
+InsertBraces:    false
+InsertNewlineAtEOF: false
+InsertTrailingCommas: None
+IntegerLiteralSeparator:
+  Binary:          0
+  BinaryMinDigits: 0
+  Decimal:         0
+  DecimalMinDigits: 0
+  Hex:             0
+  HexMinDigits:    0
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: true
+LambdaBodyIndentation: Signature
+LineEnding:      DeriveLF
+MacroBlockBegin: ''
+MacroBlockEnd:   ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: Inner
+ObjCBinPackProtocolList: Auto
+ObjCBlockIndentWidth: 4
+ObjCBreakBeforeNestedBlockParam: true
+ObjCSpaceAfterProperty: true
+ObjCSpaceBeforeProtocolList: true
+PackConstructorInitializers: BinPack
+PenaltyBreakAssignment: 2
+PenaltyBreakBeforeFirstCallParameter: 19
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakOpenParenthesis: 0
+PenaltyBreakString: 1000
+PenaltyBreakTemplateDeclaration: 10
+PenaltyExcessCharacter: 1000000
+PenaltyIndentedWhitespace: 0
+PenaltyReturnTypeOnItsOwnLine: 60
+PointerAlignment: Left
+PPIndentWidth:   -1
+QualifierAlignment: Leave
+ReferenceAlignment: Pointer
+ReflowComments:  true
+RemoveBracesLLVM: false
+RemoveSemicolon: false
+RequiresClausePosition: OwnLine
+RequiresExpressionIndentation: OuterScope
+SeparateDefinitionBlocks: Leave
+ShortNamespaceLines: 1
+SortIncludes:    CaseSensitive
+SortJavaStaticImport: Before
+SortUsingDeclarations: LexicographicNumeric
+SpaceAfterCStyleCast: false
+SpaceAfterLogicalNot: false
+SpaceAfterTemplateKeyword: true
+SpaceAroundPointerQualifiers: Default
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeCaseColon: false
+SpaceBeforeCpp11BracedList: true
+SpaceBeforeCtorInitializerColon: true
+SpaceBeforeInheritanceColon: true
+SpaceBeforeParens: ControlStatements
+SpaceBeforeParensOptions:
+  AfterControlStatements: true
+  AfterForeachMacros: true
+  AfterFunctionDefinitionName: false
+  AfterFunctionDeclarationName: false
+  AfterIfMacros:   true
+  AfterOverloadedOperator: false
+  AfterRequiresInClause: false
+  AfterRequiresInExpression: false
+  BeforeNonEmptyParentheses: false
+SpaceBeforeRangeBasedForLoopColon: true
+SpaceBeforeSquareBrackets: false
+SpaceInEmptyBlock: true
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles:  Never
+SpacesInConditionalStatement: false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInLineCommentPrefix:
+  Minimum:         1
+  Maximum:         -1
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard:        Latest
+StatementAttributeLikeMacros:
+  - Q_EMIT
+StatementMacros:
+  - Q_UNUSED
+  - QT_REQUIRE_VERSION
+TabWidth:        8
+UseTab:          Never
+WhitespaceSensitiveMacros:
+  - BOOST_PP_STRINGIZE
+  - CF_SWIFT_NAME
+  - NS_SWIFT_NAME
+  - PP_STRINGIZE
+  - STRINGIZE
+...
+
--- a/.github/workflows/format.yml
+++ b/.github/workflows/format.yml
@ -0,0 +1,26 @@
+name: format
+
+on:
+  push:
+    branches:
+      - master
+    paths:
+      - "**.c"
+      - "**.h"
+
+jobs:
+  format:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+      - name: Install clang-format
+        run: pip install clang-format
+      - name: Format with clang-format
+        run: clang-format -i *.c *.h
+      - name: Commit changes
+        uses: stefanzweifel/git-auto-commit-action@v4
+        with:
+          commit_message: "style(format): run clang-format"
--- a/4
+++ b/4
@ -1,5 +1,5 @@
 all: a.out
-a.out: main.c enviroment.c enviroment.h q-learning.c q-learning.h constant.h
-	gcc main.c enviroment.c q-learning.c -lm
+# Headers are prerequisites only: handing constant.h to gcc as an input would precompile it into a stray constant.h.gch.
+a.out: main.c enviroment.c enviroment.h q-learning.c q-learning.h bignum.c bignum.h hash-table.c hash-table.h constant.h
+	gcc main.c enviroment.c q-learning.c bignum.c hash-table.c -lm
 run:
 	./a.out
--- a/README.md
+++ b/README.md
@ -1,10 +1,10 @@
-# Q-Learning-with-Tic-Tac-Toe
+# Q-Learning-with-Four-in-a-Row

 Project for 1112 NCNU CSIE "Parallel Programming with the Message-Passing Interface"

 ## Setup
 ```
-git clone https://github.com/snsd0805/Q-learning-with-Tic-Tac-Toe.git
+git clone https://github.com/snsd0805/Q-learning-with-Four-in-a-Row.git
 ```

 ### Compile
--- a/bignum.c
+++ b/bignum.c
@ -0,0 +1,43 @@
+#include <stdio.h>
+#include <stdbool.h>
+#include "bignum.h"
+#include "constant.h"
+
+/*
+    Convert an integer into a fixed-width, zero-padded decimal BigNum.
+
+    Args:
+        - long long num (integer): value to convert. BigNum has no sign, so only the magnitude is stored.
+
+    Results:
+        - struct BigNum: BIGNUM_LEN ASCII digits, most significant digit first, NUL-terminated.
+*/
+struct BigNum long_to_BigNum(long long num) {
+    struct BigNum ans;
+    for (int i = BIGNUM_LEN - 1; i >= 0; i--) {
+        int digit = (int)(num % 10);
+        if (digit < 0) {
+            digit = -digit; // the remainder of a negative value is negative in C
+        }
+        num /= 10;
+        ans.num[i] = (char)('0' + digit);
+    }
+    // num[] holds BIGNUM_LEN+1 chars; terminate it so the result is a valid C string.
+    ans.num[BIGNUM_LEN] = '\0';
+    return ans;
+}
+
+/*
+    Add two BigNums digit by digit.
+
+    Args:
+        - struct BigNum a, b: operands, each holding BIGNUM_LEN ASCII digits
+
+    Results:
+        - struct BigNum: a + b, NUL-terminated. A carry out of the most significant digit is discarded.
+*/
+struct BigNum add(struct BigNum a, struct BigNum b) {
+    struct BigNum ans;
+    int carry = 0;
+
+    // Walk from the least significant digit (last index) to the most significant one.
+    for (int i = BIGNUM_LEN - 1; i >= 0; i--) {
+        int s = (a.num[i] - '0') + (b.num[i] - '0') + carry;
+        carry = s / 10;
+        ans.num[i] = (char)('0' + s % 10);
+    }
+    // num[] holds BIGNUM_LEN+1 chars; terminate it so the result is a valid C string.
+    ans.num[BIGNUM_LEN] = '\0';
+    return ans;
+}
+
+/*
+    Multiply a BigNum by a small non-negative integer.
+
+    Args:
+        - struct BigNum a: multiplicand, BIGNUM_LEN ASCII digits
+        - int b (integer): multiplier, expected to be >= 0 (BigNum has no sign)
+
+    Results:
+        - struct BigNum: a * b, NUL-terminated. A carry out of the most significant digit is discarded.
+*/
+struct BigNum mul(struct BigNum a, int b) {
+    struct BigNum ans;
+    // long long keeps digit * b + carry from being truncated the way a short accumulator would.
+    long long carry = 0;
+
+    for (int i = BIGNUM_LEN - 1; i >= 0; i--) {
+        long long s = (long long)(a.num[i] - '0') * b + carry;
+        carry = s / 10;
+        ans.num[i] = (char)('0' + s % 10);
+    }
+    // num[] holds BIGNUM_LEN+1 chars; terminate it so the result is a valid C string.
+    ans.num[BIGNUM_LEN] = '\0';
+
+    return ans;
+}
--- a/bignum.h
+++ b/bignum.h
@ -0,0 +1,8 @@
+#ifndef BIGNUM_H
+#define BIGNUM_H
+
+#include "constant.h"
+
+// Fixed-width decimal number: BIGNUM_LEN ASCII digits, most significant first,
+// plus one extra char for a terminating NUL.
+struct BigNum {
+    char num[BIGNUM_LEN+1];
+};
+
+// Convert an integer to its zero-padded decimal BigNum form.
+struct BigNum long_to_BigNum(long long num);
+// Return a + b.
+struct BigNum add(struct BigNum a, struct BigNum b);
+// Return a * b.
+struct BigNum mul(struct BigNum a, int b);
+
+#endif // BIGNUM_H
--- a/constant.h
+++ b/constant.h
@ -1,11 +1,17 @@
 #define BOT_SYMBOL 1
 #define OPPONENT_SYMBOL 2

-#define EPSILON 0.9						// Epsilon-greedy
-#define LR 0.1							// learning rate
-#define LAMBDA 0.9						// discount factor
+#define EPSILON 0.9 // Epsilon-greedy
+#define LR 0.1 // learning rate
+#define LAMBDA 0.9 // discount factor

 #define STATE_NUM 19683
-#define ACTION_NUM 9
-#define EPISODE_NUM 100000
+#define ACTION_NUM 7
+#define EPISODE_NUM 1000000
 #define FIRST true
+
+#define ROW_NUM 6
+#define COL_NUM 7
+
+#define BIGNUM_LEN 22
+#define TABLE_SIZE 1000000000
--- a/enviroment.c
+++ b/enviroment.c
@ -3,144 +3,252 @@
 #include <assert.h>
 #include "constant.h"
 #include "enviroment.h"
+#include "bignum.h"

-short PATHS[8][3] = {
-	{0, 1, 2}, {3, 4, 5}, {6, 7, 8},
-	{0, 3, 6}, {1, 4, 7}, {2, 5, 8},
-	{0, 4, 8}, {2, 4, 6}
+struct BigNum POWs[42] = {
+    "0000000000000000000001", "0000000000000000000003", "0000000000000000000009", "0000000000000000000027", "0000000000000000000081",
+    "0000000000000000000243", "0000000000000000000729", "0000000000000000002187", "0000000000000000006561", "0000000000000000019683",
+    "0000000000000000059049", "0000000000000000177147", "0000000000000000531441", "0000000000000001594323", "0000000000000004782969",
+    "0000000000000014348907", "0000000000000043046721", "0000000000000129140163", "0000000000000387420489", "0000000000001162261467",
+    "0000000000003486784401", "0000000000010460353203", "0000000000031381059609", "0000000000094143178827", "0000000000282429536481",
+    "0000000000847288609443", "0000000002541865828329", "0000000007625597484987", "0000000022876792454961", "0000000068630377364883",
+    "0000000205891132094649", "0000000617673396283947", "0000001853020188851841", "0000005559060566555523", "0000016677181699666569",
+    "0000050031545098999707", "0000150094635296999121", "0000450283905890997363", "0001350851717672992089", "0004052555153018976267",
+    "0012157665459056928801", "0036472996377170786403"
 };

 /*
-	Reset the game, clear the chessboard.
+    Reset the game, clear the chessboard.

 	Args:
-		- short *board (array's address): chessboard's status
+		- short *board (array's start address): chessboard's status

-	Results:
-		- None, set all blocks on the chessboard to zero.
+    Results:
+        - None, set all blocks on the chessboard to zero.
 */
 void reset(short* board){
-	for (short i=0; i<9; i++)
+	for (short i=0; i<(ROW_NUM*COL_NUM); i++)
 		board[i] = 0;
 }

 /*
-	Print the chessboard on the console.
+    Print the chessboard on the console.

-	Args:
-		- short *board (array's address): chessboard's status
+    Args:
+        - short *board (array's address): chessboard's status

-	Results:
-		- None. Only printing.
+    Results:
+        - None. Only printing.
 */
 void show(short *board){
 	short loc;
-	printf("┼───┼───┼───┼\n");
-	for (short i=0; i<3; i++){
-		printf("│ ");
-		for (short j=0; j<3; j++){
-			loc = 3*i+j;
-			if (board[loc] == 0)
-				printf("  │ ");
-			else if (board[loc] == BOT_SYMBOL)
-				printf("○ │ ");
-			else
-				printf("✕ │ ");
-		}
-		printf("\n");
-		printf("┼───┼───┼───┼\n");
-	}
-	printf("\n\n");
+    for (short i=0; i<COL_NUM; i++){
+        printf("%d ", i);
+    }
+    printf("\n");
+    for (short i=(ROW_NUM*COL_NUM-1); i>=0; i--){
+        if (board[i] == BOT_SYMBOL) {
+            printf("● ");
+        } else if(board[i] == OPPONENT_SYMBOL) {
+            printf("◴ ");
+        } else {
+            printf("◌ ");
+        }
+        if (i%COL_NUM == 0){
+            printf("\n");
+        }
+    }
+    printf("\n\n");
 }

 /*
-	Save all available actions into the "result" array.
+    Save all available actions into the "result" array.

-	Args:
-		- short *board (array's address): chessboard's status
-		- short *result (array's address): To save all available actions.
-		- short *length (integer's pointer): To save the number of available actions.
+    Args:
+        - short *board (array's address): chessboard's status
+        - short *result (array's address): To save all available actions.
+        - short *length (integer's pointer): To save the number of available actions.

-	Results:
-		- None. All available actions are saved into "result" and the number of actions is saved in "length"
+    Results:
+        - None. All available actions are saved into "result" and the number of actions is saved in "length"
 */
+
 void get_available_actions(short *board, short *result, short *length){
 	short index = 0;
-	for (int i=0; i<9; i++)
-		if (board[i] == 0)
+	for (int i=0; i<COL_NUM; i++)
+		if (board[(ROW_NUM*COL_NUM-1)-i] == 0)
 			result[index++] = i;
 	*length = index;
 }

+/*
+    Bounds-checked read of one cell of the chessboard.
+
+    Args:
+        - short *board (array's start pointer): chessboard's status
+        - short row (integer): row number of the cell
+        - short col (integer): column number of the cell
+
+    Results:
+        - short value (integer): board[row*COL_NUM+col], or -1 when (row, col) lies outside the board
+*/
+short get_loc_status(short *board, short row, short col) {
+    int inside = (row >= 0) && (row < ROW_NUM) && (col >= 0) && (col < COL_NUM);
+    return inside ? board[row * COL_NUM + col] : -1;
+}
+
 /*
 	Return winner's number;

-	Args:
-		- short *board (array's address): chessboard's status
+    Args:
+        - short *board (array's address): chessboard's status

 	Results:
 		- short winner_number(integer): winner's number, 0 for no winner now, 1 for Bot, 2 for opponent
+
+    board's coordinate diagram
+                                ^
+                                | 5
+                                | 4
+                                | 3
+                                | 2
+                                | 1
+                                | 0
+    <-----------------------------
+      6   5   4   3   2   1   0 |
 */
 short get_winner(short *board){
-	int a, b, c;
-	for (int i=0; i<8; i++){
-		a = PATHS[i][0]; b = PATHS[i][1]; c = PATHS[i][2];
-		if ((board[a] == board[b]) && (board[b] == board[c]) && (board[a] != 0)){
-			return board[a];
-		}
+	short a, b, c, d;
+	for (short i=0; i<ROW_NUM; i++){
+        for (short j=0; j<COL_NUM; j++){
+            // horizontal
+            a = get_loc_status(board, i, j);
+            b = get_loc_status(board, i, j+1);
+            c = get_loc_status(board, i, j+2);
+            d = get_loc_status(board, i, j+3);
+            if ((a == b) && (b == c) && (c == d) && (a!=0)) {
+                return a;
+            }
+
+            // vertical
+            a = get_loc_status(board, i, j);
+            b = get_loc_status(board, i+1, j);
+            c = get_loc_status(board, i+2, j);
+            d = get_loc_status(board, i+3, j);
+            if ((a == b) && (b == c) && (c == d) && (a!=0)) {
+                return a;
+            }
+
+            // slash (/)
+            a = get_loc_status(board, i, j);
+            b = get_loc_status(board, i+1, j-1);
+            c = get_loc_status(board, i+2, j-2);
+            d = get_loc_status(board, i+3, j-3);
+            if ((a == b) && (b == c) && (c == d) && (a!=0)) {
+                return a;
+            }
+
+            // backslash (\)
+            a = get_loc_status(board, i, j);
+            b = get_loc_status(board, i+1, j+1);
+            c = get_loc_status(board, i+2, j+2);
+            d = get_loc_status(board, i+3, j+3);
+            if ((a == b) && (b == c) && (c == d) && (a!=0)) {
+                return a;
+            }
+        }
 	}
 	return 0;
 }

 /*
-	Hash chesstable's status into hash.
+    Hash chesstable's status into hash.

 	Args:
 		- short *board (array's address): chessboard's status
+        - char *hash (a string): size is BIGNUM_LEN, the hash will be written here

 	Results:
-		- int hash (integer): chessboard's status in i-th block * pow(3, i)
+		- None.
 */
-int state_hash(short *board){
-	int base, hash = 0;
-	for (int i=0; i<9; i++){
-		base = pow(3, i);
-		hash += (base * board[i]);
+void state_hash(short *board, char *hash){
+    struct BigNum sum, temp;
+    for (short i=0; i<BIGNUM_LEN; i++){
+        sum.num[i] = '0';
+    }
+
+    for (short i=0; i<(ROW_NUM*COL_NUM); i++) {
+        // printf("MUL:\n");
+        // printf("%s\n", POWs[i].num);
+        temp = mul(POWs[i], board[i]);
+        // printf("%s\n\n", temp.num);
+
+        // printf("ADD:\n");
+        // printf("%s\n", sum.num);
+        // printf("%s\n", temp.num);
+        sum = add(sum, temp);
+        // printf("%s\n\n", sum.num);
+
+    }
+
+	for (int i=0; i<BIGNUM_LEN; i++){
+        hash[i] = sum.num[i];
 	}
-	return hash;
 }

+/*
+    Drop a piece into a column; it lands on the lowest empty cell of that column.
+
+    Args:
+        - short *board: chessboard (ROW_NUM * COL_NUM cells, index = row * COL_NUM + col)
+        - struct action *a (struct pointer): a->loc is the column as numbered by get_available_actions, a->player is the symbol to place
+
+    Results:
+        - None. The board is updated in place. Nothing is written when a->loc is out of range or the column is already full.
+*/
+void fall(short *board, struct action *a) {
+    if ((a->loc < 0) || (a->loc >= COL_NUM)) {
+        return;
+    }
+
+    // Columns are stored mirrored: the top cell of column `loc` is (ROW_NUM*COL_NUM-1) - loc.
+    int idx = ROW_NUM * COL_NUM - 1 - a->loc;
+    if (board[idx] != 0) {
+        return; // column full; writing would land one row past the board
+    }
+
+    // Move down one row at a time. The bound is tested before the cell is read.
+    while ((idx - COL_NUM >= 0) && (board[idx - COL_NUM] == 0)) {
+        idx -= COL_NUM;
+    }
+    board[idx] = a->player;
+}

 /*
-	Act on the chessboard.
+    Act on the chessboard.

 	Args:
 		- short *board (array's address): chessboards' status
 		- struct action *a (a action's pointer): include player & choose loc
-		- int *state (pointer): for return. To save the chessboard's state hash which after doing this action
+		- char *state (a string): for return. To save the chessboard's state hash which after doing this action
 		- float *reward (pointer): for return. To save the number of rewards which the player gets after doing this action.
 		- float *opponent_reward (pointer): for return. To save the number of rewards which the opponents gets after the player doing this action.
 		- short *winner (pointer): for return. To save the winner in this action. If haven't finish, it will be zero.

-	Results:
-		- None. Save in state & reward & winner
+    Results:
+        - None. Save in state & reward & winner
 */
-void act(short *board, struct action *a, int *state, float *reward, float *opponent_reward, short *winner){
+void act(short *board, struct action *a, char *state, float *reward, float *opponent_reward, short *winner){
    // printf("Act( player=%d, action=%d )\n", a->player, a->loc);
-    assert(board[a->loc] == 0);
-	board[a->loc] = a->player;
+    assert(board[(ROW_NUM*COL_NUM-1)-(a->loc)] == 0);
+
+    fall(board, a);
 	*winner = get_winner(board);
-	*state = state_hash(board);
+	state_hash(board, state);
 	if (*winner == a->player){
 		*reward = 1.0;
        *opponent_reward = -1.0;
-    }
-	else if(*winner != 0){
-		*reward = -1.0;
+    } else if (*winner != 0) {
+        *reward = -1.0;
        *opponent_reward = 1.0;
-    }
-	else{
-		*reward = 0;
+    } else {
+        *reward = 0;
        *opponent_reward = 0;
    }
 }
--- a/enviroment.h
+++ b/enviroment.h
@ -1,11 +1,11 @@
-struct action{
-	short player;
-	short loc;
+struct action {
+    short player;
+    short loc;
 };

 void reset(short* board);
 void show(short *board);
 void get_available_actions(short *board, short *result, short *length);
 short get_winner(short *board);
-int state_hash(short *board);
-void act(short *board, struct action *a, int *state, float *reward, float *opponent_reward, short *winner);
+void state_hash(short *board, char *hash);
+void act(short *board, struct action *a, char *state, float *reward, float *opponent_reward, short *winner);
--- a/hash-table.c
+++ b/hash-table.c
@ -0,0 +1,82 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <time.h>
+#include "hash-table.h"
+#include "constant.h"
+
+/*
+    Map a state key to a bucket index (multiply-by-33 rolling hash).
+
+    Args:
+        - char *key (a string): state hash of at most BIGNUM_LEN characters
+
+    Results:
+        - long long hash (integer): bucket index in [0, TABLE_SIZE)
+*/
+long long hash_function(char *key) {
+    long long hash = 0;
+    // Read at most BIGNUM_LEN chars: callers may pass a char[BIGNUM_LEN] buffer with no NUL terminator.
+    for (int i = 0; (i < BIGNUM_LEN) && (key[i] != '\0'); i++) {
+        // The unsigned char cast keeps the sum, and so the bucket index, non-negative.
+        hash = ((hash * 33) + (unsigned char)key[i]) % TABLE_SIZE;
+    }
+    return hash;
+}
+
+/*
+    Add a new, zero-initialised entry for `key` at the end of its bucket chain.
+
+    Args:
+        - struct Node **table: hash table (array of bucket heads)
+        - char *key (a string): state hash of at most BIGNUM_LEN characters; must not already be in the table (checked with assert)
+
+    Results:
+        - None. The process exits if memory cannot be allocated.
+*/
+void insert(struct Node **table, char *key) {
+    long long hash = hash_function(key);
+
+    // Find the tail of the chain, checking for duplicates on the way.
+    // Comparisons are bounded: the key may be a char[BIGNUM_LEN] buffer with no NUL terminator.
+    struct Node *tail = NULL;
+    for (struct Node *cur = table[hash]; cur != NULL; cur = cur->next) {
+        assert(strncmp(cur->key, key, BIGNUM_LEN) != 0);
+        tail = cur;
+    }
+
+    struct Node *node = malloc(sizeof *node);
+    if (node == NULL) {
+        perror("insert: malloc");
+        exit(EXIT_FAILURE);
+    }
+
+    // Copy at most BIGNUM_LEN chars; snprintf always NUL-terminates node->key.
+    snprintf(node->key, sizeof node->key, "%.*s", BIGNUM_LEN, key);
+    for (short i = 0; i < ACTION_NUM; i++) {
+        node->value[i] = 0.0;
+    }
+    node->next = NULL;
+
+    if (tail == NULL) {
+        table[hash] = node;
+    } else {
+        tail->next = node;
+    }
+}
+
+/*
+    Look up the stored values for `key`.
+
+    Args:
+        - struct Node **table: hash table (array of bucket heads)
+        - char *key (a string): state hash of at most BIGNUM_LEN characters
+        - bool *find (pointer): for return. true when the key exists, false otherwise
+        - float *ans (array's address): for return. Receives ACTION_NUM values when the key exists; untouched otherwise
+
+    Results:
+        - None. Save in find & ans
+*/
+void search(struct Node **table, char *key, bool *find, float *ans) {
+    *find = false;
+
+    for (struct Node *cur = table[hash_function(key)]; cur != NULL; cur = cur->next) {
+        // Bounded compare: the key may be a char[BIGNUM_LEN] buffer with no NUL terminator.
+        if (strncmp(cur->key, key, BIGNUM_LEN) == 0) {
+            *find = true;
+            for (short i = 0; i < ACTION_NUM; i++) {
+                ans[i] = cur->value[i];
+            }
+            return;
+        }
+    }
+}
+
+/*
+    Overwrite the stored value of one action for an existing key.
+
+    Args:
+        - struct Node **table: hash table (array of bucket heads)
+        - char *key (a string): state hash of at most BIGNUM_LEN characters
+        - short action (integer): action index, must be in [0, ACTION_NUM)
+        - float value: new value for that action
+
+    Results:
+        - None. If the bucket is non-empty but holds no matching key, nothing is changed.
+*/
+void update(struct Node **table, char *key, short action, float value) {
+    long long hash = hash_function(key);
+    assert(table[hash]!=NULL);
+    assert((action >= 0) && (action < ACTION_NUM));
+
+    for (struct Node *cur = table[hash]; cur != NULL; cur = cur->next) {
+        // Bounded compare: the key may be a char[BIGNUM_LEN] buffer with no NUL terminator.
+        if (strncmp(cur->key, key, BIGNUM_LEN) == 0) {
+            cur->value[action] = value;
+            return;
+        }
+    }
+}
--- a/hash-table.h
+++ b/hash-table.h
@ -0,0 +1,13 @@
+#ifndef HASH_TABLE_H
+#define HASH_TABLE_H
+
+#include "constant.h"
+#include <stdbool.h>
+
+// One entry of a bucket chain: a state key and one value per action.
+struct Node {
+    char key[BIGNUM_LEN+1];     // state hash string plus terminating NUL
+    float value[ACTION_NUM];    // one stored value per action
+    struct Node *next;          // next entry in the same bucket, or NULL
+};
+
+// Map a key to a bucket index in [0, TABLE_SIZE).
+long long hash_function(char *key);
+// Append a new zero-initialised entry for key.
+void insert(struct Node **table, char *key);
+// Look up key; sets *find and, when found, copies ACTION_NUM values into ans.
+void search(struct Node **table, char *key, bool *find, float *ans);
+// Set the value of one action for an existing key.
+void update(struct Node **table, char *key, short action, float value);
+
+#endif // HASH_TABLE_H
--- a/main.c
+++ b/main.c
@ -1,19 +1,27 @@
-#include <stdio.h>
-#include <time.h>
-#include <stdlib.h>
-#include <stdbool.h>
 #include "constant.h"
 #include "enviroment.h"
 #include "q-learning.h"
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>

 int main(){
-	short board[9]= {0};			        // tic tac toe's chessboard
-    float table[STATE_NUM][ACTION_NUM];     // q-learning table
+	short board[ROW_NUM][COL_NUM]= {0};
+    short winner;
+    struct Node ** map;      // pointer to pointer, hash table
+    bool find;
+    float state[ACTION_NUM];

-	srand(time(NULL));
-    init_table(&table[0][0]);
+    srand(time(NULL));

-    run(&table[0][0], board, false, 10000, false);
-    run(&table[0][0], board, true, EPISODE_NUM, false);
-    run(&table[0][0], board, false, 10000, false);
+    // init hash table
+    map = malloc(TABLE_SIZE * sizeof(struct Node*));
+    for (int i=0; i<TABLE_SIZE; i++){
+        map[i] = NULL;
+    }
+
+    run(map, &board[0][0], false, 10000, false);
+    run(map, &board[0][0], true, EPISODE_NUM, false);
+    run(map, &board[0][0], false, 10000, true);
 }
--- a/q-learning.c
+++ b/q-learning.c
@ -1,80 +1,94 @@
-#include <stdio.h>
 #include <float.h>
-#include <stdbool.h>
 #include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
 #include <stdlib.h>

 #include "constant.h"
 #include "enviroment.h"
+#include "hash-table.h"

 /*
-	Return the index with the max value in the array
+    Return the index with the max value in the array

-	Args:
-		- float *arr (array's address)
-		- short length (integer): array's length
+    Args:
+        - float *arr (array's address)
+        - short length (integer): array's length

-	Results:
-		- short index (integer): the index with the max value
+    Results:
+        - short index (integer): the index with the max value
 */
-short float_argmax(float *arr, short length){
-	float ans = -1, max = -FLT_MAX;
-	for (short i=0; i<length; i++){
-		if (arr[i] > max){
-			max = arr[i];
-			ans = i;
-		}
-	}
-	return ans;
+// Returns the index of the largest element of arr[0..length-1], or -1 when length <= 0.
+// On ties the first (lowest) index wins, because only a strictly greater value replaces the current best.
+short float_argmax(float* arr, short length)
+{
+    short ans = -1; // an index, so keep it integral rather than float
+    float max = -FLT_MAX;
+    for (short i = 0; i < length; i++) {
+        if (arr[i] > max) {
+            max = arr[i];
+            ans = i;
+        }
+    }
+    return ans;
+}

-
 /*
-	Choose the next action with Epsilon-Greedy.
-	EPSILON means the probability to choose the best action in this state from Q-Table.
-	(1-EPSILON) to random an action to do.
+    Choose the next action with Epsilon-Greedy.
+    EPSILON means the probability to choose the best action in this state from Q-Table.
+    (1-EPSILON) to random an action to do.

 	Args:
 		- short *table (array's address): state table for Q-Learning
 		- short *board (array's address): chessboards' status
-		- int state (integer, state hash): hash for board's status
+		- char *state (string, state hash): hash for board's status

-	Results:
-		- short best_choice
+    Results:
+        - short best_choice
 */
-short bot_choose_action(float *table, short *board, int state){
+short bot_choose_action(struct Node **map, short *board, char *state){

 	// get available actions for choosing
-	short available_actions[9];
+	short available_actions[ACTION_NUM];
 	short available_actions_length;
 	get_available_actions(board, available_actions, &available_actions_length);

 	// use argmax() to find the best choise,
 	// first we should build an available_actions_state array for saving the state for all available choise.
-	float available_actions_state[9];
-	short available_actions_state_index[9];
+	float available_actions_state[ACTION_NUM];
+	short available_actions_state_index[ACTION_NUM];
 	short available_actions_state_length, index = 0;
 	short temp_index, best_choice;
 	bool zeros = true;
-	for (short i=0; i<available_actions_length; i++){
-		temp_index = available_actions[i];
-		available_actions_state[index] = *(table + state * ACTION_NUM + temp_index);
-		if (available_actions_state[index] != 0.0){
-			zeros = false;
-		}
-		available_actions_state_index[index] = temp_index;
-		index++;
-	}
-	best_choice = float_argmax(available_actions_state, index);
-	best_choice = available_actions_state_index[best_choice];
+    bool find;
+    float state_weights[ACTION_NUM];

-	// Epsilon-Greedy
-	// If random number > EPSILON   ->   random a action
-	// If random number < EPSILON   ->   choose the best action in this state.
-	double random_num = (double) rand() / (RAND_MAX + 1.0);
-	if ((random_num > EPSILON) || zeros){
-		best_choice = available_actions_state_index[ rand() % index ];
-	}
+    // find weights in the hash table
+    search(map, state, &find, state_weights);
+    if (!find) {
+        for (short i=0; i<ACTION_NUM; i++){
+            state_weights[i] = 0.0;
+        }
+    }
+
+    // get the best choice
+    for (short i=0; i<available_actions_length; i++){
+        temp_index = available_actions[i];
+
+        available_actions_state[index] = state_weights[temp_index];
+        if (available_actions_state[index] != 0.0){
+            zeros = false;
+        }
+        available_actions_state_index[index] = temp_index;
+        index++;
+    }
+    best_choice = float_argmax(available_actions_state, index);
+    best_choice = available_actions_state_index[best_choice];
+
+    // Epsilon-Greedy
+    // If random number > EPSILON   ->   random a action
+    // If random number < EPSILON   ->   choose the best action in this state.
+    double random_num = (double)rand() / (RAND_MAX + 1.0);
+    if ((random_num > EPSILON) || zeros) {
+        best_choice = available_actions_state_index[rand() % index];
+    }

    return best_choice;
 }
@ -83,48 +97,48 @@ short bot_choose_action(float *table, short *board, int state){
    Opponent random choose a action to do.

    Args:
-		- short *table (array's address): state table for Q-Learning
 		- short *board (array's address): chessboards' status
-		- int state (integer, state hash): hash for board's status

-	Results:
-		- short choice (integer): random, -1 means no available action to choose
+    Results:
+        - short choice (integer): random, -1 means no available action to choose
 */
-short opponent_random_action(float *table, short *board, int state){
+short opponent_random_action(short *board){

    // get available actions for choosing
-    short available_actions[9];
+    short available_actions[ACTION_NUM];
    short available_action_length;
    get_available_actions(board, available_actions, &available_action_length);

-    if (available_action_length == 0){
+    if (available_action_length == 0) {
        return -1;
    }

    // random
    short choice;
-    choice = (short)( rand() % available_action_length );
+    choice = (short)(rand() % available_action_length);
    choice = available_actions[choice];

    return choice;
 }

-/*
-    Inilialize the Q-Table
+//     The Q-Table is now a hash table, so it needs no explicit initialization.
+//
+// /*
+//     Inilialize the Q-Table

-    Args:
-        - float *table (two-dim array's start address)
+//     Args:
+//         - float *table (two-dim array's start address)

-    Results:
-        - None.
-*/
-void init_table(float *table){
-    for (int i=0; i<STATE_NUM; i++){
-        for (int j=0; j<ACTION_NUM; j++){
-            *(table + i * ACTION_NUM + j) = 0;
-        }
-    }
-}
+//     Results:
+//         - None.
+// */
+// void init_table(float *table){
+//     for (int i=0; i<STATE_NUM; i++){
+//         for (int j=0; j<ACTION_NUM; j++){
+//             *(table + i * ACTION_NUM + j) = 0;
+//         }
+//     }
+// }

 /*
    Give the chessboard & state, it will return the max reward with the best choice
@ -137,14 +151,24 @@ void init_table(float *table){
    Results:
        - int max_reward
 */
-float get_estimate_reward(float *table, short *board, int state){
-    short available_actions[9];
+float get_estimate_reward(struct Node **map, short *board, char *state){
+    short available_actions[ACTION_NUM];
    short available_action_length;
    get_available_actions(board, available_actions, &available_action_length);

-	float available_actions_state[9];
+    // find weights in the hash table
+    float state_weights[ACTION_NUM];
+    bool find;
+    search(map, state, &find, state_weights);
+    if (!find) {
+        for (short i=0; i<ACTION_NUM; i++){
+            state_weights[i] = 0.0;
+        }
+    }
+
+	float available_actions_state[ACTION_NUM];
    for (short i=0; i<available_action_length; i++){
-        available_actions_state[i] = *(table + state * ACTION_NUM + available_actions[i]);   // table[state][available_actions[i]]
+        available_actions_state[i] = state_weights[available_actions[i]];   // table[state][available_actions[i]]
    }

    short ans_index;
@ -165,60 +189,71 @@ float get_estimate_reward(float *table, short *board, int state){
    Results:
        - None
 */
-void run(float *table, short *board, bool train, int times, bool plot){
-	short available_actions[9];
+void run(struct Node **map, short *board, bool train, int times, bool plot){
+	short available_actions[ACTION_NUM];
 	short available_actions_length;
 	short winner;
    short choice, opponent_choice;
-    int state, _state;
+    char state[BIGNUM_LEN], _state[BIGNUM_LEN];
    float estimate_r, estimate_r_, real_r, r, opponent_r;
    struct action a;
-
+    float state_weights[ACTION_NUM];
+    bool find;
    int win = 0;

-    for (int episode=0; episode<times; episode++){
+    for (int episode = 0; episode < times; episode++) {
        reset(board);
-        state = state_hash(board);
+        state_hash(board, state);
        while (1){
            // bot choose the action
-            choice = bot_choose_action(table, board, state);
+            choice = bot_choose_action(map, board, state);
            a.loc = choice;
            a.player = BOT_SYMBOL;

-            estimate_r = *(table + state * ACTION_NUM + choice);
-            act(board, &a, &_state, &r, &opponent_r, &winner);
+            search(map, state, &find, state_weights);
+            if (!find) {
+                for (short i=0; i<ACTION_NUM; i++){
+                    state_weights[i] = 0.0;
+                }
+                if (train)
+                    insert(map, state);
+            }
+            estimate_r = state_weights[choice];
+            act(board, &a, _state, &r, &opponent_r, &winner);
            if (plot) show(board);

-            // opponent random
+            // // opponent random
            if (winner == 0){
-                opponent_choice = opponent_random_action(table, board, state_hash(board));
+                opponent_choice = opponent_random_action(board);
                if (opponent_choice != -1){
                    a.loc = opponent_choice;
                    a.player = OPPONENT_SYMBOL;
-                    act(board, &a, &_state, &opponent_r, &r, &winner);
+                    act(board, &a, _state, &opponent_r, &r, &winner);
                    if (plot) show(board);
                }
            }
            get_available_actions(board, available_actions, &available_actions_length);

-            if ((winner != 0) || (available_actions_length == 0)){
-                if (plot){
+            if ((winner != 0) || (available_actions_length == 0)) {
+                if (plot) {
                    printf("winner: %d, reward: %f, oppo reward: %f\n", winner, r, opponent_r);
                    printf("==========================================================\n");
                }
                real_r = r;
            } else {
-                estimate_r_ = get_estimate_reward(table, board, _state);
+                estimate_r_ = get_estimate_reward(map, board, _state);
                real_r = r + LAMBDA * estimate_r_;
            }
            if (train){
-                // printf("update");
-                *(table + state * ACTION_NUM + choice) += ( LR * (real_r - estimate_r) );       // table[state][choice] += LR * (real_r - estimate_r)
+                state_weights[choice] += (LR * (real_r - estimate_r));
+                update(map, state, choice, state_weights[choice]);
            }
-            state = _state;
+            for (int i=0; i<BIGNUM_LEN; i++){
+                state[i] = _state[i];
+            }
+

            if ((winner != 0) || (available_actions_length == 0)){
-                // printf("break\n");
                if (winner == 1){
                    win += 1;
                }
@ -228,5 +263,6 @@ void run(float *table, short *board, bool train, int times, bool plot){
    }

    if (!train)
-        printf("%d/%d, %f\%\n", win, 10000, (float)win/10000);
+        // printf("%d/%d, %f\%\n", win, 10000, (float)win/10000);
+        printf("%f\n", (float)win/times);
 }
--- a/q-learning.h
+++ b/q-learning.h
@ -1,6 +1,7 @@
+#include "hash-table.h"
+
 short float_argmax(float *arr, short length);
-short bot_choose_action(float *table, short *board, int state);
-short opponent_random_action(float *table, short *board, int state);
-void init_table(float *table);
-float get_estimate_reward(float *table, short *board, int state);
-void run(float *table, short *board, bool train, int times, bool plot);
+short bot_choose_action(struct Node **map, short *board, char *state);
+short opponent_random_action(short *board);
+float get_estimate_reward(struct Node **map, short *board, char *state);
+void run(struct Node **map, short *board, bool train, int times, bool plot);
Author	SHA1	Message	Date
snsd0805	d1279f5c6d	docs: update README	2023-06-02 23:57:46 +08:00
snsd0805	c3a0335ff1	Merge: replace 'tic-tac-toe' with '4 in a row' 'tic-tac-toe' version is located on the other branch.	2023-06-02 23:48:55 +08:00
snsd0805	cc369c7094	fix: update Makefile	2023-06-02 23:36:06 +08:00
snsd0805	0fb79b3e1e	feat: change q-learning method to fit 'on 4 in a row'	2023-06-02 23:34:37 +08:00
snsd0805	b024eec8e4	feat: calculate state hash	2023-06-02 20:19:46 +08:00
snsd0805	7fcadce548	feat: Big num for state representation	2023-06-02 16:52:59 +08:00
snsd0805	821bc5727f	feat: set up 'four in a row' enviroment	2023-06-02 15:47:02 +08:00
snsd0805	605d9f6dd9	feat: Chaining hash table	2023-06-02 03:27:55 +08:00
Ting-Jun Wang	5cf2ef7936	Merge pull request #1 from eeeXun/clang-format GitHub-Action auto format with clang-format	2023-05-31 16:45:50 +08:00
eeeXun	7a68a06c86	style(format): run clang-format	2023-05-31 11:31:15 +08:00
eeeXun	7ba9db7f83	chore: github-action auto format with clang-format	2023-05-31 11:21:44 +08:00