Merge pull request #1 from eeeXun/clang-format

GitHub-Action auto format with clang-format
This commit is contained in:
Ting-Jun Wang 2023-05-31 16:45:50 +08:00 committed by GitHub
commit 5cf2ef7936
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 478 additions and 214 deletions

225
.clang-format Normal file
View File

@ -0,0 +1,225 @@
---
Language: Cpp
# BasedOnStyle: WebKit
AccessModifierOffset: -4
AlignAfterOpenBracket: DontAlign
AlignArrayOfStructures: None
AlignConsecutiveAssignments:
  Enabled: false
  AcrossEmptyLines: false
  AcrossComments: false
  AlignCompound: false
  PadOperators: true
AlignConsecutiveBitFields:
  Enabled: false
  AcrossEmptyLines: false
  AcrossComments: false
  AlignCompound: false
  PadOperators: false
AlignConsecutiveDeclarations:
  Enabled: false
  AcrossEmptyLines: false
  AcrossComments: false
  AlignCompound: false
  PadOperators: false
AlignConsecutiveMacros:
  Enabled: false
  AcrossEmptyLines: false
  AcrossComments: false
  AlignCompound: false
  PadOperators: false
AlignEscapedNewlines: Right
AlignOperands: DontAlign
AlignTrailingComments:
  Kind: Never
  OverEmptyLines: 0
AllowAllArgumentsOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: Empty
AllowShortCaseLabelsOnASingleLine: false
AllowShortEnumsOnASingleLine: true
AllowShortFunctionsOnASingleLine: All
AllowShortIfStatementsOnASingleLine: Never
AllowShortLambdasOnASingleLine: All
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: false
AlwaysBreakTemplateDeclarations: MultiLine
AttributeMacros:
- __capability
BinPackArguments: true
BinPackParameters: true
BitFieldColonSpacing: Both
BraceWrapping:
  AfterCaseLabel: false
  AfterClass: false
  AfterControlStatement: Never
  AfterEnum: false
  AfterExternBlock: false
  AfterFunction: true
  AfterNamespace: false
  AfterObjCDeclaration: false
  AfterStruct: false
  AfterUnion: false
  BeforeCatch: false
  BeforeElse: false
  BeforeLambdaBody: false
  BeforeWhile: false
  IndentBraces: false
  SplitEmptyFunction: true
  SplitEmptyRecord: true
  SplitEmptyNamespace: true
BreakAfterAttributes: Never
BreakAfterJavaFieldAnnotations: false
BreakArrays: true
BreakBeforeBinaryOperators: All
BreakBeforeConceptDeclarations: Always
BreakBeforeBraces: WebKit
BreakBeforeInlineASMColon: OnlyMultiline
BreakBeforeTernaryOperators: true
BreakConstructorInitializers: BeforeComma
BreakInheritanceList: BeforeColon
BreakStringLiterals: true
ColumnLimit: 0
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: false
DerivePointerAlignment: false
DisableFormat: false
EmptyLineAfterAccessModifier: Never
EmptyLineBeforeAccessModifier: LogicalBlock
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: false
ForEachMacros:
- foreach
- Q_FOREACH
- BOOST_FOREACH
IfMacros:
- KJ_IF_MAYBE
IncludeBlocks: Preserve
IncludeCategories:
  - Regex: '^"(llvm|llvm-c|clang|clang-c)/'
    Priority: 2
    SortPriority: 0
    CaseSensitive: false
  - Regex: '^(<|"(gtest|gmock|isl|json)/)'
    Priority: 3
    SortPriority: 0
    CaseSensitive: false
  - Regex: '.*'
    Priority: 1
    SortPriority: 0
    CaseSensitive: false
IncludeIsMainRegex: '(Test)?$'
IncludeIsMainSourceRegex: ''
IndentAccessModifiers: false
IndentCaseBlocks: false
IndentCaseLabels: false
IndentExternBlock: AfterExternBlock
IndentGotoLabels: true
IndentPPDirectives: None
IndentRequiresClause: true
IndentWidth: 4
IndentWrappedFunctionNames: false
InsertBraces: false
InsertNewlineAtEOF: false
InsertTrailingCommas: None
IntegerLiteralSeparator:
  Binary: 0
  BinaryMinDigits: 0
  Decimal: 0
  DecimalMinDigits: 0
  Hex: 0
  HexMinDigits: 0
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: true
LambdaBodyIndentation: Signature
LineEnding: DeriveLF
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: Inner
ObjCBinPackProtocolList: Auto
ObjCBlockIndentWidth: 4
ObjCBreakBeforeNestedBlockParam: true
ObjCSpaceAfterProperty: true
ObjCSpaceBeforeProtocolList: true
PackConstructorInitializers: BinPack
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 19
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakOpenParenthesis: 0
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyIndentedWhitespace: 0
PenaltyReturnTypeOnItsOwnLine: 60
PointerAlignment: Left
PPIndentWidth: -1
QualifierAlignment: Leave
ReferenceAlignment: Pointer
ReflowComments: true
RemoveBracesLLVM: false
RemoveSemicolon: false
RequiresClausePosition: OwnLine
RequiresExpressionIndentation: OuterScope
SeparateDefinitionBlocks: Leave
ShortNamespaceLines: 1
SortIncludes: CaseSensitive
SortJavaStaticImport: Before
SortUsingDeclarations: LexicographicNumeric
SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
SpaceAroundPointerQualifiers: Default
SpaceBeforeAssignmentOperators: true
SpaceBeforeCaseColon: false
SpaceBeforeCpp11BracedList: true
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeParensOptions:
  AfterControlStatements: true
  AfterForeachMacros: true
  AfterFunctionDefinitionName: false
  AfterFunctionDeclarationName: false
  AfterIfMacros: true
  AfterOverloadedOperator: false
  AfterRequiresInClause: false
  AfterRequiresInExpression: false
  BeforeNonEmptyParentheses: false
SpaceBeforeRangeBasedForLoopColon: true
SpaceBeforeSquareBrackets: false
SpaceInEmptyBlock: true
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles: Never
SpacesInConditionalStatement: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInLineCommentPrefix:
  Minimum: 1
  Maximum: -1
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Latest
StatementAttributeLikeMacros:
- Q_EMIT
StatementMacros:
- Q_UNUSED
- QT_REQUIRE_VERSION
TabWidth: 8
UseTab: Never
WhitespaceSensitiveMacros:
- BOOST_PP_STRINGIZE
- CF_SWIFT_NAME
- NS_SWIFT_NAME
- PP_STRINGIZE
- STRINGIZE
...

26
.github/workflows/format.yml vendored Normal file
View File

@ -0,0 +1,26 @@
name: format
on:
  push:
    branches:
      - master
    paths:
      - "**.c"
      - "**.h"
jobs:
  format:
    runs-on: ubuntu-latest
    permissions:
      contents: write
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v4
      - name: Install clang-format
        run: pip install clang-format
      - name: Format with clang-format
        run: clang-format -i *.c *.h
      - name: Commit changes
        uses: stefanzweifel/git-auto-commit-action@v4
        with:
          commit_message: "style(format): run clang-format"

View File

@ -1,9 +1,9 @@
#ifndef CONSTANT_H
#define CONSTANT_H

/* Board cell markers; a cell holding 0 is empty. */
#define BOT_SYMBOL 1
#define OPPONENT_SYMBOL 2

/* Q-learning hyper-parameters. */
#define EPSILON 0.9 // Epsilon-greedy
#define LR 0.1 // learning rate
#define LAMBDA 0.9 // discount factor

/* Q-table dimensions: 19683 = 3^9 board encodings, 9 cells to play on. */
#define STATE_NUM 19683
#define ACTION_NUM 9

#endif /* CONSTANT_H */

View File

@ -1,146 +1,151 @@
#include <stdio.h>
#include <math.h>
#include <assert.h>
#include "constant.h"
#include "enviroment.h" #include "enviroment.h"
#include "constant.h"
#include <assert.h>
#include <math.h>
#include <stdio.h>
/*
    The eight winning lines of the 3x3 board, given as cell indices
    (cells are numbered row by row: index = 3 * row + column).
*/
short PATHS[8][3] = {
    { 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 }, /* rows */
    { 0, 3, 6 }, { 1, 4, 7 }, { 2, 5, 8 }, /* columns */
    { 0, 4, 8 }, { 2, 4, 6 } /* diagonals */
};
/*
    Reset the game, clear the chessboard.
    Args:
        - short *board (array's address): chessboard's status (9 cells)
    Results:
        - None, set all blocks on the chessboard to zero.
*/
void reset(short* board)
{
    for (short i = 0; i < 9; i++)
        board[i] = 0;
}
/* /*
Print the chessboard on the console. Print the chessboard on the console.
Args: Args:
- short *board (array's address): chessboard's status - short *board (array's address): chessboard's status
Results: Results:
- None. Only printing. - None. Only printing.
*/ */
void show(short *board){ void show(short* board)
short loc; {
printf("┼───┼───┼───┼\n"); short loc;
for (short i=0; i<3; i++){ printf("┼───┼───┼───┼\n");
printf(""); for (short i = 0; i < 3; i++) {
for (short j=0; j<3; j++){ printf("");
loc = 3*i+j; for (short j = 0; j < 3; j++) {
if (board[loc] == 0) loc = 3 * i + j;
printf(""); if (board[loc] == 0)
else if (board[loc] == BOT_SYMBOL) printf("");
printf("○ │ "); else if (board[loc] == BOT_SYMBOL)
else printf("○ │ ");
printf("✕ │ "); else
} printf("✕ │ ");
printf("\n"); }
printf("┼───┼───┼───┼\n"); printf("\n");
} printf("┼───┼───┼───┼\n");
printf("\n\n"); }
printf("\n\n");
} }
/*
    Save all available actions into the "result" array.
    Args:
        - short *board (array's address): chessboard's status
        - short *result (array's address): To save all available actions (room for 9 entries).
        - short *length (integer's pointer): To save the number of available actions.
    Results:
        - None. The indices of all empty cells (value 0) are saved into "result" in
          ascending order and the number of them is saved in "length".
*/
void get_available_actions(short* board, short* result, short* length)
{
    short count = 0;
    for (short i = 0; i < 9; i++) {
        if (board[i] == 0)
            result[count++] = i;
    }
    *length = count;
}
/* /*
Return winner's number; Return winner's number;
Args: Args:
- short *board (array's address): chessboard's status - short *board (array's address): chessboard's status
Results: Results:
- short winner_number(integer): winner's number, 0 for no winner now, 1 for Bot, 2 for opponent - short winner_number(integer): winner's number, 0 for no winner now, 1 for Bot, 2 for opponent
*/ */
short get_winner(short *board){ short get_winner(short* board)
int a, b, c; {
for (int i=0; i<8; i++){ int a, b, c;
a = PATHS[i][0]; b = PATHS[i][1]; c = PATHS[i][2]; for (int i = 0; i < 8; i++) {
if ((board[a] == board[b]) && (board[b] == board[c]) && (board[a] != 0)){ a = PATHS[i][0];
return board[a]; b = PATHS[i][1];
} c = PATHS[i][2];
} if ((board[a] == board[b]) && (board[b] == board[c]) && (board[a] != 0)) {
return 0; return board[a];
}
}
return 0;
} }
/*
    Hash chesstable's status into hash.
    Args:
        - short *board (array's address): chessboard's status
    Results:
        - int hash (integer): sum over i of (status of the i-th block * 3^i),
          i.e. the board read as a 9-digit base-3 number.
*/
int state_hash(short* board)
{
    int hash = 0;
    // 3^i is tracked in integer arithmetic; converting pow(3, i) to int may
    // truncate an inexact floating-point result to the wrong value.
    int base = 1;
    for (int i = 0; i < 9; i++) {
        hash += base * board[i];
        base *= 3;
    }
    return hash;
}
/* /*
Act on the chessboard. Act on the chessboard.
Args: Args:
- short *board (array's address): chessboards' status - short *board (array's address): chessboards' status
- struct action *a (a action's pointer): include player & choose loc - struct action *a (a action's pointer): include player & choose loc
- int *state (pointer): for return. To save the chessboard's state hash which after doing this action - int *state (pointer): for return. To save the chessboard's state hash which after doing this action
- float *reward (pointer): for return. To save the number of rewards which the player gets after doing this action. - float *reward (pointer): for return. To save the number of rewards which the player gets after doing this action.
- float *opponent_reward (pointer): for return. To save the number of rewards which the opponents gets after the player doing this action. - float *opponent_reward (pointer): for return. To save the number of rewards which the opponents gets after the player doing this action.
- short *winner (pointer): for return. To save the winner in this action. If haven't finish, it will be zero. - short *winner (pointer): for return. To save the winner in this action. If haven't finish, it will be zero.
Results: Results:
- None. Save in state & reward & winner - None. Save in state & reward & winner
*/ */
void act(short *board, struct action *a, int *state, float *reward, float *opponent_reward, short *winner){ void act(short* board, struct action* a, int* state, float* reward, float* opponent_reward, short* winner)
{
// printf("Act( player=%d, action=%d )\n", a->player, a->loc); // printf("Act( player=%d, action=%d )\n", a->player, a->loc);
assert(board[a->loc] == 0); assert(board[a->loc] == 0);
board[a->loc] = a->player; board[a->loc] = a->player;
*winner = get_winner(board); *winner = get_winner(board);
*state = state_hash(board); *state = state_hash(board);
if (*winner == a->player){ if (*winner == a->player) {
*reward = 1.0; *reward = 1.0;
*opponent_reward = -1.0; *opponent_reward = -1.0;
} } else if (*winner != 0) {
else if(*winner != 0){ *reward = -1.0;
*reward = -1.0;
*opponent_reward = 1.0; *opponent_reward = 1.0;
} } else {
else{ *reward = 0;
*reward = 0;
*opponent_reward = 0; *opponent_reward = 0;
} }
} }

View File

@ -1,11 +1,11 @@
#ifndef ENVIROMENT_H
#define ENVIROMENT_H

/* One move: which player acts and on which cell. */
struct action {
    short player;
    short loc;
};

void reset(short* board);
void show(short* board);
void get_available_actions(short* board, short* result, short* length);
short get_winner(short* board);
int state_hash(short* board);
void act(short* board, struct action* a, int* state, float* reward, float* opponent_reward, short* winner);

#endif /* ENVIROMENT_H */

17
main.c
View File

@ -1,16 +1,17 @@
#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include <stdbool.h>
#include "constant.h" #include "constant.h"
#include "enviroment.h" #include "enviroment.h"
#include "q-learning.h" #include "q-learning.h"
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
int main(){ int main()
short board[9]= {0}; // tic tac toe's chessboard {
float table[STATE_NUM][ACTION_NUM]; // q-learning table short board[9] = { 0 }; // tic tac toe's chessboard
float table[STATE_NUM][ACTION_NUM]; // q-learning table
srand(time(NULL)); srand(time(NULL));
init_table(&table[0][0]); init_table(&table[0][0]);
run(&table[0][0], board, false, 10000, false); run(&table[0][0], board, false, 10000, false);

View File

@ -1,80 +1,81 @@
#include <stdio.h>
#include <float.h> #include <float.h>
#include <stdbool.h>
#include <limits.h> #include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include "constant.h" #include "constant.h"
#include "enviroment.h" #include "enviroment.h"
/*
    Return the index with the max value in the array
    Args:
        - float *arr (array's address)
        - short length (integer): array's length
    Results:
        - short index (integer): the index with the max value; the lowest such
          index when several elements share the max. -1 when length is not positive.
*/
short float_argmax(float* arr, short length)
{
    if (length <= 0) {
        return -1;
    }
    // Start from the first element so that a non-empty array always yields a
    // valid index, and keep the index in an integer type.
    short best = 0;
    for (short i = 1; i < length; i++) {
        if (arr[i] > arr[best]) {
            best = i;
        }
    }
    return best;
}
/* /*
Choose the next action with Epsilon-Greedy. Choose the next action with Epsilon-Greedy.
EPSILON means the probability to choose the best action in this state from Q-Table. EPSILON means the probability to choose the best action in this state from Q-Table.
(1-EPSILON) to random an action to do. (1-EPSILON) to random an action to do.
Args: Args:
- short *table (array's address): state table for Q-Learning - short *table (array's address): state table for Q-Learning
- short *board (array's address): chessboards' status - short *board (array's address): chessboards' status
- int state (integer, state hash): hash for board's status - int state (integer, state hash): hash for board's status
Results: Results:
- short best_choice - short best_choice
*/ */
short bot_choose_action(float *table, short *board, int state){ short bot_choose_action(float* table, short* board, int state)
{
// get available actions for choosing // get available actions for choosing
short available_actions[9]; short available_actions[9];
short available_actions_length; short available_actions_length;
get_available_actions(board, available_actions, &available_actions_length); get_available_actions(board, available_actions, &available_actions_length);
// use argmax() to find the best choise, // use argmax() to find the best choise,
// first we should build an available_actions_state array for saving the state for all available choise. // first we should build an available_actions_state array for saving the state for all available choise.
float available_actions_state[9]; float available_actions_state[9];
short available_actions_state_index[9]; short available_actions_state_index[9];
short available_actions_state_length, index = 0; short available_actions_state_length, index = 0;
short temp_index, best_choice; short temp_index, best_choice;
bool zeros = true; bool zeros = true;
for (short i=0; i<available_actions_length; i++){ for (short i = 0; i < available_actions_length; i++) {
temp_index = available_actions[i]; temp_index = available_actions[i];
available_actions_state[index] = *(table + state * ACTION_NUM + temp_index); available_actions_state[index] = *(table + state * ACTION_NUM + temp_index);
if (available_actions_state[index] != 0.0){ if (available_actions_state[index] != 0.0) {
zeros = false; zeros = false;
} }
available_actions_state_index[index] = temp_index; available_actions_state_index[index] = temp_index;
index++; index++;
} }
best_choice = float_argmax(available_actions_state, index); best_choice = float_argmax(available_actions_state, index);
best_choice = available_actions_state_index[best_choice]; best_choice = available_actions_state_index[best_choice];
// Epsilon-Greedy // Epsilon-Greedy
// If random number > EPSILON -> random a action // If random number > EPSILON -> random a action
// If random number < EPSILON -> choose the best action in this state. // If random number < EPSILON -> choose the best action in this state.
double random_num = (double) rand() / (RAND_MAX + 1.0); double random_num = (double)rand() / (RAND_MAX + 1.0);
if ((random_num > EPSILON) || zeros){ if ((random_num > EPSILON) || zeros) {
best_choice = available_actions_state_index[ rand() % index ]; best_choice = available_actions_state_index[rand() % index];
} }
return best_choice; return best_choice;
} }
@ -83,27 +84,28 @@ short bot_choose_action(float *table, short *board, int state){
Opponent random choose a action to do. Opponent random choose a action to do.
Args: Args:
- short *table (array's address): state table for Q-Learning - short *table (array's address): state table for Q-Learning
- short *board (array's address): chessboards' status - short *board (array's address): chessboards' status
- int state (integer, state hash): hash for board's status - int state (integer, state hash): hash for board's status
Results: Results:
- short choice (integer): random, -1 means no available action to choose - short choice (integer): random, -1 means no available action to choose
*/ */
short opponent_random_action(float *table, short *board, int state){ short opponent_random_action(float* table, short* board, int state)
{
// get available actions for choosing // get available actions for choosing
short available_actions[9]; short available_actions[9];
short available_action_length; short available_action_length;
get_available_actions(board, available_actions, &available_action_length); get_available_actions(board, available_actions, &available_action_length);
if (available_action_length == 0){ if (available_action_length == 0) {
return -1; return -1;
} }
// random // random
short choice; short choice;
choice = (short)( rand() % available_action_length ); choice = (short)(rand() % available_action_length);
choice = available_actions[choice]; choice = available_actions[choice];
return choice; return choice;
@ -118,9 +120,10 @@ short opponent_random_action(float *table, short *board, int state){
Results: Results:
- None. - None.
*/ */
void init_table(float *table){ void init_table(float* table)
for (int i=0; i<STATE_NUM; i++){ {
for (int j=0; j<ACTION_NUM; j++){ for (int i = 0; i < STATE_NUM; i++) {
for (int j = 0; j < ACTION_NUM; j++) {
*(table + i * ACTION_NUM + j) = 0; *(table + i * ACTION_NUM + j) = 0;
} }
} }
@ -137,14 +140,15 @@ void init_table(float *table){
Results: Results:
- int max_reward - int max_reward
*/ */
float get_estimate_reward(float *table, short *board, int state){ float get_estimate_reward(float* table, short* board, int state)
{
short available_actions[9]; short available_actions[9];
short available_action_length; short available_action_length;
get_available_actions(board, available_actions, &available_action_length); get_available_actions(board, available_actions, &available_action_length);
float available_actions_state[9]; float available_actions_state[9];
for (short i=0; i<available_action_length; i++){ for (short i = 0; i < available_action_length; i++) {
available_actions_state[i] = *(table + state * ACTION_NUM + available_actions[i]); // table[state][available_actions[i]] available_actions_state[i] = *(table + state * ACTION_NUM + available_actions[i]); // table[state][available_actions[i]]
} }
short ans_index; short ans_index;
@ -165,10 +169,11 @@ float get_estimate_reward(float *table, short *board, int state){
Results: Results:
- None - None
*/ */
void run(float *table, short *board, bool train, int times, bool plot){ void run(float* table, short* board, bool train, int times, bool plot)
short available_actions[9]; {
short available_actions_length; short available_actions[9];
short winner; short available_actions_length;
short winner;
short choice, opponent_choice; short choice, opponent_choice;
int state, _state; int state, _state;
float estimate_r, estimate_r_, real_r, r, opponent_r; float estimate_r, estimate_r_, real_r, r, opponent_r;
@ -176,10 +181,10 @@ void run(float *table, short *board, bool train, int times, bool plot){
int win = 0; int win = 0;
for (int episode=0; episode<times; episode++){ for (int episode = 0; episode < times; episode++) {
reset(board); reset(board);
state = state_hash(board); state = state_hash(board);
while (1){ while (1) {
// bot choose the action // bot choose the action
choice = bot_choose_action(table, board, state); choice = bot_choose_action(table, board, state);
a.loc = choice; a.loc = choice;
@ -187,22 +192,24 @@ void run(float *table, short *board, bool train, int times, bool plot){
estimate_r = *(table + state * ACTION_NUM + choice); estimate_r = *(table + state * ACTION_NUM + choice);
act(board, &a, &_state, &r, &opponent_r, &winner); act(board, &a, &_state, &r, &opponent_r, &winner);
if (plot) show(board); if (plot)
show(board);
// opponent random // opponent random
if (winner == 0){ if (winner == 0) {
opponent_choice = opponent_random_action(table, board, state_hash(board)); opponent_choice = opponent_random_action(table, board, state_hash(board));
if (opponent_choice != -1){ if (opponent_choice != -1) {
a.loc = opponent_choice; a.loc = opponent_choice;
a.player = OPPONENT_SYMBOL; a.player = OPPONENT_SYMBOL;
act(board, &a, &_state, &opponent_r, &r, &winner); act(board, &a, &_state, &opponent_r, &r, &winner);
if (plot) show(board); if (plot)
show(board);
} }
} }
get_available_actions(board, available_actions, &available_actions_length); get_available_actions(board, available_actions, &available_actions_length);
if ((winner != 0) || (available_actions_length == 0)){ if ((winner != 0) || (available_actions_length == 0)) {
if (plot){ if (plot) {
printf("winner: %d, reward: %f, oppo reward: %f\n", winner, r, opponent_r); printf("winner: %d, reward: %f, oppo reward: %f\n", winner, r, opponent_r);
printf("==========================================================\n"); printf("==========================================================\n");
} }
@ -211,15 +218,15 @@ void run(float *table, short *board, bool train, int times, bool plot){
estimate_r_ = get_estimate_reward(table, board, _state); estimate_r_ = get_estimate_reward(table, board, _state);
real_r = r + LAMBDA * estimate_r_; real_r = r + LAMBDA * estimate_r_;
} }
if (train){ if (train) {
// printf("update"); // printf("update");
*(table + state * ACTION_NUM + choice) += ( LR * (real_r - estimate_r) ); // table[state][choice] += LR * (real_r - estimate_r) *(table + state * ACTION_NUM + choice) += (LR * (real_r - estimate_r)); // table[state][choice] += LR * (real_r - estimate_r)
} }
state = _state; state = _state;
if ((winner != 0) || (available_actions_length == 0)){ if ((winner != 0) || (available_actions_length == 0)) {
// printf("break\n"); // printf("break\n");
if (winner == 1){ if (winner == 1) {
win += 1; win += 1;
} }
break; break;
@ -228,5 +235,5 @@ void run(float *table, short *board, bool train, int times, bool plot){
} }
if (!train) if (!train)
printf("%d/%d, %f\%\n", win, 10000, (float)win/10000); printf("%d/%d, %f\%\n", win, 10000, (float)win / 10000);
} }

View File

@ -1,6 +1,6 @@
#ifndef Q_LEARNING_H
#define Q_LEARNING_H

#include <stdbool.h> /* bool is used in the run() prototype */

short float_argmax(float* arr, short length);
short bot_choose_action(float* table, short* board, int state);
short opponent_random_action(float* table, short* board, int state);
void init_table(float* table);
float get_estimate_reward(float* table, short* board, int state);
void run(float* table, short* board, bool train, int times, bool plot);

#endif /* Q_LEARNING_H */