Merge pull request #1 from eeeXun/clang-format
GitHub-Action auto format with clang-format
This commit is contained in:
commit
5cf2ef7936
225
.clang-format
Normal file
225
.clang-format
Normal file
@ -0,0 +1,225 @@
|
||||
---
|
||||
Language: Cpp
|
||||
# BasedOnStyle: WebKit
|
||||
AccessModifierOffset: -4
|
||||
AlignAfterOpenBracket: DontAlign
|
||||
AlignArrayOfStructures: None
|
||||
AlignConsecutiveAssignments:
|
||||
Enabled: false
|
||||
AcrossEmptyLines: false
|
||||
AcrossComments: false
|
||||
AlignCompound: false
|
||||
PadOperators: true
|
||||
AlignConsecutiveBitFields:
|
||||
Enabled: false
|
||||
AcrossEmptyLines: false
|
||||
AcrossComments: false
|
||||
AlignCompound: false
|
||||
PadOperators: false
|
||||
AlignConsecutiveDeclarations:
|
||||
Enabled: false
|
||||
AcrossEmptyLines: false
|
||||
AcrossComments: false
|
||||
AlignCompound: false
|
||||
PadOperators: false
|
||||
AlignConsecutiveMacros:
|
||||
Enabled: false
|
||||
AcrossEmptyLines: false
|
||||
AcrossComments: false
|
||||
AlignCompound: false
|
||||
PadOperators: false
|
||||
AlignEscapedNewlines: Right
|
||||
AlignOperands: DontAlign
|
||||
AlignTrailingComments:
|
||||
Kind: Never
|
||||
OverEmptyLines: 0
|
||||
AllowAllArgumentsOnNextLine: true
|
||||
AllowAllParametersOfDeclarationOnNextLine: true
|
||||
AllowShortBlocksOnASingleLine: Empty
|
||||
AllowShortCaseLabelsOnASingleLine: false
|
||||
AllowShortEnumsOnASingleLine: true
|
||||
AllowShortFunctionsOnASingleLine: All
|
||||
AllowShortIfStatementsOnASingleLine: Never
|
||||
AllowShortLambdasOnASingleLine: All
|
||||
AllowShortLoopsOnASingleLine: false
|
||||
AlwaysBreakAfterDefinitionReturnType: None
|
||||
AlwaysBreakAfterReturnType: None
|
||||
AlwaysBreakBeforeMultilineStrings: false
|
||||
AlwaysBreakTemplateDeclarations: MultiLine
|
||||
AttributeMacros:
|
||||
- __capability
|
||||
BinPackArguments: true
|
||||
BinPackParameters: true
|
||||
BitFieldColonSpacing: Both
|
||||
BraceWrapping:
|
||||
AfterCaseLabel: false
|
||||
AfterClass: false
|
||||
AfterControlStatement: Never
|
||||
AfterEnum: false
|
||||
AfterExternBlock: false
|
||||
AfterFunction: true
|
||||
AfterNamespace: false
|
||||
AfterObjCDeclaration: false
|
||||
AfterStruct: false
|
||||
AfterUnion: false
|
||||
BeforeCatch: false
|
||||
BeforeElse: false
|
||||
BeforeLambdaBody: false
|
||||
BeforeWhile: false
|
||||
IndentBraces: false
|
||||
SplitEmptyFunction: true
|
||||
SplitEmptyRecord: true
|
||||
SplitEmptyNamespace: true
|
||||
BreakAfterAttributes: Never
|
||||
BreakAfterJavaFieldAnnotations: false
|
||||
BreakArrays: true
|
||||
BreakBeforeBinaryOperators: All
|
||||
BreakBeforeConceptDeclarations: Always
|
||||
BreakBeforeBraces: WebKit
|
||||
BreakBeforeInlineASMColon: OnlyMultiline
|
||||
BreakBeforeTernaryOperators: true
|
||||
BreakConstructorInitializers: BeforeComma
|
||||
BreakInheritanceList: BeforeColon
|
||||
BreakStringLiterals: true
|
||||
ColumnLimit: 0
|
||||
CommentPragmas: '^ IWYU pragma:'
|
||||
CompactNamespaces: false
|
||||
ConstructorInitializerIndentWidth: 4
|
||||
ContinuationIndentWidth: 4
|
||||
Cpp11BracedListStyle: false
|
||||
DerivePointerAlignment: false
|
||||
DisableFormat: false
|
||||
EmptyLineAfterAccessModifier: Never
|
||||
EmptyLineBeforeAccessModifier: LogicalBlock
|
||||
ExperimentalAutoDetectBinPacking: false
|
||||
FixNamespaceComments: false
|
||||
ForEachMacros:
|
||||
- foreach
|
||||
- Q_FOREACH
|
||||
- BOOST_FOREACH
|
||||
IfMacros:
|
||||
- KJ_IF_MAYBE
|
||||
IncludeBlocks: Preserve
|
||||
IncludeCategories:
|
||||
- Regex: '^"(llvm|llvm-c|clang|clang-c)/'
|
||||
Priority: 2
|
||||
SortPriority: 0
|
||||
CaseSensitive: false
|
||||
- Regex: '^(<|"(gtest|gmock|isl|json)/)'
|
||||
Priority: 3
|
||||
SortPriority: 0
|
||||
CaseSensitive: false
|
||||
- Regex: '.*'
|
||||
Priority: 1
|
||||
SortPriority: 0
|
||||
CaseSensitive: false
|
||||
IncludeIsMainRegex: '(Test)?$'
|
||||
IncludeIsMainSourceRegex: ''
|
||||
IndentAccessModifiers: false
|
||||
IndentCaseBlocks: false
|
||||
IndentCaseLabels: false
|
||||
IndentExternBlock: AfterExternBlock
|
||||
IndentGotoLabels: true
|
||||
IndentPPDirectives: None
|
||||
IndentRequiresClause: true
|
||||
IndentWidth: 4
|
||||
IndentWrappedFunctionNames: false
|
||||
InsertBraces: false
|
||||
InsertNewlineAtEOF: false
|
||||
InsertTrailingCommas: None
|
||||
IntegerLiteralSeparator:
|
||||
Binary: 0
|
||||
BinaryMinDigits: 0
|
||||
Decimal: 0
|
||||
DecimalMinDigits: 0
|
||||
Hex: 0
|
||||
HexMinDigits: 0
|
||||
JavaScriptQuotes: Leave
|
||||
JavaScriptWrapImports: true
|
||||
KeepEmptyLinesAtTheStartOfBlocks: true
|
||||
LambdaBodyIndentation: Signature
|
||||
LineEnding: DeriveLF
|
||||
MacroBlockBegin: ''
|
||||
MacroBlockEnd: ''
|
||||
MaxEmptyLinesToKeep: 1
|
||||
NamespaceIndentation: Inner
|
||||
ObjCBinPackProtocolList: Auto
|
||||
ObjCBlockIndentWidth: 4
|
||||
ObjCBreakBeforeNestedBlockParam: true
|
||||
ObjCSpaceAfterProperty: true
|
||||
ObjCSpaceBeforeProtocolList: true
|
||||
PackConstructorInitializers: BinPack
|
||||
PenaltyBreakAssignment: 2
|
||||
PenaltyBreakBeforeFirstCallParameter: 19
|
||||
PenaltyBreakComment: 300
|
||||
PenaltyBreakFirstLessLess: 120
|
||||
PenaltyBreakOpenParenthesis: 0
|
||||
PenaltyBreakString: 1000
|
||||
PenaltyBreakTemplateDeclaration: 10
|
||||
PenaltyExcessCharacter: 1000000
|
||||
PenaltyIndentedWhitespace: 0
|
||||
PenaltyReturnTypeOnItsOwnLine: 60
|
||||
PointerAlignment: Left
|
||||
PPIndentWidth: -1
|
||||
QualifierAlignment: Leave
|
||||
ReferenceAlignment: Pointer
|
||||
ReflowComments: true
|
||||
RemoveBracesLLVM: false
|
||||
RemoveSemicolon: false
|
||||
RequiresClausePosition: OwnLine
|
||||
RequiresExpressionIndentation: OuterScope
|
||||
SeparateDefinitionBlocks: Leave
|
||||
ShortNamespaceLines: 1
|
||||
SortIncludes: CaseSensitive
|
||||
SortJavaStaticImport: Before
|
||||
SortUsingDeclarations: LexicographicNumeric
|
||||
SpaceAfterCStyleCast: false
|
||||
SpaceAfterLogicalNot: false
|
||||
SpaceAfterTemplateKeyword: true
|
||||
SpaceAroundPointerQualifiers: Default
|
||||
SpaceBeforeAssignmentOperators: true
|
||||
SpaceBeforeCaseColon: false
|
||||
SpaceBeforeCpp11BracedList: true
|
||||
SpaceBeforeCtorInitializerColon: true
|
||||
SpaceBeforeInheritanceColon: true
|
||||
SpaceBeforeParens: ControlStatements
|
||||
SpaceBeforeParensOptions:
|
||||
AfterControlStatements: true
|
||||
AfterForeachMacros: true
|
||||
AfterFunctionDefinitionName: false
|
||||
AfterFunctionDeclarationName: false
|
||||
AfterIfMacros: true
|
||||
AfterOverloadedOperator: false
|
||||
AfterRequiresInClause: false
|
||||
AfterRequiresInExpression: false
|
||||
BeforeNonEmptyParentheses: false
|
||||
SpaceBeforeRangeBasedForLoopColon: true
|
||||
SpaceBeforeSquareBrackets: false
|
||||
SpaceInEmptyBlock: true
|
||||
SpaceInEmptyParentheses: false
|
||||
SpacesBeforeTrailingComments: 1
|
||||
SpacesInAngles: Never
|
||||
SpacesInConditionalStatement: false
|
||||
SpacesInContainerLiterals: true
|
||||
SpacesInCStyleCastParentheses: false
|
||||
SpacesInLineCommentPrefix:
|
||||
Minimum: 1
|
||||
Maximum: -1
|
||||
SpacesInParentheses: false
|
||||
SpacesInSquareBrackets: false
|
||||
Standard: Latest
|
||||
StatementAttributeLikeMacros:
|
||||
- Q_EMIT
|
||||
StatementMacros:
|
||||
- Q_UNUSED
|
||||
- QT_REQUIRE_VERSION
|
||||
TabWidth: 8
|
||||
UseTab: Never
|
||||
WhitespaceSensitiveMacros:
|
||||
- BOOST_PP_STRINGIZE
|
||||
- CF_SWIFT_NAME
|
||||
- NS_SWIFT_NAME
|
||||
- PP_STRINGIZE
|
||||
- STRINGIZE
|
||||
...
|
||||
|
||||
26
.github/workflows/format.yml
vendored
Normal file
26
.github/workflows/format.yml
vendored
Normal file
@ -0,0 +1,26 @@
|
||||
name: format
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
paths:
|
||||
- "**.c"
|
||||
- "**.h"
|
||||
|
||||
jobs:
|
||||
format:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/setup-python@v4
|
||||
- name: Install clang-format
|
||||
run: pip install clang-format
|
||||
- name: Format with clang-format
|
||||
run: clang-format -i *.c *.h
|
||||
- name: Commit changes
|
||||
uses: stefanzweifel/git-auto-commit-action@v4
|
||||
with:
|
||||
commit_message: "style(format): run clang-format"
|
||||
@ -1,9 +1,9 @@
|
||||
#define BOT_SYMBOL 1
|
||||
#define OPPONENT_SYMBOL 2
|
||||
|
||||
#define EPSILON 0.9 // Epsilon-greedy
|
||||
#define LR 0.1 // learning rate
|
||||
#define LAMBDA 0.9 // discount factor
|
||||
#define EPSILON 0.9 // Epsilon-greedy
|
||||
#define LR 0.1 // learning rate
|
||||
#define LAMBDA 0.9 // discount factor
|
||||
|
||||
#define STATE_NUM 19683
|
||||
#define ACTION_NUM 9
|
||||
|
||||
205
enviroment.c
205
enviroment.c
@ -1,146 +1,151 @@
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <assert.h>
|
||||
#include "constant.h"
|
||||
#include "enviroment.h"
|
||||
#include "constant.h"
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/*
    The eight winning lines of the 3x3 board, as cell indices 0..8:
    three rows, three columns, then the two diagonals.
*/
short PATHS[8][3] = {
    { 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
    { 0, 3, 6 }, { 1, 4, 7 }, { 2, 5, 8 },
    { 0, 4, 8 }, { 2, 4, 6 }
};
|
||||
|
||||
/*
    Reset the game by clearing the chessboard.

    Args:
        - short *board (array's address): chessboard's status, 9 cells

    Results:
        - None. Every cell of the chessboard is set to zero.
*/
void reset(short* board)
{
    for (short i = 0; i < 9; i++) {
        board[i] = 0;
    }
}
|
||||
|
||||
/*
|
||||
Print the chessboard on the console.
|
||||
Print the chessboard on the console.
|
||||
|
||||
Args:
|
||||
- short *board (array's address): chessboard's status
|
||||
Args:
|
||||
- short *board (array's address): chessboard's status
|
||||
|
||||
Results:
|
||||
- None. Only printing.
|
||||
Results:
|
||||
- None. Only printing.
|
||||
*/
|
||||
void show(short *board){
|
||||
short loc;
|
||||
printf("┼───┼───┼───┼\n");
|
||||
for (short i=0; i<3; i++){
|
||||
printf("│ ");
|
||||
for (short j=0; j<3; j++){
|
||||
loc = 3*i+j;
|
||||
if (board[loc] == 0)
|
||||
printf(" │ ");
|
||||
else if (board[loc] == BOT_SYMBOL)
|
||||
printf("○ │ ");
|
||||
else
|
||||
printf("✕ │ ");
|
||||
}
|
||||
printf("\n");
|
||||
printf("┼───┼───┼───┼\n");
|
||||
}
|
||||
printf("\n\n");
|
||||
void show(short* board)
|
||||
{
|
||||
short loc;
|
||||
printf("┼───┼───┼───┼\n");
|
||||
for (short i = 0; i < 3; i++) {
|
||||
printf("│ ");
|
||||
for (short j = 0; j < 3; j++) {
|
||||
loc = 3 * i + j;
|
||||
if (board[loc] == 0)
|
||||
printf(" │ ");
|
||||
else if (board[loc] == BOT_SYMBOL)
|
||||
printf("○ │ ");
|
||||
else
|
||||
printf("✕ │ ");
|
||||
}
|
||||
printf("\n");
|
||||
printf("┼───┼───┼───┼\n");
|
||||
}
|
||||
printf("\n\n");
|
||||
}
|
||||
|
||||
/*
    Save all available actions into the "result" array.

    Args:
        - short *board (array's address): chessboard's status, 9 cells
        - short *result (array's address): receives the indices of the empty cells,
          in ascending order; must have room for 9 entries
        - short *length (integer's pointer): receives the number of available actions

    Results:
        - None. All available actions are saved into "result" and the number of
          actions is saved in "length".
*/
void get_available_actions(short* board, short* result, short* length)
{
    short count = 0;
    for (short cell = 0; cell < 9; cell++) {
        // A cell holding 0 is empty and can still be played.
        if (board[cell] == 0) {
            result[count++] = cell;
        }
    }
    *length = count;
}
|
||||
|
||||
/*
|
||||
Return winner's number;
|
||||
Return winner's number;
|
||||
|
||||
Args:
|
||||
- short *board (array's address): chessboard's status
|
||||
Args:
|
||||
- short *board (array's address): chessboard's status
|
||||
|
||||
Results:
|
||||
- short winner_number(integer): winner's number, 0 for no winner now, 1 for Bot, 2 for opponent
|
||||
Results:
|
||||
- short winner_number(integer): winner's number, 0 for no winner now, 1 for Bot, 2 for opponent
|
||||
*/
|
||||
short get_winner(short *board){
|
||||
int a, b, c;
|
||||
for (int i=0; i<8; i++){
|
||||
a = PATHS[i][0]; b = PATHS[i][1]; c = PATHS[i][2];
|
||||
if ((board[a] == board[b]) && (board[b] == board[c]) && (board[a] != 0)){
|
||||
return board[a];
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
short get_winner(short* board)
|
||||
{
|
||||
int a, b, c;
|
||||
for (int i = 0; i < 8; i++) {
|
||||
a = PATHS[i][0];
|
||||
b = PATHS[i][1];
|
||||
c = PATHS[i][2];
|
||||
if ((board[a] == board[b]) && (board[b] == board[c]) && (board[a] != 0)) {
|
||||
return board[a];
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
    Hash the chessboard's status into a single integer.

    The board is read as a base-3 number: cell i contributes board[i] * 3^i.
    With every cell in {0, 1, 2} the result lies in 0..19682.

    The powers of three are built by integer multiplication rather than
    pow(): truncating pow()'s double result to int can be off by one when
    the library returns a value just below the exact power.

    Args:
        - short *board (array's address): chessboard's status, 9 cells

    Results:
        - int hash (integer): sum over i of board[i] * 3^i
*/
int state_hash(short* board)
{
    int hash = 0;
    int base = 1; // 3^i, kept exact in integer arithmetic
    for (int i = 0; i < 9; i++) {
        hash += base * board[i];
        base *= 3;
    }
    return hash;
}
|
||||
|
||||
|
||||
/*
|
||||
Act on the chessboard.
|
||||
Act on the chessboard.
|
||||
|
||||
Args:
|
||||
- short *board (array's address): chessboards' status
|
||||
- struct action *a (a action's pointer): include player & choose loc
|
||||
- int *state (pointer): for return. To save the chessboard's state hash which after doing this action
|
||||
- float *reward (pointer): for return. To save the number of rewards which the player gets after doing this action.
|
||||
- float *opponent_reward (pointer): for return. To save the number of rewards which the opponents gets after the player doing this action.
|
||||
- short *winner (pointer): for return. To save the winner in this action. If haven't finish, it will be zero.
|
||||
Args:
|
||||
- short *board (array's address): chessboards' status
|
||||
- struct action *a (a action's pointer): include player & choose loc
|
||||
- int *state (pointer): for return. To save the chessboard's state hash which after doing this action
|
||||
- float *reward (pointer): for return. To save the number of rewards which the player gets after doing this action.
|
||||
- float *opponent_reward (pointer): for return. To save the number of rewards which the opponents gets after the player doing this action.
|
||||
- short *winner (pointer): for return. To save the winner in this action. If haven't finish, it will be zero.
|
||||
|
||||
Results:
|
||||
- None. Save in state & reward & winner
|
||||
Results:
|
||||
- None. Save in state & reward & winner
|
||||
*/
|
||||
void act(short *board, struct action *a, int *state, float *reward, float *opponent_reward, short *winner){
|
||||
void act(short* board, struct action* a, int* state, float* reward, float* opponent_reward, short* winner)
|
||||
{
|
||||
// printf("Act( player=%d, action=%d )\n", a->player, a->loc);
|
||||
assert(board[a->loc] == 0);
|
||||
board[a->loc] = a->player;
|
||||
*winner = get_winner(board);
|
||||
*state = state_hash(board);
|
||||
if (*winner == a->player){
|
||||
*reward = 1.0;
|
||||
board[a->loc] = a->player;
|
||||
*winner = get_winner(board);
|
||||
*state = state_hash(board);
|
||||
if (*winner == a->player) {
|
||||
*reward = 1.0;
|
||||
*opponent_reward = -1.0;
|
||||
}
|
||||
else if(*winner != 0){
|
||||
*reward = -1.0;
|
||||
} else if (*winner != 0) {
|
||||
*reward = -1.0;
|
||||
*opponent_reward = 1.0;
|
||||
}
|
||||
else{
|
||||
*reward = 0;
|
||||
} else {
|
||||
*reward = 0;
|
||||
*opponent_reward = 0;
|
||||
}
|
||||
}
|
||||
|
||||
16
enviroment.h
16
enviroment.h
@ -1,11 +1,11 @@
|
||||
/* One move: which player acts and which board cell is chosen. */
struct action {
    short player; // symbol written into the board cell
    short loc; // index of the chosen cell in the board array
};
|
||||
|
||||
void reset(short* board);
|
||||
void show(short *board);
|
||||
void get_available_actions(short *board, short *result, short *length);
|
||||
short get_winner(short *board);
|
||||
int state_hash(short *board);
|
||||
void act(short *board, struct action *a, int *state, float *reward, float *opponent_reward, short *winner);
|
||||
void show(short* board);
|
||||
void get_available_actions(short* board, short* result, short* length);
|
||||
short get_winner(short* board);
|
||||
int state_hash(short* board);
|
||||
void act(short* board, struct action* a, int* state, float* reward, float* opponent_reward, short* winner);
|
||||
|
||||
17
main.c
17
main.c
@ -1,16 +1,17 @@
|
||||
#include <stdio.h>
|
||||
#include <time.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include "constant.h"
|
||||
#include "enviroment.h"
|
||||
#include "q-learning.h"
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
|
||||
int main(){
|
||||
short board[9]= {0}; // tic tac toe's chessboard
|
||||
float table[STATE_NUM][ACTION_NUM]; // q-learning table
|
||||
int main()
|
||||
{
|
||||
short board[9] = { 0 }; // tic tac toe's chessboard
|
||||
float table[STATE_NUM][ACTION_NUM]; // q-learning table
|
||||
|
||||
srand(time(NULL));
|
||||
srand(time(NULL));
|
||||
init_table(&table[0][0]);
|
||||
|
||||
run(&table[0][0], board, false, 10000, false);
|
||||
|
||||
185
q-learning.c
185
q-learning.c
@ -1,80 +1,81 @@
|
||||
#include <stdio.h>
|
||||
#include <float.h>
|
||||
#include <stdbool.h>
|
||||
#include <limits.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "constant.h"
|
||||
#include "enviroment.h"
|
||||
|
||||
/*
    Return the index with the max value in the array.

    Args:
        - float *arr (array's address)
        - short length (integer): array's length

    Results:
        - short index (integer): the index of the first occurrence of the max
          value, or -1 when no element is greater than -FLT_MAX (for example
          when length is 0).
*/
short float_argmax(float* arr, short length)
{
    // The index is an integer; keep it as short instead of a float that is
    // converted back on return.
    short best = -1;
    float max = -FLT_MAX;
    for (short i = 0; i < length; i++) {
        // Strict comparison keeps the earliest index on ties.
        if (arr[i] > max) {
            max = arr[i];
            best = i;
        }
    }
    return best;
}
|
||||
|
||||
|
||||
/*
|
||||
Choose the next action with Epsilon-Greedy.
|
||||
EPSILON means the probability to choose the best action in this state from Q-Table.
|
||||
(1-EPSILON) to random an action to do.
|
||||
Choose the next action with Epsilon-Greedy.
|
||||
EPSILON means the probability to choose the best action in this state from Q-Table.
|
||||
(1-EPSILON) to random an action to do.
|
||||
|
||||
Args:
|
||||
- short *table (array's address): state table for Q-Learning
|
||||
- short *board (array's address): chessboards' status
|
||||
- int state (integer, state hash): hash for board's status
|
||||
Args:
|
||||
- short *table (array's address): state table for Q-Learning
|
||||
- short *board (array's address): chessboards' status
|
||||
- int state (integer, state hash): hash for board's status
|
||||
|
||||
Results:
|
||||
- short best_choice
|
||||
Results:
|
||||
- short best_choice
|
||||
*/
|
||||
short bot_choose_action(float *table, short *board, int state){
|
||||
short bot_choose_action(float* table, short* board, int state)
|
||||
{
|
||||
|
||||
// get available actions for choosing
|
||||
short available_actions[9];
|
||||
short available_actions_length;
|
||||
get_available_actions(board, available_actions, &available_actions_length);
|
||||
// get available actions for choosing
|
||||
short available_actions[9];
|
||||
short available_actions_length;
|
||||
get_available_actions(board, available_actions, &available_actions_length);
|
||||
|
||||
// use argmax() to find the best choise,
|
||||
// first we should build an available_actions_state array for saving the state for all available choise.
|
||||
float available_actions_state[9];
|
||||
short available_actions_state_index[9];
|
||||
short available_actions_state_length, index = 0;
|
||||
short temp_index, best_choice;
|
||||
bool zeros = true;
|
||||
for (short i=0; i<available_actions_length; i++){
|
||||
temp_index = available_actions[i];
|
||||
available_actions_state[index] = *(table + state * ACTION_NUM + temp_index);
|
||||
if (available_actions_state[index] != 0.0){
|
||||
zeros = false;
|
||||
}
|
||||
available_actions_state_index[index] = temp_index;
|
||||
index++;
|
||||
}
|
||||
best_choice = float_argmax(available_actions_state, index);
|
||||
best_choice = available_actions_state_index[best_choice];
|
||||
// use argmax() to find the best choise,
|
||||
// first we should build an available_actions_state array for saving the state for all available choise.
|
||||
float available_actions_state[9];
|
||||
short available_actions_state_index[9];
|
||||
short available_actions_state_length, index = 0;
|
||||
short temp_index, best_choice;
|
||||
bool zeros = true;
|
||||
for (short i = 0; i < available_actions_length; i++) {
|
||||
temp_index = available_actions[i];
|
||||
available_actions_state[index] = *(table + state * ACTION_NUM + temp_index);
|
||||
if (available_actions_state[index] != 0.0) {
|
||||
zeros = false;
|
||||
}
|
||||
available_actions_state_index[index] = temp_index;
|
||||
index++;
|
||||
}
|
||||
best_choice = float_argmax(available_actions_state, index);
|
||||
best_choice = available_actions_state_index[best_choice];
|
||||
|
||||
// Epsilon-Greedy
|
||||
// If random number > EPSILON -> random a action
|
||||
// If random number < EPSILON -> choose the best action in this state.
|
||||
double random_num = (double) rand() / (RAND_MAX + 1.0);
|
||||
if ((random_num > EPSILON) || zeros){
|
||||
best_choice = available_actions_state_index[ rand() % index ];
|
||||
}
|
||||
// Epsilon-Greedy
|
||||
// If random number > EPSILON -> random a action
|
||||
// If random number < EPSILON -> choose the best action in this state.
|
||||
double random_num = (double)rand() / (RAND_MAX + 1.0);
|
||||
if ((random_num > EPSILON) || zeros) {
|
||||
best_choice = available_actions_state_index[rand() % index];
|
||||
}
|
||||
|
||||
return best_choice;
|
||||
}
|
||||
@ -83,27 +84,28 @@ short bot_choose_action(float *table, short *board, int state){
|
||||
The opponent randomly chooses an action to do.
|
||||
|
||||
Args:
|
||||
- short *table (array's address): state table for Q-Learning
|
||||
- short *board (array's address): chessboards' status
|
||||
- int state (integer, state hash): hash for board's status
|
||||
- short *table (array's address): state table for Q-Learning
|
||||
- short *board (array's address): chessboards' status
|
||||
- int state (integer, state hash): hash for board's status
|
||||
|
||||
Results:
|
||||
- short choice (integer): random, -1 means no available action to choose
|
||||
Results:
|
||||
- short choice (integer): random, -1 means no available action to choose
|
||||
*/
|
||||
short opponent_random_action(float *table, short *board, int state){
|
||||
short opponent_random_action(float* table, short* board, int state)
|
||||
{
|
||||
|
||||
// get available actions for choosing
|
||||
short available_actions[9];
|
||||
short available_action_length;
|
||||
get_available_actions(board, available_actions, &available_action_length);
|
||||
|
||||
if (available_action_length == 0){
|
||||
if (available_action_length == 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// random
|
||||
short choice;
|
||||
choice = (short)( rand() % available_action_length );
|
||||
choice = (short)(rand() % available_action_length);
|
||||
choice = available_actions[choice];
|
||||
|
||||
return choice;
|
||||
@ -118,9 +120,10 @@ short opponent_random_action(float *table, short *board, int state){
|
||||
Results:
|
||||
- None.
|
||||
*/
|
||||
void init_table(float *table){
|
||||
for (int i=0; i<STATE_NUM; i++){
|
||||
for (int j=0; j<ACTION_NUM; j++){
|
||||
void init_table(float* table)
|
||||
{
|
||||
for (int i = 0; i < STATE_NUM; i++) {
|
||||
for (int j = 0; j < ACTION_NUM; j++) {
|
||||
*(table + i * ACTION_NUM + j) = 0;
|
||||
}
|
||||
}
|
||||
@ -137,14 +140,15 @@ void init_table(float *table){
|
||||
Results:
|
||||
- float max_reward
|
||||
*/
|
||||
float get_estimate_reward(float *table, short *board, int state){
|
||||
float get_estimate_reward(float* table, short* board, int state)
|
||||
{
|
||||
short available_actions[9];
|
||||
short available_action_length;
|
||||
get_available_actions(board, available_actions, &available_action_length);
|
||||
|
||||
float available_actions_state[9];
|
||||
for (short i=0; i<available_action_length; i++){
|
||||
available_actions_state[i] = *(table + state * ACTION_NUM + available_actions[i]); // table[state][available_actions[i]]
|
||||
float available_actions_state[9];
|
||||
for (short i = 0; i < available_action_length; i++) {
|
||||
available_actions_state[i] = *(table + state * ACTION_NUM + available_actions[i]); // table[state][available_actions[i]]
|
||||
}
|
||||
|
||||
short ans_index;
|
||||
@ -165,10 +169,11 @@ float get_estimate_reward(float *table, short *board, int state){
|
||||
Results:
|
||||
- None
|
||||
*/
|
||||
void run(float *table, short *board, bool train, int times, bool plot){
|
||||
short available_actions[9];
|
||||
short available_actions_length;
|
||||
short winner;
|
||||
void run(float* table, short* board, bool train, int times, bool plot)
|
||||
{
|
||||
short available_actions[9];
|
||||
short available_actions_length;
|
||||
short winner;
|
||||
short choice, opponent_choice;
|
||||
int state, _state;
|
||||
float estimate_r, estimate_r_, real_r, r, opponent_r;
|
||||
@ -176,10 +181,10 @@ void run(float *table, short *board, bool train, int times, bool plot){
|
||||
|
||||
int win = 0;
|
||||
|
||||
for (int episode=0; episode<times; episode++){
|
||||
for (int episode = 0; episode < times; episode++) {
|
||||
reset(board);
|
||||
state = state_hash(board);
|
||||
while (1){
|
||||
while (1) {
|
||||
// bot choose the action
|
||||
choice = bot_choose_action(table, board, state);
|
||||
a.loc = choice;
|
||||
@ -187,22 +192,24 @@ void run(float *table, short *board, bool train, int times, bool plot){
|
||||
|
||||
estimate_r = *(table + state * ACTION_NUM + choice);
|
||||
act(board, &a, &_state, &r, &opponent_r, &winner);
|
||||
if (plot) show(board);
|
||||
if (plot)
|
||||
show(board);
|
||||
|
||||
// opponent random
|
||||
if (winner == 0){
|
||||
if (winner == 0) {
|
||||
opponent_choice = opponent_random_action(table, board, state_hash(board));
|
||||
if (opponent_choice != -1){
|
||||
if (opponent_choice != -1) {
|
||||
a.loc = opponent_choice;
|
||||
a.player = OPPONENT_SYMBOL;
|
||||
act(board, &a, &_state, &opponent_r, &r, &winner);
|
||||
if (plot) show(board);
|
||||
if (plot)
|
||||
show(board);
|
||||
}
|
||||
}
|
||||
get_available_actions(board, available_actions, &available_actions_length);
|
||||
|
||||
if ((winner != 0) || (available_actions_length == 0)){
|
||||
if (plot){
|
||||
if ((winner != 0) || (available_actions_length == 0)) {
|
||||
if (plot) {
|
||||
printf("winner: %d, reward: %f, oppo reward: %f\n", winner, r, opponent_r);
|
||||
printf("==========================================================\n");
|
||||
}
|
||||
@ -211,15 +218,15 @@ void run(float *table, short *board, bool train, int times, bool plot){
|
||||
estimate_r_ = get_estimate_reward(table, board, _state);
|
||||
real_r = r + LAMBDA * estimate_r_;
|
||||
}
|
||||
if (train){
|
||||
if (train) {
|
||||
// printf("update");
|
||||
*(table + state * ACTION_NUM + choice) += ( LR * (real_r - estimate_r) ); // table[state][choice] += LR * (real_r - estimate_r)
|
||||
*(table + state * ACTION_NUM + choice) += (LR * (real_r - estimate_r)); // table[state][choice] += LR * (real_r - estimate_r)
|
||||
}
|
||||
state = _state;
|
||||
|
||||
if ((winner != 0) || (available_actions_length == 0)){
|
||||
if ((winner != 0) || (available_actions_length == 0)) {
|
||||
// printf("break\n");
|
||||
if (winner == 1){
|
||||
if (winner == 1) {
|
||||
win += 1;
|
||||
}
|
||||
break;
|
||||
@ -228,5 +235,5 @@ void run(float *table, short *board, bool train, int times, bool plot){
|
||||
}
|
||||
|
||||
if (!train)
|
||||
printf("%d/%d, %f\%\n", win, 10000, (float)win/10000);
|
||||
printf("%d/%d, %f\%\n", win, 10000, (float)win / 10000);
|
||||
}
|
||||
|
||||
12
q-learning.h
12
q-learning.h
@ -1,6 +1,6 @@
|
||||
short float_argmax(float *arr, short length);
|
||||
short bot_choose_action(float *table, short *board, int state);
|
||||
short opponent_random_action(float *table, short *board, int state);
|
||||
void init_table(float *table);
|
||||
float get_estimate_reward(float *table, short *board, int state);
|
||||
void run(float *table, short *board, bool train, int times, bool plot);
|
||||
short float_argmax(float* arr, short length);
|
||||
short bot_choose_action(float* table, short* board, int state);
|
||||
short opponent_random_action(float* table, short* board, int state);
|
||||
void init_table(float* table);
|
||||
float get_estimate_reward(float* table, short* board, int state);
|
||||
void run(float* table, short* board, bool train, int times, bool plot);
|
||||
Loading…
Reference in New Issue
Block a user