style(format): run clang-format
This commit is contained in:
parent
7ba9db7f83
commit
7a68a06c86
37
enviroment.c
37
enviroment.c
@ -1,8 +1,8 @@
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <assert.h>
|
||||
#include "constant.h"
|
||||
#include "enviroment.h"
|
||||
#include "constant.h"
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
|
||||
short PATHS[8][3] = {
|
||||
{ 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
|
||||
@ -19,7 +19,8 @@ short PATHS[8][3] = {
|
||||
Results:
|
||||
- None, set all blocks on the chessboard to zero.
|
||||
*/
|
||||
void reset(short* board){
|
||||
void reset(short* board)
|
||||
{
|
||||
for (short i = 0; i < 9; i++)
|
||||
board[i] = 0;
|
||||
}
|
||||
@ -33,7 +34,8 @@ void reset(short* board){
|
||||
Results:
|
||||
- None. Only printing.
|
||||
*/
|
||||
void show(short *board){
|
||||
void show(short* board)
|
||||
{
|
||||
short loc;
|
||||
printf("┼───┼───┼───┼\n");
|
||||
for (short i = 0; i < 3; i++) {
|
||||
@ -64,7 +66,8 @@ void show(short *board){
|
||||
Results:
|
||||
- None. All available actions are saved into "result" and the number of actions is saved in "length"
|
||||
*/
|
||||
void get_available_actions(short *board, short *result, short *length){
|
||||
void get_available_actions(short* board, short* result, short* length)
|
||||
{
|
||||
short index = 0;
|
||||
for (int i = 0; i < 9; i++)
|
||||
if (board[i] == 0)
|
||||
@ -81,10 +84,13 @@ void get_available_actions(short *board, short *result, short *length){
|
||||
Results:
|
||||
- short winner_number(integer): winner's number, 0 for no winner now, 1 for Bot, 2 for opponent
|
||||
*/
|
||||
short get_winner(short *board){
|
||||
short get_winner(short* board)
|
||||
{
|
||||
int a, b, c;
|
||||
for (int i = 0; i < 8; i++) {
|
||||
a = PATHS[i][0]; b = PATHS[i][1]; c = PATHS[i][2];
|
||||
a = PATHS[i][0];
|
||||
b = PATHS[i][1];
|
||||
c = PATHS[i][2];
|
||||
if ((board[a] == board[b]) && (board[b] == board[c]) && (board[a] != 0)) {
|
||||
return board[a];
|
||||
}
|
||||
@ -101,7 +107,8 @@ short get_winner(short *board){
|
||||
Results:
|
||||
- int hash (integer): chessboard's status in i-th block * pow(3, i)
|
||||
*/
|
||||
int state_hash(short *board){
|
||||
int state_hash(short* board)
|
||||
{
|
||||
int base, hash = 0;
|
||||
for (int i = 0; i < 9; i++) {
|
||||
base = pow(3, i);
|
||||
@ -110,7 +117,6 @@ int state_hash(short *board){
|
||||
return hash;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Act on the chessboard.
|
||||
|
||||
@ -125,7 +131,8 @@ int state_hash(short *board){
|
||||
Results:
|
||||
- None. Save in state & reward & winner
|
||||
*/
|
||||
void act(short *board, struct action *a, int *state, float *reward, float *opponent_reward, short *winner){
|
||||
void act(short* board, struct action* a, int* state, float* reward, float* opponent_reward, short* winner)
|
||||
{
|
||||
// printf("Act( player=%d, action=%d )\n", a->player, a->loc);
|
||||
assert(board[a->loc] == 0);
|
||||
board[a->loc] = a->player;
|
||||
@ -134,12 +141,10 @@ void act(short *board, struct action *a, int *state, float *reward, float *oppon
|
||||
if (*winner == a->player) {
|
||||
*reward = 1.0;
|
||||
*opponent_reward = -1.0;
|
||||
}
|
||||
else if(*winner != 0){
|
||||
} else if (*winner != 0) {
|
||||
*reward = -1.0;
|
||||
*opponent_reward = 1.0;
|
||||
}
|
||||
else{
|
||||
} else {
|
||||
*reward = 0;
|
||||
*opponent_reward = 0;
|
||||
}
|
||||
|
||||
11
main.c
11
main.c
@ -1,12 +1,13 @@
|
||||
#include <stdio.h>
|
||||
#include <time.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include "constant.h"
|
||||
#include "enviroment.h"
|
||||
#include "q-learning.h"
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
|
||||
int main(){
|
||||
int main()
|
||||
{
|
||||
short board[9] = { 0 }; // tic tac toe's chessboard
|
||||
float table[STATE_NUM][ACTION_NUM]; // q-learning table
|
||||
|
||||
|
||||
29
q-learning.c
29
q-learning.c
@ -1,7 +1,7 @@
|
||||
#include <stdio.h>
|
||||
#include <float.h>
|
||||
#include <stdbool.h>
|
||||
#include <limits.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "constant.h"
|
||||
@ -17,7 +17,8 @@
|
||||
Results:
|
||||
- short index (integer): the index with the max value
|
||||
*/
|
||||
short float_argmax(float *arr, short length){
|
||||
short float_argmax(float* arr, short length)
|
||||
{
|
||||
float ans = -1, max = -FLT_MAX;
|
||||
for (short i = 0; i < length; i++) {
|
||||
if (arr[i] > max) {
|
||||
@ -28,7 +29,6 @@ short float_argmax(float *arr, short length){
|
||||
return ans;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Choose the next action with Epsilon-Greedy.
|
||||
EPSILON is the probability of choosing the best action for this state from the Q-Table.
|
||||
@ -42,7 +42,8 @@ short float_argmax(float *arr, short length){
|
||||
Results:
|
||||
- short best_choice
|
||||
*/
|
||||
short bot_choose_action(float *table, short *board, int state){
|
||||
short bot_choose_action(float* table, short* board, int state)
|
||||
{
|
||||
|
||||
// get available actions for choosing
|
||||
short available_actions[9];
|
||||
@ -90,7 +91,8 @@ short bot_choose_action(float *table, short *board, int state){
|
||||
Results:
|
||||
- short choice (integer): random, -1 means no available action to choose
|
||||
*/
|
||||
short opponent_random_action(float *table, short *board, int state){
|
||||
short opponent_random_action(float* table, short* board, int state)
|
||||
{
|
||||
|
||||
// get available actions for choosing
|
||||
short available_actions[9];
|
||||
@ -118,7 +120,8 @@ short opponent_random_action(float *table, short *board, int state){
|
||||
Results:
|
||||
- None.
|
||||
*/
|
||||
void init_table(float *table){
|
||||
void init_table(float* table)
|
||||
{
|
||||
for (int i = 0; i < STATE_NUM; i++) {
|
||||
for (int j = 0; j < ACTION_NUM; j++) {
|
||||
*(table + i * ACTION_NUM + j) = 0;
|
||||
@ -137,7 +140,8 @@ void init_table(float *table){
|
||||
Results:
|
||||
- float max_reward
|
||||
*/
|
||||
float get_estimate_reward(float *table, short *board, int state){
|
||||
float get_estimate_reward(float* table, short* board, int state)
|
||||
{
|
||||
short available_actions[9];
|
||||
short available_action_length;
|
||||
get_available_actions(board, available_actions, &available_action_length);
|
||||
@ -165,7 +169,8 @@ float get_estimate_reward(float *table, short *board, int state){
|
||||
Results:
|
||||
- None
|
||||
*/
|
||||
void run(float *table, short *board, bool train, int times, bool plot){
|
||||
void run(float* table, short* board, bool train, int times, bool plot)
|
||||
{
|
||||
short available_actions[9];
|
||||
short available_actions_length;
|
||||
short winner;
|
||||
@ -187,7 +192,8 @@ void run(float *table, short *board, bool train, int times, bool plot){
|
||||
|
||||
estimate_r = *(table + state * ACTION_NUM + choice);
|
||||
act(board, &a, &_state, &r, &opponent_r, &winner);
|
||||
if (plot) show(board);
|
||||
if (plot)
|
||||
show(board);
|
||||
|
||||
// opponent random
|
||||
if (winner == 0) {
|
||||
@ -196,7 +202,8 @@ void run(float *table, short *board, bool train, int times, bool plot){
|
||||
a.loc = opponent_choice;
|
||||
a.player = OPPONENT_SYMBOL;
|
||||
act(board, &a, &_state, &opponent_r, &r, &winner);
|
||||
if (plot) show(board);
|
||||
if (plot)
|
||||
show(board);
|
||||
}
|
||||
}
|
||||
get_available_actions(board, available_actions, &available_actions_length);
|
||||
|
||||
Loading…
Reference in New Issue
Block a user