/*
 * Q-learning-in-C/constant.h
 * 2023-05-31 11:31:15 +08:00
 *
 * 12 lines
 * 248 B
 * C
 */

#ifndef CONSTANT_H
#define CONSTANT_H

/*
 * Compile-time constants for the Q-learning program.
 *
 * <stdbool.h> is included here because FIRST expands to `true`; without it
 * the header only works when the including file happens to pull in
 * <stdbool.h> on its own (pre-C23).
 */
#include <stdbool.h>

/* Integer markers for the two players. */
#define BOT_SYMBOL 1
#define OPPONENT_SYMBOL 2

/*
 * Epsilon for the epsilon-greedy policy.
 * NOTE(review): whether this is the probability of acting greedily or of
 * exploring depends on the code that uses it -- confirm at the call site.
 */
#define EPSILON 0.9

/* Learning rate. */
#define LR 0.1

/*
 * Discount factor. (Often written "gamma" in the literature; the name
 * LAMBDA is kept so existing users of this header keep compiling.)
 */
#define LAMBDA 0.9

/*
 * Number of states and number of actions.
 * 19683 == 3^9; presumably one state per configuration of a 9-cell board
 * with three possible values per cell -- TODO confirm against the state
 * encoding.
 */
#define STATE_NUM 19683
#define ACTION_NUM 9

/* Number of episodes. */
#define EPISODE_NUM 100000

/*
 * Boolean switch.
 * NOTE(review): presumably selects whether the bot moves first -- confirm
 * against the code that reads it.
 */
#define FIRST true

#endif /* CONSTANT_H */