-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlearn.cpp
143 lines (110 loc) · 4.69 KB
/
learn.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#include <iostream>
#include <vector>
#include <Eigen/Dense>
#include "common.hpp"
#include "board.hpp"
#include "linearHeuristic.hpp"
#include "timeHeuristic.hpp"
#include "boardNodeLearning.hpp"
// Training hyperparameters and file locations used by main() below.

// Base step size multiplied into every per-sample weight delta.
#define LEARN_RATE .02
// Per-batch decay of the step size: the delta is scaled by
// LEARN_SLOW_RATE raised to the (0-based) batch index.
#define LEARN_SLOW_RATE .97
// Discount applied to later score differences: a difference that lies k steps
// after the sample is weighted by LAMBDA^k.
#define LAMBDA 0.7
// Number of batches; the weights are updated and saved once per batch.
#define NUM_BATCHES 50
// Number of self-play games whose deltas are averaged into one batch update.
#define BATCH_SIZE 100
// Number of random moves played at the start of each game before the
// search-driven play begins.
#define PREGAME_MOVES 6
// Depth argument handed to BoardNodeLearning::getBestChoice.
#define SEARCH_DEPTH 5
// Path handed to the TimeHeuristic constructor (presumably the weights to load).
#define INPUT_WEIGHTS "weights/time.weights"
// Path handed to TimeHeuristic::saveWeights. It is the same path as
// INPUT_WEIGHTS, so training presumably overwrites the input file in place.
#define OUTPUT_WEIGHTS "weights/time.weights"
// Use this file to test your minimax implementation (2-ply depth, with a
// heuristic of the difference in number of pieces).
int main(int argc, char *argv[]) {
cerr << endl;
TimeHeuristic* heuristic = new TimeHeuristic(INPUT_WEIGHTS);
heuristic->saveWeights(OUTPUT_WEIGHTS);
for (int batch = 0; batch < NUM_BATCHES; batch++) {
// Zero delta for new batch
VectorXd weightDelta = VectorXd::Zero(NUM_TOTAL_WEIGHTS);
int numDeltas = 0;
for (int game = 0; game < BATCH_SIZE; game++) {
cerr << "\rGame " << game+1 << "/" << BATCH_SIZE;
// Init Board
Board *board = new Board();
bool movingSide = BLACK;
// Randomly play first few moves to get (hopefully) unique boardstate
// I estimate there is on the order of 1000 possible states 6 moves in
for (int i = 0; i < PREGAME_MOVES; i++) {
vector<Move> moves = board->possibleMoves(movingSide);
Move chosenMove = moves[rand() % moves.size()];
board->doMove(chosenMove);
movingSide = !movingSide;
}
// Setup for game to be played
BoardNodeLearning* root = nullptr;
vector<Board*> principals[2];
principals[BLACK] = vector<Board*>();
principals[WHITE] = vector<Board*>();
// Play Game
while (!board->isDone()) {
root = new BoardNodeLearning(board, movingSide);
Move move = root->getBestChoice(SEARCH_DEPTH, heuristic);
board->doMove(move);
principals[movingSide].push_back(root->getPrincipalBoard());
delete root;
movingSide = !movingSide;
}
// Setup for principal data extraction
vector<double> diffs[2];
diffs[BLACK] = vector<double>();
diffs[WHITE] = vector<double>();
vector<VectorXd> derivs[2];
derivs[BLACK] = vector<VectorXd>();
derivs[WHITE] = vector<VectorXd>();
// Pull data from principals
for (int i = 0; i < (int)principals[BLACK].size() - 1; i++) {
Board* principal = principals[BLACK][i];
Board* nextPrincipal = principals[BLACK][i+1];
derivs[BLACK].push_back(heuristic->getGrad(principal, BLACK));
diffs[BLACK].push_back(heuristic->getScore(nextPrincipal, BLACK)
- heuristic->getScore(principal, BLACK));
}
for (int i = 0; i < (int)principals[WHITE].size() - 1; i++) {
Board* principal = principals[WHITE][i];
Board* nextPrincipal = principals[WHITE][i+1];
derivs[WHITE].push_back(heuristic->getGrad(principal, WHITE));
diffs[WHITE].push_back(heuristic->getScore(nextPrincipal, WHITE)
- heuristic->getScore(principal, WHITE));
}
// Calculate delta
for (int side = 0; side <= 1; side++) {
for (int i = 0; i < (int)derivs[side].size(); i++) {
double scalar = 0;
for (int j = i; j < (int)diffs[side].size(); j++) {
scalar += pow(LAMBDA, j-i) * diffs[side][j];
}
weightDelta += pow(LEARN_SLOW_RATE, batch) * LEARN_RATE * derivs[side][i] * scalar;
numDeltas ++;
}
}
// Cleanup
for (int i = 0; i < (int)principals[BLACK].size(); i++) {
delete principals[BLACK][i];
}
for (int i = 0; i < (int)principals[WHITE].size(); i++) {
delete principals[WHITE][i];
}
principals[BLACK].clear();
principals[WHITE].clear();
delete board;
}
weightDelta /= (double)numDeltas;
cerr << endl;
cerr << endl;
cerr << "Batch " << batch+1 << "/" << NUM_BATCHES << " complete. Deltas:" << endl;
cerr << weightDelta << endl;
cerr << endl;
heuristic->updateWeights(weightDelta);
heuristic->saveWeights(OUTPUT_WEIGHTS);
}
delete heuristic;
return 0;
}