-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerate.py
237 lines (196 loc) · 9.25 KB
/
generate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
"""
Chessort - Generate data for the game
=============================================
This script analyzes chess positions from FEN strings using Stockfish and retrieves the top N moves.
It filters out positions that do not meet a minimum number of distinct move evaluations and saves the
results to a CSV file.
Requirements:
- Python 3.x
- python-chess library
- Stockfish engine
Ensure the Stockfish engine path is correctly set in the 'STOCKFISH_PATH' environment variable.
Configuration Parameters:
- STOCKFISH_PATH: Path to the Stockfish executable.
- LICHESS_PUZZLE_FILE: Path to the input Lichess puzzle CSV file (relative to current directory).
- BASE_OUTPUT_FILE_PATH: Base path to the output CSV file (relative to current directory).
- MIN_MOVES_REQUIRED: Minimum number of moves required to process this result (after first opponent move is made).
- MIN_POPULARITY_REQUIRED: Minimum popularity score required.
- MIN_NUMBER_PLAYS_REQUIRED: Minimum number of plays required.
- MAX_RATING_DEVIATION: Maximum rating deviation allowed.
- LICHESS_PUZZLE_FILE_OFFSET: Line offset to start processing from in the Lichess puzzle file.
- LICHESS_PUZZLE_FILE_LIMIT: Number of lines to process from the Lichess puzzle file.
- MULTI_PV: Number of top moves to evaluate for each FEN.
- EVALUATION_DEPTH: Depth to which Stockfish engine evaluates the positions.
- MAX_WORKERS: Number of worker processes to use.
- STOCKFISH_THREADS_PER_ENGINE: Number of threads per Stockfish engine instance.
- HASH_SIZE: Size of the hash table for the Stockfish engine in MB.
Output CSV format:
- LichessPuzzleId: ID of the puzzle.
- FEN: FEN string of the position.
- Rating: Rating of the puzzle.
- EvaluatedMoves: Comma-separated list of moves in UCI format followed by their evaluation.
Metadata JSON format:
- stockfishVersion: Version of the Stockfish engine used.
- offset: Line offset in the input file from which processing started.
- limit: Number of lines processed.
- evaluationDepth: Depth to which Stockfish evaluated the positions.
- multipv: Number of top moves evaluated.
- minimumMovesRequired: Minimum number of moves required.
- minPopularityRequired: Minimum popularity score required.
- minNumberPlaysRequired: Minimum number of plays required.
- maxRatingDeviation: Maximum rating deviation allowed.
- inputLichessFileSha256: SHA-256 hash of the input Lichess puzzle file.
- outputFileSha256: SHA-256 hash of the generated output CSV file.
"""
import os
import csv
import chess
import chess.engine
from concurrent.futures import ProcessPoolExecutor, as_completed
import hashlib
import json
# Configuration
STOCKFISH_PATH = os.getenv('STOCKFISH_PATH', '/usr/local/bin/stockfish')
LICHESS_PUZZLE_FILE = os.path.join(os.getcwd(), 'lichess-data', 'lichess_db_puzzle.csv')
BASE_OUTPUT_FILE_PATH = os.path.join(os.getcwd(), 'out', 'chessort')
MIN_MOVES_REQUIRED = 4
MIN_POPULARITY_REQUIRED = 90
MIN_NUMBER_PLAYS_REQUIRED = 100
MAX_RATING_DEVIATION = 100
LICHESS_PUZZLE_FILE_OFFSET = 1700000
LICHESS_PUZZLE_FILE_LIMIT = 1000
MULTI_PV = 50
EVALUATION_DEPTH = 25
MAX_WORKERS = 24
STOCKFISH_THREADS_PER_ENGINE = 1
HASH_SIZE = 1875
def get_stockfish_version():
with chess.engine.SimpleEngine.popen_uci(STOCKFISH_PATH) as engine:
version_info = engine.protocol.id.get("name", "Unknown Version")
return version_info.replace('Stockfish ', '')
def sha256sum(filename):
h = hashlib.sha256()
with open(filename, 'rb') as file:
while chunk := file.read(8192):
h.update(chunk)
return h.hexdigest()
def switch_eval(engine_eval):
""" Convert the engine evaluation to other players perspective """
if isinstance(engine_eval, chess.engine.Mate):
mate_moves = -engine_eval.moves
return chess.engine.Mate(mate_moves)
elif isinstance(engine_eval, chess.engine.Cp):
eval_value = -engine_eval.score()
return chess.engine.Cp(eval_value)
else:
raise ValueError(f"Unexpected evaluation type: {engine_eval}")
# Analyze the top moves for a given FEN
def analyze_top_moves(fen, top_n, depth):
with chess.engine.SimpleEngine.popen_uci(STOCKFISH_PATH) as engine:
engine.configure({"Threads": STOCKFISH_THREADS_PER_ENGINE, "Hash": HASH_SIZE})
board = chess.Board(fen)
result = engine.analyse(board, chess.engine.Limit(depth=depth), multipv=top_n)
moves = []
for info in result:
move = info['pv'][0].uci()
score = info['score'].relative
moves.append((move, score))
return moves
# Process an FEN
def process_puzzle(puzzle):
lichess_puzzle_id = puzzle['PuzzleId']
original_fen = puzzle['FEN']
rating = puzzle['Rating']
moves = puzzle['Moves'].split()
print(f"[{lichess_puzzle_id}] Processing...")
# FEN is the position before the opponent makes their move.
# The position to present to the player is after applying the first move to that FEN.
# The second move is the beginning of the solution.
# See: https://database.lichess.org/#puzzles
board = chess.Board(original_fen)
opponent_first_move = chess.Move.from_uci(moves[0])
# Analyze the board state before making the first move
pre_last_move_top_moves = analyze_top_moves(original_fen, top_n=1, depth=EVALUATION_DEPTH)
if not pre_last_move_top_moves:
print(f"[{lichess_puzzle_id}] Skipped. No evaluation for the original position.")
return None
pre_last_move_evaluation = pre_last_move_top_moves[0][1]
# Make opponents move (the first move) now
board.push(opponent_first_move)
# Generate top moves
top_moves = analyze_top_moves(board.fen(), top_n=MULTI_PV, depth=EVALUATION_DEPTH)
# Ensure we have the minimum number of total moves
if len(top_moves) < MIN_MOVES_REQUIRED:
print(f"[{lichess_puzzle_id}] Skipped. Not enough total moves. Found: {len(top_moves)}. Need: {MIN_MOVES_REQUIRED}.")
return None
# Create a comma-separated list of moves with evaluations
moves_str = ','.join([f"{move} {score}" for move, score in top_moves])
best_top_move_evaluation = top_moves[0][1]
best_top_move_evaluation_relative_to_opponent = switch_eval(best_top_move_evaluation)
return {
'LichessPuzzleId': lichess_puzzle_id,
'FEN': board.fen(),
'Rating': rating,
'PreLastMovePositionEvaluation': pre_last_move_evaluation,
'LastMove': f"{opponent_first_move.uci()} {best_top_move_evaluation_relative_to_opponent}",
'CurrentPositionEvaluation': best_top_move_evaluation,
'EvaluatedMoves': moves_str,
}
def meets_criteria(row):
return (
int(row['Popularity']) >= MIN_POPULARITY_REQUIRED and
int(row['NbPlays']) >= MIN_NUMBER_PLAYS_REQUIRED and
int(row['RatingDeviation']) <= MAX_RATING_DEVIATION
)
# Process input file into a puzzle
def process_input_file(file_path, offset=0, limit=10):
output_file_path = f"{BASE_OUTPUT_FILE_PATH}-{offset}-{limit}.csv"
metadata_file_path = f"{BASE_OUTPUT_FILE_PATH}-{offset}-{limit}.metadata.json"
# Short circuit if the output file already exists
if os.path.exists(output_file_path):
print(f"Output file {output_file_path} already exists. Exiting.")
return
stockfish_version = get_stockfish_version()
input_lichess_file_sha256 = sha256sum(file_path)
with open(file_path, newline='') as csvfile:
reader = csv.DictReader(csvfile)
with ProcessPoolExecutor(max_workers=MAX_WORKERS) as executor:
futures = {
executor.submit(process_puzzle, row): row for i, row in enumerate(reader)
if i >= offset and i < offset + limit and meets_criteria(row)
}
with open(output_file_path, 'w', newline='') as csvfile:
fieldnames = ['LichessPuzzleId', 'FEN', 'Rating', 'PreLastMovePositionEvaluation', 'LastMove', 'CurrentPositionEvaluation', 'EvaluatedMoves']
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
writer.writeheader()
for future in as_completed(futures):
result = future.result()
if result:
writer.writerow(result)
csvfile.flush()
print(f"[{result['LichessPuzzleId']}] Successfully processed.")
# Generate and store meta data file
output_file_sha256 = sha256sum(output_file_path)
metadata = {
"stockfishVersion": stockfish_version,
"offset": offset,
"limit": limit,
"evaluationDepth": EVALUATION_DEPTH,
"multipv": MULTI_PV,
"minimumMovesRequired": MIN_MOVES_REQUIRED,
"minPopularityRequired": MIN_POPULARITY_REQUIRED,
"minNumberPlaysRequired": MIN_NUMBER_PLAYS_REQUIRED,
"maxRatingDeviation": MAX_RATING_DEVIATION,
"inputLichessFileSha256": input_lichess_file_sha256,
"outputFileSha256": output_file_sha256
}
with open(metadata_file_path, 'w') as metafile:
json.dump(metadata, metafile, indent=4)
def main():
# Create directory if it doesn't exist
output_dir = os.path.dirname(BASE_OUTPUT_FILE_PATH)
if not os.path.exists(output_dir):
os.makedirs(output_dir)
process_input_file(LICHESS_PUZZLE_FILE, LICHESS_PUZZLE_FILE_OFFSET, LICHESS_PUZZLE_FILE_LIMIT)
if __name__ == "__main__":
main()