Skip to content

Commit 9d408b0

Browse files
authored
Extract lock-per-file cache function. NFC (#24071)
1 parent ff60f9c commit 9d408b0

File tree

1 file changed

+50
-34
lines changed

1 file changed

+50
-34
lines changed

tools/link.py

+50-34
Original file line number | Diff line number | Diff line change
@@ -188,17 +188,56 @@ def setup_environment_settings():
188188

189189

190190
def generate_js_sym_info():
  """Return the list of all symbols available in the JS libraries.

  The list is produced by running the js compiler in symbols-only mode.
  Since the set of symbols depends on the settings in use, this has to be
  repeated for every linker invocation.

  TODO(sbc): Find a way to optimize this. Potentially we could add a super-set
  mode of the js compiler that would generate a list of all possible symbols
  that could be checked in.
  """
  # In symbols_only mode compiler.mjs emits a flat list of C symbols, which
  # arrives here as the second element of the returned pair.
  compile_result = emscripten.compile_javascript(symbols_only=True)
  _, symbols_json = compile_result
  return json.loads(symbols_json)
200201

201202

203+
def get_cached_file(filetype, filename, generator, cache_limit):
  """Return the content of a cached file, generating it on a cache miss.

  This function implements a file cache which lives inside the main
  emscripten cache directory but uses a per-file lock rather than a
  cache-wide lock.

  The cache is pruned (by removing the files with the oldest modification
  time) if it grows above `cache_limit` files.

  Args:
    filetype: Name passed to `cache.get_path` to locate the directory holding
      this kind of file; `<filetype>.lock` is the lock that guards pruning.
    filename: Name of the cache entry inside that directory.
    generator: Zero-argument callable producing the file content. It is only
      called on a cache miss, while the per-file lock is held.
    cache_limit: Maximum number of non-lock files to keep in the directory.

  Returns:
    The file content, either read from the cache or freshly generated.
  """
  root = cache.get_path(filetype)
  utils.safe_ensure_dirs(root)

  cache_file = os.path.join(root, filename)

  with filelock.FileLock(cache_file + '.lock'):
    if os.path.exists(cache_file):
      # Cache hit, read the file
      file_content = read_file(cache_file)
    else:
      # Cache miss, generate the content and write the file
      file_content = generator()
      write_file(cache_file, file_content)

  # Unlocked pre-check so the pruning lock is only taken when the directory
  # looks over the limit; the listing is repeated once the lock is held.
  if len([f for f in os.listdir(root) if not f.endswith('.lock')]) > cache_limit:
    with filelock.FileLock(cache.get_path(f'{filetype}.lock')):
      files = []
      for f in os.listdir(root):
        if not f.endswith('.lock'):
          f = os.path.join(root, f)
          files.append((f, os.path.getmtime(f)))
      files.sort(key=lambda x: x[1])
      # Delete all but the newest N files. The number of files to drop is
      # computed explicitly because slicing with `[:-cache_limit]` selects
      # nothing when cache_limit is 0 (`-0 == 0`).
      excess = max(len(files) - cache_limit, 0)
      for f, _ in files[:excess]:
        with filelock.FileLock(f + '.lock'):
          delete_file(f)

  return file_content
239+
240+
202241
@ToolchainProfiler.profile_block('JS symbol generation')
203242
def get_js_sym_info():
204243
# Avoiding using the cache when generating struct info since
@@ -220,39 +259,16 @@ def get_js_sym_info():
220259
input_files.append(read_file(jslib))
221260
content = '\n'.join(input_files)
222261
content_hash = hashlib.sha1(content.encode('utf-8')).hexdigest()
223-
library_syms = None
224262

225-
cache_file = str(cache.get_path(f'symbol_lists/{content_hash}.json'))
226-
227-
utils.safe_ensure_dirs(cache.get_path('symbol_lists'))
228-
with filelock.FileLock(cache_file + '.lock'):
229-
if os.path.exists(cache_file):
230-
# Cache hit, read the file
231-
library_syms = json.loads(read_file(cache_file))
232-
else:
233-
# Cache miss, generate the symbol list and write the file
234-
library_syms = generate_js_sym_info()
235-
write_file(cache_file, json.dumps(library_syms, separators=(',', ':'), indent=2))
263+
def generate_json():
264+
library_syms = generate_js_sym_info()
265+
return json.dumps(library_syms, separators=(',', ':'), indent=2)
236266

237267
# Limit of the overall size of the cache.
238268
# This code will get test coverage since a full test run of `other` or `core`
239269
# generates ~1000 unique symbol lists.
240-
cache_limit = 500
241-
root = cache.get_path('symbol_lists')
242-
if len([f for f in os.listdir(root) if not f.endswith('.lock')]) > cache_limit:
243-
with filelock.FileLock(cache.get_path('symbol_lists.lock')):
244-
files = []
245-
for f in os.listdir(root):
246-
if not f.endswith('.lock'):
247-
f = os.path.join(root, f)
248-
files.append((f, os.path.getmtime(f)))
249-
files.sort(key=lambda x: x[1])
250-
# Delete all but the newest N files
251-
for f, _ in files[:-cache_limit]:
252-
with filelock.FileLock(f + '.lock'):
253-
delete_file(f)
254-
255-
return library_syms
270+
file_content = get_cached_file('symbol_lists', f'{content_hash}.json', generate_json, cache_limit=500)
271+
return json.loads(file_content)
256272

257273

258274
def filter_link_flags(flags, using_lld):

0 commit comments

Comments
 (0)