-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Open
Labels
Description
Check duplicate issues.
- Checked for duplicates
Description
I am seeing a crash when I pass a Dask client that runs its worker with threads (`processes=False`) as the executor of `RDF.Experimental.FromSpec` to read files. Running the workers as processes seems to work.
Reproducer
import json
import ROOT
import numpy
import matplotlib.pyplot as plt
from ROOT import RDF # type: ignore
from dask.distributed import Client, LocalCluster
# Number of main/friend file pairs that are generated and later listed in the spec.
NFILES = 100
# Directory prefix used for every generated ROOT file path.
PATH = 'files'
# -------------------------------------
def create_connection(nproc : int):
    '''
    Start a local Dask cluster and return a client connected to it.

    The cluster is created with a single worker, `processes=False`,
    `nproc` threads for that worker and `memory_limit='2GiB'`.

    Parameters
    ----------
    nproc : value passed as `threads_per_worker` to the `LocalCluster`.

    Returns
    -------
    The `Client` built from the new `LocalCluster`.
    '''
    cluster_options = {
        'n_workers': 1,
        'threads_per_worker': nproc,
        'processes': False,
        'memory_limit': '2GiB',
    }
    return Client(LocalCluster(**cluster_options))
# -------------------------------------
def _create_files(index : int):
    '''
    Write one main/friend pair of ROOT files for the given index.

    Each file holds a tree named `Events` with 100000 entries and a single
    column filled from `gRandom->Rndm()`: column `x` in
    `{PATH}/main_{index:03}.root` and column `y` in
    `{PATH}/frnd_{index:03}.root`.

    Parameters
    ----------
    index : number used (zero padded to three digits) in both file names.
    '''
    import os

    # The output directory is not guaranteed to exist on a fresh run and ROOT
    # is not expected to create missing parent directories for the snapshot.
    os.makedirs(PATH, exist_ok=True)

    # Main file first, then the friend, same order as writing them one by one.
    outputs = (
        ("x", f"{PATH}/main_{index:03}.root"),
        ("y", f"{PATH}/frnd_{index:03}.root"),
    )
    for column, file_path in outputs:
        df = ROOT.RDataFrame(100000)
        df.Define(column, "gRandom->Rndm()").Snapshot("Events", file_path)
# -------------------------------------
def _get_array(client) -> numpy.ndarray:
    '''
    Read column `x` through a dataframe built from a JSON specification.

    The specification is written to `spec.json` in the current directory. It
    lists the NFILES main files as sample `main` and the NFILES friend files
    as friend `friend`, both with tree `Events`. The dataframe is created
    with `RDF.Experimental.FromSpec`, passing `client` as `executor`.

    Parameters
    ----------
    client : object forwarded as the `executor` argument of `FromSpec`.

    Returns
    -------
    The `x` entry of the dictionary returned by `AsNumpy(['x'])`.
    '''
    main_files = []
    frnd_files = []
    for index in range(NFILES):
        main_files.append(f'{PATH}/main_{index:03}.root')
        frnd_files.append(f'{PATH}/frnd_{index:03}.root')

    main_sample = {"trees": ["Events"], "files": main_files}
    friend_sample = {"trees": ["Events"], "files": frnd_files}
    spec = {
        "samples": {"main": main_sample},
        "friends": {"friend": friend_sample},
    }

    with open("spec.json", "w") as spec_file:
        json.dump(spec, spec_file, indent=4)

    df = RDF.Experimental.FromSpec("spec.json", executor = client)
    columns = df.AsNumpy(['x'])
    return columns['x']
# -------------------------------------
def main():
    '''
    Entry point

    Creates the NFILES file pairs, opens a client with five threads, reads
    the `x` column twice with the same client and saves a scatter plot of the
    first 1000 values of both reads to `plot.png`.
    '''
    for index in range(NFILES):
        _create_files(index = index)

    client = create_connection(nproc = 5)

    # Two consecutive reads through the same client, in this order.
    arrays = [_get_array(client) for _ in range(2)]

    plt.scatter(arrays[0][:1000], arrays[1][:1000])
    plt.savefig('plot.png')
    plt.close('all')
# ----------------------
# Run the reproducer only when this file is executed as a script.
if __name__ == '__main__':
    main()
# -------------------------------------
ROOT version
6.38
Installation method
micromamba
Operating system
almalinux9
Additional context
The version using processes (which seems to work) could be a workaround for: