Skip to content

Commit f20030d

Browse files
authored
Merge pull request #457 from fmihpc/master
dev up-to-date
2 parents 5ee3a10 + 1a2fe7b commit f20030d

3 files changed

Lines changed: 52 additions & 46 deletions

File tree

.github/workflows/make_sbatch_job.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ fi
88
# first with srun (in case the file has not yet been updated on the front end)
99
# second, if srun fails (in case of a communication failure), it tries to cat it on the frontend (may be empty if the file has not been updated pyproject)
1010

11-
sbatch -W --array=1-$ARRAY_SIZE -o "$1" ./testpackage/$1.sh $2 > jobid_$1 || (srun cat $1 || cat $1 || echo "cat failed exit code $?" && exit 1)
11+
sbatch -W --array=1-$ARRAY_SIZE -o "$1" ./testpackage/$1.sh $2 > jobid_$1 || true
1212

1313
#in case we do exit 0 successfully
1414
LOG=$(srun cat $1 || cat $1)

analysator/vlsvfile/vlsvcache.py

Lines changed: 29 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,10 @@ def __init__(self, *args, **kwargs):
5555

5656
class VariableCache:
5757
''' Class for handling in-memory variable/reducer caching.
58+
TODO: Add some maximum memory use guards.
5859
'''
59-
def __init__(self, reader):
60+
def __init__(self):
6061
self.__varcache = {} # {(varname, operator):data}
61-
self.__reader = reader
6262

6363
def keys(self):
6464
return self.__varcache.keys()
@@ -70,7 +70,7 @@ def __setitem__(self, key, value):
7070
self.__varcache[key] = value
7171

7272

73-
def read_variable_from_cache(self, name, cellids, operator):
73+
def read_variable_from_cache(self, reader, name, cellids, operator):
7474
''' Read variable from cache instead of the vlsv file.
7575
:param name: Name of the variable
7676
:param cellids: a value of -1 reads all data
@@ -90,10 +90,10 @@ def read_variable_from_cache(self, name, cellids, operator):
9090
if cellids == -1:
9191
return var_data
9292
else:
93-
return var_data[self.__reader.get_cellid_locations()[cellids]]
93+
return var_data[reader.get_cellid_locations()[cellids]]
9494
else:
9595
if(len(cellids) > 0):
96-
indices = np.array(itemgetter(*cellids)(self.__reader.get_cellid_locations()),dtype=np.int64)
96+
indices = np.array(itemgetter(*cellids)(reader.get_cellid_locations()),dtype=np.int64)
9797
else:
9898
indices = np.array([],dtype=np.int64)
9999
if value_len == 1:
@@ -106,22 +106,21 @@ class FileCache:
106106
'''
107107

108108
def __init__(self, reader) -> None:
109-
self.__reader = reader
110109
self.__metadata_dict = {}
111110
self.__metadata_read = False
112111

113112
self.__rtree_index_files = []
114113
self.__rtree_index = None
115-
self.__rtree_idxfile = os.path.join(self.get_cache_folder(),"rtree.idx")
116-
self.__rtree_datfile = os.path.join(self.get_cache_folder(),"rtree.dat")
114+
self.__rtree_idxfile = os.path.join(self.get_cache_folder(reader),"rtree.idx")
115+
self.__rtree_datfile = os.path.join(self.get_cache_folder(reader),"rtree.dat")
117116

118117
self.__rtree_properties = rtree.index.Property()
119118
self.__rtree_properties.dimension = 3
120119
self.__rtree_properties.overwrite=True
121120

122121

123-
def get_cache_folder(self):
124-
fn = self.__reader.file_name
122+
def get_cache_folder(self, reader):
123+
fn = reader.file_name
125124

126125
head,tail = os.path.split(fn)
127126
path = head
@@ -147,14 +146,14 @@ def get_cache_folder(self):
147146

148147
return path
149148

150-
def clear_cache_folder(self):
151-
path = self.get_cache_folder()
149+
def clear_cache_folder(self, reader):
150+
path = self.get_cache_folder(reader)
152151
import shutil
153152
shutil.rmtree(path)
154153

155-
def set_cellid_spatial_index(self, force = False):
156-
if not os.path.exists(self.get_cache_folder()):
157-
os.makedirs(self.get_cache_folder())
154+
def set_cellid_spatial_index(self, reader, force = False):
155+
if not os.path.exists(self.get_cache_folder(reader)):
156+
os.makedirs(self.get_cache_folder(reader))
158157

159158
if force:
160159
if os.path.exists(self.__rtree_idxfile):
@@ -164,7 +163,7 @@ def set_cellid_spatial_index(self, force = False):
164163
if(not os.path.isfile(self.__rtree_idxfile) or not os.path.isfile(self.__rtree_datfile)):
165164
t0 = time.time()
166165

167-
bboxes = self.__reader.get_mesh_domain_extents("SpatialGrid")
166+
bboxes = reader.get_mesh_domain_extents("SpatialGrid")
168167
bboxes = bboxes.reshape((-1,6), order='C')
169168

170169
self.__rtree_index = rtree.index.Index(self.__rtree_idxfile[:-4],properties=self.__rtree_properties, interleaved=False)
@@ -175,39 +174,39 @@ def set_cellid_spatial_index(self, force = False):
175174
else:
176175
pass
177176

178-
def get_cellid_spatial_index(self, force = False):
177+
def get_cellid_spatial_index(self, reader, force = False):
179178
if self.__rtree_index == None:
180179
if(force):
181-
self.set_cellid_spatial_index(force)
180+
self.set_cellid_spatial_index(reader, force)
182181
elif not os.path.isfile(self.__rtree_idxfile) or not os.path.isfile(self.__rtree_datfile):
183182
self.__rtree_index = None
184183
else:
185184
self.__rtree_index = rtree.index.Index(self.__rtree_idxfile[:-4], properties=self.__rtree_properties, interleaved=False)
186185

187186
return self.__rtree_index
188187

189-
def add_metadata(self, key, value):
188+
def add_metadata(self, reader, key, value):
190189
self.__metadata_dict[key] = value
191-
self.save_metadata()
190+
self.save_metadata(reader)
192191

193-
def get_metadata_filename(self):
194-
pth, base = os.path.split(self.__reader.file_name)
195-
path = self.get_cache_folder()
192+
def get_metadata_filename(self, reader):
193+
pth, base = os.path.split(reader.file_name)
194+
path = self.get_cache_folder(reader)
196195

197196
s = os.path.join(path,"metadata.pkl")
198197
return s
199198

200-
def save_metadata(self):
201-
fn = self.get_metadata_filename()
202-
if not os.path.exists(self.get_cache_folder()):
203-
os.makedirs(self.get_cache_folder())
199+
def save_metadata(self, reader):
200+
fn = self.get_metadata_filename(reader)
201+
if not os.path.exists(self.get_cache_folder(reader)):
202+
os.makedirs(self.get_cache_folder(reader))
204203
try:
205204
with open(fn,'wb') as f:
206205
pickle.dump(self.__metadata_dict,f)
207206
except Exception as e:
208207
logging.warning("Could not save metadata file, error: "+str(e))
209208

210-
def get_metadata(self, key, default):
209+
def get_metadata(self, reader, key, default):
211210
''' Read metadata from metadata file/memory, and if not available,
212211
return the given default value.
213212
@@ -217,11 +216,11 @@ def get_metadata(self, key, default):
217216

218217
if not self.__metadata_read:
219218
try:
220-
fn = self.get_metadata_filename()
219+
fn = self.get_metadata_filename(reader)
221220
with open(fn,'rb') as f:
222221
self.__metadata_dict = pickle.load(f)
223222
except Exception as e:
224-
logging.debug("No metadata file found at "+self.get_metadata_filename()+":\n"+str(e))
223+
logging.debug("No metadata file found at "+self.get_metadata_filename(reader)+":\n"+str(e))
225224

226225
self.__metadata_read = True
227226

analysator/vlsvfile/vlsvreader.py

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ def __init__(self, file_name, fsGridDecomposition=None, file_cache = 0):
202202
self.__order_for_cellid_blocks = {} # per-pop
203203
self.__vg_indexes_on_fg = np.array([]) # SEE: map_vg_onto_fg(self)
204204

205-
self.__variable_cache = vlsvcache.VariableCache(self) # {(varname, operator):data}
205+
self.__variable_cache = vlsvcache.VariableCache() # {(varname, operator):data}
206206
self.__params_cache = {} # {name:data}
207207

208208
self.__pops_init = False
@@ -323,11 +323,10 @@ def get_linked_readers_filename(self):
323323
'''Need to go to a consolidated metadata handler - keeping human-readable for now'''
324324
pth, base = os.path.split(self.file_name)
325325

326-
s = os.path.join(self.__metadata_cache.get_cache_folder(),"linked_readers.txt")
326+
s = os.path.join(self.__metadata_cache.get_cache_folder(self),"linked_readers.txt")
327327
return s
328328

329329
def get_linked_readers(self, reload=False):
330-
# self.__linked_files = self.__metadata_cache.get_metadata("linked_reader_files", set())
331330
if len(self.__linked_files)==0 or reload:
332331
if(os.path.isfile(self.get_linked_readers_filename())):
333332
with open(self.get_linked_readers_filename(), 'r') as f:
@@ -549,7 +548,7 @@ def __read_fileindex_for_cellid(self):
549548
# print("fileindex!")
550549
cellids=self.read(mesh="SpatialGrid",name="CellID", tag="VARIABLE")
551550

552-
#Check if it is not iterable. If it is a scale then make it a list
551+
#Check if it is not iterable. If it is a scalar then make it a list (single-cell runs?)
553552
if(not isinstance(cellids, Iterable)):
554553
cellids=[ cellids ]
555554
# self.__fileindex_for_cellid = {cellid:index for index,cellid in enumerate(cellids)}
@@ -1430,7 +1429,12 @@ def read(self, name="", tag="", mesh="", operator="pass", cellids=-1):
14301429
data=data.reshape(result_size, vector_size)
14311430

14321431
if not isinstance(cellids, numbers.Number):
1433-
data_out = np.full_like(data, np.nan, shape=(len(cellids),*data.shape[1:]))
1432+
if np.issubdtype(data.dtype, np.floating):
1433+
data_out = np.full_like(data, np.nan, shape=(len(cellids),*data.shape[1:]))
1434+
elif np.issubdtype(data.dtype, np.integer):
1435+
data_out = np.full_like(data, np.iinfo(data.dtype).min, shape=(len(cellids),*data.shape[1:]))
1436+
else:
1437+
raise ValueError("unexpected dtype encountered in read ("+str(data.dtype)+")")
14341438
data_out[cellids!=0,...] = data
14351439
data = data_out
14361440

@@ -2074,7 +2078,7 @@ def read_fsgrid_variable_cellid(self, name, cellids=-1, operator="pass"):
20742078
def get_fsgrid_decomposition(self):
20752079
# Try if in metadata
20762080
if(self.__fsGridDecomposition is not None):
2077-
print("read ",self.__fsGridDecomposition)
2081+
logging.info("read " + str(self.__fsGridDecomposition))
20782082

20792083
if self.__fsGridDecomposition is None:
20802084
self.__fsGridDecomposition = self.read(tag="MESH_DECOMPOSITION",mesh='fsgrid')
@@ -2085,7 +2089,7 @@ def get_fsgrid_decomposition(self):
20852089
logging.info("Did not find FsGrid decomposition from vlsv file.")
20862090

20872091
if self.__fsGridDecomposition is None:
2088-
self.__fsGridDecomposition = self.__metadata_cache.get_metadata(("MESH_DECOMPOSITION","fsgrid"),None)
2092+
self.__fsGridDecomposition = self.__metadata_cache.get_metadata(self,("MESH_DECOMPOSITION","fsgrid"),None)
20892093
if self.__fsGridDecomposition is not None:
20902094
logging.info("Found FsGrid decomposition from metadata file: " + str(self.__fsGridDecomposition))
20912095
return self.__fsGridDecomposition
@@ -2098,7 +2102,7 @@ def get_fsgrid_decomposition(self):
20982102
logging.info("Calculating fsGrid decomposition from the file")
20992103
self.__fsGridDecomposition = fsDecompositionFromGlobalIds(self)
21002104
logging.info("Computed FsGrid decomposition to be: " + str(self.__fsGridDecomposition))
2101-
self.__metadata_cache.add_metadata(("MESH_DECOMPOSITION","fsgrid"), self.__fsGridDecomposition)
2105+
self.__metadata_cache.add_metadata(self,("MESH_DECOMPOSITION","fsgrid"), self.__fsGridDecomposition)
21022106
return self.__fsGridDecomposition
21032107
else:
21042108
# Decomposition is a list (or fail assertions below) - use it instead
@@ -2245,8 +2249,8 @@ def print_metadata_cache(self):
22452249
''' Prints the contents of the metadata cache file.
22462250
'''
22472251

2248-
print("Metadata cache at "+self.__metadata_cache.get_metadata_filename()+":")
2249-
self.__metadata_cache.get_metadata("dummy",None) # Dummy call to read in the metadata file
2252+
print("Metadata cache at "+self.__metadata_cache.get_metadata_filename(self)+":")
2253+
self.__metadata_cache.get_metadata(self,"dummy",None) # Dummy call to read in the metadata file
22502254
for k,v in self.__metadata_cache._FileCache__metadata_dict.items():
22512255
print(k, v)
22522256

@@ -2690,8 +2694,11 @@ def do_partial_fileindex_update(self, coords):
26902694
if coords.shape[0] == 0:
26912695
return
26922696

2697+
# We already know everything, do nothing and return
2698+
if self.__full_fileindex_for_cellid:
2699+
return
2700+
26932701
if self.get_cellid_spatial_index() == None:
2694-
26952702
self.__read_fileindex_for_cellid()
26962703
return
26972704

@@ -4347,17 +4354,17 @@ def load_neighbor_stencils_from_filecache(self):
43474354
self.__neighbors_cache_loaded = True
43484355

43494356
def set_cellid_spatial_index(self, force=False):
4350-
self.__cellid_spatial_index = self.__metadata_cache.set_cellid_spatial_index(force)
4357+
self.__cellid_spatial_index = self.__metadata_cache.set_cellid_spatial_index(self, force)
43514358

43524359
def get_cellid_spatial_index(self, force=False):
43534360
return None
43544361
if not force:
43554362
if self.__cellid_spatial_index is None:
4356-
self.__cellid_spatial_index = self.__metadata_cache.get_cellid_spatial_index(force)
4363+
self.__cellid_spatial_index = self.__metadata_cache.get_cellid_spatial_index(self, force)
43574364
else:
43584365
pass
43594366
else:
4360-
self.__cellid_spatial_index = self.__metadata_cache.set_cellid_spatial_index(force)
4367+
self.__cellid_spatial_index = self.__metadata_cache.set_cellid_spatial_index(self, force)
43614368

43624369
return self.__cellid_spatial_index
43634370

@@ -4370,4 +4377,4 @@ def cache_optimization_files(self, force=False):
43704377
''' Create cached optimization files for this reader object (e.g. spatial index)
43714378
43724379
'''
4373-
self.__metadata_cache.set_cellid_spatial_index(force)
4380+
self.__metadata_cache.set_cellid_spatial_index(self, force)

0 commit comments

Comments
 (0)