diff --git a/analysator/vlsvfile/__init__.py b/analysator/vlsvfile/__init__.py
index 0c94f37a..1a51a470 100644
--- a/analysator/vlsvfile/__init__.py
+++ b/analysator/vlsvfile/__init__.py
@@ -46,7 +46,7 @@
import logging
from .vlsvreader import VlsvReader
-from .vlsvreader import fsDecompositionFromGlobalIds,fsReadGlobalIdsPerRank,fsGlobalIdToGlobalIndex
+from .vlsvreader import fsDecompositionFromGlobalIds,fsReadGlobalIdsPerRank,fsGlobalIdToGlobalIndex,dict_keys_exist
from .vlsvwriter import VlsvWriter
from .vlasiatorreader import VlasiatorReader
try:
diff --git a/analysator/vlsvfile/vlsvreader.py b/analysator/vlsvfile/vlsvreader.py
index 2e9a9f8f..7e14a992 100644
--- a/analysator/vlsvfile/vlsvreader.py
+++ b/analysator/vlsvfile/vlsvreader.py
@@ -178,8 +178,15 @@ def __init__(self, file_name, fsGridDecomposition=None, file_cache = 0):
raise e
self.__xml_root = ET.fromstring("")
+
+ self.query_cellid_exist = self.__query_cellid_exists_dict
+ self.get_cellid_fileindices = self.__get_cellid_fileindices_dict
+
self.__fileindex_for_cellid={}
- self.__full_fileindex_for_cellid = False
+ self.__cellids_ordered = np.array([],dtype=np.int64)
+ self.__cellid_fileindex_ordered = np.array([],dtype=np.int64)
+ self.set_cellid_mapping_method("ordered")
+ self.__full_fileindex_for_cellid = False # to be deprecated?
self.__cellid_spatial_index=None
self.__rankwise_fileindex_for_cellid = {} # { : {cellid: offset}}
self.__loaded_fileindex_ranks = set()
@@ -310,9 +317,59 @@ def __init__(self, file_name, fsGridDecomposition=None, file_cache = 0):
# Update list of active populations
if not popname in self.active_populations: self.active_populations.append(popname)
-
self.__fptr.close()
+ def __set_cellid_indices_ordered(self):
+ if self.check_variable("CellID_ordered") and self.check_variable("CellID_fileindex_ordered"):
+ self.__cellids_ordered = self.read_variable("CellID_ordered")
+ self.__cellid_fileindex_ordered = self.read_variable("CellID_fileindex_ordered")
+ else:
+ cids = self.read_variable("CellID")
+ index = np.array([i for i,c in enumerate(cids)],dtype=np.int64)
+ ids = np.argsort(cids)
+ self.__cellids_ordered = cids[ids]
+ self.__cellid_fileindex_ordered = index[ids]
+
+
+ def __query_cellid_exists_ordered(self, cellids):
+ if len(self.__cellids_ordered) == 0:
+ self.__set_cellid_indices_ordered()
+ qi = np.searchsorted(self.__cellids_ordered, cellids)
+ mask = qi < len(self.__cellids_ordered)
+ mask[mask] = self.__cellids_ordered[qi[mask]] == cellids[mask]
+ return mask
+
+ def __query_cellid_exists_dict(self, cellids):
+ self.__read_fileindex_for_cellid()
+ return dict_keys_exist(self.__fileindex_for_cellid, cellids)
+
+ def __get_cellid_fileindices_ordered(self, cellids):
+ if len(self.__cellids_ordered) == 0:
+ self.__set_cellid_indices_ordered()
+ qi = np.searchsorted(self.__cellids_ordered, cellids)
+ mask = qi < len(self.__cellids_ordered)
+ mask[mask] = self.__cellids_ordered[qi[mask]] == cellids[mask]
+ return self.__cellid_fileindex_ordered[qi[mask]]
+
+ def __get_cellid_fileindices_dict(self, cellids):
+ self.__read_fileindex_for_cellid()
+ return itemgetter(*cellids)(self.__fileindex_for_cellid)
+
+ def set_cellid_mapping_method(self, method="dict"):
+ ''' Set the methods for querying cellid existence and file index. "dict" is the usual Python
+ dict implementation, which is slow to construct but fast for repeated accesses.
+
+ :kwarg method: string, method to use (default "dict", "dict" and "ordered" available)
+ '''
+ if method == "dict":
+ self.query_cellid_exist = self.__query_cellid_exists_dict
+ self.get_cellid_fileindices = self.__get_cellid_fileindices_dict
+ elif method == "ordered":
+ self.query_cellid_exist = self.__query_cellid_exists_ordered
+ self.get_cellid_fileindices = self.__get_cellid_fileindices_ordered
+ else:
+ raise ValueError("Unknown method (" + method +") given for set_cellid_mapping_method")
+
def get_grid_epsilon(self):
if self.__grid_epsilon is None:
# one-thousandth of the max refined cell; self.get_max_refinement_level() however reads all cellids, so just temp here by assuming 8 refinement levels.. which is plenty for now
@@ -1296,41 +1353,44 @@ def get_read_offsets(self, cellids, array_size, element_size, vector_size):
try:
result_size = len(cellids)
read_size = 1
- read_offsets = [self.__fileindex_for_cellid[cid]*element_size*vector_size for cid in cellids]
+ # read_offsets = [self.__fileindex_for_cellid[cid]*element_size*vector_size for cid in cellids]
+ read_offsets = self.get_cellid_fileindices(cellids)*element_size*vector_size
except:
self.__read_fileindex_for_cellid()
result_size = len(cellids)
read_size = 1
- read_offsets = [self.__fileindex_for_cellid[cid]*element_size*vector_size for cid in cellids]
+ # read_offsets = [self.__fileindex_for_cellid[cid]*element_size*vector_size for cid in cellids]
+ read_offsets = self.get_cellid_fileindices(cellids)*element_size*vector_size
else: # single cell or all cells
if cellids < 0: # -1, read all cells
result_size = array_size
read_size = array_size
read_offsets = [0]
else: # single cell id
- if self.__full_fileindex_for_cellid: # Fileindex already exists, we might as well use it
- result_size = 1
- read_size = 1
- read_offsets = [self.__fileindex_for_cellid[cellids]*element_size*vector_size]
- elif self.__cellid_spatial_index != None:
- # Here a faster alternative
- result_size = 1
- read_size = 1
- eps = self.get_grid_epsilon()
- qp = self.get_cell_coordinates(cellids)
- qqp = np.hstack((qp-eps,qp+eps))
- pq = np.array([qqp[0],qqp[3],qqp[1],qqp[4],qqp[2],qqp[5]])
- rankids = self.__cellid_spatial_index.intersection()
- read_offsets = None
- for rankid in rankids:
- indexdict = self.get_cellid_locations_rank(rankid)
- if cellids in indexdict.keys():
- read_offsets = [indexdict[cellids]*element_size*vector_size]
- else:
- self.__read_fileindex_for_cellid()
- result_size = 1
- read_size = 1
- read_offsets = [self.__fileindex_for_cellid[cellids]*element_size*vector_size]
+ # if self.__full_fileindex_for_cellid: # Fileindex already exists, we might as well use it
+ result_size = 1
+ read_size = 1
+ # read_offsets = [self.__fileindex_for_cellid[cellids]*element_size*vector_size]
+ read_offsets = self.get_cellid_fileindices([cellids])*element_size*vector_size
+ # elif self.__cellid_spatial_index != None:
+ # # Here a faster alternative
+ # result_size = 1
+ # read_size = 1
+ # eps = self.get_grid_epsilon()
+ # qp = self.get_cell_coordinates(cellids)
+ # qqp = np.hstack((qp-eps,qp+eps))
+ # pq = np.array([qqp[0],qqp[3],qqp[1],qqp[4],qqp[2],qqp[5]])
+ # rankids = self.__cellid_spatial_index.intersection()
+ # read_offsets = None
+ # for rankid in rankids:
+ # indexdict = self.get_cellid_locations_rank(rankid)
+ # if cellids in indexdict.keys():
+ # read_offsets = [indexdict[cellids]*element_size*vector_size]
+ # else:
+ # self.__read_fileindex_for_cellid()
+ # result_size = 1
+ # read_size = 1
+ # read_offsets = [self.__fileindex_for_cellid[cellids]*element_size*vector_size]
return read_size, read_offsets, result_size, reorder_data
@@ -1414,11 +1474,13 @@ def read(self, name="", tag="", mesh="", operator="pass", cellids=-1):
if vector_size > 1:
arraydata=arraydata.reshape(-1, vector_size)
- mask = ~dict_keys_exist(self.__fileindex_for_cellid, cellids_nonzero)
+ # mask = ~dict_keys_exist(self.__fileindex_for_cellid, cellids_nonzero)
+ mask = ~self.query_cellid_exist(cellids_nonzero)
- self.do_partial_fileindex_update(self.get_cell_coordinates(cellids_nonzero[mask]))
+ # self.do_partial_fileindex_update(self.get_cell_coordinates(cellids_nonzero[mask]))
- append_offsets = [self.__fileindex_for_cellid[cid] for cid in cellids_nonzero]
+ # append_offsets = [self.__fileindex_for_cellid[cid] for cid in cellids_nonzero]
+ append_offsets = self.get_cellid_fileindices(cellids_nonzero)
data = arraydata[append_offsets,...]
data = np.squeeze(data)
@@ -2746,7 +2808,6 @@ def do_partial_fileindex_update(self, coords):
# print("Fallback")
self.__read_fileindex_for_cellid()
-
def get_cellid(self, coords):
''' Returns the cell ids at given coordinates
@@ -2807,7 +2868,8 @@ def get_cellid(self, coords):
while AMR_count < refmax +1:
- drop = ~dict_keys_exist(self.__fileindex_for_cellid, cellids[mask])
+ # drop = ~dict_keys_exist(self.__fileindex_for_cellid, cellids[mask])
+ drop = ~self.query_cellid_exist(cellids[mask])
mask[mask] = mask[mask] & drop
@@ -2823,7 +2885,8 @@ def get_cellid(self, coords):
# Get the cell id:
cellids[mask] = ncells_lowerlevel + cellindices[mask,0] + 2**(AMR_count)*cellindices[mask,1] * self.__xcells + 4**(AMR_count) * cellindices[mask,2] * self.__xcells * self.__ycells + 1
- drop = ~dict_keys_exist(self.__fileindex_for_cellid, cellids[mask])
+ # drop = ~dict_keys_exist(self.__fileindex_for_cellid, cellids[mask])
+ drop = ~self.query_cellid_exist(cellids[mask])
mask[mask] = mask[mask] & drop
cellids[mask] = 0 # set missing cells to null cell
if stack:
@@ -4256,6 +4319,8 @@ def optimize_clear_fileindex_for_cellid(self):
.. note:: This should only be used for optimization purposes.
'''
self.__fileindex_for_cellid = {}
+ self.__cellids_ordered = np.array([],dtype=np.int64)
+ self.__cellid_fileindex_ordered = np.array([],dtype=np.int64)
def get_mesh_domain_extents(self, mesh):
if (mesh == 'fsgrid'):
diff --git a/analysator/vlsvfile/vlsvvtkinterface.py b/analysator/vlsvfile/vlsvvtkinterface.py
index 98a7c1d3..e0e52953 100644
--- a/analysator/vlsvfile/vlsvvtkinterface.py
+++ b/analysator/vlsvfile/vlsvvtkinterface.py
@@ -90,8 +90,7 @@ def GetFileName(self):
def buildDescriptor(self):
f = self.__reader
- f._VlsvReader__read_fileindex_for_cellid()
- fileindex_for_cellid = f._VlsvReader__fileindex_for_cellid
+ fileindex_for_cellid = f.get_cellid_locations()
xc = f._VlsvReader__xcells
yc = f._VlsvReader__ycells
zc = f._VlsvReader__zcells
@@ -187,7 +186,7 @@ def getDescriptor(self, reinit=False):
def getCellIDtoIdxMap(self):
if len(self.__cellIDtoIdx) == 0:
FileIndexToID = {v:k for k,v in self.getDescriptor()[1].items()}
- for c,fi in self.__reader._VlsvReader__fileindex_for_cellid.items():
+ for c,fi in self.__reader.get_cellid_locations().items():
self.__cellIDtoIdx[c] = FileIndexToID[fi]
return self.__cellIDtoIdx