diff --git a/analysator/vlsvfile/__init__.py b/analysator/vlsvfile/__init__.py index 0c94f37a..1a51a470 100644 --- a/analysator/vlsvfile/__init__.py +++ b/analysator/vlsvfile/__init__.py @@ -46,7 +46,7 @@ import logging from .vlsvreader import VlsvReader -from .vlsvreader import fsDecompositionFromGlobalIds,fsReadGlobalIdsPerRank,fsGlobalIdToGlobalIndex +from .vlsvreader import fsDecompositionFromGlobalIds,fsReadGlobalIdsPerRank,fsGlobalIdToGlobalIndex,dict_keys_exist from .vlsvwriter import VlsvWriter from .vlasiatorreader import VlasiatorReader try: diff --git a/analysator/vlsvfile/vlsvreader.py b/analysator/vlsvfile/vlsvreader.py index 2e9a9f8f..7e14a992 100644 --- a/analysator/vlsvfile/vlsvreader.py +++ b/analysator/vlsvfile/vlsvreader.py @@ -178,8 +178,15 @@ def __init__(self, file_name, fsGridDecomposition=None, file_cache = 0): raise e self.__xml_root = ET.fromstring("") + + self.query_cellid_exist = self.__query_cellid_exists_dict + self.get_cellid_fileindices = self.__get_cellid_fileindices_dict + self.__fileindex_for_cellid={} - self.__full_fileindex_for_cellid = False + self.__cellids_ordered = np.array([],dtype=np.int64) + self.__cellid_fileindex_ordered = np.array([],dtype=np.int64) + self.set_cellid_mapping_method("ordered") + self.__full_fileindex_for_cellid = False # to be deprecated? self.__cellid_spatial_index=None self.__rankwise_fileindex_for_cellid = {} # { : {cellid: offset}} self.__loaded_fileindex_ranks = set() @@ -310,9 +317,59 @@ def __init__(self, file_name, fsGridDecomposition=None, file_cache = 0): # Update list of active populations if not popname in self.active_populations: self.active_populations.append(popname) - self.__fptr.close() + def __set_cellid_indices_ordered(self): + if self.check_variable("CellID_ordered") and self.check_variable("CellID_fileindex_ordered"): + self.__cellids_ordered = self.read_variable("CellID_ordered") + self.__cellid_fileindex_ordered = self.read_variable("CellID_fileindex_ordered") + else: + cids = self.read_variable("CellID") + index = np.array([i for i,c in enumerate(cids)],dtype=np.int64) + ids = np.argsort(cids) + self.__cellids_ordered = cids[ids] + self.__cellid_fileindex_ordered = index[ids] + + + def __query_cellid_exists_ordered(self, cellids): + if len(self.__cellids_ordered) == 0: + self.__set_cellid_indices_ordered() + qi = np.searchsorted(self.__cellids_ordered, cellids) + mask = qi < len(self.__cellids_ordered) + mask[mask] = self.__cellids_ordered[qi[mask]] == cellids[mask] + return mask + + def __query_cellid_exists_dict(self, cellids): + self.__read_fileindex_for_cellid() + return dict_keys_exist(self.__fileindex_for_cellid, cellids) + + def __get_cellid_fileindices_ordered(self, cellids): + if len(self.__cellids_ordered) == 0: + self.__set_cellid_indices_ordered() + qi = np.searchsorted(self.__cellids_ordered, cellids) + mask = qi < len(self.__cellids_ordered) + mask[mask] = self.__cellids_ordered[qi[mask]] == cellids[mask] + return self.__cellid_fileindex_ordered[qi[mask]] + + def __get_cellid_fileindices_dict(self, cellids): + self.__read_fileindex_for_cellid() + return itemgetter(*cellids)(self.__fileindex_for_cellid) + + def set_cellid_mapping_method(self, method="dict"): + ''' Set the methods for querying cellid existence and file index. "dict" is the usual Python + dict implementation, which is slow to construct but fast for repeated accesses. + + :kwarg method: string, method to use (default "dict", "dict" and "ordered" available) + ''' + if method == "dict": + self.query_cellid_exist = self.__query_cellid_exists_dict + self.get_cellid_fileindices = self.__get_cellid_fileindices_dict + elif method == "ordered": + self.query_cellid_exist = self.__query_cellid_exists_ordered + self.get_cellid_fileindices = self.__get_cellid_fileindices_ordered + else: + raise ValueError("Unknown method (" + method +") given for set_cellid_mapping_method") + def get_grid_epsilon(self): if self.__grid_epsilon is None: # one-thousandth of the max refined cell; self.get_max_refinement_level() however reads all cellids, so just temp here by assuming 8 refinement levels.. which is plenty for now @@ -1296,41 +1353,44 @@ def get_read_offsets(self, cellids, array_size, element_size, vector_size): try: result_size = len(cellids) read_size = 1 - read_offsets = [self.__fileindex_for_cellid[cid]*element_size*vector_size for cid in cellids] + # read_offsets = [self.__fileindex_for_cellid[cid]*element_size*vector_size for cid in cellids] + read_offsets = self.get_cellid_fileindices(cellids)*element_size*vector_size except: self.__read_fileindex_for_cellid() result_size = len(cellids) read_size = 1 - read_offsets = [self.__fileindex_for_cellid[cid]*element_size*vector_size for cid in cellids] + # read_offsets = [self.__fileindex_for_cellid[cid]*element_size*vector_size for cid in cellids] + read_offsets = self.get_cellid_fileindices(cellids)*element_size*vector_size else: # single cell or all cells if cellids < 0: # -1, read all cells result_size = array_size read_size = array_size read_offsets = [0] else: # single cell id - if self.__full_fileindex_for_cellid: # Fileindex already exists, we might as well use it - result_size = 1 - read_size = 1 - read_offsets = [self.__fileindex_for_cellid[cellids]*element_size*vector_size] - elif self.__cellid_spatial_index != None: - # Here a faster alternative - result_size = 1 - read_size = 1 - eps = self.get_grid_epsilon() - qp = self.get_cell_coordinates(cellids) - qqp = np.hstack((qp-eps,qp+eps)) - pq = np.array([qqp[0],qqp[3],qqp[1],qqp[4],qqp[2],qqp[5]]) - rankids = self.__cellid_spatial_index.intersection() - read_offsets = None - for rankid in rankids: - indexdict = self.get_cellid_locations_rank(rankid) - if cellids in indexdict.keys(): - read_offsets = [indexdict[cellids]*element_size*vector_size] - else: - self.__read_fileindex_for_cellid() - result_size = 1 - read_size = 1 - read_offsets = [self.__fileindex_for_cellid[cellids]*element_size*vector_size] + # if self.__full_fileindex_for_cellid: # Fileindex already exists, we might as well use it + result_size = 1 + read_size = 1 + # read_offsets = [self.__fileindex_for_cellid[cellids]*element_size*vector_size] + read_offsets = self.get_cellid_fileindices([cellids])*element_size*vector_size + # elif self.__cellid_spatial_index != None: + # # Here a faster alternative + # result_size = 1 + # read_size = 1 + # eps = self.get_grid_epsilon() + # qp = self.get_cell_coordinates(cellids) + # qqp = np.hstack((qp-eps,qp+eps)) + # pq = np.array([qqp[0],qqp[3],qqp[1],qqp[4],qqp[2],qqp[5]]) + # rankids = self.__cellid_spatial_index.intersection() + # read_offsets = None + # for rankid in rankids: + # indexdict = self.get_cellid_locations_rank(rankid) + # if cellids in indexdict.keys(): + # read_offsets = [indexdict[cellids]*element_size*vector_size] + # else: + # self.__read_fileindex_for_cellid() + # result_size = 1 + # read_size = 1 + # read_offsets = [self.__fileindex_for_cellid[cellids]*element_size*vector_size] return read_size, read_offsets, result_size, reorder_data @@ -1414,11 +1474,13 @@ def read(self, name="", tag="", mesh="", operator="pass", cellids=-1): if vector_size > 1: arraydata=arraydata.reshape(-1, vector_size) - mask = ~dict_keys_exist(self.__fileindex_for_cellid, cellids_nonzero) + # mask = ~dict_keys_exist(self.__fileindex_for_cellid, cellids_nonzero) + mask = ~self.query_cellid_exist(cellids_nonzero) - self.do_partial_fileindex_update(self.get_cell_coordinates(cellids_nonzero[mask])) + # self.do_partial_fileindex_update(self.get_cell_coordinates(cellids_nonzero[mask])) - append_offsets = [self.__fileindex_for_cellid[cid] for cid in cellids_nonzero] + # append_offsets = [self.__fileindex_for_cellid[cid] for cid in cellids_nonzero] + append_offsets = self.get_cellid_fileindices(cellids_nonzero) data = arraydata[append_offsets,...] data = np.squeeze(data) @@ -2746,7 +2808,6 @@ def do_partial_fileindex_update(self, coords): # print("Fallback") self.__read_fileindex_for_cellid() - def get_cellid(self, coords): ''' Returns the cell ids at given coordinates @@ -2807,7 +2868,8 @@ def get_cellid(self, coords): while AMR_count < refmax +1: - drop = ~dict_keys_exist(self.__fileindex_for_cellid, cellids[mask]) + # drop = ~dict_keys_exist(self.__fileindex_for_cellid, cellids[mask]) + drop = ~self.query_cellid_exist(cellids[mask]) mask[mask] = mask[mask] & drop @@ -2823,7 +2885,8 @@ def get_cellid(self, coords): # Get the cell id: cellids[mask] = ncells_lowerlevel + cellindices[mask,0] + 2**(AMR_count)*cellindices[mask,1] * self.__xcells + 4**(AMR_count) * cellindices[mask,2] * self.__xcells * self.__ycells + 1 - drop = ~dict_keys_exist(self.__fileindex_for_cellid, cellids[mask]) + # drop = ~dict_keys_exist(self.__fileindex_for_cellid, cellids[mask]) + drop = ~self.query_cellid_exist(cellids[mask]) mask[mask] = mask[mask] & drop cellids[mask] = 0 # set missing cells to null cell if stack: @@ -4256,6 +4319,8 @@ def optimize_clear_fileindex_for_cellid(self): .. note:: This should only be used for optimization purposes. ''' self.__fileindex_for_cellid = {} + self.__cellids_ordered = np.array([],dtype=np.int64) + self.__cellid_fileindex_ordered = np.array([],dtype=np.int64) def get_mesh_domain_extents(self, mesh): if (mesh == 'fsgrid'): diff --git a/analysator/vlsvfile/vlsvvtkinterface.py b/analysator/vlsvfile/vlsvvtkinterface.py index 98a7c1d3..e0e52953 100644 --- a/analysator/vlsvfile/vlsvvtkinterface.py +++ b/analysator/vlsvfile/vlsvvtkinterface.py @@ -90,8 +90,7 @@ def GetFileName(self): def buildDescriptor(self): f = self.__reader - f._VlsvReader__read_fileindex_for_cellid() - fileindex_for_cellid = f._VlsvReader__fileindex_for_cellid + fileindex_for_cellid = f.get_cellid_locations() xc = f._VlsvReader__xcells yc = f._VlsvReader__ycells zc = f._VlsvReader__zcells @@ -187,7 +186,7 @@ def getDescriptor(self, reinit=False): def getCellIDtoIdxMap(self): if len(self.__cellIDtoIdx) == 0: FileIndexToID = {v:k for k,v in self.getDescriptor()[1].items()} - for c,fi in self.__reader._VlsvReader__fileindex_for_cellid.items(): + for c,fi in self.__reader.get_cellid_locations().items(): self.__cellIDtoIdx[c] = FileIndexToID[fi] return self.__cellIDtoIdx