diff --git a/src/assessment/compression_methods.py b/src/assessment/compression_methods.py index 0732ef9..47dc26b 100644 --- a/src/assessment/compression_methods.py +++ b/src/assessment/compression_methods.py @@ -15,7 +15,7 @@ VDF_TYPE_SIZE=4 def sparsify(vdf,sparsity): - vdf[vdf np.iinfo(rle_type).max: + print("Error: runcount overflow") + rle_list_of_tuples.append((runcount, v)) + v0 = v + runcount = 0 + + init = False + + return rle_list_of_tuples + + # print(pywt.wavelist(kind='discrete')) + + vdf_3d = vdf.copy().astype(np.float64) + + loga = False + offset = np.float64(1) + if loga: + vdf_3d[vdf_3d<0]=0 + # print(np.nanmax(vdf_3d)) + vdf_3d = np.log10(vdf_3d + offset) + # print(np.nanmax(vdf_3d)) + if loga: + norm = np.nanmax(vdf_3d) + else: + norm = np.nanmax(vdf_3d) + print("norm", norm) + # norm = 1 + vdf_3d /= norm + orig_shape = vdf_3d.shape + if loga: + pass + # vdf_3d[np.isinf(vdf_3d)] = -np.inf + # vdf_3d[np.isnan(vdf_3d)] = 0 + + comp_type = np.dtype(np.uint16) + # comp_type = None + # quant = np.iinfo(comp_type).max/3 + + + + + # print(np.nanmax(vdf_3d),np.nanmin(vdf_3d[np.isfinite(vdf_3d)]), norm) + + threshold = np.float64(1e-16) + wavelet = 'db9'#'bior1.3' + # wavelet = 'bior1.3' + # wavelet = 'db9' + + dwtn_mlevel = pywt.dwtn_max_level(vdf_3d.shape,wavelet) + level_delta = 0 + discard_level = dwtn_mlevel+1 + print("Decomposing to ", max(dwtn_mlevel-level_delta,0), "levels out of ", dwtn_mlevel, 'discarding at level', discard_level ) + coeffs3 = pywt.wavedecn(vdf_3d,wavelet=wavelet, level = max(dwtn_mlevel-level_delta,0)) + + # print(coeffs3) + coeffs3_comp = coeffs3.copy() + + print(type(coeffs3_comp)) + + zeros = 0 + nonzeros = 0 + + + dtype0 = np.dtype(np.float32) + dtype = np.dtype(np.float32) + if loga: + threshold = np.log10(threshold+offset)/norm + else: + threshold = threshold/norm + print(dtype0.itemsize) + + coeffs3_comp_qdat = {} + + for i,a in enumerate(coeffs3_comp): + + if(type(a) == type(np.ndarray(1))): + # print(type(a), a.dtype) + # print(a) + + # print(coeffs3_comp[i]) + if loga: + coeffs3_comp[i] = dwt_threshold_coeffs(coeffs3_comp[i], 1e-17,1e-17) + else: + coeffs3_comp[i] = dwt_threshold_coeffs(coeffs3_comp[i], threshold=threshold, high=threshold*1000) + # coeffs3_comp[i] = coeffs3_comp[i].astype(dtype) + coeffs3_comp[i], coeffs3_comp_qdat[i] = dwt_quantize_coeffs(coeffs3_comp[i], comp_type) + # print(coeffs3_comp[i]) + mask = np.abs(coeffs3_comp[i]) == 0#< threshold + zeros += np.sum(mask) + nonzeros += np.sum(~mask) + # nonzeros += np.prod(a.shape) + if not loga: + coeffs3_comp[i][mask] = 0 + else: + coeffs3_comp_qdat.setdefault(i,{}) + print('dict at level',i) + for k,v in a.items(): + if i >= discard_level: + print('discarding at level', i) + coeffs3_comp[i][k] = np.zeros_like(coeffs3_comp[i][k]) + else: + if loga: + coeffs3_comp[i][k] = dwt_threshold_coeffs(coeffs3_comp[i][k], threshold = 1e-17, high = 1e-17) + else: + coeffs3_comp[i][k] = dwt_threshold_coeffs(coeffs3_comp[i][k], threshold=threshold, high=threshold*1000) + + # coeffs3_comp[i][k] = coeffs3_comp[i][k].astype(dtype) + coeffs3_comp[i][k], coeffs3_comp_qdat[i][k] = dwt_quantize_coeffs(coeffs3_comp[i][k], quantization_dtype=comp_type) + + mask = np.abs(coeffs3_comp[i][k]) == 0# < threshold + + if not loga: + coeffs3_comp[i][k][mask] = 0 + zeros += np.sum(mask) + nonzeros += np.sum(~mask) + + print("number of zeros:", zeros, "nonzeros:", nonzeros) + + + rle_bytes = 0 + rle0 =rle(coeffs3_comp[0]) + # print() + # print("len rle0:",len(rle0),"manual calculation of RLE bytes for first:", len(rle0)*(rle_type.itemsize+comp_type.itemsize),"sys bytes:", sys.getsizeof(rle0)) + + for i,a in enumerate(coeffs3_comp): + if(type(a) == type(np.ndarray(1))): + rle0 = rle(coeffs3_comp[i]) + morebytes = len(rle0)*(rle_type.itemsize+comp_type.itemsize) + # print('rle bytes added: ', morebytes, 'for i',i) + rle_bytes += morebytes + coeffs3_comp[i] = dwt_dequantize_coeffs(coeffs3_comp[i], quantization_dtype=comp_type, qdat_tuple=coeffs3_comp_qdat[i]) + else: + for k,v in a.items(): + rle0 = rle(coeffs3_comp[i][k]) + morebytes = len(rle0)*(rle_type.itemsize+comp_type.itemsize) + # print('rle bytes added: ', morebytes, 'for i,k', i,k) + rle_bytes += morebytes + coeffs3_comp[i][k] = dwt_dequantize_coeffs(coeffs3_comp[i][k], quantization_dtype=comp_type, qdat_tuple=coeffs3_comp_qdat[i][k]) -# DWT + vdf_rec = pywt.waverecn(coeffs3_comp,wavelet=wavelet)*norm + # print(np.min(vdf_rec),np.max(vdf_rec)) + if loga: + vdf_rec = 10**vdf_rec.astype(np.float64) - offset + + # print(np.min(vdf_rec),np.max(vdf_rec)) + vdf_bytes_inflated = np.prod(vdf_3d.shape)*4 + print('Bytes in RLE-encoded coefficients:', rle_bytes, 'bytes in vdf (inflated):', vdf_bytes_inflated, 'bytes in sparse VDF', true_mem) + # print('vdf_rec',vdf_rec) + print('Nonzero bytes in coefficients', nonzeros*comp_type.itemsize) + + # compression = np.prod(vdf_3d.shape)/nonzeros + # if comp_type is not None: + # compression *= dtype0.itemsize/comp_type.itemsize + + compression = rle_bytes/vdf_bytes_inflated + + # volume_compressed = 0 + # for k,v in coeffs3_compress.items(): + # volume_compressed += sys.getsizeof(v) + # volume_orig = 0 + # for k,v in coeffs3.items(): + # volume_orig += sys.getsizeof(v) + # compression = volume_orig/volume_compressed + + print("compression (naive):", compression) + print("compression (true):", rle_bytes/true_bytes) + + # print(np.min(vdf_rec),np.max(vdf_rec)) + # if ~loga: + # project_tools.plot_vdfs(vdf,vdf_rec.astype(dtype0)) + # project_tools.print_comparison_stats(vdf,vdf_rec.astype(dtype0)) + + return cid, np.array(vdf_rec, dtype=np.float32),rle_bytes/true_bytes + + +# DWT-old def reconstruct_cid_dwt(f, cid,sparsity): import pywt max_indexes, vdf,len = vdf_extract.extract(f, cid,sparsity) @@ -328,10 +592,12 @@ def reconstruct_cid_dwt(f, cid,sparsity): # DCT -def reconstruct_cid_dct(f, cid,sparsity): +def reconstruct_cid_dct(f, cid,sparsity, blocksize = 8, keep_n = 4): from scipy.fft import dctn, idctn - blocksize = 8 - keep_n = 4 + # blocksize = 8 + # keep_n = 4 + assert keep_n > 0 + assert keep_n <= blocksize max_indexes, vdf,len = vdf_extract.extract(f, cid,sparsity) nx, ny, nz = np.shape(vdf) assert nx == ny == nz @@ -378,6 +644,70 @@ def reconstruct_cid_dct(f, cid,sparsity): sparsify(final_vdf,sparsity) return cid, np.array(final_vdf, dtype=np.float32),1 +# DCT +# def reconstruct_cid_dct_sparse(f, cid, sparsity, keep_n = 2): +# from scipy.fft import dctn, idctn +# WID = f.get_velocity_block_size() +# WID3 = WID**3 +# assert keep_n > 0 +# assert keep_n <= WID +# max_indexes, vdf,len = vdf_extract.extract(f, cid,sparsity) +# vdata = f.read_velocity_cells(cid) +# blocks = np.array(list(vdata.keys())).reshape((-1,WID3)) +# blockdata_all = np.array(list(vdata.values())).reshape((-1,WID3)) +# blockdata_new = np.zeros_like(blockdata_all) +# coefficients = np.zeros_like((blockdata_all)) + +# blockids = blocks[:,0]//WID + +# nx, ny, nz = np.shape(vdf) +# assert nx == ny == nz +# orig_shape = vdf.shape +# vdf[np.isnan(vdf)] = 0 + +# paddings = (np.ceil(np.array(vdf.shape)/8)).astype(int)*8 - vdf.shape +# paddings = ((0,paddings[0]),(0,paddings[1]),(0,paddings[2])) +# vdf = np.pad(vdf, paddings) + +# for i, blockdata in enumerate(blockdata_all): + + +# for i in range(0,vdf.shape[0], WID): +# for j in range(0, vdf.shape[1], WID): +# for k in range(0, vdf.shape[2], WID): +# coefficients[i:i+WID,j:j+WID, k:k+WID] = dctn(vdf[i:i+WID,j:j+WID, k:k+WID]) + +# zeroed = np.zeros_like(block_data) +# for i in range(keep_n): +# for j in range(keep_n): +# for k in range(keep_n): +# zeroed[i::WID,j::WID,k::WID] = block_data[i::WID,j::WID,k::WID] + + +# volume_compressed = np.prod(keep_n*np.ceil(np.array(vdf.shape)/8)) +# volume_orig = np.prod(vdf.shape) +# compression = volume_orig/volume_compressed + +# vdf_rec = np.zeros_like(vdf) +# for i in range(0,vdf.shape[0], WID): +# for j in range(0, vdf.shape[1], WID): +# for k in range(0, vdf.shape[2], WID): +# vdf_rec[i:i+WID,j:j+WID, k:k+WID] = idctn(zeroed[i:i+WID,j:j+WID, k:k+WID]) + +# reconstructed_vdf = vdf_rec[0:orig_shape[0],0:orig_shape[1],0:orig_shape[2]] +# reconstructed_vdf= np.array(reconstructed_vdf,dtype=np.double) +# reconstructed_vdf= np.reshape(reconstructed_vdf,orig_shape,order='C') +# mesh = f.get_velocity_mesh_size() +# final_vdf = np.zeros((int(4 * mesh[0]), int(4 * mesh[1]), int(4 * mesh[2]))) +# final_vdf[ +# max_indexes[0] - len : max_indexes[0] + len, +# max_indexes[1] - len : max_indexes[1] + len, +# max_indexes[2] - len : max_indexes[2] + len, +# ] = reconstructed_vdf +# sparsify(final_vdf,sparsity) +# return cid, np.array(final_vdf, dtype=np.float32),1 + + def reconstruct_cid_vqvae(f, cid,sparsity): import torch import torch.nn as nn diff --git a/src/assessment/vdf_extract.py b/src/assessment/vdf_extract.py index 1b7df3a..8a8c06d 100644 --- a/src/assessment/vdf_extract.py +++ b/src/assessment/vdf_extract.py @@ -1,6 +1,6 @@ #some math to calculate the bulk velocity and its location in the VDF space def get_bulk_v(vdf,f,sparse=1e-16): - from scipy.integrate import simps + from scipy.integrate import simpson as simps import numpy as np mesh=f.get_velocity_mesh_extent() sz=f.get_velocity_mesh_size() @@ -9,10 +9,10 @@ def get_bulk_v(vdf,f,sparse=1e-16): v_y = np.linspace(mesh[1], mesh[4], int(wid*sz[1])) v_z = np.linspace(mesh[2], mesh[5], int(wid*sz[2])) Vx, Vy, Vz = np.meshgrid(v_x, v_y, v_z, indexing='ij') - term_1_x = simps(simps(simps(Vx * vdf, v_z), v_y), v_x) - term_1_y = simps(simps(simps(Vy * vdf, v_z), v_y), v_x) - term_1_z = simps(simps(simps(Vz * vdf, v_z), v_y), v_x) - term_2 = simps(simps(simps(vdf, v_z), v_y), v_x) + term_1_x = simps(simps(simps(Vx * vdf, x=v_z), x=v_y), x=v_x) + term_1_y = simps(simps(simps(Vy * vdf, x=v_z), x=v_y), x=v_x) + term_1_z = simps(simps(simps(Vz * vdf, x=v_z), x=v_y), x=v_x) + term_2 = simps(simps(simps(vdf, x=v_z), x=v_y), x=v_x) bulk_v = np.array([term_1_x / term_2,term_1_y / term_2,term_1_z / term_2]) index_x = (np.abs(v_x - bulk_v[0])).argmin() index_y = (np.abs(v_y - bulk_v[1])).argmin() @@ -40,22 +40,16 @@ def pad_array(input,shape,val=0): width.append((left,right)) return np.pad(input,width,mode="constant",constant_values=val) -def extract(f, cid,sparsity=1e-16,restrict_box=True): +def inflate(f, blockids, values, pop="proton"): import numpy as np - assert cid > 0 - - # -- read phase space density - vcells = f.read_velocity_cells(cid) - keys = list(vcells.keys()) - values = list(vcells.values()) - + # -- generate a velocity space size = f.get_velocity_mesh_size() vids = np.arange(4 * 4 * 4 * int(size[0]) * int(size[1]) * int(size[2])) # -- put phase space density into array dist = np.zeros_like(vids, dtype=float) - dist[keys] = values + dist[blockids] = values # -- sort vspace by velocity v = f.get_velocity_cell_coordinates(vids) @@ -77,16 +71,47 @@ def extract(f, cid,sparsity=1e-16,restrict_box=True): dist = dist[k] dist = dist.reshape(4 * int(size[0]), 4 * int(size[1]), 4 * int(size[2])) - vdf = dist + return dist + +def restrict_bbox(f, vdf, sparsity): + import numpy as np + + bulk_v,bulk_v_loc=get_bulk_v(vdf,f,sparsity) + + bbox=get_vdf_bounding_box(vdf,sparsity) + bbox_max_side=np.max([bbox[3]-bbox[0]+1,bbox[4]-bbox[1]+1,bbox[5]-bbox[2]+1]) + boxlen=bbox_max_side//2 + + data = vdf[(bulk_v_loc[0] - boxlen) : (bulk_v_loc[0] + boxlen), (bulk_v_loc[1] - boxlen) : (bulk_v_loc[1] + boxlen), (bulk_v_loc[2] - boxlen) : (bulk_v_loc[2] + boxlen)] + return bulk_v_loc,np.array(data, dtype=np.double),boxlen + + + +def extract(f, cid,sparsity=1e-16,restrict_box=True): + import numpy as np + assert cid > 0 + + # -- read phase space density + vcells = f.read_velocity_cells(cid) + keys = list(vcells.keys()) + values = list(vcells.values()) + + + + vdf = inflate(f, keys, values) + bulk_v,bulk_v_loc=get_bulk_v(vdf,f,sparsity) bbox=get_vdf_bounding_box(vdf,sparsity) bbox_max_side=np.max([bbox[3]-bbox[0]+1,bbox[4]-bbox[1]+1,bbox[5]-bbox[2]+1]) - len=bbox_max_side//2 + boxlen=bbox_max_side//2 if (not restrict_box): - return bulk_v_loc,np.array(vdf, dtype=np.double),len - data = vdf[(bulk_v_loc[0] - len) : (bulk_v_loc[0] + len), (bulk_v_loc[1] - len) : (bulk_v_loc[1] + len), (bulk_v_loc[2] - len) : (bulk_v_loc[2] + len)] + return bulk_v_loc,np.array(vdf, dtype=np.double),boxlen + else: + bulk_v_loc,vdf,boxlen = restrict_bbox(f, vdf, sparsity) + return bulk_v_loc,vdf,boxlen + data = vdf[(bulk_v_loc[0] - boxlen) : (bulk_v_loc[0] + boxlen), (bulk_v_loc[1] - boxlen) : (bulk_v_loc[1] + boxlen), (bulk_v_loc[2] - boxlen) : (bulk_v_loc[2] + boxlen)] # print(f"Extracted VDF shape = {np.shape(data)}") - return bulk_v_loc,np.array(data, dtype=np.double),len + return bulk_v_loc,np.array(data, dtype=np.double),boxlen if __name__ == "__main__": diff --git a/src/assessment/vdf_replace.py b/src/assessment/vdf_replace.py index a10e0f8..e6a74ae 100644 --- a/src/assessment/vdf_replace.py +++ b/src/assessment/vdf_replace.py @@ -298,16 +298,84 @@ def add_reconstructed_velocity_space_mpi(dst, cellid, blocks_and_values, bpc): return +def blockdata_to_vdfmap(f, blocks, blockdata): + # Construct velocity cells: + WID = f.get_WID() + WID2 = WID*WID + WID3 = WID2*WID + velocity_cell_ids = [] + for kv in range(WID): + for jv in range(WID): + for iv in range(WID): + velocity_cell_ids.append(kv*WID2 + jv*WID + iv) + + array_size = len(blockdata) + velocity_cells = {} + + for i in range(array_size): + velocity_block_id = blocks[i] #data_block_ids[i] + avgIndex = 0 + avgs =blockdata[i] + + for j in velocity_cell_ids + WID3*velocity_block_id: + velocity_cells[(int)(j)] = avgs[avgIndex] + avgIndex = avgIndex + 1 + + return list(velocity_cells.keys()), list(velocity_cells.values()) + +def get_blocks_to_keep(f, blocks, block_mins, sparsity, pop = "proton"): + + block_keep = np.zeros(blocks.shape, dtype=bool) + block_keep_n = np.zeros(blocks.shape, dtype=bool) + extents = f.get_velocity_mesh_extent(pop=pop) + size = f.get_velocity_mesh_size(pop=pop) + dv = f.get_velocity_mesh_dv(pop=pop) + dv = dv[0] + WID = f.get_WID() + + for blockid in blocks: + block_coords = f.get_velocity_block_coordinates(blockid) + + rx = int(np.floor((block_coords[0] - extents[0]) / (dv*WID))) + ry = int(np.floor((block_coords[1] - extents[1]) / (dv*WID))) + rz = int(np.floor((block_coords[2] - extents[2]) / (dv*WID))) + + + if block_mins[blockid] >= sparsity: + block_keep[blockid] = True + + # Expand to the neighborhood of ns velocity blocks - ns should at least be 1 + ns = 1 + ds = list(range(-ns,ns+1)) + + neighbors = np.array([[rx+dx,ry+dy,rz+dz] for dx in ds for dy in ds for dz in ds if not (dx == 0 and dy == 0 and dz == 0)]) + for d in [0,1,2]: + neighbors[:,d] = np.clip(neighbors[:,d],0,size[d]-1) -def reconstruct_vdf(f, cid,sparsity ,reconstruction_method): + bIX = neighbors[:,0] + bIY = neighbors[:,1] + bIZ = neighbors[:,2] + + nIDs = bIZ + bIY*size[2] + bIX*size[2]*size[1] + for n in nIDs: + block_keep_n[n] = True + + # for blockid in blocks[~block_keep]: + # block_data[blockid,:] = 0 + # print(np.sum(block_keep), np.sum(block_keep_n), np.sum(block_keep | block_keep_n)) + blocks_keep = block_keep | block_keep_n + return blocks_keep + + +def reconstruct_vdf(f, cid, sparsity, reconstruction_method, sparsify = False, method_kwargs = {}): """ f: VlsvReader Object len : boxed limits of vdfs that get reconstructed cid: the cellid to reconstruct - reconstruction_method: function that performs the reconstruction + reconstruction_method: function that performs the reconstruction (e.g. cm.reconstruct_cid_zfp) """ print(f"Extracting CellID {cid}") - _, reconstructed,cm_ratio = reconstruction_method(f, cid,sparsity) + _, reconstructed,cm_ratio = reconstruction_method(f, cid,sparsity, **method_kwargs) extents = f.get_velocity_mesh_extent() size = f.get_velocity_mesh_size() dv = f.get_velocity_mesh_dv() @@ -318,6 +386,7 @@ def reconstruct_vdf(f, cid,sparsity ,reconstruction_method): blocks = np.arange(0, np.prod(size), dtype=np.int32) reconstructed = np.array(reconstructed, dtype=np.float32) block_data = np.zeros((np.prod(size), np.power(WID, 3)), dtype=np.float32) + block_mins = np.full(blocks.shape,fill_value=np.inf, dtype=np.float32) for blockid in blocks: block_coords = f.get_velocity_block_coordinates(blockid) rx = int(np.floor((block_coords[0] - extents[0]) / dv)) @@ -328,8 +397,18 @@ def reconstruct_vdf(f, cid,sparsity ,reconstruction_method): for bz in range(0, WID): localid = bz * WID**2 + by * WID + bx block_data[blockid, localid] = reconstructed[rz + bz, ry + by, rx + bx] + block_mins[blockid] = np.min(block_data[blockid, :]) + + if sparsify: + blocks_keep = get_blocks_to_keep(f, blocks, block_mins, sparsity) + blocks = blocks[blocks_keep] + block_data = block_data[blocks_keep,:] + else: + pass + + print("Blocks reconstructed", len(blocks)) - return blocks, block_data , cm_ratio + return blocks, block_data, cm_ratio def reconstruct_vdf_debug(f, cid,sparsity ,reconstruction_method): diff --git a/src/tools.py b/src/tools.py index 5ac7327..ed5e109 100644 --- a/src/tools.py +++ b/src/tools.py @@ -66,8 +66,8 @@ def plot_vdfs(a, b, vdf_vmin=1e-16,save=False,output_name=None): slicer2d = np.s_[:, :, nz // 2] slicer1d = np.s_[:, ny // 2, nz // 2] - im1 = ax[0, 0].imshow(a[slicer2d], norm=colors.LogNorm(vmin=vdf_vmin)) - im2 = ax[0, 1].imshow(b[slicer2d], norm=colors.LogNorm(vmin=vdf_vmin)) + im1 = ax[0, 0].imshow(a[slicer2d], norm=colors.LogNorm(vmin=vdf_vmin),interpolation_stage="rgba") + im2 = ax[0, 1].imshow(b[slicer2d], norm=colors.LogNorm(vmin=vdf_vmin),interpolation_stage="rgba") ax[1, 0].semilogy(b[slicer1d], label="Reconstructed") ax[1, 0].semilogy(a[slicer1d], label="Original") ax2 = ax[1, 0].twinx()