Skip to content
This repository was archived by the owner on Sep 29, 2025. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
340 changes: 335 additions & 5 deletions src/assessment/compression_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
VDF_TYPE_SIZE=4

def sparsify(vdf,sparsity):
vdf[vdf<sparsity]=0.0
# vdf[vdf<sparsity]=0.0
return

# MLP with fourier features
Expand Down Expand Up @@ -273,8 +273,272 @@ def reconstruct_cid_gmm(f, cid,sparsity):
sparsify(final_vdf,sparsity)
return cid, np.array(final_vdf, dtype=np.float32),1

#DWT
def reconstruct_cid_dwt_new(f, cid, sparsity):
    """Compress and reconstruct the VDF of spatial cell `cid` with a 3-D DWT.

    The extracted VDF cube is normalized, decomposed with
    ``pywt.wavedecn``, its coefficients soft-thresholded and quantized to
    uint16, the size of a run-length encoding of the quantized
    coefficients is measured, and the VDF is reconstructed from the
    dequantized coefficients.

    Parameters
    ----------
    f : VLSV reader object (must provide ``read_velocity_cells``).
    cid : spatial cell id.
    sparsity : sparsity threshold, forwarded to ``vdf_extract.extract``.

    Returns
    -------
    tuple
        ``(cid, reconstructed_vdf_float32, compression_ratio)`` where the
        ratio is RLE-encoded coefficient bytes over the (rough) byte size
        of the original sparse representation.
    """
    import pywt

    vcells = f.read_velocity_cells(cid)
    keys = list(vcells.keys())
    values = list(vcells.values())

    # Rough footprint of the sparse VDF: one key per 4^3-cell block plus
    # all values.  NOTE(review): sys.getsizeof is shallow (container only),
    # so this is an estimate — acceptable since it is only used as a
    # denominator for the compression ratio.
    true_bytes = sys.getsizeof(keys[::4 ** 3]) + sys.getsizeof(values)

    # BUGFIX: the original bound this third return value to the name `len`,
    # shadowing the builtin; `len(rle0)` below then raised TypeError.
    max_indexes, vdf, half_extent = vdf_extract.extract(f, cid, sparsity)

    def dwt_quantize_coeffs(c, quantization_dtype=None):
        """Quantize coefficient array `c` into `quantization_dtype`.

        Returns ``(quantized, (offset, range))`` so the mapping can be
        inverted by `dwt_dequantize_coeffs`.  With no dtype, the data is
        passed through unchanged.
        """
        if quantization_dtype is None:
            return c, [0, 1]

        if quantization_dtype.kind == 'u':
            # Affine map [min, max] -> [0, dtype_max - 1]; the top code of
            # the unsigned range is reserved as a sentinel for non-finite
            # entries.
            cmax, coffset = np.nanmax(c), np.nanmin(c)
            crange = cmax - coffset
            dtype_max = np.iinfo(quantization_dtype).max - 1
            mask = ~np.isfinite(c)
            c = (c - coffset) / (crange) * dtype_max
            c[mask] = np.iinfo(quantization_dtype).max
            c = c.astype(quantization_dtype)
            return c, (coffset, crange)
        elif quantization_dtype.kind == 'f':
            # Rescale into the full positive range of the float dtype;
            # non-finite entries are zeroed.
            cmax, coffset = np.nanmax(c), np.nanmin(c)
            crange = cmax - coffset
            dtype_max = np.finfo(quantization_dtype).max
            c = (c - coffset) / (crange) * dtype_max
            c[~np.isfinite(c)] = 0
            return c.astype(quantization_dtype), (coffset, crange)

    def dwt_dequantize_coeffs(c, quantization_dtype=None, qdat_tuple=(0, 1)):
        """Invert `dwt_quantize_coeffs` using the stored (offset, range)."""
        if quantization_dtype is None:
            return c
        c = c.astype(np.float32)
        if quantization_dtype.kind == 'u':
            # Entries at the sentinel code were non-finite; map them to the
            # offset (the minimum of the original data).
            mask = c == np.iinfo(quantization_dtype).max
            c = c / (np.iinfo(quantization_dtype).max - 1)
            c = c * qdat_tuple[1] + qdat_tuple[0]
            c[mask] = qdat_tuple[0]
        elif quantization_dtype.kind == 'f':
            c = c / (np.finfo(quantization_dtype).max)
            c = c * qdat_tuple[1] + qdat_tuple[0]
        return c

    def dwt_threshold_coeffs(c, threshold=1e-16, high=1e-15):
        """Soft-threshold coefficients (shrink toward zero by `threshold`).

        `high` is unused; it belonged to a disabled firm-threshold variant.
        """
        return np.sign(c) * np.maximum(np.abs(c) - threshold, 0)

    # Run-length encoding, used ONLY to estimate the compressed size.
    rle_type = np.dtype(np.uint32)

    def rle(c):
        """Return a list of (runcount, value) tuples over `c` in C order.

        NOTE(review): behavior kept identical to the original — the final
        run is never flushed and each tuple pairs the previous run's count
        with the *next* value.  Tolerable here because only the tuple
        count feeds the byte estimate.
        """
        run_tuples = []
        runcount = 0
        v0 = 0
        init = True
        for v in np.nditer(c):
            if v == v0:
                runcount += 1
            else:
                if not init:
                    if runcount > np.iinfo(rle_type).max:
                        print("Error: runcount overflow")
                    run_tuples.append((runcount, v))
                v0 = v
                runcount = 0
            init = False
        return run_tuples

    vdf_3d = vdf.copy().astype(np.float64)

    # Optional log10 transform (currently disabled).
    loga = False
    offset = np.float64(1)
    if loga:
        vdf_3d[vdf_3d < 0] = 0
        vdf_3d = np.log10(vdf_3d + offset)

    # Normalize by the global maximum so thresholds act in [0, 1] space.
    norm = np.nanmax(vdf_3d)
    print("norm", norm)
    vdf_3d /= norm

    comp_type = np.dtype(np.uint16)  # quantization dtype for the coefficients

    threshold = np.float64(1e-16)
    wavelet = 'db9'  # alternative tried: 'bior1.3'

    dwtn_mlevel = pywt.dwtn_max_level(vdf_3d.shape, wavelet)
    level_delta = 0
    # discard_level > max level index, so no subband is ever discarded.
    discard_level = dwtn_mlevel + 1
    print("Decomposing to ", max(dwtn_mlevel - level_delta, 0), "levels out of ", dwtn_mlevel, 'discarding at level', discard_level)
    coeffs3 = pywt.wavedecn(vdf_3d, wavelet=wavelet, level=max(dwtn_mlevel - level_delta, 0))

    # Shallow copy of the coefficient list (arrays are replaced per entry).
    coeffs3_comp = coeffs3.copy()
    print(type(coeffs3_comp))

    zeros = 0
    nonzeros = 0

    dtype0 = np.dtype(np.float32)
    # Scale the threshold into normalized coefficient space.
    if loga:
        threshold = np.log10(threshold + offset) / norm
    else:
        threshold = threshold / norm
    print(dtype0.itemsize)

    # Per-level (and per-subband) (offset, range) quantization metadata.
    coeffs3_comp_qdat = {}

    for i, a in enumerate(coeffs3_comp):
        if isinstance(a, np.ndarray):
            # Approximation coefficients (single ndarray at index 0).
            if loga:
                coeffs3_comp[i] = dwt_threshold_coeffs(coeffs3_comp[i], 1e-17, 1e-17)
            else:
                coeffs3_comp[i] = dwt_threshold_coeffs(coeffs3_comp[i], threshold=threshold, high=threshold * 1000)
            coeffs3_comp[i], coeffs3_comp_qdat[i] = dwt_quantize_coeffs(coeffs3_comp[i], comp_type)
            mask = np.abs(coeffs3_comp[i]) == 0
            zeros += np.sum(mask)
            nonzeros += np.sum(~mask)
            if not loga:
                coeffs3_comp[i][mask] = 0
        else:
            # Detail coefficients: dict of directional subband arrays.
            coeffs3_comp_qdat.setdefault(i, {})
            print('dict at level', i)
            for k, v in a.items():
                if i >= discard_level:
                    # Unreachable with the current discard_level setting.
                    print('discarding at level', i)
                    coeffs3_comp[i][k] = np.zeros_like(coeffs3_comp[i][k])
                else:
                    if loga:
                        coeffs3_comp[i][k] = dwt_threshold_coeffs(coeffs3_comp[i][k], threshold=1e-17, high=1e-17)
                    else:
                        coeffs3_comp[i][k] = dwt_threshold_coeffs(coeffs3_comp[i][k], threshold=threshold, high=threshold * 1000)
                    coeffs3_comp[i][k], coeffs3_comp_qdat[i][k] = dwt_quantize_coeffs(coeffs3_comp[i][k], quantization_dtype=comp_type)
                    mask = np.abs(coeffs3_comp[i][k]) == 0
                    if not loga:
                        coeffs3_comp[i][k][mask] = 0
                    zeros += np.sum(mask)
                    nonzeros += np.sum(~mask)

    print("number of zeros:", zeros, "nonzeros:", nonzeros)

    # Measure the RLE size of the quantized coefficients and dequantize
    # them in place for the inverse transform.
    rle_bytes = 0
    for i, a in enumerate(coeffs3_comp):
        if isinstance(a, np.ndarray):
            rle0 = rle(coeffs3_comp[i])
            rle_bytes += len(rle0) * (rle_type.itemsize + comp_type.itemsize)
            coeffs3_comp[i] = dwt_dequantize_coeffs(coeffs3_comp[i], quantization_dtype=comp_type, qdat_tuple=coeffs3_comp_qdat[i])
        else:
            for k, v in a.items():
                rle0 = rle(coeffs3_comp[i][k])
                rle_bytes += len(rle0) * (rle_type.itemsize + comp_type.itemsize)
                coeffs3_comp[i][k] = dwt_dequantize_coeffs(coeffs3_comp[i][k], quantization_dtype=comp_type, qdat_tuple=coeffs3_comp_qdat[i][k])

    # Inverse DWT, then undo the normalization (and log transform, if any).
    vdf_rec = pywt.waverecn(coeffs3_comp, wavelet=wavelet) * norm
    if loga:
        vdf_rec = 10 ** vdf_rec.astype(np.float64) - offset

    vdf_bytes_inflated = np.prod(vdf_3d.shape) * 4  # float32 bytes of the dense cube
    # BUGFIX: original printed the undefined name `true_mem` -> NameError.
    print('Bytes in RLE-encoded coefficients:', rle_bytes, 'bytes in vdf (inflated):', vdf_bytes_inflated, 'bytes in sparse VDF', true_bytes)
    print('Nonzero bytes in coefficients', nonzeros * comp_type.itemsize)

    compression = rle_bytes / vdf_bytes_inflated
    print("compression (naive):", compression)
    print("compression (true):", rle_bytes / true_bytes)

    return cid, np.array(vdf_rec, dtype=np.float32), rle_bytes / true_bytes


# DWT-old
def reconstruct_cid_dwt(f, cid,sparsity):
import pywt
max_indexes, vdf,len = vdf_extract.extract(f, cid,sparsity)
Expand Down Expand Up @@ -328,10 +592,12 @@ def reconstruct_cid_dwt(f, cid,sparsity):


# DCT
def reconstruct_cid_dct(f, cid,sparsity):
def reconstruct_cid_dct(f, cid,sparsity, blocksize = 8, keep_n = 4):
from scipy.fft import dctn, idctn
blocksize = 8
keep_n = 4
# blocksize = 8
# keep_n = 4
assert keep_n > 0
assert keep_n <= blocksize
max_indexes, vdf,len = vdf_extract.extract(f, cid,sparsity)
nx, ny, nz = np.shape(vdf)
assert nx == ny == nz
Expand Down Expand Up @@ -378,6 +644,70 @@ def reconstruct_cid_dct(f, cid,sparsity):
sparsify(final_vdf,sparsity)
return cid, np.array(final_vdf, dtype=np.float32),1

# DCT
# def reconstruct_cid_dct_sparse(f, cid, sparsity, keep_n = 2):
# from scipy.fft import dctn, idctn
# WID = f.get_velocity_block_size()
# WID3 = WID**3
# assert keep_n > 0
# assert keep_n <= WID
# max_indexes, vdf,len = vdf_extract.extract(f, cid,sparsity)
# vdata = f.read_velocity_cells(cid)
# blocks = np.array(list(vdata.keys())).reshape((-1,WID3))
# blockdata_all = np.array(list(vdata.values())).reshape((-1,WID3))
# blockdata_new = np.zeros_like(blockdata_all)
# coefficients = np.zeros_like((blockdata_all))

# blockids = blocks[:,0]//WID

# nx, ny, nz = np.shape(vdf)
# assert nx == ny == nz
# orig_shape = vdf.shape
# vdf[np.isnan(vdf)] = 0

# paddings = (np.ceil(np.array(vdf.shape)/8)).astype(int)*8 - vdf.shape
# paddings = ((0,paddings[0]),(0,paddings[1]),(0,paddings[2]))
# vdf = np.pad(vdf, paddings)

# for i, blockdata in enumerate(blockdata_all):


# for i in range(0,vdf.shape[0], WID):
# for j in range(0, vdf.shape[1], WID):
# for k in range(0, vdf.shape[2], WID):
# coefficients[i:i+WID,j:j+WID, k:k+WID] = dctn(vdf[i:i+WID,j:j+WID, k:k+WID])

# zeroed = np.zeros_like(block_data)
# for i in range(keep_n):
# for j in range(keep_n):
# for k in range(keep_n):
# zeroed[i::WID,j::WID,k::WID] = block_data[i::WID,j::WID,k::WID]


# volume_compressed = np.prod(keep_n*np.ceil(np.array(vdf.shape)/8))
# volume_orig = np.prod(vdf.shape)
# compression = volume_orig/volume_compressed

# vdf_rec = np.zeros_like(vdf)
# for i in range(0,vdf.shape[0], WID):
# for j in range(0, vdf.shape[1], WID):
# for k in range(0, vdf.shape[2], WID):
# vdf_rec[i:i+WID,j:j+WID, k:k+WID] = idctn(zeroed[i:i+WID,j:j+WID, k:k+WID])

# reconstructed_vdf = vdf_rec[0:orig_shape[0],0:orig_shape[1],0:orig_shape[2]]
# reconstructed_vdf= np.array(reconstructed_vdf,dtype=np.double)
# reconstructed_vdf= np.reshape(reconstructed_vdf,orig_shape,order='C')
# mesh = f.get_velocity_mesh_size()
# final_vdf = np.zeros((int(4 * mesh[0]), int(4 * mesh[1]), int(4 * mesh[2])))
# final_vdf[
# max_indexes[0] - len : max_indexes[0] + len,
# max_indexes[1] - len : max_indexes[1] + len,
# max_indexes[2] - len : max_indexes[2] + len,
# ] = reconstructed_vdf
# sparsify(final_vdf,sparsity)
# return cid, np.array(final_vdf, dtype=np.float32),1


def reconstruct_cid_vqvae(f, cid,sparsity):
import torch
import torch.nn as nn
Expand Down
Loading