diff --git a/sfft/CustomizedPacket.py b/sfft/CustomizedPacket.py index e4cd3de..fcd011e 100644 --- a/sfft/CustomizedPacket.py +++ b/sfft/CustomizedPacket.py @@ -1,3 +1,7 @@ +import cupy as cp +import tracemalloc +import logging +import sys import time import numpy as np import os.path as pa @@ -13,7 +17,7 @@ class Customized_Packet: @staticmethod def CP(FITS_REF, FITS_SCI, FITS_mREF, FITS_mSCI, ForceConv, GKerHW, \ FITS_DIFF=None, FITS_Solution=None, KerPolyOrder=2, BGPolyOrder=2, ConstPhotRatio=True, \ - BACKEND_4SUBTRACT='Cupy', CUDA_DEVICE_4SUBTRACT='0', NUM_CPU_THREADS_4SUBTRACT=8, VERBOSE_LEVEL=2): + BACKEND_4SUBTRACT='Cupy', CUDA_DEVICE_4SUBTRACT='0', NUM_CPU_THREADS_4SUBTRACT=8, VERBOSE_LEVEL=2, logger=None): """ * Parameters for Customized SFFT @@ -85,11 +89,20 @@ def CP(FITS_REF, FITS_SCI, FITS_mREF, FITS_mSCI, ForceConv, GKerHW, \ """ + # Configure logger (Rob) + _logger = logging.getLogger(f'sfft.CustomizedPacket.CP') + if not _logger.hasHandlers(): + log_out = logging.StreamHandler(sys.stderr) + formatter = logging.Formatter(f'[%(asctime)s - %(levelname)s] %(message)s', datefmt='%Y-%m-%d %H:%M:%S') + log_out.setFormatter(formatter) + _logger.addHandler(log_out) + _logger.setLevel(logging.DEBUG) # ERROR, WARNING, INFO, or DEBUG (in that order by increasing detail) + # * Read input images - PixA_REF = fits.getdata(FITS_REF, ext=0).T - PixA_SCI = fits.getdata(FITS_SCI, ext=0).T - PixA_mREF = fits.getdata(FITS_mREF, ext=0).T - PixA_mSCI = fits.getdata(FITS_mSCI, ext=0).T + PixA_REF = fits.getdata(FITS_REF, memmap=False, ext=0).T + PixA_SCI = fits.getdata(FITS_SCI, memmap=False, ext=0).T + PixA_mREF = fits.getdata(FITS_mREF, memmap=False, ext=0).T + PixA_mSCI = fits.getdata(FITS_mSCI, memmap=False, ext=0).T if not PixA_REF.flags['C_CONTIGUOUS']: PixA_REF = np.ascontiguousarray(PixA_REF, np.float64) @@ -131,15 +144,29 @@ def CP(FITS_REF, FITS_SCI, FITS_mREF, FITS_mSCI, ForceConv, GKerHW, \ print('MeLOn CheckPoint: TRIGGER Function Compilations of SFFT-SUBTRACTION!') Tcomp_start = time.time() + + ### I ADDED DEBUG MEM TRACING STATEMENTS + # tracemalloc.start() SFFTConfig = SingleSFFTConfigure.SSC(NX=PixA_REF.shape[0], NY=PixA_REF.shape[1], KerHW=KerHW, \ KerPolyOrder=KerPolyOrder, BGPolyOrder=BGPolyOrder, ConstPhotRatio=ConstPhotRatio, \ BACKEND_4SUBTRACT=BACKEND_4SUBTRACT, NUM_CPU_THREADS_4SUBTRACT=NUM_CPU_THREADS_4SUBTRACT, \ VERBOSE_LEVEL=VERBOSE_LEVEL) + # size, peak = tracemalloc.get_traced_memory() + # logger.debug(f'MEMORY IN CUSTOMIZEDPACKET FROM SFFTCONFIG size={size}, peak={peak}') + # tracemalloc.clear_traces() + # tracemalloc.stop() + if VERBOSE_LEVEL in [1, 2]: _message = 'Function Compilations of SFFT-SUBTRACTION TAKES [%.3f s]' %(time.time() - Tcomp_start) print('\nMeLOn Report: %s' %_message) + mempool = cp.get_default_memory_pool() + pinned_mempool = cp.get_default_pinned_memory_pool() + # print('MEMPOOL USED BYTES', mempool.used_bytes()) + # print('MEMPOOL TOTAL BYTES', mempool.total_bytes()) + # print('PINNED MEMPOOL FREE BLOCKS', pinned_mempool.n_free_blocks()) + # * Perform SFFT Subtraction if ConvdSide == 'REF': PixA_mI, PixA_mJ = PixA_mREF, PixA_mSCI @@ -160,11 +187,26 @@ def CP(FITS_REF, FITS_SCI, FITS_mREF, FITS_mSCI, ForceConv, GKerHW, \ if VERBOSE_LEVEL in [0, 1, 2]: print('MeLOn CheckPoint: TRIGGER SFFT-SUBTRACTION!') + # print('MEMPOOL USED BYTES', mempool.used_bytes()) + # print('MEMPOOL TOTAL BYTES', mempool.total_bytes()) + # print('PINNED MEMPOOL FREE BLOCKS', pinned_mempool.n_free_blocks()) + Tsub_start = time.time() + + # tracemalloc.start() + _tmp = GeneralSFFTSubtract.GSS(PixA_I=PixA_I, PixA_J=PixA_J, PixA_mI=PixA_mI, PixA_mJ=PixA_mJ, \ SFFTConfig=SFFTConfig, ContamMask_I=None, BACKEND_4SUBTRACT=BACKEND_4SUBTRACT, \ NUM_CPU_THREADS_4SUBTRACT=NUM_CPU_THREADS_4SUBTRACT, VERBOSE_LEVEL=VERBOSE_LEVEL) + # print('MEMPOOL USED BYTES', mempool.used_bytes()) + # print('MEMPOOL TOTAL BYTES', mempool.total_bytes()) + # print('PINNED MEMPOOL FREE BLOCKS', pinned_mempool.n_free_blocks()) + + # size, peak = tracemalloc.get_traced_memory() + # logger.debug(f'MEMORY IN CUSTOMIZEDPACKET FROM GENERALSFFTSUBTRACT size={size}, peak={peak}') + # tracemalloc.clear_traces() + Solution, PixA_DIFF = _tmp[:2] if VERBOSE_LEVEL in [1, 2]: _message = 'SFFT-SUBTRACTION TAKES [%.3f s]' %(time.time() - Tsub_start) diff --git a/sfft/utils/ConvKernelConvertion.py b/sfft/utils/ConvKernelConvertion.py index fe59b51..832ea83 100644 --- a/sfft/utils/ConvKernelConvertion.py +++ b/sfft/utils/ConvKernelConvertion.py @@ -1,4 +1,5 @@ import numpy as np +import cupy __author__ = "Lei Hu " __version__ = "v1.0" @@ -17,16 +18,16 @@ def CSZ(ConvKernel, N0, N1): L0, L1 = ConvKernel.shape w0, w1 = (L0-1) // 2, (L1-1) // 2 pd0, pd1 = N0 - L0, N1 - L1 - TailZP = np.lib.pad(ConvKernel, ((0, pd0), (0, pd1)), 'constant', constant_values=(0, 0)) # Tail-Zero-Padding - KIMG_CSZ = np.roll(np.roll(TailZP, -w0, axis=0), -w1, axis=1) # Circular-Shift + TailZP = cupy.pad(ConvKernel, ((0, pd0), (0, pd1)), 'constant', constant_values=(0, 0)) # Tail-Zero-Padding + KIMG_CSZ = cupy.roll(np.roll(TailZP, -w0, axis=0), -w1, axis=1) # Circular-Shift return KIMG_CSZ def iCSZ(KIMG, L0, L1): N0, N1 = KIMG.shape w0, w1 = (L0-1) // 2, (L1-1) // 2 - KIMG_iCSZ = np.roll(np.roll(KIMG, w1, axis=1), w0, axis=0) # inverse Circular-Shift + KIMG_iCSZ = cupy.roll(cupy.roll(KIMG, w1, axis=1), w0, axis=0) # inverse Circular-Shift ConvKernel = KIMG_iCSZ[:L0, :L1] # Tail-Truncation - lost_weight = 1.0 - np.sum(np.abs(ConvKernel)) / np.sum(np.abs(KIMG_iCSZ)) + lost_weight = 1.0 - cupy.sum(cupy.abs(ConvKernel)) / cupy.sum(cupy.abs(KIMG_iCSZ)) print('MeLOn CheckPoint: Tail-Truncation Lost-Weight [%.4f %s] (Absolute Percentage Error) ' %(lost_weight*100, '%')) return ConvKernel diff --git a/sfft/utils/CudaResampling.py b/sfft/utils/CudaResampling.py new file mode 100644 index 0000000..20a738b --- /dev/null +++ b/sfft/utils/CudaResampling.py @@ -0,0 +1,410 @@ +import time +import warnings +import cupy as cp +import numpy as np +from math import floor +from astropy.wcs import WCS, FITSFixedWarning +from sfft.utils.CupyWCSTransform import Cupy_WCS_Transform + +__last_update__ = "2024-09-19" +__author__ = "Lei Hu " + +class Cuda_Resampling: + def __init__(self, RESAMP_METHOD="BILINEAR", VERBOSE_LEVEL=2): + """ + Image resampling using CUDA. + """ + self.RESAMP_METHOD = RESAMP_METHOD + self.VERBOSE_LEVEL = VERBOSE_LEVEL + return None + + def projection_cd(self, hdr_obj, hdr_targ, CDKEY="CD"): + """Mapping the target pixel centers to the object frame using Cupy for CD WCS (NO distortion)""" + NTX = int(hdr_targ["NAXIS1"]) + NTY = int(hdr_targ["NAXIS2"]) + + XX_targ_GPU, YY_targ_GPU = cp.meshgrid( + cp.arange(0, NTX) + 1., + cp.arange(0, NTY) + 1., + indexing='ij' + ) + + CWT = Cupy_WCS_Transform() + # * perform CD transformation through target WCS + if True: + # read header [target] + KEYDICT, CD_GPU = CWT.read_cd_wcs(hdr_wcs=hdr_targ, CDKEY=CDKEY) + CRPIX1_targ, CRPIX2_targ = KEYDICT["CRPIX1"], KEYDICT["CRPIX2"] + + # relative to reference point [target] + u_GPU, v_GPU = XX_targ_GPU.flatten() - CRPIX1_targ, YY_targ_GPU.flatten() - CRPIX2_targ + + # CD transformation [target] + x_GPU, y_GPU = CWT.cd_transform(IMAGE_X_GPU=u_GPU, IMAGE_Y_GPU=v_GPU, CD_GPU=CD_GPU) + + # * perform CD^-1 transformation through object WCS + if True: + # read header [object] + KEYDICT, CD_GPU = CWT.read_cd_wcs(hdr_wcs=hdr_obj, CDKEY=CDKEY) + CRPIX1_obj, CRPIX2_obj = KEYDICT["CRPIX1"], KEYDICT["CRPIX2"] + + # relative to reference point, consider offset between the WCS reference points [object] + # WARNING: the offset calculations may be wrong when the WCS reference points are very close to the N/S Poles! + x_GPU += (CRPIX1_targ - CRPIX1_obj + 180.0) % 360.0 - 180.0 # from using target reference point to object + y_GPU += CRPIX2_targ - CRPIX2_obj # ~ + + # inverse CD transformation [object] + u_GPU, v_GPU = CWT.cd_transform_inv(WORLD_X_GPU=x_GPU, WORLD_Y_GPU=y_GPU, CD_GPU=CD_GPU) + + # relative to image origin [object] + XX_proj_GPU = CRPIX1_obj + u_GPU.reshape((NTX, NTY)) + YY_proj_GPU = CRPIX2_obj + v_GPU.reshape((NTX, NTY)) + + return XX_proj_GPU, YY_proj_GPU + + def projection_sip(self, hdr_obj, hdr_targ, Nsamp=1024, RANDOM_SEED=10086): + """Mapping the target pixel centers to the object frame using Cupy for SIP WCS""" + NTX = int(hdr_targ["NAXIS1"]) + NTY = int(hdr_targ["NAXIS2"]) + + XX_targ_GPU, YY_targ_GPU = cp.meshgrid( + cp.arange(0, NTX) + 1., + cp.arange(0, NTY) + 1., + indexing='ij' + ) + + CWT = Cupy_WCS_Transform() + # * perform forward transformation (+CD) through target WCS + if True: + # read header [target] + KEYDICT, CD_GPU, A_SIP_GPU, B_SIP_GPU = CWT.read_sip_wcs(hdr_wcs=hdr_targ) + CRPIX1_targ, CRPIX2_targ = KEYDICT["CRPIX1"], KEYDICT["CRPIX2"] + CRVAL1_targ, CRVAL2_targ = KEYDICT["CRVAL1"], KEYDICT["CRVAL2"] + + # relative to reference point [target] + u_GPU, v_GPU = XX_targ_GPU.flatten() - CRPIX1_targ, YY_targ_GPU.flatten() - CRPIX2_targ + + # forward transformation for target grid of pixel centers [target] + U_GPU, V_GPU = CWT.sip_forward_transform(u_GPU=u_GPU, v_GPU=v_GPU, A_SIP_GPU=A_SIP_GPU, B_SIP_GPU=B_SIP_GPU) + + # plus CD transformation [target] + x_GPU, y_GPU = CWT.cd_transform(IMAGE_X_GPU=U_GPU, IMAGE_Y_GPU=V_GPU, CD_GPU=CD_GPU) + + # * perform backward transformation (+CD^-1) through object WCS + if True: + # read header [object] + KEYDICT, CD_GPU, A_SIP_GPU, B_SIP_GPU = CWT.read_sip_wcs(hdr_wcs=hdr_obj) + CRPIX1_obj, CRPIX2_obj = KEYDICT["CRPIX1"], KEYDICT["CRPIX2"] + CRVAL1_obj, CRVAL2_obj = KEYDICT["CRVAL1"], KEYDICT["CRVAL2"] + + # sampling random coordinates [object] + u_GPU, v_GPU = CWT.random_coord_sampling(N0=KEYDICT["N0"], N1=KEYDICT["N1"], + CRPIX1=CRPIX1_obj, CRPIX2=CRPIX2_obj, Nsamp=Nsamp, RANDOM_SEED=RANDOM_SEED) + + # fit polynomial form backward transformation [object] + U_GPU, V_GPU = CWT.sip_forward_transform(u_GPU=u_GPU, v_GPU=v_GPU, A_SIP_GPU=A_SIP_GPU, B_SIP_GPU=B_SIP_GPU) + AP_lstsq_GPU, BP_lstsq_GPU = CWT.lstsq_sip_backward_transform(u_GPU=u_GPU, v_GPU=v_GPU, + U_GPU=U_GPU, V_GPU=V_GPU, A_ORDER=KEYDICT["A_ORDER"], B_ORDER=KEYDICT["B_ORDER"])[2:4] + + # relative to reference point, consider offset between the WCS reference points [object] + # WARNING: the offset calculations may be wrong when the WCS reference points are very close to the N/S Poles! + # TODO: this is not accruate, ~ 1pix. + offset1 = ((CRVAL1_targ - CRVAL1_obj + 180.0) % 360.0 - 180.0) * cp.cos(cp.deg2rad((CRVAL2_targ + CRVAL2_obj)/2.)) + offset2 = CRVAL2_targ - CRVAL2_obj + x_GPU += offset1 + y_GPU += offset2 + + # inverse CD transformation [object] + U_GPU, V_GPU = CWT.cd_transform_inv(WORLD_X_GPU=x_GPU, WORLD_Y_GPU=y_GPU, CD_GPU=CD_GPU) + + # backward transformation [object] + FP_UV_GPU = CWT.sip_backward_matrix(U_GPU=U_GPU, V_GPU=V_GPU, ORDER=KEYDICT["A_ORDER"]) + GP_UV_GPU = CWT.sip_backward_matrix(U_GPU=U_GPU, V_GPU=V_GPU, ORDER=KEYDICT["B_ORDER"]) + + u_GPU, v_GPU = CWT.sip_backward_transform(U_GPU=U_GPU, V_GPU=V_GPU, + FP_UV_GPU=FP_UV_GPU, GP_UV_GPU=GP_UV_GPU, AP_GPU=AP_lstsq_GPU, BP_GPU=BP_lstsq_GPU) + + # relative to image origin [object] + XX_proj_GPU = CRPIX1_obj + u_GPU.reshape((NTX, NTY)) + YY_proj_GPU = CRPIX2_obj + v_GPU.reshape((NTX, NTY)) + + return XX_proj_GPU, YY_proj_GPU + + def projection_astropy(self, hdr_obj, hdr_targ): + """Mapping the target pixel centers to the object frame using Astropy""" + # * read object WCS and target WCS + def _readWCS(hdr, VERBOSE_LEVEL=2): + with warnings.catch_warnings(): + if VERBOSE_LEVEL in [0, 1]: behavior = 'ignore' + if VERBOSE_LEVEL in [2]: behavior = 'default' + warnings.filterwarnings(behavior, category=FITSFixedWarning) + + if hdr['CTYPE1'] == 'RA---TAN' and 'PV1_0' in hdr: + _hdr = hdr.copy() + _hdr['CTYPE1'] = 'RA---TPV' + _hdr['CTYPE2'] = 'DEC--TPV' + else: _hdr = hdr + w = WCS(_hdr) + return w + + w_obj = _readWCS(hdr=hdr_obj, VERBOSE_LEVEL=self.VERBOSE_LEVEL) + w_targ = _readWCS(hdr=hdr_targ, VERBOSE_LEVEL=self.VERBOSE_LEVEL) + + # * maaping target pixel centers to the object frame + NTX = int(hdr_targ["NAXIS1"]) + NTY = int(hdr_targ["NAXIS2"]) + + XX_targ, YY_targ = np.meshgrid(np.arange(0, NTX)+1., np.arange(0, NTY)+1., indexing='ij') + XY_targ = np.array([XX_targ.flatten(), YY_targ.flatten()]).T + XY_proj = w_obj.all_world2pix(w_targ.all_pix2world(XY_targ, 1), 1) + + XX_proj = XY_proj[:, 0].reshape((NTX, NTY)) + YY_proj = XY_proj[:, 1].reshape((NTX, NTY)) + + XX_proj_GPU = cp.array(XX_proj, dtype=np.float64) + YY_proj_GPU = cp.array(YY_proj, dtype=np.float64) + + return XX_proj_GPU, YY_proj_GPU + + def frame_extension(self, XX_proj_GPU, YY_proj_GPU, PixA_obj_GPU): + """Extend the object frame for resampling""" + NTX, NTY = XX_proj_GPU.shape + NOX, NOY = PixA_obj_GPU.shape + + # * padding the object frame + if self.RESAMP_METHOD == 'BILINEAR': + KERHW = (1, 1) + + if self.RESAMP_METHOD == 'LANCZOS3': + KERHW = (3, 3) + + # find the root index and shift with maximal kernel halfwidth (KERHW) + # Note: the index ranges (RMIN --- RMAX) and (CMIN --- CMAX) can cover + # all pixels that interpolation may be use. + + RMIN = (floor(XX_proj_GPU.min().item()) - 1) - KERHW[0] + RMAX = (floor(XX_proj_GPU.max().item()) - 1) + KERHW[0] + RPAD = (-np.min([RMIN, 0]), np.max([RMAX - (NOX - 1), 0])) + + CMIN = (floor(YY_proj_GPU.min().item()) - 1) - KERHW[1] + CMAX = (floor(YY_proj_GPU.max().item()) - 1) + KERHW[1] + CPAD = (-np.min([CMIN, 0]), np.max([CMAX - (NOY - 1), 0])) + + PAD_WIDTH = (RPAD, CPAD) + PixA_Eobj_GPU = cp.pad(PixA_obj_GPU, PAD_WIDTH, mode='constant', constant_values=0.) + NEOX, NEOY = PixA_Eobj_GPU.shape + + XX_Eproj_GPU = XX_proj_GPU + PAD_WIDTH[0][0] + YY_Eproj_GPU = YY_proj_GPU + PAD_WIDTH[1][0] + + RMIN_E = (floor(XX_Eproj_GPU.min().item()) - 1) - KERHW[0] + RMAX_E = (floor(XX_Eproj_GPU.max().item()) - 1) + KERHW[0] + + CMIN_E = (floor(YY_Eproj_GPU.min().item()) - 1) - KERHW[1] + CMAX_E = (floor(YY_Eproj_GPU.max().item()) - 1) + KERHW[1] + + assert RMIN_E >= 0 and CMIN_E >= 0 + assert RMAX_E < NEOX and CMAX_E < NEOY + + EProjDict = {} + EProjDict['NTX'] = NTX + EProjDict['NTY'] = NTY + + EProjDict['NOX'] = NOX + EProjDict['NOY'] = NOY + + EProjDict['NEOX'] = NEOX + EProjDict['NEOY'] = NEOY + + EProjDict['XX_Eproj_GPU'] = XX_Eproj_GPU + EProjDict['YY_Eproj_GPU'] = YY_Eproj_GPU + + return PixA_Eobj_GPU, EProjDict + + def resampling(self, PixA_Eobj_GPU, EProjDict): + """Resampling the object frame to the target frame using CUDA""" + NTX = EProjDict['NTX'] + NTY = EProjDict['NTY'] + + NEOX = EProjDict['NEOX'] + NEOY = EProjDict['NEOY'] + + XX_Eproj_GPU = EProjDict['XX_Eproj_GPU'] + YY_Eproj_GPU = EProjDict['YY_Eproj_GPU'] + + # * Cupy configuration + MaxThreadPerB = 8 + GPUManage = lambda NT: ((NT-1)//MaxThreadPerB + 1, min(NT, MaxThreadPerB)) + BpG_PIX0, TpB_PIX0 = GPUManage(NTX) + BpG_PIX1, TpB_PIX1 = GPUManage(NTY) + BpG_PIX, TpB_PIX = (BpG_PIX0, BpG_PIX1), (TpB_PIX0, TpB_PIX1, 1) + + PixA_resamp_GPU = cp.zeros((NTX, NTY), dtype=np.float64) + + if self.RESAMP_METHOD == "BILINEAR": + + # * perform bilinear resampling using CUDA + # input: PixA_Eobj | (NEOX, NEOY) + # input: XX_Eproj, YY_Eproj | (NTX, NTY) + # output: PixA_resamp | (NTX, NTY) + + _refdict = {'NTX': NTX, 'NTY': NTY, 'NEOX': NEOX, 'NEOY': NEOY} + _funcstr = r""" + extern "C" __global__ void kmain(double XX_Eproj_GPU[%(NTX)s][%(NTY)s], double YY_Eproj_GPU[%(NTX)s][%(NTY)s], + double PixA_Eobj_GPU[%(NEOX)s][%(NEOY)s], double PixA_resamp_GPU[%(NTX)s][%(NTY)s]) + { + int ROW = blockIdx.x*blockDim.x+threadIdx.x; + int COL = blockIdx.y*blockDim.y+threadIdx.y; + + int NTX = %(NTX)s; + int NTY = %(NTY)s; + int NEOX = %(NEOX)s; + int NEOY = %(NEOY)s; + + if (ROW < NTX && COL < NTY) { + + double x = XX_Eproj_GPU[ROW][COL]; + double y = YY_Eproj_GPU[ROW][COL]; + + int r1 = floor(x) - 1; + int c1 = floor(y) - 1; + int r2 = r1 + 1; + int c2 = c1 + 1; + + double dx = x - floor(x); + double dy = y - floor(y); + + double w11 = (1-dx) * (1-dy); + double w12 = (1-dx) * dy; + double w21 = dx * (1-dy); + double w22 = dx * dy; + + PixA_resamp_GPU[ROW][COL] = w11 * PixA_Eobj_GPU[r1][c1] + w12 * PixA_Eobj_GPU[r1][c2] + + w21 * PixA_Eobj_GPU[r2][c1] + w22 * PixA_Eobj_GPU[r2][c2]; + } + } + """ + _code = _funcstr % _refdict + _module = cp.RawModule(code=_code, backend=u'nvcc', translate_cucomplex=False) + resamp_func = _module.get_function('kmain') + + t0 = time.time() + resamp_func(args=(XX_Eproj_GPU, YY_Eproj_GPU, PixA_Eobj_GPU, PixA_resamp_GPU), block=TpB_PIX, grid=BpG_PIX) + if self.VERBOSE_LEVEL in [1, 2]: + print('MeLOn CheckPoint: Cuda resampling takes [%.6f s]' %(time.time() - t0)) + + if self.RESAMP_METHOD == "LANCZOS3": + + # * perform LANCZOS-3 resampling using CUDA + # input: XX_Eproj, YY_Eproj | (NTX, NTY) + # output: PixA_resamp | (NTX, NTY) + + _refdict = {'NTX': NTX, 'NTY': NTY} + _funcstr = r""" + extern "C" __global__ void kmain(double XX_Eproj_GPU[%(NTX)s][%(NTY)s], double YY_Eproj_GPU[%(NTX)s][%(NTY)s], + double LKERNEL_X_GPU[6][%(NTX)s][%(NTY)s], double LKERNEL_Y_GPU[6][%(NTX)s][%(NTY)s]) + { + int ROW = blockIdx.x*blockDim.x+threadIdx.x; + int COL = blockIdx.y*blockDim.y+threadIdx.y; + + int NTX = %(NTX)s; + int NTY = %(NTY)s; + + double PI = 3.141592653589793; + double PIS = 9.869604401089358; + + if (ROW < NTX && COL < NTY) { + + double x = XX_Eproj_GPU[ROW][COL]; + double y = YY_Eproj_GPU[ROW][COL]; + + double dx = x - floor(x); + double dy = y - floor(y); + + // LANCZOS3 weights in x axis + double wx0 = 3.0 * sin(PI*(-2.0 - dx)) * sin(PI*(-2.0 - dx)/3.0) / (PIS*(-2.0 - dx) * (-2.0 - dx)); + double wx1 = 3.0 * sin(PI*(-1.0 - dx)) * sin(PI*(-1.0 - dx)/3.0) / (PIS*(-1.0 - dx) * (-1.0 - dx)); + double wx2 = 1.0; + if (fabs(dx) > 1e-4) { + wx2 = 3.0 * sin(PI*(-dx)) * sin(PI*(-dx)/3.0) / (PIS*(-dx) * (-dx)); + } + double wx3 = 3.0 * sin(PI*(1.0 - dx)) * sin(PI*(1.0 - dx)/3.0) / (PIS*(1.0 - dx) * (1.0 - dx)); + double wx4 = 3.0 * sin(PI*(2.0 - dx)) * sin(PI*(2.0 - dx)/3.0) / (PIS*(2.0 - dx) * (2.0 - dx)); + double wx5 = 3.0 * sin(PI*(3.0 - dx)) * sin(PI*(3.0 - dx)/3.0) / (PIS*(3.0 - dx) * (3.0 - dx)); + + LKERNEL_X_GPU[0][ROW][COL] = wx0; + LKERNEL_X_GPU[1][ROW][COL] = wx1; + LKERNEL_X_GPU[2][ROW][COL] = wx2; + LKERNEL_X_GPU[3][ROW][COL] = wx3; + LKERNEL_X_GPU[4][ROW][COL] = wx4; + LKERNEL_X_GPU[5][ROW][COL] = wx5; + + // LANCZOS3 weights in y axis + double wy0 = 3.0 * sin(PI*(-2.0 - dy)) * sin(PI*(-2.0 - dy)/3.0) / (PIS*(-2.0 - dy) * (-2.0 - dy)); + double wy1 = 3.0 * sin(PI*(-1.0 - dy)) * sin(PI*(-1.0 - dy)/3.0) / (PIS*(-1.0 - dy) * (-1.0 - dy)); + double wy2 = 1.0; + if (fabs(dy) > 1e-4) { + wy2 = 3.0 * sin(PI*(-dy)) * sin(PI*(-dy)/3.0) / (PIS*(-dy) * (-dy)); + } + double wy3 = 3.0 * sin(PI*(1.0 - dy)) * sin(PI*(1.0 - dy)/3.0) / (PIS*(1.0 - dy) * (1.0 - dy)); + double wy4 = 3.0 * sin(PI*(2.0 - dy)) * sin(PI*(2.0 - dy)/3.0) / (PIS*(2.0 - dy) * (2.0 - dy)); + double wy5 = 3.0 * sin(PI*(3.0 - dy)) * sin(PI*(3.0 - dy)/3.0) / (PIS*(3.0 - dy) * (3.0 - dy)); + + LKERNEL_Y_GPU[0][ROW][COL] = wy0; + LKERNEL_Y_GPU[1][ROW][COL] = wy1; + LKERNEL_Y_GPU[2][ROW][COL] = wy2; + LKERNEL_Y_GPU[3][ROW][COL] = wy3; + LKERNEL_Y_GPU[4][ROW][COL] = wy4; + LKERNEL_Y_GPU[5][ROW][COL] = wy5; + } + } + """ + _code = _funcstr % _refdict + _module = cp.RawModule(code=_code, backend=u'nvcc', translate_cucomplex=False) + weightkernel_func = _module.get_function('kmain') + + _refdict = {'NTX': NTX, 'NTY': NTY, 'NEOX': NEOX, 'NEOY': NEOY} + _funcstr = r""" + extern "C" __global__ void kmain(double XX_Eproj_GPU[%(NTX)s][%(NTY)s], double YY_Eproj_GPU[%(NTX)s][%(NTY)s], + double LKERNEL_X_GPU[6][%(NTX)s][%(NTY)s], double LKERNEL_Y_GPU[6][%(NTX)s][%(NTY)s], + double PixA_Eobj_GPU[%(NEOX)s][%(NEOY)s], double PixA_resamp_GPU[%(NTX)s][%(NTY)s]) + { + int ROW = blockIdx.x*blockDim.x+threadIdx.x; + int COL = blockIdx.y*blockDim.y+threadIdx.y; + + int NTX = %(NTX)s; + int NTY = %(NTY)s; + int NEOX = %(NEOX)s; + int NEOY = %(NEOY)s; + + if (ROW < NTX && COL < NTY) { + + double x = XX_Eproj_GPU[ROW][COL]; + double y = YY_Eproj_GPU[ROW][COL]; + + int r0 = floor(x) - 3; + int c0 = floor(y) - 3; + + for(int i = 0; i < 6; ++i){ + for(int j = 0; j < 6; ++j){ + double w = LKERNEL_X_GPU[i][ROW][COL] * LKERNEL_Y_GPU[j][ROW][COL]; + PixA_resamp_GPU[ROW][COL] += w * PixA_Eobj_GPU[r0 + i][c0 + j]; + } + } + } + } + """ + _code = _funcstr % _refdict + _module = cp.RawModule(code=_code, backend=u'nvcc', translate_cucomplex=False) + resamp_func = _module.get_function('kmain') + + t0 = time.time() + LKERNEL_X_GPU = cp.zeros((6, NTX, NTY), dtype=np.float64) + LKERNEL_Y_GPU = cp.zeros((6, NTX, NTY), dtype=np.float64) + weightkernel_func(args=(XX_Eproj_GPU, YY_Eproj_GPU, LKERNEL_X_GPU, LKERNEL_Y_GPU), block=TpB_PIX, grid=BpG_PIX) + resamp_func(args=(XX_Eproj_GPU, YY_Eproj_GPU, LKERNEL_X_GPU, LKERNEL_Y_GPU, + PixA_Eobj_GPU, PixA_resamp_GPU), block=TpB_PIX, grid=BpG_PIX) + if self.VERBOSE_LEVEL in [1, 2]: + print('MeLOn CheckPoint: Cuda resampling takes [%.6f s]' %(time.time() - t0)) + + return PixA_resamp_GPU diff --git a/sfft/utils/CupyWCSTransform.py b/sfft/utils/CupyWCSTransform.py new file mode 100644 index 0000000..ceb47b0 --- /dev/null +++ b/sfft/utils/CupyWCSTransform.py @@ -0,0 +1,241 @@ +import cupy as cp + +__last_update__ = "2024-09-19" +__author__ = "Shu Liu & Lei Hu " + +class Cupy_WCS_Transform: + def __init__(self): + return None + + def read_cd_wcs(self, hdr_wcs, CDKEY="CD"): + """ + # * Note on the CD matrix transformation: + The sky coordinate (x, y) relative to reference point can be connected with + the image coordinate (u, v) relative to reference point by the CD matrix: + [x] [CD1_1 CD1_2] [u] + [y] = [CD2_1 CD2_2] [v] + where CD1_1, CD1_2, CD2_1, CD2_2 are stored in the FITS header. + + """ + assert hdr_wcs["CTYPE1"] == "RA---TAN" + assert hdr_wcs["CTYPE2"] == "DEC--TAN" + + N0 = int(hdr_wcs["NAXIS1"]) + N1 = int(hdr_wcs["NAXIS2"]) + + CRPIX1 = float(hdr_wcs["CRPIX1"]) + CRPIX2 = float(hdr_wcs["CRPIX2"]) + + CRVAL1 = float(hdr_wcs["CRVAL1"]) + CRVAL2 = float(hdr_wcs["CRVAL2"]) + + KEYDICT = { + "N0": N0, "N1": N1, + "CRPIX1": CRPIX1, "CRPIX2": CRPIX2, + "CRVAL1": CRVAL1, "CRVAL2": CRVAL2 + } + + CD1_1 = hdr_wcs[f"{CDKEY}1_1"] + CD1_2 = hdr_wcs[f"{CDKEY}1_2"] if f"{CDKEY}1_2" in hdr_wcs else 0. + CD2_1 = hdr_wcs[f"{CDKEY}2_1"] if f"{CDKEY}2_1" in hdr_wcs else 0. + CD2_2 = hdr_wcs[f"{CDKEY}2_2"] + + CD_GPU = cp.array([ + [CD1_1, CD1_2], + [CD2_1, CD2_2] + ], dtype=cp.float64) + + return KEYDICT, CD_GPU + + def read_sip_wcs(self, hdr_wcs): + """ + # * Note on the SIP transformation: + The image coordinate (u, v) relative to reference point and we have undistorted image coordinate (U, V): + U = u + f(u, v) + V = v + g(u, v) + where f(u, v) and g(u, v) are the SIP distortion functions with polynomial form: + f(u, v) = sum_{p, q} A_{pq} u^p v^q, p + q <= A_ORDER + g(u, v) = sum_{p, q} B_{pq} u^p v^q, p + q <= B_ORDER + These coefficients are stored in the FITS header and define a foward transformation from (u, v) to (U, V). + + The sky coordinate (x, y) relative to reference point can be connected with (U, V) by the reversible CD matrix: + [x] [CD1_1 CD1_2] [U] + [y] = [CD2_1 CD2_2] [V] + So the forward transformation & CD matrix (with considering the reference point) is equivalent to a pix2world function. + + The reverse question is how to obtain the backward transformation from undistorted (U, V) to distorted (u, v). + Once we have the backward transformation, we can combine a reversed CD matrix + (with considering the reference point) to get the world2pix function. + + A simple approach (not accurate) is to assume the forward transformation also has a polynomial form: + u = U + fp(U, V) + v = V + gp(U, V) + where + fp(U, V) = sum_{p, q} AP_{pq} U^p V^q, p + q <= A_ORDER + gp(U, V) = sum_{p, q} BP_{pq} U^p V^q, p + q <= B_ORDER + + The coefficients AP_{pq} and BP_{pq} can be obtained by solving the linear equations separately. + [u - U] = [U^p*V^q] [AP_{pq}] + + shapes: + b: [u - U] | (N, 1), + A: [U^p*V^q] | (N, (A_ORDER+1)*(A_ORDER+2)/2) + x: [AP_{pq}] | ((A_ORDER+1)*(A_ORDER+2)/2, 1) + + [v - V] = [U^p*V^q] [BP_{pq}] + b: [v - V] | (N, 1), + A: [U^p*V^q] | (N, (B_ORDER+1)*(B_ORDER+2)/2) + x: [BP_{pq}] | ((B_ORDER+1)*(B_ORDER+2)/2, 1) + + The two matrices encode the "backward" transformation in fp(U, V) and gp(U, V). + Let's denote the two matrices as FP_UV and GP_UV, respectively. + + # * A WCS example with SIP distortion + CTYPE1 = 'RA---TAN-SIP' + CTYPE2 = 'DEC--TAN-SIP' + + CRPIX1 = 2044.0 + CRPIX2 = 2044.0 + + CD1_1 = 2.65458074767927E-05 + CD1_2 = -1.2630331175158E-05 + CD2_1 = 1.38917634264203E-05 + CD2_2 = 2.57785917553667E-05 + + CRVAL1 = 9.299707734492053 + CRVAL2 = -43.984663860136145 + + A_ORDER = 4 + A_0_2 = -4.774423702E-10 + A_0_3 = -2.462744787E-14 + A_0_4 = 2.28913156E-17 + A_1_1 = 1.076615801E-09 + A_1_2 = -9.939904553E-14 + A_1_3 = -5.845336863E-17 + A_2_0 = -3.717865118E-10 + A_2_1 = -4.966118494E-15 + A_2_2 = 6.615859199E-17 + A_3_0 = -3.817793356E-15 + A_3_1 = 3.310020049E-17 + A_4_0 = -5.166048303E-19 + + B_ORDER = 4 + B_0_2 = 1.504792792E-09 + B_0_3 = -2.662833066E-15 + B_0_4 = -8.383877471E-20 + B_1_1 = 1.204553316E-10 + B_1_2 = 6.57748238E-14 + B_1_3 = -6.998508554E-18 + B_2_0 = 4.136013664E-10 + B_2_1 = 5.339208582E-14 + B_2_2 = 6.063403412E-17 + B_3_0 = 1.486316541E-14 + B_3_1 = -9.102668806E-17 + B_4_0 = -5.174323631E-17 + + """ + assert hdr_wcs["CTYPE1"] == "RA---TAN-SIP" + assert hdr_wcs["CTYPE2"] == "DEC--TAN-SIP" + + N0 = int(hdr_wcs["NAXIS1"]) + N1 = int(hdr_wcs["NAXIS2"]) + + CRPIX1 = float(hdr_wcs["CRPIX1"]) + CRPIX2 = float(hdr_wcs["CRPIX2"]) + + CRVAL1 = float(hdr_wcs["CRVAL1"]) + CRVAL2 = float(hdr_wcs["CRVAL2"]) + + A_ORDER = int(hdr_wcs["A_ORDER"]) + B_ORDER = int(hdr_wcs["B_ORDER"]) + + KEYDICT = { + "N0": N0, "N1": N1, + "CRPIX1": CRPIX1, "CRPIX2": CRPIX2, + "CRVAL1": CRVAL1, "CRVAL2": CRVAL2, + "A_ORDER": A_ORDER, "B_ORDER": B_ORDER + } + + CD_GPU = cp.array([ + [hdr_wcs["CD1_1"], hdr_wcs["CD1_2"]], + [hdr_wcs["CD2_1"], hdr_wcs["CD2_2"]] + ], dtype=cp.float64) + + A_SIP_GPU = cp.zeros((A_ORDER+1, A_ORDER+1), dtype=cp.float64) + B_SIP_GPU = cp.zeros((B_ORDER+1, B_ORDER+1), dtype=cp.float64) + + for p in range(A_ORDER + 1): + for q in range(0, A_ORDER - p + 1): + keyword = f"A_{p}_{q}" + if keyword in hdr_wcs: + A_SIP_GPU[p, q] = hdr_wcs[keyword] + + for p in range(B_ORDER + 1): + for q in range(0, B_ORDER - p + 1): + keyword = f"B_{p}_{q}" + if keyword in hdr_wcs: + B_SIP_GPU[p, q] = hdr_wcs[keyword] + + return KEYDICT, CD_GPU, A_SIP_GPU, B_SIP_GPU + + def cd_transform(self, IMAGE_X_GPU, IMAGE_Y_GPU, CD_GPU): + """CD matrix transformation from image to world""" + WORLD_X_GPU, WORLD_Y_GPU = cp.matmul(CD_GPU, cp.array([IMAGE_X_GPU, IMAGE_Y_GPU])) + return WORLD_X_GPU, WORLD_Y_GPU + + def cd_transform_inv(self, WORLD_X_GPU, WORLD_Y_GPU, CD_GPU): + """CD matrix transformation from world to image""" + # CD_inv_GPU = cp.linalg.inv(CD_GPU) + CD_inv_GPU = 1. / (CD_GPU[0, 0] * CD_GPU[1, 1] - CD_GPU[0, 1] * CD_GPU[1, 0]) * \ + cp.array([[CD_GPU[1, 1], -CD_GPU[0, 1]], [-CD_GPU[1, 0], CD_GPU[0, 0]]]) + IMAGE_X_GPU, IMAGE_Y_GPU = cp.matmul(CD_inv_GPU, cp.array([WORLD_X_GPU, WORLD_Y_GPU])) + return IMAGE_X_GPU, IMAGE_Y_GPU + + def sip_forward_transform(self, u_GPU, v_GPU, A_SIP_GPU, B_SIP_GPU): + """Forward SIP transformation from (u, v) to (U, V)""" + A_ORDER = A_SIP_GPU.shape[0] - 1 + U_GPU = u_GPU + cp.zeros_like(u_GPU) + for p in range(A_ORDER + 1): + for q in range(A_ORDER - p + 1): + U_GPU += A_SIP_GPU[p, q] * u_GPU**p * v_GPU**q + + B_ORDER = B_SIP_GPU.shape[0] - 1 + V_GPU = v_GPU + cp.zeros_like(v_GPU) + for p in range(B_ORDER + 1): + for q in range(B_ORDER - p + 1): + V_GPU += B_SIP_GPU[p, q] * u_GPU**p * v_GPU**q + return U_GPU, V_GPU + + def sip_backward_matrix(self, U_GPU, V_GPU, ORDER): + """Compute the backward matrix P_UV (FP_UV or GP_UV)""" + Nsamp = len(U_GPU) + P_UV_GPU = cp.zeros((Nsamp, (ORDER+1)*(ORDER+2)//2), dtype=cp.float64) + idx = 0 + for p in range(ORDER + 1): + for q in range(ORDER - p + 1): + P_UV_GPU[:, idx] = U_GPU**p * V_GPU**q + idx += 1 + return P_UV_GPU + + def lstsq_sip_backward_transform(self, u_GPU, v_GPU, U_GPU, V_GPU, A_ORDER, B_ORDER): + """Compute the backward transformation from (U, V) to (u, v)""" + FP_UV_GPU = self.sip_backward_matrix(U_GPU=U_GPU, V_GPU=V_GPU, ORDER=A_ORDER) + GP_UV_GPU = self.sip_backward_matrix(U_GPU=U_GPU, V_GPU=V_GPU, ORDER=B_ORDER) + + AP_lstsq_GPU = cp.linalg.lstsq(FP_UV_GPU, (u_GPU - U_GPU).reshape(-1, 1), rcond=None)[0] + BP_lstsq_GPU = cp.linalg.lstsq(GP_UV_GPU, (v_GPU - V_GPU).reshape(-1, 1), rcond=None)[0] + return FP_UV_GPU, GP_UV_GPU, AP_lstsq_GPU, BP_lstsq_GPU + + def sip_backward_transform(self, U_GPU, V_GPU, FP_UV_GPU, GP_UV_GPU, AP_GPU, BP_GPU): + """Backward transformation from (U, V) to (u, v)""" + u_GPU = U_GPU + cp.matmul(FP_UV_GPU, AP_GPU)[:, 0] + v_GPU = V_GPU + cp.matmul(GP_UV_GPU, BP_GPU)[:, 0] + return u_GPU, v_GPU + + def random_coord_sampling(self, N0, N1, CRPIX1, CRPIX2, Nsamp=1024, RANDOM_SEED=10086): + """Random sampling of coordinates""" + if RANDOM_SEED is not None: + cp.random.seed(RANDOM_SEED) + u_GPU = cp.random.uniform(0.5, N0+0.5, Nsamp, dtype=cp.float64) - CRPIX1 + v_GPU = cp.random.uniform(0.5, N1+0.5, Nsamp, dtype=cp.float64) - CRPIX2 + return u_GPU, v_GPU diff --git a/sfft/utils/DeCorrelationCalculator.py b/sfft/utils/DeCorrelationCalculator.py index a0592a0..aa70d70 100644 --- a/sfft/utils/DeCorrelationCalculator.py +++ b/sfft/utils/DeCorrelationCalculator.py @@ -1,5 +1,6 @@ import math import numpy as np +import cupy from sfft.utils.ConvKernelConvertion import ConvKernel_Convertion # version: Mar 10, 2023 @@ -69,23 +70,24 @@ def DCC(MK_JLst, SkySig_JLst, MK_ILst=[], SkySig_ILst=[], MK_Fin=None, KERatio=2 # construct the DeNonimator (a real-positive map) in Fourier Space uMK = np.array([[0, 0, 0], [0, 1, 0], [0, 0, 0]]).astype(float) def Get_JTerm(MKj, skysig): - if MKj is not None: KIMG_CSZ = ConvKernel_Convertion.CSZ(MKj, N0, N1) - if MKj is None: KIMG_CSZ = ConvKernel_Convertion.CSZ(uMK, N0, N1) - kft = np.fft.fft2(KIMG_CSZ) - kft2 = (np.conj(kft) * kft).real - term = (skysig**2 * kft2) / NumJ**2 + if MKj is not None: KIMG_CSZ = ConvKernel_Convertion.CSZ(cupy.array( MKj ), N0, N1) + if MKj is None: KIMG_CSZ = ConvKernel_Convertion.CSZ(cupy.array( uMK ), N0, N1) + kft = cupy.fft.fft2(KIMG_CSZ) + kft2 = (cupy.conj(kft) * kft).real + term = (skysig**2 * kft2) / NumJ**2 return term - if MK_Fin is not None: KIMG_CSZ = ConvKernel_Convertion.CSZ(MK_Fin, N0, N1) - if MK_Fin is None: KIMG_CSZ = ConvKernel_Convertion.CSZ(uMK, N0, N1) - kft = np.fft.fft2(KIMG_CSZ) - kft2_Fin = (np.conj(kft) * kft).real + if MK_Fin is not None: KIMG_CSZ = ConvKernel_Convertion.CSZ(cupy.array( MK_Fin ), N0, N1) + if MK_Fin is None: KIMG_CSZ = ConvKernel_Convertion.CSZ(cupy.array( uMK ), N0, N1) + cu_KIMG_CSZ = cupy.array( KIMG_CSZ ) + kft = cupy.fft.fft2(cu_KIMG_CSZ) + kft2_Fin = (cupy.conj(kft) * kft).real def Get_ITerm(MKi, skysig): - if MKi is not None: KIMG_CSZ = ConvKernel_Convertion.CSZ(MKi, N0, N1) - if MKi is None: KIMG_CSZ = ConvKernel_Convertion.CSZ(uMK, N0, N1) - kft = np.fft.fft2(KIMG_CSZ) - kft2 = (np.conj(kft) * kft).real + if MKi is not None: KIMG_CSZ = ConvKernel_Convertion.CSZ(cupy.array( MKi ), N0, N1) + if MKi is None: KIMG_CSZ = ConvKernel_Convertion.CSZ(cupy.array( uMK ), N0, N1) + kft = cupy.fft.fft2(KIMG_CSZ) + kft2 = (cupy.conj(kft) * kft).real term = (skysig**2 * kft2 * kft2_Fin) / NumI**2 return term @@ -96,9 +98,12 @@ def Get_ITerm(MKi, skysig): for MKi, skysig in zip(MK_ILst, SkySig_ILst): DeNo += Get_ITerm(MKi, skysig) - FDeCo = np.sqrt(1.0 / DeNo) # real & conjugate-symmetric - DeCo = np.fft.ifft2(FDeCo).real # no imaginary part + FDeCo = cupy.sqrt(1.0 / DeNo) # real & conjugate-symmetric + DeCo = cupy.fft.ifft2(FDeCo).real # no imaginary part + + # made this cupy: KDeCo = ConvKernel_Convertion.iCSZ(DeCo, L0_KDeCo, L1_KDeCo) - KDeCo = KDeCo / np.sum(KDeCo) # rescale to have Unit kernel sum + KDeCo = KDeCo / cupy.sum(KDeCo) # rescale to have Unit kernel sum + KDeCo = cupy.asnumpy( KDeCo ) return KDeCo diff --git a/sfft/utils/ImageZoomRotate.py b/sfft/utils/ImageZoomRotate.py index 712f94d..48efa3e 100644 --- a/sfft/utils/ImageZoomRotate.py +++ b/sfft/utils/ImageZoomRotate.py @@ -1,19 +1,19 @@ import os import math +import cupy as cp import numpy as np from astropy.io import fits from astropy.wcs import WCS from tempfile import mkdtemp -from sfft.utils.pyAstroMatic.PYSWarp import PY_SWarp -# version: Apr 22, 2024 +from sfft.utils.CudaResampling import Cuda_Resampling +__last_update__ = "2024-09-19" __author__ = "Lei Hu " -__version__ = "v1.4" class Image_ZoomRotate: @staticmethod def IZR(PixA_obj, ZOOM_SCALE_X=1.0, ZOOM_SCALE_Y=1.0, OUTSIZE_PARIRY_X='UNCHANGED', OUTSIZE_PARIRY_Y='UNCHANGED', \ - PATTERN_ROTATE_ANGLE=0.0, RESAMPLING_TYPE='LANCZOS4', FILL_VALUE=0.0, VERBOSE_LEVEL=2): + PATTERN_ROTATE_ANGLE=0.0, RESAMPLING_TYPE='LANCZOS3', FILL_VALUE=0.0, VERBOSE_LEVEL=2): """ # Remarks on Image Zoom & Rotate @@ -164,19 +164,57 @@ def func_coordtrans(X_ORI, Y_ORI): PixA_empty = np.zeros((wcsinfo_TARG['NAXIS1'], wcsinfo_TARG['NAXIS2'])).astype(float) hdl_TARG = fits.HDUList([fits.PrimaryHDU(data=PixA_empty.T, header=header_TARG)]) + """ + # DEPRECATED hdl_TARG[0].header['GAIN'] = (1.0, 'MeLOn: PlaceHolder') hdl_TARG[0].header['SATURATE'] = (50000.0, 'MeLOn: PlaceHolder') hdl_TARG[0].header['EXPTIME'] = (60.0, 'MeLOn: PlaceHolder') hdl_TARG[0].header['MJD-OBS'] = (58849.0, 'MeLOn: PlaceHolder') + + """ FITS_TARG = TDIR + '/target_empty_image.fits' hdl_TARG.writeto(FITS_TARG, overwrite=True) # * run SWarp to perform the image resampling FITS_resamp = TDIR + '/resampled_image.fits' - PY_SWarp.PS(FITS_obj=FITS_ORI, FITS_ref=FITS_TARG, FITS_resamp=FITS_resamp, \ - GAIN_KEY='GAIN', SATUR_KEY='SATURATE', OVERSAMPLING=1, RESAMPLING_TYPE=RESAMPLING_TYPE, \ - SUBTRACT_BACK='N', FILL_VALUE=FILL_VALUE, VERBOSE_LEVEL=VERBOSE_LEVEL) + """ + # DEPRECATED + CR = Cuda_Resampling(FITS_obj=FITS_ORI, FITS_targ=FITS_TARG, + METHOD=RESAMPLING_TYPE, VERBOSE_LEVEL=VERBOSE_LEVEL) + PixA_Eobj, MappingDICT = CR.mapping() + PixA_resamp = CR.resampling(PixA_Eobj=PixA_Eobj, MappingDICT=MappingDICT) + + """ + def run_resample(FITS_obj, FITS_targ): + """run resampling using CUDA""" + hdr_obj = fits.getheader(FITS_obj, ext=0) + hdr_targ = fits.getheader(FITS_targ, ext=0) + + PixA_obj = fits.getdata(FITS_obj, ext=0).T + PixA_targ = fits.getdata(FITS_targ, ext=0).T + + if not PixA_obj.flags['C_CONTIGUOUS']: + PixA_obj = np.ascontiguousarray(PixA_obj, np.float64) + PixA_obj_GPU = cp.array(PixA_obj) + else: PixA_obj_GPU = cp.array(PixA_obj.astype(np.float64)) + + if not PixA_targ.flags['C_CONTIGUOUS']: + PixA_targ = np.ascontiguousarray(PixA_targ, np.float64) + PixA_targ_GPU = cp.array(PixA_targ) + else: PixA_targ_GPU = cp.array(PixA_targ.astype(np.float64)) + + CR = Cuda_Resampling(RESAMP_METHOD='BILINEAR', VERBOSE_LEVEL=1) + XX_proj_GPU, YY_proj_GPU = CR.projection_cd(hdr_obj=hdr_obj, hdr_targ=hdr_targ, CDKEY="PC") + PixA_Eobj_GPU, EProjDict = CR.frame_extension(XX_proj_GPU=XX_proj_GPU, YY_proj_GPU=YY_proj_GPU, PixA_obj_GPU=PixA_obj_GPU) + PixA_resamp = cp.asnumpy(CR.resampling(PixA_Eobj_GPU=PixA_Eobj_GPU, EProjDict=EProjDict)) + return PixA_resamp + + PixA_resamp = run_resample(FITS_obj=FITS_ORI, FITS_targ=FITS_TARG) + PixA_resamp[np.isnan(PixA_resamp)] = FILL_VALUE + with fits.open(FITS_TARG) as hdl: + hdl[0].data[:, :] = PixA_resamp.T + hdl.writeto(FITS_resamp, overwrite=True) PixA_resamp = fits.getdata(FITS_resamp, ext=0).T os.system('rm -rf %s' %TDIR) diff --git a/sfft/utils/SExSkySubtract.py b/sfft/utils/SExSkySubtract.py index ad423c6..fa0e439 100644 --- a/sfft/utils/SExSkySubtract.py +++ b/sfft/utils/SExSkySubtract.py @@ -119,4 +119,4 @@ def SSS(FITS_obj, FITS_skysub=None, FITS_sky=None, FITS_skyrms=None, SATUR_KEY=' hdl[0].data[:, :] = PixA_skyrms.T hdl.writeto(FITS_skyrms, overwrite=True) - return SKYDIP, SKYPEAK, PixA_skysub, PixA_sky, PixA_skyrms + return SKYDIP, SKYPEAK, PixA_skysub, PixA_sky, PixA_skyrms, DETECT_MASK diff --git a/sfft/utils/pyAstroMatic/PYSEx.py b/sfft/utils/pyAstroMatic/PYSEx.py index c58af2b..758473c 100644 --- a/sfft/utils/pyAstroMatic/PYSEx.py +++ b/sfft/utils/pyAstroMatic/PYSEx.py @@ -19,11 +19,11 @@ class PY_SEx: @staticmethod def PS(FITS_obj, PSF_obj=None, FITS_ref=None, SExParam=None, CATALOG_TYPE='FITS_LDAC', \ - GAIN_KEY='GAIN', SATUR_KEY='SATURATE', PIXEL_SCALE=1.0, SEEING_FWHM=1.2, BACK_TYPE='AUTO', \ - BACK_VALUE=0.0, BACK_SIZE=64, BACK_FILTERSIZE=3, USE_FILT=True, DETECT_THRESH=1.5, ANALYSIS_THRESH=1.5, \ - DETECT_MINAREA=5, DETECT_MAXAREA=0, DEBLEND_NTHRESH=32, DEBLEND_MINCONT=0.005, CLEAN='Y', \ - BACKPHOTO_TYPE='LOCAL', PHOT_APERTURES=5.0, NegativeCorr=True, CHECKIMAGE_TYPE='NONE', \ - VIGNET=None, STAMP_IMGSIZE=None, AddRD=False, ONLY_FLAGS=None, XBoundary=0.0, YBoundary=0.0, \ + GAIN_KEY='GAIN', SATUR_KEY='SATURATE', DEFAULT_GAIN=0., DEFAULT_SATUR=50000., PIXEL_SCALE=1.0, + SEEING_FWHM=1.2, BACK_TYPE='AUTO', BACK_VALUE=0.0, BACK_SIZE=64, BACK_FILTERSIZE=3, USE_FILT=True, \ + DETECT_THRESH=1.5, ANALYSIS_THRESH=1.5, DETECT_MINAREA=5, DETECT_MAXAREA=0, DEBLEND_NTHRESH=32, \ + DEBLEND_MINCONT=0.005, CLEAN='Y', BACKPHOTO_TYPE='LOCAL', PHOT_APERTURES=5.0, NegativeCorr=True, \ + CHECKIMAGE_TYPE='NONE', VIGNET=None, STAMP_IMGSIZE=None, AddRD=False, ONLY_FLAGS=None, XBoundary=0.0, YBoundary=0.0, \ Coor4Match='XY_', XY_Quest=None, Match_xytol=2.0, RD_Quest=None, Match_rdtol=1.0, \ Preserve_NoMatch=False, MDIR=None, VERBOSE_TYPE='QUIET', VERBOSE_LEVEL=2): @@ -40,7 +40,7 @@ def PS(FITS_obj, PSF_obj=None, FITS_ref=None, SExParam=None, CATALOG_TYPE='FITS_ # (b) -FITS_ref != None mean dual image mode: # SEx detection on -FITS_ref & SEx photometry on -FITS_obj - -SExParam [None] # Parameter List (Python list here) of SExtractor output catalog + -SExParam [None] # Parameter List (Python list here) of SExtractor output catalog # one can use command line 'sex -dp' to find all available parameters # Configurations for SExtractor: @@ -55,6 +55,10 @@ def PS(FITS_obj, PSF_obj=None, FITS_ref=None, SExParam=None, CATALOG_TYPE='FITS_ -SATUR_KEY ['SATURATE'] # SExtractor Parameter SATUR_KEY # i.e., keyword of the saturation level in the FITS image header + -DEFAULT_GAIN [0.] # Set default gain if GAIN_KEY not found + + -DEFAULT_SATUR [50000.] # Set default saturation if SATUR_KEY not found + -PIXEL_SCALE [1.0] # SExtractor Parameter PIXEL_SCALE # size of pixel in arcsec (0=use FITS WCS info) # P.S. it only works for surface brightness parameters, @@ -487,9 +491,9 @@ def PS(FITS_obj, PSF_obj=None, FITS_ref=None, SExParam=None, CATALOG_TYPE='FITS_ _message = 'SExtractor uses GAIN = [%s] from keyword [%s]!' %(GAIN, GAIN_KEY) print('MeLOn CheckPoint [%s]: %s' %(objname, _message)) else: - GAIN = 0.0 # infinite GAIN, Poission noise ignored + GAIN = DEFAULT_GAIN # infinite GAIN, Poission noise ignored if VERBOSE_LEVEL in [0, 1, 2]: - _warn_message = 'SExtractor has to use default GAIN = 0!' + _warn_message = f'SExtractor has to use default GAIN = {DEFAULT_GAIN}!' warnings.warn('MeLOn WARNING [%s]: %s' %(objname, _warn_message)) if SATUR_KEY in phr_obj: @@ -498,9 +502,9 @@ def PS(FITS_obj, PSF_obj=None, FITS_ref=None, SExParam=None, CATALOG_TYPE='FITS_ _message = 'SExtractor uses SATURATION = [%s] from keyword [%s]!' %(SATURATION, SATUR_KEY) print('MeLOn CheckPoint [%s]: %s' %(objname, _message)) else: - SATURATION = 50000.0 + SATURATION = DEFAULT_SATUR if VERBOSE_LEVEL in [0, 1, 2]: - _warn_message = 'SExtractor has to use default SATURATION = 50000.0!' + _warn_message = f'SExtractor has to use default SATURATION = {DEFAULT_SATUR}!' warnings.warn('MeLOn WARNING [%s]: %s' %(objname, _warn_message)) """ diff --git a/sfft/utils/pyAstroMatic/PYSWarp.py b/sfft/utils/pyAstroMatic/PYSWarp.py index bb38afd..5fce77c 100644 --- a/sfft/utils/pyAstroMatic/PYSWarp.py +++ b/sfft/utils/pyAstroMatic/PYSWarp.py @@ -14,20 +14,13 @@ class PY_SWarp: @staticmethod - def PS(FITS_obj, FITS_ref, FITS_resamp=None, \ - GAIN_KEY='GAIN', SATUR_KEY='SATURATE', GAIN_DEFAULT=0.0, SATLEV_DEFAULT=50000.0, \ - OVERSAMPLING=1, RESAMPLING_TYPE='LANCZOS3', SUBTRACT_BACK='N', FILL_VALUE=None, \ - TMPDIR_ROOT=None, VERBOSE_TYPE='NORMAL', VERBOSE_LEVEL=2): - - """ - # Inputs & Outputs: - - -FITS_obj [] # FITS file path of the input image to be resampled - - -FITS_ref [] # FITS file path of the input image as resampling reference - - -FITS_resamp [None] # FITS file path of the output image of resampled -FITS_obj + def Mk_ConfigDict(GAIN_KEY='GAIN', SATUR_KEY='SATURATE', GAIN_DEFAULT=0.0, SATLEV_DEFAULT=50000.0, OVERSAMPLING=1, \ + RESAMPLE='Y', RESAMPLING_TYPE='LANCZOS3', SUBTRACT_BACK='N', PROJECTION_TYPE='TAN', RESCALE_WEIGHTS='Y', + WEIGHT_TYPE='NONE', COMBINE='Y', CENTER_TYPE='ALL', CENTER = 0.0, CELESTIAL_TYPE='NATIVE', + PIXELSCALE_TYPE='MEDIAN', PIXEL_SCALE=0.0, NTHREADS=0, \ + COMBINE_TYPE='MEDIAN', IMAGE_SIZE=0, WEIGHT_SUFFIX='.weight.fits', WRITE_XML='N', VERBOSE_TYPE='NORMAL'): + """ # SWarp parameters: -GAIN_KEY ['GAIN'] # SWarp Parameter GAIN_KEYWORD @@ -46,6 +39,7 @@ def PS(FITS_obj, FITS_ref, FITS_resamp=None, \ # P.S. Here I changed the default value from 0 to 1 # NOTE: large OVERSAMPLING may cause higher pixel correlation + -RESAMPLE ['Y'] # SWarp parameter RESAMPLE -RESAMPLING_TYPE ['LANCZOS3'] # SWarp Parameter RESAMPLING_TYPE # NEAREST,BILINEAR,LANCZOS2,LANCZOS3 # LANCZOS4 (1 per axis) or FLAGS @@ -54,9 +48,77 @@ def PS(FITS_obj, FITS_ref, FITS_resamp=None, \ -SUBTRACT_BACK ['N'] # SWarp Parameter SUBTRACT_BACK # Subtraction sky background (Y/N)? (all or for each image) # P.S. Here I changed the default value from 'Y' to 'N' + + -PROJECTION_TYPE ['TAN'] # SWarp Parameter PROJECTION_TYPE + + -CENTER_TYPE ['ALL'] # SWarp parameter CENTER_TYPE + -CENTER [0.0] # SWarp parameter CENTER + -CELESTIAL_TYPE ['NATIVE'] # SWarp parameter CELESTIAL_TYPE + + -PIXELSCALE_TYPE ['MEDIAN'] # SWarp parameter PIXELSCALE_TYPE + -PIXEL_SCALE [0.0] # SWarp parameter PIXEL_SCALE + + -NTHREADS [0] # SWarp parameter NTHREADS + + -RESCALE_WEIGHTS ['Y'] # SWarp Parameter RESCALE_WEIGHTS + -WEIGHT_TYPE ['NONE'] # SWarp Parameter WEIGHT_TYPE + + -IMAGE_SIZE [0] # SWarp parameter IMAGE_SIZE -VERBOSE_TYPE ['NORMAL'] # SWarp Parameter VERBOSE_TYPE - # QUIET,LOG,NORMAL, or FULL + # QUIET, LOG, NORMAL, or FULL + + """ + + ConfigDict = {} + ConfigDict['GAIN_KEYWORD'] = '%s' %GAIN_KEY + ConfigDict['SATLEV_KEYWORD'] = '%s' %SATUR_KEY + + ConfigDict['GAIN_DEFAULT'] = '%s' %GAIN_DEFAULT + ConfigDict['SATLEV_DEFAULT'] = '%s' %SATLEV_DEFAULT + + ConfigDict['OVERSAMPLING'] = '%d' %OVERSAMPLING + ConfigDict['RESAMPLE'] = '%s' %RESAMPLE + ConfigDict['RESAMPLING_TYPE'] = '%s' %RESAMPLING_TYPE + ConfigDict['SUBTRACT_BACK'] = '%s' %SUBTRACT_BACK + + ConfigDict['PROJECTION_TYPE'] = '%s' %PROJECTION_TYPE + ConfigDict['CENTER_TYPE'] = '%s' %CENTER_TYPE + ConfigDict['CENTER'] = '%s' %CENTER + ConfigDict['CELESTIAL_TYPE'] = '%s' %CELESTIAL_TYPE + + ConfigDict['PIXELSCALE_TYPE'] = '%s' %PIXELSCALE_TYPE + ConfigDict['PIXEL_SCALE'] = '%d' %PIXEL_SCALE + + ConfigDict['NTHREADS'] = '%s' %NTHREADS + + ConfigDict['WEIGHT_TYPE'] = '%s' %WEIGHT_TYPE + ConfigDict['RESCALE_WEIGHTS'] = '%s' %RESCALE_WEIGHTS + ConfigDict['IMAGE_SIZE'] = '%d' %IMAGE_SIZE + + ConfigDict['COMBINE'] = '%s' %COMBINE # trivial here + ConfigDict['COMBINE_TYPE'] = '%s' %COMBINE_TYPE # trivial here + + ConfigDict['WEIGHT_SUFFIX'] = '%s' %WEIGHT_SUFFIX # trivial here + ConfigDict['WRITE_XML'] = 'N' + ConfigDict['VERBOSE_TYPE'] = '%s' %VERBOSE_TYPE + + return ConfigDict + + @staticmethod + def PS(FITS_obj, FITS_ref, ConfigDict, FITS_resamp=None, \ + FILL_VALUE=None, TMPDIR_ROOT=None, VERBOSE_LEVEL=2): + + """ + # Inputs & Outputs: + + -FITS_obj [] # FITS file path of the input image to be resampled + + -FITS_ref [] # FITS file path of the input image as resampling reference + + -ConfigDict [] # Dictionary for config file, input from PY_SWarp.Mk_ConfigDict(). + + -FITS_resamp [None] # FITS file path of the output image of resampled -FITS_obj # Other parameters: @@ -125,25 +187,7 @@ def PS(FITS_obj, FITS_ref, FITS_resamp=None, \ # * make directory as a workplace TDIR = mkdtemp(suffix=None, prefix='PYSWarp_', dir=TMPDIR_ROOT) - # * create SWarp configuration file in TDIR - ConfigDict = {} - ConfigDict['GAIN_KEYWORD'] = '%s' %GAIN_KEY - ConfigDict['SATLEV_KEYWORD'] = '%s' %SATUR_KEY - - ConfigDict['GAIN_DEFAULT'] = '%s' %GAIN_DEFAULT - ConfigDict['SATLEV_DEFAULT'] = '%s' %SATLEV_DEFAULT - - ConfigDict['OVERSAMPLING'] = '%d' %OVERSAMPLING - ConfigDict['RESAMPLING_TYPE'] = '%s' %RESAMPLING_TYPE - ConfigDict['SUBTRACT_BACK'] = '%s' %SUBTRACT_BACK - - ConfigDict['COMBINE'] = 'Y' # trivial here - ConfigDict['COMBINE_TYPE'] = 'MEDIAN' # trivial here - - ConfigDict['WEIGHT_SUFFIX'] = '.weight.fits' # trivial here - ConfigDict['WRITE_XML'] = 'N' - ConfigDict['VERBOSE_TYPE'] = '%s' %VERBOSE_TYPE - + # * create SWarp configuration file in TDIR swarp_config_path = AMConfig_Maker.AMCM(MDIR=TDIR, \ AstroMatic_KEY='swarp', ConfigDict=ConfigDict, tag='PYSWarp') @@ -178,8 +222,8 @@ def PS(FITS_obj, FITS_ref, FITS_resamp=None, \ # * update header by the SWarp generated saturation level NEW_SATUR = fits.getheader(tFITS_resamp, ext=0)['SATURATE'] - if SATUR_KEY in hdr_op: - hdr_op[SATUR_KEY] = (NEW_SATUR, 'MeLOn: PYSWarp') + if ConfigDict['SATLEV_KEYWORD'] in hdr_op: + hdr_op[ConfigDict['SATLEV_KEYWORD']] = (NEW_SATUR, 'MeLOn: PYSWarp') # * add history hdr_op['SWARP_O'] = (pa.basename(FITS_obj), 'MeLOn: PYSWarp') @@ -208,3 +252,108 @@ def PS(FITS_obj, FITS_ref, FITS_resamp=None, \ os.system('rm -rf %s'%TDIR) return PixA_resamp, MissingMask + + @staticmethod + def Coadd(FITS_obj, FITS_ref, ConfigDict, OUT_path, \ + FILL_VALUE=None, TMPDIR_ROOT=None, VERBOSE_LEVEL=2): + + """ + # Inputs & Outputs: + + -FITS_obj [] # List of FITS file paths of the input images to be resampled + + -FITS_ref [] # FITS file path of the input image as resampling reference + + -ConfigDict [] # Dictionary for config file, input from PY_SWarp.Mk_ConfigDict(). + + -FITS_resamp [None] # FITS file path of the output image of resampled, coadded -FITS_obj + + # Other parameters: + + -FILL_VALUE [None] # How to fill the invalid (boundary) pixels in -FITS_resamp and -FITS_resamp_weight + # e.g., -FILL_VALUE = 0.0, -FILL_VALUE = np.nan + + -VERBOSE_LEVEL [2] # The level of verbosity, can be [0, 1, 2] + # 0/1/2: QUIET/NORMAL/FULL mode + # NOTE: it only controls the verbosity out of SWarp. + + -TMPDIR_ROOT [None] # Specify root directory of temporary working directory. + + # Returns: + + PixA_resamp # Pixel array of the resampled, coadded image + # P.S. PixA_resamp = fits.getdata(OUT_path, ext=0).T + + PixA_resamp_weight # Pixel array of the weight map + + # * Remarks on PYSWarp + # + # [1] In SWarp, one need provide a target WCS-frame stored in fname.head, with name coincident with FITS_resamp. + # If no target WCS-frame is available, the output image will have a canonical N-E orientation. + # However, the current python wrapper PYSWarp must read a target WCS-frame from a given FITS_ref. + # + # [2] SWarp allows users to feed a corresponding weight map as input. + # However, the current python wrapper PYSWarp does not support this feature. + # + # [3] SWarp will automatically calculate the following keywords and set them into the output image. + # > EXPTIME: sum of exposure times in the part of coadd with the most overlaps. + # > GAIN: effective gain with flux and weighting applied. + # Note that any change in pixel scale only redistribute the flux with total flux conservative, + # therefore, the image resampling does not alter the GAIN value. + # > SATURATE: Minimum of all input saturation levels with flux scaling applied. + # Note that a change in pixel scale can alter the saturation level. + # > MJD-OBS: MJD of earliest start of exposures + # + """ + + # * check FITS_obj headers + for obj in FITS_obj: + phr_obj = fits.getheader(obj, ext=0) + + # a mild warning + if VERBOSE_LEVEL in [1, 2]: + if 'EXPTIME' not in phr_obj: + _warn_message = 'SWarp cannot find keyword EXPTIME in FITS header of [%s]!' %obj + warnings.warn('MeLOn WARNING: %s' %_warn_message) + + # a mild warning + if VERBOSE_LEVEL in [1, 2]: + if 'MJD-OBS' not in phr_obj: + _warn_message = 'SWarp cannot find keyword MJD-OBS in FITS header of [%s]!' %obj + warnings.warn('MeLOn WARNING: %s' %_warn_message) + + # * make directory as a workplace + TDIR = mkdtemp(suffix=None, prefix='PYSWarp_', dir=TMPDIR_ROOT) + + # * create SWarp configuration file in TDIR + swarp_config_path = AMConfig_Maker.AMCM(MDIR=TDIR, \ + AstroMatic_KEY='swarp', ConfigDict=ConfigDict, tag='PYSWarp') + + FITS_obj_str = ' '.join(FITS_obj) + weightOUT_path = OUT_path[:-5] + '.weight.fits' + + # * trigger SWarp + command = "cd %s && swarp %s -IMAGEOUT_NAME %s -WEIGHTOUT_NAME %s -c %s" \ + %(TDIR, FITS_obj_str, OUT_path, weightOUT_path, swarp_config_path) + print('MeLOn CheckPoint: Run SWarp Command ... \n %s' %command) + os.system(command) + os.system('rm -rf %s'%TDIR) + + # * fill the missing data in resampled image [SWarp default 0] + PixA_resamp, MissingMask = None, None + try: + PixA_resamp = fits.getdata(OUT_path, ext=0).T + PixA_resamp_weight = fits.getdata(weightOUT_path, ext=0).T + MissingMask = PixA_resamp_weight == 0 + if FILL_VALUE is not None: + PixA_resamp[MissingMask] = FILL_VALUE + if OUT_path is not None: + hdr_op = fits.getheader(OUT_path, ext=0) + hdl_op = fits.HDUList(fits.PrimaryHDU(PixA_resamp.T, header=hdr_op)) + hdl_op.writeto(OUT_path, overwrite=True) + except: + if VERBOSE_LEVEL in [0, 1, 2]: + _warn_message = 'SWarp FAILED on [%s]!' %FITS_obj + warnings.warn('MeLOn WARNING: %s' %_warn_message) + + return PixA_resamp, PixA_resamp_weight \ No newline at end of file