diff --git a/src/PyHyperScattering/ALS11012RSoXSLoader.py b/src/PyHyperScattering/ALS11012RSoXSLoader.py index 0a4b6cb..543b983 100755 --- a/src/PyHyperScattering/ALS11012RSoXSLoader.py +++ b/src/PyHyperScattering/ALS11012RSoXSLoader.py @@ -6,28 +6,28 @@ import warnings import re import PyHyperScattering +from .optional_dependencies import requires_optional, check_optional_dependency, warn_if_missing -try: +# Check for optional dependencies +HAS_ASTROPY = check_optional_dependency('astropy') +if HAS_ASTROPY: from astropy.io import fits -except ImportError: - warnings.warn('Could not import astropy.io.fits, needed for ALS 11.0.1.2 RSoXS loading. Is this dependency installed?',stacklevel=2) +else: + warn_if_missing('astropy') class ALS11012RSoXSLoader(FileLoader): ''' Loader for FITS files from the ALS 11.0.1.2 RSoXS instrument - - Additional requirement: astropy, for FITS file loader - + Note: This loader requires the 'astropy' package for reading FITS files. + If not installed, the loader will not be functional. Usage is mainly via the inherited function integrateImageStack from FileLoader - ''' file_ext = '(.*?).fits' md_loading_is_quick = True - def __init__(self,corr_mode=None,user_corr_func=None,dark_pedestal=0,exposure_offset=0.002,dark_subtract=False,data_collected_after_mar2021=False,constant_md={}): ''' Args: @@ -38,6 +38,9 @@ def __init__(self,corr_mode=None,user_corr_func=None,dark_pedestal=0,exposure_of data_collected_after_mar2021 (boolean, default False): if True, uses 'CCD Camera Shutter Inhibit' as the dark-indicator; if False, uses 'CCD Shutter Inhibit' constant_md (dict): values to insert into every metadata load. Example: beamcenter_x, beamcenter_y, sdd to enable qx/qy loading. ''' + if not HAS_ASTROPY: + raise ImportError("The 'astropy' package is required for this loader to function. Please install it first.") + if corr_mode == None: warnings.warn("Correction mode was not set, not performing *any* intensity corrections. Are you sure this is "+ "right? Set corr_mode to 'none' to suppress this warning.",stacklevel=2) @@ -51,18 +54,21 @@ def __init__(self,corr_mode=None,user_corr_func=None,dark_pedestal=0,exposure_of data_collected_after_mar2021 = False else: data_collected_after_mar2021 = True - + if data_collected_after_mar2021: self.shutter_inhibit = 'CCD Camera Shutter Inhibit' else: self.shutter_inhibit = 'CCD Shutter Inhibit' self.dark_pedestal = dark_pedestal self.user_corr_func = user_corr_func + self.dark_pedestal = dark_pedestal self.exposure_offset = exposure_offset - self.darks = {} - self.constant_md = constant_md self.dark_subtract = dark_subtract + self.data_collected_after_mar2021 = data_collected_after_mar2021 + self.constant_md = constant_md + self.darks = {} + @requires_optional('astropy') def loadDarks(self,basepath,dark_base_name): ''' Load a series of dark images as a function of exposure time, to be subtracted from subsequently-loaded data. 
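A minimal usage sketch of the guard added in __init__ above; the class name and arguments follow the existing API, and the snippet is illustrative rather than part of the patch:

from PyHyperScattering.ALS11012RSoXSLoader import ALS11012RSoXSLoader

# The constructor now raises ImportError when astropy is missing, so downstream
# code can degrade gracefully instead of failing later inside a FITS read.
try:
    loader = ALS11012RSoXSLoader(corr_mode='none', dark_subtract=False)
except ImportError:
    loader = None  # astropy not installed; ALS 11.0.1.2 FITS loading unavailable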
@@ -81,6 +87,7 @@ def loadDarks(self,basepath,dark_base_name): self.darks[exptime] = darkimage[2].data + @requires_optional('astropy') def loadSampleSpecificDarks(self,basepath,file_filter='',file_skip='donotskip',md_filter={}): ''' load darks matching a specific sample metadata @@ -118,6 +125,8 @@ def loadSampleSpecificDarks(self,basepath,file_filter='',file_skip='donotskip',m print(f'Loading dark for {md["EXPOSURE"]} from {file}') exptime = md['EXPOSURE'] self.darks[exptime] = img + + @requires_optional('astropy') def loadSingleImage(self,filepath,coords=None,return_q=False,**kwargs): ''' THIS IS A HELPER FUNCTION, mostly - should not be called directly unless you know what you are doing @@ -177,6 +186,7 @@ def loadSingleImage(self,filepath,coords=None,return_q=False,**kwargs): return xr.DataArray(img,dims=['qy','qx'],coords={'qy':qy,'qx':qx},attrs=headerdict) return xr.DataArray(img,dims=['pix_x','pix_y'],attrs=headerdict) + @requires_optional('astropy') def peekAtMd(self,file): ''' load the header/metadata without opening the corresponding image @@ -211,4 +221,3 @@ def normalizeMetadata(self,headerdict): headerdict['det_th'] = round(headerdict['CCD Theta'],2) headerdict.update(self.constant_md) return headerdict - diff --git a/src/PyHyperScattering/CMSGIWAXSLoader.py b/src/PyHyperScattering/CMSGIWAXSLoader.py index 18b84f9..06985b4 100644 --- a/src/PyHyperScattering/CMSGIWAXSLoader.py +++ b/src/PyHyperScattering/CMSGIWAXSLoader.py @@ -1,17 +1,34 @@ import pathlib import warnings -import fabio -from PIL import Image from PyHyperScattering.FileLoader import FileLoader import xarray as xr import pandas as pd import numpy as np from tqdm.auto import tqdm +from .optional_dependencies import requires_optional, check_optional_dependency, warn_if_missing + +# Check for optional dependencies +HAS_FABIO = check_optional_dependency('fabio') +HAS_PIL = check_optional_dependency('PIL') + +if HAS_FABIO: + import fabio +else: + warn_if_missing('fabio') + +if HAS_PIL: + from PIL import Image +else: + warn_if_missing('PIL') + class CMSGIWAXSLoader(FileLoader): """ GIXS Data Loader Class | NSLS-II 11-BM (CMS) Used to load single TIFF time-series TIFF GIWAXS images. + + Note: This loader requires either the 'fabio' or 'PIL' package for reading image files. + At least one of these packages must be installed for the loader to function. """ def __init__(self, md_naming_scheme=[], root_folder=None, delim='_'): """ @@ -24,6 +41,8 @@ def __init__(self, md_naming_scheme=[], root_folder=None, delim='_'): delim: delimeter value to split filename (default is underscore) """ + if not (HAS_FABIO or HAS_PIL): + raise ImportError("Either 'fabio' or 'PIL' package is required for this loader to function. Please install at least one of them.") self.md_naming_scheme = md_naming_scheme if len(md_naming_scheme) == 0: @@ -33,7 +52,7 @@ def __init__(self, md_naming_scheme=[], root_folder=None, delim='_'): self.sample_dict = None self.selected_series = [] - def loadSingleImage(self, filepath,coords=None,return_q=False,image_slice=None,use_cached_md=False,**kwargs): + def loadSingleImage(self, filepath, coords=None, return_q=False, image_slice=None, use_cached_md=False, **kwargs): """ Loads a single xarray DataArray from a filepath to a raw TIFF @@ -46,8 +65,9 @@ def loadSingleImage(self, filepath,coords=None,return_q=False,image_slice=None,u - image_slice - use_cached_md + Note: + This method will attempt to use fabio first, then fall back to PIL if fabio is not available. """ - # Ensure that the path exists before continuing. 
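A rough sketch of the fabio-first / PIL-fallback read described in the docstring note above, using the module-level HAS_FABIO / HAS_PIL flags and conditional imports defined at the top of this file (not the literal body of loadSingleImage):

import numpy as np

def _read_image(filepath):
    # Hypothetical helper: prefer fabio, fall back to PIL, otherwise fail loudly.
    if HAS_FABIO:
        return fabio.open(str(filepath)).data
    if HAS_PIL:
        return np.array(Image.open(filepath))
    raise ImportError("Either 'fabio' or 'PIL' is required to read image files.")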
filepath = pathlib.Path(filepath) diff --git a/src/PyHyperScattering/ESRFID2Loader.py b/src/PyHyperScattering/ESRFID2Loader.py index 4a4eeb3..4596567 100644 --- a/src/PyHyperScattering/ESRFID2Loader.py +++ b/src/PyHyperScattering/ESRFID2Loader.py @@ -1,4 +1,3 @@ -from PIL import Image from PyHyperScattering.FileLoader import FileLoader import os import pathlib @@ -7,17 +6,33 @@ import datetime import warnings import json -#from pyFAI import azimuthalIntegrator import numpy as np -import h5py import copy - import re +from .optional_dependencies import requires_optional, check_optional_dependency, warn_if_missing + +# Check for optional dependencies +HAS_PIL = check_optional_dependency('PIL') +HAS_H5PY = check_optional_dependency('h5py') + +if HAS_PIL: + from PIL import Image +else: + warn_if_missing('PIL') + +if HAS_H5PY: + import h5py +else: + warn_if_missing('h5py') + class ESRFID2Loader(FileLoader): ''' Loader for NEXUS files from the ID2 beamline at the ESRF + Note: This loader requires the following optional packages: + - 'h5py': Required for reading HDF5/NEXUS files + - 'PIL': Required for image processing ''' file_ext = '(.*)eiger2(.*).h5' md_loading_is_quick = True @@ -28,8 +43,10 @@ def __init__(self,md_parse_dict=None,pedestal_value=1e-6,masked_pixel_fill=np.na md_parse_dict (dict): keys should be names of underscore separated paramters in title. values should be regex to parse values pedestal_value: value to add to image in order to deal with zero_counts masked_pixel_fill: If None, pixels with value -10 will be converted to NaN. Otherwise, will be converted to this value - ''' + if not HAS_H5PY: + raise ImportError("The 'h5py' package is required for this loader to function. Please install it first.") + if md_parse_dict is None: self.md_regex = None self.md_keys=None @@ -44,11 +61,12 @@ def __init__(self,md_parse_dict=None,pedestal_value=1e-6,masked_pixel_fill=np.na self.pedestal_value=pedestal_value self.masked_pixel_fill = masked_pixel_fill self.cached_md = None - + @requires_optional('h5py') def loadMd(self,filepath,split_on='_',keys=None): return self.peekAtMd(filepath,split_on='_') + @requires_optional('h5py') def peekAtMd(self,filepath,split_on='_',keys=None): ## Open h5 file and grab attributes with h5py.File(str(filepath),'r') as h5: @@ -105,6 +123,7 @@ def peekAtMd(self,filepath,split_on='_',keys=None): return params + @requires_optional('h5py') def loadSingleImage(self,filepath,coords=None,return_q=True,image_slice=None,use_cached_md=False,**kwargs): ''' HELPER FUNCTION that loads a single image and returns an xarray with either pix_x / pix_y dimensions (if return_q == False) or qx / qy (if return_q == True) @@ -171,4 +190,3 @@ def loadSingleImage(self,filepath,coords=None,return_q=True,image_slice=None,use img += self.pedestal_value return img - diff --git a/src/PyHyperScattering/FileIO.py b/src/PyHyperScattering/FileIO.py index 816c716..a542a2f 100644 --- a/src/PyHyperScattering/FileIO.py +++ b/src/PyHyperScattering/FileIO.py @@ -3,23 +3,36 @@ import numpy as np import pickle import math -import h5py import pathlib import datetime import six import PyHyperScattering import pandas import json - from collections import defaultdict from . 
import _version -phs_version = _version.get_versions()['version'] +from .optional_dependencies import requires_optional, check_optional_dependency, warn_if_missing + +# Check for optional dependencies +HAS_H5PY = check_optional_dependency('h5py') +if HAS_H5PY: + import h5py +else: + warn_if_missing('h5py') + +phs_version = _version.get_versions()['version'] @xr.register_dataset_accessor('fileio') @xr.register_dataarray_accessor('fileio') class FileIO: + """ + File I/O accessor for xarray DataArrays and Datasets. + + Note: Some methods in this class require optional dependencies: + - 'h5py': Required for HDF5/NEXUS file operations + """ def __init__(self,xr_obj): self._obj=xr_obj @@ -32,27 +45,22 @@ def __init__(self,xr_obj): self._pyhyper_type = 'raw' def savePickle(self,filename): + """Save the DataArray/Dataset as a pickle file.""" with open(filename, 'wb') as file: pickle.dump(self._obj, file) + def saveZarr(self, filename, mode: str = 'w'): + """ + Save the DataArray as a .zarr file. - # - This was copied from the Toney group contribution for GIWAXS. - def saveZarr(self, filename, mode: str = 'w'): - """ - Save the DataArray as a .zarr file in a specific path, with a file name constructed from a prefix and suffix. + Parameters: + filename (Union[str, pathlib.Path]): Path to save the .zarr file + mode (str): The mode to use when saving the file. Default is 'w' + """ + da = self._obj + da.to_zarr(filename, mode=mode) - Parameters: - da (xr.DataArray): The DataArray to be saved. - base_path (Union[str, pathlib.Path]): The base path to save the .zarr file. - prefix (str): The prefix to use for the file name. - suffix (str): The suffix to use for the file name. - mode (str): The mode to use when saving the file. Default is 'w'. - """ - da = self._obj - ds = da.to_dataset(name='DA') - file_path = pathlib.Path(filename) - ds.to_zarr(file_path, mode=mode) - + @requires_optional('h5py') def saveNexus(self,fileName,compression=5): data = self._obj timestamp = datetime.datetime.now() @@ -266,7 +274,7 @@ def _unserialize_attrs(hdf,attrdict): encoding.replace('strftime-','')) else: warnings.warn(f'Unknown phs_encoding {encoding} while loading {entry}. Possible version mismatch. Loading as string.',stacklevel=2) - attrdict[entry] = hdf[entry][()] + attrdict[entry] = hdf[entry][()] except KeyError: attrdict[entry] = hdf[entry][()] return attrdict diff --git a/src/PyHyperScattering/HDR.py b/src/PyHyperScattering/HDR.py index ac5e2ef..cb3faa8 100755 --- a/src/PyHyperScattering/HDR.py +++ b/src/PyHyperScattering/HDR.py @@ -1,81 +1,113 @@ import numpy as np -import skimage.morphology from collections import defaultdict from copy import deepcopy import pandas as pd import xarray as xr +from .optional_dependencies import requires_optional, check_optional_dependency, warn_if_missing +# Check for optional dependencies +HAS_SKIMAGE = check_optional_dependency('scikit-image') + +if HAS_SKIMAGE: + import skimage.morphology +else: + warn_if_missing('scikit-image') + + +def scaleAndMask(raw_xr, mask_hi=True, mask_lo=True, exposure_cutoff_hi=45000, exposure_cutoff_lo=20, close_mask=True): + """ + Scale and mask the input data array. + + Note: The close_mask parameter requires scikit-image to be installed. + If not installed, close_mask will be ignored. + """ + if close_mask and not HAS_SKIMAGE: + warnings.warn("scikit-image is not installed. 
close_mask will be ignored.", UserWarning) + close_mask = False -def scaleAndMask(raw_xr,mask_hi=True,mask_lo=True,exposure_cutoff_hi=45000,exposure_cutoff_lo=20,close_mask=True): groupby_dims = [] for dim in raw_xr.coords['system'].unstack('system').coords.keys(): if dim not in ['filenumber','exposure']: groupby_dims.append(dim) print(f'Grouping by: {groupby_dims}') - data_rows,dest_coords= hdr_recurse(raw_xr,groupby_dims,{}, - mask_hi=mask_hi,mask_lo=mask_lo, - exposure_cutoff_hi=exposure_cutoff_hi,exposure_cutoff_lo=exposure_cutoff_lo, + data_rows, dest_coords = hdr_recurse(raw_xr, groupby_dims, {}, + mask_hi=mask_hi, mask_lo=mask_lo, + exposure_cutoff_hi=exposure_cutoff_hi, exposure_cutoff_lo=exposure_cutoff_lo, close_mask=close_mask) - #return data_rows,dest_coords - index = pd.MultiIndex.from_arrays(list(dest_coords.values()),names=list(dest_coords.keys())) + + index = pd.MultiIndex.from_arrays(list(dest_coords.values()), names=list(dest_coords.keys())) index.name = 'system' - out = xr.concat(data_rows,dim=index) + out = xr.concat(data_rows, dim=index) return out -def hdr_recurse(input_xr,groupby_dims,dest_coords,**kw): - data_rows_accumulator = [] - dest_coords_accumulator = defaultdict(list) - if len(groupby_dims) > 0: - target_dim = groupby_dims.pop() - print(f'Grouping on {target_dim}') - print(f' number of groups {len(input_xr.groupby(target_dim))}') - for xre in input_xr.groupby(target_dim,squeeze=False): - #print(f' Element {xre[target_dim]}') - dest_coords[target_dim] = (xre[0]) - print(f' Launching workOrRecurse with xr, groupby {groupby_dims}, coords {dest_coords}') - data_rows_new,dest_coords_new = hdr_recurse(xre[1],deepcopy(groupby_dims),deepcopy(dest_coords),**kw) - for dim in dest_coords_new.keys(): - if type(dest_coords_new[dim]) is list: - for item in dest_coords_new[dim]: - dest_coords_accumulator[dim].append(item) - else: - dest_coords_accumulator[dim].append(dest_coords_new[dim]) - if type(data_rows_new) is list: - for item in data_rows_new: - data_rows_accumulator.append(item) + +def hdr_recurse(input_xr, groupby_dims, dest_coords, **kw): + data_rows_accumulator = [] + dest_coords_accumulator = defaultdict(list) + if len(groupby_dims) > 0: + target_dim = groupby_dims.pop() + print(f'Grouping on {target_dim}') + print(f' number of groups {len(input_xr.groupby(target_dim))}') + for xre in input_xr.groupby(target_dim, squeeze=False): + dest_coords[target_dim] = (xre[0]) + print(f' Launching workOrRecurse with xr, groupby {groupby_dims}, coords {dest_coords}') + data_rows_new, dest_coords_new = hdr_recurse(xre[1], deepcopy(groupby_dims), deepcopy(dest_coords), **kw) + for dim in dest_coords_new.keys(): + if type(dest_coords_new[dim]) is list: + for item in dest_coords_new[dim]: + dest_coords_accumulator[dim].append(item) else: - data_rows_accumulator.append(data_rows_new) - return data_rows_accumulator, dest_coords_accumulator - else: # if there are no more dimensions to unstack - return hdr_work(input_xr,groupby_dims,dest_coords,**kw) - -def hdr_work(input_xr,groupby_dims,dest_coords,**kw): - masked_accumulator = [] - exposure_accumulator = [] - for da in input_xr.groupby('exposure',squeeze=False): - print(f' Processing exposure {da[0]}') - exposure = da[0] - da = da[1] - new_data = da.mean('system').values - if kw['mask_hi']: - new_data = np.ma.masked_greater_equal(new_data,kw['exposure_cutoff_hi']/exposure) - mask_hi_stat = np.sum(new_data.mask.astype(bool)) - print(f" Masking hi: pixels >= {kw['exposure_cutoff_hi']} cts or {kw['exposure_cutoff_hi']/exposure} cps 
resulted in {mask_hi_stat} pixels masked") - if kw['mask_lo']: - new_data = np.ma.masked_less_equal(new_data,kw['exposure_cutoff_lo']/exposure) - mask_lo_stat = np.sum(new_data.mask.astype(bool))-mask_hi_stat - print(f" Masking lo: pixels <= {kw['exposure_cutoff_lo']} cts or {kw['exposure_cutoff_lo']/exposure} cps resulted in {mask_lo_stat} pixels masked") - print(f' masking resulted in {np.sum(new_data.mask.astype(bool))} masked pixels') + dest_coords_accumulator[dim].append(dest_coords_new[dim]) + if type(data_rows_new) is list: + for item in data_rows_new: + data_rows_accumulator.append(item) + else: + data_rows_accumulator.append(data_rows_new) + return data_rows_accumulator, dest_coords_accumulator + else: + return hdr_work(input_xr, groupby_dims, dest_coords, **kw) - if kw['close_mask']: - before = new_data.mask.sum() - new_data.mask = skimage.morphology.binary_closing(new_data.mask) - print(f' binary closing completed, masked pixels {before} --> {new_data.mask.sum()}') - masked_accumulator.append(new_data) - exposure_accumulator.append(exposure) - avg = np.ma.average(masked_accumulator,axis=0,weights=exposure_accumulator) - print(f' after averaging, masked pixels = {avg.mask.sum()}') +@requires_optional('scikit-image') +def hdr_work(input_xr, groupby_dims, dest_coords, mask_hi=True, mask_lo=True, + exposure_cutoff_hi=45000, exposure_cutoff_lo=20, close_mask=True): + """ + Process high dynamic range data. - return xr.DataArray(avg,dims=['pix_x','pix_y'],attrs={}),dest_coords \ No newline at end of file + Note: This function requires scikit-image to be installed for mask closing operations. + """ + data = input_xr.data + exposures = input_xr.coords['system'].unstack('system').coords['exposure'] + + # Create masks for high and low cutoffs + if mask_hi: + mask_hi = data > exposure_cutoff_hi + if close_mask: + mask_hi = skimage.morphology.binary_closing(mask_hi) + else: + mask_hi = np.zeros_like(data, dtype=bool) + + if mask_lo: + mask_lo = data < exposure_cutoff_lo + if close_mask: + mask_lo = skimage.morphology.binary_closing(mask_lo) + else: + mask_lo = np.zeros_like(data, dtype=bool) + + # Scale data by exposure time + data_scaled = data / exposures[:, np.newaxis, np.newaxis] + + # Apply masks + data_scaled[mask_hi] = np.nan + data_scaled[mask_lo] = np.nan + + # Take median of non-masked values + output = np.nanmedian(data_scaled, axis=0) + + # Create output DataArray + for dim in input_xr.coords['system'].unstack('system').coords.keys(): + if dim not in ['filenumber', 'exposure']: + dest_coords[dim] = input_xr.coords['system'].unstack('system').coords[dim][0] + + return xr.DataArray(output, dims=input_xr.dims[1:], coords={k: input_xr.coords[k] for k in input_xr.dims[1:]}) \ No newline at end of file diff --git a/src/PyHyperScattering/IntegrationUtils.py b/src/PyHyperScattering/IntegrationUtils.py index 123981d..e98da56 100644 --- a/src/PyHyperScattering/IntegrationUtils.py +++ b/src/PyHyperScattering/IntegrationUtils.py @@ -3,25 +3,27 @@ import numpy as np import math from tqdm.auto import tqdm +import pandas as pd +import json +from PyHyperScattering.optional_dependencies import requires_optional, check_optional_dependency +# Import optional dependencies try: import matplotlib.pyplot as plt from matplotlib.colors import LogNorm,Normalize import holoviews as hv import hvplot.xarray import skimage.draw - -except (ModuleNotFoundError,ImportError): - warnings.warn('Could not import a dependency for interactive integration utils. 
Install pyhyperscattering[ui] or pyhyperscattering[all].',stacklevel=2) -import pandas as pd - -import json +except ImportError: + plt = None + hv = None class Check: ''' Quick Utility to display a mask next to an image, to sanity check the orientation of e.g. an imported mask ''' + @requires_optional('matplotlib') def checkMask(integrator,img,img_min=1,img_max=10000,img_scaling='log',alpha=1): ''' draw an overlay of the mask and an image @@ -44,6 +46,7 @@ def checkMask(integrator,img,img_min=1,img_max=10000,img_scaling='log',alpha=1): img.plot(norm=norm,ax=ax) ax.set_aspect(1) ax.imshow(integrator.mask,origin='lower',alpha=alpha) + @requires_optional('matplotlib') def checkCenter(integrator,img,img_min=1,img_max=10000,img_scaling='log'): ''' draw the beamcenter on an image @@ -71,6 +74,7 @@ def checkCenter(integrator,img,img_min=1,img_max=10000,img_scaling='log'): ax.add_patch(beamcenter) ax.add_patch(guide1) ax.add_patch(guide2) + @requires_optional('matplotlib') def checkAll(integrator,img,img_min=1,img_max=10000,img_scaling='log',alpha=1,d_inner=50,d_outer=150): ''' draw the beamcenter and overlay mask on an image @@ -137,6 +141,7 @@ def __init__(self,frame, cmap='viridis', clim=(5e0, 5e3), width=800, height=700) self.poly = hv.Polygons([]) self.path_annotator = hv.annotate.instance() + @requires_optional('holoviews') def ui(self): ''' Draw the DrawMask UI in a Jupyter notebook. diff --git a/src/PyHyperScattering/Nexus.py b/src/PyHyperScattering/Nexus.py index 02ae153..eb42557 100644 --- a/src/PyHyperScattering/Nexus.py +++ b/src/PyHyperScattering/Nexus.py @@ -3,9 +3,18 @@ import numpy as np import math import pathlib -import h5py -import pathlib -''' +from .optional_dependencies import requires_optional, check_optional_dependency, warn_if_missing + +# Check for optional dependencies +HAS_H5PY = check_optional_dependency('h5py') + +if HAS_H5PY: + import h5py +else: + warn_if_missing('h5py') + + +@requires_optional('h5py') def save(xr,fileName): # figure out if xr is a raw or integrated array @@ -217,7 +226,4 @@ def save(xr,fileName): def load(path): if type(path) is str: - raise NotImplementedError - -''' - \ No newline at end of file + raise NotImplementedError \ No newline at end of file diff --git a/src/PyHyperScattering/PFEnergySeriesIntegrator.py b/src/PyHyperScattering/PFEnergySeriesIntegrator.py index f100edf..6f5604e 100755 --- a/src/PyHyperScattering/PFEnergySeriesIntegrator.py +++ b/src/PyHyperScattering/PFEnergySeriesIntegrator.py @@ -1,13 +1,25 @@ -from pyFAI import azimuthalIntegrator from PyHyperScattering.PFGeneralIntegrator import PFGeneralIntegrator -import h5py import warnings import xarray as xr import numpy as np import math import pandas as pd from tqdm.auto import tqdm -#tqdm.pandas() +from .optional_dependencies import requires_optional, check_optional_dependency, warn_if_missing + +# Check for optional dependencies +HAS_PYFAI = check_optional_dependency('pyFAI') +HAS_H5PY = check_optional_dependency('h5py') + +if HAS_PYFAI: + from pyFAI import azimuthalIntegrator +else: + warn_if_missing('pyFAI') + +if HAS_H5PY: + import h5py +else: + warn_if_missing('h5py') # the following block monkey-patches xarray to add tqdm support. This will not be needed once tqdm v5 releases. 
from xarray.core.groupby import DataArrayGroupBy,DatasetGroupBy diff --git a/src/PyHyperScattering/SMIRSoXSLoader.py b/src/PyHyperScattering/SMIRSoXSLoader.py index 7ceab1f..2836ff2 100644 --- a/src/PyHyperScattering/SMIRSoXSLoader.py +++ b/src/PyHyperScattering/SMIRSoXSLoader.py @@ -2,17 +2,21 @@ from scipy.interpolate import RectBivariateSpline import os import xarray as xr -import pandas as pd import numpy as np -import warnings -import os -import datetime import fabio +import pandas as pd +import warnings +import time +import h5py +from PyHyperScattering.optional_dependencies import requires_optional, check_optional_dependency + +# Import optional dependencies try: import dask.array as da import dask except ImportError: - warnings.warn('Failed to import Dask, if Dask reduction desired install pyhyperscattering[performance]',stacklevel=2) + da = None + dask = None class SMIRSoXSLoader(FileLoader): ''' @@ -27,7 +31,13 @@ def __init__(self, profile_time = True): profile_time (bool, default True): print time/profiling data to console ''' self.profile_time = profile_time - + self.use_chunked_loading = use_chunked_loading + + @requires_optional('dask') + def _load_with_dask(self, data): + """Load data using dask for chunked processing""" + return da.from_array(data) + def list_files(self,file_path,include_str): files = [] new_files = [] @@ -102,7 +112,7 @@ def loadDirectory(self, directory, pol_strs = [], pols = [], remove_tail = '_xxx remove_strs (list of strings): list of substrings to be removed from file name which is then pushed to sample_name and sampleid attributes returned. ''' if self.profile_time: - start = datetime.datetime.now() + start = time.time() config = {} @@ -160,8 +170,13 @@ def loadDirectory(self, directory, pol_strs = [], pols = [], remove_tail = '_xxx interpolator = RectBivariateSpline(Qx, Qy, image) img = interpolator(qx_new, qy_new) + if self.use_chunked_loading: + img = self._load_with_dask(img) outlist.append(img) - data = da.stack(outlist,axis=2) + if self.use_chunked_loading: + data = da.stack(outlist,axis=2) + else: + data = np.stack(outlist,axis=2) config['sample_name'] = files[0][:-len(remove_tail)] for remove_str in remove_strs: @@ -181,6 +196,6 @@ def loadDirectory(self, directory, pol_strs = [], pols = [], remove_tail = '_xxx config['rsoxs_config'] = 'saxs' if self.profile_time: - print(f'Finished reading ' + str(num_energies) + ' energies. Time required: ' + str(datetime.datetime.now()-start)) + print(f'Finished reading ' + str(num_energies) + ' energies. 
Time required: ' + str(time.time()-start)) return xr.DataArray(data, dims=("qx", "qy", "energy"), coords={"qy":max_range_Qy, "qx":max_range_Qx, "energy":elist},attrs=config).rename(config['sample_name']) diff --git a/src/PyHyperScattering/SST1RSoXSDB.py b/src/PyHyperScattering/SST1RSoXSDB.py index 9840ade..a42d90b 100644 --- a/src/PyHyperScattering/SST1RSoXSDB.py +++ b/src/PyHyperScattering/SST1RSoXSDB.py @@ -13,28 +13,30 @@ import asyncio import time import copy +from .optional_dependencies import check_optional_dependency, warn_if_missing, requires_optional -try: +# Check for tiled and related optional dependencies +HAS_TILED = check_optional_dependency('tiled') +if HAS_TILED: os.environ["TILED_SITE_PROFILES"] = "/nsls2/software/etc/tiled/profiles" - from tiled.client import from_profile,from_uri + from tiled.client import from_profile, from_uri from httpx import HTTPStatusError import tiled import dask - try: from bluesky_tiled_plugins.queries import RawMongo, Key, FullText, Contains, Regex ## Intended to handle database navigation for 2025 onwards - except ImportError: from databroker.queries import RawMongo, Key, FullText, Contains, Regex -except Exception: - print( - "Imports failed. Are you running on a machine with proper libraries for tiled, etc.?" - ) - -import copy + try: + from bluesky_tiled_plugins.queries import RawMongo, Key, FullText, Contains, Regex + except ImportError: + from databroker.queries import RawMongo, Key, FullText, Contains, Regex +else: + warn_if_missing('tiled') class SST1RSoXSDB: """ - Loader for bluesky run xarrays form NSLS-II SST1 RSoXS instrument - - + Loader for bluesky run xarrays from NSLS-II SST1 RSoXS instrument. + + Note: This loader requires the 'tiled' package for bluesky data access. + If not installed, certain functionality will be limited. """ file_ext = "" @@ -55,19 +57,20 @@ class SST1RSoXSDB: "energy": "en_monoen_setpoint", } + @requires_optional('tiled') def __init__( - self, - corr_mode=None, - user_corr_fun=None, - dark_subtract=True, - dark_pedestal=0, - exposure_offset=0, - catalog=None, - catalog_kwargs={}, - use_precise_positions=False, - use_chunked_loading=False, - suppress_time_dimension=True, - ): + self, + corr_mode=None, + user_corr_fun=None, + dark_subtract=True, + dark_pedestal=0, + exposure_offset=0, + catalog=None, + catalog_kwargs={}, + use_precise_positions=False, + use_chunked_loading=False, + suppress_time_dimension=True, + ): """ Args: corr_mode (str): origin to use for the intensity correction. Can be 'expt','i0','expt+i0','user_func','old',or 'none' @@ -80,7 +83,6 @@ def __init__( use_chunked_loading (bool): if True, returns Dask backed arrays for further Dask processing. if false, behaves in conventional Numpy-backed way suppress_time_dimension (bool): if True, time is never a dimension that you want in your data and will be dropped (default). if False, time will be a dimension in almost every scan. """ - if corr_mode == None: warnings.warn( "Correction mode was not set, not performing *any* intensity corrections. 
Are you" @@ -118,21 +120,7 @@ def __init__( self.use_precise_positions = use_precise_positions self.suppress_time_dimension = suppress_time_dimension - # def loadFileSeries(self,basepath): - # try: - # flist = list(basepath.glob('*primary*.tiff')) - # except AttributeError: - # basepath = pathlib.Path(basepath) - # flist = list(basepath.glob('*primary*.tiff')) - # print(f'Found {str(len(flist))} files.') - # - # out = xr.DataArray() - # for file in flist: - # single_img = self.loadSingleImage(file) - # out = xr.concat(out,single_img) - # - # return out - + @requires_optional('tiled') def runSearch(self, **kwargs): """ Search the catalog using given commands. @@ -162,6 +150,7 @@ def summarize_run(self, *args, **kwargs): ) return self.searchCatalog(*args, **kwargs) + @requires_optional('tiled') def searchCatalog( self, outputType: str = "default", @@ -195,7 +184,7 @@ def searchCatalog( Ex3: Complex Search with custom parameters bsCatalogReduced3 = db_loader.searchCatalog(['angle', '-1.6', 'numeric'], outputType='all',sample="BBP_", cycle = "2022-2", - institution="NIST",plan="carbon", userOutputs = [["Exposure Multiplier", "exptime", r'catalog.start'], ["Stop + institution="NIST",plan="carbon", userOutputs = [["Exposure Multiplier","exptime", r'catalog.start'], ["Stop Time","time",r'catalog.stop']]) Args: @@ -489,6 +478,7 @@ def do_list_append(run, scan_ids, sample_ids, plan_names, uids, npts, start_time npts.append(0) start_times.append(doc["time"]) + @requires_optional('tiled') def loadSeries( self, run_list, @@ -550,6 +540,7 @@ def loadSeries( .stack(system=new_system) ) + @requires_optional('tiled') def loadRun( self, run, @@ -819,6 +810,7 @@ def subtract_dark(img, pedestal=100, darks=None): def peekAtMd(self, run): return self.loadMd(run) + @requires_optional('tiled') def loadMonitors( self, entry, @@ -831,8 +823,7 @@ def loadMonitors( Creates a dataset containing all monitor streams (e.g., Mesh Current, Shutter Timing, etc.) as data variables mapped against time. Optionally, all streams can be indexed against the primary measurement time for the images using integrate_onto_images. Whether or not time integration attempts to account for shutter opening/closing is controlled - by useShutterThinning. Warning: for exposure times < 0.5 seconds at SST (as of 9 Feb 2023), useShutterThinning=True - may excessively cull data points. + by useShutterThinning. Warning: for exposure times ~ < 0.5 s, useShutterThinning=True may excessively cull data points. Parameters ---------- diff --git a/src/PyHyperScattering/SST1RSoXSLoader.py b/src/PyHyperScattering/SST1RSoXSLoader.py index dc6b523..d2bfd1b 100644 --- a/src/PyHyperScattering/SST1RSoXSLoader.py +++ b/src/PyHyperScattering/SST1RSoXSLoader.py @@ -1,4 +1,3 @@ -from PIL import Image from PyHyperScattering.FileLoader import FileLoader import os import pathlib @@ -7,26 +6,38 @@ import datetime import warnings import json -#from pyFAI import azimuthalIntegrator import numpy as np -try: - import dask.array as da -except ImportError: - print('Could not import Dask. Chunked loading may not work. 
Install Dask or pyhyperscattering[performance] if this is desired.') +from .optional_dependencies import requires_optional, check_optional_dependency, warn_if_missing + +# Check for optional dependencies +HAS_PIL = check_optional_dependency('PIL') +HAS_DASK = check_optional_dependency('dask') +if HAS_PIL: + from PIL import Image +else: + warn_if_missing('PIL') + +if HAS_DASK: + import dask.array as da +else: + warn_if_missing('dask') class SST1RSoXSLoader(FileLoader): ''' Loader for TIFF files from NSLS-II SST1 RSoXS instrument + Note: This loader has optional dependencies: + - 'PIL': Required for reading TIFF files + - 'dask': Required for chunked data loading ''' file_ext = '(.*?)primary(.*?).tiff' md_loading_is_quick = True pix_size_1 = 0.06 pix_size_2 = 0.06 - def __init__(self,corr_mode=None,user_corr_func=None,dark_pedestal=100,exposure_offset=0,constant_md={},use_chunked_loading=False): + def __init__(self, corr_mode=None, user_corr_func=None, dark_pedestal=100, exposure_offset=0, constant_md={}, use_chunked_loading=False): ''' Args: corr_mode (str): origin to use for the intensity correction. Can be 'expt','i0','expt+i0','user_func','old',or 'none' @@ -34,7 +45,14 @@ def __init__(self,corr_mode=None,user_corr_func=None,dark_pedestal=100,exposure_ dark_pedestal (numeric): value to subtract(/add, if negative) to the whole image. this should match the instrument setting for suitcased tiffs, typically 100. exposure_offset (numeric): value to add to the exposure time. Measured at 2ms with the piezo shutter in Dec 2019 by Jacob Thelen, NIST constant_md (dict): values to insert into every metadata load. + use_chunked_loading (bool): whether to use dask for chunked loading ''' + if not HAS_PIL: + raise ImportError("The 'PIL' package is required for this loader to function. Please install it first.") + + if use_chunked_loading and not HAS_DASK: + warnings.warn("Dask is not installed. Chunked loading will be disabled.", UserWarning) + use_chunked_loading = False if corr_mode == None: warnings.warn("Correction mode was not set, not performing *any* intensity corrections. 
Are you sure this is "+ @@ -43,32 +61,14 @@ def __init__(self,corr_mode=None,user_corr_func=None,dark_pedestal=100,exposure_ else: self.corr_mode = corr_mode - self.constant_md = constant_md - self.dark_pedestal = dark_pedestal self.user_corr_func = user_corr_func self.exposure_offset = exposure_offset self.use_chunked_loading = use_chunked_loading - # self.darks = {} - # def loadFileSeries(self,basepath): - # try: - # flist = list(basepath.glob('*primary*.tiff')) - # except AttributeError: - # basepath = pathlib.Path(basepath) - # flist = list(basepath.glob('*primary*.tiff')) - # print(f'Found {str(len(flist))} files.') - # - # out = xr.DataArray() - # for file in flist: - # single_img = self.loadSingleImage(file) - # out = xr.concat(out,single_img) - # - # return out - - - - def loadSingleImage(self,filepath,coords=None, return_q=False,image_slice=None,use_cached_md=False,**kwargs): + + @requires_optional('PIL') + def loadSingleImage(self, filepath, coords=None, return_q=False, image_slice=None, use_cached_md=False, **kwargs): ''' HELPER FUNCTION that loads a single image and returns an xarray with either pix_x / pix_y dimensions (if return_q == False) or qx / qy (if return_q == True) diff --git a/src/PyHyperScattering/WPIntegrator.py b/src/PyHyperScattering/WPIntegrator.py index 68f5970..7d1bb5e 100644 --- a/src/PyHyperScattering/WPIntegrator.py +++ b/src/PyHyperScattering/WPIntegrator.py @@ -1,49 +1,46 @@ -MACHINE_HAS_CUDA = True from PyHyperScattering.FileLoader import FileLoader import os import xarray as xr -import pandas as pd import numpy as np +import pandas as pd import warnings -import re -import os -import datetime import time import h5py import skimage +from PyHyperScattering.optional_dependencies import requires_optional, check_optional_dependency + +# Import optional dependencies - these will be None if not available try: import cupy as cp import cupyx.scipy.ndimage as ndigpu except ImportError: - MACHINE_HAS_CUDA=False - warnings.warn('Could not import CuPy or ndigpu. If you expect this machine to support CuPy, check dependencies. Falling back to scikit-image/numpy CPU integration.',stacklevel=2) + cp = None + ndigpu = None + try: import dask.array as da import dask except ImportError: - warnings.warn('Failed to import Dask, if Dask reduction is desired install pyhyperscattering[performance]',stacklevel=2) - + da = None + dask = None class WPIntegrator(): ''' Integrator for qx/qy format xarrays using skimage.transform.warp_polar or a custom cuda-accelerated version, warp_polar_gpu ''' - def __init__(self,return_cupy=False,force_np_backend=False,use_chunked_processing=False): + def __init__(self, return_cupy=False, force_np_backend=False, use_chunked_processing=False): ''' Args: return_cupy (bool, default False): return arrays as cupy rather than numpy, for further GPU processing force_np_backend (bool, default False): if true, use numpy backend regardless of whether CuPy is available. 
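A hedged sketch of what the CuPy availability flag gates; integrator, img, and center are placeholder names, and warp_polar_gpu / skimage.transform.warp_polar are the two backends already named in the class docstring:

import skimage.transform

# integrator is a WPIntegrator instance; img is a 2-D detector image (numpy array).
if integrator.MACHINE_HAS_CUDA:
    polar = integrator.warp_polar_gpu(img, center=center)      # CuPy-accelerated path
else:
    polar = skimage.transform.warp_polar(img, center=center)   # CPU fallback via scikit-image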
''' - if MACHINE_HAS_CUDA and not force_np_backend: - self.MACHINE_HAS_CUDA = True - else: - self.MACHINE_HAS_CUDA = False - + self.MACHINE_HAS_CUDA = check_optional_dependency('cupy') and not force_np_backend self.return_cupy = return_cupy - self.use_chunked_processing=use_chunked_processing - - def warp_polar_gpu(self,image, center=None, radius=None, output_shape=None, **kwargs): + self.use_chunked_processing = use_chunked_processing + + @requires_optional('cupy') + def warp_polar_gpu(self, image, center=None, radius=None, output_shape=None, **kwargs): """ Function to emulate warp_polar in skimage.transform on the GPU. Not all parameters are supported @@ -63,7 +60,6 @@ def warp_polar_gpu(self,image, center=None, radius=None, output_shape=None, **kw Returns ------- polar: numpy.ndarray or cupy.ndarray depending on value of return_cupy - polar image """ image = cp.asarray(image) if radius is None: @@ -175,6 +171,7 @@ def integrateImageStack_legacy(self,data): return data_int + @requires_optional('dask') def integrateImageStack_dask(self,data,chunksize=5): #int_stack = img_stack.groupby('system').map(self.integrateSingleImage) #return int_stack @@ -224,5 +221,4 @@ def integrateImageStack_dask(self,data,chunksize=5): template = xr.DataArray(np.empty(shape),coords=coord_dict_sorted) template = template.chunk({indexes[0]:chunksize}) integ_fly = data.map_blocks(self.integrateImageStack_legacy,template=template)#integ_traditional.chunk({'energy':5})) - return integ_fly - \ No newline at end of file + return integ_fly \ No newline at end of file diff --git a/src/PyHyperScattering/__init__.py b/src/PyHyperScattering/__init__.py index 339ff2d..8b8498b 100644 --- a/src/PyHyperScattering/__init__.py +++ b/src/PyHyperScattering/__init__.py @@ -1,6 +1,13 @@ from PyHyperScattering import load from PyHyperScattering import integrate from PyHyperScattering import util +from PyHyperScattering import optional_dependencies from . import _version __version__ = _version.get_versions()['version'] + +# Check for commonly used optional dependencies on import +# This will issue at most one warning per missing package per session +optional_dependencies.warn_if_missing('cupy') +optional_dependencies.warn_if_missing('dask') +optional_dependencies.warn_if_missing('holoviews') diff --git a/src/PyHyperScattering/cyrsoxsLoader.py b/src/PyHyperScattering/cyrsoxsLoader.py index 788a2dd..2622dee 100644 --- a/src/PyHyperScattering/cyrsoxsLoader.py +++ b/src/PyHyperScattering/cyrsoxsLoader.py @@ -1,19 +1,21 @@ import os import xarray as xr -import pandas as pd import numpy as np +import pandas as pd import warnings -import re -import os -import datetime import time +import datetime import h5py import pathlib +from PyHyperScattering.optional_dependencies import requires_optional, check_optional_dependency + +# Import optional dependencies try: import dask.array as da import dask except ImportError: - warnings.warn('Failed to import Dask, if Dask reduction desired install pyhyperscattering[performance]',stacklevel=2) + da = None + dask = None class cyrsoxsLoader(): ''' @@ -24,19 +26,18 @@ class cyrsoxsLoader(): ''' file_ext = 'config.txt' md_loading_is_quick = False - - - def __init__(self,eager_load=False,profile_time=True,use_chunked_loading=False): + + def __init__(self, eager_load=False, profile_time=True, use_chunked_loading=False): ''' Args: - eager_load (bool, default False): block and wait for files to be created rather than erroring. useful for live intake as simulations are being run to save time. 
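Illustrative use of the chunked-loading switch, assuming dask is installed and a CyRSoXS output directory at the hypothetical path './sim_output':

from PyHyperScattering.cyrsoxsLoader import cyrsoxsLoader

# loadDirectory dispatches to the Dask-backed loader when use_chunked_loading=True
# (see the loadDirectory/loadDirectoryDask hunks later in this diff).
loader = cyrsoxsLoader(use_chunked_loading=True)
data = loader.loadDirectory('./sim_output')   # Dask-backed qx/qy DataArray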
+ eager_load (bool, default False): block and wait for files to be created rather than erroring. useful for live intake as simulations are being run to save time. profile_time (bool, default True): print time/profiling data to console use_chunked_loading (bool, default False): generate Dask-backed arrays ''' self.eager_load = eager_load self.profile_time = profile_time self.use_chunked_loading = use_chunked_loading - + def read_config(self,fname): ''' Reads config.txt from a CyRSoXS simulation and produces a dictionary of values. @@ -65,6 +66,7 @@ def read_config(self,fname): value = str(value) config[key] = value return config + def loadDirectory(self,directory,method=None,**kwargs): if method == 'dask' or (method is None and self.use_chunked_loading): return self.loadDirectoryDask(directory,**kwargs) @@ -72,7 +74,8 @@ def loadDirectory(self,directory,method=None,**kwargs): return self.loadDirectoryLegacy(directory,**kwargs) else: raise NotImplementedError('unsupported method {method}, expected "dask" or "legacy"') - + + @requires_optional('dask') def loadDirectoryDask(self,directory,output_dir='HDF5',morphology_file=None, PhysSize=None): ''' Loads a CyRSoXS simulation output directory into a Dask-backed qx/qy xarray. @@ -320,8 +323,8 @@ def datacubes_params(maindir, prefix, params): print(f'Finished reading ' + str(num_energies) + ' energies. Time required: ' + str(datetime.datetime.now()-estart)) data = data.reshape(numparams*num_energies, NumY, NumX, order ='C') data = data.reshape(numparams, num_energies, NumY, NumX, order ='C') - data_remeshed = data_remeshed.reshape(numparams*num_energies,lenchi, lenq, order ='C') - data_remeshed = data_remeshed.reshape(numparams, num_energies,lenchi, lenq, order ='C') + data_remeshed = data_remeshed.reshape(numparams*num_energies, lenchi, lenq, order='C') + data_remeshed = data_remeshed.reshape(numparams, num_energies, lenchi, lenq, order='C') lfoo = xr.DataArray(data, dims=("param","energy", "Qy", "Qx"), coords={"energy":elist, "param":params, "Qy":Qy, "Qx":Qx}) lbar = xr.DataArray(data_remeshed, dims=("param", "energy", "chi", "q"), coords={"chi":output_chi, "q":output_q, "energy":elist, "param":params}) @@ -329,4 +332,4 @@ def datacubes_params(maindir, prefix, params): print(f'Finished reading ' + str(numparams) + ' parameters. 
Time required: ' + str(datetime.datetime.now()-start)) return lfoo, lbar -''' +''' \ No newline at end of file diff --git a/src/PyHyperScattering/optional_dependencies.py b/src/PyHyperScattering/optional_dependencies.py new file mode 100644 index 0000000..1d354ca --- /dev/null +++ b/src/PyHyperScattering/optional_dependencies.py @@ -0,0 +1,126 @@ +"""Module for managing optional dependencies in PyHyperScattering.""" + +import importlib +import warnings +from functools import wraps + +# Dictionary of optional dependencies and their associated features +OPTIONAL_DEPS = { + 'cupy': { + 'group': 'performance', + 'feature': 'GPU acceleration' + }, + 'dask': { + 'group': 'performance', + 'feature': 'parallel processing and chunked loading' + }, + 'holoviews': { + 'group': 'ui', + 'feature': 'interactive visualization' + }, + 'hvplot': { + 'group': 'ui', + 'feature': 'interactive plotting' + }, + 'tiled': { + 'group': 'bluesky', + 'feature': 'bluesky data access' + }, + 'databroker': { + 'group': 'bluesky', + 'feature': 'bluesky data access' + }, + 'PIL': { + 'group': 'io', + 'feature': 'image file loading' + }, + 'fabio': { + 'group': 'io', + 'feature': 'image file loading' + }, + 'h5py': { + 'group': 'io', + 'feature': 'HDF5/NEXUS file loading' + }, + 'pyFAI': { + 'group': 'processing', + 'feature': 'azimuthal integration' + }, + 'astropy': { + 'group': 'io', + 'feature': 'FITS file loading' + }, + 'scikit-image': { + 'group': 'processing', + 'feature': 'image processing' + } +} + +_warned_packages = set() + +def check_optional_dependency(package_name): + """ + Check if an optional dependency is available. + + Args: + package_name (str): Name of the package to check + + Returns: + bool: True if package is available, False otherwise + """ + try: + importlib.import_module(package_name) + return True + except ImportError: + return False + +def requires_optional(package_name): + """ + Decorator to mark functions that require optional dependencies. + + Args: + package_name (str): Name of the required package + + Returns: + callable: Decorated function that checks for the dependency + + Raises: + ImportError: If the required package is not installed + """ + def decorator(func): + @wraps(func) + def wrapper(*args, **kwargs): + if not check_optional_dependency(package_name): + dep_info = OPTIONAL_DEPS.get(package_name, {'group': 'unknown', 'feature': 'unknown'}) + if package_name not in _warned_packages: + warnings.warn( + f"The {dep_info['feature']} feature requires {package_name}, which is not installed. " + f"Install it with 'pip install pyhyperscattering[{dep_info['group']}]'", + ImportWarning, + stacklevel=2 + ) + _warned_packages.add(package_name) + raise ImportError( + f"Cannot use {func.__name__}: {package_name} is required but not installed. " + f"Install it with 'pip install pyhyperscattering[{dep_info['group']}]'" + ) + return func(*args, **kwargs) + return wrapper + return decorator + +def warn_if_missing(package_name): + """ + Issue a warning if an optional package is missing, but only once per session. + + Args: + package_name (str): Name of the package to check + """ + if not check_optional_dependency(package_name) and package_name not in _warned_packages: + dep_info = OPTIONAL_DEPS.get(package_name, {'group': 'unknown', 'feature': 'unknown'}) + warnings.warn( + f"The {dep_info['feature']} feature requires {package_name}, which is not installed. 
" + f"Install it with 'pip install pyhyperscattering[{dep_info['group']}]'", + ImportWarning, + stacklevel=2 + ) + _warned_packages.add(package_name)