diff --git a/bin/run_vega.py b/bin/run_vega.py old mode 100644 new mode 100755 diff --git a/vega/build_config.py b/vega/build_config.py index 9bcd5f70..e01156fd 100644 --- a/vega/build_config.py +++ b/vega/build_config.py @@ -74,6 +74,8 @@ def __init__(self, options={}, overwrite=False): self.options['marginalize-all-rmin-cuts'] = options.get('marginalize-all-rmin-cuts', False) self.options['marginalize-prior-sigma'] = options.get('marginalize-prior-sigma', 10.0) self.options['fit-marginalized-scales'] = options.get('fit-marginalized-scales', False) + self.options['marginalize-match-data-bins'] = options.get( + 'marginalize-match-data-bins', False) self.options['hcd_model'] = options.get('hcd_model', None) self.options['fvoigt_model'] = options.get('fvoigt_model', 'exp') @@ -344,6 +346,8 @@ def _build_corr_config(self, name, corr_info, git_hash): self.options['marginalize-all-rmin-cuts']) config['model']['marginalize-prior-sigma'] = str(self.options['marginalize-prior-sigma']) config['model']['fit-marginalized-scales'] = str(self.options['fit-marginalized-scales']) + config['model']['marginalize-match-data-bins'] = str( + self.options['marginalize-match-data-bins']) if 'skip-nl-model-in-peak' in self.options: config['model']['skip-nl-model-in-peak'] = str(self.options['skip-nl-model-in-peak']) diff --git a/vega/correlation_item.py b/vega/correlation_item.py index 40087a20..dee0d1a9 100644 --- a/vega/correlation_item.py +++ b/vega/correlation_item.py @@ -66,6 +66,8 @@ def __init__(self, config, model_pk=False): if marginalize_all: self.marginalize_small_scales['all-rmin'] = True + self.marginalize_match_data_bins = config['model'].getboolean( + "marginalize-match-data-bins", False) self.fit_marg_scales = config['model'].getboolean("fit-marginalized-scales", False) self.has_metals = False @@ -221,11 +223,33 @@ def get_undist_xi_marg_templates(self): "based on scale cuts." ) - N = self.model_coordinates.rt_regular_grid.size - d = np.ones(common_idx.size) + if self.marginalize_match_data_bins: + rp = self.model_coordinates.rp_grid[common_idx] + rt = self.model_coordinates.rt_grid[common_idx] + dist_rp = self.dist_model_coordinates.rp_grid + dist_rt = self.dist_model_coordinates.rt_grid + indices_in_data_bins = ( + (dist_rp[None, :] - rp[:, None])**2 + (dist_rt[None, :] - rt[:, None])**2 + ).argmin(axis=1) + + unique_indices_in_data_bins = np.unique(indices_in_data_bins) + # Vectorized construction of COO data: Map each element + # of indices_in_data_bins to its position in unique_indices_in_data_bins + row_indices = np.searchsorted(unique_indices_in_data_bins, indices_in_data_bins) + d = np.ones(common_idx.size, dtype=float) + templates = coo_array( + (d, (row_indices, common_idx)), + shape=( + unique_indices_in_data_bins.size, + self.model_coordinates.rt_regular_grid.size, + ) + ).tocsr().T + else: + N = self.model_coordinates.rt_regular_grid.size + d = np.ones(common_idx.size, dtype=float) - templates = coo_array( - (d, (np.arange(d.size), common_idx)), shape=(d.size, N) - ).tocsr().T + templates = coo_array( + (d, (np.arange(d.size), common_idx)), shape=(d.size, N) + ).tocsr().T return templates diff --git a/vega/data.py b/vega/data.py index b3c54ccf..7d0da00f 100644 --- a/vega/data.py +++ b/vega/data.py @@ -1,6 +1,5 @@ import numpy as np from astropy.io import fits -from scipy import linalg from scipy import sparse from scipy.sparse import csr_array @@ -28,7 +27,7 @@ class Data: model_coordinates = None data_coordinates = None - def __init__(self, corr_item): + def __init__(self, corr_item, marginalize_in_fit=False): """Read the data and initialize the coordinate grids. Parameters @@ -95,24 +94,31 @@ def __init__(self, corr_item): self.marg_templates, self.cov_marg_update = self.get_dist_xi_marg_templates() # if not self.corr_item.low_mem_mode: - print('Updating covariance with marginalization templates.') + # print('Updating covariance with marginalization templates.') ntemps = self.marg_templates.shape[1] # Invert the matrix but do not save it + self._inv_masked_cov = None _inv_masked_cov = self.inv_masked_cov self._inv_masked_cov = None - self._cov_mat[np.ix_(self.data_mask, self.data_mask)] += self.cov_marg_update + if not marginalize_in_fit: + self._cov_mat[np.ix_(self.data_mask, self.data_mask)] += self.cov_marg_update + else: + self.cov_marg_update = None # Construct solution matrix, G becomes an ndarray templates_masked = self.marg_templates[self.model_mask, :] G = templates_masked.T.dot(_inv_masked_cov) + A = templates_masked.T.dot(G.T).T - S = np.diag(np.full( - ntemps, self.corr_item.marginalize_small_scales_prior_sigma**-2 - )) - Ainv = np.linalg.inv(templates_masked.T.dot(G.T).T + S) + if not (self.corr_item.fit_marg_scales and self.corr_item.marginalize_match_data_bins): + S = np.diag(np.full( + ntemps, self.corr_item.marginalize_small_scales_prior_sigma**-2 + )) + A = A + S # should be positive definite + Ainv = np.linalg.pinvh(A) # When multiplied by data - bestfit model, the below matrix will # give the coefficients for each template. Total marginalized model # is given by marg_templates.dot(marg_diff2coeff_matrix.dot(diff)) @@ -507,7 +513,8 @@ def _init_metals(self, metal_config): list list of all metal correlations we need to compute """ - metals_in_tracer1, metals_in_tracer2, tracer_catalog = self._init_metal_tracers(metal_config) + metals_in_tracer1, metals_in_tracer2, tracer_catalog = self._init_metal_tracers( + metal_config) self.metal_mats = {} self.metal_coordinates = {} @@ -666,9 +673,9 @@ def create_monte_carlo(self, fiducial_model, scale=None, seed=None, forecast=Fal if self.cholesky_masked_cov: masked_cov = self.cov_mat[:, self.data_mask] masked_cov = masked_cov[self.data_mask, :] - self._cholesky = linalg.cholesky(self._scale * masked_cov) + self._cholesky = np.linalg.cholesky(self._scale * masked_cov) else: - self._cholesky = linalg.cholesky(self._scale * self.cov_mat) + self._cholesky = np.linalg.cholesky(self._scale * self.cov_mat) # Create the mock if seed is not None: diff --git a/vega/vega_interface.py b/vega/vega_interface.py index e58789dd..402ee6be 100644 --- a/vega/vega_interface.py +++ b/vega/vega_interface.py @@ -58,6 +58,11 @@ def __init__(self, main_path): self.low_mem_mode = self.main_config['control'].getboolean('low_mem_mode', False) self.low_mem_mode &= global_cov_file is not None + self.marginalize_in_fit = self.main_config['control'].getboolean( + 'marginalize-in-fit', False) + if self.marginalize_in_fit: + print("Marginalizing in fit") + # Initialize the individual components self.corr_items = {} for path in ini_files: @@ -99,7 +104,7 @@ def __init__(self, main_path): # Initialize the data for name, corr_item in self.corr_items.items(): if self._has_data: - self.data[name] = data.Data(corr_item) + self.data[name] = data.Data(corr_item, marginalize_in_fit=self.marginalize_in_fit) else: self.data[name] = None @@ -255,6 +260,16 @@ def chi2(self, params=None, direct_pk=None, return_marg_coeff=False): self.models[name].PktoXi.cache_pars = None return 1e100 + # Get marginalization coefficients + if return_marg_coeff or self.marginalize_in_fit: + marg_coeff = self.compute_marg_coeff(model_cf) + + if self.marginalize_in_fit: + # Fit on the fly the template correction + for name in self.data: + if self.data[name].marg_templates is not None: + model_cf[name] += self.data[name].marg_templates.dot(marg_coeff[name]) + # Compute chisq for the case where we use the global covariance if self._use_global_cov: if self.monte_carlo: @@ -286,7 +301,6 @@ def chi2(self, params=None, direct_pk=None, return_marg_coeff=False): if not return_marg_coeff: return chi2 - marg_coeff = self.compute_marg_coeff(model_cf) return chi2, marg_coeff def log_lik(self, params=None, direct_pk=None, return_marg_coeff=False): @@ -443,8 +457,10 @@ def initialize_monte_carlo(self, scale=None, print_func=print): def compute_marg_coeff(self, model_cf): bestfit_marg_coeff = {} for name in self.corr_items: - corr_data = self.data[name] + if not self.data[name].marginalize_small_scales: + pass + corr_data = self.data[name] if self.monte_carlo: diff = corr_data.masked_mc_mock \ - model_cf[name][corr_data.model_mask] @@ -803,7 +819,8 @@ def read_global_cov(self, global_cov_file, scale=None): if self.corr_items[name].marginalize_small_scales: M1 = self.global_cov[j:j + ndata, j:j + ndata] - M1[np.ix_(wd, wd)] += data.cov_marg_update + if data.cov_marg_update is not None: + M1[np.ix_(wd, wd)] += data.cov_marg_update if self.low_mem_mode: del data.cov_marg_update