andreicuceu · andreicuceu · Mar 12, 2026 · Mar 5, 2026 · Mar 5, 2026 · Mar 5, 2026
diff --git a/bin/run_vega.py b/bin/run_vega.py
diff --git a/vega/build_config.py b/vega/build_config.py
@@ -74,6 +74,8 @@ def __init__(self, options={}, overwrite=False):
         self.options['marginalize-all-rmin-cuts'] = options.get('marginalize-all-rmin-cuts', False)
         self.options['marginalize-prior-sigma'] = options.get('marginalize-prior-sigma', 10.0)
         self.options['fit-marginalized-scales'] = options.get('fit-marginalized-scales', False)
+        self.options['marginalize-match-data-bins'] = options.get(
+            'marginalize-match-data-bins', False)
 
         self.options['hcd_model'] = options.get('hcd_model', None)
         self.options['fvoigt_model'] = options.get('fvoigt_model', 'exp')
@@ -344,6 +346,8 @@ def _build_corr_config(self, name, corr_info, git_hash):
             self.options['marginalize-all-rmin-cuts'])
         config['model']['marginalize-prior-sigma'] = str(self.options['marginalize-prior-sigma'])
         config['model']['fit-marginalized-scales'] = str(self.options['fit-marginalized-scales'])
+        config['model']['marginalize-match-data-bins'] = str(
+            self.options['marginalize-match-data-bins'])
 
         if 'skip-nl-model-in-peak' in self.options:
             config['model']['skip-nl-model-in-peak'] = str(self.options['skip-nl-model-in-peak'])

diff --git a/vega/correlation_item.py b/vega/correlation_item.py
@@ -66,8 +66,11 @@ def __init__(self, config, model_pk=False):
         if marginalize_all:
             self.marginalize_small_scales['all-rmin'] = True
 
+        self.marginalize_match_data_bins = config['model'].getboolean(
+            "marginalize-match-data-bins", False)
         self.fit_marg_scales = config['model'].getboolean("fit-marginalized-scales", False)
 
+
         self.has_metals = False
         self.has_bb = False
 
@@ -221,11 +224,37 @@ def get_undist_xi_marg_templates(self):
                 "based on scale cuts."
             )
 
-        N = self.model_coordinates.rt_regular_grid.size
-        d = np.ones(common_idx.size)
+        if self.marginalize_match_data_bins:
+            print('set marginalization templates matching the data bins')
+            rp = self.model_coordinates.rp_grid[common_idx]
+            rt = self.model_coordinates.rt_grid[common_idx]
+            dist_rp = self.dist_model_coordinates.rp_grid
+            dist_rt = self.dist_model_coordinates.rt_grid
+            indices_in_data_bins = (
+                (dist_rp[None, :] - rp[:, None])**2 + (dist_rt[None, :] - rt[:, None])**2
+            ).argmin(axis=1)
+
+            unique_indices_in_data_bins = np.unique(indices_in_data_bins)
+            val = []
+            tempindex = []
+            coordindex = []
+            for i, data_idx in enumerate(unique_indices_in_data_bins):
+                model_indices = common_idx[indices_in_data_bins == data_idx]
+                val += np.ones(model_indices.size).tolist()
+                tempindex += np.repeat(i, model_indices.size).tolist()
+                coordindex += model_indices.tolist()
+            templates = coo_array(
+                (val, (tempindex, coordindex)), shape=(
+                    unique_indices_in_data_bins.size, self.model_coordinates.rt_regular_grid.size)
+            ).tocsr().T
+        else:
+            N = self.model_coordinates.rt_regular_grid.size
+            d = np.ones(common_idx.size)
+
+            templates = coo_array(
+                (d, (np.arange(d.size), common_idx)), shape=(d.size, N)
+            ).tocsr().T
 
-        templates = coo_array(
-            (d, (np.arange(d.size), common_idx)), shape=(d.size, N)
-        ).tocsr().T
+        print("marginalization templates shape=", templates.shape)
 
         return templates
diff --git a/vega/data.py b/vega/data.py
@@ -28,7 +28,7 @@ class Data:
     model_coordinates = None
     data_coordinates = None
 
-    def __init__(self, corr_item):
+    def __init__(self, corr_item, marginalize_in_fit=False):
         """Read the data and initialize the coordinate grids.
 
         Parameters
@@ -95,23 +95,32 @@ def __init__(self, corr_item):
             self.marg_templates, self.cov_marg_update = self.get_dist_xi_marg_templates()
 
             # if not self.corr_item.low_mem_mode:
-            print('Updating covariance with marginalization templates.')
+            # print('Updating covariance with marginalization templates.')
             ntemps = self.marg_templates.shape[1]
 
             # Invert the matrix but do not save it
+            self._inv_masked_cov = None
             _inv_masked_cov = self.inv_masked_cov
             self._inv_masked_cov = None
 
-            self._cov_mat[np.ix_(self.data_mask, self.data_mask)] += self.cov_marg_update
+            if not marginalize_in_fit:
+                print('add to covariance for small scale marginalization')
+                self._cov_mat[np.ix_(self.data_mask, self.data_mask)] += self.cov_marg_update
+            else:
+                print('do not add anything to covariance, because will fit small scale templates')
+                self.cov_marg_update = None
 
             # Construct solution matrix, G becomes an ndarray
             templates_masked = self.marg_templates[self.model_mask, :]
             G = templates_masked.T.dot(_inv_masked_cov)
 
-            S = np.diag(np.full(
-                ntemps, self.corr_item.marginalize_small_scales_prior_sigma**-2
-            ))
-            Ainv = np.linalg.inv(templates_masked.T.dot(G.T).T + S)
+            if not (self.corr_item.fit_marg_scales and self.corr_item.marginalize_match_data_bins):
+                S = np.diag(np.full(
+                    ntemps, self.corr_item.marginalize_small_scales_prior_sigma**-2
+                ))
+                Ainv = np.linalg.inv(templates_masked.T.dot(G.T).T + S)
+            else:
+                Ainv = np.linalg.inv(templates_masked.T.dot(G.T).T)  # should be positive definite
 
             # When multiplied by data - bestfit model, the below matrix will
             # give the coefficients for each template. Total marginalized model
@@ -507,7 +516,8 @@ def _init_metals(self, metal_config):
         list
             list of all metal correlations we need to compute
         """
-        metals_in_tracer1, metals_in_tracer2, tracer_catalog = self._init_metal_tracers(metal_config)
+        metals_in_tracer1, metals_in_tracer2, tracer_catalog = self._init_metal_tracers(
+            metal_config)
 
         self.metal_mats = {}
         self.metal_coordinates = {}

diff --git a/vega/vega_interface.py b/vega/vega_interface.py
@@ -58,6 +58,11 @@ def __init__(self, main_path):
         self.low_mem_mode = self.main_config['control'].getboolean('low_mem_mode', False)
         self.low_mem_mode &= global_cov_file is not None
 
+        self.marginalize_in_fit = self.main_config['control'].getboolean(
+            'marginalize_in_fit', False)
+        if self.marginalize_in_fit:
+            print("will do marginalize_in_fit")
+
         # Initialize the individual components
         self.corr_items = {}
         for path in ini_files:
@@ -99,7 +104,7 @@ def __init__(self, main_path):
         # Initialize the data
         for name, corr_item in self.corr_items.items():
             if self._has_data:
-                self.data[name] = data.Data(corr_item)
+                self.data[name] = data.Data(corr_item, marginalize_in_fit=self.marginalize_in_fit)
             else:
                 self.data[name] = None
 
@@ -255,6 +260,13 @@ def chi2(self, params=None, direct_pk=None, return_marg_coeff=False):
                 self.models[name].PktoXi.cache_pars = None
             return 1e100
 
+        if self.marginalize_in_fit:
+            # Fit on the fly the template correction
+            marg_coeff = self.compute_marg_coeff(model_cf)
+            for name in self.data:
+                if self.data[name].marg_templates is not None:
+                    model_cf[name] += self.data[name].marg_templates.dot(marg_coeff[name])
+
         # Compute chisq for the case where we use the global covariance
         if self._use_global_cov:
             if self.monte_carlo:
@@ -286,7 +298,10 @@ def chi2(self, params=None, direct_pk=None, return_marg_coeff=False):
         if not return_marg_coeff:
             return chi2
 
-        marg_coeff = self.compute_marg_coeff(model_cf)
+        if not self.marginalize_in_fit:
+            # otherwise already computed
+            marg_coeff = self.compute_marg_coeff(model_cf)
+
         return chi2, marg_coeff
 
     def log_lik(self, params=None, direct_pk=None, return_marg_coeff=False):
@@ -803,7 +818,8 @@ def read_global_cov(self, global_cov_file, scale=None):
 
                 if self.corr_items[name].marginalize_small_scales:
                     M1 = self.global_cov[j:j + ndata, j:j + ndata]
-                    M1[np.ix_(wd, wd)] += data.cov_marg_update
+                    if data.cov_marg_update is not None :
+                        M1[np.ix_(wd, wd)] += data.cov_marg_update
 
                     if self.low_mem_mode:
                         del data.cov_marg_update