Merge pull request #3621 from sscini/Example-seed-issue

blnicho · web-flow · commit 9cff66829345 · 2025-07-01T09:33:05.000-06:00
Fixing inconsistent results when using seed in parmest examples
diff --git a/pyomo/contrib/parmest/examples/reactor_design/bootstrap_example.py b/pyomo/contrib/parmest/examples/reactor_design/bootstrap_example.py
@@ -39,7 +39,7 @@ def main():
     obj, theta = pest.theta_est()
 
     # Parameter estimation with bootstrap resampling
-    bootstrap_theta = pest.theta_est_bootstrap(50)
+    bootstrap_theta = pest.theta_est_bootstrap(50, seed=524)
 
     # Plot results
     parmest.graphics.pairwise_plot(bootstrap_theta, title="Bootstrap theta")
diff --git a/pyomo/contrib/parmest/examples/reactor_design/confidence_region_example.py b/pyomo/contrib/parmest/examples/reactor_design/confidence_region_example.py
@@ -39,11 +39,11 @@ def main():
     obj, theta = pest.theta_est()
 
     # Bootstrapping
-    bootstrap_theta = pest.theta_est_bootstrap(10)
+    bootstrap_theta = pest.theta_est_bootstrap(10, seed=524)
     print(bootstrap_theta)
 
     # Confidence region test
-    CR = pest.confidence_region_test(bootstrap_theta, "MVN", [0.5, 0.75, 1.0])
+    CR = pest.confidence_region_test(bootstrap_theta, "MVN", [0.5, 0.75, 1.0], seed=524)
     print(CR)
 
 
diff --git a/pyomo/contrib/parmest/examples/reactor_design/leaveNout_example.py b/pyomo/contrib/parmest/examples/reactor_design/leaveNout_example.py
@@ -24,6 +24,9 @@ def main():
     file_name = abspath(join(file_dirname, "reactor_data.csv"))
     data = pd.read_csv(file_name)
 
+    seed = 524  # Set seed for reproducibility
+    np.random.seed(seed)  # Set seed for reproducibility
+
     # Create more data for the example
     N = 50
     df_std = data.std().to_frame().transpose()
@@ -50,10 +53,10 @@ def main():
     ### Parameter estimation with 'leave-N-out'
     # Example use case: For each combination of data where one data point is left
     # out, estimate theta
-    lNo_theta = pest.theta_est_leaveNout(1)
+    lNo_theta = pest.theta_est_leaveNout(1, seed=524)
     print(lNo_theta.head())
 
-    parmest.graphics.pairwise_plot(lNo_theta, theta)
+    parmest.graphics.pairwise_plot(lNo_theta, theta, seed=524)
 
     ### Leave one out/boostrap analysis
     # Example use case: leave 25 data points out, run 20 bootstrap samples with the
@@ -81,6 +84,8 @@ def main():
             alpha,
             ["MVN"],
             title="Alpha: " + str(alpha) + ", " + str(theta_est_N.loc[0, alpha]),
+            seed=524 + i,  # setting the seed for testing repeatability,
+            # for typical use cases, this should not be set
         )
 
     # Extract the percent of points that are within the alpha region
diff --git a/pyomo/contrib/parmest/graphics.py b/pyomo/contrib/parmest/graphics.py
@@ -218,6 +218,7 @@ def pairwise_plot(
     add_obj_contour=True,
     add_legend=True,
     filename=None,
+    seed=None,
 ):
     """
     Plot pairwise relationship for theta values, and optionally alpha-level
@@ -272,6 +273,9 @@ def pairwise_plot(
         Add a legend to the plot
     filename: string, optional
         Filename used to save the figure
+    seed: int, optional
+        Random seed used to generate theta values if theta_values is a tuple.
+        If None, the seed is not set.
     """
     assert isinstance(theta_values, (pd.DataFrame, tuple))
     assert isinstance(theta_star, (type(None), dict, pd.Series, pd.DataFrame))
@@ -283,6 +287,9 @@ def pairwise_plot(
     assert isinstance(add_obj_contour, bool)
     assert isinstance(filename, (type(None), str))
 
+    if seed is not None:
+        np.random.seed(seed)
+
     # If theta_values is a tuple containing (mean, cov, n), create a DataFrame of values
     if isinstance(theta_values, tuple):
         assert len(theta_values) == 3
@@ -292,7 +299,7 @@ def pairwise_plot(
         if isinstance(mean, dict):
             mean = pd.Series(mean)
         theta_names = mean.index
-        mvn_dist = stats.multivariate_normal(mean, cov)
+        mvn_dist = stats.multivariate_normal(mean, cov, seed=seed)
         theta_values = pd.DataFrame(
             mvn_dist.rvs(n, random_state=1), columns=theta_names
         )
@@ -402,7 +409,7 @@ def pairwise_plot(
                 )
 
             elif dist == "MVN":
-                mvn_dist = fit_mvn_dist(thetas)
+                mvn_dist = fit_mvn_dist(thetas, seed=seed)
                 Z = mvn_dist.pdf(thetas)
                 score = stats.scoreatpercentile(Z, (1 - alpha) * 100)
                 g.map_offdiag(
@@ -419,7 +426,7 @@ def pairwise_plot(
                 )
 
             elif dist == "KDE":
-                kde_dist = fit_kde_dist(thetas)
+                kde_dist = fit_kde_dist(thetas, seed=seed)
                 Z = kde_dist.pdf(thetas.transpose())
                 score = stats.scoreatpercentile(Z, (1 - alpha) * 100)
                 g.map_offdiag(
@@ -512,7 +519,7 @@ def fit_rect_dist(theta_values, alpha):
     return lower_bound, upper_bound
 
 
-def fit_mvn_dist(theta_values):
+def fit_mvn_dist(theta_values, seed=None):
     """
     Fit a multivariate normal distribution to theta values
 
@@ -527,13 +534,17 @@ def fit_mvn_dist(theta_values):
     """
     assert isinstance(theta_values, pd.DataFrame)
 
+    if seed is not None:
+        np.random.seed(seed)
+
     dist = stats.multivariate_normal(
-        theta_values.mean(), theta_values.cov(), allow_singular=True
+        theta_values.mean(), theta_values.cov(), allow_singular=True, seed=seed
     )
+
     return dist
 
 
-def fit_kde_dist(theta_values):
+def fit_kde_dist(theta_values, seed=None):
     """
     Fit a Gaussian kernel-density distribution to theta values
 
@@ -547,6 +558,8 @@ def fit_kde_dist(theta_values):
     scipy.stats.gaussian_kde distribution
     """
     assert isinstance(theta_values, pd.DataFrame)
+    if seed is not None:
+        np.random.seed(seed)
 
     dist = stats.gaussian_kde(theta_values.transpose().values)
 
diff --git a/pyomo/contrib/parmest/parmest.py b/pyomo/contrib/parmest/parmest.py
@@ -407,7 +407,6 @@ def _create_parmest_model(self, experiment_number):
 
         # Add objective function (optional)
         if self.obj_function:
-
             # Check for component naming conflicts
             reserved_names = [
                 'Total_Cost_Objective',
@@ -1123,13 +1122,14 @@ def leaveNout_bootstrap_test(
 
             obj, theta = self.theta_est()
 
-            bootstrap_theta = self.theta_est_bootstrap(bootstrap_samples)
+            bootstrap_theta = self.theta_est_bootstrap(bootstrap_samples, seed=seed)
 
             training, test = self.confidence_region_test(
                 bootstrap_theta,
                 distribution=distribution,
                 alphas=alphas,
                 test_theta_values=theta,
+                seed=seed,
             )
 
             results.append((sample, test, training))
@@ -1287,7 +1287,7 @@ def likelihood_ratio_test(
             return LR
 
     def confidence_region_test(
-        self, theta_values, distribution, alphas, test_theta_values=None
+        self, theta_values, distribution, alphas, test_theta_values=None, seed=None
     ):
         """
         Confidence region test to determine if theta values are within a
@@ -1341,6 +1341,9 @@ def confidence_region_test(
         if test_theta_values is not None:
             test_result = test_theta_values.copy()
 
+        if seed is not None:
+            np.random.seed(seed)
+
         for a in alphas:
             if distribution == 'Rect':
                 lb, ub = graphics.fit_rect_dist(theta_values, a)
@@ -1355,7 +1358,7 @@ def confidence_region_test(
                     ).all(axis=1)
 
             elif distribution == 'MVN':
-                dist = graphics.fit_mvn_dist(theta_values)
+                dist = graphics.fit_mvn_dist(theta_values, seed=seed)
                 Z = dist.pdf(theta_values)
                 score = scipy.stats.scoreatpercentile(Z, (1 - a) * 100)
                 training_results[a] = Z >= score
@@ -1366,7 +1369,7 @@ def confidence_region_test(
                     test_result[a] = Z >= score
 
             elif distribution == 'KDE':
-                dist = graphics.fit_kde_dist(theta_values)
+                dist = graphics.fit_kde_dist(theta_values, seed=seed)
                 Z = dist.pdf(theta_values.transpose())
                 score = scipy.stats.scoreatpercentile(Z, (1 - a) * 100)
                 training_results[a] = Z >= score
diff --git a/pyomo/contrib/parmest/tests/test_graphics.py b/pyomo/contrib/parmest/tests/test_graphics.py
@@ -31,6 +31,8 @@
 import pyomo.contrib.parmest.parmest as parmest
 import pyomo.contrib.parmest.graphics as graphics
 
+from pyomo.contrib.parmest.tests.test_parmest import _RANDOM_SEED_FOR_TESTING
+
 testdir = os.path.dirname(os.path.abspath(__file__))
 
 
@@ -47,6 +49,7 @@
 )
 class TestGraphics(unittest.TestCase):
     def setUp(self):
+        np.random.seed(_RANDOM_SEED_FOR_TESTING)
         self.A = pd.DataFrame(
             np.random.randint(0, 100, size=(100, 4)), columns=list('ABCD')
         )
@@ -55,7 +58,12 @@ def setUp(self):
         )
 
     def test_pairwise_plot(self):
-        graphics.pairwise_plot(self.A, alpha=0.8, distributions=['Rect', 'MVN', 'KDE'])
+        graphics.pairwise_plot(
+            self.A,
+            alpha=0.8,
+            distributions=['Rect', 'MVN', 'KDE'],
+            seed=_RANDOM_SEED_FOR_TESTING,
+        )
 
     def test_grouped_boxplot(self):
         graphics.grouped_boxplot(self.A, self.B, normalize=True, group_names=['A', 'B'])
diff --git a/pyomo/contrib/parmest/tests/test_parmest.py b/pyomo/contrib/parmest/tests/test_parmest.py
@@ -33,6 +33,9 @@
 pynumero_ASL_available = AmplInterface.available()
 testdir = this_file_dir()
 
+# Set the global seed for random number generation in tests
+_RANDOM_SEED_FOR_TESTING = 524
+
 
 @unittest.skipIf(
     not parmest.parmest_available,
@@ -45,6 +48,8 @@ def setUp(self):
             RooneyBieglerExperiment,
         )
 
+        np.random.seed(_RANDOM_SEED_FOR_TESTING)  # Set seed for reproducibility
+
         # Note, the data used in this test has been corrected to use
         # data.loc[5,'hour'] = 7 (instead of 6)
         data = pd.DataFrame(
@@ -93,7 +98,9 @@ def test_bootstrap(self):
         objval, thetavals = self.pest.theta_est()
 
         num_bootstraps = 10
-        theta_est = self.pest.theta_est_bootstrap(num_bootstraps, return_samples=True)
+        theta_est = self.pest.theta_est_bootstrap(
+            num_bootstraps, return_samples=True, seed=_RANDOM_SEED_FOR_TESTING
+        )
 
         num_samples = theta_est["samples"].apply(len)
         self.assertEqual(len(theta_est.index), 10)
@@ -109,9 +116,15 @@ def test_bootstrap(self):
         self.assertEqual(CR[0.75].sum(), 7)
         self.assertEqual(CR[1.0].sum(), 10)  # all true
 
-        graphics.pairwise_plot(theta_est)
-        graphics.pairwise_plot(theta_est, thetavals)
-        graphics.pairwise_plot(theta_est, thetavals, 0.8, ["MVN", "KDE", "Rect"])
+        graphics.pairwise_plot(theta_est, seed=_RANDOM_SEED_FOR_TESTING)
+        graphics.pairwise_plot(theta_est, thetavals, seed=_RANDOM_SEED_FOR_TESTING)
+        graphics.pairwise_plot(
+            theta_est,
+            thetavals,
+            0.8,
+            ["MVN", "KDE", "Rect"],
+            seed=_RANDOM_SEED_FOR_TESTING,
+        )
 
     @unittest.skipIf(
         not graphics.imports_available, "parmest.graphics imports are unavailable"
@@ -140,7 +153,7 @@ def test_leaveNout(self):
         self.assertTrue(lNo_theta.shape == (6, 2))
 
         results = self.pest.leaveNout_bootstrap_test(
-            1, None, 3, "Rect", [0.5, 1.0], seed=5436
+            1, None, 3, "Rect", [0.5, 1.0], seed=_RANDOM_SEED_FOR_TESTING
         )
         self.assertEqual(len(results), 6)  # 6 lNo samples
         i = 1
@@ -335,6 +348,7 @@ def setUp(self):
             RooneyBieglerExperiment,
         )
 
+        np.random.seed(_RANDOM_SEED_FOR_TESTING)  # Set seed for reproducibility
         self.data = pd.DataFrame(
             data=[[1, 8.3], [2, 10.3], [3, 19.0], [4, 16.0], [5, 15.6], [7, 19.8]],
             columns=["hour", "y"],
@@ -710,6 +724,8 @@ def ABC_model(data):
             cb_meas = data["cb"]
             cc_meas = data["cc"]
 
+            np.random.seed(_RANDOM_SEED_FOR_TESTING)  # Set seed for reproducibility
+
             if isinstance(data, pd.DataFrame):
                 meas_t = data.index  # time index
             else:  # dictionary
@@ -1140,7 +1156,7 @@ def test_leaveNout(self):
         self.assertTrue(lNo_theta.shape == (6, 2))
 
         results = self.pest.leaveNout_bootstrap_test(
-            1, None, 3, "Rect", [0.5, 1.0], seed=5436
+            1, None, 3, "Rect", [0.5, 1.0], seed=_RANDOM_SEED_FOR_TESTING
         )
         self.assertTrue(len(results) == 6)  # 6 lNo samples
         i = 1