Skip to content

Commit 9cff668

Browse files
authored
Merge pull request #3621 from sscini/Example-seed-issue
Fixing inconsistent results when using seed in parmest examples
2 parents fe163bf + d7db8d3 commit 9cff668

File tree

7 files changed

+68
-23
lines changed

7 files changed

+68
-23
lines changed

pyomo/contrib/parmest/examples/reactor_design/bootstrap_example.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def main():
3939
obj, theta = pest.theta_est()
4040

4141
# Parameter estimation with bootstrap resampling
42-
bootstrap_theta = pest.theta_est_bootstrap(50)
42+
bootstrap_theta = pest.theta_est_bootstrap(50, seed=524)
4343

4444
# Plot results
4545
parmest.graphics.pairwise_plot(bootstrap_theta, title="Bootstrap theta")

pyomo/contrib/parmest/examples/reactor_design/confidence_region_example.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,11 @@ def main():
3939
obj, theta = pest.theta_est()
4040

4141
# Bootstrapping
42-
bootstrap_theta = pest.theta_est_bootstrap(10)
42+
bootstrap_theta = pest.theta_est_bootstrap(10, seed=524)
4343
print(bootstrap_theta)
4444

4545
# Confidence region test
46-
CR = pest.confidence_region_test(bootstrap_theta, "MVN", [0.5, 0.75, 1.0])
46+
CR = pest.confidence_region_test(bootstrap_theta, "MVN", [0.5, 0.75, 1.0], seed=524)
4747
print(CR)
4848

4949

pyomo/contrib/parmest/examples/reactor_design/leaveNout_example.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ def main():
2424
file_name = abspath(join(file_dirname, "reactor_data.csv"))
2525
data = pd.read_csv(file_name)
2626

27+
seed = 524 # Set seed for reproducibility
28+
np.random.seed(seed) # Set seed for reproducibility
29+
2730
# Create more data for the example
2831
N = 50
2932
df_std = data.std().to_frame().transpose()
@@ -50,10 +53,10 @@ def main():
5053
### Parameter estimation with 'leave-N-out'
5154
# Example use case: For each combination of data where one data point is left
5255
# out, estimate theta
53-
lNo_theta = pest.theta_est_leaveNout(1)
56+
lNo_theta = pest.theta_est_leaveNout(1, seed=524)
5457
print(lNo_theta.head())
5558

56-
parmest.graphics.pairwise_plot(lNo_theta, theta)
59+
parmest.graphics.pairwise_plot(lNo_theta, theta, seed=524)
5760

5861
### Leave one out/boostrap analysis
5962
# Example use case: leave 25 data points out, run 20 bootstrap samples with the
@@ -81,6 +84,8 @@ def main():
8184
alpha,
8285
["MVN"],
8386
title="Alpha: " + str(alpha) + ", " + str(theta_est_N.loc[0, alpha]),
87+
seed=524 + i, # setting the seed for testing repeatability,
88+
# for typical use cases, this should not be set
8489
)
8590

8691
# Extract the percent of points that are within the alpha region

pyomo/contrib/parmest/graphics.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,7 @@ def pairwise_plot(
218218
add_obj_contour=True,
219219
add_legend=True,
220220
filename=None,
221+
seed=None,
221222
):
222223
"""
223224
Plot pairwise relationship for theta values, and optionally alpha-level
@@ -272,6 +273,9 @@ def pairwise_plot(
272273
Add a legend to the plot
273274
filename: string, optional
274275
Filename used to save the figure
276+
seed: int, optional
277+
Random seed used to generate theta values if theta_values is a tuple.
278+
If None, the seed is not set.
275279
"""
276280
assert isinstance(theta_values, (pd.DataFrame, tuple))
277281
assert isinstance(theta_star, (type(None), dict, pd.Series, pd.DataFrame))
@@ -283,6 +287,9 @@ def pairwise_plot(
283287
assert isinstance(add_obj_contour, bool)
284288
assert isinstance(filename, (type(None), str))
285289

290+
if seed is not None:
291+
np.random.seed(seed)
292+
286293
# If theta_values is a tuple containing (mean, cov, n), create a DataFrame of values
287294
if isinstance(theta_values, tuple):
288295
assert len(theta_values) == 3
@@ -292,7 +299,7 @@ def pairwise_plot(
292299
if isinstance(mean, dict):
293300
mean = pd.Series(mean)
294301
theta_names = mean.index
295-
mvn_dist = stats.multivariate_normal(mean, cov)
302+
mvn_dist = stats.multivariate_normal(mean, cov, seed=seed)
296303
theta_values = pd.DataFrame(
297304
mvn_dist.rvs(n, random_state=1), columns=theta_names
298305
)
@@ -402,7 +409,7 @@ def pairwise_plot(
402409
)
403410

404411
elif dist == "MVN":
405-
mvn_dist = fit_mvn_dist(thetas)
412+
mvn_dist = fit_mvn_dist(thetas, seed=seed)
406413
Z = mvn_dist.pdf(thetas)
407414
score = stats.scoreatpercentile(Z, (1 - alpha) * 100)
408415
g.map_offdiag(
@@ -419,7 +426,7 @@ def pairwise_plot(
419426
)
420427

421428
elif dist == "KDE":
422-
kde_dist = fit_kde_dist(thetas)
429+
kde_dist = fit_kde_dist(thetas, seed=seed)
423430
Z = kde_dist.pdf(thetas.transpose())
424431
score = stats.scoreatpercentile(Z, (1 - alpha) * 100)
425432
g.map_offdiag(
@@ -512,7 +519,7 @@ def fit_rect_dist(theta_values, alpha):
512519
return lower_bound, upper_bound
513520

514521

515-
def fit_mvn_dist(theta_values):
522+
def fit_mvn_dist(theta_values, seed=None):
516523
"""
517524
Fit a multivariate normal distribution to theta values
518525
@@ -527,13 +534,17 @@ def fit_mvn_dist(theta_values):
527534
"""
528535
assert isinstance(theta_values, pd.DataFrame)
529536

537+
if seed is not None:
538+
np.random.seed(seed)
539+
530540
dist = stats.multivariate_normal(
531-
theta_values.mean(), theta_values.cov(), allow_singular=True
541+
theta_values.mean(), theta_values.cov(), allow_singular=True, seed=seed
532542
)
543+
533544
return dist
534545

535546

536-
def fit_kde_dist(theta_values):
547+
def fit_kde_dist(theta_values, seed=None):
537548
"""
538549
Fit a Gaussian kernel-density distribution to theta values
539550
@@ -547,6 +558,8 @@ def fit_kde_dist(theta_values):
547558
scipy.stats.gaussian_kde distribution
548559
"""
549560
assert isinstance(theta_values, pd.DataFrame)
561+
if seed is not None:
562+
np.random.seed(seed)
550563

551564
dist = stats.gaussian_kde(theta_values.transpose().values)
552565

pyomo/contrib/parmest/parmest.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,6 @@ def _create_parmest_model(self, experiment_number):
407407

408408
# Add objective function (optional)
409409
if self.obj_function:
410-
411410
# Check for component naming conflicts
412411
reserved_names = [
413412
'Total_Cost_Objective',
@@ -1123,13 +1122,14 @@ def leaveNout_bootstrap_test(
11231122

11241123
obj, theta = self.theta_est()
11251124

1126-
bootstrap_theta = self.theta_est_bootstrap(bootstrap_samples)
1125+
bootstrap_theta = self.theta_est_bootstrap(bootstrap_samples, seed=seed)
11271126

11281127
training, test = self.confidence_region_test(
11291128
bootstrap_theta,
11301129
distribution=distribution,
11311130
alphas=alphas,
11321131
test_theta_values=theta,
1132+
seed=seed,
11331133
)
11341134

11351135
results.append((sample, test, training))
@@ -1287,7 +1287,7 @@ def likelihood_ratio_test(
12871287
return LR
12881288

12891289
def confidence_region_test(
1290-
self, theta_values, distribution, alphas, test_theta_values=None
1290+
self, theta_values, distribution, alphas, test_theta_values=None, seed=None
12911291
):
12921292
"""
12931293
Confidence region test to determine if theta values are within a
@@ -1341,6 +1341,9 @@ def confidence_region_test(
13411341
if test_theta_values is not None:
13421342
test_result = test_theta_values.copy()
13431343

1344+
if seed is not None:
1345+
np.random.seed(seed)
1346+
13441347
for a in alphas:
13451348
if distribution == 'Rect':
13461349
lb, ub = graphics.fit_rect_dist(theta_values, a)
@@ -1355,7 +1358,7 @@ def confidence_region_test(
13551358
).all(axis=1)
13561359

13571360
elif distribution == 'MVN':
1358-
dist = graphics.fit_mvn_dist(theta_values)
1361+
dist = graphics.fit_mvn_dist(theta_values, seed=seed)
13591362
Z = dist.pdf(theta_values)
13601363
score = scipy.stats.scoreatpercentile(Z, (1 - a) * 100)
13611364
training_results[a] = Z >= score
@@ -1366,7 +1369,7 @@ def confidence_region_test(
13661369
test_result[a] = Z >= score
13671370

13681371
elif distribution == 'KDE':
1369-
dist = graphics.fit_kde_dist(theta_values)
1372+
dist = graphics.fit_kde_dist(theta_values, seed=seed)
13701373
Z = dist.pdf(theta_values.transpose())
13711374
score = scipy.stats.scoreatpercentile(Z, (1 - a) * 100)
13721375
training_results[a] = Z >= score

pyomo/contrib/parmest/tests/test_graphics.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
import pyomo.contrib.parmest.parmest as parmest
3232
import pyomo.contrib.parmest.graphics as graphics
3333

34+
from pyomo.contrib.parmest.tests.test_parmest import _RANDOM_SEED_FOR_TESTING
35+
3436
testdir = os.path.dirname(os.path.abspath(__file__))
3537

3638

@@ -47,6 +49,7 @@
4749
)
4850
class TestGraphics(unittest.TestCase):
4951
def setUp(self):
52+
np.random.seed(_RANDOM_SEED_FOR_TESTING)
5053
self.A = pd.DataFrame(
5154
np.random.randint(0, 100, size=(100, 4)), columns=list('ABCD')
5255
)
@@ -55,7 +58,12 @@ def setUp(self):
5558
)
5659

5760
def test_pairwise_plot(self):
58-
graphics.pairwise_plot(self.A, alpha=0.8, distributions=['Rect', 'MVN', 'KDE'])
61+
graphics.pairwise_plot(
62+
self.A,
63+
alpha=0.8,
64+
distributions=['Rect', 'MVN', 'KDE'],
65+
seed=_RANDOM_SEED_FOR_TESTING,
66+
)
5967

6068
def test_grouped_boxplot(self):
6169
graphics.grouped_boxplot(self.A, self.B, normalize=True, group_names=['A', 'B'])

pyomo/contrib/parmest/tests/test_parmest.py

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@
3333
pynumero_ASL_available = AmplInterface.available()
3434
testdir = this_file_dir()
3535

36+
# Set the global seed for random number generation in tests
37+
_RANDOM_SEED_FOR_TESTING = 524
38+
3639

3740
@unittest.skipIf(
3841
not parmest.parmest_available,
@@ -45,6 +48,8 @@ def setUp(self):
4548
RooneyBieglerExperiment,
4649
)
4750

51+
np.random.seed(_RANDOM_SEED_FOR_TESTING) # Set seed for reproducibility
52+
4853
# Note, the data used in this test has been corrected to use
4954
# data.loc[5,'hour'] = 7 (instead of 6)
5055
data = pd.DataFrame(
@@ -93,7 +98,9 @@ def test_bootstrap(self):
9398
objval, thetavals = self.pest.theta_est()
9499

95100
num_bootstraps = 10
96-
theta_est = self.pest.theta_est_bootstrap(num_bootstraps, return_samples=True)
101+
theta_est = self.pest.theta_est_bootstrap(
102+
num_bootstraps, return_samples=True, seed=_RANDOM_SEED_FOR_TESTING
103+
)
97104

98105
num_samples = theta_est["samples"].apply(len)
99106
self.assertEqual(len(theta_est.index), 10)
@@ -109,9 +116,15 @@ def test_bootstrap(self):
109116
self.assertEqual(CR[0.75].sum(), 7)
110117
self.assertEqual(CR[1.0].sum(), 10) # all true
111118

112-
graphics.pairwise_plot(theta_est)
113-
graphics.pairwise_plot(theta_est, thetavals)
114-
graphics.pairwise_plot(theta_est, thetavals, 0.8, ["MVN", "KDE", "Rect"])
119+
graphics.pairwise_plot(theta_est, seed=_RANDOM_SEED_FOR_TESTING)
120+
graphics.pairwise_plot(theta_est, thetavals, seed=_RANDOM_SEED_FOR_TESTING)
121+
graphics.pairwise_plot(
122+
theta_est,
123+
thetavals,
124+
0.8,
125+
["MVN", "KDE", "Rect"],
126+
seed=_RANDOM_SEED_FOR_TESTING,
127+
)
115128

116129
@unittest.skipIf(
117130
not graphics.imports_available, "parmest.graphics imports are unavailable"
@@ -140,7 +153,7 @@ def test_leaveNout(self):
140153
self.assertTrue(lNo_theta.shape == (6, 2))
141154

142155
results = self.pest.leaveNout_bootstrap_test(
143-
1, None, 3, "Rect", [0.5, 1.0], seed=5436
156+
1, None, 3, "Rect", [0.5, 1.0], seed=_RANDOM_SEED_FOR_TESTING
144157
)
145158
self.assertEqual(len(results), 6) # 6 lNo samples
146159
i = 1
@@ -335,6 +348,7 @@ def setUp(self):
335348
RooneyBieglerExperiment,
336349
)
337350

351+
np.random.seed(_RANDOM_SEED_FOR_TESTING) # Set seed for reproducibility
338352
self.data = pd.DataFrame(
339353
data=[[1, 8.3], [2, 10.3], [3, 19.0], [4, 16.0], [5, 15.6], [7, 19.8]],
340354
columns=["hour", "y"],
@@ -710,6 +724,8 @@ def ABC_model(data):
710724
cb_meas = data["cb"]
711725
cc_meas = data["cc"]
712726

727+
np.random.seed(_RANDOM_SEED_FOR_TESTING) # Set seed for reproducibility
728+
713729
if isinstance(data, pd.DataFrame):
714730
meas_t = data.index # time index
715731
else: # dictionary
@@ -1140,7 +1156,7 @@ def test_leaveNout(self):
11401156
self.assertTrue(lNo_theta.shape == (6, 2))
11411157

11421158
results = self.pest.leaveNout_bootstrap_test(
1143-
1, None, 3, "Rect", [0.5, 1.0], seed=5436
1159+
1, None, 3, "Rect", [0.5, 1.0], seed=_RANDOM_SEED_FOR_TESTING
11441160
)
11451161
self.assertTrue(len(results) == 6) # 6 lNo samples
11461162
i = 1

0 commit comments

Comments
 (0)