Skip to content

Commit 6fe46ca

Browse files
motus and bpkroth authored
Fix the coercion of scores to floats in the optimizer (#789)
Closes #785. Also, add more unit tests to make sure the optimizer handles string inputs correctly. --------- Co-authored-by: Brian Kroth <[email protected]>
1 parent f77a816 commit 6fe46ca

File tree

2 files changed

+101
-7
lines changed

2 files changed

+101
-7
lines changed

mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py

+15-7
Original file line number | Diff line number | Diff line change
@@ -116,20 +116,19 @@ def bulk_register(
116116
pd.DataFrame([{} if score is None else score for score in scores])
117117
)
118118

119-
opt_targets = list(self._opt_targets)
120119
if status is not None:
121120
# Select only the completed trials, set scores for failed trials to +inf.
122121
df_status = pd.Series(status)
123122
# TODO: Be more flexible with values used for failed trials (not just +inf).
124123
# Issue: https://github.com/microsoft/MLOS/issues/523
125-
df_scores.loc[df_status != Status.SUCCEEDED, opt_targets] = float("inf")
124+
df_scores[df_status != Status.SUCCEEDED] = float("inf")
126125
df_status_completed = df_status.apply(Status.is_completed)
127126
df_configs = df_configs[df_status_completed]
128127
df_scores = df_scores[df_status_completed]
129128

130129
# TODO: Specify (in the config) which metrics to pass to the optimizer.
131130
# Issue: https://github.com/microsoft/MLOS/issues/745
132-
self._opt.register(configs=df_configs, scores=df_scores[opt_targets].astype(float))
131+
self._opt.register(configs=df_configs, scores=df_scores)
133132

134133
if _LOG.isEnabledFor(logging.DEBUG):
135134
(score, _) = self.get_best_observation()
@@ -138,10 +137,19 @@ def bulk_register(
138137
return True
139138

140139
def _adjust_signs_df(self, df_scores: pd.DataFrame) -> pd.DataFrame:
141-
"""In-place adjust the signs of the scores for MINIMIZATION problem."""
142-
for opt_target, opt_dir in self._opt_targets.items():
143-
df_scores[opt_target] *= opt_dir
144-
return df_scores
140+
"""Coerce optimization target scores to floats and adjust the signs for
141+
MINIMIZATION problem.
142+
"""
143+
df_targets = df_scores[list(self._opt_targets)]
144+
try:
145+
return df_targets.astype(float) * self._opt_targets.values()
146+
except ValueError as ex:
147+
_LOG.error(
148+
"Some score values cannot be converted to float - check the data ::\n%s",
149+
df_targets,
150+
exc_info=True,
151+
)
152+
raise ValueError("Some score values cannot be converted to float") from ex
145153

146154
def _to_df(self, configs: Sequence[Dict[str, TunableValue]]) -> pd.DataFrame:
147155
"""

mlos_bench/mlos_bench/tests/optimizers/mlos_core_opt_df_test.py

+86
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,10 @@ def mlos_core_optimizer(tunable_groups: TunableGroups) -> MlosCoreOptimizer:
2323
"optimizer_type": "FLAML",
2424
"max_suggestions": 10,
2525
"seed": SEED,
26+
"optimization_targets": {
27+
"latency": "min",
28+
"throughput": "max",
29+
},
2630
}
2731
return MlosCoreOptimizer(tunable_groups, test_opt_config)
2832

@@ -74,3 +78,85 @@ def test_df(mlos_core_optimizer: MlosCoreOptimizer, mock_configs: List[dict]) ->
7478
"vmSize": "Standard_B2s",
7579
},
7680
]
81+
82+
83+
def test_df_str(mlos_core_optimizer: MlosCoreOptimizer, mock_configs: List[dict]) -> None:
84+
"""Test `MlosCoreOptimizer._to_df()` type coercion on tunables with string
85+
values.
86+
"""
87+
df_config_orig = mlos_core_optimizer._to_df(mock_configs)
88+
df_config_str = mlos_core_optimizer._to_df(
89+
[{key: str(val) for (key, val) in config.items()} for config in mock_configs]
90+
)
91+
assert df_config_orig.equals(df_config_str)
92+
93+
94+
def test_adjust_signs_df(mlos_core_optimizer: MlosCoreOptimizer) -> None:
95+
"""Test `MlosCoreOptimizer._adjust_signs_df()` on different types of inputs."""
96+
df_scores_input = pandas.DataFrame(
97+
{
98+
"latency": [88.88, 66.66, 99.99, None],
99+
"throughput": [111, 222, 333, None],
100+
}
101+
)
102+
103+
df_scores_output = pandas.DataFrame(
104+
{
105+
"latency": [88.88, 66.66, 99.99, float("NaN")],
106+
"throughput": [-111, -222, -333, float("NaN")],
107+
}
108+
)
109+
110+
# Make sure we adjust the signs for minimization.
111+
df_scores = mlos_core_optimizer._adjust_signs_df(df_scores_input)
112+
assert df_scores.equals(df_scores_output)
113+
114+
# Check that the same operation works for string inputs.
115+
df_scores = mlos_core_optimizer._adjust_signs_df(df_scores_input.astype(str))
116+
assert df_scores.equals(df_scores_output)
117+
118+
119+
def test_adjust_signs_df_nan(mlos_core_optimizer: MlosCoreOptimizer) -> None:
120+
"""Test `MlosCoreOptimizer._adjust_signs_df()` handling None, NaN, and Inf
121+
values.
122+
"""
123+
df_scores = mlos_core_optimizer._adjust_signs_df(
124+
pandas.DataFrame(
125+
{
126+
"latency": ["88.88", "NaN", "Inf", "-Inf", None],
127+
"throughput": ["111", "NaN", "Inf", "-Inf", None],
128+
}
129+
)
130+
)
131+
132+
assert df_scores.equals(
133+
pandas.DataFrame(
134+
{
135+
"latency": [88.88, float("NaN"), float("Inf"), float("-Inf"), float("NaN")],
136+
"throughput": [-111, float("NaN"), float("-Inf"), float("Inf"), float("NaN")],
137+
}
138+
)
139+
)
140+
141+
142+
def test_adjust_signs_df_invalid(mlos_core_optimizer: MlosCoreOptimizer) -> None:
143+
"""Test `MlosCoreOptimizer._adjust_signs_df()` on invalid inputs."""
144+
with pytest.raises(ValueError):
145+
mlos_core_optimizer._adjust_signs_df(
146+
pandas.DataFrame(
147+
{
148+
"latency": ["INVALID"],
149+
"throughput": ["no input"],
150+
}
151+
)
152+
)
153+
154+
with pytest.raises(ValueError):
155+
mlos_core_optimizer._adjust_signs_df(
156+
pandas.DataFrame(
157+
{
158+
"latency": ["88.88", ""],
159+
"throughput": ["111", ""],
160+
}
161+
)
162+
)

0 commit comments

Comments
 (0)