Skip to content

Commit 7dce3d1

Browse files
authored
Fixups and testing for cli config file parsing (#722)
Further fixups to #717 Some parameters were not being respected from `--config test-cli-config.jsonc` files. Split out from #720
1 parent 6fe46ca commit 7dce3d1

File tree

6 files changed

+215
-80
lines changed

6 files changed

+215
-80
lines changed

mlos_bench/mlos_bench/config/schedulers/sync_scheduler.jsonc

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
"config": {
88
"trial_config_repeat_count": 3,
9-
"max_trials": -1, // Limited only in hte Optimizer logic/config.
9+
"max_trials": -1, // Limited only in the Optimizer logic/config.
1010
"teardown": false
1111
}
1212
}

mlos_bench/mlos_bench/launcher.py

+79-35
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ class Launcher:
4444

4545
def __init__(self, description: str, long_text: str = "", argv: Optional[List[str]] = None):
4646
# pylint: disable=too-many-statements
47+
# pylint: disable=too-many-locals
4748
_LOG.info("Launch: %s", description)
4849
epilog = """
4950
Additional --key=value pairs can be specified to augment or override
@@ -56,7 +57,7 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st
5657
<https://github.com/microsoft/MLOS/tree/main/mlos_bench/>
5758
"""
5859
parser = argparse.ArgumentParser(description=f"{description} : {long_text}", epilog=epilog)
59-
(args, args_rest) = self._parse_args(parser, argv)
60+
(args, path_args, args_rest) = self._parse_args(parser, argv)
6061

6162
# Bootstrap config loader: command line takes priority.
6263
config_path = args.config_path or []
@@ -87,11 +88,25 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st
8788

8889
self._parent_service: Service = LocalExecService(parent=self._config_loader)
8990

91+
# Prepare global_config from a combination of global config files, cli
92+
# configs, and cli args.
93+
args_dict = vars(args)
94+
# teardown (bool) conflicts with Environment configs that use it for shell
95+
# commands (list), so we exclude it from copying over
96+
excluded_cli_args = path_args + ["teardown"]
97+
# Include (almost) any item from the cli config file that either isn't in
98+
# the cli args at all or whose cli arg is missing.
99+
cli_config_args = {
100+
key: val
101+
for (key, val) in config.items()
102+
if (args_dict.get(key) is None) and key not in excluded_cli_args
103+
}
104+
90105
self.global_config = self._load_config(
91-
config.get("globals", []) + (args.globals or []),
92-
(args.config_path or []) + config.get("config_path", []),
93-
args_rest,
94-
{key: val for (key, val) in config.items() if key not in vars(args)},
106+
args_globals=config.get("globals", []) + (args.globals or []),
107+
config_path=(args.config_path or []) + config.get("config_path", []),
108+
args_rest=args_rest,
109+
global_config=cli_config_args,
95110
)
96111
# experiment_id is generally taken from --globals files, but we also allow
97112
# overriding it on the CLI.
@@ -168,19 +183,35 @@ def service(self) -> Service:
168183
def _parse_args(
169184
parser: argparse.ArgumentParser,
170185
argv: Optional[List[str]],
171-
) -> Tuple[argparse.Namespace, List[str]]:
186+
) -> Tuple[argparse.Namespace, List[str], List[str]]:
172187
"""Parse the command line arguments."""
173-
parser.add_argument(
188+
189+
class PathArgsTracker:
190+
"""Simple class to help track which arguments are paths."""
191+
192+
def __init__(self, parser: argparse.ArgumentParser):
193+
self._parser = parser
194+
self.path_args: List[str] = []
195+
196+
def add_argument(self, *args: Any, **kwargs: Any) -> None:
197+
"""Add an argument to the parser and track its destination."""
198+
self.path_args.append(self._parser.add_argument(*args, **kwargs).dest)
199+
200+
path_args_tracker = PathArgsTracker(parser)
201+
202+
path_args_tracker.add_argument(
174203
"--config",
175204
required=False,
176-
help="Main JSON5 configuration file. Its keys are the same as the"
177-
+ " command line options and can be overridden by the latter.\n"
178-
+ "\n"
179-
+ " See the `mlos_bench/config/` tree at https://github.com/microsoft/MLOS/ "
180-
+ " for additional config examples for this and other arguments.",
205+
help=(
206+
"Main JSON5 configuration file. Its keys are the same as the "
207+
"command line options and can be overridden by the latter.\n"
208+
"\n"
209+
"See the `mlos_bench/config/` tree at https://github.com/microsoft/MLOS/ "
210+
"for additional config examples for this and other arguments."
211+
),
181212
)
182213

183-
parser.add_argument(
214+
path_args_tracker.add_argument(
184215
"--log_file",
185216
"--log-file",
186217
required=False,
@@ -192,11 +223,13 @@ def _parse_args(
192223
"--log-level",
193224
required=False,
194225
type=str,
195-
help=f"Logging level. Default is {logging.getLevelName(_LOG_LEVEL)}."
196-
+ " Set to DEBUG for debug, WARNING for warnings only.",
226+
help=(
227+
f"Logging level. Default is {logging.getLevelName(_LOG_LEVEL)}. "
228+
"Set to DEBUG for debug, WARNING for warnings only."
229+
),
197230
)
198231

199-
parser.add_argument(
232+
path_args_tracker.add_argument(
200233
"--config_path",
201234
"--config-path",
202235
"--config-paths",
@@ -207,7 +240,7 @@ def _parse_args(
207240
help="One or more locations of JSON config files.",
208241
)
209242

210-
parser.add_argument(
243+
path_args_tracker.add_argument(
211244
"--service",
212245
"--services",
213246
nargs="+",
@@ -219,17 +252,19 @@ def _parse_args(
219252
),
220253
)
221254

222-
parser.add_argument(
255+
path_args_tracker.add_argument(
223256
"--environment",
224257
required=False,
225258
help="Path to JSON file with the configuration of the benchmarking environment(s).",
226259
)
227260

228-
parser.add_argument(
261+
path_args_tracker.add_argument(
229262
"--optimizer",
230263
required=False,
231-
help="Path to the optimizer configuration file. If omitted, run"
232-
+ " a single trial with default (or specified in --tunable_values).",
264+
help=(
265+
"Path to the optimizer configuration file. If omitted, run "
266+
"a single trial with default (or specified in --tunable_values)."
267+
),
233268
)
234269

235270
parser.add_argument(
@@ -243,18 +278,22 @@ def _parse_args(
243278
),
244279
)
245280

246-
parser.add_argument(
281+
path_args_tracker.add_argument(
247282
"--scheduler",
248283
required=False,
249-
help="Path to the scheduler configuration file. By default, use"
250-
+ " a single worker synchronous scheduler.",
284+
help=(
285+
"Path to the scheduler configuration file. By default, use "
286+
"a single worker synchronous scheduler."
287+
),
251288
)
252289

253-
parser.add_argument(
290+
path_args_tracker.add_argument(
254291
"--storage",
255292
required=False,
256-
help="Path to the storage configuration file."
257-
+ " If omitted, use the ephemeral in-memory SQL storage.",
293+
help=(
294+
"Path to the storage configuration file. "
295+
"If omitted, use the ephemeral in-memory SQL storage."
296+
),
258297
)
259298

260299
parser.add_argument(
@@ -275,24 +314,28 @@ def _parse_args(
275314
help="Seed to use with --random_init",
276315
)
277316

278-
parser.add_argument(
317+
path_args_tracker.add_argument(
279318
"--tunable_values",
280319
"--tunable-values",
281320
nargs="+",
282321
action="extend",
283322
required=False,
284-
help="Path to one or more JSON files that contain values of the tunable"
285-
+ " parameters. This can be used for a single trial (when no --optimizer"
286-
+ " is specified) or as default values for the first run in optimization.",
323+
help=(
324+
"Path to one or more JSON files that contain values of the tunable "
325+
"parameters. This can be used for a single trial (when no --optimizer "
326+
"is specified) or as default values for the first run in optimization."
327+
),
287328
)
288329

289-
parser.add_argument(
330+
path_args_tracker.add_argument(
290331
"--globals",
291332
nargs="+",
292333
action="extend",
293334
required=False,
294-
help="Path to one or more JSON files that contain additional"
295-
+ " [private] parameters of the benchmarking environment.",
335+
help=(
336+
"Path to one or more JSON files that contain additional "
337+
"[private] parameters of the benchmarking environment."
338+
),
296339
)
297340

298341
parser.add_argument(
@@ -328,7 +371,7 @@ def _parse_args(
328371
argv = sys.argv[1:].copy()
329372
(args, args_rest) = parser.parse_known_args(argv)
330373

331-
return (args, args_rest)
374+
return (args, path_args_tracker.path_args, args_rest)
332375

333376
@staticmethod
334377
def _try_parse_extra_args(cmdline: Iterable[str]) -> Dict[str, TunableValue]:
@@ -361,6 +404,7 @@ def _try_parse_extra_args(cmdline: Iterable[str]) -> Dict[str, TunableValue]:
361404

362405
def _load_config(
363406
self,
407+
*,
364408
args_globals: Iterable[str],
365409
config_path: Iterable[str],
366410
args_rest: Iterable[str],

mlos_bench/mlos_bench/optimizers/base_optimizer.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -135,20 +135,23 @@ def __exit__(
135135
@property
136136
def current_iteration(self) -> int:
137137
"""
138-
The current number of iterations (trials) registered.
138+
The current number of iterations (suggestions) registered.
139139
140140
Note: this may or may not be the same as the number of configurations.
141-
See Also: Launcher.trial_config_repeat_count.
141+
See Also: Scheduler.trial_config_repeat_count and Scheduler.max_trials.
142142
"""
143143
return self._iter
144144

145+
# TODO: finish renaming iterations to suggestions.
146+
# See Also: https://github.com/microsoft/MLOS/pull/713
147+
145148
@property
146149
def max_iterations(self) -> int:
147150
"""
148-
The maximum number of iterations (trials) to run.
151+
The maximum number of iterations (suggestions) to run.
149152
150153
Note: this may or may not be the same as the number of configurations.
151-
See Also: Launcher.trial_config_repeat_count.
154+
See Also: Scheduler.trial_config_repeat_count and Scheduler.max_trials.
152155
"""
153156
return self._max_iter
154157

mlos_bench/mlos_bench/schedulers/base_scheduler.py

+32
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from pytz import UTC
1515
from typing_extensions import Literal
1616

17+
from mlos_bench.config.schemas import ConfigSchema
1718
from mlos_bench.environments.base_environment import Environment
1819
from mlos_bench.optimizers.base_optimizer import Optimizer
1920
from mlos_bench.storage.base_storage import Storage
@@ -64,6 +65,7 @@ def __init__( # pylint: disable=too-many-arguments
6465
source=global_config,
6566
required_keys=["experiment_id", "trial_id"],
6667
)
68+
self._validate_json_config(config)
6769

6870
self._experiment_id = config["experiment_id"].strip()
6971
self._trial_id = int(config["trial_id"])
@@ -88,6 +90,36 @@ def __init__( # pylint: disable=too-many-arguments
8890

8991
_LOG.debug("Scheduler instantiated: %s :: %s", self, config)
9092

93+
def _validate_json_config(self, config: dict) -> None:
94+
"""Reconstructs a basic json config that this class might have been instantiated
95+
from in order to validate configs provided outside the file loading
96+
mechanism.
97+
"""
98+
json_config: dict = {
99+
"class": self.__class__.__module__ + "." + self.__class__.__name__,
100+
}
101+
if config:
102+
json_config["config"] = config.copy()
103+
# The json schema does not allow for -1 as a valid value for config_id.
104+
# As it is just a default placeholder value, and not required, we can
105+
# remove it from the config copy prior to validation safely.
106+
config_id = json_config["config"].get("config_id")
107+
if config_id is not None and isinstance(config_id, int) and config_id < 0:
108+
json_config["config"].pop("config_id")
109+
ConfigSchema.SCHEDULER.validate(json_config)
110+
111+
@property
112+
def trial_config_repeat_count(self) -> int:
113+
"""Gets the number of trials to run for a given config."""
114+
return self._trial_config_repeat_count
115+
116+
@property
117+
def max_trials(self) -> int:
118+
"""Gets the maximum number of trials to run for a given experiment, or -1 for no
119+
limit.
120+
"""
121+
return self._max_trials
122+
91123
def __repr__(self) -> str:
92124
"""
93125
Produce a human-readable version of the Scheduler (mostly for logging).

mlos_bench/mlos_bench/tests/config/cli/test-cli-config.jsonc

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
"services/remote/mock/mock_fileshare_service.jsonc"
1818
],
1919

20-
"trial_config_repeat_count": 1,
20+
"trial_config_repeat_count": 2,
2121

2222
"random_seed": 42,
2323
"random_init": true

0 commit comments

Comments
 (0)