From f6ee35b1517993853f4712b7eaa2860fb4b109c2 Mon Sep 17 00:00:00 2001 From: Carolin Benjamins Date: Mon, 9 Jan 2023 12:57:58 +0100 Subject: [PATCH 01/74] Add MO facade with todos --- smac/facade/multi_objective_facade.py | 191 ++++++++++++++++++++++++++ 1 file changed, 191 insertions(+) create mode 100644 smac/facade/multi_objective_facade.py diff --git a/smac/facade/multi_objective_facade.py b/smac/facade/multi_objective_facade.py new file mode 100644 index 000000000..311244a00 --- /dev/null +++ b/smac/facade/multi_objective_facade.py @@ -0,0 +1,191 @@ +from __future__ import annotations + +from ConfigSpace import Configuration + +from smac.acquisition.function.expected_improvement import EI +from smac.acquisition.maximizer.local_and_random_search import ( + LocalAndSortedRandomSearch, +) +from smac.facade.abstract_facade import AbstractFacade +from smac.initial_design.default_design import DefaultInitialDesign +from smac.intensifier.intensifier import Intensifier +from smac.model.random_forest.random_forest import RandomForest +from smac.multi_objective.aggregation_strategy import MeanAggregationStrategy +from smac.random_design.probability_design import ProbabilityRandomDesign +from smac.runhistory.encoder.encoder import RunHistoryEncoder +from smac.scenario import Scenario +from smac.utils.logging import get_logger + +__copyright__ = "Copyright 2022, automl.org" +__license__ = "3-clause BSD" + + +logger = get_logger(__name__) + + +class MultiObjectiveFacade(AbstractFacade): + @staticmethod + def get_model( # type: ignore + scenario: Scenario, + *, + n_trees: int = 10, + ratio_features: float = 5.0 / 6.0, + min_samples_split: int = 3, + min_samples_leaf: int = 3, + max_depth: int = 20, + bootstrapping: bool = True, + pca_components: int = 4, + ) -> RandomForest: + """Returns a random forest as surrogate model. + + Parameters + ---------- + n_trees : int, defaults to 10 + The number of trees in the random forest. 
+ ratio_features : float, defaults to 5.0 / 6.0 + The ratio of features that are considered for splitting. + min_samples_split : int, defaults to 3 + The minimum number of data points to perform a split. + min_samples_leaf : int, defaults to 3 + The minimum number of data points in a leaf. + max_depth : int, defaults to 20 + The maximum depth of a single tree. + bootstrapping : bool, defaults to True + Enables bootstrapping. + pca_components : float, defaults to 4 + Number of components to keep when using PCA to reduce dimensionality of instance features. + """ + return RandomForest( + configspace=scenario.configspace, + n_trees=n_trees, + ratio_features=ratio_features, + min_samples_split=min_samples_split, + min_samples_leaf=min_samples_leaf, + max_depth=max_depth, + bootstrapping=bootstrapping, + log_y=False, + instance_features=scenario.instance_features, + pca_components=pca_components, + seed=scenario.seed, + ) + + @staticmethod + # TODO update intensifier + def get_intensifier(scenario: Scenario) -> AbstractIntensifier: + return super().get_intensifier(scenario) + + @staticmethod + # TODO update acquisition function + def get_acquisition_function( # type: ignore + scenario: Scenario, + *, + xi: float = 0.0, + ) -> EI: + """Returns an Expected Improvement acquisition function. + + Parameters + ---------- + scenario : Scenario + xi : float, defaults to 0.0 + Controls the balance between exploration and exploitation of the + acquisition function. 
+ """ + return EI(xi=xi) + + @staticmethod + # TODO update acq optimizer + def get_acquisition_maximizer( # type: ignore + scenario: Scenario, + ) -> LocalAndSortedRandomSearch: + """Returns local and sorted random search as acquisition maximizer.""" + optimizer = LocalAndSortedRandomSearch( + scenario.configspace, + seed=scenario.seed, + ) + + return optimizer + + @staticmethod + # TODO update intensifier + def get_intensifier( + scenario: Scenario, + *, + max_config_calls: int = 2000, + max_incumbents: int = 10, + ) -> Intensifier: + """Returns ``Intensifier`` as intensifier. Supports budgets. + + Parameters + ---------- + max_config_calls : int, defaults to 3 + Maximum number of configuration evaluations. Basically, how many instance-seed keys should be evaluated at + maximum for a configuration. + max_incumbents : int, defaults to 10 + How many incumbents to keep track of in the case of multi-objective. + """ + return Intensifier( + scenario=scenario, + max_config_calls=max_config_calls, + max_incumbents=max_incumbents, + ) + + @staticmethod + # TODO update initial design to LHD + def get_initial_design( # type: ignore + scenario: Scenario, + *, + additional_configs: list[Configuration] = [], + ) -> DefaultInitialDesign: + """Returns an initial design, which returns the default configuration. + + Parameters + ---------- + additional_configs: list[Configuration], defaults to [] + Adds additional configurations to the initial design. + """ + return DefaultInitialDesign( + scenario=scenario, + additional_configs=additional_configs, + ) + + @staticmethod + def get_random_design( # type: ignore + scenario: Scenario, + *, + probability: float = 0.5, + ) -> ProbabilityRandomDesign: + """Returns ``ProbabilityRandomDesign`` for interleaving configurations. + + Parameters + ---------- + probability : float, defaults to 0.5 + Probability that a configuration will be drawn at random. 
+ """ + return ProbabilityRandomDesign(probability=probability, seed=scenario.seed) + + @staticmethod + # TODO update mo algo (no aggregation) + def get_multi_objective_algorithm( # type: ignore + scenario: Scenario, + *, + objective_weights: list[float] | None = None, + ) -> MeanAggregationStrategy: + """Returns the mean aggregation strategy for the multi objective algorithm. + + Parameters + ---------- + scenario : Scenario + objective_weights : list[float] | None, defaults to None + Weights for averaging the objectives in a weighted manner. Must be of the same length as the number of + objectives. + """ + return MeanAggregationStrategy( + scenario=scenario, + objective_weights=objective_weights, + ) + + @staticmethod + # TODO update rh encoder + def get_runhistory_encoder(scenario: Scenario) -> RunHistoryEncoder: + """Returns the default runhistory encoder.""" + return RunHistoryEncoder(scenario) From 2b97fca39d393f4d43179cb220e221d24e309e25 Mon Sep 17 00:00:00 2001 From: Carolin Benjamins Date: Mon, 9 Jan 2023 12:58:10 +0100 Subject: [PATCH 02/74] Add NoAggregationStrategy --- smac/multi_objective/aggregation_strategy.py | 22 ++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/smac/multi_objective/aggregation_strategy.py b/smac/multi_objective/aggregation_strategy.py index bac0a0097..e139d5265 100644 --- a/smac/multi_objective/aggregation_strategy.py +++ b/smac/multi_objective/aggregation_strategy.py @@ -42,3 +42,25 @@ def meta(self) -> dict[str, Any]: def __call__(self, values: list[float]) -> float: # noqa: D102 return float(np.average(values, axis=0, weights=self._objective_weights)) + + +class NoAggregationStrategy(AbstractMultiObjectiveAlgorithm): + """ + A class to not aggregate multi-objective losses into a single objective losses. + """ + + def __call__(self, values: list[float]) -> list[float]: + """ + Not transform a multi-objective loss to a single loss. + + Parameters + ---------- + values : list[float] + Normalized cost values. 
+ + Returns + ------- + costs : list[float] + costs. + """ + return values From 556ad37d66dd3b15bbeffbcbc9cd09fb968b5ed4 Mon Sep 17 00:00:00 2001 From: Carolin Benjamins Date: Mon, 9 Jan 2023 13:00:34 +0100 Subject: [PATCH 03/74] Update aggregation strategy --- smac/facade/multi_objective_facade.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/smac/facade/multi_objective_facade.py b/smac/facade/multi_objective_facade.py index 311244a00..f486d020b 100644 --- a/smac/facade/multi_objective_facade.py +++ b/smac/facade/multi_objective_facade.py @@ -10,7 +10,7 @@ from smac.initial_design.default_design import DefaultInitialDesign from smac.intensifier.intensifier import Intensifier from smac.model.random_forest.random_forest import RandomForest -from smac.multi_objective.aggregation_strategy import MeanAggregationStrategy +from smac.multi_objective.aggregation_strategy import NoAggregationStrategy from smac.random_design.probability_design import ProbabilityRandomDesign from smac.runhistory.encoder.encoder import RunHistoryEncoder from smac.scenario import Scenario @@ -164,12 +164,9 @@ def get_random_design( # type: ignore return ProbabilityRandomDesign(probability=probability, seed=scenario.seed) @staticmethod - # TODO update mo algo (no aggregation) def get_multi_objective_algorithm( # type: ignore scenario: Scenario, - *, - objective_weights: list[float] | None = None, - ) -> MeanAggregationStrategy: + ) -> NoAggregationStrategy: """Returns the mean aggregation strategy for the multi objective algorithm. Parameters @@ -179,9 +176,8 @@ def get_multi_objective_algorithm( # type: ignore Weights for averaging the objectives in a weighted manner. Must be of the same length as the number of objectives. 
""" - return MeanAggregationStrategy( + return NoAggregationStrategy( scenario=scenario, - objective_weights=objective_weights, ) @staticmethod From 672389f0df0b7bbf158853bfa24298bdd7de3383 Mon Sep 17 00:00:00 2001 From: Carolin Benjamins Date: Mon, 9 Jan 2023 13:09:28 +0100 Subject: [PATCH 04/74] Limit value to bounds region This is a safety measure. Normally, every time we update the runhistory, the objective bounds are updated, so the value to normalize should already lie inside the bounds. --- smac/utils/multi_objective.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/smac/utils/multi_objective.py b/smac/utils/multi_objective.py index 2dec4da94..28500ee7e 100644 --- a/smac/utils/multi_objective.py +++ b/smac/utils/multi_objective.py @@ -30,6 +30,10 @@ def normalize_costs( costs = [] for v, b in zip(values, bounds): assert type(v) != list + + # limit value to bounds region + v = min(max(v, b[0]), b[1]) + p = v - b[0] q = b[1] - b[0] From 09160b7e85d3dcbee8fef88518fd51e4c295a93b Mon Sep 17 00:00:00 2001 From: Carolin Benjamins Date: Mon, 9 Jan 2023 13:27:22 +0100 Subject: [PATCH 05/74] Factor out creating a unique list Created a helper method that removes duplicates from a list while preserving its order --- smac/acquisition/maximizer/local_search.py | 32 +++++++++++++++++----- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/smac/acquisition/maximizer/local_search.py b/smac/acquisition/maximizer/local_search.py index ff8a78760..def437dcb 100644 --- a/smac/acquisition/maximizer/local_search.py +++ b/smac/acquisition/maximizer/local_search.py @@ -219,19 +219,37 @@ def _get_init_points_from_previous_configs( else: additional_start_points = [] - init_points = [] - init_points_as_set: set[Configuration] = set() - for cand in itertools.chain( + candidates = itertools.chain( configs_previous_runs_sorted, previous_configs_sorted_by_cost, additional_start_points, - ): - if cand not in init_points_as_set: - init_points.append(cand) - init_points_as_set.add(cand) + ) + 
init_points = self._unique_list(candidates) return init_points + @staticmethod + def _unique_list(elements: list | itertools.chain) -> list: + """ + Returns the list with only unique elements while remaining the list order. + + Parameters + ---------- + elements : list | itertools.chain + + Returns + ------- + A list with unique elements with preserved order + """ + return_list = [] + return_list_as_set = set() + for e in elements: + if e not in return_list_as_set: + return_list.append(e) + return_list_as_set.add(e) + + return return_list + def _search( self, start_points: list[Configuration], From 1359f19f02f20a72bd64a48faed5b37fcfa31f72 Mon Sep 17 00:00:00 2001 From: Carolin Benjamins Date: Mon, 9 Jan 2023 13:51:05 +0100 Subject: [PATCH 06/74] More debug logging --- smac/acquisition/maximizer/local_search.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/smac/acquisition/maximizer/local_search.py b/smac/acquisition/maximizer/local_search.py index def437dcb..5c491bfc2 100644 --- a/smac/acquisition/maximizer/local_search.py +++ b/smac/acquisition/maximizer/local_search.py @@ -292,13 +292,13 @@ def _search( local_search_steps = [0] * num_candidates # tracking the number of neighbors looked at for logging purposes neighbors_looked_at = [0] * num_candidates - # tracking the number of neighbors generated for logging purposse + # tracking the number of neighbors generated for logging purposes neighbors_generated = [0] * num_candidates # how many neighbors were obtained for the i-th local search. 
Important to map the individual acquisition # function values to the correct local search run obtain_n = [self._vectorization_min_obtain] * num_candidates # Tracking the time it takes to compute the acquisition function - times = [] + times_per_iteration: list[float] = [] # Set up the neighborhood generators neighborhood_iterators = [] @@ -341,11 +341,12 @@ def _search( obtain_n[i] = len(neighbors_for_i) neighbors.extend(neighbors_for_i) + logger.debug(f"Iteration {num_iters} with {np.count_nonzero(active)} active searches and {len(neighbors)} aqcuisition function calls.") if len(neighbors) != 0: start_time = time.time() acq_val = self._acquisition_function(neighbors) end_time = time.time() - times.append(end_time - start_time) + times_per_iteration.append(end_time - start_time) if np.ndim(acq_val.shape) == 0: acq_val = np.asarray([acq_val]) @@ -423,11 +424,13 @@ def _search( ) logger.debug( - "Local searches took %s steps and looked at %s configurations. Computing the acquisition function in " - "vectorized for took %f seconds on average.", + "Local searches took %s steps and looked at %s configurations. Computing the acquisition function for " + "each search took %f (prev %f) seconds on average and each acquisition function call took %f seconds on average.", local_search_steps, neighbors_looked_at, - np.mean(times), + np.sum(times_per_iteration)/num_candidates, + np.mean(times_per_iteration), + times_per_iteration/np.sum(neighbors_looked_at), ) return [(a, i) for a, i in zip(acq_val_candidates, candidates)] From 733f94dda9b81091058162490af2bd61197ff05b Mon Sep 17 00:00:00 2001 From: Carolin Benjamins Date: Mon, 9 Jan 2023 14:18:43 +0100 Subject: [PATCH 07/74] Factor out sorting of costs Previously: random scalarization of MO costs bc ParEGO was the only MO algo. Now: separate function which can be overwritten. 
--- smac/acquisition/maximizer/local_search.py | 29 ++++++++++++++++------ 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/smac/acquisition/maximizer/local_search.py b/smac/acquisition/maximizer/local_search.py index 5c491bfc2..74531d66f 100644 --- a/smac/acquisition/maximizer/local_search.py +++ b/smac/acquisition/maximizer/local_search.py @@ -193,20 +193,16 @@ def _get_init_points_from_previous_configs( costs = self._acquisition_function.model.predict_marginalized(conf_array)[0] assert len(conf_array) == len(costs), (conf_array.shape, costs.shape) - # In case of the predictive model returning the prediction for more than one objective per configuration - # (for example multi-objective or EIPS) it is not immediately clear how to sort according to the cost - # of a configuration. Therefore, we simply follow the ParEGO approach and use a random scalarization. + sort_objectives = [costs.flatten()] if len(costs.shape) == 2 and costs.shape[1] > 1: - weights = np.array([self._rng.rand() for _ in range(costs.shape[1])]) - weights = weights / np.sum(weights) - costs = costs @ weights + sort_objectives = self._create_sort_keys(costs=costs) # From here # http://stackoverflow.com/questions/20197990/how-to-make-argsort-result-to-be-random-between-equal-values random = self._rng.rand(len(costs)) # Last column is primary sort key! 
- indices = np.lexsort((random.flatten(), costs.flatten())) + indices = np.lexsort((random.flatten(), *sort_objectives)) # Cannot use zip here because the indices array cannot index the # rand_configs list, because the second is a pure python list @@ -228,6 +224,25 @@ def _get_init_points_from_previous_configs( return init_points + def _create_sort_keys(self, costs) -> list[list[float]]: + """Create sort keys to sort configs + + In case of the predictive model returning the prediction for more than one objective per configuration + (for example multi-objective or EIPS) it is not immediately clear how to sort according to the cost + of a configuration. Therefore, we simply follow the ParEGO approach and use a random scalarization. + + Returns + ------- + list[list[float]] + Sorting ids for lexsort + """ + weights = np.array([self._rng.rand() for _ in range(costs.shape[1])]) + weights = weights / np.sum(weights) + costs = costs @ weights + sort_objectives = [costs.flatten()] + return sort_objectives + + @staticmethod def _unique_list(elements: list | itertools.chain) -> list: """ From 3e015c0f75c0f17fd9d540d5e0529e1578908daa Mon Sep 17 00:00:00 2001 From: Carolin Benjamins Date: Mon, 9 Jan 2023 14:19:06 +0100 Subject: [PATCH 08/74] Better docstring --- smac/acquisition/maximizer/local_search.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/smac/acquisition/maximizer/local_search.py b/smac/acquisition/maximizer/local_search.py index 74531d66f..751d98e89 100644 --- a/smac/acquisition/maximizer/local_search.py +++ b/smac/acquisition/maximizer/local_search.py @@ -224,17 +224,22 @@ def _get_init_points_from_previous_configs( return init_points - def _create_sort_keys(self, costs) -> list[list[float]]: - """Create sort keys to sort configs + def _create_sort_keys(self, costs: np.array) -> list[list[float]]: + """Sort costs by random scalarization In case of the predictive model returning the prediction for more than one objective per 
configuration (for example multi-objective or EIPS) it is not immediately clear how to sort according to the cost of a configuration. Therefore, we simply follow the ParEGO approach and use a random scalarization. + Parameters + ---------- + costs : np.array + Cost(s) per config + Returns ------- list[list[float]] - Sorting ids for lexsort + Sorting sequence for lexsort """ weights = np.array([self._rng.rand() for _ in range(costs.shape[1])]) weights = weights / np.sum(weights) From 171958b148945a27f85860b989d5e185fdf8873f Mon Sep 17 00:00:00 2001 From: Carolin Benjamins Date: Mon, 9 Jan 2023 14:27:59 +0100 Subject: [PATCH 09/74] Add MO acq maximizer --- .../maximizer/multi_objective_search.py | 135 ++++++++++++++++++ 1 file changed, 135 insertions(+) create mode 100644 smac/acquisition/maximizer/multi_objective_search.py diff --git a/smac/acquisition/maximizer/multi_objective_search.py b/smac/acquisition/maximizer/multi_objective_search.py new file mode 100644 index 000000000..13283b732 --- /dev/null +++ b/smac/acquisition/maximizer/multi_objective_search.py @@ -0,0 +1,135 @@ +from __future__ import annotations + +from typing import Any + +import itertools +import time + +from pygmo import fast_non_dominated_sorting + +import numpy as np +from ConfigSpace import Configuration, ConfigurationSpace +from ConfigSpace.exceptions import ForbiddenValueError + +from smac.acquisition.function import AbstractAcquisitionFunction +from smac.acquisition.maximizer.local_search import LocalSearch +from smac.acquisition.maximizer.local_and_random_search import LocalAndSortedRandomSearch +from smac.utils.configspace import ( + convert_configurations_to_array, + get_one_exchange_neighbourhood, +) +from smac.utils.logging import get_logger + +__copyright__ = "Copyright 2022, automl.org" +__license__ = "3-clause BSD" + +logger = get_logger(__name__) + + +class MOLocalSearch(LocalSearch): + def _get_initial_points( + self, + previous_configs: list[Configuration], + n_points: int, + 
additional_start_points: list[tuple[float, Configuration]] | None, + ) -> list[Configuration]: + """Get initial points to start search from. + + If we already have a population, add those to the initial points. + + Parameters + ---------- + previous_configs : list[Configuration] + Previous configuration (e.g., from the runhistory). + n_points : int + Number of initial points to be generated. + additional_start_points : list[tuple[float, Configuration]] | None + Additional starting points. + + Returns + ------- + list[Configuration] + A list of initial points/configurations. + """ + init_points = super()._get_initial_points(previous_configs=previous_configs, n_points=n_points, additional_start_points=additional_start_points) + + # Add population to Local search + # TODO where is population saved? update accordingly + if len(stats.population) > 0: + population = [runhistory.ids_config[confid] for confid in stats.population] + init_points = self._unique_list(itertools.chain(population, init_points)) + return init_points + + def _create_sort_keys(self, costs: np.array) -> list[list[float]]: + """Non-Dominated Sorting of Costs + + In case of the predictive model returning the prediction for more than one objective per configuration + (for example multi-objective or EIPS) we sort here based on the dominance order. In each front + configurations are sorted on the number of points they dominate overall. + + Parameters + ---------- + costs : np.array + Cost(s) per config + + Returns + ------- + list[list[float]] + Sorting sequence for lexsort + """ + _, domination_list, _, non_domination_rank = fast_non_dominated_sorting(costs) + domination_list = [len(i) for i in domination_list] + sort_objectives = [domination_list, non_domination_rank] # Last column is primary sort key! 
+ return sort_objectives + + +class MOLocalAndSortedRandomSearch(LocalAndSortedRandomSearch): + """Local and Random Search for Multi-Objective + + This optimizer performs local search from the previous best points according, to the acquisition function, uses the + acquisition function to sort randomly sampled configurations. Random configurations are interleaved by the main SMAC + code. + + Parameters + ---------- + configspace : ConfigurationSpace + acquisition_function : AbstractAcquisitionFunction | None, defaults to None + challengers : int, defaults to 5000 + Number of challengers. + max_steps: int | None, defaults to None + [LocalSearch] Maximum number of steps that the local search will perform. + n_steps_plateau_walk: int, defaults to 10 + [LocalSearch] number of steps during a plateau walk before local search terminates. + local_search_iterations: int, defauts to 10 + [Local Search] number of local search iterations. + seed : int, defaults to 0 + """ + + def __init__( + self, + configspace: ConfigurationSpace, + acquisition_function: AbstractAcquisitionFunction | None = None, + challengers: int = 5000, + max_steps: int | None = None, + n_steps_plateau_walk: int = 10, + local_search_iterations: int = 10, + seed: int = 0, + ) -> None: + super().__init__( + configspace=configspace, + acquisition_function=acquisition_function, + challengers=challengers, + max_steps=max_steps, + n_steps_plateau_walk=n_steps_plateau_walk, + local_search_iterations=local_search_iterations, + seed=seed, + ) + + self.local_search = MOLocalSearch( + configspace=configspace, + acquisition_function=acquisition_function, + challengers=challengers, + max_steps=max_steps, + n_steps_plateau_walk=n_steps_plateau_walk, + seed=seed + ) From 0fe8e7d04774e5dac0bb2f492d8a8cbf2da267f6 Mon Sep 17 00:00:00 2001 From: Carolin Benjamins Date: Mon, 9 Jan 2023 14:29:39 +0100 Subject: [PATCH 10/74] Update acq optimizer --- smac/facade/multi_objective_facade.py | 9 +++------ 1 file changed, 3 
insertions(+), 6 deletions(-) diff --git a/smac/facade/multi_objective_facade.py b/smac/facade/multi_objective_facade.py index f486d020b..a861e5d23 100644 --- a/smac/facade/multi_objective_facade.py +++ b/smac/facade/multi_objective_facade.py @@ -3,9 +3,6 @@ from ConfigSpace import Configuration from smac.acquisition.function.expected_improvement import EI -from smac.acquisition.maximizer.local_and_random_search import ( - LocalAndSortedRandomSearch, -) from smac.facade.abstract_facade import AbstractFacade from smac.initial_design.default_design import DefaultInitialDesign from smac.intensifier.intensifier import Intensifier @@ -15,6 +12,7 @@ from smac.runhistory.encoder.encoder import RunHistoryEncoder from smac.scenario import Scenario from smac.utils.logging import get_logger +from smac.acquisition.maximizer.multi_objective_search import MOLocalAndSortedRandomSearch __copyright__ = "Copyright 2022, automl.org" __license__ = "3-clause BSD" @@ -93,12 +91,11 @@ def get_acquisition_function( # type: ignore return EI(xi=xi) @staticmethod - # TODO update acq optimizer def get_acquisition_maximizer( # type: ignore scenario: Scenario, - ) -> LocalAndSortedRandomSearch: + ) -> MOLocalAndSortedRandomSearch: """Returns local and sorted random search as acquisition maximizer.""" - optimizer = LocalAndSortedRandomSearch( + optimizer = MOLocalAndSortedRandomSearch( scenario.configspace, seed=scenario.seed, ) From 00591556c7505c754c15c0de588b1fa59705089c Mon Sep 17 00:00:00 2001 From: Carolin Benjamins Date: Mon, 9 Jan 2023 15:36:56 +0100 Subject: [PATCH 11/74] Stop local search after max steps is reached Also, reset obtain to the kwargs (no magic numbers) Better debug message --- smac/acquisition/maximizer/local_search.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/smac/acquisition/maximizer/local_search.py b/smac/acquisition/maximizer/local_search.py index 751d98e89..af60893eb 100644 --- a/smac/acquisition/maximizer/local_search.py +++ 
b/smac/acquisition/maximizer/local_search.py @@ -65,7 +65,7 @@ def __init__( seed=seed, ) - self._max_steps = max_steps + self._max_steps = max_steps if not None else np.inf self._n_steps_plateau_walk = n_steps_plateau_walk self._vectorization_min_obtain = vectorization_min_obtain self._vectorization_max_obtain = vectorization_max_obtain @@ -423,7 +423,7 @@ def _search( continue if obtain_n[i] == 0 or improved[i]: - obtain_n[i] = 2 + obtain_n[i] = self._vectorization_min_obtain else: obtain_n[i] = obtain_n[i] * 2 obtain_n[i] = min(obtain_n[i], self._vectorization_max_obtain) @@ -434,7 +434,12 @@ def _search( candidates[i] = neighbors_w_equal_acq[i][0] neighbors_w_equal_acq[i] = [] n_no_plateau_walk[i] += 1 - if n_no_plateau_walk[i] >= self._n_steps_plateau_walk: + + if n_no_plateau_walk[i] >= self._n_steps_plateau_walk or local_search_steps[i] >= self._max_steps: + message = f"Local search {i}: Stop search after walking {n_no_plateau_walk[i]} plateaus after {neighbors_looked_at[i]}." + if local_search_steps[i] >= self._max_steps: + message += f" Reached max_steps ({self._max_steps}) of local search." 
+ logger.debug(message) active[i] = False continue From 5b0a1bfb0e7a6ff81468e8d41a1647e724baba55 Mon Sep 17 00:00:00 2001 From: Carolin Benjamins Date: Mon, 9 Jan 2023 16:59:01 +0100 Subject: [PATCH 12/74] Abstract away population trimming and pareto front calculation --- smac/intensifier/abstract_intensifier.py | 91 +++++++++++++++++++----- smac/utils/pareto_front.py | 2 +- 2 files changed, 75 insertions(+), 18 deletions(-) diff --git a/smac/intensifier/abstract_intensifier.py b/smac/intensifier/abstract_intensifier.py index 6d896ecdb..be15a43a2 100644 --- a/smac/intensifier/abstract_intensifier.py +++ b/smac/intensifier/abstract_intensifier.py @@ -56,7 +56,7 @@ def __init__( scenario: Scenario, n_seeds: int | None = None, max_config_calls: int | None = None, - max_incumbents: int = 10, + max_incumbents: int = 10, # TODO set in MO facade seed: int | None = None, ): self._scenario = scenario @@ -541,7 +541,7 @@ def update_incumbents(self, config: Configuration) -> None: all_incumbent_isb_keys.append(self.get_instance_seed_budget_keys(incumbent)) # We compare the incumbents now and only return the ones on the pareto front - new_incumbents = calculate_pareto_front(rh, incumbents, all_incumbent_isb_keys) + new_incumbents = self._calculate_pareto_front(rh, incumbents, all_incumbent_isb_keys) new_incumbent_ids = [rh.get_config_id(c) for c in new_incumbents] if len(previous_incumbents) == len(new_incumbents): @@ -551,22 +551,10 @@ def update_incumbents(self, config: Configuration) -> None: return else: # In this case, we have to determine which config replaced which incumbent and reject it - removed_incumbent_id = list(set(previous_incumbent_ids) - set(new_incumbent_ids))[0] - removed_incumbent_hash = get_config_hash(rh.get_config(removed_incumbent_id)) - self._add_rejected_config(removed_incumbent_id) + # We will remove the oldest configuration (the one with the lowest id) because + # set orders the ids ascending. 
+ self._remove_incumbent(config=config, previous_incumbent_ids=previous_incumbent_ids, new_incumbent_ids=new_incumbent_ids) - if removed_incumbent_id == config_id: - logger.debug( - f"Rejected config {config_hash} because it is not better than the incumbents on " - f"{len(config_isb_keys)} instances." - ) - else: - self._remove_rejected_config(config_id) - logger.info( - f"Added config {config_hash} and rejected config {removed_incumbent_hash} as incumbent because " - f"it is not better than the incumbents on {len(config_isb_keys)} instances:" - ) - print_config_changes(rh.get_config(removed_incumbent_id), config, logger=logger) elif len(previous_incumbents) < len(new_incumbents): # Config becomes a new incumbent; nothing is rejected in this case self._remove_rejected_config(config_id) @@ -587,6 +575,7 @@ def update_incumbents(self, config: Configuration) -> None: # Cut incumbents: We only want to keep a specific number of incumbents # We use the crowding distance for that if len(new_incumbents) > self._max_incumbents: + # TODO adjust. Other option: statistical test or HV new_incumbents = sort_by_crowding_distance(rh, new_incumbents, all_incumbent_isb_keys) new_incumbents = new_incumbents[: self._max_incumbents] @@ -602,6 +591,74 @@ def update_incumbents(self, config: Configuration) -> None: self._update_trajectory(new_incumbents) + def _remove_incumbent(self, config: Configuration, previous_incumbent_ids: list[int], new_incumbent_ids: list[int]) -> None: + """Remove incumbents if population is too big + + If new and old incumbents differ. + Remove the oldest (the one with the lowest id) from the set of new and old incumbents. + If the current config is not discarded, it is added to the new incumbents. 
+ + Parameters + ---------- + config : Configuration + Newly evaluated trial + previous_incumbent_ids : list[int] + Incumbents before + new_incumbent_ids : list[int] + Incumbents considering/maybe including config + """ + assert len(previous_incumbent_ids) == len(new_incumbent_ids) + assert previous_incumbent_ids != new_incumbent_ids + rh = self.runhistory + config_isb_keys = self.get_instance_seed_budget_keys(config) + config_id = rh.get_config_id(config) + config_hash = get_config_hash(config) + + removed_incumbent_id = list(set(previous_incumbent_ids) - set(new_incumbent_ids))[0] + removed_incumbent_hash = get_config_hash(rh.get_config(removed_incumbent_id)) + self._add_rejected_config(removed_incumbent_id) + + if removed_incumbent_id == config_id: + logger.debug( + f"Rejected config {config_hash} because it is not better than the incumbents on " + f"{len(config_isb_keys)} instances." + ) + else: + self._remove_rejected_config(config_id) + logger.info( + f"Added config {config_hash} and rejected config {removed_incumbent_hash} as incumbent because " + f"it is not better than the incumbents on {len(config_isb_keys)} instances:" + ) + print_config_changes(rh.get_config(removed_incumbent_id), config, logger=logger) + + def _calculate_pareto_front( + self, + runhistory: RunHistory, + configs: list[Configuration], + config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]], + ) -> list[Configuration]: + """Compares the passed configurations and returns only the ones on the pareto front. + + Parameters + ---------- + runhistory : RunHistory + The runhistory containing the given configurations. + configs : list[Configuration] + The configurations from which the Pareto front should be computed. + config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]] + The instance-seed budget keys for the configurations on the basis of which the Pareto front should be computed. 
+ + Returns + ------- + pareto_front : list[Configuration] + The pareto front computed from the given configurations. + """ + return calculate_pareto_front( + runhistory=runhistory, + configs=configs, + config_instance_seed_budget_keys=config_instance_seed_budget_keys, + ) + @abstractmethod def __iter__(self) -> Iterator[TrialInfo]: """Main loop of the intensifier. This method always returns a TrialInfo object, although the intensifier diff --git a/smac/utils/pareto_front.py b/smac/utils/pareto_front.py index 8d2ee6bb4..9f4aef080 100644 --- a/smac/utils/pareto_front.py +++ b/smac/utils/pareto_front.py @@ -50,7 +50,7 @@ def calculate_pareto_front( configs: list[Configuration], config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]], ) -> list[Configuration]: - """Compares the passed configurations and returns only the ones on the pareto front. + """Calculate pareto front based on non-dominance Parameters ---------- From a0bed50a542bb717d3a6057221f7cc61588b8639 Mon Sep 17 00:00:00 2001 From: Carolin Benjamins Date: Mon, 9 Jan 2023 16:59:11 +0100 Subject: [PATCH 13/74] Add MO intensifier draft --- .../multi_objective_intensifier.py | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 smac/intensifier/multi_objective_intensifier.py diff --git a/smac/intensifier/multi_objective_intensifier.py b/smac/intensifier/multi_objective_intensifier.py new file mode 100644 index 000000000..b9af4e0e7 --- /dev/null +++ b/smac/intensifier/multi_objective_intensifier.py @@ -0,0 +1,69 @@ +# TODO does this work for multi-fidelity? 
+# Yes, then pass a pareto front calculation function to the abstract intensifier instead of subclassing it + +from __future__ import annotations + +from abc import abstractmethod +from typing import Any, Callable, Iterator + +import dataclasses +import json +from collections import defaultdict +from pathlib import Path + +import numpy as np +from ConfigSpace import Configuration + +import smac +from smac.callback import Callback +from smac.constants import MAXINT +from smac.main.config_selector import ConfigSelector +from smac.runhistory import TrialInfo +from smac.runhistory.dataclasses import ( + InstanceSeedBudgetKey, + InstanceSeedKey, + TrajectoryItem, + TrialValue, +) +from smac.runhistory.runhistory import RunHistory +from smac.scenario import Scenario +from smac.utils.configspace import get_config_hash, print_config_changes +from smac.utils.logging import get_logger +from smac.utils.pareto_front import calculate_pareto_front, sort_by_crowding_distance +from smac.intensifier.abstract_intensifier import AbstractIntensifier +from smac.intensifier.hyperband import Hyperband +from smac.intensifier.successive_halving import SuccessiveHalving +from smac.intensifier.intensifier import Intensifier + + +__copyright__ = "Copyright 2022, automl.org" +__license__ = "3-clause BSD" + +logger = get_logger(__name__) + + +class MOIntensifierMixin(object): + def _calculate_pareto_front( + self, + runhistory: RunHistory, + configs: list[Configuration], + config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]], + ) -> list[Configuration]: + return calculate_pareto_front( + runhistory=runhistory, + configs=configs, + config_instance_seed_budget_keys=config_instance_seed_budget_keys, + ) + + def _remove_incumbent(self, config: Configuration, previous_incumbent_ids: list[int], new_incumbent_ids: list[int]) -> None: + # TODO adjust + raise NotImplementedError + +class MOIntensifier(Intensifier, MOIntensifierMixin): + pass + +class MOSuccessiveHalving(SuccessiveHalving, 
MOIntensifierMixin): + pass + +class MOHyperband(Hyperband, MOIntensifierMixin): + pass \ No newline at end of file From 325cb5c9fcb0b385a1cb6cd7361cc15b36aa0189 Mon Sep 17 00:00:00 2001 From: Carolin Benjamins Date: Tue, 10 Jan 2023 15:01:21 +0100 Subject: [PATCH 14/74] Add comment --- smac/intensifier/intensifier.py | 1 + 1 file changed, 1 insertion(+) diff --git a/smac/intensifier/intensifier.py b/smac/intensifier/intensifier.py index 761796103..ca82e7e75 100644 --- a/smac/intensifier/intensifier.py +++ b/smac/intensifier/intensifier.py @@ -96,6 +96,7 @@ def __iter__(self) -> Iterator[TrialInfo]: queue. - If all incumbents are evaluated on the same trials, a new trial is added to one of the incumbents. - Only challengers which are not rejected/running/incumbent are intensified by N*2. + - If the intensifier cannot find any new trials for n _retries, exit Returns ------- From 227ceb779ce2efba9a6d8dc1ed261f4cb9d56881 Mon Sep 17 00:00:00 2001 From: Carolin Benjamins Date: Tue, 10 Jan 2023 15:01:41 +0100 Subject: [PATCH 15/74] Add todos --- smac/intensifier/multi_objective_intensifier.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/smac/intensifier/multi_objective_intensifier.py b/smac/intensifier/multi_objective_intensifier.py index b9af4e0e7..05b77746e 100644 --- a/smac/intensifier/multi_objective_intensifier.py +++ b/smac/intensifier/multi_objective_intensifier.py @@ -41,6 +41,7 @@ logger = get_logger(__name__) +# TODO add minimum population size? 
class MOIntensifierMixin(object): def _calculate_pareto_front( @@ -49,6 +50,7 @@ def _calculate_pareto_front( configs: list[Configuration], config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]], ) -> list[Configuration]: + # TODO use fast non dominance sorting return calculate_pareto_front( runhistory=runhistory, configs=configs, From c320f04b0f73c89060647d0ca2f36ca0ba974c08 Mon Sep 17 00:00:00 2001 From: Carolin Benjamins Date: Tue, 10 Jan 2023 15:02:05 +0100 Subject: [PATCH 16/74] Pass rh's incumbents to acquisition function --- smac/main/config_selector.py | 1 + 1 file changed, 1 insertion(+) diff --git a/smac/main/config_selector.py b/smac/main/config_selector.py index 312e6d05f..e3b17cc7f 100644 --- a/smac/main/config_selector.py +++ b/smac/main/config_selector.py @@ -204,6 +204,7 @@ def __iter__(self) -> Iterator[Configuration]: incumbent_array=x_best_array, num_data=len(self._get_evaluated_configs()), X=X_configurations, + incumbents=self._runhistory.incumbents, ) # We want to cache how many entries we used because if we have the same number of entries From 67eefecc808155d997985a5f1444fa4daa0ebc5f Mon Sep 17 00:00:00 2001 From: Carolin Benjamins Date: Tue, 10 Jan 2023 15:02:31 +0100 Subject: [PATCH 17/74] Add incumbents data structure in runhistory --- smac/runhistory/runhistory.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/smac/runhistory/runhistory.py b/smac/runhistory/runhistory.py index e25b0c7de..b6b8e361d 100644 --- a/smac/runhistory/runhistory.py +++ b/smac/runhistory/runhistory.py @@ -137,6 +137,10 @@ def reset(self) -> None: self._n_objectives: int = -1 self._objective_bounds: list[tuple[float, float]] = [] + # Store incumbents. 
Gets updated whenever the incumbents in the + # intensifier are updated + self._incumbents: list[Configuration] = [] + def __contains__(self, k: object) -> bool: """Dictionary semantics for `k in runhistory`.""" return k in self._data @@ -157,6 +161,14 @@ def __eq__(self, other: Any) -> bool: """Enables to check equality of runhistory if the run is continued.""" return self._data == other._data + @property + def incumbents(self) -> list[Configuration]: + return self._incumbents + + @incumbents.setter + def incumbents(self, incumbents: list[Configuration]) -> None: + self._incumbents = incumbents + def empty(self) -> bool: """Check whether the RunHistory is empty. From b297a9831cba4f4e030bc8ff99e04dec925cd32e Mon Sep 17 00:00:00 2001 From: Carolin Benjamins Date: Tue, 10 Jan 2023 15:53:18 +0100 Subject: [PATCH 18/74] Add property for incumbents Updates incumbents of runhistory automatically if updated --- smac/intensifier/abstract_intensifier.py | 29 ++++++++++++++++-------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/smac/intensifier/abstract_intensifier.py b/smac/intensifier/abstract_intensifier.py index be15a43a2..597c524e3 100644 --- a/smac/intensifier/abstract_intensifier.py +++ b/smac/intensifier/abstract_intensifier.py @@ -88,11 +88,20 @@ def reset(self) -> None: self._instance_seed_keys_validation: list[InstanceSeedKey] | None = None # Incumbent variables - self._incumbents: list[Configuration] = [] + self.incumbents: list[Configuration] = [] self._incumbents_changed = 0 self._rejected_config_ids: list[int] = [] self._trajectory: list[TrajectoryItem] = [] + @property + def incumbents(self) -> list[Configuration]: + return self._incumbents + + @incumbents.setter + def incumbents(self, incumbents: list[Configuration]) -> None: + self._incumbents = incumbents + self.runhistory.incumbents = incumbents + @property def meta(self) -> dict[str, Any]: """Returns the meta data of the created object.""" @@ -354,11 +363,11 @@ def get_incumbent(self) 
-> Configuration | None: if self._scenario.count_objectives() > 1: raise ValueError("Cannot get a single incumbent for multi-objective optimization.") - if len(self._incumbents) == 0: + if len(self.incumbents) == 0: return None - assert len(self._incumbents) == 1 - return self._incumbents[0] + assert len(self.incumbents) == 1 + return self.incumbents[0] def get_incumbents(self, sort_by: str | None = None) -> list[Configuration]: """Returns the incumbents (points on the pareto front) of the runhistory as copy. In case of a single-objective @@ -375,11 +384,11 @@ def get_incumbents(self, sort_by: str | None = None) -> list[Configuration]: rh = self.runhistory if sort_by == "cost": - return list(sorted(self._incumbents, key=lambda config: rh._cost_per_config[rh.get_config_id(config)])) + return list(sorted(self.incumbents, key=lambda config: rh._cost_per_config[rh.get_config_id(config)])) elif sort_by == "num_trials": - return list(sorted(self._incumbents, key=lambda config: len(rh.get_trials(config)))) + return list(sorted(self.incumbents, key=lambda config: len(rh.get_trials(config)))) elif sort_by is None: - return list(self._incumbents) + return list(self.incumbents) else: raise ValueError(f"Unknown sort_by value: {sort_by}.") @@ -686,7 +695,7 @@ def save(self, filename: str | Path) -> None: filename.parent.mkdir(parents=True, exist_ok=True) data = { - "incumbent_ids": [self.runhistory.get_config_id(config) for config in self._incumbents], + "incumbent_ids": [self.runhistory.get_config_id(config) for config in self.incumbents], "rejected_config_ids": self._rejected_config_ids, "incumbents_changed": self._incumbents_changed, "trajectory": [dataclasses.asdict(item) for item in self._trajectory], @@ -715,7 +724,7 @@ def load(self, filename: str | Path) -> None: if self._runhistory is not None: self.runhistory = self._runhistory - self._incumbents = [self.runhistory.get_config(config_id) for config_id in data["incumbent_ids"]] + self.incumbents = 
[self.runhistory.get_config(config_id) for config_id in data["incumbent_ids"]] self._incumbents_changed = data["incumbents_changed"] self._rejected_config_ids = data["rejected_config_ids"] self._trajectory = [TrajectoryItem(**item) for item in data["trajectory"]] @@ -726,7 +735,7 @@ def _update_trajectory(self, configs: list[Configuration]) -> None: config_ids = [rh.get_config_id(c) for c in configs] costs = [rh.average_cost(c, normalize=False) for c in configs] - self._incumbents = configs + self.incumbents = configs self._incumbents_changed += 1 self._trajectory.append( TrajectoryItem( From 6042beda4039a81368e2ab608aaea19573300953 Mon Sep 17 00:00:00 2001 From: Carolin Benjamins Date: Tue, 10 Jan 2023 15:55:40 +0100 Subject: [PATCH 19/74] Add EHVI acq fun --- .../function/expected_hypervolume.py | 246 ++++++++++++++++++ 1 file changed, 246 insertions(+) create mode 100644 smac/acquisition/function/expected_hypervolume.py diff --git a/smac/acquisition/function/expected_hypervolume.py b/smac/acquisition/function/expected_hypervolume.py new file mode 100644 index 000000000..7e1820ad5 --- /dev/null +++ b/smac/acquisition/function/expected_hypervolume.py @@ -0,0 +1,246 @@ +from __future__ import annotations + +from typing import Any, Iterator + +from ConfigSpace import Configuration + +import pygmo +import numpy as np +from torch import Tensor +from abc import ABC + +from smac.intensifier.abstract_intensifier import AbstractIntensifier +from smac.runhistory import TrialInfo, RunHistory +from smac.runhistory.dataclasses import InstanceSeedBudgetKey +from smac.scenario import Scenario +from smac.utils.configspace import get_config_hash +from smac.utils.logging import get_logger +from smac.acquisition.function.abstract_acquisition_function import AbstractAcquisitionFunction +from smac.model.abstract_model import AbstractModel + +import torch +from botorch.acquisition.multi_objective import ExpectedHypervolumeImprovement +from botorch.models.model import Model +from 
botorch.utils.multi_objective.box_decompositions.non_dominated import ( + NondominatedPartitioning, +) + +__copyright__ = "Copyright 2022, automl.org" +__license__ = "3-clause BSD" + +logger = get_logger(__name__) + +class _PosteriorProxy(object): + def __init__(self) -> None: + self.mean: Tensor = [] + self.variance: Tensor = [] + + +class _ModelProxy(Model, ABC): + def __init__(self, model: AbstractModel): + self.model = model + + def posterior(self, X: Tensor, **kwargs: Any) -> _PosteriorProxy: + """Docstring + X: A `b x q x d`-dim Tensor, where `d` is the dimension of the + feature space, `q` is the number of points considered jointly, + and `b` is the batch dimension. + + + A `Posterior` object, representing a batch of `b` joint distributions + over `q` points and `m` outputs each. + """ + assert X.shape[1] == 1 + X = X.reshape([X.shape[0], -1]).numpy() # 3D -> 2D + + # predict + # start_time = time.time() + # print(f"Start predicting ") + mean, var_ = self.model.predict_marginalized_over_instances(X) + # print(f"Done in {time.time() - start_time}s") + post = _PosteriorProxy() + post.mean = torch.asarray(mean).reshape(X.shape[0], 1, -1) # 2D -> 3D + post.variance = torch.asarray(var_).reshape(X.shape[0], 1, -1) # 2D -> 3D + + return post + +class EHVI(AbstractAcquisitionFunction): + def __init__(self): + super(EHVI, self).__init__() + self._required_updates = ("model",) + self._ehvi: ExpectedHypervolumeImprovement | None = None + + @property + def name(self) -> str: + return "Expected Hypervolume Improvement" + + def _update(self, **kwargs: Any) -> None: + # TODO either pass runhistory in config_selector + # and store incumbents in runhistory -or- work + # with a callback. 
This class can own a callback + # updating the partitioning and the ehv model + super(EHVI, self).update(**kwargs) + + incumbents: list[Configuration] = kwargs.get("incumbents", None) + if incumbents is None: + raise ValueError(f"Incumbents are not passed properly.") + if len(incumbents) > 0: + raise ValueError(f"No incumbents here. Did the intensifier properly " + "update the incumbents in the runhistory?") + + # Update EHVI + # Prediction all + population_configs = incumbents + population_X = np.array([config.get_array() for config in population_configs]) + population_costs, _ = self.model.predict_marginalized_over_instances(population_X) + + # Compute HV + # population_hv = self.get_hypervolume(population_costs) + + # BOtorch EHVI implementation + bomodel = _ModelProxy(self.model) + ref_point = pygmo.hypervolume(population_costs).refpoint( + offset=1 + ) # TODO get proper reference points from user/cutoffs + # ref_point = torch.asarray(ref_point) + # TODO partition from all runs instead of only population? + # TODO NondominatedPartitioning and ExpectedHypervolumeImprovement seem no too difficult to implement natively + # TODO pass along RNG + # Transfrom the objective space to cells based on the population + partitioning = NondominatedPartitioning(torch.asarray(ref_point), torch.asarray(population_costs)) + self._ehvi = ExpectedHypervolumeImprovement(bomodel, ref_point, partitioning) + + def _compute(self, X: np.ndarray) -> np.ndarray: + """Computes the EHVI values and its derivatives. + + Parameters + ---------- + X: np.ndarray(N, D), The input points where the acquisition function + should be evaluated. The dimensionality of X is (N, D), with N as + the number of points to evaluate at and D is the number of + dimensions of one X. + + Returns + ------- + np.ndarray(N,1) + Expected HV Improvement of X + """ + if self._ehvi is None: + raise ValueError(f"The expected hypervolume improvement is not defined yet. 
Call self.update.") + + if len(X.shape) == 1: + X = X[:, np.newaxis] + + # m, var_ = self.model.predict_marginalized_over_instances(X) + # Find a way to propagate the variance into the HV + boX = torch.asarray(X).reshape(X.shape[0], 1, -1) # 2D -> #3D + improvements = self._ehvi(boX).numpy().reshape(-1, 1) # TODO here are the expected hv improvements computed. + return improvements + + # TODO non-dominated sorting of costs. Compute EHVI only until the EHVI is not expected to improve anymore. + # Option 1: Supplement missing instances of population with acq. function to get predicted performance over + # all instances. Idea is this prevents optimizing for the initial instances which get it stuck in local optima + # Option 2: Only on instances of population + # Option 3: EVHI per instance and aggregate afterwards + # ehvi = np.zeros(len(X)) + # for i, indiv in enumerate(m): + # ehvi[i] = self.get_hypervolume(population_costs + [indiv]) - population_hv + # + # return ehvi.reshape(-1, 1) + +class PHVI(AbstractAcquisitionFunction): + def __init__(self, model: BaseEPM, stats: Stats, runhistory: RunHistory): + """Computes for a given x the predicted hypervolume improvement as + acquisition value. 
+ + Parameters + ---------- + model : BaseEPM + A model that implements at least + - predict_marginalized_over_instances(X) + """ + super(PHVI, self).__init__(model) + self.long_name = "Expected Hypervolume improvement" + self.stats = stats + self.runhistory = runhistory + self._required_updates = ("model",) + # self._ehvi = None + self.population_hv = None + self.population_costs = None + + def update(self, **kwargs: Any) -> None: + super(PHVI, self).update(**kwargs) + + #Update EHVI + # Get points of population + population_configs = [self.runhistory.ids_config[config_id] for config_id in self.stats.population] + # population_costs_actual = [self.runhistory.get_cost(c, aggregate=False) for c in population_configs] + # Prediction all + population_X = np.array([config.get_array() for config in population_configs]) + population_costs, _ = self.model.predict_marginalized_over_instances(population_X) + + # Compute HV + population_hv = self.get_hypervolume(population_costs, (1.1, 1.1)) + + self.population_costs = population_costs + self.population_hv = population_hv + + self.logger.info(f"NEW POPULATION HV: {population_hv}") + + + + def get_hypervolume(self, points: np.ndarray = None, reference_point: list = None) -> float: + """ + Compute the hypervolume + + Parameters + ---------- + points : np.ndarray + A 2d numpy array. 1st dimension is an entity and the 2nd dimension are the costs + reference_point : list + + Return + ------ + hypervolume: float + """ + hv = pygmo.hypervolume(points) + if reference_point is None: + reference_point = hv.refpoint(offset=1) + return hv.compute(reference_point) # TODO: Fix reference points + + def _compute(self, X: np.ndarray) -> np.ndarray: + """Computes the PHVI values and its derivatives. + + Parameters + ---------- + X: np.ndarray(N, D), The input points where the acquisition function + should be evaluated. 
The dimensionality of X is (N, D), with N as + the number of points to evaluate at and D is the number of + dimensions of one X. + + Returns + ------- + np.ndarray(N,1) + Expected HV Improvement of X + """ + if len(X.shape) == 1: + X = X[:, np.newaxis] + + # TODO non-dominated sorting of costs. Compute EHVI only until the EHVI is not expected to improve anymore. + # Option 1: Supplement missing instances of population with acq. function to get predicted performance over + # all instances. Idea is this prevents optimizing for the initial instances which get it stuck in local optima + # Option 2: Only on instances of population + # Option 3: EVHI per instance and aggregate afterwards + mean, var_ = self.model.predict_marginalized_over_instances(X) + + phvi = np.zeros(len(X)) + for i, indiv in enumerate(mean): + phvi[i] = self.get_hypervolume(list(self.population_costs) + [indiv], (1.1, 1.1)) - self.population_hv + + # if len(X) == 10000: + # for op in ["max", "min", "mean", "median"]: + # val = getattr(np, op)(phvi) + # print(f"{op:6} - {val}") + # time.sleep(1.5) + + return phvi.reshape(-1, 1) From a96172d933595307cefbc82262f0843fdeff3840 Mon Sep 17 00:00:00 2001 From: Carolin Benjamins Date: Tue, 10 Jan 2023 16:01:45 +0100 Subject: [PATCH 20/74] Update PHVI --- .../function/expected_hypervolume.py | 38 +++++++++---------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/smac/acquisition/function/expected_hypervolume.py b/smac/acquisition/function/expected_hypervolume.py index 7e1820ad5..3ab5fe915 100644 --- a/smac/acquisition/function/expected_hypervolume.py +++ b/smac/acquisition/function/expected_hypervolume.py @@ -149,33 +149,33 @@ def _compute(self, X: np.ndarray) -> np.ndarray: # return ehvi.reshape(-1, 1) class PHVI(AbstractAcquisitionFunction): - def __init__(self, model: BaseEPM, stats: Stats, runhistory: RunHistory): + def __init__(self): """Computes for a given x the predicted hypervolume improvement as acquisition value. 
- - Parameters - ---------- - model : BaseEPM - A model that implements at least - - predict_marginalized_over_instances(X) """ - super(PHVI, self).__init__(model) - self.long_name = "Expected Hypervolume improvement" - self.stats = stats - self.runhistory = runhistory + super(PHVI, self).__init__() self._required_updates = ("model",) - # self._ehvi = None self.population_hv = None self.population_costs = None - def update(self, **kwargs: Any) -> None: + @property + def name(self) -> str: + return "Predicted Hypervolume Improvement" + + def _update(self, **kwargs: Any) -> None: super(PHVI, self).update(**kwargs) - #Update EHVI - # Get points of population - population_configs = [self.runhistory.ids_config[config_id] for config_id in self.stats.population] - # population_costs_actual = [self.runhistory.get_cost(c, aggregate=False) for c in population_configs] + # TODO abstract this away in a general HVI class + incumbents: list[Configuration] = kwargs.get("incumbents", None) + if incumbents is None: + raise ValueError(f"Incumbents are not passed properly.") + if len(incumbents) > 0: + raise ValueError(f"No incumbents here. 
Did the intensifier properly " + "update the incumbents in the runhistory?") + + # Update EHVI # Prediction all + population_configs = incumbents population_X = np.array([config.get_array() for config in population_configs]) population_costs, _ = self.model.predict_marginalized_over_instances(population_X) @@ -185,9 +185,7 @@ def update(self, **kwargs: Any) -> None: self.population_costs = population_costs self.population_hv = population_hv - self.logger.info(f"NEW POPULATION HV: {population_hv}") - - + logger.info(f"New population HV: {population_hv}") def get_hypervolume(self, points: np.ndarray = None, reference_point: list = None) -> float: """ From 75a2077d86bdbd8f809772c4372cf812225a15ea Mon Sep 17 00:00:00 2001 From: Carolin Benjamins Date: Tue, 10 Jan 2023 16:46:47 +0100 Subject: [PATCH 21/74] Add ACLib runner draft --- smac/runner/aclib_runner.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 smac/runner/aclib_runner.py diff --git a/smac/runner/aclib_runner.py b/smac/runner/aclib_runner.py new file mode 100644 index 000000000..3605588f4 --- /dev/null +++ b/smac/runner/aclib_runner.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +__copyright__ = "Copyright 2022, automl.org" +__license__ = "3-clause BSD" + + +from abc import ABC, abstractmethod +from typing import Any, Iterator + +import time +import traceback + +import numpy as np +from ConfigSpace import Configuration + +from smac.runhistory import StatusType, TrialInfo, TrialValue +from smac.scenario import Scenario +from smac.utils.logging import get_logger +from smac.runner.target_function_script_runner import TargetFunctionScriptRunner + +logger = get_logger(__name__) + +class ACLibRunner(TargetFunctionScriptRunner): + def __call__(self, algorithm_kwargs: dict[str, Any]) -> tuple[str, str]: + # TODO fill correct kwargs + # kwargs has "instance", "seed" and "budget" --> translate those + return super().__call__(algorithm_kwargs) + From 
a5902d5a0f54a7873f11ce4e6c9893a65267b226 Mon Sep 17 00:00:00 2001 From: "J. Rook" Date: Wed, 1 Mar 2023 10:37:31 +0100 Subject: [PATCH 22/74] Native objective support --- smac/runhistory/encoder/abstract_encoder.py | 4 ++++ smac/runhistory/encoder/encoder.py | 7 +++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/smac/runhistory/encoder/abstract_encoder.py b/smac/runhistory/encoder/abstract_encoder.py index 42454771e..fc9ff83c9 100644 --- a/smac/runhistory/encoder/abstract_encoder.py +++ b/smac/runhistory/encoder/abstract_encoder.py @@ -32,6 +32,7 @@ class AbstractRunHistoryEncoder: scale_percentage : int, defaults to 5 Scaled y-transformation use a percentile to estimate distance to optimum. Only used in some sub-classes. seed : int | None, defaults to none + native_multi_objective: bool, defaults to False Raises ------ @@ -50,6 +51,7 @@ def __init__( lower_budget_states: list[StatusType] = [], scale_percentage: int = 5, seed: int | None = None, + native_multi_objective: bool = False, ) -> None: if considered_states is None: raise TypeError("No success states are given.") @@ -86,6 +88,8 @@ def __init__( self._multi_objective_algorithm: AbstractMultiObjectiveAlgorithm | None = None self._runhistory: RunHistory | None = None + self._native_multi_objective = native_multi_objective + @property def meta(self) -> dict[str, Any]: """ diff --git a/smac/runhistory/encoder/encoder.py b/smac/runhistory/encoder/encoder.py index 25672a92f..9a90ba072 100644 --- a/smac/runhistory/encoder/encoder.py +++ b/smac/runhistory/encoder/encoder.py @@ -29,8 +29,11 @@ def _build_matrix( X = np.ones([n_rows, n_cols + self._n_features]) * np.nan # For now we keep it as 1 - # TODO: Extend for native multi-objective - y = np.ones([n_rows, 1]) + # TODO: Extend with checks for native multi-objective (return size of multi_objective_algorithm) + if self._native_multi_objective: + y = np.ones([n_rows, self._n_objectives]) + else: + y = np.ones([n_rows, 1]) # Then populate matrix 
for row, (key, run) in enumerate(trials.items()): From 5e7d880719868533614a434533ff9ca5e0a07d25 Mon Sep 17 00:00:00 2001 From: "J. Rook" Date: Wed, 1 Mar 2023 10:52:59 +0100 Subject: [PATCH 23/74] Fix typo --- smac/acquisition/maximizer/local_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smac/acquisition/maximizer/local_search.py b/smac/acquisition/maximizer/local_search.py index 2595ed76a..f9a5bafb2 100644 --- a/smac/acquisition/maximizer/local_search.py +++ b/smac/acquisition/maximizer/local_search.py @@ -65,7 +65,7 @@ def __init__( seed=seed, ) - self._max_steps = max_steps if not None else np.inf + self._max_steps = max_steps if max_steps is not None else np.inf self._n_steps_plateau_walk = n_steps_plateau_walk self._vectorization_min_obtain = vectorization_min_obtain self._vectorization_max_obtain = vectorization_max_obtain From 3cdf96a71c73c5bb20cfc2e8eadcf84a44bef012 Mon Sep 17 00:00:00 2001 From: "J. Rook" Date: Wed, 1 Mar 2023 10:53:54 +0100 Subject: [PATCH 24/74] Initial modifications for mo facade --- smac/facade/multi_objective_facade.py | 71 ++++++++++++++++++--------- 1 file changed, 48 insertions(+), 23 deletions(-) diff --git a/smac/facade/multi_objective_facade.py b/smac/facade/multi_objective_facade.py index a861e5d23..bfd6ec320 100644 --- a/smac/facade/multi_objective_facade.py +++ b/smac/facade/multi_objective_facade.py @@ -6,7 +6,9 @@ from smac.facade.abstract_facade import AbstractFacade from smac.initial_design.default_design import DefaultInitialDesign from smac.intensifier.intensifier import Intensifier +from smac.intensifier.multi_objective_intensifier import MOIntensifier from smac.model.random_forest.random_forest import RandomForest +from smac.model.multi_objective_model import MultiObjectiveModel from smac.multi_objective.aggregation_strategy import NoAggregationStrategy from smac.random_design.probability_design import ProbabilityRandomDesign from smac.runhistory.encoder.encoder import RunHistoryEncoder 
@@ -53,27 +55,53 @@ def get_model( # type: ignore pca_components : float, defaults to 4 Number of components to keep when using PCA to reduce dimensionality of instance features. """ - return RandomForest( - configspace=scenario.configspace, - n_trees=n_trees, - ratio_features=ratio_features, - min_samples_split=min_samples_split, - min_samples_leaf=min_samples_leaf, - max_depth=max_depth, - bootstrapping=bootstrapping, - log_y=False, - instance_features=scenario.instance_features, - pca_components=pca_components, - seed=scenario.seed, - ) + + models = [] + for objective in scenario.objectives: + models.append( + RandomForest( + configspace=scenario.configspace, + n_trees=n_trees, + ratio_features=ratio_features, + min_samples_split=min_samples_split, + min_samples_leaf=min_samples_leaf, + max_depth=max_depth, + bootstrapping=bootstrapping, + log_y=False, + instance_features=scenario.instance_features, + pca_components=pca_components, + seed=scenario.seed, + ) + ) + + return MultiObjectiveModel(models=models, objectives=scenario.objectives) @staticmethod - # TODO update intensifier - def get_intensifier(scenario: Scenario) -> AbstractIntensifier: - return super().get_intensifier(scenario) + def get_intensifier( # type: ignore + scenario: Scenario, + *, + max_config_calls: int = 3, + max_incumbents: int = 10, + ) -> Intensifier: + """Returns ``MOIntensifier`` as intensifier. Uses the default configuration for ``race_against``. + + Parameters + ---------- + scenario : Scenario + max_config_calls : int, defaults to 3 + Maximum number of configuration evaluations. Basically, how many instance-seed keys should be max evaluated + for a configuration. + max_incumbents : int, defaults to 10 + How many incumbents to keep track of in the case of multi-objective. 
+ """ + return MOIntensifier( + scenario=scenario, + max_config_calls=max_config_calls, + max_incumbents=max_incumbents, + ) @staticmethod - # TODO update acquisition function + # TODO update acquisition function with EIHV and PIHV def get_acquisition_function( # type: ignore scenario: Scenario, *, @@ -173,12 +201,9 @@ def get_multi_objective_algorithm( # type: ignore Weights for averaging the objectives in a weighted manner. Must be of the same length as the number of objectives. """ - return NoAggregationStrategy( - scenario=scenario, - ) + return NoAggregationStrategy() @staticmethod - # TODO update rh encoder def get_runhistory_encoder(scenario: Scenario) -> RunHistoryEncoder: - """Returns the default runhistory encoder.""" - return RunHistoryEncoder(scenario) + """Returns the default runhistory encoder with native multi objective support enabled.""" + return RunHistoryEncoder(scenario, native_multi_objective=True) From 087d7c8c17b90a3f510c9ed3ba14629177ad19ab Mon Sep 17 00:00:00 2001 From: "J. Rook" Date: Wed, 1 Mar 2023 13:30:51 +0100 Subject: [PATCH 25/74] Make the HV based acquisition functions work --- .../function/abstract_acquisition_function.py | 4 ++-- smac/acquisition/function/expected_hypervolume.py | 14 +++++++------- smac/facade/multi_objective_facade.py | 5 +++-- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/smac/acquisition/function/abstract_acquisition_function.py b/smac/acquisition/function/abstract_acquisition_function.py index 519f5b3d0..42b94675e 100644 --- a/smac/acquisition/function/abstract_acquisition_function.py +++ b/smac/acquisition/function/abstract_acquisition_function.py @@ -50,7 +50,7 @@ def update(self, model: AbstractModel, **kwargs: Any) -> None: This method will be called after fitting the model, but before maximizing the acquisition function. As an examples, EI uses it to update the current fmin. The default implementation only updates the - attributes of the acqusition function which are already present. 
+ attributes of the acquisition function which are already present. Calls `_update` to update the acquisition function attributes. @@ -65,7 +65,7 @@ def update(self, model: AbstractModel, **kwargs: Any) -> None: self._update(**kwargs) def _update(self, **kwargs: Any) -> None: - """Update acsquisition function attributes + """Update acquisition function attributes Might be different for each child class. """ diff --git a/smac/acquisition/function/expected_hypervolume.py b/smac/acquisition/function/expected_hypervolume.py index 3ab5fe915..afebf28ee 100644 --- a/smac/acquisition/function/expected_hypervolume.py +++ b/smac/acquisition/function/expected_hypervolume.py @@ -56,7 +56,7 @@ def posterior(self, X: Tensor, **kwargs: Any) -> _PosteriorProxy: # predict # start_time = time.time() # print(f"Start predicting ") - mean, var_ = self.model.predict_marginalized_over_instances(X) + mean, var_ = self.model.predict_marginalized(X) # print(f"Done in {time.time() - start_time}s") post = _PosteriorProxy() post.mean = torch.asarray(mean).reshape(X.shape[0], 1, -1) # 2D -> 3D @@ -79,12 +79,12 @@ def _update(self, **kwargs: Any) -> None: # and store incumbents in runhistory -or- work # with a callback. This class can own a callback # updating the partitioning and the ehv model - super(EHVI, self).update(**kwargs) + super(EHVI, self)._update(**kwargs) incumbents: list[Configuration] = kwargs.get("incumbents", None) if incumbents is None: raise ValueError(f"Incumbents are not passed properly.") - if len(incumbents) > 0: + if len(incumbents) == 0: raise ValueError(f"No incumbents here. 
Did the intensifier properly " "update the incumbents in the runhistory?") @@ -92,7 +92,7 @@ def _update(self, **kwargs: Any) -> None: # Prediction all population_configs = incumbents population_X = np.array([config.get_array() for config in population_configs]) - population_costs, _ = self.model.predict_marginalized_over_instances(population_X) + population_costs, _ = self.model.predict_marginalized(population_X) # Compute HV # population_hv = self.get_hypervolume(population_costs) @@ -163,7 +163,7 @@ def name(self) -> str: return "Predicted Hypervolume Improvement" def _update(self, **kwargs: Any) -> None: - super(PHVI, self).update(**kwargs) + super(PHVI, self)._update(**kwargs) # TODO abstract this away in a general HVI class incumbents: list[Configuration] = kwargs.get("incumbents", None) @@ -177,7 +177,7 @@ def _update(self, **kwargs: Any) -> None: # Prediction all population_configs = incumbents population_X = np.array([config.get_array() for config in population_configs]) - population_costs, _ = self.model.predict_marginalized_over_instances(population_X) + population_costs, _ = self.model.predict_marginalized(population_X) # Compute HV population_hv = self.get_hypervolume(population_costs, (1.1, 1.1)) @@ -229,7 +229,7 @@ def _compute(self, X: np.ndarray) -> np.ndarray: # all instances. 
Idea is this prevents optimizing for the initial instances which get it stuck in local optima # Option 2: Only on instances of population # Option 3: EVHI per instance and aggregate afterwards - mean, var_ = self.model.predict_marginalized_over_instances(X) + mean, var_ = self.model.predict_marginalized(X) phvi = np.zeros(len(X)) for i, indiv in enumerate(mean): diff --git a/smac/facade/multi_objective_facade.py b/smac/facade/multi_objective_facade.py index bfd6ec320..df267ca1c 100644 --- a/smac/facade/multi_objective_facade.py +++ b/smac/facade/multi_objective_facade.py @@ -3,6 +3,7 @@ from ConfigSpace import Configuration from smac.acquisition.function.expected_improvement import EI +from smac.acquisition.function.expected_hypervolume import EHVI, PHVI from smac.facade.abstract_facade import AbstractFacade from smac.initial_design.default_design import DefaultInitialDesign from smac.intensifier.intensifier import Intensifier @@ -106,7 +107,7 @@ def get_acquisition_function( # type: ignore scenario: Scenario, *, xi: float = 0.0, - ) -> EI: + ) -> EHVI: """Returns an Expected Improvement acquisition function. Parameters @@ -116,7 +117,7 @@ def get_acquisition_function( # type: ignore Controls the balance between exploration and exploitation of the acquisition function. """ - return EI(xi=xi) + return EHVI() @staticmethod def get_acquisition_maximizer( # type: ignore From 1b2010626d72717f092397b80dcfc23233b0fa0a Mon Sep 17 00:00:00 2001 From: "J. 
Rook" Date: Wed, 1 Mar 2023 15:02:31 +0100 Subject: [PATCH 26/74] Logic fix --- smac/acquisition/function/expected_hypervolume.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/smac/acquisition/function/expected_hypervolume.py b/smac/acquisition/function/expected_hypervolume.py index afebf28ee..5f41e912b 100644 --- a/smac/acquisition/function/expected_hypervolume.py +++ b/smac/acquisition/function/expected_hypervolume.py @@ -169,11 +169,11 @@ def _update(self, **kwargs: Any) -> None: incumbents: list[Configuration] = kwargs.get("incumbents", None) if incumbents is None: raise ValueError(f"Incumbents are not passed properly.") - if len(incumbents) > 0: + if len(incumbents) == 0: raise ValueError(f"No incumbents here. Did the intensifier properly " "update the incumbents in the runhistory?") - # Update EHVI + # Update PHVI # Prediction all population_configs = incumbents population_X = np.array([config.get_array() for config in population_configs]) From a0577333ec95665e112a4346cf1a21298f71dcad Mon Sep 17 00:00:00 2001 From: "J. 
Rook" Date: Fri, 3 Mar 2023 13:27:00 +0100 Subject: [PATCH 27/74] AClib runner --- smac/runner/aclib_runner.py | 48 +++++++++++++++++++- smac/runner/target_function_script_runner.py | 4 +- 2 files changed, 48 insertions(+), 4 deletions(-) diff --git a/smac/runner/aclib_runner.py b/smac/runner/aclib_runner.py index 3605588f4..691fce02f 100644 --- a/smac/runner/aclib_runner.py +++ b/smac/runner/aclib_runner.py @@ -21,8 +21,52 @@ logger = get_logger(__name__) class ACLibRunner(TargetFunctionScriptRunner): + def __init__(self, + target_function: str, + scenario: Scenario, + required_arguments: list[str] = [], + target_function_arguments: dict[str, str] | None = None, + ): + + self._target_function_arguments = target_function_arguments + + super().__init__(target_function, scenario, required_arguments) def __call__(self, algorithm_kwargs: dict[str, Any]) -> tuple[str, str]: - # TODO fill correct kwargs # kwargs has "instance", "seed" and "budget" --> translate those - return super().__call__(algorithm_kwargs) + + cmd = self._target_function.split(" ") + if self._target_function_arguments is not None: + for k, v in self._target_function_arguments.items(): + cmd += [f"--{k}={v}"] + + if self._scenario.trial_walltime_limit is not None: + cmd += [f"--cutoff={self._scenario.trial_walltime_limit}"] + + config = ["--config"] + + for k, v in algorithm_kwargs.items(): + v = str(v) + k = str(k) + + # Let's remove some spaces + v = v.replace(" ", "") + + if k in ["instance", "seed"]: + cmd += [f"--{k}={v}"] + elif k == "instance_features": + continue + else: + config += [k, v] + + cmd += config + + logger.debug(f"Calling: {' '.join(cmd)}") + print(f"Calling: {' '.join(cmd)}") + p = Popen(cmd, shell=False, stdout=PIPE, stderr=PIPE, universal_newlines=True) + output, error = p.communicate() + + logger.debug("Stdout: %s" % output) + logger.debug("Stderr: %s" % error) + + return output, error diff --git a/smac/runner/target_function_script_runner.py 
b/smac/runner/target_function_script_runner.py index 17feffc98..f01970a98 100644 --- a/smac/runner/target_function_script_runner.py +++ b/smac/runner/target_function_script_runner.py @@ -40,7 +40,7 @@ class TargetFunctionScriptRunner(AbstractSerialRunner): Parameters ---------- - target_function : Callable + target_function : str The target function. scenario : Scenario required_arguments : list[str] @@ -199,7 +199,7 @@ def __call__( algorithm_kwargs: dict[str, Any], ) -> tuple[str, str]: """Calls the algorithm, which is processed in the ``run`` method.""" - cmd = [self._target_function] + cmd = self._target_function.split(" ") for k, v in algorithm_kwargs.items(): v = str(v) k = str(k) From 6c0bcd1cd4b5cd3eb00a44be4fe364819724a7cf Mon Sep 17 00:00:00 2001 From: "J. Rook" Date: Fri, 3 Mar 2023 15:47:41 +0100 Subject: [PATCH 28/74] AClib runner fixes --- smac/runner/aclib_runner.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/smac/runner/aclib_runner.py b/smac/runner/aclib_runner.py index 691fce02f..dc3472dcf 100644 --- a/smac/runner/aclib_runner.py +++ b/smac/runner/aclib_runner.py @@ -3,12 +3,13 @@ __copyright__ = "Copyright 2022, automl.org" __license__ = "3-clause BSD" - +import re from abc import ABC, abstractmethod from typing import Any, Iterator import time import traceback +from subprocess import Popen, PIPE import numpy as np from ConfigSpace import Configuration @@ -61,12 +62,23 @@ def __call__(self, algorithm_kwargs: dict[str, Any]) -> tuple[str, str]: cmd += config logger.debug(f"Calling: {' '.join(cmd)}") - print(f"Calling: {' '.join(cmd)}") p = Popen(cmd, shell=False, stdout=PIPE, stderr=PIPE, universal_newlines=True) output, error = p.communicate() logger.debug("Stdout: %s" % output) logger.debug("Stderr: %s" % error) - return output, error + result_begin = "Result for SMAC3v2: " + outputline = "" + for line in output.split("\n"): + line = line.strip() + if re.match(result_begin, line): + print("match") + 
outputline = line[len(result_begin):] + + logger.debug(f"Found result in output: {outputline}") + + #Parse output to form of key=value;key2=value2;...;cost=value1,value2;... + + return outputline, error From 71409ceca22627711614f46f9d16ea3a08020740 Mon Sep 17 00:00:00 2001 From: "J. Rook" Date: Fri, 3 Mar 2023 15:48:00 +0100 Subject: [PATCH 29/74] MO utils initial expansion --- smac/utils/pareto_front.py | 59 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/smac/utils/pareto_front.py b/smac/utils/pareto_front.py index 9f4aef080..5e8bdc660 100644 --- a/smac/utils/pareto_front.py +++ b/smac/utils/pareto_front.py @@ -153,3 +153,62 @@ def sort_by_crowding_distance( config_with_crowding = sorted(config_with_crowding, key=lambda x: x[1], reverse=True) return [c for c, _ in config_with_crowding] + +def sort_by_hypervolume_contribution( + runhistory: RunHistory, + configs: list[Configuration], + config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]], +) -> list[Configuration]: + """ Sorts the passed configurations by their hypervolume contribution. + + Parameters + ---------- + runhistory : RunHistory + The runhistory containing the given configurations. + configs : list[Configuration] + The configurations which should be sorted. + config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]] + The instance-seed budget keys for the configurations which should be sorted. + + Returns + ------- + sorted_list : list[Configuration] + Configurations sorted by hypervolume contribution. 
+ """ + + # Get the average costs per configuration + + # Normalize the costs per objective + + # Compute a reference point (with the local points or all observed history) + + # Apply reduce procedure + + # Sort based on HV contribution + + raise NotImplementedError + +def calculate_hypervolume( + runhistory: RunHistory, + configs: list[Configuration], + config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]], + reference_point: list[float] | None = None, +) -> float: + if reference_point is None: + reference_point = calculate_reference_point(runhistory) + + + raise NotImplementedError + +def calculate_reference_point( + runhistory: RunHistory, + configs: list[Configuration] | None = None, + config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]] | None = None, +) -> list[float]: + if configs is None: + # Compute over the complete runhistory + costs = [trail.cost for trial in runhistory.values()] + return np.max(np.array(costs), axis=1) + else: + assert len(configs) == len(config_instance_seed_budget_keys) + raise NotImplementedError \ No newline at end of file From 0587938cbec6d54d627a1954c35596a9be138e88 Mon Sep 17 00:00:00 2001 From: "J. 
Rook" Date: Fri, 3 Mar 2023 15:48:17 +0100 Subject: [PATCH 30/74] MO intensifier --- smac/intensifier/abstract_intensifier.py | 63 ++++++++++++------- .../multi_objective_intensifier.py | 21 +++++++ 2 files changed, 62 insertions(+), 22 deletions(-) diff --git a/smac/intensifier/abstract_intensifier.py b/smac/intensifier/abstract_intensifier.py index 597c524e3..be4517c15 100644 --- a/smac/intensifier/abstract_intensifier.py +++ b/smac/intensifier/abstract_intensifier.py @@ -62,7 +62,7 @@ def __init__( self._scenario = scenario self._config_selector: ConfigSelector | None = None self._config_generator: Iterator[ConfigSelector] | None = None - self._runhistory: RunHistory | None = None + self._runhistory: RunHistory | None = RunHistory if seed is None: seed = self._scenario.seed @@ -406,7 +406,7 @@ def get_incumbent_instance_seed_budget_keys(self, compare: bool = False) -> list incumbents = self.get_incumbents() if len(incumbents) > 0: - # We want to calculate the smallest set of trials that is used by all incumbents + # We want to calculate the largest set of trials that is used by all incumbents # Reason: We can not fairly compare otherwise incumbent_isb_keys = [self.get_instance_seed_budget_keys(incumbent, compare) for incumbent in incumbents] instances = list(set.intersection(*map(set, incumbent_isb_keys))) # type: ignore @@ -528,6 +528,7 @@ def update_incumbents(self, config: Configuration) -> None: ) # The config has to go to a queue now as it is a challenger and a potential incumbent + #TODO JG find out where it is decided to continue with the challenger return else: # If all instances are available and the config is incumbent and even evaluated on more trials @@ -549,6 +550,13 @@ def update_incumbents(self, config: Configuration) -> None: for incumbent in incumbents: all_incumbent_isb_keys.append(self.get_instance_seed_budget_keys(incumbent)) + #TODO JG it is guaruanteed that the challenger has ran on the intersection of isb_keys + # of the incumbents, 
however this is not the case in this part of the code. + # Here, all the runs of each incumbent used. Maybe the intensifier ensures that the incumbents + # have ran on the same isb keys in the first place? + + #TODO JG get intersection for all incumbent_isb_keys and check if it breaks budget. + # We compare the incumbents now and only return the ones on the pareto front new_incumbents = self._calculate_pareto_front(rh, incumbents, all_incumbent_isb_keys) new_incumbent_ids = [rh.get_config_id(c) for c in new_incumbents] @@ -570,7 +578,7 @@ def update_incumbents(self, config: Configuration) -> None: logger.info( f"Config {config_hash} is a new incumbent. " f"Total number of incumbents: {len(new_incumbents)}." ) - else: + else: # len(previous_incumbents) > len(new_incumbents) # There might be situations that the incumbents might be removed because of updated cost information of # config for incumbent in previous_incumbents: @@ -584,21 +592,26 @@ def update_incumbents(self, config: Configuration) -> None: # Cut incumbents: We only want to keep a specific number of incumbents # We use the crowding distance for that if len(new_incumbents) > self._max_incumbents: - # TODO adjust. Other option: statistical test or HV - new_incumbents = sort_by_crowding_distance(rh, new_incumbents, all_incumbent_isb_keys) - new_incumbents = new_incumbents[: self._max_incumbents] + new_incumbents = self._cut_incumbents(new_incumbents, all_incumbent_isb_keys) + #TODO JG adjust. Other option: statistical test or HV (SMS-EMOA reduce function) - # or random? - # idx = self._rng.randint(0, len(new_incumbents)) - # del new_incumbents[idx] - # del new_incumbent_ids[idx] + self._update_trajectory(new_incumbents) - logger.info( - f"Removed one incumbent using crowding distance because more than {self._max_incumbents} are " - "available." 
- ) + def _cut_incumbents(self, incumbent_ids: list[int], all_incumbent_isb_keys: list[list[InstanceSeedBudgetKey]]) -> list[int]: + new_incumbents = sort_by_crowding_distance(self.runhistory, incumbent_ids, all_incumbent_isb_keys) + new_incumbents = new_incumbents[: self._max_incumbents] - self._update_trajectory(new_incumbents) + # or random? + # idx = self._rng.randint(0, len(new_incumbents)) + # del new_incumbents[idx] + # del new_incumbent_ids[idx] + + logger.info( + f"Removed one incumbent using crowding distance because more than {self._max_incumbents} are " + "available." + ) + + return new_incumbents def _remove_incumbent(self, config: Configuration, previous_incumbent_ids: list[int], new_incumbent_ids: list[int]) -> None: """Remove incumbents if population is too big @@ -684,6 +697,18 @@ def set_state(self, state: dict[str, Any]) -> None: """Sets the state of the intensifier. Used to restore the state of the intensifier when continuing a run.""" pass + def get_save_data(self) -> dict: + data = { + "incumbent_ids": [self.runhistory.get_config_id(config) for config in + self.incumbents], + "rejected_config_ids": self._rejected_config_ids, + "incumbents_changed": self._incumbents_changed, + "trajectory": [dataclasses.asdict(item) for item in self._trajectory], + "state": self.get_state(), + } + + return data + def save(self, filename: str | Path) -> None: """Saves the current state of the intensifier. In addition to the state (retrieved by ``get_state``), this method also saves the incumbents and trajectory. 
@@ -694,13 +719,7 @@ def save(self, filename: str | Path) -> None: assert str(filename).endswith(".json") filename.parent.mkdir(parents=True, exist_ok=True) - data = { - "incumbent_ids": [self.runhistory.get_config_id(config) for config in self.incumbents], - "rejected_config_ids": self._rejected_config_ids, - "incumbents_changed": self._incumbents_changed, - "trajectory": [dataclasses.asdict(item) for item in self._trajectory], - "state": self.get_state(), - } + data = self.get_save_data() with open(filename, "w") as fp: json.dump(data, fp, indent=2) diff --git a/smac/intensifier/multi_objective_intensifier.py b/smac/intensifier/multi_objective_intensifier.py index 05b77746e..a616533aa 100644 --- a/smac/intensifier/multi_objective_intensifier.py +++ b/smac/intensifier/multi_objective_intensifier.py @@ -61,6 +61,27 @@ def _remove_incumbent(self, config: Configuration, previous_incumbent_ids: list[ # TODO adjust raise NotImplementedError + def _cut_incumbents(self, incumbent_ids: list[int], all_incumbent_isb_keys: list[list[InstanceSeedBudgetKey]]) -> list[int]: + #TODO JG sort by hypervolume + new_incumbents = sort_by_crowding_distance(self.runhistory, incumbent_ids, all_incumbent_isb_keys) + new_incumbents = new_incumbents[: self._max_incumbents] + + logger.info( + f"Removed one incumbent using their reduction in hypervolume because more than {self._max_incumbents} are " + "available." + ) + + return new_incumbents + + def get_instance_seed_budget_keys( + self, config: Configuration, compare: bool = False + ) -> list[InstanceSeedBudgetKey]: + """Returns the instance-seed-budget keys for a given configuration. This method is *used for + updating the incumbents* and might differ for different intensifiers. For example, if incumbents should only + be compared on the highest observed budgets. 
+ """ + return self.runhistory.get_instance_seed_budget_keys(config, highest_observed_budget_only=True) + class MOIntensifier(Intensifier, MOIntensifierMixin): pass From bd31d32cc4fda627c2e4a034b2ac0c17e4d77dab Mon Sep 17 00:00:00 2001 From: "J. Rook" Date: Mon, 20 Mar 2023 13:20:06 +0100 Subject: [PATCH 31/74] Expanded debugging message --- smac/acquisition/maximizer/local_search.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/smac/acquisition/maximizer/local_search.py b/smac/acquisition/maximizer/local_search.py index f9a5bafb2..9a67f2e33 100644 --- a/smac/acquisition/maximizer/local_search.py +++ b/smac/acquisition/maximizer/local_search.py @@ -488,13 +488,10 @@ def _search( ) logger.debug( - "Local searches took %s steps and looked at %s configurations. Computing the acquisition function for " - "each search took %f (prev %f) seconds on average and each acquisition function call took %f seconds on average.", - local_search_steps, - neighbors_looked_at, - np.sum(times_per_iteration)/num_candidates, - np.mean(times_per_iteration), - times_per_iteration/np.sum(neighbors_looked_at), + f"Local searches took {local_search_steps} steps and looked at {neighbors_looked_at} configurations." + f"Computing the acquisition function for each search took {np.sum(times_per_iteration)/num_candidates}" + f"(prev {np.mean(times_per_iteration)}) seconds on average and each acquisition function call took {times_per_iteration/np.sum(neighbors_looked_at)} seconds on average." + f"In total the whole procedure took {np.sum(times_per_iteration)} seconds to look at {np.sum(neighbors_looked_at)} configurations." ) return [(a, i) for a, i in zip(acq_val_candidates, candidates)] From 4322cfbe75ab18181b340785405b21ff358d259c Mon Sep 17 00:00:00 2001 From: "J. Rook" Date: Mon, 20 Mar 2023 13:20:56 +0100 Subject: [PATCH 32/74] Allow saving the intensifier when no incumbent is chosen yet. 
--- smac/intensifier/abstract_intensifier.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/smac/intensifier/abstract_intensifier.py b/smac/intensifier/abstract_intensifier.py index 72e130a56..8feb14b4e 100644 --- a/smac/intensifier/abstract_intensifier.py +++ b/smac/intensifier/abstract_intensifier.py @@ -721,9 +721,16 @@ def set_state(self, state: dict[str, Any]) -> None: pass def get_save_data(self) -> dict: + incumbent_ids = [] + for config in self.incumbents: + try: + incumbent_ids.append(self.runhistory.get_config_id(config)) + except KeyError: + incumbent_ids.append(-1) #Should not happen, but occurs sometimes with small-budget runs + logger.warning(f"{config} does not exist in runhistory, but is part of the incumbent!") + data = { - "incumbent_ids": [self.runhistory.get_config_id(config) for config in - self.incumbents], + "incumbent_ids": incumbent_ids, "rejected_config_ids": self._rejected_config_ids, "incumbents_changed": self._incumbents_changed, "trajectory": [dataclasses.asdict(item) for item in self._trajectory], From 6113c18be8152a59f148fd9ca9fad3ef39138017 Mon Sep 17 00:00:00 2001 From: "J. Rook" Date: Mon, 20 Mar 2023 13:21:34 +0100 Subject: [PATCH 33/74] Bugfix for passing checks when MO model with features --- smac/model/multi_objective_model.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/smac/model/multi_objective_model.py b/smac/model/multi_objective_model.py index 23a765a8f..e2baca382 100644 --- a/smac/model/multi_objective_model.py +++ b/smac/model/multi_objective_model.py @@ -53,6 +53,8 @@ def __init__( seed=seed, ) + self._n_features = self._models[0]._n_features #TODO JG make more elegant + @property def models(self) -> list[AbstractModel]: """The internally used surrogate models.""" From 8cd499f973f0790f91f5954e25abedf878f89454 Mon Sep 17 00:00:00 2001 From: "J. 
Rook" Date: Wed, 22 Mar 2023 17:15:04 +0100 Subject: [PATCH 34/74] Added support for retraining the surrogate model and rerunning the acquisition loop based on a ratio of the total runtime, as in the Java implementation of SMAC2. --- smac/facade/abstract_facade.py | 8 ++++-- smac/main/config_selector.py | 51 +++++++++++++++++++++++++++++----- 2 files changed, 50 insertions(+), 9 deletions(-) diff --git a/smac/facade/abstract_facade.py b/smac/facade/abstract_facade.py index 728194de6..10b96629f 100644 --- a/smac/facade/abstract_facade.py +++ b/smac/facade/abstract_facade.py @@ -385,11 +385,15 @@ def get_multi_objective_algorithm(scenario: Scenario) -> AbstractMultiObjectiveA def get_config_selector( scenario: Scenario, *, - retrain_after: int = 8, + retrain_after: int | None = 8, + retrain_wallclock_ratio: int | None = None, retries: int = 16, ) -> ConfigSelector: """Returns the default configuration selector.""" - return ConfigSelector(scenario, retrain_after=retrain_after, retries=retries) + return ConfigSelector(scenario, + retrain_after=retrain_after, + retrain_wallclock_ratio=retrain_wallclock_ratio, + retries=retries) def _get_optimizer(self) -> SMBO: """Fills the SMBO with all the pre-initialized components.""" diff --git a/smac/main/config_selector.py b/smac/main/config_selector.py index e3b17cc7f..3a5c2564e 100644 --- a/smac/main/config_selector.py +++ b/smac/main/config_selector.py @@ -1,5 +1,6 @@ from __future__ import annotations +import time from typing import Any, Iterator import copy @@ -50,7 +51,8 @@ def __init__( self, scenario: Scenario, *, - retrain_after: int = 8, + retrain_after: int | None = 8, + retrain_wallclock_ratio: float | None = None, retries: int = 16, min_trials: int = 1, ) -> None: @@ -70,6 +72,7 @@ def __init__( # And other variables self._retrain_after = retrain_after + self._retrain_wallclock_ratio = retrain_wallclock_ratio self._previous_entries = -1 self._predict_x_best = True self._min_trials = min_trials @@ -78,10 +81,18 @@ def __init__( # 
How often to retry receiving a new configuration # (counter increases if the received config was already returned before) self._retries = retries + self._counter = 0 + + self._wallclock_start_time: float = time.time() + self._acquisition_training_times: list[float] = [] # Processed configurations should be stored here; this is important to not return the same configuration twice self._processed_configs: list[Configuration] = [] + #Check if there is at least one retrain condition + if self._retrain_after is None and self._retrain_wallclock_ratio is None: + raise ValueError("No retrain condition specified!") + def _set_components( self, initial_design: AbstractInitialDesign, @@ -186,6 +197,7 @@ def __iter__(self) -> Iterator[Configuration]: continue # Check if X/Y differs from the last run, otherwise use cached results + start_time = time.time() if self._previous_entries != Y.shape[0]: self._model.train(X, Y) @@ -218,22 +230,21 @@ def __iter__(self) -> Iterator[Configuration]: random_design=self._random_design, ) - counter = 0 + self._acquisition_training_times.append(time.time() - start_time) + + self._counter = 0 failed_counter = 0 for config in challengers: if config not in self._processed_configs: - counter += 1 + self._counter += 1 self._processed_configs.append(config) self._call_callbacks_on_end(config) yield config - retrain = counter == self._retrain_after + retrain = self._check_for_retrain() self._call_callbacks_on_start() # We break to enforce a new iteration of the while loop (i.e. we retrain the surrogate model) if retrain: - logger.debug( - f"Yielded {counter} configurations. Start new iteration and retrain surrogate model." - ) break else: failed_counter += 1 @@ -243,6 +254,32 @@ def __iter__(self) -> Iterator[Configuration]: logger.warning(f"Could not return a new configuration after {self._retries} retries." 
"") return + def _check_for_retrain(self) -> bool: + if self._retrain_after is not None: + if self._counter >= self._retrain_after: + logger.debug( + f"Yielded {self._counter} configurations. Start new iteration and retrain surrogate model." + ) + return True + + if self._retrain_wallclock_ratio is not None: + # Total elapsed wallcock time + elapsed_time = time.time() - self._wallclock_start_time + + # Total time spend on getting configurations with the surrogate model + acquisition_training_time = sum(self._acquisition_training_times) + + # Retrain when more time has been spend + if elapsed_time * self._retrain_wallclock_ratio > acquisition_training_time: + logger.debug( + f"Less than {self._retrain_wallclock_ratio:.2%} ({acquisition_training_time / elapsed_time:.2f}) " + f"of the elapsed wallclock time ({elapsed_time:.2f}s) has been spend on finding new configurations " + f"with the surrogate model. Start new iteration and retrain surrogate model." + ) + return True + + return False + def _call_callbacks_on_start(self) -> None: for callback in self._callbacks: callback.on_next_configurations_start(self) From a26b7c9c45531880a2033be974e7b18d76abfc8e Mon Sep 17 00:00:00 2001 From: "J. Rook" Date: Tue, 28 Mar 2023 10:59:41 +0200 Subject: [PATCH 35/74] Added a minimum number of configurations that need to be yielded before retraining, and force the maximization procedure to run immediately in order to time its computation cost. --- smac/main/config_selector.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/smac/main/config_selector.py b/smac/main/config_selector.py index 3a5c2564e..85ab4997d 100644 --- a/smac/main/config_selector.py +++ b/smac/main/config_selector.py @@ -36,8 +36,13 @@ class ConfigSelector: Parameters ---------- - retrain_after : int, defaults to 8 + retrain_after : int | None, defaults to 8 How many configurations should be returned before the surrogate model is retrained. 
+ retrain_wallclock_ratio: float | None, defaults to None + How much of the total elapsed wallclock time should be spent on retraining the surrogate model + and maximizing the acquisition function. For example, a ratio of 0.1 means that only 10% of the wallclock time is spent on retraining. + min_configurations: int, defaults to 2 + The minimum number of configurations that need to be yielded before retraining can occur. Should be lower than or equal to retrain_after. retries : int, defaults to 8 How often to retry receiving a new configuration before giving up. min_trials: int, defaults to 1 @@ -53,6 +58,7 @@ def __init__( *, retrain_after: int | None = 8, retrain_wallclock_ratio: float | None = None, + min_configurations: int = 2, retries: int = 16, min_trials: int = 1, ) -> None: @@ -73,6 +79,7 @@ def __init__( # And other variables self._retrain_after = retrain_after self._retrain_wallclock_ratio = retrain_wallclock_ratio + self._min_configurations = min_configurations self._previous_entries = -1 self._predict_x_best = True self._min_trials = min_trials @@ -93,6 +100,10 @@ def __init__( if self._retrain_after is None and self._retrain_wallclock_ratio is None: raise ValueError("No retrain condition specified!") + if self._retrain_after is not None: + if self._retrain_after < self._min_configurations: + raise ValueError("retrain_after should be higher or equal to min_configurations") + def _set_components( self, initial_design: AbstractInitialDesign, @@ -230,9 +241,14 @@ def __iter__(self) -> Iterator[Configuration]: random_design=self._random_design, ) + if self._retrain_wallclock_ratio is not None: + len(challengers) # TODO hacky: Forces actual computation of the acquisition function maximizer + + time.sleep(15) + self._acquisition_training_times.append(time.time() - start_time) - self._counter = 0 + failed_counter = 0 for config in challengers: if config not in self._processed_configs: @@ -263,6 +279,9 @@ def _check_for_retrain(self) -> bool: return True if
self._retrain_wallclock_ratio is not None: + if self._counter < self._min_configurations: + return False + # Total elapsed wallcock time elapsed_time = time.time() - self._wallclock_start_time @@ -270,7 +289,7 @@ def _check_for_retrain(self) -> bool: acquisition_training_time = sum(self._acquisition_training_times) # Retrain when more time has been spend - if elapsed_time * self._retrain_wallclock_ratio > acquisition_training_time: + if acquisition_training_time / elapsed_time < self._retrain_wallclock_ratio: logger.debug( f"Less than {self._retrain_wallclock_ratio:.2%} ({acquisition_training_time / elapsed_time:.2f}) " f"of the elapsed wallclock time ({elapsed_time:.2f}s) has been spend on finding new configurations " From 37ae76373956522dd68d53f9454d19785e5ffc11 Mon Sep 17 00:00:00 2001 From: "J. Rook" Date: Tue, 28 Mar 2023 11:01:09 +0200 Subject: [PATCH 36/74] Remove sleep call used for testing --- smac/main/config_selector.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/smac/main/config_selector.py b/smac/main/config_selector.py index 85ab4997d..414892873 100644 --- a/smac/main/config_selector.py +++ b/smac/main/config_selector.py @@ -244,8 +244,6 @@ def __iter__(self) -> Iterator[Configuration]: if self._retrain_wallclock_ratio is not None: len(challengers) # TODO hacky: Forces actual computation of the acquisition function maximizer - time.sleep(15) - self._acquisition_training_times.append(time.time() - start_time) From 9b85222bdbd52365f18a8ea368c2a6fe7f048d9b Mon Sep 17 00:00:00 2001 From: "J. Rook" Date: Tue, 28 Mar 2023 15:51:26 +0200 Subject: [PATCH 37/74] Only compute Pareto fronts on the same subset of isb_keys. Early termination of unfruitful configurations. 
--- smac/intensifier/abstract_intensifier.py | 76 ++++++++++++++++--- smac/intensifier/intensifier.py | 97 +++++++++++++++++------- 2 files changed, 134 insertions(+), 39 deletions(-) diff --git a/smac/intensifier/abstract_intensifier.py b/smac/intensifier/abstract_intensifier.py index 8feb14b4e..7f6f3fb0f 100644 --- a/smac/intensifier/abstract_intensifier.py +++ b/smac/intensifier/abstract_intensifier.py @@ -370,7 +370,7 @@ def get_incumbent(self) -> Configuration | None: return self.incumbents[0] def get_incumbents(self, sort_by: str | None = None) -> list[Configuration]: - """Returns the incumbents (points on the pareto front) of the runhistory as copy. In case of a single-objective + """Returns the incumbents (points on the Pareto front) of the runhistory as copy. In case of a single-objective optimization, only one incumbent (if is) is returned. Returns @@ -460,6 +460,20 @@ def on_tell_end(self, smbo: smac.main.smbo.SMBO, info: TrialInfo, value: TrialVa return RunHistoryCallback(self) + def _check_for_intermediate_comparison(self, config: Configuration) -> bool: + """ + + Parameters + ---------- + config: Configuration + + Returns + ------- + A boolean which decides if the current configuration should be compared against the incumbent. + """ + + return False + def update_incumbents(self, config: Configuration) -> None: """Updates the incumbents. This method is called everytime a trial is added to the runhistory. Since only the affected config and the current incumbents are used, this method is very efficient. Furthermore, a @@ -489,6 +503,8 @@ def update_incumbents(self, config: Configuration) -> None: # What happens if a config was rejected, but it appears again? Give it another try even if it # has already been evaluated? Yes! + #TODO what to do when config is part of the incumbent? 
+ # Associated trials and id config_isb_keys = self.get_instance_seed_budget_keys(config) config_id = rh.get_config_id(config) @@ -539,23 +555,59 @@ def update_incumbents(self, config: Configuration) -> None: # will remove the budgets from the keys. config_isb_comparison_keys = self.get_instance_seed_budget_keys(config, compare=True) # Find the lowest intersection of instance-seed-budget keys for all incumbents. - config_incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys(compare=True) + config_incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys(compare=True) # Intersection # Now we have to check if the new config has been evaluated on the same keys as the incumbents + # TODO If the config is part of the incumbent then it should always be a subset of the intersection if not all([key in config_isb_comparison_keys for key in config_incumbent_isb_comparison_keys]): # We can not tell if the new config is better/worse than the incumbents because it has not been # evaluated on the necessary trials - logger.debug( - f"Could not compare config {config_hash} with incumbents because it's evaluated on " - f"different trials." - ) - # The config has to go to a queue now as it is a challenger and a potential incumbent - #TODO JG find out where it is decided to continue with the challenger - return + # TODO JG add procedure to check if intermediate comparison + if self._check_for_intermediate_comparison(config): + logger.debug( + f"Perform intermediate comparions of config {config_hash} with incumbents to see if it is worse" + ) + #TODO perform comparison with incumbent on current instances. + # Check if the config with these number of trials is part of the Pareto front + + #Check if the incumbents ran on all the ones of this config + if not all([key in config_incumbent_isb_comparison_keys for key in config_isb_keys]): + logger.debug("Config ran on other isb_keys than the incumbents. 
Should not happen.") + return + + #Ensure that the config is not part of the incumbent + if config in incumbents: + return + + incumbents.append(config) + # Only the trials of the challenger + all_incumbent_isb_keys = [config_isb_comparison_keys for _ in incumbents] + #TODO IDEA only compare domination between one incumbent (as relaxation measure) + new_incumbents = self._calculate_pareto_front(rh, incumbents, all_incumbent_isb_keys) + + if config not in new_incumbents: + #Reject config + logger.debug(f"Rejected config {config_hash} in an intermediate comparison because it is dominated by the incumbents on {len(config_isb_keys)} trials.") + self._add_rejected_config(config) + + return + + else: + #TODO + + logger.debug( + f"Could not compare config {config_hash} with incumbents because it's evaluated on " + f"different trials." + ) + + # The config has to go to a queue now as it is a challenger and a potential incumbent + return else: # If all instances are available and the config is incumbent and even evaluated on more trials # then there's nothing we can do + # TODO JG: Will always be false, because the incumbent with the smallest number of trials has been ran. + # TODO JG: Hence: len(config_isb_keys) == len(incumbent_isb_keys) if config in incumbents and len(config_isb_keys) > len(incumbent_isb_keys): logger.debug( "Config is already an incumbent but can not be compared to other incumbents because " @@ -571,16 +623,18 @@ def update_incumbents(self, config: Configuration) -> None: # Now we get all instance-seed-budget keys for each incumbent (they might be different when using budgets) all_incumbent_isb_keys = [] for incumbent in incumbents: - all_incumbent_isb_keys.append(self.get_instance_seed_budget_keys(incumbent)) + # all_incumbent_isb_keys.append(self.get_instance_seed_budget_keys(incumbent)) + all_incumbent_isb_keys.append(self.get_incumbent_instance_seed_budget_keys()) # !!!!! 
#TODO JG it is guaruanteed that the challenger has ran on the intersection of isb_keys # of the incumbents, however this is not the case in this part of the code. # Here, all the runs of each incumbent used. Maybe the intensifier ensures that the incumbents # have ran on the same isb keys in the first place? + # FIXED IN LINE 580 #TODO JG get intersection for all incumbent_isb_keys and check if it breaks budget. - # We compare the incumbents now and only return the ones on the pareto front + # We compare the incumbents now and only return the ones on the Pareto front new_incumbents = self._calculate_pareto_front(rh, incumbents, all_incumbent_isb_keys) new_incumbent_ids = [rh.get_config_id(c) for c in new_incumbents] diff --git a/smac/intensifier/intensifier.py b/smac/intensifier/intensifier.py index ca82e7e75..fd9ef5344 100644 --- a/smac/intensifier/intensifier.py +++ b/smac/intensifier/intensifier.py @@ -139,7 +139,7 @@ def __iter__(self) -> Iterator[TrialInfo]: # Also, incorporate ``get_incumbent_instance_seed_budget_keys`` here because challengers are only allowed to # sample from the incumbent's instances incumbents = self.get_incumbents(sort_by="num_trials") - incumbent_isb_keys = self.get_incumbent_instance_seed_budget_keys() + incumbent_isb_keys = self.get_incumbent_instance_seed_budget_keys() # Intersection # Check if configs in queue are still running all_configs_running = True @@ -148,7 +148,7 @@ def __iter__(self) -> Iterator[TrialInfo]: all_configs_running = False break - if len(self._queue) == 0 or all_configs_running: + if len(self._queue) == 0 or all_configs_running: # Incumbents if len(self._queue) == 0: logger.debug("Queue is empty:") else: @@ -208,6 +208,7 @@ def __iter__(self) -> Iterator[TrialInfo]: logger.debug(f"--- Finished yielding for config {incumbent_hash}.") # We break here because we only want to intensify one more trial of one incumbent + # TODO intensify until the incumbents are all of equal size (N+1 of biggest incumbent) break else: 
# assert len(incumbent_isb_keys) == self._max_config_calls @@ -252,46 +253,86 @@ def __iter__(self) -> Iterator[TrialInfo]: self._queue.remove((config, N)) continue + logger.debug(f"--- Config {config_hash} origin ({config.origin})") + # And then we yield as many trials as we specified N # However, only the same instances as the incumbents are used isk_keys: list[InstanceSeedBudgetKey] | None = None if len(incumbent_isb_keys) > 0: isk_keys = incumbent_isb_keys - # TODO: What to do if there are no incumbent instances? (Use-case: call multiple asks) trials = self._get_next_trials(config, N=N, from_keys=isk_keys) - logger.debug(f"--- Yielding {len(trials)} trials to evaluate config {config_hash}...") - for trial in trials: - fails = -1 - yield trial - - logger.debug(f"--- Finished yielding for config {config_hash}.") - - # Now we have to remove the config - self._queue.remove((config, N)) - logger.debug(f"--- Removed config {config_hash} with N={N} from queue.") - - # Finally, we add the same config to the queue with a higher N - # If the config was rejected by the runhistory, then it's been removed in the next iteration - if N < self._max_config_calls: - new_pair = (config, N * 2) - if new_pair not in self._queue: - logger.debug( - f"--- Doubled trials of config {config_hash} to N={N*2} and added it to the queue " - "again." - ) - self._queue.append((config, N * 2)) - - # Also reset fails here + if len(trials) == 0: + # We remove the config and do not add it back to the queue. + self._queue.remove((config, N)) + logger.debug(f"--- No trails to evaluate for config {config_hash}. 
" + f"Removed config {config_hash} with N={N} from queue.") + else: + logger.debug(f"--- Yielding {len(trials)} trials to evaluate config {config_hash}...") + for trial in trials: fails = -1 - else: - logger.debug(f"--- Config {config_hash} with N={N*2} is already in the queue.") + yield trial + + logger.debug(f"--- Finished yielding for config {config_hash}.") + + # Now we have to remove the config + self._queue.remove((config, N)) + logger.debug(f"--- Removed config {config_hash} with N={N} from queue.") + + # Finally, we add the same config to the queue with a higher N + # If the config was rejected by the runhistory, then it's been removed in the next iteration + if N < self._max_config_calls: + new_pair = (config, N * 2) + if new_pair not in self._queue: + logger.debug( + f"--- Doubled trials of config {config_hash} to N={N*2} and added it to the queue " + "again." + ) + self._queue.append((config, N * 2)) + + # Also reset fails here + fails = -1 + else: + logger.debug(f"--- Config {config_hash} with N={N*2} is already in the queue.") # If we are at this point, it really is important to break because otherwise, we would intensify # all configs in the queue in one iteration break + def _check_for_intermediate_comparison(self, config: Configuration) -> bool: + """ + + Parameters + ---------- + config: Configuration + + Returns + ------- + A boolean which decides if the current configuration should be compared against the incumbent. 
+ """ + + config_isb_keys = self.get_instance_seed_budget_keys(config) + config_id = self.runhistory.get_config_id(config) + config_hash = get_config_hash(config) + + # Do not compare very early in the process + if len(config_isb_keys) < 4: + return False + + # Find N in _queue + N = None + for c, cn in self._queue: + if config == c: + N = cn + break + + if N is None: + logger.debug(f"This should not happen, but config {config_hash} is not in the queue.") + return False + + return len(config_isb_keys) == N + def _get_next_trials( self, config: Configuration, From 8c114c01641e1c91442364aea7cbd16a4728b2e4 Mon Sep 17 00:00:00 2001 From: "J. Rook" Date: Mon, 3 Apr 2023 15:11:08 +0200 Subject: [PATCH 38/74] Compute actual isb differences --- smac/intensifier/abstract_intensifier.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/smac/intensifier/abstract_intensifier.py b/smac/intensifier/abstract_intensifier.py index 7f6f3fb0f..a44b3356c 100644 --- a/smac/intensifier/abstract_intensifier.py +++ b/smac/intensifier/abstract_intensifier.py @@ -428,12 +428,16 @@ def get_incumbent_instance_seed_budget_key_differences(self, compare: bool = Fal if len(incumbent_isb_keys) <= 1: return [] - incumbent_isb_keys = list(set.difference(*map(set, incumbent_isb_keys))) # type: ignore + # union - intersection + intersection_isb_keys = set.intersection(*map(set, incumbent_isb_keys)) + union_isb_keys = set.union(*map(set, incumbent_isb_keys)) + incumbent_isb_keys_differences = list(union_isb_keys - intersection_isb_keys) + # incumbent_isb_keys = list(set.difference(*map(set, incumbent_isb_keys))) # type: ignore - if len(incumbent_isb_keys) == 0: + if len(incumbent_isb_keys_differences) == 0: return [] - return incumbent_isb_keys # type: ignore + return incumbent_isb_keys_differences # type: ignore return [] @@ -624,7 +628,7 @@ def update_incumbents(self, config: Configuration) -> None: all_incumbent_isb_keys = [] for incumbent in incumbents: # 
all_incumbent_isb_keys.append(self.get_instance_seed_budget_keys(incumbent)) - all_incumbent_isb_keys.append(self.get_incumbent_instance_seed_budget_keys()) # !!!!! + all_incumbent_isb_keys.append(self.get_incumbent_instance_seed_budget_keys()) # !!!!! #TODO JG it is guaruanteed that the challenger has ran on the intersection of isb_keys # of the incumbents, however this is not the case in this part of the code. From 2bc73837d5b400c4ce35b4fa42d28564bba2e2dd Mon Sep 17 00:00:00 2001 From: "J. Rook" Date: Mon, 3 Apr 2023 15:12:28 +0200 Subject: [PATCH 39/74] Aclib runner --- smac/runner/aclib_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smac/runner/aclib_runner.py b/smac/runner/aclib_runner.py index dc3472dcf..d84aedff3 100644 --- a/smac/runner/aclib_runner.py +++ b/smac/runner/aclib_runner.py @@ -73,7 +73,7 @@ def __call__(self, algorithm_kwargs: dict[str, Any]) -> tuple[str, str]: for line in output.split("\n"): line = line.strip() if re.match(result_begin, line): - print("match") + # print("match") outputline = line[len(result_begin):] logger.debug(f"Found result in output: {outputline}") From 6ddc94cad75c21c25094b6e7e4c402e33eb13118 Mon Sep 17 00:00:00 2001 From: "J. Rook" Date: Mon, 3 Apr 2023 16:08:16 +0200 Subject: [PATCH 40/74] Reset counter when retrain is triggered --- smac/main/config_selector.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/smac/main/config_selector.py b/smac/main/config_selector.py index 414892873..a6452ed82 100644 --- a/smac/main/config_selector.py +++ b/smac/main/config_selector.py @@ -148,7 +148,7 @@ def __iter__(self) -> Iterator[Configuration]: Note ---- When SMAC continues a run, processed configurations from the runhistory are ignored. For example, if the - intitial design configurations already have been processed, they are ignored here. After the run is + initial design configurations already have been processed, they are ignored here. 
After the run is continued, however, the surrogate model is trained based on the runhistory in all cases. Returns @@ -259,6 +259,7 @@ def __iter__(self) -> Iterator[Configuration]: # We break to enforce a new iteration of the while loop (i.e. we retrain the surrogate model) if retrain: + self._counter = 0 break else: failed_counter += 1 From 24a749fce76991d8c5c97e73294ef2aa5c571396 Mon Sep 17 00:00:00 2001 From: "J. Rook" Date: Wed, 12 Apr 2023 10:51:39 +0200 Subject: [PATCH 41/74] Comparison on one config from the incumbent --- smac/intensifier/abstract_intensifier.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/smac/intensifier/abstract_intensifier.py b/smac/intensifier/abstract_intensifier.py index a44b3356c..8f2c3e641 100644 --- a/smac/intensifier/abstract_intensifier.py +++ b/smac/intensifier/abstract_intensifier.py @@ -584,15 +584,21 @@ def update_incumbents(self, config: Configuration) -> None: if config in incumbents: return - incumbents.append(config) + # IDEA only compare domination between one incumbent (as relaxation measure) + iid = self._rng.choice(len(incumbents)) + incumbents = [incumbents[iid], config] + # incumbents.append(config) + # Only the trials of the challenger all_incumbent_isb_keys = [config_isb_comparison_keys for _ in incumbents] - #TODO IDEA only compare domination between one incumbent (as relaxation measure) + new_incumbents = self._calculate_pareto_front(rh, incumbents, all_incumbent_isb_keys) if config not in new_incumbents: #Reject config - logger.debug(f"Rejected config {config_hash} in an intermediate comparison because it is dominated by the incumbents on {len(config_isb_keys)} trials.") + logger.debug(f"Rejected config {config_hash} in an intermediate comparison because it " + f"is dominated by a randomly sampled config from the incumbents on " + f"{len(config_isb_keys)} trials.") self._add_rejected_config(config) return From 944425b565a89f6d469763e4defe48332cdb8b01 Mon Sep 17 00:00:00 2001 
From: "J. Rook" Date: Thu, 13 Apr 2023 21:46:45 +0200 Subject: [PATCH 42/74] Make dask runner work --- smac/runhistory/runhistory.py | 3 +++ smac/runner/dask_runner.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/smac/runhistory/runhistory.py b/smac/runhistory/runhistory.py index 89f07914b..aac97dd9e 100644 --- a/smac/runhistory/runhistory.py +++ b/smac/runhistory/runhistory.py @@ -603,6 +603,9 @@ def get_config(self, config_id: int) -> Configuration: def get_config_id(self, config: Configuration) -> int: """Returns the configuration id from a configuration.""" + if config not in self._config_ids: + logger.warning("Requested id of unknown configuration!") + return -1 return self._config_ids[config] def get_configs(self, sort_by: str | None = None) -> list[Configuration]: diff --git a/smac/runner/dask_runner.py b/smac/runner/dask_runner.py index 2afad7bd3..e9a6f15c6 100644 --- a/smac/runner/dask_runner.py +++ b/smac/runner/dask_runner.py @@ -91,7 +91,7 @@ def __init__( ) if self._scenario.output_directory is not None: - self._scheduler_file = self._scenario.output_directory / ".dask_scheduler_file" + self._scheduler_file = Path(self._scenario.output_directory).joinpath(".dask_scheduler_file") self._client.write_scheduler_file(scheduler_file=str(self._scheduler_file)) else: # We just use their set up From 84964613766bc94a48e0ce1b6226c5e1a036e674 Mon Sep 17 00:00:00 2001 From: "J. Rook" Date: Thu, 20 Apr 2023 14:34:25 +0200 Subject: [PATCH 43/74] Added different intermediate update methods that can be mixed with the intensifier class. 
--- smac/intensifier/abstract_intensifier.py | 448 ++++++++++++------ smac/intensifier/intensifier.py | 1 - smac/intensifier/mixins/__init__.py | 3 + .../intensifier/mixins/intermediate_update.py | 289 +++++++++++ .../multi_objective_intensifier.py | 6 +- 5 files changed, 592 insertions(+), 155 deletions(-) create mode 100644 smac/intensifier/mixins/__init__.py create mode 100644 smac/intensifier/mixins/intermediate_update.py diff --git a/smac/intensifier/abstract_intensifier.py b/smac/intensifier/abstract_intensifier.py index 8f2c3e641..f921cc3dd 100644 --- a/smac/intensifier/abstract_intensifier.py +++ b/smac/intensifier/abstract_intensifier.py @@ -1,5 +1,6 @@ from __future__ import annotations +import copy from abc import abstractmethod from typing import Any, Callable, Iterator @@ -465,7 +466,8 @@ def on_tell_end(self, smbo: smac.main.smbo.SMBO, info: TrialInfo, value: TrialVa return RunHistoryCallback(self) def _check_for_intermediate_comparison(self, config: Configuration) -> bool: - """ + """Checks if the configuration should be evaluated against the incumbent while it + did not run on all the trials the incumbents did. Parameters ---------- @@ -478,64 +480,95 @@ def _check_for_intermediate_comparison(self, config: Configuration) -> bool: return False - def update_incumbents(self, config: Configuration) -> None: - """Updates the incumbents. This method is called everytime a trial is added to the runhistory. Since only - the affected config and the current incumbents are used, this method is very efficient. Furthermore, a - configuration is only considered incumbent if it has a better performance on all incumbent instances. - - Crucially, if there is no incumbent (at the start) then, the first configuration assumes - incumbent status. For the next configuration, we need to check if the configuration - is better on all instances that have been evaluated for the incumbent. If this is the - case, then we can replace the incumbent.
Otherwise, a) we need to requeue the config to - obtain the missing instance-seed-budget combination or b) mark this configuration as - inferior ("rejected") to not consider it again. The comparison behaviour is controlled by - self.get_instance_seed_budget_keys() and self.get_incumbent_instance_seed_budget_keys(). - - Notably, this method is written to support both multi-fidelity and multi-objective - optimization. While the get_instance_seed_budget_keys() method and - self.get_incumbent_instance_seed_budget_keys() are used for the multi-fidelity behaviour, - calculate_pareto_front() is used as a hard coded way to support multi-objective - optimization, including the single objective as special case. calculate_pareto_front() - is called on the set of all (in case of MO) incumbents amended with the challenger - configuration, provided it has a sufficient overlap in seed-instance-budget combinations. - - Lastly, if we have a self._max_incumbents and the pareto front provides more than this - specified amount, we cut the incumbents using crowding distance. + def _intermediate_comparison(self, config: Configuration) -> bool: + """Compares the configuration against the incumbent + + Parameters + ---------- + config: Configuration + + Returns + ------- + A boolean which indicates if we should continue with this configuration. + """ + config_hash = get_config_hash(config) + incumbents = self.get_incumbents() + config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) + incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys(compare=True) + + + logger.debug( + f"Perform intermediate comparions of config {config_hash} with incumbents to see if it is worse" + ) + # TODO perform comparison with incumbent on current instances. 
+ # Check if the config with these number of trials is part of the Pareto front + + # Check if the incumbents ran on all the ones of this config + if not all([key in incumbent_isb_comparison_keys for key in config_isb_keys]): + logger.debug("Config ran on other isb_keys than the incumbents. Should not happen.") + return True + + # Ensure that the config is not part of the incumbent + if config in incumbents: + return True + + # Only compare domination between one incumbent (as relaxation measure) + iid = self._rng.choice(len(incumbents)) + incumbents = [incumbents[iid], config] + # incumbents.append(config) + + # Only the trials of the challenger + all_incumbent_isb_keys = [config_isb_keys for _ in incumbents] + + new_incumbents = self._calculate_pareto_front(self.runhistory, incumbents, all_incumbent_isb_keys) + + return config in new_incumbents + + def _update_incumbent(self, config: Configuration) -> list[Configuration]: + """Updates the incumbent with the config (which can be the challenger) + + Parameters + ---------- + config: Configuration + + Returns + ------- """ rh = self.runhistory - # What happens if a config was rejected, but it appears again? Give it another try even if it - # has already been evaluated? Yes! + incumbents = self.get_incumbents() - #TODO what to do when config is part of the incumbent? 
+ if config not in incumbents: + incumbents.append(config) - # Associated trials and id - config_isb_keys = self.get_instance_seed_budget_keys(config) - config_id = rh.get_config_id(config) + isb_keys = self.get_incumbent_instance_seed_budget_keys(compare=True) + all_incumbent_isb_keys = [isb_keys for _ in range(len(incumbents))] + + + # We compare the incumbents now and only return the ones on the Pareto front + # _calculate_pareto_front returns only non-dominated points + new_incumbents = self._calculate_pareto_front(rh, incumbents, all_incumbent_isb_keys) + return new_incumbents + + def update_incumbents(self, config: Configuration) -> None: + incumbents = self.get_incumbents() config_hash = get_config_hash(config) - # We skip updating incumbents if no instances are available + config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) + incumbent_isb_keys = self.get_incumbent_instance_seed_budget_keys(compare=True) + + #Check if config holds keys # Note: This is especially the case if trials of a config are still running # because if trials are running, the runhistory does not update the trials in the fast data structure if len(config_isb_keys) == 0: logger.debug(f"No relevant instances evaluated for config {config_hash}. Updating incumbents is skipped.") return - # Now we get the incumbents and see which trials have been used - incumbents = self.get_incumbents() - incumbent_ids = [rh.get_config_id(c) for c in incumbents] - # Find the lowest intersection of instance-seed-budget keys for all incumbents. 
- incumbent_isb_keys = self.get_incumbent_instance_seed_budget_keys() - - # Save for later - previous_incumbents = incumbents.copy() - previous_incumbent_ids = incumbent_ids.copy() - - # Little sanity check here for consistency if len(incumbents) > 0: assert incumbent_isb_keys is not None assert len(incumbent_isb_keys) > 0 + # Check if incumbent exists # If there are no incumbents at all, we just use the new config as new incumbent # Problem: We can add running incumbents if len(incumbents) == 0: # incumbent_isb_keys is None and len(incumbents) == 0: @@ -545,123 +578,51 @@ def update_incumbents(self, config: Configuration) -> None: # Nothing else to do return - # Comparison keys - # This one is a bit tricky: We would have problems if we compare with budgets because we might have different - # scenarios (depending on the incumbent selection specified in Successive Halving). - # 1) Any budget/highest observed budget: We want to get rid of the budgets because if we know it is calculated - # on the same instance-seed already then we are ready to go. Imagine we would check for the same budgets, - # then the configs can not be compared although the user does not care on which budgets configurations have - # been evaluated. - # 2) Highest budget: We only want to compare the configs if they are evaluated on the highest budget. - # Here we do actually care about the budgets. Please see the ``get_instance_seed_budget_keys`` method from - # Successive Halving to get more information. - # Noitce: compare=True only takes effect when subclass implemented it. -- e.g. in SH it - # will remove the budgets from the keys. - config_isb_comparison_keys = self.get_instance_seed_budget_keys(config, compare=True) - # Find the lowest intersection of instance-seed-budget keys for all incumbents. 
- config_incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys(compare=True) # Intersection - - # Now we have to check if the new config has been evaluated on the same keys as the incumbents - # TODO If the config is part of the incumbent then it should always be a subset of the intersection - if not all([key in config_isb_comparison_keys for key in config_incumbent_isb_comparison_keys]): - # We can not tell if the new config is better/worse than the incumbents because it has not been - # evaluated on the necessary trials - - # TODO JG add procedure to check if intermediate comparison + #Check if config isb is subset of incumbents + # if not all([isb_key in incumbent_isb_keys for isb_key in config_isb_keys]): + # # If the config is part of the incumbents this could happen + # logger.info(f"Config {config_hash} did run on more instances than the incumbent. Cannot make a proper comparison.") + # return + + # Config did not run on all isb keys of incumbent + # Now we have to check if we should continue with this configuration + if not set(config_isb_keys) == set(incumbent_isb_keys): + # Config did not run on all trials if self._check_for_intermediate_comparison(config): - logger.debug( - f"Perform intermediate comparions of config {config_hash} with incumbents to see if it is worse" - ) - #TODO perform comparison with incumbent on current instances. - # Check if the config with these number of trials is part of the Pareto front - - #Check if the incumbents ran on all the ones of this config - if not all([key in config_incumbent_isb_comparison_keys for key in config_isb_keys]): - logger.debug("Config ran on other isb_keys than the incumbents. 
Should not happen.") - return - - #Ensure that the config is not part of the incumbent - if config in incumbents: - return - - # IDEA only compare domination between one incumbent (as relaxation measure) - iid = self._rng.choice(len(incumbents)) - incumbents = [incumbents[iid], config] - # incumbents.append(config) - - # Only the trials of the challenger - all_incumbent_isb_keys = [config_isb_comparison_keys for _ in incumbents] - - new_incumbents = self._calculate_pareto_front(rh, incumbents, all_incumbent_isb_keys) - - if config not in new_incumbents: - #Reject config - logger.debug(f"Rejected config {config_hash} in an intermediate comparison because it " - f"is dominated by a randomly sampled config from the incumbents on " - f"{len(config_isb_keys)} trials.") + if not self._intermediate_comparison(config): + logger.debug(f"Rejected config {config_hash} in an intermediate comparison on {len(config_isb_keys)} trials.") self._add_rejected_config(config) + return - return - - else: - #TODO - - logger.debug( - f"Could not compare config {config_hash} with incumbents because it's evaluated on " - f"different trials." - ) - - # The config has to go to a queue now as it is a challenger and a potential incumbent - return - else: - # If all instances are available and the config is incumbent and even evaluated on more trials - # then there's nothing we can do - # TODO JG: Will always be false, because the incumbent with the smallest number of trials has been ran. - # TODO JG: Hence: len(config_isb_keys) == len(incumbent_isb_keys) - if config in incumbents and len(config_isb_keys) > len(incumbent_isb_keys): - logger.debug( - "Config is already an incumbent but can not be compared to other incumbents because " - "the others are missing trials." 
- ) - return - - # Add config to incumbents so that we compare only the new config and existing incumbents - if config not in incumbents: - incumbents.append(config) - incumbent_ids.append(config_id) - - # Now we get all instance-seed-budget keys for each incumbent (they might be different when using budgets) - all_incumbent_isb_keys = [] - for incumbent in incumbents: - # all_incumbent_isb_keys.append(self.get_instance_seed_budget_keys(incumbent)) - all_incumbent_isb_keys.append(self.get_incumbent_instance_seed_budget_keys()) # !!!!! - - #TODO JG it is guaruanteed that the challenger has ran on the intersection of isb_keys - # of the incumbents, however this is not the case in this part of the code. - # Here, all the runs of each incumbent used. Maybe the intensifier ensures that the incumbents - # have ran on the same isb keys in the first place? - # FIXED IN LINE 580 - - #TODO JG get intersection for all incumbent_isb_keys and check if it breaks budget. + # Config did run on all isb keys of incumbent + # Here we really update the incumbent by: + # 1. Removing incumbents that are now dominated by another configuration in the incumbent + # 2. 
Add in the challenger to the incumbent + rh = self.runhistory - # We compare the incumbents now and only return the ones on the Pareto front - new_incumbents = self._calculate_pareto_front(rh, incumbents, all_incumbent_isb_keys) + previous_incumbents = copy.copy(incumbents) + previous_incumbent_ids = [rh.get_config_id(c) for c in previous_incumbents] + new_incumbents = self._update_incumbent(config) new_incumbent_ids = [rh.get_config_id(c) for c in new_incumbents] - if len(previous_incumbents) == len(new_incumbents): - if previous_incumbents == new_incumbents: - # No changes in the incumbents - self._remove_rejected_config(config_id) - return - else: - # In this case, we have to determine which config replaced which incumbent and reject it - # We will remove the oldest configuration (the one with the lowest id) because - # set orders the ids ascending. - self._remove_incumbent(config=config, previous_incumbent_ids=previous_incumbent_ids, new_incumbent_ids=new_incumbent_ids) - + # Update trajectory + if previous_incumbents == new_incumbents: + # No changes in the incumbents -> Challenger is rejected, incumbent remains + # if config not in new_incumbents: + # self._add_rejected_config(config) # TODO JG: Here the config was initially removed from the rejected list... + # else: + self._remove_rejected_config(config) + return + elif len(previous_incumbents) == len(new_incumbents): + # In this case, we have to determine which config replaced which incumbent and reject it + # We will remove the oldest configuration (the one with the lowest id) because + # set orders the ids ascending. + self._remove_incumbent(config=config, + previous_incumbent_ids=previous_incumbent_ids, + new_incumbent_ids=new_incumbent_ids) elif len(previous_incumbents) < len(new_incumbents): # Config becomes a new incumbent; nothing is rejected in this case - self._remove_rejected_config(config_id) + self._remove_rejected_config(config) logger.info( f"Config {config_hash} is a new incumbent. 
" f"Total number of incumbents: {len(new_incumbents)}." ) @@ -679,11 +640,196 @@ def update_incumbents(self, config: Configuration) -> None: # Cut incumbents: We only want to keep a specific number of incumbents # We use the crowding distance for that if len(new_incumbents) > self._max_incumbents: + all_incumbent_isb_keys = [incumbent_isb_keys for i in range(len(new_incumbents))] new_incumbents = self._cut_incumbents(new_incumbents, all_incumbent_isb_keys) #TODO JG adjust. Other option: statistical test or HV (SMS-EMOA reduce function) self._update_trajectory(new_incumbents) + + # def update_incumbents(self, config: Configuration) -> None: + # """Updates the incumbents. This method is called everytime a trial is added to the runhistory. Since only + # the affected config and the current incumbents are used, this method is very efficient. Furthermore, a + # configuration is only considered incumbent if it has a better performance on all incumbent instances. + # + # Crucially, if there is no incumbent (at the start) then, the first configuration assumes + # incumbent status. For the next configuration, we need to check if the configuration + # is better on all instances that have been evaluated for the incumbent. If this is the + # case, then we can replace the incumbent. Otherwise, a) we need to requeue the config to + # obtain the missing instance-seed-budget combination or b) mark this configuration as + # inferior ("rejected") to not consider it again. The comparison behaviour is controlled by + # self.get_instance_seed_budget_keys() and self.get_incumbent_instance_seed_budget_keys(). + # + # Notably, this method is written to support both multi-fidelity and multi-objective + # optimization. 
While the get_instance_seed_budget_keys() method and + # self.get_incumbent_instance_seed_budget_keys() are used for the multi-fidelity behaviour, + # calculate_pareto_front() is used as a hard coded way to support multi-objective + # optimization, including the single objective as special case. calculate_pareto_front() + # is called on the set of all (in case of MO) incumbents amended with the challenger + # configuration, provided it has a sufficient overlap in seed-instance-budget combinations. + # + # Lastly, if we have a self._max_incumbents and the pareto front provides more than this + # specified amount, we cut the incumbents using crowding distance. + # """ + # rh = self.runhistory + # + # # What happens if a config was rejected, but it appears again? Give it another try even if it + # # has already been evaluated? Yes! + # + # #TODO what to do when config is part of the incumbent? + # + # # Associated trials and id + # config_isb_keys = self.get_instance_seed_budget_keys(config) + # config_id = rh.get_config_id(config) + # config_hash = get_config_hash(config) + # + # # We skip updating incumbents if no instances are available + # # Note: This is especially the case if trials of a config are still running + # # because if trials are running, the runhistory does not update the trials in the fast data structure + # if len(config_isb_keys) == 0: + # logger.debug(f"No relevant instances evaluated for config {config_hash}. Updating incumbents is skipped.") + # return + # + # # Now we get the incumbents and see which trials have been used + # incumbents = self.get_incumbents() + # incumbent_ids = [rh.get_config_id(c) for c in incumbents] + # # Find the lowest intersection of instance-seed-budget keys for all incumbents. 
+ # incumbent_isb_keys = self.get_incumbent_instance_seed_budget_keys() + # + # # Save for later + # previous_incumbents = incumbents.copy() + # previous_incumbent_ids = incumbent_ids.copy() + # + # # Little sanity check here for consistency + # if len(incumbents) > 0: + # assert incumbent_isb_keys is not None + # assert len(incumbent_isb_keys) > 0 + # + # # If there are no incumbents at all, we just use the new config as new incumbent + # # Problem: We can add running incumbents + # if len(incumbents) == 0: # incumbent_isb_keys is None and len(incumbents) == 0: + # logger.info(f"Added config {config_hash} as new incumbent because there are no incumbents yet.") + # self._update_trajectory([config]) + # + # # Nothing else to do + # return + # + # # Comparison keys + # # This one is a bit tricky: We would have problems if we compare with budgets because we might have different + # # scenarios (depending on the incumbent selection specified in Successive Halving). + # # 1) Any budget/highest observed budget: We want to get rid of the budgets because if we know it is calculated + # # on the same instance-seed already then we are ready to go. Imagine we would check for the same budgets, + # # then the configs can not be compared although the user does not care on which budgets configurations have + # # been evaluated. + # # 2) Highest budget: We only want to compare the configs if they are evaluated on the highest budget. + # # Here we do actually care about the budgets. Please see the ``get_instance_seed_budget_keys`` method from + # # Successive Halving to get more information. + # # Noitce: compare=True only takes effect when subclass implemented it. -- e.g. in SH it + # # will remove the budgets from the keys. + # config_isb_comparison_keys = self.get_instance_seed_budget_keys(config, compare=True) + # # Find the lowest intersection of instance-seed-budget keys for all incumbents. 
+ # config_incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys(compare=True) # Intersection + # + # # Now we have to check if the new config has been evaluated on the same keys as the incumbents + # # TODO If the config is part of the incumbent then it should always be a subset of the intersection + # if not all([key in config_isb_comparison_keys for key in config_incumbent_isb_comparison_keys]): + # # We can not tell if the new config is better/worse than the incumbents because it has not been + # # evaluated on the necessary trials + # + # # TODO JG add procedure to check if intermediate comparison + # if self._check_for_intermediate_comparison(config): + # if not self._intermediate_comparison(config): + # #Reject config + # logger.debug(f"Rejected config {config_hash} in an intermediate comparison because it " + # f"is dominated by a randomly sampled config from the incumbents on " + # f"{len(config_isb_keys)} trials.") + # self._add_rejected_config(config) + # + # return + # + # else: + # #TODO + # + # logger.debug( + # f"Could not compare config {config_hash} with incumbents because it's evaluated on " + # f"different trials." + # ) + # + # # The config has to go to a queue now as it is a challenger and a potential incumbent + # return + # else: + # # If all instances are available and the config is incumbent and even evaluated on more trials + # # then there's nothing we can do + # # TODO JG: Will always be false, because the incumbent with the smallest number of trials has been ran. + # # TODO JG: Hence: len(config_isb_keys) == len(incumbent_isb_keys) + # if config in incumbents and len(config_isb_keys) > len(incumbent_isb_keys): + # logger.debug( + # "Config is already an incumbent but can not be compared to other incumbents because " + # "the others are missing trials." 
+ # ) + # return + # + # if self._final_comparison(config): + # + # + # # Add config to incumbents so that we compare only the new config and existing incumbents + # if config not in incumbents: + # incumbents.append(config) + # incumbent_ids.append(config_id) + # + # # Now we get all instance-seed-budget keys for each incumbent (they might be different when using budgets) + # all_incumbent_isb_keys = [] + # for incumbent in incumbents: + # # all_incumbent_isb_keys.append(self.get_instance_seed_budget_keys(incumbent)) + # all_incumbent_isb_keys.append(self.get_incumbent_instance_seed_budget_keys()) # !!!!! + # + # #TODO JG it is guaruanteed that the challenger has ran on the intersection of isb_keys + # # of the incumbents, however this is not the case in this part of the code. + # # Here, all the runs of each incumbent used. Maybe the intensifier ensures that the incumbents + # # have ran on the same isb keys in the first place? + # # FIXED IN LINE 580 + # + # #TODO JG get intersection for all incumbent_isb_keys and check if it breaks budget. + # + # # We compare the incumbents now and only return the ones on the Pareto front + # new_incumbents = self._calculate_pareto_front(rh, incumbents, all_incumbent_isb_keys) + # new_incumbent_ids = [rh.get_config_id(c) for c in new_incumbents] + # + # if len(previous_incumbents) == len(new_incumbents): + # if previous_incumbents == new_incumbents: + # # No changes in the incumbents + # self._remove_rejected_config(config_id) # This means that the challenger is not rejected!! + # return + # else: + # # In this case, we have to determine which config replaced which incumbent and reject it + # # We will remove the oldest configuration (the one with the lowest id) because + # # set orders the ids ascending. 
+ # self._remove_incumbent(config=config, previous_incumbent_ids=previous_incumbent_ids, new_incumbent_ids=new_incumbent_ids) + # elif len(previous_incumbents) < len(new_incumbents): + # # Config becomes a new incumbent; nothing is rejected in this case + # self._remove_rejected_config(config_id) + # logger.info( + # f"Config {config_hash} is a new incumbent. " f"Total number of incumbents: {len(new_incumbents)}." + # ) + # else: # len(previous_incumbents) > len(new_incumbents) + # # There might be situations that the incumbents might be removed because of updated cost information of + # # config + # for incumbent in previous_incumbents: + # if incumbent not in new_incumbents: + # self._add_rejected_config(incumbent) + # logger.debug( + # f"Removed incumbent {get_config_hash(incumbent)} because of the updated costs from config " + # f"{config_hash}." + # ) + # + # # Cut incumbents: We only want to keep a specific number of incumbents + # # We use the crowding distance for that + # if len(new_incumbents) > self._max_incumbents: + # new_incumbents = self._cut_incumbents(new_incumbents, all_incumbent_isb_keys) + # #TODO JG adjust. Other option: statistical test or HV (SMS-EMOA reduce function) + # + # self._update_trajectory(new_incumbents) + def _cut_incumbents(self, incumbent_ids: list[int], all_incumbent_isb_keys: list[list[InstanceSeedBudgetKey]]) -> list[int]: new_incumbents = sort_by_crowding_distance(self.runhistory, incumbent_ids, all_incumbent_isb_keys) new_incumbents = new_incumbents[: self._max_incumbents] diff --git a/smac/intensifier/intensifier.py b/smac/intensifier/intensifier.py index fd9ef5344..705c4ab82 100644 --- a/smac/intensifier/intensifier.py +++ b/smac/intensifier/intensifier.py @@ -311,7 +311,6 @@ def _check_for_intermediate_comparison(self, config: Configuration) -> bool: ------- A boolean which decides if the current configuration should be compared against the incumbent. 
""" - config_isb_keys = self.get_instance_seed_budget_keys(config) config_id = self.runhistory.get_config_id(config) config_hash = get_config_hash(config) diff --git a/smac/intensifier/mixins/__init__.py b/smac/intensifier/mixins/__init__.py new file mode 100644 index 000000000..7b024699a --- /dev/null +++ b/smac/intensifier/mixins/__init__.py @@ -0,0 +1,3 @@ +""" +Mixin are used to overwrite single functions in the intensifier classes +""" \ No newline at end of file diff --git a/smac/intensifier/mixins/intermediate_update.py b/smac/intensifier/mixins/intermediate_update.py new file mode 100644 index 000000000..6ec504c8f --- /dev/null +++ b/smac/intensifier/mixins/intermediate_update.py @@ -0,0 +1,289 @@ +from __future__ import annotations + +import copy +import itertools +from abc import abstractmethod +from typing import Any, Callable, Iterator +from scipy.stats import binom + +import dataclasses +import json +from collections import defaultdict +from pathlib import Path + +import numpy as np +from ConfigSpace import Configuration + +import smac +from smac.callback import Callback +from smac.constants import MAXINT +from smac.main.config_selector import ConfigSelector +from smac.runhistory import TrialInfo +from smac.runhistory.dataclasses import ( + InstanceSeedBudgetKey, + InstanceSeedKey, + TrajectoryItem, + TrialValue, +) +from smac.runhistory.runhistory import RunHistory +from smac.scenario import Scenario +from smac.utils.configspace import get_config_hash, print_config_changes +from smac.utils.logging import get_logger +from smac.utils.pareto_front import calculate_pareto_front, sort_by_crowding_distance + +__copyright__ = "Copyright 2022, automl.org" +__license__ = "3-clause BSD" + +logger = get_logger(__name__) + +class DebugComparison(object): + + def _register_comparison(self, **kwargs): + if not hasattr(self, "_intermediate_comparisons_log"): + self._intermediate_comparisons_log = [] + self._intermediate_comparisons_log.append(kwargs) + +class 
FullIncumbentComparison(DebugComparison): + + def _intermediate_comparison(self, config: Configuration) -> bool: + """Compares the configuration against the incumbent + + Parameters + ---------- + config: Configuration + + Returns + ------- + A boolean which indicates if we should continue with this configuration. + """ + config_hash = get_config_hash(config) + incumbents = self.get_incumbents() + config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) + incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys( + compare=True) + + logger.debug( + f"Perform intermediate comparions of config {config_hash} with incumbents to see if it is worse" + ) + # TODO perform comparison with incumbent on current instances. + # Check if the config with these number of trials is part of the Pareto front + + # Check if the incumbents ran on all the ones of this config + if not all([key in incumbent_isb_comparison_keys for key in config_isb_keys]): + logger.debug( + "Config ran on other isb_keys than the incumbents. 
Should not happen.") + return True + + # Ensure that the config is not part of the incumbent + if config in incumbents: + return True + + # Only compare domination between one incumbent (as relaxation measure) + if config not in incumbents: + incumbents.append(config) + + # Only the trials of the challenger + all_incumbent_isb_keys = [config_isb_keys for _ in incumbents] + + new_incumbents = self._calculate_pareto_front(self.runhistory, incumbents, + all_incumbent_isb_keys) + + verdict = config in new_incumbents + self._register_comparison(config=config, + incumbent=self.get_incumbents(), + isb_keys=len(config_isb_keys), + prediction=verdict, + name="FullInc") + + return config in new_incumbents + +class SingleIncumbentComparison(DebugComparison): + def _intermediate_comparison(self, config: Configuration) -> bool: + """Compares the configuration against the incumbent + + Parameters + ---------- + config: Configuration + + Returns + ------- + A boolean which indicates if we should continue with this configuration. + """ + config_hash = get_config_hash(config) + incumbents = self.get_incumbents() + config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) + incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys( + compare=True) + + logger.debug( + f"Perform intermediate comparions of config {config_hash} with incumbents to see if it is worse" + ) + + # Check if the incumbents ran on all the ones of this config + if not all([key in incumbent_isb_comparison_keys for key in config_isb_keys]): + logger.debug( + "Config ran on other isb_keys than the incumbents. 
Should not happen.") + return True + + # Ensure that the config is not part of the incumbent + if config in incumbents: + return True + + # Only compare domination between one incumbent (as relaxation measure) + iid = self._rng.choice(len(incumbents)) + incumbents = [incumbents[iid], config] + + # Only the trials of the challenger + all_incumbent_isb_keys = [config_isb_keys for _ in incumbents] + + new_incumbents = self._calculate_pareto_front(self.runhistory, + incumbents, + all_incumbent_isb_keys) + + verdict = config in new_incumbents + self._register_comparison(config=config, + incumbent=self.get_incumbents(), + isb_keys=len(config_isb_keys), + prediction=verdict, + name="SingleInc") + + return config in new_incumbents + +class RandomComparison(DebugComparison): + def _intermediate_comparison(self, config: Configuration) -> bool: + """Compares the configuration against the incumbent + + Parameters + ---------- + config: Configuration + + Returns + ------- + A boolean which indicates if we should continue with this configuration. + """ + config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) + verdict = self._rng.random() >= 0.5 + self._register_comparison(config=config, + incumbent=self.get_incumbents(), + isb_keys=len(config_isb_keys), + prediction=verdict, + name="Random") + return verdict + +class BootstrapComparison(DebugComparison): + + def _intermediate_comparison(self, config: Configuration) -> bool: + """Compares the configuration by generating bootstraps + + Parameters + ---------- + config: Configuration + + Returns + ------- + A boolean which indicates if we should continue with this configuration. 
+ """ + + + config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) + incumbents = self.get_incumbents() + if config not in incumbents: + incumbents.append(config) + + n_samples = 1000 + if len(config_isb_keys) < 7: # When there are only a limited number of trials available we run all combinations + samples = list(itertools.combinations_with_replacement(list(range(len(config_isb_keys))), r=len(config_isb_keys))) + n_samples = len(samples) + else: + samples = np.random.choice(len(config_isb_keys), + (n_samples, len(config_isb_keys)), + replace=True) + + verdicts = np.zeros(n_samples, dtype=bool) + + + for sid, sample in enumerate(samples): + sample_isb_keys = [config_isb_keys[i] for i in sample] + all_incumbent_isb_keys = [sample_isb_keys]*len(incumbents) + new_incumbents = self._calculate_pareto_front(self.runhistory, + incumbents, + all_incumbent_isb_keys) + + verdicts[sid] = config in new_incumbents + + verdict = np.count_nonzero(verdicts) >= 0.5 * n_samples # The config is in more than 50% of the times non-dominated + #P = np.count_nonzero(verdicts)/n_samples + #print(f"P = {np.count_nonzero(verdicts)}/{n_samples}={P:.2f}") + self._register_comparison(config=config, + incumbent=self.get_incumbents(), + isb_keys=len(config_isb_keys), + prediction=verdict, + name="Bootstrap", + probability=np.count_nonzero(verdicts)/n_samples, + n_samples=n_samples) + return verdict + +class SRaceComparison(DebugComparison): + def _intermediate_comparison(self, config: Configuration) -> bool: + """Compares the configuration by generating bootstraps + + Parameters + ---------- + config: Configuration + + Returns + ------- + A boolean which indicates if we should continue with this configuration. 
+ """ + + def get_alpha(delta, n_instances): + steps = 0 + n = 1 + inst = 0 + while inst < n_instances: + steps += 1 + inst += n + n *= 2 + + return (1 - delta) / (n_instances) * (steps - 1) + + def dominates(a, b): + # Checks if a dominates b + a = np.array(a) + b = np.array(b) + return 1 if np.count_nonzero(a <= b) >= len(a) and np.count_nonzero(a < b) >= 1 else 0 + + config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) + incumbents = self.get_incumbents() + + p_values = [] + chall_perf = self.runhistory._cost(config, config_isb_keys) + for incumbent in incumbents: + inc_perf = self.runhistory._cost(incumbent, config_isb_keys) + n_ij = sum([dominates(*x) for x in zip(chall_perf, inc_perf)]) # Number of times the incumbent candidate dominates the challenger + n_ji = sum([dominates(*x) for x in zip(inc_perf, chall_perf)]) # Number of times the challenger dominates the incumbent candidate + p_value = 1 - binom.cdf(n_ij - 1, n_ij + n_ji, .5) + p_values.append(p_value) + + pvalues_order = np.argsort(p_values) + + # Holm-Bonferroni + reject = np.zeros(len(p_values), dtype=bool) # Do not reject any test by default + alpha = get_alpha(0.05, len(config_isb_keys)) + for i, index in enumerate(pvalues_order): + corrected_alpha = alpha / (len(p_values) - i) # Holm-Bonferroni + if pvalues_order[index] < corrected_alpha: + # Reject H0 -> winner > candidate + reject[index] = True + else: + break + + verdict = np.count_nonzero(reject) != 0 + #P = np.count_nonzero(verdicts)/n_samples + #print(f"P = {np.count_nonzero(verdicts)}/{n_samples}={P:.2f}") + self._register_comparison(config=config, + incumbent=self.get_incumbents(), + isb_keys=len(config_isb_keys), + prediction=verdict, + name="S-Race") + return verdict \ No newline at end of file diff --git a/smac/intensifier/multi_objective_intensifier.py b/smac/intensifier/multi_objective_intensifier.py index a616533aa..c6278ecf4 100644 --- a/smac/intensifier/multi_objective_intensifier.py +++ 
b/smac/intensifier/multi_objective_intensifier.py @@ -82,11 +82,11 @@ def get_instance_seed_budget_keys( """ return self.runhistory.get_instance_seed_budget_keys(config, highest_observed_budget_only=True) -class MOIntensifier(Intensifier, MOIntensifierMixin): +class MOIntensifier(MOIntensifierMixin, Intensifier): pass -class MOSuccessiveHalving(SuccessiveHalving, MOIntensifierMixin): +class MOSuccessiveHalving(MOIntensifierMixin, SuccessiveHalving): pass -class MOHyperband(Hyperband, MOIntensifierMixin): +class MOHyperband(MOIntensifierMixin, Hyperband): pass \ No newline at end of file From da0bb6bf766a38ff5387a03c0feef62bfca473bc Mon Sep 17 00:00:00 2001 From: "J. Rook" Date: Wed, 26 Apr 2023 11:50:03 +0200 Subject: [PATCH 44/74] Make normalization of costs in the mo setting a choice --- smac/runhistory/encoder/abstract_encoder.py | 3 +++ smac/runhistory/encoder/encoder.py | 3 ++- smac/utils/pareto_front.py | 5 ++++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/smac/runhistory/encoder/abstract_encoder.py b/smac/runhistory/encoder/abstract_encoder.py index fc9ff83c9..3f4f3a2fa 100644 --- a/smac/runhistory/encoder/abstract_encoder.py +++ b/smac/runhistory/encoder/abstract_encoder.py @@ -52,6 +52,7 @@ def __init__( scale_percentage: int = 5, seed: int | None = None, native_multi_objective: bool = False, + normalize: bool = True, ) -> None: if considered_states is None: raise TypeError("No success states are given.") @@ -89,6 +90,7 @@ def __init__( self._runhistory: RunHistory | None = None self._native_multi_objective = native_multi_objective + self._normalize = normalize @property def meta(self) -> dict[str, Any]: @@ -303,3 +305,4 @@ def transform_response_values( transformed_values : np.ndarray """ raise NotImplementedError + diff --git a/smac/runhistory/encoder/encoder.py b/smac/runhistory/encoder/encoder.py index 9a90ba072..05ac3fe20 100644 --- a/smac/runhistory/encoder/encoder.py +++ b/smac/runhistory/encoder/encoder.py @@ -18,6 +18,7 @@ class 
RunHistoryEncoder(AbstractRunHistoryEncoder): + def _build_matrix( self, trials: Mapping[TrialKey, TrialValue], @@ -54,7 +55,7 @@ def _build_matrix( # Let's normalize y here # We use the objective_bounds calculated by the runhistory - y_ = normalize_costs(run.cost, self.runhistory.objective_bounds) + y_ = normalize_costs(run.cost, self.runhistory.objective_bounds) if self._normalize else run.cost y_agg = self._multi_objective_algorithm(y_) y[row] = y_agg else: diff --git a/smac/utils/pareto_front.py b/smac/utils/pareto_front.py index 5e8bdc660..79f4676b5 100644 --- a/smac/utils/pareto_front.py +++ b/smac/utils/pareto_front.py @@ -11,6 +11,7 @@ def _get_costs( runhistory: RunHistory, configs: list[Configuration], config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]], + normalize: bool = False, ) -> np.ndarray: """Returns the costs of the passed configurations. @@ -22,6 +23,8 @@ def _get_costs( The configs for which the costs should be returned. config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]] The instance-seed budget keys for the configs for which the costs should be returned. + normalize: bool + If the costs should be normalised Returns ------- @@ -38,7 +41,7 @@ def _get_costs( # configuration # However, we only want to consider the config trials # Average cost is a list of floats (one for each objective) - average_cost = runhistory.average_cost(config, isb_keys, normalize=False) + average_cost = runhistory.average_cost(config, isb_keys, normalize=normalize) average_costs += [average_cost] # Let's work with a numpy array for efficiency From 2ca601c854aed8f587a8ae6d9ef3a9bfa5fba609 Mon Sep 17 00:00:00 2001 From: "J. Rook" Date: Wed, 26 Apr 2023 11:53:21 +0200 Subject: [PATCH 45/74] In the native MO setting the EPM are trained by using the costs retrieved from the RunHistoryEncoder (which could mean they are not normalized. Therefore the predictions are not either. 
Hence, we need to normalize the predictions of the objectives here before computing the hypervolumes. --- .../function/expected_hypervolume.py | 46 ++++++++++++++++--- smac/main/config_selector.py | 6 +++ 2 files changed, 46 insertions(+), 6 deletions(-) diff --git a/smac/acquisition/function/expected_hypervolume.py b/smac/acquisition/function/expected_hypervolume.py index 5f41e912b..734b26885 100644 --- a/smac/acquisition/function/expected_hypervolume.py +++ b/smac/acquisition/function/expected_hypervolume.py @@ -11,12 +11,14 @@ from smac.intensifier.abstract_intensifier import AbstractIntensifier from smac.runhistory import TrialInfo, RunHistory +from smac.runhistory.encoder import AbstractRunHistoryEncoder from smac.runhistory.dataclasses import InstanceSeedBudgetKey from smac.scenario import Scenario from smac.utils.configspace import get_config_hash from smac.utils.logging import get_logger from smac.acquisition.function.abstract_acquisition_function import AbstractAcquisitionFunction from smac.model.abstract_model import AbstractModel +from smac.utils.multi_objective import normalize_costs import torch from botorch.acquisition.multi_objective import ExpectedHypervolumeImprovement @@ -157,6 +159,28 @@ def __init__(self): self._required_updates = ("model",) self.population_hv = None self.population_costs = None + self._reference_point = None + self._objective_bounds = None + + self._runhistory: RunHistory | None = None + self._runhistory_encoder: AbstractRunHistoryEncoder | None = None + + + @property + def runhistory(self) -> RunHistory: + return self._runhistory + + @runhistory.setter + def runhistory(self, runhistory: RunHistory): + self._runhistory = runhistory + + @property + def runhistory_encoder(self) -> AbstractRunHistoryEncoder: + return self._runhistory_encoder + + @runhistory_encoder.setter + def runhistory_encoder(self, runhistory_encoder: AbstractRunHistoryEncoder): + self._runhistory_encoder = runhistory_encoder @property def name(self) -> str: 
@@ -179,8 +203,12 @@ def _update(self, **kwargs: Any) -> None: population_X = np.array([config.get_array() for config in population_configs]) population_costs, _ = self.model.predict_marginalized(population_X) + objective_bounds = np.array(self.runhistory.objective_bounds) + self._objective_bounds = self.runhistory_encoder.transform_response_values(objective_bounds) + self._reference_point = [1.1]*len(self._objective_bounds) + # Compute HV - population_hv = self.get_hypervolume(population_costs, (1.1, 1.1)) + population_hv = self.get_hypervolume(population_costs) self.population_costs = population_costs self.population_hv = population_hv @@ -201,10 +229,13 @@ def get_hypervolume(self, points: np.ndarray = None, reference_point: list = Non ------ hypervolume: float """ + # Normalize the objectives here to give equal attention to the objectives when computing the HV + points = [normalize_costs(p, self._objective_bounds) for p in points] + hv = pygmo.hypervolume(points) - if reference_point is None: - reference_point = hv.refpoint(offset=1) - return hv.compute(reference_point) # TODO: Fix reference points + # if reference_point is None: + # self._reference_point = hv.refpoint(offset=1) + return hv.compute(self._reference_point) def _compute(self, X: np.ndarray) -> np.ndarray: """Computes the PHVI values and its derivatives. @@ -229,11 +260,14 @@ def _compute(self, X: np.ndarray) -> np.ndarray: # all instances. 
Idea is this prevents optimizing for the initial instances which get it stuck in local optima # Option 2: Only on instances of population # Option 3: EVHI per instance and aggregate afterwards - mean, var_ = self.model.predict_marginalized(X) + mean, var_ = self.model.predict_marginalized(X) #Expected to be not normalized + phvi = np.zeros(len(X)) for i, indiv in enumerate(mean): - phvi[i] = self.get_hypervolume(list(self.population_costs) + [indiv], (1.1, 1.1)) - self.population_hv + points = list(self.population_costs) + [indiv] + hv = self.get_hypervolume(points) + phvi[i] = hv - self.population_hv # if len(X) == 10000: # for op in ["max", "min", "mean", "median"]: diff --git a/smac/main/config_selector.py b/smac/main/config_selector.py index a6452ed82..a77b66322 100644 --- a/smac/main/config_selector.py +++ b/smac/main/config_selector.py @@ -127,6 +127,12 @@ def _set_components( if len(self._initial_design_configs) == 0: raise RuntimeError("SMAC needs initial configurations to work.") + if hasattr(self._acquisition_function, "runhistory"): + self._acquisition_function.runhistory = runhistory + + if hasattr(self._acquisition_function, "runhistory_encoder"): + self._acquisition_function.runhistory_encoder = runhistory_encoder + @property def meta(self) -> dict[str, Any]: """Returns the meta data of the created object.""" From 603182a23cf7ac61208edabc3d9137bccb60b509 Mon Sep 17 00:00:00 2001 From: "J. 
Rook" Date: Thu, 27 Apr 2023 11:34:52 +0200 Subject: [PATCH 46/74] Generic HVI class --- .../function/expected_hypervolume.py | 197 +++++++++++------- 1 file changed, 125 insertions(+), 72 deletions(-) diff --git a/smac/acquisition/function/expected_hypervolume.py b/smac/acquisition/function/expected_hypervolume.py index 734b26885..a023fb9b7 100644 --- a/smac/acquisition/function/expected_hypervolume.py +++ b/smac/acquisition/function/expected_hypervolume.py @@ -39,8 +39,10 @@ def __init__(self) -> None: class _ModelProxy(Model, ABC): - def __init__(self, model: AbstractModel): + def __init__(self, model: AbstractModel, objective_bounds: list[tuple[float, float]]): + super(_ModelProxy).__init__() self.model = model + self._objective_bounds = objective_bounds def posterior(self, X: Tensor, **kwargs: Any) -> _PosteriorProxy: """Docstring @@ -59,6 +61,10 @@ def posterior(self, X: Tensor, **kwargs: Any) -> _PosteriorProxy: # start_time = time.time() # print(f"Start predicting ") mean, var_ = self.model.predict_marginalized(X) + normalized_mean = np.array([normalize_costs(m, self._objective_bounds) for m in mean]) + scale = normalized_mean / mean + var_ *= scale # Scale variance accordingly + mean = normalized_mean # print(f"Done in {time.time() - start_time}s") post = _PosteriorProxy() post.mean = torch.asarray(mean).reshape(X.shape[0], 1, -1) # 2D -> 3D @@ -66,22 +72,41 @@ def posterior(self, X: Tensor, **kwargs: Any) -> _PosteriorProxy: return post -class EHVI(AbstractAcquisitionFunction): +class AbstractHVI(AbstractAcquisitionFunction): def __init__(self): - super(EHVI, self).__init__() + """Computes for a given x the predicted hypervolume improvement as + acquisition value. 
+ """ + super(AbstractHVI, self).__init__() self._required_updates = ("model",) - self._ehvi: ExpectedHypervolumeImprovement | None = None + self._reference_point = None + self._objective_bounds = None + + self._runhistory: RunHistory | None = None + self._runhistory_encoder: AbstractRunHistoryEncoder | None = None + + @property + def runhistory(self) -> RunHistory: + return self._runhistory + + @runhistory.setter + def runhistory(self, runhistory: RunHistory): + self._runhistory = runhistory + + @property + def runhistory_encoder(self) -> AbstractRunHistoryEncoder: + return self._runhistory_encoder + + @runhistory_encoder.setter + def runhistory_encoder(self, runhistory_encoder: AbstractRunHistoryEncoder): + self._runhistory_encoder = runhistory_encoder @property def name(self) -> str: - return "Expected Hypervolume Improvement" + return "Abstract Hypervolume Improvement" def _update(self, **kwargs: Any) -> None: - # TODO either pass runhistory in config_selector - # and store incumbents in runhistory -or- work - # with a callback. This class can own a callback - # updating the partitioning and the ehv model - super(EHVI, self)._update(**kwargs) + super(AbstractHVI, self)._update(**kwargs) incumbents: list[Configuration] = kwargs.get("incumbents", None) if incumbents is None: @@ -90,20 +115,100 @@ def _update(self, **kwargs: Any) -> None: raise ValueError(f"No incumbents here. Did the intensifier properly " "update the incumbents in the runhistory?") + objective_bounds = np.array(self.runhistory.objective_bounds) + self._objective_bounds = self.runhistory_encoder.transform_response_values( + objective_bounds) + self._reference_point = [1.1] * len(self._objective_bounds) + + def get_hypervolume(self, points: np.ndarray = None, reference_point: list = None) -> float: + """ + Compute the hypervolume + + Parameters + ---------- + points : np.ndarray + A 2d numpy array. 
1st dimension is an entity and the 2nd dimension are the costs + reference_point : list + + Return + ------ + hypervolume: float + """ + # Normalize the objectives here to give equal attention to the objectives when computing the HV + points = [normalize_costs(p, self._objective_bounds) for p in points] + + hv = pygmo.hypervolume(points) + # if reference_point is None: + # self._reference_point = hv.refpoint(offset=1) + return hv.compute(self._reference_point) + + def _compute(self, X: np.ndarray) -> np.ndarray: + """Computes the PHVI values and its derivatives. + + Parameters + ---------- + X: np.ndarray(N, D), The input points where the acquisition function + should be evaluated. The dimensionality of X is (N, D), with N as + the number of points to evaluate at and D is the number of + dimensions of one X. + + Returns + ------- + np.ndarray(N,1) + Expected HV Improvement of X + """ + if len(X.shape) == 1: + X = X[:, np.newaxis] + + # TODO non-dominated sorting of costs. Compute EHVI only until the EHVI is not expected to improve anymore. + # Option 1: Supplement missing instances of population with acq. function to get predicted performance over + # all instances. 
Idea is this prevents optimizing for the initial instances which get it stuck in local optima + # Option 2: Only on instances of population + # Option 3: EVHI per instance and aggregate afterwards + mean, var_ = self.model.predict_marginalized(X) #Expected to be not normalized + + + phvi = np.zeros(len(X)) + for i, indiv in enumerate(mean): + points = list(self.population_costs) + [indiv] + hv = self.get_hypervolume(points) + phvi[i] = hv - self.population_hv + + # if len(X) == 10000: + # for op in ["max", "min", "mean", "median"]: + # val = getattr(np, op)(phvi) + # print(f"{op:6} - {val}") + # time.sleep(1.5) + + return phvi.reshape(-1, 1) + +class EHVI(AbstractHVI): + def __init__(self): + super(EHVI, self).__init__() + self._ehvi: ExpectedHypervolumeImprovement | None = None + + @property + def name(self) -> str: + return "Expected Hypervolume Improvement" + + def _update(self, **kwargs: Any) -> None: + super(EHVI, self)._update(**kwargs) + incumbents: list[Configuration] = kwargs.get("incumbents", None) + # Update EHVI # Prediction all population_configs = incumbents population_X = np.array([config.get_array() for config in population_configs]) population_costs, _ = self.model.predict_marginalized(population_X) - - # Compute HV - # population_hv = self.get_hypervolume(population_costs) + # Normalize the objectives here to give equal attention to the objectives when computing the HV + population_costs = [normalize_costs(p, self._objective_bounds) for p in population_costs] # BOtorch EHVI implementation - bomodel = _ModelProxy(self.model) - ref_point = pygmo.hypervolume(population_costs).refpoint( - offset=1 - ) # TODO get proper reference points from user/cutoffs + bomodel = _ModelProxy(self.model, self._objective_bounds) + # ref_point = pygmo.hypervolume(population_costs).refpoint( + # offset=1 + # ) # TODO get proper reference points from user/cutoffs + ref_point = [1.1] * len(self._objective_bounds) # ref_point = torch.asarray(ref_point) # TODO partition 
from all runs instead of only population? # TODO NondominatedPartitioning and ExpectedHypervolumeImprovement seem no too difficult to implement natively @@ -150,37 +255,12 @@ def _compute(self, X: np.ndarray) -> np.ndarray: # # return ehvi.reshape(-1, 1) -class PHVI(AbstractAcquisitionFunction): +class PHVI(AbstractHVI): + def __init__(self): - """Computes for a given x the predicted hypervolume improvement as - acquisition value. - """ super(PHVI, self).__init__() - self._required_updates = ("model",) self.population_hv = None self.population_costs = None - self._reference_point = None - self._objective_bounds = None - - self._runhistory: RunHistory | None = None - self._runhistory_encoder: AbstractRunHistoryEncoder | None = None - - - @property - def runhistory(self) -> RunHistory: - return self._runhistory - - @runhistory.setter - def runhistory(self, runhistory: RunHistory): - self._runhistory = runhistory - - @property - def runhistory_encoder(self) -> AbstractRunHistoryEncoder: - return self._runhistory_encoder - - @runhistory_encoder.setter - def runhistory_encoder(self, runhistory_encoder: AbstractRunHistoryEncoder): - self._runhistory_encoder = runhistory_encoder @property def name(self) -> str: @@ -188,14 +268,7 @@ def name(self) -> str: def _update(self, **kwargs: Any) -> None: super(PHVI, self)._update(**kwargs) - - # TODO abstract this away in a general HVI class incumbents: list[Configuration] = kwargs.get("incumbents", None) - if incumbents is None: - raise ValueError(f"Incumbents are not passed properly.") - if len(incumbents) == 0: - raise ValueError(f"No incumbents here. 
Did the intensifier properly " - "update the incumbents in the runhistory?") # Update PHVI # Prediction all @@ -203,10 +276,6 @@ def _update(self, **kwargs: Any) -> None: population_X = np.array([config.get_array() for config in population_configs]) population_costs, _ = self.model.predict_marginalized(population_X) - objective_bounds = np.array(self.runhistory.objective_bounds) - self._objective_bounds = self.runhistory_encoder.transform_response_values(objective_bounds) - self._reference_point = [1.1]*len(self._objective_bounds) - # Compute HV population_hv = self.get_hypervolume(population_costs) @@ -231,10 +300,7 @@ def get_hypervolume(self, points: np.ndarray = None, reference_point: list = Non """ # Normalize the objectives here to give equal attention to the objectives when computing the HV points = [normalize_costs(p, self._objective_bounds) for p in points] - hv = pygmo.hypervolume(points) - # if reference_point is None: - # self._reference_point = hv.refpoint(offset=1) return hv.compute(self._reference_point) def _compute(self, X: np.ndarray) -> np.ndarray: @@ -255,24 +321,11 @@ def _compute(self, X: np.ndarray) -> np.ndarray: if len(X.shape) == 1: X = X[:, np.newaxis] - # TODO non-dominated sorting of costs. Compute EHVI only until the EHVI is not expected to improve anymore. - # Option 1: Supplement missing instances of population with acq. function to get predicted performance over - # all instances. 
Idea is this prevents optimizing for the initial instances which get it stuck in local optima - # Option 2: Only on instances of population - # Option 3: EVHI per instance and aggregate afterwards - mean, var_ = self.model.predict_marginalized(X) #Expected to be not normalized - - + mean, _ = self.model.predict_marginalized(X) #Expected to be not normalized phvi = np.zeros(len(X)) for i, indiv in enumerate(mean): points = list(self.population_costs) + [indiv] hv = self.get_hypervolume(points) phvi[i] = hv - self.population_hv - # if len(X) == 10000: - # for op in ["max", "min", "mean", "median"]: - # val = getattr(np, op)(phvi) - # print(f"{op:6} - {val}") - # time.sleep(1.5) - return phvi.reshape(-1, 1) From a109f48f8e66b2281f65650b65509fa1fc83a4b6 Mon Sep 17 00:00:00 2001 From: "J. Rook" Date: Tue, 2 May 2023 14:26:51 +0200 Subject: [PATCH 47/74] Decomposed the intensifier decision logic and created mixins to easily play around with different implementations. --- smac/facade/multi_objective_facade.py | 32 +- smac/intensifier/abstract_intensifier.py | 6 +- .../mixins/intermediate_decision.py | 172 ++++++++++ .../intensifier/mixins/intermediate_update.py | 307 +++++++++++++++++- smac/intensifier/mixins/update_incumbent.py | 132 ++++++++ .../multi_objective_intensifier.py | 6 +- smac/utils/pareto_front.py | 2 +- 7 files changed, 617 insertions(+), 40 deletions(-) create mode 100644 smac/intensifier/mixins/intermediate_decision.py create mode 100644 smac/intensifier/mixins/update_incumbent.py diff --git a/smac/facade/multi_objective_facade.py b/smac/facade/multi_objective_facade.py index df267ca1c..5846db079 100644 --- a/smac/facade/multi_objective_facade.py +++ b/smac/facade/multi_objective_facade.py @@ -13,6 +13,7 @@ from smac.multi_objective.aggregation_strategy import NoAggregationStrategy from smac.random_design.probability_design import ProbabilityRandomDesign from smac.runhistory.encoder.encoder import RunHistoryEncoder +from 
smac.runhistory.encoder.log_encoder import RunHistoryLogEncoder from smac.scenario import Scenario from smac.utils.logging import get_logger from smac.acquisition.maximizer.multi_objective_search import MOLocalAndSortedRandomSearch @@ -81,7 +82,7 @@ def get_model( # type: ignore def get_intensifier( # type: ignore scenario: Scenario, *, - max_config_calls: int = 3, + max_config_calls: int = 2000, max_incumbents: int = 10, ) -> Intensifier: """Returns ``MOIntensifier`` as intensifier. Uses the default configuration for ``race_against``. @@ -89,7 +90,7 @@ def get_intensifier( # type: ignore Parameters ---------- scenario : Scenario - max_config_calls : int, defaults to 3 + max_config_calls : int, defaults to 2000 Maximum number of configuration evaluations. Basically, how many instance-seed keys should be max evaluated for a configuration. max_incumbents : int, defaults to 10 @@ -131,30 +132,6 @@ def get_acquisition_maximizer( # type: ignore return optimizer - @staticmethod - # TODO update intensifier - def get_intensifier( - scenario: Scenario, - *, - max_config_calls: int = 2000, - max_incumbents: int = 10, - ) -> Intensifier: - """Returns ``Intensifier`` as intensifier. Supports budgets. - - Parameters - ---------- - max_config_calls : int, defaults to 3 - Maximum number of configuration evaluations. Basically, how many instance-seed keys should be evaluated at - maximum for a configuration. - max_incumbents : int, defaults to 10 - How many incumbents to keep track of in the case of multi-objective. 
- """ - return Intensifier( - scenario=scenario, - max_config_calls=max_config_calls, - max_incumbents=max_incumbents, - ) - @staticmethod # TODO update initial design to LHD def get_initial_design( # type: ignore @@ -207,4 +184,5 @@ def get_multi_objective_algorithm( # type: ignore @staticmethod def get_runhistory_encoder(scenario: Scenario) -> RunHistoryEncoder: """Returns the default runhistory encoder with native multi objective support enabled.""" - return RunHistoryEncoder(scenario, native_multi_objective=True) + return RunHistoryEncoder(scenario, native_multi_objective=True, normalize=False) + # return RunHistoryLogEncoder(scenario, native_multi_objective=True, normalize=False) diff --git a/smac/intensifier/abstract_intensifier.py b/smac/intensifier/abstract_intensifier.py index f921cc3dd..fe2a770cd 100644 --- a/smac/intensifier/abstract_intensifier.py +++ b/smac/intensifier/abstract_intensifier.py @@ -606,11 +606,7 @@ def update_incumbents(self, config: Configuration) -> None: new_incumbent_ids = [rh.get_config_id(c) for c in new_incumbents] # Update trajectory - if previous_incumbents == new_incumbents: - # No changes in the incumbents -> Challenger is rejected, incumbent remains - # if config not in new_incumbents: - # self._add_rejected_config(config) # TODO JG: Here the config was initially removed from the rejected list... 
- # else: + if previous_incumbents == new_incumbents: # Only happens with incumbent config self._remove_rejected_config(config) return elif len(previous_incumbents) == len(new_incumbents): diff --git a/smac/intensifier/mixins/intermediate_decision.py b/smac/intensifier/mixins/intermediate_decision.py new file mode 100644 index 000000000..4f8884831 --- /dev/null +++ b/smac/intensifier/mixins/intermediate_decision.py @@ -0,0 +1,172 @@ +from __future__ import annotations + +import copy +import itertools +from abc import abstractmethod +from typing import Any, Callable, Iterator +from scipy.stats import binom + +import dataclasses +import json +from collections import defaultdict +from pathlib import Path + +import numpy as np +from ConfigSpace import Configuration + +import smac +from smac.callback import Callback +from smac.constants import MAXINT +from smac.main.config_selector import ConfigSelector +from smac.runhistory import TrialInfo +from smac.runhistory.dataclasses import ( + InstanceSeedBudgetKey, + InstanceSeedKey, + TrajectoryItem, + TrialValue, +) +from smac.runhistory.runhistory import RunHistory +from smac.scenario import Scenario +from smac.utils.configspace import get_config_hash, print_config_changes +from smac.utils.logging import get_logger +from smac.utils.pareto_front import calculate_pareto_front, sort_by_crowding_distance, _get_costs + +__copyright__ = "Copyright 2022, automl.org" +__license__ = "3-clause BSD" + +logger = get_logger(__name__) + + +def _dominates(a, b) -> bool: + # Checks if a dominates b + a = np.array(a) + b = np.array(b) + return np.count_nonzero(a <= b) >= len(a) and np.count_nonzero(a < b) >= 1 + +class NewCostDominatesOldCost(): + + def _check_for_intermediate_comparison(self, config: Configuration) -> bool: + """ + + Parameters + ---------- + config: Configuration + + Returns + ------- + A boolean which decides if the current configuration should be compared against the incumbent. 
+ """ + config_isb_keys = self.get_instance_seed_budget_keys(config) + + if not hasattr(self, "_old_config_cost"): + self._old_config_cost = {} # TODO remove configuration when done + + new_cost = self.runhistory.average_cost(config, config_isb_keys) + if config not in self._old_config_cost: + self._old_config_cost[config] = new_cost + return True + + old_cost = self._old_config_cost[config] + if _dominates(new_cost, old_cost): + self._old_config_cost[config] = new_cost + return True + return False + +class NewCostDominatesOldCostSkipFirst(): + + def _check_for_intermediate_comparison(self, config: Configuration) -> bool: + """ Do the first comparison with the incumbent when the configuration dominates the cost after finishing its first trial + + Parameters + ---------- + config: Configuration + + Returns + ------- + A boolean which decides if the current configuration should be compared against the incumbent. + """ + config_isb_keys = self.get_instance_seed_budget_keys(config) + + if not hasattr(self, "_old_config_cost"): + self._old_config_cost = {} # TODO remove configuration when done + + new_cost = self.runhistory.average_cost(config, config_isb_keys) + if config not in self._old_config_cost: + self._old_config_cost[config] = new_cost + return False + + old_cost = self._old_config_cost[config] + if _dominates(new_cost, old_cost): + self._old_config_cost[config] = new_cost + return True + return False + +class DoublingNComparison(): + + def _check_for_intermediate_comparison(self, config: Configuration) -> bool: + """ + + Parameters + ---------- + config: Configuration + + Returns + ------- + A boolean which decides if the current configuration should be compared against the incumbent. 
+ """ + config_isb_keys = self.get_instance_seed_budget_keys(config) + config_id = self.runhistory.get_config_id(config) + config_hash = get_config_hash(config) + + # Do not compare very early in the process + # if len(config_isb_keys) < 4: + # return False + + # Find N in _queue + N = None + for c, cn in self._queue: + if config == c: + N = cn + break + + if N is None: + logger.debug( + f"This should not happen, but config {config_hash} is not in the queue.") + return False + + return len(config_isb_keys) == N + +class DoublingNComparisonFour(): + + def _check_for_intermediate_comparison(self, config: Configuration) -> bool: + """ + + Parameters + ---------- + config: Configuration + + Returns + ------- + A boolean which decides if the current configuration should be compared against the incumbent. + """ + config_isb_keys = self.get_instance_seed_budget_keys(config) + config_id = self.runhistory.get_config_id(config) + config_hash = get_config_hash(config) + + # Do not compare very early in the process + if len(config_isb_keys) < 4: + return False + + # Find N in _queue + N = None + for c, cn in self._queue: + if config == c: + N = cn + break + + if N is None: + logger.debug( + f"This should not happen, but config {config_hash} is not in the queue.") + return False + + return len(config_isb_keys) == N diff --git a/smac/intensifier/mixins/intermediate_update.py b/smac/intensifier/mixins/intermediate_update.py index 6ec504c8f..b444e8c77 100644 --- a/smac/intensifier/mixins/intermediate_update.py +++ b/smac/intensifier/mixins/intermediate_update.py @@ -29,13 +29,14 @@ from smac.scenario import Scenario from smac.utils.configspace import get_config_hash, print_config_changes from smac.utils.logging import get_logger -from smac.utils.pareto_front import calculate_pareto_front, sort_by_crowding_distance +from smac.utils.pareto_front import calculate_pareto_front, sort_by_crowding_distance, _get_costs __copyright__ = "Copyright 2022, automl.org" __license__ = "3-clause 
BSD" logger = get_logger(__name__) + class DebugComparison(object): def _register_comparison(self, **kwargs): @@ -43,6 +44,17 @@ def _register_comparison(self, **kwargs): self._intermediate_comparisons_log = [] self._intermediate_comparisons_log.append(kwargs) + def _get_costs_comp(self, config: Configuration) -> dict: + incumbents = self.get_incumbents() + if config not in incumbents: + incumbents.append(config) + config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) + all_incumbent_isb_keys = [config_isb_keys for _ in incumbents] + costs = _get_costs(self.runhistory, incumbents, all_incumbent_isb_keys) + + return {conf: cost for conf, cost in zip(incumbents, costs)} + + class FullIncumbentComparison(DebugComparison): def _intermediate_comparison(self, config: Configuration) -> bool: @@ -92,11 +104,13 @@ def _intermediate_comparison(self, config: Configuration) -> bool: self._register_comparison(config=config, incumbent=self.get_incumbents(), isb_keys=len(config_isb_keys), + costs=self._get_costs_comp(config), prediction=verdict, name="FullInc") return config in new_incumbents + class SingleIncumbentComparison(DebugComparison): def _intermediate_comparison(self, config: Configuration) -> bool: """Compares the configuration against the incumbent @@ -144,11 +158,72 @@ def _intermediate_comparison(self, config: Configuration) -> bool: self._register_comparison(config=config, incumbent=self.get_incumbents(), isb_keys=len(config_isb_keys), + costs=self._get_costs_comp(config), prediction=verdict, name="SingleInc") return config in new_incumbents + +class ClosestIncumbentComparison(DebugComparison): + def _intermediate_comparison(self, config: Configuration) -> bool: + """Compares the configuration against the incumbent + + Parameters + ---------- + config: Configuration + + Returns + ------- + A boolean which indicates if we should continue with this configuration. 
+ """ + config_hash = get_config_hash(config) + incumbents = self.get_incumbents() + config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) + incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys( + compare=True) + + logger.debug( + f"Perform intermediate comparions of config {config_hash} with incumbents to see if it is worse" + ) + + # Check if the incumbents ran on all the ones of this config + if not all([key in incumbent_isb_comparison_keys for key in config_isb_keys]): + logger.debug( + "Config ran on other isb_keys than the incumbents. Should not happen.") + return True + + # Ensure that the config is not part of the incumbent + if config in incumbents: + return True + + # Only compare domination between one incumbent (as relaxation measure) + #iid = self._rng.choice(len(incumbents)) + #TODO Normalize to determine closests? + inc_costs = _get_costs(self.runhistory, incumbents, [config_isb_keys for _ in incumbents], normalize=True) + conf_cost = _get_costs(self.runhistory, [config], [config_isb_keys], normalize=True)[0] + distances = [np.linalg.norm(inc_cost - conf_cost) for inc_cost in inc_costs] + iid = np.argmin(distances) + incumbents = [incumbents[iid], config] + + # Only the trials of the challenger + all_incumbent_isb_keys = [config_isb_keys for _ in incumbents] + + new_incumbents = self._calculate_pareto_front(self.runhistory, + incumbents, + all_incumbent_isb_keys) + + verdict = config in new_incumbents + self._register_comparison(config=config, + incumbent=self.get_incumbents(), + isb_keys=len(config_isb_keys), + costs=self._get_costs_comp(config), + prediction=verdict, + name="ClosestInc") + + return config in new_incumbents + + class RandomComparison(DebugComparison): def _intermediate_comparison(self, config: Configuration) -> bool: """Compares the configuration against the incumbent @@ -161,19 +236,36 @@ def _intermediate_comparison(self, config: Configuration) -> bool: ------- A boolean which indicates 
if we should continue with this configuration. """ + config_hash = get_config_hash(config) + incumbents = self.get_incumbents() + config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) + incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys( + compare=True) + + # Check if the incumbents ran on all the ones of this config + if not all([key in incumbent_isb_comparison_keys for key in config_isb_keys]): + logger.debug( + "Config ran on other isb_keys than the incumbents. Should not happen.") + return True + + # Ensure that the config is not part of the incumbent + if config in incumbents: + return True + config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) verdict = self._rng.random() >= 0.5 self._register_comparison(config=config, incumbent=self.get_incumbents(), isb_keys=len(config_isb_keys), + costs=self._get_costs_comp(config), prediction=verdict, name="Random") return verdict -class BootstrapComparison(DebugComparison): +class NoComparison(DebugComparison): def _intermediate_comparison(self, config: Configuration) -> bool: - """Compares the configuration by generating bootstraps + """Compares the configuration against the incumbent Parameters ---------- @@ -183,10 +275,62 @@ def _intermediate_comparison(self, config: Configuration) -> bool: ------- A boolean which indicates if we should continue with this configuration. """ + config_hash = get_config_hash(config) + incumbents = self.get_incumbents() + config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) + incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys( + compare=True) + + # Check if the incumbents ran on all the ones of this config + if not all([key in incumbent_isb_comparison_keys for key in config_isb_keys]): + logger.debug( + "Config ran on other isb_keys than the incumbents. 
Should not happen.") + return True + # Ensure that the config is not part of the incumbent + if config in incumbents: + return True config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) + verdict = True + self._register_comparison(config=config, + incumbent=self.get_incumbents(), + isb_keys=len(config_isb_keys), + costs=self.get_costs(config), + prediction=verdict, + name="NoComp") + return verdict + + +class BootstrapComparison(DebugComparison): + + def _intermediate_comparison(self, config: Configuration) -> bool: + """Compares the configuration by generating bootstraps + + Parameters + ---------- + config: Configuration + + Returns + ------- + A boolean which indicates if we should continue with this configuration. + """ + config_hash = get_config_hash(config) incumbents = self.get_incumbents() + config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) + incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys( + compare=True) + + # Check if the incumbents ran on all the ones of this config + if not all([key in incumbent_isb_comparison_keys for key in config_isb_keys]): + logger.debug( + "Config ran on other isb_keys than the incumbents. 
Should not happen.") + return True + + # Ensure that the config is not part of the incumbent + if config in incumbents: + return True + if config not in incumbents: incumbents.append(config) @@ -217,12 +361,153 @@ def _intermediate_comparison(self, config: Configuration) -> bool: self._register_comparison(config=config, incumbent=self.get_incumbents(), isb_keys=len(config_isb_keys), + costs=self._get_costs_comp(config), prediction=verdict, name="Bootstrap", probability=np.count_nonzero(verdicts)/n_samples, n_samples=n_samples) return verdict + +class BootstrapSingleComparison(DebugComparison): + + def _intermediate_comparison(self, config: Configuration) -> bool: + """Compares the configuration by generating bootstraps + + Parameters + ---------- + config: Configuration + + Returns + ------- + A boolean which indicates if we should continue with this configuration. + """ + + config_hash = get_config_hash(config) + incumbents = self.get_incumbents() + config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) + incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys( + compare=True) + + # Check if the incumbents ran on all the ones of this config + if not all([key in incumbent_isb_comparison_keys for key in config_isb_keys]): + logger.debug( + "Config ran on other isb_keys than the incumbents. 
Should not happen.") + return True + + # Ensure that the config is not part of the incumbent + if config in incumbents: + return True + + iid = self._rng.choice(len(incumbents)) + incumbents = [incumbents[iid], config] + + n_samples = 1000 + if len(config_isb_keys) < 7: # When there are only a limited number of trials available we run all combinations + samples = list(itertools.combinations_with_replacement(list(range(len(config_isb_keys))), r=len(config_isb_keys))) + n_samples = len(samples) + else: + samples = np.random.choice(len(config_isb_keys), + (n_samples, len(config_isb_keys)), + replace=True) + + verdicts = np.zeros(n_samples, dtype=bool) + + + for sid, sample in enumerate(samples): + sample_isb_keys = [config_isb_keys[i] for i in sample] + all_incumbent_isb_keys = [sample_isb_keys]*len(incumbents) + new_incumbents = self._calculate_pareto_front(self.runhistory, + incumbents, + all_incumbent_isb_keys) + + verdicts[sid] = config in new_incumbents + + verdict = np.count_nonzero(verdicts) >= 0.5 * n_samples # The config is in more than 50% of the times non-dominated + #P = np.count_nonzero(verdicts)/n_samples + #print(f"P = {np.count_nonzero(verdicts)}/{n_samples}={P:.2f}") + self._register_comparison(config=config, + incumbent=self.get_incumbents(), + isb_keys=len(config_isb_keys), + costs=self._get_costs_comp(config), + prediction=verdict, + name="BootstrapSingle", + probability=np.count_nonzero(verdicts)/n_samples, + n_samples=n_samples) + return verdict + + +class BootstrapClosestComparison(DebugComparison): + + def _intermediate_comparison(self, config: Configuration) -> bool: + """Compares the configuration by generating bootstraps + + Parameters + ---------- + config: Configuration + + Returns + ------- + A boolean which indicates if we should continue with this configuration. 
+ """ + + config_hash = get_config_hash(config) + incumbents = self.get_incumbents() + config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) + incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys( + compare=True) + + # Check if the incumbents ran on all the ones of this config + if not all([key in incumbent_isb_comparison_keys for key in config_isb_keys]): + logger.debug( + "Config ran on other isb_keys than the incumbents. Should not happen.") + return True + + # Ensure that the config is not part of the incumbent + if config in incumbents: + return True + + inc_costs = _get_costs(self.runhistory, incumbents, [config_isb_keys for _ in incumbents], normalize=True) + conf_cost = _get_costs(self.runhistory, [config], [config_isb_keys], normalize=True)[0] + distances = [np.linalg.norm(inc_cost - conf_cost) for inc_cost in inc_costs] + iid = np.argmin(distances) + incumbents = [incumbents[iid], config] + + n_samples = 1000 + if len(config_isb_keys) < 7: # When there are only a limited number of trials available we run all combinations + samples = list(itertools.combinations_with_replacement(list(range(len(config_isb_keys))), r=len(config_isb_keys))) + n_samples = len(samples) + else: + samples = np.random.choice(len(config_isb_keys), + (n_samples, len(config_isb_keys)), + replace=True) + + verdicts = np.zeros(n_samples, dtype=bool) + + + for sid, sample in enumerate(samples): + sample_isb_keys = [config_isb_keys[i] for i in sample] + all_incumbent_isb_keys = [sample_isb_keys]*len(incumbents) + new_incumbents = self._calculate_pareto_front(self.runhistory, + incumbents, + all_incumbent_isb_keys) + + verdicts[sid] = config in new_incumbents + + verdict = np.count_nonzero(verdicts) >= 0.5 * n_samples # The config is in more than 50% of the times non-dominated + #P = np.count_nonzero(verdicts)/n_samples + #print(f"P = {np.count_nonzero(verdicts)}/{n_samples}={P:.2f}") + self._register_comparison(config=config, + 
incumbent=self.get_incumbents(), + isb_keys=len(config_isb_keys), + costs=self._get_costs_comp(config), + prediction=verdict, + name="BootstrapClosest", + probability=np.count_nonzero(verdicts)/n_samples, + n_samples=n_samples) + return verdict + + class SRaceComparison(DebugComparison): def _intermediate_comparison(self, config: Configuration) -> bool: """Compares the configuration by generating bootstraps @@ -253,8 +538,21 @@ def dominates(a, b): b = np.array(b) return 1 if np.count_nonzero(a <= b) >= len(a) and np.count_nonzero(a < b) >= 1 else 0 - config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) + config_hash = get_config_hash(config) incumbents = self.get_incumbents() + config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) + incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys( + compare=True) + + # Check if the incumbents ran on all the ones of this config + if not all([key in incumbent_isb_comparison_keys for key in config_isb_keys]): + logger.debug( + "Config ran on other isb_keys than the incumbents. 
Should not happen.") + return True + + # Ensure that the config is not part of the incumbent + if config in incumbents: + return True p_values = [] chall_perf = self.runhistory._cost(config, config_isb_keys) @@ -284,6 +582,7 @@ def dominates(a, b): self._register_comparison(config=config, incumbent=self.get_incumbents(), isb_keys=len(config_isb_keys), + costs={conf: cost for conf, cost in zip(incumbents, costs)}, prediction=verdict, name="S-Race") return verdict \ No newline at end of file diff --git a/smac/intensifier/mixins/update_incumbent.py b/smac/intensifier/mixins/update_incumbent.py new file mode 100644 index 000000000..8ddb5d8e2 --- /dev/null +++ b/smac/intensifier/mixins/update_incumbent.py @@ -0,0 +1,132 @@ +from __future__ import annotations + +import copy +import itertools +from abc import abstractmethod +from typing import Any, Callable, Iterator +from scipy.stats import binom + +import dataclasses +import json +from collections import defaultdict +from pathlib import Path + +import numpy as np +from ConfigSpace import Configuration + +import smac +from smac.callback import Callback +from smac.constants import MAXINT +from smac.main.config_selector import ConfigSelector +from smac.runhistory import TrialInfo +from smac.runhistory.dataclasses import ( + InstanceSeedBudgetKey, + InstanceSeedKey, + TrajectoryItem, + TrialValue, +) +from smac.runhistory.runhistory import RunHistory +from smac.scenario import Scenario +from smac.utils.configspace import get_config_hash, print_config_changes +from smac.utils.logging import get_logger +from smac.utils.pareto_front import calculate_pareto_front, sort_by_crowding_distance, _get_costs + +__copyright__ = "Copyright 2022, automl.org" +__license__ = "3-clause BSD" + +logger = get_logger(__name__) + + +class DebugUpdate(object): + def _register_incumbent_update(self, **kwargs): + if not hasattr(self, "_update_incumbent_log"): + self._update_incumbent_log = [] + self._update_incumbent_log.append(kwargs) + +class 
NonDominatedUpdate(DebugUpdate): + + def _update_incumbent(self, config: Configuration) -> list[Configuration]: + """Updates the incumbent with the config (which can be the challenger) + + Parameters + ---------- + config: Configuration + + Returns + ------- + """ + rh = self.runhistory + + incumbents = self.get_incumbents() + + if config not in incumbents: + incumbents.append(config) + + isb_keys = self.get_incumbent_instance_seed_budget_keys(compare=True) + all_incumbent_isb_keys = [isb_keys for _ in range(len(incumbents))] + + # We compare the incumbents now and only return the ones on the Pareto front + # _calculate_pareto_front returns only non-dominated points + new_incumbents = self._calculate_pareto_front(rh, incumbents, + all_incumbent_isb_keys) + + self._register_incumbent_update(config=config, + incumbent=self.get_incumbents(), + isb_keys=isb_keys, + new_incumbents=new_incumbents, + name="NonDominated",) + + return new_incumbents +class BootstrapUpdate(DebugUpdate): + + def _update_incumbent(self, config: Configuration) -> list[Configuration]: + """Updates the incumbent with the config (which can be the challenger) + + Parameters + ---------- + config: Configuration + + Returns + ------- + """ + rh = self.runhistory + + incumbents = self.get_incumbents() + + if config not in incumbents: + incumbents.append(config) + + isb_keys = self.get_incumbent_instance_seed_budget_keys(compare=True) + + n_samples = 1000 + if len(isb_keys) < 7: # When there are only a limited number of trials available we run all combinations + samples = list(itertools.combinations_with_replacement(list(range(len(isb_keys))), r=len(isb_keys))) + n_samples = len(samples) + else: + samples = np.random.choice(len(isb_keys), (n_samples, len(isb_keys)), replace=True) + + verdicts = np.zeros((n_samples, len(incumbents)), dtype=bool) + + for sid, sample in enumerate(samples): + sample_isb_keys = [isb_keys[i] for i in sample] + all_incumbent_isb_keys = [sample_isb_keys] * len(incumbents) + 
new_incumbents = self._calculate_pareto_front(self.runhistory, + incumbents, + all_incumbent_isb_keys) + + verdicts[sid, :] = [incumbents[i] in new_incumbents for i in range(len(incumbents))] + + probabilities = np.count_nonzero(verdicts, axis=0) / n_samples + + new_incumbent_ids = np.argwhere(probabilities >= 0.5).flatten() # Incumbent needs to be non-dominated at least 50% of the time + new_incumbents = [incumbents[i] for i in new_incumbent_ids] + + self._register_incumbent_update(config=config, + incumbent=self.get_incumbents(), + isb_keys=isb_keys, + new_incumbents=new_incumbents, + name="Bootstrap", + probabilities=probabilities, + n_samples=n_samples,) + + return new_incumbents \ No newline at end of file diff --git a/smac/intensifier/multi_objective_intensifier.py b/smac/intensifier/multi_objective_intensifier.py index c6278ecf4..879cb093a 100644 --- a/smac/intensifier/multi_objective_intensifier.py +++ b/smac/intensifier/multi_objective_intensifier.py @@ -57,9 +57,9 @@ def _calculate_pareto_front( config_instance_seed_budget_keys=config_instance_seed_budget_keys, ) - def _remove_incumbent(self, config: Configuration, previous_incumbent_ids: list[int], new_incumbent_ids: list[int]) -> None: - # TODO adjust - raise NotImplementedError + # def _remove_incumbent(self, config: Configuration, previous_incumbent_ids: list[int], new_incumbent_ids: list[int]) -> None: + # # TODO adjust + # raise NotImplementedError def _cut_incumbents(self, incumbent_ids: list[int], all_incumbent_isb_keys: list[list[InstanceSeedBudgetKey]]) -> list[int]: #TODO JG sort by hypervolume diff --git a/smac/utils/pareto_front.py b/smac/utils/pareto_front.py index 79f4676b5..42280d5f8 100644 --- a/smac/utils/pareto_front.py +++ b/smac/utils/pareto_front.py @@ -108,7 +108,7 @@ def sort_by_crowding_distance( sorted_list : list[Configuration] Configurations sorted by crowding distance. 
""" - F = _get_costs(runhistory, configs, config_instance_seed_budget_keys) + F = _get_costs(runhistory, configs, config_instance_seed_budget_keys, normalize=True) infinity = 1e14 n_points = F.shape[0] From 17ce0a386f908fa2133ccec8c4cd806ef8bf7cdd Mon Sep 17 00:00:00 2001 From: "J. Rook" Date: Wed, 3 May 2023 08:34:02 +0200 Subject: [PATCH 48/74] Changed the intensifier --- smac/facade/algorithm_configuration_facade.py | 8 +++- smac/facade/multi_objective_facade.py | 8 +++- .../mixins/intermediate_decision.py | 43 +++++-------------- .../intensifier/mixins/intermediate_update.py | 3 +- .../multi_objective_intensifier.py | 1 - smac/runner/target_function_script_runner.py | 2 +- 6 files changed, 27 insertions(+), 38 deletions(-) diff --git a/smac/facade/algorithm_configuration_facade.py b/smac/facade/algorithm_configuration_facade.py index a82e2f92c..1756ffe69 100644 --- a/smac/facade/algorithm_configuration_facade.py +++ b/smac/facade/algorithm_configuration_facade.py @@ -15,6 +15,7 @@ from smac.runhistory.encoder.encoder import RunHistoryEncoder from smac.scenario import Scenario from smac.utils.logging import get_logger +from smac.intensifier.mixins import intermediate_update, intermediate_decision __copyright__ = "Copyright 2022, automl.org" __license__ = "3-clause BSD" @@ -115,7 +116,12 @@ def get_intensifier( max_incumbents : int, defaults to 10 How many incumbents to keep track of in the case of multi-objective. 
""" - return Intensifier( + class NewIntensifier(intermediate_decision.NewCostDominatesOldCost, + intermediate_update.ClosestIncumbentComparison, + Intensifier): + pass + + return NewIntensifier( scenario=scenario, max_config_calls=max_config_calls, max_incumbents=max_incumbents, diff --git a/smac/facade/multi_objective_facade.py b/smac/facade/multi_objective_facade.py index 5846db079..b7962a000 100644 --- a/smac/facade/multi_objective_facade.py +++ b/smac/facade/multi_objective_facade.py @@ -8,6 +8,7 @@ from smac.initial_design.default_design import DefaultInitialDesign from smac.intensifier.intensifier import Intensifier from smac.intensifier.multi_objective_intensifier import MOIntensifier +from smac.intensifier.mixins import intermediate_update, intermediate_decision, update_incumbent from smac.model.random_forest.random_forest import RandomForest from smac.model.multi_objective_model import MultiObjectiveModel from smac.multi_objective.aggregation_strategy import NoAggregationStrategy @@ -96,7 +97,12 @@ def get_intensifier( # type: ignore max_incumbents : int, defaults to 10 How many incumbents to keep track of in the case of multi-objective. 
""" - return MOIntensifier( + class NewIntensifier(intermediate_decision.NewCostDominatesOldCost, + intermediate_update.ClosestIncumbentComparison, + MOIntensifier): + pass + + return NewIntensifier( scenario=scenario, max_config_calls=max_config_calls, max_incumbents=max_incumbents, diff --git a/smac/intensifier/mixins/intermediate_decision.py b/smac/intensifier/mixins/intermediate_decision.py index 4f8884831..48943919c 100644 --- a/smac/intensifier/mixins/intermediate_decision.py +++ b/smac/intensifier/mixins/intermediate_decision.py @@ -118,23 +118,12 @@ def _check_for_intermediate_comparison(self, config: Configuration) -> bool: config_id = self.runhistory.get_config_id(config) config_hash = get_config_hash(config) - # Do not compare very early in the process - # if len(config_isb_keys) < 4: - # return False - - # Find N in _queue - N = None - for c, cn in self._queue: - if config == c: - N = cn - break - - if N is None: - logger.debug( - f"This should not happen, but config {config_hash} is not in the queue.") - return False + max_trigger_number = int(np.ceil(np.log2(self._max_config_calls))) + trigger_points = [(2**n) - 1 for n in range(1, max_trigger_number + 1)] # 1, 3, 7, 15, ... 
+ logger.debug(f"{trigger_points=}") + logger.debug(f"{len(config_isb_keys)=}") + return len(config_isb_keys) in trigger_points - return len(config_isb_keys) == N class DoublingNComparisonFour(): @@ -153,20 +142,8 @@ def _check_for_intermediate_comparison(self, config: Configuration) -> bool: config_id = self.runhistory.get_config_id(config) config_hash = get_config_hash(config) - # Do not compare very early in the process - if len(config_isb_keys) < 4: - return False - - # Find N in _queue - N = None - for c, cn in self._queue: - if config == c: - N = cn - break - - if N is None: - logger.debug( - f"This should not happen, but config {config_hash} is not in the queue.") - return False - - return len(config_isb_keys) == N + max_trigger_number = int(np.ceil(np.log2(self._max_config_calls))) + trigger_points = [(2 ** n) - 1 for n in range(2, max_trigger_number + 1)] # 1, 3, 7, 15, ... + logger.debug(f"{trigger_points=}") + logger.debug(f"{len(config_isb_keys)=}") + return len(config_isb_keys) in trigger_points diff --git a/smac/intensifier/mixins/intermediate_update.py b/smac/intensifier/mixins/intermediate_update.py index b444e8c77..d31661394 100644 --- a/smac/intensifier/mixins/intermediate_update.py +++ b/smac/intensifier/mixins/intermediate_update.py @@ -40,6 +40,7 @@ class DebugComparison(object): def _register_comparison(self, **kwargs): + logger.debug(f"Made intermediate comparison with {kwargs['name']} comparison ") if not hasattr(self, "_intermediate_comparisons_log"): self._intermediate_comparisons_log = [] self._intermediate_comparisons_log.append(kwargs) @@ -296,7 +297,7 @@ def _intermediate_comparison(self, config: Configuration) -> bool: self._register_comparison(config=config, incumbent=self.get_incumbents(), isb_keys=len(config_isb_keys), - costs=self.get_costs(config), + costs=self._get_costs_comp(config), prediction=verdict, name="NoComp") return verdict diff --git a/smac/intensifier/multi_objective_intensifier.py 
b/smac/intensifier/multi_objective_intensifier.py index 879cb093a..8ecf7ea53 100644 --- a/smac/intensifier/multi_objective_intensifier.py +++ b/smac/intensifier/multi_objective_intensifier.py @@ -50,7 +50,6 @@ def _calculate_pareto_front( configs: list[Configuration], config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]], ) -> list[Configuration]: - # TODO use fast non dominance sorting return calculate_pareto_front( runhistory=runhistory, configs=configs, diff --git a/smac/runner/target_function_script_runner.py b/smac/runner/target_function_script_runner.py index f01970a98..cf3f49fce 100644 --- a/smac/runner/target_function_script_runner.py +++ b/smac/runner/target_function_script_runner.py @@ -183,7 +183,7 @@ def run( if "additional_info" in outputs: additional_info["additional_info"] = outputs["additional_info"] - if status != StatusType.SUCCESS: + if not status in [StatusType.SUCCESS, StatusType.TIMEOUT]: additional_info["error"] = error if cost != self._crash_cost: From fd317b011e0a222c8db8756623c7f1e474772d76 Mon Sep 17 00:00:00 2001 From: "J. Rook" Date: Wed, 3 May 2023 14:35:25 +0200 Subject: [PATCH 49/74] Commit everythin --- examples/2_multi_fidelity/1_mlp_epochs.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/examples/2_multi_fidelity/1_mlp_epochs.py b/examples/2_multi_fidelity/1_mlp_epochs.py index 9fd256c5d..0feff49c9 100644 --- a/examples/2_multi_fidelity/1_mlp_epochs.py +++ b/examples/2_multi_fidelity/1_mlp_epochs.py @@ -80,7 +80,7 @@ def configspace(self) -> ConfigurationSpace: return cs - def train(self, config: Configuration, seed: int = 0, budget: int = 25) -> float: + def train(self, config: Configuration, seed: int = 0, instance: str = "0", budget: int = 25) -> dict[str, float]: # For deactivated parameters (by virtue of the conditions), # the configuration stores None-values. # This is not accepted by the MLP, so we replace them with placeholder values. 
@@ -106,7 +106,7 @@ def train(self, config: Configuration, seed: int = 0, budget: int = 25) -> float cv = StratifiedKFold(n_splits=5, random_state=seed, shuffle=True) # to make CV splits consistent score = cross_val_score(classifier, dataset.data, dataset.target, cv=cv, error_score="raise") - return 1 - np.mean(score) + return {"accuracy": 1 - np.mean(score)} def plot_trajectory(facades: list[AbstractFacade]) -> None: @@ -147,9 +147,11 @@ def plot_trajectory(facades: list[AbstractFacade]) -> None: mlp.configspace, walltime_limit=60, # After 60 seconds, we stop the hyperparameter optimization n_trials=500, # Evaluate max 500 different trials - min_budget=1, # Train the MLP using a hyperparameter configuration for at least 5 epochs - max_budget=25, # Train the MLP using a hyperparameter configuration for at most 25 epochs - n_workers=8, + instances=[str(i) for i in range(10)], + objectives="accuracy", + # min_budget=1, # Train the MLP using a hyperparameter configuration for at least 5 epochs + # max_budget=25, # Train the MLP using a hyperparameter configuration for at most 25 epochs + n_workers=4, ) # We want to run five random configurations before starting the optimization. From b50db2b4336297f9b4183df32fa886d763283900 Mon Sep 17 00:00:00 2001 From: "J. 
Rook" Date: Thu, 4 May 2023 19:29:46 +0200 Subject: [PATCH 50/74] csvs --- smac/intensifier/mixins/intermediate_decision.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/smac/intensifier/mixins/intermediate_decision.py b/smac/intensifier/mixins/intermediate_decision.py index 48943919c..7285bcd1b 100644 --- a/smac/intensifier/mixins/intermediate_decision.py +++ b/smac/intensifier/mixins/intermediate_decision.py @@ -118,11 +118,14 @@ def _check_for_intermediate_comparison(self, config: Configuration) -> bool: config_id = self.runhistory.get_config_id(config) config_hash = get_config_hash(config) - max_trigger_number = int(np.ceil(np.log2(self._max_config_calls))) - trigger_points = [(2**n) - 1 for n in range(1, max_trigger_number + 1)] # 1, 3, 7, 15, ... - logger.debug(f"{trigger_points=}") - logger.debug(f"{len(config_isb_keys)=}") - return len(config_isb_keys) in trigger_points + # max_trigger_number = int(np.ceil(np.log2(self._max_config_calls))) + # trigger_points = [(2**n) - 1 for n in range(1, max_trigger_number + 1)] # 1, 3, 7, 15, ... + # logger.debug(f"{trigger_points=}") + # logger.debug(f"{len(config_isb_keys)=}") + # return len(config_isb_keys) in trigger_points + + nkeys = len(config_isb_keys) + return (nkeys+1) & nkeys == 0 # checks if nkeys+1 is a power of 2 (complies with the sequence (2**n)-1) class DoublingNComparisonFour(): From 69d466b9f10c7a9af29eb1f6f1bd53c9d86727bf Mon Sep 17 00:00:00 2001 From: "J. 
Rook" Date: Wed, 15 Nov 2023 16:07:44 +0100 Subject: [PATCH 51/74] README change --- README.md | 118 +++++++++--------------------------------------------- 1 file changed, 18 insertions(+), 100 deletions(-) diff --git a/README.md b/README.md index 03999059a..75a5673bb 100644 --- a/README.md +++ b/README.md @@ -1,44 +1,11 @@ -# SMAC3: A Versatile Bayesian Optimization Package for Hyperparameter Optimization - - -[![Tests](https://github.com/automl/SMAC3/actions/workflows/pytest.yml/badge.svg?branch=main)](https://github.com/automl/SMAC3/actions/workflows/pytest.yml) -[![Documentation](https://github.com/automl/SMAC3/actions/workflows/docs.yml/badge.svg?branch=main)](https://github.com/automl/SMAC3/actions/workflows/docs.yml) -[![codecov -Status](https://codecov.io/gh/automl/SMAC3/branch/master/graph/badge.svg)](https://codecov.io/gh/automl/SMAC3) - - - -SMAC offers a robust and flexible framework for Bayesian Optimization to support users in determining well-performing -hyperparameter configurations for their (Machine Learning) algorithms, datasets and applications at hand. The main core -consists of Bayesian Optimization in combination with an aggressive racing mechanism to efficiently decide which of two configurations performs better. - -SMAC3 is written in Python3 and continuously tested with Python 3.8, 3.9, and 3.10. Its Random -Forest is written in C++. In further texts, SMAC is representatively mentioned for SMAC3. - -> [Documentation](https://automl.github.io/SMAC3) - -> [Roadmap](https://github.com/orgs/automl/projects/5/views/2) - - -## Important: Changes in v2.0 - -With the next big major release of SMAC, we drastically boosted the user experience by improving the APIs and how the -pipelining is done (see [changelog](CHANGELOG.md)). All facades/intensifiers support multi-objective, multi-fidelity, -and multi-threading natively now! That includes having an ask-and-tell interface and continuing a run -wherever you left off. 
pSMAC is removed because when specifying the number of workers, SMAC automatically uses -multi-threading for evaluating trials. When cleaning the code base, however, we removed the command-line -interface (calling a target function from a script is still supported), and runtime optimization. Also, -python 3.7 is not supported anymore. If you depend on those functionalities, please keep using v1.4. - -We are excited to introduce the new major release and look forward to developing new features on the new code base. -We hope you enjoy this new user experience as much as we do. 🚀 +# MO-SMAC +MO-SMAC is implemented directly into SMAC3. This repository is forked from the [SMAC3 repository](https://github.com/automl/SMAC3) and therefore contains references and copyright information to those authors. +These do not align with the authors of MO-SMAC and therefor the anonymity for this repository remains intact. ## Installation -This instruction is for the installation on a Linux system, for Windows and Mac and further information see the [documentation](https://automl.github.io/SMAC3/main/1_installation.html). - -Create a new environment with python 3.10 and make sure swig is installed either on your system or +Create a new environment with Python 3.10 and make sure swig is installed either on your system or inside the environment. We demonstrate the installation via anaconda in the following: Create and activate environment: @@ -52,25 +19,23 @@ Install swig: conda install gxx_linux-64 gcc_linux-64 swig ``` -Install SMAC via PyPI: +Clone this repository and install locally: ``` -pip install smac -``` - -Or alternatively, clone the environment: -``` -git clone https://github.com/automl/SMAC3.git && cd SMAC3 +cd SMAC3 pip install -e .[dev] ``` ## Minimal Example +To use MO-SMAC, there is a multi-objective facade that provides all the functionalities for MO-AAC. The example below shows how this facade can be accessed and used. 
```py from ConfigSpace import Configuration, ConfigurationSpace +import time import numpy as np -from smac import HyperparameterOptimizationFacade, Scenario +from smac.facade.multi_objective_facade import MultiObjectiveFacade +from smac import Scenario from sklearn import datasets from sklearn.svm import SVC from sklearn.model_selection import cross_val_score @@ -80,68 +45,21 @@ iris = datasets.load_iris() def train(config: Configuration, seed: int = 0) -> float: classifier = SVC(C=config["C"], random_state=seed) + start_time = time.time() scores = cross_val_score(classifier, iris.data, iris.target, cv=5) - return 1 - np.mean(scores) + run_time = time.time() - start_time + return {"perf": 1 - np.mean(scores), "runtime": run_time} configspace = ConfigurationSpace({"C": (0.100, 1000.0)}) # Scenario object specifying the optimization environment -scenario = Scenario(configspace, deterministic=True, n_trials=200) +scenario = Scenario(configspace, + deterministic=True, + n_trials=200, + objectives=["perf", "runtime"]) # Use SMAC to find the best configuration/hyperparameters -smac = HyperparameterOptimizationFacade(scenario, train) +smac = MultiObjectiveFacade(scenario, train) incumbent = smac.optimize() ``` - -More examples can be found in the [documentation](https://automl.github.io/SMAC3/main/examples/). - -## Visualization via DeepCAVE - -With DeepCAVE ([Repo](https://github.com/automl/DeepCAVE), [Paper](https://arxiv.org/abs/2206.03493)) you can visualize your SMAC runs. It is a visualization and analysis tool for AutoML (especially for the sub-problem -hyperparameter optimization) runs. - -## License - -This program is free software: you can redistribute it and/or modify -it under the terms of the 3-clause BSD license (please see the LICENSE file). - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
- -You should have received a copy of the 3-clause BSD license -along with this program (see LICENSE file). -If not, see [here](https://opensource.org/licenses/BSD-3-Clause). - -## Contacting us - -If you have trouble using SMAC, a concrete question or found a bug, please create an [issue](https://github.com/automl/SMAC3/issues). This is the easiest way to communicate about these things with us. - -For all other inquiries, please write an email to smac[at]ai[dot]uni[dash]hannover[dot]de. - -## Miscellaneous - -SMAC3 is developed by the [AutoML Groups of the Universities of Hannover and -Freiburg](http://www.automl.org/). - -If you have found a bug, please report to [issues](https://github.com/automl/SMAC3/issues). Moreover, we are -appreciating any kind of help. Find our guidelines for contributing to this package -[here](CONTRIBUTING.md). - -If you use SMAC in one of your research projects, please cite our -[JMLR paper](https://jmlr.org/papers/v23/21-0888.html): -``` -@article{JMLR:v23:21-0888, - author = {Marius Lindauer and Katharina Eggensperger and Matthias Feurer and André Biedenkapp and Difan Deng and Carolin Benjamins and Tim Ruhkopf and René Sass and Frank Hutter}, - title = {SMAC3: A Versatile Bayesian Optimization Package for Hyperparameter Optimization}, - journal = {Journal of Machine Learning Research}, - year = {2022}, - volume = {23}, - number = {54}, - pages = {1--9}, - url = {http://jmlr.org/papers/v23/21-0888.html} -} -``` - -Copyright (C) 2016-2022 [AutoML Group](http://www.automl.org). From fdd33f6cde08c0f0060a9cf65880ad832076d29e Mon Sep 17 00:00:00 2001 From: "J. Rook" Date: Wed, 15 Nov 2023 17:58:15 +0100 Subject: [PATCH 52/74] README change --- README.md | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 75a5673bb..6426515c0 100644 --- a/README.md +++ b/README.md @@ -1,30 +1,26 @@ # MO-SMAC MO-SMAC is implemented directly into SMAC3. 
This repository is forked from the [SMAC3 repository](https://github.com/automl/SMAC3) and therefore contains references and copyright information to those authors. -These do not align with the authors of MO-SMAC and therefor the anonymity for this repository remains intact. +These do not align with the authors of MO-SMAC and, therefore, the anonymity for this repository remains intact. ## Installation Create a new environment with Python 3.10 and make sure swig is installed either on your system or inside the environment. We demonstrate the installation via anaconda in the following: -Create and activate environment: +Create and activate environment after which you install `swig`: ``` conda create -n SMAC python=3.10 conda activate SMAC -``` - -Install swig: -``` conda install gxx_linux-64 gcc_linux-64 swig ``` Clone this repository and install locally: ``` -cd SMAC3 +cd SMAC3K pip install -e .[dev] ``` - +K ## Minimal Example To use MO-SMAC, there is a multi-objective facade that provides all the functionalities for MO-AAC. The example below shows how this facade can be accessed and used. From bf2a2f04bb39414522420f8160284dc581e738d1 Mon Sep 17 00:00:00 2001 From: "J. Rook" Date: Mon, 3 Mar 2025 09:40:57 +0100 Subject: [PATCH 53/74] Even bigger push --- README.md | 1 - smac/intensifier/abstract_intensifier.py | 1 - smac/intensifier/intensifier.py | 28 +++++++++++++++---- .../mixins/intermediate_decision.py | 12 ++++++++ .../intensifier/mixins/intermediate_update.py | 10 +++---- 5 files changed, 40 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 6426515c0..c71586e13 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,6 @@ Clone this repository and install locally: cd SMAC3K pip install -e .[dev] ``` -K ## Minimal Example To use MO-SMAC, there is a multi-objective facade that provides all the functionalities for MO-AAC. The example below shows how this facade can be accessed and used. 
diff --git a/smac/intensifier/abstract_intensifier.py b/smac/intensifier/abstract_intensifier.py index fe2a770cd..4a163a03e 100644 --- a/smac/intensifier/abstract_intensifier.py +++ b/smac/intensifier/abstract_intensifier.py @@ -544,7 +544,6 @@ def _update_incumbent(self, config: Configuration) -> list[Configuration]: isb_keys = self.get_incumbent_instance_seed_budget_keys(compare=True) all_incumbent_isb_keys = [isb_keys for _ in range(len(incumbents))] - # We compare the incumbents now and only return the ones on the Pareto front # _calculate_pareto_front returns only non-dominated points new_incumbents = self._calculate_pareto_front(rh, incumbents, all_incumbent_isb_keys) diff --git a/smac/intensifier/intensifier.py b/smac/intensifier/intensifier.py index 705c4ab82..bfddcd90a 100644 --- a/smac/intensifier/intensifier.py +++ b/smac/intensifier/intensifier.py @@ -1,5 +1,6 @@ from __future__ import annotations +import time from typing import Any, Iterator from ConfigSpace import Configuration @@ -51,10 +52,15 @@ def __init__( max_config_calls: int = 3, max_incumbents: int = 10, retries: int = 16, + min_config_calls: int = 1, seed: int | None = None, ): super().__init__(scenario=scenario, max_config_calls=max_config_calls, max_incumbents=max_incumbents, seed=seed) self._retries = retries + self._min_config_calls = min_config_calls + + if max_config_calls < min_config_calls: + raise ValueError("min_config_calls must be smaller or equal than max_config_calls") def reset(self) -> None: """Resets the internal variables of the intensifier including the queue.""" @@ -108,6 +114,12 @@ def __iter__(self) -> Iterator[TrialInfo]: rh = self.runhistory assert self._max_config_calls is not None + is_keys = self.get_instance_seed_keys_of_interest() + if len(is_keys) < self._min_config_calls: + logger.debug(f"There are less instance, seed pairs of interest than the requested minimum trails per " + f"configuration. 
Changing min_config_calls from {self._min_config_calls} to {len(is_keys)}") + self._min_config_calls = len(is_keys) + # What if there are already trials in the runhistory? Should we queue them up? # Because they are part of the runhistory, they might be selected as incumbents. However, they are not # intensified because they are not part of the queue. We could add them here to incorporate them in the @@ -119,7 +131,7 @@ def __iter__(self) -> Iterator[TrialInfo]: if len(self._queue) == 0: for config in rh.get_configs(): hash = get_config_hash(config) - self._queue.append((config, 1)) + self._queue.append((config, self._min_config_calls)) logger.info(f"Added config {hash} from runhistory to the intensifier queue.") fails = -1 @@ -139,7 +151,7 @@ def __iter__(self) -> Iterator[TrialInfo]: # Also, incorporate ``get_incumbent_instance_seed_budget_keys`` here because challengers are only allowed to # sample from the incumbent's instances incumbents = self.get_incumbents(sort_by="num_trials") - incumbent_isb_keys = self.get_incumbent_instance_seed_budget_keys() # Intersection + incumbent_isb_keys = self.get_incumbent_instance_seed_budget_keys() # Intersection # Check if configs in queue are still running all_configs_running = True @@ -148,7 +160,7 @@ def __iter__(self) -> Iterator[TrialInfo]: all_configs_running = False break - if len(self._queue) == 0 or all_configs_running: # Incumbents + if len(self._queue) == 0 or all_configs_running: # Incumbents if len(self._queue) == 0: logger.debug("Queue is empty:") else: @@ -205,6 +217,7 @@ def __iter__(self) -> Iterator[TrialInfo]: f"{self._max_config_calls} from incumbent {incumbent_hash}..." 
) yield trials[0] + logger.debug(f"--- Finished yielding for config {incumbent_hash}.") # We break here because we only want to intensify one more trial of one incumbent @@ -223,7 +236,7 @@ def __iter__(self) -> Iterator[TrialInfo]: try: config = next(self.config_generator) config_hash = get_config_hash(config) - self._queue.append((config, 1)) + self._queue.append((config, self._min_config_calls)) logger.debug(f"--- Added a new config {config_hash} to the queue.") # If we added a new config, then we did something in this iteration @@ -271,6 +284,10 @@ def __iter__(self) -> Iterator[TrialInfo]: else: logger.debug(f"--- Yielding {len(trials)} trials to evaluate config {config_hash}...") for trial in trials: + # We need to check if the configuration has been rejected! + if config in self.get_rejected_configs(): + logger.debug(f"--- {config_hash} was rejected so we do not run any more trials") + break fails = -1 yield trial @@ -280,9 +297,10 @@ def __iter__(self) -> Iterator[TrialInfo]: self._queue.remove((config, N)) logger.debug(f"--- Removed config {config_hash} with N={N} from queue.") + # Finally, we add the same config to the queue with a higher N # If the config was rejected by the runhistory, then it's been removed in the next iteration - if N < self._max_config_calls: + if N < self._max_config_calls and config not in self.get_rejected_configs(): new_pair = (config, N * 2) if new_pair not in self._queue: logger.debug( diff --git a/smac/intensifier/mixins/intermediate_decision.py b/smac/intensifier/mixins/intermediate_decision.py index 7285bcd1b..bfdb46f67 100644 --- a/smac/intensifier/mixins/intermediate_decision.py +++ b/smac/intensifier/mixins/intermediate_decision.py @@ -128,6 +128,18 @@ def _check_for_intermediate_comparison(self, config: Configuration) -> bool: return (nkeys+1) & nkeys == 0 # checks if nkeys+1 is a power of 2 (complies with the sequence (2**n)-1) +class Always(): + + def _check_for_intermediate_comparison(self, config: Configuration) -> 
bool: + return True + + +class Never(): + + def _check_for_intermediate_comparison(self, config: Configuration) -> bool: + return False + + class DoublingNComparisonFour(): def _check_for_intermediate_comparison(self, config: Configuration) -> bool: diff --git a/smac/intensifier/mixins/intermediate_update.py b/smac/intensifier/mixins/intermediate_update.py index d31661394..413f83c69 100644 --- a/smac/intensifier/mixins/intermediate_update.py +++ b/smac/intensifier/mixins/intermediate_update.py @@ -185,19 +185,19 @@ def _intermediate_comparison(self, config: Configuration) -> bool: compare=True) logger.debug( - f"Perform intermediate comparions of config {config_hash} with incumbents to see if it is worse" + f"Perform intermediate comparisons of config {config_hash} with incumbents to see if it is worse" ) + # Ensure that the config is not part of the incumbent + if config in incumbents: + return True + # Check if the incumbents ran on all the ones of this config if not all([key in incumbent_isb_comparison_keys for key in config_isb_keys]): logger.debug( "Config ran on other isb_keys than the incumbents. Should not happen.") return True - # Ensure that the config is not part of the incumbent - if config in incumbents: - return True - # Only compare domination between one incumbent (as relaxation measure) #iid = self._rng.choice(len(incumbents)) #TODO Normalize to determine closests? From 7d7290dfcc655995a93e38f9df5f31eb793ef6f2 Mon Sep 17 00:00:00 2001 From: "J. 
Rook" Date: Thu, 27 Mar 2025 10:21:26 +0100 Subject: [PATCH 54/74] Remove EHVI acquisition function --- .../function/expected_hypervolume.py | 235 +++++++++--------- smac/facade/multi_objective_facade.py | 4 +- 2 files changed, 118 insertions(+), 121 deletions(-) diff --git a/smac/acquisition/function/expected_hypervolume.py b/smac/acquisition/function/expected_hypervolume.py index a023fb9b7..8cdd30eff 100644 --- a/smac/acquisition/function/expected_hypervolume.py +++ b/smac/acquisition/function/expected_hypervolume.py @@ -6,8 +6,6 @@ import pygmo import numpy as np -from torch import Tensor -from abc import ABC from smac.intensifier.abstract_intensifier import AbstractIntensifier from smac.runhistory import TrialInfo, RunHistory @@ -20,57 +18,56 @@ from smac.model.abstract_model import AbstractModel from smac.utils.multi_objective import normalize_costs -import torch -from botorch.acquisition.multi_objective import ExpectedHypervolumeImprovement -from botorch.models.model import Model -from botorch.utils.multi_objective.box_decompositions.non_dominated import ( - NondominatedPartitioning, -) +# import torch +# from botorch.acquisition.multi_objective import ExpectedHypervolumeImprovement +# from botorch.models.model import Model +# from botorch.utils.multi_objective.box_decompositions.non_dominated import ( +# NondominatedPartitioning, +# ) __copyright__ = "Copyright 2022, automl.org" __license__ = "3-clause BSD" logger = get_logger(__name__) -class _PosteriorProxy(object): - def __init__(self) -> None: - self.mean: Tensor = [] - self.variance: Tensor = [] - - -class _ModelProxy(Model, ABC): - def __init__(self, model: AbstractModel, objective_bounds: list[tuple[float, float]]): - super(_ModelProxy).__init__() - self.model = model - self._objective_bounds = objective_bounds - - def posterior(self, X: Tensor, **kwargs: Any) -> _PosteriorProxy: - """Docstring - X: A `b x q x d`-dim Tensor, where `d` is the dimension of the - feature space, `q` is the number of 
points considered jointly, - and `b` is the batch dimension. - - - A `Posterior` object, representing a batch of `b` joint distributions - over `q` points and `m` outputs each. - """ - assert X.shape[1] == 1 - X = X.reshape([X.shape[0], -1]).numpy() # 3D -> 2D - - # predict - # start_time = time.time() - # print(f"Start predicting ") - mean, var_ = self.model.predict_marginalized(X) - normalized_mean = np.array([normalize_costs(m, self._objective_bounds) for m in mean]) - scale = normalized_mean / mean - var_ *= scale # Scale variance accordingly - mean = normalized_mean - # print(f"Done in {time.time() - start_time}s") - post = _PosteriorProxy() - post.mean = torch.asarray(mean).reshape(X.shape[0], 1, -1) # 2D -> 3D - post.variance = torch.asarray(var_).reshape(X.shape[0], 1, -1) # 2D -> 3D - - return post +# class _PosteriorProxy(object): +# def __init__(self) -> None: +# self.mean: Tensor = [] +# self.variance: Tensor = [] + +# class _ModelProxy(Model, ABC): +# def __init__(self, model: AbstractModel, objective_bounds: list[tuple[float, float]]): +# super(_ModelProxy).__init__() +# self.model = model +# self._objective_bounds = objective_bounds +# +# def posterior(self, X: Tensor, **kwargs: Any) -> _PosteriorProxy: +# """Docstring +# X: A `b x q x d`-dim Tensor, where `d` is the dimension of the +# feature space, `q` is the number of points considered jointly, +# and `b` is the batch dimension. +# +# +# A `Posterior` object, representing a batch of `b` joint distributions +# over `q` points and `m` outputs each. 
+# """ +# assert X.shape[1] == 1 +# X = X.reshape([X.shape[0], -1]).numpy() # 3D -> 2D +# +# # predict +# # start_time = time.time() +# # print(f"Start predicting ") +# mean, var_ = self.model.predict_marginalized(X) +# normalized_mean = np.array([normalize_costs(m, self._objective_bounds) for m in mean]) +# scale = normalized_mean / mean +# var_ *= scale # Scale variance accordingly +# mean = normalized_mean +# # print(f"Done in {time.time() - start_time}s") +# post = _PosteriorProxy() +# post.mean = torch.asarray(mean).reshape(X.shape[0], 1, -1) # 2D -> 3D +# post.variance = torch.asarray(var_).reshape(X.shape[0], 1, -1) # 2D -> 3D +# +# return post class AbstractHVI(AbstractAcquisitionFunction): def __init__(self): @@ -182,78 +179,78 @@ def _compute(self, X: np.ndarray) -> np.ndarray: return phvi.reshape(-1, 1) -class EHVI(AbstractHVI): - def __init__(self): - super(EHVI, self).__init__() - self._ehvi: ExpectedHypervolumeImprovement | None = None - - @property - def name(self) -> str: - return "Expected Hypervolume Improvement" - - def _update(self, **kwargs: Any) -> None: - super(EHVI, self)._update(**kwargs) - incumbents: list[Configuration] = kwargs.get("incumbents", None) - - # Update EHVI - # Prediction all - population_configs = incumbents - population_X = np.array([config.get_array() for config in population_configs]) - population_costs, _ = self.model.predict_marginalized(population_X) - # Normalize the objectives here to give equal attention to the objectives when computing the HV - population_costs = [normalize_costs(p, self._objective_bounds) for p in population_costs] - - # BOtorch EHVI implementation - bomodel = _ModelProxy(self.model, self._objective_bounds) - # ref_point = pygmo.hypervolume(population_costs).refpoint( - # offset=1 - # ) # TODO get proper reference points from user/cutoffs - ref_point = [1.1] * len(self._objective_bounds) - # ref_point = torch.asarray(ref_point) - # TODO partition from all runs instead of only population? 
- # TODO NondominatedPartitioning and ExpectedHypervolumeImprovement seem no too difficult to implement natively - # TODO pass along RNG - # Transfrom the objective space to cells based on the population - partitioning = NondominatedPartitioning(torch.asarray(ref_point), torch.asarray(population_costs)) - self._ehvi = ExpectedHypervolumeImprovement(bomodel, ref_point, partitioning) - - def _compute(self, X: np.ndarray) -> np.ndarray: - """Computes the EHVI values and its derivatives. - - Parameters - ---------- - X: np.ndarray(N, D), The input points where the acquisition function - should be evaluated. The dimensionality of X is (N, D), with N as - the number of points to evaluate at and D is the number of - dimensions of one X. - - Returns - ------- - np.ndarray(N,1) - Expected HV Improvement of X - """ - if self._ehvi is None: - raise ValueError(f"The expected hypervolume improvement is not defined yet. Call self.update.") - - if len(X.shape) == 1: - X = X[:, np.newaxis] - - # m, var_ = self.model.predict_marginalized_over_instances(X) - # Find a way to propagate the variance into the HV - boX = torch.asarray(X).reshape(X.shape[0], 1, -1) # 2D -> #3D - improvements = self._ehvi(boX).numpy().reshape(-1, 1) # TODO here are the expected hv improvements computed. - return improvements - - # TODO non-dominated sorting of costs. Compute EHVI only until the EHVI is not expected to improve anymore. - # Option 1: Supplement missing instances of population with acq. function to get predicted performance over - # all instances. 
Idea is this prevents optimizing for the initial instances which get it stuck in local optima - # Option 2: Only on instances of population - # Option 3: EVHI per instance and aggregate afterwards - # ehvi = np.zeros(len(X)) - # for i, indiv in enumerate(m): - # ehvi[i] = self.get_hypervolume(population_costs + [indiv]) - population_hv - # - # return ehvi.reshape(-1, 1) +# class EHVI(AbstractHVI): +# def __init__(self): +# super(EHVI, self).__init__() +# self._ehvi: ExpectedHypervolumeImprovement | None = None +# +# @property +# def name(self) -> str: +# return "Expected Hypervolume Improvement" +# +# def _update(self, **kwargs: Any) -> None: +# super(EHVI, self)._update(**kwargs) +# incumbents: list[Configuration] = kwargs.get("incumbents", None) +# +# # Update EHVI +# # Prediction all +# population_configs = incumbents +# population_X = np.array([config.get_array() for config in population_configs]) +# population_costs, _ = self.model.predict_marginalized(population_X) +# # Normalize the objectives here to give equal attention to the objectives when computing the HV +# population_costs = [normalize_costs(p, self._objective_bounds) for p in population_costs] +# +# # BOtorch EHVI implementation +# bomodel = _ModelProxy(self.model, self._objective_bounds) +# # ref_point = pygmo.hypervolume(population_costs).refpoint( +# # offset=1 +# # ) # TODO get proper reference points from user/cutoffs +# ref_point = [1.1] * len(self._objective_bounds) +# # ref_point = torch.asarray(ref_point) +# # TODO partition from all runs instead of only population? 
+# # TODO NondominatedPartitioning and ExpectedHypervolumeImprovement seem no too difficult to implement natively +# # TODO pass along RNG +# # Transfrom the objective space to cells based on the population +# partitioning = NondominatedPartitioning(torch.asarray(ref_point), torch.asarray(population_costs)) +# self._ehvi = ExpectedHypervolumeImprovement(bomodel, ref_point, partitioning) +# +# def _compute(self, X: np.ndarray) -> np.ndarray: +# """Computes the EHVI values and its derivatives. +# +# Parameters +# ---------- +# X: np.ndarray(N, D), The input points where the acquisition function +# should be evaluated. The dimensionality of X is (N, D), with N as +# the number of points to evaluate at and D is the number of +# dimensions of one X. +# +# Returns +# ------- +# np.ndarray(N,1) +# Expected HV Improvement of X +# """ +# if self._ehvi is None: +# raise ValueError(f"The expected hypervolume improvement is not defined yet. Call self.update.") +# +# if len(X.shape) == 1: +# X = X[:, np.newaxis] +# +# # m, var_ = self.model.predict_marginalized_over_instances(X) +# # Find a way to propagate the variance into the HV +# boX = torch.asarray(X).reshape(X.shape[0], 1, -1) # 2D -> #3D +# improvements = self._ehvi(boX).numpy().reshape(-1, 1) # TODO here are the expected hv improvements computed. +# return improvements +# +# # TODO non-dominated sorting of costs. Compute EHVI only until the EHVI is not expected to improve anymore. +# # Option 1: Supplement missing instances of population with acq. function to get predicted performance over +# # all instances. 
Idea is this prevents optimizing for the initial instances which get it stuck in local optima +# # Option 2: Only on instances of population +# # Option 3: EVHI per instance and aggregate afterwards +# # ehvi = np.zeros(len(X)) +# # for i, indiv in enumerate(m): +# # ehvi[i] = self.get_hypervolume(population_costs + [indiv]) - population_hv +# # +# # return ehvi.reshape(-1, 1) class PHVI(AbstractHVI): diff --git a/smac/facade/multi_objective_facade.py b/smac/facade/multi_objective_facade.py index b7962a000..ccb842525 100644 --- a/smac/facade/multi_objective_facade.py +++ b/smac/facade/multi_objective_facade.py @@ -3,7 +3,7 @@ from ConfigSpace import Configuration from smac.acquisition.function.expected_improvement import EI -from smac.acquisition.function.expected_hypervolume import EHVI, PHVI +from smac.acquisition.function.expected_hypervolume import PHVI from smac.facade.abstract_facade import AbstractFacade from smac.initial_design.default_design import DefaultInitialDesign from smac.intensifier.intensifier import Intensifier @@ -124,7 +124,7 @@ def get_acquisition_function( # type: ignore Controls the balance between exploration and exploitation of the acquisition function. """ - return EHVI() + return PHVI() @staticmethod def get_acquisition_maximizer( # type: ignore From aec7609b018aad62e999efacfee34dfbf5edbe98 Mon Sep 17 00:00:00 2001 From: "J. Rook" Date: Thu, 27 Mar 2025 10:40:05 +0100 Subject: [PATCH 55/74] README --- README.md | 102 ++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 84 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index be806a7b0..d0b96fb6b 100644 --- a/README.md +++ b/README.md @@ -12,12 +12,33 @@ SMAC offers a robust and flexible framework for Bayesian Optimization to support hyperparameter configurations for their (Machine Learning) algorithms, datasets and applications at hand. 
The main core consists of Bayesian Optimization in combination with an aggressive racing mechanism to efficiently decide which of two configurations performs better. -MO-SMAC is implemented directly into SMAC3. This repository is forked from the [SMAC3 repository](https://github.com/automl/SMAC3) and therefore contains references and copyright information to those authors. -These do not align with the authors of MO-SMAC and, therefore, the anonymity for this repository remains intact. +SMAC3 is written in Python3 and continuously tested with Python 3.8, 3.9, and 3.10 (and works with newer python versions). Its Random +Forest is written in C++. In further texts, SMAC is representatively mentioned for SMAC3. + +> [Documentation](https://automl.github.io/SMAC3/latest/) + +> [Roadmap](https://github.com/orgs/automl/projects/5/views/2) + + +## Important: Changes in v2.0 + +With the next big major release of SMAC, we drastically boosted the user experience by improving the APIs and how the +pipelining is done (see [changelog](CHANGELOG.md)). All facades/intensifiers support multi-objective, multi-fidelity, +and multi-threading natively now! That includes having an ask-and-tell interface and continuing a run +wherever you left off. pSMAC is removed because when specifying the number of workers, SMAC automatically uses +multi-threading for evaluating trials. When cleaning the code base, however, we removed the command-line +interface (calling a target function from a script is still supported), and runtime optimization. Also, +python 3.7 is not supported anymore. If you depend on those functionalities, please keep using v1.4. + +We are excited to introduce the new major release and look forward to developing new features on the new code base. +We hope you enjoy this new user experience as much as we do. 
🚀 + ## Installation -Create a new environment with Python 3.10 and make sure swig is installed either on your system or +This instruction is for the installation on a Linux system, for Windows and Mac and further information see the [documentation](https://automl.github.io/SMAC3/latest/1_installation/). + +Create a new environment with python 3.10 and make sure swig is installed either on your system or inside the environment. We demonstrate the installation via anaconda in the following: Create and activate environment: @@ -31,27 +52,25 @@ Install swig: conda install gxx_linux-64 gcc_linux-64 swig ``` -Clone this repository and install locally: +Install SMAC via PyPI: +``` +pip install smac ``` If you want to contribute to SMAC, use the following steps instead: ``` git clone https://github.com/automl/SMAC3.git && cd SMAC3 make install-dev -cd SMAC3K -pip install -e .[dev] ``` + ## Minimal Example -To use MO-SMAC, there is a multi-objective facade that provides all the functionalities for MO-AAC. The example below shows how this facade can be accessed and used. 
```py from ConfigSpace import Configuration, ConfigurationSpace -import time import numpy as np -from smac.facade.multi_objective_facade import MultiObjectiveFacade -from smac import Scenario +from smac import HyperparameterOptimizationFacade, Scenario from sklearn import datasets from sklearn.svm import SVC from sklearn.model_selection import cross_val_score @@ -61,21 +80,68 @@ iris = datasets.load_iris() def train(config: Configuration, seed: int = 0) -> float: classifier = SVC(C=config["C"], random_state=seed) - start_time = time.time() scores = cross_val_score(classifier, iris.data, iris.target, cv=5) - run_time = time.time() - start_time - return {"perf": 1 - np.mean(scores), "runtime": run_time} + return 1 - np.mean(scores) configspace = ConfigurationSpace({"C": (0.100, 1000.0)}) # Scenario object specifying the optimization environment -scenario = Scenario(configspace, - deterministic=True, - n_trials=200, - objectives=["perf", "runtime"]) +scenario = Scenario(configspace, deterministic=True, n_trials=200) # Use SMAC to find the best configuration/hyperparameters -smac = MultiObjectiveFacade(scenario, train) +smac = HyperparameterOptimizationFacade(scenario, train) incumbent = smac.optimize() ``` + +More examples can be found in the [documentation](https://automl.github.io/SMAC3/latest/examples/1%20Basics/1_quadratic_function/). + +## Visualization via DeepCAVE + +With DeepCAVE ([Repo](https://github.com/automl/DeepCAVE), [Paper](https://arxiv.org/abs/2206.03493)) you can visualize your SMAC runs. It is a visualization and analysis tool for AutoML (especially for the sub-problem +hyperparameter optimization) runs. + +## License + +This program is free software: you can redistribute it and/or modify +it under the terms of the 3-clause BSD license (please see the LICENSE file). + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ +You should have received a copy of the 3-clause BSD license +along with this program (see LICENSE file). +If not, see [here](https://opensource.org/licenses/BSD-3-Clause). + +## Contacting us + +If you have trouble using SMAC, a concrete question or found a bug, please create an [issue](https://github.com/automl/SMAC3/issues). This is the easiest way to communicate about these things with us. + +For all other inquiries, please write an email to smac[at]ai[dot]uni[dash]hannover[dot]de. + +## Miscellaneous + +SMAC3 is developed by the [AutoML Groups of the Universities of Hannover and +Freiburg](http://www.automl.org/). It is a featured optimizer on [AutoML Space](https://automl.space/automl-tools/). + +If you have found a bug, please report to [issues](https://github.com/automl/SMAC3/issues). Moreover, we are +appreciating any kind of help. Find our guidelines for contributing to this package +[here](CONTRIBUTING.md). + +If you use SMAC in one of your research projects, please cite our +[JMLR paper](https://jmlr.org/papers/v23/21-0888.html): +``` +@article{JMLR:v23:21-0888, + author = {Marius Lindauer and Katharina Eggensperger and Matthias Feurer and André Biedenkapp and Difan Deng and Carolin Benjamins and Tim Ruhkopf and René Sass and Frank Hutter}, + title = {SMAC3: A Versatile Bayesian Optimization Package for Hyperparameter Optimization}, + journal = {Journal of Machine Learning Research}, + year = {2022}, + volume = {23}, + number = {54}, + pages = {1--9}, + url = {http://jmlr.org/papers/v23/21-0888.html} +} +``` + +Copyright (c) 2025, [Leibniz University Hannover - Institute of AI](https://www.ai.uni-hannover.de/) \ No newline at end of file From cb9eab68494bf8709bf19aadda845519a762e00c Mon Sep 17 00:00:00 2001 From: "J. Rook" Date: Thu, 27 Mar 2025 14:06:09 +0100 Subject: [PATCH 56/74] Fix failing tests. 
Disentangle normalisation and aggregation --- smac/acquisition/maximizer/local_search.py | 2 +- smac/runhistory/runhistory.py | 10 ++++++---- tests/test_utils/test_pareto_front.py | 6 +++--- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/smac/acquisition/maximizer/local_search.py b/smac/acquisition/maximizer/local_search.py index ddd1bcc64..300a6a599 100644 --- a/smac/acquisition/maximizer/local_search.py +++ b/smac/acquisition/maximizer/local_search.py @@ -118,7 +118,7 @@ def _maximize( # Sort according to acq value configs_acq.sort(reverse=True, key=lambda x: x[0]) for a, inc in configs_acq: - inc.origin = "Local Search" + inc.origin = "Acquisition Function Maximizer: Local Search" return configs_acq diff --git a/smac/runhistory/runhistory.py b/smac/runhistory/runhistory.py index adc44c05e..86e44f4d8 100644 --- a/smac/runhistory/runhistory.py +++ b/smac/runhistory/runhistory.py @@ -463,6 +463,7 @@ def average_cost( config: Configuration, instance_seed_budget_keys: list[InstanceSeedBudgetKey] | None = None, normalize: bool = False, + run_multi_objective_algorithm: bool = False, ) -> float | list[float]: """Return the average cost of a configuration. This is the mean of costs of all instance- seed pairs. @@ -492,11 +493,12 @@ def average_cost( # [[100, 200], [0, 0]] -> [50, 100] averaged_costs = np.mean(costs, axis=0).tolist() - if normalize: - assert self.multi_objective_algorithm is not None - normalized_costs = normalize_costs(averaged_costs, self._objective_bounds) + if normalize: #only normalize also does aggregation. 
This needs to be disentangled + averaged_costs = normalize_costs(averaged_costs, self._objective_bounds) - return self.multi_objective_algorithm(normalized_costs) + if run_multi_objective_algorithm: + assert self.multi_objective_algorithm is not None + return self.multi_objective_algorithm(averaged_costs) else: return averaged_costs diff --git a/tests/test_utils/test_pareto_front.py b/tests/test_utils/test_pareto_front.py index c1bbf1403..19ee35b29 100644 --- a/tests/test_utils/test_pareto_front.py +++ b/tests/test_utils/test_pareto_front.py @@ -30,15 +30,15 @@ def test_crowding_distance(configspace_small): configs = configspace_small.sample_configuration(20) config_instance_seed_budget_keys = [[isb_key]] * 20 - # Add points on pareto + # Add points on Pareto rh.add(configs[0], cost=[5, 5], instance=isb_key.instance, budget=isb_key.budget, seed=isb_key.seed) rh.add(configs[1], cost=[4, 6], instance=isb_key.instance, budget=isb_key.budget, seed=isb_key.seed) - # Add points not on pareto + # Add points not on Pareto rh.add(configs[2], cost=[5, 6], instance=isb_key.instance, budget=isb_key.budget, seed=isb_key.seed) rh.add(configs[3], cost=[5, 6], instance=isb_key.instance, budget=isb_key.budget, seed=isb_key.seed) - # Calculate pareto front + # Calculate Pareto front incumbents = calculate_pareto_front(rh, configs[:4], config_instance_seed_budget_keys[:4]) sorted_configs = sort_by_crowding_distance(rh, incumbents, config_instance_seed_budget_keys[: len(incumbents)]) # Nothing should happen if we only have two points on the pareto front From 373dc087b9959003b55ba78080b3c401aecf6be7 Mon Sep 17 00:00:00 2001 From: "J. 
Rook" Date: Thu, 27 Mar 2025 16:09:39 +0100 Subject: [PATCH 57/74] Fix failing pytests --- smac/intensifier/mixins/intermediate_decision.py | 4 ++-- smac/runhistory/runhistory.py | 2 +- smac/utils/pareto_front.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/smac/intensifier/mixins/intermediate_decision.py b/smac/intensifier/mixins/intermediate_decision.py index bfdb46f67..7e90e3713 100644 --- a/smac/intensifier/mixins/intermediate_decision.py +++ b/smac/intensifier/mixins/intermediate_decision.py @@ -39,8 +39,8 @@ def _dominates(a, b) -> bool: # Checks if a dominates b - a = np.array(a) - b = np.array(b) + a = np.atleast_1d(a) + b = np.atleast_1d(b) return np.count_nonzero(a <= b) >= len(a) and np.count_nonzero(a < b) >= 1 class NewCostDominatesOldCost(): diff --git a/smac/runhistory/runhistory.py b/smac/runhistory/runhistory.py index 86e44f4d8..c7b7adf19 100644 --- a/smac/runhistory/runhistory.py +++ b/smac/runhistory/runhistory.py @@ -493,7 +493,7 @@ def average_cost( # [[100, 200], [0, 0]] -> [50, 100] averaged_costs = np.mean(costs, axis=0).tolist() - if normalize: #only normalize also does aggregation. 
This needs to be disentangled + if normalize: averaged_costs = normalize_costs(averaged_costs, self._objective_bounds) if run_multi_objective_algorithm: diff --git a/smac/utils/pareto_front.py b/smac/utils/pareto_front.py index 42280d5f8..4240c4689 100644 --- a/smac/utils/pareto_front.py +++ b/smac/utils/pareto_front.py @@ -41,7 +41,7 @@ def _get_costs( # configuration # However, we only want to consider the config trials # Average cost is a list of floats (one for each objective) - average_cost = runhistory.average_cost(config, isb_keys, normalize=normalize) + average_cost = runhistory.average_cost(config, isb_keys, normalize=normalize, run_multi_objective_algorithm=normalize) average_costs += [average_cost] # Let's work with a numpy array for efficiency From cc2762d43f27659a97cb1458ebb843dc5e181bd4 Mon Sep 17 00:00:00 2001 From: rookj Date: Tue, 7 Oct 2025 10:48:16 +0200 Subject: [PATCH 58/74] resolving tests --- smac/facade/abstract_facade.py | 13 +++++++------ smac/main/config_selector.py | 8 ++++---- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/smac/facade/abstract_facade.py b/smac/facade/abstract_facade.py index 81502a6b6..39069e671 100644 --- a/smac/facade/abstract_facade.py +++ b/smac/facade/abstract_facade.py @@ -432,12 +432,13 @@ def get_config_selector( retries: int = 16, ) -> ConfigSelector: """Returns the default configuration selector.""" - return ConfigSelector(scenario, - retrain_after=retrain_after, - retrain_wallclock_ratio=retrain_wallclock_ratio, - retries=retries, - max_new_config_tries=retries - ) + return ConfigSelector( + scenario, + retrain_after=retrain_after, + retrain_wallclock_ratio=retrain_wallclock_ratio, + # retries=retries, + max_new_config_tries=retries, + ) def _get_optimizer(self) -> SMBO: """Fills the SMBO with all the pre-initialized components.""" diff --git a/smac/main/config_selector.py b/smac/main/config_selector.py index 9e5eb2d14..10a549a0a 100644 --- a/smac/main/config_selector.py +++ 
b/smac/main/config_selector.py @@ -1,9 +1,9 @@ from __future__ import annotations -import time from typing import Any, Iterator import copy +import time import numpy as np from ConfigSpace import Configuration @@ -59,7 +59,7 @@ def __init__( *, retrain_after: int | None = 8, retrain_wallclock_ratio: float | None = None, - min_configurations: int = 2, + min_configurations: int = 1, max_new_config_tries: int = 16, min_trials: int = 1, ) -> None: @@ -97,7 +97,7 @@ def __init__( # Processed configurations should be stored here; this is important to not return the same configuration twice self._processed_configs: list[Configuration] = [] - #Check if there is at least one retrain condition + # Check if there is at least one retrain condition if self._retrain_after is None and self._retrain_wallclock_ratio is None: raise ValueError("No retrain condition specified!") @@ -254,7 +254,6 @@ def __iter__(self) -> Iterator[Configuration]: self._acquisition_training_times.append(time.time() - start_time) - failed_counter = 0 for config in challengers: if config not in self._processed_configs: @@ -310,6 +309,7 @@ def _check_for_retrain(self) -> bool: if self._retrain_wallclock_ratio is not None: if self._counter < self._min_configurations: + # Force a minimum number of configurations to be yielded despite the ratio return False # Total elapsed wallcock time From c6c4b8b2080868b7ad11c6eb20dfa7ec5d903467 Mon Sep 17 00:00:00 2001 From: rookj Date: Tue, 7 Oct 2025 15:26:59 +0200 Subject: [PATCH 59/74] intensifier fix for MF. 
Passes tests --- smac/facade/multi_fidelity_facade.py | 2 +- smac/intensifier/abstract_intensifier.py | 45 ++++++++-------- smac/intensifier/mixins/update_incumbent.py | 58 ++++++++++++--------- smac/intensifier/successive_halving.py | 39 +++++++++++++- smac/utils/pareto_front.py | 44 +++++++++------- 5 files changed, 119 insertions(+), 69 deletions(-) diff --git a/smac/facade/multi_fidelity_facade.py b/smac/facade/multi_fidelity_facade.py index 5e379b657..1621b7a11 100644 --- a/smac/facade/multi_fidelity_facade.py +++ b/smac/facade/multi_fidelity_facade.py @@ -44,7 +44,7 @@ def get_intensifier( # type: ignore * None: No shuffling at all and use the instance-seed order provided by the user. * "shuffle_once": Shuffle the instance-seed keys once and use the same order across all runs. * "shuffle": Shuffles the instance-seed keys for each bracket individually. - incumbent_selection : str, defaults to "any_budget" + incumbent_selection : str, defaults to "highest_observed_budget" How to select the incumbent when using budgets. Can be set to: * "any_budget": Incumbent is the best on any budget, i.e., the best performance regardless of budget. * "highest_observed_budget": Incumbent is the best in the highest budget run so far. 
diff --git a/smac/intensifier/abstract_intensifier.py b/smac/intensifier/abstract_intensifier.py index 117493d14..57fd15e97 100644 --- a/smac/intensifier/abstract_intensifier.py +++ b/smac/intensifier/abstract_intensifier.py @@ -1,9 +1,9 @@ from __future__ import annotations -import copy from abc import abstractmethod from typing import Any, Callable, Iterator +import copy import dataclasses import json from collections import defaultdict @@ -404,7 +404,7 @@ def get_instance_seed_budget_keys( return self.runhistory.get_instance_seed_budget_keys(config, highest_observed_budget_only=False) def get_incumbent_instance_seed_budget_keys(self, compare: bool = False) -> list[InstanceSeedBudgetKey]: - """Find the lowest intersection of instance-seed-budget keys for all incumbents.""" + """Find the intersection of instance-seed-budget keys for all incumbents.""" incumbents = self.get_incumbents() if len(incumbents) > 0: @@ -431,9 +431,9 @@ def get_incumbent_instance_seed_budget_key_differences(self, compare: bool = Fal return [] # Compute the actual differences - intersection_isb_keys = set.intersection(*map(set, incumbent_isb_keys)) # type: ignore - union_isb_keys = set.union(*map(set, incumbent_isb_keys)) # type: ignore - incumbent_isb_keys_differences = list(union_isb_keys - intersection_isb_keys) # type: ignore + intersection_isb_keys = set.intersection(*map(set, incumbent_isb_keys)) # type: ignore + union_isb_keys = set.union(*map(set, incumbent_isb_keys)) # type: ignore + incumbent_isb_keys_differences = list(union_isb_keys - intersection_isb_keys) # type: ignore # incumbent_isb_keys = list(set.difference(*map(set, incumbent_isb_keys))) # type: ignore if len(incumbent_isb_keys_differences) == 0: @@ -497,10 +497,7 @@ def _intermediate_comparison(self, config: Configuration) -> bool: config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys(compare=True) - - logger.debug( - 
f"Perform intermediate comparions of config {config_hash} with incumbents to see if it is worse" - ) + logger.debug(f"Perform intermediate comparisons of config {config_hash} with incumbents to see if it is worse") # TODO perform comparison with incumbent on current instances. # Check if the config with these number of trials is part of the Pareto front @@ -557,7 +554,7 @@ def update_incumbents(self, config: Configuration) -> None: config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) incumbent_isb_keys = self.get_incumbent_instance_seed_budget_keys(compare=True) - #Check if config holds keys + # Check if config holds keys # Note: This is especially the case if trials of a config are still running # because if trials are running, the runhistory does not update the trials in the fast data structure if len(config_isb_keys) == 0: @@ -578,7 +575,7 @@ def update_incumbents(self, config: Configuration) -> None: # Nothing else to do return - #Check if config isb is subset of incumbents + # Check if config isb is subset of incumbents # if not all([isb_key in incumbent_isb_keys for isb_key in config_isb_keys]): # # If the config is part of the incumbents this could happen # logger.info(f"Config {config_hash} did run on more instances than the incumbent. Cannot make a proper comparison.") @@ -590,7 +587,9 @@ def update_incumbents(self, config: Configuration) -> None: # Config did not run on all trials if self._check_for_intermediate_comparison(config): if not self._intermediate_comparison(config): - logger.debug(f"Rejected config {config_hash} in an intermediate comparison on {len(config_isb_keys)} trials.") + logger.debug( + f"Rejected config {config_hash} in an intermediate comparison on {len(config_isb_keys)} trials." 
+ ) self._add_rejected_config(config) return @@ -613,9 +612,9 @@ def update_incumbents(self, config: Configuration) -> None: # In this case, we have to determine which config replaced which incumbent and reject it # We will remove the oldest configuration (the one with the lowest id) because # set orders the ids ascending. - self._remove_incumbent(config=config, - previous_incumbent_ids=previous_incumbent_ids, - new_incumbent_ids=new_incumbent_ids) + self._remove_incumbent( + config=config, previous_incumbent_ids=previous_incumbent_ids, new_incumbent_ids=new_incumbent_ids + ) elif len(previous_incumbents) < len(new_incumbents): # Config becomes a new incumbent; nothing is rejected in this case self._remove_rejected_config(config) @@ -638,11 +637,10 @@ def update_incumbents(self, config: Configuration) -> None: if len(new_incumbents) > self._max_incumbents: all_incumbent_isb_keys = [incumbent_isb_keys for i in range(len(new_incumbents))] new_incumbents = self._cut_incumbents(new_incumbents, all_incumbent_isb_keys) - #TODO JG adjust. Other option: statistical test or HV (SMS-EMOA reduce function) + # TODO JG adjust. Other option: statistical test or HV (SMS-EMOA reduce function) self._update_trajectory(new_incumbents) - # def update_incumbents(self, config: Configuration) -> None: # """Updates the incumbents. This method is called everytime a trial is added to the runhistory. Since only # the affected config and the current incumbents are used, this method is very efficient. 
Furthermore, a @@ -844,7 +842,9 @@ def update_incumbents(self, config: Configuration) -> None: # # self._update_trajectory(new_incumbents) - def _cut_incumbents(self, incumbent_ids: list[int], all_incumbent_isb_keys: list[list[InstanceSeedBudgetKey]]) -> list[int]: + def _cut_incumbents( + self, incumbent_ids: list[int], all_incumbent_isb_keys: list[list[InstanceSeedBudgetKey]] + ) -> list[int]: new_incumbents = sort_by_crowding_distance(self.runhistory, incumbent_ids, all_incumbent_isb_keys) new_incumbents = new_incumbents[: self._max_incumbents] @@ -854,13 +854,14 @@ def _cut_incumbents(self, incumbent_ids: list[int], all_incumbent_isb_keys: list # del new_incumbent_ids[idx] logger.info( - f"Removed one incumbent using crowding distance because more than {self._max_incumbents} are " - "available." + f"Removed one incumbent using crowding distance because more than {self._max_incumbents} are " "available." ) return new_incumbents - def _remove_incumbent(self, config: Configuration, previous_incumbent_ids: list[int], new_incumbent_ids: list[int]) -> None: + def _remove_incumbent( + self, config: Configuration, previous_incumbent_ids: list[int], new_incumbent_ids: list[int] + ) -> None: """Remove incumbents if population is too big If new and old incumbents differ. 
@@ -950,7 +951,7 @@ def get_save_data(self) -> dict: try: incumbent_ids.append(self.runhistory.get_config_id(config)) except KeyError: - incumbent_ids.append(-1) #Should not happen, but occurs sometimes with small-budget runs + incumbent_ids.append(-1) # Should not happen, but occurs sometimes with small-budget runs logger.warning(f"{config} does not exist in runhistory, but is part of the incumbent!") data = { diff --git a/smac/intensifier/mixins/update_incumbent.py b/smac/intensifier/mixins/update_incumbent.py index 8ddb5d8e2..0ca61c78c 100644 --- a/smac/intensifier/mixins/update_incumbent.py +++ b/smac/intensifier/mixins/update_incumbent.py @@ -1,18 +1,18 @@ from __future__ import annotations -import copy -import itertools from abc import abstractmethod from typing import Any, Callable, Iterator -from scipy.stats import binom +import copy import dataclasses +import itertools import json from collections import defaultdict from pathlib import Path import numpy as np from ConfigSpace import Configuration +from scipy.stats import binom import smac from smac.callback import Callback @@ -29,7 +29,11 @@ from smac.scenario import Scenario from smac.utils.configspace import get_config_hash, print_config_changes from smac.utils.logging import get_logger -from smac.utils.pareto_front import calculate_pareto_front, sort_by_crowding_distance, _get_costs +from smac.utils.pareto_front import ( + _get_costs, + calculate_pareto_front, + sort_by_crowding_distance, +) __copyright__ = "Copyright 2022, automl.org" __license__ = "3-clause BSD" @@ -43,8 +47,8 @@ def _register_incumbent_update(self, **kwargs): self._update_incumbent_log = [] self._update_incumbent_log.append(kwargs) -class NonDominatedUpdate(DebugUpdate): +class NonDominatedUpdate(DebugUpdate): def _update_incumbent(self, config: Configuration) -> list[Configuration]: """Updates the incumbent with the config (which can be the challenger) @@ -67,18 +71,20 @@ def _update_incumbent(self, config: Configuration) -> 
list[Configuration]: # We compare the incumbents now and only return the ones on the Pareto front # _calculate_pareto_front returns only non-dominated points - new_incumbents = self._calculate_pareto_front(rh, incumbents, - all_incumbent_isb_keys) + new_incumbents = self._calculate_pareto_front(rh, incumbents, all_incumbent_isb_keys) - self._register_incumbent_update(config=config, - incumbent=self.get_incumbents(), - isb_keys=isb_keys, - new_incumbents=new_incumbents, - name="NonDominated",) + self._register_incumbent_update( + config=config, + incumbent=self.get_incumbents(), + isb_keys=isb_keys, + new_incumbents=new_incumbents, + name="NonDominated", + ) return new_incumbents -class BootstrapUpdate(DebugUpdate): + +class BootstrapUpdate(DebugUpdate): def _update_incumbent(self, config: Configuration) -> list[Configuration]: """Updates the incumbent with the config (which can be the challenger) @@ -110,23 +116,25 @@ def _update_incumbent(self, config: Configuration) -> list[Configuration]: for sid, sample in enumerate(samples): sample_isb_keys = [isb_keys[i] for i in sample] all_incumbent_isb_keys = [sample_isb_keys] * len(incumbents) - new_incumbents = self._calculate_pareto_front(self.runhistory, - incumbents, - all_incumbent_isb_keys) + new_incumbents = self._calculate_pareto_front(self.runhistory, incumbents, all_incumbent_isb_keys) verdicts[sid, :] = [incumbents[i] in new_incumbents for i in range(len(incumbents))] probabilities = np.count_nonzero(verdicts, axis=0) / n_samples - new_incumbent_ids = np.argwhere(probabilities >= 0.5).flatten() # Incumbent needs to be non-dominated at least 50% of the time + new_incumbent_ids = np.argwhere( + probabilities >= 0.5 + ).flatten() # Incumbent needs to be non-dominated at least 50% of the time new_incumbents = [incumbents[i] for i in new_incumbent_ids] - self._register_incumbent_update(config=config, - incumbent=self.get_incumbents(), - isb_keys=isb_keys, - new_incumbents=new_incumbents, - name="Bootstrap", - 
probabilities=probabilities, - n_samples=n_samples,) + self._register_incumbent_update( + config=config, + incumbent=self.get_incumbents(), + isb_keys=isb_keys, + new_incumbents=new_incumbents, + name="Bootstrap", + probabilities=probabilities, + n_samples=n_samples, + ) - return new_incumbents \ No newline at end of file + return new_incumbents diff --git a/smac/intensifier/successive_halving.py b/smac/intensifier/successive_halving.py index 546a27377..5167947f7 100644 --- a/smac/intensifier/successive_halving.py +++ b/smac/intensifier/successive_halving.py @@ -27,7 +27,7 @@ class SuccessiveHalving(AbstractIntensifier): """ - Implementation of Succesive Halving supporting multi-fidelity, multi-objective, and multi-processing. + Implementation of Successive Halving supporting multi-fidelity, multi-objective, and multi-processing. Internally, a tracker keeps track of configurations and their bracket and stage. The behaviour of this intensifier is as follows: @@ -583,3 +583,40 @@ def _get_next_order_seed(self) -> int | None: def _get_next_bracket(self) -> int: """Successive Halving only uses one bracket. Therefore, we always return 0 here.""" return 0 + + def _calculate_pareto_front( + self, + runhistory: RunHistory, + configs: list[Configuration], + config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]], + ) -> list[Configuration]: + """Compares the passed configurations and returns only the ones on the pareto front. Needs to include the budget type + + Parameters + ---------- + runhistory : RunHistory + The runhistory containing the given configurations. + configs : list[Configuration] + The configurations from which the Pareto front should be computed. + config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]] + The instance-seed budget keys for the configurations on the basis of which the Pareto front should be computed. + + Returns + ------- + pareto_front : list[Configuration] + The pareto front computed from the given configurations. 
+ """ + + # Add the budgets to the isb keys according to the set incumbent heuristic + for i, (config, isb_keys) in enumerate(zip(configs, config_instance_seed_budget_keys)): + existing_isb_keys = [] + for key in self.get_instance_seed_budget_keys(config, compare=False): + if InstanceSeedBudgetKey(instance=key.instance, seed=key.seed, budget=None) in isb_keys: + existing_isb_keys.append(key) + config_instance_seed_budget_keys[i] = existing_isb_keys + + return calculate_pareto_front( + runhistory=runhistory, + configs=configs, + config_instance_seed_budget_keys=config_instance_seed_budget_keys, + ) diff --git a/smac/utils/pareto_front.py b/smac/utils/pareto_front.py index 4240c4689..441ac7b99 100644 --- a/smac/utils/pareto_front.py +++ b/smac/utils/pareto_front.py @@ -24,7 +24,7 @@ def _get_costs( config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]] The instance-seed budget keys for the configs for which the costs should be returned. normalize: bool - If the costs should be normalised + If the costs should be normalized Returns ------- @@ -41,7 +41,9 @@ def _get_costs( # configuration # However, we only want to consider the config trials # Average cost is a list of floats (one for each objective) - average_cost = runhistory.average_cost(config, isb_keys, normalize=normalize, run_multi_objective_algorithm=normalize) + average_cost = runhistory.average_cost( + config, isb_keys, normalize=normalize, run_multi_objective_algorithm=normalize + ) average_costs += [average_cost] # Let's work with a numpy array for efficiency @@ -157,27 +159,28 @@ def sort_by_crowding_distance( return [c for c, _ in config_with_crowding] + def sort_by_hypervolume_contribution( runhistory: RunHistory, configs: list[Configuration], config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]], ) -> list[Configuration]: - """ Sorts the passed configurations by their hypervolume contribution. 
- - Parameters - ---------- - runhistory : RunHistory - The runhistory containing the given configurations. - configs : list[Configuration] - The configurations which should be sorted. - config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]] - The instance-seed budget keys for the configurations which should be sorted. - - Returns - ------- - sorted_list : list[Configuration] - Configurations sorted by hypervolume contribution. - """ + """Sorts the passed configurations by their hypervolume contribution. + + Parameters + ---------- + runhistory : RunHistory + The runhistory containing the given configurations. + configs : list[Configuration] + The configurations which should be sorted. + config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]] + The instance-seed budget keys for the configurations which should be sorted. + + Returns + ------- + sorted_list : list[Configuration] + Configurations sorted by hypervolume contribution. + """ # Get the average costs per configuration @@ -191,6 +194,7 @@ def sort_by_hypervolume_contribution( raise NotImplementedError + def calculate_hypervolume( runhistory: RunHistory, configs: list[Configuration], @@ -200,9 +204,9 @@ def calculate_hypervolume( if reference_point is None: reference_point = calculate_reference_point(runhistory) - raise NotImplementedError + def calculate_reference_point( runhistory: RunHistory, configs: list[Configuration] | None = None, @@ -214,4 +218,4 @@ def calculate_reference_point( return np.max(np.array(costs), axis=1) else: assert len(configs) == len(config_instance_seed_budget_keys) - raise NotImplementedError \ No newline at end of file + raise NotImplementedError From f390582bc70faba81840fe93063faad8b21cc132 Mon Sep 17 00:00:00 2001 From: rookj Date: Tue, 7 Oct 2025 16:04:32 +0200 Subject: [PATCH 60/74] fix merging retrain. 
test passes --- smac/main/config_selector.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/smac/main/config_selector.py b/smac/main/config_selector.py index 10a549a0a..c17610c15 100644 --- a/smac/main/config_selector.py +++ b/smac/main/config_selector.py @@ -282,15 +282,15 @@ def __iter__(self) -> Iterator[Configuration]: "Did not find enough configuration from the acquisition function. Sampling random configurations." ) random_configs_retries = 0 - while counter < self._retrain_after and random_configs_retries < self._max_new_config_tries: + while not retrain and random_configs_retries < self._max_new_config_tries: config = self._scenario.configspace.sample_configuration() if config not in self._processed_configs: - counter += 1 + self._counter += 1 config.origin = "Random Search (max retries, no candidates)" self._processed_configs.append(config) self._call_callbacks_on_end(config) yield config - retrain = counter == self._retrain_after + retrain = self._check_for_retrain() self._call_callbacks_on_start() else: random_configs_retries += 1 From 04643e5e75668d2a9c24fd91f76bc6eb44f7e7f4 Mon Sep 17 00:00:00 2001 From: benjamc Date: Tue, 7 Oct 2025 19:48:57 +0200 Subject: [PATCH 61/74] format: ruff --- .../function/expected_hypervolume.py | 32 +- smac/acquisition/maximizer/local_search.py | 9 +- .../maximizer/multi_objective_search.py | 19 +- smac/facade/algorithm_configuration_facade.py | 9 +- smac/facade/multi_objective_facade.py | 29 +- smac/intensifier/intensifier.py | 17 +- smac/intensifier/mixins/__init__.py | 2 +- .../mixins/intermediate_decision.py | 33 +- .../intensifier/mixins/intermediate_update.py | 321 +++++++++--------- .../multi_objective_intensifier.py | 21 +- smac/model/multi_objective_model.py | 2 +- smac/multi_objective/aggregation_strategy.py | 4 +- smac/runhistory/encoder/abstract_encoder.py | 1 - smac/runhistory/encoder/encoder.py | 1 - smac/runner/aclib_runner.py | 26 +- 15 files changed, 269 insertions(+), 257 
deletions(-) diff --git a/smac/acquisition/function/expected_hypervolume.py b/smac/acquisition/function/expected_hypervolume.py index 8cdd30eff..52794c62d 100644 --- a/smac/acquisition/function/expected_hypervolume.py +++ b/smac/acquisition/function/expected_hypervolume.py @@ -2,20 +2,21 @@ from typing import Any, Iterator -from ConfigSpace import Configuration - -import pygmo import numpy as np +import pygmo +from ConfigSpace import Configuration +from smac.acquisition.function.abstract_acquisition_function import ( + AbstractAcquisitionFunction, +) from smac.intensifier.abstract_intensifier import AbstractIntensifier -from smac.runhistory import TrialInfo, RunHistory -from smac.runhistory.encoder import AbstractRunHistoryEncoder +from smac.model.abstract_model import AbstractModel +from smac.runhistory import RunHistory, TrialInfo from smac.runhistory.dataclasses import InstanceSeedBudgetKey +from smac.runhistory.encoder import AbstractRunHistoryEncoder from smac.scenario import Scenario from smac.utils.configspace import get_config_hash from smac.utils.logging import get_logger -from smac.acquisition.function.abstract_acquisition_function import AbstractAcquisitionFunction -from smac.model.abstract_model import AbstractModel from smac.utils.multi_objective import normalize_costs # import torch @@ -69,6 +70,7 @@ # # return post + class AbstractHVI(AbstractAcquisitionFunction): def __init__(self): """Computes for a given x the predicted hypervolume improvement as @@ -109,12 +111,12 @@ def _update(self, **kwargs: Any) -> None: if incumbents is None: raise ValueError(f"Incumbents are not passed properly.") if len(incumbents) == 0: - raise ValueError(f"No incumbents here. Did the intensifier properly " - "update the incumbents in the runhistory?") + raise ValueError( + f"No incumbents here. Did the intensifier properly " "update the incumbents in the runhistory?" 
+ ) objective_bounds = np.array(self.runhistory.objective_bounds) - self._objective_bounds = self.runhistory_encoder.transform_response_values( - objective_bounds) + self._objective_bounds = self.runhistory_encoder.transform_response_values(objective_bounds) self._reference_point = [1.1] * len(self._objective_bounds) def get_hypervolume(self, points: np.ndarray = None, reference_point: list = None) -> float: @@ -162,8 +164,7 @@ def _compute(self, X: np.ndarray) -> np.ndarray: # all instances. Idea is this prevents optimizing for the initial instances which get it stuck in local optima # Option 2: Only on instances of population # Option 3: EVHI per instance and aggregate afterwards - mean, var_ = self.model.predict_marginalized(X) #Expected to be not normalized - + mean, var_ = self.model.predict_marginalized(X) # Expected to be not normalized phvi = np.zeros(len(X)) for i, indiv in enumerate(mean): @@ -179,6 +180,7 @@ def _compute(self, X: np.ndarray) -> np.ndarray: return phvi.reshape(-1, 1) + # class EHVI(AbstractHVI): # def __init__(self): # super(EHVI, self).__init__() @@ -252,8 +254,8 @@ def _compute(self, X: np.ndarray) -> np.ndarray: # # # # return ehvi.reshape(-1, 1) -class PHVI(AbstractHVI): +class PHVI(AbstractHVI): def __init__(self): super(PHVI, self).__init__() self.population_hv = None @@ -318,7 +320,7 @@ def _compute(self, X: np.ndarray) -> np.ndarray: if len(X.shape) == 1: X = X[:, np.newaxis] - mean, _ = self.model.predict_marginalized(X) #Expected to be not normalized + mean, _ = self.model.predict_marginalized(X) # Expected to be not normalized phvi = np.zeros(len(X)) for i, indiv in enumerate(mean): points = list(self.population_costs) + [indiv] diff --git a/smac/acquisition/maximizer/local_search.py b/smac/acquisition/maximizer/local_search.py index f45739ff0..76d7c256b 100644 --- a/smac/acquisition/maximizer/local_search.py +++ b/smac/acquisition/maximizer/local_search.py @@ -267,7 +267,6 @@ def _create_sort_keys(self, costs: np.array) -> 
list[list[float]]: sort_objectives = [costs.flatten()] return sort_objectives - @staticmethod def _unique_list(elements: list | itertools.chain) -> list: """ @@ -405,7 +404,9 @@ def _search( obtain_n[i] = len(neighbors_for_i) neighbors.extend(neighbors_for_i) - logger.debug(f"Iteration {num_iters} with {np.count_nonzero(active)} active searches and {len(neighbors)} aqcuisition function calls.") + logger.debug( + f"Iteration {num_iters} with {np.count_nonzero(active)} active searches and {len(neighbors)} aqcuisition function calls." + ) if len(neighbors) != 0: start_time = time.time() acq_val = self._acquisition_function(neighbors) @@ -493,8 +494,8 @@ def _search( ) logger.debug( - f"Local searches took {local_search_steps} steps and looked at {neighbors_looked_at} configurations." - f"Computing the acquisition function for each search took {np.sum(times_per_iteration)/num_candidates}" + f"Local searches took {local_search_steps} steps and looked at {neighbors_looked_at} configurations." + f"Computing the acquisition function for each search took {np.sum(times_per_iteration)/num_candidates}" f"(prev {np.mean(times_per_iteration)}) seconds on average and each acquisition function call took {times_per_iteration/np.sum(neighbors_looked_at)} seconds on average." f"In total the whole procedure took {np.sum(times_per_iteration)} seconds to look at {np.sum(neighbors_looked_at)} configurations." 
) diff --git a/smac/acquisition/maximizer/multi_objective_search.py b/smac/acquisition/maximizer/multi_objective_search.py index 13283b732..7a87d93a1 100644 --- a/smac/acquisition/maximizer/multi_objective_search.py +++ b/smac/acquisition/maximizer/multi_objective_search.py @@ -5,15 +5,16 @@ import itertools import time -from pygmo import fast_non_dominated_sorting - import numpy as np from ConfigSpace import Configuration, ConfigurationSpace from ConfigSpace.exceptions import ForbiddenValueError +from pygmo import fast_non_dominated_sorting from smac.acquisition.function import AbstractAcquisitionFunction +from smac.acquisition.maximizer.local_and_random_search import ( + LocalAndSortedRandomSearch, +) from smac.acquisition.maximizer.local_search import LocalSearch -from smac.acquisition.maximizer.local_and_random_search import LocalAndSortedRandomSearch from smac.utils.configspace import ( convert_configurations_to_array, get_one_exchange_neighbourhood, @@ -26,7 +27,7 @@ logger = get_logger(__name__) -class MOLocalSearch(LocalSearch): +class MOLocalSearch(LocalSearch): def _get_initial_points( self, previous_configs: list[Configuration], @@ -34,7 +35,7 @@ def _get_initial_points( additional_start_points: list[tuple[float, Configuration]] | None, ) -> list[Configuration]: """Get initial points to start search from. - + If we already have a population, add those to the initial points. Parameters @@ -50,8 +51,10 @@ def _get_initial_points( ------- list[Configuration] A list of initial points/configurations. - """ - init_points = super()._get_initial_points(previous_configs=previous_configs, n_points=n_points, additional_start_points=additional_start_points) + """ + init_points = super()._get_initial_points( + previous_configs=previous_configs, n_points=n_points, additional_start_points=additional_start_points + ) # Add population to Local search # TODO where is population saved? 
update accordingly @@ -131,5 +134,5 @@ def __init__( challengers=challengers, max_steps=max_steps, n_steps_plateau_walk=n_steps_plateau_walk, - seed=seed + seed=seed, ) diff --git a/smac/facade/algorithm_configuration_facade.py b/smac/facade/algorithm_configuration_facade.py index 70e93468b..e42f27deb 100644 --- a/smac/facade/algorithm_configuration_facade.py +++ b/smac/facade/algorithm_configuration_facade.py @@ -9,13 +9,13 @@ from smac.facade.abstract_facade import AbstractFacade from smac.initial_design.default_design import DefaultInitialDesign from smac.intensifier.intensifier import Intensifier +from smac.intensifier.mixins import intermediate_decision, intermediate_update from smac.model.random_forest.random_forest import RandomForest from smac.multi_objective.aggregation_strategy import MeanAggregationStrategy from smac.random_design.probability_design import ProbabilityRandomDesign from smac.runhistory.encoder.encoder import RunHistoryEncoder from smac.scenario import Scenario from smac.utils.logging import get_logger -from smac.intensifier.mixins import intermediate_update, intermediate_decision __copyright__ = "Copyright 2025, Leibniz University Hanover, Institute of AI" __license__ = "3-clause BSD" @@ -116,9 +116,10 @@ def get_intensifier( max_incumbents : int, defaults to 10 How many incumbents to keep track of in the case of multi-objective. 
""" - class NewIntensifier(intermediate_decision.NewCostDominatesOldCost, - intermediate_update.ClosestIncumbentComparison, - Intensifier): + + class NewIntensifier( + intermediate_decision.NewCostDominatesOldCost, intermediate_update.ClosestIncumbentComparison, Intensifier + ): pass return NewIntensifier( diff --git a/smac/facade/multi_objective_facade.py b/smac/facade/multi_objective_facade.py index ccb842525..0dca98836 100644 --- a/smac/facade/multi_objective_facade.py +++ b/smac/facade/multi_objective_facade.py @@ -2,22 +2,28 @@ from ConfigSpace import Configuration -from smac.acquisition.function.expected_improvement import EI from smac.acquisition.function.expected_hypervolume import PHVI +from smac.acquisition.function.expected_improvement import EI +from smac.acquisition.maximizer.multi_objective_search import ( + MOLocalAndSortedRandomSearch, +) from smac.facade.abstract_facade import AbstractFacade from smac.initial_design.default_design import DefaultInitialDesign from smac.intensifier.intensifier import Intensifier +from smac.intensifier.mixins import ( + intermediate_decision, + intermediate_update, + update_incumbent, +) from smac.intensifier.multi_objective_intensifier import MOIntensifier -from smac.intensifier.mixins import intermediate_update, intermediate_decision, update_incumbent -from smac.model.random_forest.random_forest import RandomForest from smac.model.multi_objective_model import MultiObjectiveModel +from smac.model.random_forest.random_forest import RandomForest from smac.multi_objective.aggregation_strategy import NoAggregationStrategy from smac.random_design.probability_design import ProbabilityRandomDesign from smac.runhistory.encoder.encoder import RunHistoryEncoder from smac.runhistory.encoder.log_encoder import RunHistoryLogEncoder from smac.scenario import Scenario from smac.utils.logging import get_logger -from smac.acquisition.maximizer.multi_objective_search import MOLocalAndSortedRandomSearch __copyright__ = "Copyright 2022, 
automl.org" __license__ = "3-clause BSD" @@ -81,10 +87,10 @@ def get_model( # type: ignore @staticmethod def get_intensifier( # type: ignore - scenario: Scenario, - *, - max_config_calls: int = 2000, - max_incumbents: int = 10, + scenario: Scenario, + *, + max_config_calls: int = 2000, + max_incumbents: int = 10, ) -> Intensifier: """Returns ``MOIntensifier`` as intensifier. Uses the default configuration for ``race_against``. @@ -97,9 +103,10 @@ def get_intensifier( # type: ignore max_incumbents : int, defaults to 10 How many incumbents to keep track of in the case of multi-objective. """ - class NewIntensifier(intermediate_decision.NewCostDominatesOldCost, - intermediate_update.ClosestIncumbentComparison, - MOIntensifier): + + class NewIntensifier( + intermediate_decision.NewCostDominatesOldCost, intermediate_update.ClosestIncumbentComparison, MOIntensifier + ): pass return NewIntensifier( diff --git a/smac/intensifier/intensifier.py b/smac/intensifier/intensifier.py index 5ab697b2a..c3239c6be 100644 --- a/smac/intensifier/intensifier.py +++ b/smac/intensifier/intensifier.py @@ -1,8 +1,9 @@ from __future__ import annotations -import time from typing import Any, Iterator +import time + from ConfigSpace import Configuration from smac.intensifier.abstract_intensifier import AbstractIntensifier @@ -120,8 +121,10 @@ def __iter__(self) -> Iterator[TrialInfo]: is_keys = self.get_instance_seed_keys_of_interest() if len(is_keys) < self._min_config_calls: - logger.debug(f"There are less instance, seed pairs of interest than the requested minimum trails per " - f"configuration. Changing min_config_calls from {self._min_config_calls} to {len(is_keys)}") + logger.debug( + f"There are less instance, seed pairs of interest than the requested minimum trails per " + f"configuration. Changing min_config_calls from {self._min_config_calls} to {len(is_keys)}" + ) self._min_config_calls = len(is_keys) # What if there are already trials in the runhistory? Should we queue them up? 
@@ -282,13 +285,14 @@ def __iter__(self) -> Iterator[TrialInfo]: if len(incumbent_isb_keys) > 0: isk_keys = incumbent_isb_keys - trials = self._get_next_trials(config, N=N, from_keys=isk_keys) if len(trials) == 0: # We remove the config and do not add it back to the queue. self._queue.remove((config, N)) - logger.debug(f"--- No trails to evaluate for config {config_hash}. " - f"Removed config {config_hash} with N={N} from queue.") + logger.debug( + f"--- No trails to evaluate for config {config_hash}. " + f"Removed config {config_hash} with N={N} from queue." + ) else: logger.debug(f"--- Yielding {len(trials)} trials to evaluate config {config_hash}...") for trial in trials: @@ -305,7 +309,6 @@ def __iter__(self) -> Iterator[TrialInfo]: self._queue.remove((config, N)) logger.debug(f"--- Removed config {config_hash} with N={N} from queue.") - # Finally, we add the same config to the queue with a higher N # If the config was rejected by the runhistory, then it's been removed in the next iteration if N < self._max_config_calls and config not in self.get_rejected_configs(): diff --git a/smac/intensifier/mixins/__init__.py b/smac/intensifier/mixins/__init__.py index 7b024699a..f096d7a48 100644 --- a/smac/intensifier/mixins/__init__.py +++ b/smac/intensifier/mixins/__init__.py @@ -1,3 +1,3 @@ """ Mixin are used to overwrite single functions in the intensifier classes -""" \ No newline at end of file +""" diff --git a/smac/intensifier/mixins/intermediate_decision.py b/smac/intensifier/mixins/intermediate_decision.py index 7e90e3713..c43c744d0 100644 --- a/smac/intensifier/mixins/intermediate_decision.py +++ b/smac/intensifier/mixins/intermediate_decision.py @@ -1,18 +1,18 @@ from __future__ import annotations -import copy -import itertools from abc import abstractmethod from typing import Any, Callable, Iterator -from scipy.stats import binom +import copy import dataclasses +import itertools import json from collections import defaultdict from pathlib import Path import 
numpy as np from ConfigSpace import Configuration +from scipy.stats import binom import smac from smac.callback import Callback @@ -29,7 +29,11 @@ from smac.scenario import Scenario from smac.utils.configspace import get_config_hash, print_config_changes from smac.utils.logging import get_logger -from smac.utils.pareto_front import calculate_pareto_front, sort_by_crowding_distance, _get_costs +from smac.utils.pareto_front import ( + _get_costs, + calculate_pareto_front, + sort_by_crowding_distance, +) __copyright__ = "Copyright 2022, automl.org" __license__ = "3-clause BSD" @@ -43,8 +47,8 @@ def _dominates(a, b) -> bool: b = np.atleast_1d(b) return np.count_nonzero(a <= b) >= len(a) and np.count_nonzero(a < b) >= 1 -class NewCostDominatesOldCost(): +class NewCostDominatesOldCost: def _check_for_intermediate_comparison(self, config: Configuration) -> bool: """ @@ -72,10 +76,10 @@ def _check_for_intermediate_comparison(self, config: Configuration) -> bool: return True return False -class NewCostDominatesOldCostSkipFirst(): +class NewCostDominatesOldCostSkipFirst: def _check_for_intermediate_comparison(self, config: Configuration) -> bool: - """ Do the first comparison with the incumbent when the configuration dominates the cost after finishing its first trial + """Do the first comparison with the incumbent when the configuration dominates the cost after finishing its first trial Parameters ---------- @@ -101,8 +105,8 @@ def _check_for_intermediate_comparison(self, config: Configuration) -> bool: return True return False -class DoublingNComparison(): +class DoublingNComparison: def _check_for_intermediate_comparison(self, config: Configuration) -> bool: """ @@ -125,23 +129,20 @@ def _check_for_intermediate_comparison(self, config: Configuration) -> bool: # return len(config_isb_keys) in trigger_points nkeys = len(config_isb_keys) - return (nkeys+1) & nkeys == 0 # checks if nkeys+1 is a power of 2 (complies with the sequence (2**n)-1) - + return (nkeys + 1) & nkeys == 
0 # checks if nkeys+1 is a power of 2 (complies with the sequence (2**n)-1) -class Always(): +class Always: def _check_for_intermediate_comparison(self, config: Configuration) -> bool: return True -class Never(): - +class Never: def _check_for_intermediate_comparison(self, config: Configuration) -> bool: return False -class DoublingNComparisonFour(): - +class DoublingNComparisonFour: def _check_for_intermediate_comparison(self, config: Configuration) -> bool: """ @@ -158,7 +159,7 @@ def _check_for_intermediate_comparison(self, config: Configuration) -> bool: config_hash = get_config_hash(config) max_trigger_number = int(np.ceil(np.log2(self._max_config_calls))) - trigger_points = [(2 ** n) - 1 for n in range(2, max_trigger_number + 1)] # 1, 3, 7, 15, ... + trigger_points = [(2**n) - 1 for n in range(2, max_trigger_number + 1)] # 1, 3, 7, 15, ... logger.debug(f"{trigger_points=}") logger.debug(f"{len(config_isb_keys)=}") return len(config_isb_keys) in trigger_points diff --git a/smac/intensifier/mixins/intermediate_update.py b/smac/intensifier/mixins/intermediate_update.py index 413f83c69..529dcf804 100644 --- a/smac/intensifier/mixins/intermediate_update.py +++ b/smac/intensifier/mixins/intermediate_update.py @@ -1,18 +1,18 @@ from __future__ import annotations -import copy -import itertools from abc import abstractmethod from typing import Any, Callable, Iterator -from scipy.stats import binom +import copy import dataclasses +import itertools import json from collections import defaultdict from pathlib import Path import numpy as np from ConfigSpace import Configuration +from scipy.stats import binom import smac from smac.callback import Callback @@ -29,7 +29,11 @@ from smac.scenario import Scenario from smac.utils.configspace import get_config_hash, print_config_changes from smac.utils.logging import get_logger -from smac.utils.pareto_front import calculate_pareto_front, sort_by_crowding_distance, _get_costs +from smac.utils.pareto_front import ( + _get_costs, + 
calculate_pareto_front, + sort_by_crowding_distance, +) __copyright__ = "Copyright 2022, automl.org" __license__ = "3-clause BSD" @@ -38,7 +42,6 @@ class DebugComparison(object): - def _register_comparison(self, **kwargs): logger.debug(f"Made intermediate comparison with {kwargs['name']} comparison ") if not hasattr(self, "_intermediate_comparisons_log"): @@ -57,7 +60,6 @@ def _get_costs_comp(self, config: Configuration) -> dict: class FullIncumbentComparison(DebugComparison): - def _intermediate_comparison(self, config: Configuration) -> bool: """Compares the configuration against the incumbent @@ -72,19 +74,15 @@ def _intermediate_comparison(self, config: Configuration) -> bool: config_hash = get_config_hash(config) incumbents = self.get_incumbents() config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) - incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys( - compare=True) + incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys(compare=True) - logger.debug( - f"Perform intermediate comparions of config {config_hash} with incumbents to see if it is worse" - ) + logger.debug(f"Perform intermediate comparions of config {config_hash} with incumbents to see if it is worse") # TODO perform comparison with incumbent on current instances. # Check if the config with these number of trials is part of the Pareto front # Check if the incumbents ran on all the ones of this config if not all([key in incumbent_isb_comparison_keys for key in config_isb_keys]): - logger.debug( - "Config ran on other isb_keys than the incumbents. Should not happen.") + logger.debug("Config ran on other isb_keys than the incumbents. 
Should not happen.") return True # Ensure that the config is not part of the incumbent @@ -98,16 +96,17 @@ def _intermediate_comparison(self, config: Configuration) -> bool: # Only the trials of the challenger all_incumbent_isb_keys = [config_isb_keys for _ in incumbents] - new_incumbents = self._calculate_pareto_front(self.runhistory, incumbents, - all_incumbent_isb_keys) + new_incumbents = self._calculate_pareto_front(self.runhistory, incumbents, all_incumbent_isb_keys) verdict = config in new_incumbents - self._register_comparison(config=config, - incumbent=self.get_incumbents(), - isb_keys=len(config_isb_keys), - costs=self._get_costs_comp(config), - prediction=verdict, - name="FullInc") + self._register_comparison( + config=config, + incumbent=self.get_incumbents(), + isb_keys=len(config_isb_keys), + costs=self._get_costs_comp(config), + prediction=verdict, + name="FullInc", + ) return config in new_incumbents @@ -127,17 +126,13 @@ def _intermediate_comparison(self, config: Configuration) -> bool: config_hash = get_config_hash(config) incumbents = self.get_incumbents() config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) - incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys( - compare=True) + incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys(compare=True) - logger.debug( - f"Perform intermediate comparions of config {config_hash} with incumbents to see if it is worse" - ) + logger.debug(f"Perform intermediate comparions of config {config_hash} with incumbents to see if it is worse") # Check if the incumbents ran on all the ones of this config if not all([key in incumbent_isb_comparison_keys for key in config_isb_keys]): - logger.debug( - "Config ran on other isb_keys than the incumbents. Should not happen.") + logger.debug("Config ran on other isb_keys than the incumbents. 
Should not happen.") return True # Ensure that the config is not part of the incumbent @@ -151,17 +146,17 @@ def _intermediate_comparison(self, config: Configuration) -> bool: # Only the trials of the challenger all_incumbent_isb_keys = [config_isb_keys for _ in incumbents] - new_incumbents = self._calculate_pareto_front(self.runhistory, - incumbents, - all_incumbent_isb_keys) + new_incumbents = self._calculate_pareto_front(self.runhistory, incumbents, all_incumbent_isb_keys) verdict = config in new_incumbents - self._register_comparison(config=config, - incumbent=self.get_incumbents(), - isb_keys=len(config_isb_keys), - costs=self._get_costs_comp(config), - prediction=verdict, - name="SingleInc") + self._register_comparison( + config=config, + incumbent=self.get_incumbents(), + isb_keys=len(config_isb_keys), + costs=self._get_costs_comp(config), + prediction=verdict, + name="SingleInc", + ) return config in new_incumbents @@ -181,12 +176,9 @@ def _intermediate_comparison(self, config: Configuration) -> bool: config_hash = get_config_hash(config) incumbents = self.get_incumbents() config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) - incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys( - compare=True) + incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys(compare=True) - logger.debug( - f"Perform intermediate comparisons of config {config_hash} with incumbents to see if it is worse" - ) + logger.debug(f"Perform intermediate comparisons of config {config_hash} with incumbents to see if it is worse") # Ensure that the config is not part of the incumbent if config in incumbents: @@ -194,13 +186,12 @@ def _intermediate_comparison(self, config: Configuration) -> bool: # Check if the incumbents ran on all the ones of this config if not all([key in incumbent_isb_comparison_keys for key in config_isb_keys]): - logger.debug( - "Config ran on other isb_keys than the incumbents. 
Should not happen.") + logger.debug("Config ran on other isb_keys than the incumbents. Should not happen.") return True # Only compare domination between one incumbent (as relaxation measure) - #iid = self._rng.choice(len(incumbents)) - #TODO Normalize to determine closests? + # iid = self._rng.choice(len(incumbents)) + # TODO Normalize to determine closests? inc_costs = _get_costs(self.runhistory, incumbents, [config_isb_keys for _ in incumbents], normalize=True) conf_cost = _get_costs(self.runhistory, [config], [config_isb_keys], normalize=True)[0] distances = [np.linalg.norm(inc_cost - conf_cost) for inc_cost in inc_costs] @@ -210,17 +201,17 @@ def _intermediate_comparison(self, config: Configuration) -> bool: # Only the trials of the challenger all_incumbent_isb_keys = [config_isb_keys for _ in incumbents] - new_incumbents = self._calculate_pareto_front(self.runhistory, - incumbents, - all_incumbent_isb_keys) + new_incumbents = self._calculate_pareto_front(self.runhistory, incumbents, all_incumbent_isb_keys) verdict = config in new_incumbents - self._register_comparison(config=config, - incumbent=self.get_incumbents(), - isb_keys=len(config_isb_keys), - costs=self._get_costs_comp(config), - prediction=verdict, - name="ClosestInc") + self._register_comparison( + config=config, + incumbent=self.get_incumbents(), + isb_keys=len(config_isb_keys), + costs=self._get_costs_comp(config), + prediction=verdict, + name="ClosestInc", + ) return config in new_incumbents @@ -240,13 +231,11 @@ def _intermediate_comparison(self, config: Configuration) -> bool: config_hash = get_config_hash(config) incumbents = self.get_incumbents() config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) - incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys( - compare=True) + incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys(compare=True) # Check if the incumbents ran on all the ones of this config if not all([key in 
incumbent_isb_comparison_keys for key in config_isb_keys]): - logger.debug( - "Config ran on other isb_keys than the incumbents. Should not happen.") + logger.debug("Config ran on other isb_keys than the incumbents. Should not happen.") return True # Ensure that the config is not part of the incumbent @@ -255,12 +244,14 @@ def _intermediate_comparison(self, config: Configuration) -> bool: config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) verdict = self._rng.random() >= 0.5 - self._register_comparison(config=config, - incumbent=self.get_incumbents(), - isb_keys=len(config_isb_keys), - costs=self._get_costs_comp(config), - prediction=verdict, - name="Random") + self._register_comparison( + config=config, + incumbent=self.get_incumbents(), + isb_keys=len(config_isb_keys), + costs=self._get_costs_comp(config), + prediction=verdict, + name="Random", + ) return verdict @@ -279,13 +270,11 @@ def _intermediate_comparison(self, config: Configuration) -> bool: config_hash = get_config_hash(config) incumbents = self.get_incumbents() config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) - incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys( - compare=True) + incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys(compare=True) # Check if the incumbents ran on all the ones of this config if not all([key in incumbent_isb_comparison_keys for key in config_isb_keys]): - logger.debug( - "Config ran on other isb_keys than the incumbents. Should not happen.") + logger.debug("Config ran on other isb_keys than the incumbents. 
Should not happen.") return True # Ensure that the config is not part of the incumbent @@ -294,17 +283,18 @@ def _intermediate_comparison(self, config: Configuration) -> bool: config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) verdict = True - self._register_comparison(config=config, - incumbent=self.get_incumbents(), - isb_keys=len(config_isb_keys), - costs=self._get_costs_comp(config), - prediction=verdict, - name="NoComp") + self._register_comparison( + config=config, + incumbent=self.get_incumbents(), + isb_keys=len(config_isb_keys), + costs=self._get_costs_comp(config), + prediction=verdict, + name="NoComp", + ) return verdict class BootstrapComparison(DebugComparison): - def _intermediate_comparison(self, config: Configuration) -> bool: """Compares the configuration by generating bootstraps @@ -319,13 +309,11 @@ def _intermediate_comparison(self, config: Configuration) -> bool: config_hash = get_config_hash(config) incumbents = self.get_incumbents() config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) - incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys( - compare=True) + incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys(compare=True) # Check if the incumbents ran on all the ones of this config if not all([key in incumbent_isb_comparison_keys for key in config_isb_keys]): - logger.debug( - "Config ran on other isb_keys than the incumbents. Should not happen.") + logger.debug("Config ran on other isb_keys than the incumbents. 
Should not happen.") return True # Ensure that the config is not part of the incumbent @@ -337,41 +325,41 @@ def _intermediate_comparison(self, config: Configuration) -> bool: n_samples = 1000 if len(config_isb_keys) < 7: # When there are only a limited number of trials available we run all combinations - samples = list(itertools.combinations_with_replacement(list(range(len(config_isb_keys))), r=len(config_isb_keys))) + samples = list( + itertools.combinations_with_replacement(list(range(len(config_isb_keys))), r=len(config_isb_keys)) + ) n_samples = len(samples) else: - samples = np.random.choice(len(config_isb_keys), - (n_samples, len(config_isb_keys)), - replace=True) + samples = np.random.choice(len(config_isb_keys), (n_samples, len(config_isb_keys)), replace=True) verdicts = np.zeros(n_samples, dtype=bool) - for sid, sample in enumerate(samples): sample_isb_keys = [config_isb_keys[i] for i in sample] - all_incumbent_isb_keys = [sample_isb_keys]*len(incumbents) - new_incumbents = self._calculate_pareto_front(self.runhistory, - incumbents, - all_incumbent_isb_keys) + all_incumbent_isb_keys = [sample_isb_keys] * len(incumbents) + new_incumbents = self._calculate_pareto_front(self.runhistory, incumbents, all_incumbent_isb_keys) verdicts[sid] = config in new_incumbents - verdict = np.count_nonzero(verdicts) >= 0.5 * n_samples # The config is in more than 50% of the times non-dominated - #P = np.count_nonzero(verdicts)/n_samples - #print(f"P = {np.count_nonzero(verdicts)}/{n_samples}={P:.2f}") - self._register_comparison(config=config, - incumbent=self.get_incumbents(), - isb_keys=len(config_isb_keys), - costs=self._get_costs_comp(config), - prediction=verdict, - name="Bootstrap", - probability=np.count_nonzero(verdicts)/n_samples, - n_samples=n_samples) + verdict = ( + np.count_nonzero(verdicts) >= 0.5 * n_samples + ) # The config is in more than 50% of the times non-dominated + # P = np.count_nonzero(verdicts)/n_samples + # print(f"P = 
{np.count_nonzero(verdicts)}/{n_samples}={P:.2f}") + self._register_comparison( + config=config, + incumbent=self.get_incumbents(), + isb_keys=len(config_isb_keys), + costs=self._get_costs_comp(config), + prediction=verdict, + name="Bootstrap", + probability=np.count_nonzero(verdicts) / n_samples, + n_samples=n_samples, + ) return verdict class BootstrapSingleComparison(DebugComparison): - def _intermediate_comparison(self, config: Configuration) -> bool: """Compares the configuration by generating bootstraps @@ -387,13 +375,11 @@ def _intermediate_comparison(self, config: Configuration) -> bool: config_hash = get_config_hash(config) incumbents = self.get_incumbents() config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) - incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys( - compare=True) + incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys(compare=True) # Check if the incumbents ran on all the ones of this config if not all([key in incumbent_isb_comparison_keys for key in config_isb_keys]): - logger.debug( - "Config ran on other isb_keys than the incumbents. Should not happen.") + logger.debug("Config ran on other isb_keys than the incumbents. 
Should not happen.") return True # Ensure that the config is not part of the incumbent @@ -405,41 +391,41 @@ def _intermediate_comparison(self, config: Configuration) -> bool: n_samples = 1000 if len(config_isb_keys) < 7: # When there are only a limited number of trials available we run all combinations - samples = list(itertools.combinations_with_replacement(list(range(len(config_isb_keys))), r=len(config_isb_keys))) + samples = list( + itertools.combinations_with_replacement(list(range(len(config_isb_keys))), r=len(config_isb_keys)) + ) n_samples = len(samples) else: - samples = np.random.choice(len(config_isb_keys), - (n_samples, len(config_isb_keys)), - replace=True) + samples = np.random.choice(len(config_isb_keys), (n_samples, len(config_isb_keys)), replace=True) verdicts = np.zeros(n_samples, dtype=bool) - for sid, sample in enumerate(samples): sample_isb_keys = [config_isb_keys[i] for i in sample] - all_incumbent_isb_keys = [sample_isb_keys]*len(incumbents) - new_incumbents = self._calculate_pareto_front(self.runhistory, - incumbents, - all_incumbent_isb_keys) + all_incumbent_isb_keys = [sample_isb_keys] * len(incumbents) + new_incumbents = self._calculate_pareto_front(self.runhistory, incumbents, all_incumbent_isb_keys) verdicts[sid] = config in new_incumbents - verdict = np.count_nonzero(verdicts) >= 0.5 * n_samples # The config is in more than 50% of the times non-dominated - #P = np.count_nonzero(verdicts)/n_samples - #print(f"P = {np.count_nonzero(verdicts)}/{n_samples}={P:.2f}") - self._register_comparison(config=config, - incumbent=self.get_incumbents(), - isb_keys=len(config_isb_keys), - costs=self._get_costs_comp(config), - prediction=verdict, - name="BootstrapSingle", - probability=np.count_nonzero(verdicts)/n_samples, - n_samples=n_samples) + verdict = ( + np.count_nonzero(verdicts) >= 0.5 * n_samples + ) # The config is in more than 50% of the times non-dominated + # P = np.count_nonzero(verdicts)/n_samples + # print(f"P = 
{np.count_nonzero(verdicts)}/{n_samples}={P:.2f}") + self._register_comparison( + config=config, + incumbent=self.get_incumbents(), + isb_keys=len(config_isb_keys), + costs=self._get_costs_comp(config), + prediction=verdict, + name="BootstrapSingle", + probability=np.count_nonzero(verdicts) / n_samples, + n_samples=n_samples, + ) return verdict class BootstrapClosestComparison(DebugComparison): - def _intermediate_comparison(self, config: Configuration) -> bool: """Compares the configuration by generating bootstraps @@ -455,13 +441,11 @@ def _intermediate_comparison(self, config: Configuration) -> bool: config_hash = get_config_hash(config) incumbents = self.get_incumbents() config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) - incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys( - compare=True) + incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys(compare=True) # Check if the incumbents ran on all the ones of this config if not all([key in incumbent_isb_comparison_keys for key in config_isb_keys]): - logger.debug( - "Config ran on other isb_keys than the incumbents. Should not happen.") + logger.debug("Config ran on other isb_keys than the incumbents. 
Should not happen.") return True # Ensure that the config is not part of the incumbent @@ -476,36 +460,37 @@ def _intermediate_comparison(self, config: Configuration) -> bool: n_samples = 1000 if len(config_isb_keys) < 7: # When there are only a limited number of trials available we run all combinations - samples = list(itertools.combinations_with_replacement(list(range(len(config_isb_keys))), r=len(config_isb_keys))) + samples = list( + itertools.combinations_with_replacement(list(range(len(config_isb_keys))), r=len(config_isb_keys)) + ) n_samples = len(samples) else: - samples = np.random.choice(len(config_isb_keys), - (n_samples, len(config_isb_keys)), - replace=True) + samples = np.random.choice(len(config_isb_keys), (n_samples, len(config_isb_keys)), replace=True) verdicts = np.zeros(n_samples, dtype=bool) - for sid, sample in enumerate(samples): sample_isb_keys = [config_isb_keys[i] for i in sample] - all_incumbent_isb_keys = [sample_isb_keys]*len(incumbents) - new_incumbents = self._calculate_pareto_front(self.runhistory, - incumbents, - all_incumbent_isb_keys) + all_incumbent_isb_keys = [sample_isb_keys] * len(incumbents) + new_incumbents = self._calculate_pareto_front(self.runhistory, incumbents, all_incumbent_isb_keys) verdicts[sid] = config in new_incumbents - verdict = np.count_nonzero(verdicts) >= 0.5 * n_samples # The config is in more than 50% of the times non-dominated - #P = np.count_nonzero(verdicts)/n_samples - #print(f"P = {np.count_nonzero(verdicts)}/{n_samples}={P:.2f}") - self._register_comparison(config=config, - incumbent=self.get_incumbents(), - isb_keys=len(config_isb_keys), - costs=self._get_costs_comp(config), - prediction=verdict, - name="BootstrapClosest", - probability=np.count_nonzero(verdicts)/n_samples, - n_samples=n_samples) + verdict = ( + np.count_nonzero(verdicts) >= 0.5 * n_samples + ) # The config is in more than 50% of the times non-dominated + # P = np.count_nonzero(verdicts)/n_samples + # print(f"P = 
{np.count_nonzero(verdicts)}/{n_samples}={P:.2f}") + self._register_comparison( + config=config, + incumbent=self.get_incumbents(), + isb_keys=len(config_isb_keys), + costs=self._get_costs_comp(config), + prediction=verdict, + name="BootstrapClosest", + probability=np.count_nonzero(verdicts) / n_samples, + n_samples=n_samples, + ) return verdict @@ -542,13 +527,11 @@ def dominates(a, b): config_hash = get_config_hash(config) incumbents = self.get_incumbents() config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) - incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys( - compare=True) + incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys(compare=True) # Check if the incumbents ran on all the ones of this config if not all([key in incumbent_isb_comparison_keys for key in config_isb_keys]): - logger.debug( - "Config ran on other isb_keys than the incumbents. Should not happen.") + logger.debug("Config ran on other isb_keys than the incumbents. 
Should not happen.") return True # Ensure that the config is not part of the incumbent @@ -559,9 +542,13 @@ def dominates(a, b): chall_perf = self.runhistory._cost(config, config_isb_keys) for incumbent in incumbents: inc_perf = self.runhistory._cost(incumbent, config_isb_keys) - n_ij = sum([dominates(*x) for x in zip(chall_perf, inc_perf)]) # Number of times the incumbent candidate dominates the challenger - n_ji = sum([dominates(*x) for x in zip(inc_perf, chall_perf)]) # Number of times the challenger dominates the incumbent candidate - p_value = 1 - binom.cdf(n_ij - 1, n_ij + n_ji, .5) + n_ij = sum( + [dominates(*x) for x in zip(chall_perf, inc_perf)] + ) # Number of times the incumbent candidate dominates the challenger + n_ji = sum( + [dominates(*x) for x in zip(inc_perf, chall_perf)] + ) # Number of times the challenger dominates the incumbent candidate + p_value = 1 - binom.cdf(n_ij - 1, n_ij + n_ji, 0.5) p_values.append(p_value) pvalues_order = np.argsort(p_values) @@ -578,12 +565,14 @@ def dominates(a, b): break verdict = np.count_nonzero(reject) != 0 - #P = np.count_nonzero(verdicts)/n_samples - #print(f"P = {np.count_nonzero(verdicts)}/{n_samples}={P:.2f}") - self._register_comparison(config=config, - incumbent=self.get_incumbents(), - isb_keys=len(config_isb_keys), - costs={conf: cost for conf, cost in zip(incumbents, costs)}, - prediction=verdict, - name="S-Race") - return verdict \ No newline at end of file + # P = np.count_nonzero(verdicts)/n_samples + # print(f"P = {np.count_nonzero(verdicts)}/{n_samples}={P:.2f}") + self._register_comparison( + config=config, + incumbent=self.get_incumbents(), + isb_keys=len(config_isb_keys), + costs={conf: cost for conf, cost in zip(incumbents, costs)}, + prediction=verdict, + name="S-Race", + ) + return verdict diff --git a/smac/intensifier/multi_objective_intensifier.py b/smac/intensifier/multi_objective_intensifier.py index 8ecf7ea53..9c0d17f19 100644 --- a/smac/intensifier/multi_objective_intensifier.py +++ 
b/smac/intensifier/multi_objective_intensifier.py @@ -17,6 +17,10 @@ import smac from smac.callback import Callback from smac.constants import MAXINT +from smac.intensifier.abstract_intensifier import AbstractIntensifier +from smac.intensifier.hyperband import Hyperband +from smac.intensifier.intensifier import Intensifier +from smac.intensifier.successive_halving import SuccessiveHalving from smac.main.config_selector import ConfigSelector from smac.runhistory import TrialInfo from smac.runhistory.dataclasses import ( @@ -30,11 +34,6 @@ from smac.utils.configspace import get_config_hash, print_config_changes from smac.utils.logging import get_logger from smac.utils.pareto_front import calculate_pareto_front, sort_by_crowding_distance -from smac.intensifier.abstract_intensifier import AbstractIntensifier -from smac.intensifier.hyperband import Hyperband -from smac.intensifier.successive_halving import SuccessiveHalving -from smac.intensifier.intensifier import Intensifier - __copyright__ = "Copyright 2022, automl.org" __license__ = "3-clause BSD" @@ -43,6 +42,7 @@ # TODO add minimum population size? 
+ class MOIntensifierMixin(object): def _calculate_pareto_front( self, @@ -60,8 +60,10 @@ def _calculate_pareto_front( # # TODO adjust # raise NotImplementedError - def _cut_incumbents(self, incumbent_ids: list[int], all_incumbent_isb_keys: list[list[InstanceSeedBudgetKey]]) -> list[int]: - #TODO JG sort by hypervolume + def _cut_incumbents( + self, incumbent_ids: list[int], all_incumbent_isb_keys: list[list[InstanceSeedBudgetKey]] + ) -> list[int]: + # TODO JG sort by hypervolume new_incumbents = sort_by_crowding_distance(self.runhistory, incumbent_ids, all_incumbent_isb_keys) new_incumbents = new_incumbents[: self._max_incumbents] @@ -81,11 +83,14 @@ def get_instance_seed_budget_keys( """ return self.runhistory.get_instance_seed_budget_keys(config, highest_observed_budget_only=True) + class MOIntensifier(MOIntensifierMixin, Intensifier): pass + class MOSuccessiveHalving(MOIntensifierMixin, SuccessiveHalving): pass + class MOHyperband(MOIntensifierMixin, Hyperband): - pass \ No newline at end of file + pass diff --git a/smac/model/multi_objective_model.py b/smac/model/multi_objective_model.py index 61a7941a8..fe2bbd749 100644 --- a/smac/model/multi_objective_model.py +++ b/smac/model/multi_objective_model.py @@ -53,7 +53,7 @@ def __init__( seed=seed, ) - self._n_features = self._models[0]._n_features #TODO JG make more elegant + self._n_features = self._models[0]._n_features # TODO JG make more elegant @property def models(self) -> list[AbstractModel]: diff --git a/smac/multi_objective/aggregation_strategy.py b/smac/multi_objective/aggregation_strategy.py index e139d5265..9ca58ecab 100644 --- a/smac/multi_objective/aggregation_strategy.py +++ b/smac/multi_objective/aggregation_strategy.py @@ -46,8 +46,8 @@ def __call__(self, values: list[float]) -> float: # noqa: D102 class NoAggregationStrategy(AbstractMultiObjectiveAlgorithm): """ - A class to not aggregate multi-objective losses into a single objective losses. 
- """ + A class to not aggregate multi-objective losses into a single objective losses. + """ def __call__(self, values: list[float]) -> list[float]: """ diff --git a/smac/runhistory/encoder/abstract_encoder.py b/smac/runhistory/encoder/abstract_encoder.py index cb0526a0d..dc933ab38 100644 --- a/smac/runhistory/encoder/abstract_encoder.py +++ b/smac/runhistory/encoder/abstract_encoder.py @@ -305,4 +305,3 @@ def transform_response_values( transformed_values : np.ndarray """ raise NotImplementedError - diff --git a/smac/runhistory/encoder/encoder.py b/smac/runhistory/encoder/encoder.py index 6c2ada750..11693af28 100644 --- a/smac/runhistory/encoder/encoder.py +++ b/smac/runhistory/encoder/encoder.py @@ -18,7 +18,6 @@ class RunHistoryEncoder(AbstractRunHistoryEncoder): - def _build_matrix( self, trials: Mapping[TrialKey, TrialValue], diff --git a/smac/runner/aclib_runner.py b/smac/runner/aclib_runner.py index d84aedff3..718f23b0a 100644 --- a/smac/runner/aclib_runner.py +++ b/smac/runner/aclib_runner.py @@ -3,35 +3,38 @@ __copyright__ = "Copyright 2022, automl.org" __license__ = "3-clause BSD" -import re from abc import ABC, abstractmethod from typing import Any, Iterator +import re import time import traceback -from subprocess import Popen, PIPE +from subprocess import PIPE, Popen import numpy as np from ConfigSpace import Configuration from smac.runhistory import StatusType, TrialInfo, TrialValue +from smac.runner.target_function_script_runner import TargetFunctionScriptRunner from smac.scenario import Scenario from smac.utils.logging import get_logger -from smac.runner.target_function_script_runner import TargetFunctionScriptRunner logger = get_logger(__name__) + class ACLibRunner(TargetFunctionScriptRunner): - def __init__(self, - target_function: str, - scenario: Scenario, - required_arguments: list[str] = [], - target_function_arguments: dict[str, str] | None = None, - ): + def __init__( + self, + target_function: str, + scenario: Scenario, + required_arguments: 
list[str] = [], + target_function_arguments: dict[str, str] | None = None, + ): self._target_function_arguments = target_function_arguments super().__init__(target_function, scenario, required_arguments) + def __call__(self, algorithm_kwargs: dict[str, Any]) -> tuple[str, str]: # kwargs has "instance", "seed" and "budget" --> translate those @@ -74,11 +77,10 @@ def __call__(self, algorithm_kwargs: dict[str, Any]) -> tuple[str, str]: line = line.strip() if re.match(result_begin, line): # print("match") - outputline = line[len(result_begin):] + outputline = line[len(result_begin) :] logger.debug(f"Found result in output: {outputline}") - #Parse output to form of key=value;key2=value2;...;cost=value1,value2;... + # Parse output to form of key=value;key2=value2;...;cost=value1,value2;... return outputline, error - From dd2ff5819777113c05973881b67ae0878afae7a8 Mon Sep 17 00:00:00 2001 From: benjamc Date: Tue, 7 Oct 2025 20:47:01 +0200 Subject: [PATCH 62/74] build(setup.py): add dependency pygmo --- setup.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.py b/setup.py index 1eca8d910..85ac64791 100644 --- a/setup.py +++ b/setup.py @@ -24,6 +24,9 @@ def read_file(filepath: str) -> str: "pyrfr": [ "pyrfr>=0.9.0", ], + "mosmac": [ + "pygmo" + ], "dev": [ "setuptools", "types-setuptools", From 5b0c318a53745acb6fe6f331e2851dccc1e90b61 Mon Sep 17 00:00:00 2001 From: benjamc Date: Tue, 7 Oct 2025 21:02:47 +0200 Subject: [PATCH 63/74] style: pydocstyle, flake --- .../function/expected_hypervolume.py | 17 +- smac/acquisition/maximizer/local_search.py | 12 +- .../maximizer/multi_objective_search.py | 8 - smac/facade/multi_objective_facade.py | 11 +- smac/intensifier/abstract_intensifier.py | 42 +++- smac/intensifier/intensifier.py | 3 - smac/intensifier/mixins/__init__.py | 4 +- .../mixins/intermediate_decision.py | 39 +--- .../intensifier/mixins/intermediate_update.py | 37 +-- smac/intensifier/mixins/update_incumbent.py | 36 +-- .../multi_objective_intensifier.py | 27 
+-- smac/intensifier/successive_halving.py | 14 +- smac/main/config_selector.py | 12 +- smac/main/smbo.py | 6 +- smac/multi_objective/aggregation_strategy.py | 4 +- smac/runhistory/runhistory.py | 1 + smac/runner/aclib_runner.py | 21 +- smac/runner/target_function_script_runner.py | 2 +- smac/utils/data_structures.py | 2 +- smac/utils/pareto_front.py | 221 ------------------ 20 files changed, 102 insertions(+), 417 deletions(-) delete mode 100644 smac/utils/pareto_front.py diff --git a/smac/acquisition/function/expected_hypervolume.py b/smac/acquisition/function/expected_hypervolume.py index 52794c62d..b6c668ef2 100644 --- a/smac/acquisition/function/expected_hypervolume.py +++ b/smac/acquisition/function/expected_hypervolume.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any, Iterator +from typing import Any import numpy as np import pygmo @@ -9,13 +9,8 @@ from smac.acquisition.function.abstract_acquisition_function import ( AbstractAcquisitionFunction, ) -from smac.intensifier.abstract_intensifier import AbstractIntensifier -from smac.model.abstract_model import AbstractModel -from smac.runhistory import RunHistory, TrialInfo -from smac.runhistory.dataclasses import InstanceSeedBudgetKey +from smac.runhistory import RunHistory from smac.runhistory.encoder import AbstractRunHistoryEncoder -from smac.scenario import Scenario -from smac.utils.configspace import get_config_hash from smac.utils.logging import get_logger from smac.utils.multi_objective import normalize_costs @@ -86,6 +81,7 @@ def __init__(self): @property def runhistory(self) -> RunHistory: + """Return the runhistory.""" return self._runhistory @runhistory.setter @@ -94,6 +90,7 @@ def runhistory(self, runhistory: RunHistory): @property def runhistory_encoder(self) -> AbstractRunHistoryEncoder: + """Return the runhistory encoder.""" return self._runhistory_encoder @runhistory_encoder.setter @@ -102,6 +99,7 @@ def runhistory_encoder(self, runhistory_encoder: 
AbstractRunHistoryEncoder): @property def name(self) -> str: + """Return name of the acquisition function.""" return "Abstract Hypervolume Improvement" def _update(self, **kwargs: Any) -> None: @@ -109,10 +107,10 @@ def _update(self, **kwargs: Any) -> None: incumbents: list[Configuration] = kwargs.get("incumbents", None) if incumbents is None: - raise ValueError(f"Incumbents are not passed properly.") + raise ValueError("Incumbents are not passed properly.") if len(incumbents) == 0: raise ValueError( - f"No incumbents here. Did the intensifier properly " "update the incumbents in the runhistory?" + "No incumbents here. Did the intensifier properly update the incumbents in the runhistory?" ) objective_bounds = np.array(self.runhistory.objective_bounds) @@ -263,6 +261,7 @@ def __init__(self): @property def name(self) -> str: + """Return name of the acquisition function.""" return "Predicted Hypervolume Improvement" def _update(self, **kwargs: Any) -> None: diff --git a/smac/acquisition/maximizer/local_search.py b/smac/acquisition/maximizer/local_search.py index 76d7c256b..788e7b2aa 100644 --- a/smac/acquisition/maximizer/local_search.py +++ b/smac/acquisition/maximizer/local_search.py @@ -405,7 +405,8 @@ def _search( neighbors.extend(neighbors_for_i) logger.debug( - f"Iteration {num_iters} with {np.count_nonzero(active)} active searches and {len(neighbors)} aqcuisition function calls." + f"Iteration {num_iters} with {np.count_nonzero(active)} active searches and {len(neighbors)} " + "acquisition function calls." ) if len(neighbors) != 0: start_time = time.time() @@ -481,7 +482,8 @@ def _search( n_no_plateau_walk[i] += 1 if n_no_plateau_walk[i] >= self._n_steps_plateau_walk or local_search_steps[i] >= self._max_steps: - message = f"Local search {i}: Stop search after walking {n_no_plateau_walk[i]} plateaus after {neighbors_looked_at[i]}." 
+ message = f"Local search {i}: Stop search after walking {n_no_plateau_walk[i]} plateaus " + message += f"after {neighbors_looked_at[i]}." if local_search_steps[i] >= self._max_steps: message += f" Reached max_steps ({self._max_steps}) of local search." logger.debug(message) @@ -496,8 +498,10 @@ def _search( logger.debug( f"Local searches took {local_search_steps} steps and looked at {neighbors_looked_at} configurations." f"Computing the acquisition function for each search took {np.sum(times_per_iteration)/num_candidates}" - f"(prev {np.mean(times_per_iteration)}) seconds on average and each acquisition function call took {times_per_iteration/np.sum(neighbors_looked_at)} seconds on average." - f"In total the whole procedure took {np.sum(times_per_iteration)} seconds to look at {np.sum(neighbors_looked_at)} configurations." + f"(prev {np.mean(times_per_iteration)}) seconds on average and each acquisition function call " + f"took {times_per_iteration/np.sum(neighbors_looked_at)} seconds on average." + f"In total the whole procedure took {np.sum(times_per_iteration)} seconds to look at " + f"{np.sum(neighbors_looked_at)} configurations." 
) return [(a, i) for a, i in zip(acq_val_candidates, candidates)] diff --git a/smac/acquisition/maximizer/multi_objective_search.py b/smac/acquisition/maximizer/multi_objective_search.py index 7a87d93a1..155a18acd 100644 --- a/smac/acquisition/maximizer/multi_objective_search.py +++ b/smac/acquisition/maximizer/multi_objective_search.py @@ -1,13 +1,9 @@ from __future__ import annotations -from typing import Any - import itertools -import time import numpy as np from ConfigSpace import Configuration, ConfigurationSpace -from ConfigSpace.exceptions import ForbiddenValueError from pygmo import fast_non_dominated_sorting from smac.acquisition.function import AbstractAcquisitionFunction @@ -15,10 +11,6 @@ LocalAndSortedRandomSearch, ) from smac.acquisition.maximizer.local_search import LocalSearch -from smac.utils.configspace import ( - convert_configurations_to_array, - get_one_exchange_neighbourhood, -) from smac.utils.logging import get_logger __copyright__ = "Copyright 2022, automl.org" diff --git a/smac/facade/multi_objective_facade.py b/smac/facade/multi_objective_facade.py index 0dca98836..287f578f7 100644 --- a/smac/facade/multi_objective_facade.py +++ b/smac/facade/multi_objective_facade.py @@ -2,26 +2,20 @@ from ConfigSpace import Configuration -from smac.acquisition.function.expected_hypervolume import PHVI -from smac.acquisition.function.expected_improvement import EI +from smac.acquisition.function.expected_hypervolume import EHVI, PHVI from smac.acquisition.maximizer.multi_objective_search import ( MOLocalAndSortedRandomSearch, ) from smac.facade.abstract_facade import AbstractFacade from smac.initial_design.default_design import DefaultInitialDesign from smac.intensifier.intensifier import Intensifier -from smac.intensifier.mixins import ( - intermediate_decision, - intermediate_update, - update_incumbent, -) +from smac.intensifier.mixins import intermediate_decision, intermediate_update from smac.intensifier.multi_objective_intensifier import 
MOIntensifier from smac.model.multi_objective_model import MultiObjectiveModel from smac.model.random_forest.random_forest import RandomForest from smac.multi_objective.aggregation_strategy import NoAggregationStrategy from smac.random_design.probability_design import ProbabilityRandomDesign from smac.runhistory.encoder.encoder import RunHistoryEncoder -from smac.runhistory.encoder.log_encoder import RunHistoryLogEncoder from smac.scenario import Scenario from smac.utils.logging import get_logger @@ -64,7 +58,6 @@ def get_model( # type: ignore pca_components : float, defaults to 4 Number of components to keep when using PCA to reduce dimensionality of instance features. """ - models = [] for objective in scenario.objectives: models.append( diff --git a/smac/intensifier/abstract_intensifier.py b/smac/intensifier/abstract_intensifier.py index 57fd15e97..e2edd3177 100644 --- a/smac/intensifier/abstract_intensifier.py +++ b/smac/intensifier/abstract_intensifier.py @@ -97,6 +97,7 @@ def reset(self) -> None: @property def incumbents(self) -> list[Configuration]: + """Return the incumbents (points on the Pareto front) of the runhistory.""" return self._incumbents @incumbents.setter @@ -478,7 +479,6 @@ def _check_for_intermediate_comparison(self, config: Configuration) -> bool: ------- A boolean which decides if the current configuration should be compared against the incumbent. """ - return False def _intermediate_comparison(self, config: Configuration) -> bool: @@ -531,6 +531,7 @@ def _update_incumbent(self, config: Configuration) -> list[Configuration]: Returns ------- + A list of configurations which are the new incumbents (Pareto front). """ rh = self.runhistory @@ -548,6 +549,17 @@ def _update_incumbent(self, config: Configuration) -> list[Configuration]: return new_incumbents def update_incumbents(self, config: Configuration) -> None: + """Updates the incumbents. + + This method is called everytime a trial is added to the runhistory. 
A configuration is only considered + incumbent if it has a trial on all instances/seeds/budgets the incumbent + has been evaluated on. + + Parameters + ---------- + config : Configuration + The configuration that was just evaluated. + """ incumbents = self.get_incumbents() config_hash = get_config_hash(config) @@ -578,7 +590,8 @@ def update_incumbents(self, config: Configuration) -> None: # Check if config isb is subset of incumbents # if not all([isb_key in incumbent_isb_keys for isb_key in config_isb_keys]): # # If the config is part of the incumbents this could happen - # logger.info(f"Config {config_hash} did run on more instances than the incumbent. Cannot make a proper comparison.") + # logger.info(f"Config {config_hash} did run on more instances than the incumbent. " + # "Cannot make a proper comparison.") # return # Config did not run on all isb keys of incumbent @@ -722,7 +735,8 @@ def update_incumbents(self, config: Configuration) -> None: # # will remove the budgets from the keys. # config_isb_comparison_keys = self.get_instance_seed_budget_keys(config, compare=True) # # Find the lowest intersection of instance-seed-budget keys for all incumbents. 
- # config_incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys(compare=True) # Intersection + # # Intersection + # config_incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys(compare=True) # # # Now we have to check if the new config has been evaluated on the same keys as the incumbents # # TODO If the config is part of the incumbent then it should always be a subset of the intersection @@ -813,8 +827,8 @@ def update_incumbents(self, config: Configuration) -> None: # else: # self._remove_rejected_config(config_id) # logger.info( - # f"Added config {config_hash} and rejected config {removed_incumbent_hash} as incumbent because " - # f"it is not better than the incumbents on {len(config_isb_keys)} instances: " + # f"Added config {config_hash} and rejected config {removed_incumbent_hash} as incumbent " + # f"becauseit is not better than the incumbents on {len(config_isb_keys)} instances: " # ) # print_config_changes(rh.get_config(removed_incumbent_id), config, logger=logger) # elif len(previous_incumbents) < len(new_incumbents): @@ -897,7 +911,7 @@ def _remove_incumbent( self._remove_rejected_config(config_id) logger.info( f"Added config {config_hash} and rejected config {removed_incumbent_hash} as incumbent because " - f"it is not better than the incumbents on {len(config_isb_keys)} instances:" + f"it is not better than the incumbents on {len(config_isb_keys)} instances: " ) print_config_changes(rh.get_config(removed_incumbent_id), config, logger=logger) @@ -916,7 +930,8 @@ def _calculate_pareto_front( configs : list[Configuration] The configurations from which the Pareto front should be computed. config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]] - The instance-seed budget keys for the configurations on the basis of which the Pareto front should be computed. + The instance-seed budget keys for the configurations on the basis of which the Pareto front should be + computed. 
Returns ------- @@ -946,13 +961,24 @@ def set_state(self, state: dict[str, Any]) -> None: pass def get_save_data(self) -> dict: + """Returns the data that should be saved when calling ``save()``. + + This includes the incumbents, trajectory, + rejected configurations, and the state of the intensifier. + + Returns + ------- + data : dict + The data that should be saved with keys ``incumbent_ids``, ``rejected_config_ids``, + ``incumbents_changed``, ``trajectory``, and ``state``. + """ incumbent_ids = [] for config in self.incumbents: try: incumbent_ids.append(self.runhistory.get_config_id(config)) except KeyError: incumbent_ids.append(-1) # Should not happen, but occurs sometimes with small-budget runs - logger.warning(f"{config} does not exist in runhistory, but is part of the incumbent!") + logger.warning(f"{config} does not exist in runhistory, but is part of the incumbent!") # noqa: E713 data = { "incumbent_ids": incumbent_ids, diff --git a/smac/intensifier/intensifier.py b/smac/intensifier/intensifier.py index c3239c6be..24511520b 100644 --- a/smac/intensifier/intensifier.py +++ b/smac/intensifier/intensifier.py @@ -2,8 +2,6 @@ from typing import Any, Iterator -import time - from ConfigSpace import Configuration from smac.intensifier.abstract_intensifier import AbstractIntensifier @@ -341,7 +339,6 @@ def _check_for_intermediate_comparison(self, config: Configuration) -> bool: A boolean which decides if the current configuration should be compared against the incumbent. 
""" config_isb_keys = self.get_instance_seed_budget_keys(config) - config_id = self.runhistory.get_config_id(config) config_hash = get_config_hash(config) # Do not compare very early in the process diff --git a/smac/intensifier/mixins/__init__.py b/smac/intensifier/mixins/__init__.py index f096d7a48..0377153c2 100644 --- a/smac/intensifier/mixins/__init__.py +++ b/smac/intensifier/mixins/__init__.py @@ -1,3 +1 @@ -""" -Mixin are used to overwrite single functions in the intensifier classes -""" +"""Mixin are used to overwrite single functions in the intensifier classes.""" diff --git a/smac/intensifier/mixins/intermediate_decision.py b/smac/intensifier/mixins/intermediate_decision.py index c43c744d0..39b58b7a8 100644 --- a/smac/intensifier/mixins/intermediate_decision.py +++ b/smac/intensifier/mixins/intermediate_decision.py @@ -1,39 +1,9 @@ from __future__ import annotations -from abc import abstractmethod -from typing import Any, Callable, Iterator - -import copy -import dataclasses -import itertools -import json -from collections import defaultdict -from pathlib import Path - import numpy as np from ConfigSpace import Configuration -from scipy.stats import binom - -import smac -from smac.callback import Callback -from smac.constants import MAXINT -from smac.main.config_selector import ConfigSelector -from smac.runhistory import TrialInfo -from smac.runhistory.dataclasses import ( - InstanceSeedBudgetKey, - InstanceSeedKey, - TrajectoryItem, - TrialValue, -) -from smac.runhistory.runhistory import RunHistory -from smac.scenario import Scenario -from smac.utils.configspace import get_config_hash, print_config_changes + from smac.utils.logging import get_logger -from smac.utils.pareto_front import ( - _get_costs, - calculate_pareto_front, - sort_by_crowding_distance, -) __copyright__ = "Copyright 2022, automl.org" __license__ = "3-clause BSD" @@ -79,7 +49,8 @@ def _check_for_intermediate_comparison(self, config: Configuration) -> bool: class 
NewCostDominatesOldCostSkipFirst: def _check_for_intermediate_comparison(self, config: Configuration) -> bool: - """Do the first comparison with the incumbent when the configuration dominates the cost after finishing its first trial + """Do the first comparison with the incumbent when the configuration dominates the cost after finishing + its first trial. Parameters ---------- @@ -119,8 +90,6 @@ def _check_for_intermediate_comparison(self, config: Configuration) -> bool: A boolean which decides if the current configuration should be compared against the incumbent. """ config_isb_keys = self.get_instance_seed_budget_keys(config) - config_id = self.runhistory.get_config_id(config) - config_hash = get_config_hash(config) # max_trigger_number = int(np.ceil(np.log2(self._max_config_calls))) # trigger_points = [(2**n) - 1 for n in range(1, max_trigger_number + 1)] # 1, 3, 7, 15, ... @@ -155,8 +124,6 @@ def _check_for_intermediate_comparison(self, config: Configuration) -> bool: A boolean which decides if the current configuration should be compared against the incumbent. """ config_isb_keys = self.get_instance_seed_budget_keys(config) - config_id = self.runhistory.get_config_id(config) - config_hash = get_config_hash(config) max_trigger_number = int(np.ceil(np.log2(self._max_config_calls))) trigger_points = [(2**n) - 1 for n in range(2, max_trigger_number + 1)] # 1, 3, 7, 15, ... 
diff --git a/smac/intensifier/mixins/intermediate_update.py b/smac/intensifier/mixins/intermediate_update.py index 529dcf804..f76c378aa 100644 --- a/smac/intensifier/mixins/intermediate_update.py +++ b/smac/intensifier/mixins/intermediate_update.py @@ -1,39 +1,14 @@ from __future__ import annotations -from abc import abstractmethod -from typing import Any, Callable, Iterator - -import copy -import dataclasses import itertools -import json -from collections import defaultdict -from pathlib import Path import numpy as np from ConfigSpace import Configuration from scipy.stats import binom -import smac -from smac.callback import Callback -from smac.constants import MAXINT -from smac.main.config_selector import ConfigSelector -from smac.runhistory import TrialInfo -from smac.runhistory.dataclasses import ( - InstanceSeedBudgetKey, - InstanceSeedKey, - TrajectoryItem, - TrialValue, -) -from smac.runhistory.runhistory import RunHistory -from smac.scenario import Scenario -from smac.utils.configspace import get_config_hash, print_config_changes +from smac.utils.configspace import get_config_hash from smac.utils.logging import get_logger -from smac.utils.pareto_front import ( - _get_costs, - calculate_pareto_front, - sort_by_crowding_distance, -) +from smac.utils.pareto_front import _get_costs __copyright__ = "Copyright 2022, automl.org" __license__ = "3-clause BSD" @@ -228,7 +203,6 @@ def _intermediate_comparison(self, config: Configuration) -> bool: ------- A boolean which indicates if we should continue with this configuration. """ - config_hash = get_config_hash(config) incumbents = self.get_incumbents() config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys(compare=True) @@ -267,7 +241,6 @@ def _intermediate_comparison(self, config: Configuration) -> bool: ------- A boolean which indicates if we should continue with this configuration. 
""" - config_hash = get_config_hash(config) incumbents = self.get_incumbents() config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys(compare=True) @@ -306,7 +279,6 @@ def _intermediate_comparison(self, config: Configuration) -> bool: ------- A boolean which indicates if we should continue with this configuration. """ - config_hash = get_config_hash(config) incumbents = self.get_incumbents() config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys(compare=True) @@ -371,8 +343,6 @@ def _intermediate_comparison(self, config: Configuration) -> bool: ------- A boolean which indicates if we should continue with this configuration. """ - - config_hash = get_config_hash(config) incumbents = self.get_incumbents() config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys(compare=True) @@ -437,8 +407,6 @@ def _intermediate_comparison(self, config: Configuration) -> bool: ------- A boolean which indicates if we should continue with this configuration. 
""" - - config_hash = get_config_hash(config) incumbents = self.get_incumbents() config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys(compare=True) @@ -524,7 +492,6 @@ def dominates(a, b): b = np.array(b) return 1 if np.count_nonzero(a <= b) >= len(a) and np.count_nonzero(a < b) >= 1 else 0 - config_hash = get_config_hash(config) incumbents = self.get_incumbents() config_isb_keys = self.get_instance_seed_budget_keys(config, compare=True) incumbent_isb_comparison_keys = self.get_incumbent_instance_seed_budget_keys(compare=True) diff --git a/smac/intensifier/mixins/update_incumbent.py b/smac/intensifier/mixins/update_incumbent.py index 0ca61c78c..e1efd9454 100644 --- a/smac/intensifier/mixins/update_incumbent.py +++ b/smac/intensifier/mixins/update_incumbent.py @@ -1,39 +1,11 @@ from __future__ import annotations -from abc import abstractmethod -from typing import Any, Callable, Iterator - -import copy -import dataclasses import itertools -import json -from collections import defaultdict -from pathlib import Path import numpy as np from ConfigSpace import Configuration -from scipy.stats import binom - -import smac -from smac.callback import Callback -from smac.constants import MAXINT -from smac.main.config_selector import ConfigSelector -from smac.runhistory import TrialInfo -from smac.runhistory.dataclasses import ( - InstanceSeedBudgetKey, - InstanceSeedKey, - TrajectoryItem, - TrialValue, -) -from smac.runhistory.runhistory import RunHistory -from smac.scenario import Scenario -from smac.utils.configspace import get_config_hash, print_config_changes + from smac.utils.logging import get_logger -from smac.utils.pareto_front import ( - _get_costs, - calculate_pareto_front, - sort_by_crowding_distance, -) __copyright__ = "Copyright 2022, automl.org" __license__ = "3-clause BSD" @@ -58,6 +30,8 @@ def _update_incumbent(self, config: Configuration) -> list[Configuration]: 
Returns ------- + list[Configuration] + New incumbents after update. """ rh = self.runhistory @@ -94,9 +68,9 @@ def _update_incumbent(self, config: Configuration) -> list[Configuration]: Returns ------- + list[Configuration] + New incumbents after update. """ - rh = self.runhistory - incumbents = self.get_incumbents() if config not in incumbents: diff --git a/smac/intensifier/multi_objective_intensifier.py b/smac/intensifier/multi_objective_intensifier.py index 9c0d17f19..33223ef65 100644 --- a/smac/intensifier/multi_objective_intensifier.py +++ b/smac/intensifier/multi_objective_intensifier.py @@ -3,35 +3,13 @@ from __future__ import annotations -from abc import abstractmethod -from typing import Any, Callable, Iterator - -import dataclasses -import json -from collections import defaultdict -from pathlib import Path - -import numpy as np from ConfigSpace import Configuration -import smac -from smac.callback import Callback -from smac.constants import MAXINT -from smac.intensifier.abstract_intensifier import AbstractIntensifier from smac.intensifier.hyperband import Hyperband from smac.intensifier.intensifier import Intensifier from smac.intensifier.successive_halving import SuccessiveHalving -from smac.main.config_selector import ConfigSelector -from smac.runhistory import TrialInfo -from smac.runhistory.dataclasses import ( - InstanceSeedBudgetKey, - InstanceSeedKey, - TrajectoryItem, - TrialValue, -) +from smac.runhistory.dataclasses import InstanceSeedBudgetKey from smac.runhistory.runhistory import RunHistory -from smac.scenario import Scenario -from smac.utils.configspace import get_config_hash, print_config_changes from smac.utils.logging import get_logger from smac.utils.pareto_front import calculate_pareto_front, sort_by_crowding_distance @@ -56,7 +34,8 @@ def _calculate_pareto_front( config_instance_seed_budget_keys=config_instance_seed_budget_keys, ) - # def _remove_incumbent(self, config: Configuration, previous_incumbent_ids: list[int], 
new_incumbent_ids: list[int]) -> None: + # def _remove_incumbent( + # self, config: Configuration, previous_incumbent_ids: list[int], new_incumbent_ids: list[int]) -> None: # # TODO adjust # raise NotImplementedError diff --git a/smac/intensifier/successive_halving.py b/smac/intensifier/successive_halving.py index 5167947f7..4f00d4605 100644 --- a/smac/intensifier/successive_halving.py +++ b/smac/intensifier/successive_halving.py @@ -10,7 +10,7 @@ from smac.constants import MAXINT from smac.intensifier.abstract_intensifier import AbstractIntensifier -from smac.runhistory import TrialInfo +from smac.runhistory import RunHistory, TrialInfo from smac.runhistory.dataclasses import InstanceSeedBudgetKey from smac.runhistory.errors import NotEvaluatedError from smac.scenario import Scenario @@ -209,7 +209,7 @@ def get_state(self) -> dict[str, Any]: # noqa: D102 for key in list(self._tracker.keys()): for seed, configs in self._tracker[key]: # We have to make key serializable - new_key = f"{key[0]},{key[1]}" + new_key = f"{key[0]},{key[1]}" # noqa: E231 tracker[new_key].append((seed, [dict(config) for config in configs])) return {"tracker": tracker} @@ -260,7 +260,7 @@ def print_tracker(self) -> None: messages.append(f"--- Bracket {bracket} / Stage {stage}: {counter} configs") if len(messages) > 0: - logger.debug(f"{self.__class__.__name__} statistics:") + logger.debug(f"{self.__class__.__name__} statistics:") # noqa: E231 for message in messages: logger.debug(message) @@ -590,7 +590,9 @@ def _calculate_pareto_front( configs: list[Configuration], config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]], ) -> list[Configuration]: - """Compares the passed configurations and returns only the ones on the pareto front. Needs to include the budget type + """Compares the passed configurations and returns only the ones on the pareto front. + + Needs to include the budget type. 
Parameters ---------- @@ -599,14 +601,14 @@ def _calculate_pareto_front( configs : list[Configuration] The configurations from which the Pareto front should be computed. config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]] - The instance-seed budget keys for the configurations on the basis of which the Pareto front should be computed. + The instance-seed budget keys for the configurations on the basis of which the Pareto front should be + computed. Returns ------- pareto_front : list[Configuration] The pareto front computed from the given configurations. """ - # Add the budgets to the isb keys according to the set incumbent heuristic for i, (config, isb_keys) in enumerate(zip(configs, config_instance_seed_budget_keys)): existing_isb_keys = [] diff --git a/smac/main/config_selector.py b/smac/main/config_selector.py index c17610c15..f5545f372 100644 --- a/smac/main/config_selector.py +++ b/smac/main/config_selector.py @@ -41,9 +41,11 @@ class ConfigSelector: How many configurations should be returned before the surrogate model is retrained. retrain_wallclock_ratio: float | None, default to None How much time of the total elapsed wallclock time should be spend on retraining the surrogate model - and the acquisition function look. Example ratio of 0.1 would result in that only 10% of the wallclock time is spend on retraining. + and the acquisition function look. Example ratio of 0.1 would result in that only 10% of the wallclock time is + spend on retraining. min_configurations: int, defaults to 2 - The minimum number of configurations that need to yield before retraining can occur. Should be lower or equal to retrain_after. + The minimum number of configurations that need to yield before retraining can occur. Should be lower or equal to + retrain_after. max_new_config_tries : int, defaults to 8 How often to retry receiving a new configuration before giving up. 
min_trials: int, defaults to 1 @@ -321,8 +323,10 @@ def _check_for_retrain(self) -> bool: # Retrain when more time has been spend if acquisition_training_time / elapsed_time < self._retrain_wallclock_ratio: logger.debug( - f"Less than {self._retrain_wallclock_ratio:.2%} ({acquisition_training_time / elapsed_time:.2f}) " - f"of the elapsed wallclock time ({elapsed_time:.2f}s) has been spend on finding new configurations " + f"Less than {self._retrain_wallclock_ratio:.2%} " # noqa: E231 + f"({acquisition_training_time / elapsed_time:.2f}) " # noqa: E231 + f"of the elapsed wallclock time ({elapsed_time:.2f}s) has " # noqa: E231 + "been spend on finding new configurations " f"with the surrogate model. Start new iteration and retrain surrogate model." ) return True diff --git a/smac/main/smbo.py b/smac/main/smbo.py index 11c4d817b..506a886c6 100644 --- a/smac/main/smbo.py +++ b/smac/main/smbo.py @@ -325,8 +325,8 @@ def optimize(self, *, data_to_scatter: dict[str, Any] | None = None) -> Configur # Some statistics logger.debug( - f"Remaining wallclock time: {self.remaining_walltime}; " - f"Remaining cpu time: {self.remaining_cputime}; " + f"Remaining wallclock time: {self.remaining_walltime}; " # noqa: E702 + f"Remaining cpu time: {self.remaining_cputime}; " # noqa: E702 f"Remaining trials: {self.remaining_trials}" ) @@ -538,7 +538,7 @@ def _initialize_state(self) -> None: ) logger.info( f"Found old run in `{self._scenario.output_directory}` but it is not the same as the current " - f"one:\n{diff}" + f"one:\n{diff}" # noqa: E231 ) feedback = input( diff --git a/smac/multi_objective/aggregation_strategy.py b/smac/multi_objective/aggregation_strategy.py index 9ca58ecab..1958b2937 100644 --- a/smac/multi_objective/aggregation_strategy.py +++ b/smac/multi_objective/aggregation_strategy.py @@ -45,9 +45,7 @@ def __call__(self, values: list[float]) -> float: # noqa: D102 class NoAggregationStrategy(AbstractMultiObjectiveAlgorithm): - """ - A class to not aggregate 
multi-objective losses into a single objective losses. - """ + """A class to not aggregate multi-objective losses into a single objective losses.""" def __call__(self, values: list[float]) -> list[float]: """ diff --git a/smac/runhistory/runhistory.py b/smac/runhistory/runhistory.py index 8b35d21d6..944597c6c 100644 --- a/smac/runhistory/runhistory.py +++ b/smac/runhistory/runhistory.py @@ -163,6 +163,7 @@ def __eq__(self, other: Any) -> bool: @property def incumbents(self) -> list[Configuration]: + """Return the incumbents (points on the Pareto front) of the runhistory.""" return self._incumbents @incumbents.setter diff --git a/smac/runner/aclib_runner.py b/smac/runner/aclib_runner.py index 718f23b0a..74c590af6 100644 --- a/smac/runner/aclib_runner.py +++ b/smac/runner/aclib_runner.py @@ -3,18 +3,11 @@ __copyright__ = "Copyright 2022, automl.org" __license__ = "3-clause BSD" -from abc import ABC, abstractmethod -from typing import Any, Iterator +from typing import Any import re -import time -import traceback from subprocess import PIPE, Popen -import numpy as np -from ConfigSpace import Configuration - -from smac.runhistory import StatusType, TrialInfo, TrialValue from smac.runner.target_function_script_runner import TargetFunctionScriptRunner from smac.scenario import Scenario from smac.utils.logging import get_logger @@ -36,6 +29,18 @@ def __init__( super().__init__(target_function, scenario, required_arguments) def __call__(self, algorithm_kwargs: dict[str, Any]) -> tuple[str, str]: + """Calls the target function with the given arguments. + + Parameters + ---------- + algorithm_kwargs: dict[str, Any] + The arguments to pass to the target function. + + Returns + ------- + tuple[str, str] + The output and error messages from the target function. 
+ """ # kwargs has "instance", "seed" and "budget" --> translate those cmd = self._target_function.split(" ") diff --git a/smac/runner/target_function_script_runner.py b/smac/runner/target_function_script_runner.py index 8da0dae7b..90ca72117 100644 --- a/smac/runner/target_function_script_runner.py +++ b/smac/runner/target_function_script_runner.py @@ -194,7 +194,7 @@ def run( if "additional_info" in outputs: additional_info["additional_info"] = outputs["additional_info"] - if not status in [StatusType.SUCCESS, StatusType.TIMEOUT]: + if status not in [StatusType.SUCCESS, StatusType.TIMEOUT]: additional_info["error"] = error if cost != self._crash_cost: diff --git a/smac/utils/data_structures.py b/smac/utils/data_structures.py index 6468f43ab..c9e728cff 100644 --- a/smac/utils/data_structures.py +++ b/smac/utils/data_structures.py @@ -51,7 +51,7 @@ def recursively_compare_dicts( elif isinstance(d1, list) and isinstance(d2, list): if len(d1) != len(d2): - diff += [f"{level}: len1={len(d1)}; len2={len(d2)}"] + diff += [f"{level}: len1={len(d1)}; len2={len(d2)}"] # noqa: E702 # logger.info("{:<20} len1={}; len2={}".format(level, len(d1), len(d2))) # logger.info("len1={}; len2={}".format(len(d1), len(d2))) common_len = min(len(d1), len(d2)) diff --git a/smac/utils/pareto_front.py b/smac/utils/pareto_front.py deleted file mode 100644 index 441ac7b99..000000000 --- a/smac/utils/pareto_front.py +++ /dev/null @@ -1,221 +0,0 @@ -from __future__ import annotations - -import numpy as np -from ConfigSpace import Configuration - -from smac.runhistory import RunHistory -from smac.runhistory.dataclasses import InstanceSeedBudgetKey - - -def _get_costs( - runhistory: RunHistory, - configs: list[Configuration], - config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]], - normalize: bool = False, -) -> np.ndarray: - """Returns the costs of the passed configurations. - - Parameters - ---------- - runhistory : RunHistory - The runhistory containing the passed configs. 
- configs : list[Configuration] - The configs for which the costs should be returned. - config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]] - The instance-seed budget keys for the configs for which the costs should be returned. - normalize: bool - If the costs should be normalized - - Returns - ------- - costs : np.ndarray[n_points, n_objectives] - Costs of the given configs. - """ - assert len(configs) == len(config_instance_seed_budget_keys) - - # Now we get the costs for the trials of the config - average_costs = [] - - for config, isb_keys in zip(configs, config_instance_seed_budget_keys): - # Since we use multiple seeds, we have to average them to get only one cost value pair for each - # configuration - # However, we only want to consider the config trials - # Average cost is a list of floats (one for each objective) - average_cost = runhistory.average_cost( - config, isb_keys, normalize=normalize, run_multi_objective_algorithm=normalize - ) - average_costs += [average_cost] - - # Let's work with a numpy array for efficiency - return np.vstack(average_costs) - - -def calculate_pareto_front( - runhistory: RunHistory, - configs: list[Configuration], - config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]], -) -> list[Configuration]: - """Calculate pareto front based on non-dominance - - Parameters - ---------- - runhistory : RunHistory - The runhistory containing the given configurations. - configs : list[Configuration] - The configurations from which the Pareto front should be computed. - config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]] - The instance-seed budget keys for the configurations on the basis of which the Pareto front should be computed. - - Returns - ------- - pareto_front : list[Configuration] - The pareto front computed from the given configurations. 
- """ - costs = _get_costs(runhistory, configs, config_instance_seed_budget_keys) - - # The following code is an efficient pareto front implementation - is_efficient = np.arange(costs.shape[0]) - next_point_index = 0 # Next index in the is_efficient array to search for - while next_point_index < len(costs): - nondominated_point_mask = np.any(costs < costs[next_point_index], axis=1) - nondominated_point_mask[next_point_index] = True - is_efficient = is_efficient[nondominated_point_mask] # Remove dominated points - costs = costs[nondominated_point_mask] - next_point_index = np.sum(nondominated_point_mask[:next_point_index]) + 1 - - new_incumbents = [configs[i] for i in is_efficient] - return new_incumbents - - -def sort_by_crowding_distance( - runhistory: RunHistory, - configs: list[Configuration], - config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]], -) -> list[Configuration]: - """Sorts the passed configurations by their crowding distance. Taken from - https://github.com/anyoptimization/pymoo/blob/20abef1ade71915352217400c11ece4c2f35163e/pymoo/algorithms/nsga2.py - - - Parameters - ---------- - runhistory : RunHistory - The runhistory containing the given configurations. - configs : list[Configuration] - The configurations which should be sorted. - config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]] - The instance-seed budget keys for the configurations which should be sorted. - - Returns - ------- - sorted_list : list[Configuration] - Configurations sorted by crowding distance. 
- """ - F = _get_costs(runhistory, configs, config_instance_seed_budget_keys, normalize=True) - infinity = 1e14 - - n_points = F.shape[0] - n_obj = F.shape[1] - - if n_points <= 2: - # distances = np.full(n_points, infinity) - return configs - else: - # Sort each column and get index - I = np.argsort(F, axis=0, kind="mergesort") # noqa - - # Now really sort the whole array - F = F[I, np.arange(n_obj)] - - # get the distance to the last element in sorted list and replace zeros with actual values - dist = np.concatenate([F, np.full((1, n_obj), np.inf)]) - np.concatenate([np.full((1, n_obj), -np.inf), F]) - - index_dist_is_zero = np.where(dist == 0) - - dist_to_last = np.copy(dist) - for i, j in zip(*index_dist_is_zero): - dist_to_last[i, j] = dist_to_last[i - 1, j] - - dist_to_next = np.copy(dist) - for i, j in reversed(list(zip(*index_dist_is_zero))): - dist_to_next[i, j] = dist_to_next[i + 1, j] - - # Normalize all the distances - norm = np.max(F, axis=0) - np.min(F, axis=0) - norm[norm == 0] = np.nan - dist_to_last, dist_to_next = dist_to_last[:-1] / norm, dist_to_next[1:] / norm - - # If we divided by zero because all values in one columns are equal replace by none - dist_to_last[np.isnan(dist_to_last)] = 0.0 - dist_to_next[np.isnan(dist_to_next)] = 0.0 - - # Sum up the distance to next and last and norm by objectives - also reorder from sorted list - J = np.argsort(I, axis=0) - crowding = np.sum(dist_to_last[J, np.arange(n_obj)] + dist_to_next[J, np.arange(n_obj)], axis=1) / n_obj - - # Replace infinity with a large number - crowding[np.isinf(crowding)] = infinity - config_with_crowding = [(config, v) for config, v in zip(configs, crowding)] - config_with_crowding = sorted(config_with_crowding, key=lambda x: x[1], reverse=True) - - return [c for c, _ in config_with_crowding] - - -def sort_by_hypervolume_contribution( - runhistory: RunHistory, - configs: list[Configuration], - config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]], -) -> 
list[Configuration]: - """Sorts the passed configurations by their hypervolume contribution. - - Parameters - ---------- - runhistory : RunHistory - The runhistory containing the given configurations. - configs : list[Configuration] - The configurations which should be sorted. - config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]] - The instance-seed budget keys for the configurations which should be sorted. - - Returns - ------- - sorted_list : list[Configuration] - Configurations sorted by hypervolume contribution. - """ - - # Get the average costs per configuration - - # Normalize the costs per objective - - # Compute a reference point (with the local points or all observed history) - - # Apply reduce procedure - - # Sort based on HV contribution - - raise NotImplementedError - - -def calculate_hypervolume( - runhistory: RunHistory, - configs: list[Configuration], - config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]], - reference_point: list[float] | None = None, -) -> float: - if reference_point is None: - reference_point = calculate_reference_point(runhistory) - - raise NotImplementedError - - -def calculate_reference_point( - runhistory: RunHistory, - configs: list[Configuration] | None = None, - config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]] | None = None, -) -> list[float]: - if configs is None: - # Compute over the complete runhistory - costs = [trail.cost for trial in runhistory.values()] - return np.max(np.array(costs), axis=1) - else: - assert len(configs) == len(config_instance_seed_budget_keys) - raise NotImplementedError From 2fab658adf5dd86a74718e3c1a7a15ff72115ee0 Mon Sep 17 00:00:00 2001 From: benjamc Date: Tue, 7 Oct 2025 21:17:51 +0200 Subject: [PATCH 64/74] readd paretofront --- smac/utils/pareto_front.py | 221 +++++++++++++++++++++++++++++++++++++ 1 file changed, 221 insertions(+) create mode 100644 smac/utils/pareto_front.py diff --git a/smac/utils/pareto_front.py 
b/smac/utils/pareto_front.py new file mode 100644 index 000000000..441ac7b99 --- /dev/null +++ b/smac/utils/pareto_front.py @@ -0,0 +1,221 @@ +from __future__ import annotations + +import numpy as np +from ConfigSpace import Configuration + +from smac.runhistory import RunHistory +from smac.runhistory.dataclasses import InstanceSeedBudgetKey + + +def _get_costs( + runhistory: RunHistory, + configs: list[Configuration], + config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]], + normalize: bool = False, +) -> np.ndarray: + """Returns the costs of the passed configurations. + + Parameters + ---------- + runhistory : RunHistory + The runhistory containing the passed configs. + configs : list[Configuration] + The configs for which the costs should be returned. + config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]] + The instance-seed budget keys for the configs for which the costs should be returned. + normalize: bool + If the costs should be normalized + + Returns + ------- + costs : np.ndarray[n_points, n_objectives] + Costs of the given configs. 
+ """ + assert len(configs) == len(config_instance_seed_budget_keys) + + # Now we get the costs for the trials of the config + average_costs = [] + + for config, isb_keys in zip(configs, config_instance_seed_budget_keys): + # Since we use multiple seeds, we have to average them to get only one cost value pair for each + # configuration + # However, we only want to consider the config trials + # Average cost is a list of floats (one for each objective) + average_cost = runhistory.average_cost( + config, isb_keys, normalize=normalize, run_multi_objective_algorithm=normalize + ) + average_costs += [average_cost] + + # Let's work with a numpy array for efficiency + return np.vstack(average_costs) + + +def calculate_pareto_front( + runhistory: RunHistory, + configs: list[Configuration], + config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]], +) -> list[Configuration]: + """Calculate pareto front based on non-dominance + + Parameters + ---------- + runhistory : RunHistory + The runhistory containing the given configurations. + configs : list[Configuration] + The configurations from which the Pareto front should be computed. + config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]] + The instance-seed budget keys for the configurations on the basis of which the Pareto front should be computed. + + Returns + ------- + pareto_front : list[Configuration] + The pareto front computed from the given configurations. 
+ """ + costs = _get_costs(runhistory, configs, config_instance_seed_budget_keys) + + # The following code is an efficient pareto front implementation + is_efficient = np.arange(costs.shape[0]) + next_point_index = 0 # Next index in the is_efficient array to search for + while next_point_index < len(costs): + nondominated_point_mask = np.any(costs < costs[next_point_index], axis=1) + nondominated_point_mask[next_point_index] = True + is_efficient = is_efficient[nondominated_point_mask] # Remove dominated points + costs = costs[nondominated_point_mask] + next_point_index = np.sum(nondominated_point_mask[:next_point_index]) + 1 + + new_incumbents = [configs[i] for i in is_efficient] + return new_incumbents + + +def sort_by_crowding_distance( + runhistory: RunHistory, + configs: list[Configuration], + config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]], +) -> list[Configuration]: + """Sorts the passed configurations by their crowding distance. Taken from + https://github.com/anyoptimization/pymoo/blob/20abef1ade71915352217400c11ece4c2f35163e/pymoo/algorithms/nsga2.py + + + Parameters + ---------- + runhistory : RunHistory + The runhistory containing the given configurations. + configs : list[Configuration] + The configurations which should be sorted. + config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]] + The instance-seed budget keys for the configurations which should be sorted. + + Returns + ------- + sorted_list : list[Configuration] + Configurations sorted by crowding distance. 
+ """ + F = _get_costs(runhistory, configs, config_instance_seed_budget_keys, normalize=True) + infinity = 1e14 + + n_points = F.shape[0] + n_obj = F.shape[1] + + if n_points <= 2: + # distances = np.full(n_points, infinity) + return configs + else: + # Sort each column and get index + I = np.argsort(F, axis=0, kind="mergesort") # noqa + + # Now really sort the whole array + F = F[I, np.arange(n_obj)] + + # get the distance to the last element in sorted list and replace zeros with actual values + dist = np.concatenate([F, np.full((1, n_obj), np.inf)]) - np.concatenate([np.full((1, n_obj), -np.inf), F]) + + index_dist_is_zero = np.where(dist == 0) + + dist_to_last = np.copy(dist) + for i, j in zip(*index_dist_is_zero): + dist_to_last[i, j] = dist_to_last[i - 1, j] + + dist_to_next = np.copy(dist) + for i, j in reversed(list(zip(*index_dist_is_zero))): + dist_to_next[i, j] = dist_to_next[i + 1, j] + + # Normalize all the distances + norm = np.max(F, axis=0) - np.min(F, axis=0) + norm[norm == 0] = np.nan + dist_to_last, dist_to_next = dist_to_last[:-1] / norm, dist_to_next[1:] / norm + + # If we divided by zero because all values in one columns are equal replace by none + dist_to_last[np.isnan(dist_to_last)] = 0.0 + dist_to_next[np.isnan(dist_to_next)] = 0.0 + + # Sum up the distance to next and last and norm by objectives - also reorder from sorted list + J = np.argsort(I, axis=0) + crowding = np.sum(dist_to_last[J, np.arange(n_obj)] + dist_to_next[J, np.arange(n_obj)], axis=1) / n_obj + + # Replace infinity with a large number + crowding[np.isinf(crowding)] = infinity + config_with_crowding = [(config, v) for config, v in zip(configs, crowding)] + config_with_crowding = sorted(config_with_crowding, key=lambda x: x[1], reverse=True) + + return [c for c, _ in config_with_crowding] + + +def sort_by_hypervolume_contribution( + runhistory: RunHistory, + configs: list[Configuration], + config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]], +) -> 
list[Configuration]: + """Sorts the passed configurations by their hypervolume contribution. + + Parameters + ---------- + runhistory : RunHistory + The runhistory containing the given configurations. + configs : list[Configuration] + The configurations which should be sorted. + config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]] + The instance-seed budget keys for the configurations which should be sorted. + + Returns + ------- + sorted_list : list[Configuration] + Configurations sorted by hypervolume contribution. + """ + + # Get the average costs per configuration + + # Normalize the costs per objective + + # Compute a reference point (with the local points or all observed history) + + # Apply reduce procedure + + # Sort based on HV contribution + + raise NotImplementedError + + +def calculate_hypervolume( + runhistory: RunHistory, + configs: list[Configuration], + config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]], + reference_point: list[float] | None = None, +) -> float: + if reference_point is None: + reference_point = calculate_reference_point(runhistory) + + raise NotImplementedError + + +def calculate_reference_point( + runhistory: RunHistory, + configs: list[Configuration] | None = None, + config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]] | None = None, +) -> list[float]: + if configs is None: + # Compute over the complete runhistory + costs = [trail.cost for trial in runhistory.values()] + return np.max(np.array(costs), axis=1) + else: + assert len(configs) == len(config_instance_seed_budget_keys) + raise NotImplementedError From 31eda8cfb9120be06dd82fbd7688ab2763329654 Mon Sep 17 00:00:00 2001 From: benjamc Date: Tue, 7 Oct 2025 21:19:29 +0200 Subject: [PATCH 65/74] refactor(pareto_front.py): delete illegal functions --- smac/utils/pareto_front.py | 61 -------------------------------------- 1 file changed, 61 deletions(-) diff --git a/smac/utils/pareto_front.py b/smac/utils/pareto_front.py index 
441ac7b99..d716fad1c 100644 --- a/smac/utils/pareto_front.py +++ b/smac/utils/pareto_front.py @@ -158,64 +158,3 @@ def sort_by_crowding_distance( config_with_crowding = sorted(config_with_crowding, key=lambda x: x[1], reverse=True) return [c for c, _ in config_with_crowding] - - -def sort_by_hypervolume_contribution( - runhistory: RunHistory, - configs: list[Configuration], - config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]], -) -> list[Configuration]: - """Sorts the passed configurations by their hypervolume contribution. - - Parameters - ---------- - runhistory : RunHistory - The runhistory containing the given configurations. - configs : list[Configuration] - The configurations which should be sorted. - config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]] - The instance-seed budget keys for the configurations which should be sorted. - - Returns - ------- - sorted_list : list[Configuration] - Configurations sorted by hypervolume contribution. - """ - - # Get the average costs per configuration - - # Normalize the costs per objective - - # Compute a reference point (with the local points or all observed history) - - # Apply reduce procedure - - # Sort based on HV contribution - - raise NotImplementedError - - -def calculate_hypervolume( - runhistory: RunHistory, - configs: list[Configuration], - config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]], - reference_point: list[float] | None = None, -) -> float: - if reference_point is None: - reference_point = calculate_reference_point(runhistory) - - raise NotImplementedError - - -def calculate_reference_point( - runhistory: RunHistory, - configs: list[Configuration] | None = None, - config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]] | None = None, -) -> list[float]: - if configs is None: - # Compute over the complete runhistory - costs = [trail.cost for trial in runhistory.values()] - return np.max(np.array(costs), axis=1) - else: - assert len(configs) 
== len(config_instance_seed_budget_keys) - raise NotImplementedError From cf824ae9da62d038babf70da86b9a22bc1577d17 Mon Sep 17 00:00:00 2001 From: benjamc Date: Tue, 7 Oct 2025 21:30:58 +0200 Subject: [PATCH 66/74] fix some mypy --- smac/intensifier/abstract_intensifier.py | 2 +- smac/intensifier/mixins/intermediate_decision.py | 7 ++++--- smac/intensifier/mixins/intermediate_update.py | 7 ++++--- smac/main/config_selector.py | 1 + 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/smac/intensifier/abstract_intensifier.py b/smac/intensifier/abstract_intensifier.py index e2edd3177..32687d14f 100644 --- a/smac/intensifier/abstract_intensifier.py +++ b/smac/intensifier/abstract_intensifier.py @@ -64,7 +64,7 @@ def __init__( self._scenario = scenario self._config_selector: ConfigSelector | None = None self._config_generator: Iterator[ConfigSelector] | None = None - self._runhistory: RunHistory | None = RunHistory + self._runhistory: RunHistory | None = None if seed is None: seed = self._scenario.seed diff --git a/smac/intensifier/mixins/intermediate_decision.py b/smac/intensifier/mixins/intermediate_decision.py index 39b58b7a8..71a511d74 100644 --- a/smac/intensifier/mixins/intermediate_decision.py +++ b/smac/intensifier/mixins/intermediate_decision.py @@ -3,6 +3,7 @@ import numpy as np from ConfigSpace import Configuration +from smac.intensifier.abstract_intensifier import AbstractIntensifier from smac.utils.logging import get_logger __copyright__ = "Copyright 2022, automl.org" @@ -11,14 +12,14 @@ logger = get_logger(__name__) -def _dominates(a, b) -> bool: +def _dominates(a: np.ndarray, b: np.ndarray) -> bool: # Checks if a dominates b a = np.atleast_1d(a) b = np.atleast_1d(b) return np.count_nonzero(a <= b) >= len(a) and np.count_nonzero(a < b) >= 1 -class NewCostDominatesOldCost: +class NewCostDominatesOldCost(AbstractIntensifier): def _check_for_intermediate_comparison(self, config: Configuration) -> bool: """ @@ -33,7 +34,7 @@ def 
_check_for_intermediate_comparison(self, config: Configuration) -> bool: config_isb_keys = self.get_instance_seed_budget_keys(config) if not hasattr(self, "_old_config_cost"): - self._old_config_cost = {} # TODO remove configuration when done + self._old_config_cost: dict[Configuration, np.ndarray] = {} # TODO remove configuration when done new_cost = self.runhistory.average_cost(config, config_isb_keys) if config not in self._old_config_cost: diff --git a/smac/intensifier/mixins/intermediate_update.py b/smac/intensifier/mixins/intermediate_update.py index f76c378aa..f56b03e55 100644 --- a/smac/intensifier/mixins/intermediate_update.py +++ b/smac/intensifier/mixins/intermediate_update.py @@ -5,7 +5,9 @@ import numpy as np from ConfigSpace import Configuration from scipy.stats import binom +from typing import Any +from smac.intensifier.abstract_intensifier import AbstractIntensifier from smac.utils.configspace import get_config_hash from smac.utils.logging import get_logger from smac.utils.pareto_front import _get_costs @@ -15,9 +17,8 @@ logger = get_logger(__name__) - -class DebugComparison(object): - def _register_comparison(self, **kwargs): +class DebugComparison(AbstractIntensifier): + def _register_comparison(self, **kwargs: Any) -> None: logger.debug(f"Made intermediate comparison with {kwargs['name']} comparison ") if not hasattr(self, "_intermediate_comparisons_log"): self._intermediate_comparisons_log = [] diff --git a/smac/main/config_selector.py b/smac/main/config_selector.py index f5545f372..7d1f039a3 100644 --- a/smac/main/config_selector.py +++ b/smac/main/config_selector.py @@ -252,6 +252,7 @@ def __iter__(self) -> Iterator[Configuration]: ) if self._retrain_wallclock_ratio is not None: + # TODO: CB: What does this actually do? Delete/clear the iterator? 
len(challengers) # TODO hacky: Forces actual computation of the acquisition function maximizer self._acquisition_training_times.append(time.time() - start_time) From 0b7e9471326351a27e38b6d4d49436dfc76a77a8 Mon Sep 17 00:00:00 2001 From: benjamc Date: Wed, 8 Oct 2025 09:20:15 +0200 Subject: [PATCH 67/74] style: mypy --- smac/acquisition/function/expected_hypervolume.py | 5 ++++- smac/facade/multi_objective_facade.py | 8 ++++---- smac/intensifier/mixins/intermediate_update.py | 4 +++- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/smac/acquisition/function/expected_hypervolume.py b/smac/acquisition/function/expected_hypervolume.py index b6c668ef2..4ebe05985 100644 --- a/smac/acquisition/function/expected_hypervolume.py +++ b/smac/acquisition/function/expected_hypervolume.py @@ -266,6 +266,7 @@ def name(self) -> str: def _update(self, **kwargs: Any) -> None: super(PHVI, self)._update(**kwargs) + assert self.model is not None, "Did you update the AF with the model?" incumbents: list[Configuration] = kwargs.get("incumbents", None) # Update PHVI @@ -282,7 +283,7 @@ def _update(self, **kwargs: Any) -> None: logger.info(f"New population HV: {population_hv}") - def get_hypervolume(self, points: np.ndarray = None, reference_point: list = None) -> float: + def get_hypervolume(self, points: np.ndarray = None, reference_point: list | None = None) -> float: """ Compute the hypervolume @@ -316,6 +317,8 @@ def _compute(self, X: np.ndarray) -> np.ndarray: np.ndarray(N,1) Expected HV Improvement of X """ + assert self.model is not None, "Did you update the AF with the model?" 
+ if len(X.shape) == 1: X = X[:, np.newaxis] diff --git a/smac/facade/multi_objective_facade.py b/smac/facade/multi_objective_facade.py index 287f578f7..91262ca54 100644 --- a/smac/facade/multi_objective_facade.py +++ b/smac/facade/multi_objective_facade.py @@ -2,7 +2,7 @@ from ConfigSpace import Configuration -from smac.acquisition.function.expected_hypervolume import EHVI, PHVI +from smac.acquisition.function.expected_hypervolume import PHVI, AbstractHVI from smac.acquisition.maximizer.multi_objective_search import ( MOLocalAndSortedRandomSearch, ) @@ -38,7 +38,7 @@ def get_model( # type: ignore max_depth: int = 20, bootstrapping: bool = True, pca_components: int = 4, - ) -> RandomForest: + ) -> MultiObjectiveModel: """Returns a random forest as surrogate model. Parameters @@ -76,7 +76,7 @@ def get_model( # type: ignore ) ) - return MultiObjectiveModel(models=models, objectives=scenario.objectives) + return MultiObjectiveModel(models=models, objectives=scenario.objectives) # type: ignore[arg-type] @staticmethod def get_intensifier( # type: ignore @@ -114,7 +114,7 @@ def get_acquisition_function( # type: ignore scenario: Scenario, *, xi: float = 0.0, - ) -> EHVI: + ) -> AbstractHVI: """Returns an Expected Improvement acquisition function. 
Parameters diff --git a/smac/intensifier/mixins/intermediate_update.py b/smac/intensifier/mixins/intermediate_update.py index f56b03e55..808b2e8e8 100644 --- a/smac/intensifier/mixins/intermediate_update.py +++ b/smac/intensifier/mixins/intermediate_update.py @@ -1,11 +1,12 @@ from __future__ import annotations +from typing import Any + import itertools import numpy as np from ConfigSpace import Configuration from scipy.stats import binom -from typing import Any from smac.intensifier.abstract_intensifier import AbstractIntensifier from smac.utils.configspace import get_config_hash @@ -17,6 +18,7 @@ logger = get_logger(__name__) + class DebugComparison(AbstractIntensifier): def _register_comparison(self, **kwargs: Any) -> None: logger.debug(f"Made intermediate comparison with {kwargs['name']} comparison ") From a159eb49a2e858902646d368a310bc14da247ab1 Mon Sep 17 00:00:00 2001 From: benjamc Date: Wed, 8 Oct 2025 09:31:21 +0200 Subject: [PATCH 68/74] refactor(expected_hypervolume): rm duplicate function --- .../function/expected_hypervolume.py | 26 ++----------------- 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/smac/acquisition/function/expected_hypervolume.py b/smac/acquisition/function/expected_hypervolume.py index 4ebe05985..b26374a39 100644 --- a/smac/acquisition/function/expected_hypervolume.py +++ b/smac/acquisition/function/expected_hypervolume.py @@ -117,7 +117,7 @@ def _update(self, **kwargs: Any) -> None: self._objective_bounds = self.runhistory_encoder.transform_response_values(objective_bounds) self._reference_point = [1.1] * len(self._objective_bounds) - def get_hypervolume(self, points: np.ndarray = None, reference_point: list = None) -> float: + def get_hypervolume(self, points: np.ndarray) -> float: """ Compute the hypervolume @@ -133,10 +133,7 @@ def get_hypervolume(self, points: np.ndarray = None, reference_point: list = Non """ # Normalize the objectives here to give equal attention to the objectives when computing the HV points = 
[normalize_costs(p, self._objective_bounds) for p in points] - hv = pygmo.hypervolume(points) - # if reference_point is None: - # self._reference_point = hv.refpoint(offset=1) return hv.compute(self._reference_point) def _compute(self, X: np.ndarray) -> np.ndarray: @@ -283,25 +280,6 @@ def _update(self, **kwargs: Any) -> None: logger.info(f"New population HV: {population_hv}") - def get_hypervolume(self, points: np.ndarray = None, reference_point: list | None = None) -> float: - """ - Compute the hypervolume - - Parameters - ---------- - points : np.ndarray - A 2d numpy array. 1st dimension is an entity and the 2nd dimension are the costs - reference_point : list - - Return - ------ - hypervolume: float - """ - # Normalize the objectives here to give equal attention to the objectives when computing the HV - points = [normalize_costs(p, self._objective_bounds) for p in points] - hv = pygmo.hypervolume(points) - return hv.compute(self._reference_point) - def _compute(self, X: np.ndarray) -> np.ndarray: """Computes the PHVI values and its derivatives. @@ -315,7 +293,7 @@ def _compute(self, X: np.ndarray) -> np.ndarray: Returns ------- np.ndarray(N,1) - Expected HV Improvement of X + Predicted HV Improvement of X """ assert self.model is not None, "Did you update the AF with the model?" 
From 51418df47a82ab1ec6198d3b99cc9b4db5a1ef97 Mon Sep 17 00:00:00 2001 From: benjamc Date: Wed, 8 Oct 2025 09:33:30 +0200 Subject: [PATCH 69/74] refactor(expected_hypervolume): delete proxy method which was a comment anyway --- .../function/expected_hypervolume.py | 40 ------------------- 1 file changed, 40 deletions(-) diff --git a/smac/acquisition/function/expected_hypervolume.py b/smac/acquisition/function/expected_hypervolume.py index b26374a39..8b3d9424d 100644 --- a/smac/acquisition/function/expected_hypervolume.py +++ b/smac/acquisition/function/expected_hypervolume.py @@ -26,46 +26,6 @@ logger = get_logger(__name__) -# class _PosteriorProxy(object): -# def __init__(self) -> None: -# self.mean: Tensor = [] -# self.variance: Tensor = [] - -# class _ModelProxy(Model, ABC): -# def __init__(self, model: AbstractModel, objective_bounds: list[tuple[float, float]]): -# super(_ModelProxy).__init__() -# self.model = model -# self._objective_bounds = objective_bounds -# -# def posterior(self, X: Tensor, **kwargs: Any) -> _PosteriorProxy: -# """Docstring -# X: A `b x q x d`-dim Tensor, where `d` is the dimension of the -# feature space, `q` is the number of points considered jointly, -# and `b` is the batch dimension. -# -# -# A `Posterior` object, representing a batch of `b` joint distributions -# over `q` points and `m` outputs each. 
-# """ -# assert X.shape[1] == 1 -# X = X.reshape([X.shape[0], -1]).numpy() # 3D -> 2D -# -# # predict -# # start_time = time.time() -# # print(f"Start predicting ") -# mean, var_ = self.model.predict_marginalized(X) -# normalized_mean = np.array([normalize_costs(m, self._objective_bounds) for m in mean]) -# scale = normalized_mean / mean -# var_ *= scale # Scale variance accordingly -# mean = normalized_mean -# # print(f"Done in {time.time() - start_time}s") -# post = _PosteriorProxy() -# post.mean = torch.asarray(mean).reshape(X.shape[0], 1, -1) # 2D -> 3D -# post.variance = torch.asarray(var_).reshape(X.shape[0], 1, -1) # 2D -> 3D -# -# return post - - class AbstractHVI(AbstractAcquisitionFunction): def __init__(self): """Computes for a given x the predicted hypervolume improvement as From e0e59a98b0ac269dfffd36303364209443353e5b Mon Sep 17 00:00:00 2001 From: benjamc Date: Wed, 8 Oct 2025 09:34:51 +0200 Subject: [PATCH 70/74] refactor(expected_hypervolume): delete ehvi method which was a comment anyway --- .../function/expected_hypervolume.py | 88 +------------------ 1 file changed, 1 insertion(+), 87 deletions(-) diff --git a/smac/acquisition/function/expected_hypervolume.py b/smac/acquisition/function/expected_hypervolume.py index 8b3d9424d..79bd4b544 100644 --- a/smac/acquisition/function/expected_hypervolume.py +++ b/smac/acquisition/function/expected_hypervolume.py @@ -14,18 +14,12 @@ from smac.utils.logging import get_logger from smac.utils.multi_objective import normalize_costs -# import torch -# from botorch.acquisition.multi_objective import ExpectedHypervolumeImprovement -# from botorch.models.model import Model -# from botorch.utils.multi_objective.box_decompositions.non_dominated import ( -# NondominatedPartitioning, -# ) - __copyright__ = "Copyright 2022, automl.org" __license__ = "3-clause BSD" logger = get_logger(__name__) + class AbstractHVI(AbstractAcquisitionFunction): def __init__(self): """Computes for a given x the predicted hypervolume 
improvement as @@ -127,89 +121,9 @@ def _compute(self, X: np.ndarray) -> np.ndarray: hv = self.get_hypervolume(points) phvi[i] = hv - self.population_hv - # if len(X) == 10000: - # for op in ["max", "min", "mean", "median"]: - # val = getattr(np, op)(phvi) - # print(f"{op:6} - {val}") - # time.sleep(1.5) - return phvi.reshape(-1, 1) -# class EHVI(AbstractHVI): -# def __init__(self): -# super(EHVI, self).__init__() -# self._ehvi: ExpectedHypervolumeImprovement | None = None -# -# @property -# def name(self) -> str: -# return "Expected Hypervolume Improvement" -# -# def _update(self, **kwargs: Any) -> None: -# super(EHVI, self)._update(**kwargs) -# incumbents: list[Configuration] = kwargs.get("incumbents", None) -# -# # Update EHVI -# # Prediction all -# population_configs = incumbents -# population_X = np.array([config.get_array() for config in population_configs]) -# population_costs, _ = self.model.predict_marginalized(population_X) -# # Normalize the objectives here to give equal attention to the objectives when computing the HV -# population_costs = [normalize_costs(p, self._objective_bounds) for p in population_costs] -# -# # BOtorch EHVI implementation -# bomodel = _ModelProxy(self.model, self._objective_bounds) -# # ref_point = pygmo.hypervolume(population_costs).refpoint( -# # offset=1 -# # ) # TODO get proper reference points from user/cutoffs -# ref_point = [1.1] * len(self._objective_bounds) -# # ref_point = torch.asarray(ref_point) -# # TODO partition from all runs instead of only population? 
-# # TODO NondominatedPartitioning and ExpectedHypervolumeImprovement seem no too difficult to implement natively -# # TODO pass along RNG -# # Transfrom the objective space to cells based on the population -# partitioning = NondominatedPartitioning(torch.asarray(ref_point), torch.asarray(population_costs)) -# self._ehvi = ExpectedHypervolumeImprovement(bomodel, ref_point, partitioning) -# -# def _compute(self, X: np.ndarray) -> np.ndarray: -# """Computes the EHVI values and its derivatives. -# -# Parameters -# ---------- -# X: np.ndarray(N, D), The input points where the acquisition function -# should be evaluated. The dimensionality of X is (N, D), with N as -# the number of points to evaluate at and D is the number of -# dimensions of one X. -# -# Returns -# ------- -# np.ndarray(N,1) -# Expected HV Improvement of X -# """ -# if self._ehvi is None: -# raise ValueError(f"The expected hypervolume improvement is not defined yet. Call self.update.") -# -# if len(X.shape) == 1: -# X = X[:, np.newaxis] -# -# # m, var_ = self.model.predict_marginalized_over_instances(X) -# # Find a way to propagate the variance into the HV -# boX = torch.asarray(X).reshape(X.shape[0], 1, -1) # 2D -> #3D -# improvements = self._ehvi(boX).numpy().reshape(-1, 1) # TODO here are the expected hv improvements computed. -# return improvements -# -# # TODO non-dominated sorting of costs. Compute EHVI only until the EHVI is not expected to improve anymore. -# # Option 1: Supplement missing instances of population with acq. function to get predicted performance over -# # all instances. 
Idea is this prevents optimizing for the initial instances which get it stuck in local optima -# # Option 2: Only on instances of population -# # Option 3: EVHI per instance and aggregate afterwards -# # ehvi = np.zeros(len(X)) -# # for i, indiv in enumerate(m): -# # ehvi[i] = self.get_hypervolume(population_costs + [indiv]) - population_hv -# # -# # return ehvi.reshape(-1, 1) - - class PHVI(AbstractHVI): def __init__(self): super(PHVI, self).__init__() From 538f4dfc6406cfdba5c78ff83906eac27d4b2e31 Mon Sep 17 00:00:00 2001 From: benjamc Date: Wed, 8 Oct 2025 09:45:27 +0200 Subject: [PATCH 71/74] rename hypervolume.py --- .../function/{expected_hypervolume.py => hypervolume.py} | 0 smac/facade/multi_objective_facade.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename smac/acquisition/function/{expected_hypervolume.py => hypervolume.py} (100%) diff --git a/smac/acquisition/function/expected_hypervolume.py b/smac/acquisition/function/hypervolume.py similarity index 100% rename from smac/acquisition/function/expected_hypervolume.py rename to smac/acquisition/function/hypervolume.py diff --git a/smac/facade/multi_objective_facade.py b/smac/facade/multi_objective_facade.py index 91262ca54..8244fe64c 100644 --- a/smac/facade/multi_objective_facade.py +++ b/smac/facade/multi_objective_facade.py @@ -2,7 +2,7 @@ from ConfigSpace import Configuration -from smac.acquisition.function.expected_hypervolume import PHVI, AbstractHVI +from smac.acquisition.function.hypervolume import PHVI, AbstractHVI from smac.acquisition.maximizer.multi_objective_search import ( MOLocalAndSortedRandomSearch, ) From 9f84c6d507eb09598ec50484ddc250363f17e6c8 Mon Sep 17 00:00:00 2001 From: benjamc Date: Wed, 8 Oct 2025 10:04:35 +0200 Subject: [PATCH 72/74] style(hypervolume.py): fix mypy --- smac/acquisition/function/hypervolume.py | 54 +++++++++++------------- smac/main/config_selector.py | 8 +--- 2 files changed, 26 insertions(+), 36 deletions(-) diff --git 
a/smac/acquisition/function/hypervolume.py b/smac/acquisition/function/hypervolume.py index 79bd4b544..1bc9cd56d 100644 --- a/smac/acquisition/function/hypervolume.py +++ b/smac/acquisition/function/hypervolume.py @@ -21,7 +21,7 @@ class AbstractHVI(AbstractAcquisitionFunction): - def __init__(self): + def __init__(self) -> None: """Computes for a given x the predicted hypervolume improvement as acquisition value. """ @@ -33,23 +33,8 @@ def __init__(self): self._runhistory: RunHistory | None = None self._runhistory_encoder: AbstractRunHistoryEncoder | None = None - @property - def runhistory(self) -> RunHistory: - """Return the runhistory.""" - return self._runhistory - - @runhistory.setter - def runhistory(self, runhistory: RunHistory): - self._runhistory = runhistory - - @property - def runhistory_encoder(self) -> AbstractRunHistoryEncoder: - """Return the runhistory encoder.""" - return self._runhistory_encoder - - @runhistory_encoder.setter - def runhistory_encoder(self, runhistory_encoder: AbstractRunHistoryEncoder): - self._runhistory_encoder = runhistory_encoder + self._population_hv: float | None = None + self._population_costs: np.ndarray | None = None @property def name(self) -> str: @@ -67,9 +52,14 @@ def _update(self, **kwargs: Any) -> None: "No incumbents here. Did the intensifier properly update the incumbents in the runhistory?" ) - objective_bounds = np.array(self.runhistory.objective_bounds) - self._objective_bounds = self.runhistory_encoder.transform_response_values(objective_bounds) - self._reference_point = [1.1] * len(self._objective_bounds) + self._runhistory = kwargs.get("runhistory") + self._runhistory_encoder = kwargs.get("runhistory_encoder") + assert self._runhistory is not None, "Did you update the AF with the runhistory?" + assert self._runhistory_encoder is not None, "Did you update the AF with the runhistory encoder?" 
+ + objective_bounds = np.array(self._runhistory.objective_bounds) + self._objective_bounds = self._runhistory_encoder.transform_response_values(objective_bounds) + self._reference_point = [1.1] * len(self._objective_bounds) # type: ignore[arg-type,assignment] def get_hypervolume(self, points: np.ndarray) -> float: """ @@ -105,6 +95,10 @@ def _compute(self, X: np.ndarray) -> np.ndarray: np.ndarray(N,1) Expected HV Improvement of X """ + assert self.model is not None, "Did you update the AF with the model?" + assert self._population_costs is not None + assert self._population_hv is not None + if len(X.shape) == 1: X = X[:, np.newaxis] @@ -117,18 +111,16 @@ def _compute(self, X: np.ndarray) -> np.ndarray: phvi = np.zeros(len(X)) for i, indiv in enumerate(mean): - points = list(self.population_costs) + [indiv] + points = list(self._population_costs) + [indiv] hv = self.get_hypervolume(points) - phvi[i] = hv - self.population_hv + phvi[i] = hv - self._population_hv return phvi.reshape(-1, 1) class PHVI(AbstractHVI): - def __init__(self): + def __init__(self) -> None: super(PHVI, self).__init__() - self.population_hv = None - self.population_costs = None @property def name(self) -> str: @@ -149,8 +141,8 @@ def _update(self, **kwargs: Any) -> None: # Compute HV population_hv = self.get_hypervolume(population_costs) - self.population_costs = population_costs - self.population_hv = population_hv + self._population_costs = population_costs + self._population_hv = population_hv logger.info(f"New population HV: {population_hv}") @@ -170,6 +162,8 @@ def _compute(self, X: np.ndarray) -> np.ndarray: Predicted HV Improvement of X """ assert self.model is not None, "Did you update the AF with the model?" 
+ assert self._population_costs is not None + assert self._population_hv is not None if len(X.shape) == 1: X = X[:, np.newaxis] @@ -177,8 +171,8 @@ def _compute(self, X: np.ndarray) -> np.ndarray: mean, _ = self.model.predict_marginalized(X) # Expected to be not normalized phvi = np.zeros(len(X)) for i, indiv in enumerate(mean): - points = list(self.population_costs) + [indiv] + points = list(self._population_costs) + [indiv] hv = self.get_hypervolume(points) - phvi[i] = hv - self.population_hv + phvi[i] = hv - self._population_hv return phvi.reshape(-1, 1) diff --git a/smac/main/config_selector.py b/smac/main/config_selector.py index 7d1f039a3..6aa156e7f 100644 --- a/smac/main/config_selector.py +++ b/smac/main/config_selector.py @@ -131,12 +131,6 @@ def _set_components( # raise RuntimeError("SMAC needs initial configurations to work.") logger.warning("No initial configurations were sampled.") - if hasattr(self._acquisition_function, "runhistory"): - self._acquisition_function.runhistory = runhistory - - if hasattr(self._acquisition_function, "runhistory_encoder"): - self._acquisition_function.runhistory_encoder = runhistory_encoder - @property def meta(self) -> dict[str, Any]: """Returns the meta data of the created object.""" @@ -238,6 +232,8 @@ def __iter__(self) -> Iterator[Configuration]: num_data=len(self._get_evaluated_configs()), X=X_configurations, incumbents=self._runhistory.incumbents, + runhistory=self._runhistory, + runhistory_encoder=self._runhistory_encoder, ) # We want to cache how many entries we used because if we have the same number of entries From 3f06d61607f249b41c2508ef6657b739d41a0710 Mon Sep 17 00:00:00 2001 From: benjamc Date: Wed, 8 Oct 2025 10:53:45 +0200 Subject: [PATCH 73/74] style: pre-commit fix --- smac/facade/blackbox_facade.py | 2 +- smac/facade/multi_objective_facade.py | 2 +- .../initial_design/abstract_initial_design.py | 2 +- .../mixins/intermediate_decision.py | 25 +++++++++---------- 
.../intensifier/mixins/intermediate_update.py | 14 +++++------ smac/intensifier/mixins/update_incumbent.py | 7 ++++-- .../multi_objective_intensifier.py | 3 ++- smac/multi_objective/aggregation_strategy.py | 2 +- smac/runner/aclib_runner.py | 2 +- smac/runner/target_function_script_runner.py | 2 +- 10 files changed, 32 insertions(+), 29 deletions(-) diff --git a/smac/facade/blackbox_facade.py b/smac/facade/blackbox_facade.py index ac4936f78..29d7d1a09 100644 --- a/smac/facade/blackbox_facade.py +++ b/smac/facade/blackbox_facade.py @@ -314,7 +314,7 @@ def get_runhistory_encoder( return RunHistoryEncoder(scenario) @staticmethod - def get_config_selector( + def get_config_selector( # type: ignore[override] scenario: Scenario, *, retrain_after: int = 1, diff --git a/smac/facade/multi_objective_facade.py b/smac/facade/multi_objective_facade.py index 8244fe64c..7cf5fbaa7 100644 --- a/smac/facade/multi_objective_facade.py +++ b/smac/facade/multi_objective_facade.py @@ -143,7 +143,7 @@ def get_acquisition_maximizer( # type: ignore def get_initial_design( # type: ignore scenario: Scenario, *, - additional_configs: list[Configuration] = [], + additional_configs: list[Configuration] | None = None, ) -> DefaultInitialDesign: """Returns an initial design, which returns the default configuration. 
diff --git a/smac/initial_design/abstract_initial_design.py b/smac/initial_design/abstract_initial_design.py index 466ec7649..b363d7442 100644 --- a/smac/initial_design/abstract_initial_design.py +++ b/smac/initial_design/abstract_initial_design.py @@ -43,7 +43,7 @@ def __init__( n_configs: int | None = None, n_configs_per_hyperparameter: int | None = 10, max_ratio: float = 0.25, - additional_configs: list[Configuration] = None, + additional_configs: list[Configuration] | None = None, seed: int | None = None, ): self._configspace = scenario.configspace diff --git a/smac/intensifier/mixins/intermediate_decision.py b/smac/intensifier/mixins/intermediate_decision.py index 71a511d74..58f100c00 100644 --- a/smac/intensifier/mixins/intermediate_decision.py +++ b/smac/intensifier/mixins/intermediate_decision.py @@ -12,7 +12,7 @@ logger = get_logger(__name__) -def _dominates(a: np.ndarray, b: np.ndarray) -> bool: +def _dominates(a: list[float], b: list[float]) -> bool: # Checks if a dominates b a = np.atleast_1d(a) b = np.atleast_1d(b) @@ -34,21 +34,21 @@ def _check_for_intermediate_comparison(self, config: Configuration) -> bool: config_isb_keys = self.get_instance_seed_budget_keys(config) if not hasattr(self, "_old_config_cost"): - self._old_config_cost: dict[Configuration, np.ndarray] = {} # TODO remove configuration when done + self._old_config_cost: dict[Configuration, list[float]] = {} # TODO remove configuration when done - new_cost = self.runhistory.average_cost(config, config_isb_keys) + new_cost: list[float] = self.runhistory.average_cost(config, config_isb_keys) # type: ignore[assignment] if config not in self._old_config_cost: self._old_config_cost[config] = new_cost return True - old_cost = self._old_config_cost[config] + old_cost: list[float] = self._old_config_cost[config] if _dominates(new_cost, old_cost): self._old_config_cost[config] = new_cost return True return False -class NewCostDominatesOldCostSkipFirst: +class 
NewCostDominatesOldCostSkipFirst(AbstractIntensifier): def _check_for_intermediate_comparison(self, config: Configuration) -> bool: """Do the first comparison with the incumbent when the configuration dominates the cost after finishing its first trial. @@ -64,21 +64,20 @@ def _check_for_intermediate_comparison(self, config: Configuration) -> bool: config_isb_keys = self.get_instance_seed_budget_keys(config) if not hasattr(self, "_old_config_cost"): - self._old_config_cost = {} # TODO remove configuration when done + self._old_config_cost: dict[Configuration, list[float]] = {} # TODO remove configuration when done - new_cost = self.runhistory.average_cost(config, config_isb_keys) + new_cost: list[float] = self.runhistory.average_cost(config, config_isb_keys) # type: ignore[assignment] if config not in self._old_config_cost: self._old_config_cost[config] = new_cost return False - old_cost = self._old_config_cost[config] + old_cost: list[float] = self._old_config_cost[config] # type: ignore[assignment] if _dominates(new_cost, old_cost): self._old_config_cost[config] = new_cost - return True return False -class DoublingNComparison: +class DoublingNComparison(AbstractIntensifier): def _check_for_intermediate_comparison(self, config: Configuration) -> bool: """ @@ -102,17 +101,17 @@ def _check_for_intermediate_comparison(self, config: Configuration) -> bool: return (nkeys + 1) & nkeys == 0 # checks if nkeys+1 is a power of 2 (complies with the sequence (2**n)-1) -class Always: +class Always(AbstractIntensifier): def _check_for_intermediate_comparison(self, config: Configuration) -> bool: return True -class Never: +class Never(AbstractIntensifier): def _check_for_intermediate_comparison(self, config: Configuration) -> bool: return False -class DoublingNComparisonFour: +class DoublingNComparisonFour(AbstractIntensifier): def _check_for_intermediate_comparison(self, config: Configuration) -> bool: """ diff --git a/smac/intensifier/mixins/intermediate_update.py 
b/smac/intensifier/mixins/intermediate_update.py index 808b2e8e8..cedbc0614 100644 --- a/smac/intensifier/mixins/intermediate_update.py +++ b/smac/intensifier/mixins/intermediate_update.py @@ -395,7 +395,7 @@ def _intermediate_comparison(self, config: Configuration) -> bool: probability=np.count_nonzero(verdicts) / n_samples, n_samples=n_samples, ) - return verdict + return bool(verdict) class BootstrapClosestComparison(DebugComparison): @@ -478,7 +478,7 @@ def _intermediate_comparison(self, config: Configuration) -> bool: A boolean which indicates if we should continue with this configuration. """ - def get_alpha(delta, n_instances): + def get_alpha(delta: float, n_instances: int) -> float: steps = 0 n = 1 inst = 0 @@ -489,7 +489,7 @@ def get_alpha(delta, n_instances): return (1 - delta) / (n_instances) * (steps - 1) - def dominates(a, b): + def dominates(a: list[float], b: list[float]) -> int: # Checks if a dominates b a = np.array(a) b = np.array(b) @@ -509,14 +509,14 @@ def dominates(a, b): return True p_values = [] - chall_perf = self.runhistory._cost(config, config_isb_keys) + chall_perf: list[list[float]] = self.runhistory._cost(config, config_isb_keys) # type: ignore[assignment] for incumbent in incumbents: - inc_perf = self.runhistory._cost(incumbent, config_isb_keys) + inc_perf: list[list[float]] = self.runhistory._cost(incumbent, config_isb_keys) # type: ignore[assignment] n_ij = sum( - [dominates(*x) for x in zip(chall_perf, inc_perf)] + [dominates(_chall_perf, _inc_perf) for _chall_perf, _inc_perf in zip(chall_perf, inc_perf)] ) # Number of times the incumbent candidate dominates the challenger n_ji = sum( - [dominates(*x) for x in zip(inc_perf, chall_perf)] + [dominates(_chall_perf, _inc_perf) for _chall_perf, _inc_perf in zip(chall_perf, inc_perf)] ) # Number of times the challenger dominates the incumbent candidate p_value = 1 - binom.cdf(n_ij - 1, n_ij + n_ji, 0.5) p_values.append(p_value) diff --git a/smac/intensifier/mixins/update_incumbent.py 
b/smac/intensifier/mixins/update_incumbent.py index e1efd9454..acd1b303c 100644 --- a/smac/intensifier/mixins/update_incumbent.py +++ b/smac/intensifier/mixins/update_incumbent.py @@ -1,10 +1,13 @@ from __future__ import annotations +from typing import Any + import itertools import numpy as np from ConfigSpace import Configuration +from smac.intensifier.abstract_intensifier import AbstractIntensifier from smac.utils.logging import get_logger __copyright__ = "Copyright 2022, automl.org" @@ -13,8 +16,8 @@ logger = get_logger(__name__) -class DebugUpdate(object): - def _register_incumbent_update(self, **kwargs): +class DebugUpdate(AbstractIntensifier): + def _register_incumbent_update(self, **kwargs: Any) -> None: if not hasattr(self, "_update_incumbent_log"): self._update_incumbent_log = [] self._update_incumbent_log.append(kwargs) diff --git a/smac/intensifier/multi_objective_intensifier.py b/smac/intensifier/multi_objective_intensifier.py index 33223ef65..cef0b4242 100644 --- a/smac/intensifier/multi_objective_intensifier.py +++ b/smac/intensifier/multi_objective_intensifier.py @@ -5,6 +5,7 @@ from ConfigSpace import Configuration +from smac.intensifier.abstract_intensifier import AbstractIntensifier from smac.intensifier.hyperband import Hyperband from smac.intensifier.intensifier import Intensifier from smac.intensifier.successive_halving import SuccessiveHalving @@ -21,7 +22,7 @@ # TODO add minimum population size? 
-class MOIntensifierMixin(object): +class MOIntensifierMixin(AbstractIntensifier): def _calculate_pareto_front( self, runhistory: RunHistory, diff --git a/smac/multi_objective/aggregation_strategy.py b/smac/multi_objective/aggregation_strategy.py index 1958b2937..ca40f99c3 100644 --- a/smac/multi_objective/aggregation_strategy.py +++ b/smac/multi_objective/aggregation_strategy.py @@ -47,7 +47,7 @@ def __call__(self, values: list[float]) -> float: # noqa: D102 class NoAggregationStrategy(AbstractMultiObjectiveAlgorithm): """A class to not aggregate multi-objective losses into a single objective losses.""" - def __call__(self, values: list[float]) -> list[float]: + def __call__(self, values: list[float]) -> list[float]: # type: ignore[override] """ Not transform a multi-objective loss to a single loss. diff --git a/smac/runner/aclib_runner.py b/smac/runner/aclib_runner.py index 74c590af6..7af4dacda 100644 --- a/smac/runner/aclib_runner.py +++ b/smac/runner/aclib_runner.py @@ -20,7 +20,7 @@ def __init__( self, target_function: str, scenario: Scenario, - required_arguments: list[str] = [], + required_arguments: list[str] | None = None, target_function_arguments: dict[str, str] | None = None, ): diff --git a/smac/runner/target_function_script_runner.py b/smac/runner/target_function_script_runner.py index 90ca72117..bd6a152ea 100644 --- a/smac/runner/target_function_script_runner.py +++ b/smac/runner/target_function_script_runner.py @@ -51,7 +51,7 @@ def __init__( self, target_function: str, scenario: Scenario, - required_arguments: list[str] = None, + required_arguments: list[str] | None = None, ): if required_arguments is None: required_arguments = [] From 41742c942c3c7ab4d98a1233e41369815212128b Mon Sep 17 00:00:00 2001 From: rookj Date: Wed, 8 Oct 2025 13:33:07 +0200 Subject: [PATCH 74/74] refactor crowding distance: optional normalization --- smac/intensifier/abstract_intensifier.py | 13 +++++++++---- smac/intensifier/multi_objective_intensifier.py | 4 +++- 
smac/intensifier/successive_halving.py | 2 +- smac/runhistory/runhistory.py | 2 +- smac/utils/pareto_front.py | 6 ++++-- 5 files changed, 18 insertions(+), 9 deletions(-) diff --git a/smac/intensifier/abstract_intensifier.py b/smac/intensifier/abstract_intensifier.py index 32687d14f..22c171ce6 100644 --- a/smac/intensifier/abstract_intensifier.py +++ b/smac/intensifier/abstract_intensifier.py @@ -618,8 +618,10 @@ def update_incumbents(self, config: Configuration) -> None: new_incumbent_ids = [rh.get_config_id(c) for c in new_incumbents] # Update trajectory - if previous_incumbents == new_incumbents: # Only happens with incumbent config - self._remove_rejected_config(config) + if previous_incumbents == new_incumbents: # Only happens with incumbent config #TODO JG check why this is + self._remove_rejected_config(config) # Remove the incumbent from the rejected config list + + # TODO JG ?remove the config it is the incumbent. otherwise add the config.? However if a config is rejected, it should be possible to requeue a configuration. 
return elif len(previous_incumbents) == len(new_incumbents): # In this case, we have to determine which config replaced which incumbent and reject it @@ -804,7 +806,8 @@ def update_incumbents(self, config: Configuration) -> None: # new_incumbent_ids = [rh.get_config_id(c) for c in new_incumbents] # # #TODO JG: merge 06-10-2025 updated - # if len(previous_incumbents) == len(new_incumbents): + + # if len(previous_incumbents) == len(new_incumbents): # if previous_incumbents == new_incumbents: # # No changes in the incumbents, we need this clause because we can't use set difference then # if config_id in new_incumbent_ids: @@ -859,7 +862,9 @@ def update_incumbents(self, config: Configuration) -> None: def _cut_incumbents( self, incumbent_ids: list[int], all_incumbent_isb_keys: list[list[InstanceSeedBudgetKey]] ) -> list[int]: - new_incumbents = sort_by_crowding_distance(self.runhistory, incumbent_ids, all_incumbent_isb_keys) + new_incumbents = sort_by_crowding_distance( + self.runhistory, incumbent_ids, all_incumbent_isb_keys, normalize=True + ) new_incumbents = new_incumbents[: self._max_incumbents] # or random? 
diff --git a/smac/intensifier/multi_objective_intensifier.py b/smac/intensifier/multi_objective_intensifier.py index cef0b4242..16bb45d0b 100644 --- a/smac/intensifier/multi_objective_intensifier.py +++ b/smac/intensifier/multi_objective_intensifier.py @@ -44,7 +44,9 @@ def _cut_incumbents( self, incumbent_ids: list[int], all_incumbent_isb_keys: list[list[InstanceSeedBudgetKey]] ) -> list[int]: # TODO JG sort by hypervolume - new_incumbents = sort_by_crowding_distance(self.runhistory, incumbent_ids, all_incumbent_isb_keys) + new_incumbents = sort_by_crowding_distance( + self.runhistory, incumbent_ids, all_incumbent_isb_keys, normalize=True + ) new_incumbents = new_incumbents[: self._max_incumbents] logger.info( diff --git a/smac/intensifier/successive_halving.py b/smac/intensifier/successive_halving.py index 4f00d4605..dc8da1095 100644 --- a/smac/intensifier/successive_halving.py +++ b/smac/intensifier/successive_halving.py @@ -563,7 +563,7 @@ def _get_best_configs( # If we have more selected configs, we remove the ones with the smallest crowding distance if len(selected_configs) > n_configs: all_keys = [from_keys for _ in selected_configs] - selected_configs = sort_by_crowding_distance(rh, selected_configs, all_keys)[:n_configs] + selected_configs = sort_by_crowding_distance(rh, selected_configs, all_keys, normalize=True)[:n_configs] logger.debug("Found more configs than required. 
Removed configs with smallest crowding distance.") return selected_configs diff --git a/smac/runhistory/runhistory.py b/smac/runhistory/runhistory.py index 944597c6c..3fe393fc4 100644 --- a/smac/runhistory/runhistory.py +++ b/smac/runhistory/runhistory.py @@ -84,7 +84,7 @@ def running(self) -> int: @property def multi_objective_algorithm(self) -> AbstractMultiObjectiveAlgorithm | None: - """The multi-objective algorithm required to scaralize the costs in case of multi-objective.""" + """The multi-objective algorithm required to scalarize the costs in case of multi-objective.""" return self._multi_objective_algorithm @multi_objective_algorithm.setter diff --git a/smac/utils/pareto_front.py b/smac/utils/pareto_front.py index d716fad1c..bae72b6fb 100644 --- a/smac/utils/pareto_front.py +++ b/smac/utils/pareto_front.py @@ -91,6 +91,7 @@ def sort_by_crowding_distance( runhistory: RunHistory, configs: list[Configuration], config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]], + normalize: bool = False, ) -> list[Configuration]: """Sorts the passed configurations by their crowding distance. Taken from https://github.com/anyoptimization/pymoo/blob/20abef1ade71915352217400c11ece4c2f35163e/pymoo/algorithms/nsga2.py @@ -104,13 +105,14 @@ def sort_by_crowding_distance( The configurations which should be sorted. config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]] The instance-seed budget keys for the configurations which should be sorted. - + normalize: bool + If the costs should be normalized Returns ------- sorted_list : list[Configuration] Configurations sorted by crowding distance. """ - F = _get_costs(runhistory, configs, config_instance_seed_budget_keys, normalize=True) + F = _get_costs(runhistory, configs, config_instance_seed_budget_keys, normalize=normalize) infinity = 1e14 n_points = F.shape[0]