From 48c9aaf0feeaeadc27bbe44a331b5703d4526950 Mon Sep 17 00:00:00 2001 From: timruhkopf Date: Thu, 17 Apr 2025 08:24:58 +0200 Subject: [PATCH 1/7] [feature] #1191 is trained property to indicate the state of the model. --- CHANGELOG.md | 3 ++- smac/model/abstract_model.py | 6 ++++++ smac/model/gaussian_process/mcmc_gaussian_process.py | 2 +- smac/model/multi_objective_model.py | 7 +++++++ smac/model/random_forest/random_forest.py | 2 ++ tests/test_model/test_gp.py | 10 ++++++++++ tests/test_model/test_gp_mcmc.py | 2 ++ tests/test_model/test_rf.py | 2 ++ 8 files changed, 32 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 36b2d614d..9d3c7b978 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,7 +22,8 @@ - Add logger information on handling of stopIteration error (#960) - Replace deprecated ConfigSpace methods (#1139) - Separated Wallclock time measurements from CPU time measurements and storing them under new 'cpu_time' variable (#1173) -- Adapt RunHistory to be human readable (# 1174) +- Adapt RunHistory to be human readable (#1174) +- The models have a `is_trained` property to indicate the internal state (#1191) ## Dependencies - Allow numpy >= 2.x (#1146) diff --git a/smac/model/abstract_model.py b/smac/model/abstract_model.py index 18ee1bdda..8a0b40c0e 100644 --- a/smac/model/abstract_model.py +++ b/smac/model/abstract_model.py @@ -57,6 +57,7 @@ def __init__( self._rng = np.random.RandomState(self._seed) self._instance_features = instance_features self._pca_components = pca_components + self._is_trained = False n_features = 0 if self._instance_features is not None: @@ -92,6 +93,11 @@ def meta(self) -> dict[str, Any]: "pca_components": self._pca_components, } + @property + def is_trained(self) -> bool: + """Returns True if the model is trained, False otherwise.""" + return self._is_trained + def train(self: Self, X: np.ndarray, Y: np.ndarray) -> Self: """Trains the random forest on X and Y. Internally, calls the method `_train`. diff --git a/smac/model/gaussian_process/mcmc_gaussian_process.py b/smac/model/gaussian_process/mcmc_gaussian_process.py index 71c4d45aa..d4a1da2cb 100644 --- a/smac/model/gaussian_process/mcmc_gaussian_process.py +++ b/smac/model/gaussian_process/mcmc_gaussian_process.py @@ -287,7 +287,7 @@ def _train( model.mean_y_ = self.mean_y_ model.std_y_ = self.std_y_ - self._is_trained = True + self._is_trained = all(model.is_trained for model in self._models) return self def _get_gaussian_process(self) -> GaussianProcessRegressor: diff --git a/smac/model/multi_objective_model.py b/smac/model/multi_objective_model.py index f4bf8c316..8b1a57634 100644 --- a/smac/model/multi_objective_model.py +++ b/smac/model/multi_objective_model.py @@ -53,6 +53,11 @@ def __init__( seed=seed, ) + @property + def is_trained(self) -> bool: + """Whether the model is trained or not.""" + return self._is_trained + @property def models(self) -> list[AbstractModel]: """The internally used surrogate models.""" @@ -76,6 +81,8 @@ def _train(self: Self, X: np.ndarray, Y: np.ndarray) -> Self: for i, model in enumerate(self._models): model.train(X, Y[:, i]) + self._is_trained = all(model.is_trained for model in self._models) + return self def _predict( diff --git a/smac/model/random_forest/random_forest.py b/smac/model/random_forest/random_forest.py index 0c5110240..df1664511 100644 --- a/smac/model/random_forest/random_forest.py +++ b/smac/model/random_forest/random_forest.py @@ -153,6 +153,8 @@ def _train(self, X: np.ndarray, y: np.ndarray) -> RandomForest: data = self._init_data_container(X, y) self._rf.fit(data, rng=self._rng) + self._is_trained = True + return self def _init_data_container(self, X: np.ndarray, y: np.ndarray) -> DataContainer: diff --git a/tests/test_model/test_gp.py b/tests/test_model/test_gp.py index ecdc8dd68..6bde0ec00 100644 --- a/tests/test_model/test_gp.py +++ b/tests/test_model/test_gp.py @@ -176,6 +176,16 @@ def test_predict(): assert m_hat.shape == (10, 1) assert v_hat.shape == (10, 1) +def test_is_trained(): + seed = 1 + rs = np.random.RandomState(seed) + X, Y, n_dims = get_cont_data(rs) + + model = get_gp(n_dims, seed) + assert not model.is_trained + model._train(X[:10], Y[:10], optimize_hyperparameters=False) + assert model.is_trained + def test_train_do_optimize(): # Check that do_optimize does not mess with the kernel hyperparameters given to the Gaussian process! diff --git a/tests/test_model/test_gp_mcmc.py b/tests/test_model/test_gp_mcmc.py index ebba87dc7..f73a49804 100644 --- a/tests/test_model/test_gp_mcmc.py +++ b/tests/test_model/test_gp_mcmc.py @@ -91,8 +91,10 @@ def test_gp_train(): fixture = np.array([0.693147, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -6.907755]) model = get_gp(10, seed) + assert not model.is_trained np.testing.assert_array_almost_equal(model._kernel.theta, fixture) model.train(X[:10], Y[:10]) + assert model.is_trained assert len(model.models) == 36 for base_model in model.models: diff --git a/tests/test_model/test_rf.py b/tests/test_model/test_rf.py index 65e6478ff..1b0532083 100644 --- a/tests/test_model/test_rf.py +++ b/tests/test_model/test_rf.py @@ -49,7 +49,9 @@ def test_predict(): X = rs.rand(20, 10) Y = rs.rand(10, 1) model = RandomForest(configspace=_get_cs(10)) + assert not model.is_trained model.train(X[:10], Y[:10]) + assert model.is_trained m_hat, v_hat = model.predict(X[10:]) assert m_hat.shape == (10, 1) assert v_hat.shape == (10, 1) From bf6d2fc82dbc4f037e83a087eff6be606092c1a4 Mon Sep 17 00:00:00 2001 From: Tim Ruhkopf Date: Thu, 17 Apr 2025 08:31:27 +0200 Subject: [PATCH 2/7] Update smac/model/gaussian_process/mcmc_gaussian_process.py Safeguard against the unlikely event that no model is in the MCMCGP, Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- smac/model/gaussian_process/mcmc_gaussian_process.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/smac/model/gaussian_process/mcmc_gaussian_process.py b/smac/model/gaussian_process/mcmc_gaussian_process.py index d4a1da2cb..c2d6f3ae7 100644 --- a/smac/model/gaussian_process/mcmc_gaussian_process.py +++ b/smac/model/gaussian_process/mcmc_gaussian_process.py @@ -287,7 +287,10 @@ def _train( model.mean_y_ = self.mean_y_ model.std_y_ = self.std_y_ - self._is_trained = all(model.is_trained for model in self._models) + if not self._models: + self._is_trained = False + else: + self._is_trained = all(model.is_trained for model in self._models) return self def _get_gaussian_process(self) -> GaussianProcessRegressor: From 36bbc9b8954afee3ed2daf86f2a00e172d207a44 Mon Sep 17 00:00:00 2001 From: timruhkopf Date: Tue, 20 May 2025 14:42:10 +0200 Subject: [PATCH 3/7] [feature, test] property is_trained is required and will fail otherwise --- smac/model/abstract_model.py | 9 +++-- .../abstract_gaussian_process.py | 5 +++ .../random_forest/abstract_random_forest.py | 5 +++ smac/model/random_model.py | 5 +++ tests/test_model/test_abstract_model.py | 33 +++++++++++++++++++ 5 files changed, 52 insertions(+), 5 deletions(-) diff --git a/smac/model/abstract_model.py b/smac/model/abstract_model.py index 8a0b40c0e..cb7138d2d 100644 --- a/smac/model/abstract_model.py +++ b/smac/model/abstract_model.py @@ -1,6 +1,6 @@ from __future__ import annotations -from abc import abstractmethod +from abc import ABC, abstractmethod from typing import Any, TypeVar import copy @@ -19,14 +19,12 @@ __copyright__ = "Copyright 2025, Leibniz University Hanover, Institute of AI" __license__ = "3-clause BSD" - logger = get_logger(__name__) - Self = TypeVar("Self", bound="AbstractModel") -class AbstractModel: +class AbstractModel(ABC): """Abstract implementation of the surrogate model. Note @@ -94,9 +92,10 @@ def meta(self) -> dict[str, Any]: } @property + @abstractmethod def is_trained(self) -> bool: """Returns True if the model is trained, False otherwise.""" - return self._is_trained + raise NotImplementedError() # make use of `self._is_trained` in subclasses def train(self: Self, X: np.ndarray, Y: np.ndarray) -> Self: """Trains the random forest on X and Y. Internally, calls the method `_train`. diff --git a/smac/model/gaussian_process/abstract_gaussian_process.py b/smac/model/gaussian_process/abstract_gaussian_process.py index 512817a38..ac8e945ff 100644 --- a/smac/model/gaussian_process/abstract_gaussian_process.py +++ b/smac/model/gaussian_process/abstract_gaussian_process.py @@ -58,6 +58,11 @@ def meta(self) -> dict[str, Any]: # noqa: D102 return meta + @property + def is_trained(self) -> bool: + """Returns whether the model is trained or not.""" + return self._is_trained + @abstractmethod def _get_gaussian_process(self) -> GaussianProcessRegressor: """Generates a Gaussian process.""" diff --git a/smac/model/random_forest/abstract_random_forest.py b/smac/model/random_forest/abstract_random_forest.py index 0703551f3..c328a9719 100644 --- a/smac/model/random_forest/abstract_random_forest.py +++ b/smac/model/random_forest/abstract_random_forest.py @@ -26,6 +26,11 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: self._conditional: dict[int, bool] = dict() self._impute_values: dict[int, float] = dict() + @property + def is_trained(self) -> bool: + """Returns whether the model is trained or not.""" + return self._is_trained + def _impute_inactive(self, X: np.ndarray) -> np.ndarray: X = X.copy() for idx, hp in enumerate(list(self._configspace.values())): diff --git a/smac/model/random_model.py b/smac/model/random_model.py index f55ce00bc..08a28b2f1 100644 --- a/smac/model/random_model.py +++ b/smac/model/random_model.py @@ -14,6 +14,11 @@ class RandomModel(AbstractModel): """AbstractModel which returns random values on a call to `fit`.""" + @property + def is_trained(self) -> bool: + """Returns whether the model is trained or not.""" + return self._is_trained + def _train(self, X: np.ndarray, Y: np.ndarray) -> RandomModel: if not isinstance(X, np.ndarray): raise NotImplementedError("X has to be of type np.ndarray.") diff --git a/tests/test_model/test_abstract_model.py b/tests/test_model/test_abstract_model.py index 6fe77e466..5b48985dd 100644 --- a/tests/test_model/test_abstract_model.py +++ b/tests/test_model/test_abstract_model.py @@ -88,3 +88,36 @@ def test_pca(configspace_small, make_scenario): X_test, _ = get_X_y(configspace_small, n_samples, 10) with pytest.raises(ValueError, match="Feature mismatch.*"): model.predict_marginalized(X_test) + +def test_abstract_model_raises_without_istrained(configspace_small): + from dataclasses import dataclass + from ConfigSpace import ConfigurationSpace, Constant + + @dataclass + class A: + a: int + + def f() -> None: + return None + + class DummyModel(AbstractModel): + def _train(self, X: np.ndarray, Y: np.ndarray): + self._is_trained = True + return self + + @property + def is_trained(self) -> bool: + return getattr(self, "_is_trained", False) + + @pytest.fixture + def configspace(): + return ConfigurationSpace({ + "cat": [True, False, None], + "othercat": [A(1), f], + "constant": Constant("constant", (24, 25)), + }) + try: + model = DummyModel(configspace_small) + + except TypeError as e: + return From 1390b65c97d4dccdf39d329c3a6bd345b07975c1 Mon Sep 17 00:00:00 2001 From: timruhkopf Date: Tue, 20 May 2025 14:59:28 +0200 Subject: [PATCH 4/7] [feature, test] property is_trained is required and will fail otherwise --- tests/test_model/test_abstract_model.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tests/test_model/test_abstract_model.py b/tests/test_model/test_abstract_model.py index 5b48985dd..d88949915 100644 --- a/tests/test_model/test_abstract_model.py +++ b/tests/test_model/test_abstract_model.py @@ -23,6 +23,16 @@ def get_X_y(cs, n_samples, n_instance_features): def _train(X, Y): return None +class MyAbstractModel(AbstractModel): + # dummy class to ensure that the required methods are implemented + @property + def is_trained(self): + return self._is_trained + + def _train(self, X, Y): + self._is_trained = True + return self + def test_no_pca(configspace_small, make_scenario): n_instances = 100 @@ -35,7 +45,7 @@ def test_no_pca(configspace_small, make_scenario): n_instances=n_instances, n_instance_features=n_instance_features, ) - model = AbstractModel(configspace_small, scenario.instance_features, pca_components=7) + model = MyAbstractModel(configspace_small, scenario.instance_features, pca_components=7) # We just overwrite the function as mock here model._train = _train @@ -68,7 +78,7 @@ def test_pca(configspace_small, make_scenario): n_instances=n_instances, n_instance_features=n_instance_features, ) - model = AbstractModel(configspace_small, scenario.instance_features, pca_components=7) + model = MyAbstractModel(configspace_small, scenario.instance_features, pca_components=7) # We just overwrite the function as mock here model._train = _train From 7846c1ee33597326b3dee6b31f92896ba16e5c5b Mon Sep 17 00:00:00 2001 From: timruhkopf Date: Wed, 21 May 2025 15:16:45 +0200 Subject: [PATCH 5/7] [test-fix] removed unnecessary stuff and used pytest contextmanager --- tests/test_model/test_abstract_model.py | 30 +++++-------------------- 1 file changed, 5 insertions(+), 25 deletions(-) diff --git a/tests/test_model/test_abstract_model.py b/tests/test_model/test_abstract_model.py index d88949915..39b4d3884 100644 --- a/tests/test_model/test_abstract_model.py +++ b/tests/test_model/test_abstract_model.py @@ -100,34 +100,14 @@ def test_pca(configspace_small, make_scenario): model.predict_marginalized(X_test) def test_abstract_model_raises_without_istrained(configspace_small): - from dataclasses import dataclass - from ConfigSpace import ConfigurationSpace, Constant - - @dataclass - class A: - a: int - - def f() -> None: - return None - class DummyModel(AbstractModel): def _train(self, X: np.ndarray, Y: np.ndarray): self._is_trained = True return self - @property - def is_trained(self) -> bool: - return getattr(self, "_is_trained", False) - - @pytest.fixture - def configspace(): - return ConfigurationSpace({ - "cat": [True, False, None], - "othercat": [A(1), f], - "constant": Constant("constant", (24, 25)), - }) - try: - model = DummyModel(configspace_small) + # @property # -> because this code is missing, it should raise + # def is_trained(self) -> bool: + # return self._is_trained - except TypeError as e: - return + with pytest.raises(TypeError, match="Can't instantiate abstract class DummyModel with abstract method is_trained"): + model = DummyModel(configspace_small) From 260e4d36160037e04b920065222aa576efa97f51 Mon Sep 17 00:00:00 2001 From: timruhkopf Date: Wed, 21 May 2025 15:41:36 +0200 Subject: [PATCH 6/7] [test-fix] somehow failed --- pyproject.toml | 2 +- tests/test_model/test_abstract_model.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 77783d8a7..681747a52 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ [tool.pytest.ini_options] testpaths = ["tests"] # path to the test directory minversion = "3.8" -addopts = "--cov=smac" # Should be package name +#addopts = "--cov=smac" # Should be package name [tool.coverage.run] branch = true diff --git a/tests/test_model/test_abstract_model.py b/tests/test_model/test_abstract_model.py index 39b4d3884..6b4613048 100644 --- a/tests/test_model/test_abstract_model.py +++ b/tests/test_model/test_abstract_model.py @@ -109,5 +109,5 @@ def _train(self, X: np.ndarray, Y: np.ndarray): # def is_trained(self) -> bool: # return self._is_trained - with pytest.raises(TypeError, match="Can't instantiate abstract class DummyModel with abstract method is_trained"): + with pytest.raises(TypeError, match="instantiate abstract class"): model = DummyModel(configspace_small) From 9306932a815d9fbb3f38e0179c70325584a5b18f Mon Sep 17 00:00:00 2001 From: benjamc Date: Fri, 26 Sep 2025 14:34:55 +0200 Subject: [PATCH 7/7] readd coverage measurement for pytest --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 681747a52..77783d8a7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ [tool.pytest.ini_options] testpaths = ["tests"] # path to the test directory minversion = "3.8" -#addopts = "--cov=smac" # Should be package name +addopts = "--cov=smac" # Should be package name [tool.coverage.run] branch = true