Skip to content
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@
- Add logger information on handling of stopIteration error (#960)
- Replace deprecated ConfigSpace methods (#1139)
- Separated Wallclock time measurements from CPU time measurements and storing them under new 'cpu_time' variable (#1173)
- Adapt RunHistory to be human readable (# 1174)
- Adapt RunHistory to be human readable (#1174)
- The models have an `is_trained` property to indicate whether they have been trained (#1191)

## Dependencies
- Allow numpy >= 2.x (#1146)
Expand Down
13 changes: 9 additions & 4 deletions smac/model/abstract_model.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from abc import abstractmethod
from abc import ABC, abstractmethod
from typing import Any, TypeVar

import copy
Expand All @@ -19,14 +19,12 @@
__copyright__ = "Copyright 2025, Leibniz University Hanover, Institute of AI"
__license__ = "3-clause BSD"


logger = get_logger(__name__)


Self = TypeVar("Self", bound="AbstractModel")


class AbstractModel:
class AbstractModel(ABC):
"""Abstract implementation of the surrogate model.

Note
Expand Down Expand Up @@ -57,6 +55,7 @@ def __init__(
self._rng = np.random.RandomState(self._seed)
self._instance_features = instance_features
self._pca_components = pca_components
self._is_trained = False

n_features = 0
if self._instance_features is not None:
Expand Down Expand Up @@ -92,6 +91,12 @@ def meta(self) -> dict[str, Any]:
"pca_components": self._pca_components,
}

@property
@abstractmethod
def is_trained(self) -> bool:
"""Return True if the model is trained, False otherwise.

Abstract: every concrete model must override this property. The base
initializer sets `self._is_trained` to False, and overrides are expected
to report that flag.

Raises
------
NotImplementedError
If this base implementation is invoked directly.
"""
raise NotImplementedError() # make use of `self._is_trained` in subclasses

def train(self: Self, X: np.ndarray, Y: np.ndarray) -> Self:
"""Trains the random forest on X and Y. Internally, calls the method `_train`.

Expand Down
5 changes: 5 additions & 0 deletions smac/model/gaussian_process/abstract_gaussian_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,11 @@ def meta(self) -> dict[str, Any]: # noqa: D102

return meta

@property
def is_trained(self) -> bool:
"""Whether the model has been trained.

Returns
-------
bool
The current value of the internal `_is_trained` flag.
"""
return self._is_trained

@abstractmethod
def _get_gaussian_process(self) -> GaussianProcessRegressor:
"""Generates a Gaussian process."""
Expand Down
5 changes: 4 additions & 1 deletion smac/model/gaussian_process/mcmc_gaussian_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,10 @@ def _train(
model.mean_y_ = self.mean_y_
model.std_y_ = self.std_y_

self._is_trained = True
if not self._models:
self._is_trained = False
else:
self._is_trained = all(model.is_trained for model in self._models)
return self

def _get_gaussian_process(self) -> GaussianProcessRegressor:
Expand Down
7 changes: 7 additions & 0 deletions smac/model/multi_objective_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ def __init__(
seed=seed,
)

@property
def is_trained(self) -> bool:
"""Whether the model has been trained.

Returns
-------
bool
The current value of the internal `_is_trained` flag. `_train` sets it
to True only if every model in `self._models` reports `is_trained`.
"""
return self._is_trained

@property
def models(self) -> list[AbstractModel]:
"""The internally used surrogate models."""
Expand All @@ -76,6 +81,8 @@ def _train(self: Self, X: np.ndarray, Y: np.ndarray) -> Self:
for i, model in enumerate(self._models):
model.train(X, Y[:, i])

self._is_trained = all(model.is_trained for model in self._models)

return self

def _predict(
Expand Down
5 changes: 5 additions & 0 deletions smac/model/random_forest/abstract_random_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
self._conditional: dict[int, bool] = dict()
self._impute_values: dict[int, float] = dict()

@property
def is_trained(self) -> bool:
"""Whether the model has been trained.

Returns
-------
bool
The current value of the internal `_is_trained` flag.
"""
return self._is_trained

def _impute_inactive(self, X: np.ndarray) -> np.ndarray:
X = X.copy()
for idx, hp in enumerate(list(self._configspace.values())):
Expand Down
2 changes: 2 additions & 0 deletions smac/model/random_forest/random_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,8 @@ def _train(self, X: np.ndarray, y: np.ndarray) -> RandomForest:
data = self._init_data_container(X, y)
self._rf.fit(data, rng=self._rng)

self._is_trained = True

return self

def _init_data_container(self, X: np.ndarray, y: np.ndarray) -> DataContainer:
Expand Down
5 changes: 5 additions & 0 deletions smac/model/random_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@
class RandomModel(AbstractModel):
"""AbstractModel which returns random values on a call to `fit`."""

@property
def is_trained(self) -> bool:
"""Whether the model has been trained.

Returns
-------
bool
The current value of the internal `_is_trained` flag.
"""
return self._is_trained

def _train(self, X: np.ndarray, Y: np.ndarray) -> RandomModel:
if not isinstance(X, np.ndarray):
raise NotImplementedError("X has to be of type np.ndarray.")
Expand Down
27 changes: 25 additions & 2 deletions tests/test_model/test_abstract_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,16 @@ def get_X_y(cs, n_samples, n_instance_features):
def _train(X, Y):
"""Mock replacement for a model's `_train`: ignores `X` and `Y` and returns None."""
return None

class MyAbstractModel(AbstractModel):
# Minimal concrete subclass of `AbstractModel` for the tests: it supplies the
# `is_trained` property and a `_train` method so the class can be instantiated.
@property
def is_trained(self):
# Report the flag that `_train` below flips to True.
return self._is_trained

def _train(self, X, Y):
# No actual fitting happens; only mark the model as trained.
self._is_trained = True
return self


def test_no_pca(configspace_small, make_scenario):
n_instances = 100
Expand All @@ -35,7 +45,7 @@ def test_no_pca(configspace_small, make_scenario):
n_instances=n_instances,
n_instance_features=n_instance_features,
)
model = AbstractModel(configspace_small, scenario.instance_features, pca_components=7)
model = MyAbstractModel(configspace_small, scenario.instance_features, pca_components=7)
# We just overwrite the function as mock here
model._train = _train

Expand Down Expand Up @@ -68,7 +78,7 @@ def test_pca(configspace_small, make_scenario):
n_instances=n_instances,
n_instance_features=n_instance_features,
)
model = AbstractModel(configspace_small, scenario.instance_features, pca_components=7)
model = MyAbstractModel(configspace_small, scenario.instance_features, pca_components=7)
# We just overwrite the function as mock here
model._train = _train

Expand All @@ -88,3 +98,16 @@ def test_pca(configspace_small, make_scenario):
X_test, _ = get_X_y(configspace_small, n_samples, 10)
with pytest.raises(ValueError, match="Feature mismatch.*"):
model.predict_marginalized(X_test)

def test_abstract_model_raises_without_istrained(configspace_small):
    """Instantiating a subclass that does not define `is_trained` raises a TypeError."""

    class DummyModel(AbstractModel):
        # Deliberately does NOT override the `is_trained` property: leaving it
        # out is what this test relies on to make instantiation fail.
        def _train(self, X: np.ndarray, Y: np.ndarray):
            self._is_trained = True
            return self

    with pytest.raises(TypeError, match="instantiate abstract class"):
        # The instance is never used; only the constructor call matters.
        DummyModel(configspace_small)
10 changes: 10 additions & 0 deletions tests/test_model/test_gp.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,16 @@ def test_predict():
assert m_hat.shape == (10, 1)
assert v_hat.shape == (10, 1)

def test_is_trained():
    """The model from `get_gp` reports `is_trained` only after `_train` was called."""
    seed = 1
    rng = np.random.RandomState(seed)
    X, Y, n_dims = get_cont_data(rng)
    gp = get_gp(n_dims, seed)

    # Freshly constructed: nothing has been fitted yet.
    assert not gp.is_trained

    gp._train(X[:10], Y[:10], optimize_hyperparameters=False)

    # After fitting on the first ten samples the flag must be set.
    assert gp.is_trained


def test_train_do_optimize():
# Check that do_optimize does not mess with the kernel hyperparameters given to the Gaussian process!
Expand Down
2 changes: 2 additions & 0 deletions tests/test_model/test_gp_mcmc.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,10 @@ def test_gp_train():
fixture = np.array([0.693147, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -6.907755])

model = get_gp(10, seed)
assert not model.is_trained
np.testing.assert_array_almost_equal(model._kernel.theta, fixture)
model.train(X[:10], Y[:10])
assert model.is_trained
assert len(model.models) == 36

for base_model in model.models:
Expand Down
2 changes: 2 additions & 0 deletions tests/test_model/test_rf.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@ def test_predict():
X = rs.rand(20, 10)
Y = rs.rand(10, 1)
model = RandomForest(configspace=_get_cs(10))
assert not model.is_trained
model.train(X[:10], Y[:10])
assert model.is_trained
m_hat, v_hat = model.predict(X[10:])
assert m_hat.shape == (10, 1)
assert v_hat.shape == (10, 1)
Expand Down
Loading