
Commit

Small fixes (#173)
Boston to Diabetes, system tests.
PGijsbers authored Sep 15, 2022
1 parent 8f1e48d commit 241dd0f
Showing 6 changed files with 40 additions and 9 deletions.
18 changes: 18 additions & 0 deletions .github/workflows/pytest.yaml
@@ -21,3 +21,21 @@ jobs:
          path: tests/unit
          os: ${{ matrix.os }}
          python-version: ${{ matrix.python-version }}
+
+  system:
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.8", "3.9", "3.10"]
+        os: [ubuntu-latest, macos-latest, windows-latest]
+
+    runs-on: ${{ matrix.os }}
+
+    steps:
+      - uses: actions/checkout@v3
+      - name: Invoke System Tests
+        uses: ./.github/actions/pytest
+        with:
+          path: tests/system
+          os: ${{ matrix.os }}
+          python-version: ${{ matrix.python-version }}
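
For reference (not part of the commit): the job forwards the matrix values to the repository's composite pytest action; locally the same suite can be invoked through pytest's Python entry point, assuming the test dependencies are installed:

import pytest

# Equivalent to running "python -m pytest tests/system" from the repository root.
raise SystemExit(pytest.main(["tests/system"]))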
4 changes: 2 additions & 2 deletions examples/regression_example.py
@@ -1,10 +1,10 @@
-from sklearn.datasets import load_boston
+from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from gama import GamaRegressor

if __name__ == "__main__":
-    X, y = load_boston(return_X_y=True)
+    X, y = load_diabetes(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    automl = GamaRegressor(max_total_time=180, store="nothing", n_jobs=1)
2 changes: 1 addition & 1 deletion gama/configuration/regression.py
@@ -48,7 +48,7 @@
    },
    GradientBoostingRegressor: {
        "n_estimators": [100],
-        "loss": ["ls", "lad", "huber", "quantile"],
+        "loss": ["squared_error", "absolute_error", "huber", "quantile"],
        "learning_rate": [1e-3, 1e-2, 1e-1, 0.5, 1.0],
        "max_depth": range(1, 11),
        "min_samples_split": range(2, 21),
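
For context (not part of the commit): scikit-learn 1.0 renamed these gradient-boosting losses and later releases reject the old spellings, which is what this configuration change tracks. A minimal sketch with the new names:

from sklearn.datasets import load_diabetes
from sklearn.ensemble import GradientBoostingRegressor

# "ls" became "squared_error" and "lad" became "absolute_error" in scikit-learn 1.0.
X, y = load_diabetes(return_X_y=True)
model = GradientBoostingRegressor(n_estimators=100, loss="squared_error").fit(X, y)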
7 changes: 7 additions & 0 deletions gama/gama.py
@@ -546,6 +546,13 @@ def fit(
            if p.identifier not in [PolynomialFeatures]
        ]

+        if self._time_manager.total_time_remaining < 0:
+            pre_time = self._time_manager.activities[-1].stopwatch.elapsed_time
+            raise RuntimeError(
+                f"Preprocessing took {pre_time} seconds. "
+                f"No time remaining (budget: {self._time_manager.total_time} seconds)."
+            )
+
        fit_time = int(
            (1 - self._post_processing.time_fraction)
            * self._time_manager.total_time_remaining
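
The user-visible effect of the new check, illustrated with a hypothetical script (the tiny budget is an assumption, not something from the commit): if preprocessing alone exhausts the time budget, fit() now raises a clear RuntimeError instead of starting the search with no time left.

from sklearn.datasets import load_diabetes
from gama import GamaRegressor

X, y = load_diabetes(return_X_y=True)
automl = GamaRegressor(max_total_time=1, store="nothing", n_jobs=1)  # deliberately tiny budget
try:
    automl.fit(X, y)
except RuntimeError as err:
    print(err)  # e.g. "Preprocessing took ... seconds. No time remaining (budget: 1 seconds)."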
8 changes: 7 additions & 1 deletion pyproject.toml
@@ -67,8 +67,14 @@ filterwarnings = [
"error",
"ignore::sklearn.exceptions.ConvergenceWarning",
"ignore::RuntimeWarning",
# Block a warning coming from scikit-learn internals about scipy.mode
"ignore:.*mode.*:FutureWarning",
+    # We have a CRON job checking for deprecation/future warnings,
+    # but we don't fail on them by default as they should not interfere with most PRs.
+    # We still print to ensure new warnings are not introduced by the change.
+    "default::PendingDeprecationWarning",
+    "default::DeprecationWarning",
+    "default::FutureWarning",
+    "ignore:.*mode.*:FutureWarning",
    # We will (probably) get rid of category_encoders in 22.1+
    "ignore:::category_encoders.target_encoder",
]
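
A note on ordering (mine, not the commit's): pytest gives later filterwarnings entries precedence, so an ignore placed after default::FutureWarning still suppresses its specific warning while other FutureWarnings are merely printed. The same precedence can be sketched with the standard warnings module, where each filterwarnings() call is inserted at the front of the filter list and therefore wins over earlier calls:

import warnings

warnings.simplefilter("error")                               # roughly "error": unmatched warnings fail
warnings.filterwarnings("default", category=FutureWarning)   # roughly "default::FutureWarning"
warnings.filterwarnings("ignore", message=".*mode.*", category=FutureWarning)

warnings.warn("scipy mode is deprecated", FutureWarning)     # suppressed by the last filter added
warnings.warn("some other upcoming change", FutureWarning)   # printed once, does not raise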
10 changes: 5 additions & 5 deletions tests/system/test_gamaregressor.py
@@ -1,6 +1,6 @@
""" Contains full system tests for GamaRegressor """
import numpy as np
-from sklearn.datasets import load_boston
+from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

@@ -13,7 +13,7 @@

# While we could derive statistics dynamically,
# we want to know if any changes ever happen, so we save them statically.
-boston = dict(name="boston", load=load_boston, test_size=127, base_mse=81.790)
+diabetes = dict(name="diabetes", load=load_diabetes, test_size=111, base_mse=4966)


def _test_gama_regressor(gama, X_train, X_test, y_train, y_test, data, metric):
@@ -28,7 +28,7 @@ def _test_gama_regressor(gama, X_train, X_test, y_train, y_test, data, metric):
    assert isinstance(predictions, np.ndarray), "predictions should be numpy arrays."
    assert (data["test_size"],) == predictions.shape, "should predict (N,) shape array."

-    # Majority classifier on this split achieves 0.6293706293706294
+    # Predicting the mean will score roughly 4966
    mse = mean_squared_error(y_test, predictions)
    print(data["name"], metric, "mse:", mse)
    assert (
@@ -55,12 +55,12 @@ def _test_dataset_problem(data, metric):

def test_regression_mean_squared_error():
"""GamaRegressor works on all-numeric data."""
_test_dataset_problem(boston, "neg_mean_squared_error")
_test_dataset_problem(diabetes, "neg_mean_squared_error")


def test_missing_value_regression():
"""GamaRegressor works when missing values are present."""
data = boston
data = diabetes
metric = "neg_mean_squared_error"
X, y = data["load"](return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
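
For the curious (an assumed reconstruction, not code from the test suite), the base_mse and test_size recorded above can be sanity-checked by scoring a mean predictor on the same split:

from sklearn.datasets import load_diabetes
from sklearn.dummy import DummyRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
baseline = DummyRegressor(strategy="mean").fit(X_train, y_train)
print(len(y_test))                                           # 111, the test_size above
print(mean_squared_error(y_test, baseline.predict(X_test)))  # roughly 4966, the base_mse above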
