
Commit

Small fixes (#173)
Boston to Diabetes, system tests.
PGijsbers authored Sep 15, 2022
1 parent 8f1e48d commit 241dd0f
Showing 6 changed files with 40 additions and 9 deletions.
18 changes: 18 additions & 0 deletions .github/workflows/pytest.yaml
@@ -21,3 +21,21 @@ jobs:
          path: tests/unit
          os: ${{ matrix.os }}
          python-version: ${{ matrix.python-version }}
+
+  system:
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.8", "3.9", "3.10"]
+        os: [ubuntu-latest, macos-latest, windows-latest]
+
+    runs-on: ${{ matrix.os }}
+
+    steps:
+      - uses: actions/checkout@v3
+      - name: Invoke System Tests
+        uses: ./.github/actions/pytest
+        with:
+          path: tests/system
+          os: ${{ matrix.os }}
+          python-version: ${{ matrix.python-version }}
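
For reference (not part of the commit): the job forwards the matrix values to the repository's composite pytest action; locally the same suite can be invoked through pytest's Python entry point, assuming the test dependencies are installed:

import pytest

# Equivalent to running "python -m pytest tests/system" from the repository root.
raise SystemExit(pytest.main(["tests/system"]))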
4 changes: 2 additions & 2 deletions examples/regression_example.py
@@ -1,10 +1,10 @@
-from sklearn.datasets import load_boston
+from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from gama import GamaRegressor

if __name__ == "__main__":
-    X, y = load_boston(return_X_y=True)
+    X, y = load_diabetes(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    automl = GamaRegressor(max_total_time=180, store="nothing", n_jobs=1)
2 changes: 1 addition & 1 deletion gama/configuration/regression.py
@@ -48,7 +48,7 @@
    },
    GradientBoostingRegressor: {
        "n_estimators": [100],
-        "loss": ["ls", "lad", "huber", "quantile"],
+        "loss": ["squared_error", "absolute_error", "huber", "quantile"],
        "learning_rate": [1e-3, 1e-2, 1e-1, 0.5, 1.0],
        "max_depth": range(1, 11),
        "min_samples_split": range(2, 21),
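
For context (not part of the commit): scikit-learn 1.0 renamed these gradient-boosting losses and later releases reject the old spellings, which is what this configuration change tracks. A minimal sketch with the new names:

from sklearn.datasets import load_diabetes
from sklearn.ensemble import GradientBoostingRegressor

# "ls" became "squared_error" and "lad" became "absolute_error" in scikit-learn 1.0.
X, y = load_diabetes(return_X_y=True)
model = GradientBoostingRegressor(n_estimators=100, loss="squared_error").fit(X, y)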
7 changes: 7 additions & 0 deletions gama/gama.py
@@ -546,6 +546,13 @@ def fit(
            if p.identifier not in [PolynomialFeatures]
        ]

+        if self._time_manager.total_time_remaining < 0:
+            pre_time = self._time_manager.activities[-1].stopwatch.elapsed_time
+            raise RuntimeError(
+                f"Preprocessing took {pre_time} seconds. "
+                f"No time remaining (budget: {self._time_manager.total_time} seconds)."
+            )
+
        fit_time = int(
            (1 - self._post_processing.time_fraction)
            * self._time_manager.total_time_remaining
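
The user-visible effect of the new check, illustrated with a hypothetical script (the tiny budget is an assumption, not something from the commit): if preprocessing alone exhausts the time budget, fit() now raises a clear RuntimeError instead of starting the search with no time left.

from sklearn.datasets import load_diabetes
from gama import GamaRegressor

X, y = load_diabetes(return_X_y=True)
automl = GamaRegressor(max_total_time=1, store="nothing", n_jobs=1)  # deliberately tiny budget
try:
    automl.fit(X, y)
except RuntimeError as err:
    print(err)  # e.g. "Preprocessing took ... seconds. No time remaining (budget: 1 seconds)."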
8 changes: 7 additions & 1 deletion pyproject.toml
@@ -67,8 +67,14 @@ filterwarnings = [
"error",
"ignore::sklearn.exceptions.ConvergenceWarning",
"ignore::RuntimeWarning",
# Block a warning coming from scikit-learn internals about scipy.mode
"ignore:.*mode.*:FutureWarning",
+    # We have a CRON job checking for deprecation/future warnings,
+    # but we don't fail on them by default as they should not interfere with most PRs.
+    # We still print to ensure new warnings are not introduced by the change.
+    "default::PendingDeprecationWarning",
+    "default::DeprecationWarning",
+    "default::FutureWarning",
+    "ignore:.*mode.*:FutureWarning",
    # We will (probably) get rid of category_encoders in 22.1+
    "ignore:::category_encoders.target_encoder",
]
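
A note on ordering (mine, not the commit's): pytest gives later filterwarnings entries precedence, so an ignore placed after default::FutureWarning still suppresses its specific warning while other FutureWarnings are merely printed. The same precedence can be sketched with the standard warnings module, where each filterwarnings() call is inserted at the front of the filter list and therefore wins over earlier calls:

import warnings

warnings.simplefilter("error")                               # roughly "error": unmatched warnings fail
warnings.filterwarnings("default", category=FutureWarning)   # roughly "default::FutureWarning"
warnings.filterwarnings("ignore", message=".*mode.*", category=FutureWarning)

warnings.warn("scipy mode is deprecated", FutureWarning)     # suppressed by the last filter added
warnings.warn("some other upcoming change", FutureWarning)   # printed once, does not raise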
10 changes: 5 additions & 5 deletions tests/system/test_gamaregressor.py
@@ -1,6 +1,6 @@
""" Contains full system tests for GamaRegressor """
import numpy as np
-from sklearn.datasets import load_boston
+from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

@@ -13,7 +13,7 @@

# While we could derive statistics dynamically,
# we want to know if any changes ever happen, so we save them statically.
-boston = dict(name="boston", load=load_boston, test_size=127, base_mse=81.790)
+diabetes = dict(name="diabetes", load=load_diabetes, test_size=111, base_mse=4966)


def _test_gama_regressor(gama, X_train, X_test, y_train, y_test, data, metric):
@@ -28,7 +28,7 @@ def _test_gama_regressor(gama, X_train, X_test, y_train, y_test, data, metric):
    assert isinstance(predictions, np.ndarray), "predictions should be numpy arrays."
    assert (data["test_size"],) == predictions.shape, "should predict (N,) shape array."

-    # Majority classifier on this split achieves 0.6293706293706294
+    # Predicting the mean will score roughly 4966
    mse = mean_squared_error(y_test, predictions)
    print(data["name"], metric, "mse:", mse)
    assert (
@@ -55,12 +55,12 @@ def _test_dataset_problem(data, metric):

def test_regression_mean_squared_error():
"""GamaRegressor works on all-numeric data."""
_test_dataset_problem(boston, "neg_mean_squared_error")
_test_dataset_problem(diabetes, "neg_mean_squared_error")


def test_missing_value_regression():
"""GamaRegressor works when missing values are present."""
data = boston
data = diabetes
metric = "neg_mean_squared_error"
X, y = data["load"](return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
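
For the curious (an assumed reconstruction, not code from the test suite), the base_mse and test_size recorded above can be sanity-checked by scoring a mean predictor on the same split:

from sklearn.datasets import load_diabetes
from sklearn.dummy import DummyRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
baseline = DummyRegressor(strategy="mean").fit(X_train, y_train)
print(len(y_test))                                           # 111, the test_size above
print(mean_squared_error(y_test, baseline.predict(X_test)))  # roughly 4966, the base_mse above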
