diff --git a/.github/workflows/dist.yml b/.github/workflows/dist.yml index 24fc6bbf1d..07ad9366a2 100644 --- a/.github/workflows/dist.yml +++ b/.github/workflows/dist.yml @@ -27,7 +27,7 @@ jobs: submodules: recursive - name: Setup Python - uses: actions/setup-python@v3 + uses: actions/setup-python@v4 with: python-version: 3.8 diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 2b5d32a4f4..83510c5483 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -28,7 +28,7 @@ jobs: submodules: recursive - name: Setup Python - uses: actions/setup-python@v3 + uses: actions/setup-python@v4 with: python-version: 3.8 diff --git a/.github/workflows/generate-baselines.yml b/.github/workflows/generate-baselines.yml index 337fdd269e..5149dd57d8 100644 --- a/.github/workflows/generate-baselines.yml +++ b/.github/workflows/generate-baselines.yml @@ -59,7 +59,7 @@ jobs: # value: The python version used by the installed system - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ steps.python-version.outputs.value }} @@ -109,7 +109,7 @@ jobs: # results_path: path to the benchmark results - name: Upload Results as Artifact - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: baselines path: | diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml index 2d28dd1eae..c7e5b94438 100644 --- a/.github/workflows/pre-commit.yaml +++ b/.github/workflows/pre-commit.yaml @@ -25,7 +25,7 @@ jobs: submodules: recursive - name: Setup Python 3.7 - uses: actions/setup-python@v3 + uses: actions/setup-python@v4 with: python-version: 3.7 diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index de29e860fc..794157f602 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -26,7 +26,7 @@ env: pytest-args: >- --forked --durations=20 - --timeout=300 + --timeout=600 --timeout-method=thread -s @@ -79,7 +79,7 @@ jobs: submodules: recursive - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} @@ -150,7 +150,7 @@ jobs: - name: Upload coverage if: matrix.code-cov && always() - uses: codecov/codecov-action@v2 + uses: codecov/codecov-action@v3 with: fail_ci_if_error: true verbose: true diff --git a/.github/workflows/regressions.yml b/.github/workflows/regressions.yml index 70be44beff..8bb0addcf4 100644 --- a/.github/workflows/regressions.yml +++ b/.github/workflows/regressions.yml @@ -142,7 +142,7 @@ jobs: # value: The python version used by the installed system - name: Setup Python - uses: actions/setup-python@v3 + uses: actions/setup-python@v4 with: python-version: ${{ steps.python-version.outputs.value }} @@ -206,7 +206,7 @@ jobs: # value: The python version used by the installed system - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ steps.python-version.outputs.value }} @@ -250,7 +250,7 @@ jobs: # - baseline_regression_x_x_x.csv - name: Download workflow artifacts - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: artifacts @@ -307,7 +307,7 @@ jobs: # compared_means: path to the results of regression test vs baseline - name: Upload all results together as an artifact - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ github.repository_owner }}_${{ steps.extract.outputs.branch }}_${{ github.sha }} 
path: | @@ -327,7 +327,7 @@ jobs: && github.event.action == 'labeled' && github.event.label.name == 'regression-tests' ) - uses: peter-evans/find-comment@v1 + uses: peter-evans/find-comment@v2 id: comment_finder with: issue-number: ${{ github.event.pull_request.number }} diff --git a/.github/workflows/stale.yaml b/.github/workflows/stale.yaml index b4bb87fafd..5d24ae0627 100644 --- a/.github/workflows/stale.yaml +++ b/.github/workflows/stale.yaml @@ -9,7 +9,7 @@ jobs: stale: runs-on: ubuntu-latest steps: - - uses: actions/stale@v4 + - uses: actions/stale@v5 with: days-before-stale: 60 days-before-close: 7 diff --git a/autosklearn/automl.py b/autosklearn/automl.py index 12e80b8e4e..278cd5c146 100644 --- a/autosklearn/automl.py +++ b/autosklearn/automl.py @@ -48,6 +48,7 @@ BaseShuffleSplit, _RepeatedSplits, ) +from sklearn.pipeline import Pipeline from sklearn.utils import check_random_state from sklearn.utils.validation import check_is_fitted from smac.callbacks import IncorporateRunResultCallback @@ -120,6 +121,7 @@ ) from autosklearn.util.parallel import preload_modules from autosklearn.util.single_thread_client import SingleThreadedClient +from autosklearn.util.smac_wrap import SMACCallback, SmacRunCallback from autosklearn.util.stopwatch import StopWatch import unittest.mock @@ -235,7 +237,7 @@ def __init__( logging_config: Optional[Mapping] = None, metrics: Sequence[Scorer] | None = None, scoring_functions: Optional[list[Scorer]] = None, - get_trials_callback: Optional[IncorporateRunResultCallback] = None, + get_trials_callback: SMACCallback | None = None, dataset_compression: bool | Mapping[str, Any] = True, allow_string_features: bool = True, ): @@ -243,7 +245,7 @@ def __init__( if isinstance(disable_evaluator_output, Iterable): disable_evaluator_output = list(disable_evaluator_output) # Incase iterator - allowed = set(["model", "cv_model", "y_optimization", "y_test", "y_valid"]) + allowed = set(["model", "cv_model", "y_optimization", "y_test"]) unknown = allowed - set(disable_evaluator_output) if any(unknown): raise ValueError( @@ -264,6 +266,15 @@ def __init__( memory_limit=memory_limit, ) + # If we got something callable for `get_trials_callback`, wrap it so SMAC + # will accept it. 
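# --- Illustrative aside, not part of the patch ----------------------------
# The SmacRunCallback wrapper applied below lives in the new
# autosklearn/util/smac_wrap.py, which is not shown in this diff. A minimal
# sketch of such a wrapper, assuming SMAC's IncorporateRunResultCallback
# interface (smbo, run_info, result, time_left) and that a False return
# value stops the optimization; the real implementation may differ:

from smac.callbacks import IncorporateRunResultCallback


class SmacRunCallback(IncorporateRunResultCallback):
    def __init__(self, f):
        # Plain callable supplied by the user via `get_trials_callback`
        self.f = f

    def __call__(self, smbo, run_info, result, time_left):
        # Forward to the user callable; its return value is handed back to
        # SMAC, which treats False as a request to stop the optimization.
        return self.f(smbo, run_info, result, time_left)
# ---------------------------------------------------------------------------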
+ if ( + get_trials_callback is not None + and callable(get_trials_callback) + and not isinstance(get_trials_callback, IncorporateRunResultCallback) + ): + get_trials_callback = SmacRunCallback(get_trials_callback) + self._delete_tmp_folder_after_terminate = delete_tmp_folder_after_terminate self._time_for_task = time_left_for_this_task self._per_run_time_limit = per_run_time_limit @@ -646,273 +657,282 @@ def fit( # By default try to use the TCP logging port or get a new port self._logger_port = logging.handlers.DEFAULT_TCP_LOGGING_PORT - self._logger = self._get_logger(dataset_name) - # The first thing we have to do is create the logger to update the backend - self._backend.setup_logger(self._logger_port) + # Once we start the logging server, it starts in a new process + # If an error occurs then we want to make sure that we exit cleanly + # and shut it down, else it might hang + # https://github.com/automl/auto-sklearn/issues/1480 + try: + self._logger = self._get_logger(dataset_name) - if not only_return_configuration_space: - # If only querying the configuration space, we do not save the start time - # The start time internally checks for the fit() method to execute only once - # But this does not apply when only querying the configuration space - self._backend.save_start_time(self._seed) + # The first thing we have to do is create the logger to update the backend + self._backend.setup_logger(self._logger_port) - self._stopwatch = StopWatch() + if not only_return_configuration_space: + # If only querying the configuration space, we do not save the start + # time The start time internally checks for the fit() method to execute + # only once but this does not apply when only querying the configuration + # space + self._backend.save_start_time(self._seed) - # Make sure that input is valid - # Performs Ordinal one hot encoding to the target - # both for train and test data - self.InputValidator = InputValidator( - is_classification=is_classification, - feat_type=feat_type, - logger_port=self._logger_port, - allow_string_features=self.allow_string_features, - ) - self.InputValidator.fit(X_train=X, y_train=y, X_test=X_test, y_test=y_test) - X, y = self.InputValidator.transform(X, y) + self._stopwatch = StopWatch() - if X_test is not None and y_test is not None: - X_test, y_test = self.InputValidator.transform(X_test, y_test) + # Make sure that input is valid + # Performs Ordinal one hot encoding to the target + # both for train and test data + self.InputValidator = InputValidator( + is_classification=is_classification, + feat_type=feat_type, + logger_port=self._logger_port, + allow_string_features=self.allow_string_features, + ) + self.InputValidator.fit(X_train=X, y_train=y, X_test=X_test, y_test=y_test) + X, y = self.InputValidator.transform(X, y) - # We don't support size reduction on pandas type object yet - if ( - self._dataset_compression is not None - and not isinstance(X, pd.DataFrame) - and not (isinstance(y, pd.Series) or isinstance(y, pd.DataFrame)) - ): - methods = self._dataset_compression["methods"] - memory_allocation = self._dataset_compression["memory_allocation"] - - # Remove precision reduction if we can't perform it - if "precision" in methods and X.dtype not in supported_precision_reductions: - methods = [method for method in methods if method != "precision"] - - with warnings_to(self._logger): - X, y = reduce_dataset_size_if_too_large( - X=X, - y=y, - memory_limit=self._memory_limit, - is_classification=is_classification, - random_state=self._seed, - operations=methods, 
- memory_allocation=memory_allocation, - ) + if X_test is not None and y_test is not None: + X_test, y_test = self.InputValidator.transform(X_test, y_test) - # Check the re-sampling strategy - try: + # We don't support size reduction on pandas type object yet + if ( + self._dataset_compression is not None + and not isinstance(X, pd.DataFrame) + and not (isinstance(y, pd.Series) or isinstance(y, pd.DataFrame)) + ): + methods = self._dataset_compression["methods"] + memory_allocation = self._dataset_compression["memory_allocation"] + + # Remove precision reduction if we can't perform it + if ( + "precision" in methods + and X.dtype not in supported_precision_reductions + ): + methods = [method for method in methods if method != "precision"] + + with warnings_to(self._logger): + X, y = reduce_dataset_size_if_too_large( + X=X, + y=y, + memory_limit=self._memory_limit, + is_classification=is_classification, + random_state=self._seed, + operations=methods, + memory_allocation=memory_allocation, + ) + + # Check the re-sampling strategy self._check_resampling_strategy( X=X, y=y, task=self._task, ) - except Exception as e: - self._fit_cleanup() - raise e - # Reset learnt stuff - self.models_ = None - self.cv_models_ = None - self.ensemble_ = None - - # The metric must exist as of this point - # It can be provided in the constructor, or automatically - # defined in the estimator fit call - if isinstance(self._metrics, Sequence): - for entry in self._metrics: - if not isinstance(entry, Scorer): - raise ValueError( - "Metric {entry} must be instance of autosklearn.metrics.Scorer." - ) - else: - raise ValueError( - "Metric must be a sequence of instances of " - "autosklearn.metrics.Scorer." - ) - - # If no dask client was provided, we create one, so that we can - # start a ensemble process in parallel to smbo optimize - if self._dask_client is None and ( - self._ensemble_class is not None - or self._n_jobs is not None - and self._n_jobs > 1 - ): - self._create_dask_client() - else: - self._is_dask_client_internally_created = False - - self._dataset_name = dataset_name - self._stopwatch.start(self._dataset_name) + # Reset learnt stuff + self.models_ = None + self.cv_models_ = None + self.ensemble_ = None - # Take the feature types from the validator - self._feat_type = self.InputValidator.feature_validator.feat_type + # The metric must exist as of this point + # It can be provided in the constructor, or automatically + # defined in the estimator fit call + if isinstance(self._metrics, Sequence): + for entry in self._metrics: + if not isinstance(entry, Scorer): + raise ValueError( + f"Metric {entry} must be instance of" + " autosklearn.metrics.Scorer." + ) + else: + raise ValueError( + "Metric must be a sequence of instances of " + "autosklearn.metrics.Scorer." 
+ ) - self._log_fit_setup() + # If no dask client was provided, we create one, so that we can + # start a ensemble process in parallel to smbo optimize + if self._dask_client is None and ( + self._ensemble_class is not None + or self._n_jobs is not None + and self._n_jobs > 1 + ): + self._create_dask_client() + else: + self._is_dask_client_internally_created = False - # == Pickle the data manager to speed up loading - with self._stopwatch.time("Save Datamanager"): - datamanager = XYDataManager( - X, - y, - X_test=X_test, - y_test=y_test, - task=self._task, - feat_type=self._feat_type, - dataset_name=dataset_name, - ) + self._dataset_name = dataset_name + self._stopwatch.start(self._dataset_name) - self._backend._make_internals_directory() - self._label_num = datamanager.info["label_num"] - - self._backend.save_datamanager(datamanager) - - # = Create a searchspace - # Do this before One Hot Encoding to make sure that it creates a - # search space for a dense classifier even if one hot encoding would - # make it sparse (tradeoff; if one hot encoding would make it sparse, - # densifier and truncatedSVD would probably lead to a MemoryError, - # like this we can't use some of the preprocessing methods in case - # the data became sparse) - with self._stopwatch.time("Create Search space"): - self.configuration_space, configspace_path = self._create_search_space( - self._backend.temporary_directory, - self._backend, - datamanager, - include=self._include, - exclude=self._exclude, - ) + # Take the feature types from the validator + self._feat_type = self.InputValidator.feature_validator.feat_type - if only_return_configuration_space: - self._fit_cleanup() - return self.configuration_space - - # == Perform dummy predictions - with self._stopwatch.time("Dummy predictions"): - self.num_run += 1 - self._do_dummy_prediction() - - # == RUN ensemble builder - # Do this before calculating the meta-features to make sure that the - # dummy predictions are actually included in the ensemble even if - # calculating the meta-features takes very long - with self._stopwatch.time("Run Ensemble Builder"): - - elapsed_time = self._stopwatch.time_since(self._dataset_name, "start") - - time_left_for_ensembles = max(0, self._time_for_task - elapsed_time) - proc_ensemble = None - if time_left_for_ensembles <= 0: - # Fit only raises error when an ensemble class is given but - # time_left_for_ensembles is zero. - if self._ensemble_class is not None: - raise ValueError( - "Not starting ensemble builder because there " - "is no time left. Try increasing the value " - "of time_left_for_this_task." - ) - elif self._ensemble_class is None: - self._logger.info( - "Not starting ensemble builder because no ensemble class is given." 
- ) - else: - self._logger.info( - "Start Ensemble with %5.2fsec time left" % time_left_for_ensembles - ) + self._log_fit_setup() - proc_ensemble = EnsembleBuilderManager( - start_time=time.time(), - time_left_for_ensembles=time_left_for_ensembles, - backend=copy.deepcopy(self._backend), - dataset_name=dataset_name, + # == Pickle the data manager to speed up loading + with self._stopwatch.time("Save Datamanager"): + datamanager = XYDataManager( + X, + y, + X_test=X_test, + y_test=y_test, task=self._task, - metrics=self._metrics, - ensemble_class=self._ensemble_class, - ensemble_kwargs=self._ensemble_kwargs, - ensemble_nbest=self._ensemble_nbest, - max_models_on_disc=self._max_models_on_disc, - seed=self._seed, - precision=self.precision, - max_iterations=self._max_ensemble_build_iterations, - read_at_most=self._read_at_most, - memory_limit=self._memory_limit, - random_state=self._seed, - logger_port=self._logger_port, - pynisher_context=self._multiprocessing_context, + feat_type=self._feat_type, + dataset_name=dataset_name, ) - # kill the datamanager as it will be re-loaded anyways from sub processes - try: - del self._datamanager - except Exception: - pass + self._backend._make_internals_directory() + self._label_num = datamanager.info["label_num"] + + self._backend.save_datamanager(datamanager) + + # = Create a searchspace + # Do this before One Hot Encoding to make sure that it creates a + # search space for a dense classifier even if one hot encoding would + # make it sparse (tradeoff; if one hot encoding would make it sparse, + # densifier and truncatedSVD would probably lead to a MemoryError, + # like this we can't use some of the preprocessing methods in case + # the data became sparse) + with self._stopwatch.time("Create Search space"): + self.configuration_space, configspace_path = self._create_search_space( + self._backend.temporary_directory, + self._backend, + datamanager, + include=self._include, + exclude=self._exclude, + ) - # => RUN SMAC - with self._stopwatch.time("Run SMAC"): - elapsed_time = self._stopwatch.time_since(self._dataset_name, "start") - time_left = self._time_for_task - elapsed_time - - if self._logger: - self._logger.info("Start SMAC with %5.2fsec time left" % time_left) - if time_left <= 0: - self._logger.warning("Not starting SMAC because there is no time left.") - _proc_smac = None - self._budget_type = None - else: - if ( - self._per_run_time_limit is None - or self._per_run_time_limit > time_left - ): - self._logger.warning( - "Time limit for a single run is higher than total time " - "limit. Capping the limit for a single run to the total " - "time given to SMAC (%f)" % time_left + if only_return_configuration_space: + return self.configuration_space + + # == Perform dummy predictions + with self._stopwatch.time("Dummy predictions"): + self.num_run += 1 + self._do_dummy_prediction() + + # == RUN ensemble builder + # Do this before calculating the meta-features to make sure that the + # dummy predictions are actually included in the ensemble even if + # calculating the meta-features takes very long + with self._stopwatch.time("Run Ensemble Builder"): + + elapsed_time = self._stopwatch.time_since(self._dataset_name, "start") + + time_left_for_ensembles = max(0, self._time_for_task - elapsed_time) + proc_ensemble = None + if time_left_for_ensembles <= 0: + # Fit only raises error when an ensemble class is given but + # time_left_for_ensembles is zero. 
+ if self._ensemble_class is not None: + raise ValueError( + "Not starting ensemble builder because there " + "is no time left. Try increasing the value " + "of time_left_for_this_task." + ) + elif self._ensemble_class is None: + self._logger.info( + "No ensemble buildin because no ensemble class was given." ) - per_run_time_limit = time_left else: - per_run_time_limit = self._per_run_time_limit + self._logger.info( + "Start Ensemble with %5.2fsec time left" + % time_left_for_ensembles + ) + + proc_ensemble = EnsembleBuilderManager( + start_time=time.time(), + time_left_for_ensembles=time_left_for_ensembles, + backend=copy.deepcopy(self._backend), + dataset_name=dataset_name, + task=self._task, + metrics=self._metrics, + ensemble_class=self._ensemble_class, + ensemble_kwargs=self._ensemble_kwargs, + ensemble_nbest=self._ensemble_nbest, + max_models_on_disc=self._max_models_on_disc, + seed=self._seed, + precision=self.precision, + max_iterations=self._max_ensemble_build_iterations, + read_at_most=self._read_at_most, + memory_limit=self._memory_limit, + random_state=self._seed, + logger_port=self._logger_port, + pynisher_context=self._multiprocessing_context, + ) - # Make sure that at least 2 models are created for the ensemble process - num_models = time_left // per_run_time_limit - if num_models < 2: - per_run_time_limit = time_left // 2 + # kill the datamanager as it will be re-loaded anyways from sub processes + try: + del self._datamanager + except Exception: + pass + + # => RUN SMAC + with self._stopwatch.time("Run SMAC"): + elapsed_time = self._stopwatch.time_since(self._dataset_name, "start") + time_left = self._time_for_task - elapsed_time + + if self._logger: + self._logger.info("Start SMAC with %5.2fsec time left" % time_left) + if time_left <= 0: self._logger.warning( - "Capping the per_run_time_limit to {} to have " - "time for a least 2 models in each process.".format( - per_run_time_limit - ) + "Not starting SMAC because there is no time left." ) + _proc_smac = None + self._budget_type = None + else: + if ( + self._per_run_time_limit is None + or self._per_run_time_limit > time_left + ): + self._logger.warning( + "Time limit for a single run is higher than total time " + "limit. 
Capping the limit for a single run to the total " + "time given to SMAC (%f)" % time_left + ) + per_run_time_limit = time_left + else: + per_run_time_limit = self._per_run_time_limit + + # At least 2 models are created for the ensemble process + num_models = time_left // per_run_time_limit + if num_models < 2: + per_run_time_limit = time_left // 2 + self._logger.warning( + "Capping the per_run_time_limit to {} to have " + "time for a least 2 models in each process.".format( + per_run_time_limit + ) + ) - _proc_smac = AutoMLSMBO( - config_space=self.configuration_space, - dataset_name=self._dataset_name, - backend=self._backend, - total_walltime_limit=time_left, - func_eval_time_limit=per_run_time_limit, - memory_limit=self._memory_limit, - data_memory_limit=self._data_memory_limit, - stopwatch=self._stopwatch, - n_jobs=self._n_jobs, - dask_client=self._dask_client, - start_num_run=self.num_run, - num_metalearning_cfgs=self._initial_configurations_via_metalearning, - config_file=configspace_path, - seed=self._seed, - metadata_directory=self._metadata_directory, - metrics=self._metrics, - resampling_strategy=self._resampling_strategy, - resampling_strategy_args=self._resampling_strategy_arguments, - include=self._include, - exclude=self._exclude, - disable_file_output=self._disable_evaluator_output, - get_smac_object_callback=self._get_smac_object_callback, - smac_scenario_args=self._smac_scenario_args, - scoring_functions=self._scoring_functions, - port=self._logger_port, - pynisher_context=self._multiprocessing_context, - ensemble_callback=proc_ensemble, - trials_callback=self._get_trials_callback, - ) + n_meta_configs = self._initial_configurations_via_metalearning + _proc_smac = AutoMLSMBO( + config_space=self.configuration_space, + dataset_name=self._dataset_name, + backend=self._backend, + total_walltime_limit=time_left, + func_eval_time_limit=per_run_time_limit, + memory_limit=self._memory_limit, + data_memory_limit=self._data_memory_limit, + stopwatch=self._stopwatch, + n_jobs=self._n_jobs, + dask_client=self._dask_client, + start_num_run=self.num_run, + num_metalearning_cfgs=n_meta_configs, + config_file=configspace_path, + seed=self._seed, + metadata_directory=self._metadata_directory, + metrics=self._metrics, + resampling_strategy=self._resampling_strategy, + resampling_strategy_args=self._resampling_strategy_arguments, + include=self._include, + exclude=self._exclude, + disable_file_output=self._disable_evaluator_output, + get_smac_object_callback=self._get_smac_object_callback, + smac_scenario_args=self._smac_scenario_args, + scoring_functions=self._scoring_functions, + port=self._logger_port, + pynisher_context=self._multiprocessing_context, + ensemble_callback=proc_ensemble, + trials_callback=self._get_trials_callback, + ) - try: ( self.runhistory_, self.trajectory_, @@ -928,42 +948,49 @@ def fit( ] with open(trajectory_filename, "w") as fh: json.dump(saveable_trajectory, fh) - except Exception as e: - self._logger.exception(e) - raise - - self._logger.info("Starting shutdown...") - # Wait until the ensemble process is finished to avoid shutting down - # while the ensemble builder tries to access the data - if proc_ensemble is not None: - self.ensemble_performance_history = list(proc_ensemble.history) - - if len(proc_ensemble.futures) > 0: - # Now we need to wait for the future to return as it cannot be cancelled - # while it is running: https://stackoverflow.com/a/49203129 - self._logger.info( - "Ensemble script still running, waiting for it to finish." 
- ) - result = proc_ensemble.futures.pop().result() - if result: - ensemble_history, _ = result - self.ensemble_performance_history.extend(ensemble_history) - self._logger.info("Ensemble script finished, continue shutdown.") - - # save the ensemble performance history file - if len(self.ensemble_performance_history) > 0: - pd.DataFrame(self.ensemble_performance_history).to_json( - os.path.join( - self._backend.internals_directory, "ensemble_history.json" + + self._logger.info("Starting shutdown...") + # Wait until the ensemble process is finished to avoid shutting down + # while the ensemble builder tries to access the data + if proc_ensemble is not None: + self.ensemble_performance_history = list(proc_ensemble.history) + + if len(proc_ensemble.futures) > 0: + # Now we wait for the future to return as it cannot be cancelled + # while it is running: https://stackoverflow.com/a/49203129 + self._logger.info( + "Ensemble script still running, waiting for it to finish." + ) + result = proc_ensemble.futures.pop().result() + if result: + ensemble_history, _ = result + self.ensemble_performance_history.extend(ensemble_history) + self._logger.info("Ensemble script finished, continue shutdown.") + + # save the ensemble performance history file + if len(self.ensemble_performance_history) > 0: + pd.DataFrame(self.ensemble_performance_history).to_json( + os.path.join( + self._backend.internals_directory, "ensemble_history.json" + ) ) - ) - if load_models: - self._logger.info("Loading models...") - self._load_models() - self._logger.info("Finished loading models...") + if load_models: + self._logger.info("Loading models...") + self._load_models() + self._logger.info("Finished loading models...") + + # The whole logic above from where we begin the logging server is capture + # in a try: finally: so that if something goes wrong, we at least close + # down the logging server, preventing it from hanging and not closing + # until ctrl+c is pressed + except Exception as e: + # This will be called before the _fit_cleanup + self._logger.exception(e) + raise e + finally: + self._fit_cleanup() - self._fit_cleanup() self.fitted = True return self @@ -1447,6 +1474,7 @@ def predict(self, X, batch_size=None, n_jobs=1): # Each process computes predictions in chunks of batch_size rows. 
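# --- Illustrative aside, not part of the patch ----------------------------
# The predict() signature shown just above takes batch_size and n_jobs; a
# minimal usage sketch at the estimator level (data and numbers made up):

import numpy as np

from autosklearn.classification import AutoSklearnClassifier

automl = AutoSklearnClassifier(time_left_for_this_task=60)
automl.fit(np.random.rand(200, 5), np.random.randint(0, 2, 200))

# Predict in chunks of 1000 rows, with two jobs computing chunks in parallel
y_hat = automl.predict(np.random.rand(5000, 5), batch_size=1000, n_jobs=2)
# ---------------------------------------------------------------------------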
try: for i, tmp_model in enumerate(self.models_.values()): + # TODO, modify this if isinstance(tmp_model, (DummyRegressor, DummyClassifier)): check_is_fitted(tmp_model) else: @@ -1502,6 +1530,7 @@ def fit_ensemble( ensemble_nbest: Optional[int] = None, ensemble_class: Optional[AbstractEnsemble] = EnsembleSelection, ensemble_kwargs: Optional[Dict[str, Any]] = None, + metrics: Scorer | Sequence[Scorer] | None = None, ): check_is_fitted(self) @@ -1532,6 +1561,10 @@ def fit_ensemble( else: self._is_dask_client_internally_created = False + metrics = metrics if metrics is not None else self._metrics + if not isinstance(metrics, Sequence): + metrics = [metrics] + # Use the current thread to start the ensemble builder process # The function ensemble_builder_process will internally create a ensemble # builder in the provide dask client @@ -1541,7 +1574,7 @@ def fit_ensemble( backend=copy.deepcopy(self._backend), dataset_name=dataset_name if dataset_name else self._dataset_name, task=task if task else self._task, - metrics=self._metrics, + metrics=metrics if metrics is not None else self._metrics, ensemble_class=( ensemble_class if ensemble_class is not None else self._ensemble_class ), @@ -1652,20 +1685,12 @@ def _load_best_individual_model(self): return ensemble def _load_pareto_set(self) -> Sequence[VotingClassifier | VotingRegressor]: - if len(self._metrics) <= 1: - raise ValueError("Pareto set is only available for two or more metrics.") - - if self._ensemble_class is not None: + if self.ensemble_ is None: self.ensemble_ = self._backend.load_ensemble(self._seed) - else: - self.ensemble_ = None # If no ensemble is loaded we cannot do anything if not self.ensemble_: - - raise ValueError( - "Pareto set can only be accessed if an ensemble is available." - ) + raise ValueError("Pareto set only available if ensemble can be loaded.") if isinstance(self.ensemble_, AbstractMultiObjectiveEnsemble): pareto_set = self.ensemble_.get_pareto_set() @@ -1691,8 +1716,10 @@ def _load_pareto_set(self) -> Sequence[VotingClassifier | VotingRegressor]: estimators=None, voting="soft", ) + kind = "classifier" else: voter = VotingRegressor(estimators=None) + kind = "regeressor" if self._resampling_strategy in ("cv", "cv-iterative-fit"): models = self._backend.load_cv_models_by_identifiers(identifiers) @@ -1705,8 +1732,32 @@ def _load_pareto_set(self) -> Sequence[VotingClassifier | VotingRegressor]: weight_vector = [] estimators = [] for identifier in identifiers: - weight_vector.append(weights[identifier]) - estimators.append(models[identifier]) + estimator = models[identifier] + weight = weights[identifier] + + # Kind of hacky, really the dummy models should + # act like everything else does. Doing this is + # required so that the VotingClassifier/Regressor + # can use it as intended + if not isinstance(estimator, Pipeline): + if kind == "classifier": + steps = [ + ("data_preprocessor", None), + ("balancing", None), + ("feature_preprocessor", None), + (kind, estimator), + ] + else: + steps = [ + ("data_preprocessor", None), + ("feature_preprocessor", None), + (kind, estimator), + ] + + estimator = Pipeline(steps=steps) + + weight_vector.append(weight) + estimators.append(estimator) voter.estimators = estimators voter.estimators_ = estimators @@ -2123,7 +2174,7 @@ def show_models(self) -> dict[int, Any]: ensemble_dict = {} - if self._ensemble_class is not None: + if self._ensemble_class is None: warnings.warn( "No models in the ensemble. Kindly provide an ensemble class." 
) @@ -2138,10 +2189,10 @@ def has_key(rv, key): return rv.additional_info and key in rv.additional_info table_dict = {} - for rkey, rval in self.runhistory_.data.items(): - if has_key(rval, "num_run"): - model_id = rval.additional_info["num_run"] - table_dict[model_id] = {"model_id": model_id, "cost": rval.cost} + for run_key, run_val in self.runhistory_.data.items(): + if has_key(run_val, "num_run"): + model_id = run_val.additional_info["num_run"] + table_dict[model_id] = {"model_id": model_id, "cost": run_val.cost} # Checking if the dictionary is empty if not table_dict: diff --git a/autosklearn/ensemble_building/builder.py b/autosklearn/ensemble_building/builder.py index 487332cbe1..50f69eb35a 100644 --- a/autosklearn/ensemble_building/builder.py +++ b/autosklearn/ensemble_building/builder.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any, Dict, Iterable, Sequence, Type, cast +from typing import Any, Iterable, Mapping, Sequence, Type, cast import logging.handlers import multiprocessing @@ -46,7 +46,7 @@ def __init__( task_type: int, metrics: Sequence[Scorer], ensemble_class: Type[AbstractEnsemble] = EnsembleSelection, - ensemble_kwargs: Dict[str, Any] | None = None, + ensemble_kwargs: Mapping[str, Any] | None = None, ensemble_nbest: int | float = 50, max_models_on_disc: int | float | None = 100, seed: int = 1, @@ -71,9 +71,11 @@ def __init__( metrics: Sequence[Scorer] Metrics to optimize the ensemble for. These must be non-duplicated. - ensemble_class + ensemble_class: Type[AbstractEnsemble] + Implementation of the ensemble algorithm. - ensemble_kwargs + ensemble_kwargs: Mapping[str, Any] | None + Arguments passed to the constructor of the ensemble algorithm. ensemble_nbest: int | float = 50 @@ -169,6 +171,8 @@ def __init__( self.validation_performance_ = np.inf # Data we may need + # TODO: The test data is needlessly loaded but automl_common has no concept of + # these and is perhaps too rigid datamanager: XYDataManager = self.backend.load_datamanager() self._X_test: SUPPORTED_FEAT_TYPES | None = datamanager.data.get("X_test", None) self._y_test: np.ndarray | None = datamanager.data.get("Y_test", None) @@ -442,6 +446,17 @@ def main( self.logger.debug("Found no runs") raise RuntimeError("Found no runs") + # We load in `X_data` if we need it + if any(m._needs_X for m in self.metrics): + ensemble_X_data = self.X_data("ensemble") + + if ensemble_X_data is None: + msg = "No `X_data` for 'ensemble' which was required by metrics" + self.logger.debug(msg) + raise RuntimeError(msg) + else: + ensemble_X_data = None + # Calculate the loss for those that require it requires_update = self.requires_loss_update(runs) if self.read_at_most is not None: @@ -450,9 +465,7 @@ def main( for run in requires_update: run.record_modified_times() # So we don't count as modified next time run.losses = { - metric.name: self.loss( - run, metric=metric, X_data=self.X_data("ensemble") - ) + metric.name: self.loss(run, metric=metric, X_data=ensemble_X_data) for metric in self.metrics } @@ -549,15 +562,14 @@ def main( return self.ensemble_history, self.ensemble_nbest targets = cast(np.ndarray, self.targets("ensemble")) # Sure they exist - X_data = self.X_data("ensemble") ensemble = self.fit_ensemble( candidates=candidates, - X_data=X_data, targets=targets, runs=runs, ensemble_class=self.ensemble_class, ensemble_kwargs=self.ensemble_kwargs, + X_data=ensemble_X_data, task=self.task_type, metrics=self.metrics, precision=self.precision, @@ -587,7 +599,15 @@ def main( run_preds = 
[r.predictions(kind, precision=self.precision) for r in models] pred = ensemble.predict(run_preds) - X_data = self.X_data(kind) + + if any(m._needs_X for m in self.metrics): + X_data = self.X_data(kind) + if X_data is None: + msg = f"No `X` data for '{kind}' which was required by metrics" + self.logger.debug(msg) + raise RuntimeError(msg) + else: + X_data = None scores = calculate_scores( solution=pred_targets, @@ -597,10 +617,19 @@ def main( X_data=X_data, scoring_functions=None, ) + + # TODO only one metric in history + # + # We should probably return for all metrics but this makes + # automl::performance_history a lot more complicated, will + # tackle in a future PR + first_metric = self.metrics[0] performance_stamp[f"ensemble_{score_name}_score"] = scores[ - self.metrics[0].name + first_metric.name ] - self.ensemble_history.append(performance_stamp) + + # Add the performance stamp to the history + self.ensemble_history.append(performance_stamp) # Lastly, delete any runs that need to be deleted. We save this as the last step # so that we have an ensemble saved that is up to date. If we do not do so, @@ -805,13 +834,13 @@ def candidate_selection( def fit_ensemble( self, - candidates: list[Run], - X_data: SUPPORTED_FEAT_TYPES, - targets: np.ndarray, + candidates: Sequence[Run], + runs: Sequence[Run], *, - runs: list[Run], + targets: np.ndarray | None = None, ensemble_class: Type[AbstractEnsemble] = EnsembleSelection, - ensemble_kwargs: Dict[str, Any] | None = None, + ensemble_kwargs: Mapping[str, Any] | None = None, + X_data: SUPPORTED_FEAT_TYPES | None = None, task: int | None = None, metrics: Sequence[Scorer] | None = None, precision: int | None = None, @@ -825,24 +854,24 @@ def fit_ensemble( Parameters ---------- - candidates: list[Run] + candidates: Sequence[Run] List of runs to build an ensemble from - X_data: SUPPORTED_FEAT_TYPES - The base level data. + runs: Sequence[Run] + List of all runs (also pruned ones and dummy runs) - targets: np.ndarray + targets: np.ndarray | None = None The targets to build the ensemble with - runs: list[Run] - List of all runs (also pruned ones and dummy runs) - - ensemble_class: AbstractEnsemble + ensemble_class: Type[AbstractEnsemble] Implementation of the ensemble algorithm. - ensemble_kwargs: Dict[str, Any] + ensemble_kwargs: Mapping[str, Any] | None Arguments passed to the constructor of the ensemble algorithm. + X_data: SUPPORTED_FEAT_TYPES | None = None + The base level data. 
+ task: int | None = None The kind of task performed @@ -859,24 +888,42 @@ def fit_ensemble( ------- AbstractEnsemble """ - task = task if task is not None else self.task_type + # Validate we have targets if None specified + if targets is None: + targets = self.targets("ensemble") + if targets is None: + path = self.backend._get_targets_ensemble_filename() + raise ValueError(f"`fit_ensemble` could not find any targets at {path}") + ensemble_class = ( ensemble_class if ensemble_class is not None else self.ensemble_class ) - ensemble_kwargs = ( - ensemble_kwargs if ensemble_kwargs is not None else self.ensemble_kwargs - ) - ensemble_kwargs = ensemble_kwargs if ensemble_kwargs is not None else {} - metrics = metrics if metrics is not None else self.metrics - rs = random_state if random_state is not None else self.random_state - ensemble = ensemble_class( - task_type=task, - metrics=metrics, - random_state=rs, - backend=self.backend, - **ensemble_kwargs, - ) # type: AbstractEnsemble + # Create the ensemble_kwargs, favouring in order: + # 1) function kwargs, 2) function params 3) init_kwargs 4) init_params + + # Collect func params in dict if they're not None + params = { + k: v + for k, v in [ + ("task_type", task), + ("metrics", metrics), + ("random_state", random_state), + ] + if v is not None + } + + kwargs = { + "backend": self.backend, + "task_type": self.task_type, + "metrics": self.metrics, + "random_state": self.random_state, + **(self.ensemble_kwargs or {}), + **params, + **(ensemble_kwargs or {}), + } + + ensemble = ensemble_class(**kwargs) # type: AbstractEnsemble self.logger.debug(f"Fitting ensemble on {len(candidates)} models") start_time = time.time() @@ -995,7 +1042,8 @@ def loss( self, run: Run, metric: Scorer, - X_data: SUPPORTED_FEAT_TYPES, + *, + X_data: SUPPORTED_FEAT_TYPES | None = None, kind: str = "ensemble", ) -> float: """Calculate the loss for a run @@ -1008,6 +1056,9 @@ def loss( metric: Scorer The metric to calculate the loss of + X_data: SUPPORTED_FEAT_TYPES | None = None + Any X_data required to be passed to the metric + kind: str = "ensemble" The kind of targets to use for the run diff --git a/autosklearn/estimators.py b/autosklearn/estimators.py index 7144fcc39c..5afd8c597c 100644 --- a/autosklearn/estimators.py +++ b/autosklearn/estimators.py @@ -38,6 +38,7 @@ from autosklearn.ensembles.ensemble_selection import EnsembleSelection from autosklearn.metrics import Scorer from autosklearn.pipeline.base import BasePipeline +from autosklearn.util.smac_wrap import SMACCallback class AutoSklearnEstimator(BaseEstimator): @@ -69,7 +70,7 @@ def __init__( metric: Scorer | Sequence[Scorer] | None = None, scoring_functions: Optional[List[Scorer]] = None, load_models: bool = True, - get_trials_callback=None, + get_trials_callback: SMACCallback | None = None, dataset_compression: Union[bool, Mapping[str, Any]] = True, allow_string_features: bool = True, ): @@ -261,8 +262,8 @@ def __init__( list are: * ``'y_optimization'`` : do not save the predictions for the - optimization/validation set, which would later on be used to build - an ensemble. + optimization set, which would later on be used to build an ensemble. + * ``model`` : do not save any model files smac_scenario_args : dict, optional (None) @@ -301,10 +302,19 @@ def __init__( Whether to load the models after fitting Auto-sklearn. get_trials_callback: callable - Callback function to create an object of subclass defined in module - `smac.callbacks `_. - This is an advanced feature. 
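# --- Illustrative aside, not part of the patch ----------------------------
# The rewritten docstring below describes get_trials_callback as a plain
# callable (smac.SMBO, smac.RunInfo, smac.RunValue, time_left) -> bool | None.
# A minimal early-stopping sketch under that contract (the loss threshold is
# made up; returning False is assumed to ask SMAC to stop, as with
# IncorporateRunResultCallback):

from autosklearn.classification import AutoSklearnClassifier


def stop_when_good_enough(smbo, run_info, run_value, time_left):
    # run_value.cost is the minimized loss of the run that just finished
    if run_value.cost < 0.05:
        return False  # request early stop
    return None  # keep optimizing


automl = AutoSklearnClassifier(
    time_left_for_this_task=120,
    get_trials_callback=stop_when_good_enough,
)
# ---------------------------------------------------------------------------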
Use only if you are familiar with - `SMAC `_. + A callable with the following definition. + + * (smac.SMBO, smac.RunInfo, smac.RunValue, time_left: float) -> bool | None + + This will be called after SMAC, the underlying optimizer for autosklearn, + finishes training each run. + + You can use this to record your own information about the optimization + process. You can also use this to enable a early stopping based on some + critera. + + See the example: + :ref:`Early Stopping And Callbacks `. dataset_compression: Union[bool, Mapping[str, Any]] = True We compress datasets so that they fit into some predefined amount of memory. @@ -601,6 +611,7 @@ def fit_ensemble( ensemble_kwargs: Optional[Dict[str, Any]] = None, ensemble_nbest: Optional[int] = None, ensemble_class: Optional[AbstractEnsemble] = EnsembleSelection, + metrics: Scorer | Sequence[Scorer] | None = None, ): """Fit an ensemble to models trained during an optimization process. @@ -650,12 +661,13 @@ def fit_ensemble( to obtain only use the single best model instead of an ensemble. + metrics: Scorer | Sequence[Scorer] | None = None + A metric or list of metrics to score the ensemble with + Returns ------- self - """ - # User specified `ensemble_size` explicitly, warn them about deprecation if ensemble_size is not None: # Keep consistent behaviour @@ -708,6 +720,7 @@ def fit_ensemble( ensemble_nbest=ensemble_nbest, ensemble_class=ensemble_class, ensemble_kwargs=ensemble_kwargs, + metrics=metrics, ) return self @@ -1041,31 +1054,31 @@ def additional_info_has_key(rv, key): return rv.additional_info and key in rv.additional_info model_runs = {} - for rkey, rval in self.automl_.runhistory_.data.items(): - if not additional_info_has_key(rval, "num_run"): + for run_key, run_val in self.automl_.runhistory_.data.items(): + if not additional_info_has_key(run_val, "num_run"): continue else: - model_key = rval.additional_info["num_run"] + model_key = run_val.additional_info["num_run"] model_run = { - "model_id": rval.additional_info["num_run"], - "seed": rkey.seed, - "budget": rkey.budget, - "duration": rval.time, - "config_id": rkey.config_id, - "start_time": rval.starttime, - "end_time": rval.endtime, - "status": str(rval.status), - "train_loss": rval.additional_info["train_loss"] - if additional_info_has_key(rval, "train_loss") + "model_id": run_val.additional_info["num_run"], + "seed": run_key.seed, + "budget": run_key.budget, + "duration": run_val.time, + "config_id": run_key.config_id, + "start_time": run_val.starttime, + "end_time": run_val.endtime, + "status": str(run_val.status), + "train_loss": run_val.additional_info["train_loss"] + if additional_info_has_key(run_val, "train_loss") else None, - "config_origin": rval.additional_info["configuration_origin"] - if additional_info_has_key(rval, "configuration_origin") + "config_origin": run_val.additional_info["configuration_origin"] + if additional_info_has_key(run_val, "configuration_origin") else None, } if num_metrics == 1: - model_run["cost"] = rval.cost + model_run["cost"] = run_val.cost else: - for cost_idx, cost in enumerate(rval.cost): + for cost_idx, cost in enumerate(run_val.cost): model_run[f"cost_{cost_idx}"] = cost model_runs[model_key] = model_run diff --git a/autosklearn/evaluation/__init__.py b/autosklearn/evaluation/__init__.py index aace158c00..ba17513ae0 100644 --- a/autosklearn/evaluation/__init__.py +++ b/autosklearn/evaluation/__init__.py @@ -71,7 +71,7 @@ def fit_predict_try_except_decorator( # File "auto-sklearn/autosklearn/evaluation/train_evaluator.py", line 616, 
in fit_predict_and_loss, # noqa E501 # status=status # File "auto-sklearn/autosklearn/evaluation/abstract_evaluator.py", line 320, in finish_up # noqa E501 - # self.queue.put(rval_dict) + # self.queue.put(return_value_dict) # File "miniconda/3-4.5.4/envs/autosklearn/lib/python3.7/multiprocessing/queues.py", line 87, in put # noqa E501 # self._start_thread() # File "miniconda/3-4.5.4/envs/autosklearn/lib/python3.7/multiprocessing/queues.py", line 170, in _start_thread # noqa E501 @@ -230,14 +230,7 @@ def __init__( self.memory_limit = memory_limit dm = self.backend.load_datamanager() - if "X_valid" in dm.data and "Y_valid" in dm.data: - self._get_validation_loss = True - else: - self._get_validation_loss = False - if "X_test" in dm.data and "Y_test" in dm.data: - self._get_test_loss = True - else: - self._get_test_loss = False + self._get_test_loss = "X_test" in dm.data and "Y_test" in dm.data self.port = port self.pynisher_context = pynisher_context @@ -533,21 +526,6 @@ def run( additional_run_info["train_learning_curve"] = train_learning_curve additional_run_info["learning_curve_runtime"] = learning_curve_runtime - if self._get_validation_loss: - validation_learning_curve = ( - autosklearn.evaluation.util.extract_learning_curve( - info, - "validation_loss", - ) - ) - if len(validation_learning_curve) > 1: - additional_run_info[ - "validation_learning_curve" - ] = validation_learning_curve - additional_run_info[ - "learning_curve_runtime" - ] = learning_curve_runtime - if self._get_test_loss: test_learning_curve = ( autosklearn.evaluation.util.extract_learning_curve( diff --git a/autosklearn/evaluation/abstract_evaluator.py b/autosklearn/evaluation/abstract_evaluator.py index efd87c6cc3..b97f588a45 100644 --- a/autosklearn/evaluation/abstract_evaluator.py +++ b/autosklearn/evaluation/abstract_evaluator.py @@ -220,8 +220,6 @@ def __init__( self.include = include self.exclude = exclude - self.X_valid = self.datamanager.data.get("X_valid") - self.y_valid = self.datamanager.data.get("Y_valid") self.X_test = self.datamanager.data.get("X_test") self.y_test = self.datamanager.data.get("Y_test") @@ -359,7 +357,6 @@ def finish_up( loss: Union[Dict[str, float], float], train_loss: Optional[Dict[str, float]], opt_pred: np.ndarray, - valid_pred: np.ndarray, test_pred: np.ndarray, additional_run_info: Optional[TYPE_ADDITIONAL_INFO], file_output: bool, @@ -382,19 +379,12 @@ def finish_up( self.duration = time.time() - self.starttime if file_output: - file_out_loss, additional_run_info_ = self.file_output( - opt_pred, - valid_pred, - test_pred, - ) + file_out_loss, additional_run_info_ = self.file_output(opt_pred, test_pred) else: file_out_loss = None additional_run_info_ = {} - validation_loss, test_loss = self.calculate_auxiliary_losses( - valid_pred, - test_pred, - ) + test_loss = self.calculate_auxiliary_losses(test_pred) if file_out_loss is not None: return self.duration, file_out_loss, self.seed, additional_run_info_ @@ -424,59 +414,38 @@ def finish_up( additional_run_info["train_loss"] = [ train_loss[metric.name] for metric in self.metrics ] - if validation_loss is not None: - additional_run_info["validation_loss"] = validation_loss if test_loss is not None: additional_run_info["test_loss"] = test_loss - rval_dict = { + return_value_dict = { "loss": loss, "additional_run_info": additional_run_info, "status": status, } if final_call: - rval_dict["final_queue_element"] = True + return_value_dict["final_queue_element"] = True - self.queue.put(rval_dict) + self.queue.put(return_value_dict) return 
self.duration, loss_, self.seed, additional_run_info_ def calculate_auxiliary_losses( self, - Y_valid_pred: np.ndarray, - Y_test_pred: np.ndarray, - ) -> Tuple[Optional[float | Sequence[float]], Optional[float | Sequence[float]]]: - if Y_valid_pred is not None: - if self.y_valid is not None: - validation_loss: Optional[Union[float, Dict[str, float]]] = self._loss( - self.y_valid, Y_valid_pred - ) - if len(self.metrics) == 1: - validation_loss = validation_loss[self.metrics[0].name] - else: - validation_loss = None - else: - validation_loss = None + Y_test_pred: np.ndarray | None, + ) -> float | dict[str, float] | None: + if Y_test_pred is None or self.y_test is None: + return None - if Y_test_pred is not None: - if self.y_test is not None: - test_loss: Optional[Union[float, Dict[str, float]]] = self._loss( - self.y_test, Y_test_pred - ) - if len(self.metrics) == 1: - test_loss = test_loss[self.metrics[0].name] - else: - test_loss = None - else: - test_loss = None + test_loss = self._loss(self.y_test, Y_test_pred) + if len(self.metrics) == 1: + test_loss = test_loss[self.metrics[0].name] - return validation_loss, test_loss + return test_loss def file_output( self, Y_optimization_pred: np.ndarray, - Y_valid_pred: np.ndarray, Y_test_pred: np.ndarray, - ) -> Tuple[Optional[float], Dict[str, Union[str, int, float, List, Dict, Tuple]]]: + ) -> tuple[float | None, dict[str, Any]]: # Abort if self.Y_optimization is None # self.Y_optimization can be None if we use partial-cv, then, # obviously no output should be saved. @@ -496,12 +465,7 @@ def file_output( ) # Abort if predictions contain NaNs - for y, s in [ - # Y_train_pred deleted here. Fix unittest accordingly. - [Y_optimization_pred, "optimization"], - [Y_valid_pred, "validation"], - [Y_test_pred, "test"], - ]: + for y, s in [(Y_optimization_pred, "optimization"), (Y_test_pred, "test")]: if y is not None and not np.all(np.isfinite(y)): return ( 1.0, @@ -553,14 +517,13 @@ def file_output( budget=self.budget, model=self.model if "model" not in self.disable_file_output else None, cv_model=models if "cv_model" not in self.disable_file_output else None, + # TODO: below line needs to be deleted once backend is updated + valid_predictions=None, ensemble_predictions=( Y_optimization_pred if "y_optimization" not in self.disable_file_output else None ), - valid_predictions=( - Y_valid_pred if "y_valid" not in self.disable_file_output else None - ), test_predictions=( Y_test_pred if "y_test" not in self.disable_file_output else None ), diff --git a/autosklearn/evaluation/test_evaluator.py b/autosklearn/evaluation/test_evaluator.py index e76186aa06..d624c1a44d 100644 --- a/autosklearn/evaluation/test_evaluator.py +++ b/autosklearn/evaluation/test_evaluator.py @@ -67,7 +67,6 @@ def fit_predict_and_loss(self) -> None: loss=loss, train_loss=None, opt_pred=Y_pred, - valid_pred=None, test_pred=None, file_output=False, final_call=True, @@ -78,7 +77,6 @@ def fit_predict_and_loss(self) -> None: def predict_and_loss( self, train: bool = False ) -> Tuple[Union[Dict[str, float], float], np.array, Any, Any]: - if train: Y_pred = self.predict_function( self.X_train, self.model, self.task_type, self.Y_train diff --git a/autosklearn/evaluation/train_evaluator.py b/autosklearn/evaluation/train_evaluator.py index a8433c2136..f19db473bf 100644 --- a/autosklearn/evaluation/train_evaluator.py +++ b/autosklearn/evaluation/train_evaluator.py @@ -316,7 +316,6 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: Y_train_pred = [None] * self.num_cv_folds 
Y_optimization_pred = [None] * self.num_cv_folds - Y_valid_pred = [None] * self.num_cv_folds Y_test_pred = [None] * self.num_cv_folds train_splits = [None] * self.num_cv_folds @@ -417,7 +416,7 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: **fit_params_array[i], ) - (train_pred, opt_pred, valid_pred, test_pred) = self._predict( + (train_pred, opt_pred, test_pred) = self._predict( model, train_indices=train_indices, test_indices=test_indices, @@ -425,7 +424,6 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: Y_train_pred[i] = train_pred Y_optimization_pred[i] = opt_pred - Y_valid_pred[i] = valid_pred Y_test_pred[i] = test_pred train_splits[i] = train_indices @@ -499,20 +497,6 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: X_targets = concat_data(X_targets, num_cv_folds=self.num_cv_folds) Y_targets = concat_data(Y_targets, num_cv_folds=self.num_cv_folds) - if self.X_valid is not None: - Y_valid_preds = np.array( - [ - Y_valid_pred[i] - for i in range(self.num_cv_folds) - if Y_valid_pred[i] is not None - ] - ) - # Average the predictions of several models - if len(Y_valid_preds.shape) == 3: - Y_valid_preds = np.nanmean(Y_valid_preds, axis=0) - else: - Y_valid_preds = None - if self.X_test is not None: Y_test_preds = np.array( [ @@ -544,7 +528,6 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: loss=opt_loss, train_loss=train_loss, opt_pred=Y_optimization_pred_concat, - valid_pred=Y_valid_preds, test_pred=Y_test_preds, additional_run_info=additional_run_info, file_output=True, @@ -558,7 +541,6 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: Y_train_pred = [None] * self.num_cv_folds Y_optimization_pred = [None] * self.num_cv_folds - Y_valid_pred = [None] * self.num_cv_folds Y_test_pred = [None] * self.num_cv_folds train_splits = [None] * self.num_cv_folds @@ -586,7 +568,6 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: ( train_pred, opt_pred, - valid_pred, test_pred, additional_run_info, ) = self._partial_fit_and_predict_standard( @@ -599,7 +580,6 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: ( train_pred, opt_pred, - valid_pred, test_pred, additional_run_info, ) = self._partial_fit_and_predict_budget( @@ -622,7 +602,6 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: Y_train_pred[i] = train_pred Y_optimization_pred[i] = opt_pred - Y_valid_pred[i] = valid_pred Y_test_pred[i] = test_pred train_splits[i] = train_split @@ -683,18 +662,6 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: X_targets = concat_data(X_targets, num_cv_folds=self.num_cv_folds) Y_targets = concat_data(Y_targets, num_cv_folds=self.num_cv_folds) - if self.X_valid is not None: - Y_valid_pred = np.array( - [ - Y_valid_pred[i] - for i in range(self.num_cv_folds) - if Y_valid_pred[i] is not None - ] - ) - # Average the predictions of several models - if len(np.shape(Y_valid_pred)) == 3: - Y_valid_pred = np.nanmean(Y_valid_pred, axis=0) - if self.X_test is not None: Y_test_pred = np.array( [ @@ -746,7 +713,6 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: loss=opt_loss, train_loss=train_loss, opt_pred=Y_optimization_pred, - valid_pred=Y_valid_pred if self.X_valid is not None else None, test_pred=Y_test_pred if self.X_test is not None else None, additional_run_info=additional_run_info, file_output=True, @@ -793,7 +759,6 @@ def partial_fit_predict_and_loss(self, fold: int, iterative: bool = False) -> No ( train_pred, opt_pred, - valid_pred, 
test_pred, additional_run_info, ) = self._partial_fit_and_predict_standard( @@ -819,7 +784,6 @@ def partial_fit_predict_and_loss(self, fold: int, iterative: bool = False) -> No loss=loss, train_loss=train_loss, opt_pred=opt_pred, - valid_pred=valid_pred, test_pred=test_pred, file_output=False, final_call=True, @@ -883,12 +847,7 @@ def _partial_fit_and_predict_iterative( n_iter=n_iter, **fit_params, ) - ( - Y_train_pred, - Y_optimization_pred, - Y_valid_pred, - Y_test_pred, - ) = self._predict( + (Y_train_pred, Y_optimization_pred, Y_test_pred,) = self._predict( model, train_indices=train_indices, test_indices=test_indices, @@ -921,7 +880,6 @@ def _partial_fit_and_predict_iterative( loss=loss, train_loss=train_loss, opt_pred=Y_optimization_pred, - valid_pred=Y_valid_pred, test_pred=Y_test_pred, additional_run_info=additional_run_info, file_output=file_output, @@ -936,7 +894,6 @@ def _partial_fit_and_predict_iterative( ( Y_train_pred, Y_optimization_pred, - Y_valid_pred, Y_test_pred, additional_run_info, ) = self._partial_fit_and_predict_standard( @@ -962,7 +919,6 @@ def _partial_fit_and_predict_iterative( loss=loss, train_loss=train_loss, opt_pred=Y_optimization_pred, - valid_pred=Y_valid_pred, test_pred=Y_test_pred, additional_run_info=additional_run_info, file_output=file_output, @@ -980,7 +936,6 @@ def _partial_fit_and_predict_standard( ) -> Tuple[ PIPELINE_DATA_DTYPE, # train_pred PIPELINE_DATA_DTYPE, # opt_pred - PIPELINE_DATA_DTYPE, # valid_pred PIPELINE_DATA_DTYPE, # test_pred TYPE_ADDITIONAL_INFO, ]: @@ -1020,7 +975,7 @@ def _partial_fit_and_predict_standard( else self.Y_train[train_indices] ) - train_pred, opt_pred, valid_pred, test_pred = self._predict( + train_pred, opt_pred, test_pred = self._predict( model=model, train_indices=train_indices, test_indices=test_indices, @@ -1029,7 +984,6 @@ def _partial_fit_and_predict_standard( return ( train_pred, opt_pred, - valid_pred, test_pred, additional_run_info, ) @@ -1043,7 +997,6 @@ def _partial_fit_and_predict_budget( ) -> Tuple[ PIPELINE_DATA_DTYPE, # train_pred PIPELINE_DATA_DTYPE, # opt_pred - PIPELINE_DATA_DTYPE, # valid_pred PIPELINE_DATA_DTYPE, # test_pred TYPE_ADDITIONAL_INFO, ]: @@ -1073,7 +1026,7 @@ def _partial_fit_and_predict_budget( task_type=self.task_type, ) - train_pred, opt_pred, valid_pred, test_pred = self._predict( + train_pred, opt_pred, test_pred = self._predict( model, train_indices=train_indices, test_indices=test_indices, @@ -1088,19 +1041,13 @@ def _partial_fit_and_predict_budget( return ( train_pred, opt_pred, - valid_pred, test_pred, additional_run_info, ) def _predict( self, model: BaseEstimator, test_indices: List[int], train_indices: List[int] - ) -> Tuple[ - PIPELINE_DATA_DTYPE, - PIPELINE_DATA_DTYPE, - PIPELINE_DATA_DTYPE, - PIPELINE_DATA_DTYPE, - ]: + ) -> Tuple[PIPELINE_DATA_DTYPE, PIPELINE_DATA_DTYPE, PIPELINE_DATA_DTYPE]: train_pred = self.predict_function( self.X_train.iloc[train_indices] if hasattr(self.X_train, "iloc") @@ -1123,14 +1070,6 @@ def _predict( else self.Y_train[train_indices], ) - if self.X_valid is not None: - X_valid = self.X_valid.copy() - valid_pred = self.predict_function( - X_valid, model, self.task_type, self.Y_train[train_indices] - ) - else: - valid_pred = None - if self.X_test is not None: X_test = self.X_test.copy() test_pred = self.predict_function( @@ -1144,7 +1083,7 @@ def _predict( else: test_pred = None - return train_pred, opt_pred, valid_pred, test_pred + return train_pred, opt_pred, test_pred def get_splitter( self, D: AbstractDataManager diff --git 
a/autosklearn/evaluation/util.py b/autosklearn/evaluation/util.py index c249c8be1c..158825786b 100644 --- a/autosklearn/evaluation/util.py +++ b/autosklearn/evaluation/util.py @@ -12,19 +12,19 @@ def read_queue( stack = [] while True: try: - rval = queue_.get(timeout=1) + return_value = queue_.get(timeout=1) except queue.Empty: break # Check if there is a special placeholder value which tells us that # we don't have to wait until the queue times out in order to # retrieve the final value! - if "final_queue_element" in rval: - del rval["final_queue_element"] + if "final_queue_element" in return_value: + del return_value["final_queue_element"] do_break = True else: do_break = False - stack.append(rval) + stack.append(return_value) if do_break: break diff --git a/autosklearn/experimental/selector.py b/autosklearn/experimental/selector.py index 125cba6125..b854c7b440 100644 --- a/autosklearn/experimental/selector.py +++ b/autosklearn/experimental/selector.py @@ -297,17 +297,17 @@ def _predict( wins = wins / np.sum(wins) predictions[X.index[x_idx]] = wins - rval = { + return_value = { task_id: { strategy: predictions[task_id][strategy_idx] for strategy_idx, strategy in enumerate(self.strategies_) } for task_id in X.index } - rval = pd.DataFrame(rval).transpose().astype(float) - rval = rval[self.strategies_] - rval = rval.fillna(0.0) - return rval + return_value = pd.DataFrame(return_value).transpose().astype(float) + return_value = return_value[self.strategies_] + return_value = return_value.fillna(0.0) + return return_value def fit_pairwise_model(self, X, y, weights, rng, configuration): raise NotImplementedError() @@ -346,14 +346,14 @@ def fit( ) -> None: self.X_ = X self.strategies_ = y.columns - self.rval_ = np.array( + self.return_value_ = np.array( [ (len(self.strategies_) - self.default_strategies.index(strategy) - 1) / (len(self.strategies_) - 1) for strategy in self.strategies_ ] ) - self.rval_ = self.rval_ / np.sum(self.rval_) + self.return_value_ = self.return_value_ / np.sum(self.return_value_) self.selector.fit(X, y, minima, maxima) def _predict( @@ -377,7 +377,7 @@ def _predict( prediction.loc[task_id] = pd.Series( { strategy: value - for strategy, value in zip(self.strategies_, self.rval_) + for strategy, value in zip(self.strategies_, self.return_value_) } ) diff --git a/autosklearn/info.py b/autosklearn/info.py new file mode 100644 index 0000000000..a2c4318497 --- /dev/null +++ b/autosklearn/info.py @@ -0,0 +1,205 @@ +""" +This module servers as an introspection point for things users might +want to programatically query about autosklearn. 
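A brief aside on the read_queue() helper renamed a few hunks above: it drains a result queue and stops early when an item carries the "final_queue_element" marker. Below is a minimal sketch of the producer side of that convention, assuming nothing beyond what the hunk shows; the names produce and results_queue are illustrative and not taken from the patch.

from __future__ import annotations

import queue


def produce(results_queue: queue.Queue, losses: list[float]) -> None:
    """Push one result dict per finished run and flag the last one."""
    for i, loss in enumerate(losses):
        item = {"loss": loss, "status": "SUCCESS"}
        if i == len(losses) - 1:
            # The marker lets read_queue() return immediately instead of
            # waiting for its 1-second queue.Empty timeout; read_queue()
            # strips the key again before returning the item.
            item["final_queue_element"] = True
        results_queue.put(item)


q: queue.Queue = queue.Queue()
produce(q, [0.3, 0.2, 0.1])
# read_queue(q) would now return the three result dicts without blocking.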
+""" +from __future__ import annotations + +from typing import Any, Generic, Type, TypeVar + +from dataclasses import dataclass + +from typing_extensions import Literal + +from autosklearn.pipeline.components.base import ( + AutoSklearnClassificationAlgorithm, + AutoSklearnComponent, + AutoSklearnPreprocessingAlgorithm, + AutoSklearnRegressionAlgorithm, +) +from autosklearn.pipeline.components.classification import ClassifierChoice +from autosklearn.pipeline.components.data_preprocessing import DataPreprocessorChoice +from autosklearn.pipeline.components.feature_preprocessing import ( + FeaturePreprocessorChoice, +) +from autosklearn.pipeline.components.regression import RegressorChoice +from autosklearn.pipeline.constants import DATASET_PROPERTIES_TO_STRING + +# Something that is a type that inherits from AutoSklearnComponent +T = TypeVar("T", bound=Type[AutoSklearnComponent]) + + +def _translate_properties( + props: dict[str, Any], + kind: Literal["classifier", "regressor", "f_preprocessor", "d_preprocessor"], +) -> dict[str, Any]: + """Converts supported inputs and outputs to strings""" + # This is information is conveyed implicitly by being a regressor/classifier ... + delwords = ["handles_regression", "handles_classification"] + + # Covered by input type, duplicated info + delwords += ["handles_sparse", "handles_dense"] + + # Words we rename (from, to) + popwords: list[tuple[str, str]] = [ + ("input", "supported_inputs"), + ("output", "output_kind"), + ("is_deterministic", "deterministic"), + ] + + if kind in ["classifier", "f_preprocessor", "d_preprocessor"]: + delwords += ["handles_multioutput"] + + if kind in ["regressor", "f_preprocessor", "d_preprocessor"]: + delwords += ["handles_multiclass", "handles_multilabel"] + + for word in delwords: + if word in props: + del props[word] + + for frm, to in popwords: + props[to] = props.pop(frm) + + props["supported_inputs"] = [ + DATASET_PROPERTIES_TO_STRING[k] for k in props["supported_inputs"] + ] + props["output_kind"] = DATASET_PROPERTIES_TO_STRING[props["output_kind"][0]] + + return props + + +@dataclass +class _ComponentInfo(Generic[T]): + type: T # cls is not possible due to @dataclass conversion + name: str + shortname: str + output_kind: str + supported_inputs: list[str] + deterministic: bool = False + + +@dataclass +class RegressorInfo(_ComponentInfo[Type[AutoSklearnRegressionAlgorithm]]): + handles_multioutput: bool = False + prefers_data_normalized: bool = False + + +@dataclass +class ClassifierInfo(_ComponentInfo[Type[AutoSklearnClassificationAlgorithm]]): + handles_binary: bool = True # We assume all components support this + handles_multiclass: bool = False + handles_multilabel: bool = False + handles_multilabel_multiclass = False + + +@dataclass +class FeaturePreprocessorInfo(_ComponentInfo[Type[AutoSklearnPreprocessingAlgorithm]]): + pass + + +@dataclass +class DataPreprocessorInfo(_ComponentInfo[Type[AutoSklearnPreprocessingAlgorithm]]): + # There should be more here but our DataPreprocessing part of the pipeline doesn't + # pick up on it because there's on FeatTypeSplit available which further has + # subcomponents with extra properties + pass + + +@dataclass +class ComponentsInfo: + classifiers: dict[str, ClassifierInfo] + regressors: dict[str, RegressorInfo] + feature_preprocessors: dict[str, FeaturePreprocessorInfo] + data_preprocessors: dict[str, DataPreprocessorInfo] + + +def classifiers() -> dict[str, ClassifierInfo]: + """Get information about the classifiers available to auto-sklearn + + Returns + ------- + 
dict[str, ClassifierInfo] + The dict of classifiers and some info about them + """ + return { + name: ClassifierInfo( + **{ + "type": cls, + **_translate_properties(cls.get_properties(), "classifier"), + } + ) + for name, cls in ClassifierChoice.get_components().items() + } + + +def regressors() -> dict[str, RegressorInfo]: + """Get information about the regressors available to auto-sklearn + + Returns + ------- + dict[str, RegressorInfo] + The dict of regressors and some info about them + """ + return { + name: RegressorInfo( + **{"type": cls, **_translate_properties(cls.get_properties(), "regressor")}, + ) + for name, cls in RegressorChoice.get_components().items() + } + + +def feature_preprocessors() -> dict[str, FeaturePreprocessorInfo]: + """Get information about the feature preprocessors available to auto-sklearn + + Returns + ------- + dict[str, FeaturePreprocessorInfo] + The dict of feature preprocessors and some info about them + """ + return { + name: FeaturePreprocessorInfo( + **{ + "type": cls, + **_translate_properties(cls.get_properties(), "f_preprocessor"), + } + ) + for name, cls in FeaturePreprocessorChoice.get_components().items() + } + + +def data_preprocessors() -> dict[str, DataPreprocessorInfo]: + """Get information about the data preprocessors available to auto-sklearn + + Returns + ------- + dict[str, DataPreprocessorInfo] + The dict of data preprocessors and some info about them + """ + return { + name: DataPreprocessorInfo( + **{ + "type": cls, + **_translate_properties(cls.get_properties(), "d_preprocessor"), + } + ) + for name, cls in DataPreprocessorChoice.get_components().items() + } + + +def components() -> ComponentsInfo: + """Get information about all of the components available to auto-sklearn + + Returns + ------- + ComponentsInfo + A dataclass with the items + * classifiers + * regressors + * feature_preprocessors + * data_preprocessors + """ + return ComponentsInfo( + classifiers=classifiers(), + regressors=regressors(), + feature_preprocessors=feature_preprocessors(), + data_preprocessors=data_preprocessors(), + ) diff --git a/autosklearn/metalearning/metalearning/kNearestDatasets/kND.py b/autosklearn/metalearning/metalearning/kNearestDatasets/kND.py index f6c10c95d2..f49ed8ccab 100644 --- a/autosklearn/metalearning/metalearning/kNearestDatasets/kND.py +++ b/autosklearn/metalearning/metalearning/kNearestDatasets/kND.py @@ -122,7 +122,7 @@ def kNearestDatasets(self, x, k=1, return_distance=False): assert k == neighbor_indices.shape[1] - rval = [ + return_value = [ self.metafeatures.index[i] # Neighbor indices is 2d, each row is the indices for one # dataset in x. 
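Since the new autosklearn/info.py module shown above is meant as a user-facing introspection point, here is a hedged usage sketch of it. The component key "random_forest" and the concrete property values are assumptions about the installed components, not something this patch guarantees.

from autosklearn.info import classifiers, components

# Which built-in classifiers report multilabel support?
multilabel = sorted(
    name for name, info in classifiers().items() if info.handles_multilabel
)
print(multilabel)

# Everything at once, as the ComponentsInfo dataclass defined above.
all_info = components()
print(len(all_info.classifiers), "classifiers,", len(all_info.regressors), "regressors")

# Each entry records its supported input kinds and its output kind.
rf = all_info.classifiers["random_forest"]  # assumed component name
print(rf.supported_inputs, "->", rf.output_kind)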
@@ -130,9 +130,9 @@ def kNearestDatasets(self, x, k=1, return_distance=False): ] if return_distance is False: - return rval + return return_value else: - return rval, distances[0] + return return_value, distances[0] def kBestSuggestions(self, x, k=1, exclude_double_configurations=True): assert type(x) == pd.Series diff --git a/autosklearn/pipeline/base.py b/autosklearn/pipeline/base.py index 93c73b4716..3a13364ea6 100644 --- a/autosklearn/pipeline/base.py +++ b/autosklearn/pipeline/base.py @@ -495,15 +495,15 @@ def __repr__(self): dataset_properties_string.append("}") dataset_properties_string = "".join(dataset_properties_string) - rval = "%s(%s,\n%s)" % ( + return_value = "%s(%s,\n%s)" % ( class_name, configuration, dataset_properties_string, ) else: - rval = "%s(%s)" % (class_name, configuration_string) + return_value = "%s(%s)" % (class_name, configuration_string) - return rval + return return_value def _get_pipeline_steps(self, dataset_properties): raise NotImplementedError() diff --git a/autosklearn/pipeline/components/base.py b/autosklearn/pipeline/components/base.py index c4a95df08c..7f7adc91b5 100644 --- a/autosklearn/pipeline/components/base.py +++ b/autosklearn/pipeline/components/base.py @@ -1,4 +1,4 @@ -from typing import Dict +from __future__ import annotations import importlib import inspect @@ -10,7 +10,7 @@ from autosklearn.pipeline.constants import SPARSE -_addons = dict() # type: Dict[str, 'ThirdPartyComponents'] +_addons: dict[str, ThirdPartyComponents] = {} def find_components(package, directory, base_class): diff --git a/autosklearn/pipeline/components/data_preprocessing/__init__.py b/autosklearn/pipeline/components/data_preprocessing/__init__.py index 5693efd441..c63a80679f 100644 --- a/autosklearn/pipeline/components/data_preprocessing/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/__init__.py @@ -12,14 +12,16 @@ AutoSklearnChoice, AutoSklearnPreprocessingAlgorithm, ThirdPartyComponents, + _addons, find_components, ) -classifier_directory = os.path.split(__file__)[0] -_preprocessors = find_components( - __package__, classifier_directory, AutoSklearnPreprocessingAlgorithm +data_preprocessing_directory = os.path.split(__file__)[0] +_data_preprocessors = find_components( + __package__, data_preprocessing_directory, AutoSklearnPreprocessingAlgorithm ) -_addons = ThirdPartyComponents(AutoSklearnPreprocessingAlgorithm) +additional_components = ThirdPartyComponents(AutoSklearnPreprocessingAlgorithm) +_addons["data_preprocessing"] = additional_components def add_preprocessor(preprocessor: Type[AutoSklearnPreprocessingAlgorithm]) -> None: @@ -30,8 +32,8 @@ class DataPreprocessorChoice(AutoSklearnChoice): @classmethod def get_components(cls) -> OrderedDict: components: OrderedDict = OrderedDict() - components.update(_preprocessors) - components.update(_addons.components) + components.update(_data_preprocessors) + components.update(additional_components.components) return components def get_available_components( diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py index bd42d8a67a..057099309c 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py @@ -215,6 +215,7 @@ def get_properties( "handles_multiclass": True, "handles_multilabel": True, "handles_multioutput": True, + "is_deterministic": True, # Assumption for now # TODO find out of this is right! 
"handles_sparse": True, "handles_dense": True, diff --git a/autosklearn/pipeline/components/feature_preprocessing/__init__.py b/autosklearn/pipeline/components/feature_preprocessing/__init__.py index cd52d6ad34..968a8e11ad 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/__init__.py +++ b/autosklearn/pipeline/components/feature_preprocessing/__init__.py @@ -14,9 +14,9 @@ find_components, ) -classifier_directory = os.path.split(__file__)[0] -_preprocessors = find_components( - __package__, classifier_directory, AutoSklearnPreprocessingAlgorithm +feature_preprocessing_directory = os.path.split(__file__)[0] +_feature_preprocessors = find_components( + __package__, feature_preprocessing_directory, AutoSklearnPreprocessingAlgorithm ) additional_components = ThirdPartyComponents(AutoSklearnPreprocessingAlgorithm) _addons["feature_preprocessing"] = additional_components @@ -30,7 +30,7 @@ class FeaturePreprocessorChoice(AutoSklearnChoice): @classmethod def get_components(cls): components = OrderedDict() - components.update(_preprocessors) + components.update(_feature_preprocessors) components.update(additional_components.components) return components diff --git a/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py b/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py index 546c8742ad..1af3dc1d8e 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py +++ b/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py @@ -86,6 +86,7 @@ def get_properties(dataset_properties=None): "handles_multiclass": True, "handles_multilabel": False, "handles_multioutput": False, + "is_deterministic": False, "input": (SPARSE, DENSE, UNSIGNED_DATA), "output": (INPUT,), } diff --git a/autosklearn/pipeline/components/regression/sgd.py b/autosklearn/pipeline/components/regression/sgd.py index 3b3f939fa8..38164e8d3f 100644 --- a/autosklearn/pipeline/components/regression/sgd.py +++ b/autosklearn/pipeline/components/regression/sgd.py @@ -179,7 +179,6 @@ def get_properties(dataset_properties=None): "handles_multilabel": False, "handles_multioutput": False, "is_deterministic": True, - "handles_sparse": True, "input": (DENSE, SPARSE, UNSIGNED_DATA), "output": (PREDICTIONS,), } diff --git a/autosklearn/util/smac_wrap.py b/autosklearn/util/smac_wrap.py new file mode 100644 index 0000000000..bf3202bbb3 --- /dev/null +++ b/autosklearn/util/smac_wrap.py @@ -0,0 +1,43 @@ +from __future__ import annotations + +from typing import Callable, Union + +from smac.callbacks import IncorporateRunResultCallback +from smac.optimizer.smbo import SMBO +from smac.runhistory.runhistory import RunInfo, RunValue + +SMACCallback = Callable[[SMBO, RunInfo, RunValue, float], Union[bool, None]] + + +class SmacRunCallback(IncorporateRunResultCallback): + def __init__(self, f: SMACCallback): + self.f = f + + def __call__( + self, + smbo: SMBO, + run_info: RunInfo, + result: RunValue, + time_left: float, + ) -> bool | None: + """ + Parameters + ---------- + smbo: SMBO + The SMAC SMBO object + + run_info: RunInfo + Information about the run completed + + result: RunValue + The results of the run + + time_left: float + How much time is left for the remaining runs + + Returns + ------- + bool | None + If False is returned, the optimization loop will stop + """ + return self.f(smbo, run_info, result, time_left) diff --git a/doc/manual.rst b/doc/manual.rst index 7cdb162881..7ab31ce727 100644 --- a/doc/manual.rst 
+++ b/doc/manual.rst @@ -374,3 +374,8 @@ Other according to its performance on the validation set. Setting the initial configurations found by meta-learning to zero makes *auto-sklearn* use the regular SMAC algorithm for suggesting new hyperparameter configurations. + +.. collapse:: Early stopping and Callbacks + + By using the parameter ``get_trials_callback``, we can get access to the results + of runs as they occur. See this example :ref:`Early Stopping And Callbacks ` for more! diff --git a/examples/40_advanced/example_early_stopping_and_callbacks.py b/examples/40_advanced/example_early_stopping_and_callbacks.py new file mode 100644 index 0000000000..84dae5dced --- /dev/null +++ b/examples/40_advanced/example_early_stopping_and_callbacks.py @@ -0,0 +1,81 @@ +""" +============================ +Early stopping and Callbacks +============================ + +The example below shows how we can use the ``get_trials_callback`` parameter of +auto-sklearn to implement an early-stopping mechanism through a callback. + +These callbacks give access to the result of each model + hyperparameter configuration +optimized by SMAC, the underlying optimizer for autosklearn. By checking the cost of +a result, we can implement a simple yet effective early stopping mechanism! + +Do note however, this does not provide any access to the ensembles that autosklearn +produces, only the individual models. You may wish to perform a more sophisticated +early stopping mechanism such that there are enough good models for autosklearn to build +and ensemble with. This is here to provide a simple example. +""" +from __future__ import annotations + +from pprint import pprint + +import sklearn.datasets +import sklearn.metrics + +import autosklearn.classification + +from smac.optimizer.smbo import SMBO +from smac.runhistory.runhistory import RunInfo, RunValue + + +############################################################################ +# Build and fit a classifier +# ========================== +def callback( + smbo: SMBO, + run_info: RunInfo, + result: RunValue, + time_left: float, +) -> bool | None: + """Stop early if we get a very low cost value for a single run + + The return value indicates to SMAC whether to stop or not. False will + stop the search process while any other value will mean it continues. 
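The new example stops as soon as a single run reaches a very low cost. The same get_trials_callback signature also supports budget-style rules; the sketch below stops after a fixed number of reported results or when little time remains. The thresholds and the factory name make_stop_after are illustrative choices, not part of the patch.

from __future__ import annotations

from typing import Callable, Union

from smac.optimizer.smbo import SMBO
from smac.runhistory.runhistory import RunInfo, RunValue


def make_stop_after(
    n_results: int, min_time_left: float = 10.0
) -> Callable[[SMBO, RunInfo, RunValue, float], Union[bool, None]]:
    """Build a callback that stops SMAC after `n_results` reported runs."""
    seen = 0

    def callback(
        smbo: SMBO, run_info: RunInfo, result: RunValue, time_left: float
    ) -> bool | None:
        nonlocal seen
        seen += 1
        # Returning False stops the search; any other value lets it continue.
        if seen >= n_results or time_left < min_time_left:
            return False
        return None

    return callback


# Used the same way as the example's callback:
# AutoSklearnClassifier(..., get_trials_callback=make_stop_after(20))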
+ """ + # You can find out the parameters in the SMAC documentation + # https://automl.github.io/SMAC3/main/ + if result.cost <= 0.02: + print("Stopping!") + print(run_info) + print(result) + return False + + +X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) +X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, y, random_state=1 +) + +automl = autosklearn.classification.AutoSklearnClassifier( + time_left_for_this_task=120, per_run_time_limit=30, get_trials_callback=callback +) +automl.fit(X_train, y_train, dataset_name="breast_cancer") + +############################################################################ +# View the models found by auto-sklearn +# ===================================== + +print(automl.leaderboard()) + +############################################################################ +# Print the final ensemble constructed by auto-sklearn +# ==================================================== + +pprint(automl.show_models(), indent=4) + +########################################################################### +# Get the Score of the final ensemble +# =================================== + +predictions = automl.predict(X_test) +print("Accuracy score:", sklearn.metrics.accuracy_score(y_test, predictions)) diff --git a/scripts/03_calculate_metafeatures.py b/scripts/03_calculate_metafeatures.py index 95d857145a..d7e08ffea8 100644 --- a/scripts/03_calculate_metafeatures.py +++ b/scripts/03_calculate_metafeatures.py @@ -5,6 +5,7 @@ import os import sys import unittest.mock +import tempfile import arff import joblib @@ -82,10 +83,7 @@ def calculate_metafeatures(task_id): for task_type in ("classification", "regression"): output_directory = os.path.join(working_directory, "metafeatures", task_type) - try: - os.makedirs(output_directory) - except: - pass + os.makedirs(output_directory, exist_ok=True) all_metafeatures = {} @@ -100,13 +98,10 @@ def calculate_metafeatures(task_id): tasks = copy.deepcopy(tasks) np.random.shuffle(tasks) - def producer(): - for task_id in tasks: - yield task_id - - memory = joblib.Memory(location="/tmp/joblib", verbose=10) + tmpdir = os.path.join(tempfile.gettempdir(), "joblib") + memory = joblib.Memory(location=tmpdir, verbose=10) cached_calculate_metafeatures = memory.cache(calculate_metafeatures) - mfs = [cached_calculate_metafeatures(task_id) for task_id in producer()] + mfs = [cached_calculate_metafeatures(task_id) for task_id in tasks] for mf in mfs: if mf is not None: diff --git a/scripts/2015_nips_paper/run/score_ensemble.py b/scripts/2015_nips_paper/run/score_ensemble.py index 1e873f01fd..9842359225 100644 --- a/scripts/2015_nips_paper/run/score_ensemble.py +++ b/scripts/2015_nips_paper/run/score_ensemble.py @@ -227,14 +227,14 @@ def evaluate(input_directory, validation_files, test_files, ensemble_size=50): ensemble_time = time.time() - start - rval = { + return_value = { "ensemble_time": ensemble_time, "time_function_evaluation": time_function_evaluation, "ensemble_error": ensemble_error, "ensemble_test_error": ensemble_test_error, } - return rval + return return_value if __name__ == "__main__": diff --git a/test/fixtures/ensemble_building.py b/test/fixtures/ensemble_building.py index cca68f76d0..548d1c5d72 100644 --- a/test/fixtures/ensemble_building.py +++ b/test/fixtures/ensemble_building.py @@ -164,6 +164,10 @@ def _make( backend.save_additional_data( datamanager.data["Y_train"], what="targets_ensemble" ) + if "X_train" in datamanager.data: + backend.save_additional_data( + 
datamanager.data["X_train"], what="input_ensemble" + ) builder = EnsembleBuilder( backend=backend, diff --git a/test/fixtures/metrics.py b/test/fixtures/metrics.py new file mode 100644 index 0000000000..8bf754aea5 --- /dev/null +++ b/test/fixtures/metrics.py @@ -0,0 +1,26 @@ +from typing import Any + +import numpy as np + +from autosklearn.metrics import accuracy, make_scorer + + +def _accuracy_requiring_X_data( + y_true: np.ndarray, + y_pred: np.ndarray, + X_data: Any, +) -> float: + """Dummy metric that needs X Data""" + if X_data is None: + raise ValueError() + return accuracy(y_true, y_pred) + + +acc_with_X_data = make_scorer( + name="acc_with_X_data", + score_func=_accuracy_requiring_X_data, + needs_X=True, + optimum=1, + worst_possible_result=0, + greater_is_better=True, +) diff --git a/test/test_automl/__init__.py b/test/test_automl/__init__.py index dae354a675..e69de29bb2 100644 --- a/test/test_automl/__init__.py +++ b/test/test_automl/__init__.py @@ -1 +0,0 @@ -# -*- encoding: utf-8 -*- diff --git a/test/test_automl/cases.py b/test/test_automl/cases.py index c80d3d3b5f..9583c7b31d 100644 --- a/test/test_automl/cases.py +++ b/test/test_automl/cases.py @@ -14,7 +14,6 @@ {fitted} - If the automl case has been fitted {cv, holdout} - Whether explicitly cv or holdout was used {no_ensemble} - Fit with no ensemble size - {cached} - If the resulting case is then cached {multiobjective} - If the automl instance is multiobjective """ from __future__ import annotations @@ -24,10 +23,15 @@ from pathlib import Path import numpy as np +import sklearn.model_selection import autosklearn.metrics from autosklearn.automl import AutoMLClassifier, AutoMLRegressor from autosklearn.automl_common.common.utils.backend import Backend +from autosklearn.evaluation.abstract_evaluator import ( + MyDummyClassifier, + MyDummyRegressor, +) from pytest_cases import case, parametrize @@ -35,6 +39,11 @@ from test.fixtures.caching import Cache +def stop_at_first(smbo, run_info, result, time_left) -> bool: + """Used in some cases to enforce the only valid model is the dummy model""" + return False + + @case(tags=["classifier"]) def case_classifier( tmp_dir: str, @@ -60,7 +69,7 @@ def case_regressor( # ################################### # The following are fitted and cached # ################################### -@case(tags=["classifier", "fitted", "holdout", "cached"]) +@case(tags=["classifier", "fitted", "holdout"]) @parametrize("dataset", ["iris"]) def case_classifier_fitted_holdout_iterative( dataset: str, @@ -97,7 +106,7 @@ def case_classifier_fitted_holdout_iterative( return model -@case(tags=["classifier", "fitted", "cv", "cached"]) +@case(tags=["classifier", "fitted", "cv"]) @parametrize("dataset", ["iris"]) def case_classifier_fitted_cv( make_cache: Callable[[str], Cache], @@ -134,7 +143,7 @@ def case_classifier_fitted_cv( return model -@case(tags=["classifier", "fitted", "holdout", "cached", "multiobjective"]) +@case(tags=["classifier", "fitted", "holdout", "multiobjective"]) @parametrize("dataset", ["iris"]) def case_classifier_fitted_holdout_multiobjective( dataset: str, @@ -177,7 +186,7 @@ def case_classifier_fitted_holdout_multiobjective( return model -@case(tags=["regressor", "fitted", "holdout", "cached"]) +@case(tags=["regressor", "fitted", "holdout"]) @parametrize("dataset", ["boston"]) def case_regressor_fitted_holdout( make_cache: Callable[[str], Cache], @@ -212,7 +221,7 @@ def case_regressor_fitted_holdout( return model -@case(tags=["regressor", "fitted", "cv", "cached"]) +@case(tags=["regressor", 
"fitted", "cv"]) @parametrize("dataset", ["boston"]) def case_regressor_fitted_cv( make_cache: Callable[[str], Cache], @@ -249,7 +258,7 @@ def case_regressor_fitted_cv( return model -@case(tags=["classifier", "fitted", "no_ensemble", "cached"]) +@case(tags=["classifier", "fitted", "no_ensemble"]) @parametrize("dataset", ["iris"]) def case_classifier_fitted_no_ensemble( make_cache: Callable[[str], Cache], @@ -258,8 +267,7 @@ def case_classifier_fitted_no_ensemble( make_automl_classifier: Callable[..., AutoMLClassifier], make_sklearn_dataset: Callable[..., Tuple[np.ndarray, ...]], ) -> AutoMLClassifier: - """Case of a fitted classifier but ensemble was disabled by - not writing models to disk""" + """Case of a fitted classifier but ensemble was disabled""" key = f"case_classifier_fitted_no_ensemble_{dataset}" # This locks the cache for this item while we check, required for pytest-xdist @@ -270,7 +278,6 @@ def case_classifier_fitted_no_ensemble( temporary_directory=cache.path("backend"), delete_tmp_folder_after_terminate=False, ensemble_class=None, - disable_evaluator_output=True, ) X, y, Xt, yt = make_sklearn_dataset(name=dataset) @@ -282,3 +289,85 @@ def case_classifier_fitted_no_ensemble( model._backend = copy_backend(old=model._backend, new=make_backend()) return model + + +@case(tags=["classifier", "fitted"]) +def case_classifier_fitted_only_dummy( + make_cache: Callable[[str], Cache], + make_backend: Callable[..., Backend], + make_automl_classifier: Callable[..., AutoMLClassifier], +) -> AutoMLClassifier: + """Case of a fitted classifier but only dummy was found""" + key = "case_classifier_fitted_only_dummy" + + # This locks the cache for this item while we check, required for pytest-xdist + + with make_cache(key) as cache: + if "model" not in cache: + model = make_automl_classifier( + temporary_directory=cache.path("backend"), + delete_tmp_folder_after_terminate=False, + include={"classifier": ["bernoulli_nb"]}, # Just a meh model + get_trials_callback=stop_at_first, + ) + rand = np.random.RandomState(2) + _X = rand.random((100, 50)) + _y = rand.randint(0, 2, (100,)) + X, Xt, y, yt = sklearn.model_selection.train_test_split( + _X, _y, random_state=1 # Required to ensure dummy is best + ) + model.fit(X, y, dataset_name="random") + + # We now validate that indeed, the only model is the Dummy + members = list(model.models_.values()) + if len(members) != 1 and not isinstance(members[0], MyDummyClassifier): + raise ValueError("Should only have one model, dummy\n", members) + + cache.save(model, "model") + + model = cache.load("model") + model._backend = copy_backend(old=model._backend, new=make_backend()) + + return model + + +@case(tags=["regressor", "fitted"]) +def case_regressor_fitted_only_dummy( + make_cache: Callable[[str], Cache], + make_backend: Callable[..., Backend], + make_automl_regressor: Callable[..., AutoMLRegressor], +) -> AutoMLRegressor: + """Case of a fitted classifier but only dummy was found""" + key = "case_regressor_fitted_only_dummy" + + # This locks the cache for this item while we check, required for pytest-xdist + + with make_cache(key) as cache: + if "model" not in cache: + model = make_automl_regressor( + temporary_directory=cache.path("backend"), + delete_tmp_folder_after_terminate=False, + include={"regressor": ["k_nearest_neighbors"]}, # Just a meh model + get_trials_callback=stop_at_first, + ) + + rand = np.random.RandomState(2) + _X = rand.random((100, 50)) + _y = rand.random((100,)) + + X, Xt, y, yt = sklearn.model_selection.train_test_split( + _X, _y, 
random_state=1 # Required to ensure dummy is best + ) + model.fit(X, y, dataset_name="random") + + # We now validate that indeed, the only model is the Dummy + members = list(model.models_.values()) + if len(members) != 1 and not isinstance(members[0], MyDummyRegressor): + raise ValueError("Should only have one model, dummy\n", members) + + cache.save(model, "model") + + model = cache.load("model") + model._backend = copy_backend(old=model._backend, new=make_backend()) + + return model diff --git a/test/test_automl/test_construction.py b/test/test_automl/test_construction.py index 5b15812acd..5b68d35118 100644 --- a/test/test_automl/test_construction.py +++ b/test/test_automl/test_construction.py @@ -1,9 +1,4 @@ -"""Property based Tests - -These test are for checking properties of already fitted models. Any test that does -tests using cases should not modify the state as these models are cached between tests -to reduce training time. -""" +"""Test things related to only constructing an AutoML instance""" from typing import Any, Dict, Optional, Union from autosklearn.automl import AutoML diff --git a/test/test_automl/test_dataset_compression.py b/test/test_automl/test_dataset_compression.py index d50869ebbf..0a7e5a18bf 100644 --- a/test/test_automl/test_dataset_compression.py +++ b/test/test_automl/test_dataset_compression.py @@ -1,3 +1,4 @@ +"""Test things related to how AutoML compresses the dataset size""" from typing import Any, Callable, Dict import numpy as np diff --git a/test/test_automl/test_dummy_predictions.py b/test/test_automl/test_dummy_predictions.py index c4aa560791..c593e7f4cf 100644 --- a/test/test_automl/test_dummy_predictions.py +++ b/test/test_automl/test_dummy_predictions.py @@ -1,3 +1,7 @@ +"""Test the dummy predictor of AutoML + +Dummy models can serve as an early warning of issues with parameters during fit +""" from __future__ import annotations from typing import Callable, Sequence, Tuple @@ -183,6 +187,11 @@ def test_crash_due_to_memory_exception( def test_raises_if_no_metric_set(make_automl: Callable[..., AutoML]) -> None: + """ + Expects + ------- + * raise if there was no metric set when calling `_do_dummy_prediction()` + """ automl = make_automl() with pytest.raises(ValueError, match="Metric/Metrics was/were not set"): automl._do_dummy_prediction() @@ -193,10 +202,17 @@ def test_raises_invalid_metric( make_automl: Callable[..., AutoML], make_sklearn_dataset: Callable[..., XYDataManager], ) -> None: + """ + Expects + ------- + * Should raise an error if the given metric is not applicable to a given task type + """ + # `precision` is not applicable to MULTICLASS_CLASSIFICATION dataset = "iris" task = MULTICLASS_CLASSIFICATION + metrics = [accuracy, precision] - automl = make_automl(metrics=[accuracy, precision]) + automl = make_automl(metrics=metrics) automl._logger = mock_logger datamanager = make_sklearn_dataset( diff --git a/test/test_automl/test_early_stopping.py b/test/test_automl/test_early_stopping.py new file mode 100644 index 0000000000..4aa7192180 --- /dev/null +++ b/test/test_automl/test_early_stopping.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Callable + +if TYPE_CHECKING: + import numpy as np + from smac.optimizer.smbo import SMBO + from smac.runhistory.runhistory import RunInfo, RunValue + + from autosklearn.automl import AutoMLClassifier + + +def test_early_stopping( + make_automl_classifier: Callable[..., AutoMLClassifier], + make_sklearn_dataset: Callable[..., tuple[np.ndarray, ...]], +) -> 
None: + """ + Expects + ------- + * Should early after fitting 2 models + """ + + def callback( + smbo: SMBO, + run_info: RunInfo, + result: RunValue, + time_left: float, + ) -> bool | None: + if int(result.additional_info["num_run"]) >= 2: + return False + + automl = make_automl_classifier(get_trials_callback=callback) + + X_train, Y_train, X_test, Y_test = make_sklearn_dataset("iris") + automl.fit(X_train, Y_train) + + assert len(automl.runhistory_.data) == 2 diff --git a/test/test_automl/test_fit.py b/test/test_automl/test_fit.py index 2defa2518b..02992e5e13 100644 --- a/test/test_automl/test_fit.py +++ b/test/test_automl/test_fit.py @@ -1,3 +1,4 @@ +"""Test specific ways of calling `fit` of AutoML""" from typing import Any, Callable, Dict, Optional, Tuple, Union import numpy as np diff --git a/test/test_automl/test_fit_pipeline.py b/test/test_automl/test_fit_pipeline.py index 137b57a5c3..5d1d648fc4 100644 --- a/test/test_automl/test_fit_pipeline.py +++ b/test/test_automl/test_fit_pipeline.py @@ -1 +1 @@ -"""TODO""" +"""Test specific ways of calling `fit_pipeline`""" diff --git a/test/test_automl/test_model_predict.py b/test/test_automl/test_model_predict.py index a301d1a9a5..b43c488220 100644 --- a/test/test_automl/test_model_predict.py +++ b/test/test_automl/test_model_predict.py @@ -1,3 +1,4 @@ +"""Test the _model_predict helper function such that it shapes output correctly""" from typing import Callable, Dict, Tuple import warnings @@ -19,7 +20,10 @@ class WarningModel: + """Simple model that returns incorrect shape and issues warning""" + def predict(self, X: np.ndarray) -> np.ndarray: + """Shout a warning during prediction""" warnings.warn("shout") return X diff --git a/test/test_automl/test_outputs.py b/test/test_automl/test_outputs.py deleted file mode 100644 index 5b31e60331..0000000000 --- a/test/test_automl/test_outputs.py +++ /dev/null @@ -1,118 +0,0 @@ -from pathlib import Path - -from autosklearn.automl import AutoML -from autosklearn.ensemble_building.builder import CANDIDATES_FILENAME - -from pytest import mark -from pytest_cases import parametrize_with_cases -from pytest_cases.filters import has_tag - -import test.test_automl.cases as cases -from test.conftest import DEFAULT_SEED - -# Some filters -has_ensemble = has_tag("fitted") & ~has_tag("no_ensemble") -no_ensemble = has_tag("fitted") & has_tag("no_ensemble") - - -@mark.todo -def test_datamanager_stored_contents() -> None: - ... 
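Several of the reorganized test modules in this patch select fitted automl cases by composing pytest-cases tag filters. A minimal sketch of that pattern follows; the test name and assertion are illustrative and not taken from the patch.

from pytest_cases import parametrize_with_cases
from pytest_cases.filters import has_tag

import test.test_automl.cases as cases

# Fitted cases that built an ensemble, excluding the multiobjective ones.
fitted_single_objective = (
    has_tag("fitted") & ~has_tag("no_ensemble") & ~has_tag("multiobjective")
)


@parametrize_with_cases("automl", cases=cases, filter=fitted_single_objective)
def test_at_least_one_model(automl) -> None:
    # Every matching case should expose at least one fitted model.
    assert len(automl.models_) >= 1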
- - -@parametrize_with_cases("automl", cases=cases, filter=has_ensemble) -def test_paths_created(automl: AutoML) -> None: - """ - Parameters - ---------- - automl : AutoML - A previously fitted automl - - Expects - ------- - * The given paths should exist after the automl has been run and fitted - """ - assert automl._backend is not None - - partial = Path(automl._backend.internals_directory) - expected = [ - partial / fixture - for fixture in ( - "true_targets_ensemble.npy", - f"start_time_{DEFAULT_SEED}", - "datamanager.pkl", - "runs", - ) - ] - - for path in expected: - assert path.exists() - - -@parametrize_with_cases("automl", cases=cases, filter=has_ensemble) -def test_paths_created_with_ensemble(automl: AutoML) -> None: - """ - Parameters - ---------- - automl : AutoML - A previously fitted automl - - Expects - ------- - * The given paths for an automl with an ensemble should include paths - specific to ensemble building - """ - assert automl._backend is not None - - partial = Path(automl._backend.internals_directory) - expected = [ - partial / fixture - for fixture in ( - "ensembles", - "ensemble_history.json", - CANDIDATES_FILENAME, - ) - ] - - for path in expected: - assert path.exists() - - -@parametrize_with_cases("automl", cases=cases, filter=has_ensemble) -def test_at_least_one_model_and_predictions(automl: AutoML) -> None: - """ - Expects - ------- - * There should be at least one models saved - * Each model saved should have predictions for the ensemble - """ - assert automl._backend is not None - runs_dir = Path(automl._backend.get_runs_directory()) - - runs = list(runs_dir.iterdir()) - assert len(runs) > 0 - - at_least_one = False - for run in runs: - prediction_files = run.glob("predictions_ensemble*.npy") - model_files = run.glob("*.*.model") - - if any(prediction_files): - at_least_one = True - assert any(model_files), "Run produced prediction but no model" - - assert at_least_one, "No runs produced predictions" - - -@parametrize_with_cases("automl", cases=cases, filter=has_ensemble) -def test_at_least_one_ensemble(automl: AutoML) -> None: - """ - Expects - ------- - * There should be at least one ensemble generated - """ - assert automl._backend is not None - ens_dir = Path(automl._backend.get_ensemble_dir()) - - # TODO make more generic - assert len(list(ens_dir.glob("*.ensemble"))) > 0 diff --git a/test/test_automl/test_pareto_front.py b/test/test_automl/test_pareto_front.py new file mode 100644 index 0000000000..8ff38a04a6 --- /dev/null +++ b/test/test_automl/test_pareto_front.py @@ -0,0 +1,36 @@ +"""Test the output of loading the pareto set from an automl instance""" +from autosklearn.automl import AutoML + +from pytest_cases import parametrize_with_cases +from pytest_cases.filters import has_tag + +import test.test_automl.cases as cases + +has_ensemble = has_tag("fitted") & ~has_tag("no_ensemble") + +single_objective = has_ensemble & ~has_tag("multiobjective") +multi_objective = has_ensemble & has_tag("multiobjective") + + +@parametrize_with_cases("automl", cases=cases, filter=single_objective) +def test_can_output_pareto_front_singleobjective(automl: AutoML) -> None: + """ + Expects + ------- + * Non-multiobjective instances should have a pareto set of size 1 + """ + pareto_set = automl._load_pareto_set() + + assert len(pareto_set) == 1 + + +@parametrize_with_cases("automl", cases=cases, filter=multi_objective) +def test_can_output_pareto_front_multiobjective(automl: AutoML) -> None: + """ + Expects + ------- + * Multiobjective ensembles should return >= 1, 
#TODO should test it's pareto optimal + """ + pareto_set = automl._load_pareto_set() + + assert len(pareto_set) >= 1 diff --git a/test/test_automl/test_performance.py b/test/test_automl/test_performance.py index e69de29bb2..76c1a0d9d4 100644 --- a/test/test_automl/test_performance.py +++ b/test/test_automl/test_performance.py @@ -0,0 +1,47 @@ +"""Test the performance of automl instances after fitting""" + +import numpy as np +from sklearn.ensemble import VotingClassifier, VotingRegressor + +from autosklearn.automl import AutoML + +from pytest_cases import parametrize_with_cases + +import test.test_automl.cases as cases + + +@parametrize_with_cases("automl", cases.case_classifier_fitted_holdout_multiobjective) +def test_performance_with_multiobjective(automl: AutoML) -> None: + """ + Expects + ------- + * Auto-sklearn can predict/predict_proba and has a model + * Each ensemble in the pareto_set can predict/predict_proba + """ + # TODO: This test is hyperspecific to this one case + # + # Long term we probably want to return additional info about the case so we can + # test things for other than this case + + # Check that the predict function works + X = np.array([[1.0, 1.0, 1.0, 1.0]]) + + assert automl.predict_proba(X).shape == (1, 3) + assert automl.predict(X).shape == (1,) + + pareto_front = automl._load_pareto_set() + for ensemble in pareto_front: + + assert isinstance(ensemble, (VotingClassifier, VotingRegressor)) + + y_pred = ensemble.predict_proba(X) + assert y_pred.shape == (1, 3) + + y_pred = ensemble.predict(X) + assert y_pred in ["setosa", "versicolor", "virginica"] + + statistics = automl.sprint_statistics() + assert "Metrics" in statistics + assert ("Best validation score: 0.9" in statistics) or ( + "Best validation score: 1.0" in statistics + ), statistics diff --git a/test/test_automl/test_performance_over_time.py b/test/test_automl/test_performance_over_time.py index d5cc327a41..f38bdd7ee2 100644 --- a/test/test_automl/test_performance_over_time.py +++ b/test/test_automl/test_performance_over_time.py @@ -1,3 +1,4 @@ +"""Test the performance over time functionality of automl instances""" from autosklearn.automl import AutoML from pytest_cases import parametrize_with_cases diff --git a/test/test_automl/test_post_fit.py b/test/test_automl/test_post_fit.py index ccc5f25b9b..37fcd63eca 100644 --- a/test/test_automl/test_post_fit.py +++ b/test/test_automl/test_post_fit.py @@ -1,11 +1,20 @@ -import numpy as np -from sklearn.ensemble import VotingClassifier, VotingRegressor +"""Check the internal state of the automl instances after it has been fitted""" + +from pathlib import Path from autosklearn.automl import AutoML +from autosklearn.ensemble_building.builder import CANDIDATES_FILENAME +from pytest import mark from pytest_cases import parametrize_with_cases +from pytest_cases.filters import has_tag import test.test_automl.cases as cases +from test.conftest import DEFAULT_SEED + +# Some filters +has_ensemble = has_tag("fitted") & ~has_tag("no_ensemble") +no_ensemble = has_tag("fitted") & has_tag("no_ensemble") @parametrize_with_cases("automl", cases=cases, has_tag=["fitted", "holdout"]) @@ -52,7 +61,7 @@ def test_cv_loaded_models(automl: AutoML) -> None: assert set(automl.cv_models_.keys()) == set(ensemble_identifiers) -@parametrize_with_cases("automl", cases=cases, has_tag=["fitted", "no_ensemble"]) +@parametrize_with_cases("automl", cases=cases, has_tag=no_ensemble) def test_no_ensemble(automl: AutoML) -> None: """ Parameters @@ -71,36 +80,99 @@ def test_no_ensemble(automl: 
AutoML) -> None: assert len(automl.cv_models_) == 0 -@parametrize_with_cases("automl", cases, has_tag=["multiobjective"]) -def test__load_pareto_front(automl: AutoML) -> None: +@mark.todo +def test_datamanager_stored_contents() -> None: """ - Parameters - ---------- - automl : AutoML - An AutoML object fitted with multiple objective metrics + Expects + ------- + * TODO + """ + ... + + +@parametrize_with_cases("automl", cases=cases, filter=has_ensemble) +def test_paths_created(automl: AutoML) -> None: + """ + Expects + ------- + * The given paths should exist after the automl has been run and fitted + """ + assert automl._backend is not None + + partial = Path(automl._backend.internals_directory) + expected = [ + partial / fixture + for fixture in ( + "true_targets_ensemble.npy", + f"start_time_{DEFAULT_SEED}", + "datamanager.pkl", + "runs", + ) + ] + + for path in expected: + assert path.exists() + + +@parametrize_with_cases("automl", cases=cases, filter=has_ensemble) +def test_paths_created_with_ensemble(automl: AutoML) -> None: + """ + Expects + ------- + * The given paths for an automl with an ensemble should include paths + specific to ensemble building + """ + assert automl._backend is not None + + partial = Path(automl._backend.internals_directory) + expected = [ + partial / fixture + for fixture in ( + "ensembles", + "ensemble_history.json", + CANDIDATES_FILENAME, + ) + ] + + for path in expected: + assert path.exists() + +@parametrize_with_cases("automl", cases=cases, filter=has_ensemble) +def test_at_least_one_model_and_predictions(automl: AutoML) -> None: + """ Expects ------- - * Auto-sklearn can predict and has a model - * _load_pareto_front returns one scikit-learn ensemble - """ - # Check that the predict function works - X = np.array([[1.0, 1.0, 1.0, 1.0]]) - - assert automl.predict_proba(X).shape == (1, 3) - assert automl.predict(X).shape == (1,) - - pareto_front = automl._load_pareto_set() - assert len(pareto_front) == 1 - for ensemble in pareto_front: - assert isinstance(ensemble, (VotingClassifier, VotingRegressor)) - y_pred = ensemble.predict_proba(X) - assert y_pred.shape == (1, 3) - y_pred = ensemble.predict(X) - assert y_pred in ["setosa", "versicolor", "virginica"] - - statistics = automl.sprint_statistics() - assert "Metrics" in statistics - assert ("Best validation score: 0.9" in statistics) or ( - "Best validation score: 1.0" in statistics - ), statistics + * There should be at least one models saved + * Each model saved should have predictions for the ensemble + """ + assert automl._backend is not None + runs_dir = Path(automl._backend.get_runs_directory()) + + runs = list(runs_dir.iterdir()) + assert len(runs) > 0 + + at_least_one = False + for run in runs: + prediction_files = run.glob("predictions_ensemble*.npy") + model_files = run.glob("*.*.model") + + if any(prediction_files): + at_least_one = True + assert any(model_files), "Run produced prediction but no model" + + assert at_least_one, "No runs produced predictions" + + +@parametrize_with_cases("automl", cases=cases, filter=has_ensemble) +def test_at_least_one_ensemble(automl: AutoML) -> None: + """ + Expects + ------- + * There should be at least one ensemble generated + """ + assert automl._backend is not None + ens_dir = Path(automl._backend.get_ensemble_dir()) + + # TODO make more generic + assert len(list(ens_dir.glob("*.ensemble"))) > 0 diff --git a/test/test_automl/test_predict.py b/test/test_automl/test_predict.py index 137b57a5c3..4bad9859be 100644 --- a/test/test_automl/test_predict.py +++ 
b/test/test_automl/test_predict.py @@ -1 +1 @@ -"""TODO""" +"""Test predictions of an automl instance""" diff --git a/test/test_automl/test_refit.py b/test/test_automl/test_refit.py index 341486ab13..0f5b42fff0 100644 --- a/test/test_automl/test_refit.py +++ b/test/test_automl/test_refit.py @@ -1,3 +1,4 @@ +"""Test the refitting functionality of an automl instance""" from typing import Callable, Union from itertools import repeat diff --git a/test/test_automl/test_show_models.py b/test/test_automl/test_show_models.py index 72b2e4f8d6..93a4aac651 100644 --- a/test/test_automl/test_show_models.py +++ b/test/test_automl/test_show_models.py @@ -1,3 +1,4 @@ +"""Test the show models functinality of an automl instance""" from autosklearn.automl import AutoML from pytest_cases import parametrize_with_cases diff --git a/test/test_automl/test_sklearn_compliance.py b/test/test_automl/test_sklearn_compliance.py index ce747e1bb8..c96468e0e2 100644 --- a/test/test_automl/test_sklearn_compliance.py +++ b/test/test_automl/test_sklearn_compliance.py @@ -1,7 +1,9 @@ -""" +"""Test that autosklearn is sklearn compliant + Note ---- -This is far from complete at the moment +* This is far from complete at the moment +* This should probably be tested on AutoSklearnEstimators not AutoML """ from typing import List, Union diff --git a/test/test_ensemble_builder/test_ensemble_builder.py b/test/test_ensemble_builder/test_ensemble_builder.py index a46da42ef1..9a7927384d 100644 --- a/test/test_ensemble_builder/test_ensemble_builder.py +++ b/test/test_ensemble_builder/test_ensemble_builder.py @@ -10,14 +10,15 @@ from autosklearn.automl_common.common.utils.backend import Backend from autosklearn.ensemble_building import EnsembleBuilder, Run -from autosklearn.metrics import make_scorer +from autosklearn.metrics import Scorer, accuracy, make_scorer from autosklearn.util.functional import bound, pairs import pytest from pytest_cases import fixture, parametrize -from unittest.mock import patch +from unittest.mock import Mock, patch from test.conftest import DEFAULT_SEED +from test.fixtures.metrics import acc_with_X_data @fixture @@ -673,6 +674,27 @@ def test_delete_runs_does_not_delete_dummy( assert set(loaded.values()) == set(dummy_runs) +def test_fit_ensemble_with_no_targets_raises( + builder: EnsembleBuilder, + make_run: Callable[..., Run], +) -> None: + """ + Expects + ------- + * If no ensemble targets can be found then `fit_ensemble` should fail + """ + # Delete the targets and then try fit ensemble + targets_path = Path(builder.backend._get_targets_ensemble_filename()) + targets_path.unlink() + + candidates = [make_run(backend=builder.backend) for _ in range(5)] + with pytest.raises(ValueError, match="`fit_ensemble` could not find any .*"): + builder.fit_ensemble( + candidates=candidates, + runs=candidates, + ) + + def test_fit_ensemble_produces_ensemble( builder: EnsembleBuilder, make_run: Callable[..., Run], @@ -682,16 +704,13 @@ def test_fit_ensemble_produces_ensemble( ------- * Should produce an ensemble if all runs have predictions """ - X_data = builder.X_data("ensemble") targets = builder.targets("ensemble") assert targets is not None predictions = targets runs = [make_run(predictions={"ensemble": predictions}) for _ in range(10)] - ensemble = builder.fit_ensemble( - candidates=runs, X_data=X_data, targets=targets, runs=runs - ) + ensemble = builder.fit_ensemble(candidates=runs, runs=runs) assert ensemble is not None @@ -823,3 +842,160 @@ def test_deletion_will_not_break_current_ensemble( for run in new_runs: 
assert run in available_runs + + +@parametrize("metrics", [accuracy, acc_with_X_data, [accuracy, acc_with_X_data]]) +def test_will_build_ensemble_with_different_metrics( + make_ensemble_builder: Callable[..., EnsembleBuilder], + make_run: Callable[..., Run], + metrics: Scorer | list[Scorer], +) -> None: + """ + Expects + ------- + * Should be able to build a valid ensemble with different combinations of metrics + * Should produce a validation score for both "ensemble" and "test" scores + """ + if not isinstance(metrics, list): + metrics = [metrics] + + builder = make_ensemble_builder(metrics=metrics) + + # Make some runs and stick them in the same backend as the builder + # Dummy just has a terrible loss for all metrics + make_run( + dummy=True, + losses={m.name: 1000 for m in metrics}, + backend=builder.backend, + ) + + # "Proper" runs will have the correct targets and so be better than dummy + run_predictions = { + "ensemble": builder.targets("ensemble"), + "test": builder.targets("test"), + } + for _ in range(5): + make_run(predictions=run_predictions, backend=builder.backend) + + history, nbest = builder.main() + + # Should only produce one step + assert len(history) == 1 + hist = history[0] + + # Each of these two keys should be present + for key in ["ensemble_optimization_score", "ensemble_test_score"]: + assert key in hist + + # TODO should be updated in next PR + # Each of these scores should contain all the metrics + # for metric in metrics: + # assert metric.name in hist[key] + + +@parametrize("n_least_prioritized", [1, 2, 3, 4]) +@parametrize("metrics", [accuracy, acc_with_X_data, [accuracy, acc_with_X_data]]) +def test_fit_ensemble_kwargs_priorities( + make_ensemble_builder: Callable[..., EnsembleBuilder], + make_run: Callable[..., Run], + metrics: Scorer | list[Scorer], + n_least_prioritized: int, +) -> None: + """ + Expects + ------- + * Should favour 1) function kwargs, 2) function params 3) init_kwargs 4) init_params + """ + if not isinstance(metrics, list): + metrics = [metrics] + + class FakeEnsembleClass: + def __init__(self, *args, **kwargs): + self.args = args + self.kwargs = kwargs + + def fit(*args, **kwargs) -> None: + pass + + # We establish the priorty order and give each one of them a custom metric + priority = ["function_kwargs", "function_params", "init_kwargs", "init_params"] + + # We reverse the priority and use the `n_least_prioritized` ones + # with `n_least_prioritized = 3` + # reversed = ["init_params", "init_kwargs", "function_params", "function_kwargs"] + # used = ["init_params", "init_kwargs", "function_params"] + # highest = "function_params" + reversed_priority = list(reversed(priority)) + used = reversed_priority[:n_least_prioritized] + highest_priority = used[-1] + + def S(name: str) -> Scorer: + return make_scorer(name, lambda: None) + + # We now pass in all the places this arguments could be specified + # Naming them specifically to make it more clear in setup below + builder_metric = [S("init_params")] if "init_params" in used else None + fit_ensemble_metric = [S("function_params")] if "function_params" in used else None + + builder_ensemble_kwargs = ( + {"metrics": [S("init_kwargs")]} if "init_kwargs" in used else None + ) + fit_ensemble_kwargs = ( + {"metrics": [S("function_kwargs")]} if "function_kwargs" in used else None + ) + + builder = make_ensemble_builder( + metrics=builder_metric, + ensemble_kwargs=builder_ensemble_kwargs, + ) + + candidates = [make_run() for _ in range(5)] # Just so something can be run + + ensemble = 
builder.fit_ensemble( + metrics=fit_ensemble_metric, + ensemble_class=FakeEnsembleClass, + ensemble_kwargs=fit_ensemble_kwargs, + candidates=candidates, + runs=candidates, + ) + + # These are the final metrics passed to the ensemble builder when constructed + passed_metrics = ensemble.kwargs["metrics"] + metric = passed_metrics[0] + + assert metric.name == highest_priority + + +@parametrize("metric, should_be_loaded", [(accuracy, False), (acc_with_X_data, True)]) +def test_X_data_only_loaded_when_required( + make_ensemble_builder: Callable[..., EnsembleBuilder], + make_run: Callable[..., Run], + metric: Scorer, + should_be_loaded: bool, +) -> None: + """ + Expects + ------- + * Should only load X_train if it's required + * TODO should only load X_test if it's required + """ + metrics = [metric] + builder = make_ensemble_builder(metrics=metrics) + + # Make a dummy which is required for the whole pipeline to run + make_run(dummy=True, losses={metric.name: 1000}, backend=builder.backend) + + # Make a run that has no losses recorded, forcing us to use the metric + make_run( + dummy=False, + predictions={"ensemble": builder.targets("ensemble")}, + losses=None, + backend=builder.backend, + ) + + ret_value = builder.X_data() + builder.X_data = Mock(return_value=ret_value) + + builder.main() + + assert builder.X_data.called == should_be_loaded diff --git a/test/test_evaluation/evaluation_util.py b/test/test_evaluation/evaluation_util.py index 62623a50ba..38040f2e4e 100644 --- a/test/test_evaluation/evaluation_util.py +++ b/test/test_evaluation/evaluation_util.py @@ -133,27 +133,14 @@ def get_multiclass_classification_datamanager(): np.random.shuffle(indices) X_train = X_train[indices] Y_train = Y_train[indices] - - X_valid = X_test[ - :25, - ] - Y_valid = Y_test[ - :25, - ] - X_test = X_test[ - 25:, - ] - Y_test = Y_test[ - 25:, - ] + X_test = X_test[25:] + Y_test = Y_test[25:] D = Dummy() D.info = {"task": MULTICLASS_CLASSIFICATION, "is_sparse": False, "label_num": 3} D.data = { "X_train": X_train, "Y_train": Y_train, - "X_valid": X_valid, - "Y_valid": Y_valid, "X_test": X_test, "Y_test": Y_test, } @@ -196,34 +183,16 @@ def get_multilabel_classification_datamanager(): Y_train = Y_train[indices] Y_train = np.array(convert_to_bin(Y_train, 3)) - # for i in range(Y_train_.shape[0]): - # Y_train_[:, Y_train[i]] = 1 - # Y_train = Y_train_ Y_test = np.array(convert_to_bin(Y_test, 3)) - # for i in range(Y_test_.shape[0]): - # Y_test_[:, Y_test[i]] = 1 - # Y_test = Y_test_ - X_valid = X_test[ - :25, - ] - Y_valid = Y_test[ - :25, - ] - X_test = X_test[ - 25:, - ] - Y_test = Y_test[ - 25:, - ] + X_test = X_test[25:] + Y_test = Y_test[25:] D = Dummy() D.info = {"task": MULTILABEL_CLASSIFICATION, "is_sparse": False, "label_num": 3} D.data = { "X_train": X_train, "Y_train": Y_train, - "X_valid": X_valid, - "Y_valid": Y_valid, "X_test": X_test, "Y_test": Y_test, } @@ -247,26 +216,14 @@ def get_binary_classification_datamanager(): X_test = X_test[eliminate_class_two] Y_test = Y_test[eliminate_class_two] - X_valid = X_test[ - :25, - ] - Y_valid = Y_test[ - :25, - ] - X_test = X_test[ - 25:, - ] - Y_test = Y_test[ - 25:, - ] + X_test = X_test[25:] + Y_test = Y_test[25:] D = Dummy() D.info = {"task": BINARY_CLASSIFICATION, "is_sparse": False, "label_num": 2} D.data = { "X_train": X_train, "Y_train": Y_train.reshape((-1, 1)), - "X_valid": X_valid, - "Y_valid": Y_valid.reshape((-1, 1)), "X_test": X_test, "Y_test": Y_test.reshape((-1, 1)), } @@ -282,26 +239,14 @@ def get_regression_datamanager(): X_train = 
X_train[indices] Y_train = Y_train[indices] - X_valid = X_test[ - :200, - ] - Y_valid = Y_test[ - :200, - ] - X_test = X_test[ - 200:, - ] - Y_test = Y_test[ - 200:, - ] + X_test = X_test[200:] + Y_test = Y_test[200:] D = Dummy() D.info = {"task": REGRESSION, "is_sparse": False, "label_num": 1} D.data = { "X_train": X_train, "Y_train": Y_train.reshape((-1, 1)), - "X_valid": X_valid, - "Y_valid": Y_valid.reshape((-1, 1)), "X_test": X_test, "Y_test": Y_test.reshape((-1, 1)), } @@ -334,8 +279,6 @@ def get_500_classes_datamanager(): D.data = { "X_train": X[:700], "Y_train": Y[:700], - "X_valid": X[700:710], - "Y_valid": Y[700:710], "X_test": X[710:], "Y_test": Y[710:], } diff --git a/test/test_evaluation/test_abstract_evaluator.py b/test/test_evaluation/test_abstract_evaluator.py index 7bd52c0f76..e2473d738b 100644 --- a/test/test_evaluation/test_abstract_evaluator.py +++ b/test/test_evaluation/test_abstract_evaluator.py @@ -71,7 +71,6 @@ def test_finish_up_model_predicts_NaN(self): ae.Y_optimization = rs.rand(33, 3) predictions_ensemble = rs.rand(33, 3) predictions_test = rs.rand(25, 3) - predictions_valid = rs.rand(25, 3) # NaNs in prediction ensemble predictions_ensemble[5, 2] = np.NaN @@ -79,7 +78,6 @@ def test_finish_up_model_predicts_NaN(self): loss=0.1, train_loss=0.1, opt_pred=predictions_ensemble, - valid_pred=predictions_valid, test_pred=predictions_test, additional_run_info=None, final_call=True, @@ -89,37 +87,15 @@ def test_finish_up_model_predicts_NaN(self): self.assertEqual(loss, 1.0) self.assertEqual( additional_run_info, - {"error": "Model predictions for optimization set " "contains NaNs."}, + {"error": "Model predictions for optimization set contains NaNs."}, ) - # NaNs in prediction validation - predictions_ensemble[5, 2] = 0.5 - predictions_valid[5, 2] = np.NaN - _, loss, _, additional_run_info = ae.finish_up( - loss=0.1, - train_loss=0.1, - opt_pred=predictions_ensemble, - valid_pred=predictions_valid, - test_pred=predictions_test, - additional_run_info=None, - final_call=True, - file_output=True, - status=StatusType.SUCCESS, - ) - self.assertEqual(loss, 1.0) - self.assertEqual( - additional_run_info, - {"error": "Model predictions for validation set " "contains NaNs."}, - ) - - # NaNs in prediction test - predictions_valid[5, 2] = 0.5 + predictions_ensemble = rs.rand(33, 3) predictions_test[5, 2] = np.NaN _, loss, _, additional_run_info = ae.finish_up( loss=0.1, train_loss=0.1, opt_pred=predictions_ensemble, - valid_pred=predictions_valid, test_pred=predictions_test, additional_run_info=None, final_call=True, @@ -129,9 +105,8 @@ def test_finish_up_model_predicts_NaN(self): self.assertEqual(loss, 1.0) self.assertEqual( additional_run_info, - {"error": "Model predictions for test set contains " "NaNs."}, + {"error": "Model predictions for test set contains NaNs."}, ) - self.assertEqual(self.backend_mock.save_predictions_as_npy.call_count, 0) def test_disable_file_output(self): @@ -150,11 +125,9 @@ def test_disable_file_output(self): predictions_ensemble = rs.rand(33, 3) predictions_test = rs.rand(25, 3) - predictions_valid = rs.rand(25, 3) loss_, additional_run_info_ = ae.file_output( predictions_ensemble, - predictions_valid, predictions_test, ) @@ -179,7 +152,6 @@ def test_disable_file_output(self): loss_, additional_run_info_ = ae.file_output( predictions_ensemble, - predictions_valid, predictions_test, ) @@ -211,11 +183,6 @@ def test_disable_file_output(self): "ensemble_predictions" ] ) - self.assertIsNotNone( - 
self.backend_mock.save_numrun_to_dir.call_args_list[-1][1][ - "valid_predictions" - ] - ) self.assertIsNotNone( self.backend_mock.save_numrun_to_dir.call_args_list[-1][1][ "test_predictions" @@ -237,7 +204,6 @@ def test_disable_file_output(self): loss_, additional_run_info_ = ae.file_output( predictions_ensemble, - predictions_valid, predictions_test, ) @@ -249,11 +215,6 @@ def test_disable_file_output(self): "ensemble_predictions" ] ) - self.assertIsNotNone( - self.backend_mock.save_numrun_to_dir.call_args_list[-1][1][ - "valid_predictions" - ] - ) self.assertIsNotNone( self.backend_mock.save_numrun_to_dir.call_args_list[-1][1][ "test_predictions" @@ -296,11 +257,9 @@ def test_file_output(self): ae.Y_optimization = rs.rand(33, 3) predictions_ensemble = rs.rand(33, 3) predictions_test = rs.rand(25, 3) - predictions_valid = rs.rand(25, 3) ae.file_output( Y_optimization_pred=predictions_ensemble, - Y_valid_pred=predictions_valid, Y_test_pred=predictions_test, ) diff --git a/test/test_evaluation/test_test_evaluator.py b/test/test_evaluation/test_test_evaluator.py index 457661df03..02eedcca91 100644 --- a/test/test_evaluation/test_test_evaluator.py +++ b/test/test_evaluation/test_test_evaluator.py @@ -80,10 +80,10 @@ def test_datasets(self): ) evaluator.fit_predict_and_loss() - rval = read_queue(evaluator.queue) - self.assertEqual(len(rval), 1) - self.assertEqual(len(rval[0]), 3) - self.assertTrue(np.isfinite(rval[0]["loss"])) + return_value = read_queue(evaluator.queue) + self.assertEqual(len(return_value), 1) + self.assertEqual(len(return_value[0]), 3) + self.assertTrue(np.isfinite(return_value[0]["loss"])) class FunctionsTest(unittest.TestCase): @@ -124,11 +124,11 @@ def test_eval_test(self): port=self.port, additional_components=dict(), ) - rval = read_queue(self.queue) - self.assertEqual(len(rval), 1) - self.assertAlmostEqual(rval[0]["loss"], 0.07999999999999996) - self.assertEqual(rval[0]["status"], StatusType.SUCCESS) - self.assertNotIn("bac_metric", rval[0]["additional_run_info"]) + return_value = read_queue(self.queue) + self.assertEqual(len(return_value), 1) + self.assertAlmostEqual(return_value[0]["loss"], 0.07999999999999996) + self.assertEqual(return_value[0]["status"], StatusType.SUCCESS) + self.assertNotIn("bac_metric", return_value[0]["additional_run_info"]) def test_eval_test_multi_objective(self): metrics = { @@ -151,12 +151,12 @@ def test_eval_test_multi_objective(self): port=self.port, additional_components=dict(), ) - rval = read_queue(self.queue) - self.assertEqual(len(rval), 1) + return_value = read_queue(self.queue) + self.assertEqual(len(return_value), 1) for metric, loss in metrics.items(): - self.assertAlmostEqual(rval[0]["loss"][metric.name], loss) - self.assertEqual(rval[0]["status"], StatusType.SUCCESS) - self.assertNotIn("bac_metric", rval[0]["additional_run_info"]) + self.assertAlmostEqual(return_value[0]["loss"][metric.name], loss) + self.assertEqual(return_value[0]["status"], StatusType.SUCCESS) + self.assertNotIn("bac_metric", return_value[0]["additional_run_info"]) def test_eval_test_all_loss_functions(self): eval_t( @@ -175,8 +175,8 @@ def test_eval_test_all_loss_functions(self): port=self.port, additional_components=dict(), ) - rval = read_queue(self.queue) - self.assertEqual(len(rval), 1) + return_value = read_queue(self.queue) + self.assertEqual(len(return_value), 1) # Note: All metric here should be minimized fixture = { @@ -195,7 +195,7 @@ def test_eval_test_all_loss_functions(self): "num_run": -1, } - additional_run_info = 
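The evaluator tests above collect results by draining a queue with the `read_queue` test helper. A sketch of what such a helper looks like is below, assuming each queue item is a dict carrying at least `loss` and `status`; the real helper lives in the test utilities and may differ in details.

```python
# Drain a result queue until it is empty, returning the items in order.
import queue
from typing import Any, Dict, List


def read_queue(queue_: "queue.Queue") -> List[Dict[str, Any]]:
    stack: List[Dict[str, Any]] = []
    while True:
        try:
            return_value = queue_.get(timeout=1)
        except queue.Empty:
            break
        stack.append(return_value)
    return stack


if __name__ == "__main__":
    q: "queue.Queue" = queue.Queue()
    q.put({"loss": 0.08, "status": "SUCCESS", "additional_run_info": {}})
    results = read_queue(q)
    assert len(results) == 1 and results[0]["loss"] == 0.08
```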
rval[0]["additional_run_info"] + additional_run_info = return_value[0]["additional_run_info"] for key, value in fixture.items(): self.assertAlmostEqual(additional_run_info[key], fixture[key], msg=key) self.assertEqual( @@ -204,5 +204,5 @@ def test_eval_test_all_loss_functions(self): msg=sorted(additional_run_info.items()), ) self.assertIn("duration", additional_run_info) - self.assertAlmostEqual(rval[0]["loss"], 0.040000000000000036) - self.assertEqual(rval[0]["status"], StatusType.SUCCESS) + self.assertAlmostEqual(return_value[0]["loss"], 0.040000000000000036) + self.assertEqual(return_value[0]["status"], StatusType.SUCCESS) diff --git a/test/test_evaluation/test_train_evaluator.py b/test/test_evaluation/test_train_evaluator.py index 9413af5509..c8fe1c5f87 100644 --- a/test/test_evaluation/test_train_evaluator.py +++ b/test/test_evaluation/test_train_evaluator.py @@ -7,6 +7,7 @@ import shutil import sys import tempfile +from itertools import chain import numpy as np import sklearn.model_selection @@ -68,6 +69,24 @@ ) +class LossSideEffect(object): + """Some kind of re-used fixture for losses calculated""" + + def __init__(self): + # The 3 below is related to train, test, opt sets + self.losses = [ + {"accuracy": value} + for value in chain.from_iterable( + [i] * 3 for i in [1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2] + ) + ] + self.iteration = 0 + + def side_effect(self, *args, **kwargs): + self.iteration += 1 + return self.losses[self.iteration - 1] + + class Dummy(object): def __init__(self): self.name = "dummy" @@ -150,24 +169,23 @@ def test_holdout(self, pipeline_mock): evaluator.fit_predict_and_loss() - rval = read_queue(evaluator.queue) - self.assertEqual(len(rval), 1) - result = rval[0]["loss"] - self.assertEqual(len(rval[0]), 3) + return_value = read_queue(evaluator.queue) + self.assertEqual(len(return_value), 1) + result = return_value[0]["loss"] + self.assertEqual(len(return_value[0]), 3) self.assertRaises(queue.Empty, evaluator.queue.get, timeout=1) self.assertEqual(evaluator.file_output.call_count, 1) self.assertEqual(result, 0.45833333333333337) self.assertEqual(pipeline_mock.fit.call_count, 1) - # four calls because of train, holdout, validation and test set - self.assertEqual(pipeline_mock.predict_proba.call_count, 4) + # four calls because of train, holdout and test set + self.assertEqual(pipeline_mock.predict_proba.call_count, 3) self.assertEqual(evaluator.file_output.call_count, 1) + self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], 24) + self.assertEqual( - evaluator.file_output.call_args[0][1].shape[0], D.data["Y_valid"].shape[0] - ) - self.assertEqual( - evaluator.file_output.call_args[0][2].shape[0], D.data["Y_test"].shape[0] + evaluator.file_output.call_args[0][1].shape[0], D.data["Y_test"].shape[0] ) self.assertEqual(evaluator.model.fit.call_count, 1) @@ -240,46 +258,12 @@ def configuration_fully_fitted(self): class LossSideEffect(object): def __init__(self): + # The 3 below is related to train, test, opt sets self.losses = [ {"accuracy": value} - for value in [ - 1.0, - 1.0, - 1.0, - 1.0, - 0.9, - 0.9, - 0.9, - 0.9, - 0.8, - 0.8, - 0.8, - 0.8, - 0.7, - 0.7, - 0.7, - 0.7, - 0.6, - 0.6, - 0.6, - 0.6, - 0.5, - 0.5, - 0.5, - 0.5, - 0.4, - 0.4, - 0.4, - 0.4, - 0.3, - 0.3, - 0.3, - 0.3, - 0.2, - 0.2, - 0.2, - 0.2, - ] + for value in chain.from_iterable( + [i] * 3 for i in [1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2] + ) ] self.iteration = 0 @@ -294,15 +278,15 @@ def side_effect(self, *args, **kwargs): self.assertEqual(evaluator.file_output.call_count, 
9) for i in range(1, 10): - rval = evaluator.queue.get(timeout=1) - result = rval["loss"] + return_value = evaluator.queue.get(timeout=1) + result = return_value["loss"] self.assertAlmostEqual(result, 1.0 - (0.1 * (i - 1))) if i < 9: - self.assertEqual(rval["status"], StatusType.DONOTADVANCE) - self.assertEqual(len(rval), 3) + self.assertEqual(return_value["status"], StatusType.DONOTADVANCE) + self.assertEqual(len(return_value), 3) else: - self.assertEqual(rval["status"], StatusType.SUCCESS) - self.assertEqual(len(rval), 4) + self.assertEqual(return_value["status"], StatusType.SUCCESS) + self.assertEqual(len(return_value), 4) self.assertRaises(queue.Empty, evaluator.queue.get, timeout=1) self.assertEqual(pipeline_mock.iterative_fit.call_count, 9) @@ -310,16 +294,12 @@ def side_effect(self, *args, **kwargs): [cal[1]["n_iter"] for cal in pipeline_mock.iterative_fit.call_args_list], [2, 2, 4, 8, 16, 32, 64, 128, 256], ) - # 20 calls because of train, holdout, validation and test set - # and a total of five calls because of five iterations of fitting - self.assertEqual(evaluator.model.predict_proba.call_count, 36) - # 1/3 of 69 + + # 9 per split type + self.assertEqual(evaluator.model.predict_proba.call_count, 27) self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], 23) self.assertEqual( - evaluator.file_output.call_args[0][1].shape[0], D.data["Y_valid"].shape[0] - ) - self.assertEqual( - evaluator.file_output.call_args[0][2].shape[0], D.data["Y_test"].shape[0] + evaluator.file_output.call_args[0][1].shape[0], D.data["Y_test"].shape[0] ) self.assertEqual(evaluator.file_output.call_count, 9) self.assertEqual(evaluator.model.fit.call_count, 0) @@ -438,20 +418,19 @@ def side_effect(self, *args, **kwargs): self.assertEqual(evaluator.file_output.call_count, 2) for i in range(1, 3): - rval = evaluator.queue.get(timeout=1) - self.assertAlmostEqual(rval["loss"], 1.0 - (0.2 * i)) + return_value = evaluator.queue.get(timeout=1) + self.assertAlmostEqual(return_value["loss"], 1.0 - (0.2 * i)) self.assertRaises(queue.Empty, evaluator.queue.get, timeout=1) self.assertEqual(pipeline_mock.iterative_fit.call_count, 2) - # eight calls because of train, holdout, the validation and the test set + + # 6 calls because of train, holdout and test set # and a total of two calls each because of two iterations of fitting - self.assertEqual(evaluator.model.predict_proba.call_count, 8) + self.assertEqual(evaluator.model.predict_proba.call_count, 6) + self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], 23) self.assertEqual( - evaluator.file_output.call_args[0][1].shape[0], D.data["Y_valid"].shape[0] - ) - self.assertEqual( - evaluator.file_output.call_args[0][2].shape[0], D.data["Y_test"].shape[0] + evaluator.file_output.call_args[0][1].shape[0], D.data["Y_test"].shape[0] ) self.assertEqual(evaluator.file_output.call_count, 2) self.assertEqual(evaluator.model.fit.call_count, 0) @@ -499,19 +478,18 @@ def test_iterative_holdout_not_iterative(self, pipeline_mock): evaluator.fit_predict_and_loss(iterative=True) self.assertEqual(evaluator.file_output.call_count, 1) - rval = evaluator.queue.get(timeout=1) - self.assertAlmostEqual(rval["loss"], 0.47826086956521741) + return_value = evaluator.queue.get(timeout=1) + self.assertAlmostEqual(return_value["loss"], 0.47826086956521741) self.assertRaises(queue.Empty, evaluator.queue.get, timeout=1) self.assertEqual(pipeline_mock.iterative_fit.call_count, 0) - # four calls for train, opt, valid and test - self.assertEqual(evaluator.model.predict_proba.call_count, 
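The `n_iter` values asserted above (`[2, 2, 4, 8, 16, 32, 64, 128, 256]`) are consistent with per-call increments of a cumulative iteration budget that doubles each step (2, 4, 8, ..., 512). A small sketch of that relationship, under that assumption:

```python
# Derive the per-call increments from a doubling cumulative budget.
from typing import List


def iteration_increments(n_steps: int, start: int = 2) -> List[int]:
    totals = [start * 2**i for i in range(n_steps)]  # 2, 4, 8, ..., 512
    return [totals[0]] + [totals[i] - totals[i - 1] for i in range(1, n_steps)]


assert iteration_increments(9) == [2, 2, 4, 8, 16, 32, 64, 128, 256]
```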
4) + + # 3 calls for train, opt and test + self.assertEqual(evaluator.model.predict_proba.call_count, 3) + self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], 23) self.assertEqual( - evaluator.file_output.call_args[0][1].shape[0], D.data["Y_valid"].shape[0] - ) - self.assertEqual( - evaluator.file_output.call_args[0][2].shape[0], D.data["Y_test"].shape[0] + evaluator.file_output.call_args[0][1].shape[0], D.data["Y_test"].shape[0] ) self.assertEqual(evaluator.file_output.call_count, 1) self.assertEqual(evaluator.model.fit.call_count, 1) @@ -554,26 +532,23 @@ def test_cv(self, pipeline_mock): evaluator.fit_predict_and_loss() - rval = read_queue(evaluator.queue) - self.assertEqual(len(rval), 1) - result = rval[0]["loss"] - self.assertEqual(len(rval[0]), 3) + return_value = read_queue(evaluator.queue) + self.assertEqual(len(return_value), 1) + result = return_value[0]["loss"] + self.assertEqual(len(return_value[0]), 3) self.assertRaises(queue.Empty, evaluator.queue.get, timeout=1) self.assertEqual(evaluator.file_output.call_count, 1) self.assertEqual(result, 0.463768115942029) self.assertEqual(pipeline_mock.fit.call_count, 5) - # Fifteen calls because of the training, holdout, validation and - # test set (4 sets x 5 folds = 20) - self.assertEqual(pipeline_mock.predict_proba.call_count, 20) + + # 15 calls because of the training (5), holdout (5) and test set (5) + self.assertEqual(pipeline_mock.predict_proba.call_count, 15) self.assertEqual( evaluator.file_output.call_args[0][0].shape[0], D.data["Y_train"].shape[0] ) self.assertEqual( - evaluator.file_output.call_args[0][1].shape[0], D.data["Y_valid"].shape[0] - ) - self.assertEqual( - evaluator.file_output.call_args[0][2].shape[0], D.data["Y_test"].shape[0] + evaluator.file_output.call_args[0][1].shape[0], D.data["Y_test"].shape[0] ) # The model prior to fitting is saved, this cannot be directly tested # because of the way the mock module is used. Instead, we test whether @@ -623,13 +598,13 @@ def test_partial_cv(self, pipeline_mock): evaluator.partial_fit_predict_and_loss(fold=1) - rval = evaluator.queue.get(timeout=1) + return_value = evaluator.queue.get(timeout=1) self.assertRaises(queue.Empty, evaluator.queue.get, timeout=1) self.assertEqual(evaluator.file_output.call_count, 0) - self.assertEqual(rval["loss"], 0.5) + self.assertEqual(return_value["loss"], 0.5) self.assertEqual(pipeline_mock.fit.call_count, 1) - self.assertEqual(pipeline_mock.predict_proba.call_count, 4) + self.assertEqual(pipeline_mock.predict_proba.call_count, 3) # The model prior to fitting is saved, this cannot be directly tested # because of the way the mock module is used. 
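The CV call counts asserted above follow from one `predict_proba` call per prediction target (train, holdout/opt, test) per fold, now that the validation set is gone. A toy illustration with a plain `Mock`, not the real evaluator:

```python
# 5 folds x 3 prediction sets = 15 predict_proba calls.
from unittest.mock import Mock

pipeline = Mock()
n_folds, prediction_sets = 5, 3  # train, opt and test; validation removed

for _ in range(n_folds):
    for _ in range(prediction_sets):
        pipeline.predict_proba([[0.0, 1.0]])

assert pipeline.predict_proba.call_count == n_folds * prediction_sets == 15
```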
Instead, we test whether # the if block in which model assignment is done is accessed @@ -703,55 +678,6 @@ def configuration_fully_fitted(self): evaluator.file_output = unittest.mock.Mock(spec=evaluator.file_output) evaluator.file_output.return_value = (None, {}) - class LossSideEffect(object): - def __init__(self): - self.losses = [ - {"accuracy": value} - for value in [ - 1.0, - 1.0, - 1.0, - 1.0, - 0.9, - 0.9, - 0.9, - 0.9, - 0.8, - 0.8, - 0.8, - 0.8, - 0.7, - 0.7, - 0.7, - 0.7, - 0.6, - 0.6, - 0.6, - 0.6, - 0.5, - 0.5, - 0.5, - 0.5, - 0.4, - 0.4, - 0.4, - 0.4, - 0.3, - 0.3, - 0.3, - 0.3, - 0.2, - 0.2, - 0.2, - 0.2, - ] - ] - self.iteration = 0 - - def side_effect(self, *args, **kwargs): - self.iteration += 1 - return self.losses[self.iteration - 1] - evaluator._loss = unittest.mock.Mock() evaluator._loss.side_effect = LossSideEffect().side_effect @@ -760,12 +686,12 @@ def side_effect(self, *args, **kwargs): self.assertEqual(evaluator.file_output.call_count, 0) for i in range(1, 10): - rval = evaluator.queue.get(timeout=1) - self.assertAlmostEqual(rval["loss"], 1.0 - (0.1 * (i - 1))) + return_value = evaluator.queue.get(timeout=1) + self.assertAlmostEqual(return_value["loss"], 1.0 - (0.1 * (i - 1))) if i < 9: - self.assertEqual(rval["status"], StatusType.DONOTADVANCE) + self.assertEqual(return_value["status"], StatusType.DONOTADVANCE) else: - self.assertEqual(rval["status"], StatusType.SUCCESS) + self.assertEqual(return_value["status"], StatusType.SUCCESS) self.assertRaises(queue.Empty, evaluator.queue.get, timeout=1) self.assertEqual(pipeline_mock.iterative_fit.call_count, 9) @@ -773,13 +699,9 @@ def side_effect(self, *args, **kwargs): [cal[1]["n_iter"] for cal in pipeline_mock.iterative_fit.call_args_list], [2, 2, 4, 8, 16, 32, 64, 128, 256], ) - # fifteen calls because of the holdout, the validation and the test set - # and a total of five calls because of five iterations of fitting self.assertTrue(hasattr(evaluator, "model")) self.assertEqual(pipeline_mock.iterative_fit.call_count, 9) - # 20 calls because of train, holdout, the validation and the test set - # and a total of five calls because of five iterations of fitting - self.assertEqual(pipeline_mock.predict_proba.call_count, 36) + self.assertEqual(pipeline_mock.predict_proba.call_count, 27) @unittest.mock.patch.object(TrainEvaluator, "_loss") @unittest.mock.patch.object(TrainEvaluator, "_get_model") @@ -809,13 +731,9 @@ def test_file_output(self, loss_mock, model_mock): self.backend_mock.get_model_dir.return_value = True evaluator.model = "model" evaluator.Y_optimization = D.data["Y_train"] - rval = evaluator.file_output( - D.data["Y_train"], - D.data["Y_valid"], - D.data["Y_test"], - ) + return_value = evaluator.file_output(D.data["Y_train"], D.data["Y_test"]) - self.assertEqual(rval, (None, {})) + self.assertEqual(return_value, (None, {})) self.assertEqual(self.backend_mock.save_additional_data.call_count, 2) self.assertEqual(self.backend_mock.save_numrun_to_dir.call_count, 1) self.assertEqual( @@ -826,8 +744,8 @@ def test_file_output(self, loss_mock, model_mock): "budget", "model", "cv_model", + "valid_predictions", # TODO remove once backend updated "ensemble_predictions", - "valid_predictions", "test_predictions", }, ) @@ -839,12 +757,8 @@ def test_file_output(self, loss_mock, model_mock): ) evaluator.models = ["model2", "model2"] - rval = evaluator.file_output( - D.data["Y_train"], - D.data["Y_valid"], - D.data["Y_test"], - ) - self.assertEqual(rval, (None, {})) + return_value = evaluator.file_output(D.data["Y_train"], 
D.data["Y_test"]) + self.assertEqual(return_value, (None, {})) self.assertEqual(self.backend_mock.save_additional_data.call_count, 4) self.assertEqual(self.backend_mock.save_numrun_to_dir.call_count, 2) self.assertEqual( @@ -855,8 +769,8 @@ def test_file_output(self, loss_mock, model_mock): "budget", "model", "cv_model", + "valid_predictions", # TODO remove once backend updated "ensemble_predictions", - "valid_predictions", "test_predictions", }, ) @@ -867,29 +781,10 @@ def test_file_output(self, loss_mock, model_mock): self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]["cv_model"] ) - # Check for not containing NaNs - that the models don't predict nonsense - # for unseen data - D.data["Y_valid"][0] = np.NaN - rval = evaluator.file_output( - D.data["Y_train"], - D.data["Y_valid"], - D.data["Y_test"], - ) - self.assertEqual( - rval, - ( - 1.0, - {"error": "Model predictions for validation set contains NaNs."}, - ), - ) D.data["Y_train"][0] = np.NaN - rval = evaluator.file_output( - D.data["Y_train"], - D.data["Y_valid"], - D.data["Y_test"], - ) + return_value = evaluator.file_output(D.data["Y_train"], D.data["Y_test"]) self.assertEqual( - rval, + return_value, ( 1.0, {"error": "Model predictions for optimization set contains NaNs."}, @@ -1086,7 +981,6 @@ def test_fit_predict_and_loss_standard_additional_run_info( _partial_fit_and_predict_mock.return_value = ( np.array([[0.1, 0.9]] * 46), np.array([[0.1, 0.9]] * 23), - np.array([[0.1, 0.9]] * 25), np.array([[0.1, 0.9]] * 6), {"a": 5}, ) @@ -1112,8 +1006,8 @@ def test_fit_predict_and_loss_standard_additional_run_info( evaluator.X_targets[0] = np.array([1, 0] * 23) evaluator.Y_targets[0] = np.array([1] * 23) evaluator.Y_train_targets = np.array([1] * 69) - rval = evaluator.fit_predict_and_loss(iterative=False) - self.assertIsNone(rval) + return_value = evaluator.fit_predict_and_loss(iterative=False) + self.assertIsNone(return_value) element = queue_.get() self.assertEqual(element["status"], StatusType.SUCCESS) self.assertEqual(element["additional_run_info"]["a"], 5) @@ -1129,7 +1023,6 @@ def __call__(self, *args, **kwargs): return ( np.array([[0.1, 0.9]] * 34), np.array([[0.1, 0.9]] * 35), - np.array([[0.1, 0.9]] * 25), np.array([[0.1, 0.9]] * 6), {"a": 5}, ) @@ -1137,7 +1030,6 @@ def __call__(self, *args, **kwargs): return ( np.array([[0.1, 0.9]] * 34), np.array([[0.1, 0.9]] * 34), - np.array([[0.1, 0.9]] * 25), np.array([[0.1, 0.9]] * 6), {"a": 5}, ) @@ -1219,8 +1111,8 @@ def __call__(self): evaluator.file_output.return_value = (None, {}) evaluator.Y_targets[0] = np.array([1] * 23).reshape((-1, 1)) evaluator.Y_train_targets = np.array([1] * 69).reshape((-1, 1)) - rval = evaluator.fit_predict_and_loss(iterative=True) - self.assertIsNone(rval) + return_value = evaluator.fit_predict_and_loss(iterative=True) + self.assertIsNone(return_value) self.assertEqual(finish_up_mock.call_count, 1) self.assertEqual(finish_up_mock.call_args[1]["additional_run_info"], 14678) @@ -1265,8 +1157,8 @@ def test_fit_predict_and_loss_iterative_noniterativemodel_additional_run_info( evaluator.Y_targets[0] = np.array([1] * 23).reshape((-1, 1)) evaluator.Y_train_targets = np.array([1] * 69).reshape((-1, 1)) - rval = evaluator.fit_predict_and_loss(iterative=True) - self.assertIsNone(rval) + return_value = evaluator.fit_predict_and_loss(iterative=True) + self.assertIsNone(return_value) self.assertEqual(finish_up_mock.call_count, 1) self.assertEqual(finish_up_mock.call_args[1]["additional_run_info"], 14678) @@ -1326,8 +1218,8 @@ def __call__(self): 
evaluator.Y_targets[0] = np.array([1] * 23).reshape((-1, 1)) evaluator.Y_train_targets = np.array([1] * 69).reshape((-1, 1)) - rval = evaluator.fit_predict_and_loss(iterative=False) - self.assertIsNone(rval) + return_value = evaluator.fit_predict_and_loss(iterative=False) + self.assertIsNone(return_value) self.assertEqual(finish_up_mock.call_count, 1) self.assertEqual( finish_up_mock.call_args[1]["additional_run_info"], {"val": 14678} @@ -1373,8 +1265,8 @@ def test_fit_predict_and_loss_budget_2_additional_run_info( evaluator.Y_targets[0] = np.array([1] * 23).reshape((-1, 1)) evaluator.Y_train_targets = np.array([1] * 69).reshape((-1, 1)) - rval = evaluator.fit_predict_and_loss(iterative=False) - self.assertIsNone(rval) + return_value = evaluator.fit_predict_and_loss(iterative=False) + self.assertIsNone(return_value) self.assertEqual(finish_up_mock.call_count, 1) self.assertEqual( finish_up_mock.call_args[1]["additional_run_info"], {"val": 14678} @@ -1422,8 +1314,8 @@ def test_datasets(self): ) evaluator.fit_predict_and_loss() - rval = evaluator.queue.get(timeout=1) - self.assertTrue(np.isfinite(rval["loss"])) + return_value = evaluator.queue.get(timeout=1) + self.assertTrue(np.isfinite(return_value["loss"])) ############################################################################ # Test obtaining a splitter object from scikit-learn @@ -3053,8 +2945,8 @@ def test_eval_holdout_all_loss_functions(self): metrics=[accuracy], additional_components=dict(), ) - rval = read_queue(self.queue) - self.assertEqual(len(rval), 1) + return_value = read_queue(self.queue) + self.assertEqual(len(return_value), 1) fixture = { "accuracy": 0.030303030303030276, @@ -3070,12 +2962,11 @@ def test_eval_holdout_all_loss_functions(self): "recall_micro": 0.030303030303030276, "recall_weighted": 0.030303030303030276, "num_run": 1, - "validation_loss": 0.0, "test_loss": 0.04, "train_loss": 0.0, } - additional_run_info = rval[0]["additional_run_info"] + additional_run_info = return_value[0]["additional_run_info"] for key, value in fixture.items(): self.assertAlmostEqual(additional_run_info[key], fixture[key], msg=key) self.assertIn("duration", additional_run_info) @@ -3085,8 +2976,8 @@ def test_eval_holdout_all_loss_functions(self): msg=sorted(additional_run_info.items()), ) - self.assertAlmostEqual(rval[0]["loss"], 0.030303030303030276, places=3) - self.assertEqual(rval[0]["status"], StatusType.SUCCESS) + self.assertAlmostEqual(return_value[0]["loss"], 0.030303030303030276, places=3) + self.assertEqual(return_value[0]["status"], StatusType.SUCCESS) def test_eval_holdout_iterative_fit_no_timeout(self): eval_iterative_holdout( @@ -3107,11 +2998,11 @@ def test_eval_holdout_iterative_fit_no_timeout(self): metrics=[accuracy], additional_components=dict(), ) - rval = read_queue(self.queue) - self.assertEqual(len(rval), 9) - self.assertAlmostEqual(rval[-1]["loss"], 0.030303030303030276) - self.assertEqual(rval[0]["status"], StatusType.DONOTADVANCE) - self.assertEqual(rval[-1]["status"], StatusType.SUCCESS) + return_value = read_queue(self.queue) + self.assertEqual(len(return_value), 9) + self.assertAlmostEqual(return_value[-1]["loss"], 0.030303030303030276) + self.assertEqual(return_value[0]["status"], StatusType.DONOTADVANCE) + self.assertEqual(return_value[-1]["status"], StatusType.SUCCESS) def test_eval_holdout_iterative_fit_no_timeout_multi_objective(self): metrics = { @@ -3136,12 +3027,12 @@ def test_eval_holdout_iterative_fit_no_timeout_multi_objective(self): metrics=list(metrics.keys()), 
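In the multi-objective variants above, the `loss` entry of each queue item is a dict keyed by metric name rather than a single float, and the tests iterate over the expected metric/loss pairs. A toy illustration of consuming both shapes (the numbers here are made up):

```python
# Single- vs multi-objective result payloads as seen by the tests.
single_objective = {"loss": 0.0303, "status": "SUCCESS"}
multi_objective = {
    "loss": {"accuracy": 0.0303, "balanced_accuracy": 0.0277},
    "status": "SUCCESS",
}

assert isinstance(single_objective["loss"], float)
for metric_name, loss in multi_objective["loss"].items():
    assert 0.0 <= loss <= 1.0, metric_name
```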
additional_components=dict(), ) - rval = read_queue(self.queue) - self.assertEqual(len(rval), 9) + return_value = read_queue(self.queue) + self.assertEqual(len(return_value), 9) for metric, loss in metrics.items(): - self.assertAlmostEqual(rval[-1]["loss"][metric.name], loss) - self.assertEqual(rval[0]["status"], StatusType.DONOTADVANCE) - self.assertEqual(rval[-1]["status"], StatusType.SUCCESS) + self.assertAlmostEqual(return_value[-1]["loss"][metric.name], loss) + self.assertEqual(return_value[0]["status"], StatusType.DONOTADVANCE) + self.assertEqual(return_value[-1]["status"], StatusType.SUCCESS) def test_eval_holdout_budget_iterations(self): eval_holdout( @@ -3405,11 +3296,11 @@ def test_eval_cv(self): metrics=[accuracy], additional_components=dict(), ) - rval = read_queue(self.queue) - self.assertEqual(len(rval), 1) - self.assertAlmostEqual(rval[0]["loss"], 0.04999999999999997) - self.assertEqual(rval[0]["status"], StatusType.SUCCESS) - self.assertNotIn("bac_metric", rval[0]["additional_run_info"]) + return_value = read_queue(self.queue) + self.assertEqual(len(return_value), 1) + self.assertAlmostEqual(return_value[0]["loss"], 0.04999999999999997) + self.assertEqual(return_value[0]["status"], StatusType.SUCCESS) + self.assertNotIn("bac_metric", return_value[0]["additional_run_info"]) def test_eval_cv_all_loss_functions(self): eval_cv( @@ -3430,8 +3321,8 @@ def test_eval_cv_all_loss_functions(self): metrics=[accuracy], additional_components=dict(), ) - rval = read_queue(self.queue) - self.assertEqual(len(rval), 1) + return_value = read_queue(self.queue) + self.assertEqual(len(return_value), 1) fixture = { "accuracy": 0.04999999999999997, @@ -3447,12 +3338,11 @@ def test_eval_cv_all_loss_functions(self): "recall_micro": 0.04999999999999997, "recall_weighted": 0.04999999999999997, "num_run": 1, - "validation_loss": 0.04, "test_loss": 0.04, "train_loss": 0.0, } - additional_run_info = rval[0]["additional_run_info"] + additional_run_info = return_value[0]["additional_run_info"] for key, value in fixture.items(): self.assertAlmostEqual(additional_run_info[key], fixture[key], msg=key) self.assertIn("duration", additional_run_info) @@ -3462,8 +3352,8 @@ def test_eval_cv_all_loss_functions(self): msg=sorted(additional_run_info.items()), ) - self.assertAlmostEqual(rval[0]["loss"], 0.04999999999999997) - self.assertEqual(rval[0]["status"], StatusType.SUCCESS) + self.assertAlmostEqual(return_value[0]["loss"], 0.04999999999999997) + self.assertEqual(return_value[0]["status"], StatusType.SUCCESS) # def test_eval_cv_on_subset(self): # backend_api = backend.create(self.tmp_dir, self.tmp_dir) @@ -3504,10 +3394,10 @@ def test_eval_partial_cv(self): metrics=[accuracy], additional_components=dict(), ) - rval = read_queue(self.queue) - self.assertEqual(len(rval), 1) - self.assertAlmostEqual(rval[0]["loss"], results[fold]) - self.assertEqual(rval[0]["status"], StatusType.SUCCESS) + return_value = read_queue(self.queue) + self.assertEqual(len(return_value), 1) + self.assertAlmostEqual(return_value[0]["loss"], results[fold]) + self.assertEqual(return_value[0]["status"], StatusType.SUCCESS) def test_eval_partial_cv_multi_objective(self): metrics = { @@ -3547,8 +3437,8 @@ def test_eval_partial_cv_multi_objective(self): metrics=list(metrics.keys()), additional_components=dict(), ) - rval = read_queue(self.queue) - self.assertEqual(len(rval), 1) + return_value = read_queue(self.queue) + self.assertEqual(len(return_value), 1) for metric, loss in metrics.items(): - 
self.assertAlmostEqual(rval[0]["loss"][metric.name], loss[fold]) - self.assertEqual(rval[0]["status"], StatusType.SUCCESS) + self.assertAlmostEqual(return_value[0]["loss"][metric.name], loss[fold]) + self.assertEqual(return_value[0]["status"], StatusType.SUCCESS) diff --git a/test/test_pipeline/components/regression/test_mlp.py b/test/test_pipeline/components/regression/test_mlp.py index 9e2a92acac..941e30bf32 100644 --- a/test/test_pipeline/components/regression/test_mlp.py +++ b/test/test_pipeline/components/regression/test_mlp.py @@ -29,7 +29,7 @@ class MLPComponentTest(BaseRegressionComponentTest): # # Seems there is a consistently different values for boston so: # * include two valuess for n_iter in 'boston_iterative_n_iter' - # known-values = [236, 331] + # known-values = [236, 331, 327] # # * decreased places from 6 -> 5 in 'default_boston_{sparse,_iterative_sparse}' # to check for for iterations and expanded the default places for checking @@ -47,7 +47,7 @@ class MLPComponentTest(BaseRegressionComponentTest): res["default_boston"] = 0.2750079862455884 res["default_boston_places"] = 1 res["boston_n_calls"] = [8, 9] - res["boston_iterative_n_iter"] = [236, 331] + res["boston_iterative_n_iter"] = [236, 331, 327] res["default_boston_iterative"] = res["default_boston"] res["default_boston_iterative_places"] = 1 res["default_boston_sparse"] = -0.10972947168054104 diff --git a/test/test_pipeline/test_classification.py b/test/test_pipeline/test_classification.py index 7be8038119..88f091772a 100644 --- a/test/test_pipeline/test_classification.py +++ b/test/test_pipeline/test_classification.py @@ -172,7 +172,7 @@ def test_find_preprocessors(self): * At least 1 preprocessor component can be found * The inherit from AutoSklearnPreprocessingAlgorithm """ - preprocessors = preprocessing_components._preprocessors + preprocessors = preprocessing_components._feature_preprocessors self.assertGreaterEqual(len(preprocessors), 1) for key in preprocessors: if hasattr(preprocessors[key], "get_components"): diff --git a/test/test_pipeline/test_regression.py b/test/test_pipeline/test_regression.py index 3a50decb8c..788e347b1e 100644 --- a/test/test_pipeline/test_regression.py +++ b/test/test_pipeline/test_regression.py @@ -76,7 +76,7 @@ def test_find_regressors(self): self.assertIn(AutoSklearnRegressionAlgorithm, regressors[key].__bases__) def test_find_preprocessors(self): - preprocessors = preprocessing_components._preprocessors + preprocessors = preprocessing_components._feature_preprocessors self.assertGreaterEqual(len(preprocessors), 1) for key in preprocessors: if hasattr(preprocessors[key], "get_components"): diff --git a/test/test_scripts/test_metadata_generation.py b/test/test_scripts/test_metadata_generation.py index 929b90e029..25c4855b08 100644 --- a/test/test_scripts/test_metadata_generation.py +++ b/test/test_scripts/test_metadata_generation.py @@ -4,6 +4,7 @@ import shutil import socket import subprocess +import tempfile import arff import numpy as np @@ -15,10 +16,12 @@ class TestMetadataGeneration(unittest.TestCase): def setUp(self): - self.working_directory = "/tmp/autosklearn-unittest-tmp-dir-%s-%d-%d" % ( - socket.gethostname(), - os.getpid(), - random.randint(0, 1000000), + host = socket.gethostname() + pid = os.getpid() + rint = random.randint(0, 1000000) + + self.working_directory = os.path.join( + tempfile.gettempdir(), f"autosklearn-unittest-tmp-dir-{host}-{pid}-{rint}" ) def print_files(self): @@ -27,7 +30,6 @@ def print_files(self): print(dirpath, dirnames, filenames) def 
test_metadata_generation(self): - regression_task_id = 360029 regression_dataset_name = "SWD".lower() classification_task_id = 245 @@ -52,10 +54,15 @@ def test_metadata_generation(self): script_filename, self.working_directory, ) - rval = subprocess.run( - cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE + + return_value = subprocess.run( + cmd, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + timeout=30, ) - self.assertEqual(rval.returncode, 0, msg=str(rval)) + self.assertEqual(return_value.returncode, 0, msg=f"{cmd}\n{str(return_value)}") # 4. run one of the commands to get some data commands_output_file = os.path.join( @@ -99,11 +106,16 @@ def test_metadata_generation(self): # for training. In production, it would use twice as much! cmd = cmd.replace("-s 1", "-s 1 --unittest") print("COMMAND: %s" % cmd) - rval = subprocess.run( - cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE + + return_value = subprocess.run( + cmd, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + timeout=180, ) - print("STDOUT: %s" % repr(rval.stdout), flush=True) - print("STDERR: %s" % repr(rval.stderr), flush=True) + print("STDOUT: %s" % repr(return_value.stdout), flush=True) + print("STDERR: %s" % repr(return_value.stderr), flush=True) self.print_files() @@ -123,7 +135,11 @@ def test_metadata_generation(self): ) with open(smac_log) as fh: smac_output = fh.read() - self.assertEqual(rval.returncode, 0, msg=str(rval) + "\n" + smac_output) + self.assertEqual( + return_value.returncode, + 0, + msg=f"{cmd}\n{str(return_value)}" + "\n" + smac_output, + ) expected_validation_output = os.path.join( expected_output_directory, "..", "validation_trajectory_1.json" ) @@ -172,12 +188,17 @@ def test_metadata_generation(self): self.working_directory, ) print("COMMAND: %s" % cmd) - rval = subprocess.run( - cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE + + return_value = subprocess.run( + cmd, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + timeout=60, ) - print("STDOUT: %s" % repr(rval.stdout), flush=True) - print("STDERR: %s" % repr(rval.stderr), flush=True) - self.assertEqual(rval.returncode, 0, msg=str(rval)) + print("STDOUT: %s" % repr(return_value.stdout), flush=True) + print("STDERR: %s" % repr(return_value.stderr), flush=True) + self.assertEqual(return_value.returncode, 0, msg=f"{cmd}\n{str(return_value)}") for file in [ "algorithm_runs.arff", @@ -215,10 +236,14 @@ def test_metadata_generation(self): script_filename, self.working_directory, ) - rval = subprocess.run( - cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE + return_value = subprocess.run( + cmd, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + timeout=90, ) - self.assertEqual(rval.returncode, 0, msg=str(rval)) + self.assertEqual(return_value.returncode, 0, msg=f"{cmd}\n{str(return_value)}") for task_type in ("classification", "regression"): for file in [ "calculation_times.csv", @@ -271,10 +296,15 @@ def test_metadata_generation(self): script_filename, self.working_directory, ) - rval = subprocess.run( - cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE + + return_value = subprocess.run( + cmd, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + timeout=45, ) - self.assertEqual(rval.returncode, 0, msg=str(rval)) + self.assertEqual(return_value.returncode, 0, msg=f"{cmd}\n{str(return_value)}") for metric_, combination in ( (metric, "%s_binary.classification_dense" % metric),
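The metadata-generation test now builds its scratch directory under `tempfile.gettempdir()` and passes explicit timeouts to `subprocess.run`, so a hung script fails the test instead of stalling CI. A sketch of that pattern follows; the command invoked is illustrative only.

```python
# Unique scratch directory plus a timed, output-capturing subprocess call.
import os
import random
import socket
import subprocess
import tempfile

working_directory = os.path.join(
    tempfile.gettempdir(),
    f"autosklearn-unittest-tmp-dir-{socket.gethostname()}-{os.getpid()}-"
    f"{random.randint(0, 1000000)}",
)
os.makedirs(working_directory, exist_ok=True)

return_value = subprocess.run(
    f"python -c \"print('hello from {working_directory}')\"",
    shell=True,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    timeout=30,  # seconds; raises subprocess.TimeoutExpired if exceeded
)
assert return_value.returncode == 0, f"{return_value.stdout!r}\n{return_value.stderr!r}"
```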