Skip to content

Commit c24fac0

Browse files
committed
[refactor] Format evaluators (mainly tae, abstract_evaluator, evaluator)
* [refactor] Refactor __init__ of abstract evaluator * [refactor] Collect shared variables in NamedTuples * [fix] Copy the budget passed to the evaluator params * [refactor] Add cross validation result manager for separate management * [refactor] Separate pipeline classes from abstract evaluator * [refactor] Increase the safety level of pipeline config * [test] Add tests for the changes * [test] Modify queue.empty in a safer way [fix] Find the error in test_tabular_xxx Since the pipeline is updated after the evaluations and the previous code updated self.pipeline in the predict method, the dummy class only needed to override this method. However, the new code does it separately, so I override the get_pipeline method so that we can reproduce the same results. [fix] Fix the shape issue in regression and add bug comment in a test [fix] Fix the ground truth of test_cv Since we changed the weighting strategy for the cross validation in the validation phase so that we weight performance from each model proportionally to the size of each VALIDATION split, I needed to change the answer. Note that the previous code weighted the performance proportionally to the TRAINING splits for both training and validation phases. [fix] Change qsize --> Empty since qsize might not be reliable [refactor] Add cost for crash in autoPyTorchMetrics [fix] Fix the issue when taking num_classes from regression task [fix] Deactivate the save of cv model in the case of holdout
1 parent a679b09 commit c24fac0

File tree

14 files changed

+1599
-2111
lines changed

14 files changed

+1599
-2111
lines changed

autoPyTorch/api/base_task.py

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,9 @@
4848
)
4949
from autoPyTorch.ensemble.ensemble_builder import EnsembleBuilderManager
5050
from autoPyTorch.ensemble.singlebest_ensemble import SingleBest
51-
from autoPyTorch.evaluation.abstract_evaluator import fit_and_suppress_warnings
52-
from autoPyTorch.evaluation.tae import ExecuteTaFuncWithQueue, get_cost_of_crash
51+
from autoPyTorch.evaluation.abstract_evaluator import fit_pipeline
52+
from autoPyTorch.evaluation.pipeline_class_collection import get_default_pipeline_config
53+
from autoPyTorch.evaluation.tae import TargetAlgorithmQuery
5354
from autoPyTorch.evaluation.utils import DisableFileOutputParameters
5455
from autoPyTorch.optimizer.smbo import AutoMLSMBO
5556
from autoPyTorch.pipeline.base_pipeline import BasePipeline
@@ -685,23 +686,24 @@ def _do_dummy_prediction(self) -> None:
685686
# already be generated here!
686687
stats = Stats(scenario_mock)
687688
stats.start_timing()
688-
ta = ExecuteTaFuncWithQueue(
689+
taq = TargetAlgorithmQuery(
689690
pynisher_context=self._multiprocessing_context,
690691
backend=self._backend,
691692
seed=self.seed,
692693
metric=self._metric,
693694
multi_objectives=["cost"],
694695
logger_port=self._logger_port,
695-
cost_for_crash=get_cost_of_crash(self._metric),
696+
cost_for_crash=self._metric._cost_of_crash,
696697
abort_on_first_run_crash=False,
697698
initial_num_run=num_run,
699+
pipeline_config=get_default_pipeline_config(choice='dummy'),
698700
stats=stats,
699701
memory_limit=memory_limit,
700702
disable_file_output=self._disable_file_output,
701703
all_supported_metrics=self._all_supported_metrics
702704
)
703705

704-
status, _, _, additional_info = ta.run(num_run, cutoff=self._time_for_task)
706+
status, _, _, additional_info = taq.run(num_run, cutoff=self._time_for_task)
705707
if status == StatusType.SUCCESS:
706708
self._logger.info("Finished creating dummy predictions.")
707709
else:
@@ -770,14 +772,14 @@ def _do_traditional_prediction(self, time_left: int, func_eval_time_limit_secs:
770772
# already be generated here!
771773
stats = Stats(scenario_mock)
772774
stats.start_timing()
773-
ta = ExecuteTaFuncWithQueue(
775+
taq = TargetAlgorithmQuery(
774776
pynisher_context=self._multiprocessing_context,
775777
backend=self._backend,
776778
seed=self.seed,
777779
multi_objectives=["cost"],
778780
metric=self._metric,
779781
logger_port=self._logger_port,
780-
cost_for_crash=get_cost_of_crash(self._metric),
782+
cost_for_crash=self._metric._cost_of_crash,
781783
abort_on_first_run_crash=False,
782784
initial_num_run=self._backend.get_next_num_run(),
783785
stats=stats,
@@ -788,7 +790,7 @@ def _do_traditional_prediction(self, time_left: int, func_eval_time_limit_secs:
788790
dask_futures.append([
789791
classifier,
790792
self._dask_client.submit(
791-
ta.run, config=classifier,
793+
taq.run, config=classifier,
792794
cutoff=func_eval_time_limit_secs,
793795
)
794796
])
@@ -1078,7 +1080,7 @@ def _search(
10781080

10791081
# Here the budget is set to max because the SMAC intensifier can be:
10801082
# Hyperband: in this case the budget is determined on the fly and overwritten
1081-
# by the ExecuteTaFuncWithQueue
1083+
# by the TargetAlgorithmQuery
10821084
# SimpleIntensifier (and others): in this case, we use max_budget as a target
10831085
# budget, and hence the below line is honored
10841086
self.pipeline_options[budget_type] = max_budget
@@ -1362,7 +1364,7 @@ def refit(
13621364
dataset_properties=dataset_properties,
13631365
dataset=dataset,
13641366
split_id=split_id)
1365-
fit_and_suppress_warnings(self._logger, model, X, y=None)
1367+
fit_pipeline(self._logger, model, X, y=None)
13661368

13671369
self._clean_logger()
13681370

@@ -1573,28 +1575,27 @@ def fit_pipeline(
15731575

15741576
stats.start_timing()
15751577

1576-
tae = ExecuteTaFuncWithQueue(
1578+
taq = TargetAlgorithmQuery(
15771579
backend=self._backend,
15781580
seed=self.seed,
15791581
metric=metric,
15801582
multi_objectives=["cost"],
15811583
logger_port=self._logger_port,
1582-
cost_for_crash=get_cost_of_crash(metric),
1584+
cost_for_crash=metric._cost_of_crash,
15831585
abort_on_first_run_crash=False,
15841586
initial_num_run=self._backend.get_next_num_run(),
15851587
stats=stats,
15861588
memory_limit=memory_limit,
15871589
disable_file_output=disable_file_output,
15881590
all_supported_metrics=all_supported_metrics,
1589-
budget_type=budget_type,
15901591
include=include_components,
15911592
exclude=exclude_components,
15921593
search_space_updates=search_space_updates,
15931594
pipeline_config=pipeline_options,
15941595
pynisher_context=self._multiprocessing_context
15951596
)
15961597

1597-
run_info, run_value = tae.run_wrapper(
1598+
run_info, run_value = taq.run_wrapper(
15981599
RunInfo(config=configuration,
15991600
budget=budget,
16001601
seed=self.seed,
@@ -1606,7 +1607,7 @@ def fit_pipeline(
16061607

16071608
fitted_pipeline = self._get_fitted_pipeline(
16081609
dataset_name=dataset.dataset_name,
1609-
pipeline_idx=run_info.config.config_id + tae.initial_num_run,
1610+
pipeline_idx=run_info.config.config_id + taq.initial_num_run,
16101611
run_info=run_info,
16111612
run_value=run_value,
16121613
disable_file_output=disable_file_output
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"budget_type": "epochs",
3+
"epochs": 1,
4+
"runtime": 1
5+
}

0 commit comments

Comments
 (0)