Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

First draft for user code introspection into components #1505

Closed
wants to merge 21 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
8a9d53f
First draft
eddiebergman Jun 9, 2022
ad0675f
Fix moo things (#1501)
eddiebergman Jun 14, 2022
b44b1c1
Create simple example and doc for naive early stopping (#1476)
eddiebergman Jun 14, 2022
4d8401f
Rename `rval` to `return_value` or `run_value` (#1504)
eddiebergman Jun 14, 2022
ed60fc1
Bump actions/setup-python from 3 to 4 (#1511)
dependabot[bot] Jun 14, 2022
b98b3d9
Bump actions/download-artifact from 2 to 3 (#1512)
dependabot[bot] Jun 14, 2022
58d2316
Bump codecov/codecov-action from 2 to 3 (#1513)
dependabot[bot] Jun 14, 2022
147de23
Bump actions/upload-artifact from 2 to 3 (#1514)
dependabot[bot] Jun 14, 2022
0ae2463
Fix logging server cleanup (#1503)
eddiebergman Jun 15, 2022
8d82be8
Bump peter-evans/find-comment from 1 to 2 (#1520)
dependabot[bot] Jun 15, 2022
9002fca
Bump actions/stale from 4 to 5 (#1521)
dependabot[bot] Jun 15, 2022
c69800d
Remove references to validation set in evaluator (#1517)
eddiebergman Jun 17, 2022
9d63cb5
Fix timeouts related to metalearnings tests (#1508)
eddiebergman Jun 17, 2022
5e21e9c
Fix prediction fails with MOO ensemble and dummy is best (#1518)
eddiebergman Jun 23, 2022
ca46861
Fix no _preprocessors attribute
eddiebergman Jun 23, 2022
f0c8ecd
fix-1527-Fix-mlp-regressor-test-fixture-values (#1528)
eddiebergman Jun 23, 2022
4f691a1
fix docker workflow (#1526)
eddiebergman Jun 23, 2022
44b956c
First draft
eddiebergman Jun 9, 2022
c64f0fd
Fix no _preprocessors attribute
eddiebergman Jun 23, 2022
b03dddd
Revert "fix docker workflow (#1526)"
eddiebergman Jun 24, 2022
59e60e1
Merge branch 'document_model_capabilities' of github.com:automl/auto-…
eddiebergman Jun 24, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
205 changes: 205 additions & 0 deletions autosklearn/info.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
"""
This module serves as an introspection point for things users might
want to programmatically query about autosklearn.
"""
from __future__ import annotations

from typing import Any, Generic, Type, TypeVar

from dataclasses import dataclass

from typing_extensions import Literal

from autosklearn.pipeline.components.base import (
AutoSklearnClassificationAlgorithm,
AutoSklearnComponent,
AutoSklearnPreprocessingAlgorithm,
AutoSklearnRegressionAlgorithm,
)
from autosklearn.pipeline.components.classification import ClassifierChoice
from autosklearn.pipeline.components.data_preprocessing import DataPreprocessorChoice
from autosklearn.pipeline.components.feature_preprocessing import (
FeaturePreprocessorChoice,
)
from autosklearn.pipeline.components.regression import RegressorChoice
from autosklearn.pipeline.constants import DATASET_PROPERTIES_TO_STRING

# Type variable for a component *class* (not an instance): it is bound to
# ``Type[AutoSklearnComponent]``, i.e. AutoSklearnComponent or any subclass of it
T = TypeVar("T", bound=Type[AutoSklearnComponent])


def _translate_properties(
    props: dict[str, Any],
    kind: Literal["classifier", "regressor", "f_preprocessor", "d_preprocessor"],
) -> dict[str, Any]:
    """Translate a component's raw properties into info-class keyword arguments.

    Redundant keys are dropped, some keys are renamed and the supported
    inputs/outputs are converted to their string names. The passed-in ``props``
    is never modified; a new dict is returned.

    Parameters
    ----------
    props : dict[str, Any]
        The properties as returned by a component's ``get_properties()``.
        Must contain ``"input"`` (an iterable of dataset property constants) and
        ``"output"`` (an indexable whose first entry is such a constant).

    kind : "classifier" | "regressor" | "f_preprocessor" | "d_preprocessor"
        What kind of component the properties belong to. Decides which task
        specific ``handles_*`` keys are dropped.

    Returns
    -------
    dict[str, Any]
        A new dict in which

        * ``"input"`` became ``"supported_inputs"``, a list of strings
        * ``"output"`` became ``"output_kind"``, the string for its first entry
        * ``"is_deterministic"``, if present, became ``"deterministic"``

    Raises
    ------
    KeyError
        If ``"input"`` or ``"output"`` is missing from ``props`` or one of their
        entries is not a key of ``DATASET_PROPERTIES_TO_STRING``.
    """
    # Dropped for every kind: this information is conveyed implicitly by being a
    # regressor/classifier, or is duplicated by the supported input types
    dropped = {
        "handles_regression",
        "handles_classification",
        "handles_sparse",
        "handles_dense",
    }

    if kind in ("classifier", "f_preprocessor", "d_preprocessor"):
        dropped.add("handles_multioutput")

    if kind in ("regressor", "f_preprocessor", "d_preprocessor"):
        dropped.update(("handles_multiclass", "handles_multilabel"))

    # Build a fresh dict instead of deleting from ``props`` so that the caller's
    # dict (which a component may well share between calls) is left untouched
    translated = {key: value for key, value in props.items() if key not in dropped}

    inputs = translated.pop("input")
    translated["supported_inputs"] = [DATASET_PROPERTIES_TO_STRING[k] for k in inputs]

    # Only the first listed output is reported
    outputs = translated.pop("output")
    translated["output_kind"] = DATASET_PROPERTIES_TO_STRING[outputs[0]]

    # Not every component declares this; when it is missing the ``deterministic``
    # field of the info dataclass falls back to its default
    if "is_deterministic" in translated:
        translated["deterministic"] = translated.pop("is_deterministic")

    return translated


@dataclass
class _ComponentInfo(Generic[T]):
    """Information common to every kind of component.

    Generic over ``T``, the class of the component being described.
    """

    # The component class itself.
    # Named `type` as `cls` is not possible due to @dataclass conversion
    type: T

    # Full name and short name of the component
    name: str
    shortname: str

    # String name of what the component outputs
    output_kind: str

    # String names of the kinds of input the component supports
    supported_inputs: list[str]

    # Whether the component is deterministic, False unless stated otherwise
    deterministic: bool = False


@dataclass
class RegressorInfo(_ComponentInfo[Type[AutoSklearnRegressionAlgorithm]]):
    """Information about a regressor, extending the common component info."""

    # Both flags are False unless given explicitly
    handles_multioutput: bool = False
    prefers_data_normalized: bool = False


@dataclass
class ClassifierInfo(_ComponentInfo[Type[AutoSklearnClassificationAlgorithm]]):
    """Information about a classifier, extending the common component info.

    Every ``handles_*`` flag states whether the classifier supports that kind
    of classification target.
    """

    handles_binary: bool = True  # We assume all components support this
    handles_multiclass: bool = False
    handles_multilabel: bool = False
    # Needs the annotation: without one @dataclass does not treat the name as a
    # field, leaving it out of __init__, __repr__ and __eq__
    handles_multilabel_multiclass: bool = False


@dataclass
class FeaturePreprocessorInfo(_ComponentInfo[Type[AutoSklearnPreprocessingAlgorithm]]):
    """Information about a feature preprocessor.

    Adds no fields on top of the common component info.
    """


@dataclass
class DataPreprocessorInfo(_ComponentInfo[Type[AutoSklearnPreprocessingAlgorithm]]):
    """Information about a data preprocessor.

    Adds no fields on top of the common component info.
    """

    # NOTE: There should be more here, but the DataPreprocessing part of the
    # pipeline doesn't pick up on it: the component available there is
    # FeatTypeSplit, which in turn has subcomponents with extra properties


@dataclass
class ComponentsInfo:
    """Groups the info of all components, one dict per kind of component.

    Every dict maps a component's name to the info object describing it.
    """

    # Estimators
    classifiers: dict[str, ClassifierInfo]
    regressors: dict[str, RegressorInfo]

    # Preprocessors
    feature_preprocessors: dict[str, FeaturePreprocessorInfo]
    data_preprocessors: dict[str, DataPreprocessorInfo]


def classifiers() -> dict[str, ClassifierInfo]:
    """Collect the info of every classifier auto-sklearn can use

    Returns
    -------
    dict[str, ClassifierInfo]
        Maps the name of each classifier to the info describing it
    """
    infos: dict[str, ClassifierInfo] = {}
    for name, cls in ClassifierChoice.get_components().items():
        properties = _translate_properties(cls.get_properties(), "classifier")
        # The component class goes in first, followed by its translated properties
        kwargs = {"type": cls, **properties}
        infos[name] = ClassifierInfo(**kwargs)

    return infos


def regressors() -> dict[str, RegressorInfo]:
    """Collect the info of every regressor auto-sklearn can use

    Returns
    -------
    dict[str, RegressorInfo]
        Maps the name of each regressor to the info describing it
    """
    infos: dict[str, RegressorInfo] = {}
    for name, cls in RegressorChoice.get_components().items():
        properties = _translate_properties(cls.get_properties(), "regressor")
        # The component class goes in first, followed by its translated properties
        kwargs = {"type": cls, **properties}
        infos[name] = RegressorInfo(**kwargs)

    return infos


def feature_preprocessors() -> dict[str, FeaturePreprocessorInfo]:
    """Collect the info of every feature preprocessor auto-sklearn can use

    Returns
    -------
    dict[str, FeaturePreprocessorInfo]
        Maps the name of each feature preprocessor to the info describing it
    """
    infos: dict[str, FeaturePreprocessorInfo] = {}
    for name, cls in FeaturePreprocessorChoice.get_components().items():
        properties = _translate_properties(cls.get_properties(), "f_preprocessor")
        # The component class goes in first, followed by its translated properties
        kwargs = {"type": cls, **properties}
        infos[name] = FeaturePreprocessorInfo(**kwargs)

    return infos


def data_preprocessors() -> dict[str, DataPreprocessorInfo]:
    """Collect the info of every data preprocessor auto-sklearn can use

    Returns
    -------
    dict[str, DataPreprocessorInfo]
        Maps the name of each data preprocessor to the info describing it
    """
    infos: dict[str, DataPreprocessorInfo] = {}
    for name, cls in DataPreprocessorChoice.get_components().items():
        properties = _translate_properties(cls.get_properties(), "d_preprocessor")
        # The component class goes in first, followed by its translated properties
        kwargs = {"type": cls, **properties}
        infos[name] = DataPreprocessorInfo(**kwargs)

    return infos


def components() -> ComponentsInfo:
    """Collect the info of every component auto-sklearn can use

    Returns
    -------
    ComponentsInfo
        A dataclass holding one dict of infos per kind of component
        * classifiers
        * regressors
        * feature_preprocessors
        * data_preprocessors
    """
    # Gathered one kind at a time, in the order the fields are declared
    classifier_infos = classifiers()
    regressor_infos = regressors()
    feature_preprocessor_infos = feature_preprocessors()
    data_preprocessor_infos = data_preprocessors()

    return ComponentsInfo(
        classifiers=classifier_infos,
        regressors=regressor_infos,
        feature_preprocessors=feature_preprocessor_infos,
        data_preprocessors=data_preprocessor_infos,
    )
4 changes: 2 additions & 2 deletions autosklearn/pipeline/components/base.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Dict
from __future__ import annotations

import importlib
import inspect
Expand All @@ -10,7 +10,7 @@

from autosklearn.pipeline.constants import SPARSE

_addons = dict() # type: Dict[str, 'ThirdPartyComponents']
_addons: dict[str, ThirdPartyComponents] = {}


def find_components(package, directory, base_class):
Expand Down
14 changes: 8 additions & 6 deletions autosklearn/pipeline/components/data_preprocessing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,16 @@
AutoSklearnChoice,
AutoSklearnPreprocessingAlgorithm,
ThirdPartyComponents,
_addons,
find_components,
)

classifier_directory = os.path.split(__file__)[0]
_preprocessors = find_components(
__package__, classifier_directory, AutoSklearnPreprocessingAlgorithm
data_preprocessing_directory = os.path.split(__file__)[0]
_data_preprocessors = find_components(
__package__, data_preprocessing_directory, AutoSklearnPreprocessingAlgorithm
)
_addons = ThirdPartyComponents(AutoSklearnPreprocessingAlgorithm)
additional_components = ThirdPartyComponents(AutoSklearnPreprocessingAlgorithm)
_addons["data_preprocessing"] = additional_components


def add_preprocessor(preprocessor: Type[AutoSklearnPreprocessingAlgorithm]) -> None:
Expand All @@ -30,8 +32,8 @@ class DataPreprocessorChoice(AutoSklearnChoice):
@classmethod
def get_components(cls) -> OrderedDict:
components: OrderedDict = OrderedDict()
components.update(_preprocessors)
components.update(_addons.components)
components.update(_data_preprocessors)
components.update(additional_components.components)
return components

def get_available_components(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ def get_properties(
"handles_multiclass": True,
"handles_multilabel": True,
"handles_multioutput": True,
"is_deterministic": True, # Assumption for now
# TODO find out if this is right!
"handles_sparse": True,
"handles_dense": True,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
find_components,
)

classifier_directory = os.path.split(__file__)[0]
_preprocessors = find_components(
__package__, classifier_directory, AutoSklearnPreprocessingAlgorithm
feature_preprocessing_directory = os.path.split(__file__)[0]
_feature_preprocessors = find_components(
__package__, feature_preprocessing_directory, AutoSklearnPreprocessingAlgorithm
)
additional_components = ThirdPartyComponents(AutoSklearnPreprocessingAlgorithm)
_addons["feature_preprocessing"] = additional_components
Expand All @@ -30,7 +30,7 @@ class FeaturePreprocessorChoice(AutoSklearnChoice):
@classmethod
def get_components(cls):
components = OrderedDict()
components.update(_preprocessors)
components.update(_feature_preprocessors)
components.update(additional_components.components)
return components

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ def get_properties(dataset_properties=None):
"handles_multiclass": True,
"handles_multilabel": False,
"handles_multioutput": False,
"is_deterministic": False,
"input": (SPARSE, DENSE, UNSIGNED_DATA),
"output": (INPUT,),
}
Expand Down
1 change: 0 additions & 1 deletion autosklearn/pipeline/components/regression/sgd.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,6 @@ def get_properties(dataset_properties=None):
"handles_multilabel": False,
"handles_multioutput": False,
"is_deterministic": True,
"handles_sparse": True,
KEggensperger marked this conversation as resolved.
Show resolved Hide resolved
"input": (DENSE, SPARSE, UNSIGNED_DATA),
"output": (PREDICTIONS,),
}
Expand Down