Skip to content

Commit c9f1ca7

Browse files
committed
adapt time series to new pipelines
1 parent c2ffc02 commit c9f1ca7

File tree

5 files changed

+33
-49
lines changed

5 files changed

+33
-49
lines changed

autoPyTorch/pipeline/components/preprocessing/time_series_preprocessing/TimeSeriesTransformer.py

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
autoPyTorchTimeSeriesPreprocessingComponent,
1313
autoPyTorchTimeSeriesTargetPreprocessingComponent)
1414
from autoPyTorch.pipeline.components.preprocessing.time_series_preprocessing.utils import (
15-
get_time_series_preprocessers, get_time_series_target_preprocessers)
15+
get_time_series_preprocessors, get_time_series_target_preprocessers)
1616
from autoPyTorch.utils.common import FitRequirement
1717

1818

@@ -38,18 +38,25 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
3838
"""
3939
self.check_requirements(X, y)
4040

41-
preprocessors = get_time_series_preprocessers(X)
41+
preprocessors = get_time_series_preprocessors(X)
4242
column_transformers: List[Tuple[str, BaseEstimator, List[int]]] = []
43+
44+
numerical_pipeline = 'passthrough'
45+
encode_pipeline = 'passthrough'
46+
4347
if len(preprocessors['numerical']) > 0:
4448
numerical_pipeline = make_pipeline(*preprocessors['numerical'])
45-
column_transformers.append(
46-
('numerical_pipeline', numerical_pipeline, X['dataset_properties']['numerical_columns'])
47-
)
48-
if len(preprocessors['categorical']) > 0:
49-
categorical_pipeline = make_pipeline(*preprocessors['categorical'])
50-
column_transformers.append(
51-
('categorical_pipeline', categorical_pipeline, X['dataset_properties']['categorical_columns'])
52-
)
49+
50+
column_transformers.append(
51+
('numerical_pipeline', numerical_pipeline, X['dataset_properties']['numerical_columns'])
52+
)
53+
54+
if len(preprocessors['encode']) > 0:
55+
encode_pipeline = make_pipeline(*preprocessors['encode'])
56+
57+
column_transformers.append(
58+
('encode_pipeline', encode_pipeline, X['encode_columns'])
59+
)
5360

5461
# in case the preprocessing steps are disabled
5562
# i.e., NoEncoder for categorical, we want to
@@ -86,7 +93,6 @@ def __call__(self, X: pd.DataFrame) -> pd.DataFrame:
8693
if self.preprocessor is None:
8794
raise ValueError("cant call {} without fitting the column transformer first."
8895
.format(self.__class__.__name__))
89-
9096
return self.preprocessor.transform(X)
9197

9298
def get_column_transformer(self) -> ColumnTransformer:

autoPyTorch/pipeline/components/preprocessing/time_series_preprocessing/base_time_series_preprocessing.py

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,25 +2,17 @@
22

33
from sklearn.base import BaseEstimator
44

5-
from autoPyTorch.pipeline.components.preprocessing.base_preprocessing import (
6-
autoPyTorchPreprocessingComponent, autoPyTorchTargetPreprocessingComponent)
5+
from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.base_tabular_preprocessing import (
6+
autoPyTorchTabularPreprocessingComponent
7+
)
8+
from autoPyTorch.pipeline.components.preprocessing.base_preprocessing import autoPyTorchTargetPreprocessingComponent
79

810

9-
class autoPyTorchTimeSeriesPreprocessingComponent(autoPyTorchPreprocessingComponent):
11+
class autoPyTorchTimeSeriesPreprocessingComponent(autoPyTorchTabularPreprocessingComponent):
1012
"""
1113
Provides abstract interface for time series preprocessing algorithms in AutoPyTorch.
1214
"""
1315

14-
def __init__(self) -> None:
15-
super().__init__()
16-
self.preprocessor: Union[Dict[str, Optional[BaseEstimator]], BaseEstimator] = dict(
17-
numerical=None, categorical=None)
18-
19-
def __str__(self) -> str:
20-
""" Allow a nice understanding of what components where used """
21-
string = self.__class__.__name__
22-
return string
23-
2416

2517
class autoPyTorchTimeSeriesTargetPreprocessingComponent(autoPyTorchTargetPreprocessingComponent):
2618
"""

autoPyTorch/pipeline/components/preprocessing/time_series_preprocessing/encoding/time_series_base_encoder.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import Any, Dict, List, Union
1+
from typing import Any, Dict, List
22

33
from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding.base_encoder import \
44
BaseEncoder
@@ -7,7 +7,7 @@
77
from autoPyTorch.utils.common import FitRequirement
88

99

10-
class TimeSeriesBaseEncoder(autoPyTorchTimeSeriesPreprocessingComponent):
10+
class TimeSeriesBaseEncoder(autoPyTorchTimeSeriesPreprocessingComponent, BaseEncoder):
1111
"""
1212
Base class for encoder
1313
"""

autoPyTorch/pipeline/components/preprocessing/time_series_preprocessing/utils.py

Lines changed: 5 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2,32 +2,14 @@
22

33
from sklearn.base import BaseEstimator
44

5+
from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.utils import get_tabular_preprocessers
56

6-
def get_time_series_preprocessers(X: Dict[str, Any]) -> Dict[str, List[BaseEstimator]]:
7-
"""
8-
Expects fit_dictionary(X) to have numerical/categorical preprocessors
9-
(fitted numerical/categorical preprocessing nodes) that will build a pipeline in the TimeSeriesTransformer.
10-
This function parses X and extracts such components.
11-
Creates a dictionary with two keys,
12-
numerical- containing list of numerical preprocessors
13-
categorical- containing list of categorical preprocessors
14-
15-
Args:
16-
X: fit dictionary
177

18-
Returns:
19-
(Dict[str, List[BaseEstimator]]): dictionary with list of numerical and categorical preprocessors
8+
def get_time_series_preprocessors(X: Dict[str, Any]) -> Dict[str, List[BaseEstimator]]:
209
"""
21-
preprocessor = dict(numerical=list(), categorical=list()) # type: Dict[str, List[BaseEstimator]]
22-
for key, value in X.items():
23-
if isinstance(value, dict):
24-
# as each preprocessor is child of BaseEstimator
25-
if 'numerical' in value and isinstance(value['numerical'], BaseEstimator):
26-
preprocessor['numerical'].append(value['numerical'])
27-
if 'categorical' in value and isinstance(value['categorical'], BaseEstimator):
28-
preprocessor['categorical'].append(value['categorical'])
29-
30-
return preprocessor
10+
This function simply re-exposes the tabular preprocessors (returned by get_tabular_preprocessers) under a time series name.
11+
"""
12+
return get_tabular_preprocessers(X)
3113

3214

3315
def get_time_series_target_preprocessers(X: Dict[str, Any]) -> Dict[str, List[BaseEstimator]]:

autoPyTorch/pipeline/components/setup/early_preprocessor/TimeSeriesEarlyPreProcessing.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,10 @@ def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
5454
feature_names = X['dataset_properties']['feature_names']
5555
numerical_columns = X['dataset_properties']['numerical_columns']
5656
categorical_columns = X['dataset_properties']['categorical_columns']
57+
# encoding_columns = X['dataset_properties']['encoding_columns']
58+
encode_columns = X['encode_columns']
59+
import pdb
60+
pdb.set_trace()
5761

5862
# resort feature_names
5963
# Previously, the categorical features are sorted before numerical features. However,

0 commit comments

Comments
 (0)