adapt time series to new pipelines

dengdifan · dengdifan · commit c9f1ca7535f4 · 2022-08-19T16:54:37.000+02:00
diff --git a/autoPyTorch/pipeline/components/preprocessing/time_series_preprocessing/TimeSeriesTransformer.py b/autoPyTorch/pipeline/components/preprocessing/time_series_preprocessing/TimeSeriesTransformer.py
@@ -12,7 +12,7 @@
     autoPyTorchTimeSeriesPreprocessingComponent,
     autoPyTorchTimeSeriesTargetPreprocessingComponent)
 from autoPyTorch.pipeline.components.preprocessing.time_series_preprocessing.utils import (
-    get_time_series_preprocessers, get_time_series_target_preprocessers)
+    get_time_series_preprocessors, get_time_series_target_preprocessers)
 from autoPyTorch.utils.common import FitRequirement
 
 
@@ -38,18 +38,25 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
         """
         self.check_requirements(X, y)
 
-        preprocessors = get_time_series_preprocessers(X)
+        preprocessors = get_time_series_preprocessors(X)
         column_transformers: List[Tuple[str, BaseEstimator, List[int]]] = []
+
+        numerical_pipeline = 'passthrough'
+        encode_pipeline = 'passthrough'
+
         if len(preprocessors['numerical']) > 0:
             numerical_pipeline = make_pipeline(*preprocessors['numerical'])
-            column_transformers.append(
-                ('numerical_pipeline', numerical_pipeline, X['dataset_properties']['numerical_columns'])
-            )
-        if len(preprocessors['categorical']) > 0:
-            categorical_pipeline = make_pipeline(*preprocessors['categorical'])
-            column_transformers.append(
-                ('categorical_pipeline', categorical_pipeline, X['dataset_properties']['categorical_columns'])
-            )
+
+        column_transformers.append(
+            ('numerical_pipeline', numerical_pipeline, X['dataset_properties']['numerical_columns'])
+        )
+
+        if len(preprocessors['encode']) > 0:
+            encode_pipeline = make_pipeline(*preprocessors['encode'])
+
+        column_transformers.append(
+            ('encode_pipeline', encode_pipeline, X['encode_columns'])
+        )
 
         # in case the preprocessing steps are disabled
         # i.e, NoEncoder for categorical, we want to
@@ -86,7 +93,6 @@ def __call__(self, X: pd.DataFrame) -> pd.DataFrame:
         if self.preprocessor is None:
             raise ValueError("cant call {} without fitting the column transformer first."
                              .format(self.__class__.__name__))
-
         return self.preprocessor.transform(X)
 
     def get_column_transformer(self) -> ColumnTransformer:
diff --git a/autoPyTorch/pipeline/components/preprocessing/time_series_preprocessing/base_time_series_preprocessing.py b/autoPyTorch/pipeline/components/preprocessing/time_series_preprocessing/base_time_series_preprocessing.py
@@ -2,25 +2,17 @@
 
 from sklearn.base import BaseEstimator
 
-from autoPyTorch.pipeline.components.preprocessing.base_preprocessing import (
-    autoPyTorchPreprocessingComponent, autoPyTorchTargetPreprocessingComponent)
+from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.base_tabular_preprocessing import (
+    autoPyTorchTabularPreprocessingComponent
+)
+from autoPyTorch.pipeline.components.preprocessing.base_preprocessing import autoPyTorchTargetPreprocessingComponent
 
 
-class autoPyTorchTimeSeriesPreprocessingComponent(autoPyTorchPreprocessingComponent):
+class autoPyTorchTimeSeriesPreprocessingComponent(autoPyTorchTabularPreprocessingComponent):
     """
      Provides abstract interface for time series preprocessing algorithms in AutoPyTorch.
     """
 
-    def __init__(self) -> None:
-        super().__init__()
-        self.preprocessor: Union[Dict[str, Optional[BaseEstimator]], BaseEstimator] = dict(
-            numerical=None, categorical=None)
-
-    def __str__(self) -> str:
-        """ Allow a nice understanding of what components where used """
-        string = self.__class__.__name__
-        return string
-
 
 class autoPyTorchTimeSeriesTargetPreprocessingComponent(autoPyTorchTargetPreprocessingComponent):
     """
diff --git a/autoPyTorch/pipeline/components/preprocessing/time_series_preprocessing/encoding/time_series_base_encoder.py b/autoPyTorch/pipeline/components/preprocessing/time_series_preprocessing/encoding/time_series_base_encoder.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, List, Union
+from typing import Any, Dict, List
 
 from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding.base_encoder import \
     BaseEncoder
@@ -7,7 +7,7 @@
 from autoPyTorch.utils.common import FitRequirement
 
 
-class TimeSeriesBaseEncoder(autoPyTorchTimeSeriesPreprocessingComponent):
+class TimeSeriesBaseEncoder(autoPyTorchTimeSeriesPreprocessingComponent, BaseEncoder):
     """
     Base class for encoder
     """
diff --git a/autoPyTorch/pipeline/components/preprocessing/time_series_preprocessing/utils.py b/autoPyTorch/pipeline/components/preprocessing/time_series_preprocessing/utils.py
@@ -2,32 +2,14 @@
 
 from sklearn.base import BaseEstimator
 
+from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.utils import get_tabular_preprocessers
 
-def get_time_series_preprocessers(X: Dict[str, Any]) -> Dict[str, List[BaseEstimator]]:
-    """
-    Expects fit_dictionary(X) to have numerical/categorical preprocessors
-    (fitted numerical/categorical preprocessing nodes) that will build a pipeline in the TimeSeriesTransformer.
-    This function parses X and extracts such components.
-    Creates a dictionary with two keys,
-    numerical- containing list of numerical preprocessors
-    categorical- containing list of categorical preprocessors
-
-    Args:
-        X: fit dictionary
 
-    Returns:
-        (Dict[str, List[BaseEstimator]]): dictionary with list of numerical and categorical preprocessors
+def get_time_series_preprocessors(X: Dict[str, Any]) -> Dict[str, List[BaseEstimator]]:
     """
-    preprocessor = dict(numerical=list(), categorical=list())  # type: Dict[str, List[BaseEstimator]]
-    for key, value in X.items():
-        if isinstance(value, dict):
-            # as each preprocessor is child of BaseEstimator
-            if 'numerical' in value and isinstance(value['numerical'], BaseEstimator):
-                preprocessor['numerical'].append(value['numerical'])
-            if 'categorical' in value and isinstance(value['categorical'], BaseEstimator):
-                preprocessor['categorical'].append(value['categorical'])
-
-    return preprocessor
+    Thin wrapper that exposes get_tabular_preprocessers under a time series specific name.
+    """
+    return get_tabular_preprocessers(X)
 
 
 def get_time_series_target_preprocessers(X: Dict[str, Any]) -> Dict[str, List[BaseEstimator]]:
diff --git a/autoPyTorch/pipeline/components/setup/early_preprocessor/TimeSeriesEarlyPreProcessing.py b/autoPyTorch/pipeline/components/setup/early_preprocessor/TimeSeriesEarlyPreProcessing.py
@@ -54,6 +54,7 @@ def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
         feature_names = X['dataset_properties']['feature_names']
         numerical_columns = X['dataset_properties']['numerical_columns']
         categorical_columns = X['dataset_properties']['categorical_columns']
+        encode_columns = X['encode_columns']
 
         # resort feature_names
         # Previously, the categorical features are sorted before numerical features. However,