TODO: fix errors after rebase

ravinkohli · ravinkohli · commit d58dd9daf33c · 2022-08-17T15:02:09.000+02:00
diff --git a/autoPyTorch/pipeline/components/setup/early_preprocessor/EarlyPreprocessing.py b/autoPyTorch/pipeline/components/setup/early_preprocessor/EarlyPreprocessing.py
@@ -10,7 +10,7 @@
 
 from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
 from autoPyTorch.pipeline.components.setup.base_setup import autoPyTorchSetupComponent
-from autoPyTorch.pipeline.components.setup.early_preprocessor.utils import get_preprocess_transforms, preprocess
+from autoPyTorch.pipeline.components.setup.early_preprocessor.utils import get_preprocess_transforms, get_preprocessed_dtype, preprocess
 from autoPyTorch.utils.common import FitRequirement
 
 
@@ -39,11 +39,13 @@ def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
 
         X['X_train'] = preprocess(dataset=X_train, transforms=transforms)
 
+        preprocessed_dtype = get_preprocessed_dtype(X['X_train'])
+
         # We need to also save the preprocess transforms for inference
         X.update({
                  'preprocess_transforms': transforms,
                  'shape_after_preprocessing': X['X_train'].shape[1:],
-                 'preprocessed_dtype': X['X_train'].dtype.name
+                 'preprocessed_dtype': preprocessed_dtype
                  })
         return X
 
diff --git a/autoPyTorch/pipeline/components/setup/early_preprocessor/TimeSeriesEarlyPreProcessing.py b/autoPyTorch/pipeline/components/setup/early_preprocessor/TimeSeriesEarlyPreProcessing.py
@@ -10,7 +10,7 @@
 from autoPyTorch.pipeline.components.setup.early_preprocessor.EarlyPreprocessing import \
     EarlyPreprocessing
 from autoPyTorch.pipeline.components.setup.early_preprocessor.utils import (
-    get_preprocess_transforms, time_series_preprocess)
+    get_preprocess_transforms, get_preprocessed_dtype, time_series_preprocess)
 from autoPyTorch.utils.common import FitRequirement
 
 
@@ -62,11 +62,12 @@ def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
             new_feature_names += list(set(feature_names) - set(new_feature_names))
         X['dataset_properties']['feature_names'] = tuple(new_feature_names)
 
+        preprocessed_dtype = get_preprocessed_dtype(X['X_train'])
         # We need to also save the preprocess transforms for inference
         X.update({
             'preprocess_transforms': transforms,
             'shape_after_preprocessing': X['X_train'].shape[1:],
-            'preprocessed_dtype': X['X_train'].dtype.name
+            'preprocessed_dtype': preprocessed_dtype
             })
         return X
 
diff --git a/autoPyTorch/pipeline/components/setup/early_preprocessor/utils.py b/autoPyTorch/pipeline/components/setup/early_preprocessor/utils.py
@@ -13,6 +13,7 @@
     autoPyTorchPreprocessingComponent as aPTPre,
     autoPyTorchTargetPreprocessingComponent as aPTTPre
 )
+from .....utils.common import ispandas
 
 
 def get_preprocess_transforms(X: Dict[str, Any],
@@ -71,3 +72,31 @@ def time_series_preprocess(dataset: pd.DataFrame, transforms: torchvision.transf
         sub_dataset = composite_transforms(sub_dataset)
         dataset.iloc[:, indices] = sub_dataset
     return dataset
+
+
+def get_preprocessed_dtype(X_train: Union[np.ndarray, pd.DataFrame]) -> Union[str, None]:
+    """
+    Return the name of the dtype of the preprocessed training data.
+
+    Args:
+        X_train (Union[np.ndarray, pd.DataFrame]):
+            Training data after the early preprocessing transforms were applied.
+
+    Returns:
+        Union[str, None]:
+            For an array, the name of its dtype. For a DataFrame, the name of the
+            dtype all column dtypes promote to, or None when NumPy cannot determine
+            one (no columns, or column dtypes NumPy does not understand).
+    """
+    if not ispandas(X_train):
+        return X_train.dtype.name
+    # `X_train.dtypes[X_train.columns]` is a Series of per-column dtypes, so its
+    # `.name` is the Series label (None), not a dtype name. Promote the column
+    # dtypes to one common dtype instead.
+    column_dtypes = list(X_train.dtypes)
+    try:
+        return np.result_type(*column_dtypes).name
+    except (TypeError, ValueError):
+        # The trainer treats a None dtype as "use double precision", which is what
+        # every DataFrame got before because the old lookup always yielded None.
+        return None
diff --git a/autoPyTorch/pipeline/components/training/trainer/__init__.py b/autoPyTorch/pipeline/components/training/trainer/__init__.py
@@ -453,7 +453,7 @@ def _fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> 'TrainerChoic
             if preprocessed_dtype is None:
                 use_double = True
             else:
-                use_double = 'float64' in preprocessed_dtype
+                use_double = 'float64' in preprocessed_dtype or 'int64' in preprocessed_dtype
 
             # update batch norm statistics
             swa_model = self.choice.swa_model.double() if use_double else self.choice.swa_model
diff --git a/test/test_pipeline/test_time_series_forecasting_pipeline.py b/test/test_pipeline/test_time_series_forecasting_pipeline.py
@@ -46,7 +46,7 @@ class TestTimeSeriesForecastingPipeline:
                                                             "multi_variant_only_num"], indirect=True)
     def test_fit_predict(self, fit_dictionary_forecasting, forecasting_budgets):
         dataset_properties = fit_dictionary_forecasting['dataset_properties']
-        if not dataset_properties['uni_variant'] and len(dataset_properties['categories']) > 0:
+        if not dataset_properties['uni_variant'] and len(dataset_properties['num_categories_per_col']) > 0:
             include = {'network_embedding': ['LearnedEntityEmbedding']}
         else:
             include = None