From 4e89c19d3001c7a954c0045e122ac49d54db4ba1 Mon Sep 17 00:00:00 2001 From: Anna Banaszak <48625325+ankke@users.noreply.github.com> Date: Sun, 7 May 2023 16:18:48 +0200 Subject: [PATCH] [model] Add support for more darts models (#48) * Add quantification notebook * Add support for more darts regression models * Revert "Add quantification notebook" This reverts commit 348deb278032aeada2a38c88c9833ba3d7613c66. * fix docstring * rename notebook * Add Darts Forecasting Model * Rename models_simple to models_darts * fix isort * Remove classes from experiment name --- tests/test_models.py | 60 +++++++- tests/test_plotting.py | 6 +- tests/test_processing_panel_data.py | 5 +- tot/experiment.py | 9 +- tot/models/{__initi__.py => __init__.py} | 4 +- tot/models/models.py | 1 - .../{models_simple.py => models_darts.py} | 134 ++++++++++++++---- tot/models/models_naive.py | 4 - tot/models/models_neuralprophet.py | 2 - tot/models/models_prophet.py | 1 - 10 files changed, 183 insertions(+), 43 deletions(-) rename tot/models/{__initi__.py => __init__.py} (57%) rename tot/models/{models_simple.py => models_darts.py} (60%) diff --git a/tests/test_models.py b/tests/test_models.py index 6acb581..0f5db27 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -5,14 +5,15 @@ import pandas as pd import pytest +from darts.models import NaiveDrift from tot.benchmark import SimpleBenchmark from tot.datasets.dataset import Dataset from tot.evaluation.metrics import ERROR_FUNCTIONS +from tot.models.models_darts import DartsForecastingModel, LinearRegressionModel, RandomForestModel from tot.models.models_naive import NaiveModel, SeasonalNaiveModel from tot.models.models_neuralprophet import NeuralProphetModel, TorchProphetModel from tot.models.models_prophet import ProphetModel -from tot.models.models_simple import LinearRegressionModel log = logging.getLogger("tot.test") log.setLevel("WARNING") @@ -27,7 +28,6 @@ if not os.path.isdir(SAVE_DIR): os.makedirs(SAVE_DIR) - NROWS = 128 EPOCHS = 2 BATCH_SIZE = 64 @@ -270,6 +270,62 @@ def test_linear_regression_model(): print(results_test) +def test_random_forest_model(): + air_passengers_df = pd.read_csv(AIR_FILE, nrows=NROWS) + peyton_manning_df = pd.read_csv(PEYTON_FILE, nrows=NROWS) + dataset_list = [ + Dataset(df=air_passengers_df, name="air_passengers", freq="MS"), + Dataset(df=peyton_manning_df, name="peyton_manning", freq="D"), + ] + model_classes_and_params = [ + ( + RandomForestModel, + {"lags": 12, "output_chunk_length": 1, "n_forecasts": 4}, + ), + ] + log.debug("{}".format(model_classes_and_params)) + + benchmark = SimpleBenchmark( + model_classes_and_params=model_classes_and_params, + datasets=dataset_list, + metrics=list(ERROR_FUNCTIONS.keys()), + test_percentage=0.25, + save_dir=SAVE_DIR, + num_processes=1, + ) + results_train, results_test = benchmark.run() + log.info("#### test_random_forest_model") + print(results_test) + + +def test_darts_model(): + air_passengers_df = pd.read_csv(AIR_FILE, nrows=NROWS) + peyton_manning_df = pd.read_csv(PEYTON_FILE, nrows=NROWS) + dataset_list = [ + Dataset(df=air_passengers_df, name="air_passengers", freq="MS"), + Dataset(df=peyton_manning_df, name="peyton_manning", freq="D"), + ] + model_classes_and_params = [ + ( + DartsForecastingModel, + {"model": NaiveDrift, "retrain": True, "lags": 12, "n_forecasts": 4}, + ), + ] + log.debug("{}".format(model_classes_and_params)) + + benchmark = SimpleBenchmark( + model_classes_and_params=model_classes_and_params, + datasets=dataset_list, + metrics=list(ERROR_FUNCTIONS.keys()), + test_percentage=0.25, + save_dir=SAVE_DIR, + num_processes=1, + ) + results_train, results_test = benchmark.run() + log.info("#### test_darts_model") + print(results_test) + + def test_torch_prophet_model(): air_passengers_df = pd.read_csv(AIR_FILE, nrows=NROWS) peyton_manning_df = pd.read_csv(PEYTON_FILE, nrows=NROWS) diff --git a/tests/test_plotting.py b/tests/test_plotting.py index 11f3465..36af19c 100644 --- a/tests/test_plotting.py +++ b/tests/test_plotting.py @@ -5,14 +5,15 @@ import pandas as pd import pytest +from darts.models import NaiveDrift from tot.benchmark import SimpleBenchmark from tot.datasets.dataset import Dataset from tot.evaluation.metrics import ERROR_FUNCTIONS +from tot.models.models_darts import DartsForecastingModel, LinearRegressionModel, RandomForestModel from tot.models.models_naive import NaiveModel, SeasonalNaiveModel from tot.models.models_neuralprophet import NeuralProphetModel from tot.models.models_prophet import ProphetModel -from tot.models.models_simple import LinearRegressionModel from tot.plotting import plot_plotly log = logging.getLogger("tot.test") @@ -28,7 +29,6 @@ if not os.path.isdir(SAVE_DIR): os.makedirs(SAVE_DIR) - NROWS = 128 EPOCHS = 2 BATCH_SIZE = 64 @@ -53,6 +53,8 @@ def test_basic_plot(plotting_backend): (SeasonalNaiveModel, {"n_forecasts": 4, "season_length": 12}), (ProphetModel, {}), (LinearRegressionModel, {"lags": 12, "output_chunk_length": 1, "n_forecasts": 4}), + (RandomForestModel, {"lags": 24, "output_chunk_length": 8, "n_forecasts": 8}), + (DartsForecastingModel, {"model": NaiveDrift, "retrain": True, "lags": 12, "n_forecasts": 4}), ] benchmark = SimpleBenchmark( diff --git a/tests/test_processing_panel_data.py b/tests/test_processing_panel_data.py index 65b12a3..a85584c 100644 --- a/tests/test_processing_panel_data.py +++ b/tests/test_processing_panel_data.py @@ -6,13 +6,14 @@ import pandas as pd import pytest +from darts.models import NaiveDrift from tot.benchmark import CrossValidationBenchmark, ManualCVBenchmark, SimpleBenchmark from tot.datasets.dataset import Dataset from tot.experiment import CrossValidationExperiment +from tot.models.models_darts import DartsForecastingModel, LinearRegressionModel, RandomForestModel from tot.models.models_naive import NaiveModel, SeasonalNaiveModel from tot.models.models_neuralprophet import NeuralProphetModel, TorchProphetModel -from tot.models.models_simple import LinearRegressionModel log = logging.getLogger("tot.test") log.setLevel("WARNING") @@ -85,6 +86,8 @@ def test_benchmark_panel_data_input(): (LinearRegressionModel, {"lags": 24, "output_chunk_length": 8, "n_forecasts": 8}), (NaiveModel, {"n_forecasts": 8}), (SeasonalNaiveModel, {"n_forecasts": 8, "season_length": 24}), + (RandomForestModel, {"lags": 24, "output_chunk_length": 8, "n_forecasts": 8}), + (DartsForecastingModel, {"model": NaiveDrift, "retrain": True, "lags": 12, "n_forecasts": 4}), ] log.debug("{}".format(model_classes_and_params)) diff --git a/tot/experiment.py b/tot/experiment.py index dee4b14..b02194c 100644 --- a/tot/experiment.py +++ b/tot/experiment.py @@ -52,11 +52,14 @@ def __post_init__(self): if hasattr(self.data, "freq") and self.data.freq is not None: data_params["freq"] = self.data.freq self.params.update({"_data_params": data_params}) + model_name = self.params.get("model", self.model_class).__name__ + params_repr = self.params.copy() + params_repr.pop("model", None) if not hasattr(self, "experiment_name") or self.experiment_name is None: self.experiment_name = "{}_{}{}".format( self.data.name, - self.model_class.model_name, - r"".join([r"_{0}_{1}".format(k, v) for k, v in self.params.items()]) + model_name, + r"".join([r"_{0}_{1}".format(k, v) for k, v in params_repr.items()]) .replace("'", "") .replace(":", "_") .replace("{", "_") @@ -67,7 +70,7 @@ def __post_init__(self): if not hasattr(self, "metadata") or self.metadata is None: self.metadata = { "data": self.data.name, - "model": self.model_class.model_name, + "model": model_name, "params": str(self.params), "experiment": self.experiment_name, } diff --git a/tot/models/__initi__.py b/tot/models/__init__.py similarity index 57% rename from tot/models/__initi__.py rename to tot/models/__init__.py index e46a13f..b593b24 100644 --- a/tot/models/__initi__.py +++ b/tot/models/__init__.py @@ -1,5 +1,7 @@ # make classes available upon package import +from .models_darts import DartsForecastingModel # noqa: F401 to evade flake8 +from .models_darts import LinearRegressionModel # noqa: F401 to evade flake8 +from .models_darts import RandomForestModel # noqa: F401 to evade flake8 from .models_naive import NaiveModel, SeasonalNaiveModel # noqa: F401 to evade flake8 from .models_neuralprophet import NeuralProphetModel, TorchProphetModel # noqa: F401 to evade flake8 from .models_prophet import ProphetModel # noqa: F401 to evade flake8 -from .models_simple import LinearRegressionModel # noqa: F401 to evade flake8 diff --git a/tot/models/models.py b/tot/models/models.py index e49b24e..9d0f2d0 100644 --- a/tot/models/models.py +++ b/tot/models/models.py @@ -21,7 +21,6 @@ class Model(ABC): """ params: dict - model_name: str @abstractmethod def fit(self, df: pd.DataFrame, freq: str): diff --git a/tot/models/models_simple.py b/tot/models/models_darts.py similarity index 60% rename from tot/models/models_simple.py rename to tot/models/models_darts.py index 3d01271..7196545 100644 --- a/tot/models/models_simple.py +++ b/tot/models/models_darts.py @@ -13,11 +13,13 @@ # check import of implemented models and consider order of imports try: + from sklearn.ensemble import RandomForestRegressor from sklearn.linear_model import LinearRegression _sklearn_installed = True except ImportError: LinearRegression = None + RandomForestRegressor = None _sklearn_installed = False raise ImportError( "The LinearRegression model could not be imported." @@ -38,16 +40,16 @@ @dataclass -class LinearRegressionModel(Model): +class DartsForecastingModel(Model): """ - A forecasting model using a linear regression of the target series' lags to obtain a forecast. + A forecasting model using a model from the darts library. Examples -------- >>> model_classes_and_params = [ >>> ( - >>> LinearRegressionModel, - >>> {"lags": 12, "n_forecasts": 4}, + >>> DartsForecastingModel, + >>> {"model": NaiveDrift, "retrain": True, "lags": 12, "n_forecasts": 4}, >>> ), >>> ] >>> @@ -61,32 +63,26 @@ class LinearRegressionModel(Model): >>> ) """ - model_name: str = "LinearRegressionModel" - model_class: Type = RegressionModel + retrain: bool = False def __post_init__(self): # check if installed - if not (_darts_installed or _sklearn_installed): + if not _darts_installed: raise RuntimeError( - "Requires darts and sklearn to be installed:" - "https://scikit-learn.org/stable/install.html" - "https://github.com/unit8co/darts/blob/master/INSTALL.md" + "Requires darts to be installed:" "https://github.com/unit8co/darts/blob/master/INSTALL.md" ) - + self.n_forecasts = self.params["n_forecasts"] + self.n_lags = self.params["lags"] + self.retrain = self.params.get("retrain", False) model_params = deepcopy(self.params) model_params.pop("_data_params") - # n_forecasts is not a parameter of the model model_params.pop("n_forecasts") - # overwrite output_chunk_length with n_forecasts - model_params.update({"output_chunk_length": self.params["n_forecasts"]}) - model = LinearRegression(n_jobs=-1) # n_jobs=-1 indicates to use all processors - model_params.update({"model": model}) # assign model - self.model = self.model_class(**model_params) - self.n_forecasts = self.params["n_forecasts"] - self.n_lags = model_params["lags"] - # input checks are provided by model itself + model_params.pop("lags") + model_params.pop("retrain", None) + model = model_params.pop("model") + self.model = model(**model_params) - def fit(self, df: pd.DataFrame, freq: str): + def fit(self, df: pd.DataFrame, freq: str) -> None: """Fits the regression model. Parameters @@ -99,9 +95,11 @@ def fit(self, df: pd.DataFrame, freq: str): _check_min_df_len(df=df, min_len=self.n_forecasts + self.n_lags) self.freq = freq series = convert_df_to_TimeSeries(df, freq=self.freq) - self.model = self.model.fit(series) + self.model.fit(series) - def predict(self, df: pd.DataFrame, received_single_time_series, df_historic: pd.DataFrame = None): + def predict( + self, df: pd.DataFrame, received_single_time_series: bool, df_historic: pd.DataFrame = None + ) -> pd.DataFrame: """Runs the model to make predictions. Expects all data to be present in dataframe. @@ -130,14 +128,14 @@ def predict(self, df: pd.DataFrame, received_single_time_series, df_historic: pd model=self, past_observations_per_prediction=self.n_lags, future_observations_per_prediction=self.n_forecasts, - retrain=False, + retrain=self.retrain, received_single_time_series=received_single_time_series, ) if df_historic is not None: fcst = self.maybe_drop_added_values_from_df(fcst, df) return fcst - def maybe_extend_df(self, df_train, df_test): + def maybe_extend_df(self, df_train: pd.DataFrame, df_test: pd.DataFrame) -> pd.DataFrame: """ If model depends on historic values, extend beginning of df_test with last df_train values. @@ -146,9 +144,93 @@ def maybe_extend_df(self, df_train, df_test): return df_test - def maybe_drop_added_values_from_df(self, predicted, df): + def maybe_drop_added_values_from_df(self, predicted: pd.DataFrame, df: pd.DataFrame) -> pd.DataFrame: """ If model depends on historic values, drop first values of predicted and df_test. """ predicted = drop_first_inputs_from_df(samples=self.n_lags, predicted=predicted, df=df) return predicted + + +class DartsRegressionModel(DartsForecastingModel): + """ + A forecasting model using a regression model from the darts library. + """ + + model_class: Type = RegressionModel + regression_class: Type + + def __post_init__(self): + # check if installed + if not (_darts_installed or _sklearn_installed): + raise RuntimeError( + "Requires darts and sklearn to be installed:" + "https://scikit-learn.org/stable/install.html" + "https://github.com/unit8co/darts/blob/master/INSTALL.md" + ) + params = deepcopy(self.params) + params.pop("_data_params") + # n_forecasts is not a parameter of the model + params.pop("n_forecasts") + # overwrite output_chunk_length with n_forecasts + params.update({"output_chunk_length": self.params["n_forecasts"]}) + model = self.regression_class(n_jobs=-1) # n_jobs=-1 indicates to use all processors + params.update({"model": model}) # assign model + self.model = self.model_class(**params) + self.n_forecasts = self.params["n_forecasts"] + self.n_lags = params["lags"] + # input checks are provided by model itself + + +@dataclass +class LinearRegressionModel(DartsRegressionModel): + """ + A forecasting model using a linear regression of the target series' lags to obtain a forecast. + + Examples + -------- + >>> model_classes_and_params = [ + >>> ( + >>> LinearRegressionModel, + >>> {"lags": 12, "n_forecasts": 4}, + >>> ), + >>> ] + >>> + >>> benchmark = SimpleBenchmark( + >>> model_classes_and_params=model_classes_and_params, + >>> datasets=dataset_list, + >>> metrics=list(ERROR_FUNCTIONS.keys()), + >>> test_percentage=25, + >>> save_dir=SAVE_DIR, + >>> num_processes=1, + >>> ) + """ + + regression_class: Type = LinearRegression + + +@dataclass +class RandomForestModel(DartsRegressionModel): + """ + A forecasting model using a random forest to obtain a forecast. + + Examples + -------- + >>> model_classes_and_params = [ + >>> ( + >>> RandomForestModel, + >>> {"lags": 12, "n_forecasts": 4}, + >>> ), + >>> ] + >>> + >>> benchmark = SimpleBenchmark( + >>> model_classes_and_params=model_classes_and_params, + >>> datasets=dataset_list, + >>> metrics=list(ERROR_FUNCTIONS.keys()), + >>> test_percentage=25, + >>> save_dir=SAVE_DIR, + >>> num_processes=1, + >>> ) + """ + + regression_class: Type = RandomForestRegressor diff --git a/tot/models/models_naive.py b/tot/models/models_naive.py index ee0c8f8..4f34b76 100644 --- a/tot/models/models_naive.py +++ b/tot/models/models_naive.py @@ -26,8 +26,6 @@ class SeasonalNaiveModel(Model): number of steps ahead of prediction time step to forecast """ - model_name: str = "SeasonalNaive" - def __post_init__(self): # no installation checks required @@ -144,8 +142,6 @@ class NaiveModel(SeasonalNaiveModel): If Model parameter n_forecasts is less than 1. """ - model_name: str = "NaiveModel" - def __post_init__(self): # no installation checks required diff --git a/tot/models/models_neuralprophet.py b/tot/models/models_neuralprophet.py index 788c304..4eb7e5b 100644 --- a/tot/models/models_neuralprophet.py +++ b/tot/models/models_neuralprophet.py @@ -16,7 +16,6 @@ @dataclass class NeuralProphetModel(Model): - model_name: str = "NeuralProphet" model_class: Type = NeuralProphet def __post_init__(self): @@ -157,7 +156,6 @@ def maybe_drop_added_values_from_df(self, predicted, df): @dataclass class TorchProphetModel(NeuralProphetModel): - model_name: str = "TorchProphet" model_class: Type = TorchProphet def __post_init__(self): diff --git a/tot/models/models_prophet.py b/tot/models/models_prophet.py index 1354840..1828ad0 100644 --- a/tot/models/models_prophet.py +++ b/tot/models/models_prophet.py @@ -28,7 +28,6 @@ @dataclass class ProphetModel(Model): - model_name: str = "Prophet" model_class: Type = Prophet def __post_init__(self):