[model] Add support for more darts models (#48)
* Add quantification notebook

* Add support for more darts regression models

* Revert "Add quantification notebook"

This reverts commit 348deb2.

* fix docstring

* rename notebook

* Add Darts Forecasting Model

* Rename models_simple to models_darts

* fix isort

* Remove classes from experiment name
ankke authored May 7, 2023
1 parent 391dc93 commit 4e89c19
Showing 10 changed files with 183 additions and 43 deletions.
60 changes: 58 additions & 2 deletions tests/test_models.py
@@ -5,14 +5,15 @@

import pandas as pd
import pytest
from darts.models import NaiveDrift

from tot.benchmark import SimpleBenchmark
from tot.datasets.dataset import Dataset
from tot.evaluation.metrics import ERROR_FUNCTIONS
from tot.models.models_darts import DartsForecastingModel, LinearRegressionModel, RandomForestModel
from tot.models.models_naive import NaiveModel, SeasonalNaiveModel
from tot.models.models_neuralprophet import NeuralProphetModel, TorchProphetModel
from tot.models.models_prophet import ProphetModel
from tot.models.models_simple import LinearRegressionModel

log = logging.getLogger("tot.test")
log.setLevel("WARNING")
@@ -27,7 +28,6 @@
if not os.path.isdir(SAVE_DIR):
os.makedirs(SAVE_DIR)


NROWS = 128
EPOCHS = 2
BATCH_SIZE = 64
@@ -270,6 +270,62 @@ def test_linear_regression_model():
print(results_test)


def test_random_forest_model():
air_passengers_df = pd.read_csv(AIR_FILE, nrows=NROWS)
peyton_manning_df = pd.read_csv(PEYTON_FILE, nrows=NROWS)
dataset_list = [
Dataset(df=air_passengers_df, name="air_passengers", freq="MS"),
Dataset(df=peyton_manning_df, name="peyton_manning", freq="D"),
]
model_classes_and_params = [
(
RandomForestModel,
{"lags": 12, "output_chunk_length": 1, "n_forecasts": 4},
),
]
log.debug("{}".format(model_classes_and_params))

benchmark = SimpleBenchmark(
model_classes_and_params=model_classes_and_params,
datasets=dataset_list,
metrics=list(ERROR_FUNCTIONS.keys()),
test_percentage=0.25,
save_dir=SAVE_DIR,
num_processes=1,
)
results_train, results_test = benchmark.run()
log.info("#### test_random_forest_model")
print(results_test)


def test_darts_model():
air_passengers_df = pd.read_csv(AIR_FILE, nrows=NROWS)
peyton_manning_df = pd.read_csv(PEYTON_FILE, nrows=NROWS)
dataset_list = [
Dataset(df=air_passengers_df, name="air_passengers", freq="MS"),
Dataset(df=peyton_manning_df, name="peyton_manning", freq="D"),
]
model_classes_and_params = [
(
DartsForecastingModel,
{"model": NaiveDrift, "retrain": True, "lags": 12, "n_forecasts": 4},
),
]
log.debug("{}".format(model_classes_and_params))

benchmark = SimpleBenchmark(
model_classes_and_params=model_classes_and_params,
datasets=dataset_list,
metrics=list(ERROR_FUNCTIONS.keys()),
test_percentage=0.25,
save_dir=SAVE_DIR,
num_processes=1,
)
results_train, results_test = benchmark.run()
log.info("#### test_darts_model")
print(results_test)


def test_torch_prophet_model():
air_passengers_df = pd.read_csv(AIR_FILE, nrows=NROWS)
peyton_manning_df = pd.read_csv(PEYTON_FILE, nrows=NROWS)
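For context, the DartsForecastingModel wrapper exercised in the new test instantiates whatever darts class is passed via the "model" parameter; its fit() converts the dataframe to a darts TimeSeries and fits that model, while predictions go through a rolling helper. A rough standalone sketch of the underlying darts calls (illustrative data, not taken from the repo, assuming darts is installed):

import pandas as pd
from darts import TimeSeries
from darts.models import NaiveDrift

# Toy monthly series standing in for the air_passengers dataframe used in the tests.
df = pd.DataFrame({"ds": pd.date_range("2020-01-01", periods=36, freq="MS"), "y": [float(v) for v in range(36)]})
series = TimeSeries.from_dataframe(df, time_col="ds", value_cols="y")

model = NaiveDrift()         # the darts class handed over via the "model" param
model.fit(series)            # what the wrapper's fit() delegates to
forecast = model.predict(4)  # a 4-step forecast, matching n_forecasts=4
print(forecast.values())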
6 changes: 4 additions & 2 deletions tests/test_plotting.py
@@ -5,14 +5,15 @@

import pandas as pd
import pytest
from darts.models import NaiveDrift

from tot.benchmark import SimpleBenchmark
from tot.datasets.dataset import Dataset
from tot.evaluation.metrics import ERROR_FUNCTIONS
from tot.models.models_darts import DartsForecastingModel, LinearRegressionModel, RandomForestModel
from tot.models.models_naive import NaiveModel, SeasonalNaiveModel
from tot.models.models_neuralprophet import NeuralProphetModel
from tot.models.models_prophet import ProphetModel
from tot.models.models_simple import LinearRegressionModel
from tot.plotting import plot_plotly

log = logging.getLogger("tot.test")
@@ -28,7 +29,6 @@
if not os.path.isdir(SAVE_DIR):
os.makedirs(SAVE_DIR)


NROWS = 128
EPOCHS = 2
BATCH_SIZE = 64
@@ -53,6 +53,8 @@ def test_basic_plot(plotting_backend):
(SeasonalNaiveModel, {"n_forecasts": 4, "season_length": 12}),
(ProphetModel, {}),
(LinearRegressionModel, {"lags": 12, "output_chunk_length": 1, "n_forecasts": 4}),
(RandomForestModel, {"lags": 24, "output_chunk_length": 8, "n_forecasts": 8}),
(DartsForecastingModel, {"model": NaiveDrift, "retrain": True, "lags": 12, "n_forecasts": 4}),
]

benchmark = SimpleBenchmark(
5 changes: 4 additions & 1 deletion tests/test_processing_panel_data.py
@@ -6,13 +6,14 @@

import pandas as pd
import pytest
from darts.models import NaiveDrift

from tot.benchmark import CrossValidationBenchmark, ManualCVBenchmark, SimpleBenchmark
from tot.datasets.dataset import Dataset
from tot.experiment import CrossValidationExperiment
from tot.models.models_darts import DartsForecastingModel, LinearRegressionModel, RandomForestModel
from tot.models.models_naive import NaiveModel, SeasonalNaiveModel
from tot.models.models_neuralprophet import NeuralProphetModel, TorchProphetModel
from tot.models.models_simple import LinearRegressionModel

log = logging.getLogger("tot.test")
log.setLevel("WARNING")
@@ -85,6 +86,8 @@ def test_benchmark_panel_data_input():
(LinearRegressionModel, {"lags": 24, "output_chunk_length": 8, "n_forecasts": 8}),
(NaiveModel, {"n_forecasts": 8}),
(SeasonalNaiveModel, {"n_forecasts": 8, "season_length": 24}),
(RandomForestModel, {"lags": 24, "output_chunk_length": 8, "n_forecasts": 8}),
(DartsForecastingModel, {"model": NaiveDrift, "retrain": True, "lags": 12, "n_forecasts": 4}),
]
log.debug("{}".format(model_classes_and_params))

9 changes: 6 additions & 3 deletions tot/experiment.py
@@ -52,11 +52,14 @@ def __post_init__(self):
if hasattr(self.data, "freq") and self.data.freq is not None:
data_params["freq"] = self.data.freq
self.params.update({"_data_params": data_params})
model_name = self.params.get("model", self.model_class).__name__
params_repr = self.params.copy()
params_repr.pop("model", None)
if not hasattr(self, "experiment_name") or self.experiment_name is None:
self.experiment_name = "{}_{}{}".format(
self.data.name,
self.model_class.model_name,
r"".join([r"_{0}_{1}".format(k, v) for k, v in self.params.items()])
model_name,
r"".join([r"_{0}_{1}".format(k, v) for k, v in params_repr.items()])
.replace("'", "")
.replace(":", "_")
.replace("{", "_")
@@ -67,7 +70,7 @@
if not hasattr(self, "metadata") or self.metadata is None:
self.metadata = {
"data": self.data.name,
"model": self.model_class.model_name,
"model": model_name,
"params": str(self.params),
"experiment": self.experiment_name,
}
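The naming change above is what the commit message calls removing classes from the experiment name: when a "model" entry (e.g. NaiveDrift) is present in the params, the experiment is named after that class and the class object is kept out of the parameter string. A minimal standalone sketch of the new logic (stand-in classes, the _data_params entry omitted for brevity):

class NaiveDrift:  # stand-in for darts.models.NaiveDrift
    pass

class DartsForecastingModel:  # stand-in for the tot wrapper class
    pass

data_name = "air_passengers"
model_class = DartsForecastingModel
params = {"model": NaiveDrift, "retrain": True, "lags": 12, "n_forecasts": 4}

# Prefer the wrapped darts class name; fall back to the wrapper class itself.
model_name = params.get("model", model_class).__name__
params_repr = params.copy()
params_repr.pop("model", None)  # keep the class object out of the name string

experiment_name = "{}_{}{}".format(
    data_name,
    model_name,
    "".join("_{0}_{1}".format(k, v) for k, v in params_repr.items()),
)
print(experiment_name)  # air_passengers_NaiveDrift_retrain_True_lags_12_n_forecasts_4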
4 changes: 3 additions & 1 deletion tot/models/__initi__.py → tot/models/__init__.py
@@ -1,5 +1,7 @@
# make classes available upon package import
from .models_darts import DartsForecastingModel # noqa: F401 to evade flake8
from .models_darts import LinearRegressionModel # noqa: F401 to evade flake8
from .models_darts import RandomForestModel # noqa: F401 to evade flake8
from .models_naive import NaiveModel, SeasonalNaiveModel # noqa: F401 to evade flake8
from .models_neuralprophet import NeuralProphetModel, TorchProphetModel # noqa: F401 to evade flake8
from .models_prophet import ProphetModel # noqa: F401 to evade flake8
from .models_simple import LinearRegressionModel # noqa: F401 to evade flake8
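With the package initializer renamed from __initi__.py to __init__.py and the re-exports above in place, the new classes should be importable from the package root (a usage sketch, assuming tot is installed):

from tot.models import DartsForecastingModel, LinearRegressionModel, RandomForestModel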
1 change: 0 additions & 1 deletion tot/models/models.py
@@ -21,7 +21,6 @@ class Model(ABC):
"""

params: dict
model_name: str

@abstractmethod
def fit(self, df: pd.DataFrame, freq: str):
134 changes: 108 additions & 26 deletions tot/models/models_simple.py → tot/models/models_darts.py
@@ -13,11 +13,13 @@

# check import of implemented models and consider order of imports
try:
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression

_sklearn_installed = True
except ImportError:
LinearRegression = None
RandomForestRegressor = None
_sklearn_installed = False
raise ImportError(
"The LinearRegression model could not be imported."
@@ -38,16 +40,16 @@


@dataclass
class LinearRegressionModel(Model):
class DartsForecastingModel(Model):
"""
A forecasting model using a linear regression of the target series' lags to obtain a forecast.
A forecasting model using a model from the darts library.
Examples
--------
>>> model_classes_and_params = [
>>> (
>>> LinearRegressionModel,
>>> {"lags": 12, "n_forecasts": 4},
>>> DartsForecastingModel,
>>> {"model": NaiveDrift, "retrain": True, "lags": 12, "n_forecasts": 4},
>>> ),
>>> ]
>>>
@@ -61,32 +63,26 @@ class LinearRegressionModel(Model):
>>> )
"""

model_name: str = "LinearRegressionModel"
model_class: Type = RegressionModel
retrain: bool = False

def __post_init__(self):
# check if installed
if not (_darts_installed or _sklearn_installed):
if not _darts_installed:
raise RuntimeError(
"Requires darts and sklearn to be installed:"
"https://scikit-learn.org/stable/install.html"
"https://github.com/unit8co/darts/blob/master/INSTALL.md"
"Requires darts to be installed:" "https://github.com/unit8co/darts/blob/master/INSTALL.md"
)

self.n_forecasts = self.params["n_forecasts"]
self.n_lags = self.params["lags"]
self.retrain = self.params.get("retrain", False)
model_params = deepcopy(self.params)
model_params.pop("_data_params")
# n_forecasts is not a parameter of the model
model_params.pop("n_forecasts")
# overwrite output_chunk_length with n_forecasts
model_params.update({"output_chunk_length": self.params["n_forecasts"]})
model = LinearRegression(n_jobs=-1) # n_jobs=-1 indicates to use all processors
model_params.update({"model": model}) # assign model
self.model = self.model_class(**model_params)
self.n_forecasts = self.params["n_forecasts"]
self.n_lags = model_params["lags"]
# input checks are provided by model itself
model_params.pop("lags")
model_params.pop("retrain", None)
model = model_params.pop("model")
self.model = model(**model_params)

def fit(self, df: pd.DataFrame, freq: str):
def fit(self, df: pd.DataFrame, freq: str) -> None:
"""Fits the regression model.
Parameters
@@ -99,9 +95,11 @@ def fit(self, df: pd.DataFrame, freq: str):
_check_min_df_len(df=df, min_len=self.n_forecasts + self.n_lags)
self.freq = freq
series = convert_df_to_TimeSeries(df, freq=self.freq)
self.model = self.model.fit(series)
self.model.fit(series)

def predict(self, df: pd.DataFrame, received_single_time_series, df_historic: pd.DataFrame = None):
def predict(
self, df: pd.DataFrame, received_single_time_series: bool, df_historic: pd.DataFrame = None
) -> pd.DataFrame:
"""Runs the model to make predictions.
Expects all data to be present in dataframe.
@@ -130,14 +128,14 @@ def predict(self, df: pd.DataFrame, received_single_time_series, df_historic: pd
model=self,
past_observations_per_prediction=self.n_lags,
future_observations_per_prediction=self.n_forecasts,
retrain=False,
retrain=self.retrain,
received_single_time_series=received_single_time_series,
)
if df_historic is not None:
fcst = self.maybe_drop_added_values_from_df(fcst, df)
return fcst

def maybe_extend_df(self, df_train, df_test):
def maybe_extend_df(self, df_train: pd.DataFrame, df_test: pd.DataFrame) -> pd.DataFrame:
"""
If model depends on historic values, extend beginning of df_test with last
df_train values.
@@ -146,9 +144,93 @@

return df_test

def maybe_drop_added_values_from_df(self, predicted, df):
def maybe_drop_added_values_from_df(self, predicted: pd.DataFrame, df: pd.DataFrame) -> pd.DataFrame:
"""
If model depends on historic values, drop first values of predicted and df_test.
"""
predicted = drop_first_inputs_from_df(samples=self.n_lags, predicted=predicted, df=df)
return predicted


class DartsRegressionModel(DartsForecastingModel):
"""
A forecasting model using a regression model from the darts library.
"""

model_class: Type = RegressionModel
regression_class: Type

def __post_init__(self):
# check if installed
if not (_darts_installed or _sklearn_installed):
raise RuntimeError(
"Requires darts and sklearn to be installed:"
"https://scikit-learn.org/stable/install.html"
"https://github.com/unit8co/darts/blob/master/INSTALL.md"
)
params = deepcopy(self.params)
params.pop("_data_params")
# n_forecasts is not a parameter of the model
params.pop("n_forecasts")
# overwrite output_chunk_length with n_forecasts
params.update({"output_chunk_length": self.params["n_forecasts"]})
model = self.regression_class(n_jobs=-1) # n_jobs=-1 indicates to use all processors
params.update({"model": model}) # assign model
self.model = self.model_class(**params)
self.n_forecasts = self.params["n_forecasts"]
self.n_lags = params["lags"]
# input checks are provided by model itself


@dataclass
class LinearRegressionModel(DartsRegressionModel):
"""
A forecasting model using a linear regression of the target series' lags to obtain a forecast.
Examples
--------
>>> model_classes_and_params = [
>>> (
>>> LinearRegressionModel,
>>> {"lags": 12, "n_forecasts": 4},
>>> ),
>>> ]
>>>
>>> benchmark = SimpleBenchmark(
>>> model_classes_and_params=model_classes_and_params,
>>> datasets=dataset_list,
>>> metrics=list(ERROR_FUNCTIONS.keys()),
>>> test_percentage=25,
>>> save_dir=SAVE_DIR,
>>> num_processes=1,
>>> )
"""

regression_class: Type = LinearRegression


@dataclass
class RandomForestModel(DartsRegressionModel):
"""
A forecasting model using a random forest to obtain a forecast.
Examples
--------
>>> model_classes_and_params = [
>>> (
>>> RandomForestModel,
>>> {"lags": 12, "n_forecasts": 4},
>>> ),
>>> ]
>>>
>>> benchmark = SimpleBenchmark(
>>> model_classes_and_params=model_classes_and_params,
>>> datasets=dataset_list,
>>> metrics=list(ERROR_FUNCTIONS.keys()),
>>> test_percentage=25,
>>> save_dir=SAVE_DIR,
>>> num_processes=1,
>>> )
"""

regression_class: Type = RandomForestRegressor
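For orientation, the regression subclasses above wrap a darts RegressionModel around an sklearn estimator: __post_init__ replaces n_forecasts with output_chunk_length and passes the estimator via the model argument. Roughly what LinearRegressionModel with {"lags": 12, "n_forecasts": 4} constructs internally (a sketch, assuming darts and sklearn are installed):

from darts.models import RegressionModel
from sklearn.linear_model import LinearRegression

# n_forecasts=4 becomes output_chunk_length=4; n_jobs=-1 uses all processors.
inner = RegressionModel(lags=12, output_chunk_length=4, model=LinearRegression(n_jobs=-1))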
