diff --git a/docs/examples/skforecast.ipynb b/docs/examples/skforecast.ipynb new file mode 100644 index 0000000..53e52d6 --- /dev/null +++ b/docs/examples/skforecast.ipynb @@ -0,0 +1,464 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a9643e49", + "metadata": {}, + "source": [ + "# Using skforecast models\n", + "\n", + "This is an example for using skforecast based models with the `timecopilot` library." + ] + }, + { + "cell_type": "markdown", + "id": "1942b910", + "metadata": {}, + "source": [ + "## Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6e863299", + "metadata": {}, + "outputs": [], + "source": [ + "import nest_asyncio\n", + "\n", + "nest_asyncio.apply()\n", + "\n", + "from timecopilot import TimeCopilot\n", + "from timecopilot.models.adapters.skforecast import SKForecastAdapter\n", + "\n", + "\n", + "from skforecast.recursive import ForecasterRecursiveMultiSeries\n", + "from skforecast.preprocessing import RollingFeatures\n", + "\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.ensemble import HistGradientBoostingRegressor\n", + "\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "id": "d89d6515", + "metadata": {}, + "source": [ + "## Setup the SKForecast model\n", + "\n", + "skforecast models can be passed in the `forecasters` argument when initializing the TimeCopilot agent where they will be wrapped in an adapter with an alias based on the type name. \n", + "\n", + "If multiple skforecast forecasters of the same type are passed, each model after the first will be wrapped in an adapter with an alias that has `'_n'` appended to it with `n` being incremented by 1 for each additional occurrence of the same model type. \n", + "\n", + "For example, if you pass two `ForecasterRecursive` skforecast models, the first one will have an alias of `'skforecast.ForecasterRecursive'` and the second one will have an alias of `'skforecast.ForecasterRecursive_2'`. 
\n", + "\n", + "If you would rather specify the alias yourself, you will need to adapt the model manually with `SKForecastAdapter`.\n", + "\n", + "Note: when using a single series forecasting model, each series will be forecasted separately. You can check if a forecaster is a single series model by checking if `get_tags()['forecasting_scope']` returns `'single-series'`, or by consulting skforecast's documentation." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "5a6d1621", + "metadata": {}, + "outputs": [], + "source": [ + "window_features = RollingFeatures(stats=['mean', 'min', 'max'], window_sizes=7)\n", + "skf_forecaster = ForecasterRecursiveMultiSeries(\n", + " estimator = HistGradientBoostingRegressor(random_state=8523),\n", + " lags = 10,\n", + " encoding = 'ordinal',\n", + " transformer_series = StandardScaler(),\n", + " window_features = window_features,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "33057cb8", + "metadata": {}, + "source": [ + "### Manually adapt skforecast model\n", + "\n", + "If you would rather decide on the alias yourself, you will need to manually adapt the model with `SKForecastAdapter`.\n", + "\n", + "The `model` argument should be an skforecast `Forecaster` model. The `alias` argument should be a string that uniquely identifies the model.\n", + "\n", + "After adapting the model you would pass it in the `forecasters` argument when initializing the TimeCopilot agent.\n", + "\n", + "If you add multiple manually adapted skforecast models of the same type without specifying aliases, TimeCopilot may not be able to properly call all of them (forecasters are keyed by alias, so models that share the default alias overwrite one another)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a37f0958", + "metadata": {}, + "outputs": [], + "source": [ + "manually_adapted_model = SKForecastAdapter(\n", + " model=skf_forecaster,\n", + " alias=\"TrendForecaster\",\n", + ")\n", + "\n", + "tc = TimeCopilot(\n", + " llm=\"openai:gpt-4o\",\n", + " forecasters=[\n", + " manually_adapted_model\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "4bb01c41", + "metadata": {}, + "source": [ + "## Create a TimeCopilot instance with your skforecast model\n", + "\n", + "You will need to specify the forecasters you're using when using skforecast models. " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "12a62681", + "metadata": {}, + "outputs": [], + "source": [ + "tc = TimeCopilot(\n", + " llm=\"openai:gpt-4o\",\n", + " forecasters=[\n", + " skf_forecaster,\n", + " ],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "af5cb765", + "metadata": {}, + "source": [ + "### Extending default model list with an skforecast model\n", + "\n", + "If you want to use the default list with the addition of your skforecast model, you can make a copy of the default list and append your model to it:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "97da004b", + "metadata": {}, + "outputs": [], + "source": [ + "from timecopilot.agent import DEFAULT_MODELS\n", + "\n", + "model_list = DEFAULT_MODELS.copy()\n", + "model_list.append(skf_forecaster)\n", + "\n", + "tc = TimeCopilot(llm=\"openai:gpt-4o\", forecasters=model_list)" + ] + }, + { + "cell_type": "markdown", + "id": "16744ef9", + "metadata": {}, + "source": [ + "## Forecasting \n", + "Once that setup is complete, you can use TimeCopilot with your adapted skforecast model the same way you'd normally use TimeCopilot." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "68b6ae53", + "metadata": {}, + "outputs": [], + "source": [ + "df = 
pd.read_csv(\"https://timecopilot.s3.amazonaws.com/public/data/air_passengers.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "1bb52357", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "1it [00:00, 30.56it/s]\n", + "1it [00:00, 218.25it/s]\n", + "11it [00:00, 267.69it/s]\n" + ] + } + ], + "source": [ + "result = tc.forecast(\n", + " df=df,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "80b0c74e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The AirPassengers time series exhibits strong seasonal patterns with a seasonality period of 12 months, as indicated by the strong seasonal strength of 0.981. The trend component is quite robust and nearly linear (unitroot_kpss of 2.739 suggests trending behavior while a unitroot_pp of -6.566 indicates it's stationary in its seasonal differences). The high autocorrelation in the level series (x_acf1 of 0.948) directly reflects strong autoregressive tendencies.\n" + ] + } + ], + "source": [ + "print(result.output.tsfeatures_analysis)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "ac8e53de", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
unique_iddsSeasonalNaive
0AirPassengers1961-01-01417.0
1AirPassengers1961-02-01391.0
2AirPassengers1961-03-01419.0
3AirPassengers1961-04-01461.0
4AirPassengers1961-05-01472.0
5AirPassengers1961-06-01535.0
6AirPassengers1961-07-01622.0
7AirPassengers1961-08-01606.0
8AirPassengers1961-09-01508.0
9AirPassengers1961-10-01461.0
10AirPassengers1961-11-01390.0
11AirPassengers1961-12-01432.0
12AirPassengers1962-01-01417.0
13AirPassengers1962-02-01391.0
14AirPassengers1962-03-01419.0
15AirPassengers1962-04-01461.0
16AirPassengers1962-05-01472.0
17AirPassengers1962-06-01535.0
18AirPassengers1962-07-01622.0
19AirPassengers1962-08-01606.0
20AirPassengers1962-09-01508.0
21AirPassengers1962-10-01461.0
22AirPassengers1962-11-01390.0
23AirPassengers1962-12-01432.0
\n", + "
" + ], + "text/plain": [ + " unique_id ds SeasonalNaive\n", + "0 AirPassengers 1961-01-01 417.0\n", + "1 AirPassengers 1961-02-01 391.0\n", + "2 AirPassengers 1961-03-01 419.0\n", + "3 AirPassengers 1961-04-01 461.0\n", + "4 AirPassengers 1961-05-01 472.0\n", + "5 AirPassengers 1961-06-01 535.0\n", + "6 AirPassengers 1961-07-01 622.0\n", + "7 AirPassengers 1961-08-01 606.0\n", + "8 AirPassengers 1961-09-01 508.0\n", + "9 AirPassengers 1961-10-01 461.0\n", + "10 AirPassengers 1961-11-01 390.0\n", + "11 AirPassengers 1961-12-01 432.0\n", + "12 AirPassengers 1962-01-01 417.0\n", + "13 AirPassengers 1962-02-01 391.0\n", + "14 AirPassengers 1962-03-01 419.0\n", + "15 AirPassengers 1962-04-01 461.0\n", + "16 AirPassengers 1962-05-01 472.0\n", + "17 AirPassengers 1962-06-01 535.0\n", + "18 AirPassengers 1962-07-01 622.0\n", + "19 AirPassengers 1962-08-01 606.0\n", + "20 AirPassengers 1962-09-01 508.0\n", + "21 AirPassengers 1962-10-01 461.0\n", + "22 AirPassengers 1962-11-01 390.0\n", + "23 AirPassengers 1962-12-01 432.0" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(result.fcst_df)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "timecopilot", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/mkdocs.yml b/mkdocs.yml index 385147d..439dd28 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -30,6 +30,7 @@ nav: - examples/chronos-family.ipynb - examples/cryptocurrency-quickstart.ipynb - examples/sktime.ipynb + - examples/skforecast.ipynb - Experiments: - experiments/gift-eval.md - experiments/fev.md diff --git a/pyproject.toml b/pyproject.toml index f758004..e8613bf 100644 --- a/pyproject.toml +++ 
b/pyproject.toml @@ -13,6 +13,7 @@ dev = [ "pytest-rerunfailures>=15.1", "pytest-xdist>=3.8.0", "s3fs>=2025.3.0", + "skforecast>=0.20.0", "sktime>=0.40.1", ] docs = [ diff --git a/timecopilot/agent.py b/timecopilot/agent.py index 059cf83..5779fe3 100644 --- a/timecopilot/agent.py +++ b/timecopilot/agent.py @@ -33,6 +33,7 @@ from tsfeatures.tsfeatures import _get_feats from .forecaster import Forecaster, TimeCopilotForecaster +from .models.adapters.skforecast import SKForecastAdapter from .models.adapters.sktime import SKTimeAdapter from .models.prophet import Prophet from .models.stats import ( @@ -401,6 +402,18 @@ def _is_sktime_forecaster(obj: object) -> bool: return False +def _is_skforecast_forecaster(obj: object) -> bool: + """ + Helper function for checking if an object is an skforecast model by checking if + skforecast's ForecasterBase class is in its inheritance tree. + """ + mro_types = type(obj).__mro__ + for t in mro_types: + if t.__name__ == "ForecasterBase" and "skforecast" in t.__module__: + return True + return False + + class TimeCopilot: """ TimeCopilot: An AI agent for comprehensive time series analysis. @@ -436,9 +449,12 @@ def __init__( forecasters = DEFAULT_MODELS combined_forecasters = [] sktime_forecasters = [] + skforecast_forecasters = [] for f in forecasters: if _is_sktime_forecaster(f): sktime_forecasters.append(f) + elif _is_skforecast_forecaster(f): + skforecast_forecasters.append(f) else: combined_forecasters.append(f) type_counts: dict[str, int] = {} @@ -449,8 +465,17 @@ def __init__( alias += f"_{type_counts[type(f).__name__]}" else: type_counts[type(f).__name__] = 1 - adapted = SKTimeAdapter(f, alias=alias) - combined_forecasters.append(adapted) + adapted_skt = SKTimeAdapter(f, alias=alias) + combined_forecasters.append(adapted_skt) + for f in skforecast_forecasters: + alias = "skforecast." 
class SKForecastAdapter(ParallelForecaster):
    """
    Adapter that exposes an skforecast forecaster through the TimeCopilot
    forecaster interface.

    Forecasters whose ``forecasting_scope`` tag starts with
    ``"single-series"`` are delegated to the parent class' ``forecast``,
    which is expected to call ``_local_forecast`` for each series. All other
    forecasters are fitted once on every series of the input frame.

    Exogenous data, prediction intervals and quantile forecasts are not
    supported yet.
    """

    def __init__(
        self,
        model,
        alias: str | None = None,
        *args: Any,
        **kwargs: Any,
    ):
        """
        Args:
            model (skforecast.base.ForecasterBase): skforecast forecasting
                model to wrap.
            alias (str, optional): Custom name for the model instance.
                By default alias is retrieved from the type name of model.
            *args: Additional positional arguments passed to the parent
                class initializer.
            **kwargs: Additional keyword arguments passed to the parent
                class initializer.
        """
        super().__init__(*args, **kwargs)
        self.alias = alias if alias is not None else type(model).__name__
        self.model = model

    def _local_forecast_impl(
        self,
        df: pd.DataFrame,
        h: int,
        freq: str,
        level: list[int | float] | None = None,
        quantiles: list[float] | None = None,
    ) -> pd.DataFrame:
        """
        Fit a copy of the wrapped model on a single series and predict
        ``h`` steps ahead.

        Args:
            df (pd.DataFrame): Frame holding one series with a "ds" time
                column and a "y" target column.
            h (int): Forecast horizon.
            freq (str): Pandas frequency alias used to regularize the series.
            level: Accepted for signature compatibility; not used.
            quantiles: Accepted for signature compatibility; not used.

        Returns:
            pd.DataFrame: Frame with a "ds" column and a column named after
                ``self.alias`` holding the point forecasts.
        """
        # Fit a private copy so the forecaster handed to the adapter is never
        # mutated by a forecast call.
        model = deepcopy(self.model)
        time_col = "ds"
        series = df.set_index(time_col)["y"].asfreq(freq)
        model.fit(series)
        fcst_series = model.predict(h)
        # Name the index explicitly so the time column is always "ds",
        # whether or not the prediction index carries a name.
        fcst_df = fcst_series.rename_axis(time_col).reset_index()
        return fcst_df.rename(columns={"pred": self.alias})

    def _local_forecast(
        self,
        df: pd.DataFrame,
        h: int,
        freq: str,
        level: list[int | float] | None = None,
        quantiles: list[float] | None = None,
    ) -> pd.DataFrame:
        """
        Run ``_local_forecast_impl`` with native thread pools limited to one
        thread.

        NOTE(review): presumably this avoids thread oversubscription when the
        parent class forecasts several series in parallel -- confirm against
        ``ParallelForecaster``.
        """
        with threadpool_limits(limits=1):
            return self._local_forecast_impl(
                df=df,
                h=h,
                freq=freq,
                level=level,
                quantiles=quantiles,
            )

    def forecast(
        self,
        df: pd.DataFrame,
        h: int,
        freq: str | None = None,
        level: list[int | float] | None = None,
        quantiles: list[float] | None = None,
    ) -> pd.DataFrame:
        # fmt: off
        """
        Generate forecasts for time series data using an skforecast model.

        This method produces point forecasts. The input DataFrame can contain
        one or multiple time series in stacked (long) format.

        Prediction intervals and quantile forecasts are not currently
        supported with skforecast based models.

        Args:
            df (pd.DataFrame):
                DataFrame containing the time series to forecast. It must
                include as columns:

                    - "unique_id": an ID column to distinguish multiple series.
                    - "ds": a time column indicating timestamps or periods.
                    - "y": a target column with the observed values.

            h (int):
                Forecast horizon specifying how many future steps to predict.
            freq (str, optional):
                Frequency of the time series (e.g. "D" for daily, "M" for
                monthly). See [Pandas frequency aliases](https://pandas.pydata.org/
                pandas-docs/stable/user_guide/timeseries.html#offset-aliases) for
                valid values. If not provided, the frequency will be inferred
                from the data.
            level (list[int | float], optional):
                Confidence levels for prediction intervals. Not supported
                yet: passing a value raises ``ValueError``.
            quantiles (list[float], optional):
                Quantiles to forecast. Not supported yet: passing a value
                raises ``ValueError``.

        Returns:
            pd.DataFrame:
                DataFrame containing the point forecasts. For multi-series
                forecasters the columns are "unique_id", "ds" and a column
                named after the adapter's alias, sorted by "unique_id" and
                "ds". For single-series forecasters the result is whatever
                the parent class' ``forecast`` returns.

        Raises:
            ValueError: If `level` or `quantiles` is provided.

        Example:
            ```python
            from lightgbm import LGBMRegressor
            import pandas as pd
            from timecopilot import TimeCopilot
            from timecopilot.models.adapters.skforecast import SKForecastAdapter
            from skforecast.recursive import ForecasterRecursive
            from skforecast.preprocessing import RollingFeatures

            forecaster = ForecasterRecursive(
                estimator = LGBMRegressor(random_state=123, verbose=-1),
                lags = 10,
                window_features = RollingFeatures(stats=['mean'], window_sizes=10)
            )

            df = pd.read_csv("https://timecopilot.s3.amazonaws.com/public/data/air_passengers.csv")
            adapted_skf_model = SKForecastAdapter(forecaster)
            tc = TimeCopilot(llm="openai:gpt-4o", forecasters=[adapted_skf_model])
            result = tc.forecast(df, h=12, freq="MS")
            print(result.output)
            ```
        """
        # fmt: on
        if level is not None or quantiles is not None:
            raise ValueError(
                "Level and quantiles are not supported for adapted skforecast"
                " models yet."
            )
        if self.model.get_tags()["forecasting_scope"].startswith("single-series"):
            return super().forecast(df, h, freq=freq, level=level, quantiles=quantiles)
        freq = self._maybe_infer_freq(df, freq)
        # importing in the function since skforecast isn't a required
        # dependency but should be present when using the skforecast adapter
        # (skforecast also provides an exogenous data conversion, unused here)
        from skforecast.preprocessing import reshape_series_long_to_dict

        id_col = "unique_id"
        date_col = "ds"
        y_col = "y"
        series_dict = reshape_series_long_to_dict(
            df,
            freq=freq,
            series_id=id_col,
            index=date_col,
            values=y_col,
        )
        # Fit a private copy, matching the per-series path, so the forecaster
        # handed to the adapter is not mutated by a forecast call.
        model = deepcopy(self.model)
        model.fit(series_dict)
        fcst_df: pd.DataFrame = model.predict(h)
        fcst_df = (
            # name the index so the date column is "ds" whether or not the
            # prediction index carries a name
            fcst_df.rename_axis(date_col)
            .reset_index()
            .rename(columns={"level": id_col, "pred": self.alias})
            .sort_values([id_col, date_col], ignore_index=True)
        )
        # reindex returns a new frame; its result must be used for the
        # column selection/order to take effect.
        return fcst_df.reindex(columns=[id_col, date_col, self.alias])
100644 --- a/uv.lock +++ b/uv.lock @@ -6366,6 +6366,29 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, ] +[[package]] +name = "skforecast" +version = "0.20.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "joblib" }, + { name = "numba" }, + { name = "numpy", version = "1.26.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" }, + { name = "numpy", version = "2.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" }, + { name = "optuna" }, + { name = "pandas", version = "2.1.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" }, + { name = "pandas", version = "2.3.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" }, + { name = "rich" }, + { name = "scikit-learn", version = "1.6.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" }, + { name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" }, + { name = "scipy" }, + { name = "tqdm" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/53/9a/72f46e31df7e10c356ec5074842073dad400e05ca7fe05ee86d5ac2bd43c/skforecast-0.20.0.tar.gz", hash = "sha256:fd4f09883f2eb0420dc0a9d5e1ff1a83a01dd1d9e4c8cdb1fc69cfbe61c9ca84", size = 373088, upload-time = "2026-02-01T10:28:10.799Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/22/5d/f2f2a8c653a0353d53bb459427808fbc799f5f2394249733a5f45c8e4d49/skforecast-0.20.0-py3-none-any.whl", hash = "sha256:017a336f3837bc7f9fbf6539136d462aabdbff513d2e50ec49efb75aa760eebe", size = 393920, 
upload-time = "2026-02-01T10:28:09.499Z" }, +] + [[package]] name = "sktime" version = "0.40.1" @@ -6862,6 +6885,7 @@ dev = [ { name = "pytest-rerunfailures" }, { name = "pytest-xdist" }, { name = "s3fs" }, + { name = "skforecast" }, { name = "sktime" }, ] docs = [ @@ -6915,6 +6939,7 @@ dev = [ { name = "pytest-rerunfailures", specifier = ">=15.1" }, { name = "pytest-xdist", specifier = ">=3.8.0" }, { name = "s3fs", specifier = ">=2025.3.0" }, + { name = "skforecast", specifier = ">=0.20.0" }, { name = "sktime", specifier = ">=0.40.1" }, ] docs = [