Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,21 @@ TimeCopilot can pull a public time series dataset directly from the web and fore
uvx timecopilot forecast https://otexts.com/fpppy/data/AirPassengers.csv
```

Want to try a different LL​M?
Want to try a different LLM?

```bash
uvx timecopilot forecast https://otexts.com/fpppy/data/AirPassengers.csv \
--llm openai:gpt-4o
```

Need faster inference for long time series?

```bash
uvx timecopilot forecast https://otexts.com/fpppy/data/AirPassengers.csv \
--llm openai:gpt-4o \
--max_length 100
```

Have a specific question?

```bash
Expand Down Expand Up @@ -98,16 +106,19 @@ df = pd.read_csv("https://timecopilot.s3.amazonaws.com/public/data/air_passenger

# Initialize the forecasting agent
# You can use any LLM by specifying the model parameter
# Optional: Set max_length to use only the last N observations for faster inference
tc = TimeCopilot(
llm="openai:gpt-4o",
retries=3,
max_length=100, # Use only last 100 observations for faster inference
)

# Generate forecast
# You can optionally specify the following parameters:
# - freq: The frequency of your data (e.g., 'D' for daily, 'M' for monthly)
# - h: The forecast horizon, which is the number of periods to predict
# - seasonality: The seasonal period of your data, which can be inferred if not provided
# - max_length: Maximum number of observations to use from the end of each series
result = tc.forecast(df=df)

# The output contains:
Expand Down
51 changes: 51 additions & 0 deletions docs/max_length.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Max Length Parameter

The `max_length` parameter improves inference times by using only the last N observations of each time series.

## Usage

### Python API

```python
from timecopilot import TimeCopilot

# Initialize with max_length
tc = TimeCopilot(llm="openai:gpt-4o", max_length=100)

# Or set it per forecast
result = tc.forecast(df=df, max_length=50)
```

### CLI

```bash
# Use max_length parameter
timecopilot forecast data.csv --max_length 100

# With other parameters
timecopilot forecast data.csv --llm openai:gpt-4o --max_length 50
```

## How it works

When `max_length` is set, each time series is truncated to use only the last N observations before training and inference. This can significantly improve performance for long time series while often maintaining good forecast accuracy.

## Example

```python
import pandas as pd
from timecopilot import TimeCopilot

# Create long time series
df = pd.DataFrame({
'unique_id': ['series_1'] * 1000,
'ds': pd.date_range('2020-01-01', periods=1000, freq='D'),
'y': range(1000)
})

# Use only last 100 observations
tc = TimeCopilot(llm="openai:gpt-4o", max_length=100)
result = tc.forecast(df=df, h=10)
```

The forecaster automatically uses only the last 100 observations from each series, which can improve speed, often with little loss of forecast accuracy.
62 changes: 62 additions & 0 deletions tests/test_agent_max_length.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import pytest
from utilsforecast.data import generate_series

from timecopilot.agent import TimeCopilot


def test_agent_max_length_parameter():
    """Verify TimeCopilot keeps ``max_length`` and defaults it to ``None``."""
    # An explicit value is stored on the instance.
    configured = TimeCopilot(llm="test", max_length=50)
    assert configured.max_length == 50

    # Omitting the argument leaves the attribute as None.
    unconfigured = TimeCopilot(llm="test")
    assert unconfigured.max_length is None


def test_agent_forecast_method_max_length():
    """Verify that ``forecast`` takes a ``max_length`` keyword argument."""
    # One daily series with 100 observations.
    df = generate_series(n_series=1, freq="D", min_length=100, max_length=100)

    # No real LLM is available in a unit test, so the call is expected to fail
    # later on; the only thing checked is that any failure is unrelated to the
    # new keyword.
    agent = TimeCopilot(llm="test")

    try:
        agent.forecast(df, max_length=30)
    except Exception as exc:
        # A rejected keyword would be named in the error message.
        failure_text = str(exc)
        assert "max_length" not in failure_text


def test_agent_max_length_override():
    """Test that forecast method can override instance max_length.

    The forecast calls are expected to fail because no real LLM is configured,
    so their exceptions are deliberately ignored. The assertions run after
    each call whether or not it raised, so the test cannot pass vacuously when
    ``forecast`` happens to succeed.
    """
    # Create agent with an instance-level max_length
    agent = TimeCopilot(llm="test", max_length=100)
    assert agent.max_length == 100

    # Generate test data
    df = generate_series(n_series=1, freq="D", min_length=50, max_length=50)

    original_max_length = agent.max_length

    # An explicit max_length passed to forecast should replace the instance value.
    try:
        agent.forecast(df, max_length=30)
    except Exception:
        # Best-effort call: only the effect on ``agent.max_length`` matters here.
        pass
    assert agent.max_length == 30  # Should be overridden

    # Passing None must not clear an existing instance value.
    agent.max_length = original_max_length  # Reset
    try:
        agent.forecast(df, max_length=None)
    except Exception:
        # Best-effort call, same as above.
        pass
    assert agent.max_length == original_max_length
105 changes: 105 additions & 0 deletions tests/test_max_length.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import pytest
from utilsforecast.data import generate_series

from timecopilot.models.benchmarks.stats import ADIDA, AutoARIMA, SeasonalNaive
from timecopilot.models.foundational.timesfm import TimesFM


def test_max_length_parameter_exists():
    """Verify the listed model constructors accept and store ``max_length``."""
    # (model class, value passed as max_length), checked in this order.
    cases = (
        (ADIDA, 50),
        (AutoARIMA, 100),
        (SeasonalNaive, 30),
        (TimesFM, 20),
    )
    for model_cls, limit in cases:
        instance = model_cls(max_length=limit)
        assert instance.max_length == limit


def test_max_length_none_by_default():
    """Verify ``max_length`` is ``None`` when the constructor is called bare."""
    for model_cls in (ADIDA, AutoARIMA, SeasonalNaive):
        instance = model_cls()
        assert instance.max_length is None


def test_truncate_series_functionality():
    """Test the _maybe_truncate_series method.

    Covers both directions: a model with ``max_length`` set shortens each
    series, and a model with ``max_length=None`` returns the same number of
    rows it was given.
    """
    # Generate test data: two daily series of 50 observations each
    df = generate_series(n_series=2, freq="D", min_length=50, max_length=50)

    # Test with max_length
    model = SeasonalNaive(max_length=20)
    truncated_df = model._maybe_truncate_series(df)

    # Every input series is longer than max_length, so each must come back
    # with exactly max_length rows; an upper-bound check alone would also
    # accept an empty result.
    for uid in df["unique_id"].unique():
        series_data = truncated_df[truncated_df["unique_id"] == uid]
        assert len(series_data) == 20

    # Test without max_length (should not truncate)
    model_no_limit = SeasonalNaive(max_length=None)
    not_truncated_df = model_no_limit._maybe_truncate_series(df)
    assert len(not_truncated_df) == len(df)


def test_truncate_series_preserves_latest_data():
    """Verify truncation keeps the most recent timestamp and the row budget."""
    # Single daily series of 30 observations, ordered by id and date.
    df = generate_series(n_series=1, freq="D", min_length=30, max_length=30)
    df = df.sort_values(["unique_id", "ds"])

    # Limit is smaller than the series length, so truncation must happen.
    limited_model = SeasonalNaive(max_length=10)
    shortened = limited_model._maybe_truncate_series(df)

    # The newest date must be unchanged by truncation.
    newest_before = df["ds"].max()
    newest_after = shortened["ds"].max()
    assert newest_before == newest_after

    # Exactly max_length rows remain.
    assert len(shortened) == 10


def test_truncate_series_multiple_series():
    """Test truncation works correctly with multiple series.

    One series is longer than ``max_length`` and must be cut down to it; the
    other is shorter than ``max_length`` and must be left at its full length.
    """
    import pandas as pd

    # Series 1: 40 observations, longer than max_length
    df1 = generate_series(n_series=1, freq="D", min_length=40, max_length=40)
    df1["unique_id"] = "series_1"

    # Series 2: 15 observations, shorter than max_length
    df2 = generate_series(n_series=1, freq="D", min_length=15, max_length=15)
    df2["unique_id"] = "series_2"

    # Combine series
    df = pd.concat([df1, df2], ignore_index=True)

    # Test truncation
    model = SeasonalNaive(max_length=20)
    truncated_df = model._maybe_truncate_series(df)

    # Check each series separately
    series1_data = truncated_df[truncated_df["unique_id"] == "series_1"]
    series2_data = truncated_df[truncated_df["unique_id"] == "series_2"]

    # Series 1 should be truncated to 20 rows
    assert len(series1_data) == 20

    # Series 2 should keep all 15 rows (fewer than max_length)
    assert len(series2_data) == 15
8 changes: 7 additions & 1 deletion timecopilot/_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,23 @@ def forecast(
seasonality: int | None = None,
query: str | None = None,
retries: int = 3,
max_length: int | None = None,
):
with self.console.status(
"[bold blue]TimeCopilot is navigating through time...[/bold blue]"
):
forecasting_agent = TimeCopilotAgent(llm=llm, retries=retries)
forecasting_agent = TimeCopilotAgent(
llm=llm,
retries=retries,
max_length=max_length
)
result = forecasting_agent.forecast(
df=path,
freq=freq,
h=h,
seasonality=seasonality,
query=query,
max_length=max_length,
)

result.output.prettify(self.console)
Expand Down
Loading