Skip to content

Commit 36acc59

Browse files
committed
feat: add 1m snapshot collector and tests
1 parent 62765b0 commit 36acc59

File tree

4 files changed

+856
-9
lines changed

4 files changed

+856
-9
lines changed

TODO.md

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,16 @@
44
Migliorare stabilita', performance e manutenibilita' senza rompere l'API pubblica.
55

66
## Next implementation (1m snapshots target)
7-
- [ ] Definire configurazione simboli spot 1m: `^SPX`, `^VIX`, `^VVIX` + ETF (`SVOL`, `VXX`, `UVIX`, `UVXY`, `SPY`).
8-
- [ ] Aggiungere `^SKEW` con schedulazione separata 1 volta al giorno a mercato chiuso.
9-
- [ ] Implementare `scripts/collect_1m_snapshots.py` con loop a 60s e `Ticker` riusati per tutta la sessione.
10-
- [ ] Precalcolare (refresh giornaliero) le scadenze mensili entro 12 mesi per `^SPX` e `^VIX`.
11-
- [ ] Per opzioni usare richieste per singola scadenza (`date=`) invece di full chain ad ogni ciclo.
12-
- [ ] Salvare output in formato partizionato per data/simbolo/scadenza (`data/raw_snapshots/YYYY-MM-DD/...`).
13-
- [ ] Gestire `^VVIX` come solo spot snapshot (fallback esplicito quando option chain non disponibile).
14-
- [ ] Aggiungere metriche runtime per ciclo: latenza totale, numero chiamate, errori/retry.
7+
- [x] Definire configurazione simboli spot 1m: `^SPX`, `^VIX`, `^VVIX` + ETF (`SVOL`, `VXX`, `UVIX`, `UVXY`, `SPY`).
8+
- [x] Aggiungere `^SKEW` con schedulazione separata 1 volta al giorno a mercato chiuso.
9+
- [x] Implementare `scripts/collect_1m_snapshots.py` con loop a 60s e `Ticker` riusati per tutta la sessione.
10+
- [x] Precalcolare (refresh giornaliero) le scadenze mensili entro 12 mesi per `^SPX` e `^VIX`.
11+
- [x] Per opzioni usare richieste per singola scadenza (`date=`) invece di full chain ad ogni ciclo.
12+
- [x] Salvare output in formato partizionato per data/simbolo/scadenza (`data/raw_snapshots/YYYY-MM-DD/...`).
13+
- [x] Gestire `^VVIX` come solo spot snapshot (fallback esplicito quando option chain non disponibile).
14+
- [x] Aggiungere metriche runtime per ciclo: latenza totale, numero chiamate, errori/retry.
1515
- [ ] Aggiungere retry/backoff + timeout separati (connect/read) configurabili da env/config.
16-
- [ ] Test unit: filtro scadenze mensili, scheduler 60s, normalizzazione schema snapshot.
16+
- [x] Test unit: filtro scadenze mensili, scheduler 60s, normalizzazione schema snapshot.
1717
- [ ] Test integration (opt-in): smoke test reale su 1 ciclo completo con guardrail di durata.
1818

1919
## Priority 1 (Next sessions)

scripts/collect_1m_snapshots.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# stdlib
2+
import sys
3+
from pathlib import Path
4+
5+
# Repository root: the directory one level above the folder holding this script.
REPO_ROOT = Path(__file__).resolve().parent.parent

# Put the repository root at the front of the import path (once) so that the
# `yahooquery` import below can be resolved from the checkout.
if str(REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(REPO_ROOT))

# Must come after the sys.path adjustment above.
from yahooquery.snapshot_collector import main


if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the exit status.
    sys.exit(main())

tests/test_snapshot_collector.py

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
# stdlib
2+
from datetime import datetime, timezone
3+
4+
# first party
5+
from yahooquery.snapshot_collector import (
6+
CollectorConfig,
7+
SnapshotCollector,
8+
select_monthly_expirations,
9+
should_capture_skew,
10+
)
11+
12+
13+
def utc_timestamp(year, month, day):
    """Return the integer Unix timestamp of midnight UTC on the given date."""
    midnight_utc = datetime(year=year, month=month, day=day, tzinfo=timezone.utc)
    epoch_seconds = midnight_utc.timestamp()
    return int(epoch_seconds)
15+
16+
17+
class FakeTicker:
    """Test double for a ticker object that answers from canned data.

    It exposes a ``quotes`` property and a ``_get_data`` method that return
    fixed payloads built in memory.
    """

    # Expiration timestamps returned when no specific date is requested.
    EXPIRATIONS = [
        utc_timestamp(2026, 3, 4),
        utc_timestamp(2026, 3, 18),
        utc_timestamp(2026, 4, 15),
    ]

    def __init__(self, symbols, **kwargs):
        """Remember the symbols and any extra keyword arguments.

        ``symbols`` may be a whitespace-separated string or an iterable of
        symbol strings; either way it is stored as a list.
        """
        self._symbols = symbols.split() if isinstance(symbols, str) else list(symbols)
        self.kwargs = kwargs

    @property
    def quotes(self):
        """Return a dict mapping each symbol to the same fixed quote values."""
        fixed_prices = {
            "regularMarketPrice": 100.0,
            "bid": 99.9,
            "ask": 100.1,
        }
        return {name: {"symbol": name, **fixed_prices} for name in self._symbols}

    def _get_data(self, key, params=None):
        """Return a canned options payload for the first stored symbol.

        Only ``key == "options"`` is supported. Without a truthy ``"date"``
        entry in ``params`` the payload lists ``EXPIRATIONS`` and no
        contracts; with one, it holds a single call and a single put for
        that expiration.
        """
        assert key == "options"
        first_symbol = self._symbols[0]
        requested_date = (params or {}).get("date")

        if not requested_date:
            # No expiration requested: expose only the available dates.
            return {
                first_symbol: {
                    "expirationDates": self.EXPIRATIONS,
                    "options": [],
                }
            }

        expiration = int(requested_date)
        root = first_symbol.replace("^", "")

        def build_contract(side, bid, ask):
            # `side` is the one-letter suffix appended to the contract symbol.
            return {
                "contractSymbol": f"{root}{expiration}{side}",
                "expiration": expiration,
                "lastTradeDate": expiration - 3600,
                "strike": 10.0,
                "bid": bid,
                "ask": ask,
            }

        chain = {
            "calls": [build_contract("C", 1.0, 1.2)],
            "puts": [build_contract("P", 0.9, 1.1)],
        }
        return {
            first_symbol: {
                "expirationDates": [expiration],
                "options": [chain],
            }
        }
77+
78+
79+
class FakeTickerFactory:
    """Callable that creates ``FakeTicker`` objects and records each one."""

    def __init__(self):
        """Start with an empty record of created tickers."""
        self.instances = []

    def __call__(self, symbols, **kwargs):
        """Create a ``FakeTicker``, append it to ``instances`` and return it."""
        self.instances.append(FakeTicker(symbols, **kwargs))
        return self.instances[-1]
87+
88+
89+
def test_select_monthly_expirations_chooses_first_expiration_each_month():
    """Check which expirations are selected for a 3-month lookahead.

    With two January dates and one date each in February, March and April,
    the expected selection is the first January date plus the February and
    March dates.
    """
    reference_time = datetime(2026, 1, 10, tzinfo=timezone.utc)
    candidate_dates = [
        (2026, 1, 17),
        (2026, 1, 24),
        (2026, 2, 21),
        (2026, 3, 21),
        (2026, 4, 18),
    ]
    candidates = [utc_timestamp(*ymd) for ymd in candidate_dates]
    # Expected: first January date, then the February and March dates.
    expected = [candidates[0], candidates[2], candidates[3]]

    result = select_monthly_expirations(
        candidates,
        as_of_utc=reference_time,
        lookahead_months=3,
    )

    assert result == expected
104+
105+
106+
def test_should_capture_skew_only_once_per_day_after_close():
    """Exercise ``should_capture_skew`` before close, after close, and on a Saturday."""
    # Trailing positional arguments passed to every call.
    # NOTE(review): presumably the close hour and minute -- confirm against
    # the signature of should_capture_skew.
    close_args = (16, 5)

    wednesday_1559 = datetime(2026, 2, 25, 15, 59)
    wednesday_1605 = datetime(2026, 2, 25, 16, 5)
    saturday_1630 = datetime(2026, 2, 28, 16, 30)

    # Earlier than 16:05 with no previous capture: no capture expected.
    assert not should_capture_skew(wednesday_1559, None, *close_args)
    # At 16:05 with no previous capture: capture expected.
    assert should_capture_skew(wednesday_1605, None, *close_args)
    # Same moment, but the previous capture date is already that day.
    already_captured_on = wednesday_1605.date()
    assert not should_capture_skew(wednesday_1605, already_captured_on, *close_args)
    # Saturday afternoon with no previous capture: no capture expected.
    assert not should_capture_skew(saturday_1630, None, *close_args)
114+
115+
116+
def test_run_cycle_writes_partitioned_snapshots(tmp_path):
    """Run one collector cycle with fake tickers and check files and metrics."""

    def skip_sleep(_seconds):
        # Stand-in for the collector's sleep function; does nothing.
        return None

    ticker_factory = FakeTickerFactory()
    collector_config = CollectorConfig(
        output_dir=tmp_path,
        spot_symbols=["^SPX", "^VIX"],
        options_symbols=["^SPX"],
        skew_symbol="^SKEW",
        refresh_expirations_hours=24,
        retries=0,
    )
    collector = SnapshotCollector(
        config=collector_config,
        ticker_factory=ticker_factory,
        sleep_fn=skip_sleep,
    )

    cycle_started = datetime(2026, 2, 25, 21, 10, tzinfo=timezone.utc)
    metrics = collector.run_cycle(cycle_started)
    # The test sets the duration itself before appending the metrics row.
    metrics["duration_seconds"] = 0.25
    collector._append_metrics(cycle_started.date(), metrics)

    # Output is expected under a directory named after the cycle date.
    day_dir = tmp_path / "2026-02-25"
    spot_files = list((day_dir / "spot").rglob("snapshot_*.json"))
    option_files = list((day_dir / "options").rglob("snapshot_*.csv"))
    skew_files = list((day_dir / "daily_close").rglob("snapshot_*.json"))
    metrics_file = day_dir / "metrics" / "cycle_metrics.csv"

    assert len(spot_files) == 2
    assert len(option_files) == 2
    assert len(skew_files) == 1
    assert metrics_file.exists()

    expected_counters = {
        "spot_snapshots_written": 2,
        "option_api_calls": 2,
        "option_snapshots_written": 2,
        "option_contracts": 4,
        "skew_snapshots_written": 1,
    }
    for counter_name, expected_value in expected_counters.items():
        assert metrics[counter_name] == expected_value
    assert metrics["errors"] == []

0 commit comments

Comments
 (0)