
Commit ea068c8

update tests for named rep weeks and add optional snapshots override
1 parent 2902a5c commit ea068c8

File tree

10 files changed: +840 −848 lines


src/ispypsa/translator/create_pypsa_friendly_inputs.py

Lines changed: 27 additions & 19 deletions
@@ -144,12 +144,14 @@ def create_pypsa_friendly_timeseries_inputs(
     ispypsa_tables: dict[str, pd.DataFrame],
     parsed_traces_directory: Path,
     pypsa_friendly_timeseries_inputs_location: Path,
+    snapshots: pd.DataFrame | None = None,
 ) -> pd.DataFrame:
     """Creates snapshots and timeseries data files in PyPSA friendly format for generation
     and demand.

     - First creates snapshots based on the temporal configuration, optionally using
-      named_representative_weeks if configured.
+      named_representative_weeks if configured. If snapshots are provided, they are used
+      instead of generating new ones.

     - Then creates a time series file for each wind and solar generator in the
       ecaa_generators table (table in ispypsa_tables dict). The time series data is saved
@@ -210,6 +212,10 @@ def create_pypsa_friendly_timeseries_inputs(
             has been parsed using isp-trace-parser is located.
         pypsa_friendly_timeseries_inputs_location: a pathlib.Path defining where the
             time series data which is to be created should be saved.
+        snapshots: Optional pd.DataFrame containing pre-defined snapshots to use instead
+            of generating them. If provided, must contain columns 'snapshots' (datetime)
+            and 'investment_periods' (int). This is useful for testing or when custom
+            snapshots are needed.

     Returns: pd.DataFrame containing the snapshots used for filtering the timeseries
     """
@@ -244,17 +250,19 @@ def create_pypsa_friendly_timeseries_inputs(
         year_type=config.temporal.year_type,
     )

-    # Create snapshots, potentially using the loaded data for named_representative_weeks
-    # Flatten generator traces for snapshot creation
-    all_generator_traces = _flatten_generator_traces(generator_traces_by_type)
-
-    snapshots = create_pypsa_friendly_snapshots(
-        config,
-        model_phase,
-        existing_generators=ispypsa_tables.get("ecaa_generators"),
-        demand_traces=demand_traces,
-        generator_traces=all_generator_traces,
-    )
+    # Use provided snapshots or create new ones
+    if snapshots is None:
+        # Create snapshots, potentially using the loaded data for named_representative_weeks
+        # Flatten generator traces for snapshot creation
+        all_generator_traces = _flatten_generator_traces(generator_traces_by_type)
+
+        snapshots = create_pypsa_friendly_snapshots(
+            config,
+            model_phase,
+            existing_generators=ispypsa_tables.get("ecaa_generators"),
+            demand_traces=demand_traces,
+            generator_traces=all_generator_traces,
+        )

     # Filter and save generator timeseries by type
     for gen_type, gen_traces in generator_traces_by_type.items():
@@ -277,13 +285,6 @@ def create_pypsa_friendly_timeseries_inputs(
     return snapshots


-def list_translator_output_files(output_path: Path | None = None) -> list[Path]:
-    files = _BASE_TRANSLATOR_OUTPUTS
-    if output_path is not None:
-        files = [output_path / Path(file + ".csv") for file in files]
-    return files
-
-
 def _flatten_generator_traces(
     generator_traces_by_type: dict[str, dict[str, pd.DataFrame]],
 ) -> dict[str, pd.DataFrame]:
@@ -352,3 +353,10 @@ def _filter_and_save_timeseries(

     # Save to parquet
     trace.to_parquet(Path(output_trace_path, f"{name}.parquet"), index=False)
+
+
+def list_translator_output_files(output_path: Path | None = None) -> list[Path]:
+    files = _BASE_TRANSLATOR_OUTPUTS
+    if output_path is not None:
+        files = [output_path / Path(file + ".csv") for file in files]
+    return files
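The relocated helper keeps its behaviour; a small usage sketch (the output directory is hypothetical, and the base names come from _BASE_TRANSLATOR_OUTPUTS defined elsewhere in the module):

from pathlib import Path

# With a path, each base name is mapped to a CSV path under that directory;
# with no argument, the bare list of base names is returned unchanged.
csv_paths = list_translator_output_files(Path("example_output_dir"))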

src/ispypsa/translator/temporal_filters.py

Lines changed: 79 additions & 56 deletions
@@ -1,14 +1,13 @@
 from datetime import datetime, timedelta
 from typing import Literal

+import numpy as np
 import pandas as pd

 from ispypsa.config import (
-    ModelConfig,
     TemporalAggregationConfig,
     TemporalRangeConfig,
 )
-from ispypsa.config.validators import TemporalConfig
 from ispypsa.translator.helpers import _get_iteration_start_and_end_time


@@ -272,11 +271,51 @@ def _filter_snapshots_for_named_representative_weeks(
     - Weeks are defined as Monday 00:00:00 to Monday 00:00:00 (7 days)
     - Monday 00:00:00 timestamps mark the END of a week, not the beginning
     - Weeks are assigned to the year/financial year in which they END
-    - This means weeks spanning year boundaries (e.g., Dec 28 2024 to Jan 4 2025)
-      are assigned to the year of their ending Monday (2025 in this example)
-    - Only weeks that END within the specified year range are included
-    - Partial weeks (those without data for all 7 days) are EXCLUDED from the analysis
-      to ensure consistent comparison between weeks
+    - Weeks spanning year boundaries (e.g., Dec 28 2024 to Jan 4 2025)
+      are EXCLUDED from the analysis
+
+    Examples:
+
+    >>> # Create sample data for 2024
+    >>> snapshots = pd.DataFrame({
+    ...     'snapshots': pd.date_range('2024-01-01', '2024-01-31', freq='D')
+    ... })
+    >>> demand_data = pd.DataFrame({
+    ...     'Datetime': pd.date_range('2024-01-01', '2024-01-31', freq='D'),
+    ...     'Value': [100, 110, 120, 130, 140, 150, 160,  # Week 1 (Jan 1-7)
+    ...               170, 300, 250, 200, 180, 160, 140,  # Week 2 (Jan 8-14) - peak
+    ...               120, 110, 100, 90, 80, 70, 60,      # Week 3 (Jan 15-21) - minimum
+    ...               50, 60, 70, 80, 90, 100, 110,       # Week 4 (Jan 22-28)
+    ...               120, 130, 140]                      # Partial week
+    ... })
+
+    >>> # Filter for peak demand week
+    >>> result = _filter_snapshots_for_named_representative_weeks(
+    ...     named_representative_weeks=["peak-demand"],
+    ...     snapshots=snapshots,
+    ...     start_year=2024,
+    ...     end_year=2025,
+    ...     year_type="calendar",
+    ...     demand_data=demand_data
+    ... )
+    >>> # Returns snapshots from week containing Jan 9 (highest demand of 300)
+    >>> # This is the week from Jan 9-15 (Tuesday to Monday)
+    >>> len(result)
+    7
+
+    >>> # Filter for minimum demand week
+    >>> result = _filter_snapshots_for_named_representative_weeks(
+    ...     named_representative_weeks=["minimum-demand"],
+    ...     snapshots=snapshots,
+    ...     start_year=2024,
+    ...     end_year=2025,
+    ...     year_type="calendar",
+    ...     demand_data=demand_data
+    ... )
+    >>> # Returns snapshots from week containing Jan 22 (lowest demand of 50)
+    >>> # This is the week from Jan 16-22 (Tuesday to Monday)
+    >>> len(result)
+    7

     Args:
         named_representative_weeks: List of named week types to include
@@ -356,61 +395,45 @@ def _filter_and_assign_weeks(
         [datetime(y + 1, month, 1) for y in range(start_year, end_year)]
     )

-    df = demand_df.copy()
-
-    # Assign years
-    df["year"] = df["Datetime"].dt.year - (
-        df["Datetime"].dt.month < month if month == 7 else 0
-    )
-
-    # Calculate first Mondays and assign week structure
-    first_monday_map = _calculate_first_mondays(start_year, end_year, year_starts)
-
-    # Filter to only include data from after the first Monday of the first year
-    # This ensures we don't have partial weeks at the start
-    earliest_first_monday = min(first_monday_map.values())
-    df = df[df["Datetime"] > earliest_first_monday]
+    output = []

-    if df.empty:
-        return df
+    for year_start, year_end in zip(year_starts, year_ends):
+        df = demand_df[
+            (demand_df["Datetime"] > year_start) & (demand_df["Datetime"] <= year_end)
+        ].copy()

-    df["first_monday"] = df["year"].map(first_monday_map)
+        if month == 1:
+            df["year"] = year_start.year
+        else:
+            df["year"] = year_end.year

-    # Calculate week number and week start
-    # For timestamps exactly at Monday 00:00:00, they belong to the end of the previous week
-    # We handle this by calculating days_since_first_monday and adjusting for the boundary
-    days_since_first_monday = (
-        df["Datetime"] - df["first_monday"]
-    ).dt.total_seconds() / 86400
+        days_until_next_monday = (7 - df["Datetime"].dt.weekday) % 7
+        days_until_next_monday = days_until_next_monday.where(
+            days_until_next_monday != 0, 7
+        )

-    # If exactly on a Monday midnight (days is exact multiple of 7), assign to previous week
-    df["week_number"] = (days_since_first_monday // 7).astype(int)
-    df["week_number"] = (
-        df["week_number"].where(days_since_first_monday % 7 != 0, df["week_number"] - 1)
-        + 1
-    )
+        already_week_end_time = (
+            (df["Datetime"].dt.weekday == 0)
+            & (df["Datetime"].dt.hour == 0)
+            & (df["Datetime"].dt.minute == 0)
+        )

-    df["week_start"] = df["first_monday"] + pd.to_timedelta(
-        df["week_number"] - 1, unit="W"
-    )
+        df["week_end_time"] = np.where(
+            already_week_end_time,
+            df["Datetime"],
+            df["Datetime"] + pd.to_timedelta(days_until_next_monday, unit="days"),
+        )

-    # Keep only complete weeks by excluding weeks that end after the end of the last year
-    last_year_end = year_ends[-1]
-    df = df[df["week_start"] + timedelta(days=7) <= last_year_end]
+        # round back to midnight
+        df["week_end_time"] = df["week_end_time"].dt.normalize()

-    return df
+        # Filter out partial weeks.
+        df = df[df["week_end_time"] <= year_end]
+        df = df[df["week_end_time"] - timedelta(days=7) >= year_start]

+        output.append(df.copy())

-def _calculate_first_mondays(
-    start_year: int,
-    end_year: int,
-    year_starts: list[pd.Timestamp],
-) -> dict[int, pd.Timestamp]:
-    """Calculate the first Monday for each year."""
-    return {
-        y: start + timedelta(days=(7 - start.weekday()) % 7)
-        for y, start in zip(range(start_year, end_year), year_starts)
-    }
+    return pd.concat(output)


 def _calculate_week_metrics(demand_df: pd.DataFrame) -> pd.DataFrame:
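To make the new week-end assignment concrete, here is a small standalone sketch of the same computation on two illustrative timestamps (a mid-week point and a Monday 00:00 boundary):

import numpy as np
import pandas as pd

ts = pd.Series(pd.to_datetime(["2024-01-03 12:00", "2024-01-08 00:00"]))  # Wednesday midday, Monday 00:00
days_until_next_monday = (7 - ts.dt.weekday) % 7
days_until_next_monday = days_until_next_monday.where(days_until_next_monday != 0, 7)
already_week_end = (ts.dt.weekday == 0) & (ts.dt.hour == 0) & (ts.dt.minute == 0)
week_end = pd.Series(
    np.where(already_week_end, ts, ts + pd.to_timedelta(days_until_next_monday, unit="days"))
).dt.normalize()
# Both rows map to 2024-01-08 00:00: the Wednesday rolls forward to the next Monday,
# while the Monday-midnight timestamp is treated as its own week end.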
@@ -424,7 +447,7 @@ def _calculate_week_metrics(demand_df: pd.DataFrame) -> pd.DataFrame:
     if "residual_demand" in demand_df.columns:
         agg_dict["residual_demand"] = ["max", "min", "mean"]

-    metrics = demand_df.groupby(["year", "week_start"]).agg(agg_dict)
+    metrics = demand_df.groupby(["year", "week_end_time"]).agg(agg_dict)
     metrics.columns = ["_".join(col).strip("_") for col in metrics.columns]

     return metrics.reset_index()
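For reference, a minimal sketch of the groupby/agg and column-flattening pattern used above; the 'Value' demand column mirrors the docstring example data and the numbers are illustrative:

import pandas as pd

demand_df = pd.DataFrame(
    {
        "year": [2024, 2024, 2024],
        "week_end_time": pd.to_datetime(["2024-01-08", "2024-01-08", "2024-01-15"]),
        "Value": [100.0, 160.0, 300.0],
    }
)
metrics = demand_df.groupby(["year", "week_end_time"]).agg({"Value": ["max", "min", "mean"]})
metrics.columns = ["_".join(col).strip("_") for col in metrics.columns]
metrics = metrics.reset_index()
# Columns: year, week_end_time, Value_max, Value_min, Value_mean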
@@ -458,7 +481,7 @@ def _find_target_weeks(
     else:
         idx = week_metrics.groupby("year")[metric_col].idxmin()

-    target_weeks.extend(week_metrics.loc[idx, "week_start"])
+    target_weeks.extend(week_metrics.loc[idx, "week_end_time"])

     return target_weeks

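A toy illustration of the per-year extreme-week selection above; the metric column name Value_max is an assumption based on the flattened agg columns:

import pandas as pd

week_metrics = pd.DataFrame(
    {
        "year": [2024, 2024, 2025],
        "week_end_time": pd.to_datetime(["2024-01-08", "2024-01-15", "2025-01-13"]),
        "Value_max": [160.0, 300.0, 210.0],
    }
)
idx = week_metrics.groupby("year")["Value_max"].idxmax()
target_weeks = list(week_metrics.loc[idx, "week_end_time"])
# One week-end timestamp per year: [Timestamp('2024-01-15'), Timestamp('2025-01-13')]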
@@ -477,7 +500,7 @@ def _extract_snapshots_for_weeks(

     mask = pd.concat(
         [
-            (snapshot_series > week) & (snapshot_series <= week + timedelta(days=7))
+            (snapshot_series > week - timedelta(days=7)) & (snapshot_series <= week)
             for week in target_weeks
         ],
         axis=1,
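With target weeks now identified by their END timestamps, a snapshot t belongs to a week ending at `week` when week - 7 days < t <= week. A quick illustrative check of the new mask expression:

import pandas as pd
from datetime import timedelta

snapshot_series = pd.Series(pd.date_range("2024-01-02", "2024-01-16", freq="D"))
week = pd.Timestamp("2024-01-15")  # a target week END (Monday 00:00)
mask = (snapshot_series > week - timedelta(days=7)) & (snapshot_series <= week)
# Selects 2024-01-09 through 2024-01-15: the seven days ending at `week`.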
