11from datetime import datetime , timedelta
22from typing import Literal
33
4+ import numpy as np
45import pandas as pd
56
67from ispypsa .config import (
7- ModelConfig ,
88 TemporalAggregationConfig ,
99 TemporalRangeConfig ,
1010)
11- from ispypsa .config .validators import TemporalConfig
1211from ispypsa .translator .helpers import _get_iteration_start_and_end_time
1312
1413
@@ -272,11 +271,51 @@ def _filter_snapshots_for_named_representative_weeks(
272271 - Weeks are defined as Monday 00:00:00 to Monday 00:00:00 (7 days)
273272 - Monday 00:00:00 timestamps mark the END of a week, not the beginning
274273 - Weeks are assigned to the year/financial year in which they END
275- - This means weeks spanning year boundaries (e.g., Dec 28 2024 to Jan 4 2025)
276- are assigned to the year of their ending Monday (2025 in this example)
277- - Only weeks that END within the specified year range are included
278- - Partial weeks (those without data for all 7 days) are EXCLUDED from the analysis
279- to ensure consistent comparison between weeks
274+ - Weeks spanning year boundaries (e.g., Dec 28 2024 to Jan 4 2025)
275+ are EXCLUDED from the analysis
276+
277+ Examples:
278+
279+ >>> # Create sample data for 2024
280+ >>> snapshots = pd.DataFrame({
281+ ... 'snapshots': pd.date_range('2024-01-01', '2024-01-31', freq='D')
282+ ... })
283+ >>> demand_data = pd.DataFrame({
284+ ... 'Datetime': pd.date_range('2024-01-01', '2024-01-31', freq='D'),
285+ ... 'Value': [100, 110, 120, 130, 140, 150, 160, # Jan 1-7
286+ ... 170, 300, 250, 200, 180, 160, 140, # Jan 8-14 (peak value 300 on Jan 9)
287+ ... 120, 110, 100, 90, 80, 70, 60, # Jan 15-21
288+ ... 50, 60, 70, 80, 90, 100, 110, # Jan 22-28 (minimum value 50 on Jan 22)
289+ ... 120, 130, 140] # Jan 29-31 (partial week)
290+ ... })
291+
292+ >>> # Filter for peak demand week
293+ >>> result = _filter_snapshots_for_named_representative_weeks(
294+ ... named_representative_weeks=["peak-demand"],
295+ ... snapshots=snapshots,
296+ ... start_year=2024,
297+ ... end_year=2025,
298+ ... year_type="calendar",
299+ ... demand_data=demand_data
300+ ... )
301+ >>> # Returns snapshots from week containing Jan 9 (highest demand of 300)
302+ >>> # This is the week from Jan 9-15 (Tuesday to Monday)
303+ >>> len(result)
304+ 7
305+
306+ >>> # Filter for minimum demand week
307+ >>> result = _filter_snapshots_for_named_representative_weeks(
308+ ... named_representative_weeks=["minimum-demand"],
309+ ... snapshots=snapshots,
310+ ... start_year=2024,
311+ ... end_year=2025,
312+ ... year_type="calendar",
313+ ... demand_data=demand_data
314+ ... )
315+ >>> # Returns snapshots from week containing Jan 22 (lowest demand of 50)
316+ >>> # This is the week from Jan 16-22 (Tuesday to Monday)
317+ >>> len(result)
318+ 7
280319
281320 Args:
282321 named_representative_weeks: List of named week types to include
@@ -356,61 +395,45 @@ def _filter_and_assign_weeks(
356395 [datetime (y + 1 , month , 1 ) for y in range (start_year , end_year )]
357396 )
358397
359- df = demand_df .copy ()
360-
361- # Assign years
362- df ["year" ] = df ["Datetime" ].dt .year - (
363- df ["Datetime" ].dt .month < month if month == 7 else 0
364- )
365-
366- # Calculate first Mondays and assign week structure
367- first_monday_map = _calculate_first_mondays (start_year , end_year , year_starts )
368-
369- # Filter to only include data from after the first Monday of the first year
370- # This ensures we don't have partial weeks at the start
371- earliest_first_monday = min (first_monday_map .values ())
372- df = df [df ["Datetime" ] > earliest_first_monday ]
398+ output = []
373399
374- if df .empty :
375- return df
400+ for year_start , year_end in zip (year_starts , year_ends ):
401+ df = demand_df [
402+ (demand_df ["Datetime" ] > year_start ) & (demand_df ["Datetime" ] <= year_end )
403+ ].copy ()
376404
377- df ["first_monday" ] = df ["year" ].map (first_monday_map )
405+ if month == 1 :
406+ df ["year" ] = year_start .year
407+ else :
408+ df ["year" ] = year_end .year
378409
379- # Calculate week number and week start
380- # For timestamps exactly at Monday 00:00:00, they belong to the end of the previous week
381- # We handle this by calculating days_since_first_monday and adjusting for the boundary
382- days_since_first_monday = (
383- df ["Datetime" ] - df ["first_monday" ]
384- ).dt .total_seconds () / 86400
410+ days_until_next_monday = (7 - df ["Datetime" ].dt .weekday ) % 7
411+ days_until_next_monday = days_until_next_monday .where (
412+ days_until_next_monday != 0 , 7
413+ )
385414
386- # If exactly on a Monday midnight (days is exact multiple of 7), assign to previous week
387- df ["week_number" ] = (days_since_first_monday // 7 ).astype (int )
388- df ["week_number" ] = (
389- df ["week_number" ].where (days_since_first_monday % 7 != 0 , df ["week_number" ] - 1 )
390- + 1
391- )
415+ already_week_end_time = (
416+ (df ["Datetime" ].dt .weekday == 0 )
417+ & (df ["Datetime" ].dt .hour == 0 )
418+ & (df ["Datetime" ].dt .minute == 0 )
419+ )
392420
393- df ["week_start" ] = df ["first_monday" ] + pd .to_timedelta (
394- df ["week_number" ] - 1 , unit = "W"
395- )
421+ df ["week_end_time" ] = np .where (
422+ already_week_end_time ,
423+ df ["Datetime" ],
424+ df ["Datetime" ] + pd .to_timedelta (days_until_next_monday , unit = "days" ),
425+ )
396426
397- # Keep only complete weeks by excluding weeks that end after the end of the last year
398- last_year_end = year_ends [- 1 ]
399- df = df [df ["week_start" ] + timedelta (days = 7 ) <= last_year_end ]
427+ # Strip the time component so every week end lands exactly on Monday 00:00:00.
428+ df ["week_end_time" ] = df ["week_end_time" ].dt .normalize ()
400429
401- return df
430+ # Keep only complete weeks: each week must both start and end within (year_start, year_end].
431+ df = df [df ["week_end_time" ] <= year_end ]
432+ df = df [df ["week_end_time" ] - timedelta (days = 7 ) >= year_start ]
402433
434+ output .append (df .copy ())
403435
404- def _calculate_first_mondays (
405- start_year : int ,
406- end_year : int ,
407- year_starts : list [pd .Timestamp ],
408- ) -> dict [int , pd .Timestamp ]:
409- """Calculate the first Monday for each year."""
410- return {
411- y : start + timedelta (days = (7 - start .weekday ()) % 7 )
412- for y , start in zip (range (start_year , end_year ), year_starts )
413- }
436+ return pd .concat (output )
414437
415438
416439def _calculate_week_metrics (demand_df : pd .DataFrame ) -> pd .DataFrame :
@@ -424,7 +447,7 @@ def _calculate_week_metrics(demand_df: pd.DataFrame) -> pd.DataFrame:
424447 if "residual_demand" in demand_df .columns :
425448 agg_dict ["residual_demand" ] = ["max" , "min" , "mean" ]
426449
427- metrics = demand_df .groupby (["year" , "week_start " ]).agg (agg_dict )
450+ metrics = demand_df .groupby (["year" , "week_end_time " ]).agg (agg_dict )
428451 metrics .columns = ["_" .join (col ).strip ("_" ) for col in metrics .columns ]
429452
430453 return metrics .reset_index ()
@@ -458,7 +481,7 @@ def _find_target_weeks(
458481 else :
459482 idx = week_metrics .groupby ("year" )[metric_col ].idxmin ()
460483
461- target_weeks .extend (week_metrics .loc [idx , "week_start " ])
484+ target_weeks .extend (week_metrics .loc [idx , "week_end_time " ])
462485
463486 return target_weeks
464487
@@ -477,7 +500,7 @@ def _extract_snapshots_for_weeks(
477500
478501 mask = pd .concat (
479502 [
480- (snapshot_series > week ) & (snapshot_series <= week + timedelta ( days = 7 ) )
503+ (snapshot_series > week - timedelta ( days = 7 )) & (snapshot_series <= week )
481504 for week in target_weeks
482505 ],
483506 axis = 1 ,
0 commit comments