88import pytest
99from pyarrow import fs
1010
11+ from tests .utils import generate_sample_records
1112from timdex_dataset_api .dataset import (
1213 DatasetNotLoadedError ,
1314 TIMDEXDataset ,
@@ -466,13 +467,31 @@ def test_dataset_current_records_index_filtering_accurate_records_yielded(
466467
467468
468469@pytest .mark .freeze_time ("2025-05-22 01:23:45.567890" )
469- def test_dataset_write_includes_minted_run_timestamp (
470- dataset_with_same_day_runs ,
471- ):
470+ def test_dataset_write_includes_minted_run_timestamp (tmp_path ):
471+ # create dataset
472+ location = str (tmp_path / "one_run_at_frozen_time" )
473+ os .mkdir (location )
474+ timdex_dataset = TIMDEXDataset (location )
475+
476+ run_id = "abc123"
477+
478+ # perform a single ETL run that should pick up the frozen time for run_timestamp
479+ records = generate_sample_records (
480+ 10 ,
481+ timdex_record_id_prefix = "alma" ,
482+ source = "alma" ,
483+ run_date = "2025-05-22" ,
484+ run_type = "full" ,
485+ action = "index" ,
486+ run_id = run_id ,
487+ )
488+ timdex_dataset .write (records )
489+ timdex_dataset .load ()
490+
472491 # assert TIMDEXDataset.write() applies current time as run_timestamp
473- row_dict = next (dataset_with_same_day_runs .read_dicts_iter ())
474- assert "run_timestamp" in row_dict
475- assert row_dict ["run_timestamp" ] == datetime (
492+ run_row_dict = next (timdex_dataset .read_dicts_iter ())
493+ assert "run_timestamp" in run_row_dict
494+ assert run_row_dict ["run_timestamp" ] == datetime (
476495 2025 ,
477496 5 ,
478497 22 ,
@@ -483,8 +502,8 @@ def test_dataset_write_includes_minted_run_timestamp(
483502 tzinfo = UTC ,
484503 )
485504
486- # assert same time is used for entire batch
487- df = dataset_with_same_day_runs .read_dataframe ()
505+ # assert the same run_timestamp is applied to all rows in the run
506+ df = timdex_dataset .read_dataframe (run_id = run_id )
488507 assert len (list (df .run_timestamp .unique ())) == 1
489508
490509
0 commit comments