File tree Expand file tree Collapse file tree 2 files changed +24
-0
lines changed
Expand file tree Collapse file tree 2 files changed +24
-0
lines changed Original file line number Diff line number Diff line change @@ -89,3 +89,10 @@ def test_read_dicts_filter_to_none_stopiteration_immediately(fixed_local_dataset
8989 batches = fixed_local_dataset .read_dicts_iter (source = "not-gonna-find-me" )
9090 with pytest .raises (StopIteration ):
9191 next (batches )
92+
93+
def test_read_transformed_records_yields_parsed_dictionary(fixed_local_dataset):
    """The first item from read_transformed_records_iter is a parsed dict."""
    first_record = next(fixed_local_dataset.read_transformed_records_iter())
    # The iterator must hand back parsed data (a dict), not a raw serialized value.
    assert isinstance(first_record, dict)
    assert first_record == {"title": ["Hello World."]}
Original file line number Diff line number Diff line change 11"""timdex_dataset_api/dataset.py"""
22
33import itertools
4+ import json
45import operator
56import time
67import uuid
@@ -468,3 +469,19 @@ def read_dicts_iter(
468469 columns = columns , batch_size = batch_size , ** filters
469470 ):
470471 yield from record_batch .to_pylist ()
472+
def read_transformed_records_iter(
    self,
    batch_size: int = DEFAULT_BATCH_SIZE,
    **filters: Unpack[DatasetFilters],
) -> Iterator[dict]:
    """Yield each transformed record from the dataset, parsed from JSON.

    Only the 'transformed_record' column is requested from read_dicts_iter;
    'batch_size' and any filters are passed through unchanged.

    Rows whose 'transformed_record' value is falsy (e.g., None, as for
    action="skip"|"error") are passed over and yield nothing.
    """
    rows = self.read_dicts_iter(
        columns=["transformed_record"], batch_size=batch_size, **filters
    )
    for row in rows:
        serialized = row["transformed_record"]
        if not serialized:
            # Nothing to parse for this row; move on without yielding.
            continue
        yield json.loads(serialized)
You can’t perform that action at this time.
0 commit comments