Skip to content

Commit 0aa8b92

Browse files
Merge pull request #58 from MITLibraries/TIMX-453-read-transformed-records-from-dataset
Add read method for yielding transformed records to TIMDEXDataset
2 parents 47e74c3 + 1828ae4 commit 0aa8b92

File tree

2 files changed

+24
-0
lines changed

2 files changed

+24
-0
lines changed

tests/test_dataset_read.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,3 +89,10 @@ def test_read_dicts_filter_to_none_stopiteration_immediately(fixed_local_dataset
8989
batches = fixed_local_dataset.read_dicts_iter(source="not-gonna-find-me")
9090
with pytest.raises(StopIteration):
9191
next(batches)
92+
93+
94+
def test_read_transformed_records_yields_parsed_dictionary(fixed_local_dataset):
    """The iterator's first item is a transformed record parsed into a dict."""
    records = fixed_local_dataset.read_transformed_records_iter()
    first_record = next(records)
    assert isinstance(first_record, dict)
    assert first_record == {"title": ["Hello World."]}

timdex_dataset_api/dataset.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""timdex_dataset_api/dataset.py"""
22

33
import itertools
4+
import json
45
import operator
56
import time
67
import uuid
@@ -468,3 +469,19 @@ def read_dicts_iter(
468469
columns=columns, batch_size=batch_size, **filters
469470
):
470471
yield from record_batch.to_pylist()
472+
473+
def read_transformed_records_iter(
    self,
    batch_size: int = DEFAULT_BATCH_SIZE,
    **filters: Unpack[DatasetFilters],
) -> Iterator[dict]:
    """Yield each transformed record in the dataset as a parsed dictionary.

    Rows whose 'transformed_record' value is None (i.e., rows written with
    action="skip"|"error") are omitted rather than yielded.
    """
    rows = self.read_dicts_iter(
        columns=["transformed_record"], batch_size=batch_size, **filters
    )
    for row in rows:
        raw_record = row["transformed_record"]
        if not raw_record:
            continue
        yield json.loads(raw_record)

0 commit comments

Comments
 (0)