diff --git a/nemoguardrails/tracing/adapters/filesystem.py b/nemoguardrails/tracing/adapters/filesystem.py
index 3e99398b8..98c16bcb4 100644
--- a/nemoguardrails/tracing/adapters/filesystem.py
+++ b/nemoguardrails/tracing/adapters/filesystem.py
@@ -59,7 +59,7 @@ def transform(self, interaction_log: "InteractionLog"):
         }

         with open(self.filepath, "a") as f:
-            f.write(json.dumps(log_dict, indent=2) + "\n")
+            f.write(json.dumps(log_dict) + "\n")

     async def transform_async(self, interaction_log: "InteractionLog"):
         try:
@@ -90,4 +90,4 @@ async def transform_async(self, interaction_log: "InteractionLog"):
         }

         async with aiofiles.open(self.filepath, "a") as f:
-            await f.write(json.dumps(log_dict, indent=2) + "\n")
+            await f.write(json.dumps(log_dict) + "\n")
diff --git a/tests/test_tracing_adapters_filesystem.py b/tests/test_tracing_adapters_filesystem.py
index df4a470c9..a69b81f31 100644
--- a/tests/test_tracing_adapters_filesystem.py
+++ b/tests/test_tracing_adapters_filesystem.py
@@ -19,7 +19,6 @@
 import os
 import tempfile
 import unittest
-from unittest.mock import MagicMock

 from nemoguardrails.eval.models import Span
 from nemoguardrails.tracing import InteractionLog
@@ -109,3 +108,149 @@ async def run_test():
             self.assertEqual(log_dict["spans"][0]["name"], "test_span")

         asyncio.run(run_test())
+
+    def test_jsonl_format_single_record(self):
+        """Test that output is valid JSONL format (single line per JSON object)."""
+        adapter = FileSystemAdapter(filepath=self.filepath)
+
+        interaction_log = InteractionLog(
+            id="test_id",
+            activated_rails=[],
+            events=[],
+            trace=[
+                Span(
+                    name="test_span",
+                    span_id="span_1",
+                    parent_id=None,
+                    start_time=0.0,
+                    end_time=1.0,
+                    duration=1.0,
+                    metrics={},
+                )
+            ],
+        )
+
+        adapter.transform(interaction_log)
+
+        with open(self.filepath, "r") as f:
+            lines = f.readlines()
+
+        self.assertEqual(len(lines), 1, "Should have exactly one line")
+
+        line = lines[0].strip()
+        self.assertNotEqual(line, "", "Line should not be empty")
+
+        log_dict = json.loads(line)
+        self.assertEqual(log_dict["trace_id"], "test_id")
+        self.assertEqual(len(log_dict["spans"]), 1)
+
+        self.assertNotIn("\n", line, "JSON object should not contain embedded newlines")
+
+    def test_jsonl_format_multiple_records(self):
+        """Test that multiple records create valid JSONL with one JSON per line."""
+        adapter = FileSystemAdapter(filepath=self.filepath)
+
+        log1 = InteractionLog(
+            id="trace_1",
+            activated_rails=[],
+            events=[],
+            trace=[
+                Span(
+                    name="span_1",
+                    span_id="span_1",
+                    parent_id=None,
+                    start_time=0.0,
+                    end_time=1.0,
+                    duration=1.0,
+                    metrics={"count": 1},
+                )
+            ],
+        )
+
+        log2 = InteractionLog(
+            id="trace_2",
+            activated_rails=[],
+            events=[],
+            trace=[
+                Span(
+                    name="span_2a",
+                    span_id="span_2a",
+                    parent_id=None,
+                    start_time=0.0,
+                    end_time=0.5,
+                    duration=0.5,
+                    metrics={},
+                ),
+                Span(
+                    name="span_2b",
+                    span_id="span_2b",
+                    parent_id="span_2a",
+                    start_time=0.5,
+                    end_time=1.0,
+                    duration=0.5,
+                    metrics={"score": 0.95},
+                ),
+            ],
+        )
+
+        adapter.transform(log1)
+        adapter.transform(log2)
+
+        with open(self.filepath, "r") as f:
+            lines = f.readlines()
+
+        self.assertEqual(len(lines), 2, "Should have exactly two lines")
+
+        parsed_logs = []
+        for i, line in enumerate(lines):
+            line = line.strip()
+            self.assertNotEqual(line, "", f"Line {i + 1} should not be empty")
+
+            log_dict = json.loads(line)
+            parsed_logs.append(log_dict)
+
+            self.assertNotIn(
+                "\n", line, f"Line {i + 1} should not contain embedded newlines"
+            )
+
+        self.assertEqual(parsed_logs[0]["trace_id"], "trace_1")
+        self.assertEqual(len(parsed_logs[0]["spans"]), 1)
+
+        self.assertEqual(parsed_logs[1]["trace_id"], "trace_2")
+        self.assertEqual(len(parsed_logs[1]["spans"]), 2)
+
+    def test_jsonl_streaming_compatible(self):
+        """Test that the file can be processed as streaming JSONL."""
+        adapter = FileSystemAdapter(filepath=self.filepath)
+
+        for i in range(3):
+            log = InteractionLog(
+                id=f"trace_{i}",
+                activated_rails=[],
+                events=[],
+                trace=[
+                    Span(
+                        name=f"span_{i}",
+                        span_id=f"span_{i}",
+                        parent_id=None,
+                        start_time=float(i),
+                        end_time=float(i + 1),
+                        duration=1.0,
+                        metrics={"index": i},
+                    )
+                ],
+            )
+            adapter.transform(log)
+
+        trace_ids = []
+        with open(self.filepath, "r") as f:
+            for line_num, line in enumerate(f, 1):
+                if line.strip():
+                    try:
+                        record = json.loads(line)
+                        trace_ids.append(record["trace_id"])
+                    except json.JSONDecodeError as e:
+                        self.fail(f"Line {line_num} is not valid JSON: {e}")
+
+        self.assertEqual(trace_ids, ["trace_0", "trace_1", "trace_2"])
+        self.assertEqual(len(trace_ids), 3, "Should have processed 3 records")
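For reference, a minimal consumer sketch (not part of the patch) showing why the switch from indented JSON to one-object-per-line matters: the resulting trace file can be read as streaming JSONL, one record per line, as exercised by test_jsonl_streaming_compatible above. The read_traces helper and the "trace.jsonl" path are hypothetical; the trace_id and spans keys match the adapter output asserted in the tests.

    import json

    def read_traces(filepath):
        """Yield one parsed trace record per line of a JSONL trace file."""
        with open(filepath, "r") as f:
            for line in f:
                if line.strip():  # tolerate trailing blank lines
                    yield json.loads(line)

    # Hypothetical usage: print each trace id and its span count.
    for record in read_traces("trace.jsonl"):
        print(record["trace_id"], len(record["spans"]))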