Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,23 @@ runs, but application code should prefer importing `gather(...)` directly.
```python
import syft_ingest

report = syft_ingest.ingest_jsonl(
"./output/creator_social_posts.jsonl",
destination=syft_ingest.QdrantDestination(
collection_name="katy-stevens",
url="http://127.0.0.1:6333",
),
embedding=syft_ingest.EmbeddingSpec(
backend="fastembed",
model="BAAI/bge-small-en-v1.5",
),
)
```

By default, `ingest_jsonl(...)` does not chunk documents. Each input record is
ingested as a single vector point unless you explicitly opt into chunking:

```python
report = syft_ingest.ingest_jsonl(
"./output/creator_social_posts.jsonl",
destination=syft_ingest.QdrantDestination(
Expand Down
13 changes: 8 additions & 5 deletions syft_ingest/core/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,16 +381,19 @@ def _enrich_text(doc: _Doc) -> str:
return f"{header}{desc_line}\n\n{doc.text}"


def to_rag(docs: list[_Doc], chunking: ChunkingSpec) -> list[_Doc]:
def to_rag(docs: list[_Doc], chunking: ChunkingSpec | None) -> list[_Doc]:
"""Chunk documents for RAG ingestion.

This is the single place where chunking happens, per architecture rule:
'to_rag() chunks. export() does NOT chunk.'

When ``chunking`` is ``None``, chunking is disabled and each input document
is emitted as a single RAG document.
"""
chunked: list[_Doc] = []
for doc in docs:
enriched = _enrich_text(doc)
chunks = _chunk_text(enriched, chunking)
chunks = [enriched] if chunking is None else _chunk_text(enriched, chunking)
raw_hash = hashlib.sha256(doc.text.encode("utf-8")).hexdigest()
source_id = doc.payload.get("url") or doc.payload.get("title") or "doc"
stable_doc_key = f"{source_id}::{doc.payload.get('source', '')}::{raw_hash}"
Expand Down Expand Up @@ -554,7 +557,7 @@ def _ingest_docs(
*,
destination: QdrantDestination,
embedding: EmbeddingSpec,
chunking: ChunkingSpec,
chunking: ChunkingSpec | None,
) -> IngestReport:
if not docs:
raise NoDocumentsError("No documents available for ingestion")
Expand Down Expand Up @@ -633,7 +636,7 @@ def ingest_jsonl(
_iter_docs_from_jsonl(manifest_jsonl),
destination=destination,
embedding=embedding or EmbeddingSpec(),
chunking=chunking or ChunkingSpec(),
chunking=chunking,
)


Expand All @@ -648,5 +651,5 @@ def ingest_corpus(
_iter_docs_from_corpus(corpus),
destination=destination,
embedding=embedding or EmbeddingSpec(),
chunking=chunking or ChunkingSpec(),
chunking=chunking,
)
29 changes: 29 additions & 0 deletions tests/test_ingest_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,35 @@ def test_ingest_jsonl_upserts_points(tmp_path, patch_ingest_runtime):
assert report.embedding_contract["embedding_backend"] == "fastembed"


def test_ingest_jsonl_defaults_to_no_chunking(tmp_path, patch_ingest_runtime):
    """Omitting ``chunking`` ingests each JSONL record as exactly one point."""
    record = {
        "title": "Long post",
        "author": "Katy Stevens",
        "url": "https://www.instagram.com/p/long123/",
        "text": "word " * 600,
        "site": "instagram.com",
        "source_type": "social_media_post",
        "metadata": {"platform": "instagram", "extractor": "brightdata"},
    }
    manifest_path = tmp_path / "manifest.jsonl"
    manifest_path.write_text(json.dumps(record) + "\n", encoding="utf-8")

    report = ingest_jsonl(
        manifest_path,
        destination=QdrantDestination(collection_name="katy-stevens-default"),
        embedding=EmbeddingSpec(),
    )

    # One input record should yield one document, one chunk, one upserted point.
    assert report.documents_total == 1
    assert report.chunks_total == 1
    assert len(report.point_ids) == 1


def test_ingest_corpus_supports_source_spec_metadata(tmp_path, patch_ingest_runtime):
brightdata_dir = tmp_path / "brightdata-ig"
brightdata_dir.mkdir(parents=True, exist_ok=True)
Expand Down
16 changes: 16 additions & 0 deletions tests/test_ingest_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,22 @@ def test_chunk_text_empty_returns_empty():
assert _chunk_text("", spec) == []


def test_to_rag_without_chunking_emits_single_doc():
    """``to_rag`` with ``chunking=None`` passes each doc through as one chunk."""
    source_doc = _Doc(
        text="Hello world, this is a test document with enough text.",
        payload={"url": "https://example.com/page1", "source": "test"},
    )

    rag_docs = to_rag([source_doc], None)

    assert len(rag_docs) == 1
    out_payload = rag_docs[0].payload
    # A pass-through doc is chunk 0 of 1 and keeps the original raw text.
    assert out_payload["chunk_index"] == 0
    assert out_payload["chunk_count"] == 1
    assert out_payload["raw_text"] == source_doc.text
    assert rag_docs[0].text


# ---------------------------------------------------------------------------
# 7. test_gather_unsupported_source_spec_kind
# ---------------------------------------------------------------------------
Expand Down