Skip to content

Commit 44f14dc

Browse files
committed
Save document_type
1 parent 4f9f315 commit 44f14dc

File tree

5 files changed

+10
-1
lines changed

5 files changed

+10
-1
lines changed

src/neo4j_graphrag/experimental/components/lexical_graph.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ def create_document_node(self, document_info: DocumentInfo) -> Neo4jNode:
113113
properties={
114114
"path": document_info.path,
115115
"createdAt": datetime.datetime.now(datetime.timezone.utc).isoformat(),
116+
"document_type": document_info.document_type,
116117
**document_metadata,
117118
},
118119
)

src/neo4j_graphrag/experimental/components/pdf_loader.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,5 +89,6 @@ async def run(
8989
document_info=DocumentInfo(
9090
path=filepath,
9191
metadata=self.get_document_metadata(text, metadata),
92+
document_type="pdf",
9293
),
9394
)

src/neo4j_graphrag/experimental/components/types.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ class DocumentInfo(DataModel):
3838
path: str
3939
metadata: Optional[Dict[str, str]] = None
4040
uid: str = Field(default_factory=lambda: str(uuid.uuid4()))
41+
document_type: Optional[str] = None
4142

4243
@property
4344
def document_id(self) -> str:

src/neo4j_graphrag/experimental/pipeline/config/template_pipeline/simple_kg_builder.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,5 +370,6 @@ def get_run_params(self, user_input: dict[str, Any]) -> dict[str, Any]:
370370
)
371371
or "document.txt",
372372
metadata=user_input.get("document_metadata"),
373+
document_type="inline_text",
373374
)
374375
return run_params

tests/unit/experimental/components/test_lexical_graph_builder.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,11 @@ async def test_lexical_graph_builder_run_with_document() -> None:
7878
TextChunk(text="text chunk 1", index=1),
7979
]
8080
),
81-
document_info=DocumentInfo(path="test_lexical_graph", uid=doc_uid),
81+
document_info=DocumentInfo(
82+
path="test_lexical_graph",
83+
uid=doc_uid,
84+
document_type="my_type",
85+
),
8286
)
8387
assert isinstance(result, GraphResult)
8488
graph = result.graph
@@ -89,6 +93,7 @@ async def test_lexical_graph_builder_run_with_document() -> None:
8993
assert document.label == DEFAULT_DOCUMENT_NODE_LABEL
9094
assert document.properties["path"] == "test_lexical_graph"
9195
assert document.properties["createdAt"] is not None
96+
assert document.properties["document_type"] == "my_type"
9297
chunk1 = nodes[1]
9398
assert chunk1.label == DEFAULT_CHUNK_NODE_LABEL
9499
chunk2 = nodes[2]

0 commit comments

Comments
 (0)