Skip to content

Commit c35136a

Browse files
authored
Switch default values for additional_* flags (#369)
* Switch default values for `additional_*` flags * Cleaning + update example
1 parent 9dc9719 commit c35136a

File tree

6 files changed

+177
-41
lines changed

6 files changed

+177
-41
lines changed

docs/source/user_guide_kg_builder.rst

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -998,31 +998,40 @@ By default, all extracted elements — including nodes, relationships, and prope
998998
Configuration Options
999999
---------------------
10001000

1001-
- **Required Properties**
1001+
- **Required Properties** (default: ``False``)
10021002
Required properties may be specified at the node or relationship type level. Any extracted node or relationship missing one or more of its required properties will be pruned from the graph.
10031003

1004-
- **Additional Properties** *(default: True)*
1004+
- **Additional Properties**
10051005
This node- or relationship-level option determines whether extra properties not listed in the schema should be retained.
10061006

1007-
- If set to ``True`` (default), all extracted properties are retained.
1007+
- If set to ``True``, all extracted properties are retained.
10081008
- If set to ``False``, only the properties defined in the schema are preserved; all others are removed.
10091009

1010+
.. note:: Default behavior
1011+
1012+
By default, this flag is set to ``False`` if at least one property is defined, ``True`` otherwise.
1013+
1014+
The same rule applies to ``additional_node_types``, ``additional_relationship_types`` and ``additional_patterns`` described below.
1015+
1016+
.. warning::
1017+
1018+
Defining a node or relationship type with no properties and ``additional_properties=False`` will raise a ``ValidationError``.
10101019

10111020
.. note:: Node pruning
10121021

10131022
If, after property pruning using the above rule, a node is left without any property, it is removed from the graph.
10141023

10151024

1016-
- **Additional Node Types** *(default: True)*
1025+
- **Additional Node Types**
10171026
This schema-level option specifies whether node types not defined in the schema are included in the graph.
10181027

1019-
- If set to ``True`` (default), such node types are retained.
1028+
- If set to ``True``, such node types are retained.
10201029
- If set to ``False``, nodes with undefined types are removed.
10211030

1022-
- **Additional Relationship Types** *(default: True)*
1031+
- **Additional Relationship Types**
10231032
This schema-level option specifies whether relationship types not defined in the schema are included in the graph.
10241033

1025-
- If set to ``True`` (default), such relationships are retained.
1034+
- If set to ``True``, such relationships are retained.
10261035
- If set to ``False``, relationships with undefined types are removed.
10271036

10281037
- **Additional Patterns**

examples/customize/build_graph/components/pruners/graph_pruner.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
Neo4jNode(
4242
id="Organization/Corp1",
4343
label="Organization",
44-
properties={"name": "CorpA"},
44+
properties={"name": "Corp1"},
4545
),
4646
],
4747
relationships=[
@@ -51,7 +51,7 @@
5151
type="KNOWS",
5252
),
5353
Neo4jRelationship(
54-
start_node_id="Organization/CorpA",
54+
start_node_id="Organization/Corp2",
5555
end_node_id="Person/Jack",
5656
type="WORKS_FOR",
5757
),
@@ -80,12 +80,14 @@
8080
PropertyType(name="name", type="STRING", required=True),
8181
PropertyType(name="address", type="STRING"),
8282
],
83+
additional_properties=True,
8384
),
8485
),
8586
relationship_types=(
8687
RelationshipType(
8788
label="WORKS_FOR",
8889
properties=[PropertyType(name="since", type="LOCAL_DATETIME")],
90+
additional_properties=True,
8991
),
9092
RelationshipType(
9193
label="KNOWS",

src/neo4j_graphrag/experimental/components/schema.py

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
import json
1818
import logging
1919
import warnings
20-
from typing import Any, Dict, List, Literal, Optional, Tuple, Union, Sequence
20+
from typing import Any, Dict, List, Literal, Optional, Tuple, Union, Sequence, Callable
2121
from pathlib import Path
2222

2323
from pydantic import (
@@ -27,6 +27,7 @@
2727
validate_call,
2828
ConfigDict,
2929
ValidationError,
30+
Field,
3031
)
3132
from typing_extensions import Self
3233

@@ -74,6 +75,13 @@ class PropertyType(BaseModel):
7475
)
7576

7677

78+
def default_additional_item(key: str) -> Callable[[dict[str, Any]], bool]:
    """Build a default factory for an ``additional_*`` flag.

    The returned callable receives a mapping of field values and reports
    whether the collection stored under ``key`` is empty, so the flag
    defaults to ``True`` only when nothing was explicitly defined.

    Args:
        key: Name of the sibling field to inspect (e.g. ``"properties"``).

    Returns:
        A callable taking the field-value mapping and returning ``True`` if
        ``key`` is absent or maps to an empty (or ``None``) value, ``False``
        otherwise.
    """

    def wrapper(validated_data: dict[str, Any]) -> bool:
        # Truthiness covers a missing key, an empty list/tuple and None
        # without raising, unlike calling len() on the looked-up value.
        return not validated_data.get(key)

    return wrapper
83+
84+
7785
class NodeType(BaseModel):
7886
"""
7987
Represents a possible node in the graph.
@@ -82,7 +90,9 @@ class NodeType(BaseModel):
8290
label: str
8391
description: str = ""
8492
properties: list[PropertyType] = []
85-
additional_properties: bool = True
93+
additional_properties: bool = Field(
94+
default_factory=default_additional_item("properties")
95+
)
8696

8797
@model_validator(mode="before")
8898
@classmethod
@@ -96,7 +106,8 @@ def validate_additional_properties(self) -> Self:
96106
if len(self.properties) == 0 and not self.additional_properties:
97107
raise ValueError(
98108
"Using `additional_properties=False` with no defined "
99-
"properties will cause the model to be pruned during graph cleaning.",
109+
"properties will cause the model to be pruned during graph cleaning. "
110+
f"Define some properties or remove this NodeType: {self}"
100111
)
101112
return self
102113

@@ -109,7 +120,9 @@ class RelationshipType(BaseModel):
109120
label: str
110121
description: str = ""
111122
properties: list[PropertyType] = []
112-
additional_properties: bool = True
123+
additional_properties: bool = Field(
124+
default_factory=default_additional_item("properties")
125+
)
113126

114127
@model_validator(mode="before")
115128
@classmethod
@@ -123,7 +136,8 @@ def validate_additional_properties(self) -> Self:
123136
if len(self.properties) == 0 and not self.additional_properties:
124137
raise ValueError(
125138
"Using `additional_properties=False` with no defined "
126-
"properties will cause the model to be pruned during graph cleaning.",
139+
"properties will cause the model to be pruned during graph cleaning. "
140+
f"Define some properties or remove this RelationshipType: {self}"
127141
)
128142
return self
129143

@@ -145,9 +159,15 @@ class GraphSchema(DataModel):
145159
relationship_types: Tuple[RelationshipType, ...] = tuple()
146160
patterns: Tuple[Tuple[str, str, str], ...] = tuple()
147161

148-
additional_node_types: bool = True
149-
additional_relationship_types: bool = True
150-
additional_patterns: bool = True
162+
additional_node_types: bool = Field(
163+
default_factory=default_additional_item("node_types")
164+
)
165+
additional_relationship_types: bool = Field(
166+
default_factory=default_additional_item("relationship_types")
167+
)
168+
additional_patterns: bool = Field(
169+
default_factory=default_additional_item("patterns")
170+
)
151171

152172
_node_type_index: dict[str, NodeType] = PrivateAttr()
153173
_relationship_type_index: dict[str, RelationshipType] = PrivateAttr()

tests/e2e/experimental/test_graph_pruning_component_e2e.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ async def test_graph_pruning_loose(extracted_graph: Neo4jGraph) -> None:
119119
{"name": "name", "type": "STRING"},
120120
{"name": "height", "type": "INTEGER"},
121121
],
122+
"additional_properties": True,
122123
}
123124
],
124125
"relationship_types": [
@@ -129,6 +130,9 @@ async def test_graph_pruning_loose(extracted_graph: Neo4jGraph) -> None:
129130
"patterns": [
130131
("Person", "KNOWS", "Person"),
131132
],
133+
"additional_node_types": True,
134+
"additional_relationship_types": True,
135+
"additional_patterns": True,
132136
}
133137
await _test(extracted_graph, schema_dict, extracted_graph)
134138

@@ -153,6 +157,7 @@ async def test_graph_pruning_missing_required_property(
153157
},
154158
{"name": "height", "type": "INTEGER"},
155159
],
160+
"additional_properties": True,
156161
}
157162
],
158163
"relationship_types": [
@@ -163,6 +168,9 @@ async def test_graph_pruning_missing_required_property(
163168
"patterns": [
164169
("Person", "KNOWS", "Person"),
165170
],
171+
"additional_node_types": True,
172+
"additional_relationship_types": True,
173+
"additional_patterns": True,
166174
}
167175
filtered_graph = Neo4jGraph(
168176
nodes=[
@@ -253,7 +261,7 @@ async def test_graph_pruning_strict_properties_and_node_types(
253261
},
254262
{"name": "height", "type": "INTEGER"},
255263
],
256-
"additional_properties": False,
264+
# "additional_properties": False, # default value
257265
}
258266
],
259267
"relationship_types": [
@@ -264,7 +272,9 @@ async def test_graph_pruning_strict_properties_and_node_types(
264272
"patterns": [
265273
("Person", "KNOWS", "Person"),
266274
],
267-
"additional_node_types": False,
275+
# "additional_node_types": False, # default value
276+
"additional_relationship_types": True,
277+
"additional_patterns": True,
268278
}
269279
filtered_graph = Neo4jGraph(
270280
nodes=[
@@ -354,6 +364,7 @@ async def test_graph_pruning_strict_patterns(extracted_graph: Neo4jGraph) -> Non
354364
},
355365
{"name": "height", "type": "INTEGER"},
356366
],
367+
"additional_properties": True,
357368
},
358369
{
359370
"label": "Organization",
@@ -371,6 +382,7 @@ async def test_graph_pruning_strict_patterns(extracted_graph: Neo4jGraph) -> Non
371382
("Person", "KNOWS", "Person"),
372383
("Person", "KNOWS", "Organization"),
373384
),
385+
"additional_node_types": True,
374386
"additional_relationship_types": False,
375387
"additional_patterns": False,
376388
}

tests/e2e/experimental/test_simplekgpipeline_e2e.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -96,13 +96,13 @@ async def test_pipeline_builder_happy_path_legacy_schema(
9696
]
9797

9898
# Instantiate Entity and Relation objects
99-
entities = ["PERSON", "ORGANIZATION", "HORCRUX", "LOCATION"]
99+
entities = ["Person", "Organization", "Horcrux", "Location"]
100100
relations = ["SITUATED_AT", "INTERACTS", "OWNS", "LED_BY"]
101101
potential_schema = [
102-
("PERSON", "SITUATED_AT", "LOCATION"),
103-
("PERSON", "INTERACTS", "PERSON"),
104-
("PERSON", "OWNS", "HORCRUX"),
105-
("ORGANIZATION", "LED_BY", "PERSON"),
102+
("Person", "SITUATED_AT", "Location"),
103+
("Person", "INTERACTS", "Person"),
104+
("Person", "OWNS", "Horcrux"),
105+
("Organization", "LED_BY", "Person"),
106106
]
107107

108108
# Additional arguments

0 commit comments

Comments
 (0)