Skip to content

Commit fe19482

Browse files
prasmussen15 and claude authored
fix(summary): exclude duplicate edges from node summary generation (#1223)
* fix(summary): exclude duplicate edges from node summary generation

  When resolving extracted edges, edges that match existing edges in the graph were still being passed to node summary generation, causing facts to be duplicated in summaries.

  Changes:
  - Update resolve_extracted_edges to return new_edges (non-duplicates)
  - Update _extract_and_resolve_edges to pass through new_edges
  - Pass only new_edges to extract_attributes_from_nodes in add_episode
  - An edge is considered "new" if its resolved UUID matches extracted UUID

  Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

* chore: bump version to 0.27.1

  Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 6e90bc6 commit fe19482

File tree

4 files changed

+45
-13
lines changed

4 files changed

+45
-13
lines changed

graphiti_core/graphiti.py

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -440,8 +440,17 @@ async def _extract_and_resolve_edges(
440440
nodes: list[EntityNode],
441441
uuid_map: dict[str, str],
442442
custom_extraction_instructions: str | None = None,
443-
) -> tuple[list[EntityEdge], list[EntityEdge]]:
444-
"""Extract edges from episode and resolve against existing graph."""
443+
) -> tuple[list[EntityEdge], list[EntityEdge], list[EntityEdge]]:
444+
"""Extract edges from episode and resolve against existing graph.
445+
446+
Returns
447+
-------
448+
tuple[list[EntityEdge], list[EntityEdge], list[EntityEdge]]
449+
A tuple of (resolved_edges, invalidated_edges, new_edges) where:
450+
- resolved_edges: All edges after resolution
451+
- invalidated_edges: Edges invalidated by new information
452+
- new_edges: Only edges that are new to the graph (not duplicates)
453+
"""
445454
extracted_edges = await extract_edges(
446455
self.clients,
447456
episode,
@@ -455,7 +464,7 @@ async def _extract_and_resolve_edges(
455464

456465
edges = resolve_edge_pointers(extracted_edges, uuid_map)
457466

458-
resolved_edges, invalidated_edges = await resolve_extracted_edges(
467+
resolved_edges, invalidated_edges, new_edges = await resolve_extracted_edges(
459468
self.clients,
460469
edges,
461470
episode,
@@ -464,7 +473,7 @@ async def _extract_and_resolve_edges(
464473
edge_type_map,
465474
)
466475

467-
return resolved_edges, invalidated_edges
476+
return resolved_edges, invalidated_edges, new_edges
468477

469478
async def _process_episode_data(
470479
self,
@@ -700,6 +709,8 @@ async def _resolve_nodes_and_edges_bulk(
700709
for result in edge_results:
701710
resolved_edges.extend(result[0])
702711
invalidated_edges.extend(result[1])
712+
# result[2] is new_edges - not used in bulk flow since attributes
713+
# are extracted before edge resolution
703714

704715
return final_hydrated_nodes, resolved_edges, invalidated_edges, uuid_map
705716

@@ -917,7 +928,7 @@ async def add_episode_endpoint(episode_data: EpisodeData):
917928
)
918929

919930
# Extract and resolve edges in parallel with attribute extraction
920-
resolved_edges, invalidated_edges = await self._extract_and_resolve_edges(
931+
resolved_edges, invalidated_edges, new_edges = await self._extract_and_resolve_edges(
921932
episode,
922933
extracted_nodes,
923934
previous_episodes,
@@ -931,14 +942,15 @@ async def add_episode_endpoint(episode_data: EpisodeData):
931942

932943
entity_edges = resolved_edges + invalidated_edges
933944

934-
# Extract node attributes
945+
# Extract node attributes - only pass new edges for summary generation
946+
# to avoid duplicating facts that already exist in the graph
935947
hydrated_nodes = await extract_attributes_from_nodes(
936948
self.clients,
937949
nodes,
938950
episode,
939951
previous_episodes,
940952
entity_types,
941-
edges=entity_edges,
953+
edges=new_edges,
942954
)
943955

944956
# Process and save episode data (including saga association if provided)

graphiti_core/utils/maintenance/edge_operations.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,17 @@ async def resolve_extracted_edges(
306306
entities: list[EntityNode],
307307
edge_types: dict[str, type[BaseModel]],
308308
edge_type_map: dict[tuple[str, str], list[str]],
309-
) -> tuple[list[EntityEdge], list[EntityEdge]]:
309+
) -> tuple[list[EntityEdge], list[EntityEdge], list[EntityEdge]]:
310+
"""Resolve extracted edges against existing graph context.
311+
312+
Returns
313+
-------
314+
tuple[list[EntityEdge], list[EntityEdge], list[EntityEdge]]
315+
A tuple of (resolved_edges, invalidated_edges, new_edges) where:
316+
- resolved_edges: All edges after resolution (may include existing edges if duplicates found)
317+
- invalidated_edges: Edges that were invalidated/contradicted by new information
318+
- new_edges: Only edges that are new to the graph (not duplicates of existing edges)
319+
"""
310320
# Fast path: deduplicate exact matches within the extracted edges before parallel processing
311321
seen: dict[tuple[str, str, str], EntityEdge] = {}
312322
deduplicated_edges: list[EntityEdge] = []
@@ -444,21 +454,29 @@ async def resolve_extracted_edges(
444454

445455
resolved_edges: list[EntityEdge] = []
446456
invalidated_edges: list[EntityEdge] = []
447-
for result in results:
457+
new_edges: list[EntityEdge] = []
458+
for extracted_edge, result in zip(extracted_edges, results, strict=True):
448459
resolved_edge = result[0]
449460
invalidated_edge_chunk = result[1]
461+
# result[2] is duplicate_edges list
450462

451463
resolved_edges.append(resolved_edge)
452464
invalidated_edges.extend(invalidated_edge_chunk)
453465

466+
# Track edges that are new (not duplicates of existing edges)
467+
# An edge is new if the resolved edge UUID matches the extracted edge UUID
468+
if resolved_edge.uuid == extracted_edge.uuid:
469+
new_edges.append(resolved_edge)
470+
454471
logger.debug(f'Resolved edges: {[(e.name, e.uuid) for e in resolved_edges]}')
472+
logger.debug(f'New edges (non-duplicates): {[(e.name, e.uuid) for e in new_edges]}')
455473

456474
await semaphore_gather(
457475
create_entity_edge_embeddings(embedder, resolved_edges),
458476
create_entity_edge_embeddings(embedder, invalidated_edges),
459477
)
460478

461-
return resolved_edges, invalidated_edges
479+
return resolved_edges, invalidated_edges, new_edges
462480

463481

464482
def resolve_edge_contradictions(

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[project]
22
name = "graphiti-core"
33
description = "A temporal graph building library"
4-
version = "0.27.0"
4+
version = "0.27.1"
55
authors = [
66
{ name = "Paul Paliychuk", email = "paul@getzep.com" },
77
{ name = "Preston Rasmussen", email = "preston@getzep.com" },

tests/utils/maintenance/test_edge_operations.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ async def immediate_gather(*aws, max_coroutines=None):
221221
edge_types = {'OCCURRED_AT': OccurredAtEdge}
222222
edge_type_map = {('Event', 'Entity'): ['OCCURRED_AT']}
223223

224-
resolved_edges, invalidated_edges = await resolve_extracted_edges(
224+
resolved_edges, invalidated_edges, new_edges = await resolve_extracted_edges(
225225
clients,
226226
[extracted_edge],
227227
episode,
@@ -232,6 +232,7 @@ async def immediate_gather(*aws, max_coroutines=None):
232232

233233
assert resolved_edges[0].name == 'INTERACTED_WITH'
234234
assert invalidated_edges == []
235+
assert new_edges == resolved_edges # No duplicates, so all edges are new
235236

236237

237238
@pytest.mark.asyncio
@@ -431,7 +432,7 @@ async def immediate_gather(*aws, max_coroutines=None):
431432
valid_at=datetime.now(timezone.utc),
432433
)
433434

434-
resolved_edges, invalidated_edges = await resolve_extracted_edges(
435+
resolved_edges, invalidated_edges, new_edges = await resolve_extracted_edges(
435436
clients,
436437
[edge1, edge2, edge3],
437438
episode,
@@ -445,6 +446,7 @@ async def immediate_gather(*aws, max_coroutines=None):
445446
assert resolve_call_count == 1
446447
assert len(resolved_edges) == 1
447448
assert invalidated_edges == []
449+
assert new_edges == resolved_edges # All edges are new (no graph duplicates)
448450

449451

450452
class InterpersonalRelationship(BaseModel):

0 commit comments

Comments
 (0)