Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
b924f6d
feat(tier0): subclassing.
everaldorodrigo Feb 25, 2026
7a688d3
feat(tier0): create feature flags.
everaldorodrigo Feb 26, 2026
71cf578
feat(tier0): create subclass tests and fix corner cases.
everaldorodrigo Feb 26, 2026
006a820
fix(tier0): Lint
everaldorodrigo Feb 26, 2026
8fdd965
fix(tier0): Lint
everaldorodrigo Feb 26, 2026
807c2c6
feat(tier0): enable symmetric and subclass expansion by default.
everaldorodrigo Feb 26, 2026
74a691a
feat(tier0): update live tests.
everaldorodrigo Feb 27, 2026
79b6401
fix(tier0): Lint
everaldorodrigo Feb 27, 2026
26b17db
fix(tier0): Simplify config retrieval in symmetric/sublclass query ex…
everaldorodrigo Feb 27, 2026
9b41232
feat(tier0): add live tests for subclass.
everaldorodrigo Feb 28, 2026
0699804
fix(tier0): intermediate subquery.
everaldorodrigo Mar 2, 2026
4132def
fix(tier0): intermediate subquery.
everaldorodrigo Mar 2, 2026
24935d7
feat(tier0): update intermediate node name.
everaldorodrigo Mar 10, 2026
0a8e0e0
feat(tier0): remove unused code.
everaldorodrigo Mar 10, 2026
8fe388d
feat(tier0): refactor intermediate node name.
everaldorodrigo Mar 10, 2026
b3de85d
proper subclass handling in _build_results()
tokebe Mar 10, 2026
5fb0de1
feat(tier0): add/refactor is_subclass_of_expansion flag to edges/nodes.
everaldorodrigo Mar 10, 2026
a915951
fix(tier0): fix is subclass expansion check.
everaldorodrigo Mar 10, 2026
4b78a7c
fix(tier0): fix my wrong rebase.
everaldorodrigo Mar 10, 2026
3e2351d
fix(tier0): fix is subclass expansion check.
everaldorodrigo Mar 11, 2026
d871ea0
fix(tier0): add subclass for multi hop queries.
everaldorodrigo Mar 11, 2026
86e8324
feat(tier0): Implement query plan for cascade OR.
everaldorodrigo Mar 23, 2026
a2380af
feat(tier0): Add section comments.
everaldorodrigo Mar 23, 2026
6465d52
_build_results() fixes
tokebe Mar 24, 2026
65d420f
fix(tier0): Lint
everaldorodrigo Mar 23, 2026
af10a46
fix(tier0): Lint
everaldorodrigo Mar 23, 2026
68daa04
feat(tier0): Refactor cascade OR function.
everaldorodrigo Mar 25, 2026
fa0c232
fix(tier0): Fix subclass case 3 form b.
everaldorodrigo Mar 26, 2026
d5736e4
fix(tier0): Fix test for subclass case 3 form b.
everaldorodrigo Mar 26, 2026
bc67355
Move subclass solving to subclass module
tokebe Mar 27, 2026
b2fd75f
solve subclass edges
tokebe Mar 27, 2026
3e37641
fix(tier0): Update subclass driver tests and set version vM.
everaldorodrigo Mar 27, 2026
9418fe2
fix import cycle
tokebe Mar 27, 2026
41d33f7
Merge branch 'tier0-feat-subclassing' of github.com:BioPack-team/retr…
tokebe Mar 27, 2026
8cd551e
fix: add end node to kg
tokebe Mar 27, 2026
8e4bc09
fix(tier0): add noqa PLR0913. Compressing args would add abstraction …
everaldorodrigo Mar 27, 2026
f2fed53
Don't do subclassing on edges that can expand to subclass_of
tokebe Mar 30, 2026
84ecbcd
fix test
tokebe Mar 30, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions src/retriever/config/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,18 @@ class DgraphSettings(BaseModel):
description="gRPC max receive message length in bytes (-1 for unlimited).",
),
] = -1
enable_symmetric_edges: Annotated[
bool,
Field(
description="Enable symmetric edge expansion.",
),
] = True
enable_subclass_edges: Annotated[
bool,
Field(
description="Enable subclass edge expansion.",
),
] = True

@property
def http_endpoint(self) -> str:
Expand Down
85 changes: 76 additions & 9 deletions src/retriever/data_tiers/tier_0/dgraph/result_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,16 @@
# It captures the part after the optional prefix and "node_"
NODE_KEY_PATTERN = re.compile(r"(?:q\d+_)?node_(\w+)")

# Binding prefixes that mark a transpiler-generated intermediate node
# (checked after the leading "node_" has been removed from the key):
#   "intermediate_{n}"  -> Forms B, C, ObjB
#   "intermediateA_{n}" -> Form D, source side
#   "intermediateB_{n}" -> Form D, target side
# Kept as a tuple because it is handed directly to str.startswith().
SUBCLASS_EXPANSION_INTERMEDIATE_PREFIXES: tuple[str, ...] = (
    "intermediate_",
    "intermediateA_",
    "intermediateB_",
)


def _strip_prefix(d: Mapping[str, Any], prefix: str | None) -> Mapping[str, Any]:
if not prefix:
Expand Down Expand Up @@ -85,6 +95,7 @@ class Edge:
"""Represents a directed edge with its properties and a target node."""

binding: str
raw_alias: str
direction: Literal["in"] | Literal["out"]
predicate: str
node: Node
Expand All @@ -93,26 +104,31 @@ class Edge:
id: str | None = None
qualifiers: dict[str, str]
attributes: dict[str, Any]
is_subclass_of_expansion: bool = False

@classmethod
def from_dict( # noqa: PLR0913
cls,
edge_dict: Mapping[str, Any],
binding: str,
raw_alias: str,
direction: str,
prefix: str | None = None,
edge_id_map: Mapping[str, str] | None = None,
node_id_map: Mapping[str, str] | None = None,
is_subclass_of_expansion: bool = False,
) -> Self:
"""Parse an edge mapping into an Edge dataclass (handles versioned keys).

Args:
edge_dict: Raw edge data from Dgraph response
binding: Edge binding (already converted to original ID by Node.from_dict)
raw_alias: The original query alias from Dgraph (e.g. 'q0_in_edges_e0')
direction: Edge direction ('in' or 'out')
prefix: Schema version prefix (e.g., 'vC_'), or None for no prefix
edge_id_map: Optional mapping from normalized edge IDs to original IDs
node_id_map: Optional mapping from normalized node IDs to original IDs
is_subclass_of_expansion: Whether the edge is a subclass_of expansion

Returns:
Parsed Edge instance with connected node
Expand All @@ -123,17 +139,42 @@ def from_dict( # noqa: PLR0913
(v for k, v in norm.items() if k.startswith("node_")),
cast(Mapping[str, Any], {}),
)
normalized_node_binding = next(
(k.split("_", 1)[1] for k in norm if k.startswith("node_")), ""
)

# Convert node binding back to original ID
node_binding = (
node_id_map.get(normalized_node_binding, normalized_node_binding)
if node_id_map
else normalized_node_binding
# Extract the node binding from keys like:
# "node_n1" -> binding = "n1" (regular node)
# "node_intermediate_n1" -> binding = "intermediate_n1" (intermediate, forward)
# "node_intermediate_n0" -> binding = "intermediate_n0" (intermediate, backward)
# "node_intermediateA_n0" -> binding = "intermediateA_n0" (form D, source side)
# "node_intermediateB_n1" -> binding = "intermediateB_n1" (form D, target side)
raw_node_key = next((k for k in norm if k.startswith("node_")), "")
# Strip the leading "node_" prefix to get the binding portion
raw_node_binding = raw_node_key[len("node_") :] if raw_node_key else ""

node_is_subclass_of_expansion = raw_node_binding.startswith(
SUBCLASS_EXPANSION_INTERMEDIATE_PREFIXES
)

# Intermediate nodes: strip whichever intermediate prefix matched so the binding
# is just the node alias (e.g. "n1"), then convert it using node_id_map if given.
# Regular nodes: convert using node_id_map as usual.
if node_is_subclass_of_expansion:
# Strip whichever prefix matched to recover the clean node alias (e.g. "n0", "n1")
matched_prefix = next(
p
for p in SUBCLASS_EXPANSION_INTERMEDIATE_PREFIXES
if raw_node_binding.startswith(p)
)
clean_binding = raw_node_binding[len(matched_prefix) :]
node_binding = (
node_id_map.get(clean_binding, clean_binding)
if node_id_map
else clean_binding
)
else:
node_binding = (
node_id_map.get(raw_node_binding, raw_node_binding)
if node_id_map
else raw_node_binding
)

# --- Parse sources ---
sources_val = norm.get("sources")
parsed_sources: list[Source] = []
Expand Down Expand Up @@ -168,19 +209,23 @@ def from_dict( # noqa: PLR0913

return cls(
binding=binding,
raw_alias=raw_alias,
direction="in" if direction == "in" else "out",
predicate=str(norm.get("predicate", "")),
node=Node.from_dict(
node_val,
binding=node_binding,
raw_alias=raw_node_key,
prefix=prefix,
edge_id_map=edge_id_map,
node_id_map=node_id_map,
is_subclass_of_expansion=node_is_subclass_of_expansion,
),
sources=parsed_sources,
id=str(norm["eid"]) if "eid" in norm else None,
attributes=attributes,
qualifiers=qualifiers,
is_subclass_of_expansion=is_subclass_of_expansion,
)


Expand All @@ -189,30 +234,36 @@ class Node:
"""Represents a node in the graph, with its properties and connected edges."""

binding: str
raw_alias: str
id: str
name: str
edges: list[Edge] = field(default_factory=list)
category: list[str] = field(default_factory=list)
attributes: dict[str, Any]
is_subclass_of_expansion: bool = False

@classmethod
def from_dict(
def from_dict( # noqa: PLR0913
cls,
data: Mapping[str, Any],
*,
binding: str = "",
raw_alias: str = "",
prefix: str | None = None,
edge_id_map: Mapping[str, str] | None = None,
node_id_map: Mapping[str, str] | None = None,
is_subclass_of_expansion: bool = False,
) -> Self:
"""Parse a node mapping into a Node dataclass (handles versioned keys).

Args:
data: Raw node data from Dgraph response
binding: Node binding (normalized ID like 'n0')
raw_alias: The original query alias from Dgraph (e.g. 'q0_node_n0')
prefix: Schema version prefix (e.g., 'vC_'), or None for no prefix
edge_id_map: Optional mapping from normalized edge IDs to original IDs
node_id_map: Optional mapping from normalized node IDs to original IDs
is_subclass_of_expansion: Whether the node is an intermediate node

Returns:
Parsed Node instance with edges having original bindings
Expand Down Expand Up @@ -246,15 +297,21 @@ def from_dict(
# Fallback for unexpected format
edge_binding = binding

# Edge is a transpiler-generated subclass hop if the alias contains "-subclass"
# e.g. "in_edges-subclassB_e0", "in_edges-subclassD_e0", "in_edges-subclassObjB_e0"
edge_is_subclass_expansion = "-subclass" in key

if isinstance(value, list):
edges.extend(
Edge.from_dict(
e,
binding=edge_binding,
direction="in",
raw_alias=key,
prefix=prefix,
edge_id_map=edge_id_map,
node_id_map=node_id_map,
is_subclass_of_expansion=edge_is_subclass_expansion,
)
for e in filter(_is_mapping, cast(list[Any], value))
)
Expand All @@ -278,15 +335,22 @@ def from_dict(
# Fallback for unexpected format
edge_binding = binding

# Edge is a transpiler-generated subclass hop if the alias contains "-subclass"
# e.g. "out_edges-subclassC_e0", "out_edges-subclassC-tail_e0",
# "out_edges-subclassB-mid_e0", "out_edges-subclassD-tail_e0"
edge_is_subclass_expansion = "-subclass" in key

if isinstance(value, list):
edges.extend(
Edge.from_dict(
e,
binding=edge_binding,
raw_alias=key,
direction="out",
prefix=prefix,
edge_id_map=edge_id_map,
node_id_map=node_id_map,
is_subclass_of_expansion=edge_is_subclass_expansion,
)
for e in filter(_is_mapping, cast(list[Any], value))
)
Expand All @@ -297,11 +361,13 @@ def from_dict(

return cls(
binding=binding,
raw_alias=raw_alias,
id=str(norm.get("id", "")),
name=str(norm.get("name", "")),
edges=edges,
category=_to_str_list(norm.get("category")),
attributes=attributes,
is_subclass_of_expansion=is_subclass_of_expansion,
)


Expand Down Expand Up @@ -370,6 +436,7 @@ def parse(
Node.from_dict(
node_data,
binding=node_binding,
raw_alias=query_alias,
prefix=prefix,
edge_id_map=edge_id_map,
node_id_map=node_id_map,
Expand Down
Loading
Loading