Skip to content

Commit ed0c75a

Browse files
feat: allow user-defined node and relationship properties in MultiHopSpecificQuerySynthesizer
- Users should be able to set the relationship type used for identifying clusters and the relationship property used for identifying overlapping concepts within the triple.
- This does not change the default behavior; it only allows users to override it.
1 parent 6c8a784 commit ed0c75a

File tree

1 file changed

+8
-16
lines changed
  • ragas/src/ragas/testset/synthesizers/multi_hop

1 file changed

+8
-16
lines changed

ragas/src/ragas/testset/synthesizers/multi_hop/specific.py

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -27,28 +27,19 @@
2727

2828
@dataclass
2929
class MultiHopSpecificQuerySynthesizer(MultiHopQuerySynthesizer):
30-
"""
31-
Synthesizes overlap based queries by choosing specific chunks and generating a
32-
keyphrase from them and then generating queries based on that.
33-
34-
Attributes
35-
----------
36-
generate_query_prompt : PydanticPrompt
37-
The prompt used for generating the query.
38-
"""
30+
"""Synthesize multi-hop queries based on a chunk cluster defined by entity overlap."""
3931

4032
name: str = "multi_hop_specific_query_synthesizer"
41-
relation_type: str = "entities_overlap"
4233
property_name: str = "entities"
34+
relation_type: str = "entities_overlap"
35+
relation_overlap_property: str = "overlapped_items"
4336
theme_persona_matching_prompt: PydanticPrompt = ThemesPersonasMatchingPrompt()
4437
generate_query_reference_prompt: PydanticPrompt = QueryAnswerGenerationPrompt()
4538

4639
def get_node_clusters(self, knowledge_graph: KnowledgeGraph) -> t.List[t.Tuple]:
47-
40+
"""Identify clusters of nodes based on the specified relationship condition."""
4841
node_clusters = knowledge_graph.find_two_nodes_single_rel(
49-
relationship_condition=lambda rel: (
50-
True if rel.type == self.relation_type else False
51-
)
42+
relationship_condition=lambda rel: rel.type == self.relation_type
5243
)
5344
logger.info("found %d clusters", len(node_clusters))
5445
return node_clusters
@@ -61,7 +52,8 @@ async def _generate_scenarios(
6152
callbacks: Callbacks,
6253
) -> t.List[MultiHopScenario]:
6354
"""
64-
Generates a list of scenarios on type MultiHopSpecificQuerySynthesizer
55+
Generate a list of scenarios of type MultiHopScenario.
56+
6557
Steps to generate scenarios:
6658
1. Filter the knowledge graph to find cluster of nodes or defined relation type. Here entities_overlap
6759
2. Calculate the number of samples that should be created per cluster to get n samples in total
@@ -87,7 +79,7 @@ async def _generate_scenarios(
8779
if len(scenarios) < n:
8880
node_a, node_b = triplet[0], triplet[-1]
8981
overlapped_items = []
90-
overlapped_items = triplet[1].properties["overlapped_items"]
82+
overlapped_items = triplet[1].properties[self.relation_overlap_property]
9183
if overlapped_items:
9284
themes = list(dict(overlapped_items).keys())
9385
prompt_input = ThemesPersonasInput(

0 commit comments

Comments
 (0)