 # BUSINESS ADVANTAGE OR UNAVAILABILITY, OR LOSS OR CORRUPTION OF
 # DATA.
 # ##############################################################################
+import asyncio
 
 from llama_index.core.base.response.schema import Response
 from llama_index.core.chat_engine.types import AgentChatResponse
-from llama_index.core.evaluation import FaithfulnessEvaluator, RelevancyEvaluator
+from llama_index.core.evaluation import (
+    FaithfulnessEvaluator,
+    RelevancyEvaluator,
+    EvaluationResult,
+)
+from llama_index.core.llms import LLM
 
 from ..services import models
 
 
 def evaluate_response(
-    query: str, chat_response: AgentChatResponse, model_name: str
+    query: str, chat_response: AgentChatResponse, model_name: str
 ) -> tuple[float, float]:
     # todo: pass in the correct llm model and use it, rather than requiring querying for it like this.
     evaluator_llm = models.LLM.get(model_name)
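+    # asyncio.run() starts a fresh event loop, so this synchronous entry
+    # point must not be called from inside an already-running loop.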
+    return asyncio.run(_async_evaluate_response(query, chat_response, evaluator_llm))
 
-    relevancy_evaluator = RelevancyEvaluator(llm=evaluator_llm)
-    relevance = relevancy_evaluator.evaluate_response(
+
+async def _async_evaluate_response(query: str, chat_response: AgentChatResponse, evaluator_llm: LLM) -> tuple[float, float]:
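+    # The relevancy and faithfulness checks run one after the other; each
+    # issues its own call to the evaluator LLM.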
+    relevance = await _evaluate_relevancy(chat_response, evaluator_llm, query)
+    faithfulness = await _evaluate_faithfulness(chat_response, evaluator_llm, query)
+    return relevance.score or 0, faithfulness.score or 0
+
+
+async def _evaluate_faithfulness(chat_response: AgentChatResponse, evaluator_llm: LLM, query: str) -> EvaluationResult:
+    faithfulness_evaluator = FaithfulnessEvaluator(llm=evaluator_llm)
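+    # Repackage the agent's reply as a Response so the evaluator can
+    # inspect the retrieved source nodes alongside the answer text.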
+    return await faithfulness_evaluator.aevaluate_response(
         query=query,
         response=Response(
             response=chat_response.response,
             source_nodes=chat_response.source_nodes,
             metadata=chat_response.metadata,
         ),
     )
-    faithfulness_evaluator = FaithfulnessEvaluator(llm=evaluator_llm)
-    faithfulness = faithfulness_evaluator.evaluate_response(
+
+
+async def _evaluate_relevancy(chat_response: AgentChatResponse, evaluator_llm: LLM, query: str) -> EvaluationResult:
+    relevancy_evaluator = RelevancyEvaluator(llm=evaluator_llm)
+    return await relevancy_evaluator.aevaluate_response(
         query=query,
         response=Response(
             response=chat_response.response,
             source_nodes=chat_response.source_nodes,
             metadata=chat_response.metadata,
         ),
     )
-    return relevance.score or 0, faithfulness.score or 0
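For orientation, a minimal sketch of how the reworked entry point might be driven. It is not part of the commit: `chat_engine` and the model name are illustrative assumptions standing in for whatever engine and registered model the surrounding application provides.

# Hypothetical caller, for illustration only; `chat_engine` and the
# "some-registered-model" name are assumptions, not part of this commit.
query = "What does the design doc say about retries?"
chat_response: AgentChatResponse = chat_engine.chat(query)
relevance, faithfulness = evaluate_response(query, chat_response, "some-registered-model")
print(f"relevance={relevance}, faithfulness={faithfulness}")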