From 43ab15f9155b7cdabee616c3a65dc431270023d1 Mon Sep 17 00:00:00 2001
From: crisschan
Date: Wed, 18 Jun 2025 17:51:40 +0800
Subject: [PATCH 1/4] fix the bug where using Ollama times out
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

add new file ollama_wrapper.py; change the ChatOllama instance to a ChatOpenAI
client pointed at Ollama's OpenAI-compatible /v1 endpoint.
---
 ragas/src/ragas/llms/__init__.py       |   2 +
 ragas/src/ragas/llms/ollama_wrapper.py | 102 +++++++++++++++++++++++++
 2 files changed, 104 insertions(+)
 create mode 100644 ragas/src/ragas/llms/ollama_wrapper.py

diff --git a/ragas/src/ragas/llms/__init__.py b/ragas/src/ragas/llms/__init__.py
index 440013051..0d369ae52 100644
--- a/ragas/src/ragas/llms/__init__.py
+++ b/ragas/src/ragas/llms/__init__.py
@@ -5,11 +5,13 @@
     llm_factory,
 )
 from ragas.llms.haystack_wrapper import HaystackLLMWrapper
+from ragas.llms.ollama_wrapper import OllamaLLMWrapper
 
 __all__ = [
     "BaseRagasLLM",
     "HaystackLLMWrapper",
     "LangchainLLMWrapper",
     "LlamaIndexLLMWrapper",
+    "OllamaLLMWrapper",
     "llm_factory",
 ]
diff --git a/ragas/src/ragas/llms/ollama_wrapper.py b/ragas/src/ragas/llms/ollama_wrapper.py
new file mode 100644
index 000000000..a781b3257
--- /dev/null
+++ b/ragas/src/ragas/llms/ollama_wrapper.py
@@ -0,0 +1,102 @@
+import typing as t
+from langchain_core.callbacks import Callbacks
+from langchain_core.outputs import Generation, LLMResult
+from langchain_core.prompt_values import PromptValue
+from langchain_ollama import ChatOllama
+from langchain_openai import ChatOpenAI
+from ragas.cache import CacheInterface
+from ragas.llms import BaseRagasLLM
+from ragas.run_config import RunConfig
+
+
+class OllamaLLMWrapper(BaseRagasLLM):
+    """
+    A wrapper class for using Ollama LLM within the Ragas framework.
+
+    This class integrates Ollama's LLM into Ragas, enabling both synchronous and
+    asynchronous text generation.
+
+    Parameters
+    ----------
+    ollama_llm : ChatOllama
+        An instance of Ollama chat model.
+    run_config : RunConfig, optional
+        Configuration object to manage LLM execution settings, by default None.
+    cache : CacheInterface, optional
+        A cache instance for storing results, by default None.
+    """
+
+    def __init__(
+        self,
+        ollama_llm: ChatOllama,
+        run_config: t.Optional[RunConfig] = None,
+        cache: t.Optional[CacheInterface] = None,
+    ):
+        super().__init__(cache=cache)
+
+        self.llm = ChatOpenAI(
+            api_key="ollama",
+            model=ollama_llm.model,
+            base_url=f"{ollama_llm.base_url}/v1"
+        )
+
+        if run_config is None:
+            run_config = RunConfig()
+        self.set_run_config(run_config)
+
+    def is_finished(self, response: LLMResult) -> bool:
+        """Check if the generation is finished."""
+        return True
+
+    def generate_text(
+        self,
+        prompt: str,
+        stop: t.Optional[t.List[str]] = None,
+        run_manager: t.Optional[Callbacks] = None,
+        **kwargs: t.Any,
+    ) -> str:
+        """Generate text from the model."""
+        response = self.llm.invoke(prompt)
+        return response.content
+
+    async def agenerate_text(
+        self,
+        prompt: str,
+        stop: t.Optional[t.List[str]] = None,
+        run_manager: t.Optional[Callbacks] = None,
+        **kwargs: t.Any,
+    ) -> str:
+        """Generate text from the model asynchronously."""
+        response = await self.llm.ainvoke(prompt)
+        return response.content
+
+    def _generate(
+        self,
+        prompts: t.List[PromptValue],
+        stop: t.Optional[t.List[str]] = None,
+        run_manager: t.Optional[Callbacks] = None,
+        **kwargs: t.Any,
+    ) -> LLMResult:
+        """Generate text from the model."""
+        generations = []
+        for prompt in prompts:
+            response = self.llm.invoke(prompt.to_messages())
+            generations.append([Generation(text=response.content)])
+        return LLMResult(generations=generations)
+
+    async def _agenerate(
+        self,
+        prompts: t.List[PromptValue],
+        stop: t.Optional[t.List[str]] = None,
+        run_manager: t.Optional[Callbacks] = None,
+        **kwargs: t.Any,
+    ) -> LLMResult:
+        """Generate text from the model asynchronously."""
+        generations = []
+        for prompt in prompts:
+            response = await self.llm.ainvoke(prompt.to_messages())
+            generations.append([Generation(text=response.content)])
+        return LLMResult(generations=generations)
+
+    def __repr__(self) -> str:
+        return f"{self.__class__.__name__}(llm={self.llm.model}(...))"
\ No newline at end of file

From 4e809f5549105528b79446af8fa52e6a5b48b076 Mon Sep 17 00:00:00 2001
From: crisschan
Date: Thu, 26 Jun 2025 10:38:09 +0800
Subject: [PATCH 2/4] add langchain_ollama to ragas/pyproject.toml

add langchain_ollama to ragas/pyproject.toml
---
 ragas/pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ragas/pyproject.toml b/ragas/pyproject.toml
index 573e47705..ec55df345 100644
--- a/ragas/pyproject.toml
+++ b/ragas/pyproject.toml
@@ -9,6 +9,7 @@ dependencies = [
     "langchain-core",
     "langchain-community",
     "langchain_openai",
+    "langchain_ollama",
     "nest-asyncio",
     "appdirs",
     "pydantic>=2",

From 05039ac80b8aca4594f0a899a3ec903ff372726b Mon Sep 17 00:00:00 2001
From: crisschan
Date: Fri, 18 Jul 2025 11:29:03 +0800
Subject: [PATCH 3/4] modify the ollama_wrapper for Nvidia AnswerAccuracy

1. change the llm from the ChatOpenAI API to ChatOllama
2. change the generate_text and agenerate_text return type to LLMResult
---
 ragas/src/ragas/llms/ollama_wrapper.py | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/ragas/src/ragas/llms/ollama_wrapper.py b/ragas/src/ragas/llms/ollama_wrapper.py
index a781b3257..bf2f9b57e 100644
--- a/ragas/src/ragas/llms/ollama_wrapper.py
+++ b/ragas/src/ragas/llms/ollama_wrapper.py
@@ -33,13 +33,7 @@ def __init__(
         cache: t.Optional[CacheInterface] = None,
     ):
         super().__init__(cache=cache)
-
-        self.llm = ChatOpenAI(
-            api_key="ollama",
-            model=ollama_llm.model,
-            base_url=f"{ollama_llm.base_url}/v1"
-        )
-
+        self.llm = ollama_llm  # use the ChatOllama instance directly
         if run_config is None:
             run_config = RunConfig()
         self.set_run_config(run_config)
@@ -54,10 +48,11 @@ def generate_text(
         stop: t.Optional[t.List[str]] = None,
         run_manager: t.Optional[Callbacks] = None,
         **kwargs: t.Any,
-    ) -> str:
+    ) -> LLMResult:
         """Generate text from the model."""
         response = self.llm.invoke(prompt)
-        return response.content
+        print("LLM raw output:", response.content)
+        return LLMResult(generations=[[Generation(text=response.content)]])
 
     async def agenerate_text(
         self,
@@ -65,10 +60,11 @@ async def agenerate_text(
         stop: t.Optional[t.List[str]] = None,
         run_manager: t.Optional[Callbacks] = None,
         **kwargs: t.Any,
-    ) -> str:
+    ) -> LLMResult:
         """Generate text from the model asynchronously."""
         response = await self.llm.ainvoke(prompt)
-        return response.content
+        print("LLM raw output:", response.content)
+        return LLMResult(generations=[[Generation(text=response.content)]])
 
     def _generate(
         self,

From 95543d82ba39bc2deae578b088980cfc9ffdaa7c Mon Sep 17 00:00:00 2001
From: crisschan
Date: Mon, 21 Jul 2025 17:09:12 +0800
Subject: [PATCH 4/4] add the ollama embedding model class

add the ollama embedding model class
---
 ragas/src/ragas/embeddings/__init__.py       |  2 +
 ragas/src/ragas/embeddings/ollama_wrapper.py | 56 ++++++++++++++++++++
 2 files changed, 58 insertions(+)
 create mode 100644 ragas/src/ragas/embeddings/ollama_wrapper.py

diff --git a/ragas/src/ragas/embeddings/__init__.py b/ragas/src/ragas/embeddings/__init__.py
index acdda8f02..b4f839e86 100644
--- a/ragas/src/ragas/embeddings/__init__.py
+++ b/ragas/src/ragas/embeddings/__init__.py
@@ -6,6 +6,7 @@
     embedding_factory,
 )
 from ragas.embeddings.haystack_wrapper import HaystackEmbeddingsWrapper
+from ragas.embeddings.ollama_wrapper import OllamaEmbeddingsWrapper
 
 __all__ = [
     "BaseRagasEmbeddings",
@@ -14,4 +15,5 @@
     "LangchainEmbeddingsWrapper",
     "LlamaIndexEmbeddingsWrapper",
     "embedding_factory",
+    "OllamaEmbeddingsWrapper",
 ]
diff --git a/ragas/src/ragas/embeddings/ollama_wrapper.py b/ragas/src/ragas/embeddings/ollama_wrapper.py
new file mode 100644
index 000000000..8a1ea4e27
--- /dev/null
+++ b/ragas/src/ragas/embeddings/ollama_wrapper.py
@@ -0,0 +1,56 @@
+import typing as t
+from langchain_ollama import OllamaEmbeddings
+
+from ragas.cache import CacheInterface
+from ragas.embeddings.base import BaseRagasEmbeddings
+from ragas.run_config import RunConfig
+
+
+class OllamaEmbeddingsWrapper(BaseRagasEmbeddings):
+    """
+    A wrapper class for using Ollama Embeddings within the Ragas framework.
+
+    This class integrates Ollama's Embeddings into Ragas, enabling both synchronous and
+    asynchronous embedding generation.
+
+    Parameters
+    ----------
+    ollama_embeddings : OllamaEmbeddings
+        An instance of Ollama embeddings model.
+    run_config : RunConfig, optional
+        Configuration object to manage embedding execution settings, by default None.
+    cache : CacheInterface, optional
+        A cache instance for storing results, by default None.
+    """
+
+    def __init__(
+        self,
+        ollama_embeddings: OllamaEmbeddings,
+        run_config: t.Optional[RunConfig] = None,
+        cache: t.Optional[CacheInterface] = None,
+    ):
+        super().__init__(cache=cache)
+        self.embeddings = ollama_embeddings
+
+        if run_config is None:
+            run_config = RunConfig()
+        self.set_run_config(run_config)
+
+    def embed_query(self, text: str) -> t.List[float]:
+        """Generate embedding for a single text."""
+        return self.embeddings.embed_query(text)
+
+    def embed_documents(self, texts: t.List[str]) -> t.List[t.List[float]]:
+        """Generate embeddings for multiple texts."""
+        return self.embeddings.embed_documents(texts)
+
+    async def aembed_query(self, text: str) -> t.List[float]:
+        """Generate embedding for a single text asynchronously."""
+        return await self.embeddings.aembed_query(text)
+
+    async def aembed_documents(self, texts: t.List[str]) -> t.List[t.List[float]]:
+        """Generate embeddings for multiple texts asynchronously."""
+        return await self.embeddings.aembed_documents(texts)
+
+    def __repr__(self) -> str:
+        return f"{self.__class__.__name__}(embeddings={self.embeddings.model}(...))"
\ No newline at end of file
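
A minimal usage sketch of the two wrappers added by this series (not part of the
patches themselves); the model names, the local Ollama base URL, and the
evaluate() wiring are assumptions for illustration only:

    from langchain_ollama import ChatOllama, OllamaEmbeddings

    from ragas.embeddings import OllamaEmbeddingsWrapper
    from ragas.llms import OllamaLLMWrapper

    # Wrap a locally served Ollama chat model and embedding model
    # (hypothetical model names; use whatever models are pulled locally).
    llm = OllamaLLMWrapper(
        ChatOllama(model="llama3", base_url="http://localhost:11434")
    )
    embeddings = OllamaEmbeddingsWrapper(
        OllamaEmbeddings(model="nomic-embed-text", base_url="http://localhost:11434")
    )

    # The wrapper instances can then be passed wherever ragas expects a
    # BaseRagasLLM / BaseRagasEmbeddings, e.g. evaluate(dataset, metrics=...,
    # llm=llm, embeddings=embeddings).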