From d209f1bc7a3a407c7bdf82c051f36d41adf1ae19 Mon Sep 17 00:00:00 2001 From: dishuostec Date: Sun, 12 Oct 2025 13:47:42 +0800 Subject: [PATCH 1/2] support use ollama as embedding provider --- src/server/api/memobase_server/env.py | 2 +- .../llms/embeddings/__init__.py | 3 +- .../llms/embeddings/ollama_embedding.py | 34 +++++++++++++++++++ .../memobase_server/llms/embeddings/utils.py | 11 ++++++ 4 files changed, 48 insertions(+), 2 deletions(-) create mode 100644 src/server/api/memobase_server/llms/embeddings/ollama_embedding.py diff --git a/src/server/api/memobase_server/env.py b/src/server/api/memobase_server/env.py index 2e57e4c..ccdf9ec 100644 --- a/src/server/api/memobase_server/env.py +++ b/src/server/api/memobase_server/env.py @@ -102,7 +102,7 @@ class Config: summary_llm_model: str = None enable_event_embedding: bool = True - embedding_provider: Literal["openai", "jina"] = "openai" + embedding_provider: Literal["openai", "jina", "ollama"] = "openai" embedding_api_key: str = None embedding_base_url: str = None embedding_dim: int = 1536 diff --git a/src/server/api/memobase_server/llms/embeddings/__init__.py b/src/server/api/memobase_server/llms/embeddings/__init__.py index 5b14a6e..7504544 100644 --- a/src/server/api/memobase_server/llms/embeddings/__init__.py +++ b/src/server/api/memobase_server/llms/embeddings/__init__.py @@ -9,10 +9,11 @@ from .jina_embedding import jina_embedding from .openai_embedding import openai_embedding from .lmstudio_embedding import lmstudio_embedding +from .ollama_embedding import ollama_embedding from ...telemetry import telemetry_manager, HistogramMetricName, CounterMetricName from ...utils import get_encoded_tokens -FACTORIES = {"openai": openai_embedding, "jina": jina_embedding, "lmstudio": lmstudio_embedding} +FACTORIES = {"openai": openai_embedding, "jina": jina_embedding, "lmstudio": lmstudio_embedding, "ollama": ollama_embedding} assert ( CONFIG.embedding_provider in FACTORIES ), f"Unsupported embedding provider: 
{CONFIG.embedding_provider}" diff --git a/src/server/api/memobase_server/llms/embeddings/ollama_embedding.py b/src/server/api/memobase_server/llms/embeddings/ollama_embedding.py new file mode 100644 index 0000000..300a519 --- /dev/null +++ b/src/server/api/memobase_server/llms/embeddings/ollama_embedding.py @@ -0,0 +1,34 @@ +import numpy as np +from typing import Literal +from ...errors import ExternalAPIError +from ...env import CONFIG, LOG +from .utils import get_ollama_async_client_instance + +OLLAMA_TASK = { + "query": "retrieval.query", + "document": "retrieval.passage", +} + + +async def ollama_embedding( + model: str, texts: list[str], phase: Literal["query", "document"] = "document" +) -> np.ndarray: + openai_async_client = get_ollama_async_client_instance() + response = await openai_async_client.post( + "/api/embed", + json={ + "model": model, + "input": texts, + # "task": OLLAMA_TASK[phase], + "truncate": True, + "dimensions": CONFIG.embedding_dim, + }, + timeout=20, + ) + if response.status_code != 200: + raise ExternalAPIError(f"Failed to embed texts: {response.text}") + data = response.json() + LOG.info( + f"Ollama embedding, {model}, {data['load_duration']}/{data['total_duration']}" + ) + return np.array(data["embeddings"]) diff --git a/src/server/api/memobase_server/llms/embeddings/utils.py b/src/server/api/memobase_server/llms/embeddings/utils.py index 1678ec2..378d34d 100644 --- a/src/server/api/memobase_server/llms/embeddings/utils.py +++ b/src/server/api/memobase_server/llms/embeddings/utils.py @@ -5,6 +5,7 @@ _global_openai_async_client = None _global_jina_async_client = None _global_lmstudio_async_client = None +_global_ollama_async_client = None def get_openai_async_client_instance() -> AsyncOpenAI: @@ -34,3 +35,13 @@ def get_lmstudio_async_client_instance() -> AsyncClient: headers={"Authorization": f"Bearer {CONFIG.embedding_api_key}"}, ) return _global_lmstudio_async_client + +def get_ollama_async_client_instance() -> AsyncClient: + global 
_global_ollama_async_client + if _global_ollama_async_client is None: + _global_ollama_async_client = AsyncClient( + base_url=CONFIG.embedding_base_url, + headers={"Authorization": f"Bearer {CONFIG.embedding_api_key}"}, + ) + return _global_ollama_async_client + From 3e3e48facbb5aec84f717ddb3eff6e40ffa66366 Mon Sep 17 00:00:00 2001 From: dishuostec Date: Sun, 12 Oct 2025 21:48:23 +0800 Subject: [PATCH 2/2] add ollama embedding example config and update readme --- .../api/example_config/ollama_embedding/config.yaml | 11 +++++++++++ src/server/readme.md | 3 ++- 2 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 src/server/api/example_config/ollama_embedding/config.yaml diff --git a/src/server/api/example_config/ollama_embedding/config.yaml b/src/server/api/example_config/ollama_embedding/config.yaml new file mode 100644 index 0000000..5691413 --- /dev/null +++ b/src/server/api/example_config/ollama_embedding/config.yaml @@ -0,0 +1,11 @@ +llm_api_key: XXX +llm_base_url: https://api.openai.com/v1/ +best_llm_model: gpt-4o + +embedding_provider: ollama +embedding_api_key: ollama +embedding_base_url: "http://127.0.0.1:11434/" # WITHOUT "v1" at the end +embedding_model: "qwen3-embedding:4b-q4_K_M" +embedding_dim: 2560 + +language: en diff --git a/src/server/readme.md b/src/server/readme.md index e34ca40..cd8b461 100644 --- a/src/server/readme.md +++ b/src/server/readme.md @@ -29,7 +29,7 @@ Memobase uses a single `config.yaml` to initialize the server. It contains the By default, Memobase enables user profile and event memory with filter ability. That means running a Memobase server requires you to have below things: - **LLM API**: You must fill the OpenAI API Key in `llm_api_key` of `config.yaml`.Or you can change `llm_base_url` to any OpenAI-SDK-Compatible service(via [vllm](https://github.com/vllm-project/vllm), [Ollama](../../assets/tutorials/ollama+memobase/readme.md),...). 
Alternatively, you can set `llm_api_key` and `llm_base_url` using environment variables `MEMOBASE_LLM_API_KEY` and `MEMOBASE_LLM_BASE_URL` -- **Embedding API**: Memobase supports OpenAI-Compatible SDK and [Jina Embedding](https://jina.ai/models/jina-embeddings-v3/). Memobase uses embedding API to retrieve related user events. If you don't have a embedding API, you can set `enable_event_embedding: false` in `config.yaml` +- **Embedding API**: Memobase supports OpenAI-Compatible SDK, [Jina Embedding](https://jina.ai/models/jina-embeddings-v3/) and [Ollama Embedding](https://docs.ollama.com/api#generate-embeddings). Memobase uses embedding API to retrieve related user events. If you don't have an embedding API, you can set `enable_event_embedding: false` in `config.yaml` We have some example `config.yaml` in `examplel_config`: @@ -37,6 +37,7 @@ We have some example `config.yaml` in `examplel_config`: - [`event_tag`](./api/example_config/event_tag) is a feature to tracking temporal attributes of users. [doc](https://docs.memobase.io/features/event/event_tag) - [`only_strict_profile`](./api/example_config/only_strict_profile): disable all other features, only collect the profiles you design. - [`jina_embedding`](./api/example_config/jina_embedding) uses Jina exmbedding for event search. +- [`ollama_embedding`](./api/example_config/ollama_embedding) uses Ollama embedding for event search.