Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions src/server/api/example_config/ollama_embedding/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Example Memobase config: OpenAI-compatible LLM + Ollama embeddings.
llm_api_key: XXX  # replace with your real API key
llm_base_url: https://api.openai.com/v1/  # any OpenAI-SDK-compatible endpoint works
best_llm_model: gpt-4o

embedding_provider: ollama  # selects the ollama_embedding backend
embedding_api_key: ollama  # Ollama ignores auth; any placeholder value works here
embedding_base_url: "http://127.0.0.1:11434/" # WITHOUT "v1" at the end
embedding_model: "qwen3-embedding:4b-q4_K_M"
embedding_dim: 2560  # must match the native output dim of the model above

language: en
2 changes: 1 addition & 1 deletion src/server/api/memobase_server/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ class Config:
summary_llm_model: str = None

enable_event_embedding: bool = True
embedding_provider: Literal["openai", "jina"] = "openai"
embedding_provider: Literal["openai", "jina", "ollama"] = "openai"
embedding_api_key: str = None
embedding_base_url: str = None
embedding_dim: int = 1536
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@
from .jina_embedding import jina_embedding
from .openai_embedding import openai_embedding
from .lmstudio_embedding import lmstudio_embedding
from .ollama_embedding import ollama_embedding
from ...telemetry import telemetry_manager, HistogramMetricName, CounterMetricName
from ...utils import get_encoded_tokens

FACTORIES = {"openai": openai_embedding, "jina": jina_embedding, "lmstudio": lmstudio_embedding}
# Registry mapping each supported provider name to its async embedding function.
FACTORIES = {"openai": openai_embedding, "jina": jina_embedding, "lmstudio": lmstudio_embedding, "ollama": ollama_embedding}
# Fail fast at import time if the configured provider has no registered backend.
assert (
    CONFIG.embedding_provider in FACTORIES
), f"Unsupported embedding provider: {CONFIG.embedding_provider}"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import numpy as np
from typing import Literal
from ...errors import ExternalAPIError
from ...env import CONFIG, LOG
from .utils import get_ollama_async_client_instance

# Phase -> task-name hints. Currently unused: the request below keeps the
# "task" field commented out because Ollama's /api/embed endpoint has no such
# parameter; the dict is retained for parity with the jina backend's task names.
OLLAMA_TASK = {
    "query": "retrieval.query",
    "document": "retrieval.passage",
}


async def ollama_embedding(
    model: str, texts: list[str], phase: Literal["query", "document"] = "document"
) -> np.ndarray:
    """Embed a batch of texts via an Ollama server's /api/embed endpoint.

    Args:
        model: Ollama embedding model name (e.g. "qwen3-embedding:4b-q4_K_M").
        texts: Input strings to embed.
        phase: Retrieval phase hint; currently ignored (see OLLAMA_TASK).

    Returns:
        np.ndarray with one embedding row per input text.

    Raises:
        ExternalAPIError: if the server responds with a non-200 status.
    """
    ollama_client = get_ollama_async_client_instance()
    response = await ollama_client.post(
        "/api/embed",
        json={
            "model": model,
            "input": texts,
            # "task": OLLAMA_TASK[phase],  # /api/embed has no task parameter
            "truncate": True,  # truncate inputs exceeding the model context
            # NOTE(review): "dimensions" is an OpenAI-API parameter; Ollama's
            # /api/embed does not document it — confirm the model's native
            # dimension actually equals CONFIG.embedding_dim.
            "dimensions": CONFIG.embedding_dim,
        },
        timeout=20,
    )
    if response.status_code != 200:
        raise ExternalAPIError(f"Failed to embed texts: {response.text}")
    data = response.json()
    # Use .get so a missing duration field can never turn a successful
    # embedding call into a KeyError inside the log statement.
    LOG.info(
        f"Ollama embedding, {model}, "
        f"{data.get('load_duration')}/{data.get('total_duration')}"
    )
    return np.array(data["embeddings"])
11 changes: 11 additions & 0 deletions src/server/api/memobase_server/llms/embeddings/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
_global_openai_async_client = None
_global_jina_async_client = None
_global_lmstudio_async_client = None
_global_ollama_async_client = None


def get_openai_async_client_instance() -> AsyncOpenAI:
Expand Down Expand Up @@ -34,3 +35,13 @@ def get_lmstudio_async_client_instance() -> AsyncClient:
headers={"Authorization": f"Bearer {CONFIG.embedding_api_key}"},
)
return _global_lmstudio_async_client

def get_ollama_async_client_instance() -> AsyncClient:
    """Return the process-wide Ollama HTTP client, creating it on first use."""
    global _global_ollama_async_client
    if _global_ollama_async_client is not None:
        return _global_ollama_async_client
    _global_ollama_async_client = AsyncClient(
        base_url=CONFIG.embedding_base_url,
        headers={"Authorization": f"Bearer {CONFIG.embedding_api_key}"},
    )
    return _global_ollama_async_client

3 changes: 2 additions & 1 deletion src/server/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,15 @@ Memobase uses a single `config.yaml` to initialize the server. It contains the
By default, Memobase enables user profile and event memory with filter ability. That means running a Memobase server requires you to have below things:

- **LLM API**: You must fill the OpenAI API Key in `llm_api_key` of `config.yaml`. Or you can change `llm_base_url` to any OpenAI-SDK-Compatible service (via [vllm](https://github.com/vllm-project/vllm), [Ollama](../../assets/tutorials/ollama+memobase/readme.md),...). Alternatively, you can set `llm_api_key` and `llm_base_url` using environment variables `MEMOBASE_LLM_API_KEY` and `MEMOBASE_LLM_BASE_URL`
- **Embedding API**: Memobase supports OpenAI-Compatible SDK and [Jina Embedding](https://jina.ai/models/jina-embeddings-v3/). Memobase uses embedding API to retrieve related user events. If you don't have a embedding API, you can set `enable_event_embedding: false` in `config.yaml`
- **Embedding API**: Memobase supports OpenAI-Compatible SDK, [Jina Embedding](https://jina.ai/models/jina-embeddings-v3/) and [Ollama Embedding](https://docs.ollama.com/api#generate-embeddings). Memobase uses the embedding API to retrieve related user events. If you don't have an embedding API, you can set `enable_event_embedding: false` in `config.yaml`

We have some example `config.yaml` files in `example_config`:

- [`profile_for_assistant`](./api/example_config/profile_for_assistant), [`profile_for_education`](./api/example_config/profile_for_education), [`profile_for_companion`](./api/example_config/profile_for_companion) are three configs with a similar structure, but for different use cases.
- [`event_tag`](./api/example_config/event_tag) is a feature for tracking temporal attributes of users. [doc](https://docs.memobase.io/features/event/event_tag)
- [`only_strict_profile`](./api/example_config/only_strict_profile): disable all other features, only collect the profiles you design.
- [`jina_embedding`](./api/example_config/jina_embedding) uses Jina embedding for event search.
- [`ollama_embedding`](./api/example_config/ollama_embedding) uses Ollama embedding for event search.



Expand Down