Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,3 +104,4 @@ We currently release new tagged versions of the `pypi` and `npm` packages on Mon
## License

[Apache 2.0](./LICENSE)

1 change: 1 addition & 0 deletions chromadb/test/ef/test_ef.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def test_get_builtins_holds() -> None:
"InstructorEmbeddingFunction",
"JinaEmbeddingFunction",
"MistralEmbeddingFunction",
"MorphEmbeddingFunction",
"ONNXMiniLM_L6_V2",
"OllamaEmbeddingFunction",
"OpenAIEmbeddingFunction",
Expand Down
126 changes: 126 additions & 0 deletions chromadb/test/ef/test_morph_ef.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import os
import pytest
from chromadb.utils.embedding_functions.morph_embedding_function import (
MorphEmbeddingFunction,
)


def test_morph_embedding_function_with_api_key() -> None:
    """Embed two code snippets through the live Morph API.

    Skipped unless ``CHROMA_MORPH_API_KEY`` is set, because the call goes to
    the real service.
    """
    import numpy as np

    if os.environ.get("CHROMA_MORPH_API_KEY") is None:
        pytest.skip("CHROMA_MORPH_API_KEY not set")

    # Read the key from the same variable the skip guard checks; the class
    # default is MORPH_API_KEY, which may be unset even when the guard passes.
    ef = MorphEmbeddingFunction(
        model_name="morph-embedding-v2",
        api_key_env_var="CHROMA_MORPH_API_KEY",
    )

    # Test with code snippets (Morph's specialty)
    code_snippets = [
        "def hello_world():\n print('Hello, World!')",
        "class Calculator:\n def add(self, a, b):\n return a + b",
    ]

    embeddings = ef(code_snippets)
    assert embeddings is not None
    assert len(embeddings) == 2
    # __call__ builds numpy arrays, so accept ndarray as well as plain lists.
    assert all(isinstance(emb, (list, np.ndarray)) for emb in embeddings)
    assert all(len(emb) > 0 for emb in embeddings)


def test_morph_embedding_function_with_custom_parameters() -> None:
    """Embed one snippet with every constructor parameter given explicitly.

    Skipped unless ``CHROMA_MORPH_API_KEY`` is set, because the call goes to
    the real service.
    """
    import numpy as np

    if os.environ.get("CHROMA_MORPH_API_KEY") is None:
        pytest.skip("CHROMA_MORPH_API_KEY not set")

    ef = MorphEmbeddingFunction(
        model_name="morph-embedding-v2",
        api_base="https://api.morphllm.com/v1",
        encoding_format="float",
        api_key_env_var="CHROMA_MORPH_API_KEY",
    )

    # Test with a simple function
    code_snippet = ["function add(a, b) { return a + b; }"]

    embeddings = ef(code_snippet)
    assert embeddings is not None
    assert len(embeddings) == 1
    # __call__ builds numpy arrays, so accept ndarray as well as plain lists.
    assert isinstance(embeddings[0], (list, np.ndarray))
    assert len(embeddings[0]) > 0


def test_morph_embedding_function_config_roundtrip() -> None:
    """Test that Morph embedding function configuration can be saved and restored."""
    # The constructor raises ValueError when the key variable is unset, so
    # skip (as the API tests in this file do) instead of erroring out.
    if os.environ.get("CHROMA_MORPH_API_KEY") is None:
        pytest.skip("CHROMA_MORPH_API_KEY not set")

    expected_config = {
        "model_name": "morph-embedding-v2",
        "api_base": "https://api.morphllm.com/v1",
        "encoding_format": "float",
        "api_key_env_var": "CHROMA_MORPH_API_KEY",
    }

    ef = MorphEmbeddingFunction(**expected_config)

    # Get configuration
    config = ef.get_config()

    # Verify each expected key is present and carries the value passed in
    for key, value in expected_config.items():
        assert key in config
        assert config[key] == value

    # Test building from config
    new_ef = MorphEmbeddingFunction.build_from_config(config)
    new_config = new_ef.get_config()

    # Configurations should match
    assert config == new_config


def test_morph_embedding_function_name() -> None:
    """Check the name the Morph embedding function reports for itself."""
    reported_name = MorphEmbeddingFunction.name()
    assert reported_name == "morph"


def test_morph_embedding_function_spaces() -> None:
    """Test that Morph embedding function supports expected spaces."""
    # The constructor raises ValueError when the key variable is unset, so
    # skip (as the API tests in this file do) instead of erroring out.
    if os.environ.get("CHROMA_MORPH_API_KEY") is None:
        pytest.skip("CHROMA_MORPH_API_KEY not set")

    ef = MorphEmbeddingFunction(
        model_name="morph-embedding-v2",
        api_key_env_var="CHROMA_MORPH_API_KEY",
    )

    # Test default space
    assert ef.default_space() == "cosine"

    # Test supported spaces
    supported_spaces = ef.supported_spaces()
    for space in ("cosine", "l2", "ip"):
        assert space in supported_spaces


def test_morph_embedding_function_validate_config() -> None:
    """Test that Morph embedding function validates configuration correctly."""
    # Valid configuration
    valid_config = {
        "model_name": "morph-embedding-v2",
        "api_key_env_var": "CHROMA_MORPH_API_KEY",
    }

    # This should not raise an exception
    MorphEmbeddingFunction.validate_config(valid_config)

    # Invalid configuration (missing required fields)
    # NOTE(review): this assumes the "morph" schema lists api_key_env_var as
    # required; the schema file is not visible from this test - confirm.
    invalid_config = {
        "model_name": "morph-embedding-v2"
        # Missing api_key_env_var
    }

    # The concrete exception type comes from the schema validator, so only
    # the broad base class is asserted here.
    with pytest.raises(Exception):
        MorphEmbeddingFunction.validate_config(invalid_config)
6 changes: 6 additions & 0 deletions chromadb/utils/embedding_functions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@
from chromadb.utils.embedding_functions.mistral_embedding_function import (
MistralEmbeddingFunction,
)
from chromadb.utils.embedding_functions.morph_embedding_function import (
MorphEmbeddingFunction,
)

try:
from chromadb.is_thin_client import is_thin_client
Expand All @@ -84,6 +87,7 @@
"InstructorEmbeddingFunction",
"JinaEmbeddingFunction",
"MistralEmbeddingFunction",
"MorphEmbeddingFunction",
"VoyageAIEmbeddingFunction",
"ONNXMiniLM_L6_V2",
"OpenCLIPEmbeddingFunction",
Expand Down Expand Up @@ -145,6 +149,7 @@ def validate_config(config: Dict[str, Any]) -> None:
"instructor": InstructorEmbeddingFunction,
"jina": JinaEmbeddingFunction,
"mistral": MistralEmbeddingFunction,
"morph": MorphEmbeddingFunction,
"voyageai": VoyageAIEmbeddingFunction,
"onnx_mini_lm_l6_v2": ONNXMiniLM_L6_V2,
"open_clip": OpenCLIPEmbeddingFunction,
Expand Down Expand Up @@ -233,6 +238,7 @@ def config_to_embedding_function(config: Dict[str, Any]) -> EmbeddingFunction:
"InstructorEmbeddingFunction",
"JinaEmbeddingFunction",
"MistralEmbeddingFunction",
"MorphEmbeddingFunction",
"VoyageAIEmbeddingFunction",
"ONNXMiniLM_L6_V2",
"OpenCLIPEmbeddingFunction",
Expand Down
147 changes: 147 additions & 0 deletions chromadb/utils/embedding_functions/morph_embedding_function.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
from chromadb.api.types import Embeddings, Documents, EmbeddingFunction, Space
from typing import List, Dict, Any, Optional
import os
import numpy as np
from chromadb.utils.embedding_functions.schemas import validate_config_schema
import warnings


class MorphEmbeddingFunction(EmbeddingFunction[Documents]):
    """
    Embedding function that calls the Morph embeddings endpoint.

    Requests go through the OpenAI Python client configured with ``api_base``
    as its base URL. The persisted configuration (``get_config``) stores only
    the name of the environment variable holding the API key, never the key.
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        model_name: str = "morph-embedding-v2",
        api_base: str = "https://api.morphllm.com/v1",
        encoding_format: str = "float",
        api_key_env_var: str = "MORPH_API_KEY",
    ):
        """
        Initialize the MorphEmbeddingFunction.

        Args:
            api_key (str, optional): The API key for the Morph API. If not provided,
                it will be read from the environment variable specified by api_key_env_var.
            model_name (str, optional): The name of the model to use for embeddings.
                Defaults to "morph-embedding-v2".
            api_base (str, optional): The base URL for the Morph API.
                Defaults to "https://api.morphllm.com/v1".
            encoding_format (str, optional): The format for embeddings (float or base64).
                Defaults to "float".
            api_key_env_var (str, optional): Environment variable name that contains your API key.
                Defaults to "MORPH_API_KEY".

        Raises:
            ValueError: If the ``openai`` package is not installed, or if no API key
                was passed and the environment variable is unset or empty.
        """
        try:
            import openai
        except ImportError:
            raise ValueError(
                "The openai python package is not installed. Please install it with `pip install openai`. "
                "Note: Morph uses the OpenAI client library for API communication."
            )

        if api_key is not None:
            # stacklevel=2 attributes the warning to the caller's line rather
            # than to this constructor.
            warnings.warn(
                "Direct api_key configuration will not be persisted. "
                "Please use environment variables via api_key_env_var for persistent storage.",
                DeprecationWarning,
                stacklevel=2,
            )

        self.api_key_env_var = api_key_env_var
        # An explicitly passed key takes precedence over the environment.
        self.api_key = api_key or os.getenv(api_key_env_var)
        if not self.api_key:
            raise ValueError(f"The {api_key_env_var} environment variable is not set.")

        self.model_name = model_name
        self.api_base = api_base
        self.encoding_format = encoding_format

        # Initialize the OpenAI client with Morph's base URL
        self.client = openai.OpenAI(
            api_key=self.api_key,
            base_url=self.api_base,
        )

    def __call__(self, input: Documents) -> Embeddings:
        """
        Generate embeddings for the given documents.

        Args:
            input: Documents to generate embeddings for.

        Returns:
            Embeddings for the documents, one float32 array per document.
            An empty input returns an empty list without calling the API.
        """
        # Handle empty input
        if not input:
            return []

        # Prepare embedding parameters
        embedding_params: Dict[str, Any] = {
            "model": self.model_name,
            "input": input,
            "encoding_format": self.encoding_format,
        }

        # Get embeddings from Morph API
        response = self.client.embeddings.create(**embedding_params)

        # Extract embeddings from response
        return [self._decode_embedding(item.embedding) for item in response.data]

    @staticmethod
    def _decode_embedding(embedding: Any) -> Any:
        """
        Convert one embedding payload from the API response to a float32 array.

        With ``encoding_format="base64"`` the payload is a base64 string rather
        than a list of floats, and ``np.array(<str>, dtype=float32)`` would fail.

        NOTE(review): assumes Morph follows the OpenAI convention of packing
        base64 embeddings as little-endian float32 - confirm against Morph docs.
        """
        if isinstance(embedding, str):
            import base64

            raw_bytes = base64.b64decode(embedding)
            # astype() copies, so the result is writable and in native byte order.
            return np.frombuffer(raw_bytes, dtype="<f4").astype(np.float32)
        return np.array(embedding, dtype=np.float32)

    @staticmethod
    def name() -> str:
        """Return the identifier of this embedding function ("morph")."""
        return "morph"

    def default_space(self) -> Space:
        """Return the default distance space."""
        # Morph embeddings work best with cosine similarity
        return "cosine"

    def supported_spaces(self) -> List[Space]:
        """Return the distance spaces this embedding function supports."""
        return ["cosine", "l2", "ip"]

    @staticmethod
    def build_from_config(config: Dict[str, Any]) -> "EmbeddingFunction[Documents]":
        """
        Rebuild a MorphEmbeddingFunction from a configuration dictionary.

        Args:
            config: Dictionary as produced by ``get_config``. ``api_key_env_var``
                and ``model_name`` are mandatory; ``api_base`` and
                ``encoding_format`` fall back to the constructor defaults.

        Raises:
            AssertionError: If ``api_key_env_var`` or ``model_name`` is missing.
        """
        # Extract parameters from config
        api_key_env_var = config.get("api_key_env_var")
        model_name = config.get("model_name")
        api_base = config.get("api_base")
        encoding_format = config.get("encoding_format")

        if api_key_env_var is None or model_name is None:
            # Raised explicitly (same type and message as the former
            # `assert False`) so the guard is not stripped under `python -O`.
            raise AssertionError("This code should not be reached")

        # Create and return the embedding function
        return MorphEmbeddingFunction(
            api_key_env_var=api_key_env_var,
            model_name=model_name,
            api_base=api_base if api_base is not None else "https://api.morphllm.com/v1",
            encoding_format=encoding_format if encoding_format is not None else "float",
        )

    def get_config(self) -> Dict[str, Any]:
        """Return the persistable configuration; the API key itself is not included."""
        return {
            "api_key_env_var": self.api_key_env_var,
            "model_name": self.model_name,
            "api_base": self.api_base,
            "encoding_format": self.encoding_format,
        }

    def validate_config_update(
        self, old_config: Dict[str, Any], new_config: Dict[str, Any]
    ) -> None:
        """
        Reject configuration updates that touch the model name.

        Raises:
            ValueError: If ``new_config`` contains a ``model_name`` key,
                regardless of its value.
        """
        if "model_name" in new_config:
            raise ValueError(
                "The model name cannot be changed after the embedding function has been initialized."
            )

    @staticmethod
    def validate_config(config: Dict[str, Any]) -> None:
        """
        Validate the configuration using the JSON schema.

        Args:
            config: Configuration to validate

        Raises:
            Whatever ``validate_config_schema`` raises when the configuration
            does not match the "morph" schema.
        """
        validate_config_schema(config, "morph")
5 changes: 3 additions & 2 deletions clients/new-js/packages/ai-embeddings/all/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,12 @@
"@chroma-core/google-gemini": "workspace:^",
"@chroma-core/huggingface-server": "workspace:^",
"@chroma-core/jina": "workspace:^",
"@chroma-core/mistral": "workspace:^",
"@chroma-core/morph": "workspace:^",
"@chroma-core/ollama": "workspace:^",
"@chroma-core/openai": "workspace:^",
"@chroma-core/together-ai": "workspace:^",
"@chroma-core/voyageai": "workspace:^",
"@chroma-core/mistral": "workspace:^"
"@chroma-core/voyageai": "workspace:^"
},
"engines": {
"node": ">=20"
Expand Down
1 change: 1 addition & 0 deletions clients/new-js/packages/ai-embeddings/all/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ export * from "@chroma-core/google-gemini";
export * from "@chroma-core/huggingface-server";
export * from "@chroma-core/jina";
export * from "@chroma-core/mistral";
export * from "@chroma-core/morph";
export * from "@chroma-core/ollama";
export * from "@chroma-core/openai";
export * from "@chroma-core/together-ai";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import voyageaiSchema from "../../../../../../schemas/embedding_functions/voyage
import cloudflareWorkersAiSchema from "../../../../../../schemas/embedding_functions/cloudflare_workers_ai.json";
import togetherAiSchema from "../../../../../../schemas/embedding_functions/together_ai.json";
import mistralSchema from "../../../../../../schemas/embedding_functions/mistral.json";
import morphSchema from "../../../../../../schemas/embedding_functions/morph.json";
import Ajv from "ajv";

// Define a common interface for all schemas
Expand Down Expand Up @@ -68,6 +69,7 @@ const schemaMap = {
"cloudflare-worker-ai": cloudflareWorkersAiSchema as Schema,
"together-ai": togetherAiSchema as Schema,
mistral: mistralSchema as Schema,
morph: morphSchema as Schema,
};

/**
Expand Down
Loading
Loading