Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/142562.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 142562
summary: "[Inference API] Add VoyageAI inference service integration"
area: Inference
type: enhancement
issues: []
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
9318000
2 changes: 1 addition & 1 deletion server/src/main/resources/transport/upper_bounds/9.4.csv
Original file line number Diff line number Diff line change
@@ -1 +1 @@
esql_async_source_bytes_buffered,9317000
voyage_ai_multimodal_embeddings_added,9318000
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,8 @@
import org.elasticsearch.xpack.inference.services.sagemaker.schema.SageMakerSchemas;
import org.elasticsearch.xpack.inference.services.settings.DefaultSecretSettings;
import org.elasticsearch.xpack.inference.services.voyageai.VoyageAIServiceSettings;
import org.elasticsearch.xpack.inference.services.voyageai.embeddings.VoyageAIEmbeddingsServiceSettings;
import org.elasticsearch.xpack.inference.services.voyageai.embeddings.VoyageAIEmbeddingServiceSettings;
import org.elasticsearch.xpack.inference.services.voyageai.embeddings.VoyageAITextEmbeddingServiceSettings;
import org.elasticsearch.xpack.inference.services.voyageai.embeddings.VoyageAIEmbeddingsTaskSettings;
import org.elasticsearch.xpack.inference.services.voyageai.rerank.VoyageAIRerankServiceSettings;
import org.elasticsearch.xpack.inference.services.voyageai.rerank.VoyageAIRerankTaskSettings;
Expand Down Expand Up @@ -918,8 +919,15 @@ private static void addVoyageAINamedWriteables(List<NamedWriteableRegistry.Entry
namedWriteables.add(
new NamedWriteableRegistry.Entry(
ServiceSettings.class,
VoyageAIEmbeddingsServiceSettings.NAME,
VoyageAIEmbeddingsServiceSettings::new
VoyageAITextEmbeddingServiceSettings.NAME,
VoyageAITextEmbeddingServiceSettings::new
)
);
namedWriteables.add(
new NamedWriteableRegistry.Entry(
ServiceSettings.class,
VoyageAIEmbeddingServiceSettings.NAME,
VoyageAIEmbeddingServiceSettings::new
)
);
namedWriteables.add(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

public class SimpleEmbeddingServiceIntegrationValidator implements ServiceIntegrationValidator {
// The below data URI represents the base64 encoding of 28x28 pixel black square .jpg image
private static final String BASE64_IMAGE_DATA = "data:image/jpg;base64,/9j/4QDKRXhpZgAATU0AKgAAAAgABgESAAMAAAABAAEAAAEaAAUAAAABAAAAV"
private static final String BASE64_IMAGE_DATA = "data:image/jpeg;base64,/9j/4QDKRXhpZgAATU0AKgAAAAgABgESAAMAAAABAAEAAAEaAAUAAAABAAAAV"
+ "gEbAAUAAAABAAAAXgEoAAMAAAABAAIAAAITAAMAAAABAAEAAIdpAAQAAAABAAAAZgAAAAAAAABIAAAAAQAAAEgAAAABAAeQAAAHAAAABDAyMjGRAQAHAAAABAECAw"
+ "CgAAAHAAAABDAxMDCgAQADAAAAAQABAACgAgAEAAAAAQAAABygAwAEAAAAAQAAABykBgADAAAAAQAAAAAAAAAAAAD/2wCEABwcHBwcHDAcHDBEMDAwRFxEREREXHR"
+ "cXFxcXHSMdHR0dHR0jIyMjIyMjIyoqKioqKjExMTExNzc3Nzc3Nzc3NwBIiQkODQ4YDQ0YOacgJzm5ubm5ubm5ubm5ubm5ubm5ubm5ubm5ubm5ubm5ubm5ubm5ubm"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,28 @@ public abstract class VoyageAIModel extends RateLimitGroupingModel {

static {
Map<String, String> tempMap = new HashMap<>();
// V4 models
tempMap.put("voyage-4-large", "embed_large");
tempMap.put("voyage-4", "embed_medium");
tempMap.put("voyage-4-lite", "embed_small");
// V3.5 models
tempMap.put("voyage-3.5", "embed_medium");
tempMap.put("voyage-3.5-lite", "embed_small");
tempMap.put("voyage-multimodal-3", "embed_multimodal");
// V3 models
tempMap.put("voyage-3-large", "embed_large");
tempMap.put("voyage-code-3", "embed_large");
tempMap.put("voyage-3", "embed_medium");
tempMap.put("voyage-3-lite", "embed_small");
// Multimodal models
tempMap.put("voyage-multimodal-3", "embed_multimodal");
tempMap.put("voyage-multimodal-3.5", "embed_multimodal");
// V2 models
tempMap.put("voyage-finance-2", "embed_large");
tempMap.put("voyage-law-2", "embed_large");
tempMap.put("voyage-code-2", "embed_large");
// Reranker models
tempMap.put("rerank-2.5", "rerank_large");
tempMap.put("rerank-2.5-lite", "rerank_small");
tempMap.put("rerank-2", "rerank_large");
tempMap.put("rerank-2-lite", "rerank_small");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,19 @@

package org.elasticsearch.xpack.inference.services.voyageai;

import org.elasticsearch.ElasticsearchStatusException;
import org.elasticsearch.TransportVersion;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.ValidationException;
import org.elasticsearch.common.util.LazyInitializable;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.Strings;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.inference.ChunkInferenceInput;
import org.elasticsearch.inference.ChunkedInference;
import org.elasticsearch.inference.ChunkingSettings;
import org.elasticsearch.inference.EmbeddingRequest;
import org.elasticsearch.inference.InferenceServiceConfiguration;
import org.elasticsearch.inference.InferenceServiceExtension;
import org.elasticsearch.inference.InferenceServiceResults;
Expand All @@ -29,8 +32,10 @@
import org.elasticsearch.inference.SimilarityMeasure;
import org.elasticsearch.inference.TaskType;
import org.elasticsearch.inference.configuration.SettingsConfigurationFieldType;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.xpack.core.inference.chunking.ChunkingSettingsBuilder;
import org.elasticsearch.xpack.core.inference.chunking.EmbeddingRequestChunker;
import org.elasticsearch.xpack.inference.external.action.ExecutableAction;
import org.elasticsearch.xpack.inference.external.http.sender.EmbeddingsInput;
import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender;
import org.elasticsearch.xpack.inference.external.http.sender.InferenceInputs;
Expand All @@ -43,56 +48,67 @@
import org.elasticsearch.xpack.inference.services.settings.DefaultSecretSettings;
import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings;
import org.elasticsearch.xpack.inference.services.voyageai.action.VoyageAIActionCreator;
import org.elasticsearch.xpack.inference.services.voyageai.embeddings.BaseVoyageAIEmbeddingsServiceSettings;
import org.elasticsearch.xpack.inference.services.voyageai.embeddings.VoyageAIEmbeddingType;
import org.elasticsearch.xpack.inference.services.voyageai.embeddings.VoyageAIEmbeddingsModel;
import org.elasticsearch.xpack.inference.services.voyageai.embeddings.VoyageAIEmbeddingsModelCreator;
import org.elasticsearch.xpack.inference.services.voyageai.embeddings.VoyageAIEmbeddingsServiceSettings;
import org.elasticsearch.xpack.inference.services.voyageai.rerank.VoyageAIRerankModelCreator;

import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static org.elasticsearch.inference.InferenceStringGroup.containsNonTextEntry;
import static org.elasticsearch.xpack.inference.services.ServiceFields.DIMENSIONS;
import static org.elasticsearch.xpack.inference.services.ServiceFields.EMBEDDING_TYPE;
import static org.elasticsearch.xpack.inference.services.ServiceFields.MODEL_ID;
import static org.elasticsearch.xpack.inference.services.ServiceFields.SIMILARITY;
import static org.elasticsearch.xpack.inference.services.ServiceUtils.createInvalidModelException;
import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMapOrDefaultEmpty;
import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMapOrThrowIfNull;
import static org.elasticsearch.xpack.inference.services.ServiceUtils.throwIfNotEmptyMap;
import static org.elasticsearch.xpack.inference.services.ServiceUtils.throwUnsupportedUnifiedCompletionOperation;
import static org.elasticsearch.xpack.inference.services.voyageai.embeddings.BaseVoyageAIEmbeddingsServiceSettings.updateEmbeddingDetails;

public class VoyageAIService extends SenderService<VoyageAIModel> implements RerankingInferenceService {
public static final String NAME = "voyageai";

private static final String SERVICE_NAME = "Voyage AI";
private static final EnumSet<TaskType> SUPPORTED_TASK_TYPES = EnumSet.of(TaskType.TEXT_EMBEDDING, TaskType.RERANK);
private static final EnumSet<TaskType> SUPPORTED_TASK_TYPES = EnumSet.of(
TaskType.TEXT_EMBEDDING,
TaskType.RERANK,
TaskType.EMBEDDING
);
private static final VoyageAIEmbeddingsModelCreator EMBEDDINGS_MODEL_CREATOR = new VoyageAIEmbeddingsModelCreator();
private static final Map<TaskType, ModelCreator<? extends VoyageAIModel>> MODEL_CREATORS = Map.of(
TaskType.TEXT_EMBEDDING,
new VoyageAIEmbeddingsModelCreator(),
EMBEDDINGS_MODEL_CREATOR,
TaskType.EMBEDDING,
EMBEDDINGS_MODEL_CREATOR,
TaskType.RERANK,
new VoyageAIRerankModelCreator()
);
// Batch sizes are tuned to stay within the VoyageAI per-model token limits per request.
// See https://docs.voyageai.com/docs/embeddings for model-specific token limits.
private static final Integer DEFAULT_BATCH_SIZE = 7;
private static final Map<String, Integer> MODEL_BATCH_SIZES = Map.of(
"voyage-multimodal-3",
7,
"voyage-3-large",
7,
"voyage-code-3",
7,
"voyage-3",
10,
"voyage-3-lite",
30,
"voyage-finance-2",
7,
"voyage-law-2",
7,
"voyage-code-2",
7,
"voyage-2",
72,
"voyage-02",
72
private static final Map<String, Integer> MODEL_BATCH_SIZES = Map.ofEntries(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where are these values coming from? In the documentation for the embeddings API and the multimodal embeddings API, the maximum number of inputs is given as 1000. 7 seems extremely small.

Copy link
Contributor Author

@fzowl fzowl Mar 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@DonalEvans These values are based on a safe and conservative calculation. We have a token limit per request (batch) and per document, so to stay on the safe side with batching, the total number of documents is calculated as total_number_of_tokens_per_batch / maximum_number_of_tokens_per_document.

The ideal solution would be to build token-aware batching (which is technically possible), but it requires the tokenizer to be downloaded from Hugging Face. If you are fine with this, I'd be more than happy to build it; that would be a more elegant solution.

Map.entry("voyage-multimodal-3", 7),
Map.entry("voyage-multimodal-3.5", 7),
Map.entry("voyage-3-large", 7),
Map.entry("voyage-code-3", 7),
Map.entry("voyage-3", 10),
Map.entry("voyage-3.5", 10),
Map.entry("voyage-3-lite", 30),
Map.entry("voyage-3.5-lite", 30),
Map.entry("voyage-finance-2", 7),
Map.entry("voyage-law-2", 7),
Map.entry("voyage-code-2", 7),
Map.entry("voyage-2", 72),
Map.entry("voyage-02", 72),
Map.entry("voyage-4-large", 7),
Map.entry("voyage-4", 10),
Map.entry("voyage-4-lite", 30)
);

private static final Map<String, Integer> RERANKERS_INPUT_SIZE = Map.of(
Expand Down Expand Up @@ -144,7 +160,7 @@ public void parseRequestConfig(
Map<String, Object> taskSettingsMap = removeFromMapOrDefaultEmpty(config, ModelConfigurations.TASK_SETTINGS);

ChunkingSettings chunkingSettings = null;
if (TaskType.TEXT_EMBEDDING.equals(taskType)) {
if (TaskType.TEXT_EMBEDDING.equals(taskType) || TaskType.EMBEDDING.equals(taskType)) {
chunkingSettings = ChunkingSettingsBuilder.fromMap(
removeFromMapOrDefaultEmpty(config, ModelConfigurations.CHUNKING_SETTINGS)
);
Expand Down Expand Up @@ -275,6 +291,27 @@ protected void doChunkedInfer(
}
}

@Override
protected void doEmbeddingInfer(
    Model model,
    EmbeddingRequest request,
    TimeValue timeout,
    ActionListener<InferenceServiceResults> listener
) {
    // Guard: only VoyageAI embeddings models can be served here.
    if ((model instanceof VoyageAIEmbeddingsModel voyageAIModel) == false) {
        listener.onFailure(createInvalidModelException(model));
        return;
    }
    // Guard: text-only models must not receive image (or other non-text) inputs.
    if (model.getServiceSettings().isMultimodal() == false && containsNonTextEntry(request.inputs())) {
        listener.onFailure(new ElasticsearchStatusException("Non-text input provided for text-only model", RestStatus.BAD_REQUEST));
        return;
    }
    // Build the executable embeddings action for this model and fire the request.
    var actionCreator = new VoyageAIActionCreator(getSender(), getServiceComponents());
    ExecutableAction action = voyageAIModel.accept(actionCreator, request.taskSettings());
    action.execute(new EmbeddingsInput(request::inputs, request.inputType()), timeout, listener);
}

// Resolve the request batch size for a model, using the conservative
// default when the model id has no tuned entry in MODEL_BATCH_SIZES.
private static int getBatchSize(VoyageAIModel model) {
    var modelId = model.getServiceSettings().modelId();
    return MODEL_BATCH_SIZES.getOrDefault(modelId, DEFAULT_BATCH_SIZE);
}
Expand All @@ -285,20 +322,12 @@ public Model updateModelWithEmbeddingDetails(Model model, int embeddingSize) {
var serviceSettings = embeddingsModel.getServiceSettings();
var similarityFromModel = serviceSettings.similarity();
var similarityToUse = similarityFromModel == null ? defaultSimilarity() : similarityFromModel;
var maxInputTokens = serviceSettings.maxInputTokens();
var dimensionSetByUser = serviceSettings.dimensionsSetByUser();

var updatedServiceSettings = new VoyageAIEmbeddingsServiceSettings(
new VoyageAIServiceSettings(
serviceSettings.getCommonSettings().modelId(),
serviceSettings.getCommonSettings().rateLimitSettings()
),
serviceSettings.getEmbeddingType(),
similarityToUse,
embeddingSize,
maxInputTokens,
dimensionSetByUser
);

var updatedServiceSettings = updateEmbeddingDetails(serviceSettings, embeddingSize, similarityToUse);

if (updatedServiceSettings.equals(serviceSettings)) {
return model;
}

return new VoyageAIEmbeddingsModel(embeddingsModel, updatedServiceSettings);
} else {
Expand Down Expand Up @@ -351,6 +380,53 @@ public static InferenceServiceConfiguration get() {
.build()
);

configurationMap.put(
DIMENSIONS,
new SettingsConfiguration.Builder(EnumSet.of(TaskType.TEXT_EMBEDDING, TaskType.EMBEDDING)).setDescription(
"The number of dimensions the resulting embeddings should have."
)
.setLabel("Dimensions")
.setRequired(false)
.setSensitive(false)
.setUpdatable(false)
.setType(SettingsConfigurationFieldType.INTEGER)
.build()
);

configurationMap.put(
EMBEDDING_TYPE,
new SettingsConfiguration.Builder(EnumSet.of(TaskType.TEXT_EMBEDDING, TaskType.EMBEDDING)).setDescription(
Strings.format(
"The type of embedding to return. One of %s. int8 and byte are equivalent and are encoded as "
+ "bytes with signed int8 precision. bit and binary are equivalent.",
EnumSet.allOf(VoyageAIEmbeddingType.class)
)
)
.setLabel("Embedding type")
.setDefaultValue("float")
.setRequired(false)
.setSensitive(false)
.setUpdatable(false)
.setType(SettingsConfigurationFieldType.STRING)
.build()
);

configurationMap.put(
SIMILARITY,
new SettingsConfiguration.Builder(EnumSet.of(TaskType.TEXT_EMBEDDING, TaskType.EMBEDDING)).setDescription(
Strings.format(
"The similarity measure. One of %s. The default similarity is dot_product.",
EnumSet.allOf(SimilarityMeasure.class)
)
)
.setLabel("Similarity")
.setRequired(false)
.setSensitive(false)
.setUpdatable(false)
.setType(SettingsConfigurationFieldType.STRING)
.build()
);

configurationMap.putAll(DefaultSecretSettings.toSettingsConfiguration(SUPPORTED_TASK_TYPES));
configurationMap.putAll(RateLimitSettings.toSettingsConfiguration(SUPPORTED_TASK_TYPES));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public ExecutableAction create(VoyageAIEmbeddingsModel model, Map<String, Object
overriddenModel,
EMBEDDINGS_HANDLER,
(embeddingsInput) -> new VoyageAIEmbeddingsRequest(
embeddingsInput.getTextInputs(),
embeddingsInput.getInputs(),
embeddingsInput.getInputType(),
overriddenModel
),
Expand Down
Loading
Loading