Skip to content

RDBC-921 Added Vector Search to Client API #114

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: v7.1
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ public abstract class AbstractGenericIndexCreationTask extends AbstractIndexCrea
protected final Set<String> indexSuggestions;
protected final Map<String, FieldTermVector> termVectorsStrings;
protected final Map<String, SpatialOptions> spatialOptionsStrings;
protected Map<String, VectorFieldOptions> vectorOptionsStrings;

protected String outputReduceToCollection;
protected String patternForOutputReduceToCollectionReferences;
Expand All @@ -33,6 +34,7 @@ public AbstractGenericIndexCreationTask() {
indexSuggestions = new HashSet<>();
termVectorsStrings = new HashMap<>();
spatialOptionsStrings = new HashMap<>();
vectorOptionsStrings = new HashMap<>();
}

/**
Expand All @@ -43,6 +45,26 @@ public boolean isMapReduce() {
return reduce != null;
}

/**
 * Registers vector search options for one index field.
 * Registering the same field name again replaces the previously stored options.
 *
 * @param field Name of the index field
 * @param options Vector options to associate with that field
 */
protected void vectorField(String field, VectorFieldOptions options) {
    this.vectorOptionsStrings.put(field, options);
}

/**
 * Gets the vector options registered per field name.
 *
 * @return The internal map itself (not a copy), so changes made by the caller are visible to this task
 */
public Map<String, VectorFieldOptions> getVectorOptionsStrings() {
    return this.vectorOptionsStrings;
}

/**
 * Replaces the registered vector field options with the entries of the given map.
 * The internal map instance is kept; only its content changes.
 *
 * @param vectorOptionsStrings New field-name to options mapping; null leaves the internal map empty
 */
public void setVectorOptionsStrings(Map<String, VectorFieldOptions> vectorOptionsStrings) {
    // getVectorOptionsStrings() returns the internal map, so a caller may hand that same instance
    // back. Clearing it before copying would then discard every entry we were asked to keep.
    if (vectorOptionsStrings == this.vectorOptionsStrings) {
        return;
    }

    this.vectorOptionsStrings.clear();
    if (vectorOptionsStrings != null) {
        this.vectorOptionsStrings.putAll(vectorOptionsStrings);
    }
}

// AbstractGenericIndexCreationTask

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ public IndexDefinition createIndexDefinition() {
indexDefinitionBuilder.setSuggestionsOptions(indexSuggestions);
indexDefinitionBuilder.setTermVectorsStrings(termVectorsStrings);
indexDefinitionBuilder.setSpatialIndexesStrings(spatialOptionsStrings);
indexDefinitionBuilder.setVectorOptionsStrings(vectorOptionsStrings);
indexDefinitionBuilder.setOutputReduceToCollection(outputReduceToCollection);
indexDefinitionBuilder.setPatternForOutputReduceToCollectionReferences(patternForOutputReduceToCollectionReferences);
indexDefinitionBuilder.setPatternReferencesCollectionName(patternReferencesCollectionName);
Expand All @@ -50,6 +51,9 @@ public IndexDefinition createIndexDefinition() {

if (searchEngineType != null) {
indexDefinitionBuilder.getConfiguration().put(Constants.Configuration.Indexes.INDEXING_STATIC_SEARCH_ENGINE_TYPE, SharpEnum.value(searchEngineType));
} else if (vectorOptionsStrings != null && !vectorOptionsStrings.isEmpty()) {
indexDefinitionBuilder.getConfiguration()
.put(Constants.Configuration.Indexes.INDEXING_STATIC_SEARCH_ENGINE_TYPE, "Corax");
}

return indexDefinitionBuilder.toIndexDefinition(conventions);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ public abstract class AbstractIndexDefinitionBuilder<TIndexDefinition extends In
private Map<String, FieldStorage> storesStrings;
private Map<String, FieldIndexing> indexesStrings;
private Map<String, String> analyzersStrings;
private Map<String, VectorFieldOptions> vectorFieldStrings = new HashMap<>();
private Set<String> suggestionsOptions;
private Map<String, FieldTermVector> termVectorsStrings;
private Map<String, SpatialOptions> spatialIndexesStrings;
Expand Down Expand Up @@ -75,6 +76,12 @@ public TIndexDefinition toIndexDefinition(DocumentConventions conventions, boole
applyValues(indexDefinition, termVectorsStrings, (options, value) -> options.setTermVector(value));
applyValues(indexDefinition, spatialIndexesStrings, (options, value) -> options.setSpatial(value));
applyValues(indexDefinition, suggestions, (options, value) -> options.setSuggestions(value));
applyValues(indexDefinition, vectorFieldStrings, (options, value) -> options.setVector(value));

// Set Corax search engine type if vector fields are present
if (!vectorFieldStrings.isEmpty()) {
indexDefinition.getConfiguration().setSetting("Indexing.Static.SearchEngineType", "Corax");
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use Constants.Configuration.Indexes.INDEXING_STATIC_SEARCH_ENGINE_TYPE

}

indexDefinition.setAdditionalSources(additionalSources);
indexDefinition.setAdditionalAssemblies(additionalAssemblies);
Expand Down Expand Up @@ -107,6 +114,10 @@ public void setReduce(String reduce) {
this.reduce = reduce;
}

public void setVectorOptionsStrings(Map<String, VectorFieldOptions> vectorOptionsStrings) {
this.vectorFieldStrings = vectorOptionsStrings;
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what is difference between this method and setVectorFieldStrings() ?

why we have 2 methods that are setting this.vectorFieldStrings ?

}

/**
 * Gets the storage settings held by this builder, keyed by field name.
 *
 * @return The map currently assigned to this builder (not a copy)
 */
public Map<String, FieldStorage> getStoresStrings() {
    return this.storesStrings;
}
Expand All @@ -123,6 +134,10 @@ public void setIndexesStrings(Map<String, FieldIndexing> indexesStrings) {
this.indexesStrings = indexesStrings;
}

/**
 * Sets the per-field vector options applied when the index definition is built.
 *
 * @param vectorFieldStrings Field-name to vector options mapping; null is treated as "no vector fields"
 */
public void setVectorFieldStrings(Map<String, VectorFieldOptions> vectorFieldStrings) {
    // toIndexDefinition() calls vectorFieldStrings.isEmpty() without a null check,
    // so a null argument is normalised to an empty map instead of being stored.
    this.vectorFieldStrings = vectorFieldStrings != null ? vectorFieldStrings : new HashMap<>();
}

/**
 * Gets the analyzer names held by this builder, keyed by field name.
 *
 * @return The map currently assigned to this builder (not a copy)
 */
public Map<String, String> getAnalyzersStrings() {
    return this.analyzersStrings;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ public IndexDefinition createIndexDefinition() {

if (searchEngineType != null) {
_definition.getConfiguration().put(Constants.Configuration.Indexes.INDEXING_STATIC_SEARCH_ENGINE_TYPE, SharpEnum.value(searchEngineType));
} else if (_definition.getFields() != null && _definition.getFields().values().stream().anyMatch(field -> field.getVector() != null)) {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't we need to set vectorFieldStrings in the index definition, as we do in the C# index?

_definition.getConfiguration().put(Constants.Configuration.Indexes.INDEXING_STATIC_SEARCH_ENGINE_TYPE, "Corax");
}
return _definition;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
import net.ravendb.client.documents.conventions.DocumentConventions;
import net.ravendb.client.primitives.SharpEnum;


import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;


public class AbstractMultiMapIndexCreationTask extends AbstractGenericIndexCreationTask {

private final List<String> maps = new ArrayList<>();
Expand All @@ -33,6 +35,7 @@ public IndexDefinition createIndexDefinition() {
indexDefinitionBuilder.setSuggestionsOptions(indexSuggestions);
indexDefinitionBuilder.setTermVectorsStrings(termVectorsStrings);
indexDefinitionBuilder.setSpatialIndexesStrings(spatialOptionsStrings);
indexDefinitionBuilder.setVectorFieldStrings(vectorOptionsStrings);
indexDefinitionBuilder.setOutputReduceToCollection(outputReduceToCollection);
indexDefinitionBuilder.setPatternForOutputReduceToCollectionReferences(patternForOutputReduceToCollectionReferences);
indexDefinitionBuilder.setPatternReferencesCollectionName(patternReferencesCollectionName);
Expand All @@ -43,9 +46,12 @@ public IndexDefinition createIndexDefinition() {
indexDefinitionBuilder.setPriority(getPriority());
indexDefinitionBuilder.setState(getState());
indexDefinitionBuilder.setDeploymentMode(getDeploymentMode());
indexDefinitionBuilder.setVectorFieldStrings(vectorOptionsStrings);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How is this different from line 38?


if (searchEngineType != null) {
indexDefinitionBuilder.getConfiguration().put(Constants.Configuration.Indexes.INDEXING_STATIC_SEARCH_ENGINE_TYPE, SharpEnum.value(searchEngineType));
} else if (vectorOptionsStrings != null && !vectorOptionsStrings.isEmpty()) {
indexDefinitionBuilder.getConfiguration().put(Constants.Configuration.Indexes.INDEXING_STATIC_SEARCH_ENGINE_TYPE, "Corax");
}

IndexDefinition indexDefinition = indexDefinitionBuilder.toIndexDefinition(conventions, false);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,7 @@
import java.util.HashMap;

/**
 * Index configuration expressed as string key/value pairs.
 */
public class IndexConfiguration extends HashMap<String, String> {
    /**
     * Stores a configuration value under the given key, replacing any previous value.
     * Behaves exactly like {@link #put(Object, Object)} with the return value discarded.
     *
     * NOTE(review): a reviewer asked whether this helper is needed at all; it is kept
     * because the index definition builder calls it.
     *
     * @param key Configuration key
     * @param value Configuration value
     */
    public void setSetting(String key, String value) {
        put(key, value);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ public class IndexFieldOptions {
private SpatialOptions spatial;
private String analyzer;
private boolean suggestions;
private VectorFieldOptions vector;

public FieldStorage getStorage() {
return storage;
Expand Down Expand Up @@ -57,4 +58,12 @@ public boolean isSuggestions() {
/**
 * Sets the suggestions flag of this field.
 *
 * @param suggestions New value of the flag
 */
public void setSuggestions(boolean suggestions) {
    this.suggestions = suggestions;
}

/**
 * Gets the vector options of this field.
 *
 * @return The vector options, or null when none were set
 */
public VectorFieldOptions getVector() {
    return this.vector;
}

/**
 * Sets the vector options of this field.
 *
 * @param vector Vector options to store; null clears them
 */
public void setVector(VectorFieldOptions vector) {
    this.vector = vector;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package net.ravendb.client.documents.indexes;

import net.ravendb.client.documents.queries.vectorSearch.VectorEmbeddingType;

public class VectorFieldOptions {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in nodejs we name it FieldVectorOptions lets have same naming

private Integer dimensions;
private VectorEmbeddingType sourceEmbeddingType;
private VectorEmbeddingType destinationEmbeddingType;
private Integer numberOfEdges;
private Integer numberOfCandidatesForIndexing;

public Integer getDimensions() {
return dimensions;
}

public void setDimensions(Integer dimensions) {
this.dimensions = dimensions;
}

public VectorEmbeddingType getSourceEmbeddingType() {
return sourceEmbeddingType;
}

public void setSourceEmbeddingType(VectorEmbeddingType sourceEmbeddingType) {
this.sourceEmbeddingType = sourceEmbeddingType;
}

public VectorEmbeddingType getDestinationEmbeddingType() {
return destinationEmbeddingType;
}

public void setDestinationEmbeddingType(VectorEmbeddingType destinationEmbeddingType) {
this.destinationEmbeddingType = destinationEmbeddingType;
}

public Integer getNumberOfEdges() {
return numberOfEdges;
}

public void setNumberOfEdges(Integer numberOfEdges) {
this.numberOfEdges = numberOfEdges;
}

public Integer getNumberOfCandidatesForIndexing() {
return numberOfCandidatesForIndexing;
}

public void setNumberOfCandidatesForIndexing(Integer numberOfCandidatesForIndexing) {
this.numberOfCandidatesForIndexing = numberOfCandidatesForIndexing;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package net.ravendb.client.documents.queries.vectorSearch;

/**
 * Holder for optional vector query settings.
 * Each property is a nullable wrapper; null means the value was never set.
 */
public class IVectorOptions {
    private Integer numberOfCandidates;
    private Double similarity;
    private Boolean isExact;

    /**
     * @return The number of candidates, or null when not set
     */
    public Integer getNumberOfCandidates() {
        return this.numberOfCandidates;
    }

    /**
     * @param numberOfCandidates Number of candidates; null leaves it unset
     */
    public void setNumberOfCandidates(Integer numberOfCandidates) {
        this.numberOfCandidates = numberOfCandidates;
    }

    /**
     * @return The similarity value, or null when not set
     */
    public Double getSimilarity() {
        return this.similarity;
    }

    /**
     * @param similarity Similarity value; null leaves it unset
     */
    public void setSimilarity(Double similarity) {
        this.similarity = similarity;
    }

    /**
     * @return The exact-search flag, or null when not set
     */
    public Boolean getIsExact() {
        return this.isExact;
    }

    /**
     * @param isExact Exact-search flag; null leaves it unset
     */
    public void setIsExact(Boolean isExact) {
        this.isExact = isExact;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package net.ravendb.client.documents.queries.vectorSearch;

import net.ravendb.client.documents.session.IVectorFieldFactory;
import net.ravendb.client.documents.queries.vectorSearch.fields.VectorEmbeddingField;
import net.ravendb.client.documents.queries.vectorSearch.fields.VectorEmbeddingTextField;
import net.ravendb.client.documents.queries.vectorSearch.fields.VectorField;
import net.ravendb.client.documents.session.IVectorEmbeddingField;
import net.ravendb.client.documents.session.IVectorEmbeddingTextField;
import net.ravendb.client.documents.session.IVectorField;

/**
 * Default {@link IVectorFieldFactory} implementation; every method wraps the given
 * field name in the matching vector field object.
 *
 * @param <T> The type of the field
 */
public class VectorEmbeddingFieldFactory<T> implements IVectorFieldFactory<T> {

    /**
     * Creates a text-based vector field.
     *
     * @param fieldName Field to wrap
     * @return A new {@link VectorEmbeddingTextField} for the field
     */
    @Override
    public IVectorEmbeddingTextField withText(T fieldName) {
        return new VectorEmbeddingTextField<>(fieldName);
    }

    /**
     * Creates an embedding vector field.
     *
     * @param fieldName Field to wrap
     * @param storedEmbeddingQuantization Embedding type passed on to the field
     * @return A new {@link VectorEmbeddingField} built with the last constructor argument set to false
     */
    @Override
    public IVectorEmbeddingField withEmbedding(T fieldName, VectorEmbeddingType storedEmbeddingQuantization) {
        // NOTE(review): the boolean presumably marks base64-encoded input - confirm against VectorEmbeddingField.
        return new VectorEmbeddingField<>(fieldName, storedEmbeddingQuantization, false);
    }

    /**
     * Creates an embedding vector field for base64 input.
     *
     * @param fieldName Field to wrap
     * @param storedEmbeddingQuantization Embedding type passed on to the field
     * @return A new {@link VectorEmbeddingField} built with the last constructor argument set to true
     */
    @Override
    public IVectorEmbeddingField withBase64(T fieldName, VectorEmbeddingType storedEmbeddingQuantization) {
        return new VectorEmbeddingField<>(fieldName, storedEmbeddingQuantization, true);
    }

    /**
     * Creates a plain vector field.
     *
     * @param fieldName Field to wrap
     * @return A new {@link VectorField} for the field
     */
    @Override
    public IVectorField withField(T fieldName) {
        return new VectorField<>(fieldName);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package net.ravendb.client.documents.queries.vectorSearch;

import net.ravendb.client.primitives.UseSharpEnum;

/**
 * Represents the type of vector embedding.
 * The constant names and their declaration order are part of the contract; do not reorder them.
 * NOTE(review): presumably serialized by name through the SharpEnum helper because of
 * the UseSharpEnum annotation - confirm.
 */
@UseSharpEnum
public enum VectorEmbeddingType {
/**
* Single precision floating point (32-bit) vector
*/
SINGLE,

/**
* 8-bit integer vector (quantized from floating point)
*/
INT8,

/**
* Binary vector (1 bit per dimension)
*/
BINARY,

/**
* Text that will be converted to vector embedding
*/
TEXT
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
package net.ravendb.client.documents.queries.vectorSearch;

import java.nio.ByteBuffer;
import java.nio.ByteOrder;

/**
 * Utility class for quantizing float embeddings into more compact representations.
 */
public class VectorQuantizer {
    /** Largest magnitude a quantized int8 component may take. */
    private static final int INT8_LIMIT = 127;

    /**
     * Converts a float array to an int8 array.
     * Finds the maximum absolute value and scales all values to fit in int8 range (-127 to 127).
     * Appends the maximum absolute value as a little-endian float (4 signed bytes) at the end.
     *
     * @param rawEmbedding The float array to convert
     * @return A new array of {@code rawEmbedding.length + 4} elements: the quantized values followed
     *         by the four bytes of the maximum absolute value
     */
    public static int[] toInt8(float[] rawEmbedding) {
        int length = rawEmbedding.length;
        int[] result = new int[length + Float.BYTES];

        float maxAbsValue = 0;
        for (float component : rawEmbedding) {
            maxAbsValue = Math.max(maxAbsValue, Math.abs(component));
        }

        // An all-zero (or empty) embedding has nothing to scale; 1 keeps every value at 0.
        float scaleFactor = maxAbsValue == 0 ? 1 : INT8_LIMIT / maxAbsValue;

        for (int i = 0; i < length; i++) {
            int quantized = Math.round(rawEmbedding[i] * scaleFactor);
            // For a subnormal maximum the scale factor overflows to Infinity and Math.round
            // saturates at Integer.MAX_VALUE / MIN_VALUE; clamp so the result stays in int8 range.
            result[i] = Math.max(-INT8_LIMIT, Math.min(INT8_LIMIT, quantized));
        }

        // Append the raw bytes of maxAbsValue so the original scale can be recovered.
        byte[] maxAbsBytes = ByteBuffer.allocate(Float.BYTES)
                .order(ByteOrder.LITTLE_ENDIAN)
                .putFloat(maxAbsValue)
                .array();
        for (int i = 0; i < Float.BYTES; i++) {
            result[length + i] = maxAbsBytes[i];
        }

        return result;
    }

    /**
     * Converts a float array to a binary representation where each value is represented by 1 bit.
     * 1 if the value is non-negative, 0 if negative. Packs 8 values per byte, first value in the
     * most significant bit; unused trailing bits of the last byte stay 0.
     *
     * @param rawEmbedding The float array to convert
     * @return A new array with the binary-packed values, one element (0-255) per 8 inputs
     */
    public static int[] toInt1(float[] rawEmbedding) {
        int length = rawEmbedding.length;
        // Round up to whole bytes using integer arithmetic only.
        int outputLength = length / 8 + (length % 8 == 0 ? 0 : 1);
        int[] result = new int[outputLength];

        for (int i = 0; i < length; i++) {
            // Integer division of a non-negative index already floors, which matches the
            // Math.floor(i / 8) the reviewer quoted from the Node.js client.
            int byteIndex = i / 8;
            int bitPosition = 7 - (i % 8);

            if (rawEmbedding[i] >= 0) {
                result[byteIndex] |= (1 << bitPosition);
            }
        }

        return result;
    }
}
Loading