-
Notifications
You must be signed in to change notification settings - Fork 16
RDBC-921 Added Vector Search to Client API #114
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: v7.1
Are you sure you want to change the base?
Changes from all commits
e8a103e
ccdd8de
2500e14
bb6283d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,7 @@ public abstract class AbstractIndexDefinitionBuilder<TIndexDefinition extends In | |
private Map<String, FieldStorage> storesStrings; | ||
private Map<String, FieldIndexing> indexesStrings; | ||
private Map<String, String> analyzersStrings; | ||
private Map<String, VectorFieldOptions> vectorFieldStrings = new HashMap<>(); | ||
private Set<String> suggestionsOptions; | ||
private Map<String, FieldTermVector> termVectorsStrings; | ||
private Map<String, SpatialOptions> spatialIndexesStrings; | ||
|
@@ -75,6 +76,12 @@ public TIndexDefinition toIndexDefinition(DocumentConventions conventions, boole | |
applyValues(indexDefinition, termVectorsStrings, (options, value) -> options.setTermVector(value)); | ||
applyValues(indexDefinition, spatialIndexesStrings, (options, value) -> options.setSpatial(value)); | ||
applyValues(indexDefinition, suggestions, (options, value) -> options.setSuggestions(value)); | ||
applyValues(indexDefinition, vectorFieldStrings, (options, value) -> options.setVector(value)); | ||
|
||
// Set Corax search engine type if vector fields are present | ||
if (!vectorFieldStrings.isEmpty()) { | ||
indexDefinition.getConfiguration().setSetting("Indexing.Static.SearchEngineType", "Corax"); | ||
} | ||
|
||
indexDefinition.setAdditionalSources(additionalSources); | ||
indexDefinition.setAdditionalAssemblies(additionalAssemblies); | ||
|
@@ -107,6 +114,10 @@ public void setReduce(String reduce) { | |
this.reduce = reduce; | ||
} | ||
|
||
public void setVectorOptionsStrings(Map<String, VectorFieldOptions> vectorOptionsStrings) { | ||
this.vectorFieldStrings = vectorOptionsStrings; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what is difference between this method and why we have 2 methods that are setting |
||
} | ||
|
||
public Map<String, FieldStorage> getStoresStrings() { | ||
return storesStrings; | ||
} | ||
|
@@ -123,6 +134,10 @@ public void setIndexesStrings(Map<String, FieldIndexing> indexesStrings) { | |
this.indexesStrings = indexesStrings; | ||
} | ||
|
||
public void setVectorFieldStrings(Map<String, VectorFieldOptions> vectorFieldStrings) { | ||
this.vectorFieldStrings = vectorFieldStrings; | ||
} | ||
|
||
public Map<String, String> getAnalyzersStrings() { | ||
return analyzersStrings; | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -109,6 +109,8 @@ public IndexDefinition createIndexDefinition() { | |
|
||
if (searchEngineType != null) { | ||
_definition.getConfiguration().put(Constants.Configuration.Indexes.INDEXING_STATIC_SEARCH_ENGINE_TYPE, SharpEnum.value(searchEngineType)); | ||
} else if (_definition.getFields() != null && _definition.getFields().values().stream().anyMatch(field -> field.getVector() != null)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Don't we need to set |
||
_definition.getConfiguration().put(Constants.Configuration.Indexes.INDEXING_STATIC_SEARCH_ENGINE_TYPE, "Corax"); | ||
} | ||
return _definition; | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,10 +4,12 @@ | |
import net.ravendb.client.documents.conventions.DocumentConventions; | ||
import net.ravendb.client.primitives.SharpEnum; | ||
|
||
|
||
import java.util.ArrayList; | ||
import java.util.HashSet; | ||
import java.util.List; | ||
|
||
|
||
public class AbstractMultiMapIndexCreationTask extends AbstractGenericIndexCreationTask { | ||
|
||
private final List<String> maps = new ArrayList<>(); | ||
|
@@ -33,6 +35,7 @@ public IndexDefinition createIndexDefinition() { | |
indexDefinitionBuilder.setSuggestionsOptions(indexSuggestions); | ||
indexDefinitionBuilder.setTermVectorsStrings(termVectorsStrings); | ||
indexDefinitionBuilder.setSpatialIndexesStrings(spatialOptionsStrings); | ||
indexDefinitionBuilder.setVectorFieldStrings(vectorOptionsStrings); | ||
indexDefinitionBuilder.setOutputReduceToCollection(outputReduceToCollection); | ||
indexDefinitionBuilder.setPatternForOutputReduceToCollectionReferences(patternForOutputReduceToCollectionReferences); | ||
indexDefinitionBuilder.setPatternReferencesCollectionName(patternReferencesCollectionName); | ||
|
@@ -43,9 +46,12 @@ public IndexDefinition createIndexDefinition() { | |
indexDefinitionBuilder.setPriority(getPriority()); | ||
indexDefinitionBuilder.setState(getState()); | ||
indexDefinitionBuilder.setDeploymentMode(getDeploymentMode()); | ||
indexDefinitionBuilder.setVectorFieldStrings(vectorOptionsStrings); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. How is this different from line 38? |
||
|
||
if (searchEngineType != null) { | ||
indexDefinitionBuilder.getConfiguration().put(Constants.Configuration.Indexes.INDEXING_STATIC_SEARCH_ENGINE_TYPE, SharpEnum.value(searchEngineType)); | ||
} else if (vectorOptionsStrings != null && !vectorOptionsStrings.isEmpty()) { | ||
indexDefinitionBuilder.getConfiguration().put(Constants.Configuration.Indexes.INDEXING_STATIC_SEARCH_ENGINE_TYPE, "Corax"); | ||
} | ||
|
||
IndexDefinition indexDefinition = indexDefinitionBuilder.toIndexDefinition(conventions, false); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,4 +3,7 @@ | |
import java.util.HashMap; | ||
|
||
/**
 * Index configuration expressed as a map of setting name to string value.
 */
public class IndexConfiguration extends HashMap<String, String> {

    /**
     * Stores {@code value} under {@code key}, replacing any existing entry for that key.
     * Equivalent to calling {@code put(key, value)} and discarding the result.
     *
     * <p>NOTE(review): a reviewer asked whether this helper method is actually needed.
     *
     * @param key   the setting name
     * @param value the setting value
     */
    public void setSetting(String key, String value) {
        put(key, value);
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
package net.ravendb.client.documents.indexes; | ||
|
||
import net.ravendb.client.documents.queries.vectorSearch.VectorEmbeddingType; | ||
|
||
public class VectorFieldOptions { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. in nodejs we name it |
||
private Integer dimensions; | ||
private VectorEmbeddingType sourceEmbeddingType; | ||
private VectorEmbeddingType destinationEmbeddingType; | ||
private Integer numberOfEdges; | ||
private Integer numberOfCandidatesForIndexing; | ||
|
||
public Integer getDimensions() { | ||
return dimensions; | ||
} | ||
|
||
public void setDimensions(Integer dimensions) { | ||
this.dimensions = dimensions; | ||
} | ||
|
||
public VectorEmbeddingType getSourceEmbeddingType() { | ||
return sourceEmbeddingType; | ||
} | ||
|
||
public void setSourceEmbeddingType(VectorEmbeddingType sourceEmbeddingType) { | ||
this.sourceEmbeddingType = sourceEmbeddingType; | ||
} | ||
|
||
public VectorEmbeddingType getDestinationEmbeddingType() { | ||
return destinationEmbeddingType; | ||
} | ||
|
||
public void setDestinationEmbeddingType(VectorEmbeddingType destinationEmbeddingType) { | ||
this.destinationEmbeddingType = destinationEmbeddingType; | ||
} | ||
|
||
public Integer getNumberOfEdges() { | ||
return numberOfEdges; | ||
} | ||
|
||
public void setNumberOfEdges(Integer numberOfEdges) { | ||
this.numberOfEdges = numberOfEdges; | ||
} | ||
|
||
public Integer getNumberOfCandidatesForIndexing() { | ||
return numberOfCandidatesForIndexing; | ||
} | ||
|
||
public void setNumberOfCandidatesForIndexing(Integer numberOfCandidatesForIndexing) { | ||
this.numberOfCandidatesForIndexing = numberOfCandidatesForIndexing; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
package net.ravendb.client.documents.queries.vectorSearch; | ||
|
||
/**
 * Mutable holder for optional vector-search query settings.
 * Every property is nullable; an unset property is simply {@code null}.
 */
public class IVectorOptions {

    private Integer numberOfCandidates;
    private Double similarity;
    private Boolean isExact;

    /** @return the configured number of candidates, or {@code null} when unset */
    public Integer getNumberOfCandidates() {
        return this.numberOfCandidates;
    }

    /** @param numberOfCandidates the number of candidates to store; may be {@code null} */
    public void setNumberOfCandidates(Integer numberOfCandidates) {
        this.numberOfCandidates = numberOfCandidates;
    }

    /** @return the configured similarity value, or {@code null} when unset */
    public Double getSimilarity() {
        return this.similarity;
    }

    /** @param similarity the similarity value to store; may be {@code null} */
    public void setSimilarity(Double similarity) {
        this.similarity = similarity;
    }

    /** @return the "is exact" flag, or {@code null} when unset */
    public Boolean getIsExact() {
        return this.isExact;
    }

    /** @param isExact the "is exact" flag to store; may be {@code null} */
    public void setIsExact(Boolean isExact) {
        this.isExact = isExact;
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
package net.ravendb.client.documents.queries.vectorSearch; | ||
|
||
import net.ravendb.client.documents.session.IVectorFieldFactory; | ||
import net.ravendb.client.documents.queries.vectorSearch.fields.VectorEmbeddingField; | ||
import net.ravendb.client.documents.queries.vectorSearch.fields.VectorEmbeddingTextField; | ||
import net.ravendb.client.documents.queries.vectorSearch.fields.VectorField; | ||
import net.ravendb.client.documents.session.IVectorEmbeddingField; | ||
import net.ravendb.client.documents.session.IVectorEmbeddingTextField; | ||
import net.ravendb.client.documents.session.IVectorField; | ||
|
||
/** | ||
* Factory for creating vector fields | ||
* @param <T> The type of the field | ||
*/ | ||
public class VectorEmbeddingFieldFactory<T> implements IVectorFieldFactory<T> { | ||
|
||
@Override | ||
public IVectorEmbeddingTextField withText(T fieldName) { | ||
return new VectorEmbeddingTextField<>(fieldName); | ||
} | ||
|
||
@Override | ||
public IVectorEmbeddingField withEmbedding(T fieldName, VectorEmbeddingType storedEmbeddingQuantization) { | ||
return new VectorEmbeddingField<>(fieldName, storedEmbeddingQuantization, false); | ||
} | ||
|
||
@Override | ||
public IVectorEmbeddingField withBase64(T fieldName, VectorEmbeddingType storedEmbeddingQuantization) { | ||
return new VectorEmbeddingField<>(fieldName, storedEmbeddingQuantization, true); | ||
} | ||
|
||
@Override | ||
public IVectorField withField(T fieldName) { | ||
return new VectorField<>(fieldName); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
package net.ravendb.client.documents.queries.vectorSearch; | ||
|
||
import net.ravendb.client.primitives.UseSharpEnum; | ||
|
||
/** | ||
* Represents the type of vector embedding. | ||
*/ | ||
@UseSharpEnum | ||
public enum VectorEmbeddingType { | ||
/** | ||
* Single precision floating point (32-bit) vector | ||
*/ | ||
SINGLE, | ||
|
||
/** | ||
* 8-bit integer vector (quantized from floating point) | ||
*/ | ||
INT8, | ||
|
||
/** | ||
* Binary vector (1 bit per dimension) | ||
*/ | ||
BINARY, | ||
|
||
/** | ||
* Text that will be converted to vector embedding | ||
*/ | ||
TEXT | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
package net.ravendb.client.documents.queries.vectorSearch; | ||
|
||
import java.nio.ByteBuffer; | ||
import java.nio.ByteOrder; | ||
|
||
/**
 * Static helpers that quantize float embeddings into more compact encodings.
 */
public class VectorQuantizer {

    /**
     * Quantizes a float embedding to the int8 value range.
     *
     * <p>Each component is multiplied by {@code 127 / maxAbs} (or by 1 when every
     * component is zero) and rounded with {@link Math#round(float)}, so the
     * results lie in -127..127. The four bytes of {@code maxAbs}, encoded as a
     * little-endian float, follow the quantized components as four extra
     * (signed byte) entries.
     *
     * @param rawEmbedding The float array to convert
     * @return a new array of length {@code rawEmbedding.length + 4}
     */
    public static int[] toInt8(float[] rawEmbedding) {
        final int count = rawEmbedding.length;

        // Largest magnitude in the input; it drives the scale factor.
        float peak = 0;
        for (float component : rawEmbedding) {
            peak = Math.max(peak, Math.abs(component));
        }

        final float scale;
        if (peak == 0) {
            scale = 1;
        } else {
            scale = 127 / peak;
        }

        final int[] quantized = new int[count + Float.BYTES];
        for (int idx = 0; idx < count; idx++) {
            quantized[idx] = Math.round(rawEmbedding[idx] * scale);
        }

        // Trailer: the peak value as 4 little-endian bytes, one byte per slot.
        final byte[] peakBytes = ByteBuffer.allocate(Float.BYTES)
                .order(ByteOrder.LITTLE_ENDIAN)
                .putFloat(peak)
                .array();
        int slot = count;
        for (byte b : peakBytes) {
            quantized[slot++] = b;
        }

        return quantized;
    }

    /**
     * Packs a float embedding into one bit per component, eight components per
     * output entry, most significant bit first. A bit is set when the component
     * satisfies {@code >= 0} (so {@code -0.0f} sets the bit and {@code NaN}
     * does not).
     *
     * @param rawEmbedding The float array to convert
     * @return a new array of {@code ceil(rawEmbedding.length / 8)} entries, each in 0..255
     */
    public static int[] toInt1(float[] rawEmbedding) {
        final int count = rawEmbedding.length;
        final int packedLength = count / 8 + (count % 8 == 0 ? 0 : 1);
        final int[] packed = new int[packedLength];

        for (int idx = 0; idx < count; idx++) {
            if (rawEmbedding[idx] >= 0) {
                // NOTE(review): a reviewer compared this index/bit computation with
                // the Node.js client; the remark is cut off in the captured page.
                packed[idx / 8] |= 0x80 >>> (idx % 8);
            }
        }

        return packed;
    }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use the constant
Constants.Configuration.Indexes.INDEXING_STATIC_SEARCH_ENGINE_TYPE rather than repeating the setting key as a string literal.