diff --git a/src/main/java/net/ravendb/client/documents/indexes/AbstractGenericIndexCreationTask.java b/src/main/java/net/ravendb/client/documents/indexes/AbstractGenericIndexCreationTask.java index 9e9e08ea..45c50e59 100644 --- a/src/main/java/net/ravendb/client/documents/indexes/AbstractGenericIndexCreationTask.java +++ b/src/main/java/net/ravendb/client/documents/indexes/AbstractGenericIndexCreationTask.java @@ -21,6 +21,7 @@ public abstract class AbstractGenericIndexCreationTask extends AbstractIndexCrea protected final Set indexSuggestions; protected final Map termVectorsStrings; protected final Map spatialOptionsStrings; + protected Map vectorOptionsStrings; protected String outputReduceToCollection; protected String patternForOutputReduceToCollectionReferences; @@ -33,6 +34,7 @@ public AbstractGenericIndexCreationTask() { indexSuggestions = new HashSet<>(); termVectorsStrings = new HashMap<>(); spatialOptionsStrings = new HashMap<>(); + vectorOptionsStrings = new HashMap<>(); } /** @@ -43,6 +45,26 @@ public boolean isMapReduce() { return reduce != null; } + /** + * Register a field for vector search options + * @param field Field name + * @param options Vector field options + */ + protected void vectorField(String field, VectorFieldOptions options) { + vectorOptionsStrings.put(field, options); + } + + public Map getVectorOptionsStrings() { + return vectorOptionsStrings; + } + + public void setVectorOptionsStrings(Map vectorOptionsStrings) { + this.vectorOptionsStrings.clear(); + if (vectorOptionsStrings != null) { + this.vectorOptionsStrings.putAll(vectorOptionsStrings); + } + } + // AbstractGenericIndexCreationTask /** diff --git a/src/main/java/net/ravendb/client/documents/indexes/AbstractIndexCreationTask.java b/src/main/java/net/ravendb/client/documents/indexes/AbstractIndexCreationTask.java index 7114144c..bc09916a 100644 --- a/src/main/java/net/ravendb/client/documents/indexes/AbstractIndexCreationTask.java +++ 
b/src/main/java/net/ravendb/client/documents/indexes/AbstractIndexCreationTask.java @@ -35,6 +35,7 @@ public IndexDefinition createIndexDefinition() { indexDefinitionBuilder.setSuggestionsOptions(indexSuggestions); indexDefinitionBuilder.setTermVectorsStrings(termVectorsStrings); indexDefinitionBuilder.setSpatialIndexesStrings(spatialOptionsStrings); + indexDefinitionBuilder.setVectorOptionsStrings(vectorOptionsStrings); indexDefinitionBuilder.setOutputReduceToCollection(outputReduceToCollection); indexDefinitionBuilder.setPatternForOutputReduceToCollectionReferences(patternForOutputReduceToCollectionReferences); indexDefinitionBuilder.setPatternReferencesCollectionName(patternReferencesCollectionName); @@ -50,6 +51,9 @@ public IndexDefinition createIndexDefinition() { if (searchEngineType != null) { indexDefinitionBuilder.getConfiguration().put(Constants.Configuration.Indexes.INDEXING_STATIC_SEARCH_ENGINE_TYPE, SharpEnum.value(searchEngineType)); + } else if (vectorOptionsStrings != null && !vectorOptionsStrings.isEmpty()) { + indexDefinitionBuilder.getConfiguration() + .put(Constants.Configuration.Indexes.INDEXING_STATIC_SEARCH_ENGINE_TYPE, "Corax"); } return indexDefinitionBuilder.toIndexDefinition(conventions); diff --git a/src/main/java/net/ravendb/client/documents/indexes/AbstractIndexDefinitionBuilder.java b/src/main/java/net/ravendb/client/documents/indexes/AbstractIndexDefinitionBuilder.java index e961e29b..63102d42 100644 --- a/src/main/java/net/ravendb/client/documents/indexes/AbstractIndexDefinitionBuilder.java +++ b/src/main/java/net/ravendb/client/documents/indexes/AbstractIndexDefinitionBuilder.java @@ -17,6 +17,7 @@ public abstract class AbstractIndexDefinitionBuilder storesStrings; private Map indexesStrings; private Map analyzersStrings; + private Map vectorFieldStrings = new HashMap<>(); private Set suggestionsOptions; private Map termVectorsStrings; private Map spatialIndexesStrings; @@ -75,6 +76,12 @@ public TIndexDefinition 
toIndexDefinition(DocumentConventions conventions, boole applyValues(indexDefinition, termVectorsStrings, (options, value) -> options.setTermVector(value)); applyValues(indexDefinition, spatialIndexesStrings, (options, value) -> options.setSpatial(value)); applyValues(indexDefinition, suggestions, (options, value) -> options.setSuggestions(value)); + applyValues(indexDefinition, vectorFieldStrings, (options, value) -> options.setVector(value)); + + // Set Corax search engine type if vector fields are present + if (!vectorFieldStrings.isEmpty()) { + indexDefinition.getConfiguration().setSetting("Indexing.Static.SearchEngineType", "Corax"); + } indexDefinition.setAdditionalSources(additionalSources); indexDefinition.setAdditionalAssemblies(additionalAssemblies); @@ -107,6 +114,10 @@ public void setReduce(String reduce) { this.reduce = reduce; } + public void setVectorOptionsStrings(Map vectorOptionsStrings) { + this.vectorFieldStrings = vectorOptionsStrings; + } + public Map getStoresStrings() { return storesStrings; } @@ -123,6 +134,10 @@ public void setIndexesStrings(Map indexesStrings) { this.indexesStrings = indexesStrings; } + public void setVectorFieldStrings(Map vectorFieldStrings) { + this.vectorFieldStrings = vectorFieldStrings; + } + public Map getAnalyzersStrings() { return analyzersStrings; } diff --git a/src/main/java/net/ravendb/client/documents/indexes/AbstractJavaScriptIndexCreationTask.java b/src/main/java/net/ravendb/client/documents/indexes/AbstractJavaScriptIndexCreationTask.java index d0a4623e..585fa3cd 100644 --- a/src/main/java/net/ravendb/client/documents/indexes/AbstractJavaScriptIndexCreationTask.java +++ b/src/main/java/net/ravendb/client/documents/indexes/AbstractJavaScriptIndexCreationTask.java @@ -109,6 +109,8 @@ public IndexDefinition createIndexDefinition() { if (searchEngineType != null) { _definition.getConfiguration().put(Constants.Configuration.Indexes.INDEXING_STATIC_SEARCH_ENGINE_TYPE, SharpEnum.value(searchEngineType)); + } else 
if (_definition.getFields() != null && _definition.getFields().values().stream().anyMatch(field -> field.getVector() != null)) { + _definition.getConfiguration().put(Constants.Configuration.Indexes.INDEXING_STATIC_SEARCH_ENGINE_TYPE, "Corax"); } return _definition; } diff --git a/src/main/java/net/ravendb/client/documents/indexes/AbstractMultiMapIndexCreationTask.java b/src/main/java/net/ravendb/client/documents/indexes/AbstractMultiMapIndexCreationTask.java index 4665fc05..3b3524f9 100644 --- a/src/main/java/net/ravendb/client/documents/indexes/AbstractMultiMapIndexCreationTask.java +++ b/src/main/java/net/ravendb/client/documents/indexes/AbstractMultiMapIndexCreationTask.java @@ -4,10 +4,12 @@ import net.ravendb.client.documents.conventions.DocumentConventions; import net.ravendb.client.primitives.SharpEnum; + import java.util.ArrayList; import java.util.HashSet; import java.util.List; + public class AbstractMultiMapIndexCreationTask extends AbstractGenericIndexCreationTask { private final List maps = new ArrayList<>(); @@ -33,6 +35,7 @@ public IndexDefinition createIndexDefinition() { indexDefinitionBuilder.setSuggestionsOptions(indexSuggestions); indexDefinitionBuilder.setTermVectorsStrings(termVectorsStrings); indexDefinitionBuilder.setSpatialIndexesStrings(spatialOptionsStrings); + indexDefinitionBuilder.setVectorFieldStrings(vectorOptionsStrings); indexDefinitionBuilder.setOutputReduceToCollection(outputReduceToCollection); indexDefinitionBuilder.setPatternForOutputReduceToCollectionReferences(patternForOutputReduceToCollectionReferences); indexDefinitionBuilder.setPatternReferencesCollectionName(patternReferencesCollectionName); @@ -43,9 +46,12 @@ public IndexDefinition createIndexDefinition() { indexDefinitionBuilder.setPriority(getPriority()); indexDefinitionBuilder.setState(getState()); indexDefinitionBuilder.setDeploymentMode(getDeploymentMode()); + indexDefinitionBuilder.setVectorFieldStrings(vectorOptionsStrings); if (searchEngineType != null) { 
indexDefinitionBuilder.getConfiguration().put(Constants.Configuration.Indexes.INDEXING_STATIC_SEARCH_ENGINE_TYPE, SharpEnum.value(searchEngineType)); + } else if (vectorOptionsStrings != null && !vectorOptionsStrings.isEmpty()) { + indexDefinitionBuilder.getConfiguration().put(Constants.Configuration.Indexes.INDEXING_STATIC_SEARCH_ENGINE_TYPE, "Corax"); } IndexDefinition indexDefinition = indexDefinitionBuilder.toIndexDefinition(conventions, false); diff --git a/src/main/java/net/ravendb/client/documents/indexes/IndexConfiguration.java b/src/main/java/net/ravendb/client/documents/indexes/IndexConfiguration.java index b5a1f3ac..b0bd10b6 100644 --- a/src/main/java/net/ravendb/client/documents/indexes/IndexConfiguration.java +++ b/src/main/java/net/ravendb/client/documents/indexes/IndexConfiguration.java @@ -3,4 +3,7 @@ import java.util.HashMap; public class IndexConfiguration extends HashMap { + public void setSetting(String key, String value) { + this.put(key, value); + } } diff --git a/src/main/java/net/ravendb/client/documents/indexes/IndexFieldOptions.java b/src/main/java/net/ravendb/client/documents/indexes/IndexFieldOptions.java index 98d0c7d8..00b3c8e6 100644 --- a/src/main/java/net/ravendb/client/documents/indexes/IndexFieldOptions.java +++ b/src/main/java/net/ravendb/client/documents/indexes/IndexFieldOptions.java @@ -9,6 +9,7 @@ public class IndexFieldOptions { private SpatialOptions spatial; private String analyzer; private boolean suggestions; + private VectorFieldOptions vector; public FieldStorage getStorage() { return storage; @@ -57,4 +58,12 @@ public boolean isSuggestions() { public void setSuggestions(boolean suggestions) { this.suggestions = suggestions; } + + public VectorFieldOptions getVector() { + return vector; + } + + public void setVector(VectorFieldOptions vector) { + this.vector = vector; + } } diff --git a/src/main/java/net/ravendb/client/documents/indexes/VectorFieldOptions.java 
b/src/main/java/net/ravendb/client/documents/indexes/VectorFieldOptions.java new file mode 100644 index 00000000..6db50e04 --- /dev/null +++ b/src/main/java/net/ravendb/client/documents/indexes/VectorFieldOptions.java @@ -0,0 +1,51 @@ +package net.ravendb.client.documents.indexes; + +import net.ravendb.client.documents.queries.vectorSearch.VectorEmbeddingType; + +public class VectorFieldOptions { + private Integer dimensions; + private VectorEmbeddingType sourceEmbeddingType; + private VectorEmbeddingType destinationEmbeddingType; + private Integer numberOfEdges; + private Integer numberOfCandidatesForIndexing; + + public Integer getDimensions() { + return dimensions; + } + + public void setDimensions(Integer dimensions) { + this.dimensions = dimensions; + } + + public VectorEmbeddingType getSourceEmbeddingType() { + return sourceEmbeddingType; + } + + public void setSourceEmbeddingType(VectorEmbeddingType sourceEmbeddingType) { + this.sourceEmbeddingType = sourceEmbeddingType; + } + + public VectorEmbeddingType getDestinationEmbeddingType() { + return destinationEmbeddingType; + } + + public void setDestinationEmbeddingType(VectorEmbeddingType destinationEmbeddingType) { + this.destinationEmbeddingType = destinationEmbeddingType; + } + + public Integer getNumberOfEdges() { + return numberOfEdges; + } + + public void setNumberOfEdges(Integer numberOfEdges) { + this.numberOfEdges = numberOfEdges; + } + + public Integer getNumberOfCandidatesForIndexing() { + return numberOfCandidatesForIndexing; + } + + public void setNumberOfCandidatesForIndexing(Integer numberOfCandidatesForIndexing) { + this.numberOfCandidatesForIndexing = numberOfCandidatesForIndexing; + } +} diff --git a/src/main/java/net/ravendb/client/documents/queries/vectorSearch/IVectorOptions.java b/src/main/java/net/ravendb/client/documents/queries/vectorSearch/IVectorOptions.java new file mode 100644 index 00000000..77e09d04 --- /dev/null +++ 
b/src/main/java/net/ravendb/client/documents/queries/vectorSearch/IVectorOptions.java @@ -0,0 +1,17 @@ +package net.ravendb.client.documents.queries.vectorSearch; + +public class IVectorOptions { + private Integer numberOfCandidates; + private Double similarity; + private Boolean isExact; + + // Getters and setters + public Integer getNumberOfCandidates() { return numberOfCandidates; } + public void setNumberOfCandidates(Integer numberOfCandidates) { this.numberOfCandidates = numberOfCandidates; } + + public Double getSimilarity() { return similarity; } + public void setSimilarity(Double similarity) { this.similarity = similarity; } + + public Boolean getIsExact() { return isExact; } + public void setIsExact(Boolean isExact) { this.isExact = isExact; } +} diff --git a/src/main/java/net/ravendb/client/documents/queries/vectorSearch/VectorEmbeddingFieldFactory.java b/src/main/java/net/ravendb/client/documents/queries/vectorSearch/VectorEmbeddingFieldFactory.java new file mode 100644 index 00000000..f901c552 --- /dev/null +++ b/src/main/java/net/ravendb/client/documents/queries/vectorSearch/VectorEmbeddingFieldFactory.java @@ -0,0 +1,36 @@ +package net.ravendb.client.documents.queries.vectorSearch; + +import net.ravendb.client.documents.session.IVectorFieldFactory; +import net.ravendb.client.documents.queries.vectorSearch.fields.VectorEmbeddingField; +import net.ravendb.client.documents.queries.vectorSearch.fields.VectorEmbeddingTextField; +import net.ravendb.client.documents.queries.vectorSearch.fields.VectorField; +import net.ravendb.client.documents.session.IVectorEmbeddingField; +import net.ravendb.client.documents.session.IVectorEmbeddingTextField; +import net.ravendb.client.documents.session.IVectorField; + +/** + * Factory for creating vector fields + * @param The type of the field + */ +public class VectorEmbeddingFieldFactory implements IVectorFieldFactory { + + @Override + public IVectorEmbeddingTextField withText(T fieldName) { + return new 
VectorEmbeddingTextField<>(fieldName); + } + + @Override + public IVectorEmbeddingField withEmbedding(T fieldName, VectorEmbeddingType storedEmbeddingQuantization) { + return new VectorEmbeddingField<>(fieldName, storedEmbeddingQuantization, false); + } + + @Override + public IVectorEmbeddingField withBase64(T fieldName, VectorEmbeddingType storedEmbeddingQuantization) { + return new VectorEmbeddingField<>(fieldName, storedEmbeddingQuantization, true); + } + + @Override + public IVectorField withField(T fieldName) { + return new VectorField<>(fieldName); + } +} \ No newline at end of file diff --git a/src/main/java/net/ravendb/client/documents/queries/vectorSearch/VectorEmbeddingType.java b/src/main/java/net/ravendb/client/documents/queries/vectorSearch/VectorEmbeddingType.java new file mode 100644 index 00000000..04b16986 --- /dev/null +++ b/src/main/java/net/ravendb/client/documents/queries/vectorSearch/VectorEmbeddingType.java @@ -0,0 +1,29 @@ +package net.ravendb.client.documents.queries.vectorSearch; + +import net.ravendb.client.primitives.UseSharpEnum; + +/** + * Represents the type of vector embedding. 
+ */ +@UseSharpEnum +public enum VectorEmbeddingType { + /** + * Single precision floating point (32-bit) vector + */ + SINGLE, + + /** + * 8-bit integer vector (quantized from floating point) + */ + INT8, + + /** + * Binary vector (1 bit per dimension) + */ + BINARY, + + /** + * Text that will be converted to vector embedding + */ + TEXT +} \ No newline at end of file diff --git a/src/main/java/net/ravendb/client/documents/queries/vectorSearch/VectorQuantizer.java b/src/main/java/net/ravendb/client/documents/queries/vectorSearch/VectorQuantizer.java new file mode 100644 index 00000000..f21662a0 --- /dev/null +++ b/src/main/java/net/ravendb/client/documents/queries/vectorSearch/VectorQuantizer.java @@ -0,0 +1,69 @@ +package net.ravendb.client.documents.queries.vectorSearch; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +/** + * Utility class for quantizing vectors + */ +public class VectorQuantizer { + /** + * Converts a float array to an int8 array. + * Finds the maximum absolute value and scales all values to fit in int8 range (-127 to 127). + * Appends the maximum absolute value as a float at the end. + * + * @param rawEmbedding The float array to convert + * @return A new array with the quantized values + */ + public static int[] toInt8(float[] rawEmbedding) { + int length = rawEmbedding.length; + int[] result = new int[length + 4]; // +4 for the float at the end + + float maxAbsValue = 0; + for (int i = 0; i < length; i++) { + maxAbsValue = Math.max(maxAbsValue, Math.abs(rawEmbedding[i])); + } + + float scaleFactor = maxAbsValue == 0 ? 
1 : 127 / maxAbsValue; + + for (int i = 0; i < length; i++) { + result[i] = Math.round(rawEmbedding[i] * scaleFactor); + } + + // Convert the maxAbsValue float to bytes and append to the result + ByteBuffer buffer = ByteBuffer.allocate(4); + buffer.order(ByteOrder.LITTLE_ENDIAN); + buffer.putFloat(maxAbsValue); + byte[] bytes = buffer.array(); + + for (int i = 0; i < 4; i++) { + result[length + i] = bytes[i]; + } + + return result; + } + + /** + * Converts a float array to a binary representation where each value is represented by 1 bit. + * 1 if the value is non-negative, 0 if negative. Packs 8 values per byte. + * + * @param rawEmbedding The float array to convert + * @return A new array with the binary-packed values + */ + public static int[] toInt1(float[] rawEmbedding) { + int length = rawEmbedding.length; + int outputLength = (int) Math.ceil(length / 8.0); + int[] result = new int[outputLength]; + + for (int i = 0; i < length; i++) { + int byteIndex = i / 8; + int bitPosition = 7 - (i % 8); + + if (rawEmbedding[i] >= 0) { + result[byteIndex] |= (1 << bitPosition); + } + } + + return result; + } +} \ No newline at end of file diff --git a/src/main/java/net/ravendb/client/documents/queries/vectorSearch/common/VectorFieldBase.java b/src/main/java/net/ravendb/client/documents/queries/vectorSearch/common/VectorFieldBase.java new file mode 100644 index 00000000..0a8d1c5e --- /dev/null +++ b/src/main/java/net/ravendb/client/documents/queries/vectorSearch/common/VectorFieldBase.java @@ -0,0 +1,126 @@ +package net.ravendb.client.documents.queries.vectorSearch.common; + +import net.ravendb.client.documents.queries.vectorSearch.VectorEmbeddingType; + +import java.util.HashMap; +import java.util.Map; + +/** + * Base class for vector field implementations + * @param The type of the field + */ +public abstract class VectorFieldBase { + private final T rawFieldName; + private String fieldName; + protected boolean byFieldMethodUsed = false; + + /** + * Creates a new instance 
of VectorFieldBase + * @param fieldName The field name + */ + protected VectorFieldBase(T fieldName) { + this.rawFieldName = fieldName; + this.fieldName = fieldName.toString(); + } + + /** + * Gets the raw field name + * @return The raw field name + */ + public T getRawFieldName() { + return rawFieldName; + } + + /** + * Gets the formatted field name + * @return The formatted field name + */ + public String getFieldName() { + return fieldName; + } + + /** + * Sets the formatted field name + * @param fieldName The formatted field name + */ + protected void setFieldName(String fieldName) { + this.fieldName = fieldName; + } + + /** + * Gets the formatted field name based on source and destination embedding types + * @param rawFieldName The raw field name + * @param sourceType The source embedding type + * @param destType The destination embedding type + * @param taskIdentifier The task identifier (optional) + * @return The formatted field name + */ + protected String getFormattedFieldName(T rawFieldName, VectorEmbeddingType sourceType, + VectorEmbeddingType destType, String taskIdentifier) { + // If using withField, return the field name as is + if (byFieldMethodUsed) { + return rawFieldName.toString(); + } + + Map> configurationMap = new HashMap<>(); + + // Single source type mappings + Map singleMap = new HashMap<>(); + singleMap.put(VectorEmbeddingType.SINGLE, ""); + singleMap.put(VectorEmbeddingType.INT8, "embedding.f32_i8"); + singleMap.put(VectorEmbeddingType.BINARY, "embedding.f32_i1"); + configurationMap.put(VectorEmbeddingType.SINGLE, singleMap); + + // Text source type mappings + Map textMap = new HashMap<>(); + textMap.put(VectorEmbeddingType.SINGLE, "embedding.text"); + textMap.put(VectorEmbeddingType.INT8, "embedding.text_i8"); + textMap.put(VectorEmbeddingType.BINARY, "embedding.text_i1"); + configurationMap.put(VectorEmbeddingType.TEXT, textMap); + + // Int8 source type mappings + Map int8Map = new HashMap<>(); + int8Map.put(VectorEmbeddingType.INT8, 
"embedding.i8"); + configurationMap.put(VectorEmbeddingType.INT8, int8Map); + + // Binary source type mappings + Map binaryMap = new HashMap<>(); + binaryMap.put(VectorEmbeddingType.BINARY, "embedding.i1"); + configurationMap.put(VectorEmbeddingType.BINARY, binaryMap); + + // Get the embedding function for the source and destination types + String embeddingFunction = null; + if (configurationMap.containsKey(sourceType) && configurationMap.get(sourceType).containsKey(destType)) { + embeddingFunction = configurationMap.get(sourceType).get(destType); + } + + if (embeddingFunction == null) { + return rawFieldName.toString(); + } + + // For text source type with task identifier, handle specially + if (sourceType == VectorEmbeddingType.TEXT && taskIdentifier != null && !taskIdentifier.isEmpty()) { + return embeddingFunction.isEmpty() + ? rawFieldName.toString() + : embeddingFunction + "(" + rawFieldName + ", ai.task('" + taskIdentifier + "'))"; + } + + // For empty embedding function (same source and destination for Single), return just the field name + if (embeddingFunction.isEmpty()) { + return rawFieldName.toString(); + } + + return embeddingFunction + "(" + rawFieldName + ")"; + } + + /** + * Gets the formatted field name based on source and destination embedding types + * @param rawFieldName The raw field name + * @param sourceType The source embedding type + * @param destType The destination embedding type + * @return The formatted field name + */ + protected String getFormattedFieldName(T rawFieldName, VectorEmbeddingType sourceType, VectorEmbeddingType destType) { + return getFormattedFieldName(rawFieldName, sourceType, destType, null); + } +} \ No newline at end of file diff --git a/src/main/java/net/ravendb/client/documents/queries/vectorSearch/fields/IVectorOptionsJson.java b/src/main/java/net/ravendb/client/documents/queries/vectorSearch/fields/IVectorOptionsJson.java new file mode 100644 index 00000000..120e3d00 --- /dev/null +++ 
b/src/main/java/net/ravendb/client/documents/queries/vectorSearch/fields/IVectorOptionsJson.java @@ -0,0 +1,80 @@ +package net.ravendb.client.documents.queries.vectorSearch.fields; + +import net.ravendb.client.documents.queries.vectorSearch.VectorEmbeddingType; + +public class IVectorOptionsJson { + private VectorEmbeddingType sourceEmbeddingType; + private VectorEmbeddingType destinationEmbeddingType; + private Integer dimensions; + private Integer numberOfEdges; + private Integer numberOfCandidates; + private Double similarity; + private Boolean isExact; + private String sourceFieldName; + + // Getters and setters + + public VectorEmbeddingType getSourceEmbeddingType() { + return sourceEmbeddingType; + } + + public void setSourceEmbeddingType(VectorEmbeddingType sourceEmbeddingType) { + this.sourceEmbeddingType = sourceEmbeddingType; + } + + public VectorEmbeddingType getDestinationEmbeddingType() { + return destinationEmbeddingType; + } + + public void setDestinationEmbeddingType(VectorEmbeddingType destinationEmbeddingType) { + this.destinationEmbeddingType = destinationEmbeddingType; + } + + public Integer getDimensions() { + return dimensions; + } + + public void setDimensions(Integer dimensions) { + this.dimensions = dimensions; + } + + public Integer getNumberOfEdges() { + return numberOfEdges; + } + + public void setNumberOfEdges(Integer numberOfEdges) { + this.numberOfEdges = numberOfEdges; + } + + public Integer getNumberOfCandidates() { + return numberOfCandidates; + } + + public void setNumberOfCandidates(Integer numberOfCandidates) { + this.numberOfCandidates = numberOfCandidates; + } + + public Double getSimilarity() { + return similarity; + } + + public void setSimilarity(Double similarity) { + this.similarity = similarity; + } + + public Boolean getIsExact() { + return isExact; + } + + public void setIsExact(Boolean isExact) { + this.isExact = isExact; + } + + public String getSourceFieldName() { + return sourceFieldName; + } + + public void 
setSourceFieldName(String sourceFieldName) { + this.sourceFieldName = sourceFieldName; + } +} diff --git a/src/main/java/net/ravendb/client/documents/queries/vectorSearch/fields/VectorEmbeddingField.java b/src/main/java/net/ravendb/client/documents/queries/vectorSearch/fields/VectorEmbeddingField.java new file mode 100644 index 00000000..2c62982c --- /dev/null +++ b/src/main/java/net/ravendb/client/documents/queries/vectorSearch/fields/VectorEmbeddingField.java @@ -0,0 +1,92 @@ +package net.ravendb.client.documents.queries.vectorSearch.fields; + +import net.ravendb.client.documents.queries.vectorSearch.VectorEmbeddingType; +import net.ravendb.client.documents.queries.vectorSearch.common.VectorFieldBase; +import net.ravendb.client.documents.session.IVectorEmbeddingField; +import net.ravendb.client.documents.session.IVectorEmbeddingFieldFactoryAccessor; + +/** + * Vector embedding field implementation + * @param The type of the field + */ +public class VectorEmbeddingField extends VectorFieldBase implements + IVectorEmbeddingField, + IVectorEmbeddingFieldFactoryAccessor { + + private VectorEmbeddingType sourceQuantizationType; + private VectorEmbeddingType destinationQuantizationType; + private boolean isBase64Encoded; + private String embeddingsGenerationTaskIdentifier = ""; + + /** + * Creates a new instance of VectorEmbeddingField + * @param fieldName The field name + * @param sourceQuantizationType The source quantization type (default: SINGLE) + * @param isBase64Encoded Whether the embedding is base64 encoded (default: false) + */ + public VectorEmbeddingField(T fieldName, + VectorEmbeddingType sourceQuantizationType, + boolean isBase64Encoded) { + super(fieldName); + this.sourceQuantizationType = sourceQuantizationType != null ? 
sourceQuantizationType : VectorEmbeddingType.SINGLE; + this.destinationQuantizationType = this.sourceQuantizationType; + this.isBase64Encoded = isBase64Encoded; + updateFieldName(); + } + + /** + * Creates a new instance of VectorEmbeddingField with default values + * @param fieldName The field name + */ + public VectorEmbeddingField(T fieldName) { + this(fieldName, VectorEmbeddingType.SINGLE, false); + } + + private void updateFieldName() { + setFieldName(getFormattedFieldName( + getRawFieldName(), + sourceQuantizationType, + destinationQuantizationType + )); + } + + @Override + public IVectorEmbeddingField targetQuantization(VectorEmbeddingType targetEmbeddingQuantization) { + if (targetEmbeddingQuantization == VectorEmbeddingType.TEXT) { + throw new IllegalArgumentException("Cannot quantize the embedding to Text. This option is only available for sourceQuantizationType."); + } + + this.destinationQuantizationType = targetEmbeddingQuantization; + + if ((this.sourceQuantizationType == VectorEmbeddingType.INT8 || + this.sourceQuantizationType == VectorEmbeddingType.BINARY) && + this.destinationQuantizationType != this.sourceQuantizationType) { + throw new IllegalArgumentException( + String.format("Cannot quantize already quantized embeddings. 
Source VectorEmbeddingType is %s; however the destination is %s.", + this.sourceQuantizationType, this.destinationQuantizationType)); + } + + updateFieldName(); + return this; + } + + @Override + public VectorEmbeddingType getSourceQuantizationType() { + return sourceQuantizationType; + } + + @Override + public VectorEmbeddingType getDestinationQuantizationType() { + return destinationQuantizationType; + } + + @Override + public boolean isBase64Encoded() { + return isBase64Encoded; + } + + @Override + public String getEmbeddingsGenerationTaskIdentifier() { + return embeddingsGenerationTaskIdentifier; + } +} \ No newline at end of file diff --git a/src/main/java/net/ravendb/client/documents/queries/vectorSearch/fields/VectorEmbeddingTextField.java b/src/main/java/net/ravendb/client/documents/queries/vectorSearch/fields/VectorEmbeddingTextField.java new file mode 100644 index 00000000..ec9ff28e --- /dev/null +++ b/src/main/java/net/ravendb/client/documents/queries/vectorSearch/fields/VectorEmbeddingTextField.java @@ -0,0 +1,80 @@ +package net.ravendb.client.documents.queries.vectorSearch.fields; + +import net.ravendb.client.documents.queries.vectorSearch.VectorEmbeddingType; +import net.ravendb.client.documents.queries.vectorSearch.common.VectorFieldBase; +import net.ravendb.client.documents.session.IVectorEmbeddingFieldFactoryAccessor; +import net.ravendb.client.documents.session.IVectorEmbeddingTextField; + +/** + * Vector embedding text field implementation + * @param The type of the field + */ +public class VectorEmbeddingTextField extends VectorFieldBase implements + IVectorEmbeddingTextField, + IVectorEmbeddingFieldFactoryAccessor { + + private VectorEmbeddingType sourceQuantizationType = VectorEmbeddingType.TEXT; + private VectorEmbeddingType destinationQuantizationType = VectorEmbeddingType.SINGLE; + private boolean isBase64Encoded = false; + private String embeddingsGenerationTaskIdentifier = ""; + + /** + * Creates a new instance of VectorEmbeddingTextField 
+ * @param fieldName The field name + */ + public VectorEmbeddingTextField(T fieldName) { + super(fieldName); + updateFieldName(); + } + + private void updateFieldName() { + setFieldName(getFormattedFieldName( + getRawFieldName(), + sourceQuantizationType, + destinationQuantizationType, + embeddingsGenerationTaskIdentifier + )); + } + + @Override + public IVectorEmbeddingTextField targetQuantization(VectorEmbeddingType targetEmbeddingQuantization) { + if (targetEmbeddingQuantization == VectorEmbeddingType.TEXT) { + throw new IllegalArgumentException("Cannot quantize the embedding to Text. This option is only available for sourceQuantizationType."); + } + + this.destinationQuantizationType = targetEmbeddingQuantization; + updateFieldName(); + return this; + } + + @Override + public IVectorEmbeddingTextField usingTask(String embeddingsGenerationTaskIdentifier) { + if (this.sourceQuantizationType != VectorEmbeddingType.TEXT) { + throw new IllegalArgumentException("The usingTask method can only be used with text embeddings (withText)"); + } + + this.embeddingsGenerationTaskIdentifier = embeddingsGenerationTaskIdentifier; + updateFieldName(); + return this; + } + + @Override + public VectorEmbeddingType getSourceQuantizationType() { + return sourceQuantizationType; + } + + @Override + public VectorEmbeddingType getDestinationQuantizationType() { + return destinationQuantizationType; + } + + @Override + public boolean isBase64Encoded() { + return isBase64Encoded; + } + + @Override + public String getEmbeddingsGenerationTaskIdentifier() { + return embeddingsGenerationTaskIdentifier; + } +} \ No newline at end of file diff --git a/src/main/java/net/ravendb/client/documents/queries/vectorSearch/fields/VectorField.java b/src/main/java/net/ravendb/client/documents/queries/vectorSearch/fields/VectorField.java new file mode 100644 index 00000000..97159503 --- /dev/null +++ b/src/main/java/net/ravendb/client/documents/queries/vectorSearch/fields/VectorField.java @@ -0,0 +1,20 @@ 
+package net.ravendb.client.documents.queries.vectorSearch.fields; + +import net.ravendb.client.documents.queries.vectorSearch.common.VectorFieldBase; +import net.ravendb.client.documents.session.IVectorField; + +/** + * Vector field implementation + * @param The type of the field + */ +public class VectorField extends VectorFieldBase implements IVectorField { + /** + * Creates a new instance of VectorField + * @param fieldName The field name + */ + public VectorField(T fieldName) { + super(fieldName); + this.byFieldMethodUsed = true; + setFieldName(fieldName.toString()); + } +} \ No newline at end of file diff --git a/src/main/java/net/ravendb/client/documents/session/AbstractDocumentQuery.java b/src/main/java/net/ravendb/client/documents/session/AbstractDocumentQuery.java index 585bb4fd..74fb2b39 100644 --- a/src/main/java/net/ravendb/client/documents/session/AbstractDocumentQuery.java +++ b/src/main/java/net/ravendb/client/documents/session/AbstractDocumentQuery.java @@ -27,6 +27,8 @@ import net.ravendb.client.documents.queries.timeSeries.ITimeSeriesQueryBuilder; import net.ravendb.client.documents.queries.timeSeries.TimeSeriesQueryBuilder; import net.ravendb.client.documents.queries.timings.QueryTimings; +import net.ravendb.client.documents.queries.vectorSearch.IVectorOptions; +import net.ravendb.client.documents.queries.vectorSearch.VectorEmbeddingFieldFactory; import net.ravendb.client.documents.session.loaders.IncludeBuilderBase; import net.ravendb.client.documents.session.operations.QueryOperation; import net.ravendb.client.documents.session.operations.lazy.LazyQueryOperation; @@ -46,6 +48,7 @@ import java.time.Duration; import java.util.*; import java.util.function.Consumer; +import java.util.function.Function; /** * A query against a Raven index @@ -969,6 +972,65 @@ protected CleanCloseable setFilterMode(boolean on) { return new FilterModeScope(filterModeStack, on); } + protected void _vectorSearch(Object fieldName, Object valueOrFactory, IVectorOptions 
options){ + this.assertMethodIsCurrentlySupported("vectorSearch"); + VectorEmbeddingFieldFactory vectorFactory = new VectorEmbeddingFieldFactory(); + IVectorField fieldAccessor; + + if (fieldName instanceof String) { + fieldAccessor = vectorFactory.withField((String) fieldName); + } else if (fieldName instanceof Function) { + Function> func = + (Function>) fieldName; + fieldAccessor = func.apply(vectorFactory); + } else { + throw new IllegalArgumentException( + "fieldName must be either a string or a function that selects a vector field" + ); + } + + WhereParams whereParams = new WhereParams(); + whereParams.setFieldName(fieldAccessor.getFieldName()); + + if (valueOrFactory instanceof Consumer) { + // Function case + VectorEmbeddingFieldValueFactory fieldValueFactory = new VectorEmbeddingFieldValueFactory(); + @SuppressWarnings("unchecked") + Consumer func = + (Consumer) valueOrFactory; + func.accept(fieldValueFactory); + + if (fieldValueFactory.getEmbeddings() != null) { + whereParams.setValue(fieldValueFactory.getEmbeddings()); + } else if (fieldValueFactory.getEmbedding() != null) { + whereParams.setValue(fieldValueFactory.getEmbedding()); + } else if (fieldValueFactory.getText() != null) { + whereParams.setValue(fieldValueFactory.getText()); + } else if (fieldValueFactory.getTexts() != null) { + whereParams.setValue(fieldValueFactory.getTexts()); + } else { + throw new IllegalStateException("No value was provided in the valueFactory"); + } + } else { + whereParams.setValue(valueOrFactory); + } + + whereParams.setAllowWildcards(true); + Object transformToEqualValue = transformValue(whereParams); + List tokens = getCurrentWhereTokens(); + appendOperatorIfNeeded(tokens); + negateIfNeeded(tokens, whereParams.getFieldName()); + + WhereToken whereToken = WhereToken.create( + WhereOperator.VECTOR_SEARCH, + whereParams.getFieldName(), + this.addQueryParameter(transformToEqualValue), + new WhereToken.WhereOptions(options) + ); + + tokens.add(whereToken); + } + private 
static class FilterModeScope implements CleanCloseable { private final Stack _modeStack; diff --git a/src/main/java/net/ravendb/client/documents/session/DocumentQuery.java b/src/main/java/net/ravendb/client/documents/session/DocumentQuery.java index ca3284f1..25f800dc 100644 --- a/src/main/java/net/ravendb/client/documents/session/DocumentQuery.java +++ b/src/main/java/net/ravendb/client/documents/session/DocumentQuery.java @@ -21,6 +21,7 @@ import net.ravendb.client.documents.queries.suggestions.*; import net.ravendb.client.documents.queries.timeSeries.ITimeSeriesQueryBuilder; import net.ravendb.client.documents.queries.timings.QueryTimings; +import net.ravendb.client.documents.queries.vectorSearch.IVectorOptions; import net.ravendb.client.documents.session.loaders.IQueryIncludeBuilder; import net.ravendb.client.documents.session.loaders.QueryIncludeBuilder; import net.ravendb.client.documents.session.querying.sharding.IQueryShardedContextBuilder; @@ -947,4 +948,24 @@ public IDocumentQuery shardContext(Consumer buil _shardContext(builder); return this; } + + @Override + public IDocumentQuery vectorSearch( + Object fieldName, + Object valueFactory, + IVectorOptions options + ) { + this._vectorSearch(fieldName, valueFactory, options); + return this; + } + + @Override + public IDocumentQuery vectorSearch( + Object fieldName, + Runnable valueFactory, + IVectorOptions options + ) { + this._vectorSearch(fieldName, valueFactory, options); + return this; + } } diff --git a/src/main/java/net/ravendb/client/documents/session/IDocumentQuery.java b/src/main/java/net/ravendb/client/documents/session/IDocumentQuery.java index e2d9c02c..cb52f2eb 100644 --- a/src/main/java/net/ravendb/client/documents/session/IDocumentQuery.java +++ b/src/main/java/net/ravendb/client/documents/session/IDocumentQuery.java @@ -9,6 +9,7 @@ import net.ravendb.client.documents.queries.suggestions.ISuggestionDocumentQuery; import net.ravendb.client.documents.queries.suggestions.SuggestionBase; import 
net.ravendb.client.documents.queries.timeSeries.ITimeSeriesQueryBuilder; +import net.ravendb.client.documents.queries.vectorSearch.IVectorOptions; import net.ravendb.client.documents.session.querying.sharding.IQueryShardedContextBuilder; import java.util.function.Consumer; @@ -139,4 +140,16 @@ public interface IDocumentQuery extends IDocumentQueryBase suggestUsing(Consumer> builder); IDocumentQuery shardContext(Consumer builder); + + IDocumentQuery vectorSearch( + Object fieldName, + Object value, + IVectorOptions options + ); + + IDocumentQuery vectorSearch( + Object fieldName, + Runnable factory, + IVectorOptions options + ); } diff --git a/src/main/java/net/ravendb/client/documents/session/IRavenVector.java b/src/main/java/net/ravendb/client/documents/session/IRavenVector.java new file mode 100644 index 00000000..a61cdaa5 --- /dev/null +++ b/src/main/java/net/ravendb/client/documents/session/IRavenVector.java @@ -0,0 +1,7 @@ +package net.ravendb.client.documents.session; + +import java.sql.Array; + +public interface IRavenVector extends Array { + // No additional methods; just a marker for type consistency +} \ No newline at end of file diff --git a/src/main/java/net/ravendb/client/documents/session/IVectorEmbeddingField.java b/src/main/java/net/ravendb/client/documents/session/IVectorEmbeddingField.java new file mode 100644 index 00000000..847d9cbe --- /dev/null +++ b/src/main/java/net/ravendb/client/documents/session/IVectorEmbeddingField.java @@ -0,0 +1,15 @@ +package net.ravendb.client.documents.session; + +import net.ravendb.client.documents.queries.vectorSearch.VectorEmbeddingType; + +/** + * Interface for vector embedding field + */ +public interface IVectorEmbeddingField extends IVectorField { + /** + * Sets the target quantization + * @param targetEmbeddingQuantization The target embedding quantization + * @return The vector embedding field + */ + IVectorEmbeddingField targetQuantization(VectorEmbeddingType targetEmbeddingQuantization); +} \ No newline 
at end of file diff --git a/src/main/java/net/ravendb/client/documents/session/IVectorEmbeddingFieldFactoryAccessor.java b/src/main/java/net/ravendb/client/documents/session/IVectorEmbeddingFieldFactoryAccessor.java new file mode 100644 index 00000000..8af7bfa2 --- /dev/null +++ b/src/main/java/net/ravendb/client/documents/session/IVectorEmbeddingFieldFactoryAccessor.java @@ -0,0 +1,33 @@ +package net.ravendb.client.documents.session; + +import net.ravendb.client.documents.queries.vectorSearch.VectorEmbeddingType; + +/** + * Interface for vector embedding field factory accessor + * @param The type of the field + */ +public interface IVectorEmbeddingFieldFactoryAccessor extends IVectorField{ + /** + * Gets the source quantization type + * @return The source quantization type + */ + VectorEmbeddingType getSourceQuantizationType(); + + /** + * Gets the destination quantization type + * @return The destination quantization type + */ + VectorEmbeddingType getDestinationQuantizationType(); + + /** + * Gets whether the embedding is base64 encoded + * @return Whether the embedding is base64 encoded + */ + boolean isBase64Encoded(); + + /** + * Gets the embeddings generation task identifier + * @return The embeddings generation task identifier + */ + String getEmbeddingsGenerationTaskIdentifier(); +} \ No newline at end of file diff --git a/src/main/java/net/ravendb/client/documents/session/IVectorEmbeddingFieldValueFactory.java b/src/main/java/net/ravendb/client/documents/session/IVectorEmbeddingFieldValueFactory.java new file mode 100644 index 00000000..c850e9fa --- /dev/null +++ b/src/main/java/net/ravendb/client/documents/session/IVectorEmbeddingFieldValueFactory.java @@ -0,0 +1,30 @@ +package net.ravendb.client.documents.session; + +import java.util.Map; + +public interface IVectorEmbeddingFieldValueFactory { + + /** + * Defines a queried embedding. 
+ * @param embedding Array containing embedding values + */ + void byEmbedding(T[] embedding); + + /** + * Defines queried embeddings. + * @param embeddings Array containing embeddings values + */ + void byEmbeddings(T[][] embeddings); + + /** + * Defines queried embedding in base64 format. + * @param base64Embedding Embedding encoded as base64 string + */ + void byBase64(String base64Embedding); + + /** + * Defines queried embedding using a RavenVector wrapper. + * @param embedding Map with "@vector" key containing a RavenVector + */ + void byEmbedding(Map> embedding); +} diff --git a/src/main/java/net/ravendb/client/documents/session/IVectorEmbeddingTextField.java b/src/main/java/net/ravendb/client/documents/session/IVectorEmbeddingTextField.java new file mode 100644 index 00000000..68f26090 --- /dev/null +++ b/src/main/java/net/ravendb/client/documents/session/IVectorEmbeddingTextField.java @@ -0,0 +1,22 @@ +package net.ravendb.client.documents.session; + +import net.ravendb.client.documents.queries.vectorSearch.VectorEmbeddingType; + +/** + * Interface for vector embedding text field + */ +public interface IVectorEmbeddingTextField extends IVectorField { + /** + * Sets the target quantization + * @param targetEmbeddingQuantization The target embedding quantization + * @return The vector embedding text field + */ + IVectorEmbeddingTextField targetQuantization(VectorEmbeddingType targetEmbeddingQuantization); + + /** + * Sets the task to use for generating embeddings + * @param embeddingsGenerationTaskIdentifier The embeddings generation task identifier + * @return The vector embedding text field + */ + IVectorEmbeddingTextField usingTask(String embeddingsGenerationTaskIdentifier); +} \ No newline at end of file diff --git a/src/main/java/net/ravendb/client/documents/session/IVectorEmbeddingTextFieldValueFactory.java b/src/main/java/net/ravendb/client/documents/session/IVectorEmbeddingTextFieldValueFactory.java new file mode 100644 index 00000000..9fe66faf --- 
/dev/null +++ b/src/main/java/net/ravendb/client/documents/session/IVectorEmbeddingTextFieldValueFactory.java @@ -0,0 +1,15 @@ +package net.ravendb.client.documents.session; + +public interface IVectorEmbeddingTextFieldValueFactory { + /** + * Defines queried text. + * @param text Queried text + */ + void byText(String text); + + /** + * Defines queried texts. + * @param texts Queried texts + */ + void byTexts(String[] texts); +} diff --git a/src/main/java/net/ravendb/client/documents/session/IVectorField.java b/src/main/java/net/ravendb/client/documents/session/IVectorField.java new file mode 100644 index 00000000..da699ea3 --- /dev/null +++ b/src/main/java/net/ravendb/client/documents/session/IVectorField.java @@ -0,0 +1,12 @@ +package net.ravendb.client.documents.session; + +/** + * Interface for vector field + */ +public interface IVectorField { + /** + * Gets the field name + * @return The field name + */ + String getFieldName(); +} \ No newline at end of file diff --git a/src/main/java/net/ravendb/client/documents/session/IVectorFieldFactory.java b/src/main/java/net/ravendb/client/documents/session/IVectorFieldFactory.java new file mode 100644 index 00000000..d15adf1a --- /dev/null +++ b/src/main/java/net/ravendb/client/documents/session/IVectorFieldFactory.java @@ -0,0 +1,39 @@ +package net.ravendb.client.documents.session; + +import net.ravendb.client.documents.queries.vectorSearch.VectorEmbeddingType; + +/** + * Interface for vector field factory + * @param The type of the field + */ +public interface IVectorFieldFactory { + /** + * Creates a vector field from text + * @param fieldName The field name + * @return The vector embedding text field + */ + IVectorEmbeddingTextField withText(T fieldName); + + /** + * Creates a vector field from embedding + * @param fieldName The field name + * @param storedEmbeddingQuantization The stored embedding quantization (optional) + * @return The vector embedding field + */ + IVectorEmbeddingField withEmbedding(T 
fieldName, VectorEmbeddingType storedEmbeddingQuantization); + + /** + * Creates a vector field from base64 encoded embedding + * @param fieldName The field name + * @param storedEmbeddingQuantization The stored embedding quantization (optional) + * @return The vector embedding field + */ + IVectorEmbeddingField withBase64(T fieldName, VectorEmbeddingType storedEmbeddingQuantization); + + /** + * Creates a vector field + * @param fieldName The field name + * @return The vector field + */ + IVectorField withField(T fieldName); +} diff --git a/src/main/java/net/ravendb/client/documents/session/IVectorFieldValueFactory.java b/src/main/java/net/ravendb/client/documents/session/IVectorFieldValueFactory.java new file mode 100644 index 00000000..27782fe5 --- /dev/null +++ b/src/main/java/net/ravendb/client/documents/session/IVectorFieldValueFactory.java @@ -0,0 +1,4 @@ +package net.ravendb.client.documents.session; + +public interface IVectorFieldValueFactory extends IVectorEmbeddingTextFieldValueFactory,IVectorEmbeddingFieldValueFactory { +} diff --git a/src/main/java/net/ravendb/client/documents/session/VectorEmbeddingFieldValueFactory.java b/src/main/java/net/ravendb/client/documents/session/VectorEmbeddingFieldValueFactory.java new file mode 100644 index 00000000..88dbb2c8 --- /dev/null +++ b/src/main/java/net/ravendb/client/documents/session/VectorEmbeddingFieldValueFactory.java @@ -0,0 +1,55 @@ +package net.ravendb.client.documents.session; + +import java.util.Map; + +public class VectorEmbeddingFieldValueFactory implements IVectorFieldValueFactory { + + private Object embedding; + private Integer[][] embeddings; + private String text; + private String[] texts; + + public Object getEmbedding() { + return embedding; + } + public Integer[][] getEmbeddings() { + return embeddings; + } + public String getText() { + return text; + } + public String[] getTexts() { + return texts; + } + + + @Override + public void byEmbedding(T[] embedding) { + this.embedding = 
(Integer[])embedding; + } + + @Override + public void byEmbedding(Map> embedding) { + this.embedding = embedding; + } + + @Override + public void byEmbeddings(T[][] embeddings) { + this.embeddings = (Integer[][])embeddings; + } + + @Override + public void byBase64(String base64Embedding) { + this.text = base64Embedding; + } + + @Override + public void byText(String text) { + this.text = text; + } + + @Override + public void byTexts(String[] texts) { + this.texts = texts; + } +} diff --git a/src/main/java/net/ravendb/client/documents/session/tokens/WhereOperator.java b/src/main/java/net/ravendb/client/documents/session/tokens/WhereOperator.java index 3a190844..8365cdb3 100644 --- a/src/main/java/net/ravendb/client/documents/session/tokens/WhereOperator.java +++ b/src/main/java/net/ravendb/client/documents/session/tokens/WhereOperator.java @@ -19,5 +19,6 @@ public enum WhereOperator { SPATIAL_CONTAINS, SPATIAL_DISJOINT, SPATIAL_INTERSECTS, - REGEX + REGEX, + VECTOR_SEARCH } diff --git a/src/main/java/net/ravendb/client/documents/session/tokens/WhereToken.java b/src/main/java/net/ravendb/client/documents/session/tokens/WhereToken.java index 3202baf4..0ed22bb0 100644 --- a/src/main/java/net/ravendb/client/documents/session/tokens/WhereToken.java +++ b/src/main/java/net/ravendb/client/documents/session/tokens/WhereToken.java @@ -2,6 +2,7 @@ import net.ravendb.client.Constants; import net.ravendb.client.documents.queries.SearchOperator; +import net.ravendb.client.documents.queries.vectorSearch.IVectorOptions; import net.ravendb.client.primitives.UseSharpEnum; import org.apache.commons.lang3.ObjectUtils; @@ -32,6 +33,7 @@ public static class WhereOptions { private WhereMethodCall method; private ShapeToken whereShape; private double distanceErrorPct; + public IVectorOptions vectorSearch; public static WhereOptions defaultOptions() { return new WhereOptions(); @@ -40,6 +42,11 @@ public static WhereOptions defaultOptions() { private WhereOptions() { } + public 
WhereOptions(IVectorOptions vectorSearch) { + this.vectorSearch = vectorSearch; + this.exact = vectorSearch != null && Boolean.TRUE.equals(vectorSearch.getIsExact()); + } + public WhereOptions(boolean exact) { this.exact = exact; } @@ -298,6 +305,9 @@ public void writeTo(StringBuilder writer) { case REGEX: writer.append("regex("); break; + case VECTOR_SEARCH: + writer.append("vector.search("); + break; } writeInnerWhere(writer); @@ -428,6 +438,24 @@ private void specialOperator(StringBuilder writer) { writer .append(")"); break; + case VECTOR_SEARCH: + WhereOptions options = this.options; + writer.append(", $") + .append(this.parameterName) + .append(", ") + .append(options.vectorSearch != null + ? options.vectorSearch.getSimilarity() + != null ? options.vectorSearch.getSimilarity().toString() : "null" + : "null") + .append(", ") + .append(options.vectorSearch != null + ? options.vectorSearch.getNumberOfCandidates() + != null ? options.vectorSearch.getNumberOfCandidates().toString() : "null" + : "null") + + .append(")"); + break; + default: throw new IllegalArgumentException(); } diff --git a/src/test/java/net/ravendb/client/documents/queries/vectorSearch/VectorQuantizierTest.java b/src/test/java/net/ravendb/client/documents/queries/vectorSearch/VectorQuantizierTest.java new file mode 100644 index 00000000..7f97a984 --- /dev/null +++ b/src/test/java/net/ravendb/client/documents/queries/vectorSearch/VectorQuantizierTest.java @@ -0,0 +1,94 @@ +package net.ravendb.client.documents.queries.vectorSearch; + +import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class VectorQuantizierTest { + @Test + public void shouldCorrectlyQuantizeVectorElementsAndStoreScaleFactor() { + float[] input = {0.1f, 0.2f}; + int[] result = VectorQuantizer.toInt8(input); + int[] expected = {64, 127, -51, -52, 76, 62}; + + for (int i = 0; i < expected.length; i++) { + assertEquals(expected[i], result[i]); + } + } + + @Test + public void 
shouldMaintainExpectedByteValuesInQuantizedOutput() { + float[] input = {0.1f, 0.2f}; + int[] result = VectorQuantizer.toInt8(input); + int[] expected = {64, 127, -51, -52, 76, 62}; + + for (int i = 0; i < expected.length; i++) { + assertEquals(expected[i], result[i]); + } + } + + @Test + public void shouldHandleZeroVectorInputCorrectly() { + float[] input = {0, 0, 0, 0}; + int[] result = VectorQuantizer.toInt8(input); + int[] expected = new int[8]; + + for (int i = 0; i < expected.length; i++) { + assertEquals(expected[i], result[i]); + } + } + + @Test + public void shouldCorrectlyQuantizeFloatsWithPositiveAndNegativeValues() { + float[] input = {0.5f, -1.5f, 2.5f, -3.5f}; + int[] result = VectorQuantizer.toInt8(input); + int[] expected = {18, -54, 91, -127, 0, 0, 96, 64}; + + for (int i = 0; i < expected.length; i++) { + assertEquals(expected[i], result[i]); + } + } + + @Test + public void shouldCorrectlyConvertVectorToBinaryBitRepresentation() { + float[] input = {1, -2, 3, -4, 5, -6, 7, -8, 9}; + int[] result = VectorQuantizer.toInt1(input); + int[] expected = {0xAA, 0x80}; + + for (int i = 0; i < expected.length; i++) { + assertEquals(expected[i], result[i]); + } + } + + @Test + public void shouldRepresentZeroValuesAsPositiveBits() { + float[] input = {0, 0, 0, 0, 0, 0, 0, 0}; + int[] result = VectorQuantizer.toInt1(input); + int[] expected = {0xFF}; + + for (int i = 0; i < expected.length; i++) { + assertEquals(expected[i], result[i]); + } + } + + @Test + public void shouldProperlyPadVectorsNotDivisibleBy8() { + float[] input = {1, 2, 3, 4, 5}; + int[] result = VectorQuantizer.toInt1(input); + int[] expected = {0xF8}; + + for (int i = 0; i < expected.length; i++) { + assertEquals(expected[i], result[i]); + } + } + + @Test + public void shouldCorrectlyConvertAlternatingSignsToBitPattern() { + float[] input = {-1, 2, -3, 4, -5, 6, -7, 8}; + int[] result = VectorQuantizer.toInt1(input); + int[] expected = {0x55}; + + for (int i = 0; i < expected.length; i++) { + 
assertEquals(expected[i], result[i]); + } + } +} diff --git a/src/test/java/net/ravendb/client/documents/queries/vectorSearch/VectorSearchTest.java b/src/test/java/net/ravendb/client/documents/queries/vectorSearch/VectorSearchTest.java new file mode 100644 index 00000000..ea545e28 --- /dev/null +++ b/src/test/java/net/ravendb/client/documents/queries/vectorSearch/VectorSearchTest.java @@ -0,0 +1,1018 @@ +package net.ravendb.client.documents.queries.vectorSearch; + +import net.ravendb.client.RemoteTestBase; +import net.ravendb.client.documents.IDocumentStore; +import net.ravendb.client.documents.indexes.*; +import net.ravendb.client.documents.operations.indexes.GetIndexesOperation; +import net.ravendb.client.documents.operations.indexes.PutIndexesOperation; +import net.ravendb.client.documents.session.IDocumentSession; +import net.ravendb.client.documents.session.VectorEmbeddingFieldValueFactory; +import net.ravendb.client.documents.indexes.IndexType; +import com.google.common.collect.Sets; +import net.ravendb.client.infrastructure.EnableOn70Server; +import org.junit.jupiter.api.Test; + +import java.util.Collections; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.assertj.core.api.Assertions.assertThat; + + +public class VectorSearchTest extends RemoteTestBase { + + public static class User { + private String name; + private int age; + private String embeddingBase64; + private double[] embeddingSingles; + private byte[] embeddingSBytes; + private byte[] embeddingBinary; + private String textualValue; + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public int getAge() { + return age; + } + + public void setAge(int age) { + this.age = age; + } + public String getEmbeddingBase64() { + return embeddingBase64; + } + + public void setEmbeddingBase64(String embeddingBase64) { + this.embeddingBase64 = embeddingBase64; + } + + public double[] getEmbeddingSingles() { + return 
embeddingSingles; + } + + public void setEmbeddingSingles(double[] embeddingSingles) { + this.embeddingSingles = embeddingSingles; + } + + public byte[] getEmbeddingSBytes() { + return embeddingSBytes; + } + + public void setEmbeddingSBytes(byte[] embeddingSBytes) { + this.embeddingSBytes = embeddingSBytes; + } + + public byte[] getEmbeddingBinary() { + return embeddingBinary; + } + + public void setEmbeddingBinary(byte[] embeddingBinary) { + this.embeddingBinary = embeddingBinary; + } + + public String getTextualValue() { + return textualValue; + } + + public void setTextualValue(String textualValue) { + this.textualValue = textualValue; + } + } + + @EnableOn70Server + @Test + public void shouldGenerateRqlForVectorSearchWithInt8QuantizedEmbeddingField() { + VectorEmbeddingFieldFactory vectorFieldFactory = new VectorEmbeddingFieldFactory(); + VectorEmbeddingFieldValueFactory valueFactory = new VectorEmbeddingFieldValueFactory(); + Float[] arr = new Float[]{2.5f, 3.3f}; + IVectorOptions options = new IVectorOptions(); + options.setSimilarity(0.65); + options.setNumberOfCandidates(12); + + try (IDocumentStore store = getDocumentStore()) { + try (IDocumentSession session = store.openSession()) { + String query = session.query(User.class) + .vectorSearch( + vectorFieldFactory.withEmbedding("EmbeddingField", VectorEmbeddingType.INT8).targetQuantization(VectorEmbeddingType.INT8).getFieldName(), + ()-> valueFactory.byEmbedding(arr), + options + ).toString(); + + assertThat(query) + .isEqualTo("from 'Users' where vector.search(embedding.i8(EmbeddingField), $p0, 0.65, 12)"); + } + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @EnableOn70Server + @Test + public void shouldGenerateRqlForVectorSearchWithTextEmbeddingUsingAiTask() { + VectorEmbeddingFieldFactory vectorFieldFactory = new VectorEmbeddingFieldFactory(); + VectorEmbeddingFieldValueFactory valueFactory = new VectorEmbeddingFieldValueFactory(); + try (IDocumentStore store = getDocumentStore()) 
{ + try (IDocumentSession session = store.openSession()) { + + String query = session.query(User.class) + .vectorSearch( + vectorFieldFactory.withText("VectorField").usingTask("id-for-task-open-ai").getFieldName(), + ()-> valueFactory.byText("aaaa"), + null + ).toString(); + + assertEquals( + "from 'Users' where vector.search(embedding.text(VectorField, ai.task('id-for-task-open-ai')), $p0, null, null)", + query + ); + } + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @EnableOn70Server + @Test + public void shouldGenerateRqlForBasicVectorSearchWithNumericEmbeddingValues() { + VectorEmbeddingFieldFactory vectorFieldFactory = new VectorEmbeddingFieldFactory(); + VectorEmbeddingFieldValueFactory valueFactory = new VectorEmbeddingFieldValueFactory(); + Float[] arr = new Float[]{2.5f, 3.3f}; + + try (IDocumentStore store = getDocumentStore()) { + try (IDocumentSession session = store.openSession()) { + + String query = session.query(User.class) + .vectorSearch( + vectorFieldFactory.withField("VectorField").getFieldName(), + () -> valueFactory.byEmbedding(arr), + null + ) + .toString(); + + assertEquals( + "from 'Users' where vector.search(VectorField, $p0, null, null)", + query + ); + } + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @EnableOn70Server + @Test + public void shouldGenerateRqlForVectorSearchWithBase64EncodedEmbedding() { + VectorEmbeddingFieldFactory vectorFieldFactory = new VectorEmbeddingFieldFactory(); + VectorEmbeddingFieldValueFactory valueFactory = new VectorEmbeddingFieldValueFactory(); + + try (IDocumentStore store = getDocumentStore()) { + try (IDocumentSession session = store.openSession()) { + + String query = session.query(User.class) + .vectorSearch( + vectorFieldFactory.withField("VectorField").getFieldName(), + ()-> valueFactory.byBase64("aaaa=="), + null + ) + .toString(); + + assertEquals( + "from 'Users' where vector.search(VectorField, $p0, null, null)", + query + ); + } + } catch (Exception 
e) { + throw new RuntimeException(e); + } + } + + @EnableOn70Server + @Test + public void shouldGenerateRqlForVectorSearchWithTextFieldAndInt8Quantization() { + VectorEmbeddingFieldFactory vectorFieldFactory = new VectorEmbeddingFieldFactory(); + VectorEmbeddingFieldValueFactory valueFactory = new VectorEmbeddingFieldValueFactory(); + + try (IDocumentStore store = getDocumentStore(); + IDocumentSession session = store.openSession()) { + + String query = session.query(User.class) + .vectorSearch( + vectorFieldFactory.withText("EmbeddingSingles").targetQuantization(VectorEmbeddingType.INT8).getFieldName(), + () -> valueFactory.byText("aaaa"), + null + ) + .toString(); + + assertEquals( + "from 'Users' where vector.search(embedding.text_i8(EmbeddingSingles), $p0, null, null)", + query + ); + + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @EnableOn70Server + @Test + public void shouldGenerateRqlForVectorSearchUsingPropertySelectorForEmbeddingField() { + VectorEmbeddingFieldFactory vectorFieldFactory = new VectorEmbeddingFieldFactory(); + VectorEmbeddingFieldValueFactory valueFactory = new VectorEmbeddingFieldValueFactory(); + + try (IDocumentStore store = getDocumentStore(); + IDocumentSession session = store.openSession()) { + + String query = session.query(User.class) + .vectorSearch( + vectorFieldFactory.withEmbedding("EmbeddingSingles", null).getFieldName(), + () -> valueFactory.byEmbedding(new Float[]{0.1f, 0.2f, 0.3f}), + null + ) + .toString(); + + assertEquals( + "from 'Users' where vector.search(EmbeddingSingles, $p0, null, null)", + query + ); + + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @EnableOn70Server + @Test + public void shouldGenerateRqlForVectorSearchWithPropertySelectorAndExplicitInt8Quantization() { + VectorEmbeddingFieldFactory vectorFieldFactory = new VectorEmbeddingFieldFactory(); + VectorEmbeddingFieldValueFactory valueFactory = new VectorEmbeddingFieldValueFactory(); + IVectorOptions options = 
new IVectorOptions(); + options.setSimilarity(0.75); + + try (IDocumentStore store = getDocumentStore(); + IDocumentSession session = store.openSession()) { + String query = session.query(User.class) + .vectorSearch( + vectorFieldFactory.withEmbedding("EmbeddingSBytes", VectorEmbeddingType.INT8).getFieldName(), + () -> valueFactory.byEmbedding(new Integer[]{1, 2, 3}), + options + ) + .toString(); + + assertEquals( + "from 'Users' where vector.search(embedding.i8(EmbeddingSBytes), $p0, 0.75, null)", + query + ); + + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @EnableOn70Server + @Test + public void shouldGenerateRqlForVectorSearchWithPropertySelectorAndExplicitBinaryQuantization() { + VectorEmbeddingFieldFactory vectorFieldFactory = new VectorEmbeddingFieldFactory(); + VectorEmbeddingFieldValueFactory valueFactory = new VectorEmbeddingFieldValueFactory(); + + try (IDocumentStore store = getDocumentStore(); + IDocumentSession session = store.openSession()) { + + String query = session.query(User.class) + .vectorSearch( + vectorFieldFactory.withEmbedding("EmbeddingBinary", VectorEmbeddingType.BINARY).getFieldName(), + ()-> valueFactory.byEmbedding(new Integer[]{0, 1, 0, 1}), + null + ) + .toString(); + + assertEquals( + "from 'Users' where vector.search(embedding.i1(EmbeddingBinary), $p0, null, null)", + query + ); + + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @EnableOn70Server + @Test + public void shouldGenerateRqlForVectorSearchWithPropertySelectorForTextFieldConversion() { + VectorEmbeddingFieldFactory vectorFieldFactory = new VectorEmbeddingFieldFactory(); + VectorEmbeddingFieldValueFactory valueFactory = new VectorEmbeddingFieldValueFactory(); + + try (IDocumentStore store = getDocumentStore(); + IDocumentSession session = store.openSession()) { + + String query = session.query(User.class) + .vectorSearch( + vectorFieldFactory.withText("TextualValue").getFieldName(), + () -> valueFactory.byText("search text"), 
+ null + ) + .toString(); + + assertEquals( + "from 'Users' where vector.search(embedding.text(TextualValue), $p0, null, null)", + query + ); + + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @EnableOn70Server + @Test + public void shouldGenerateRqlForVectorSearchWithTextFieldUsingNamedAiTask() { + VectorEmbeddingFieldFactory vectorFieldFactory = new VectorEmbeddingFieldFactory(); + VectorEmbeddingFieldValueFactory valueFactory = new VectorEmbeddingFieldValueFactory(); + + try (IDocumentStore store = getDocumentStore(); + IDocumentSession session = store.openSession()) { + + String query = session.query(User.class) + .vectorSearch( + vectorFieldFactory.withText("TextualValue") + .usingTask("taskId-123") + .getFieldName(), + () -> valueFactory.byText("query text"), + null + ) + .toString(); + + assertEquals( + "from 'Users' where vector.search(embedding.text(TextualValue, ai.task('taskId-123')), $p0, null, null)", + query + ); + + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @EnableOn70Server + @Test + public void shouldGenerateRqlForVectorSearchWithBase64FieldUsingPropertySelector() { + VectorEmbeddingFieldFactory vectorFieldFactory = new VectorEmbeddingFieldFactory(); + VectorEmbeddingFieldValueFactory valueFactory = new VectorEmbeddingFieldValueFactory(); + + try (IDocumentStore store = getDocumentStore(); + IDocumentSession session = store.openSession()) { + + String query = session.query(User.class) + .vectorSearch( + vectorFieldFactory.withBase64("EmbeddingBase64", null).getFieldName(), + ()-> valueFactory.byBase64("aGVsbG8="), + null + ) + .toString(); + + assertEquals( + "from 'Users' where vector.search(EmbeddingBase64, $p0, null, null)", + query + ); + + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @EnableOn70Server + @Test + public void shouldGenerateRqlForVectorSearchWithSingleToInt8ConversionQuantization() { + VectorEmbeddingFieldFactory vectorFieldFactory = new 
VectorEmbeddingFieldFactory(); + VectorEmbeddingFieldValueFactory valueFactory = new VectorEmbeddingFieldValueFactory(); + + try (IDocumentStore store = getDocumentStore(); + IDocumentSession session = store.openSession()) { + + String query = session.query(User.class) + .vectorSearch( + vectorFieldFactory.withEmbedding("EmbeddingSingles", null) + .targetQuantization(VectorEmbeddingType.INT8) + .getFieldName(), + () -> valueFactory.byEmbedding(new Float[]{0.1f, 0.2f, 0.3f}), + null + ) + .toString(); + + assertEquals( + "from 'Users' where vector.search(embedding.f32_i8(EmbeddingSingles), $p0, null, null)", + query + ); + + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @EnableOn70Server + @Test + public void shouldGenerateRqlForVectorSearchWithSingleToBinaryConversionQuantization() { + VectorEmbeddingFieldFactory vectorFieldFactory = new VectorEmbeddingFieldFactory(); + VectorEmbeddingFieldValueFactory valueFactory = new VectorEmbeddingFieldValueFactory(); + + try (IDocumentStore store = getDocumentStore(); + IDocumentSession session = store.openSession()) { + + String query = session.query(User.class) + .vectorSearch( + vectorFieldFactory.withEmbedding("EmbeddingSingles", null) + .targetQuantization(VectorEmbeddingType.BINARY) + .getFieldName(), + () -> valueFactory.byEmbedding(new Float[]{0.1f, 0.2f, 0.3f}), + null + ) + .toString(); + + assertEquals( + "from 'Users' where vector.search(embedding.f32_i1(EmbeddingSingles), $p0, null, null)", + query + ); + + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @EnableOn70Server + @Test + public void shouldGenerateRqlForVectorSearchWithTextFieldAndInt8TargetQuantization() { + VectorEmbeddingFieldFactory vectorFieldFactory = new VectorEmbeddingFieldFactory(); + VectorEmbeddingFieldValueFactory valueFactory = new VectorEmbeddingFieldValueFactory(); + + try (IDocumentStore store = getDocumentStore(); + IDocumentSession session = store.openSession()) { + + String query = 
session.query(User.class) + .vectorSearch( + vectorFieldFactory.withText("TextualValue") + .targetQuantization(VectorEmbeddingType.INT8) + .getFieldName(), + () -> valueFactory.byText("query text"), + null + ) + .toString(); + + assertEquals( + "from 'Users' where vector.search(embedding.text_i8(TextualValue), $p0, null, null)", + query + ); + + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @EnableOn70Server + @Test + public void shouldGenerateRqlForVectorSearchWithTextAiTaskAndBinaryQuantization() { + VectorEmbeddingFieldFactory vectorFieldFactory = new VectorEmbeddingFieldFactory(); + VectorEmbeddingFieldValueFactory valueFactory = new VectorEmbeddingFieldValueFactory(); + + try (IDocumentStore store = getDocumentStore(); + IDocumentSession session = store.openSession()) { + + String query = session.query(User.class) + .vectorSearch( + vectorFieldFactory.withText("TextualValue") + .usingTask("openai-embeddings") + .targetQuantization(VectorEmbeddingType.BINARY) + .getFieldName(), + () -> valueFactory.byText("query text"), + null + ) + .toString(); + + assertEquals( + "from 'Users' where vector.search(embedding.text_i1(TextualValue, ai.task('openai-embeddings')), $p0, null, null)", + query + ); + + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @EnableOn70Server + @Test + public void shouldGenerateRqlForVectorSearchWithWithFieldMethodAndPropertySelector() { + VectorEmbeddingFieldFactory vectorFieldFactory = new VectorEmbeddingFieldFactory(); + VectorEmbeddingFieldValueFactory valueFactory = new VectorEmbeddingFieldValueFactory(); + IVectorOptions options = new IVectorOptions(); + options.setNumberOfCandidates(20); + + try (IDocumentStore store = getDocumentStore(); + IDocumentSession session = store.openSession()) { + String query = session.query(User.class) + .vectorSearch( + vectorFieldFactory.withField("EmbeddingSingles").getFieldName(), + () -> valueFactory.byEmbedding(new Float[]{0.1f, 0.2f, 0.3f}), + options + ) + 
.toString(); + + assertEquals( + "from 'Users' where vector.search(EmbeddingSingles, $p0, null, 20)", + query + ); + + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @EnableOn70Server + @Test + public void shouldGenerateRqlForVectorSearchWithExactMatchingParameter() { + VectorEmbeddingFieldFactory vectorFieldFactory = new VectorEmbeddingFieldFactory(); + VectorEmbeddingFieldValueFactory valueFactory = new VectorEmbeddingFieldValueFactory(); + IVectorOptions options = new IVectorOptions(); + options.setIsExact(true); + + try (IDocumentStore store = getDocumentStore(); + IDocumentSession session = store.openSession()) { + String query = session.query(User.class) + .vectorSearch( + vectorFieldFactory.withField("VectorField").getFieldName(), + () -> valueFactory.byEmbedding(new Float[]{0.3f, 0.4f, 0.5f}), + options + ) + .toString(); + + assertEquals( + "from 'Users' where exact(vector.search(VectorField, $p0, null, null))", + query + ); + + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @EnableOn70Server + @Test + public void shouldGenerateRqlForVectorSearchWithSimilarityCandidatesAndExactParameters() { + VectorEmbeddingFieldFactory vectorFieldFactory = new VectorEmbeddingFieldFactory(); + VectorEmbeddingFieldValueFactory valueFactory = new VectorEmbeddingFieldValueFactory(); + IVectorOptions options = new IVectorOptions(); + options.setSimilarity(0.75); + options.setNumberOfCandidates(50); + options.setIsExact(true); + + try (IDocumentStore store = getDocumentStore(); + IDocumentSession session = store.openSession()) { + String query = session.query(User.class) + .vectorSearch( + vectorFieldFactory.withField("VectorField").getFieldName(), + () -> valueFactory.byEmbedding(new Float[]{0.3f, 0.4f, 0.5f}), + options + ) + .toString(); + + assertEquals( + "from 'Users' where exact(vector.search(VectorField, $p0, 0.75, 50))", + query + ); + + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @EnableOn70Server + 
@Test + public void shouldGenerateRqlForVectorSearchWithExactParameterAndEmbeddingField() { + VectorEmbeddingFieldFactory vectorFieldFactory = new VectorEmbeddingFieldFactory(); + VectorEmbeddingFieldValueFactory valueFactory = new VectorEmbeddingFieldValueFactory(); + IVectorOptions options = new IVectorOptions(); + options.setIsExact(true); + + try (IDocumentStore store = getDocumentStore(); + IDocumentSession session = store.openSession()) { + String query = session.query(User.class) + .vectorSearch( + vectorFieldFactory.withEmbedding("EmbeddingSingles",null).getFieldName(), + () -> valueFactory.byEmbedding(new Float[]{0.1f, 0.2f, 0.3f}), + options + ) + .toString(); + + assertEquals( + "from 'Users' where exact(vector.search(EmbeddingSingles, $p0, null, null))", + query + ); + + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @EnableOn70Server + @Test + public void shouldGenerateRqlForVectorSearchWithExactParameterAndTextEmbeddingWithSimilarity() { + VectorEmbeddingFieldFactory vectorFieldFactory = new VectorEmbeddingFieldFactory(); + VectorEmbeddingFieldValueFactory valueFactory = new VectorEmbeddingFieldValueFactory(); + IVectorOptions options = new IVectorOptions(); + options.setSimilarity(0.8); + options.setIsExact(true); + + try (IDocumentStore store = getDocumentStore(); + IDocumentSession session = store.openSession()) { + String query = session.query(User.class) + .vectorSearch( + vectorFieldFactory.withText("TextualValue").getFieldName(), + () -> valueFactory.byText("query text"), + options + ) + .toString(); + + assertEquals( + "from 'Users' where exact(vector.search(embedding.text(TextualValue), $p0, 0.8, null))", + query + ); + + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @EnableOn70Server + @Test + public void shouldGenerateRqlForVectorSearchWithMultipleTextQueriesAsInput() { + VectorEmbeddingFieldFactory vectorFieldFactory = new VectorEmbeddingFieldFactory(); + VectorEmbeddingFieldValueFactory 
valueFactory = new VectorEmbeddingFieldValueFactory(); + IVectorOptions options = new IVectorOptions(); + options.setSimilarity(0.75); + + try (IDocumentStore store = getDocumentStore(); + IDocumentSession session = store.openSession()) { + String query = session.query(User.class) + .vectorSearch( + vectorFieldFactory.withText("TextualValue").getFieldName(), + () -> valueFactory.byTexts(new String[]{"first query", "second query"}), + options + ) + .toString(); + + assertEquals( + "from 'Users' where vector.search(embedding.text(TextualValue), $p0, 0.75, null)", + query + ); + + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @EnableOn70Server + @Test + public void shouldGenerateRqlForVectorSearchWithMultipleEmbeddingVectorsAsInput() { + VectorEmbeddingFieldFactory vectorFieldFactory = new VectorEmbeddingFieldFactory(); + VectorEmbeddingFieldValueFactory valueFactory = new VectorEmbeddingFieldValueFactory(); + IVectorOptions options = new IVectorOptions(); + options.setNumberOfCandidates(30); + + try (IDocumentStore store = getDocumentStore(); + IDocumentSession session = store.openSession()) { + String query = session.query(User.class) + .vectorSearch( + vectorFieldFactory.withField("EmbeddingSingles").getFieldName(), + () -> valueFactory.byEmbeddings(new Float[][]{ + {0.1f, 0.2f, 0.3f}, + {0.4f, 0.5f, 0.6f} + }), + options + ) + .toString(); + + assertEquals( + "from 'Users' where vector.search(EmbeddingSingles, $p0, null, 30)", + query + ); + + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @EnableOn70Server + @Test + public void shouldGenerateRqlForVectorSearchWithMultipleEmbeddingsAndInt8Quantization() { + VectorEmbeddingFieldFactory vectorFieldFactory = new VectorEmbeddingFieldFactory(); + VectorEmbeddingFieldValueFactory valueFactory = new VectorEmbeddingFieldValueFactory(); + + try (IDocumentStore store = getDocumentStore(); + IDocumentSession session = store.openSession()) { + + String query = 
session.query(User.class) + .vectorSearch( + vectorFieldFactory.withEmbedding("EmbeddingSBytes", VectorEmbeddingType.INT8) + .targetQuantization(VectorEmbeddingType.INT8) + .getFieldName(), + () -> valueFactory.byEmbeddings(new Integer[][]{ + {1, 2, 3}, + {4, 5, 6} + }), + null + ) + .toString(); + + assertEquals( + "from 'Users' where vector.search(embedding.i8(EmbeddingSBytes), $p0, null, null)", + query + ); + + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @EnableOn70Server + @Test + public void shouldGenerateRqlForVectorSearchWithMultipleTextsAiTaskAndBinaryQuantization() { + VectorEmbeddingFieldFactory vectorFieldFactory = new VectorEmbeddingFieldFactory(); + VectorEmbeddingFieldValueFactory valueFactory = new VectorEmbeddingFieldValueFactory(); + IVectorOptions options = new IVectorOptions(); + options.setIsExact(true); + + try (IDocumentStore store = getDocumentStore(); + IDocumentSession session = store.openSession()) { + String query = session.query(User.class) + .vectorSearch( + vectorFieldFactory.withText("TextualValue") + .usingTask("openai-embeddings") + .targetQuantization(VectorEmbeddingType.BINARY) + .getFieldName(), + () -> valueFactory.byTexts(new String[]{"query one", "query two", "query three"}), + options + ) + .toString(); + + assertEquals( + "from 'Users' where exact(vector.search(embedding.text_i1(TextualValue, ai.task('openai-embeddings')), $p0, null, null))", + query + ); + + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @EnableOn70Server + @Test + public void shouldGenerateRqlForVectorSearchWithFieldNameAsString() { + VectorEmbeddingFieldValueFactory valueFactory = new VectorEmbeddingFieldValueFactory(); + + try (IDocumentStore store = getDocumentStore(); + IDocumentSession session = store.openSession()) { + + String query = session.query(User.class) + .vectorSearch( + "VectorField", + () -> valueFactory.byEmbedding(new Float[]{0.3f, 0.4f, 0.5f}), + null // options + ) + .toString(); + + 
assertEquals( + "from 'Users' where vector.search(VectorField, $p0, null, null)", + query + ); + + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @EnableOn70Server + @Test + public void shouldGenerateRqlForVectorSearchWithFieldNameAsStringAndOptions() { + VectorEmbeddingFieldValueFactory valueFactory = new VectorEmbeddingFieldValueFactory(); + IVectorOptions options = new IVectorOptions(); + options.setSimilarity(0.75); + options.setNumberOfCandidates(20); + + try (IDocumentStore store = getDocumentStore(); + IDocumentSession session = store.openSession()) { + String query = session.query(User.class) + .vectorSearch( + "EmbeddingSingles", + () -> valueFactory.byEmbedding(new Float[]{0.1f, 0.2f, 0.3f}), + options + ) + .toString(); + + assertEquals( + "from 'Users' where vector.search(EmbeddingSingles, $p0, 0.75, 20)", + query + ); + + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @EnableOn70Server + @Test + public void shouldGenerateRqlForVectorSearchWithFieldNameAsStringAndExactParameter() { + VectorEmbeddingFieldValueFactory valueFactory = new VectorEmbeddingFieldValueFactory(); + IVectorOptions options = new IVectorOptions(); + options.setIsExact(true); + + try (IDocumentStore store = getDocumentStore(); + IDocumentSession session = store.openSession()) { + String query = session.query(User.class) + .vectorSearch( + "VectorField", + () -> valueFactory.byEmbedding(new Float[]{0.3f, 0.4f, 0.5f}), + options + ) + .toString(); + + assertEquals( + "from 'Users' where exact(vector.search(VectorField, $p0, null, null))", + query + ); + + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @EnableOn70Server + @Test + public void shouldGenerateRqlForVectorSearchWithFieldNameAsStringAndMultipleEmbeddings() { + VectorEmbeddingFieldValueFactory valueFactory = new VectorEmbeddingFieldValueFactory(); + IVectorOptions options = new IVectorOptions(); + options.setSimilarity(0.8); + + try (IDocumentStore store = 
getDocumentStore(); + IDocumentSession session = store.openSession()) { + String query = session.query(User.class) + .vectorSearch( + "EmbeddingSingles", + () -> valueFactory.byEmbeddings(new Float[][]{ + {0.1f, 0.2f, 0.3f}, + {0.4f, 0.5f, 0.6f} + }), + options + ) + .toString(); + + assertEquals( + "from 'Users' where vector.search(EmbeddingSingles, $p0, 0.8, null)", + query + ); + + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @EnableOn70Server + @Test + public void shouldGenerateRqlForVectorSearchWithFieldNameAsStringAndByTextFactory() { + VectorEmbeddingFieldValueFactory valueFactory = new VectorEmbeddingFieldValueFactory(); + + try (IDocumentStore store = getDocumentStore(); + IDocumentSession session = store.openSession()) { + + String query = session.query(User.class) + .vectorSearch( + "TextualValue", + () -> valueFactory.byText("query text"), + null + ) + .toString(); + + assertEquals( + "from 'Users' where vector.search(TextualValue, $p0, null, null)", + query + ); + + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @EnableOn70Server + @Test + public void shouldCreateIndexDefinitionWithVectorSearchFieldAndProperConfiguration() { + try (IDocumentStore store = getDocumentStore()) { + setupIndexDefinition(store); + IndexDefinition[] indexDefinitions = store.maintenance() + .send(new GetIndexesOperation(0, 10)); + + assertThat(indexDefinitions).hasSizeGreaterThan(0); + + IndexDefinition indexDef = indexDefinitions[0]; + + assertThat(indexDef.getName()).isEqualTo("Users/ByEmbeddingSingles"); + assertThat(indexDef.getType().name()).isEqualTo("MAP"); + assertThat(indexDef.getConfiguration().get("Indexing.Static.SearchEngineType")).isEqualTo("Corax"); + + IndexFieldOptions vectorField = indexDef.getFields().get("FirstName"); + VectorFieldOptions v = vectorField.getVector(); + + assertThat(v.getSourceEmbeddingType()).isEqualTo(VectorEmbeddingType.TEXT); + 
assertThat(v.getDestinationEmbeddingType()).isEqualTo(VectorEmbeddingType.SINGLE); + assertThat(v.getNumberOfEdges()).isEqualTo(23); + assertThat(v.getNumberOfCandidatesForIndexing()).isEqualTo(20); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @EnableOn70Server + @Test + public void shouldCreateIndexWithVectorSearchConfigurationUsingClassBasedDefinition() { + try (IDocumentStore store = getDocumentStore()) { + setupIndexClass(store); + IndexDefinition[] indexDefinitions = store.maintenance() + .send(new GetIndexesOperation(0, 10)); + + assertThat(indexDefinitions).hasSize(1); + + IndexDefinition indexDefinition = indexDefinitions[0]; + assertThat(indexDefinition.getName()).isEqualTo("Users/ByEmbeddingSingles"); + assertThat(indexDefinition.getType()).isEqualTo(IndexType.JAVA_SCRIPT_MAP); + assertThat(indexDefinition.getConfiguration().get("Indexing.Static.SearchEngineType")) + .isEqualTo("Corax"); + + IndexFieldOptions vectorField = indexDefinition.getFields().get("vectorField"); + assertThat(vectorField.getVector().getSourceEmbeddingType()).isEqualTo(VectorEmbeddingType.TEXT); + assertThat(vectorField.getVector().getDestinationEmbeddingType()).isEqualTo(VectorEmbeddingType.SINGLE); + assertThat(vectorField.getVector().getNumberOfEdges()).isEqualTo(33); + assertThat(vectorField.getVector().getNumberOfCandidatesForIndexing()).isEqualTo(43); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + private static void setupIndexDefinition(IDocumentStore store) { + IndexDefinition indexDefinition = new IndexDefinition(); + indexDefinition.setName("Users/ByEmbeddingSingles"); + + indexDefinition.setMaps(Collections.singleton( + "from doc in docs.Users " + + "select new { " + + " doc.EmbeddingSingles, " + + " EmbeddingSinglesVector = CreateVector(doc.EmbeddingSingles) " + + "}" + )); + + + IndexFieldOptions fieldOptions = new IndexFieldOptions(); + VectorFieldOptions vectorOptions = new VectorFieldOptions(); + 
vectorOptions.setNumberOfEdges(23); + vectorOptions.setNumberOfCandidatesForIndexing(20); + vectorOptions.setSourceEmbeddingType(VectorEmbeddingType.TEXT); + vectorOptions.setDestinationEmbeddingType(VectorEmbeddingType.SINGLE); + fieldOptions.setVector(vectorOptions); + + indexDefinition.getFields().put("FirstName", fieldOptions); + + indexDefinition.getConfiguration().put("Indexing.Static.SearchEngineType", "Corax"); + + PutIndexesOperation putIndexesOperation = new PutIndexesOperation(indexDefinition); + PutIndexResult[] results = store.maintenance().send(putIndexesOperation); + assertThat(results).hasSize(1); + assertThat(results[0].getIndex()).isEqualTo(indexDefinition.getName()); + } + + private class Users_ByEmbeddingSingles extends AbstractJavaScriptIndexCreationTask { + public Users_ByEmbeddingSingles() { + super(); + setMaps(Sets.newHashSet( + "map('Users', function (doc) { " + + " return { " + + " EmbeddingSingles: doc.EmbeddingSingles, " + + " EmbeddingSinglesVector: createVector(doc.EmbeddingSingles) " + + " }; " + + "})" + )); + + IndexFieldOptions vectorOptions = new IndexFieldOptions(); + VectorFieldOptions fieldVectorOptions = new VectorFieldOptions(); + fieldVectorOptions.setNumberOfEdges(33); + fieldVectorOptions.setNumberOfCandidatesForIndexing(43); + fieldVectorOptions.setSourceEmbeddingType(VectorEmbeddingType.TEXT); + fieldVectorOptions.setDestinationEmbeddingType(VectorEmbeddingType.SINGLE); + vectorOptions.setVector(fieldVectorOptions); + this.getFields().put("vectorField", vectorOptions); + this.getConfiguration().put("Indexing.Static.SearchEngineType", "Corax"); + } + } + + private void setupIndexClass(IDocumentStore store) { + Users_ByEmbeddingSingles dtoIndex = new Users_ByEmbeddingSingles(); + dtoIndex.execute(store); + } +} \ No newline at end of file diff --git a/src/test/java/net/ravendb/client/infrastructure/EnableOn70Server.java b/src/test/java/net/ravendb/client/infrastructure/EnableOn70Server.java new file mode 100644 index 
00000000..32c5f459
--- /dev/null
+++ b/src/test/java/net/ravendb/client/infrastructure/EnableOn70Server.java
@@ -0,0 +1,18 @@
+package net.ravendb.client.infrastructure;
+
+import org.junit.jupiter.api.extension.ExtendWith;
+
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+/**
+ * Marks a test class or test method whose execution is decided by
+ * {@link EnableOn70ServerCondition}.
+ */
+@Target({ ElementType.TYPE, ElementType.METHOD })
+@Retention(RetentionPolicy.RUNTIME)
+@ExtendWith(EnableOn70ServerCondition.class)
+public @interface EnableOn70Server {
+}
diff --git a/src/test/java/net/ravendb/client/infrastructure/EnableOn70ServerCondition.java b/src/test/java/net/ravendb/client/infrastructure/EnableOn70ServerCondition.java
new file mode 100644
index 00000000..fa813ed0
--- /dev/null
+++ b/src/test/java/net/ravendb/client/infrastructure/EnableOn70ServerCondition.java
@@ -0,0 +1,60 @@
+package net.ravendb.client.infrastructure;
+
+import org.apache.commons.lang3.StringUtils;
+import org.junit.jupiter.api.extension.ConditionEvaluationResult;
+import org.junit.jupiter.api.extension.ExecutionCondition;
+import org.junit.jupiter.api.extension.ExtensionContext;
+
+/**
+ * JUnit execution condition backing {@link EnableOn70Server}: enables a test only when the
+ * {@code RAVENDB_SERVER_VERSION} environment variable names a server version of 7.0 or newer.
+ */
+public class EnableOn70ServerCondition implements ExecutionCondition {
+
+    public static final String ENV_RAVENDB_SERVER_VERSION = "RAVENDB_SERVER_VERSION";
+
+    private static final int MIN_MAJOR_VERSION = 7;
+
+    /**
+     * Decides whether the annotated test runs, based on {@link #ENV_RAVENDB_SERVER_VERSION}.
+     *
+     * @param extensionContext current JUnit extension context (not consulted)
+     * @return disabled when the variable is unset, empty, or below 7.0; enabled otherwise
+     */
+    @Override
+    public ConditionEvaluationResult evaluateExecutionCondition(ExtensionContext extensionContext) {
+        String ravenServerVersion = System.getenv(ENV_RAVENDB_SERVER_VERSION);
+
+        if (!isAtLeastMinimumVersion(ravenServerVersion)) {
+            return ConditionEvaluationResult.disabled("Test disabled on server lower than 7.0");
+        }
+
+        return ConditionEvaluationResult.enabled("Test enabled");
+    }
+
+    /**
+     * Checks the leading numeric (major) component of {@code version} against the 7.x minimum.
+     * A plain string comparison would order "10.0" before "7.0", so the major is compared as a number.
+     */
+    private static boolean isAtLeastMinimumVersion(String version) {
+        if (StringUtils.isEmpty(version)) {
+            return false;
+        }
+
+        int end = 0;
+        while (end < version.length() && version.charAt(end) >= '0' && version.charAt(end) <= '9') {
+            end++;
+        }
+
+        if (end > 0) {
+            try {
+                return Integer.parseInt(version.substring(0, end)) >= MIN_MAJOR_VERSION;
+            } catch (NumberFormatException ignored) {
+                // digit run too long for an int; fall through to the textual comparison
+            }
+        }
+
+        // No usable numeric major (e.g. a non-numeric tag): keep the previous lexicographic behaviour.
+        return version.compareTo("7.0") >= 0;
+    }
+}