Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/alpine3.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ jobs:
with:
container: alpine:3
pre-checkout-script: apk add bash git
run-valgrind: true
run-asan: true
2 changes: 1 addition & 1 deletion .github/workflows/arm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
uses: ./.github/workflows/task-unit-test.yml
with:
env: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
run-valgrind: false # run the job without valgrind
run-asan: false # run the job without AddressSanitizer

stop-runner:
name: Stop self-hosted EC2 runner
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/flow-temp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,23 +15,23 @@ jobs:
# uses: ./.github/workflows/task-unit-test.yml
# with:
# container: ubuntu:jammy
# run-valgrind: true
# run-asan: true
# alpine3:
# uses: ./.github/workflows/task-unit-test.yml
# with:
# container: alpine:3
# pre-checkout-script: apk add bash
# run-valgrind: true
# run-asan: true
# focal:
# uses: ./.github/workflows/task-unit-test.yml
# with:
# container: ubuntu:focal
# run-valgrind: false
# run-asan: false
# bionic:
# uses: ./.github/workflows/task-unit-test.yml
# with:
# container: ubuntu:focal
# run-valgrind: false
# run-asan: false
# bullseye:
# uses: ./.github/workflows/task-unit-test.yml
# with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@ jobs:
uses: ./.github/workflows/task-unit-test.yml
with:
env: macos-latest
run-valgrind: false
run-asan: false
2 changes: 1 addition & 1 deletion .github/workflows/mariner2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ jobs:
with:
container: mcr.microsoft.com/cbl-mariner/base/core:2.0
pre-checkout-script: tdnf install -y --noplugins --skipsignature tar gzip ca-certificates git
run-valgrind: false # TODO: enable valgrind? (requires to install valgrind)
run-asan: false # TODO: enable AddressSanitizer? (requires clang)
16 changes: 8 additions & 8 deletions .github/workflows/task-unit-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ on:
pre-checkout-script:
description: 'Script to run before checkout'
type: string
run-valgrind:
description: 'Run valgrind tests'
run-asan:
description: 'Run AddressSanitizer tests'
type: boolean
default: true

Expand Down Expand Up @@ -67,12 +67,12 @@ jobs:

- name: unit tests
run: make unit_test
- name: valgrind
if: ${{ inputs.run-valgrind }}
run: make valgrind
- name: Archive valgrind tests reports
if: ${{ inputs.run-valgrind && failure() }}
- name: AddressSanitizer tests
if: ${{ inputs.run-asan }}
run: make asan
- name: Archive AddressSanitizer test reports
if: ${{ inputs.run-asan && failure() }}
uses: actions/upload-artifact@v4
with:
name: valgrind tests reports on ${{ steps.artifact-name.outputs.name }}
name: AddressSanitizer test reports on ${{ steps.artifact-name.outputs.name }}
path: bin/Linux-x86_64-debug/unit_tests/Testing/Temporary/
31 changes: 13 additions & 18 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@ ifneq ($(filter coverage show-cov upload-cov,$(MAKECMDGOALS)),)
COV=1
endif

ifneq ($(VG),)
VALGRIND=$(VG)
endif

ifeq ($(VALGRIND),1)
ifneq ($(ASAN),1)
# ASAN is not enabled
else
# ASAN is enabled - default to a debug build (?= keeps any DEBUG value already set) and set SAN=address
override DEBUG ?= 1
override SAN = address
endif

ifeq ($(COV),1)
Expand All @@ -38,7 +38,7 @@ endif

ifeq ($(SAN),memory)
CMAKE_SAN=-DUSE_MSAN=ON
override CTEST_ARGS += --exclude-regex BruteForceTest.sanity_rinsert_1280
#override CTEST_ARGS += --exclude-regex BruteForceTest.sanity_rinsert_1280

else ifeq ($(SAN),address)
CMAKE_SAN=-DUSE_ASAN=ON
Expand All @@ -60,7 +60,7 @@ make build
DEBUG=1 # build debug variant
COV=1 # build for code coverage
VERBOSE=1 # print detailed build info
VG|VALGRIND=1 # build for Valgrind
ASAN=1 # build with AddressSanitizer (clang)
SAN=type # build with LLVM sanitizer (type=address|memory|leak|thread)
SLOW=1 # don't run build in parallel (for diagnostics)
PROFILE=1 # enable profiling compile flags (and debug symbols) for release type.
Expand All @@ -70,9 +70,9 @@ make clean # remove binary files

make unit_test # run unit tests
CTEST_ARGS=args # extra CTest arguments
VG|VALGRIND=1 # run tests with valgrind
ASAN=1 # run tests with AddressSanitizer
FP_64=1 # run tests with 64-bit floating point
make valgrind # build for Valgrind and run tests
make asan # build with AddressSanitizer and run tests
make flow_test # run flow tests (with pytest)
TEST=file::name # run specific test
VERBOSE=1 # print detailed bindings build info
Expand Down Expand Up @@ -173,23 +173,18 @@ ifeq ($(VERBOSE),1)
_CTEST_ARGS += -V
endif

ifeq ($(VALGRIND),1)
_CTEST_ARGS += \
-T memcheck \
--overwrite MemoryCheckCommandOptions="--leak-check=full --fair-sched=yes --error-exitcode=255"
CMAKE_FLAGS += -DUSE_VALGRIND=ON
endif
# AddressSanitizer is enabled via SAN=address, which sets -DUSE_ASAN=ON (handled in cmake/san.cmake)

unit_test:
$(SHOW)mkdir -p $(BINDIR)
$(SHOW)cd $(BINDIR) && cmake $(CMAKE_FLAGS) $(CMAKE_DIR)
@make --no-print-directory -C $(BINDIR) $(MAKE_J)
$(SHOW)cd $(TESTDIR) && GTEST_COLOR=1 ctest $(_CTEST_ARGS)

valgrind:
$(SHOW)$(MAKE) VG=1 unit_test
asan:
$(SHOW)$(MAKE) ASAN=1 unit_test

.PHONY: unit_test valgrind
.PHONY: unit_test asan

#----------------------------------------------------------------------------------------------
ifeq ($(VERBOSE),1)
Expand Down
6 changes: 3 additions & 3 deletions cmake/san.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ option(USE_ASAN "Use AddressSanitizer (clang)" OFF)
option(USE_MSAN "Use MemorySanitizer (clang)" OFF)
if (USE_ASAN OR USE_MSAN)
# define this before project()
find_file(CMAKE_C_COMPILER "clang")
find_file(CMAKE_CXX_COMPILER "clang++")
find_file(CMAKE_C_COMPILER "clang-18")
find_file(CMAKE_CXX_COMPILER "clang++-18")
set(CMAKE_LINKER "${CMAKE_C_COMPILER}")

if (USE_ASAN)
set(CLANG_SAN_FLAGS "-fno-omit-frame-pointer -fsanitize=address")
set(CLANG_SAN_FLAGS "-fno-omit-frame-pointer -fsanitize=address -fsized-deallocation")

elseif (USE_MSAN)
set(CLANG_SAN_FLAGS "-fno-omit-frame-pointer -fsanitize=memory -fsanitize-memory-track-origins=2")
Expand Down
10 changes: 5 additions & 5 deletions cmake/svs.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,11 @@ if(USE_SVS)
set(SVS_LVQ_SUPPORTED 1)
# Valgrind does not support AVX512, and Valgrind is run in Debug builds,
# so disable AVX512 if we are in Debug mode
string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE)
if(uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG")
message(STATUS "SVS: Disabling AVX512 support in Debug mode due to Valgrind")
set(SVS_NO_AVX512 ON)
endif()
# string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE)
# if(uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG")
# message(STATUS "SVS: Disabling AVX512 support in Debug mode due to Valgrind")
# set(SVS_NO_AVX512 ON)
# endif()
else()
set(SVS_LVQ_SUPPORTED 0)
message(STATUS "SVS LVQ is not supported on this architecture")
Expand Down
34 changes: 20 additions & 14 deletions src/VecSim/vec_sim_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
* @param allocator The allocator to use for the index.
* @param dim The dimension of the vectors in the index.
* @param vecType The type of the vectors in the index.
* @param dataSize The size of stored vectors in bytes.
* @param dataSize The size of stored vectors (possibly after pre-processing) in bytes.
* @param metric The metric to use in the index.
* @param blockSize The block size to use in the index.
* @param multi Determines if the index should multi-index or not.
Expand Down Expand Up @@ -68,18 +68,20 @@ struct IndexComponents {
template <typename DataType, typename DistType>
struct VecSimIndexAbstract : public VecSimIndexInterface {
protected:
size_t dim; // Vector's dimension.
VecSimType vecType; // Datatype to index.
size_t dataSize; // Vector size in bytes
VecSimMetric metric; // Distance metric to use in the index.
size_t blockSize; // Index's vector block size (determines by how many vectors to resize when
// resizing)
size_t dim; // Vector's dimension.
VecSimType vecType; // Datatype to index.
VecSimMetric metric; // Distance metric to use in the index.
size_t inputBlobSize; // The size of the vector input blob in bytes.
size_t blockSize; // Index's vector block size (determines by how many vectors to resize when
// resizing)
IndexCalculatorInterface<DistType> *indexCalculator; // Distance calculator.
PreprocessorsContainerAbstract *preprocessors; // Stroage and query preprocessors.
PreprocessorsContainerAbstract *preprocessors; // Storage and query preprocessors.
mutable VecSearchMode lastMode; // The last search mode in RediSearch (used for debug/testing).
bool isMulti; // Determines if the index should multi-index or not.
void *logCallbackCtx; // Context for the log callback.

size_t dataSize; // Vector element data size in bytes to be stored
// (possibly after pre-processing and different from inputBlobSize).
RawDataContainer *vectors; // The raw vectors data container.

/**
Expand All @@ -105,10 +107,11 @@ struct VecSimIndexAbstract : public VecSimIndexInterface {
VecSimIndexAbstract(const AbstractIndexInitParams &params,
const IndexComponents<DataType, DistType> &components)
: VecSimIndexInterface(params.allocator), dim(params.dim), vecType(params.vecType),
dataSize(params.dataSize), metric(params.metric),
metric(params.metric), inputBlobSize(this->dim * sizeof(DataType)),
blockSize(params.blockSize ? params.blockSize : DEFAULT_BLOCK_SIZE),
indexCalculator(components.indexCalculator), preprocessors(components.preprocessors),
lastMode(EMPTY_MODE), isMulti(params.multi), logCallbackCtx(params.logCtx) {
lastMode(EMPTY_MODE), isMulti(params.multi), logCallbackCtx(params.logCtx),
dataSize(params.dataSize) {
assert(VecSimType_sizeof(vecType));
assert(dataSize);
this->vectors = new (this->allocator) DataBlocksContainer(
Expand Down Expand Up @@ -323,23 +326,26 @@ struct VecSimIndexAbstract : public VecSimIndexInterface {

template <typename DataType, typename DistType>
ProcessedBlobs VecSimIndexAbstract<DataType, DistType>::preprocess(const void *blob) const {
return this->preprocessors->preprocess(blob, this->dataSize);
return this->preprocessors->preprocess(blob, inputBlobSize);
}

template <typename DataType, typename DistType>
MemoryUtils::unique_blob
VecSimIndexAbstract<DataType, DistType>::preprocessQuery(const void *queryBlob,
bool force_copy) const {
return this->preprocessors->preprocessQuery(queryBlob, this->dataSize, force_copy);
// force_copy indicates that we copy a processed blob (e.g., for batch iterator) - hence we're
// currently using the dataSize (post-processed size) as the effective input size.
const auto effective_input_size = force_copy ? dataSize : inputBlobSize;
return this->preprocessors->preprocessQuery(queryBlob, effective_input_size, force_copy);
}

template <typename DataType, typename DistType>
MemoryUtils::unique_blob
VecSimIndexAbstract<DataType, DistType>::preprocessForStorage(const void *original_blob) const {
return this->preprocessors->preprocessForStorage(original_blob, this->dataSize);
return this->preprocessors->preprocessForStorage(original_blob, inputBlobSize);
}

template <typename DataType, typename DistType>
void VecSimIndexAbstract<DataType, DistType>::preprocessStorageInPlace(void *blob) const {
this->preprocessors->preprocessStorageInPlace(blob, this->dataSize);
this->preprocessors->preprocessStorageInPlace(blob, inputBlobSize);
}
7 changes: 2 additions & 5 deletions tests/unit/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,8 @@ if(FP64_TESTS)
add_definitions(-DFP64_TESTS)
endif()

option(USE_VALGRIND "Building for Valgrind" OFF)
if(USE_VALGRIND)
add_definitions(-DRUNNING_ON_VALGRIND)
message(STATUS "Building with RUNNING_ON_VALGRIND")
endif()
# AddressSanitizer is handled via cmake/san.cmake
# No special definitions needed for ASan

if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "(aarch64)|(arm64)|(ARM64)|(armv8)|(armv9)")
include(${root}/cmake/aarch64InstructionFlags.cmake)
Expand Down
9 changes: 4 additions & 5 deletions tests/unit/test_index_test_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ class Int8IndexTestUtilsTest : public IndexTestUtilsTest {
std::vector<std::vector<int8_t>> vectors;
void GenerateRandomAndAddVector(size_t label, size_t id) override {
std::vector<int8_t> v(dim);
test_utils::populate_int8_vec(v.data(), dim, id);
test_utils::populate_int8_vec(v.data(), dim, static_cast<int>(id));
VecSimIndex_AddVector(index, v.data(), label);

vectors.emplace_back(v);
Expand Down Expand Up @@ -154,8 +154,8 @@ class Float32IndexTestUtilsTest : public IndexTestUtilsTest {

TEST_P(Int8IndexTestUtilsTest, BF) {
BFParams params = {.type = VecSimType_INT8, .dim = dim};
sleep(10);
SetUp(params);

EXPECT_NO_FATAL_FAILURE(get_stored_vector_data_single_test());
VecSimMetric metric = std::get<1>(GetParam());
if (metric == VecSimMetric_Cosine) {
Expand Down Expand Up @@ -189,7 +189,6 @@ INSTANTIATE_TEST_SUITE_P(Int8IndexTestUtilsTest, Int8IndexTestUtilsTest,
TEST_P(Float32IndexTestUtilsTest, BF) {
BFParams params = {.type = VecSimType_FLOAT32, .dim = dim};
SetUp(params);

EXPECT_NO_FATAL_FAILURE(get_stored_vector_data_single_test());
VecSimMetric metric = std::get<1>(GetParam());
}
Expand All @@ -215,11 +214,11 @@ INSTANTIATE_TEST_SUITE_P(
});

void IndexTestUtilsTest::get_stored_vector_data_single_test() {
size_t n = this->labels_count * this->vec_per_label;
size_t n = IndexTestUtilsTest::labels_count * this->vec_per_label;

// Add vectors to the index
int id = 0;
for (size_t i = 0; i < this->labels_count; i++) {
for (size_t i = 0; i < IndexTestUtilsTest::labels_count; i++) {
for (size_t j = 0; j < vec_per_label; j++) {
this->GenerateRandomAndAddVector(i, id++);
}
Expand Down
Loading