diff --git a/.bazelignore b/.bazelignore new file mode 100644 index 00000000..87508c8e --- /dev/null +++ b/.bazelignore @@ -0,0 +1,4 @@ +#ignore typical cmake build folders +build +out +cmake-build-debug diff --git a/.bazelrc b/.bazelrc index adcf592b..e0af0f7d 100644 --- a/.bazelrc +++ b/.bazelrc @@ -101,9 +101,9 @@ build:ubsan --linkopt="-lubsan" test:ubsan --run_under=//tools/runners/sanitizers/ubsan # MSAN is disabled for now, as there are false positives and we can't suppress them easily. -#build:msan --config=base-sanitizer -#build:msan --copt="-fsanitize=memory" -#build:msan --linkopt="-fsanitize=memory" -#test:msan --run_under=//tools/runners/sanitizers/msan +build:msan --config=base-sanitizer +build:msan --copt="-fsanitize=memory" +build:msan --linkopt="-fsanitize=memory" +test:msan --run_under=//tools/runners/sanitizers/msan build:lint --define linting_only=true diff --git a/.github/workflows/bazel.yml b/.github/workflows/bazel.yml index 030eaaea..a209c1c0 100644 --- a/.github/workflows/bazel.yml +++ b/.github/workflows/bazel.yml @@ -1,6 +1,6 @@ name: Bazel build -on: [push, pull_request] +on: [ push ] jobs: build: @@ -16,11 +16,21 @@ jobs: uses: actions/checkout@v2 - name: Setup bazel - # install bazelisk to install the appropriate bazel version - run: | - export PATH=$PATH:$HOME/bin && mkdir -p $HOME/bin - wget https://github.com/bazelbuild/bazelisk/releases/download/v1.5.0/bazelisk-linux-amd64 && chmod +x bazelisk-linux-amd64 && mv bazelisk-linux-amd64 $HOME/bin/bazel - wget https://github.com/bazelbuild/buildtools/releases/download/0.22.0/buildifier && chmod +x buildifier && mv buildifier $HOME/bin/ + uses: bazelbuild/setup-bazelisk@v2 + + - name: Mount bazel cache # Optional + uses: actions/cache@v3 + with: + path: "~/.cache/bazel" + key: bazel + + - name: Clang format + shell: bash + run: ./ci/linting/clang-format.sh + + - name: Bazel format + shell: bash + run: ./ci/linting/buildifier.sh - name: Build shell: bash diff --git a/.github/workflows/cmake-codecov.yml b/.github/workflows/cmake-codecov.yml new file mode 100644 index 00000000..b436cb7c --- /dev/null +++ b/.github/workflows/cmake-codecov.yml @@ -0,0 +1,49 @@ +name: CMake Codecov + +on: [ push ] + +env: + BUILD_TYPE: Debug + +defaults: + run: + shell: bash + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + + - uses: hendrikmuhs/ccache-action@v1.2 + + - name: Install lcov + run: sudo apt-get install lcov -y + + - name: Create Build Environment + run: | + cmake -E make_directory ${{github.workspace}}/build + cd build + + - name: Configure CMake + working-directory: ${{github.workspace}}/build + run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DPHTREE_CODE_COVERAGE=ON + + - name: Build + working-directory: ${{github.workspace}}/build + run: cmake --build . --config $BUILD_TYPE -j2 + + - name: Run tests + working-directory: ${{github.workspace}}/build + run: ctest -C $BUILD_TYPE + + - name: Create and upload coverage + working-directory: ${{github.workspace}}/build + run: | + cd test/CMakeFiles/all_tests.dir/ + lcov --directory . 
--capture -o coverage.info + lcov -r coverage.info */build/* */test/* */c++/* */gtest/* -o coverageFiltered.info + lcov --list coverageFiltered.info + bash <(curl -s https://codecov.io/bash) -f coverageFiltered.info || echo "Upload failed" + diff --git a/.github/workflows/cmake-windows.yml b/.github/workflows/cmake-windows.yml new file mode 100644 index 00000000..c376b7a9 --- /dev/null +++ b/.github/workflows/cmake-windows.yml @@ -0,0 +1,32 @@ +name: CMake Windows build + +on: [ push ] + +env: + BUILD_TYPE: Release + +jobs: + build: + runs-on: windows-latest + + steps: + - uses: actions/checkout@v2 + + - uses: hendrikmuhs/ccache-action@v1.2 + + - uses: ilammy/msvc-dev-cmd@v1 + + - name: Create Build Environment + run: cmake -E make_directory ${{github.workspace}}\out + + - name: Configure CMake + working-directory: ${{github.workspace}}\out + run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -S ${{github.workspace}} -B ${{github.workspace}}\out -DPHTREE_BUILD_EXAMPLES=ON -DPHTREE_BUILD_TESTS=ON + + - name: Build + working-directory: ${{github.workspace}}\out + run: cmake --build . --config ${env:BUILD_TYPE} -j2 + + - name: Test + working-directory: ${{github.workspace}}\out + run: ctest -C ${env:BUILD_TYPE} diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 22599941..b909531e 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -1,40 +1,41 @@ name: CMake build -on: [push, pull_request] +on: [ push ] env: BUILD_TYPE: Release +defaults: + run: + shell: bash + jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - - name: Create Build Environment - run: cmake -E make_directory ${{github.workspace}}/build - - - name: Configure CMake - # Use a bash shell so we can use the same syntax for environment variable - # access regardless of the host operating system - shell: bash - working-directory: ${{github.workspace}}/build - # Note the current convention is to use the -S and -B options here to specify source - # and build directories, but this is only available with CMake 3.13 and higher. - # The CMake binaries on the Github Actions machines are (as of this writing) 3.12 - run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE - - - name: Build - working-directory: ${{github.workspace}}/build - shell: bash - # Execute the build. You can specify a specific target with "--target " - run: cmake --build . --config $BUILD_TYPE - - - name: Test - working-directory: ${{github.workspace}}/build - shell: bash - # Execute tests defined by the CMake configuration. - # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail - # TODO Currently tests are run via bazel only. - run: ctest -C $BUILD_TYPE + - uses: actions/checkout@v2 + + - uses: hendrikmuhs/ccache-action@v1.2 + + - name: Create Build Environment + run: cmake -E make_directory ${{github.workspace}}/build + + - name: Configure CMake + working-directory: ${{github.workspace}}/build + # Note the current convention is to use the -S and -B options here to specify source + # and build directories, but this is only available with CMake 3.13 and higher. + # The CMake binaries on the Github Actions machines are (as of this writing) 3.12 + run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DPHTREE_BUILD_ALL=ON + + - name: Build + working-directory: ${{github.workspace}}/build + run: cmake --build . 
--config $BUILD_TYPE -j2 + + - name: Test + working-directory: ${{github.workspace}}/build + run: ctest -C $BUILD_TYPE + + - name: Example + working-directory: ${{github.workspace}}/build + run: examples/Example diff --git a/.github/workflows/codcecov.yml_old b/.github/workflows/codcecov.yml_old new file mode 100644 index 00000000..e5eca13a --- /dev/null +++ b/.github/workflows/codcecov.yml_old @@ -0,0 +1,42 @@ +name: Upload CodeCov Report +on: [ push ] +jobs: + run: + runs-on: windows-latest + name: Build, Test , Upload Code Coverage Report + steps: + - name: Checkout code + uses: actions/checkout@v2 + with: + fetch-depth: ‘2’ + id: checkout_code + - name: Setup MSBuild and add to PATH + uses: microsoft/setup-msbuild@v1.0.2 + id: setup_msbuild + + - name: Generate Solution + run: cmake -G "Visual Studio 17 2022" -A x64 . -DPHTREE_CODE_COVERAGE=ON -DCMAKE_BUILD_TYPE=Debug + + - name: Run MSBuild + id: run_msbuild + run: msbuild /p:Configuration=Debug /p:Platform=x64 /p:gtest_force_shared_crt=on phtree.sln + - name: Setup VSTest and add to PATH + uses: darenm/Setup-VSTest@v1 + id: setup_vstest + + - name: Setup OpenCppCoverage and add to PATH + id: setup_opencppcoverage + run: | + choco install OpenCppCoverage -y + echo "C:\Program Files\OpenCppCoverage" >> $env:GITHUB_PATH + + - name: Generate Report + id: generate_test_report + shell: cmd + run: OpenCppCoverage.exe --modules phtree --export_type cobertura:phtree.xml -- "vstest.console.exe" test\Debug\all_tests.exe + - name: Upload Report to Codecov + uses: codecov/codecov-action@v3 + with: + files: ./phtree.xml + fail_ci_if_error: true + functionalities: fix diff --git a/.gitignore b/.gitignore index 55098c94..ed782f6b 100644 --- a/.gitignore +++ b/.gitignore @@ -4,9 +4,14 @@ !.clang-format !.gitignore !.github +!*.yml bazel-* !bazel-*.sh compile_commands.json perf.data* build +out +cygwin +CMakeSettings.json +**/cmake-build-debug/ diff --git a/BUILD b/BUILD index 0bf4e407..98e63732 100644 --- a/BUILD +++ b/BUILD @@ -1,5 +1,12 @@ package(default_visibility = ["//visibility:public"]) +licenses(["notice"]) # Apache 2.0 + +# Expose license for external usage through bazel. +exports_files([ + "LICENSE", +]) + # Platform configuration definitions for select() config_setting( @@ -62,3 +69,30 @@ filegroup( name = "dot_clang_format", srcs = [".clang-format"], ) + +cc_library( + name = "phtree", + srcs = glob( + include = [ + "include/**/*.h", + ], + ), + hdrs = [ + "include/phtree/converter.h", + "include/phtree/distance.h", + "include/phtree/filter.h", + "include/phtree/phtree.h", + "include/phtree/phtree_multimap.h", + ], + includes = [ + "include", + ], + linkstatic = True, + visibility = [ + "//visibility:public", + ], + deps = [ + "//include/phtree/common", + "//include/phtree/v16", + ], +) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9fd2a904..2817a5b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,100 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). ## [Unreleased] -Nothing yet. +### Added + +## [1.4.0] +### Added +- Added build features: [#53](https://github.com/tzaeschke/phtree-cpp/issues/53) + - linting for C++ and bazel files. + - Added CI status badges. + - Added test coverage +- Added support for cmake `FetchContent`. + See README for details. [#75](https://github.com/tzaeschke/phtree-cpp/issues/75) +- Added support for cmake `find_packet()` and direct import via `add_sub_directory()`. 
+ See README for details. [#83](https://github.com/tzaeschke/phtree-cpp/issues/83) + +### Changed +- Cleaned up build scripts. [#53](https://github.com/tzaeschke/phtree-cpp/issues/53) +- Fixed code coverage + migrate to linux. [#80](https://github.com/tzaeschke/phtree-cpp/issues/80) +- ***BREAKING CHANGE*** The project has been restructured to have a more "standard" directory structure. + This affects how **bazel** dependencies work (use `deps = ["@phtree//:phtree",]`) and enables **cmake FetchContent_**. + See README for details. [#75](https://github.com/tzaeschke/phtree-cpp/issues/75) + +### Removed +- Nothing. + +### Fixed +- Nothing. + +## [1.3.0] - 2022-08-28 +### Added +- Added flag to relocate() allow short cutting in case of identical keys. + [#68](https://github.com/tzaeschke/phtree-cpp/issues/68) +- Added tested support for move-only and copy-only value objects. + [#56](https://github.com/tzaeschke/phtree-cpp/issues/56) +- Added custom bucket implementation (similar to std::unordered_set). This improves update performance by 5%-20%. + [#44](https://github.com/tzaeschke/phtree-cpp/issues/44) +- Added `PhTree.relocate(old_key, new_key)` and `PhTree.relocate_if(old_key, new_key, predicate)`. + This is **a lot faster** than using other methods. + [#43](https://github.com/tzaeschke/phtree-cpp/issues/43) +- Added try_emplace(key, value) and try_emplace(iter_hint, key, value) + [#40](https://github.com/tzaeschke/phtree-cpp/issues/40) +- Added FilterBoxAABB and FilterSphereAABB as examples for filtering a PH-Tree with box keys + [#33](https://github.com/tzaeschke/phtree-cpp/issues/33) +### Changed +- Moved tests and benchmarks into separate folders. [#67](https://github.com/tzaeschke/phtree-cpp/pull/67) +- Cleaned up unit tests. [#54](https://github.com/tzaeschke/phtree-cpp/pull/54) +- Simplified internals of `erase()`. [#47](https://github.com/tzaeschke/phtree-cpp/pull/47) +- Removed internal use of `std::optional()` to slightly reduce memory overhead + [#38](https://github.com/tzaeschke/phtree-cpp/issues/38) +- Removed restrictions on bazel version [#35](https://github.com/tzaeschke/phtree-cpp/issues/35) +- **API BREAKING CHANGE**: API of filters have been changed to be more correct, explicit and flexible. + [#21](https://github.com/tzaeschke/phtree-cpp/issues/21) + - Correctness: Converters and distance functions are not copied unnecessarily anymore. + - Explicit: + Filters *must* have a mandatory parameter for a converter reference. This ensures that the correct + converter is used, probably `tree.converter()`. + - Flexible: + Distance functions can be provided through a universal reference (forwarding reference). + Also, filters are now movable and copyable. + +- **API BREAKING CHANGE**: Allow filtering on buckets in multimaps. Multimap filters have different functions + and function signatures than normal `PhTree` filters. [#26](https://github.com/tzaeschke/phtree-cpp/issues/26) + +### Fixed +- Fixed compiler warnings when compiling with Visual Studio 2019. + [#74](https://github.com/tzaeschke/phtree-cpp/issues/74) +- Fixed cmake to work with Visual Studio 2019. Added tests and benchmarks to cmake. + (benchmarks still do not work with VS at the moment). + [#62](https://github.com/tzaeschke/phtree-cpp/issues/62) +- Fixed compilation problems and a memory leak when compiling with Visual Studio 2019. + (also added `msan` support). [#64](https://github.com/tzaeschke/phtree-cpp/pull/64) + +## [1.2.0] - 2022-04-14 +### Changed +- Bugfix: FilterSphere was not working correctly. 
[#27](https://github.com/tzaeschke/phtree-cpp/issues/27) +- Potentially **BREAKING CHANGE**: Refactored API of all methods that accept callbacks and filters to + accept universal/forwarding references. + Also changed filters and callback to not require `const` methods. + [#22](https://github.com/tzaeschke/phtree-cpp/issues/22) +- Clean up iterator implementations. [#19](https://github.com/tzaeschke/phtree-cpp/issues/19) +- Make PhTree and PhTreeMultimap movable (move-assign/copy). [#18](https://github.com/tzaeschke/phtree-cpp/issues/18) +- Potentially **BREAKING CHANGE** when using `IsNodeValid()` in provided filters: + Changed `bit_width_t` from `uin16_t` to `uint32_t`. This improves performance of 3D insert/emplace + on small datasets by up to 15%. To avoid warnings that meant that the API of `FilterAABB` and `FilterSphere` + had to be changed to accept `uint32_t` instead of `int`. This may break some implementations. + [#17](https://github.com/tzaeschke/phtree-cpp/pull/17) +- DIM>8 now uses custom b_plus_tree_map instead of std::map. This improves performance for all operations, e.g. + window queries on large datasets are up to 4x faster. Benchmarks results can be found in the issue. + [#14](https://github.com/tzaeschke/phtree-cpp/issues/14) +- postfix/infix field moved from Node to Entry. This avoids indirections and improves performance of most by ~10%. + operations by 5-15%. [#11](https://github.com/tzaeschke/phtree-cpp/issues/11) +- Entries now use 'union' to store children. [#9](https://github.com/tzaeschke/phtree-cpp/issues/9) +- Avoid unnecessary find() when removing a node. [#5](https://github.com/tzaeschke/phtree-cpp/issues/5) +- Avoid unnecessary key copy when inserting a node. [#4](https://github.com/tzaeschke/phtree-cpp/issues/4) +- for_each(callback, filter) was traversing too many nodes. [#2](https://github.com/tzaeschke/phtree-cpp/issues/2) +- Build improvements for bazel/cmake ## [1.1.1] - 2022-01-30 ### Changed @@ -70,7 +163,10 @@ Nothing yet. - Nothing. 
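To illustrate the filter API change described in the 1.3.0 notes above (filters now take a mandatory converter reference, typically `tree.converter()`), here is a minimal sketch based on the query examples in the README below; `tree`, `callback`, `center` and `radius` are placeholders:

```C++
// Sphere-shaped query on a 3D double tree; the tree's converter is passed to the filter explicitly.
PhPointD<3> center{0, 0, 0};
double radius = 10.0;
tree.for_each(callback, FilterSphere(center, radius, tree.converter()));
```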
-[Unreleased]: https://github.com/improbable-eng/phtree-cpp/compare/v1.1.1...HEAD +[Unreleased]: https://github.com/improbable-eng/phtree-cpp/compare/v1.4.0...HEAD +[1.4.0]: https://github.com/improbable-eng/phtree-cpp/compare/v1.3.0...v1.4.0 +[1.3.0]: https://github.com/improbable-eng/phtree-cpp/compare/v1.2.0...v1.3.0 +[1.2.0]: https://github.com/improbable-eng/phtree-cpp/compare/v1.1.0...v1.2.0 [1.1.1]: https://github.com/improbable-eng/phtree-cpp/compare/v1.1.0...v1.1.1 [1.1.0]: https://github.com/improbable-eng/phtree-cpp/compare/v1.0.0...v1.1.0 [1.0.1]: https://github.com/improbable-eng/phtree-cpp/compare/v1.0.0...v1.0.1 diff --git a/CMakeLists.txt b/CMakeLists.txt index 18a5da8a..ac6c17af 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,19 +1,161 @@ cmake_minimum_required(VERSION 3.14) -# set the project name -project(PH_Tree_Main VERSION 1.1.1 +project(phtree VERSION 1.4.0 DESCRIPTION "PH-Tree C++" + HOMEPAGE_URL "https://github.com/tzaeschke/phtree-cpp" LANGUAGES CXX) -if(NOT CMAKE_BUILD_TYPE) - set(CMAKE_BUILD_TYPE Release) -endif() + +cmake_policy(SET CMP0077 NEW) + +# --------------------------------------------------------------------------------------- +# Set default build to release +# --------------------------------------------------------------------------------------- +if (NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose Release or Debug" FORCE) +endif () + + +# --------------------------------------------------------------------------------------- +# Build options +# --------------------------------------------------------------------------------------- +option(PHTREE_BUILD_ALL "Build examples, tests and benchmarks" OFF) + +# example options +option(PHTREE_BUILD_EXAMPLES "Build examples" OFF) + +# testing options +option(PHTREE_BUILD_TESTS "Build tests" OFF) +option(PHTREE_CODE_COVERAGE "Collect coverage from test library" OFF) +if (PHTREE_CODE_COVERAGE) + set(PHTREE_BUILD_TESTS ON) +endif () + +# bench options +option(PHTREE_BUILD_BENCHMARKS "Build benchmarks (Requires https://github.com/google/benchmark.git to be installed)" OFF) + +# install options +option(PHTREE_INSTALL "Generate the install target" OFF) + + +# --------------------------------------------------------------------------------------- +# Compiler config +# --------------------------------------------------------------------------------------- +find_program(CCACHE_FOUND ccache) +if (CCACHE_FOUND) + message("CCACHE is found") + set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache) + set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache) +else (CCACHE_FOUND) + message("CCACHE is NOT found") +endif (CCACHE_FOUND) # specify the C++ standard -set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_STANDARD_REQUIRED True) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -Wall -Werror") -set(CMAKE_CXX_FLAGS_RELEASE "-O3") +if (NOT CMAKE_CXX_STANDARD) + set(CMAKE_CXX_STANDARD 17) + set(CMAKE_CXX_STANDARD_REQUIRED ON) +endif () + +if (MSVC) + #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Wall") + set(CMAKE_CXX_FLAGS_RELEASE "/O2") + + if (PHTREE_BUILD_TESTS OR PHTREE_BUILD_ALL) + add_compile_options(/bigobj) + endif () + + # For google benchmark + if (PHTREE_BUILD_BENCHMARKS) # OR PHTREE_BUILD_ALL) + # This still doesn't work. 
This also breaks gtest + # See for example + # https://stackoverflow.com/questions/55376111/how-to-build-and-link-google-benchmark-using-cmake-in-windows + # https://github.com/google/benchmark/issues/1348 + # https://github.com/google/benchmark/issues/639 + # set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS TRUE) + # set(BUILD_SHARED_LIBS TRUE) #=TRUE + # set(BENCHMARK_DOWNLOAD_DEPENDENCIES on) + # set(BENCHMARK_ENABLE_GTEST_TESTS OFF) + endif () +else () + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Werror") + if (PHTREE_BUILD_BENCHMARKS) + set(CMAKE_CXX_FLAGS_RELEASE "-O3 -mavx -pthread") + else () + set(CMAKE_CXX_FLAGS_RELEASE "-O3 -mavx") + endif () + if (PHTREE_CODE_COVERAGE) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --coverage") # -Wa,-mbig-obj") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --coverage") + endif () +endif () + +# --------------------------------------------------------------------------------------- +# Build binaries +# --------------------------------------------------------------------------------------- + +# --------------------------------------------------------------------------------------- +# Header only version +# --------------------------------------------------------------------------------------- +add_library(phtree INTERFACE) +add_library(phtree::phtree ALIAS phtree) +target_compile_features(phtree INTERFACE cxx_std_17) + +target_include_directories(phtree INTERFACE + $ + $) + +if (PHTREE_BUILD_EXAMPLES OR PHTREE_BUILD_ALL) + message(STATUS "Generating examples") + add_subdirectory(examples) +endif () + +if ((PHTREE_BUILD_BENCHMARKS OR PHTREE_BUILD_ALL) AND NOT MSVC) + message(STATUS "Generating benchmarks") + add_subdirectory(benchmark) +endif () + +if (PHTREE_BUILD_TESTS OR PHTREE_BUILD_ALL) + message(STATUS "Generating tests") + enable_testing() + include(GoogleTest) + add_subdirectory(test) +endif () + +# --------------------------------------------------------------------------------------- +# Install +# --------------------------------------------------------------------------------------- +if (PHTREE_INSTALL) + include(GNUInstallDirs) + + install(TARGETS phtree + EXPORT ${PROJECT_NAME}_Targets + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) + + include(CMakePackageConfigHelpers) + write_basic_package_version_file("phtreeConfigVersion.cmake" + VERSION ${PROJECT_VERSION} + COMPATIBILITY SameMajorVersion) + + configure_package_config_file( + "${PROJECT_SOURCE_DIR}/cmake/${PROJECT_NAME}Config.cmake.in" + "${PROJECT_BINARY_DIR}/${PROJECT_NAME}Config.cmake" + INSTALL_DESTINATION + ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/cmake) + + install(EXPORT ${PROJECT_NAME}_Targets + FILE ${PROJECT_NAME}Targets.cmake + NAMESPACE phtree:: + DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/cmake) + + install(FILES "${PROJECT_BINARY_DIR}/${PROJECT_NAME}Config.cmake" + "${PROJECT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake" + DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/cmake) + + install(DIRECTORY ${PROJECT_SOURCE_DIR}/include/phtree + DESTINATION include + PATTERN "BUILD" EXCLUDE + PATTERN "*.md" EXCLUDE) -add_subdirectory(phtree) -add_subdirectory(examples) +endif () diff --git a/LICENSE b/LICENSE index e46c5961..13cd100a 100644 --- a/LICENSE +++ b/LICENSE @@ -188,6 +188,7 @@ identification within third-party archives. 
Copyright 2020 Improbable Worlds Limited + Copyright 2022 Tilmann Zäschke Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/README.md b/README.md index fad24140..68a86f98 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,13 @@ -**Note: for updates please also check the [fork](https://github.com/tzaeschke/phtree-cpp) by the original PH-Tree developer.** +**This is a fork of [Improbable's (currently unmaintained) PH-tree](https://github.com/improbable-eng/phtree-cpp)**. + +Multi-dimensional / spatial index with very fast insert/erase/relocate operations and scalability with large datasets. +This library is C++ / header only. + +![Bazel Linux build](https://github.com/tzaeschke/phtree-cpp/actions/workflows/bazel.yml/badge.svg) +![CMake Linux build](https://github.com/tzaeschke/phtree-cpp/actions/workflows/cmake.yml/badge.svg) +![CMake MSBuild 17.3.1](https://github.com/tzaeschke/phtree-cpp/actions/workflows/cmake-windows.yml/badge.svg) +[![codecov](https://codecov.io/gh/tzaeschke/phtree-cpp/branch/master/graph/badge.svg?token=V5XVRQG754)](https://codecov.io/gh/tzaeschke/phtree-cpp) +[![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) # PH-Tree C++ @@ -23,45 +32,33 @@ More information about PH-Trees (including a Java implementation) is available [ ### API Usage -[Key Types](#key-types) - -[Basic operations](#basic-operations) - -[Queries](#queries) - -* [for_each](#for-each-example) - -* [Iterators](#iterator-examples) - -* [Filters](#filters) - -* [Distance Functions](#distance-functions) - -[Converters](#converters) - -[Custom Key Types](#custom-key-types) - -[Restrictions](#restrictions) - -[Troubleshooting / FAQ](#troubleshooting-faq) +* [Key Types](#key-types) +* [Basic operations](#basic-operations) +* [Queries](#queries) + * [for_each](#for-each-example) + * [Iterators](#iterator-examples) + * [Filters](#filters) + * [Filters for MultiMaps](#filters-for-multimaps) + * [Distance Functions](#distance-functions) +* [Converters](#converters) +* [Custom Key Types](#custom-key-types) +* [Restrictions](#restrictions) +* [Troubleshooting / FAQ](#troubleshooting-faq) ### Performance -[When to use a PH-Tree](#when-to-use-a-ph-tree) - -[Optimising Performance](#optimising-performance) +* [When to use a PH-Tree](#when-to-use-a-ph-tree) +* [Optimising Performance](#optimizing-performance) ### Compiling / Building -[Build system & dependencies](#build-system-and-dependencies) - -[bazel](#bazel) - -[cmake](#cmake) +* [Build system & dependencies](#build-system-and-dependencies) +* [bazel](#bazel) +* [cmake](#cmake) ## Further Resources -[Theory](#theory) +* [Theory](#theory) ---------------------------------- @@ -71,7 +68,7 @@ More information about PH-Trees (including a Java implementation) is available [ #### Key Types -The **PH-Tree Map** supports out of the box five types: +The **PH-Tree Map** has five predefined tree types: - `PhTreeD` uses `PhPointD` keys, which are vectors/points of 64 bit `double`. - `PhTreeF` uses `PhPointF` keys, which are vectors/points of 32 bit `float`. @@ -79,18 +76,18 @@ The **PH-Tree Map** supports out of the box five types: - `PhTreeBoxF` uses `PhBoxF` keys, which consist of two `PhPointF` that define an axis-aligned rectangle/box. 
- `PhTree` uses `PhPoint` keys, which are vectors/points of `std::int64` -The **PH-Tree MultiMap** supports out of the box three types: +The **PH-Tree MultiMap** has three predefined tree types: - `PhTreeMultiMapD` uses `PhPointD` keys, which are vectors/points of 64 bit `double`. - `PhTreeMultiMapBoxD` uses `PhBoxD` keys, which consist of two `PhPointD` that define an axis-aligned rectangle/box. - `PhTreeMultiMap` uses `PhPoint` keys, which are vectors/points of `std::int64` -Additional tree types can be defined easily analogous to the types above, please refer to the declaration of the tree +Additional key types and tree types can be defined easily analogous to the types above, please refer to the declaration of the types for an example. Support for custom key classes (points and boxes) as well as custom coordinate mappings can be implemented using custom `Converter` classes, see below. The `PhTreeMultiMap` is by default backed by `std::unordered_set` but this can be changed via a template parameter. -The `PhTree` and `PhTreeMultiMap` types are available from `phtree.h` and `phtree_multimap.h`. +The `PhTree` and `PhTreeMultiMap` types are declared in `phtree.h` and `phtree_multimap.h`. @@ -107,8 +104,12 @@ auto tree = PhTreeD<3, MyData>(); PhPointD<3> p{1.1, 1.0, 10.}; // Some operations +tree.relocate(p1, p2); // Move an entry from point 1 to point 2 +tree.relocate_if(p1, p2, predicate); // Conditionally move an entry from point 1 to point 2 tree.emplace(p, my_data); tree.emplace_hint(hint, p, my_data); +tree.try_emplace(p, my_data); +tree.try_emplace(hint, p, my_data); tree.insert(p, my_data); tree[p] = my_data; tree.count(p); @@ -120,7 +121,6 @@ tree.empty(); tree.clear(); // Multi-map only -tree.relocate(p_old, p_new, value); tree.estimate_count(query); ``` @@ -128,9 +128,10 @@ tree.estimate_count(query); #### Queries -* For-each over all elements: `tree.fore_each(callback);` +* For-each over all elements: `tree.for_each(callback);` + **Note that `for_each` tends to be 10%-20% faster than using an iterator.** * Iterator over all elements: `auto iterator = tree.begin();` -* For-each with box shaped window queries: `tree.fore_each(PhBoxD(min, max), callback);` +* For-each with box shaped window queries: `tree.for_each(PhBoxD(min, max), callback);` * Iterator for box shaped window queries: `auto q = tree.begin_query(PhBoxD(min, max));` * Iterator for _k_ nearest neighbor queries: `auto q = tree.begin_knn_query(k, center_point, distance_function);` * Custom query shapes, such as spheres: `tree.for_each(callback, FilterSphere(center, radius, tree.converter()));` @@ -148,7 +149,7 @@ struct Counter { size_t n_ = 0; }; -// Count entries inside of an axis aligned box defined by the two points (1,1,1) and (3,3,3) +// Count entries inside an axis aligned box defined by the two points (1,1,1) and (3,3,3) Counter callback; tree.for_each({{1, 1, 1}, {3, 3, 3}}, callback); // callback.n_ is now the number of entries in the box. @@ -164,18 +165,18 @@ for (auto it : tree) { ... } -// Iterate over all entries inside of an axis aligned box defined by the two points (1,1,1) and (3,3,3) +// Iterate over all entries inside an axis aligned box defined by the two points (1,1,1) and (3,3,3) for (auto it = tree.begin_query({{1, 1, 1}, {3, 3, 3}}); it != tree.end(); ++it) { ... } // Find 5 nearest neighbors of (1,1,1) -for (auto it = tree.begin_knn_query(5, {1, 1, 1}); it != tree.end(); ++it) { +for (auto it = tree.begin_knn_query(5, {1, 1, 1}, DistanceEuclidean<3>())); it != tree.end(); ++it) { ... 
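    // (illustrative) each iteration visits one of the k nearest neighbours;
    // dereference the iterator to reach the stored value, e.g. `*it` or `it->some_member`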
} ``` - + ##### Filters @@ -183,7 +184,8 @@ All queries allow specifying an additional filter. The filter is called for ever returned (subject to query constraints) and to every node in the tree that the query decides to traverse (also subject to query constraints). Returning `true` in the filter does not change query behaviour, returning `false` means that the current value or child node is not returned or traversed. An example of a geometric filter can be found -in `phtree/common/filter.h` in `FilterAABB`. +in `phtree/common/filter.h` in `FilterAABB` or `FilterSphere` (for examples with box keys see +`FilterBoxAABB` or `FilterBoxSphere`). ```C++ template @@ -198,13 +200,47 @@ struct FilterByValueId { } }; -// Iterate over all entries inside of an axis aligned box defined by the two points (1,1,1) and (3,3,3). +// Iterate over all entries inside an axis aligned box defined by the two points (1,1,1) and (3,3,3). // Return only entries that suffice the filter condition. for (auto it = tree.begin_query({1, 1, 1}, {3, 3, 3}, FilterByValueId<3, T>())); it != tree.end(); ++it) { ... } ``` +Note: The filter example works only for the 'map' version of the PH-Tree, such as `PhTree`, `PhTreeD`, ... . Filters for +the `PhTreeMultiMap` are discussed in the next section. + + + +#### Filters for MultiMaps + +The `PhTreeMultiMap` requires a different type of filter. In order to function as a multimap, it uses a collections +("buckets") as entries for each occupied coordinate. The buckets allow it to store several values per coordinate. When +using a filter, the PH-Tree will check `IsEntryValid` for every *bucket* (this is different from version 1.x.x where it +called `IsEntryValid` for every entry in a bucket but never for the bucket itself). Since 2.0.0 there is a new function +required in every multimap filter: `IsBucketEntryValid`. It is called once for every entry in a bucket if the bucket +passed `IsEntryValid`. An example of a geometric filter can be found in `phtree/common/filter.h` in `FilterMultiMapAABB` +. + +```C++ +template +struct FilterMultiMapByValueId { + template + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint& key, const BucketT& bucket) const { + // Arbitrary example: Only allow keys/buckets with a certain property, e.g. keys that lie within a given sphere. + return check_some_geometric_propert_of_key(key); + } + [[nodiscard]] constexpr bool IsBucketEntryValid(const PhPoint& key, const T& value) const { + // Arbitrary example: Only allow values with even values of id_ + return value.id_ % 2 == 0; + } + [[nodiscard]] constexpr bool IsNodeValid(const PhPoint& prefix, int bits_to_ignore) const { + // Allow all nodes + return true; + } +}; +``` + ##### Distance function @@ -250,7 +286,14 @@ double resultung_float = ((double)my_int) / 1000000.; It is obvious that this approach leads to a loss of numerical precision. Moreover, the loss of precision depends on the actual range of the double values and the constant. The chosen constant should probably be as large as possible but small enough such that converted values do not exceed the 64bit limit of `std::int64_t`. Note that the PH-Tree provides -several `ConverterMultiply` implementations for point/box and double/float. +several `ConverterMultiply` implementations for point/box and double/float. For example: + +```C++ +// Multiply converter that multiplies by 1'000'000 (and divides by 1). +auto tree = PhTreeD>(); +``` + +You can also write your own converter. 
For example: ```C++ template @@ -371,10 +414,10 @@ void test() { **Problem**: The PH-Tree appears to be losing updates/insertions. **Solution**: Remember that the PH-Tree is a *map*, keys will not be inserted if an identical key already exists. The -easiest solution is to use one of the `PhTreeMultiMap` implementations. Alternatively, this can be solved by turning the -PH-Tree into a multi-map, for example by using something like `std::map` or `std::set` as member type: -`PhTree<3, std::set>`. The `set` instances can then be used to handle key conflicts by storing multiple -entries for the same key. The logic to handle conflicts must currently be implemented manually by the user. +easiest solution is to use one of the `PhTreeMultiMap` implementations. Alternatively, this can be solved by turning a +`PhTree` into a multi-map, for example by using something like `std::map` or `std::set` as member type: +`PhTree<3, T, CONVERTER, std::set>`. The `set` instances can then be used to handle key conflicts by +storing multiple entries for the same key. The logic to handle conflicts must currently be implemented manually. ---------------------------------- @@ -393,7 +436,7 @@ heavily on the actual dataset, usage patterns, hardware, ... . **Generally, the PH-Tree tends to have the following advantages:** * Fast insertion/removal times. While some indexes, such as *k*-D-trees, trees can be build from scratch very fast, they - tend to be be much slower when removing entries or when indexing large datasets. Also, most indexes require + tend to be much slower when removing entries or when indexing large datasets. Also, most indexes require rebalancing which may result in unpredictable latency (R-trees) or may result in index degradation if delayed (*k*D-trees). @@ -407,7 +450,7 @@ heavily on the actual dataset, usage patterns, hardware, ... . * Scalability with the number of dimensions. The PH-Tree has been shown to deal "well" with high dimensional data ( 1000k+ dimensions). What does "well" mean? * It works very well for up to 30 (sometimes 50) dimensions. **Please note that the C++ implementation has not been - optimised nearly as much as the Java implementation.** + optimized nearly as much as the Java implementation.** * For more dimensions (Java was tested with 1000+ dimensions) the PH-Tree still has excellent insertion/deletion performance. However, the query performance cannot compete with specialised high-dim indexes such as cover-trees or pyramid-trees (these tend to be *very slow* on insertion/deletion though). @@ -426,22 +469,25 @@ heavily on the actual dataset, usage patterns, hardware, ... . * PH-Trees are not very efficient in scenarios where queries tend to return large result sets in the order of 1000 or more. - + -### Optimising Performance +### Optimizing Performance There are numerous ways to improve performance. The following list gives an overview over the possibilities. -1) **Use `for_each` instead of iterators**. This should improve performance of queries by 5%-10%. +1) **Use `for_each` instead of iterators**. This should improve performance of queries by 10%-20%. -2) **Use `emplace_hint` if possible**. When updating the position of an entry, the naive way is to use `erase()` - /`emplace()`. With `emplace_hint`, insertion can avoid navigation to the target node if the insertion coordinate is - close to the removal coordinate. 
- ```c++ - auto iter = tree.find(old_position); - tree.erase(iter); - tree.emplace_hint(iter, new_position, value); - ``` +2) **Use `relocate()` / `relocate_if()` if possible**. When updating the position of an entry, the naive way is + to use `erase()` / `emplace()`. With `relocate` / `relocate_if()`, insertion can avoid a lot of duplicate + navigation in the tree if the new coordinate is close to the old coordinate. + ```c++ + relocate(old_position, new_position); + relocate_if(old_position, new_position, [](const T& value) { return [true/false]; }); + ``` + The multi-map version relocates all values unless a 'value' is specified to identify the value to be relocated: + ```c++ + relocate(old_position, new_position, value); + ``` 3) **Store pointers instead of large data objects**. For example, use `PhTree<3, MyLargeClass*>` instead of `PhTree<3, MyLargeClass>` if `MyLargeClass` is large. @@ -462,7 +508,7 @@ There are numerous ways to improve performance. The following list gives an over caused by faster operation in the converter itself but by a more compact tree shape. The example shows how to use a converter that multiplies coordinates by 100'000, thus preserving roughly 5 fractional digits: - `PhTreeD>` + `PhTreeD>()` 6) **Use custom key types**. By default, the PH-Tree accepts only coordinates in the form of its own key types, such as `PhPointD`, `PhBoxF` or similar. To avoid conversion from custom types to PH-Tree key types, custom classes can @@ -487,67 +533,125 @@ There are numerous ways to improve performance. The following list gives an over ## Compiling the PH-Tree -This section will guide you through the initial build system and IDE you need to go through in order to build and run -custom versions of the PH-Tree on your machine. +The PH-Tree index itself is a *header only* library, it can be used by simply copying everything in the +`include/phtree` folder. +The examples, tests and benchmarks can be build with bazel or cmake. ### Build system & dependencies -PH-Tree can be built with *cmake 3.14* or [Bazel](https://bazel.build) as build system. All code is written in C++ -targeting the C++17 standard. The code has been verified to compile on Linux with Clang 9, 10, 11, 12, and GCC 9, 10, -11, and on Windows with Visual Studio 2019. - -#### Ubuntu Linux - -* Installing [clang](https://apt.llvm.org/) - -* Installing [bazel](https://docs.bazel.build/versions/main/install-ubuntu.html) - -* To install [cmake](https://launchpad.net/~hnakamur/+archive/ubuntu/cmake): - +PH-Tree can be built with [Bazel](https://bazel.build) (primary build system) or with +[cmake](https://cmake.org/) *3.14*. +All code is written in C++ targeting the C++17 standard. +The code has been verified to compile on Linux with Clang 11 and GCC 9, and on Windows with Visual Studio 2019 +(except benchmarks, which don't work with VS). +The PH-tree makes use of vectorization, so suggested compilation options for clang/gcc are: ``` -sudo add-apt-repository ppa:hnakamur/libarchive -sudo add-apt-repository ppa:hnakamur/libzstd -sudo add-apt-repository ppa:hnakamur/cmake -sudo apt update -sudo apt install cmake +-O3 -mavx ``` -#### Windows - -To build on Windows, you'll need to have a version of Visual Studio 2019 installed (likely Professional), in addition to -[Bazel](https://docs.bazel.build/versions/master/windows.html) or -[cmake](https://cmake.org/download/). 
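Since the library is header-only, a complete stand-alone program can be as small as the following sketch (the include path and the `improbable::phtree` namespace follow the benchmark sources in this repository; the `int` value type and the coordinates are only for illustration):

```C++
#include "phtree/phtree.h"

#include <iostream>

using namespace improbable::phtree;

int main() {
    // 3D tree with double coordinates and int values, as in the examples above.
    PhTreeD<3, int> tree;
    tree.emplace(PhPointD<3>{1.0, 2.0, 3.0}, 42);

    // Window query over the axis-aligned box spanned by (0,0,0) and (10,10,10).
    for (auto it = tree.begin_query({{0, 0, 0}, {10, 10, 10}}); it != tree.end(); ++it) {
        std::cout << *it << std::endl;  // prints the stored value, i.e. 42
    }
    return 0;
}
```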
### Bazel +`WORKSPACE` file: +``` +http_archive( + name = "phtree", + strip_prefix = "phtree-cpp-v1.4.0", + url = "https://github.com/tzaeschke/phtree-cpp", +) +``` +`BUILD` file: +``` +cc_binary( + ... + deps = [ + "@phtree//:phtree", + ], +) +``` Once you have set up your dependencies, you should be able to build the PH-Tree repository by running: - ``` bazel build ... ``` Similarly, you can run all unit tests with: - ``` bazel test ... ``` +Benchmarks: +``` +bazel run //benchmark:update_mm_d_benchmark --config=benchmark -- --benchmark_counters_tabular=true +``` + + -### cmake +### cmake dependency +The library supports three types of cmake dependency management, `FetchContent`, `find_package()` and `add_subfolder()`. +All three approaches are used in [this example project](https://github.com/tzaeschke/test-phtree-cpp-cmake). +#### FetchContent +With `FetchContent_...()`: +``` +include(FetchContent) +FetchContent_Declare( + phtree + GIT_REPOSITORY https://github.com/tzaeschke/phtree-cpp.git + GIT_TAG v1.4.0 +) +FetchContent_MakeAvailable(phtree) +``` + +#### find_package() +You need to build the library with: +``` +mkdir out && cd out +cmake .. -DPHTREE_INSTALL=on +sudo cmake --build . --config Release --target install -- -j +``` +Note that the option `CMAKE_INSTALL_PREFIX:PATH=...` does _not_ work. +The library can then be included with: +``` +find_package(phtree CONFIG REQUIRED) +add_executable(ExampleProject example.cc) +target_link_libraries(ExampleProject phtree::phtree) +``` + +#### add_subfolder() +For this you can simply copy the PH-Tree source code into your project (you can skip `benchmark` and `test`) and +then include the folder with `add_subdirectory(phtree-cpp)`. + +### cmake build +`cmake` uses `ccache` when available. ``` mkdir build cd build cmake .. cmake --build . +``` + +Run example: +``` +cmake .. -DPHTREE_BUILD_EXAMPLES=ON +cmake --build . ./example/Example ``` +Run tests: +``` +cmake .. -DPHTREE_BUILD_TESTS=ON +cmake --build . +ctest +``` +Next to example (`PHTREE_BUILD_EXAMPLES`) there are also tests (`PHTREE_BUILD_TESTS`) and +benchmarks (`PHTREE_BUILD_BENCHMARKS`). To build all, use `PHTREE_BUILD_ALL`. +**Note that the benchmarks currently don't work on Windows.** + ## Further Resources diff --git a/TODO.txt b/TODO.txt new file mode 100644 index 00000000..9bf73e5e --- /dev/null +++ b/TODO.txt @@ -0,0 +1,76 @@ +Fix const-ness +============== +- operator[] should have a const overload +- find() should have a non-const overload +- test: + +TEST(PhTreeTest, SmokeTestConstTree) { + // Test edge case: only one entry in tree + PhPoint<3> p{1, 2, 3}; + TestTree<3, Id> tree1; + tree1.emplace(p, Id{1}); + tree1.emplace(p, Id{2}); + Id id3{3}; + tree1.insert(p, id3); + Id id4{4}; + tree1.insert(p, id4); + const auto& tree = tree1; + ASSERT_EQ(tree.size(), 1); + ASSERT_EQ(tree.find(p).second()._i, 1); + ASSERT_EQ(tree[p]._i, 1); + + auto q_window = tree.begin_query({p, p}); + ASSERT_EQ(1, q_window->_i); + ++q_window; + ASSERT_EQ(q_window, tree.end()); + + auto q_extent = tree.begin(); + ASSERT_EQ(1, q_extent->_i); + ++q_extent; + ASSERT_EQ(q_extent, tree.end()); + + auto q_knn = tree.begin_knn_query(10, p, DistanceEuclidean<3>()); + ASSERT_EQ(1, q_knn->_i); + ++q_knn; + ASSERT_EQ(q_knn, tree.end()); + + ASSERT_EQ(1, tree1.erase(p)); + ASSERT_EQ(0, tree.size()); + ASSERT_EQ(0, tree1.erase(p)); + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); +} + + +b_plus_tree_map - binary search +=============== +Use custom binary search: + + // return BptEntry* ?!?!? 
+ template + [[nodiscard]] auto lower_bound(key_t key, std::vector& data) noexcept { + return std::lower_bound(data.begin(), data.end(), key, [](E& left, const key_t key) { + return left.first < key; + }); + // auto pos = __lower_bound(&*data_leaf_.begin(), &*data_leaf_.end(), key); + // return data_leaf_.begin() + pos; + } + + template + inline auto __lower_bound(const TT* __first, const TT* __last, key_t __val) const noexcept { + const TT* const_first = __first; + auto __len = __last - __first; + + while (__len > 0) { + auto __half = __len >> 1; + const TT* __middle = __first + __half; + if (__middle->first < __val) { + __first = __middle; + ++__first; + __len = __len - __half - 1; + } else + __len = __half; + } + return __first - const_first; + } + diff --git a/WORKSPACE b/WORKSPACE index 0bd3d32b..4520a3c8 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -1,19 +1,6 @@ # Bazel bootstrapping -load("//tools/build_rules:http.bzl", "http_archive", "http_file") - -http_archive( - name = "bazel_skylib", - sha256 = "1dde365491125a3db70731e25658dfdd3bc5dbdfd11b840b3e987ecf043c7ca0", - url = "https://github.com/bazelbuild/bazel-skylib/releases/download/0.9.0/bazel_skylib-0.9.0.tar.gz", -) - -load("@bazel_skylib//lib:versions.bzl", "versions") - -versions.check( - minimum_bazel_version = "4.2.2", - maximum_bazel_version = "4.2.2", -) +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive", "http_file") # NOTE: We make third_party/ its own bazel workspace because it allows to run `bazel build ...` without # having all targets defined in third-party BUILD files in that directory buildable. @@ -34,17 +21,16 @@ http_archive( http_archive( name = "gbenchmark", - sha256 = "dccbdab796baa1043f04982147e67bb6e118fe610da2c65f88912d73987e700c", - strip_prefix = "benchmark-1.5.2", - url = "https://github.com/google/benchmark/archive/v1.5.2.tar.gz", + sha256 = "6132883bc8c9b0df5375b16ab520fac1a85dc9e4cf5be59480448ece74b278d4", + strip_prefix = "benchmark-1.6.1", + url = "https://github.com/google/benchmark/archive/v1.6.1.tar.gz", ) http_archive( name = "gtest", - build_file = "@third_party//gtest:BUILD", - sha256 = "9dc9157a9a1551ec7a7e43daea9a694a0bb5fb8bec81235d8a1e6ef64c716dcb", - strip_prefix = "googletest-release-1.10.0", - url = "https://github.com/google/googletest/archive/release-1.10.0.tar.gz", + sha256 = "b4870bf121ff7795ba20d20bcdd8627b8e088f2d1dab299a031c1034eddc93d5", + strip_prefix = "googletest-release-1.11.0", + url = "https://github.com/google/googletest/archive/release-1.11.0.tar.gz", ) # Development environment tooling diff --git a/phtree/benchmark/BUILD b/benchmark/BUILD similarity index 67% rename from phtree/benchmark/BUILD rename to benchmark/BUILD index 95315788..102ea0d2 100644 --- a/phtree/benchmark/BUILD +++ b/benchmark/BUILD @@ -3,9 +3,6 @@ package(default_visibility = ["//visibility:private"]) cc_library( name = "benchmark", testonly = True, - srcs = [ - "logging.cc", - ], hdrs = [ "benchmark_util.h", "logging.h", @@ -28,8 +25,8 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -43,8 +40,8 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -58,8 +55,8 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -73,8 +70,8 @@ cc_binary( ], linkstatic = True, deps = [ - 
"//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -88,8 +85,8 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -103,8 +100,8 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -118,8 +115,8 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -133,8 +130,8 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -148,8 +145,8 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -163,8 +160,8 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -178,8 +175,8 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -193,8 +190,8 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -208,8 +205,8 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -223,8 +220,23 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "query_mm_d_filter_benchmark", + testonly = True, + srcs = [ + "query_mm_d_filter_benchmark.cc", + ], + linkstatic = True, + deps = [ + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -238,8 +250,8 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -253,8 +265,8 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -268,8 +280,8 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -283,8 +295,8 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], @@ -298,8 +310,68 @@ cc_binary( ], linkstatic = True, deps = [ - "//phtree", - "//phtree/benchmark", + ":benchmark", + "//:phtree", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "hd_insert_d_benchmark", + testonly = True, + srcs = [ + "hd_insert_d_benchmark.cc", + ], + linkstatic = True, + deps = [ + ":benchmark", + "//:phtree", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "hd_erase_d_benchmark", + testonly = True, + srcs = [ + "hd_erase_d_benchmark.cc", + ], + linkstatic = True, + deps = [ + ":benchmark", + "//:phtree", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "hd_query_d_benchmark", + testonly = True, + srcs = [ + "hd_query_d_benchmark.cc", + ], + linkstatic = True, + deps = [ + ":benchmark", + 
"//:phtree", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "hd_knn_d_benchmark", + testonly = True, + srcs = [ + "hd_knn_d_benchmark.cc", + ], + linkstatic = True, + deps = [ + ":benchmark", + "//:phtree", "@gbenchmark//:benchmark", "@spdlog", ], diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt new file mode 100644 index 00000000..8af1e7cd --- /dev/null +++ b/benchmark/CMakeLists.txt @@ -0,0 +1,54 @@ +cmake_minimum_required(VERSION 3.14) +project(phtree-benchmarks) + +set(BENCHMARK_ENABLE_TESTING OFF) + +include(FetchContent) + +FetchContent_Declare( + googlebenchmark + GIT_REPOSITORY https://github.com/google/benchmark.git + GIT_TAG v1.7.0 +) +FetchContent_MakeAvailable(googlebenchmark) + +FetchContent_Declare( + spdlog + GIT_REPOSITORY https://github.com/gabime/spdlog.git + GIT_TAG v1.10.0 +) +FetchContent_MakeAvailable(spdlog) + +macro(package_add_benchmark TESTNAME) + add_executable(${TESTNAME} ${ARGN} benchmark_util.h logging.h) + target_link_libraries(${TESTNAME} PRIVATE benchmark::benchmark) + target_link_libraries(${TESTNAME} PRIVATE spdlog::spdlog) + target_link_libraries(${TESTNAME} PRIVATE phtree::phtree) +endmacro() + +add_compile_definitions(RUN_HAVE_STD_REGEX=0 RUN_HAVE_POSIX_REGEX=0 COMPILE_HAVE_GNU_POSIX_REGEX=0) + +package_add_benchmark(count_mm_d_benchmark count_mm_d_benchmark.cc) +package_add_benchmark(erase_benchmark erase_benchmark.cc) +package_add_benchmark(erase_d_benchmark erase_d_benchmark.cc) +package_add_benchmark(extent_benchmark extent_benchmark.cc) +package_add_benchmark(extent_benchmark_weird extent_benchmark_weird.cc) +package_add_benchmark(find_benchmark find_benchmark.cc) +package_add_benchmark(hd_erase_d_benchmark hd_erase_d_benchmark.cc) +package_add_benchmark(hd_insert_d_benchmark hd_insert_d_benchmark.cc) +package_add_benchmark(hd_knn_d_benchmark hd_knn_d_benchmark.cc) +package_add_benchmark(hd_query_d_benchmark hd_query_d_benchmark.cc) +package_add_benchmark(insert_benchmark insert_benchmark.cc) +package_add_benchmark(insert_box_d_benchmark insert_box_d_benchmark.cc) +package_add_benchmark(insert_d_benchmark insert_d_benchmark.cc) +package_add_benchmark(knn_d_benchmark knn_d_benchmark.cc) +package_add_benchmark(query_benchmark query_benchmark.cc) +package_add_benchmark(query_box_d_benchmark query_box_d_benchmark.cc) +package_add_benchmark(query_d_benchmark query_d_benchmark.cc) +package_add_benchmark(query_mm_box_d_benchmark query_mm_box_d_benchmark.cc) +package_add_benchmark(query_mm_d_benchmark query_mm_d_benchmark.cc) +package_add_benchmark(query_mm_d_filter_benchmark query_mm_d_filter_benchmark.cc) +package_add_benchmark(update_box_d_benchmark update_box_d_benchmark.cc) +package_add_benchmark(update_d_benchmark update_d_benchmark.cc) +package_add_benchmark(update_mm_box_d_benchmark update_mm_box_d_benchmark.cc) +package_add_benchmark(update_mm_d_benchmark update_mm_d_benchmark.cc) diff --git a/phtree/benchmark/benchmark_util.h b/benchmark/benchmark_util.h similarity index 93% rename from phtree/benchmark/benchmark_util.h rename to benchmark/benchmark_util.h index 5af70367..73069710 100644 --- a/phtree/benchmark/benchmark_util.h +++ b/benchmark/benchmark_util.h @@ -81,7 +81,7 @@ auto CreateDataCLUSTER = [](auto& points, }; auto CreateDuplicates = - [](auto& points, size_t num_unique_entries, size_t num_total_entities, std::uint32_t seed) { + [](auto& points, int num_unique_entries, size_t num_total_entities, std::uint32_t seed) { std::default_random_engine random_engine{seed}; 
std::uniform_int_distribution<> distribution(0, num_unique_entries); for (size_t i = num_unique_entries; i < num_total_entities; ++i) { @@ -91,7 +91,7 @@ auto CreateDuplicates = }; } // namespace -enum TestGenerator { CUBE, CLUSTER }; +enum TestGenerator { CUBE = 4, CLUSTER = 7 }; template auto CreatePointDataMinMax = [](auto& points, @@ -101,11 +101,13 @@ auto CreatePointDataMinMax = [](auto& points, double world_minimum, double world_maximum, double fraction_of_duplicates) { - auto set_coordinate_lambda = [](auto& p, dimension_t dim, auto value) { p[dim] = value; }; + auto set_coordinate_lambda = [](auto& p, dimension_t dim, auto value) { + p[dim] = static_cast < typename std::remove_reference_t>(value); + }; // Create at least 1 unique point // Note that the following point generator is likely, but not guaranteed, to created unique // points. - size_t num_unique_entries = 1 + (num_entities - 1) * (1. - fraction_of_duplicates); + int num_unique_entries = static_cast(1 + (num_entities - 1) * (1. - fraction_of_duplicates)); points.reserve(num_entities); switch (test_generator) { case CUBE: @@ -140,7 +142,7 @@ auto CreateBoxDataMinMax = [](auto& points, // Create at least 1 unique point // Note that the following point generator is likely, but not guaranteed, to created unique // points. - int num_unique_entries = 1 + (num_entities - 1) * (1. - fraction_of_duplicates); + int num_unique_entries = static_cast(1 + (num_entities - 1) * (1. - fraction_of_duplicates)); points.reserve(num_entities); switch (test_generator) { case CUBE: diff --git a/phtree/benchmark/count_mm_d_benchmark.cc b/benchmark/count_mm_d_benchmark.cc similarity index 100% rename from phtree/benchmark/count_mm_d_benchmark.cc rename to benchmark/count_mm_d_benchmark.cc diff --git a/phtree/benchmark/erase_benchmark.cc b/benchmark/erase_benchmark.cc similarity index 89% rename from phtree/benchmark/erase_benchmark.cc rename to benchmark/erase_benchmark.cc index 1e59a6d2..99881ff4 100644 --- a/phtree/benchmark/erase_benchmark.cc +++ b/benchmark/erase_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include "benchmark_util.h" #include "logging.h" -#include "phtree/benchmark/benchmark_util.h" #include "phtree/phtree.h" #include #include @@ -26,6 +26,7 @@ using namespace improbable::phtree::phbenchmark; namespace { const int GLOBAL_MAX = 10000; +using payload_t = std::uint32_t; /* * Benchmark for removing entries. 
@@ -39,11 +40,11 @@ class IndexBenchmark { private: void SetupWorld(benchmark::State& state); - void Insert(benchmark::State& state, PhTree& tree); - void Remove(benchmark::State& state, PhTree& tree); + void Insert(benchmark::State& state, PhTree& tree); + void Remove(benchmark::State& state, PhTree& tree); const TestGenerator data_type_; - const int num_entities_; + const size_t num_entities_; std::default_random_engine random_engine_; std::uniform_int_distribution<> cube_distribution_; @@ -66,7 +67,7 @@ template void IndexBenchmark::Benchmark(benchmark::State& state) { for (auto _ : state) { state.PauseTiming(); - auto* tree = new PhTree(); + auto* tree = new PhTree(); Insert(state, *tree); state.ResumeTiming(); @@ -91,16 +92,16 @@ void IndexBenchmark::SetupWorld(benchmark::State& state) { } template -void IndexBenchmark::Insert(benchmark::State&, PhTree& tree) { - for (int i = 0; i < num_entities_; ++i) { - tree.emplace(points_[i], i); +void IndexBenchmark::Insert(benchmark::State&, PhTree& tree) { + for (size_t i = 0; i < num_entities_; ++i) { + tree.emplace(points_[i], (int)i); } } template -void IndexBenchmark::Remove(benchmark::State& state, PhTree& tree) { - int n = 0; - for (int i = 0; i < num_entities_; ++i) { +void IndexBenchmark::Remove(benchmark::State& state, PhTree& tree) { + size_t n = 0; + for (size_t i = 0; i < num_entities_; ++i) { n += tree.erase(points_[i]); } diff --git a/phtree/benchmark/erase_d_benchmark.cc b/benchmark/erase_d_benchmark.cc similarity index 90% rename from phtree/benchmark/erase_d_benchmark.cc rename to benchmark/erase_d_benchmark.cc index a544a4e0..0500f88c 100644 --- a/phtree/benchmark/erase_d_benchmark.cc +++ b/benchmark/erase_d_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include "benchmark_util.h" #include "logging.h" -#include "phtree/benchmark/benchmark_util.h" #include "phtree/phtree.h" #include #include @@ -26,6 +26,7 @@ using namespace improbable::phtree::phbenchmark; namespace { const int GLOBAL_MAX = 10000; +using payload_t = std::uint32_t; /* * Benchmark for removing entries. 
@@ -39,11 +40,11 @@ class IndexBenchmark { private: void SetupWorld(benchmark::State& state); - void Insert(benchmark::State& state, PhTreeD& tree); - void Remove(benchmark::State& state, PhTreeD& tree); + void Insert(benchmark::State& state, PhTreeD& tree); + void Remove(benchmark::State& state, PhTreeD& tree); const TestGenerator data_type_; - const int num_entities_; + const size_t num_entities_; std::default_random_engine random_engine_; std::uniform_real_distribution<> cube_distribution_; @@ -66,7 +67,7 @@ template void IndexBenchmark::Benchmark(benchmark::State& state) { for (auto _ : state) { state.PauseTiming(); - auto* tree = new PhTreeD(); + auto* tree = new PhTreeD(); Insert(state, *tree); state.ResumeTiming(); @@ -91,16 +92,16 @@ void IndexBenchmark::SetupWorld(benchmark::State& state) { } template -void IndexBenchmark::Insert(benchmark::State&, PhTreeD& tree) { - for (int i = 0; i < num_entities_; ++i) { +void IndexBenchmark::Insert(benchmark::State&, PhTreeD& tree) { + for (payload_t i = 0; i < num_entities_; ++i) { tree.emplace(points_[i], i); } } template -void IndexBenchmark::Remove(benchmark::State& state, PhTreeD& tree) { - int n = 0; - for (int i = 0; i < num_entities_; ++i) { +void IndexBenchmark::Remove(benchmark::State& state, PhTreeD& tree) { + size_t n = 0; + for (size_t i = 0; i < num_entities_; ++i) { n += tree.erase(points_[i]); } diff --git a/phtree/benchmark/extent_benchmark.cc b/benchmark/extent_benchmark.cc similarity index 96% rename from phtree/benchmark/extent_benchmark.cc rename to benchmark/extent_benchmark.cc index 760a5749..85dba744 100644 --- a/phtree/benchmark/extent_benchmark.cc +++ b/benchmark/extent_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include "benchmark_util.h" #include "logging.h" -#include "phtree/benchmark/benchmark_util.h" #include "phtree/phtree.h" #include #include @@ -42,7 +42,7 @@ class IndexBenchmark { void QueryWorld(benchmark::State& state); const TestGenerator data_type_; - const int num_entities_; + const size_t num_entities_; PhTree tree_; std::default_random_engine random_engine_; @@ -73,8 +73,8 @@ template void IndexBenchmark::SetupWorld(benchmark::State& state) { logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); - for (int i = 0; i < num_entities_; ++i) { - tree_.emplace(points_[i], i); + for (size_t i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i], (int)i); } state.counters["total_result_count"] = benchmark::Counter(0); diff --git a/phtree/benchmark/extent_benchmark_weird.cc b/benchmark/extent_benchmark_weird.cc similarity index 98% rename from phtree/benchmark/extent_benchmark_weird.cc rename to benchmark/extent_benchmark_weird.cc index bee6ecb0..7653bece 100644 --- a/phtree/benchmark/extent_benchmark_weird.cc +++ b/benchmark/extent_benchmark_weird.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include "benchmark_util.h" #include "logging.h" -#include "phtree/benchmark/benchmark_util.h" #include "phtree/phtree.h" #include #include @@ -48,7 +48,7 @@ class IndexBenchmark { void QueryWorld(benchmark::State& state); const TestGenerator data_type_; - const int num_entities_; + const size_t num_entities_; PhTree tree_; std::default_random_engine random_engine_; @@ -81,8 +81,8 @@ template void IndexBenchmark::SetupWorld(benchmark::State& state) { logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); - for (int i = 0; i < num_entities_; ++i) { - tree_.emplace(points_[i], i); + for (size_t i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i], (int)i); } state.counters["total_result_count"] = benchmark::Counter(0); diff --git a/phtree/benchmark/find_benchmark.cc b/benchmark/find_benchmark.cc similarity index 97% rename from phtree/benchmark/find_benchmark.cc rename to benchmark/find_benchmark.cc index 0621dd7b..d02fa6a0 100644 --- a/phtree/benchmark/find_benchmark.cc +++ b/benchmark/find_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include "benchmark_util.h" #include "logging.h" -#include "phtree/benchmark/benchmark_util.h" #include "phtree/phtree.h" #include #include @@ -49,7 +49,7 @@ class IndexBenchmark { int QueryWorldFind(benchmark::State& state); const TestGenerator data_type_; - const int num_entities_; + const size_t num_entities_; const QueryType query_type_; PhTree tree_; @@ -102,8 +102,8 @@ template void IndexBenchmark::SetupWorld(benchmark::State& state) { logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); - for (int i = 0; i < num_entities_; ++i) { - tree_.emplace(points_[i], i); + for (size_t i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i], (int)i); } state.counters["total_result_count"] = benchmark::Counter(0); diff --git a/benchmark/hd_erase_d_benchmark.cc b/benchmark/hd_erase_d_benchmark.cc new file mode 100644 index 00000000..f2650c12 --- /dev/null +++ b/benchmark/hd_erase_d_benchmark.cc @@ -0,0 +1,146 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "benchmark_util.h" +#include "logging.h" +#include "phtree/phtree.h" +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const int GLOBAL_MAX = 10000; +using payload_t = std::uint32_t; + +/* + * Benchmark for removing entries. 
+ */ +template +class IndexBenchmark { + public: + IndexBenchmark(benchmark::State& state); + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void Insert(benchmark::State& state, PhTreeD& tree); + void Remove(benchmark::State& state, PhTreeD& tree); + + const TestGenerator data_type_; + const size_t num_entities_; + + std::default_random_engine random_engine_; + std::uniform_real_distribution<> cube_distribution_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark(benchmark::State& state) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, points_(state.range(0)) { + logging::SetupDefaultLogging(); + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + auto* tree = new PhTreeD(); + Insert(state, *tree); + state.ResumeTiming(); + + Remove(state, *tree); + + state.PauseTiming(); + // avoid measuring deallocation + delete tree; + state.ResumeTiming(); + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); + + state.counters["total_remove_count"] = benchmark::Counter(0); + state.counters["remove_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + + logging::info("World setup complete."); +} + +template +void IndexBenchmark::Insert(benchmark::State&, PhTreeD& tree) { + for (size_t i = 0; i < num_entities_; ++i) { + tree.emplace(points_[i], (int)i); + } +} + +template +void IndexBenchmark::Remove(benchmark::State& state, PhTreeD& tree) { + size_t n = 0; + for (size_t i = 0; i < num_entities_; ++i) { + n += tree.erase(points_[i]); + } + + state.counters["total_remove_count"] += n; + state.counters["remove_rate"] += n; +} + +} // namespace + +template +void PhTree6D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<6> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree10D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<10> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree20D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<20> benchmark{state}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_generator, num_entities +BENCHMARK_CAPTURE(PhTree6D, ERASE, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree10D, ERASE, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree20D, ERASE, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/benchmark/hd_insert_d_benchmark.cc b/benchmark/hd_insert_d_benchmark.cc new file mode 100644 index 00000000..b2f8d9c7 --- /dev/null +++ b/benchmark/hd_insert_d_benchmark.cc @@ -0,0 +1,132 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "benchmark_util.h" +#include "logging.h" +#include "phtree/phtree.h" +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const double GLOBAL_MAX = 10000; + +/* + * Benchmark for adding entries to the index. + */ +template +class IndexBenchmark { + using Index = PhTreeD; + + public: + explicit IndexBenchmark(benchmark::State& state); + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void Insert(benchmark::State& state, Index& tree); + + const TestGenerator data_type_; + const size_t num_entities_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark(benchmark::State& state) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) +, points_(state.range(0)) { + logging::SetupDefaultLogging(); + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + auto* tree = new Index(); + state.ResumeTiming(); + + Insert(state, *tree); + + // we do this top avoid measuring deallocation + state.PauseTiming(); + delete tree; + state.ResumeTiming(); + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); + + state.counters["total_put_count"] = benchmark::Counter(0); + state.counters["put_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + + logging::info("World setup complete."); +} + +template +void IndexBenchmark::Insert(benchmark::State& state, Index& tree) { + for (size_t i = 0; i < num_entities_; ++i) { + PhPointD& p = points_[i]; + tree.emplace(p, (int)i); + } + + state.counters["total_put_count"] += num_entities_; + state.counters["put_rate"] += num_entities_; +} + +} // namespace + +template +void PhTree6D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<6> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree10D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<10> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree20D(benchmark::State& state, Arguments&&...) 
{ + IndexBenchmark<20> benchmark{state}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_generator, num_entities +BENCHMARK_CAPTURE(PhTree6D, INSERT, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree10D, INSERT, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree20D, INSERT, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/benchmark/hd_knn_d_benchmark.cc b/benchmark/hd_knn_d_benchmark.cc new file mode 100644 index 00000000..44ecad2a --- /dev/null +++ b/benchmark/hd_knn_d_benchmark.cc @@ -0,0 +1,152 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "benchmark_util.h" +#include "logging.h" +#include "phtree/phtree.h" +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const double GLOBAL_MAX = 10000; +using payload_t = std::uint32_t; + +/* + * Benchmark for k-nearest-neighbour queries. 
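The high-dimensional kNN benchmark added below drives its query through begin_knn_query() with a Euclidean distance function. A minimal usage sketch restricted to the calls that appear in this file; it assumes the phtree headers are on the include path, and the 3D dimensionality, coordinates and int payload are illustrative only.

#include "phtree/phtree.h"
#include <cstdio>

using namespace improbable::phtree;

int main() {
    PhTreeD<3, int> tree;
    tree.emplace(PhPointD<3>{1, 1, 1}, 0);
    tree.emplace(PhPointD<3>{2, 2, 2}, 1);
    tree.emplace(PhPointD<3>{9, 9, 9}, 2);

    // Iterate over the 2 entries closest to the query point.
    PhPointD<3> center{0, 0, 0};
    size_t n = 0;
    for (auto q = tree.begin_knn_query(2, center, DistanceEuclidean<3>()); q != tree.end(); ++q) {
        ++n;  // dereferencing q gives access to the stored payload
    }
    std::printf("knn results: %zu\n", n);
    return 0;
}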
+ */ +template +class IndexBenchmark { + public: + IndexBenchmark(benchmark::State& state); + + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void QueryWorld(benchmark::State& state, PhPointD& center); + void CreateQuery(PhPointD& center); + + const TestGenerator data_type_; + const size_t num_entities_; + const size_t knn_result_size_; + + PhTreeD tree_; + std::default_random_engine random_engine_; + std::uniform_real_distribution<> cube_distribution_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark(benchmark::State& state) +: data_type_{static_cast(state.range(2))} +, num_entities_(state.range(0)) +, knn_result_size_(state.range(1)) +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, points_(state.range(0)) { + logging::SetupDefaultLogging(); + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + PhPointD center; + CreateQuery(center); + state.ResumeTiming(); + + QueryWorld(state, center); + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); + for (size_t i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i], (int)i); + } + + state.counters["total_query_count"] = benchmark::Counter(0); + state.counters["query_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["avg_result_count"] = benchmark::Counter(0, benchmark::Counter::kAvgIterations); + + logging::info("World setup complete."); +} + +template +void IndexBenchmark::QueryWorld(benchmark::State& state, PhPointD& center) { + size_t n = 0; + for (auto q = tree_.begin_knn_query(knn_result_size_, center, DistanceEuclidean()); + q != tree_.end(); + ++q) { + ++n; + } + + state.counters["total_query_count"] += 1; + state.counters["query_rate"] += 1; + state.counters["avg_result_count"] += n; +} + +template +void IndexBenchmark::CreateQuery(PhPointD& center) { + for (dimension_t d = 0; d < DIM; ++d) { + center[d] = cube_distribution_(random_engine_) * GLOBAL_MAX; + } +} + +} // namespace + +template +void PhTree6D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<6> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree10D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<10> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree20D(benchmark::State& state, Arguments&&...) 
{ + IndexBenchmark<20> benchmark{state}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_type, num_entities, query_result_size +BENCHMARK_CAPTURE(PhTree6D, KNN, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {1, 10}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree10D, KNN, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {1, 10}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree20D, KNN, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {1, 10}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/benchmark/hd_query_d_benchmark.cc b/benchmark/hd_query_d_benchmark.cc new file mode 100644 index 00000000..ac2ac82a --- /dev/null +++ b/benchmark/hd_query_d_benchmark.cc @@ -0,0 +1,214 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "benchmark_util.h" +#include "logging.h" +#include "phtree/phtree.h" +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const double GLOBAL_MAX = 10000; + +enum QueryType { MIN_MAX_ITER, MIN_MAX_FOR_EACH }; + +template +using BoxType = PhBoxD; + +template +using PointType = PhPointD; + +template +using TreeType = PhTreeD; + +/* + * Benchmark for window queries. + */ +template +class IndexBenchmark { + public: + IndexBenchmark(benchmark::State& state, double avg_query_result_size_ = 100); + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void QueryWorld(benchmark::State& state, BoxType& query_box); + void CreateQuery(BoxType& query_box); + + const TestGenerator data_type_; + const size_t num_entities_; + const double avg_query_result_size_; + + constexpr int query_edge_length() { + return (int)(GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. 
/ (double)DIM)); + }; + + TreeType tree_; + std::default_random_engine random_engine_; + std::uniform_real_distribution<> cube_distribution_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark( + benchmark::State& state, double avg_query_result_size) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) +, avg_query_result_size_(avg_query_result_size) +, tree_{} +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, points_(state.range(0)) { + logging::SetupDefaultLogging(); + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + BoxType query_box; + CreateQuery(query_box); + state.ResumeTiming(); + + QueryWorld(state, query_box); + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); + for (size_t i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i], (int)i); + } + + state.counters["total_result_count"] = benchmark::Counter(0); + state.counters["query_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["result_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["avg_result_count"] = benchmark::Counter(0, benchmark::Counter::kAvgIterations); + + logging::info("World setup complete."); +} + +template +struct Counter { + void operator()(PointType, T&) { + ++n_; + } + + size_t n_ = 0; +}; + +template +size_t Count_MMI(TreeType& tree, BoxType& query_box) { + size_t n = 0; + for (auto q = tree.begin_query(query_box); q != tree.end(); ++q) { + ++n; + } + return n; +} + +template +size_t Count_MMFE(TreeType& tree, BoxType& query_box) { + Counter callback; + tree.for_each(query_box, callback); + return callback.n_; +} + +template +void IndexBenchmark::QueryWorld(benchmark::State& state, BoxType& query_box) { + size_t n = 0; + switch (QUERY_TYPE) { + case MIN_MAX_ITER: + n = Count_MMI(tree_, query_box); + break; + case MIN_MAX_FOR_EACH: + n = Count_MMFE(tree_, query_box); + break; + } + + state.counters["total_result_count"] += n; + state.counters["query_rate"] += 1; + state.counters["result_rate"] += n; + state.counters["avg_result_count"] += n; +} + +template +void IndexBenchmark::CreateQuery(BoxType& query_box) { + int length = query_edge_length(); + // scale to ensure query lies within boundary + double scale = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; + for (dimension_t d = 0; d < DIM; ++d) { + auto s = cube_distribution_(random_engine_); + s = s * scale; + query_box.min()[d] = s; + query_box.max()[d] = s + length; + } +} + +} // namespace + +template +void PhTree6D_FE(benchmark::State& state, Arguments&&...) { + IndexBenchmark<6, MIN_MAX_FOR_EACH> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree6D_IT(benchmark::State& state, Arguments&&...) { + IndexBenchmark<6, MIN_MAX_ITER> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree10D_IT(benchmark::State& state, Arguments&&...) { + IndexBenchmark<10, MIN_MAX_ITER> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree20D_IT(benchmark::State& state, Arguments&&...) 
{ + IndexBenchmark<20, MIN_MAX_ITER> benchmark{state}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_type, num_entities, query_result_size +BENCHMARK_CAPTURE(PhTree6D_FE, WQ, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree6D_IT, WQ, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree10D_IT, WQ, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree20D_IT, WQ, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/phtree/benchmark/insert_benchmark.cc b/benchmark/insert_benchmark.cc similarity index 94% rename from phtree/benchmark/insert_benchmark.cc rename to benchmark/insert_benchmark.cc index c48e7778..0cdaadfe 100644 --- a/phtree/benchmark/insert_benchmark.cc +++ b/benchmark/insert_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include "benchmark_util.h" #include "logging.h" -#include "phtree/benchmark/benchmark_util.h" #include "phtree/phtree.h" #include @@ -52,7 +52,7 @@ class IndexBenchmark { void Insert(benchmark::State& state, PhTree& tree); const TestGenerator data_type_; - const int num_entities_; + const size_t num_entities_; const InsertionType insertion_type_; std::vector> points_; }; @@ -99,20 +99,20 @@ template void IndexBenchmark::Insert(benchmark::State& state, PhTree& tree) { switch (insertion_type_) { case INSERT: { - for (int i = 0; i < num_entities_; ++i) { - tree.insert(points_[i], i); + for (size_t i = 0; i < num_entities_; ++i) { + tree.insert(points_[i], (int)i); } break; } case EMPLACE: { - for (int i = 0; i < num_entities_; ++i) { - tree.emplace(points_[i], i); + for (size_t i = 0; i < num_entities_; ++i) { + tree.emplace(points_[i], (int)i); } break; } case SQUARE_BR: { - for (int i = 0; i < num_entities_; ++i) { - tree[points_[i]] = i; + for (size_t i = 0; i < num_entities_; ++i) { + tree[points_[i]] = (int)i; } break; } diff --git a/phtree/benchmark/insert_box_d_benchmark.cc b/benchmark/insert_box_d_benchmark.cc similarity index 96% rename from phtree/benchmark/insert_box_d_benchmark.cc rename to benchmark/insert_box_d_benchmark.cc index 817e848d..8b581171 100644 --- a/phtree/benchmark/insert_box_d_benchmark.cc +++ b/benchmark/insert_box_d_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
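The hd_query_d benchmark above counts window-query results in two ways, MIN_MAX_ITER via an iterator from begin_query() and MIN_MAX_FOR_EACH via a for_each() callback. A condensed sketch of both counting styles on a small PhTreeD; the box and point values are illustrative and the phtree headers are assumed to be available.

#include "phtree/phtree.h"
#include <cstdio>

using namespace improbable::phtree;

struct Counter {
    void operator()(const PhPointD<3>&, const int&) {
        ++n_;
    }
    size_t n_ = 0;
};

int main() {
    PhTreeD<3, int> tree;
    tree.emplace(PhPointD<3>{1, 1, 1}, 0);
    tree.emplace(PhPointD<3>{5, 5, 5}, 1);

    PhBoxD<3> box({0, 0, 0}, {3, 3, 3});

    // Variant 1: explicit iteration (MIN_MAX_ITER in the benchmark).
    size_t n_iter = 0;
    for (auto q = tree.begin_query(box); q != tree.end(); ++q) {
        ++n_iter;
    }

    // Variant 2: callback-based traversal (MIN_MAX_FOR_EACH in the benchmark).
    Counter counter;
    tree.for_each(box, counter);

    std::printf("iter=%zu for_each=%zu\n", n_iter, counter.n_);
    return 0;
}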
*/ +#include "benchmark_util.h" #include "logging.h" -#include "phtree/benchmark/benchmark_util.h" #include "phtree/phtree.h" #include @@ -43,7 +43,7 @@ class IndexBenchmark { void Insert(benchmark::State& state, PhTreeBoxD& tree); const TestGenerator data_type_; - const int num_entities_; + const size_t num_entities_; std::vector> boxes_; }; @@ -84,9 +84,9 @@ void IndexBenchmark::SetupWorld(benchmark::State& state) { template void IndexBenchmark::Insert(benchmark::State& state, PhTreeBoxD& tree) { - for (int i = 0; i < num_entities_; ++i) { + for (size_t i = 0; i < num_entities_; ++i) { PhBoxD& p = boxes_[i]; - tree.emplace(p, i); + tree.emplace(p, (int)i); } state.counters["total_put_count"] += num_entities_; diff --git a/phtree/benchmark/insert_d_benchmark.cc b/benchmark/insert_d_benchmark.cc similarity index 93% rename from phtree/benchmark/insert_d_benchmark.cc rename to benchmark/insert_d_benchmark.cc index 7ef06a36..871133e7 100644 --- a/phtree/benchmark/insert_d_benchmark.cc +++ b/benchmark/insert_d_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include "benchmark_util.h" #include "logging.h" -#include "phtree/benchmark/benchmark_util.h" #include "phtree/phtree.h" #include @@ -31,6 +31,7 @@ const double GLOBAL_MAX = 10000; */ template class IndexBenchmark { + using Index = PhTreeD; public: IndexBenchmark(benchmark::State& state, TestGenerator data_type, int num_entities); @@ -39,10 +40,10 @@ class IndexBenchmark { private: void SetupWorld(benchmark::State& state); - void Insert(benchmark::State& state, PhTreeD& tree); + void Insert(benchmark::State& state, Index& tree); const TestGenerator data_type_; - const int num_entities_; + const size_t num_entities_; std::vector> points_; }; @@ -58,7 +59,7 @@ template void IndexBenchmark::Benchmark(benchmark::State& state) { for (auto _ : state) { state.PauseTiming(); - auto* tree = new PhTreeD(); + auto* tree = new Index(); state.ResumeTiming(); Insert(state, *tree); @@ -82,10 +83,10 @@ void IndexBenchmark::SetupWorld(benchmark::State& state) { } template -void IndexBenchmark::Insert(benchmark::State& state, PhTreeD& tree) { - for (int i = 0; i < num_entities_; ++i) { +void IndexBenchmark::Insert(benchmark::State& state, Index& tree) { + for (size_t i = 0; i < num_entities_; ++i) { PhPointD& p = points_[i]; - tree.emplace(p, i); + tree.emplace(p, (int)i); } state.counters["total_put_count"] += num_entities_; diff --git a/phtree/benchmark/knn_d_benchmark.cc b/benchmark/knn_d_benchmark.cc similarity index 96% rename from phtree/benchmark/knn_d_benchmark.cc rename to benchmark/knn_d_benchmark.cc index 7c56b852..dcf5abf1 100644 --- a/phtree/benchmark/knn_d_benchmark.cc +++ b/benchmark/knn_d_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
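insert_benchmark.cc above exercises three ways of adding an entry: insert(), emplace(), and operator[] assignment (the INSERT, EMPLACE and SQUARE_BR scenarios). A compact sketch of the three calls on an integer-key PhTree, assuming the phtree headers are available; keys and payload values are illustrative.

#include "phtree/phtree.h"
#include <cstdio>

using namespace improbable::phtree;

int main() {
    PhTree<3, int> tree;
    PhPoint<3> p1{1, 2, 3};
    PhPoint<3> p2{4, 5, 6};
    PhPoint<3> p3{7, 8, 9};

    tree.insert(p1, 1);   // INSERT scenario
    tree.emplace(p2, 2);  // EMPLACE scenario
    tree[p3] = 3;         // SQUARE_BR scenario: operator[] plus assignment

    std::printf("size=%zu\n", tree.size());
    return 0;
}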
*/ +#include "benchmark_util.h" #include "logging.h" -#include "phtree/benchmark/benchmark_util.h" #include "phtree/phtree.h" #include #include @@ -44,8 +44,8 @@ class IndexBenchmark { void CreateQuery(PhPointD& center); const TestGenerator data_type_; - const int num_entities_; - const double knn_result_size_; + const size_t num_entities_; + const size_t knn_result_size_; PhTreeD tree_; std::default_random_engine random_engine_; @@ -82,8 +82,8 @@ template void IndexBenchmark::SetupWorld(benchmark::State& state) { logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); - for (int i = 0; i < num_entities_; ++i) { - tree_.emplace(points_[i], i); + for (size_t i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i], (int)i); } state.counters["total_result_count"] = benchmark::Counter(0); diff --git a/phtree/benchmark/logging.h b/benchmark/logging.h similarity index 75% rename from phtree/benchmark/logging.h rename to benchmark/logging.h index 14b7ae68..64573099 100644 --- a/phtree/benchmark/logging.h +++ b/benchmark/logging.h @@ -22,11 +22,34 @@ constexpr auto kInternalLoggerName = "internal"; // Sets up spdlog for internal and external. If you need to do some logging before doing this // call, use instead CaptureLogMessagesToBufferSink()/SetupLoggingAndFlushBuffer. -void SetupLogging(std::vector sinks, spdlog::level::level_enum log_level); +void SetupLogging(std::vector sinks, spdlog::level::level_enum log_level) { + auto& console_sink = sinks.emplace_back(std::make_shared()); + console_sink->set_level(log_level); + + // Find the minimum log level, in case one of the sinks passed to us has a lower log level. + const auto& sink_with_lowest_log_level = *std::min_element( + sinks.begin(), + sinks.end(), + [](const spdlog::sink_ptr& a, const spdlog::sink_ptr& b) -> bool { + return a->level() < b->level(); + }); + spdlog::level::level_enum min_log_level = + std::min(sink_with_lowest_log_level->level(), log_level); + + // Create the external logger, worker logger and the internal (default) logger from the same log + // sinks. Each logsink can use `GetLoggerTypeFromMessage` to determine which logger a message + // was logged to. + spdlog::set_default_logger( + std::make_shared(kInternalLoggerName, sinks.begin(), sinks.end())); + spdlog::set_level(min_log_level); + spdlog::flush_on(min_log_level); +} // Sets up default logging typically used for tests/benchmarks. Also used for default // initialization if the logging hasn't been initialized before the first logging line. -void SetupDefaultLogging(); +void SetupDefaultLogging() { + SetupLogging({}, spdlog::level::warn); +} template inline void log( diff --git a/phtree/benchmark/query_benchmark.cc b/benchmark/query_benchmark.cc similarity index 93% rename from phtree/benchmark/query_benchmark.cc rename to benchmark/query_benchmark.cc index b0f50f39..0aca154c 100644 --- a/phtree/benchmark/query_benchmark.cc +++ b/benchmark/query_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include "benchmark_util.h" #include "logging.h" -#include "phtree/benchmark/benchmark_util.h" #include "phtree/phtree.h" #include #include @@ -49,11 +49,11 @@ class IndexBenchmark { void CreateQuery(PhBox& query); const TestGenerator data_type_; - const int num_entities_; + const size_t num_entities_; const double avg_query_result_size_; constexpr int query_endge_length() { - return GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. / (double)DIM); + return (int)(GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. / (double)DIM)); }; PhTree tree_; @@ -94,8 +94,8 @@ template void IndexBenchmark::SetupWorld(benchmark::State& state) { logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); - for (int i = 0; i < num_entities_; ++i) { - tree_.emplace(points_[i], i); + for (size_t i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i], (int)i); } state.counters["total_result_count"] = benchmark::Counter(0); @@ -125,8 +125,8 @@ void IndexBenchmark::CreateQuery(PhBox& query_box) { // scale to ensure query lies within boundary double scale = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; for (dimension_t d = 0; d < DIM; ++d) { - auto s = cube_distribution_(random_engine_); - s = s * scale; + scalar_64_t s = cube_distribution_(random_engine_); + s = (scalar_64_t)(s * scale); query_box.min()[d] = s; query_box.max()[d] = s + length; } diff --git a/phtree/benchmark/query_box_d_benchmark.cc b/benchmark/query_box_d_benchmark.cc similarity index 96% rename from phtree/benchmark/query_box_d_benchmark.cc rename to benchmark/query_box_d_benchmark.cc index ecd736a8..bcf6d90f 100644 --- a/phtree/benchmark/query_box_d_benchmark.cc +++ b/benchmark/query_box_d_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include "benchmark_util.h" #include "logging.h" -#include "phtree/benchmark/benchmark_util.h" #include "phtree/phtree.h" #include #include @@ -61,11 +61,11 @@ class IndexBenchmark { void CreateQuery(BoxType& query_box); const TestGenerator data_type_; - const int num_entities_; + const size_t num_entities_; const double avg_query_result_size_; constexpr int query_endge_length() { - return GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. / (double)DIM); + return (int)(GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. 
/ (double)DIM)); }; TreeType tree_; @@ -106,8 +106,8 @@ template void IndexBenchmark::SetupWorld(benchmark::State& state) { logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreateBoxData(boxes_, data_type_, num_entities_, 0, GLOBAL_MAX, BOX_LEN); - for (int i = 0; i < num_entities_; ++i) { - tree_.emplace(boxes_[i], i); + for (size_t i = 0; i < num_entities_; ++i) { + tree_.emplace(boxes_[i], (int)i); } state.counters["total_result_count"] = benchmark::Counter(0); @@ -145,7 +145,7 @@ size_t Count_MMFE(TreeType& tree, BoxType& query_box) { template void IndexBenchmark::QueryWorld(benchmark::State& state, BoxType& query_box) { - int n = 0; + size_t n = 0; switch (QUERY_TYPE) { case MIN_MAX_ITER: n = Count_MMI(tree_, query_box); diff --git a/phtree/benchmark/query_d_benchmark.cc b/benchmark/query_d_benchmark.cc similarity index 96% rename from phtree/benchmark/query_d_benchmark.cc rename to benchmark/query_d_benchmark.cc index 57fd2268..31509f62 100644 --- a/phtree/benchmark/query_d_benchmark.cc +++ b/benchmark/query_d_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include "benchmark_util.h" #include "logging.h" -#include "phtree/benchmark/benchmark_util.h" #include "phtree/phtree.h" #include #include @@ -60,11 +60,11 @@ class IndexBenchmark { void CreateQuery(BoxType& query_box); const TestGenerator data_type_; - const int num_entities_; + const size_t num_entities_; const double avg_query_result_size_; constexpr int query_endge_length() { - return GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. / (double)DIM); + return (int)(GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. / (double)DIM)); }; TreeType tree_; @@ -106,8 +106,8 @@ template void IndexBenchmark::SetupWorld(benchmark::State& state) { logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); - for (int i = 0; i < num_entities_; ++i) { - tree_.emplace(points_[i], i); + for (size_t i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i], (int)i); } state.counters["total_result_count"] = benchmark::Counter(0); @@ -145,7 +145,7 @@ size_t Count_MMFE(TreeType& tree, BoxType& query_box) { template void IndexBenchmark::QueryWorld(benchmark::State& state, BoxType& query_box) { - int n = 0; + size_t n = 0; switch (QUERY_TYPE) { case MIN_MAX_ITER: n = Count_MMI(tree_, query_box); diff --git a/phtree/benchmark/query_mm_box_d_benchmark.cc b/benchmark/query_mm_box_d_benchmark.cc similarity index 96% rename from phtree/benchmark/query_mm_box_d_benchmark.cc rename to benchmark/query_mm_box_d_benchmark.cc index 538e73d9..9f42cccb 100644 --- a/phtree/benchmark/query_mm_box_d_benchmark.cc +++ b/benchmark/query_mm_box_d_benchmark.cc @@ -144,7 +144,7 @@ struct CounterMultiMap { }; template -typename std::enable_if::type CountEntries( +typename std::enable_if::type CountEntries( TestMap& tree, const Query& query) { CounterTreeWithMap counter{query.box_, 0}; tree.for_each(query.box_, counter); @@ -152,7 +152,7 @@ typename std::enable_if::type CountEnt } template -int CountEntries(TestMap& tree, const Query& query) { +size_t CountEntries(TestMap& tree, const Query& query) { CounterMultiMap counter{query.box_, 0}; tree.for_each(query.box_, counter); return counter.n_; @@ -175,7 +175,7 @@ void IndexBenchmark::SetupWorld(benchmark::State& state) { template void 
IndexBenchmark::QueryWorld(benchmark::State& state, const Query& query) { - int n = CountEntries(tree_, query); + size_t n = CountEntries(tree_, query); state.counters["query_rate"] += 1; state.counters["result_rate"] += n; diff --git a/phtree/benchmark/query_mm_d_benchmark.cc b/benchmark/query_mm_d_benchmark.cc similarity index 78% rename from phtree/benchmark/query_mm_d_benchmark.cc rename to benchmark/query_mm_d_benchmark.cc index 9e819450..335a529d 100644 --- a/phtree/benchmark/query_mm_d_benchmark.cc +++ b/benchmark/query_mm_d_benchmark.cc @@ -32,7 +32,7 @@ namespace { const double GLOBAL_MAX = 10000; -enum Scenario { TREE_WITH_MAP, MULTI_MAP }; +enum Scenario { TREE_WITH_MAP, MULTI_MAP, MULTI_MAP_STD }; using TestPoint = PhPointD<3>; using QueryBox = PhBoxD<3>; @@ -52,7 +52,10 @@ template using TestMap = typename std::conditional_t< SCENARIO == TREE_WITH_MAP, PhTreeD>, - PhTreeMultiMapD>>; + typename std::conditional_t< + SCENARIO == MULTI_MAP, + PhTreeMultiMapD, b_plus_tree_hash_set>, + PhTreeMultiMapD, std::unordered_set>>>; template class IndexBenchmark { @@ -120,12 +123,20 @@ void InsertEntry( tree.emplace(point, data); } -bool CheckPosition(const payload_t& entity, const TestPoint& center, double radius) { +template +void InsertEntry( + TestMap& tree, + const PhPointD& point, + const payload_t& data) { + tree.emplace(point, data); +} + +int CheckPosition(const payload_t& entity, const TestPoint& center, double radius) { const auto& point = entity; - double dx = center[0] - point[0]; - double dy = center[1] - point[1]; - double dz = center[2] - point[2]; - return dx * dx + dy * dy + dz * dz <= radius * radius; + bool dx = abs(center[0] - point[0]) <= radius; + bool dy = abs(center[1] - point[1]) <= radius; + bool dz = abs(center[2] - point[2]) <= radius; + return dx && dy && dz ? 1 : -100000000; } struct CounterTreeWithMap { @@ -150,7 +161,7 @@ struct CounterMultiMap { }; template -typename std::enable_if::type CountEntries( +typename std::enable_if::type CountEntries( TestMap& tree, const Query& query) { CounterTreeWithMap counter{query.center, query.radius, 0}; tree.for_each(query.box, counter); @@ -158,7 +169,14 @@ typename std::enable_if::type CountEnt } template -int CountEntries(TestMap& tree, const Query& query) { +size_t CountEntries(TestMap& tree, const Query& query) { + CounterMultiMap counter{query.center, query.radius, 0}; + tree.for_each(query.box, counter); + return counter.n_; +} + +template +size_t CountEntries(TestMap& tree, const Query& query) { CounterMultiMap counter{query.center, query.radius, 0}; tree.for_each(query.box, counter); return counter.n_; @@ -181,7 +199,7 @@ void IndexBenchmark::SetupWorld(benchmark::State& state) { template void IndexBenchmark::QueryWorld(benchmark::State& state, const Query& query) { - int n = CountEntries(tree_, query); + size_t n = CountEntries(tree_, query); state.counters["query_rate"] += 1; state.counters["result_rate"] += n; @@ -209,11 +227,17 @@ void PhTree3D(benchmark::State& state, Arguments&&... arguments) { } template -void PhTreeMultiMapM3D(benchmark::State& state, Arguments&&... arguments) { +void PhTreeMultiMap3D(benchmark::State& state, Arguments&&... arguments) { IndexBenchmark<3, Scenario::MULTI_MAP> benchmark{state, arguments...}; benchmark.Benchmark(state); } +template +void PhTreeMultiMapStd3D(benchmark::State& state, Arguments&&... 
arguments) { + IndexBenchmark<3, Scenario::MULTI_MAP_STD> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + // index type, scenario name, data_type, num_entities, avg_query_result_size // PhTree BENCHMARK_CAPTURE(PhTree3D, WQ_100, 100.0) @@ -222,7 +246,13 @@ BENCHMARK_CAPTURE(PhTree3D, WQ_100, 100.0) ->Unit(benchmark::kMillisecond); // PhTreeMultiMap -BENCHMARK_CAPTURE(PhTreeMultiMapM3D, WQ_100, 100.0) +BENCHMARK_CAPTURE(PhTreeMultiMap3D, WQ_100, 100.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +// PhTreeMultiMap +BENCHMARK_CAPTURE(PhTreeMultiMapStd3D, WQ_100, 100.0) ->RangeMultiplier(10) ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); diff --git a/benchmark/query_mm_d_filter_benchmark.cc b/benchmark/query_mm_d_filter_benchmark.cc new file mode 100644 index 00000000..4cfbbdf8 --- /dev/null +++ b/benchmark/query_mm_d_filter_benchmark.cc @@ -0,0 +1,350 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "benchmark_util.h" +#include "logging.h" +#include "phtree/phtree.h" +#include "phtree/phtree_multimap.h" +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +/* + * Benchmark for querying entries in multi-map implementations. + * This benchmarks uses a SPHERE shaped query! 
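The multi-map query benchmark above adds a MULTI_MAP_STD scenario and selects the index type at compile time with nested std::conditional_t over the Scenario enum. A reduced sketch of that dispatch pattern; the container aliases here are plain standard types used only to show the selection, whereas the benchmark plugs in the PhTree map types and bucket sets.

#include <cstdint>
#include <map>
#include <set>
#include <type_traits>
#include <unordered_set>

enum Scenario { TREE_WITH_MAP, MULTI_MAP, MULTI_MAP_STD };

using payload_t = std::uint64_t;

// Stand-ins for the three index flavours compared in the benchmark.
using TreeWithMap = std::map<int, std::set<payload_t>>;
using MultiMapBpTree = std::multimap<int, payload_t>;    // placeholder for the b_plus_tree_hash_set bucket variant
using MultiMapStd = std::unordered_multiset<payload_t>;  // placeholder for the std::unordered_set bucket variant

// Pick the container type from the scenario, mirroring the nested std::conditional_t in the diff.
template <Scenario SCENARIO>
using TestMap = typename std::conditional_t<
    SCENARIO == TREE_WITH_MAP,
    TreeWithMap,
    typename std::conditional_t<SCENARIO == MULTI_MAP, MultiMapBpTree, MultiMapStd>>;

static_assert(std::is_same_v<TestMap<TREE_WITH_MAP>, TreeWithMap>);
static_assert(std::is_same_v<TestMap<MULTI_MAP>, MultiMapBpTree>);
static_assert(std::is_same_v<TestMap<MULTI_MAP_STD>, MultiMapStd>);

int main() {
    return 0;
}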
+ */ +namespace { + +const double GLOBAL_MAX = 10000; + +enum Scenario { SPHERE_WQ, SPHERE, WQ, SPHERE_IT_WQ, LEGACY_WQ }; + +using TestPoint = PhPointD<3>; +using QueryBox = PhBoxD<3>; +using payload_t = TestPoint; +using BucketType = std::set; + +struct Query { + QueryBox box{}; + TestPoint center{}; + double radius{}; +}; + +template +using CONVERTER = ConverterIEEE; + +template +using DistanceFn = DistanceEuclidean; + +template +using TestMap = PhTreeMultiMapD>; + +template < + typename CONVERTER = ConverterIEEE<3>, + typename DISTANCE = DistanceEuclidean> +class FilterSphereLegacy { + using KeyExternal = typename CONVERTER::KeyExternal; + using KeyInternal = typename CONVERTER::KeyInternal; + using ScalarInternal = typename CONVERTER::ScalarInternal; + using ScalarExternal = typename CONVERTER::ScalarExternal; + + static constexpr auto DIM = CONVERTER::DimInternal; + + public: + FilterSphereLegacy( + const KeyExternal& center, + const ScalarExternal& radius, + CONVERTER converter = CONVERTER(), + DISTANCE distance_function = DISTANCE()) + : center_external_{center} + , center_internal_{converter.pre(center)} + , radius_{radius} + , converter_{converter} + , distance_function_{distance_function} {}; + + template + [[nodiscard]] bool IsEntryValid(const KeyInternal&, const BucketT&) const { + // We simulate a legacy filter by returning 'true' for all buckets + return true; + } + + template + [[nodiscard]] bool IsBucketEntryValid(const KeyInternal& key, const T&) const { + KeyExternal point = converter_.post(key); + return distance_function_(center_external_, point) <= radius_; + } + + /* + * Calculate whether AABB encompassing all possible points in the node intersects with the + * sphere. + */ + [[nodiscard]] bool IsNodeValid(const KeyInternal& prefix, std::uint32_t bits_to_ignore) const { + // we always want to traverse the root node (bits_to_ignore == 64) + + if (bits_to_ignore >= (MAX_BIT_WIDTH - 1)) { + return true; + } + + ScalarInternal node_min_bits = MAX_MASK << bits_to_ignore; + ScalarInternal node_max_bits = ~node_min_bits; + + KeyInternal closest_in_bounds; + for (dimension_t i = 0; i < DIM; ++i) { + // calculate lower and upper bound for dimension for given node + ScalarInternal lo = prefix[i] & node_min_bits; + ScalarInternal hi = prefix[i] | node_max_bits; + + // choose value closest to center for dimension + closest_in_bounds[i] = std::clamp(center_internal_[i], lo, hi); + } + + KeyExternal closest_point = converter_.post(closest_in_bounds); + return distance_function_(center_external_, closest_point) <= radius_; + } + + private: + const KeyExternal center_external_; + const KeyInternal center_internal_; + const ScalarExternal radius_; + const CONVERTER converter_; + const DISTANCE distance_function_; +}; + +template +class IndexBenchmark { + public: + IndexBenchmark(benchmark::State& state, double avg_query_result_size_); + + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void QueryWorld(benchmark::State& state, const Query& query); + void CreateQuery(Query& query); + + const TestGenerator data_type_; + const size_t num_entities_; + const double avg_query_result_size_; + + constexpr double query_endge_length() { + return GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. 
/ (double)DIM); + }; + + TestMap tree_; + std::default_random_engine random_engine_; + std::uniform_real_distribution<> cube_distribution_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark(benchmark::State& state, double avg_query_result_size) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) +, avg_query_result_size_(avg_query_result_size) +, tree_{} +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, points_(num_entities_) { + logging::SetupDefaultLogging(); + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + Query query{}; + for (auto _ : state) { + state.PauseTiming(); + CreateQuery(query); + state.ResumeTiming(); + + QueryWorld(state, query); + } +} + +template +void InsertEntry(TestMap& tree, const PhPointD& point, const payload_t& data) { + tree.emplace(point, data); +} + +bool CheckPosition(const payload_t& entity, const TestPoint& center, double radius) { + const auto& point = entity; + double dx = center[0] - point[0]; + double dy = center[1] - point[1]; + double dz = center[2] - point[2]; + return dx * dx + dy * dy + dz * dz <= radius * radius; +} + +struct CounterCheckPosition { + template + void operator()(const PhPointD<3>& p, const T&) { + n_ += CheckPosition(p, center_, radius_); + } + const TestPoint& center_; + double radius_; + size_t n_; +}; + +struct Counter { + void operator()(const PhPointD<3>&, const payload_t&) { + ++n_; + } + size_t n_; +}; + +template +typename std::enable_if::type CountEntries( + TestMap& tree, const Query& query) { + FilterMultiMapSphere filter{query.center, query.radius, tree.converter(), DistanceFn()}; + Counter counter{0}; + tree.for_each(query.box, counter, filter); + return counter.n_; +} + +template +typename std::enable_if::type CountEntries( + TestMap& tree, const Query& query) { + FilterMultiMapSphere filter{query.center, query.radius, tree.converter(), DistanceFn()}; + Counter counter{0}; + tree.for_each(counter, filter); + return counter.n_; +} + +template +typename std::enable_if::type CountEntries( + TestMap& tree, const Query& query) { + CounterCheckPosition counter{query.center, query.radius, 0}; + tree.for_each(query.box, counter); + return counter.n_; +} + +template +typename std::enable_if::type CountEntries( + TestMap& tree, const Query& query) { + FilterMultiMapSphere filter{query.center, query.radius, tree.converter(), DistanceFn()}; + Counter counter{0}; + for (auto it = tree.begin_query(query.box, filter); it != tree.end(); ++it) { + ++counter.n_; + } + return counter.n_; +} + +template +typename std::enable_if::type CountEntries( + TestMap& tree, const Query& query) { + // Legacy: use non-multi-map filter + FilterSphereLegacy filter{query.center, query.radius, tree.converter(), DistanceFn()}; + Counter counter{0}; + tree.for_each(query.box, counter, filter); + return counter.n_; +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + // create data with about 10% duplicate coordinates + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX, 0.8); + for (size_t i = 0; i < num_entities_; ++i) { + InsertEntry(tree_, points_[i], points_[i]); + } + + state.counters["query_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["result_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["avg_result_count"] = benchmark::Counter(0, 
benchmark::Counter::kAvgIterations); + logging::info("World setup complete."); +} + +template +void IndexBenchmark::QueryWorld(benchmark::State& state, const Query& query) { + size_t n = CountEntries(tree_, query); + + state.counters["query_rate"] += 1; + state.counters["result_rate"] += n; + state.counters["avg_result_count"] += n; +} + +template +void IndexBenchmark::CreateQuery(Query& query) { + double radius = query_endge_length() * 0.5; + for (dimension_t d = 0; d < DIM; ++d) { + auto x = cube_distribution_(random_engine_); + query.box.min()[d] = x - radius; + query.box.max()[d] = x + radius; + query.center[d] = x; + } + query.radius = radius; +} + +} // namespace + +template +void PhTree3DSphereWQ(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::SPHERE_WQ> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree3DSphere(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::SPHERE> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree3DWQ(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::WQ> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree3DSphereITWQ(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::SPHERE_IT_WQ> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree3DLegacyWQ(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::LEGACY_WQ> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_type, num_entities, avg_query_result_size +BENCHMARK_CAPTURE(PhTree3DSphereWQ, _100, 100.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3DSphere, _100, 100.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3DWQ, _100, 100.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3DSphereITWQ, _100, 100.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3DLegacyWQ, _100, 100.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/phtree/benchmark/update_box_d_benchmark.cc b/benchmark/update_box_d_benchmark.cc similarity index 62% rename from phtree/benchmark/update_box_d_benchmark.cc rename to benchmark/update_box_d_benchmark.cc index ab825e26..918bbc4b 100644 --- a/phtree/benchmark/update_box_d_benchmark.cc +++ b/benchmark/update_box_d_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
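The sphere filters above prune tree nodes by clamping the query sphere's centre into the node's axis-aligned bounds and checking the distance to that clamped point. The same geometric test in isolation, using plain doubles instead of the converter's internal scalars; the function name and sample values are illustrative.

#include <algorithm>
#include <array>
#include <cstdio>

constexpr int DIM = 3;
using Point = std::array<double, DIM>;

// True if the sphere (center, radius) intersects the axis-aligned box [lo, hi].
bool SphereIntersectsAabb(const Point& center, double radius, const Point& lo, const Point& hi) {
    double dist2 = 0;
    for (int d = 0; d < DIM; ++d) {
        // Closest point of the box to the centre, per dimension.
        double closest = std::clamp(center[d], lo[d], hi[d]);
        double diff = center[d] - closest;
        dist2 += diff * diff;
    }
    return dist2 <= radius * radius;
}

int main() {
    Point lo{0, 0, 0}, hi{10, 10, 10};
    std::printf("%d\n", SphereIntersectsAabb({12, 5, 5}, 3, lo, hi));  // 1: sphere overlaps the box
    std::printf("%d\n", SphereIntersectsAabb({20, 5, 5}, 3, lo, hi));  // 0: too far away
    return 0;
}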
*/ +#include "benchmark_util.h" #include "logging.h" -#include "phtree/benchmark/benchmark_util.h" #include "phtree/phtree.h" #include @@ -24,12 +24,14 @@ using namespace improbable::phtree::phbenchmark; namespace { -constexpr int UPDATES_PER_ROUND = 1000; +constexpr size_t UPDATES_PER_ROUND = 1000; constexpr double MOVE_DISTANCE = 10; const double GLOBAL_MAX = 10000; const double BOX_LEN = 10; +enum UpdateType { RELOCATE, ERASE_BY_KEY }; + template using BoxType = PhBoxD; @@ -46,14 +48,12 @@ struct UpdateOp { /* * Benchmark for updating the position of entries. */ -template +template class IndexBenchmark { public: IndexBenchmark( benchmark::State& state, - TestGenerator data_type, - int num_entities, - int updates_per_round = UPDATES_PER_ROUND, + size_t updates_per_round = UPDATES_PER_ROUND, double move_distance = MOVE_DISTANCE); void Benchmark(benchmark::State& state); @@ -75,27 +75,23 @@ class IndexBenchmark { std::uniform_int_distribution<> entity_id_distribution_; }; -template -IndexBenchmark::IndexBenchmark( - benchmark::State& state, - TestGenerator data_type, - int num_entities, - int updates_per_round, - double move_distance) -: data_type_{data_type} -, num_entities_(num_entities) +template +IndexBenchmark::IndexBenchmark( + benchmark::State& state, size_t updates_per_round, double move_distance) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) , updates_per_round_(updates_per_round) , move_distance_(move_distance) -, boxes_(num_entities) +, boxes_(num_entities_) , updates_(updates_per_round) , random_engine_{0} -, entity_id_distribution_{0, num_entities - 1} { +, entity_id_distribution_{0, static_cast(num_entities_ - 1)} { logging::SetupDefaultLogging(); SetupWorld(state); } -template -void IndexBenchmark::Benchmark(benchmark::State& state) { +template +void IndexBenchmark::Benchmark(benchmark::State& state) { for (auto _ : state) { state.PauseTiming(); BuildUpdates(); @@ -105,12 +101,12 @@ void IndexBenchmark::Benchmark(benchmark::State& state) { } } -template -void IndexBenchmark::SetupWorld(benchmark::State& state) { +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreateBoxData(boxes_, data_type_, num_entities_, 0, GLOBAL_MAX, BOX_LEN); for (size_t i = 0; i < num_entities_; ++i) { - tree_.emplace(boxes_[i], i); + tree_.emplace(boxes_[i], (int)i); } state.counters["total_upd_count"] = benchmark::Counter(0); @@ -118,8 +114,8 @@ void IndexBenchmark::SetupWorld(benchmark::State& state) { logging::info("World setup complete."); } -template -void IndexBenchmark::BuildUpdates() { +template +void IndexBenchmark::BuildUpdates() { for (auto& update : updates_) { int box_id = entity_id_distribution_(random_engine_); update.id_ = box_id; @@ -134,14 +130,37 @@ void IndexBenchmark::BuildUpdates() { } template -void IndexBenchmark::UpdateWorld(benchmark::State& state) { - size_t initial_tree_size = tree_.size(); +size_t UpdateByRelocate(TreeType& tree, std::vector>& updates) { size_t n = 0; - for (auto& update : updates_) { - size_t result_erase = tree_.erase(update.old_); - auto result_emplace = tree_.emplace(update.new_, update.id_); + for (auto& update : updates) { + n += tree.relocate(update.old_, update.new_); + } + return n; +} + +template +size_t UpdateByKey(TreeType& tree, std::vector>& updates) { + size_t n = 0; + for (auto& update : updates) { + size_t result_erase = tree.erase(update.old_); + auto result_emplace = tree.emplace(update.new_, 
update.id_); n += result_erase == 1 && result_emplace.second; } + return n; +} + +template +void IndexBenchmark::UpdateWorld(benchmark::State& state) { + size_t initial_tree_size = tree_.size(); + size_t n = 0; + switch (UPDATE_TYPE) { + case UpdateType::ERASE_BY_KEY: + n = UpdateByKey(tree_, updates_); + break; + case UpdateType::RELOCATE: + n = UpdateByRelocate(tree_, updates_); + break; + } if (n != updates_.size()) { logging::error("Invalid update count: {}/{}", updates_.size(), n); @@ -159,37 +178,29 @@ void IndexBenchmark::UpdateWorld(benchmark::State& state) { } // namespace +template +void PhTreeRelocate3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, UpdateType::RELOCATE> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + template void PhTree3D(benchmark::State& state, Arguments&&... arguments) { - IndexBenchmark<3> benchmark{state, arguments...}; + IndexBenchmark<3, UpdateType::ERASE_BY_KEY> benchmark{state, arguments...}; benchmark.Benchmark(state); } // index type, scenario name, data_type, num_entities, updates_per_round, move_distance -// PhTree3D CUBE -BENCHMARK_CAPTURE(PhTree3D, UPDATE_CU_100_of_1K, TestGenerator::CUBE, 1000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, UPDATE_CU_100_of_10K, TestGenerator::CUBE, 10000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, UPDATE_CU_100_of_100K, TestGenerator::CUBE, 100000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, UPDATE_CU_100_of_1M, TestGenerator::CUBE, 1000000) - ->Unit(benchmark::kMillisecond); - -// PhTree3D CLUSTER -BENCHMARK_CAPTURE(PhTree3D, UPDATE_CL_100_of_1K, TestGenerator::CLUSTER, 1000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, UPDATE_CL_100_of_10K, TestGenerator::CLUSTER, 10000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, UPDATE_CL_100_of_100K, TestGenerator::CLUSTER, 100000) +// PhTree with relocate() +BENCHMARK_CAPTURE(PhTreeRelocate3D, UPDATE_1000, UPDATES_PER_ROUND) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTree3D, UPDATE_CL_100_of_1M, TestGenerator::CLUSTER, 1000000) +// PhTree with erase()/emplace +BENCHMARK_CAPTURE(PhTree3D, UPDATE_1000, UPDATES_PER_ROUND) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); BENCHMARK_MAIN(); diff --git a/phtree/benchmark/update_d_benchmark.cc b/benchmark/update_d_benchmark.cc similarity index 66% rename from phtree/benchmark/update_d_benchmark.cc rename to benchmark/update_d_benchmark.cc index f358c564..c790c6a9 100644 --- a/phtree/benchmark/update_d_benchmark.cc +++ b/benchmark/update_d_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License.
*/ +#include "benchmark_util.h" #include "logging.h" -#include "phtree/benchmark/benchmark_util.h" #include "phtree/phtree.h" #include @@ -29,7 +29,7 @@ std::vector MOVE_DISTANCE = {0, 1.0, 10}; const double GLOBAL_MAX = 10000; -enum UpdateType { ERASE_BY_KEY, ERASE_BY_ITER, EMPLACE_HINT }; +enum UpdateType { RELOCATE, ERASE_BY_KEY, ERASE_BY_ITER, EMPLACE_HINT }; template using PointType = PhPointD; @@ -52,9 +52,7 @@ class IndexBenchmark { public: IndexBenchmark( benchmark::State& state, - TestGenerator data_type, - int num_entities, - int updates_per_round = UPDATES_PER_ROUND, + size_t updates_per_round = UPDATES_PER_ROUND, std::vector move_distance = MOVE_DISTANCE); void Benchmark(benchmark::State& state); @@ -78,19 +76,15 @@ class IndexBenchmark { template IndexBenchmark::IndexBenchmark( - benchmark::State& state, - TestGenerator data_type, - int num_entities, - int updates_per_round, - std::vector move_distance) -: data_type_{data_type} -, num_entities_(num_entities) + benchmark::State& state, size_t updates_per_round, std::vector move_distance) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) , updates_per_round_(updates_per_round) , move_distance_(std::move(move_distance)) -, points_(num_entities) +, points_(num_entities_) , updates_(updates_per_round) , random_engine_{0} -, entity_id_distribution_{0, num_entities - 1} { +, entity_id_distribution_{0, static_cast(num_entities_ - 1)} { logging::SetupDefaultLogging(); SetupWorld(state); } @@ -136,6 +130,15 @@ void IndexBenchmark::BuildUpdates() { } } +template +size_t UpdateByRelocate(TreeType& tree, std::vector>& updates) { + size_t n = 0; + for (auto& update : updates) { + n += tree.relocate(update.old_, update.new_); + } + return n; +} + template size_t UpdateByKey(TreeType& tree, std::vector>& updates) { size_t n = 0; @@ -190,6 +193,9 @@ void IndexBenchmark::UpdateWorld(benchmark::State& state) { case UpdateType::EMPLACE_HINT: n = UpdateByIterHint(tree_, updates_); break; + case UpdateType::RELOCATE: + n = UpdateByRelocate(tree_, updates_); + break; } if (n != updates_.size()) { @@ -208,6 +214,12 @@ void IndexBenchmark::UpdateWorld(benchmark::State& state) { } // namespace +template +void PhTreeRelocate3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, UpdateType::RELOCATE> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + template void PhTreeEraseKey3D(benchmark::State& state, Arguments&&... arguments) { IndexBenchmark<3, UpdateType::ERASE_BY_KEY> benchmark{state, arguments...}; @@ -227,83 +239,28 @@ void PhTreeEmplaceHint3D(benchmark::State& state, Arguments&&... 
arguments) { } // index type, scenario name, data_type, num_entities, updates_per_round, move_distance -// PhTree3D CUBE -BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_CU_100_of_1K, TestGenerator::CUBE, 1000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_CU_100_of_10K, TestGenerator::CUBE, 10000) +// PhTree with relocate() +BENCHMARK_CAPTURE(PhTreeRelocate3D, UPDATE_1000, UPDATES_PER_ROUND) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_CU_100_of_100K, TestGenerator::CUBE, 100000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_CU_100_of_1M, TestGenerator::CUBE, 1000000) - ->Unit(benchmark::kMillisecond); - -// PhTree3D CLUSTER -BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_CL_100_of_1K, TestGenerator::CLUSTER, 1000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_CL_100_of_10K, TestGenerator::CLUSTER, 10000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_CL_100_of_100K, TestGenerator::CLUSTER, 100000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_CL_100_of_1M, TestGenerator::CLUSTER, 1000000) - ->Unit(benchmark::kMillisecond); - -// index type, scenario name, data_type, num_entities, updates_per_round, move_distance -// PhTree3D CUBE -BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_CU_100_of_1K, TestGenerator::CUBE, 1000) +// PhTree with erase()/emplace +BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_1000, UPDATES_PER_ROUND) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_CU_100_of_10K, TestGenerator::CUBE, 10000) +// PhTree with erase(iter) +BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_1000, UPDATES_PER_ROUND) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_CU_100_of_100K, TestGenerator::CUBE, 100000) +// PhTree with emplace_hint() +BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_1000, UPDATES_PER_ROUND) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_CU_100_of_1M, TestGenerator::CUBE, 1000000) - ->Unit(benchmark::kMillisecond); - -// PhTree3D CLUSTER -BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_CL_100_of_1K, TestGenerator::CLUSTER, 1000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_CL_100_of_10K, TestGenerator::CLUSTER, 10000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_CL_100_of_100K, TestGenerator::CLUSTER, 100000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_CL_100_of_1M, TestGenerator::CLUSTER, 1000000) - ->Unit(benchmark::kMillisecond); - -// index type, scenario name, data_type, num_entities, updates_per_round, move_distance -// PhTree3D CUBE -BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_CU_100_of_1K, TestGenerator::CUBE, 1000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_CU_100_of_10K, TestGenerator::CUBE, 10000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_CU_100_of_100K, TestGenerator::CUBE, 100000) - 
->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_CU_100_of_1M, TestGenerator::CUBE, 1000000) - ->Unit(benchmark::kMillisecond); - -// PhTree3D CLUSTER -BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_CL_100_of_1K, TestGenerator::CLUSTER, 1000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_CL_100_of_10K, TestGenerator::CLUSTER, 10000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_CL_100_of_100K, TestGenerator::CLUSTER, 100000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_CL_100_of_1M, TestGenerator::CLUSTER, 1000000) - ->Unit(benchmark::kMillisecond); BENCHMARK_MAIN(); diff --git a/phtree/benchmark/update_mm_box_d_benchmark.cc b/benchmark/update_mm_box_d_benchmark.cc similarity index 80% rename from phtree/benchmark/update_mm_box_d_benchmark.cc rename to benchmark/update_mm_box_d_benchmark.cc index 13f58b5e..271637ba 100644 --- a/phtree/benchmark/update_mm_box_d_benchmark.cc +++ b/benchmark/update_mm_box_d_benchmark.cc @@ -35,7 +35,7 @@ std::vector MOVE_DISTANCE = {0, 1.0, 10}; const double GLOBAL_MAX = 10000; const double BOX_LEN = 100; -enum Scenario { TREE_WITH_MAP, MULTI_MAP }; +enum Scenario { ERASE_EMPLACE, MM_BPT_RELOCATE, MM_SET_RELOCATE }; using payload_t = scalar_64_t; @@ -46,9 +46,16 @@ using CONVERTER = ConverterBoxIEEE; template using TestMap = typename std::conditional_t< - SCENARIO == TREE_WITH_MAP, + SCENARIO == ERASE_EMPLACE, PhTreeBoxD>, - PhTreeMultiMapBoxD>>; + typename std::conditional_t< + SCENARIO == MM_BPT_RELOCATE, + PhTreeMultiMapBoxD< + DIM, + payload_t, + CONVERTER, + b_plus_tree_hash_set>, + PhTreeMultiMapBoxD, std::set>>>; template struct UpdateOp { @@ -112,19 +119,25 @@ void IndexBenchmark::Benchmark(benchmark::State& state) { template void InsertEntry( - TestMap& tree, const PhBoxD& point, payload_t data) { + TestMap& tree, const PhBoxD& point, payload_t data) { BucketType& bucket = tree.emplace(point).first; bucket.emplace(data); } template void InsertEntry( - TestMap& tree, const PhBoxD& point, payload_t data) { + TestMap& tree, const PhBoxD& point, payload_t data) { + tree.emplace(point, data); +} + +template +void InsertEntry( + TestMap& tree, const PhBoxD& point, payload_t data) { tree.emplace(point, data); } template -typename std::enable_if::type UpdateEntry( +typename std::enable_if::type UpdateEntry( TestMap& tree, std::vector>& updates) { size_t n = 0; for (auto& update : updates) { @@ -151,7 +164,7 @@ typename std::enable_if::type Updat } template -typename std::enable_if::type UpdateEntry( +typename std::enable_if::type UpdateEntry( TestMap& tree, std::vector>& updates) { size_t n = 0; for (auto& update : updates) { @@ -202,7 +215,7 @@ void IndexBenchmark::UpdateWorld(benchmark::State& state) { logging::error("Invalid update count: {}/{}", updates_.size(), n); } - if constexpr (SCENARIO == MULTI_MAP) { + if constexpr (SCENARIO == MM_BPT_RELOCATE) { (void)initial_tree_size; if (tree_.size() != num_entities_) { logging::error("Invalid index size after update: {}/{}", tree_.size(), num_entities_); @@ -222,26 +235,38 @@ void IndexBenchmark::UpdateWorld(benchmark::State& state) { } // namespace template -void PhTree3D(benchmark::State& state, Arguments&&... arguments) { - IndexBenchmark<3, Scenario::TREE_WITH_MAP> benchmark{state, arguments...}; +void PhTreeBox3D(benchmark::State& state, Arguments&&... 
arguments) { + IndexBenchmark<3, Scenario::ERASE_EMPLACE> benchmark{state, arguments...}; benchmark.Benchmark(state); } template -void PhTreeMultiMap3D(benchmark::State& state, Arguments&&... arguments) { - IndexBenchmark<3, Scenario::MULTI_MAP> benchmark{state, arguments...}; +void PhTreeMultiMapBox3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::MM_BPT_RELOCATE> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTreeMultiMapStdBox3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::MM_SET_RELOCATE> benchmark{state, arguments...}; benchmark.Benchmark(state); } // index type, scenario name, data_type, num_entities, updates_per_round, move_distance // PhTree -BENCHMARK_CAPTURE(PhTree3D, UPDATE_1000, UPDATES_PER_ROUND) +BENCHMARK_CAPTURE(PhTreeBox3D, UPDATE_1000, UPDATES_PER_ROUND) ->RangeMultiplier(10) ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); // PhTreeMultiMap -BENCHMARK_CAPTURE(PhTreeMultiMap3D, UPDATE_1000, UPDATES_PER_ROUND) +BENCHMARK_CAPTURE(PhTreeMultiMapBox3D, UPDATE_1000, UPDATES_PER_ROUND) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +// PhTreeMultiMap with std::set +BENCHMARK_CAPTURE(PhTreeMultiMapStdBox3D, UPDATE_1000, UPDATES_PER_ROUND) ->RangeMultiplier(10) ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); diff --git a/phtree/benchmark/update_mm_d_benchmark.cc b/benchmark/update_mm_d_benchmark.cc similarity index 71% rename from phtree/benchmark/update_mm_d_benchmark.cc rename to benchmark/update_mm_d_benchmark.cc index f3149403..6c5cfa57 100644 --- a/phtree/benchmark/update_mm_d_benchmark.cc +++ b/benchmark/update_mm_d_benchmark.cc @@ -34,7 +34,7 @@ std::vector MOVE_DISTANCE = {0, 1.0, 10}; const double GLOBAL_MAX = 10000; -enum Scenario { TREE_WITH_MAP, MULTI_MAP }; +enum Scenario { ERASE_EMPLACE, MM_BPT_RELOCATE, MM_SET_RELOCATE, MM_SET_RELOCATE_IF }; using payload_t = scalar_64_t; @@ -48,9 +48,12 @@ using CONVERTER = ConverterIEEE; template using TestMap = typename std::conditional_t< - SCENARIO == TREE_WITH_MAP, + SCENARIO == ERASE_EMPLACE, PhTreeD>, - PhTreeMultiMapD>>; + typename std::conditional_t< + SCENARIO == MM_BPT_RELOCATE, + PhTreeMultiMapD, b_plus_tree_hash_set>, + PhTreeMultiMapD, std::set>>>; template struct UpdateOp { @@ -114,19 +117,25 @@ void IndexBenchmark::Benchmark(benchmark::State& state) { template void InsertEntry( - TestMap& tree, const PointType& point, payload_t data) { + TestMap& tree, const PointType& point, payload_t data) { BucketType& bucket = tree.emplace(point).first; bucket.emplace(data); } template void InsertEntry( - TestMap& tree, const PointType& point, payload_t data) { + TestMap& tree, const PointType& point, payload_t data) { + tree.emplace(point, data); +} + +template +void InsertEntry( + TestMap& tree, const PointType& point, payload_t data) { tree.emplace(point, data); } template -typename std::enable_if::type UpdateEntry( +typename std::enable_if::type UpdateEntry( TestMap& tree, std::vector>& updates) { size_t n = 0; for (auto& update : updates) { @@ -138,8 +147,6 @@ typename std::enable_if::type Updat continue; } - // TODO implement erase_hint or find_hint or something? - // Entry is already inserted, now remove old entry.
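+        // The payload has already been emplaced into the bucket at the new position (see above);
+        // the remaining work is to look up the bucket at the old position and remove this
+        // entry's payload from it.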
auto iter_old_bucket = tree.find(update.old_); assert(iter_old_bucket != tree.end()); @@ -153,8 +160,10 @@ typename std::enable_if::type Updat } template -typename std::enable_if::type UpdateEntry( - TestMap& tree, std::vector>& updates) { +typename std::enable_if< + SCENARIO == Scenario::MM_BPT_RELOCATE || SCENARIO == Scenario::MM_SET_RELOCATE, + size_t>::type +UpdateEntry(TestMap& tree, std::vector>& updates) { size_t n = 0; for (auto& update : updates) { n += tree.relocate(update.old_, update.new_, update.id_); @@ -162,6 +171,17 @@ typename std::enable_if::type UpdateEnt return n; } +template +typename std::enable_if::type UpdateEntry( + TestMap& tree, std::vector>& updates) { + size_t n = 0; + for (auto& update : updates) { + n += tree.relocate_if( + update.old_, update.new_, [&update](const payload_t& v) { return v == update.id_; }); + } + return n; +} + template void IndexBenchmark::SetupWorld(benchmark::State& state) { logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); @@ -215,26 +235,50 @@ void IndexBenchmark::UpdateWorld(benchmark::State& state) { } // namespace template -void PhTree3D(benchmark::State& state, Arguments&&... arguments) { - IndexBenchmark<3, Scenario::TREE_WITH_MAP> benchmark{state, arguments...}; +void PhTreeMMRelocateIfStdSet3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::MM_SET_RELOCATE_IF> benchmark{state, arguments...}; benchmark.Benchmark(state); } template -void PhTreeMultiMap3D(benchmark::State& state, Arguments&&... arguments) { - IndexBenchmark<3, Scenario::MULTI_MAP> benchmark{state, arguments...}; +void PhTreeMMRelocateBpt3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::MM_BPT_RELOCATE> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTreeMMRelocateStdSet3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::MM_SET_RELOCATE> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTreeMMEraseEmplace3D(benchmark::State& state, Arguments&&... 
arguments) { + IndexBenchmark<3, Scenario::ERASE_EMPLACE> benchmark{state, arguments...}; benchmark.Benchmark(state); } // index type, scenario name, data_type, num_entities, updates_per_round, move_distance -// PhTree -BENCHMARK_CAPTURE(PhTree3D, UPDATE_1000, UPDATES_PER_ROUND) +// PhTreeMultiMap +BENCHMARK_CAPTURE(PhTreeMMRelocateIfStdSet3D, UPDATE_1000, UPDATES_PER_ROUND) ->RangeMultiplier(10) ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); -// PhTreeMultiMap -BENCHMARK_CAPTURE(PhTreeMultiMap3D, UPDATE_1000, UPDATES_PER_ROUND) +// PhTreeMultiMap with b_plus_tree_hash_map +BENCHMARK_CAPTURE(PhTreeMMRelocateBpt3D, UPDATE_1000, UPDATES_PER_ROUND) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +// PhTreeMultiMap with std::set +BENCHMARK_CAPTURE(PhTreeMMRelocateStdSet3D, UPDATE_1000, UPDATES_PER_ROUND) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +// PhTree (manual bucket handling) +BENCHMARK_CAPTURE(PhTreeMMEraseEmplace3D, UPDATE_1000, UPDATES_PER_ROUND) ->RangeMultiplier(10) ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); diff --git a/ci/includes/bazel.sh b/ci/includes/bazel.sh deleted file mode 100755 index 79a70e5d..00000000 --- a/ci/includes/bazel.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env bash - -source ci/includes/os.sh - -# Main function that should be used by scripts sourcing this file. -function runBazel() { - BAZEL_SUBCOMMAND="$1" - shift - "$(pwd)/tools/bazel" "$BAZEL_SUBCOMMAND" ${BAZEL_CI_CONFIG:-} "$@" -} - -function getBazelVersion() { - echo "4.2.2" -} diff --git a/ci/linting/buildifier.sh b/ci/linting/buildifier.sh index 1be7b1c3..1344e2e3 100755 --- a/ci/linting/buildifier.sh +++ b/ci/linting/buildifier.sh @@ -4,7 +4,6 @@ set -x -e -u -o pipefail cd "$(dirname "$0")/../../" -source ci/includes/bazel.sh source ci/includes/os.sh MAYBEARG='-mode=check' @@ -16,9 +15,9 @@ if [ $# -eq 1 ]; then fi # Ensure Bazel is installed. -runBazel version +bazel version -if runBazel run buildifier -- ${MAYBEARG} -v $(find "$(pwd)/" \( -name BUILD -o -name WORKSPACE \) -type f); then +if bazel run buildifier -- ${MAYBEARG} -v $(find "$(pwd)/" \( -name BUILD -o -name WORKSPACE \) -type f); then echo -e "\033[0;32mAll BUILD and WORKSPACE files passed buildifier linting check.\033[0m" else echo -e "\033[0;31mThe above listed BUILD and WORKSPACE file(s) didn't pass the buildifier linting check!\033[0m" diff --git a/ci/linting/clang-format.sh b/ci/linting/clang-format.sh index cebf4a22..551151be 100755 --- a/ci/linting/clang-format.sh +++ b/ci/linting/clang-format.sh @@ -3,7 +3,6 @@ set -e -u -o pipefail source ci/includes/os.sh -source ci/includes/bazel.sh TARGETS="//..." EXCLUDED_TARGETS="" @@ -66,22 +65,22 @@ function generateAqueryTargetString() { function bazelLintTest() { # Use bazel to create patch files for all eligible source files. # Fail if any of the patch files are non-empty (i.e. lint was detected). - CLANG_FORMAT="$(clangFormatLocation)" runBazel build --config lint --output_groups=clang_format_test -- $(generateBuildTargetString) + CLANG_FORMAT="$(clangFormatLocation)" bazel build --config lint --output_groups=clang_format_test -- $(generateBuildTargetString) } function bazelLintFix() { # Use bazel to create patch files for all eligible source files. 
- CLANG_FORMAT="$(clangFormatLocation)" runBazel build --config lint --output_groups=clang_format_patches_only -- $(generateBuildTargetString) + CLANG_FORMAT="$(clangFormatLocation)" bazel build --config lint --output_groups=clang_format_patches_only -- $(generateBuildTargetString) # Find bazel-bin prefix. - BAZEL_BIN=$(runBazel info bazel-bin) + BAZEL_BIN=$(bazel info bazel-bin) # I.e. on Linux, this is `bazel-out/k8-gcc-opt/bin`. - PREFIX=${BAZEL_BIN#$(runBazel info execution_root)/} + PREFIX=${BAZEL_BIN#$(bazel info execution_root)/} # Use aquery to get the list of output files of the `CreatePatch` action, # Then strip the patch path down to that of its source file, and apply # the patch file generated by Bazel to the original source file. - CLANG_FORMAT="$(clangFormatLocation)" runBazel aquery --config lint --include_aspects --output_groups clang_format_patches_only "mnemonic(\"CreatePatch\", $(generateAqueryTargetString))" --output textproto \ + CLANG_FORMAT="$(clangFormatLocation)" bazel aquery --config lint --include_aspects --output_groups clang_format_patches_only "mnemonic(\"CreatePatch\", $(generateAqueryTargetString))" --output textproto \ `# Get relative paths to source files` \ `# perl used instead of grep --perl-regexp since grep macOS doesnt support it` \ | perl -ne "while(/(?<=exec_path: \"${PREFIX//\//\\/}\/).*\.patch_.+(?=\")/g){print \"\$&\n\";}" \ diff --git a/cmake/phtreeConfig.cmake.in b/cmake/phtreeConfig.cmake.in new file mode 100644 index 00000000..9c15f36a --- /dev/null +++ b/cmake/phtreeConfig.cmake.in @@ -0,0 +1,4 @@ +@PACKAGE_INIT@ + +include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake") +check_required_components("@PROJECT_NAME@") diff --git a/examples/BUILD b/examples/BUILD index 56f61fe1..376c48d4 100644 --- a/examples/BUILD +++ b/examples/BUILD @@ -7,6 +7,6 @@ cc_binary( "//visibility:public", ], deps = [ - "//phtree", + "//:phtree", ], ) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 370887f6..ce8a6792 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,6 +1,5 @@ cmake_minimum_required(VERSION 3.14) -project(Example) +project(phtree-examples) -set(SOURCE_FILES example.cc) -add_executable(Example ${SOURCE_FILES}) +add_executable(Example example.cc) target_link_libraries(Example phtree) diff --git a/examples/example.cc b/examples/example.cc index b0ceb5e9..aecbb049 100644 --- a/examples/example.cc +++ b/examples/example.cc @@ -14,11 +14,48 @@ * limitations under the License. 
*/ -#include "../phtree/phtree.h" +#include "phtree/phtree.h" +#include "phtree/phtree_multimap.h" +#include #include +#include using namespace improbable::phtree; +int relocate_example() { + //auto tree = PhTreeMultiMapD<2, int, ConverterIEEE<2>, std::unordered_set>(); + auto tree = PhTreeMultiMapD<2, int, ConverterMultiply<2, 1, 200>, std::unordered_set>(); + std::vector> vecPos; + int dim = 1000; + + int num = 30000; + for (int i = 0; i < num; ++i) { + PhPointD<2> p = {(double)(rand() % dim), (double)(rand() % dim)}; + vecPos.push_back(p); + tree.emplace(p, i); + } + + long T = 0; + int nT = 0; + while (true) { + auto t1 = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < num; ++i) { + PhPointD<2>& p = vecPos[i]; + PhPointD<2> newp = {p[0] + 1, p[1] + 1}; + tree.relocate(p, newp, i, false); + p = newp; + } + auto t2 = std::chrono::high_resolution_clock::now(); + auto s = std::chrono::duration_cast(t2 - t1); + ++nT; + T += (long)s.count() / 1000; + std::cout << s.count() << " " << (T / nT) + << " msec/num= " << (s.count() / (double)num) << std::endl; + } + + return 0; +} + int main() { std::cout << "PH-Tree example with 3D `double` coordinates." << std::endl; PhPointD<3> p1({1, 1, 1}); @@ -55,4 +92,8 @@ int main() { std::cout << "ID at " << p4b << ": " << tree.find(p4b).second() << std::endl; std::cout << "Done." << std::endl; -} \ No newline at end of file + + //relocate_example(); + + return 0; +} diff --git a/include/phtree/common/BUILD b/include/phtree/common/BUILD new file mode 100644 index 00000000..890816af --- /dev/null +++ b/include/phtree/common/BUILD @@ -0,0 +1,21 @@ +package(default_visibility = ["//visibility:private"]) + +cc_library( + name = "common", + hdrs = [ + "b_plus_tree_hash_map.h", + "b_plus_tree_map.h", + "base_types.h", + "bits.h", + "common.h", + "debug_helper.h", + "flat_array_map.h", + "flat_sparse_map.h", + "tree_stats.h", + ], + visibility = [ + "//visibility:public", + ], + deps = [ + ], +) diff --git a/phtree/common/README.md b/include/phtree/common/README.md similarity index 100% rename from phtree/common/README.md rename to include/phtree/common/README.md diff --git a/include/phtree/common/b_plus_tree_hash_map.h b/include/phtree/common/b_plus_tree_hash_map.h new file mode 100644 index 00000000..f3ea6028 --- /dev/null +++ b/include/phtree/common/b_plus_tree_hash_map.h @@ -0,0 +1,942 @@ +/* + * Copyright 2022 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_COMMON_B_PLUS_TREE_HASH_MAP_H +#define PHTREE_COMMON_B_PLUS_TREE_HASH_MAP_H + +#include "bits.h" +#include +#include +#include + +/* + * PLEASE do not include this file directly, it is included via common.h. + * + * This file contains the B+tree implementation which is used in high-dimensional nodes in + * the PH-Tree. + */ +namespace improbable::phtree { + +/* + * The b_plus_tree_hash_map is a B+tree implementation that uses a hierarchy of horizontally + * connected nodes for fast traversal through all entries. 
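+ *
+ * Usage sketch (illustrative only; it relies solely on the std::unordered_set-like subset
+ * implemented below: emplace(), find(), count(), erase() and size()):
+ *
+ *   b_plus_tree_hash_set<int> set;
+ *   set.emplace(42);
+ *   set.emplace(43);
+ *   if (set.find(42) != set.end()) {
+ *       set.erase(42);  // erase by value; returns the number of erased entries
+ *   }
+ *   assert(set.count(42) == 0 && set.size() == 1);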
+ * + * Behavior + * ======== + * This is a hash set/map. It behaves just like std::unordered_set / std::unordered_map, minus + * some API functions. + * The set/map is ordered by their hash. Entries with identical hash have no specific ordering + * but the order is stable with respect to insertion/removal of other entries. + * + * + * Rationale + * ========= + * This implementations is optimized for small entry count (for the multi-map PH-tree we + * expect small numbers of entries that actually have identical positions), however it should + * scale well with large entry counts (it is a tree, so there is no need for rehashing). + * Benchmarks show 10%-20% performance improvements for relocate() when using this custom set/map. + * + * + * Internals + * ========= + * The individual nodes have at most M entries. + * The tree has O(log n) lookup and O(M log n) insertion/removal time complexity, + * space complexity is O(n). + * + * Tree structure: + * - Inner nodes: have other nodes as children; their key of an entry represents the highest + * key of any subnode in that entry + * - Leaf nodes: have values as children; their key represents the key of a key/value pair + * - Every node is either a leaf (l-node; contains values) or an inner node + * (n-node; contains nodes). + * - "Sibling" nodes refer to the nodes linked by prev_node_ or next_node_. Sibling nodes + * usually have the same parent but may also be children of their parent's siblings. + * + * - Guarantee: All leaf nodes are horizontally connected + * - Inner nodes may or may not be connected. Specifically: + * - New inner nodes will be assigned siblings from the same parent or the parent's sibling + * (if the new node is the first or last node in a parent) + * - There is no guarantee that inner nodes know about their potential sibling (=other inner + * nodes that own bordering values/child-nodes). + * - There is no guarantee that siblings are on the same depth of the tree. + * - The tree is not balanced + * + */ +template , typename PredT = std::equal_to> +class b_plus_tree_hash_set { + class bpt_node_base; + template + class bpt_node_data; + class bpt_node_leaf; + class bpt_node_inner; + class bpt_iterator; + + using hash_t = std::uint32_t; + + using bpt_entry_inner = std::pair; + using bpt_entry_leaf = std::pair; + + using IterT = bpt_iterator; + using NodeT = bpt_node_base; + using NLeafT = bpt_node_leaf; + using NInnerT = bpt_node_inner; + using LeafIteratorT = decltype(std::vector().begin()); + using TreeT = b_plus_tree_hash_set; + + public: + explicit b_plus_tree_hash_set() : root_{new NLeafT(nullptr, nullptr, nullptr)}, size_{0} {}; + + b_plus_tree_hash_set(const b_plus_tree_hash_set& other) : size_{other.size_} { + root_ = other.root_->is_leaf() ? new NLeafT(*other.root_->as_leaf()) + : new NInnerT(*other.root_->as_inner()); + } + + b_plus_tree_hash_set(b_plus_tree_hash_set&& other) noexcept + : root_{other.root_}, size_{other.size_} { + other.root_ = nullptr; + other.size_ = 0; + } + + b_plus_tree_hash_set& operator=(const b_plus_tree_hash_set& other) { + assert(this != &other); + delete root_; + root_ = other.root_->is_leaf() ? 
new NLeafT(*other.root_->as_leaf()) + : new NInnerT(*other.root_->as_inner()); + size_ = other.size_; + return *this; + } + + b_plus_tree_hash_set& operator=(b_plus_tree_hash_set&& other) noexcept { + delete root_; + root_ = other.root_; + other.root_ = nullptr; + size_ = other.size_; + other.size_ = 0; + return *this; + } + + ~b_plus_tree_hash_set() { + delete root_; + root_ = nullptr; + } + + [[nodiscard]] auto find(const T& value) { + auto node = root_; + auto hash = (hash_t)HashT{}(value); + while (!node->is_leaf()) { + node = node->as_inner()->find(hash); + if (node == nullptr) { + return end(); + } + } + return node->as_leaf()->find(hash, value); + } + + [[nodiscard]] auto find(const T& value) const { + return const_cast(*this).find(value); + } + + [[nodiscard]] size_t count(const T& value) const { + return const_cast(*this).find(value) != end(); + } + + [[nodiscard]] auto begin() noexcept { + return IterT(root_); + } + + [[nodiscard]] auto begin() const noexcept { + return IterT(root_); + } + + [[nodiscard]] auto cbegin() const noexcept { + return IterT(root_); + } + + [[nodiscard]] auto end() noexcept { + return IterT(); + } + + [[nodiscard]] auto end() const noexcept { + return IterT(); + } + + template + auto emplace(Args&&... args) { + T t(std::forward(args)...); + hash_t hash = (hash_t)HashT{}(t); + auto node = root_; + while (!node->is_leaf()) { + node = node->as_inner()->find_or_last(hash); + } + return node->as_leaf()->try_emplace(hash, *this, size_, std::move(t)); + } + + template + auto emplace_hint(const IterT& hint, Args&&... args) { + if (empty() || hint.is_end()) { + return emplace(std::forward(args)...).first; + } + assert(hint.node_->is_leaf()); + + T t(std::forward(args)...); + auto hash = (hash_t)HashT{}(t); + auto node = hint.node_->as_leaf(); + + // The following may drop a valid hint but is easy to check. 
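+        // If the hash of the new value lies outside the key range covered by the hinted leaf,
+        // the hint cannot be used and we fall back to a regular emplace() that descends from
+        // the root; otherwise the value is inserted via the hinted leaf.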
+ if (node->data_.begin()->first > hash || (node->data_.end() - 1)->first < hash) { + return emplace(std::move(t)).first; + } + + return node->try_emplace(hash, *this, size_, std::move(t)).first; + } + + size_t erase(const T& value) { + auto node = root_; + auto hash = (hash_t)HashT{}(value); + while (!node->is_leaf()) { + node = node->as_inner()->find(hash); + if (node == nullptr) { + return 0; + } + } + auto n = node->as_leaf()->erase_key(hash, value, *this); + size_ -= n; + return n; + } + + auto erase(const IterT& iterator) { + assert(iterator != end()); + --size_; + auto result = iterator.node_->erase_it(iterator.iter_, *this); + if (result.node_) { + return IterT(static_cast(result.node_), result.iter_); + } + return IterT(); + } + + [[nodiscard]] size_t size() const noexcept { + return size_; + } + + [[nodiscard]] bool empty() const noexcept { + return size_ == 0; + } + + void _check() { + size_t count = 0; + NLeafT* prev_leaf = nullptr; + hash_t known_min = std::numeric_limits::max(); + root_->_check(count, nullptr, prev_leaf, known_min, 0); + assert(count == size()); + } + + private: + class bpt_node_base { + public: + explicit bpt_node_base(bool is_leaf, NInnerT* parent) noexcept + : is_leaf_{is_leaf}, parent_{parent} {} + + virtual ~bpt_node_base() noexcept = default; + + [[nodiscard]] inline bool is_leaf() const noexcept { + return is_leaf_; + } + + [[nodiscard]] inline NInnerT* as_inner() noexcept { + assert(!is_leaf_); + return static_cast(this); + } + + [[nodiscard]] inline NLeafT* as_leaf() noexcept { + assert(is_leaf_); + return static_cast(this); + } + + virtual void _check(size_t&, NInnerT*, NLeafT*&, hash_t&, hash_t) = 0; + + public: + const bool is_leaf_; + NInnerT* parent_; + }; + + template + class bpt_node_data : public bpt_node_base { + using DataIteratorT = decltype(std::vector().begin()); + friend IterT; + + constexpr static size_t M_leaf = 16; + constexpr static size_t M_inner = 16; + // A value >2 requires a code change to move > 1 entry when merging. + constexpr static size_t M_leaf_min = 2; // std::max((size_t)2, M_leaf >> 2); + constexpr static size_t M_inner_min = 2; // std::max((size_t)2, M_inner >> 2); + constexpr static size_t M_leaf_init = 8; + constexpr static size_t M_inner_init = 4; + + public: + explicit bpt_node_data(bool is_leaf, NInnerT* parent, ThisT* prev, ThisT* next) noexcept + : bpt_node_base(is_leaf, parent), data_{}, prev_node_{prev}, next_node_{next} { + data_.reserve(this->M_init()); + } + + virtual ~bpt_node_data() noexcept = default; + + [[nodiscard]] inline size_t M_min() { + return this->is_leaf_ ? M_leaf_min : M_inner_min; + } + + [[nodiscard]] inline size_t M_max() { + return this->is_leaf_ ? M_leaf : M_inner; + } + + [[nodiscard]] inline size_t M_init() { + return this->is_leaf_ ? 
M_leaf_init : M_inner_init; + } + + [[nodiscard]] auto lower_bound(hash_t hash) noexcept { + return std::lower_bound( + data_.begin(), data_.end(), hash, [](EntryT& left, const hash_t hash) { + return left.first < hash; + }); + } + + [[nodiscard]] size_t size() const noexcept { + return data_.size(); + } + + struct EraseResult { + bpt_node_data* node_ = nullptr; + DataIteratorT iter_; + }; + + auto erase_entry(DataIteratorT it_to_erase, TreeT& tree) { + using ER = EraseResult; + auto& parent_ = this->parent_; + hash_t max_key_old = data_.back().first; + + auto result = data_.erase(it_to_erase); + bool tail_entry_erased = result == data_.end(); + if (parent_ == nullptr) { + if constexpr (std::is_same_v) { + if (data_.size() < 2) { + auto remaining_node = data_.begin()->second; + data_.begin()->second = nullptr; + remaining_node->parent_ = nullptr; + tree.root_ = remaining_node; + delete this; + } + } + return tail_entry_erased ? ER{} : ER{this, result}; + } + + if (data_.empty()) { + // Nothing to merge, just remove node. This should be rare, i.e. only happens when + // a rare 1-entry node has its last entry removed. + remove_from_siblings(); + parent_->remove_node(max_key_old, this, tree); + return next_node_ == nullptr ? ER{} : ER{next_node_, next_node_->data_.begin()}; + } + + if (data_.size() < this->M_min()) { + // merge + if (prev_node_ != nullptr && prev_node_->data_.size() < this->M_max()) { + remove_from_siblings(); + auto& prev_data = prev_node_->data_; + if constexpr (std::is_same_v) { + prev_data.emplace_back(std::move(data_[0])); + } else { + data_[0].second->parent_ = prev_node_; + prev_data.emplace_back(std::move(data_[0])); + data_[0].second = nullptr; + } + auto prev_node = prev_node_; // create copy because (this) will be deleted + auto next_node = next_node_; // create copy because (this) will be deleted + parent_->remove_node(max_key_old, this, tree); + if (prev_node->parent_ != nullptr) { + hash_t old1 = (prev_data.end() - 2)->first; + hash_t new1 = (prev_data.end() - 1)->first; + prev_node->parent_->update_key(old1, new1, prev_node); + } + if (!tail_entry_erased) { + return ER{prev_node, --prev_data.end()}; + } + return next_node == nullptr ? ER{} : ER{next_node, next_node->data_.begin()}; + } else if (next_node_ != nullptr && next_node_->data_.size() < this->M_max()) { + remove_from_siblings(); + auto* next_node = next_node_; + auto& next_data = next_node_->data_; + if constexpr (std::is_same_v) { + next_data.emplace(next_data.begin(), std::move(data_[0])); + } else { + data_[0].second->parent_ = next_node_; + next_data.emplace(next_data.begin(), std::move(data_[0])); + data_[0].second = nullptr; + } + parent_->remove_node(max_key_old, this, tree); + if (tail_entry_erased) { + return ER{next_node, next_data.begin() + 1}; + } + return next_node == nullptr ? ER() : ER{next_node, next_data.begin()}; + } + // This node is too small but there is nothing we can do. + } + if (tail_entry_erased) { + parent_->update_key(max_key_old, data_.back().first, this); + return next_node_ == nullptr ? ER() : ER{next_node_, next_node_->data_.begin()}; + } + return ER{this, result}; + } + + /* + * Check whether a split is required and, if so, perform it. + * It returns the node to which the new entry should be added. 
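+     * If the node still has room, no split is performed and `this` is returned (after
+     * updating the parent's key in case `key_to_add` becomes the new maximum); otherwise
+     * the node is split and the half that must receive `key_to_add` is returned.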
+ */ + ThisT* check_split(hash_t key_to_add, TreeT& tree) { + if (data_.size() < this->M_max()) { + if (this->parent_ != nullptr && key_to_add > data_.back().first) { + this->parent_->update_key(data_.back().first, key_to_add, this); + } + return static_cast(this); + } + return this->split_node(key_to_add, tree); + } + + void _check_data(NInnerT* parent, hash_t known_max) { + (void)parent; + (void)known_max; + // assert(parent_ == nullptr || data_.size() >= M_min); + assert(this->parent_ == parent); + if (this->data_.empty()) { + assert(parent == nullptr); + return; + } + assert(this->parent_ == nullptr || known_max == this->data_.back().first); + } + + private: + ThisT* split_node(hash_t key_to_add, TreeT& tree) { + auto max_key = data_.back().first; + if (this->parent_ == nullptr) { + auto* new_parent = new NInnerT(nullptr, nullptr, nullptr); + new_parent->emplace_back(max_key, this); + tree.root_ = new_parent; + this->parent_ = new_parent; + } + + // create new node + auto* node2 = new ThisT(this->parent_, static_cast(this), next_node_); + if (next_node_ != nullptr) { + next_node_->prev_node_ = node2; + } + next_node_ = node2; + + // populate new node + // TODO Optimize populating new node: move 1st part, insert new value, move 2nd part...? + auto split_pos = this->M_max() >> 1; + node2->data_.insert( + node2->data_.end(), + std::make_move_iterator(data_.begin() + split_pos), + std::make_move_iterator(data_.end())); + data_.erase(data_.begin() + split_pos, data_.end()); + + if constexpr (std::is_same_v) { + for (auto& e : node2->data_) { + e.second->parent_ = node2; + } + } + + // Add node to parent + auto split_key = data_.back().first; + this->parent_->update_key_and_add_node( + max_key, split_key, std::max(max_key, key_to_add), this, node2, tree); + + // Return node for insertion of new value + return key_to_add > split_key ? node2 : static_cast(this); + } + + void remove_from_siblings() { + if (next_node_ != nullptr) { + next_node_->prev_node_ = prev_node_; + } + if (prev_node_ != nullptr) { + prev_node_->next_node_ = next_node_; + } + } + + public: + std::vector data_; + ThisT* prev_node_; + ThisT* next_node_; + }; + + class bpt_node_leaf : public bpt_node_data { + public: + explicit bpt_node_leaf(NInnerT* parent, NLeafT* prev, NLeafT* next) noexcept + : bpt_node_data(true, parent, prev, next) {} + + ~bpt_node_leaf() noexcept = default; + + [[nodiscard]] IterT find(hash_t hash, const T& value) noexcept { + PredT equals{}; + IterT iter_full(this, this->lower_bound(hash)); + while (!iter_full.is_end() && iter_full.hash() == hash) { + if (equals(*iter_full, value)) { + return iter_full; + } + ++iter_full; + } + return IterT(); + } + + [[nodiscard]] auto lower_bound_value(hash_t hash, const T& value) noexcept { + PredT equals{}; + IterT iter_full(this, this->lower_bound(hash)); + while (!iter_full.is_end() && iter_full.hash() == hash) { + if (equals(*iter_full, value)) { + break; + } + ++iter_full; + } + return iter_full; + } + + auto try_emplace(hash_t hash, TreeT& tree, size_t& entry_count, T&& t) { + auto it = this->lower_bound(hash); + if (it != this->data_.end() && it->first == hash) { + // Hash collision ! 
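+            // Several distinct values can map to the same hash. Scan all entries that share
+            // this hash; if an equal value is already stored, return its iterator together
+            // with `false` (nothing is inserted).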
+ PredT equals{}; + IterT full_iter(this, it); + while (!full_iter.is_end() && full_iter.hash() == hash) { + if (equals(*full_iter, t)) { + return std::make_pair(full_iter, false); + } + ++full_iter; + } + } + ++entry_count; + auto old_pos = it - this->data_.begin(); + auto dest = this->check_split(hash, tree); + if (dest != this) { + // The insertion pos in `dest` can be calculated: + it = dest->data_.begin() + (old_pos - this->data_.size()); + } + auto it2 = dest->data_.emplace(it, hash, std::move(t)); + return std::make_pair(IterT(dest, it2), true); + } + + bool erase_key(hash_t hash, const T& value, TreeT& tree) { + auto iter = this->lower_bound_value(hash, value); + if (!iter.is_end() && PredT{}(*iter, value)) { + iter.node_->erase_entry(iter.iter_, tree); + return true; + } + return false; + } + + auto erase_it(LeafIteratorT iter, TreeT& tree) { + return this->erase_entry(iter, tree); + } + + void _check( + size_t& count, + NInnerT* parent, + NLeafT*& prev_leaf, + hash_t& known_min, + hash_t known_max) { + this->_check_data(parent, known_max); + + assert(prev_leaf == this->prev_node_); + for (auto& e : this->data_) { + assert(count == 0 || e.first >= known_min); + assert(this->parent_ == nullptr || e.first <= known_max); + ++count; + known_min = e.first; + } + prev_leaf = this; + } + }; + + class bpt_node_inner : public bpt_node_data { + public: + explicit bpt_node_inner(NInnerT* parent, NInnerT* prev, NInnerT* next) noexcept + : bpt_node_data(false, parent, prev, next) {} + + ~bpt_node_inner() noexcept { + for (auto& e : this->data_) { + if (e.second != nullptr) { + delete e.second; + } + } + } + + [[nodiscard]] auto lower_bound_node(hash_t hash, const NodeT* node) noexcept { + auto it = this->lower_bound(hash); + while (it != this->data_.end() && it->first == hash) { + if (it->second == node) { + return it; + } + ++it; + } + return this->data_.end(); + } + + [[nodiscard]] NodeT* find(hash_t hash) noexcept { + auto it = this->lower_bound(hash); + return it != this->data_.end() ? it->second : nullptr; + } + + [[nodiscard]] NodeT* find_or_last(hash_t hash) noexcept { + auto it = this->lower_bound(hash); + return it != this->data_.end() ? it->second : this->data_.back().second; + } + + void emplace_back(hash_t hash, NodeT* node) { + this->data_.emplace_back(hash, node); + } + + void _check( + size_t& count, + NInnerT* parent, + NLeafT*& prev_leaf, + hash_t& known_min, + hash_t known_max) { + this->_check_data(parent, known_max); + + assert(this->parent_ == nullptr || known_max == this->data_.back().first); + auto prev_key = this->data_[0].first; + int n = 0; + for (auto& e : this->data_) { + assert(n == 0 || e.first >= prev_key); + e.second->_check(count, this, prev_leaf, known_min, e.first); + assert(this->parent_ == nullptr || e.first <= known_max); + prev_key = e.first; + ++n; + } + } + + void update_key(hash_t old_key, hash_t new_key, NodeT* node) { + if (old_key == new_key) { + return; // This can happen due to multiple entries with same hash. + } + assert(new_key != old_key); + auto it = this->lower_bound_node(old_key, node); + assert(it != this->data_.end()); + assert(it->first == old_key); + it->first = new_key; + if (this->parent_ != nullptr && ++it == this->data_.end()) { + this->parent_->update_key(old_key, new_key, this); + } + } + + /* + * This method does two things: + * - It changes the key of the node (node 1) at 'key1_old' to 'key1_new'. 
+ * - It inserts a new node (node 2) after 'new_key1' with value 'key2' + * Invariants: + * - Node1: key1_old >= key1_new; Node 1 vs 2: key2 >= new_key1 + */ + void update_key_and_add_node( + hash_t key1_old, + hash_t key1_new, + hash_t key2, + NodeT* child1, + NodeT* child2, + TreeT& tree) { + auto it = this->lower_bound_node(key1_old, child1); + assert(key2 >= key1_new && key1_old >= key1_new && it != this->data_.end()); + + auto old_pos = it - this->data_.begin(); // required for MSVC + auto dest = this->check_split(key2, tree); + child2->parent_ = dest; + if (this != dest && this->data_.back().second == child1) { + it->first = key1_new; + dest->data_.emplace(dest->data_.begin(), key2, child2); + } else { + // child1 & 2 in same node + if (this != dest) { + it = old_pos - this->data_.size() + dest->data_.begin(); + } + it->first = key1_new; + ++it; + dest->data_.emplace(it, key2, child2); + } + + // The following alternative code works, but I don't understand why! + // auto dest = this->check_split(key2, tree); + // auto it = dest->lower_bound_node(key1_old, child1); + // assert(key2 >= key1_new && key1_old >= key1_new && it != + // dest->data_.end()); + // it->first = key1_new; + // ++it; + // child2->parent_ = dest; + // dest->data_.emplace(it, key2, child2); + } + + void remove_node(hash_t key_remove, NodeT* node, TreeT& tree) { + auto it_to_erase = this->lower_bound(key_remove); + while (it_to_erase != this->data_.end() && it_to_erase->first == key_remove) { + if (it_to_erase->second == node) { + delete it_to_erase->second; + this->erase_entry(it_to_erase, tree); + return; + } + ++it_to_erase; + } + assert(false && "Node not found!"); + } + }; + + class bpt_iterator { + using EntryT = typename b_plus_tree_hash_set::bpt_entry_leaf; + friend b_plus_tree_hash_set; + + public: + using iterator_category = std::forward_iterator_tag; + using value_type = T; + using difference_type = std::ptrdiff_t; + using pointer = T*; + using reference = T&; + + // Arbitrary position iterator + explicit bpt_iterator(NLeafT* node, LeafIteratorT it) noexcept + : node_{it == node->data_.end() ? nullptr : node} + , iter_{node_ == nullptr ? LeafIteratorT{} : it} { + assert(node->is_leaf_ && "just for consistency, insist that we iterate leaves only "); + } + + // begin() iterator + explicit bpt_iterator(NodeT* node) noexcept { + assert(node->parent_ == nullptr && "must start with root node"); + // move iterator to first value + while (!node->is_leaf_) { + node = node->as_inner()->data_[0].second; + } + node_ = node->as_leaf(); + + if (node_->size() == 0) { + node_ = nullptr; + iter_ = {}; + return; + } + iter_ = node_->data_.begin(); + } + + // end() iterator + bpt_iterator() noexcept : node_{nullptr}, iter_{} {} + + auto& operator*() const noexcept { + assert(AssertNotEnd()); + return const_cast(iter_->second); + } + + auto* operator->() const noexcept { + assert(AssertNotEnd()); + return const_cast(&iter_->second); + } + + auto& operator++() noexcept { + assert(AssertNotEnd()); + ++iter_; + if (iter_ == node_->data_.end()) { + // this may be a nullptr -> end of data + node_ = node_->next_node_; + iter_ = node_ != nullptr ? 
node_->data_.begin() : LeafIteratorT{}; + } + return *this; + } + + auto operator++(int) const noexcept { + IterT iterator(*this); + ++(*this); + return iterator; + } + + friend bool operator==(const IterT& left, const IterT& right) noexcept { + return left.node_ == right.node_ && left.iter_ == right.iter_; + } + + friend bool operator!=(const IterT& left, const IterT& right) noexcept { + return !(left == right); + } + + // TODO private + bool is_end() const noexcept { + return node_ == nullptr; + } + + private: + [[nodiscard]] inline bool AssertNotEnd() const noexcept { + return node_ != nullptr; + } + + hash_t hash() { + return iter_->first; + } + + NLeafT* node_; + LeafIteratorT iter_; + }; + + private: + NodeT* root_; + size_t size_; +}; + +template < + typename KeyT, + typename ValueT, + typename HashT = std::hash, + typename PredT = std::equal_to> +class b_plus_tree_hash_map { + class iterator; + using IterT = iterator; + using EntryT = std::pair; + + public: + b_plus_tree_hash_map() : map_{} {}; + + b_plus_tree_hash_map(const b_plus_tree_hash_map&) = default; + b_plus_tree_hash_map(b_plus_tree_hash_map&&) noexcept = default; + b_plus_tree_hash_map& operator=(const b_plus_tree_hash_map&) = default; + b_plus_tree_hash_map& operator=(b_plus_tree_hash_map&&) noexcept = default; + ~b_plus_tree_hash_map() = default; + + auto begin() const { + return IterT(map_.begin()); + } + + auto end() const { + return IterT(map_.end()); + } + + auto find(const KeyT& key) const { + return IterT(map_.find(EntryT{key, {}})); + } + + auto count(const KeyT& key) const { + return map_.count(EntryT{key, {}}); + } + + template + auto emplace(Args&&... args) { + return try_emplace(std::forward(args)...); + } + + template + auto emplace_hint(const IterT& hint, Args&&... args) { + return try_emplace(hint, std::forward(args)...); + } + + template + auto try_emplace(const KeyT& key, Args&&... args) { + auto result = map_.emplace(key, std::forward(args)...); + return std::make_pair(iterator(result.first), result.second); + } + + template + auto try_emplace(const IterT& hint, const KeyT& key, Args&&... 
args) { + auto result = map_.emplace_hint(hint.map_iter_, key, std::forward(args)...); + return IterT(result); + } + + auto erase(const KeyT& key) { + return map_.erase({key, {}}); + } + + auto erase(const IterT& iterator) { + return IterT(map_.erase(iterator.map_iter_)); + } + + auto size() const { + return map_.size(); + } + + auto empty() const { + return map_.empty(); + } + + void _check() { + map_._check(); + } + + private: + struct EntryHashT { + size_t operator()(const EntryT& x) const { + return HashT{}(x.first); + } + }; + + struct EntryEqualsT { + bool operator()(const EntryT& x, const EntryT& y) const { + return PredT{}(x.first, y.first); + } + }; + + class iterator { + using T = EntryT; + using MapIterType = + decltype(std::declval>() + .begin()); + friend b_plus_tree_hash_map; + + public: + using iterator_category = std::forward_iterator_tag; + using value_type = T; + using difference_type = std::ptrdiff_t; + using pointer = T*; + using reference = T&; + + explicit iterator(MapIterType map_iter) noexcept : map_iter_{map_iter} {} + + // end() iterator + iterator() noexcept : map_iter_{} {} + + auto& operator*() const noexcept { + return *map_iter_; + } + + auto* operator->() const noexcept { + return &*map_iter_; + } + + auto& operator++() noexcept { + ++map_iter_; + return *this; + } + + auto operator++(int) noexcept { + IterT iterator(*this); + ++(*this); + return iterator; + } + + friend bool operator==(const IterT& left, const IterT& right) noexcept { + return left.map_iter_ == right.map_iter_; + } + + friend bool operator!=(const IterT& left, const IterT& right) noexcept { + return !(left == right); + } + + private: + MapIterType map_iter_; + }; + + b_plus_tree_hash_set map_; +}; + +} // namespace improbable::phtree + +#endif // PHTREE_COMMON_B_PLUS_TREE_HASH_MAP_H diff --git a/include/phtree/common/b_plus_tree_map.h b/include/phtree/common/b_plus_tree_map.h new file mode 100644 index 00000000..a9705e0a --- /dev/null +++ b/include/phtree/common/b_plus_tree_map.h @@ -0,0 +1,677 @@ +/* + * Copyright 2022 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_COMMON_B_PLUS_TREE_H +#define PHTREE_COMMON_B_PLUS_TREE_H + +#include "bits.h" +#include +#include +#include + +/* + * PLEASE do not include this file directly, it is included via common.h. + * + * This file contains the B+tree implementation which is used in high-dimensional nodes in + * the PH-Tree. + */ +namespace improbable::phtree { + +/* + * The b_plus_tree_map is a B+tree implementation that uses a hierarchy of horizontally + * connected nodes for fast traversal through all entries. + * + * Behavior: + * This is a key-value map. Keys are unique, so for every key there is at most one entry. + * + * The individual nodes have at most M entries. + * The tree has O(log n) lookup and O(M log n) insertion/removal time complexity, + * space complexity is O(n). 
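+ *
+ * Usage sketch (illustrative only; it assumes the template form b_plus_tree_map<ValueT, COUNT_MAX>
+ * with std::uint64_t keys, where COUNT_MAX is an upper bound on the number of entries that is
+ * used solely to size the nodes below):
+ *
+ *   b_plus_tree_map<std::string, 8> map;
+ *   map.try_emplace(3, "three");
+ *   map.try_emplace(5, "five");
+ *   assert(map.find(3) != map.end());
+ *   map.erase(3);
+ *   assert(map.size() == 1);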
+ * + * Tree structure: + * - Inner nodes: have other nodes as children; their key of an entry represents the highest + * key of any subnode in that entry + * - Leaf nodes: have values as children; their key represents the key of a key/value pair + * - Every node is either a leaf (l-node; contains values) or an inner node + * (n-node; contains nodes). + * - "Sibling" nodes refer to the nodes linked by prev_node_ or next_node_. Sibling nodes + * usually have the same parent but may also be children of their parent's siblings. + * + * - Guarantee: All leaf nodes are horizontally connected + * - Inner nodes may or may not be connected. Specifically: + * - New inner nodes will be assigned siblings from the same parent or the parent's sibling + * (if the new node is the first or last node in a parent) + * - There is no guarantee that inner nodes know about their potential sibling (=other inner + * nodes that own bordering values/child-nodes). + * - There is no guarantee that siblings are on the same depth of the tree. + * - The tree is not balanced + * + * TODO since this is a "map" (with 1:1 mapping of key:value), we could optimize splitting and + * merging by trying to reduce `dead space` + * (space between key1 and key2 that exceeds (key2 - key1)). + */ +template +class b_plus_tree_map { + class bpt_node_base; + template + class bpt_node_data; + class bpt_node_leaf; + class bpt_node_inner; + class bpt_iterator; + + using key_t = std::uint64_t; + + using bpt_entry_inner = std::pair; + using bpt_entry_leaf = std::pair; + + using IterT = bpt_iterator; + using NodeT = bpt_node_base; + using NLeafT = bpt_node_leaf; + using NInnerT = bpt_node_inner; + using LeafIteratorT = decltype(std::vector().begin()); + using TreeT = b_plus_tree_map; + + public: + explicit b_plus_tree_map() : root_{new NLeafT(nullptr, nullptr, nullptr)}, size_{0} {}; + + b_plus_tree_map(const b_plus_tree_map& other) : size_{other.size_} { + root_ = other.root_->is_leaf() ? new NLeafT(*other.root_->as_leaf()) + : new NInnerT(*other.root_->as_inner()); + } + + b_plus_tree_map(b_plus_tree_map&& other) noexcept : root_{other.root_}, size_{other.size_} { + other.root_ = nullptr; + other.size_ = 0; + } + + b_plus_tree_map& operator=(const b_plus_tree_map& other) { + assert(this != &other); + delete root_; + root_ = other.root_->is_leaf() ? 
new NLeafT(*other.root_->as_leaf()) + : new NInnerT(*other.root_->as_inner()); + size_ = other.size_; + return *this; + } + + b_plus_tree_map& operator=(b_plus_tree_map&& other) noexcept { + delete root_; + root_ = other.root_; + other.root_ = nullptr; + size_ = other.size_; + other.size_ = 0; + return *this; + } + + ~b_plus_tree_map() { + delete root_; + root_ = nullptr; + } + + [[nodiscard]] auto find(key_t key) noexcept { + auto node = root_; + while (!node->is_leaf()) { + node = node->as_inner()->find(key); + if (node == nullptr) { + return end(); + } + } + return node->as_leaf()->find(key); + } + + [[nodiscard]] auto find(key_t key) const noexcept { + return const_cast(*this).find(key); + } + + [[nodiscard]] auto lower_bound(key_t key) noexcept { + auto node = root_; + while (!node->is_leaf()) { + node = node->as_inner()->find(key); + if (node == nullptr) { + return end(); + } + } + return node->as_leaf()->lower_bound_as_iter(key); + } + + [[nodiscard]] auto begin() noexcept { + return IterT(root_); + } + + [[nodiscard]] auto begin() const noexcept { + return IterT(root_); + } + + [[nodiscard]] auto cbegin() const noexcept { + return IterT(root_); + } + + [[nodiscard]] auto end() noexcept { + return IterT(); + } + + [[nodiscard]] auto end() const noexcept { + return IterT(); + } + + template + auto emplace(Args&&... args) { + return try_emplace(std::forward(args)...); + } + + template + auto try_emplace(key_t key, Args&&... args) { + auto node = root_; + while (!node->is_leaf()) { + node = node->as_inner()->find_or_last(key); + } + return node->as_leaf()->try_emplace(key, *this, size_, std::forward(args)...); + } + + void erase(key_t key) { + auto node = root_; + while (!node->is_leaf()) { + node = node->as_inner()->find(key); + if (node == nullptr) { + return; + } + } + size_ -= node->as_leaf()->erase_key(key, *this); + } + + void erase(const IterT& iterator) { + assert(iterator != end()); + --size_; + iterator.node_->erase_it(iterator.iter_, *this); + } + + [[nodiscard]] size_t size() const noexcept { + return size_; + } + + void _check() { + size_t count = 0; + NLeafT* prev_leaf = nullptr; + key_t known_min = std::numeric_limits::max(); + root_->_check(count, nullptr, prev_leaf, known_min, 0); + assert(count == size()); + } + + private: + class bpt_node_base { + public: + explicit bpt_node_base(bool is_leaf, NInnerT* parent) noexcept + : is_leaf_{is_leaf}, parent_{parent} {} + + virtual ~bpt_node_base() noexcept = default; + + [[nodiscard]] inline bool is_leaf() const noexcept { + return is_leaf_; + } + + [[nodiscard]] inline NInnerT* as_inner() noexcept { + assert(!is_leaf_); + return static_cast(this); + } + + [[nodiscard]] inline NLeafT* as_leaf() noexcept { + assert(is_leaf_); + return static_cast(this); + } + + virtual void _check(size_t&, NInnerT*, NLeafT*&, key_t&, key_t) = 0; + + public: + const bool is_leaf_; + NInnerT* parent_; + }; + + template + class bpt_node_data : public bpt_node_base { + using DataIteratorT = decltype(std::vector().begin()); + friend IterT; + + constexpr static size_t M_leaf = std::min(size_t(16), COUNT_MAX); + // Default MAX is 32. Special case for small COUNT with smaller inner leaf or + // trees with a single inner leaf. '*2' is added because leaf filling is not compact. + constexpr static size_t M_inner = std::min(size_t(16), COUNT_MAX / M_leaf * 2); + // TODO This could be improved but requires a code change to move > 1 entry when merging. 
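+        // A node that shrinks below M_*_min is dissolved in erase_entry(): its single remaining
+        // entry is moved into the previous or next sibling (provided that sibling has room) and
+        // the node is then removed from its parent.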
+ constexpr static size_t M_leaf_min = 2; // std::max((size_t)2, M_leaf >> 2); + constexpr static size_t M_inner_min = 2; // std::max((size_t)2, M_inner >> 2); + // There is no point in allocating more leaf space than the max amount of entries. + constexpr static size_t M_leaf_init = std::min(size_t(8), COUNT_MAX); + constexpr static size_t M_inner_init = 4; + + public: + explicit bpt_node_data(bool is_leaf, NInnerT* parent, ThisT* prev, ThisT* next) noexcept + : bpt_node_base(is_leaf, parent), data_{}, prev_node_{prev}, next_node_{next} { + data_.reserve(this->M_init()); + } + + virtual ~bpt_node_data() noexcept = default; + + [[nodiscard]] inline size_t M_min() { + return this->is_leaf_ ? M_leaf_min : M_inner_min; + } + + [[nodiscard]] inline size_t M_max() { + return this->is_leaf_ ? M_leaf : M_inner; + } + + [[nodiscard]] inline size_t M_init() { + return this->is_leaf_ ? M_leaf_init : M_inner_init; + } + + [[nodiscard]] auto lower_bound(key_t key) noexcept { + return std::lower_bound( + data_.begin(), data_.end(), key, [](EntryT& left, const key_t key) { + return left.first < key; + }); + } + + [[nodiscard]] size_t size() const noexcept { + return data_.size(); + } + + void erase_entry(DataIteratorT it_to_erase, TreeT& tree) { + auto& parent_ = this->parent_; + key_t max_key_old = data_.back().first; + + size_t pos_to_erase = it_to_erase - data_.begin(); + data_.erase(it_to_erase); + if (parent_ == nullptr) { + if constexpr (std::is_same_v) { + if (data_.size() < 2) { + auto remaining_node = data_.begin()->second; + data_.begin()->second = nullptr; + remaining_node->parent_ = nullptr; + tree.root_ = remaining_node; + delete this; + } + } + return; + } + + if (data_.empty()) { + // Nothing to merge, just remove node. This should be rare, i.e. only happens when + // a rare 1-entry node has its last entry removed. + remove_from_siblings(); + parent_->remove_node(max_key_old, tree); + return; + } + + if (data_.size() < this->M_min()) { + // merge + if (prev_node_ != nullptr && prev_node_->data_.size() < this->M_max()) { + remove_from_siblings(); + auto& prev_data = prev_node_->data_; + if constexpr (std::is_same_v) { + prev_data.emplace_back(std::move(data_[0])); + } else { + data_[0].second->parent_ = prev_node_; + prev_data.emplace_back(std::move(data_[0])); + data_[0].second = nullptr; + } + auto prev_node = prev_node_; // create copy because (this) will be deleted + parent_->remove_node(max_key_old, tree); + if (prev_node->parent_ != nullptr) { + key_t old1 = (prev_data.end() - 2)->first; + key_t new1 = (prev_data.end() - 1)->first; + prev_node->parent_->update_key(old1, new1); + } + return; + } else if (next_node_ != nullptr && next_node_->data_.size() < this->M_max()) { + remove_from_siblings(); + auto& next_data = next_node_->data_; + if constexpr (std::is_same_v) { + next_data.emplace(next_data.begin(), std::move(data_[0])); + } else { + data_[0].second->parent_ = next_node_; + next_data.emplace(next_data.begin(), std::move(data_[0])); + data_[0].second = nullptr; + } + parent_->remove_node(max_key_old, tree); + return; + } + // This node is too small but there is nothing we can do. 
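    // (Recap of the merge path above: with M_min == 2 an under-full node has exactly one entry
    //  left. That entry is moved into the previous sibling if it has room, otherwise into the
    //  next sibling; the emptied node is then unlinked from the sibling chain and removed from
    //  its parent. If neither sibling has room, the under-full node is simply kept.)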
+ } + if (pos_to_erase == data_.size()) { + parent_->update_key(max_key_old, data_.back().first); + } + } + + auto check_split(key_t key, TreeT& tree, size_t& pos_in_out) { + if (data_.size() < this->M_max()) { + if (this->parent_ != nullptr && key > data_.back().first) { + this->parent_->update_key(data_.back().first, key); + } + return static_cast(this); + } + + ThisT* dest = this->split_node(key, tree); + if (dest != this) { + // The insertion pos in node2 can be calculated: + pos_in_out = pos_in_out - data_.size(); + } + return dest; + } + + void _check_data(NInnerT* parent, key_t known_max) { + (void)parent; + (void)known_max; + // assert(parent_ == nullptr || data_.size() >= M_min); + assert(this->parent_ == parent); + if (this->data_.empty()) { + assert(parent == nullptr); + return; + } + assert(this->parent_ == nullptr || known_max == this->data_.back().first); + } + + private: + ThisT* split_node(key_t key_to_add, TreeT& tree) { + auto max_key = data_.back().first; + if (this->parent_ == nullptr) { + auto* new_parent = new NInnerT(nullptr, nullptr, nullptr); + new_parent->emplace_back(max_key, this); + tree.root_ = new_parent; + this->parent_ = new_parent; + } + + // create new node + auto* node2 = new ThisT(this->parent_, static_cast(this), next_node_); + if (next_node_ != nullptr) { + next_node_->prev_node_ = node2; + } + next_node_ = node2; + + // populate new node + // TODO Optimize populating new node: move 1st part, insert new value, move 2nd part...? + auto split_pos = this->M_max() >> 1; + node2->data_.insert( + node2->data_.end(), + std::make_move_iterator(data_.begin() + split_pos), + std::make_move_iterator(data_.end())); + data_.erase(data_.begin() + split_pos, data_.end()); + + if constexpr (std::is_same_v) { + for (auto& e : node2->data_) { + e.second->parent_ = node2; + } + } + + // Add node to parent + auto split_key = data_.back().first; + this->parent_->update_key_and_add_node( + max_key, split_key, std::max(max_key, key_to_add), node2, tree); + + // Return node for insertion of new value + return key_to_add > split_key ? node2 : static_cast(this); + } + + void remove_from_siblings() { + if (next_node_ != nullptr) { + next_node_->prev_node_ = prev_node_; + } + if (prev_node_ != nullptr) { + prev_node_->next_node_ = next_node_; + } + } + + protected: + std::vector data_; + ThisT* prev_node_; + ThisT* next_node_; + }; + + class bpt_node_leaf : public bpt_node_data { + public: + explicit bpt_node_leaf(NInnerT* parent, NLeafT* prev, NLeafT* next) noexcept + : bpt_node_data(true, parent, prev, next) {} + + ~bpt_node_leaf() noexcept = default; + + [[nodiscard]] IterT find(key_t key) noexcept { + auto it = this->lower_bound(key); + if (it != this->data_.end() && it->first == key) { + return IterT(this, it); + } + return IterT(); + } + + [[nodiscard]] IterT lower_bound_as_iter(key_t key) noexcept { + auto it = this->lower_bound(key); + if (it != this->data_.end()) { + return IterT(this, it); + } + return IterT(); + } + + template + auto try_emplace(key_t key, TreeT& tree, size_t& entry_count, Args&&... 
args) { + auto it = this->lower_bound(key); + if (it != this->data_.end() && it->first == key) { + return std::make_pair(IterT(this, it), false); + } + ++entry_count; + + size_t pos = it - this->data_.begin(); // Must be done before split because of MSVC + auto dest = this->check_split(key, tree, pos); + auto x = dest->data_.emplace( + dest->data_.begin() + pos, + std::piecewise_construct, + std::forward_as_tuple(key), + std::forward_as_tuple(std::forward(args)...)); + return std::make_pair(IterT(this, x), true); + } + + bool erase_key(key_t key, TreeT& tree) { + auto it = this->lower_bound(key); + if (it != this->data_.end() && it->first == key) { + this->erase_entry(it, tree); + return true; + } + return false; + } + + void erase_it(LeafIteratorT iter, TreeT& tree) { + this->erase_entry(iter, tree); + } + + void _check( + size_t& count, NInnerT* parent, NLeafT*& prev_leaf, key_t& known_min, key_t known_max) { + this->_check_data(parent, known_max); + + assert(prev_leaf == this->prev_node_); + for (auto& e : this->data_) { + assert(count == 0 || e.first > known_min); + assert(this->parent_ == nullptr || e.first <= known_max); + ++count; + known_min = e.first; + } + prev_leaf = this; + } + }; + + class bpt_node_inner : public bpt_node_data { + public: + explicit bpt_node_inner(NInnerT* parent, NInnerT* prev, NInnerT* next) noexcept + : bpt_node_data(false, parent, prev, next) {} + + ~bpt_node_inner() noexcept { + for (auto& e : this->data_) { + if (e.second != nullptr) { + delete e.second; + } + } + } + + [[nodiscard]] NodeT* find(key_t key) noexcept { + auto it = this->lower_bound(key); + return it != this->data_.end() ? it->second : nullptr; + } + + [[nodiscard]] NodeT* find_or_last(key_t key) noexcept { + auto it = this->lower_bound(key); + return it != this->data_.end() ? it->second : this->data_.back().second; + } + + void emplace_back(key_t key, NodeT* node) { + this->data_.emplace_back(key, node); + } + + void _check( + size_t& count, NInnerT* parent, NLeafT*& prev_leaf, key_t& known_min, key_t known_max) { + this->_check_data(parent, known_max); + + assert(this->parent_ == nullptr || known_max == this->data_.back().first); + auto prev_key = this->data_[0].first; + int n = 0; + for (auto& e : this->data_) { + assert(n == 0 || e.first > prev_key); + e.second->_check(count, this, prev_leaf, known_min, e.first); + assert(this->parent_ == nullptr || e.first <= known_max); + prev_key = e.first; + ++n; + } + } + + void update_key(key_t old_key, key_t new_key) { + assert(new_key != old_key); + auto it = this->lower_bound(old_key); + assert(it != this->data_.end()); + assert(it->first == old_key); + it->first = new_key; + if (this->parent_ != nullptr && ++it == this->data_.end()) { + this->parent_->update_key(old_key, new_key); + } + } + + /* + * This method does two things: + * - It changes the key of the node (node 1) at 'key1_old' to 'key1_new'. 
+ * - It inserts a new node (node 2) after 'new_key1' with value 'key2' + * Invariants: + * - Node1: key1_old > key1_new; Node 1 vs 2: key2 > new_key1 + */ + void update_key_and_add_node( + key_t key1_old, key_t key1_new, key_t key2, NodeT* child2, TreeT& tree) { + assert(key2 > key1_new); + assert(key1_old >= key1_new); + auto it2 = this->lower_bound(key1_old) + 1; + + size_t pos = it2 - this->data_.begin(); // Must be done before split because of MSVC + auto dest = this->check_split(key2, tree, pos); + // check_split() guarantees that child2 is in the same node as child1 + assert(pos > 0); + dest->data_[pos - 1].first = key1_new; + child2->parent_ = dest; + dest->data_.emplace(dest->data_.begin() + pos, key2, child2); + } + + void remove_node(key_t key_remove, TreeT& tree) { + auto it_to_erase = this->lower_bound(key_remove); + delete it_to_erase->second; + this->erase_entry(it_to_erase, tree); + } + }; + + class bpt_iterator { + using EntryT = typename b_plus_tree_map::bpt_entry_leaf; + friend b_plus_tree_map; + + public: + using iterator_category = std::forward_iterator_tag; + using value_type = T; + using difference_type = std::ptrdiff_t; + using pointer = T*; + using reference = T&; + + // Arbitrary position iterator + explicit bpt_iterator(NLeafT* node, LeafIteratorT it) noexcept : node_{node}, iter_{it} { + assert(node->is_leaf_ && "just for consistency, insist that we iterate leaves only "); + } + + // begin() iterator + explicit bpt_iterator(NodeT* node) noexcept { + assert(node->parent_ == nullptr && "must start with root node"); + // move iterator to first value + while (!node->is_leaf_) { + node = node->as_inner()->data_[0].second; + } + node_ = node->as_leaf(); + + if (node_->size() == 0) { + node_ = nullptr; + iter_ = {}; + return; + } + iter_ = node_->data_.begin(); + } + + // end() iterator + bpt_iterator() noexcept : node_{nullptr}, iter_{} {} + + auto& operator*() const noexcept { + assert(AssertNotEnd()); + return const_cast(*iter_); + } + + auto* operator->() const noexcept { + assert(AssertNotEnd()); + return const_cast(&*iter_); + } + + auto& operator++() noexcept { + assert(AssertNotEnd()); + ++iter_; + if (iter_ == node_->data_.end()) { + // this may be a nullptr -> end of data + node_ = node_->next_node_; + iter_ = node_ != nullptr ? 
node_->data_.begin() : LeafIteratorT{}; + } + return *this; + } + + auto operator++(int) const noexcept { + IterT iterator(*this); + ++(*this); + return iterator; + } + + friend bool operator==(const IterT& left, const IterT& right) noexcept { + return left.node_ == right.node_ && left.iter_ == right.iter_; + } + + friend bool operator!=(const IterT& left, const IterT& right) noexcept { + return !(left == right); + } + + private: + [[nodiscard]] inline bool AssertNotEnd() const noexcept { + return node_ != nullptr; + } + + NLeafT* node_; + LeafIteratorT iter_; + }; + + private: + NodeT* root_; + size_t size_; +}; +} // namespace improbable::phtree + +#endif // PHTREE_COMMON_B_PLUS_TREE_H diff --git a/phtree/common/base_types.h b/include/phtree/common/base_types.h similarity index 93% rename from phtree/common/base_types.h rename to include/phtree/common/base_types.h index 5ad77ea2..a95a721b 100644 --- a/phtree/common/base_types.h +++ b/include/phtree/common/base_types.h @@ -40,8 +40,10 @@ using scalar_64_t = int64_t; using scalar_32_t = int32_t; using scalar_16_t = int16_t; -// Bits in a coordinate (usually a double or long has 64 bits, so uint_8 suffices) -using bit_width_t = uint16_t; +// Bits in a coordinate (usually a double or long has 64 bits, so uint_8 suffices). +// However, uint32_t turned out to be faster, probably due to fewer cycles required for 32bit +// instructions (8bit/16bit tend to require more cycles, see CPU tables available on the web). +using bit_width_t = uint32_t; // Number of bit for 'scalar_64_t' or 'scalar_32_t'. Note that 'digits' does _not_ include sign bit, // so e.g. int64_t has 63 `digits`, however we need all bits, i.e. 64. template @@ -109,6 +111,10 @@ class PhBox { return min_ == other.min_ && max_ == other.max_; } + auto operator!=(const PhBox& other) const -> bool { + return !(*this == other); + } + private: Point min_; Point max_; diff --git a/phtree/common/bits.h b/include/phtree/common/bits.h similarity index 100% rename from phtree/common/bits.h rename to include/phtree/common/bits.h diff --git a/phtree/common/common.h b/include/phtree/common/common.h similarity index 98% rename from phtree/common/common.h rename to include/phtree/common/common.h index 2912c8ec..638d0e0a 100644 --- a/phtree/common/common.h +++ b/include/phtree/common/common.h @@ -17,11 +17,9 @@ #ifndef PHTREE_COMMON_COMMON_H #define PHTREE_COMMON_COMMON_H +#include "b_plus_tree_map.h" #include "base_types.h" #include "bits.h" -#include "converter.h" -#include "distance.h" -#include "filter.h" #include "flat_array_map.h" #include "flat_sparse_map.h" #include "tree_stats.h" diff --git a/phtree/common/debug_helper.h b/include/phtree/common/debug_helper.h similarity index 98% rename from phtree/common/debug_helper.h rename to include/phtree/common/debug_helper.h index ede89586..e3dc136e 100644 --- a/phtree/common/debug_helper.h +++ b/include/phtree/common/debug_helper.h @@ -39,6 +39,7 @@ class PhTreeDebugHelper { template static void CheckConsistency(const TREE& tree) { tree.GetInternalTree().GetDebugHelper().CheckConsistency(); + tree.CheckConsistencyExternal(); } /* diff --git a/phtree/common/flat_array_map.h b/include/phtree/common/flat_array_map.h similarity index 100% rename from phtree/common/flat_array_map.h rename to include/phtree/common/flat_array_map.h diff --git a/phtree/common/flat_sparse_map.h b/include/phtree/common/flat_sparse_map.h similarity index 89% rename from phtree/common/flat_sparse_map.h rename to include/phtree/common/flat_sparse_map.h index 3c264223..f822d3d8 
100644 --- a/phtree/common/flat_sparse_map.h +++ b/include/phtree/common/flat_sparse_map.h @@ -32,7 +32,7 @@ namespace improbable::phtree { namespace { template -using PhFlatMapPair = std::pair; +using PhSparseMapPair = std::pair; using index_t = std::int32_t; } // namespace @@ -46,7 +46,9 @@ using index_t = std::int32_t; template class sparse_map { public: - explicit sparse_map() : data_{} {}; + explicit sparse_map() : data_{} { + data_.reserve(4); + } [[nodiscard]] auto find(size_t key) { auto it = lower_bound(key); @@ -66,14 +68,14 @@ class sparse_map { [[nodiscard]] auto lower_bound(size_t key) { return std::lower_bound( - data_.begin(), data_.end(), key, [](PhFlatMapPair& left, const size_t key) { + data_.begin(), data_.end(), key, [](PhSparseMapPair& left, const size_t key) { return left.first < key; }); } [[nodiscard]] auto lower_bound(size_t key) const { return std::lower_bound( - data_.cbegin(), data_.cend(), key, [](const PhFlatMapPair& left, const size_t key) { + data_.cbegin(), data_.cend(), key, [](const PhSparseMapPair& left, const size_t key) { return left.first < key; }); } @@ -115,7 +117,7 @@ class sparse_map { } } - void erase(const typename std::vector>::iterator& iterator) { + void erase(const typename std::vector>::iterator& iterator) { data_.erase(iterator); } @@ -149,7 +151,7 @@ class sparse_map { } } - std::vector> data_; + std::vector> data_; }; } // namespace improbable::phtree diff --git a/phtree/common/tree_stats.h b/include/phtree/common/tree_stats.h similarity index 100% rename from phtree/common/tree_stats.h rename to include/phtree/common/tree_stats.h diff --git a/phtree/common/converter.h b/include/phtree/converter.h similarity index 93% rename from phtree/common/converter.h rename to include/phtree/converter.h index 012c0454..9781d39b 100644 --- a/phtree/common/converter.h +++ b/include/phtree/converter.h @@ -17,7 +17,7 @@ #ifndef PHTREE_COMMON_CONVERTER_H #define PHTREE_COMMON_CONVERTER_H -#include "base_types.h" +#include "common/common.h" #include /* @@ -90,7 +90,7 @@ class ScalarConverterMultiply { public: static scalar_64_t pre(double value) { - return value * MULTIPLY; + return static_cast(value * MULTIPLY); } static double post(scalar_64_t value) { @@ -98,7 +98,7 @@ class ScalarConverterMultiply { } static scalar_32_t pre(float value) { - return value * MULTIPLY; + return static_cast(value * MULTIPLY); } static float post(scalar_32_t value) { @@ -126,7 +126,9 @@ class ConverterBase { using KeyExternal = KEY_EXTERNAL; using KeyInternal = PhPoint; using QueryBoxExternal = QUERY_POINT_EXTERNAL; - using QueryBoxInternal = PhBox; + using QueryBoxInternal = PhBox; + using QueryPointExternal = PhPoint; + using QueryPointInternal = PhPoint; }; /* @@ -174,6 +176,8 @@ template < typename CONVERT = ScalarConverterIEEE> class SimplePointConverter : public ConverterPointBase { using BASE = ConverterPointBase; + + public: using Point = typename BASE::KeyExternal; using PointInternal = typename BASE::KeyInternal; using QueryBox = typename BASE::QueryBoxExternal; @@ -215,9 +219,14 @@ template < typename CONVERT = ScalarConverterIEEE> class SimpleBoxConverter : public ConverterBoxBase { using BASE = ConverterBoxBase; + + public: using Box = typename BASE::KeyExternal; using PointInternal = typename BASE::KeyInternal; using QueryBox = typename BASE::QueryBoxExternal; + using QueryBoxInternal = typename BASE::QueryBoxInternal; + using QueryPoint = typename BASE::QueryPointExternal; + using QueryPointInternal = typename BASE::QueryPointInternal; 
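    // Illustrative behaviour of ScalarConverterMultiply above (concrete numbers assumed, e.g. a
    // converter configured with MULTIPLY = 1e6 and DIVIDE = 1e-6):
    //     pre(2.5)        == static_cast<scalar_64_t>(2.5 * 1e6) == 2'500'000
    //     post(2'500'000) == 2'500'000 * 1e-6                    == 2.5
    // The added static_cast makes the narrowing double -> integer conversion explicit
    // (presumably to silence compiler warnings); the mapping itself remains lossy because
    // pre() truncates to the chosen precision.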
static_assert(std::is_same>::value); static_assert(std::is_same>::value); @@ -243,7 +252,7 @@ class SimpleBoxConverter : public ConverterBoxBase out; + QueryBoxInternal out; auto& min = out.min(); auto& max = out.max(); for (dimension_t i = 0; i < DIM; ++i) { @@ -253,6 +262,22 @@ class SimpleBoxConverter : public ConverterBoxBase #include #include diff --git a/include/phtree/filter.h b/include/phtree/filter.h new file mode 100644 index 00000000..5e57a3dd --- /dev/null +++ b/include/phtree/filter.h @@ -0,0 +1,419 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_COMMON_FILTERS_H +#define PHTREE_COMMON_FILTERS_H + +#include "converter.h" +#include "distance.h" +#include +#include +#include +#include +#include + +namespace improbable::phtree { + +/* + * Any iterator that has a filter defined will traverse nodes or return values if and only if the + * filter function returns 'true'. The filter functions are called for every node and every entry + * (note: internally, nodes are also stored in entries, but these entries will be passed to the + * filter for nodes) that the iterator encounters. By implication, it will never call the filter + * function for nodes of entries if their respective parent node has already been rejected. + * + * There are separate filter functions for nodes and for key/value entries. + * + * Every filter needs to provide two functions: + * - bool IsEntryValid(const PhPoint& key, const T& value); + * This function is called for every key/value pair that the query encounters. The function + * should return 'true' iff the key/value should be added to the query result. + * The parameters are the key and value of the key/value pair. + * NOTE: WHen using a MultiMap, 'T' becomes the type of the 'bucket', i.e. the type of the + * container that holds multiple entries for a given coordinate. + * - bool IsNodeValid(const PhPoint& prefix, int bits_to_ignore); + * This function is called for every node that the query encounters. The function should + * return 'true' if the node should be traversed and searched for potential results. + * The parameters are the prefix of the node and the number of least significant bits of the + * prefix that can (and should) be ignored. The bits of the prefix that should be ignored can + * have any value. + * + * - bool IsBucketEntryValid(const KeyT& key, const ValueT& value); + * This is only used/required for MultiMaps, implementations for a normal PhTree are ignored. + * In case of a MultiMap, this method is called for every entry in a bucket (see above). + */ + +/* + * The no-op filter is the default filter for the PH-Tree. It always returns 'true'. + */ +struct FilterNoOp { + /* + * @param key The key/coordinate of the entry. + * @param value The value of the entry. For MultiMaps, this is a container of values. + * @returns This default implementation always returns `true`. 
+ */ + template + constexpr bool IsEntryValid(const KeyT& /*key*/, const ValueT& /*value*/) const noexcept { + return true; + } + + /* + * @param prefix The prefix of node. Any coordinate in the nodes shares this prefix. + * @param bits_to_ignore The number of bits of the prefix that should be ignored because they + * are NOT the same for all coordinates in the node. For example, assuming 64bit values, if the + * node represents coordinates that all share the first 10 bits of the prefix, then the value of + * bits_to_ignore is 64-10=54. + * @returns This default implementation always returns `true`. + */ + template + constexpr bool IsNodeValid(const KeyT& /*prefix*/, int /*bits_to_ignore*/) const noexcept { + return true; + } + + /* + * This is checked once for every entry in a bucket. The method is called once a call to + * 'IsEntryValid` for the same bucket has returned 'true'. A typical implementation + * simply returns `true` or checks some values of the entry. + * @param key The key/coordinate of the bucket entry. + * @param value The value of the entry. + * @returns This default implementation always returns `true`. + */ + template + constexpr bool IsBucketEntryValid(const KeyT& /*key*/, const ValueT& /*value*/) const noexcept { + return true; + } +}; + +/* + * The AABB filter can be used to query a point tree for an axis aligned bounding box (AABB). + * The result is equivalent to that of the 'begin_query(...)' function. + */ +template +class FilterAABB { + using KeyExternal = typename CONVERTER::KeyExternal; + using KeyInternal = typename CONVERTER::KeyInternal; + using ScalarInternal = typename CONVERTER::ScalarInternal; + static constexpr auto DIM = CONVERTER::DimInternal; + + public: + FilterAABB( + const KeyExternal& min_include, const KeyExternal& max_include, const CONVERTER& converter) + : min_external_{min_include} + , max_external_{max_include} + , min_internal_{converter.pre(min_include)} + , max_internal_{converter.pre(max_include)} + , converter_{converter} {}; + + /* + * This function allows resizing/shifting the AABB while iterating over the tree. + */ + void set(const KeyExternal& min_include, const KeyExternal& max_include) { + min_external_ = min_include; + max_external_ = max_include; + min_internal_ = converter_.get().pre(min_include); + max_internal_ = converter_.get().pre(max_include); + } + + template + [[nodiscard]] bool IsEntryValid(const KeyInternal& key, const T& /*value*/) const { + auto point = converter_.get().post(key); + for (dimension_t i = 0; i < DIM; ++i) { + if (point[i] < min_external_[i] || point[i] > max_external_[i]) { + return false; + } + } + return true; + } + + [[nodiscard]] bool IsNodeValid(const KeyInternal& prefix, std::uint32_t bits_to_ignore) const { + // Let's assume that we always want to traverse the root node (bits_to_ignore == 64) + if (bits_to_ignore >= (MAX_BIT_WIDTH - 1)) { + return true; + } + ScalarInternal node_min_bits = MAX_MASK << bits_to_ignore; + ScalarInternal node_max_bits = ~node_min_bits; + + for (dimension_t i = 0; i < DIM; ++i) { + if ((prefix[i] | node_max_bits) < min_internal_[i] || + (prefix[i] & node_min_bits) > max_internal_[i]) { + return false; + } + } + return true; + } + + private: + KeyExternal min_external_; + KeyExternal max_external_; + KeyInternal min_internal_; + KeyInternal max_internal_; + std::reference_wrapper converter_; +}; + +/* + * The sphere filter can be used to query a point tree for a sphere. 
+ */ +template +class FilterSphere { + using KeyExternal = typename CONVERTER::KeyExternal; + using KeyInternal = typename CONVERTER::KeyInternal; + using ScalarInternal = typename CONVERTER::ScalarInternal; + static constexpr auto DIM = CONVERTER::DimInternal; + + public: + template > + FilterSphere( + const KeyExternal& center, + const double radius, + const CONVERTER& converter, + DIST&& distance_function = DIST()) + : center_external_{center} + , center_internal_{converter.pre(center)} + , radius_{radius} + , converter_{converter} + , distance_function_(std::forward(distance_function)){}; + + template + [[nodiscard]] bool IsEntryValid(const KeyInternal& key, const T&) const { + KeyExternal point = converter_.get().post(key); + return distance_function_(center_external_, point) <= radius_; + } + + /* + * Calculate whether AABB encompassing all possible points in the node intersects with the + * sphere. + */ + [[nodiscard]] bool IsNodeValid(const KeyInternal& prefix, std::uint32_t bits_to_ignore) const { + // we always want to traverse the root node (bits_to_ignore == 64) + + if (bits_to_ignore >= (MAX_BIT_WIDTH - 1)) { + return true; + } + + ScalarInternal node_min_bits = MAX_MASK << bits_to_ignore; + ScalarInternal node_max_bits = ~node_min_bits; + + KeyInternal closest_in_bounds; + for (dimension_t i = 0; i < DIM; ++i) { + // calculate lower and upper bound for dimension for given node + ScalarInternal lo = prefix[i] & node_min_bits; + ScalarInternal hi = prefix[i] | node_max_bits; + + // choose value closest to center for dimension + closest_in_bounds[i] = std::clamp(center_internal_[i], lo, hi); + } + + KeyExternal closest_point = converter_.get().post(closest_in_bounds); + return distance_function_(center_external_, closest_point) <= radius_; + } + + private: + KeyExternal center_external_; + KeyInternal center_internal_; + double radius_; + std::reference_wrapper converter_; + DISTANCE distance_function_; +}; +// deduction guide +template < + typename CONV, + typename DIST = DistanceEuclidean, + typename P = typename CONV::KeyExternal> +FilterSphere(const P&, double, const CONV&, DIST&& fn = DIST()) -> FilterSphere; + +/* + * AABB filter for box keys. + * It detects all boxes that overlap partially or fully with the query box. + */ +template +class FilterBoxAABB { + using KeyInternal = typename CONVERTER::KeyInternal; + using ScalarInternal = typename CONVERTER::ScalarInternal; + using QueryPoint = typename CONVERTER::QueryPointExternal; + using QueryPointInternal = typename CONVERTER::QueryPointInternal; + static constexpr auto DIM = CONVERTER::DimExternal; + + public: + FilterBoxAABB( + const QueryPoint& min_include, const QueryPoint& max_include, const CONVERTER& converter) + : min_internal_{converter.pre_query(min_include)} + , max_internal_{converter.pre_query(max_include)} + , converter_{converter} {}; + + /* + * This function allows resizing/shifting the AABB while iterating over the tree. 
+ */ + void set(const QueryPoint& min_include, const QueryPoint& max_include) { + min_internal_ = converter_.get().pre_query(min_include); + max_internal_ = converter_.get().pre_query(max_include); + } + + template + [[nodiscard]] bool IsEntryValid(const KeyInternal& key, const T& /*value*/) const { + for (dimension_t i = 0; i < DIM; ++i) { + if (key[i + DIM] < min_internal_[i] || key[i] > max_internal_[i]) { + return false; + } + } + return true; + } + + [[nodiscard]] bool IsNodeValid(const KeyInternal& prefix, std::uint32_t bits_to_ignore) const { + // Let's assume that we always want to traverse the root node (bits_to_ignore == 64) + if (bits_to_ignore >= (MAX_BIT_WIDTH - 1)) { + return true; + } + ScalarInternal node_min_bits = MAX_MASK << bits_to_ignore; + ScalarInternal node_max_bits = ~node_min_bits; + + for (dimension_t i = 0; i < DIM; ++i) { + if ((prefix[i] | node_max_bits) < min_internal_[i] || + (prefix[i + DIM] & node_min_bits) > max_internal_[i]) { + return false; + } + } + return true; + } + + private: + QueryPointInternal min_internal_; + QueryPointInternal max_internal_; + std::reference_wrapper converter_; +}; + +/* + * The box sphere filter can be used to query a PH-Tree for boxes that intersect with a sphere. + */ +template +class FilterBoxSphere { + using KeyInternal = typename CONVERTER::KeyInternal; + using ScalarInternal = typename CONVERTER::ScalarInternal; + using QueryPoint = typename CONVERTER::QueryPointExternal; + using QueryPointInternal = typename CONVERTER::QueryPointInternal; + static constexpr auto DIM = CONVERTER::DimExternal; + + public: + template > + FilterBoxSphere( + const QueryPoint& center, + const double radius, + const CONVERTER& converter, + DIST&& distance_function = DIST()) + : center_external_{center} + , center_internal_{converter.pre_query(center)} + , radius_{radius} + , converter_{converter} + , distance_function_(std::forward(distance_function)){}; + + template + [[nodiscard]] bool IsEntryValid(const KeyInternal& key, const T&) const { + QueryPointInternal closest_in_bounds; + for (dimension_t i = 0; i < DIM; ++i) { + // choose value closest to center for each dimension + closest_in_bounds[i] = std::clamp(center_internal_[i], key[i], key[i + DIM]); + } + QueryPoint closest_point = converter_.get().post_query(closest_in_bounds); + return distance_function_(center_external_, closest_point) <= radius_; + } + + /* + * Calculate whether AABB of all possible points in the node intersects with the sphere. 
+ */ + [[nodiscard]] bool IsNodeValid(const KeyInternal& prefix, std::uint32_t bits_to_ignore) const { + // we always want to traverse the root node (bits_to_ignore == 64) + + if (bits_to_ignore >= (MAX_BIT_WIDTH - 1)) { + return true; + } + + ScalarInternal node_min_bits = MAX_MASK << bits_to_ignore; + ScalarInternal node_max_bits = ~node_min_bits; + + QueryPointInternal closest_in_bounds; + for (dimension_t i = 0; i < DIM; ++i) { + // calculate lower and upper bound for dimension for given node + ScalarInternal lo = prefix[i] & node_min_bits; + ScalarInternal hi = prefix[i + DIM] | node_max_bits; + + // choose value closest to center for dimension + closest_in_bounds[i] = std::clamp(center_internal_[i], lo, hi); + } + + QueryPoint closest_point = converter_.get().post_query(closest_in_bounds); + return distance_function_(center_external_, closest_point) <= radius_; + } + + private: + QueryPoint center_external_; + QueryPointInternal center_internal_; + double radius_; + std::reference_wrapper converter_; + DISTANCE distance_function_; +}; +// deduction guide +template < + typename CONV, + typename DIST = DistanceEuclidean, + typename P = typename CONV::KeyExternal> +FilterBoxSphere(const P&, double, const CONV&, DIST&& fn = DIST()) -> FilterBoxSphere; + +/* + * AABB filter for MultiMaps. + */ +template +class FilterMultiMapAABB : public FilterAABB { + using Key = typename CONVERTER::KeyExternal; + using KeyInternal = typename CONVERTER::KeyInternal; + + public: + FilterMultiMapAABB(const Key& min_include, const Key& max_include, CONVERTER& converter) + : FilterAABB(min_include, max_include, converter){}; + + template + [[nodiscard]] inline bool IsBucketEntryValid(const KeyInternal&, const ValueT&) const noexcept { + return true; + } +}; + +/* + * Sphere filter for MultiMaps. 
+ */ +template +class FilterMultiMapSphere : public FilterSphere { + using Key = typename CONVERTER::KeyExternal; + using KeyInternal = typename CONVERTER::KeyInternal; + + public: + template > + FilterMultiMapSphere( + const Key& center, double radius, const CONVERTER& converter, DIST&& dist_fn = DIST()) + : FilterSphere(center, radius, converter, std::forward(dist_fn)){}; + + template + [[nodiscard]] inline bool IsBucketEntryValid(const KeyInternal&, const ValueT&) const noexcept { + return true; + } +}; +// deduction guide +template < + typename CONV, + typename DIST = DistanceEuclidean, + typename P = typename CONV::KeyExternal> +FilterMultiMapSphere(const P&, double, const CONV&, DIST&& fn = DIST()) + -> FilterMultiMapSphere; + +} // namespace improbable::phtree + +#endif // PHTREE_COMMON_FILTERS_H diff --git a/phtree/phtree.h b/include/phtree/phtree.h similarity index 74% rename from phtree/phtree.h rename to include/phtree/phtree.h index 54dfd2dd..b7d30695 100644 --- a/phtree/phtree.h +++ b/include/phtree/phtree.h @@ -32,8 +32,6 @@ namespace improbable::phtree { template > class PhTree { friend PhTreeDebugHelper; - using KeyInternal = typename CONVERTER::KeyInternal; - using QueryBox = typename CONVERTER::QueryBoxExternal; using Key = typename CONVERTER::KeyExternal; static constexpr dimension_t DimInternal = CONVERTER::DimInternal; @@ -42,7 +40,17 @@ class PhTree { typename std::conditional<(DIM == DimInternal), QueryPoint, QueryIntersect>::type; public: - explicit PhTree(CONVERTER converter = CONVERTER()) : tree_{converter}, converter_{converter} {} + // Unless specified otherwise this is just PhBox + using QueryBox = typename CONVERTER::QueryBoxExternal; + + template + explicit PhTree(CONV&& converter = CONV()) : tree_{&converter_}, converter_{converter} {} + + PhTree(const PhTree& other) = delete; + PhTree& operator=(const PhTree& other) = delete; + PhTree(PhTree&& other) noexcept = default; + PhTree& operator=(PhTree&& other) noexcept = default; + ~PhTree() noexcept = default; /* * Attempts to build and insert a key and a value into the tree. @@ -60,7 +68,7 @@ class PhTree { */ template std::pair emplace(const Key& key, Args&&... args) { - return tree_.emplace(converter_.pre(key), std::forward(args)...); + return tree_.try_emplace(converter_.pre(key), std::forward(args)...); } /* @@ -80,7 +88,7 @@ class PhTree { */ template std::pair emplace_hint(const ITERATOR& iterator, const Key& key, Args&&... args) { - return tree_.emplace_hint(iterator, converter_.pre(key), std::forward(args)...); + return tree_.try_emplace(iterator, converter_.pre(key), std::forward(args)...); } /* @@ -93,6 +101,22 @@ class PhTree { return tree_.insert(converter_.pre(key), value); } + /* + * See emplace(). + */ + template + std::pair try_emplace(const Key& key, Args&&... args) { + return tree_.try_emplace(converter_.pre(key), std::forward(args)...); + } + + /* + * See emplace_hint(). + */ + template + std::pair try_emplace(const ITERATOR& iterator, const Key& key, Args&&... args) { + return tree_.try_emplace(iterator, converter_.pre(key), std::forward(args)...); + } + /* * @return the value stored at position 'key'. If no such value exists, one is added to the tree * and returned. @@ -147,6 +171,44 @@ class PhTree { return tree_.erase(iterator); } + /* + * This function attempts to remove a 'value' from 'old_key' and reinsert it for 'new_key'. 
+ * + * The function will report _success_ in the following cases: + * - the value was removed from the old position and reinserted at the new position + * - the position and new position refer to the same bucket. + * + * The function will report _failure_ in the following cases: + * - The value was already present in the new position + * - The value was not present in the old position + * + * This method will _not_ remove the value from the old position if it is already present at the + * new position. + * + * @param old_key The old position + * @param new_key The new position + * @return '1' if the 'value' was moved, otherwise '0'. + */ + auto relocate(const Key& old_key, const Key& new_key) { + return tree_.relocate_if( + converter_.pre(old_key), converter_.pre(new_key), [](const T&) { return true; }); + } + + /* + * Relocate (move) an entry from one position to another, subject to a predicate. + * + * @param old_key The old position + * @param new_key The new position + * @param predicate The predicate is called for every value before it is relocated. + * If the predicate returns 'false', the relocation is aborted. + * @return '1' if the 'value' was moved, otherwise '0'. + */ + template + auto relocate_if(const Key& old_key, const Key& new_key, PRED&& predicate) { + return tree_.relocate_if( + converter_.pre(old_key), converter_.pre(new_key), std::forward(predicate)); + } + /* * Iterates over all entries in the tree. The optional filter allows filtering entries and nodes * (=sub-trees) before returning / traversing them. By default all entries are returned. Filter @@ -158,9 +220,9 @@ class PhTree { * sub-nodes before they are returned or traversed. Any filter function must follow the * signature of the default 'FilterNoOp`. */ - template - void for_each(CALLBACK_FN& callback, FILTER filter = FILTER()) const { - tree_.for_each(callback, filter); + template + void for_each(CALLBACK&& callback, FILTER&& filter = FILTER()) const { + tree_.for_each(std::forward(callback), std::forward(filter)); } /* @@ -175,15 +237,18 @@ class PhTree { * signature of the default 'FilterNoOp`. 
*/ template < - typename CALLBACK_FN, + typename CALLBACK, typename FILTER = FilterNoOp, typename QUERY_TYPE = DEFAULT_QUERY_TYPE> void for_each( QueryBox query_box, - CALLBACK_FN& callback, - FILTER filter = FILTER(), + CALLBACK&& callback, + FILTER&& filter = FILTER(), QUERY_TYPE query_type = QUERY_TYPE()) const { - tree_.for_each(query_type(converter_.pre_query(query_box)), callback, filter); + tree_.for_each( + query_type(converter_.pre_query(query_box)), + std::forward(callback), + std::forward(filter)); } /* @@ -194,8 +259,8 @@ class PhTree { * @return an iterator over all (filtered) entries in the tree, */ template - auto begin(FILTER filter = FILTER()) const { - return tree_.begin(filter); + auto begin(FILTER&& filter = FILTER()) const { + return tree_.begin(std::forward(filter)); } /* @@ -211,9 +276,10 @@ class PhTree { template auto begin_query( const QueryBox& query_box, - FILTER filter = FILTER(), + FILTER&& filter = FILTER(), QUERY_TYPE query_type = DEFAULT_QUERY_TYPE()) const { - return tree_.begin_query(query_type(converter_.pre_query(query_box)), filter); + return tree_.begin_query( + query_type(converter_.pre_query(query_box)), std::forward(filter)); } /* @@ -238,18 +304,21 @@ class PhTree { auto begin_knn_query( size_t min_results, const Key& center, - DISTANCE distance_function = DISTANCE(), - FILTER filter = FILTER()) const { + DISTANCE&& distance_function = DISTANCE(), + FILTER&& filter = FILTER()) const { // We use pre() instead of pre_query() here because, strictly speaking, we want to // find the nearest neighbors of a (fictional) key, which may as well be a box. return tree_.begin_knn_query( - min_results, converter_.pre(center), distance_function, filter); + min_results, + converter_.pre(center), + std::forward(distance_function), + std::forward(filter)); } /* * @return An iterator representing the tree's 'end'. */ - const auto& end() const { + auto end() const { return tree_.end(); } @@ -287,6 +356,14 @@ class PhTree { return tree_; } + void CheckConsistencyExternal() const { + [[maybe_unused]] size_t n = 0; + for ([[maybe_unused]] const auto& entry : tree_) { + ++n; + } + assert(n == size()); + } + v16::PhTreeV16 tree_; CONVERTER converter_; }; diff --git a/phtree/phtree_multimap.h b/include/phtree/phtree_multimap.h similarity index 59% rename from phtree/phtree_multimap.h rename to include/phtree/phtree_multimap.h index 75540f9f..09447b99 100644 --- a/phtree/phtree_multimap.h +++ b/include/phtree/phtree_multimap.h @@ -17,6 +17,7 @@ #ifndef PHTREE_PHTREE_MULTIMAP_H #define PHTREE_PHTREE_MULTIMAP_H +#include "common/b_plus_tree_hash_map.h" #include "common/common.h" #include "v16/phtree_v16.h" #include @@ -56,8 +57,11 @@ class IteratorBase { friend PHTREE; using T = typename PHTREE::ValueType; + protected: + using BucketIterType = typename PHTREE::BucketIterType; + public: - explicit IteratorBase() noexcept : current_value_ptr_{nullptr}, is_finished_{false} {} + explicit IteratorBase() noexcept : current_value_ptr_{nullptr} {} T& operator*() const noexcept { assert(current_value_ptr_); @@ -71,26 +75,16 @@ class IteratorBase { friend bool operator==( const IteratorBase& left, const IteratorBase& right) noexcept { - // Note: The following compares pointers to Entry objects (actually: their values T) - // so it should be _fast_ and return 'true' only for identical entries. 
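        // (Note on the simplified comparison below: both a default-constructed end() iterator
        //  and an iterator on which SetFinished() was called hold current_value_ptr_ == nullptr,
        //  so comparing the value pointers alone is sufficient and the separate is_finished_
        //  flag becomes redundant.)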
- static_assert(std::is_pointer_v); - return (left.is_finished_ && right.Finished()) || - (!left.is_finished_ && !right.Finished() && - left.current_value_ptr_ == right.current_value_ptr_); + return left.current_value_ptr_ == right.current_value_ptr_; } friend bool operator!=( const IteratorBase& left, const IteratorBase& right) noexcept { - return !(left == right); + return left.current_value_ptr_ != right.current_value_ptr_; } protected: - [[nodiscard]] bool Finished() const noexcept { - return is_finished_; - } - void SetFinished() noexcept { - is_finished_ = true; current_value_ptr_ = nullptr; } @@ -100,41 +94,21 @@ class IteratorBase { private: const T* current_value_ptr_; - bool is_finished_; }; -template +template class IteratorNormal : public IteratorBase { friend PHTREE; - using BucketIterType = typename PHTREE::BucketIterType; - using PhTreeIterEndType = typename PHTREE::EndType; + using BucketIterType = typename IteratorBase::BucketIterType; public: - explicit IteratorNormal(const PhTreeIterEndType& iter_ph_end) noexcept - : IteratorBase() - , iter_ph_end_{iter_ph_end} - , iter_ph_{iter_ph_end} - , iter_bucket_{} - , filter_{} { - this->SetFinished(); - } + explicit IteratorNormal() noexcept : IteratorBase(), iter_ph_{}, iter_bucket_{} {} - // Why are we passing two iterators by reference + std::move? - // See: https://abseil.io/tips/117 - IteratorNormal( - const PhTreeIterEndType& iter_ph_end, - ITERATOR_PH iter_ph, - BucketIterType iter_bucket, - const FILTER filter = FILTER()) noexcept + template + IteratorNormal(ITER_PH&& iter_ph, BucketIterType&& iter_bucket) noexcept : IteratorBase() - , iter_ph_end_{iter_ph_end} - , iter_ph_{std::move(iter_ph)} - , iter_bucket_{std::move(iter_bucket)} - , filter_{filter} { - if (iter_ph == iter_ph_end) { - this->SetFinished(); - return; - } + , iter_ph_{std::forward(iter_ph)} + , iter_bucket_{std::forward(iter_bucket)} { FindNextElement(); } @@ -168,17 +142,18 @@ class IteratorNormal : public IteratorBase { private: void FindNextElement() { - while (iter_ph_ != iter_ph_end_) { + while (!iter_ph_.IsEnd()) { while (iter_bucket_ != iter_ph_->end()) { // We filter only entries here, nodes are filtered elsewhere - if (filter_.IsEntryValid(iter_ph_.GetCurrentResult()->GetKey(), *iter_bucket_)) { + if (iter_ph_.__Filter().IsBucketEntryValid( + iter_ph_.GetEntry()->GetKey(), *iter_bucket_)) { this->SetCurrentValue(&(*iter_bucket_)); return; } ++iter_bucket_; } ++iter_ph_; - if (iter_ph_ != iter_ph_end_) { + if (!iter_ph_.IsEnd()) { iter_bucket_ = iter_ph_->begin(); } } @@ -186,24 +161,17 @@ class IteratorNormal : public IteratorBase { this->SetFinished(); } - PhTreeIterEndType& iter_ph_end_; ITERATOR_PH iter_ph_; BucketIterType iter_bucket_; - FILTER filter_; }; -template -class IteratorKnn : public IteratorNormal { - using BucketIterType = typename PHTREE::BucketIterType; - using PhTreeIterEndType = typename PHTREE::EndType; - +template +class IteratorKnn : public IteratorNormal { public: - IteratorKnn( - const PhTreeIterEndType& iter_ph_end, - const ITERATOR_PH iter_ph, - BucketIterType iter_bucket, - const FILTER filter) noexcept - : IteratorNormal(iter_ph_end, iter_ph, iter_bucket, filter) {} + template + IteratorKnn(ITER_PH&& iter_ph, BucketIterType&& iter_bucket) noexcept + : IteratorNormal( + std::forward(iter_ph), std::forward(iter_bucket)) {} [[nodiscard]] double distance() const noexcept { return this->GetIteratorOfPhTree().distance(); @@ -219,24 +187,32 @@ template < dimension_t DIM, typename T, typename CONVERTER = ConverterNoOp, - 
typename BUCKET = std::unordered_set, + typename BUCKET = b_plus_tree_hash_set, bool POINT_KEYS = true, typename DEFAULT_QUERY_TYPE = QueryPoint> class PhTreeMultiMap { - friend PhTreeDebugHelper; using KeyInternal = typename CONVERTER::KeyInternal; - using QueryBox = typename CONVERTER::QueryBoxExternal; using Key = typename CONVERTER::KeyExternal; static constexpr dimension_t DimInternal = CONVERTER::DimInternal; using PHTREE = PhTreeMultiMap; - - public: using ValueType = T; using BucketIterType = decltype(std::declval().begin()); - using EndType = decltype(std::declval>().end()); + using EndType = decltype(std::declval>().end()); + + friend PhTreeDebugHelper; + friend IteratorBase; + + public: + using QueryBox = typename CONVERTER::QueryBoxExternal; explicit PhTreeMultiMap(CONVERTER converter = CONVERTER()) - : tree_{converter}, converter_{converter}, size_{0} {} + : tree_{&converter_}, converter_{converter}, size_{0} {} + + PhTreeMultiMap(const PhTreeMultiMap& other) = delete; + PhTreeMultiMap& operator=(const PhTreeMultiMap& other) = delete; + PhTreeMultiMap(PhTreeMultiMap&& other) noexcept = default; + PhTreeMultiMap& operator=(PhTreeMultiMap&& other) noexcept = default; + ~PhTreeMultiMap() noexcept = default; /* * Attempts to build and insert a key and a value into the tree. @@ -254,7 +230,7 @@ class PhTreeMultiMap { */ template std::pair emplace(const Key& key, Args&&... args) { - auto& outer_iter = tree_.emplace(converter_.pre(key)).first; + auto& outer_iter = tree_.try_emplace(converter_.pre(key)).first; auto bucket_iter = outer_iter.emplace(std::forward(args)...); size_ += bucket_iter.second ? 1 : 0; return {const_cast(*bucket_iter.first), bucket_iter.second}; @@ -269,7 +245,7 @@ class PhTreeMultiMap { * to erase() and if no other modifications occurred. * The following is valid: * - * // Move value from key1 to key2 + * // Move value from key1 to key2 (if you don't want to use relocate() ). * auto iter = tree.find(key1); * auto value = iter.second(); // The value may become invalid in erase() * erase(iter); @@ -277,7 +253,7 @@ class PhTreeMultiMap { */ template std::pair emplace_hint(const ITERATOR& iterator, const Key& key, Args&&... args) { - auto result_ph = tree_.emplace_hint(iterator.GetIteratorOfPhTree(), converter_.pre(key)); + auto result_ph = tree_.try_emplace(iterator.GetIteratorOfPhTree(), converter_.pre(key)); auto& bucket = result_ph.first; if (result_ph.second) { // new bucket @@ -306,6 +282,22 @@ class PhTreeMultiMap { return emplace(key, value); } + /* + * See emplace(). + */ + template + std::pair try_emplace(const Key& key, Args&&... args) { + return emplace(key, std::forward(args)...); + } + + /* + * See emplace_hint(). + */ + template + std::pair try_emplace(const ITERATOR& iterator, const Key& key, Args&&... args) { + return emplace_hint(iterator, key, std::forward(args)...); + } + /* * @return '1', if a value is associated with the provided key, otherwise '0'. */ @@ -337,16 +329,11 @@ class PhTreeMultiMap { * See std::unordered_multimap::find(). 
* * @param key the key to look up - * @return an iterator that points either to the the first value associated with the key or + * @return an iterator that points either to the first value associated with the key or * to {@code end()} if no value was found */ auto find(const Key& key) const { - auto outer_iter = tree_.find(converter_.pre(key)); - if (outer_iter == tree_.end()) { - return CreateIterator(tree_.end(), bucket_dummy_end_); - } - auto bucket_iter = outer_iter.second().begin(); - return CreateIterator(outer_iter, bucket_iter); + return CreateIterator(tree_.find(converter_.pre(key))); } /* @@ -358,12 +345,7 @@ class PhTreeMultiMap { * or to {@code end()} if the key/value pair was found */ auto find(const Key& key, const T& value) const { - auto outer_iter = tree_.find(converter_.pre(key)); - if (outer_iter == tree_.end()) { - return CreateIterator(tree_.end(), bucket_dummy_end_); - } - auto bucket_iter = outer_iter.second().find(value); - return CreateIterator(outer_iter, bucket_iter); + return CreateIteratorFind(tree_.find(converter_.pre(key)), value); } /* @@ -388,7 +370,7 @@ class PhTreeMultiMap { /* * See std::map::erase(). Removes any entry located at the provided iterator. * - * This function uses the iterator to directly erase the entry so it is usually faster than + * This function uses the iterator to directly erase the entry, so it is usually faster than * erase(key, value). * * @return '1' if a value was found, otherwise '0'. @@ -416,66 +398,145 @@ class PhTreeMultiMap { /* * This function attempts to remove the 'value' from 'old_key' and reinsert it for 'new_key'. * - * The relocate will report _success_ in the following cases: + * The relocate function will report _success_ in the following cases: * - the value was removed from the old position and reinserted at the new position - * - the position and new position refer to the same bucket. + * - the old position and new position are identical. * - * The relocate will report_failure_ in the following cases: + * The relocate function will report _failure_ in the following cases: * - The value was already present in the new position * - The value was not present in the old position * - * This method will _always_ attempt to insert the value at the new position even if the value - * was not found at the old position. - * This method will _not_ remove the value from the old position if it is already present at the - * new position. + * In case of _failure_, this function guarantees that the tree remains unchanged + * or is returned to its original state (i.e. before the function was called). * * @param old_key The old position * @param new_key The new position - * @param always_erase Setting this flag to 'true' ensures that the value is removed from - * the old position even if it is already present at the new position. This may double the - * execution cost of this method. The default is 'false'. + * @param value The value that needs to be relocated. The relocate() method used the value's + * '==' operator to identify the entry that should be moved. + * @param count_equals This setting toggles whether a relocate() between two identical keys + * should be counted as 'success' and return '1'. The function may still return '0' + * in case the keys are not in the index. + * Background: the intuitively correct behavior is to return '1' for identical + * (exising) keys. However, avoiding this check can considerably speed up + * relocate() calls, especially when using a ConverterMultiply. 
+ * * @return '1' if a value was found and reinserted, otherwise '0'. */ - size_t relocate( - const Key& old_key, const Key& new_key, const T& value, bool always_erase = false) { - // Be smart: insert first, if the target-map already contains the entry we can avoid erase() - auto new_key_pre = converter_.pre(new_key); - auto& new_bucket = tree_.emplace(new_key_pre).first; - auto new_result = new_bucket.emplace(value); - if (!new_result.second) { - // Entry is already in correct place -> abort - // Return '1' if old/new refer to the same bucket, otherwise '0' - if (converter_.pre(old_key) == new_key_pre) { - return 1; - } - if (!always_erase) { - // Abort, unless we insist on erase() - return 0; + template + size_t relocate(const Key& old_key, const Key& new_key, T2&& value, bool count_equals = true) { + auto pair = tree_._find_or_create_two_mm( + converter_.pre(old_key), converter_.pre(new_key), count_equals); + auto& iter_old = pair.first; + auto& iter_new = pair.second; + + if (iter_old.IsEnd()) { + return 0; + } + auto iter_old_value = iter_old->find(value); + if (iter_old_value == iter_old->end()) { + if (iter_new->empty()) { + tree_.erase(iter_new); } + return 0; } - auto old_outer_iter = tree_.find(converter_.pre(old_key)); - if (old_outer_iter == tree_.end()) { - // No entry for old_key -> fail - return 0; + // Are we inserting in same node and same quadrant? Or are the keys equal? + if (iter_old == iter_new) { + assert(old_key == new_key); + return 1; } - auto old_bucket_iter = old_outer_iter->find(value); - if (old_bucket_iter == old_outer_iter->end()) { + assert(iter_old_value != iter_old->end()); + if (!iter_new->emplace(std::move(*iter_old_value)).second) { return 0; } - old_outer_iter->erase(old_bucket_iter); - // clean up - if (old_outer_iter->empty()) { - tree_.erase(old_outer_iter); + iter_old->erase(iter_old_value); + if (iter_old->empty()) { + [[maybe_unused]] auto found = tree_.erase(iter_old); + assert(found); } return 1; } + /* + * This function attempts to remove the 'value' from 'old_key' and reinsert it for 'new_key'. + * + * The relocate function will report _success_ in the following cases: + * - the value was removed from the old position and reinserted at the new position + * - the old position and new position are identical. + * + * The relocate function will report _failure_ in the following cases: + * - The value was already present in the new position + * - The value was not present in the old position + * + * In case of _failure_, this function guarantees that the tree remains unchanged + * or is returned to its original state (i.e. before the function was called). + * + * @param old_key The old position + * @param new_key The new position + * @param predicate The predicate that is used for every value at position old_key to evaluate + * whether it should be relocated to new_key. + * @param count_equals This setting toggles whether a relocate() between two identical keys + * should be counted as 'success' and return '1'. The function may still return '0' + * in case the keys are not in the index. + * Background: the intuitively correct behavior is to return '1' for identical + * (exising) keys. However, avoiding this check can considerably speed up + * relocate() calls, especially when using a ConverterMultiply. + * + * @return the number of values that were relocated. 
+ */ + template + size_t relocate_if( + const Key& old_key, const Key& new_key, PREDICATE&& predicate, bool count_equals = true) { + auto pair = tree_._find_or_create_two_mm( + converter_.pre(old_key), converter_.pre(new_key), count_equals); + auto& iter_old = pair.first; + auto& iter_new = pair.second; + + if (iter_old.IsEnd()) { + assert(iter_new.IsEnd() || !iter_new->empty()); // Otherwise remove iter_new + return 0; + } + + // Are we inserting in same node and same quadrant? Or are the keys equal? + if (iter_old == iter_new) { + assert(old_key == new_key); + return 1; + } + + size_t n = 0; + auto it = iter_old->begin(); + while (it != iter_old->end()) { + if (predicate(*it) && iter_new->emplace(std::move(*it)).second) { + it = iter_old->erase(it); + ++n; + } else { + ++it; + } + } + + if (iter_old->empty()) { + [[maybe_unused]] auto found = tree_.erase(iter_old); + assert(found); + } else if (iter_new->empty()) { + [[maybe_unused]] auto found = tree_.erase(iter_new); + assert(found); + } + return n; + } + + /* + * Relocates all values from one coordinate to another. + * Returns an iterator pointing to the relocated data (or end(), if the relocation failed). + */ + auto relocate_all(const Key& old_key, const Key& new_key) { + return tree_.relocate(old_key, new_key); + } + /* * Iterates over all entries in the tree. The optional filter allows filtering entries and nodes - * (=sub-trees) before returning / traversing them. By default all entries are returned. Filter + * (=sub-trees) before returning / traversing them. By default, all entries are returned. Filter * functions must implement the same signature as the default 'FilterNoOp'. * * @param callback The callback function to be called for every entry that matches the filter. @@ -485,10 +546,12 @@ class PhTreeMultiMap { * follow the signature of the default 'FilterNoOp`. * The default 'FilterNoOp` filter matches all entries. */ - template - void for_each(CALLBACK_FN& callback, FILTER filter = FILTER()) const { - CallbackWrapper inner_callback{callback, filter, converter_}; - tree_.for_each(inner_callback, WrapFilter(filter)); + template + void for_each(CALLBACK&& callback, FILTER&& filter = FILTER()) const { + tree_.for_each( + NoOpCallback{}, + WrapCallbackFilter{ + std::forward(callback), std::forward(filter), converter_}); } /* @@ -505,35 +568,30 @@ class PhTreeMultiMap { * The default 'FilterNoOp` filter matches all entries. */ template < - typename CALLBACK_FN, + typename CALLBACK, typename FILTER = FilterNoOp, typename QUERY_TYPE = DEFAULT_QUERY_TYPE> void for_each( QueryBox query_box, - CALLBACK_FN& callback, - const FILTER& filter = FILTER(), + CALLBACK&& callback, + FILTER&& filter = FILTER(), QUERY_TYPE query_type = QUERY_TYPE()) const { - CallbackWrapper inner_callback{callback, filter, converter_}; - tree_.for_each( - query_type(converter_.pre_query(query_box)), inner_callback, WrapFilter(filter)); + tree_.template for_each>( + query_type(converter_.pre_query(query_box)), + {}, + {std::forward(callback), std::forward(filter), converter_}); } /* * Iterates over all entries in the tree. The optional filter allows filtering entries and nodes - * (=sub-trees) before returning / traversing them. By default all entries are returned. Filter + * (=sub-trees) before returning / traversing them. By default, all entries are returned. Filter * functions must implement the same signature as the default 'FilterNoOp'. 
* * @return an iterator over all (filtered) entries in the tree, */ template - auto begin(FILTER filter = FILTER()) const { - auto outer_iter = tree_.begin(WrapFilter(filter)); - if (outer_iter == tree_.end()) { - return CreateIterator(outer_iter, bucket_dummy_end_, filter); - } - auto bucket_iter = outer_iter.second().begin(); - assert(bucket_iter != outer_iter.second().end()); - return CreateIterator(outer_iter, bucket_iter, filter); + auto begin(FILTER&& filter = FILTER()) const { + return CreateIterator(tree_.begin(std::forward(filter))); } /* @@ -549,16 +607,10 @@ class PhTreeMultiMap { template auto begin_query( const QueryBox& query_box, - FILTER filter = FILTER(), - QUERY_TYPE query_type = QUERY_TYPE()) const { - auto outer_iter = - tree_.begin_query(query_type(converter_.pre_query(query_box)), WrapFilter(filter)); - if (outer_iter == tree_.end()) { - return CreateIterator(outer_iter, bucket_dummy_end_, filter); - } - auto bucket_iter = outer_iter.second().begin(); - assert(bucket_iter != outer_iter.second().end()); - return CreateIterator(outer_iter, bucket_iter, filter); + FILTER&& filter = FILTER(), + QUERY_TYPE&& query_type = QUERY_TYPE()) const { + return CreateIterator(tree_.begin_query( + query_type(converter_.pre_query(query_box)), std::forward(filter))); } /* @@ -583,25 +635,22 @@ class PhTreeMultiMap { auto begin_knn_query( size_t min_results, const Key& center, - DISTANCE distance_function = DISTANCE(), - FILTER filter = FILTER()) const { + DISTANCE&& distance_function = DISTANCE(), + FILTER&& filter = FILTER()) const { // We use pre() instead of pre_query() here because, strictly speaking, we want to // find the nearest neighbors of a (fictional) key, which may as well be a box. - auto outer_iter = tree_.begin_knn_query( - min_results, converter_.pre(center), distance_function, WrapFilter(filter)); - if (outer_iter == tree_.end()) { - return CreateIteratorKnn(outer_iter, bucket_dummy_end_, filter); - } - auto bucket_iter = outer_iter.second().begin(); - assert(bucket_iter != outer_iter.second().end()); - return CreateIteratorKnn(outer_iter, bucket_iter, filter); + return CreateIteratorKnn(tree_.begin_knn_query( + min_results, + converter_.pre(center), + std::forward(distance_function), + std::forward(filter))); } /* * @return An iterator representing the tree's 'end'. */ - const auto& end() const { - return the_end_; + auto end() const { + return IteratorNormal{}; } /* @@ -639,64 +688,87 @@ class PhTreeMultiMap { return tree_; } - template - auto CreateIterator( - OUTER_ITER outer_iter, BucketIterType bucket_iter, FILTER filter = FILTER()) const { - return IteratorNormal( - tree_.end(), std::move(outer_iter), std::move(bucket_iter), filter); + void CheckConsistencyExternal() const { + size_t n = 0; + for (const auto& bucket : tree_) { + assert(!bucket.empty()); + n += bucket.size(); + } + assert(n == size_); + } + + template + auto CreateIteratorFind(OUTER_ITER&& outer_iter, const T& value) const { + auto bucket_iter = + outer_iter == tree_.end() ? BucketIterType{} : outer_iter.second().find(value); + return IteratorNormal( + std::forward(outer_iter), std::move(bucket_iter)); } - template - auto CreateIteratorKnn( - OUTER_ITER outer_iter, BucketIterType bucket_iter, FILTER filter = FILTER()) const { - return IteratorKnn( - tree_.end(), std::move(outer_iter), std::move(bucket_iter), filter); + template + auto CreateIterator(OUTER_ITER&& outer_iter) const { + auto bucket_iter = + outer_iter == tree_.end() ? 
BucketIterType{} : outer_iter.second().begin(); + return IteratorNormal( + std::forward(outer_iter), std::move(bucket_iter)); } - template - static auto WrapFilter(FILTER filter) { - // We always have two iterators, one that traverses the PH-Tree and one that traverses the - // bucket. Using the FilterWrapper we create a new Filter for the PH-Tree iterator. This new - // filter checks only if nodes are valid. It cannot check whether buckets are valid. - // The original filter is then used when we iterate over the entries of a bucket. At this - // point, we do not need to check IsNodeValid anymore for each entry (see `IteratorNormal`). - struct FilterWrapper { - [[nodiscard]] constexpr bool IsEntryValid(const KeyInternal&, const BUCKET&) const { - // This filter is checked in the Iterator. - return true; - } - [[nodiscard]] constexpr bool IsNodeValid( - const KeyInternal& prefix, int bits_to_ignore) const { - return filter_.IsNodeValid(prefix, bits_to_ignore); - } - FILTER filter_; - }; - return FilterWrapper{filter}; + template + auto CreateIteratorKnn(OUTER_ITER&& outer_iter) const { + auto bucket_iter = + outer_iter == tree_.end() ? BucketIterType{} : outer_iter.second().begin(); + return IteratorKnn( + std::forward(outer_iter), std::move(bucket_iter)); } - template - struct CallbackWrapper { + /* + * This wrapper wraps the Filter and Callback such that the callback is called for every + * entry in any bucket that matches the user defined IsEntryValid(). + */ + template + class WrapCallbackFilter { + public: /* - * The CallbackWrapper ensures that we call the callback on each entry of the bucket. - * The vanilla PH-Tree call it only on the bucket itself. + * We always have two iterators, one that traverses the PH-Tree and returns 'buckets', the + * other iterator traverses the returned buckets. + * The wrapper ensures that the callback is called for every entry in a bucket.. */ - void operator()(const Key& key, const BUCKET& bucket) const { - auto internal_key = converter_.pre(key); - for (auto& entry : bucket) { - if (filter_.IsEntryValid(internal_key, entry)) { - callback_(key, entry); + template + WrapCallbackFilter(CB&& callback, F&& filter, const CONVERTER& converter) + : callback_{std::forward(callback)} + , filter_{std::forward(filter)} + , converter_{converter} {} + + [[nodiscard]] inline bool IsEntryValid( + const KeyInternal& internal_key, const BUCKET& bucket) { + if (filter_.IsEntryValid(internal_key, bucket)) { + auto key = converter_.post(internal_key); + for (auto& entry : bucket) { + if (filter_.IsBucketEntryValid(internal_key, entry)) { + callback_(key, entry); + } } } + // Return false. We already called the callback. 
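+        // (Returning 'false' means the tree itself never treats the bucket as a valid entry,
+        // so the no-op callback passed to the underlying tree is never invoked for it.)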
+ return false; } - CALLBACK_FN& callback_; - const FILTER filter_; + + [[nodiscard]] inline bool IsNodeValid(const KeyInternal& prefix, int bits_to_ignore) { + return filter_.IsNodeValid(prefix, bits_to_ignore); + } + + private: + CALLBACK callback_; + FILTER filter_; const CONVERTER& converter_; }; + struct NoOpCallback { + constexpr void operator()(const Key&, const BUCKET&) const noexcept {} + }; + v16::PhTreeV16 tree_; CONVERTER converter_; - IteratorNormal the_end_{tree_.end()}; - BucketIterType bucket_dummy_end_; size_t size_; }; @@ -710,14 +782,14 @@ template < dimension_t DIM, typename T, typename CONVERTER = ConverterIEEE, - typename BUCKET = std::unordered_set> + typename BUCKET = b_plus_tree_hash_set> using PhTreeMultiMapD = PhTreeMultiMap; template < dimension_t DIM, typename T, typename CONVERTER_BOX, - typename BUCKET = std::unordered_set> + typename BUCKET = b_plus_tree_hash_set> using PhTreeMultiMapBox = PhTreeMultiMap; /** @@ -730,7 +802,7 @@ template < dimension_t DIM, typename T, typename CONVERTER_BOX = ConverterBoxIEEE, - typename BUCKET = std::unordered_set> + typename BUCKET = b_plus_tree_hash_set> using PhTreeMultiMapBoxD = PhTreeMultiMapBox; } // namespace improbable::phtree diff --git a/phtree/v16/BUILD b/include/phtree/v16/BUILD similarity index 86% rename from phtree/v16/BUILD rename to include/phtree/v16/BUILD index b44b14a1..f8bfe515 100644 --- a/phtree/v16/BUILD +++ b/include/phtree/v16/BUILD @@ -13,7 +13,7 @@ cc_library( "iterator_full.h", "iterator_hc.h", "iterator_knn_hs.h", - "iterator_simple.h", + "iterator_with_parent.h", "node.h", "phtree_v16.h", ], @@ -21,6 +21,6 @@ cc_library( "//visibility:public", ], deps = [ - "//phtree/common", + "//include/phtree/common", ], ) diff --git a/phtree/v16/debug_helper_v16.h b/include/phtree/v16/debug_helper_v16.h similarity index 66% rename from phtree/v16/debug_helper_v16.h rename to include/phtree/v16/debug_helper_v16.h index 85ef92d9..bb62942f 100644 --- a/phtree/v16/debug_helper_v16.h +++ b/include/phtree/v16/debug_helper_v16.h @@ -17,8 +17,8 @@ #ifndef PHTREE_V16_DEBUG_HELPER_H #define PHTREE_V16_DEBUG_HELPER_H -#include "../common/common.h" -#include "../common/debug_helper.h" +#include "phtree/common/common.h" +#include "phtree/common/debug_helper.h" #include "node.h" #include "phtree_v16.h" #include @@ -30,11 +30,10 @@ class PhTreeV16; template class DebugHelperV16 : public PhTreeDebugHelper::DebugHelper { - using KeyT = PhPoint; - using NodeT = Node; + using EntryT = Entry; public: - DebugHelperV16(const NodeT& root, size_t size) : root_{root}, size_{size} {} + DebugHelperV16(const EntryT& root, size_t size) : root_{root}, size_{size} {} /* * Depending on the detail parameter this returns: @@ -57,7 +56,7 @@ class DebugHelperV16 : public PhTreeDebugHelper::DebugHelper { ToStringPlain(os, root_); break; case Enum::tree: - ToStringTree(os, 0, root_, KeyT{}, true); + ToStringTree(os, 0, root_, MAX_BIT_WIDTH, true); break; } return os.str(); @@ -70,7 +69,7 @@ class DebugHelperV16 : public PhTreeDebugHelper::DebugHelper { */ [[nodiscard]] PhTreeStats GetStats() const override { PhTreeStats stats; - root_.GetStats(stats); + root_.GetNode().GetStats(stats, root_); return stats; } @@ -78,19 +77,19 @@ class DebugHelperV16 : public PhTreeDebugHelper::DebugHelper { * Checks the consistency of the tree. This function requires assertions to be enabled. 
*/ void CheckConsistency() const override { - assert(size_ == root_.CheckConsistency()); + assert(size_ == root_.GetNode().CheckConsistency(root_)); } private: - void ToStringPlain(std::ostringstream& os, const NodeT& node) const { - for (auto& it : node.Entries()) { - const auto& o = it.second; + void ToStringPlain(std::ostringstream& os, const EntryT& entry) const { + for (auto& it : entry.GetNode().Entries()) { + const auto& child = it.second; // inner node? - if (o.IsNode()) { - ToStringPlain(os, o.GetNode()); + if (child.IsNode()) { + ToStringPlain(os, child); } else { - os << o.GetKey(); - os << " v=" << (o.IsValue() ? "T" : "null") << std::endl; + os << child.GetKey(); + os << " v=" << (child.IsValue() ? "T" : "null") << std::endl; } } } @@ -98,50 +97,53 @@ class DebugHelperV16 : public PhTreeDebugHelper::DebugHelper { void ToStringTree( std::ostringstream& sb, bit_width_t current_depth, - const NodeT& node, - const KeyT& prefix, + const EntryT& entry, + const bit_width_t parent_postfix_len, bool printValue) const { std::string ind = "*"; for (bit_width_t i = 0; i < current_depth; ++i) { ind += "-"; } - sb << ind << "il=" << node.GetInfixLen() << " pl=" << node.GetPostfixLen() - << " ec=" << node.GetEntryCount() << " inf=["; + const auto& node = entry.GetNode(); + const auto infix_len = entry.GetNodeInfixLen(parent_postfix_len); + const auto postfix_len = entry.GetNodePostfixLen(); + sb << ind << "il=" << infix_len << " pl=" << postfix_len << " ec=" << node.GetEntryCount() + << " inf=["; // for a leaf node, the existence of a sub just indicates that the value exists. - if (node.GetInfixLen() > 0) { - bit_mask_t mask = MAX_MASK << node.GetInfixLen(); + if (infix_len > 0) { + bit_mask_t mask = MAX_MASK << infix_len; mask = ~mask; - mask <<= node.GetPostfixLen() + 1; + mask <<= (std::uint64_t)postfix_len + 1; for (dimension_t i = 0; i < DIM; ++i) { - sb << ToBinary(prefix[i] & mask) << ","; + sb << ToBinary(entry.GetKey()[i] & mask) << ","; } } - current_depth += node.GetInfixLen(); + current_depth += infix_len; sb << "] " - << "Node___il=" << node.GetInfixLen() << ";pl=" << node.GetPostfixLen() + << "Node___il=" << infix_len << ";pl=" << postfix_len << ";size=" << node.Entries().size() << std::endl; // To clean previous postfixes. for (auto& it : node.Entries()) { - const auto& o = it.second; + const auto& child = it.second; hc_pos_t hcPos = it.first; - if (o.IsNode()) { + if (child.IsNode()) { sb << ind << "# " << hcPos << " Node: " << std::endl; - ToStringTree(sb, current_depth + 1, o.GetNode(), o.GetKey(), printValue); + ToStringTree(sb, current_depth + 1, child, postfix_len, printValue); } else { // post-fix - sb << ind << ToBinary(o.GetKey()); + sb << ind << ToBinary(child.GetKey()); sb << " hcPos=" << hcPos; if (printValue) { - sb << " v=" << (o.IsValue() ? "T" : "null"); + sb << " v=" << (child.IsValue() ? "T" : "null"); } sb << std::endl; } } } - const NodeT& root_; + const EntryT& root_; const size_t size_; }; } // namespace improbable::phtree::v16 diff --git a/include/phtree/v16/entry.h b/include/phtree/v16/entry.h new file mode 100644 index 00000000..6b2a2dbf --- /dev/null +++ b/include/phtree/v16/entry.h @@ -0,0 +1,243 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_V16_ENTRY_H +#define PHTREE_V16_ENTRY_H + +#include "phtree/common/common.h" +#include "node.h" +#include +#include + +namespace improbable::phtree::v16 { + +template +class Node; + +/* + * Nodes in the PH-Tree contain up to 2^DIM Entries, one in each geometric quadrant. + * Entries can contain two types of data: + * - A key/value pair (value of type T) + * - A prefix/child-node pair, where prefix is the prefix of the child node and the + * child node is contained in a unique_ptr. + */ +template +class Entry { + using KeyT = PhPoint; + using ValueT = std::remove_const_t; + using NodeT = Node; + + enum { + VALUE = 0, + NODE = 1, + EMPTY = 2, + }; + + public: + /* + * Construct entry with existing node. + */ + Entry(const KeyT& k, std::unique_ptr&& node_ptr, bit_width_t postfix_len) noexcept + : kd_key_{k} + , node_{std::move(node_ptr)} + , union_type_{NODE} + , postfix_len_{static_cast(postfix_len)} {} + + /* + * Construct entry with existing T (T is not movable). + */ + template + Entry( + const KeyT& k, + ValueT2&& value, + typename std::enable_if_t, int>::type = 0) noexcept + : kd_key_{k}, value_(value), union_type_{VALUE}, postfix_len_{0} {} + + /* + * Construct entry with existing T (T must be movable). + */ + template + Entry( + const KeyT& k, + ValueT2&& value, + typename std::enable_if_t, int>::type = 0) noexcept + : kd_key_{k}, value_(std::forward(value)), union_type_{VALUE}, postfix_len_{0} {} + + /* + * Construct entry with new T or copied T (T is not movable). + */ + template < + typename ValueT2 = ValueT, + typename = std::enable_if_t>> + explicit Entry(const KeyT& k, const ValueT& value) noexcept + : kd_key_{k}, value_(value), union_type_{VALUE}, postfix_len_{0} {} + + /* + * Construct entry with new T or copied T (T is not movable, using T's default constructor). + */ + template < + typename ValueT2 = ValueT, + typename = std::enable_if_t>> + explicit Entry(const KeyT& k) noexcept + : kd_key_{k}, value_(), union_type_{VALUE}, postfix_len_{0} {} + + /* + * Construct entry with new T or moved T (T must be movable). + */ + template < + typename... Args, + typename ValueT2 = ValueT, + typename = std::enable_if_t>> + explicit Entry(const KeyT& k, Args&&... 
args) noexcept + : kd_key_{k}, value_(std::forward(args)...), union_type_{VALUE}, postfix_len_{0} {} + + Entry(const Entry& other) = delete; + Entry& operator=(const Entry& other) = delete; + + Entry(Entry&& other) noexcept + : kd_key_{std::move(other.kd_key_)}, union_type_{std::move(other.union_type_)} { + postfix_len_ = std::move(other.postfix_len_); + AssignUnion(std::move(other)); + } + + Entry& operator=(Entry&& other) noexcept { + kd_key_ = std::move(other.kd_key_); + postfix_len_ = std::move(other.postfix_len_); + DestroyUnion(); + AssignUnion(std::move(other)); + return *this; + } + + ~Entry() noexcept { + DestroyUnion(); + } + + [[nodiscard]] const KeyT& GetKey() const { + return kd_key_; + } + + [[nodiscard]] bool IsValue() const { + return union_type_ == VALUE; + } + + [[nodiscard]] bool IsNode() const { + return union_type_ == NODE; + } + + [[nodiscard]] T& GetValue() const { + assert(union_type_ == VALUE); + return const_cast(value_); + } + + [[nodiscard]] NodeT& GetNode() const { + assert(union_type_ == NODE); + return *node_; + } + + void SetKey(const KeyT& key) noexcept { + kd_key_ = key; + } + + void SetNode(std::unique_ptr&& node, bit_width_t postfix_len) noexcept { + postfix_len_ = static_cast(postfix_len); + DestroyUnion(); + union_type_ = NODE; + new (&node_) std::unique_ptr{std::move(node)}; + assert(!node); + } + + [[nodiscard]] bit_width_t GetNodePostfixLen() const noexcept { + assert(IsNode()); + return postfix_len_; + } + + [[nodiscard]] bit_width_t GetNodeInfixLen(bit_width_t parent_postfix_len) const noexcept { + assert(IsNode()); + return parent_postfix_len - GetNodePostfixLen() - 1; + } + + [[nodiscard]] bool HasNodeInfix(bit_width_t parent_postfix_len) const noexcept { + assert(IsNode()); + return parent_postfix_len - GetNodePostfixLen() - 1 > 0; + } + + [[nodiscard]] ValueT&& ExtractValue() noexcept { + assert(IsValue()); + return std::move(value_); + } + + [[nodiscard]] std::unique_ptr&& ExtractNode() noexcept { + assert(IsNode()); + // Moving the node somewhere else means we should remove it here: + union_type_ = EMPTY; + return std::move(node_); + } + + void ReplaceNodeWithDataFromEntry(Entry&& other) { + assert(IsNode()); + // 'other' may be referenced from the local node, so we need to do move(other) + // before destructing the local node. + auto node = std::move(node_); + union_type_ = EMPTY; + *this = std::move(other); + node.reset(); + } + + private: + void AssignUnion(Entry&& other) noexcept { + union_type_ = std::move(other.union_type_); + if (union_type_ == NODE) { + new (&node_) std::unique_ptr{std::move(other.node_)}; + } else if (union_type_ == VALUE) { + if constexpr (std::is_move_constructible_v) { + new (&value_) ValueT{std::move(other.value_)}; + } else { + new (&value_) ValueT{other.value_}; + } + } else { + assert(false && "Assigning from an EMPTY variant is a waste of time."); + } + } + + void DestroyUnion() noexcept { + if (union_type_ == VALUE) { + value_.~ValueT(); + } else if (union_type_ == NODE) { + node_.~unique_ptr(); + } else { + assert(union_type_ == EMPTY); + } + union_type_ = EMPTY; + } + + KeyT kd_key_; + union { + std::unique_ptr node_; + ValueT value_; + }; + std::uint16_t union_type_; + // The length (number of bits) of post fixes (the part of the coordinate that is 'below' the + // current node). If a variable prefix_len would refer to the number of bits in this node's + // prefix, and if we assume 64 bit values, the following would always hold: + // prefix_len + 1 + postfix_len = 64. 
+ // The '+1' accounts for the 1 bit that is represented by the local node's hypercube, + // i.e. the same bit that is used to create the lookup keys in entries_. + std::uint16_t postfix_len_; +}; + +} // namespace improbable::phtree::v16 + +#endif // PHTREE_V16_ENTRY_H diff --git a/phtree/v16/for_each.h b/include/phtree/v16/for_each.h similarity index 58% rename from phtree/v16/for_each.h rename to include/phtree/v16/for_each.h index aee3d157..e61e24fd 100644 --- a/phtree/v16/for_each.h +++ b/include/phtree/v16/for_each.h @@ -17,8 +17,8 @@ #ifndef PHTREE_V16_FOR_EACH_H #define PHTREE_V16_FOR_EACH_H -#include "../common/common.h" -#include "iterator_simple.h" +#include "phtree/common/common.h" +#include "iterator_with_parent.h" namespace improbable::phtree::v16 { @@ -26,47 +26,43 @@ namespace improbable::phtree::v16 { * Iterates over the whole tree. Entries and child nodes that are rejected by the Filter are not * traversed or returned. */ -template +template class ForEach { static constexpr dimension_t DIM = CONVERT::DimInternal; - using KeyExternal = typename CONVERT::KeyExternal; using KeyInternal = typename CONVERT::KeyInternal; using SCALAR = typename CONVERT::ScalarInternal; using EntryT = Entry; - using NodeT = Node; public: - ForEach(const CONVERT& converter, CALLBACK_FN& callback, FILTER filter) - : converter_{converter}, callback_{callback}, filter_(std::move(filter)) {} + template + ForEach(const CONVERT* converter, CB&& callback, F&& filter) + : converter_{converter} + , callback_{std::forward(callback)} + , filter_(std::forward(filter)) {} - void run(const EntryT& root) { - assert(root.IsNode()); - TraverseNode(root.GetKey(), root.GetNode()); - } - - private: - void TraverseNode(const KeyInternal& key, const NodeT& node) { - auto iter = node.Entries().begin(); - auto end = node.Entries().end(); + void Traverse(const EntryT& entry) { + assert(entry.IsNode()); + auto& entries = entry.GetNode().Entries(); + auto iter = entries.begin(); + auto end = entries.end(); for (; iter != end; ++iter) { const auto& child = iter->second; const auto& child_key = child.GetKey(); if (child.IsNode()) { - const auto& child_node = child.GetNode(); - if (filter_.IsNodeValid(key, node.GetPostfixLen() + 1)) { - TraverseNode(child_key, child_node); + if (filter_.IsNodeValid(child_key, child.GetNodePostfixLen() + 1)) { + Traverse(child); } } else { T& value = child.GetValue(); - if (filter_.IsEntryValid(key, value)) { - callback_(converter_.post(child_key), value); + if (filter_.IsEntryValid(child_key, value)) { + callback_(converter_->post(child_key), value); } } } } - CONVERT converter_; - CALLBACK_FN& callback_; + const CONVERT* converter_; + CALLBACK callback_; FILTER filter_; }; } // namespace improbable::phtree::v16 diff --git a/phtree/v16/for_each_hc.h b/include/phtree/v16/for_each_hc.h similarity index 78% rename from phtree/v16/for_each_hc.h rename to include/phtree/v16/for_each_hc.h index d870debc..25883f17 100644 --- a/phtree/v16/for_each_hc.h +++ b/include/phtree/v16/for_each_hc.h @@ -17,8 +17,8 @@ #ifndef PHTREE_V16_FOR_EACH_HC_H #define PHTREE_V16_FOR_EACH_HC_H -#include "../common/common.h" -#include "iterator_simple.h" +#include "phtree/common/common.h" +#include "iterator_with_parent.h" namespace improbable::phtree::v16 { @@ -33,40 +33,36 @@ namespace improbable::phtree::v16 { * For details see "Efficient Z-Ordered Traversal of Hypercube Indexes" by T. Zäschke, M.C. Norrie, * 2017. 
*/ -template +template class ForEachHC { static constexpr dimension_t DIM = CONVERT::DimInternal; - using KeyExternal = typename CONVERT::KeyExternal; using KeyInternal = typename CONVERT::KeyInternal; using SCALAR = typename CONVERT::ScalarInternal; using EntryT = Entry; - using NodeT = Node; public: + template ForEachHC( const KeyInternal& range_min, const KeyInternal& range_max, - const CONVERT& converter, - CALLBACK_FN& callback, - FILTER filter) + const CONVERT* converter, + CB&& callback, + F&& filter) : range_min_{range_min} , range_max_{range_max} , converter_{converter} - , callback_{callback} - , filter_(std::move(filter)) {} + , callback_{std::forward(callback)} + , filter_(std::forward(filter)) {} - void run(const EntryT& root) { - assert(root.IsNode()); - TraverseNode(root.GetKey(), root.GetNode()); - } - - private: - void TraverseNode(const KeyInternal& key, const NodeT& node) { + void Traverse(const EntryT& entry) { + assert(entry.IsNode()); hc_pos_t mask_lower = 0; hc_pos_t mask_upper = 0; - CalcLimits(node.GetPostfixLen(), key, mask_lower, mask_upper); - auto iter = node.Entries().lower_bound(mask_lower); - auto end = node.Entries().end(); + CalcLimits(entry.GetNodePostfixLen(), entry.GetKey(), mask_lower, mask_upper); + auto& entries = entry.GetNode().Entries(); + auto postfix_len = entry.GetNodePostfixLen(); + auto iter = entries.lower_bound(mask_lower); + auto end = entries.end(); for (; iter != end && iter->first <= mask_upper; ++iter) { auto child_hc_pos = iter->first; // Use bit-mask magic to check whether we are in a valid quadrant. @@ -75,29 +71,30 @@ class ForEachHC { const auto& child = iter->second; const auto& child_key = child.GetKey(); if (child.IsNode()) { - const auto& child_node = child.GetNode(); - if (CheckNode(child_key, child_node)) { - TraverseNode(child_key, child_node); + if (CheckNode(child, postfix_len)) { + Traverse(child); } } else { T& value = child.GetValue(); if (IsInRange(child_key, range_min_, range_max_) && - ApplyFilter(child_key, value)) { - callback_(converter_.post(child_key), value); + filter_.IsEntryValid(child_key, value)) { + callback_(converter_->post(child_key), value); } } } } } - bool CheckNode(const KeyInternal& key, const NodeT& node) const { + bool CheckNode(const EntryT& entry, bit_width_t parent_postfix_len) { + const KeyInternal& key = entry.GetKey(); // Check if the node overlaps with the query box. // An infix with len=0 implies that at least part of the child node overlaps with the query, // otherwise the bit mask checking would have returned 'false'. - if (node.GetInfixLen() > 0) { + // Putting it differently, if the infix has len=0, then there is no point in validating it. + if (entry.HasNodeInfix(parent_postfix_len)) { // Mask for comparing the prefix with the query boundaries. 
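+            // (The shifted mask keeps only the bits above the child's postfix and its hypercube
+            // bit, i.e. the prefix that all keys inside the child node share.)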
- assert(node.GetPostfixLen() + 1 < MAX_BIT_WIDTH); - SCALAR comparison_mask = MAX_MASK << (node.GetPostfixLen() + 1); + assert(entry.GetNodePostfixLen() + 1 < MAX_BIT_WIDTH); + SCALAR comparison_mask = MAX_MASK << (entry.GetNodePostfixLen() + 1); for (dimension_t dim = 0; dim < DIM; ++dim) { SCALAR prefix = key[dim] & comparison_mask; if (prefix > range_max_[dim] || prefix < (range_min_[dim] & comparison_mask)) { @@ -105,15 +102,7 @@ class ForEachHC { } } } - return ApplyFilter(key, node); - } - - [[nodiscard]] bool ApplyFilter(const KeyInternal& key, const NodeT& node) const { - return filter_.IsNodeValid(key, node.GetPostfixLen() + 1); - } - - [[nodiscard]] bool ApplyFilter(const KeyInternal& key, const T& value) const { - return filter_.IsEntryValid(key, value); + return filter_.IsNodeValid(key, entry.GetNodePostfixLen() + 1); } void CalcLimits( @@ -180,8 +169,8 @@ class ForEachHC { const KeyInternal range_min_; const KeyInternal range_max_; - CONVERT converter_; - CALLBACK_FN& callback_; + const CONVERT* converter_; + CALLBACK callback_; FILTER filter_; }; } // namespace improbable::phtree::v16 diff --git a/include/phtree/v16/iterator_base.h b/include/phtree/v16/iterator_base.h new file mode 100644 index 00000000..d5152dfe --- /dev/null +++ b/include/phtree/v16/iterator_base.h @@ -0,0 +1,125 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_V16_ITERATOR_BASE_H +#define PHTREE_V16_ITERATOR_BASE_H + +#include "phtree/common/common.h" +#include "phtree/filter.h" +#include "entry.h" + +namespace improbable::phtree::v16 { + +/* + * Base class for all PH-Tree iterators. 
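+ *
+ * An iterator is 'finished' when its entry pointer is null: operator== simply compares the
+ * entry pointers, so a finished iterator compares equal to a default-constructed end()
+ * iterator.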
+ */
+template
+class IteratorBase {
+  public:
+    explicit IteratorBase() noexcept : current_entry_{nullptr} {}
+    explicit IteratorBase(const EntryT* current_entry) noexcept : current_entry_{current_entry} {}
+
+    inline auto& operator*() const noexcept {
+        assert(current_entry_);
+        return current_entry_->GetValue();
+    }
+
+    inline auto* operator->() const noexcept {
+        assert(current_entry_);
+        return &current_entry_->GetValue();
+    }
+
+    inline friend bool operator==(
+        const IteratorBase& left, const IteratorBase& right) noexcept {
+        return left.current_entry_ == right.current_entry_;
+    }
+
+    inline friend bool operator!=(
+        const IteratorBase& left, const IteratorBase& right) noexcept {
+        return left.current_entry_ != right.current_entry_;
+    }
+
+    auto& second() const {
+        return current_entry_->GetValue();
+    }
+
+    [[nodiscard]] inline bool IsEnd() const noexcept {
+        return current_entry_ == nullptr;
+    }
+
+    inline EntryT* GetEntry() const noexcept {
+        return const_cast(current_entry_);
+    }
+
+  protected:
+    void SetFinished() {
+        current_entry_ = nullptr;
+    }
+
+    void SetCurrentResult(const EntryT* current_entry) {
+        current_entry_ = current_entry;
+    }
+
+  protected:
+    const EntryT* current_entry_;
+};
+
+template
+using IteratorEnd = IteratorBase;
+
+template
+class IteratorWithFilter
+: public IteratorBase> {
+  protected:
+    static constexpr dimension_t DIM = CONVERT::DimInternal;
+    using KeyInternal = typename CONVERT::KeyInternal;
+    using SCALAR = typename CONVERT::ScalarInternal;
+    using EntryT = Entry;
+
+  public:
+    template
+    explicit IteratorWithFilter(const CONVERT* converter, F&& filter) noexcept
+    : IteratorBase(nullptr), converter_{converter}, filter_{std::forward(filter)} {}
+
+    explicit IteratorWithFilter(const EntryT* current_entry, const CONVERT* converter) noexcept
+    : IteratorBase(current_entry), converter_{converter}, filter_{FILTER()} {}
+
+    auto first() const {
+        return converter_->post(this->current_entry_->GetKey());
+    }
+
+    auto& __Filter() {
+        return filter_;
+    }
+
+  protected:
+    [[nodiscard]] bool ApplyFilter(const EntryT& entry) {
+        return entry.IsNode() ?
filter_.IsNodeValid(entry.GetKey(), entry.GetNodePostfixLen() + 1) + : filter_.IsEntryValid(entry.GetKey(), entry.GetValue()); + } + + auto post(const KeyInternal& point) { + return converter_->post(point); + } + + private: + const CONVERT* converter_; + FILTER filter_; +}; + +} // namespace improbable::phtree::v16 + +#endif // PHTREE_V16_ITERATOR_BASE_H diff --git a/phtree/v16/iterator_full.h b/include/phtree/v16/iterator_full.h similarity index 81% rename from phtree/v16/iterator_full.h rename to include/phtree/v16/iterator_full.h index b60be035..fbd9bb60 100644 --- a/phtree/v16/iterator_full.h +++ b/include/phtree/v16/iterator_full.h @@ -17,7 +17,7 @@ #ifndef PHTREE_V16_ITERATOR_FULL_H #define PHTREE_V16_ITERATOR_FULL_H -#include "../common/common.h" +#include "phtree/common/common.h" #include "iterator_base.h" namespace improbable::phtree::v16 { @@ -26,32 +26,35 @@ template class Node; template -class IteratorFull : public IteratorBase { +class IteratorFull : public IteratorWithFilter { static constexpr dimension_t DIM = CONVERT::DimInternal; using SCALAR = typename CONVERT::ScalarInternal; using NodeT = Node; - using EntryT = typename IteratorBase::EntryT; + using EntryT = typename IteratorWithFilter::EntryT; public: - IteratorFull(const EntryT& root, const CONVERT& converter, FILTER filter) - : IteratorBase(converter, filter), stack_{}, stack_size_{0} { + template + IteratorFull(const EntryT& root, const CONVERT* converter, F&& filter) + : IteratorWithFilter(converter, std::forward(filter)) + , stack_{} + , stack_size_{0} { PrepareAndPush(root.GetNode()); FindNextElement(); } - IteratorFull& operator++() { + IteratorFull& operator++() noexcept { FindNextElement(); return *this; } - IteratorFull operator++(int) { + IteratorFull operator++(int) noexcept { IteratorFull iterator(*this); ++(*this); return iterator; } private: - void FindNextElement() { + void FindNextElement() noexcept { while (!IsEmpty()) { auto* p = &Peek(); while (*p != PeekEnd()) { @@ -82,22 +85,22 @@ class IteratorFull : public IteratorBase { return stack_[stack_size_ - 1].first; } - auto& Peek() { + auto& Peek() noexcept { assert(stack_size_ > 0); return stack_[stack_size_ - 1].first; } - auto& PeekEnd() { + auto& PeekEnd() noexcept { assert(stack_size_ > 0); return stack_[stack_size_ - 1].second; } - auto& Pop() { + auto& Pop() noexcept { assert(stack_size_ > 0); return stack_[--stack_size_].first; } - bool IsEmpty() { + bool IsEmpty() noexcept { return stack_size_ == 0; } diff --git a/phtree/v16/iterator_hc.h b/include/phtree/v16/iterator_hc.h similarity index 81% rename from phtree/v16/iterator_hc.h rename to include/phtree/v16/iterator_hc.h index 2485550c..cd71794a 100644 --- a/phtree/v16/iterator_hc.h +++ b/include/phtree/v16/iterator_hc.h @@ -17,8 +17,8 @@ #ifndef PHTREE_V16_ITERATOR_HC_H #define PHTREE_V16_ITERATOR_HC_H -#include "../common/common.h" -#include "iterator_simple.h" +#include "phtree/common/common.h" +#include "iterator_with_parent.h" namespace improbable::phtree::v16 { @@ -42,44 +42,45 @@ class NodeIterator; * 2017. 
*/ template -class IteratorHC : public IteratorBase { +class IteratorHC : public IteratorWithFilter { static constexpr dimension_t DIM = CONVERT::DimInternal; using KeyInternal = typename CONVERT::KeyInternal; using SCALAR = typename CONVERT::ScalarInternal; - using EntryT = typename IteratorBase::EntryT; + using EntryT = typename IteratorWithFilter::EntryT; public: + template IteratorHC( const EntryT& root, const KeyInternal& range_min, const KeyInternal& range_max, - const CONVERT& converter, - FILTER filter) - : IteratorBase(converter, filter) + const CONVERT* converter, + F&& filter) + : IteratorWithFilter(converter, std::forward(filter)) , stack_size_{0} , range_min_{range_min} , range_max_{range_max} { + stack_.reserve(8); PrepareAndPush(root); FindNextElement(); } - IteratorHC& operator++() { + IteratorHC& operator++() noexcept { FindNextElement(); return *this; } - IteratorHC operator++(int) { + IteratorHC operator++(int) noexcept { IteratorHC iterator(*this); ++(*this); return iterator; } private: - void FindNextElement() { - assert(!this->Finished()); + void FindNextElement() noexcept { while (!IsEmpty()) { auto* p = &Peek(); - const EntryT* current_result = nullptr; + const EntryT* current_result; while ((current_result = p->Increment(range_min_, range_max_))) { if (this->ApplyFilter(*current_result)) { if (current_result->IsNode()) { @@ -97,28 +98,31 @@ class IteratorHC : public IteratorBase { this->SetFinished(); } - auto& PrepareAndPush(const EntryT& entry) { - assert(stack_size_ < stack_.size() - 1); + auto& PrepareAndPush(const EntryT& entry) noexcept { + if (stack_.size() < stack_size_ + 1) { + stack_.emplace_back(); + } + assert(stack_size_ < stack_.size()); auto& ni = stack_[stack_size_++]; - ni.init(range_min_, range_max_, entry.GetNode(), entry.GetKey()); + ni.Init(range_min_, range_max_, entry); return ni; } - auto& Peek() { + auto& Peek() noexcept { assert(stack_size_ > 0); return stack_[stack_size_ - 1]; } - auto& Pop() { + auto& Pop() noexcept { assert(stack_size_ > 0); return stack_[--stack_size_]; } - bool IsEmpty() { + bool IsEmpty() noexcept { return stack_size_ == 0; } - std::array, MAX_BIT_WIDTH> stack_; + std::vector> stack_; size_t stack_size_; const KeyInternal range_min_; const KeyInternal range_max_; @@ -129,15 +133,17 @@ template class NodeIterator { using KeyT = PhPoint; using EntryT = Entry; - using NodeT = Node; + using EntriesT = EntryMap; public: - NodeIterator() : iter_{}, node_{nullptr}, mask_lower_{0}, mask_upper_(0) {} + NodeIterator() : iter_{}, entries_{nullptr}, mask_lower_{0}, mask_upper_{0}, postfix_len_{0} {} - void init(const KeyT& range_min, const KeyT& range_max, const NodeT& node, const KeyT& prefix) { - node_ = &node; - CalcLimits(node.GetPostfixLen(), range_min, range_max, prefix); + void Init(const KeyT& range_min, const KeyT& range_max, const EntryT& entry) { + auto& node = entry.GetNode(); + CalcLimits(entry.GetNodePostfixLen(), range_min, range_max, entry.GetKey()); iter_ = node.Entries().lower_bound(mask_lower_); + entries_ = &node.Entries(); + postfix_len_ = entry.GetNodePostfixLen(); } /* @@ -145,7 +151,7 @@ class NodeIterator { * @return TRUE iff a matching element was found. 
*/ const EntryT* Increment(const KeyT& range_min, const KeyT& range_max) { - while (iter_ != node_->Entries().end() && iter_->first <= mask_upper_) { + while (iter_ != entries_->end() && iter_->first <= mask_upper_) { if (IsPosValid(iter_->first)) { const auto* be = &iter_->second; if (CheckEntry(*be, range_min, range_max)) { @@ -163,16 +169,16 @@ class NodeIterator { return IsInRange(candidate.GetKey(), range_min, range_max); } - auto& node = candidate.GetNode(); // Check if node-prefix allows sub-node to contain any useful values. // An infix with len=0 implies that at least part of the child node overlaps with the query. - if (node.GetInfixLen() == 0) { + // Putting it differently, if the infix has len=0, then there is no point in validating it. + if (!candidate.HasNodeInfix(postfix_len_)) { return true; } // Mask for comparing the prefix with the query boundaries. - assert(node.GetPostfixLen() + 1 < MAX_BIT_WIDTH); - SCALAR comparison_mask = MAX_MASK << (node.GetPostfixLen() + 1); + assert(candidate.GetNodePostfixLen() + 1 < MAX_BIT_WIDTH); + SCALAR comparison_mask = MAX_MASK << (candidate.GetNodePostfixLen() + 1); auto& key = candidate.GetKey(); for (dimension_t dim = 0; dim < DIM; ++dim) { SCALAR in = key[dim] & comparison_mask; @@ -184,7 +190,7 @@ class NodeIterator { } private: - [[nodiscard]] bool IsPosValid(hc_pos_t key) const { + [[nodiscard]] inline bool IsPosValid(hc_pos_t key) const noexcept { return ((key | mask_lower_) & mask_upper_) == key; } @@ -254,9 +260,10 @@ class NodeIterator { private: EntryIteratorC iter_; - const NodeT* node_; + EntriesT* entries_; hc_pos_t mask_lower_; hc_pos_t mask_upper_; + bit_width_t postfix_len_; }; } // namespace } // namespace improbable::phtree::v16 diff --git a/phtree/v16/iterator_knn_hs.h b/include/phtree/v16/iterator_knn_hs.h similarity index 86% rename from phtree/v16/iterator_knn_hs.h rename to include/phtree/v16/iterator_knn_hs.h index 3c30f7d6..5af0902e 100644 --- a/phtree/v16/iterator_knn_hs.h +++ b/include/phtree/v16/iterator_knn_hs.h @@ -17,7 +17,7 @@ #ifndef PHTREE_V16_QUERY_KNN_HS_H #define PHTREE_V16_QUERY_KNN_HS_H -#include "../common/common.h" +#include "phtree/common/common.h" #include "iterator_base.h" #include @@ -44,29 +44,30 @@ struct CompareEntryDistByDistance { } // namespace template -class IteratorKnnHS : public IteratorBase { +class IteratorKnnHS : public IteratorWithFilter { static constexpr dimension_t DIM = CONVERT::DimInternal; using KeyExternal = typename CONVERT::KeyExternal; using KeyInternal = typename CONVERT::KeyInternal; using SCALAR = typename CONVERT::ScalarInternal; - using EntryT = typename IteratorBase::EntryT; + using EntryT = typename IteratorWithFilter::EntryT; using EntryDistT = EntryDist; public: + template explicit IteratorKnnHS( const EntryT& root, size_t min_results, const KeyInternal& center, - const CONVERT& converter, - DISTANCE dist, - FILTER filter) - : IteratorBase(converter, filter) + const CONVERT* converter, + DIST&& dist, + F&& filter) + : IteratorWithFilter(converter, std::forward(filter)) , center_{center} - , center_post_{converter.post(center)} + , center_post_{converter->post(center)} , current_distance_{std::numeric_limits::max()} , num_found_results_(0) , num_requested_results_(min_results) - , distance_(std::move(dist)) { + , distance_(std::forward(dist)) { if (min_results <= 0 || root.GetNode().GetEntryCount() == 0) { this->SetFinished(); return; @@ -81,12 +82,12 @@ class IteratorKnnHS : public IteratorBase { return current_distance_; } - IteratorKnnHS& operator++() { + 
IteratorKnnHS& operator++() noexcept { FindNextElement(); return *this; } - IteratorKnnHS operator++(int) { + IteratorKnnHS operator++(int) noexcept { IteratorKnnHS iterator(*this); ++(*this); return iterator; @@ -96,7 +97,7 @@ class IteratorKnnHS : public IteratorBase { void FindNextElement() { while (num_found_results_ < num_requested_results_ && !queue_.empty()) { auto& candidate = queue_.top(); - auto o = candidate.second; + auto* o = candidate.second; if (!o->IsNode()) { // data entry ++num_found_results_; @@ -114,8 +115,7 @@ class IteratorKnnHS : public IteratorBase { auto& e2 = entry.second; if (this->ApplyFilter(e2)) { if (e2.IsNode()) { - auto& sub = e2.GetNode(); - double d = DistanceToNode(e2.GetKey(), sub.GetPostfixLen() + 1); + double d = DistanceToNode(e2.GetKey(), e2.GetNodePostfixLen() + 1); queue_.emplace(d, &e2); } else { double d = distance_(center_post_, this->post(e2.GetKey())); @@ -129,7 +129,7 @@ class IteratorKnnHS : public IteratorBase { current_distance_ = std::numeric_limits::max(); } - double DistanceToNode(const KeyInternal& prefix, int bits_to_ignore) { + double DistanceToNode(const KeyInternal& prefix, std::uint32_t bits_to_ignore) { assert(bits_to_ignore < MAX_BIT_WIDTH); SCALAR mask_min = MAX_MASK << bits_to_ignore; SCALAR mask_max = ~mask_min; @@ -154,8 +154,8 @@ class IteratorKnnHS : public IteratorBase { double current_distance_; std::priority_queue, CompareEntryDistByDistance> queue_; - int num_found_results_; - int num_requested_results_; + size_t num_found_results_; + size_t num_requested_results_; DISTANCE distance_; }; diff --git a/phtree/v16/iterator_simple.h b/include/phtree/v16/iterator_with_parent.h similarity index 56% rename from phtree/v16/iterator_simple.h rename to include/phtree/v16/iterator_with_parent.h index 815979a7..47216615 100644 --- a/phtree/v16/iterator_simple.h +++ b/include/phtree/v16/iterator_with_parent.h @@ -17,51 +17,55 @@ #ifndef PHTREE_V16_ITERATOR_SIMPLE_H #define PHTREE_V16_ITERATOR_SIMPLE_H -#include "../common/common.h" +#include "phtree/common/common.h" #include "iterator_base.h" namespace improbable::phtree::v16 { template -class IteratorSimple : public IteratorBase { +class IteratorWithParent : public IteratorWithFilter { static constexpr dimension_t DIM = CONVERT::DimInternal; using SCALAR = typename CONVERT::ScalarInternal; - using EntryT = typename IteratorBase::EntryT; + using EntryT = typename IteratorWithFilter::EntryT; + friend PhTreeV16; public: - explicit IteratorSimple(const CONVERT& converter) : IteratorBase(converter) { - this->SetFinished(); - } - - explicit IteratorSimple( + explicit IteratorWithParent( const EntryT* current_result, const EntryT* current_node, const EntryT* parent_node, - CONVERT converter) - : IteratorBase(converter) { - if (current_result) { - this->SetCurrentResult(current_result); - this->SetCurrentNodeEntry(current_node); - this->SetParentNodeEntry(parent_node); - } else { - this->SetFinished(); - } - } + const CONVERT* converter) noexcept + : IteratorWithFilter(current_result, converter) + , current_node_{current_node} + , parent_node_{parent_node} {} - IteratorSimple& operator++() { + IteratorWithParent& operator++() { this->SetFinished(); return *this; } - IteratorSimple operator++(int) { - IteratorSimple iterator(*this); + IteratorWithParent operator++(int) { + IteratorWithParent iterator(*this); ++(*this); return iterator; } -}; -template -using IteratorEnd = IteratorSimple; + private: + /* + * The parent entry contains the parent node. 
The parent node is the node ABOVE the current node + * which contains the current entry. + */ + EntryT* GetNodeEntry() const { + return const_cast(current_node_); + } + + EntryT* GetParentNodeEntry() const { + return const_cast(parent_node_); + } + + const EntryT* current_node_; + const EntryT* parent_node_; +}; } // namespace improbable::phtree::v16 diff --git a/phtree/v16/node.h b/include/phtree/v16/node.h similarity index 66% rename from phtree/v16/node.h rename to include/phtree/v16/node.h index 6994bca0..8f01a672 100644 --- a/phtree/v16/node.h +++ b/include/phtree/v16/node.h @@ -17,8 +17,7 @@ #ifndef PHTREE_V16_NODE_H #define PHTREE_V16_NODE_H -#include "../common/common.h" -#include "../common/tree_stats.h" +#include "phtree/common/common.h" #include "entry.h" #include "phtree_v16.h" #include @@ -26,67 +25,32 @@ namespace improbable::phtree::v16 { /* - * We provide different implementations of the node's internal entry set: + * We provide different implementations of the node's internal entry set. + * All implementations are equivalent to "std::map" which can be used as + * a plugin example for verification. + * * - `array_map` is the fastest, but has O(2^DIM) space complexity. This can be very wasteful * because many nodes may have only 2 entries. * Also, iteration depends on some bit operations and is also O(DIM) per step if the CPU/compiler * does not support CTZ (count trailing bits). * - `sparse_map` is slower, but requires only O(n) memory (n = number of entries/children). * However, insertion/deletion is O(n), i.e. O(2^DIM) time complexity in the worst case. - * - 'std::map` is the least efficient for small node sizes but scales best with larger nodes and - * dimensionality. Remember that n_max = 2^DIM. + * - 'b_plus_tree_map` is the least efficient for small node sizes but scales best with larger + * nodes and dimensionality. Remember that n_max = 2^DIM. */ template using EntryMap = typename std::conditional< DIM <= 3, array_map, - typename std::conditional, std::map>::type>::type; + typename std:: + conditional, b_plus_tree_map>:: + type>::type; template using EntryIterator = decltype(EntryMap().begin()); template using EntryIteratorC = decltype(EntryMap().cbegin()); -namespace { - -/* - * Takes a construct of parent_node -> child_node, ie the child_node is owned by parent_node. - * This function also assumes that the child_node contains only one entry. - * - * This function takes the remaining entry from the child node and inserts it into the parent_node - * where it replaces (and implicitly deletes) the child_node. - * @param prefix_of_child_in_parent This specifies the position of child_node inside the - * parent_node. We only need the relevant bits at the level of the parent_node. This means we can - * use any key of any node or entry that is, or used to be) inside the child_node, because they all - * share the same prefix. This includes the key of the child_node itself. - * @param child_node The node to be removed from the parent node. - * @param parent_node Current owner of the child node. - */ -template -void MergeIntoParent(Node& child_node, Node& parent) { - assert(child_node.GetEntryCount() == 1); - // At this point we have found an entry that needs to be removed. We also know that we need to - // remove the child node because it contains at most one other entry and it is not the root - // node. 
- auto map_entry = child_node.Entries().begin(); - auto& entry = map_entry->second; - - auto hc_pos_in_parent = CalcPosInArray(entry.GetKey(), parent.GetPostfixLen()); - auto& parent_entry = parent.Entries().find(hc_pos_in_parent)->second; - - if (entry.IsNode()) { - // connect sub to parent - auto& sub2 = entry.GetNode(); - bit_width_t new_infix_len = child_node.GetInfixLen() + 1 + sub2.GetInfixLen(); - sub2.SetInfixLen(new_infix_len); - } - - // Now move the single entry into the parent, the position in the parent is the same as the - // child_node. - parent_entry.ReplaceNodeWithDataFromEntry(std::move(entry)); -} -} // namespace - /* * A node of the PH-Tree. It contains up to 2^DIM entries, each entry being either a leaf with data * of type T or a child node (both are of the variant type Entry). @@ -112,11 +76,7 @@ class Node { using EntryT = Entry; public: - Node(bit_width_t infix_len, bit_width_t postfix_len) - : postfix_len_(postfix_len), infix_len_(infix_len), entries_{} { - assert(infix_len_ < MAX_BIT_WIDTH); - assert(infix_len >= 0); - } + Node() : entries_{} {} // Nodes should never be copied! Node(const Node&) = delete; @@ -128,14 +88,6 @@ class Node { return entries_.size(); } - [[nodiscard]] bit_width_t GetInfixLen() const { - return infix_len_; - } - - [[nodiscard]] bit_width_t GetPostfixLen() const { - return postfix_len_; - } - /* * Attempts to emplace an entry in this node. * The behavior is analogous to std::map::emplace(), i.e. if there is already a value with the @@ -164,56 +116,65 @@ class Node { * @param args Constructor arguments for creating a value T that can be inserted for the key. */ template - EntryT* Emplace(bool& is_inserted, const KeyT& key, Args&&... args) { - hc_pos_t hc_pos = CalcPosInArray(key, GetPostfixLen()); + EntryT& Emplace(bool& is_inserted, const KeyT& key, bit_width_t postfix_len, Args&&... args) { + hc_pos_t hc_pos = CalcPosInArray(key, postfix_len); auto emplace_result = entries_.try_emplace(hc_pos, key, std::forward(args)...); auto& entry = emplace_result.first->second; // Return if emplace succeed, i.e. there was no entry. if (emplace_result.second) { is_inserted = true; - return &entry; + return entry; } - return HandleCollision(entry, is_inserted, key, std::forward(args)...); + return HandleCollision(entry, is_inserted, key, postfix_len, std::forward(args)...); } /* * Returns the value (T or Node) if the entry exists and matches the key. Child nodes are * _not_ traversed. * @param key The key of the entry - * @param parent parent node + * @param parent The parent node * @return The sub node or null. */ - const EntryT* Find(const KeyT& key) const { - hc_pos_t hc_pos = CalcPosInArray(key, GetPostfixLen()); + const EntryT* Find(const KeyT& key, bit_width_t postfix_len) const { + hc_pos_t hc_pos = CalcPosInArray(key, postfix_len); const auto& entry = entries_.find(hc_pos); - if (entry != entries_.end() && DoesEntryMatch(entry->second, key)) { + if (entry != entries_.end() && DoesEntryMatch(entry->second, key, postfix_len)) { return &entry->second; } return nullptr; } + EntryT* Find(const KeyT& key, bit_width_t postfix_len) { + return const_cast(static_cast(this)->Find(key, postfix_len)); + } + /* * Attempts to erase a key/value pair. * This function is not recursive, if the 'key' leads to a child node, the child node * is returned and nothing is removed. * * @param key The key of the key/value pair to be erased - * @param parent The parent node of the current node (=nullptr) if this is the root node. 
+ * @param parent_entry The parent node of the current node (=nullptr) if this is the root node. + * @param allow_move_into_parent Whether the node can be merged into the parent if only 1 + * entry is left. * @param found This is and output parameter and will be set to 'true' if a value was removed. * @return A child node if the provided key leads to a child node. */ - Node* Erase(const KeyT& key, Node* parent, bool& found) { - hc_pos_t hc_pos = CalcPosInArray(key, GetPostfixLen()); + EntryT* Erase(const KeyT& key, EntryT* parent_entry, bool allow_move_into_parent, bool& found) { + auto postfix_len = parent_entry->GetNodePostfixLen(); + hc_pos_t hc_pos = CalcPosInArray(key, postfix_len); auto it = entries_.find(hc_pos); - if (it != entries_.end() && DoesEntryMatch(it->second, key)) { + if (it != entries_.end() && DoesEntryMatch(it->second, key, postfix_len)) { if (it->second.IsNode()) { - return &it->second.GetNode(); + return &it->second; } entries_.erase(it); found = true; - if (parent && GetEntryCount() == 1) { - MergeIntoParent(*this, *parent); + if (allow_move_into_parent && GetEntryCount() == 1) { + // We take the remaining entry from the current node and inserts it into the + // parent_entry where it replaces (and implicitly deletes) the current node. + parent_entry->ReplaceNodeWithDataFromEntry(std::move(entries_.begin()->second)); // WARNING: (this) is deleted here, do not refer to it beyond this point. } } @@ -228,23 +189,23 @@ class Node { return entries_; } - void GetStats(PhTreeStats& stats, bit_width_t current_depth = 0) const { + void GetStats( + PhTreeStats& stats, const EntryT& current_entry, bit_width_t current_depth = 0) const { size_t num_children = entries_.size(); ++stats.n_nodes_; - ++stats.infix_hist_[GetInfixLen()]; ++stats.node_depth_hist_[current_depth]; ++stats.node_size_log_hist_[32 - CountLeadingZeros(std::uint32_t(num_children))]; stats.n_total_children_ += num_children; - - current_depth += GetInfixLen(); stats.q_total_depth_ += current_depth; for (auto& entry : entries_) { auto& child = entry.second; if (child.IsNode()) { + auto child_infix_len = child.GetNodeInfixLen(current_entry.GetNodePostfixLen()); + ++stats.infix_hist_[child_infix_len]; auto& sub = child.GetNode(); - sub.GetStats(stats, current_depth + 1); + sub.GetStats(stats, child, current_depth + 1 + child_infix_len); } else { ++stats.q_n_post_fix_n_[current_depth]; ++stats.size_; @@ -252,11 +213,9 @@ class Node { } } - size_t CheckConsistency(bit_width_t current_depth = 0) const { + size_t CheckConsistency(const EntryT& current_entry, bit_width_t current_depth = 0) const { // Except for a root node if the tree has <2 entries. 
assert(entries_.size() >= 2 || current_depth == 0); - - current_depth += GetInfixLen(); size_t num_entries_local = 0; size_t num_entries_children = 0; for (auto& entry : entries_) { @@ -264,8 +223,12 @@ class Node { if (child.IsNode()) { auto& sub = child.GetNode(); // Check node consistency - assert(sub.GetInfixLen() + 1 + sub.GetPostfixLen() == GetPostfixLen()); - num_entries_children += sub.CheckConsistency(current_depth + 1); + auto sub_infix_len = child.GetNodeInfixLen(current_entry.GetNodePostfixLen()); + assert( + sub_infix_len + 1 + child.GetNodePostfixLen() == + current_entry.GetNodePostfixLen()); + num_entries_children += + sub.CheckConsistency(child, current_depth + 1 + sub_infix_len); } else { ++num_entries_local; } @@ -273,12 +236,6 @@ class Node { return num_entries_local + num_entries_children; } - void SetInfixLen(bit_width_t newInfLen) { - assert(newInfLen < MAX_BIT_WIDTH); - assert(newInfLen >= 0); - infix_len_ = newInfLen; - } - private: template auto& WriteValue(hc_pos_t hc_pos, const KeyT& new_key, Args&&... args) { @@ -287,10 +244,8 @@ class Node { void WriteEntry(hc_pos_t hc_pos, EntryT& entry) { if (entry.IsNode()) { - auto& node = entry.GetNode(); - bit_width_t new_subnode_infix_len = postfix_len_ - node.postfix_len_ - 1; - node.SetInfixLen(new_subnode_infix_len); - entries_.try_emplace(hc_pos, entry.GetKey(), entry.ExtractNode()); + auto postfix_len = entry.GetNodePostfixLen(); + entries_.try_emplace(hc_pos, entry.GetKey(), entry.ExtractNode(), postfix_len); } else { entries_.try_emplace(hc_pos, entry.GetKey(), entry.ExtractValue()); } @@ -311,18 +266,21 @@ class Node { * an entry with the exact same key as new_key, so insertion has failed. */ template - auto* HandleCollision( - EntryT& existing_entry, bool& is_inserted, const KeyT& new_key, Args&&... args) { + auto& HandleCollision( + EntryT& existing_entry, + bool& is_inserted, + const KeyT& new_key, + bit_width_t current_postfix_len, + Args&&... args) { assert(!is_inserted); // We have two entries in the same location (local pos). // Now we need to compare the keys. // If they are identical, we simply return the entry for further traversal. if (existing_entry.IsNode()) { - auto& sub_node = existing_entry.GetNode(); - if (sub_node.GetInfixLen() > 0) { + if (existing_entry.HasNodeInfix(current_postfix_len)) { bit_width_t max_conflicting_bits = NumberOfDivergingBits(new_key, existing_entry.GetKey()); - if (max_conflicting_bits > sub_node.GetPostfixLen() + 1) { + if (max_conflicting_bits > existing_entry.GetNodePostfixLen() + 1) { is_inserted = true; return InsertSplit( existing_entry, new_key, max_conflicting_bits, std::forward(args)...); @@ -339,31 +297,27 @@ class Node { } // perfect match -> return existing } - return &existing_entry; + return existing_entry; } template - auto* InsertSplit( + auto& InsertSplit( EntryT& current_entry, const KeyT& new_key, bit_width_t max_conflicting_bits, Args&&... 
args) { - const auto current_key = current_entry.GetKey(); - - // determine length of infix - bit_width_t new_local_infix_len = GetPostfixLen() - max_conflicting_bits; bit_width_t new_postfix_len = max_conflicting_bits - 1; - auto new_sub_node = std::make_unique(new_local_infix_len, new_postfix_len); + auto new_sub_node = std::make_unique(); hc_pos_t pos_sub_1 = CalcPosInArray(new_key, new_postfix_len); - hc_pos_t pos_sub_2 = CalcPosInArray(current_key, new_postfix_len); + hc_pos_t pos_sub_2 = CalcPosInArray(current_entry.GetKey(), new_postfix_len); // Move key/value into subnode new_sub_node->WriteEntry(pos_sub_2, current_entry); auto& new_entry = new_sub_node->WriteValue(pos_sub_1, new_key, std::forward(args)...); // Insert new node into local node - current_entry.SetNode(std::move(new_sub_node)); - return &new_entry; + current_entry.SetNode(std::move(new_sub_node), new_postfix_len); + return new_entry; } /* @@ -374,11 +328,11 @@ class Node { * @return 'true' iff the relevant part of the key matches (prefix for nodes, whole key for * other entries). */ - bool DoesEntryMatch(const EntryT& entry, const KeyT& key) const { + bool DoesEntryMatch( + const EntryT& entry, const KeyT& key, const bit_width_t parent_postfix_len) const { if (entry.IsNode()) { - const auto& sub = entry.GetNode(); - if (sub.GetInfixLen() > 0) { - const bit_mask_t mask = MAX_MASK << (sub.GetPostfixLen() + 1); + if (entry.HasNodeInfix(parent_postfix_len)) { + const bit_mask_t mask = MAX_MASK << (entry.GetNodePostfixLen() + 1); return KeyEquals(entry.GetKey(), key, mask); } return true; @@ -386,16 +340,6 @@ class Node { return entry.GetKey() == key; } - // The length (number of bits) of post fixes (the part of the coordinate that is 'below' the - // current node). If a variable prefix_len would refer to the number of bits in this node's - // prefix, and if we assume 64 bit values, the following would always hold: - // prefix_len + 1 + postfix_len = 64. - // The '+1' accounts for the 1 bit that is represented by the local node's hypercube, - // ie. the same bit that is used to create the lookup keys in entries_. - bit_width_t postfix_len_; - // The number of bits between this node and the parent node. For 64bit keys possible values - // range from 0 to 62. - bit_width_t infix_len_; EntryMap entries_; }; diff --git a/include/phtree/v16/phtree_v16.h b/include/phtree/v16/phtree_v16.h new file mode 100644 index 00000000..1f49ef69 --- /dev/null +++ b/include/phtree/v16/phtree_v16.h @@ -0,0 +1,584 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef PHTREE_V16_PHTREE_V16_H +#define PHTREE_V16_PHTREE_V16_H + +#include "debug_helper_v16.h" +#include "for_each.h" +#include "for_each_hc.h" +#include "iterator_full.h" +#include "iterator_hc.h" +#include "iterator_knn_hs.h" +#include "iterator_with_parent.h" +#include "node.h" + +namespace improbable::phtree::v16 { + +/* + * The PH-Tree is an ordered index on an n-dimensional space (quad-/oct-/2^n-tree) where each + * dimension is (by default) indexed by a 64 bit integer. The index ordered follows z-order / Morton + * order. The index is effectively a 'map', i.e. each key is associated with at most one value. + * + * Keys are points in n-dimensional space. + * + * This API behaves similar to std::map, see function descriptions for details. + * + * Loosely based on PH-Tree Java, V16, see http://www.phtree.org + * + * See also : + * - T. Zaeschke, C. Zimmerli, M.C. Norrie: + * "The PH-Tree -- A Space-Efficient Storage Structure and Multi-Dimensional Index", (SIGMOD 2014) + * - T. Zaeschke: "The PH-Tree Revisited", (2015) + * - T. Zaeschke, M.C. Norrie: "Efficient Z-Ordered Traversal of Hypercube Indexes" (BTW 2017). + * + * @tparam T Value type. + * @tparam DIM Dimensionality. This is the number of dimensions of the space to index. + * @tparam CONVERT A converter class with a 'pre()' and a 'post()' function. 'pre()' translates + * external KEYs into the internal PhPoint type. 'post()' + * translates the PhPoint back to the external KEY type. + */ +template > +class PhTreeV16 { + friend PhTreeDebugHelper; + using ScalarExternal = typename CONVERT::ScalarExternal; + using ScalarInternal = typename CONVERT::ScalarInternal; + using KeyT = typename CONVERT::KeyInternal; + using EntryT = Entry; + using NodeT = Node; + + public: + static_assert(!std::is_reference::value, "Reference type value are not supported."); + static_assert(std::is_signed::value, "ScalarInternal must be a signed type"); + static_assert( + std::is_integral::value, "ScalarInternal must be an integral type"); + static_assert( + std::is_arithmetic::value, "ScalarExternal must be an arithmetic type"); + static_assert(DIM >= 1 && DIM <= 63, "This PH-Tree supports between 1 and 63 dimensions"); + + explicit PhTreeV16(CONVERT* converter) + : num_entries_{0} + , root_{{}, std::make_unique(), MAX_BIT_WIDTH - 1} + , converter_{converter} {} + + PhTreeV16(const PhTreeV16& other) = delete; + PhTreeV16& operator=(const PhTreeV16& other) = delete; + PhTreeV16(PhTreeV16&& other) noexcept = default; + PhTreeV16& operator=(PhTreeV16&& other) noexcept = default; + ~PhTreeV16() noexcept = default; + + /* + * Attempts to build and insert a key and a value into the tree. + * + * @param key The key for the new entry. + * + * @param args Arguments used to generate a new value. + * + * @return A pair, whose first element points to the possibly inserted pair, + * and whose second element is a bool that is true if the pair was actually inserted. + * + * This function attempts to build and insert a (key, value) pair into the tree. The PH-Tree is + * effectively a map, so if an entry with the same key was already in the tree, returns that + * entry instead of inserting a new one. + */ + template + std::pair try_emplace(const KeyT& key, Args&&... 
args) { + auto* current_entry = &root_; + bool is_inserted = false; + while (current_entry->IsNode()) { + current_entry = ¤t_entry->GetNode().Emplace( + is_inserted, key, current_entry->GetNodePostfixLen(), std::forward(args)...); + } + num_entries_ += is_inserted; + return {current_entry->GetValue(), is_inserted}; + } + + /* + * The try_emplace(hint, key, value) method uses an iterator as hint for insertion. + * The hint is ignored if it is not useful or is equal to end(). + * + * Iterators should normally not be used after the tree has been modified. As an exception to + * this rule, an iterator can be used as hint if it was previously used with at most one call + * to erase() and if no other modifications occurred. + * The following is valid: + * + * // Move value from key1 to key2 + * auto iter = tree.find(key1); + * auto value = iter.second(); // The value may become invalid in erase() + * erase(iter); + * try_emplace(iter, key2, value); // the iterator can still be used as hint here + */ + template + std::pair try_emplace(const ITERATOR& iterator, const KeyT& key, Args&&... args) { + if constexpr (!std::is_same_v>) { + return try_emplace(key, std::forward(args)...); + } else { + // This function can be used to insert a value close to a known value + // or close to a recently removed value. The hint can only be used if the new key is + // inside one of the nodes provided by the hint iterator. + // The idea behind using the 'parent' is twofold: + // - The 'parent' node is one level above the iterator position, it is spatially + // larger and has a better probability of containing the new position, allowing for + // fast track try_emplace. + // - Using 'parent' allows a scenario where the iterator was previously used with + // erase(iterator). This is safe because erase() will never erase the 'parent' node. + + if (!iterator.GetParentNodeEntry()) { + // No hint available, use standard try_emplace() + return try_emplace(key, std::forward(args)...); + } + + auto* parent_entry = iterator.GetParentNodeEntry(); + if (NumberOfDivergingBits(key, parent_entry->GetKey()) > + parent_entry->GetNodePostfixLen() + 1) { + // replace higher up in the tree + return try_emplace(key, std::forward(args)...); + } + + // replace in node + auto* entry = parent_entry; + bool is_inserted = false; + while (entry->IsNode()) { + entry = &entry->GetNode().Emplace( + is_inserted, key, entry->GetNodePostfixLen(), std::forward(args)...); + } + num_entries_ += is_inserted; + return {entry->GetValue(), is_inserted}; + } + } + + /* + * See std::map::insert(). + * + * @return a pair consisting of the inserted element (or to the element that prevented the + * insertion) and a bool denoting whether the insertion took place. + */ + std::pair insert(const KeyT& key, const T& value) { + return try_emplace(key, value); + } + + /* + * @return the value stored at position 'key'. If no such value exists, one is added to the tree + * and returned. + */ + T& operator[](const KeyT& key) { + return try_emplace(key).first; + } + + /* + * Analogous to map:count(). + * + * @return '1', if a value is associated with the provided key, otherwise '0'. + */ + size_t count(const KeyT& key) const { + if (empty()) { + return 0; + } + auto* current_entry = &root_; + while (current_entry && current_entry->IsNode()) { + current_entry = current_entry->GetNode().Find(key, current_entry->GetNodePostfixLen()); + } + return current_entry ? 1 : 0; + } + + /* + * Analogous to map:find(). + * + * Get an entry associated with a k dimensional key. 
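A brief usage sketch may help at this point (an illustrative aside, not part of the patch itself): the map-style operations documented above are normally reached through the public PhTree/PhTreeD wrappers that forward to PhTreeV16. The wrapper name, the "phtree/phtree.h" include and the variable `tree` follow the conventions of the test files later in this diff; whether the wrapper forwards try_emplace under that exact name is an assumption.

    #include "phtree/phtree.h"
    #include <cassert>

    using namespace improbable::phtree;

    void basic_ops_sketch() {
        PhTreeD<3, int> tree;                 // public wrapper around PhTreeV16
        PhPointD<3> p{1.0, 2.0, 3.0};

        tree.try_emplace(p, 42);              // insert; a second call with the same key
                                              // returns the existing entry instead
        assert(tree.count(p) == 1);

        auto it = tree.find(p);               // compare against tree.end(), as with std::map
        if (it != tree.end()) {
            *it += 1;                         // iterators dereference to the stored value
        }

        assert(tree.erase(p) == 1);           // number of removed values (0 or 1)
        assert(tree.empty());
    }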
+ * @param key the key to look up + * @return an iterator that points either to the associated value or to {@code end()} if the key + * was found + */ + auto find(const KeyT& key) const { + const EntryT* current_entry = &root_; + const EntryT* current_node = nullptr; + const EntryT* parent_node = nullptr; + while (current_entry && current_entry->IsNode()) { + parent_node = current_node; + current_node = current_entry; + current_entry = current_entry->GetNode().Find(key, current_entry->GetNodePostfixLen()); + } + + return IteratorWithParent(current_entry, current_node, parent_node, converter_); + } + + /* + * See std::map::erase(). Removes any value associated with the provided key. + * + * @return '1' if a value was found, otherwise '0'. + */ + size_t erase(const KeyT& key) { + auto* entry = &root_; + // We do not want the root entry to be modified. The reason is simply that a lot of the + // code in this class becomes simpler if we can assume the root entry to contain a node. + bool found = false; + while (entry) { + entry = entry->GetNode().Erase(key, entry, entry != &root_, found); + } + num_entries_ -= found; + return found; + } + + /* + * See std::map::erase(). Removes any value at the given iterator location. + * + * WARNING + * While this is guaranteed to work correctly, only iterators returned from find() + * will result in erase(iterator) being faster than erase(key). + * Iterators returned from other functions may be optimized in a future version. + * + * @return '1' if a value was found, otherwise '0'. + */ + template + size_t erase(const ITERATOR& iterator) { + if (iterator.IsEnd()) { + return 0; + } + if constexpr (std::is_same_v>) { + const auto& iter_rich = static_cast&>(iterator); + if (!iter_rich.GetNodeEntry() || iter_rich.GetNodeEntry() == &root_) { + // Do _not_ use the root entry, see erase(key). Start searching from the top. + return erase(iter_rich.GetEntry()->GetKey()); + } + bool found = false; + EntryT* entry = iter_rich.GetNodeEntry(); + // The loop is a safeguard for find_two_mm which may return slightly wrong iterators. + while (entry != nullptr) { + entry = entry->GetNode().Erase(iter_rich.GetEntry()->GetKey(), entry, true, found); + } + num_entries_ -= found; + return found; + } + // There may be no entry because not every iterator sets it. + return erase(iterator.GetEntry()->GetKey()); + } + + /* + * Relocate (move) an entry from one position to another, subject to a predicate. + * + * @param old_key + * @param new_key + * @param predicate + * + * @return A pair, whose first element points to the possibly relocated value, and + * whose second element is a bool that is true if the value was actually relocated. + */ + template + size_t relocate_if(const KeyT& old_key, const KeyT& new_key, PRED&& pred) { + auto pair = _find_two(old_key, new_key); + auto& iter_old = pair.first; + auto& iter_new = pair.second; + + if (iter_old.IsEnd() || !pred(*iter_old)) { + return 0; + } + // Are we inserting in same node and same quadrant? Or are the keys equal? + if (iter_old == iter_new) { + iter_old.GetEntry()->SetKey(new_key); + return 1; + } + + bool is_inserted = false; + auto* new_parent = iter_new.GetNodeEntry(); + new_parent->GetNode().Emplace( + is_inserted, new_key, new_parent->GetNodePostfixLen(), std::move(*iter_old)); + if (!is_inserted) { + return 0; + } + + // Erase old value. See comments in try_emplace(iterator) for details. 
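A hedged sketch of relocate_if() as seen from the public wrapper may clarify the intended use (it continues the conventions of the sketch above and additionally needs <string>; whether the wrapper exposes relocate_if under the same name is an assumption):

    PhTreeD<2, std::string> tree2;
    tree2.try_emplace({1.0, 1.0}, "a");

    // Move the value from {1,1} to {2,2} only if the predicate accepts it.
    // Returns 1 if the value was relocated, 0 otherwise (key missing,
    // predicate false, or destination already occupied).
    size_t moved = tree2.relocate_if(
        {1.0, 1.0}, {2.0, 2.0}, [](const std::string& v) { return v == "a"; });
    assert(moved == 1);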
+ EntryT* old_node_entry = iter_old.GetNodeEntry(); + if (iter_old.GetParentNodeEntry() == iter_new.GetNodeEntry()) { + // In this case the old_node_entry may have been invalidated by the previous insertion. + old_node_entry = iter_old.GetParentNodeEntry(); + } + bool found = false; + while (old_node_entry) { + old_node_entry = old_node_entry->GetNode().Erase( + old_key, old_node_entry, old_node_entry != &root_, found); + } + assert(found); + return 1; + } + + /* + * Tries to locate two entries that are 'close' to each other. + * + * Special behavior: + * - returns end() if old_key does not exist; + */ + auto _find_two(const KeyT& old_key, const KeyT& new_key) { + using Iter = IteratorWithParent; + bit_width_t n_diverging_bits = NumberOfDivergingBits(old_key, new_key); + + const EntryT* current_entry = &root_; // An entry. + const EntryT* old_node_entry = nullptr; // Node that contains entry to be removed + const EntryT* old_node_entry_parent = nullptr; // Parent of the old_node_entry + const EntryT* new_node_entry = nullptr; // Node that will contain new entry + // Find node for removal + while (current_entry && current_entry->IsNode()) { + old_node_entry_parent = old_node_entry; + old_node_entry = current_entry; + auto postfix_len = old_node_entry->GetNodePostfixLen(); + if (postfix_len + 1 >= n_diverging_bits) { + new_node_entry = old_node_entry; + } + current_entry = current_entry->GetNode().Find(old_key, postfix_len); + } + const EntryT* old_entry = current_entry; // Entry to be removed + + // Can we stop already? + if (old_entry == nullptr) { + auto iter = Iter(nullptr, nullptr, nullptr, converter_); + return std::make_pair(iter, iter); // old_key not found! + } + + // Are we inserting in same node and same quadrant? Or are the keys equal? + assert(old_node_entry != nullptr); + if (n_diverging_bits == 0 || old_node_entry->GetNodePostfixLen() >= n_diverging_bits) { + auto iter = Iter(old_entry, old_node_entry, old_node_entry_parent, converter_); + return std::make_pair(iter, iter); + } + + // Find node for insertion + auto new_entry = new_node_entry; + while (new_entry && new_entry->IsNode()) { + new_node_entry = new_entry; + new_entry = new_entry->GetNode().Find(new_key, new_entry->GetNodePostfixLen()); + } + + auto iter1 = Iter(old_entry, old_node_entry, old_node_entry_parent, converter_); + auto iter2 = Iter(new_entry, new_node_entry, nullptr, converter_); + return std::make_pair(iter1, iter2); + } + + /* + * Tries to locate two entries that are 'close' to each other. + * + * Special behavior: + * - returns end() if old_key does not exist; + * - CREATES the destination entry if it does not exist! + */ + auto _find_or_create_two_mm(const KeyT& old_key, const KeyT& new_key, bool count_equals) { + using Iter = IteratorWithParent; + bit_width_t n_diverging_bits = NumberOfDivergingBits(old_key, new_key); + + if (!count_equals && n_diverging_bits == 0) { + auto iter = Iter(nullptr, nullptr, nullptr, converter_); + return std::make_pair(iter, iter); + } + + const EntryT* new_entry = &root_; // An entry. 
+ const EntryT* old_node_entry = nullptr; // Node that contains entry to be removed + const EntryT* new_node_entry = nullptr; // Node that will contain new entry + // Find the deepest common parent node for removal and insertion + bool is_inserted = false; + while (new_entry && new_entry->IsNode() && + new_entry->GetNodePostfixLen() + 1 >= n_diverging_bits) { + new_node_entry = new_entry; + auto postfix_len = new_entry->GetNodePostfixLen(); + new_entry = &new_entry->GetNode().Emplace(is_inserted, new_key, postfix_len); + } + old_node_entry = new_node_entry; + + // Find node for insertion + while (new_entry->IsNode()) { + new_node_entry = new_entry; + new_entry = + &new_entry->GetNode().Emplace(is_inserted, new_key, new_entry->GetNodePostfixLen()); + } + num_entries_ += is_inserted; + assert(new_entry != nullptr); + + auto* old_entry = old_node_entry; + while (old_entry && old_entry->IsNode()) { + old_node_entry = old_entry; + old_entry = old_entry->GetNode().Find(old_key, old_entry->GetNodePostfixLen()); + } + + // Does old_entry exist? + if (old_entry == nullptr) { + auto iter = Iter(nullptr, nullptr, nullptr, converter_); + return std::make_pair(iter, iter); // old_key not found! + } + + // Are we inserting in same node and same quadrant? Or are the keys equal? + if (n_diverging_bits == 0) { + auto iter = Iter(old_entry, old_node_entry, nullptr, converter_); + return std::make_pair(iter, iter); + } + + auto iter1 = Iter(old_entry, old_node_entry, nullptr, converter_); + // TODO Note: Emplace() may return a sub-child so new_node_entry be a grandparent! + auto iter2 = Iter(new_entry, new_node_entry, nullptr, converter_); + return std::make_pair(iter1, iter2); + } + + /* + * Iterates over all entries in the tree. The optional filter allows filtering entries and nodes + * (=sub-trees) before returning / traversing them. By default, all entries are returned. Filter + * functions must implement the same signature as the default 'FilterNoOp'. + * + * @param callback The callback function to be called for every entry that matches the query. + * The callback requires the following signature: callback(const PhPointD &, const T &) + * @param filter An optional filter function. The filter function allows filtering entries and + * sub-nodes before they are returned or traversed. Any filter function must follow the + * signature of the default 'FilterNoOp`. + */ + template + void for_each(CALLBACK&& callback, FILTER&& filter = FILTER()) { + ForEach( + converter_, std::forward(callback), std::forward(filter)) + .Traverse(root_); + } + + template + void for_each(CALLBACK&& callback, FILTER&& filter = FILTER()) const { + ForEach( + converter_, std::forward(callback), std::forward(filter)) + .Traverse(root_); + } + + /* + * Performs a rectangular window query. The parameters are the min and max keys which + * contain the minimum respectively the maximum keys in every dimension. + * @param query_box The query window. + * @param callback The callback function to be called for every entry that matches the query. + * The callback requires the following signature: callback(const PhPoint &, const T &) + * @param filter An optional filter function. The filter function allows filtering entries and + * sub-nodes before they are returned or traversed. Any filter function must follow the + * signature of the default 'FilterNoOp`. 
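The two for_each overloads documented here lend themselves to a short sketch (continuing the PhTreeD<3, int> example from above; the lambda signatures follow the callback(key, value) contract stated in the comments):

    // Visit every entry in the tree.
    tree.for_each([](const PhPointD<3>& key, const int& value) {
        // ... use key / value ...
    });

    // Visit only the entries inside an axis-aligned box.
    tree.for_each(
        {{0.0, 0.0, 0.0}, {10.0, 10.0, 10.0}},
        [](const PhPointD<3>& key, const int& value) { /* ... */ });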
+ */ + template + void for_each( + // TODO check copy elision + const PhBox query_box, + CALLBACK&& callback, + FILTER&& filter = FILTER()) const { + ForEachHC( + query_box.min(), + query_box.max(), + converter_, + std::forward(callback), + std::forward(filter)) + .Traverse(root_); + } + + /* + * Iterates over all entries in the tree. The optional filter allows filtering entries and nodes + * (=sub-trees) before returning / traversing them. By default all entries are returned. Filter + * functions must implement the same signature as the default 'FilterNoOp'. + * + * @return an iterator over all (filtered) entries in the tree, + */ + template + auto begin(FILTER&& filter = FILTER()) const { + return IteratorFull(root_, converter_, std::forward(filter)); + } + + /* + * Performs a rectangular window query. The parameters are the min and max keys which + * contain the minimum respectively the maximum keys in every dimension. + * @param query_box The query window. + * @param filter An optional filter function. The filter function allows filtering entries and + * sub-nodes before they are returned or traversed. Any filter function must follow the + * signature of the default 'FilterNoOp`. + * @return Result iterator. + */ + template + auto begin_query( + const PhBox& query_box, FILTER&& filter = FILTER()) const { + return IteratorHC( + root_, query_box.min(), query_box.max(), converter_, std::forward(filter)); + } + + /* + * Locate nearest neighbors for a given point in space. + * + * Example for distance function: auto fn = DistanceEuclidean + * auto iter = tree.begin_knn_query> + * + * @param min_results number of entries to be returned. More entries may or may not be returned + * when several entries have the same distance. + * @param center center point + * @param distance_function optional distance function, defaults to euclidean distance + * @param filter optional filter predicate that excludes nodes/entries before their distance is + * calculated. + * @return Result iterator. + */ + template + auto begin_knn_query( + size_t min_results, + const KeyT& center, + DISTANCE&& distance_function = DISTANCE(), + FILTER&& filter = FILTER()) const { + return IteratorKnnHS( + root_, + min_results, + center, + converter_, + std::forward(distance_function), + std::forward(filter)); + } + + /* + * @return An iterator representing the tree's 'end'. + */ + auto end() const { + return IteratorEnd(); + } + + /* + * Remove all entries from the tree. + */ + void clear() { + num_entries_ = 0; + root_ = EntryT({}, std::make_unique(), MAX_BIT_WIDTH - 1); + } + + /* + * @return the number of entries (key/value pairs) in the tree. + */ + [[nodiscard]] size_t size() const { + return num_entries_; + } + + /* + * @return 'true' if the tree is empty, otherwise 'false'. + */ + [[nodiscard]] bool empty() const { + return num_entries_ == 0; + } + + private: + /* + * This function is only for debugging. + */ + auto GetDebugHelper() const { + return DebugHelperV16(root_, num_entries_); + } + + private: + size_t num_entries_; + // Contract: root_ contains a Node with 0 or more entries. The root node is the only Node + // that is allowed to have less than two entries. 
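A sketch of the query entry points documented above, again via the public wrapper (assumptions as before; DistanceEuclidean and the iterator accessor first() appear elsewhere in this diff):

    // Rectangular window query.
    for (auto it = tree.begin_query({{0.0, 0.0, 0.0}, {10.0, 10.0, 10.0}});
         it != tree.end(); ++it) {
        auto key = it.first();    // decoded key
        int& value = *it;         // stored value (also reachable via it.second())
    }

    // 5 nearest neighbours of a centre point, by Euclidean distance.
    for (auto it = tree.begin_knn_query(5, {1.0, 2.0, 3.0}, DistanceEuclidean<3>());
         it != tree.end(); ++it) {
        // entries are returned in order of increasing distance
    }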
+ EntryT root_; + CONVERT* converter_; +}; + +} // namespace improbable::phtree::v16 + +#endif // PHTREE_V16_PHTREE_V16_H diff --git a/phtree/CMakeLists.txt b/phtree/CMakeLists.txt deleted file mode 100644 index 53761cd5..00000000 --- a/phtree/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -cmake_minimum_required(VERSION 3.14) -project(phtree) - -add_library(phtree STATIC "") -add_subdirectory(common) -add_subdirectory(v16) - -set_target_properties(phtree PROPERTIES LINKER_LANGUAGE CXX) - diff --git a/phtree/benchmark/logging.cc b/phtree/benchmark/logging.cc deleted file mode 100644 index 51803f0c..00000000 --- a/phtree/benchmark/logging.cc +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (c) Improbable Worlds Ltd, All Rights Reserved -#include "logging.h" - -namespace improbable::phtree::phbenchmark::logging { - -void SetupDefaultLogging() { - SetupLogging({}, spdlog::level::warn); -} - -void SetupLogging(std::vector sinks, spdlog::level::level_enum log_level) { - auto& console_sink = sinks.emplace_back(std::make_shared()); - console_sink->set_level(log_level); - - // Find the minimum log level, in case one of the sinks passed to us has a lower log level. - const auto& sink_with_lowest_log_level = *std::min_element( - sinks.begin(), - sinks.end(), - [](const spdlog::sink_ptr& a, const spdlog::sink_ptr& b) -> bool { - return a->level() < b->level(); - }); - spdlog::level::level_enum min_log_level = - std::min(sink_with_lowest_log_level->level(), log_level); - - // Create the external logger, worker logger and the internal (default) logger from the same log - // sinks. Each logsink can use `GetLoggerTypeFromMessage` to determine which logger a message - // was logged to. - spdlog::set_default_logger( - std::make_shared(kInternalLoggerName, sinks.begin(), sinks.end())); - spdlog::set_level(min_log_level); - spdlog::flush_on(min_log_level); -} - -} // namespace improbable::phtree::phbenchmark::logging diff --git a/phtree/common/BUILD b/phtree/common/BUILD deleted file mode 100644 index 7ef3b6bf..00000000 --- a/phtree/common/BUILD +++ /dev/null @@ -1,126 +0,0 @@ -package(default_visibility = ["//visibility:private"]) - -cc_library( - name = "common", - hdrs = [ - "base_types.h", - "bits.h", - "common.h", - "converter.h", - "debug_helper.h", - "distance.h", - "filter.h", - "flat_array_map.h", - "flat_sparse_map.h", - "tree_stats.h", - ], - visibility = [ - "//visibility:public", - ], - deps = [ - ], -) - -cc_test( - name = "base_types_test", - timeout = "long", - srcs = [ - "base_types_test.cc", - ], - linkstatic = True, - deps = [ - ":common", - "//phtree/testing/gtest_main", - ], -) - -cc_test( - name = "bits_test", - timeout = "long", - srcs = [ - "bits_test.cc", - ], - linkstatic = True, - deps = [ - ":common", - "//phtree/testing/gtest_main", - ], -) - -cc_test( - name = "common_test", - timeout = "long", - srcs = [ - "common_test.cc", - ], - linkstatic = True, - deps = [ - ":common", - "//phtree/testing/gtest_main", - ], -) - -cc_test( - name = "distance_test", - timeout = "long", - srcs = [ - "distance_test.cc", - ], - linkstatic = True, - deps = [ - ":common", - "//phtree/testing/gtest_main", - ], -) - -cc_test( - name = "filter_test", - timeout = "long", - srcs = [ - "filter_test.cc", - ], - linkstatic = True, - deps = [ - ":common", - "//phtree/testing/gtest_main", - ], -) - -cc_test( - name = "flat_array_map_test", - timeout = "long", - srcs = [ - "flat_array_map_test.cc", - ], - linkstatic = True, - deps = [ - ":common", - "//phtree/testing/gtest_main", - ], -) - -cc_test( - name = 
"flat_sparse_map_test", - timeout = "long", - srcs = [ - "flat_sparse_map_test.cc", - ], - linkstatic = True, - deps = [ - ":common", - "//phtree/testing/gtest_main", - ], -) - -cc_test( - name = "preprocessor_test", - timeout = "long", - srcs = [ - "converter_test.cc", - ], - linkstatic = True, - deps = [ - ":common", - "//phtree/testing/gtest_main", - ], -) diff --git a/phtree/common/CMakeLists.txt b/phtree/common/CMakeLists.txt deleted file mode 100644 index bb07ca12..00000000 --- a/phtree/common/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ -cmake_minimum_required(VERSION 3.14) - -target_sources(phtree - PRIVATE - common.h - base_types.h - bits.h - distance.h - filter.h - flat_array_map.h - flat_sparse_map.h - converter.h - debug_helper.h - tree_stats.h - ) diff --git a/phtree/common/filter.h b/phtree/common/filter.h deleted file mode 100644 index 46eacee3..00000000 --- a/phtree/common/filter.h +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Copyright 2020 Improbable Worlds Limited - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef PHTREE_COMMON_FILTERS_H -#define PHTREE_COMMON_FILTERS_H - -#include "base_types.h" -#include "bits.h" -#include "converter.h" -#include "flat_array_map.h" -#include "flat_sparse_map.h" -#include "tree_stats.h" -#include -#include -#include -#include -#include -#include - -namespace improbable::phtree { - -/* - * Any iterator that has a filter defined will traverse nodes or return values if and only if the - * filter function returns 'true'. The filter functions are called for every node and every entry - * (note: internally, nodes are also stored in entries, but these entries will be passed to the - * filter for nodes) that the iterator encounters. By implication, it will never call the filter - * function for nodes of entries if their respective parent node has already been rejected. - * - * There are separate filter functions for nodes and for key/value entries. - * - * Every filter needs to provide two functions: - * - bool IsEntryValid(const PhPoint& key, const T& value); - * This function is called for every key/value pair that the query encounters. The function - * should return 'true' iff the key/value should be added to the query result. - * The parameters are the key and value of the key/value pair. - * - bool IsNodeValid(const PhPoint& prefix, int bits_to_ignore); - * This function is called for every node that the query encounters. The function should - * return 'true' if the node should be traversed and searched for potential results. - * The parameters are the prefix of the node and the number of least significant bits of the - * prefix that can (and should) be ignored. The bits of the prefix that should be ignored can - * have any value. - */ - -/* - * The no-op filter is the default filter for the PH-Tree. It always returns 'true'. - */ -struct FilterNoOp { - /* - * @param key The key/coordinate of the entry. - * @param value The value of the entry. - * @returns This default implementation always returns `true`. 
- */ - template - constexpr bool IsEntryValid(const KEY& /*key*/, const T& /*value*/) const { - return true; - } - - /* - * @param prefix The prefix of node. Any coordinate in the nodes shares this prefix. - * @param bits_to_ignore The number of bits of the prefix that should be ignored because they - * are NOT the same for all coordinates in the node. For example, assuming 64bit values, if the - * node represents coordinates that all share the first 10 bits of the prefix, then the value of - * bits_to_ignore is 64-10=54. - * @returns This default implementation always returns `true`. - */ - template - constexpr bool IsNodeValid(const KEY& /*prefix*/, int /*bits_to_ignore*/) const { - return true; - } -}; - -/* - * The AABB filter can be used to query a point tree for an axis aligned bounding box (AABB). - * The result is equivalent to that of the 'begin_query(...)' function. - */ -template > -class FilterAABB { - using KeyExternal = typename CONVERTER::KeyExternal; - using KeyInternal = typename CONVERTER::KeyInternal; - using ScalarInternal = typename CONVERTER::ScalarInternal; - - static constexpr auto DIM = CONVERTER::DimInternal; - - public: - FilterAABB( - const KeyExternal& min_include, - const KeyExternal& max_include, - CONVERTER converter = CONVERTER()) - : min_external_{min_include} - , max_external_{max_include} - , min_internal_{converter.pre(min_include)} - , max_internal_{converter.pre(max_include)} - , converter_{converter} {}; - - /* - * This function allows resizing/shifting the AABB while iterating over the tree. - */ - void set(const KeyExternal& min_include, const KeyExternal& max_include) { - min_external_ = min_include; - max_external_ = max_include; - min_internal_ = converter_.pre(min_include); - max_internal_ = converter_.pre(max_include); - } - - template - [[nodiscard]] bool IsEntryValid(const KeyInternal& key, const T& /*value*/) const { - auto point = converter_.post(key); - for (dimension_t i = 0; i < DIM; ++i) { - if (point[i] < min_external_[i] || point[i] > max_external_[i]) { - return false; - } - } - return true; - } - - [[nodiscard]] bool IsNodeValid(const KeyInternal& prefix, int bits_to_ignore) const { - // Let's assume that we always want to traverse the root node (bits_to_ignore == 64) - if (bits_to_ignore >= (MAX_BIT_WIDTH - 1)) { - return true; - } - ScalarInternal node_min_bits = MAX_MASK << bits_to_ignore; - ScalarInternal node_max_bits = ~node_min_bits; - - for (dimension_t i = 0; i < DIM; ++i) { - if ((prefix[i] | node_max_bits) < min_internal_[i] || - (prefix[i] & node_min_bits) > max_internal_[i]) { - return false; - } - } - return true; - } - - private: - const KeyExternal min_external_; - const KeyExternal max_external_; - const KeyInternal min_internal_; - const KeyInternal max_internal_; - const CONVERTER converter_; -}; - -/* - * The sphere filter can be used to query a point tree for a sphere. 
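For illustration, a sphere query through this filter might look as follows (a sketch continuing the PhTreeD<3, int> example; the ConverterIEEE<3> default is assumed to match the wrapper's double keys):

    // All entries within distance 2.0 of the centre {5, 5, 5}.
    FilterSphere<ConverterIEEE<3>> sphere{{5.0, 5.0, 5.0}, 2.0};
    tree.for_each(
        [](const PhPointD<3>& key, const int& value) { /* inside the sphere */ },
        sphere);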
- */ -template < - typename CONVERTER = ConverterIEEE<3>, - typename DISTANCE = DistanceEuclidean> -class FilterSphere { - using KeyExternal = typename CONVERTER::KeyExternal; - using KeyInternal = typename CONVERTER::KeyInternal; - using ScalarInternal = typename CONVERTER::ScalarInternal; - using ScalarExternal = typename CONVERTER::ScalarExternal; - - static constexpr auto DIM = CONVERTER::DimInternal; - - public: - FilterSphere( - const KeyExternal& center, - const ScalarExternal& radius, - CONVERTER converter = CONVERTER(), - DISTANCE distance_function = DISTANCE()) - : center_external_{center} - , center_internal_{converter.pre(center)} - , radius_{radius} - , converter_{converter} - , distance_function_{distance_function} {}; - - template - [[nodiscard]] bool IsEntryValid(const KeyInternal& key, const T&) const { - KeyExternal point = converter_.post(key); - return distance_function_(center_external_, point) <= radius_; - } - - /* - * Calculate whether AABB encompassing all possible points in the node intersects with the - * sphere. - */ - [[nodiscard]] bool IsNodeValid(const KeyInternal& prefix, int bits_to_ignore) const { - // we always want to traverse the root node (bits_to_ignore == 64) - - if (bits_to_ignore >= (MAX_BIT_WIDTH - 1)) { - return true; - } - - ScalarInternal node_min_bits = MAX_MASK << bits_to_ignore; - ScalarInternal node_max_bits = ~node_min_bits; - - KeyInternal closest_in_bounds; - for (dimension_t i = 0; i < DIM; ++i) { - // calculate lower and upper bound for dimension for given node - ScalarInternal lo = prefix[i] & node_min_bits; - ScalarInternal hi = prefix[i] | node_max_bits; - - // choose value closest to center for dimension - closest_in_bounds[i] = std::clamp(center_internal_[i], lo, hi); - } - - KeyExternal closest_point = converter_.post(closest_in_bounds); - return distance_function_(center_external_, closest_point) <= radius_; - } - - private: - const KeyExternal center_external_; - const KeyExternal center_internal_; - const ScalarExternal radius_; - const CONVERTER converter_; - const DISTANCE distance_function_; -}; - -} // namespace improbable::phtree - -#endif // PHTREE_COMMON_FILTERS_H diff --git a/phtree/phtree_d_test_filter.cc b/phtree/phtree_d_test_filter.cc deleted file mode 100644 index f5470190..00000000 --- a/phtree/phtree_d_test_filter.cc +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright 2020 Improbable Worlds Limited - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "phtree/phtree.h" -#include -#include -#include - -using namespace improbable::phtree; - -template -using TestPoint = PhPointD; - -template -using TestTree = PhTreeD; - -class DoubleRng { - public: - DoubleRng(double minIncl, double maxExcl) : eng(), rnd{minIncl, maxExcl} {} - - double next() { - return rnd(eng); - } - - private: - std::default_random_engine eng; - std::uniform_real_distribution rnd; -}; - -template -void generateCube(std::vector>& points, size_t N) { - DoubleRng rng(-1000, 1000); - auto refTree = std::map, size_t>(); - - points.reserve(N); - for (size_t i = 0; i < N; i++) { - auto point = TestPoint{rng.next(), rng.next(), rng.next()}; - if (refTree.count(point) != 0) { - i--; - continue; - } - - refTree.emplace(point, i); - points.push_back(point); - } - ASSERT_EQ(refTree.size(), N); - ASSERT_EQ(points.size(), N); -} - -template -void populate(TestTree& tree, std::vector>& points, size_t N) { - generateCube(points, N); - for (size_t i = 0; i < N; i++) { - ASSERT_TRUE(tree.insert(points[i], i).second); - } - ASSERT_EQ(N, tree.size()); -} diff --git a/phtree/phtree_test_unique_ptr_values.cc b/phtree/phtree_test_unique_ptr_values.cc deleted file mode 100644 index 7d7d6716..00000000 --- a/phtree/phtree_test_unique_ptr_values.cc +++ /dev/null @@ -1,184 +0,0 @@ -/* - * Copyright 2020 Improbable Worlds Limited - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "phtree/phtree.h" -#include -#include - -using namespace improbable::phtree; - -template -using TestPoint = PhPoint; - -template -using TestTree = PhTree; - -class IntRng { - public: - IntRng(int minIncl, int maxExcl) : eng(7), rnd{minIncl, maxExcl} {} - - int next() { - return rnd(eng); - } - - private: - std::default_random_engine eng; - std::uniform_int_distribution rnd; -}; - -struct IdObj { - IdObj() = default; - - explicit IdObj(const size_t i) : _i(static_cast(i)){}; - - bool operator==(IdObj& rhs) { - return _i == rhs._i; - } - - IdObj& operator=(IdObj const& rhs) = default; - - int _i; -}; - -using Id = std::unique_ptr; - -struct PointDistance { - PointDistance(double distance, size_t id) : _distance(distance), _id(id) {} - - double _distance; - size_t _id; -}; - -bool comparePointDistance(PointDistance& i1, PointDistance& i2) { - return (i1._distance < i2._distance); -} - -template -double distance(const TestPoint& p1, const TestPoint& p2) { - double sum2 = 0; - for (dimension_t i = 0; i < DIM; i++) { - double d = p1[i] - p2[i]; - sum2 += d * d; - } - return sqrt(sum2); -} - -template -double distanceL1(const TestPoint& p1, const TestPoint& p2) { - double sum = 0; - for (dimension_t i = 0; i < DIM; i++) { - sum += std::abs(p1[i] - p2[i]); - } - return sum; -} - -template -void generateCube(std::vector>& points, size_t N) { - IntRng rng(-1000, 1000); - auto refTree = std::map, size_t>(); - - points.reserve(N); - for (size_t i = 0; i < N; i++) { - auto point = TestPoint{rng.next(), rng.next(), rng.next()}; - if (refTree.count(point) != 0) { - i--; - continue; - } - - refTree.emplace(point, i); - points.push_back(point); - } - ASSERT_EQ(refTree.size(), N); - ASSERT_EQ(points.size(), N); -} - -template -void SmokeTestBasicOps(size_t N) { - TestTree tree; - std::vector> points; - generateCube(points, N); - - ASSERT_EQ(0, tree.size()); - ASSERT_TRUE(tree.empty()); - PhTreeDebugHelper::CheckConsistency(tree); - - for (size_t i = 0; i < N; i++) { - TestPoint& p = points.at(i); - ASSERT_EQ(tree.count(p), 0); - ASSERT_EQ(tree.end(), tree.find(p)); - - if (i % 2 == 0) { - ASSERT_TRUE(tree.emplace(p, std::make_unique(i)).second); - } else { - Id id = std::make_unique(i); - ASSERT_TRUE(tree.emplace(p, std::move(id)).second); - } - ASSERT_EQ(tree.count(p), 1); - ASSERT_NE(tree.end(), tree.find(p)); - ASSERT_EQ(i, (*tree.find(p))->_i); - ASSERT_EQ(i + 1, tree.size()); - - // try add again - ASSERT_FALSE(tree.emplace(p, std::make_unique(i)).second); - ASSERT_EQ(tree.count(p), 1); - ASSERT_NE(tree.end(), tree.find(p)); - ASSERT_EQ(i, (*tree.find(p))->_i); - ASSERT_EQ(i + 1, tree.size()); - ASSERT_FALSE(tree.empty()); - } - - for (size_t i = 0; i < N; i++) { - TestPoint& p = points.at(i); - auto q = tree.begin_query({p, p}); - ASSERT_NE(q, tree.end()); - ASSERT_EQ(i, (*q)->_i); - q++; - ASSERT_EQ(q, tree.end()); - } - - PhTreeDebugHelper::CheckConsistency(tree); - - for (size_t i = 0; i < N; i++) { - TestPoint& p = points.at(i); - ASSERT_NE(tree.find(p), tree.end()); - ASSERT_EQ(tree.count(p), 1); - ASSERT_EQ(i, (*tree.find(p))->_i); - ASSERT_EQ(1, tree.erase(p)); - - ASSERT_EQ(tree.count(p), 0); - ASSERT_EQ(tree.end(), tree.find(p)); - ASSERT_EQ(N - i - 1, tree.size()); - - // try remove again - ASSERT_EQ(0, tree.erase(p)); - ASSERT_EQ(tree.count(p), 0); - ASSERT_EQ(tree.end(), tree.find(p)); - ASSERT_EQ(N - i - 1, tree.size()); - if (i < N - 1) { - ASSERT_FALSE(tree.empty()); - } - } - ASSERT_EQ(0, tree.size()); - ASSERT_TRUE(tree.empty()); - 
PhTreeDebugHelper::CheckConsistency(tree); -} - -TEST(PhTreeTestUniquePtr, SmokeTestBasicOps) { - SmokeTestBasicOps<3>(10000); - SmokeTestBasicOps<6>(10000); - SmokeTestBasicOps<10>(1000); - SmokeTestBasicOps<20>(100); -} diff --git a/phtree/testing/BUILD b/phtree/testing/BUILD deleted file mode 100644 index 2aed744c..00000000 --- a/phtree/testing/BUILD +++ /dev/null @@ -1,14 +0,0 @@ -package(default_visibility = ["//visibility:private"]) - -cc_library( - name = "testing", - testonly = True, - srcs = [ - ], - hdrs = [ - ], - visibility = [ - ], - deps = [ - ], -) diff --git a/phtree/testing/gtest_main/BUILD b/phtree/testing/gtest_main/BUILD deleted file mode 100644 index 0d591976..00000000 --- a/phtree/testing/gtest_main/BUILD +++ /dev/null @@ -1,14 +0,0 @@ -package(default_visibility = ["//visibility:private"]) - -cc_library( - name = "gtest_main", - testonly = True, - srcs = ["gtest_main.cc"], - visibility = [ - "//visibility:public", - ], - deps = [ - "@gtest", - ], - alwayslink = 1, -) diff --git a/phtree/testing/gtest_main/gtest_main.cc b/phtree/testing/gtest_main/gtest_main.cc deleted file mode 100644 index 1e11ab41..00000000 --- a/phtree/testing/gtest_main/gtest_main.cc +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2020 Improbable Worlds Limited - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -int main(int argc, char** argv) { - testing::InitGoogleMock(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/phtree/v16/CMakeLists.txt b/phtree/v16/CMakeLists.txt deleted file mode 100644 index 1aa65630..00000000 --- a/phtree/v16/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -cmake_minimum_required(VERSION 3.14) - -target_sources(phtree - PRIVATE - debug_helper_v16.h - node.h - entry.h - iterator_base.h - iterator_full.h - iterator_hc.h - iterator_knn_hs.h - iterator_simple.h - phtree_v16.h - ) diff --git a/phtree/v16/entry.h b/phtree/v16/entry.h deleted file mode 100644 index 1c8610fc..00000000 --- a/phtree/v16/entry.h +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright 2020 Improbable Worlds Limited - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef PHTREE_V16_ENTRY_H -#define PHTREE_V16_ENTRY_H - -#include "../../phtree/common/common.h" -#include "node.h" -#include -#include -#include - -namespace improbable::phtree::v16 { - -template -class Node; - -/* - * Nodes in the PH-Tree contain up to 2^DIM PhEntries, one in each geometric quadrant. 
- * PhEntries can contain two types of data: - * - A key/value pair (value of type T) - * - A prefix/child-node pair, where prefix is the prefix of the child node and the - * child node is contained in a unique_ptr. - */ -template -class Entry { - using KeyT = PhPoint; - using ValueT = std::remove_const_t; - using NodeT = Node; - - public: - /* - * Construct entry with existing node. - */ - Entry(const KeyT& k, std::unique_ptr&& node_ptr) - : kd_key_{k}, node_{std::move(node_ptr)}, value_{std::nullopt} {} - - /* - * Construct entry with a new node. - */ - Entry(bit_width_t infix_len, bit_width_t postfix_len) - : kd_key_(), node_{std::make_unique(infix_len, postfix_len)}, value_{std::nullopt} {} - - /* - * Construct entry with existing T. - */ - Entry(const KeyT& k, std::optional&& value) - : kd_key_{k}, node_{nullptr}, value_{std::move(value)} {} - - /* - * Construct entry with new T or moved T. - */ - template - explicit Entry(const KeyT& k, Args&&... args) - : kd_key_{k}, node_{nullptr}, value_{std::in_place, std::forward(args)...} {} - - [[nodiscard]] const KeyT& GetKey() const { - return kd_key_; - } - - [[nodiscard]] bool IsValue() const { - return value_.has_value(); - } - - [[nodiscard]] bool IsNode() const { - return node_.get() != nullptr; - } - - [[nodiscard]] T& GetValue() const { - assert(IsValue()); - return const_cast(*value_); - } - - [[nodiscard]] NodeT& GetNode() const { - assert(IsNode()); - return *node_; - } - - void SetNode(std::unique_ptr&& node) { - assert(!IsNode()); - node_ = std::move(node); - value_.reset(); - } - - [[nodiscard]] std::optional&& ExtractValue() { - assert(IsValue()); - return std::move(value_); - } - - [[nodiscard]] std::unique_ptr&& ExtractNode() { - assert(IsNode()); - return std::move(node_); - } - - void ReplaceNodeWithDataFromEntry(Entry&& other) { - assert(IsNode()); - kd_key_ = other.GetKey(); - - if (other.IsNode()) { - node_ = std::move(other.node_); - } else { - value_ = std::move(other.value_); - node_.reset(); - } - } - - private: - KeyT kd_key_; - std::unique_ptr node_; - std::optional value_; -}; -} // namespace improbable::phtree::v16 - -#endif // PHTREE_V16_ENTRY_H diff --git a/phtree/v16/iterator_base.h b/phtree/v16/iterator_base.h deleted file mode 100644 index 50ac8708..00000000 --- a/phtree/v16/iterator_base.h +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright 2020 Improbable Worlds Limited - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef PHTREE_V16_ITERATOR_BASE_H -#define PHTREE_V16_ITERATOR_BASE_H - -#include "../common/common.h" -#include "entry.h" - -namespace improbable::phtree::v16 { - -template -class PhTreeV16; - -/* - * Base class for all PH-Tree iterators. 
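The accessors declared in this base class suggest a small iteration sketch (continuing the earlier PhTreeD<3, int> example; it assumes the replacement iterators in the new layout keep first(), second() and operator++):

    for (auto it = tree.begin(); it != tree.end(); ++it) {
        PhPointD<3> key = it.first();   // key converted back to the external type
        int& value = it.second();       // same object as *it
    }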
- */ -template -class IteratorBase { - protected: - static constexpr dimension_t DIM = CONVERT::DimInternal; - using KeyInternal = typename CONVERT::KeyInternal; - using SCALAR = typename CONVERT::ScalarInternal; - using EntryT = Entry; - friend PhTreeV16; - - public: - explicit IteratorBase(const CONVERT& converter) - : current_result_{nullptr} - , current_node_{} - , parent_node_{} - , is_finished_{false} - , converter_{converter} - , filter_{FILTER()} {} - - explicit IteratorBase(const CONVERT& converter, FILTER filter) - : current_result_{nullptr} - , current_node_{} - , parent_node_{} - , is_finished_{false} - , converter_{converter} - , filter_(std::move(filter)) {} - - T& operator*() const { - assert(current_result_); - return current_result_->GetValue(); - } - - T* operator->() const { - assert(current_result_); - return ¤t_result_->GetValue(); - } - - template - friend bool operator==( - const IteratorBase& left, - const IteratorBase& right) { - // Note: The following compares pointers to Entry objects so it should be - // a) fast (i.e. not comparing contents of entries) - // b) return `false` when comparing apparently identical entries from different PH-Trees (as - // intended) - return (left.is_finished_ && right.Finished()) || - (!left.is_finished_ && !right.Finished() && - left.current_result_ == right.GetCurrentResult()); - } - - template - friend bool operator!=( - const IteratorBase& left, - const IteratorBase& right) { - return !(left == right); - } - - auto first() const { - return converter_.post(current_result_->GetKey()); - } - - T& second() const { - return current_result_->GetValue(); - } - - [[nodiscard]] bool Finished() const { - return is_finished_; - } - - const EntryT* GetCurrentResult() const { - return current_result_; - } - - protected: - void SetFinished() { - is_finished_ = true; - current_result_ = nullptr; - } - - [[nodiscard]] bool ApplyFilter(const EntryT& entry) const { - return entry.IsNode() - ? filter_.IsNodeValid(entry.GetKey(), entry.GetNode().GetPostfixLen() + 1) - : filter_.IsEntryValid(entry.GetKey(), entry.GetValue()); - } - - void SetCurrentResult(const EntryT* current_result) { - current_result_ = current_result; - } - - void SetCurrentNodeEntry(const EntryT* current_node) { - assert(!current_node || current_node->IsNode()); - current_node_ = current_node; - } - - void SetParentNodeEntry(const EntryT* parent_node) { - assert(!parent_node || parent_node->IsNode()); - parent_node_ = parent_node; - } - - auto post(const KeyInternal& point) { - return converter_.post(point); - } - - private: - /* - * The parent entry contains the parent node. The parent node is the node ABOVE the current node - * which contains the current entry. - */ - const EntryT* GetCurrentNodeEntry() const { - return current_node_; - } - - const EntryT* GetParentNodeEntry() const { - return parent_node_; - } - - const EntryT* current_result_; - const EntryT* current_node_; - const EntryT* parent_node_; - bool is_finished_; - const CONVERT& converter_; - FILTER filter_; -}; - -} // namespace improbable::phtree::v16 - -#endif // PHTREE_V16_ITERATOR_BASE_H diff --git a/phtree/v16/phtree_v16.h b/phtree/v16/phtree_v16.h deleted file mode 100644 index 103b7870..00000000 --- a/phtree/v16/phtree_v16.h +++ /dev/null @@ -1,401 +0,0 @@ -/* - * Copyright 2020 Improbable Worlds Limited - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef PHTREE_V16_PHTREE_V16_H -#define PHTREE_V16_PHTREE_V16_H - -#include "debug_helper_v16.h" -#include "for_each.h" -#include "for_each_hc.h" -#include "iterator_full.h" -#include "iterator_hc.h" -#include "iterator_knn_hs.h" -#include "iterator_simple.h" -#include "node.h" - -namespace improbable::phtree::v16 { - -/* - * The PH-Tree is an ordered index on an n-dimensional space (quad-/oct-/2^n-tree) where each - * dimension is (by default) indexed by a 64 bit integer. The index ordered follows z-order / Morton - * order. The index is effectively a 'map', i.e. each key is associated with at most one value. - * - * Keys are points in n-dimensional space. - * - * This API behaves similar to std::map, see function descriptions for details. - * - * Loosely based on PH-Tree Java, V16, see http://www.phtree.org - * - * See also : - * - T. Zaeschke, C. Zimmerli, M.C. Norrie: - * "The PH-Tree -- A Space-Efficient Storage Structure and Multi-Dimensional Index", (SIGMOD 2014) - * - T. Zaeschke: "The PH-Tree Revisited", (2015) - * - T. Zaeschke, M.C. Norrie: "Efficient Z-Ordered Traversal of Hypercube Indexes" (BTW 2017). - * - * @tparam T Value type. - * @tparam DIM Dimensionality. This is the number of dimensions of the space to index. - * @tparam CONVERT A converter class with a 'pre()' and a 'post()' function. 'pre()' translates - * external KEYs into the internal PhPoint type. 'post()' - * translates the PhPoint back to the external KEY type. - */ -template > -class PhTreeV16 { - friend PhTreeDebugHelper; - using ScalarExternal = typename CONVERT::ScalarExternal; - using ScalarInternal = typename CONVERT::ScalarInternal; - using KeyT = typename CONVERT::KeyInternal; - using NodeT = Node; - using EntryT = Entry; - - public: - static_assert(!std::is_reference::value, "Reference type value are not supported."); - static_assert(std::is_signed::value, "ScalarInternal must be a signed type"); - static_assert( - std::is_integral::value, "ScalarInternal must be an integral type"); - static_assert( - std::is_arithmetic::value, "ScalarExternal must be an arithmetic type"); - static_assert(DIM >= 1 && DIM <= 63, "This PH-Tree supports between 1 and 63 dimensions"); - - PhTreeV16(CONVERT& converter = ConverterNoOp()) - : num_entries_{0} - , root_{0, MAX_BIT_WIDTH - 1} - , the_end_{converter} - , converter_{converter} {} - - /* - * Attempts to build and insert a key and a value into the tree. - * - * @param key The key for the new entry. - * - * @param args Arguments used to generate a new value. - * - * @return A pair, whose first element points to the possibly inserted pair, - * and whose second element is a bool that is true if the pair was actually inserted. - * - * This function attempts to build and insert a (key, value) pair into the tree. The PH-Tree is - * effectively a map, so if an entry with the same key was already in the tree, returns that - * entry instead of inserting a new one. - */ - template - std::pair emplace(const KeyT& key, Args&&... 
args) { - auto* current_entry = &root_; - bool is_inserted = false; - while (current_entry->IsNode()) { - current_entry = - current_entry->GetNode().Emplace(is_inserted, key, std::forward(args)...); - } - num_entries_ += is_inserted; - return {current_entry->GetValue(), is_inserted}; - } - - /* - * The emplace_hint() method uses an iterator as hint for insertion. - * The hint is ignored if it is not useful or is equal to end(). - * - * Iterators should normally not be used after the tree has been modified. As an exception to - * this rule, an iterator can be used as hint if it was previously used with at most one call - * to erase() and if no other modifications occurred. - * The following is valid: - * - * // Move value from key1 to key2 - * auto iter = tree.find(key1); - * auto value = iter.second(); // The value may become invalid in erase() - * erase(iter); - * emplace_hint(iter, key2, value); // the iterator can still be used as hint here - */ - template - std::pair emplace_hint(const ITERATOR& iterator, const KeyT& key, Args&&... args) { - // This function can be used to insert a value close to a known value - // or close to a recently removed value. The hint can only be used if the new key is - // inside one of the nodes provided by the hint iterator. - // The idea behind using the 'parent' is twofold: - // - The 'parent' node is one level above the iterator position, it therefore is spatially - // larger and has a better probability of containing the new position, allowing for - // fast track emplace. - // - Using 'parent' allows a scenario where the iterator was previously used with - // erase(iterator). This is safe because erase() will never erase the 'parent' node. - - if (!iterator.GetParentNodeEntry()) { - // No hint available, use standard emplace() - return emplace(key, std::forward(args)...); - } - - auto* parent_entry = iterator.GetParentNodeEntry(); - if (NumberOfDivergingBits(key, parent_entry->GetKey()) > - parent_entry->GetNode().GetPostfixLen() + 1) { - // replace higher up in the tree - return emplace(key, std::forward(args)...); - } - - // replace in node - auto* current_entry = parent_entry; - bool is_inserted = false; - while (current_entry->IsNode()) { - current_entry = - current_entry->GetNode().Emplace(is_inserted, key, std::forward(args)...); - } - num_entries_ += is_inserted; - return {current_entry->GetValue(), is_inserted}; - } - - /* - * See std::map::insert(). - * - * @return a pair consisting of the inserted element (or to the element that prevented the - * insertion) and a bool denoting whether the insertion took place. - */ - std::pair insert(const KeyT& key, const T& value) { - return emplace(key, value); - } - - /* - * @return the value stored at position 'key'. If no such value exists, one is added to the tree - * and returned. - */ - T& operator[](const KeyT& key) { - return emplace(key).first; - } - - /* - * Analogous to map:count(). - * - * @return '1', if a value is associated with the provided key, otherwise '0'. - */ - size_t count(const KeyT& key) const { - if (empty()) { - return 0; - } - auto* current_entry = &root_; - while (current_entry && current_entry->IsNode()) { - current_entry = current_entry->GetNode().Find(key); - } - return current_entry ? 1 : 0; - } - - /* - * Analogous to map:find(). - * - * Get an entry associated with a k dimensional key. 
- * @param key the key to look up - * @return an iterator that points either to the associated value or to {@code end()} if the key - * was found - */ - auto find(const KeyT& key) const { - if (empty()) { - return IteratorSimple(converter_); - } - - const EntryT* current_entry = &root_; - const EntryT* current_node = nullptr; - const EntryT* parent_node = nullptr; - while (current_entry && current_entry->IsNode()) { - parent_node = current_node; - current_node = current_entry; - current_entry = current_entry->GetNode().Find(key); - } - - return IteratorSimple(current_entry, current_node, parent_node, converter_); - } - - /* - * See std::map::erase(). Removes any value associated with the provided key. - * - * @return '1' if a value was found, otherwise '0'. - */ - size_t erase(const KeyT& key) { - auto* current_node = &root_.GetNode(); - NodeT* parent_node = nullptr; - bool found = false; - while (current_node) { - auto* child_node = current_node->Erase(key, parent_node, found); - parent_node = current_node; - current_node = child_node; - } - num_entries_ -= found; - return found; - } - - /* - * See std::map::erase(). Removes any value at the given iterator location. - * - * - * - * WARNING - * While this is guaranteed to work correctly, only iterators returned from find() - * will result in erase(iterator) being faster than erase(key). - * Iterators returned from other functions may be optimized in a future version. - * - * @return '1' if a value was found, otherwise '0'. - */ - template - size_t erase(const ITERATOR& iterator) { - if (iterator.Finished()) { - return 0; - } - if (!iterator.GetParentNodeEntry()) { - // Why may there be no parent? - // - we are in the root node - // - the iterator did not set this value - // In either case, we need to start searching from the top. - return erase(iterator.GetCurrentResult()->GetKey()); - } - bool found = false; - assert(iterator.GetCurrentNodeEntry() && iterator.GetCurrentNodeEntry()->IsNode()); - iterator.GetCurrentNodeEntry()->GetNode().Erase( - iterator.GetCurrentResult()->GetKey(), - &iterator.GetParentNodeEntry()->GetNode(), - found); - - num_entries_ -= found; - return found; - } - - /* - * Iterates over all entries in the tree. The optional filter allows filtering entries and nodes - * (=sub-trees) before returning / traversing them. By default all entries are returned. Filter - * functions must implement the same signature as the default 'FilterNoOp'. - * - * @param callback The callback function to be called for every entry that matches the query. - * The callback requires the following signature: callback(const PhPointD &, const T &) - * @param filter An optional filter function. The filter function allows filtering entries and - * sub-nodes before they are returned or traversed. Any filter function must follow the - * signature of the default 'FilterNoOp`. - */ - template - void for_each(CALLBACK_FN& callback, FILTER filter = FILTER()) const { - ForEach(converter_, callback, filter).run(root_); - } - - /* - * Performs a rectangular window query. The parameters are the min and max keys which - * contain the minimum respectively the maximum keys in every dimension. - * @param query_box The query window. - * @param callback The callback function to be called for every entry that matches the query. - * The callback requires the following signature: callback(const PhPoint &, const T &) - * @param filter An optional filter function. The filter function allows filtering entries and - * sub-nodes before they are returned or traversed. 
Any filter function must follow the - * signature of the default 'FilterNoOp`. - */ - template - void for_each( - const PhBox& query_box, - CALLBACK_FN& callback, - FILTER filter = FILTER()) const { - ForEachHC( - query_box.min(), query_box.max(), converter_, callback, filter) - .run(root_); - } - - /* - * Iterates over all entries in the tree. The optional filter allows filtering entries and nodes - * (=sub-trees) before returning / traversing them. By default all entries are returned. Filter - * functions must implement the same signature as the default 'FilterNoOp'. - * - * @return an iterator over all (filtered) entries in the tree, - */ - template - auto begin(FILTER filter = FILTER()) const { - return IteratorFull(root_, converter_, filter); - } - - /* - * Performs a rectangular window query. The parameters are the min and max keys which - * contain the minimum respectively the maximum keys in every dimension. - * @param query_box The query window. - * @param filter An optional filter function. The filter function allows filtering entries and - * sub-nodes before they are returned or traversed. Any filter function must follow the - * signature of the default 'FilterNoOp`. - * @return Result iterator. - */ - template - auto begin_query(const PhBox& query_box, FILTER filter = FILTER()) const { - return IteratorHC( - root_, query_box.min(), query_box.max(), converter_, filter); - } - - /* - * Locate nearest neighbors for a given point in space. - * - * Example for distance function: auto fn = DistanceEuclidean - * auto iter = tree.begin_knn_query> - * - * @param min_results number of entries to be returned. More entries may or may not be returned - * when several entries have the same distance. - * @param center center point - * @param distance_function optional distance function, defaults to euclidean distance - * @param filter optional filter predicate that excludes nodes/entries before their distance is - * calculated. - * @return Result iterator. - */ - template - auto begin_knn_query( - size_t min_results, - const KeyT& center, - DISTANCE distance_function = DISTANCE(), - FILTER filter = FILTER()) const { - return IteratorKnnHS( - root_, min_results, center, converter_, distance_function, filter); - } - - /* - * @return An iterator representing the tree's 'end'. - */ - const auto& end() const { - return the_end_; - } - - /* - * Remove all entries from the tree. - */ - void clear() { - num_entries_ = 0; - root_ = EntryT(0, MAX_BIT_WIDTH - 1); - } - - /* - * @return the number of entries (key/value pairs) in the tree. - */ - [[nodiscard]] size_t size() const { - return num_entries_; - } - - /* - * @return 'true' if the tree is empty, otherwise 'false'. - */ - [[nodiscard]] bool empty() const { - return num_entries_ == 0; - } - - private: - /* - * This function is only for debugging. - */ - auto GetDebugHelper() const { - return DebugHelperV16(root_.GetNode(), num_entries_); - } - - private: - size_t num_entries_; - // Contract: root_ contains a Node with 0 or more entries (the root node is the only Node - // that is allowed to have less than two entries. 
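The usage pattern documented above can be condensed into a short sketch. This is written against the public PhTreeD facade from "phtree/phtree.h" (which the tests in this change include) and assumes the facade forwards to the methods shown here; the keys and values are illustrative only, not taken from this change.

#include "phtree/phtree.h"
#include <iostream>

using namespace improbable::phtree;

int main() {
    PhTreeD<3, int> tree;
    PhPointD<3> key1{1.0, 2.0, 3.0};
    PhPointD<3> key2{1.5, 2.5, 3.5};

    // emplace() returns {value, bool}; the bool reports whether a new entry was created.
    tree.emplace(key1, 42);

    // "Move" a value from key1 to key2 with the erase() + emplace_hint() pattern
    // described in the emplace_hint() documentation above.
    auto iter = tree.find(key1);
    int value = *iter;              // copy the value before erase() invalidates it
    tree.erase(iter);
    tree.emplace_hint(iter, key2, value);

    // Iterate over all entries; a named lambda is used because for_each() takes the
    // callback by reference.
    auto print = [](const PhPointD<3>&, const int& v) { std::cout << v << '\n'; };
    tree.for_each(print);

    // Rectangular window query.
    for (auto it = tree.begin_query({{0, 0, 0}, {10, 10, 10}}); it != tree.end(); ++it) {
        std::cout << "in box: " << *it << '\n';
    }

    // 1-nearest-neighbour query with an explicit Euclidean distance function.
    for (auto it = tree.begin_knn_query(1, {1, 2, 3}, DistanceEuclidean<3>()); it != tree.end(); ++it) {
        std::cout << "nearest: " << *it << '\n';
    }
    return 0;
}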
- EntryT root_; - IteratorEnd the_end_; - CONVERT converter_; -}; - -} // namespace improbable::phtree::v16 - -#endif // PHTREE_V16_PHTREE_V16_H diff --git a/phtree/BUILD b/test/BUILD similarity index 50% rename from phtree/BUILD rename to test/BUILD index fe48ccc8..0d8d0d7f 100644 --- a/phtree/BUILD +++ b/test/BUILD @@ -1,19 +1,41 @@ package(default_visibility = ["//visibility:private"]) -cc_library( - name = "phtree", +cc_test( + name = "converter_test", + timeout = "long", srcs = [ + "converter_test.cc", + ], + linkstatic = True, + deps = [ + "//:phtree", + "@gtest//:gtest_main", ], - hdrs = [ - "phtree.h", - "phtree_multimap.h", +) + +cc_test( + name = "distance_test", + timeout = "long", + srcs = [ + "distance_test.cc", ], linkstatic = True, - visibility = [ - "//visibility:public", + deps = [ + "//:phtree", + "@gtest//:gtest_main", + ], +) + +cc_test( + name = "filter_test", + timeout = "long", + srcs = [ + "filter_test.cc", ], + linkstatic = True, deps = [ - "//phtree/v16", + "//:phtree", + "@gtest//:gtest_main", ], ) @@ -25,8 +47,8 @@ cc_test( ], linkstatic = True, deps = [ - ":phtree", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", ], ) @@ -38,8 +60,8 @@ cc_test( ], linkstatic = True, deps = [ - ":phtree", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", ], ) @@ -51,8 +73,8 @@ cc_test( ], linkstatic = True, deps = [ - ":phtree", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", ], ) @@ -64,21 +86,21 @@ cc_test( ], linkstatic = True, deps = [ - ":phtree", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", ], ) cc_test( - name = "phtree_multimap_test_move_only_values", + name = "phtree_multimap_d_test_unique_ptr_values", timeout = "long", srcs = [ - "phtree_test_unique_ptr_values.cc", + "phtree_multimap_d_test_unique_ptr_values.cc", ], linkstatic = True, deps = [ - ":phtree", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", ], ) @@ -90,8 +112,8 @@ cc_test( ], linkstatic = True, deps = [ - ":phtree", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", ], ) @@ -103,8 +125,60 @@ cc_test( ], linkstatic = True, deps = [ - ":phtree", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", + ], +) + +cc_test( + name = "phtree_box_d_test_filter", + timeout = "long", + srcs = [ + "phtree_box_d_test_filter.cc", + ], + linkstatic = True, + deps = [ + "//:phtree", + "@gtest//:gtest_main", + ], +) + +cc_test( + name = "phtree_multimap_d_test_filter", + timeout = "long", + srcs = [ + "phtree_multimap_d_test_filter.cc", + ], + linkstatic = True, + deps = [ + "//:phtree", + "@gtest//:gtest_main", + ], +) + +cc_test( + name = "phtree_d_test_copy_move", + timeout = "long", + srcs = [ + "phtree_d_test_copy_move.cc", + ], + linkstatic = True, + deps = [ + "//:phtree", + "@gtest//:gtest_main", + ], +) + +cc_test( + name = "phtree_multimap_d_test_copy_move", + timeout = "long", + srcs = [ + "phtree_multimap_d_test_copy_move.cc", + ], + linkstatic = True, + deps = [ + "//:phtree", + "@gtest//:gtest_main", ], ) @@ -116,8 +190,8 @@ cc_test( ], linkstatic = True, deps = [ - ":phtree", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", ], ) @@ -129,8 +203,8 @@ cc_test( ], linkstatic = True, deps = [ - ":phtree", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", ], ) @@ -142,8 +216,8 @@ cc_test( ], linkstatic = True, deps = [ - ":phtree", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", 
], ) @@ -155,8 +229,8 @@ cc_test( ], linkstatic = True, deps = [ - ":phtree", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", ], ) @@ -168,8 +242,8 @@ cc_test( ], linkstatic = True, deps = [ - ":phtree", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", ], ) @@ -181,8 +255,8 @@ cc_test( ], linkstatic = True, deps = [ - ":phtree", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", ], ) @@ -194,8 +268,8 @@ cc_test( ], linkstatic = True, deps = [ - ":phtree", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", ], ) @@ -207,7 +281,20 @@ cc_test( ], linkstatic = True, deps = [ - ":phtree", - "//phtree/testing/gtest_main", + "//:phtree", + "@gtest//:gtest_main", + ], +) + +cc_test( + name = "phtree_test_issues", + timeout = "long", + srcs = [ + "phtree_test_issues.cc", + ], + linkstatic = True, + deps = [ + "//:phtree", + "@gtest//:gtest_main", ], ) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt new file mode 100644 index 00000000..5c899ada --- /dev/null +++ b/test/CMakeLists.txt @@ -0,0 +1,100 @@ +cmake_minimum_required(VERSION 3.14) +project(phtree-tests LANGUAGES CXX) + +include(FetchContent) +include(common/scripts.cmake) + +FetchContent_Declare( + googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG release-1.12.1 +) +if (MSVC) + # Avoids LNK2038 Error with MSVC + set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) +endif () +FetchContent_MakeAvailable(googletest) + +# The next line is optional, but keeps your CACHE cleaner: +mark_as_advanced( + BUILD_GMOCK BUILD_GTEST BUILD_SHARED_LIBS + gmock_build_tests gtest_build_samples gtest_build_tests + gtest_disable_pthreads gtest_force_shared_crt gtest_hide_internal_symbols +) + +# If you are interested in keeping IDEs that support folders clean, I would also add these lines: +set_target_properties(gtest PROPERTIES FOLDER extern) +set_target_properties(gtest_main PROPERTIES FOLDER extern) + +#include(GoogleTest) +#gtest_discover_tests(all_tests_driver) + +if (PHTREE_CODE_COVERAGE) + package_add_test_main(all_tests + all_tests.cc + converter_test.cc + distance_test.cc + filter_test.cc + phtree_test.cc + phtree_test_const_values.cc + phtree_test_issues.cc + phtree_test_ptr_values.cc + phtree_test_unique_ptr_values.cc + phtree_f_test.cc + phtree_d_test.cc + phtree_d_test_copy_move.cc + phtree_d_test_custom_key.cc + phtree_d_test_filter.cc + phtree_d_test_preprocessor.cc + phtree_box_f_test.cc + phtree_box_d_test.cc + phtree_box_d_test_filter.cc + phtree_box_d_test_query_types.cc + phtree_multimap_d_test.cc + phtree_multimap_d_test_copy_move.cc + phtree_multimap_d_test_filter.cc + phtree_multimap_d_test_unique_ptr_values.cc + phtree_multimap_box_d_test.cc + common/b_plus_tree_hash_map_test.cc + common/b_plus_tree_map_test.cc + common/base_types_test.cc + common/bits_test.cc + common/common_test.cc + common/flat_array_map_test.cc + common/flat_sparse_map_test.cc) + target_compile_definitions(all_tests PUBLIC SKIP_TEST_MEMORY_LEAKS=ON) +else () + package_add_test(phtree_test phtree_test.cc) + package_add_test(phtree_test_const_values phtree_test_const_values.cc) + package_add_test(phtree_test_issues phtree_test_issues.cc) + target_compile_definitions(phtree_test_issues PUBLIC SKIP_TEST_MEMORY_LEAKS=ON) + package_add_test(phtree_test_ptr_values phtree_test_ptr_values.cc) + package_add_test(phtree_test_unique_ptr_values phtree_test_unique_ptr_values.cc) + + package_add_test(phtree_f_test phtree_f_test.cc) + + 
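Every test target registered here follows the same shape: a single .cc file linked against the phtree library and gtest_main. A minimal sketch of such a file (a hypothetical example_test.cc, not part of this change) would be:

// example_test.cc -- hypothetical; gtest_main supplies main(), so only TEST() cases are defined.
#include "phtree/phtree.h"
#include <gtest/gtest.h>

using namespace improbable::phtree;

TEST(ExampleTest, InsertAndFind) {
    PhTreeD<2, int> tree;
    PhPointD<2> p{1.0, 2.0};
    ASSERT_TRUE(tree.emplace(p, 7).second);
    ASSERT_NE(tree.end(), tree.find(p));
    ASSERT_EQ(1u, tree.size());
}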
package_add_test(phtree_d_test phtree_d_test.cc) + package_add_test(phtree_d_test_copy_move phtree_d_test_copy_move.cc) + package_add_test(phtree_d_test_custom_key phtree_d_test_custom_key.cc) + package_add_test(phtree_d_test_filter phtree_d_test_filter.cc) + package_add_test(phtree_d_test_preprocessor phtree_d_test_preprocessor.cc) + + package_add_test(phtree_box_f_test phtree_box_f_test.cc) + + package_add_test(phtree_box_d_test phtree_box_d_test.cc) + package_add_test(phtree_box_d_test_filter phtree_box_d_test_filter.cc) + package_add_test(phtree_box_d_test_query_types phtree_box_d_test_query_types.cc) + + package_add_test(phtree_multimap_d_test phtree_multimap_d_test.cc) + package_add_test(phtree_multimap_d_test_copy_move phtree_multimap_d_test_copy_move.cc) + package_add_test(phtree_multimap_d_test_filter phtree_multimap_d_test_filter.cc) + package_add_test(phtree_multimap_d_test_unique_ptr_values phtree_multimap_d_test_unique_ptr_values.cc) + + package_add_test(phtree_multimap_box_d_test phtree_multimap_box_d_test.cc) + + package_add_test(converter_test converter_test.cc) + package_add_test(distance_test distance_test.cc) + package_add_test(filter_test filter_test.cc) + + add_subdirectory(common) +endif () diff --git a/test/all_tests.cc b/test/all_tests.cc new file mode 100644 index 00000000..ddc6dfc6 --- /dev/null +++ b/test/all_tests.cc @@ -0,0 +1,11 @@ +#include + +// #include "gtest/gtest.h" + +//#include "phtree_f_test.cc" +//#include "phtree_test.cc" + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/test/common/BUILD b/test/common/BUILD new file mode 100644 index 00000000..8299d673 --- /dev/null +++ b/test/common/BUILD @@ -0,0 +1,92 @@ +package(default_visibility = ["//visibility:private"]) + +cc_test( + name = "base_types_test", + timeout = "long", + srcs = [ + "base_types_test.cc", + ], + linkstatic = True, + deps = [ + "//:phtree", + "@gtest//:gtest_main", + ], +) + +cc_test( + name = "bits_test", + timeout = "long", + srcs = [ + "bits_test.cc", + ], + linkstatic = True, + deps = [ + "//:phtree", + "@gtest//:gtest_main", + ], +) + +cc_test( + name = "common_test", + timeout = "long", + srcs = [ + "common_test.cc", + ], + linkstatic = True, + deps = [ + "//:phtree", + "@gtest//:gtest_main", + ], +) + +cc_test( + name = "flat_array_map_test", + timeout = "long", + srcs = [ + "flat_array_map_test.cc", + ], + linkstatic = True, + deps = [ + "//:phtree", + "@gtest//:gtest_main", + ], +) + +cc_test( + name = "b_plus_tree_hash_map_test", + timeout = "long", + srcs = [ + "b_plus_tree_hash_map_test.cc", + ], + linkstatic = True, + deps = [ + "//:phtree", + "@gtest//:gtest_main", + ], +) + +cc_test( + name = "b_plus_tree_map_test", + timeout = "long", + srcs = [ + "b_plus_tree_map_test.cc", + ], + linkstatic = True, + deps = [ + "//:phtree", + "@gtest//:gtest_main", + ], +) + +cc_test( + name = "flat_sparse_map_test", + timeout = "long", + srcs = [ + "flat_sparse_map_test.cc", + ], + linkstatic = True, + deps = [ + "//:phtree", + "@gtest//:gtest_main", + ], +) diff --git a/test/common/CMakeLists.txt b/test/common/CMakeLists.txt new file mode 100644 index 00000000..a24a1b6c --- /dev/null +++ b/test/common/CMakeLists.txt @@ -0,0 +1,10 @@ +include(scripts.cmake) + +package_add_test(b_plus_tree_hash_map_test b_plus_tree_hash_map_test.cc) +package_add_test(b_plus_tree_map_test b_plus_tree_map_test.cc) +package_add_test(base_types_test base_types_test.cc) +package_add_test(bits_test 
bits_test.cc) +package_add_test(common_test common_test.cc) + +package_add_test(flat_array_map_test flat_array_map_test.cc) +package_add_test(flat_sparse_map_test flat_sparse_map_test.cc) diff --git a/test/common/b_plus_tree_hash_map_test.cc b/test/common/b_plus_tree_hash_map_test.cc new file mode 100644 index 00000000..5d74ae7a --- /dev/null +++ b/test/common/b_plus_tree_hash_map_test.cc @@ -0,0 +1,386 @@ +/* + * Copyright 2022 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "phtree/common/b_plus_tree_hash_map.h" +#include +#include +#include + +using namespace improbable::phtree; + +static int default_construct_count_ = 0; +static int construct_count_ = 0; +static int copy_construct_count_ = 0; +static int move_construct_count_ = 0; +static int copy_assign_count_ = 0; +static int move_assign_count_ = 0; +static int destruct_count_ = 0; + +[[maybe_unused]] static void reset_id_counters() { + default_construct_count_ = 0; + construct_count_ = 0; + copy_construct_count_ = 0; + move_construct_count_ = 0; + copy_assign_count_ = 0; + move_assign_count_ = 0; + destruct_count_ = 0; +} + +[[maybe_unused]] static void print_id_counters() { + std::cout << "dc=" << default_construct_count_ << " c=" << construct_count_ + << " cc=" << copy_construct_count_ << " mc=" << move_construct_count_ + << " ca=" << copy_assign_count_ << " ma=" << move_assign_count_ + << " d=" << destruct_count_ << std::endl; +} + +struct Id { + Id() : _i{0} { + ++default_construct_count_; + } + + explicit Id(const size_t i) : _i{static_cast(i)} { + ++construct_count_; + } + + explicit Id(const int i) : _i{i} { + ++construct_count_; + } + + Id(const Id& other) { + ++copy_construct_count_; + _i = other._i; + } + + Id(Id&& other) noexcept { + ++move_construct_count_; + _i = other._i; + } + + Id& operator=(const Id& other) noexcept { + ++copy_assign_count_; + _i = other._i; + return *this; + } + Id& operator=(Id&& other) noexcept { + ++move_assign_count_; + _i = other._i; + return *this; + } + + bool operator==(const Id& rhs) const { + return _i == rhs._i; + } + + ~Id() { + ++destruct_count_; + } + + int _i; +}; + +namespace std { +template <> +struct hash { + size_t operator()(const Id& x) const { + return std::hash{}(x._i % 10); + } +}; +}; // namespace std + +template +void CheckMapResult(const R& result, END end, const K& key, const V& val) { + ASSERT_NE(result, end); + ASSERT_EQ(result->first, key); + ASSERT_EQ(result->second, val); +} + +template +void CheckMapResultPair(const R& result, bool expected_success, const K& key, const V& val) { + assert(result.second == expected_success); + ASSERT_EQ(result.second, expected_success); + ASSERT_EQ(result.first->first, key); + ASSERT_EQ(result.first->second, val); +} + +template +void CheckSetResult(const R& result, END end, const K& key) { + ASSERT_NE(result, end); + ASSERT_EQ(*result, key); +} + +template +void CheckSetResultPair(const R& result, bool expected_success, const K& key) { + assert(result.second == expected_success); + 
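    // assert() aborts immediately in a debug build; the gtest assertion below reports the mismatch with context.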
ASSERT_EQ(result.second, expected_success); + ASSERT_EQ(*result.first, key); +} + +template +void SmokeTestMap() { + const int N = 300; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, N / 2); + + size_t val = 0; + for (int i = 0; i < 10; i++) { + b_plus_tree_hash_map> test_map; + std::unordered_map reference_map; + for (int j = 0; j < N; j++) { + size_t key = cube_distribution(random_engine); + Id id(key); + bool hasVal = test_map.find(id) != test_map.end(); + bool hasValRef = reference_map.find(id) != reference_map.end(); + ASSERT_EQ(hasVal, hasValRef); + + if (!hasVal) { + if (key % 6 == 0) { + CheckMapResultPair(test_map.emplace(id, val), true, id, val); + CheckMapResultPair(test_map.emplace(id, val), false, id, val); + } else if (key % 6 == 1) { + CheckMapResultPair(test_map.try_emplace(id, val), true, id, val); + CheckMapResultPair(test_map.try_emplace(id, val), false, id, val); + } else if (key % 6 == 2) { + // Leaf-hint of questionable quality + auto hint = test_map.find(Id(key - 1)); + CheckMapResult(test_map.try_emplace(hint, id, val), test_map.end(), id, val); + CheckMapResult(test_map.try_emplace(hint, id, val), test_map.end(), id, val); + } else if (key % 6 == 3) { + auto hint = j % 2 == 0 ? test_map.begin() : test_map.end(); + // Bad hint + CheckMapResult(test_map.try_emplace(hint, id, val), test_map.end(), id, val); + CheckMapResult(test_map.try_emplace(hint, id, val), test_map.end(), id, val); + } else if (key % 6 == 4) { + // Leaf-hint of questionable quality + auto hint = test_map.find(Id(key - 1)); + CheckMapResult(test_map.emplace_hint(hint, id, val), test_map.end(), id, val); + CheckMapResult(test_map.emplace_hint(hint, id, val), test_map.end(), id, val); + } else { + auto hint = j % 2 == 0 ? 
test_map.begin() : test_map.end(); + // Bad hint + CheckMapResult(test_map.emplace_hint(hint, id, val), test_map.end(), id, val); + CheckMapResult(test_map.emplace_hint(hint, id, val), test_map.end(), id, val); + } + test_map._check(); + reference_map.emplace(id, val); + } + + ASSERT_EQ(test_map.size(), reference_map.size()); + for (auto& entry : reference_map) { + const Id& kRef = entry.first; + size_t vMap = test_map.find(kRef)->second; + ASSERT_EQ(vMap, entry.second); + ASSERT_TRUE(test_map.count(kRef)); + } + for (auto& entry : test_map) { + Id& k = entry.first; + size_t vRef = reference_map.find(k)->second; + size_t vMap = test_map.find(k)->second; + ASSERT_EQ(vMap, vRef); + } + ++val; + } + } +} + +TEST(PhTreeBptHashMapTest, SmokeTestNonUnique) { + SmokeTestMap>(); +} + +TEST(PhTreeBptHashMapTest, SmokeTestSameHash) { + struct DumbHash { + size_t operator()(const Id&) const { + return 42; + } + }; + SmokeTestMap(); +} + +template +void SmokeTestSet() { + const int N = 200; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, N / 2); + + for (int i = 0; i < 10; i++) { + b_plus_tree_hash_set test_map; + std::unordered_set reference_map; + for (int j = 0; j < N; j++) { + { + size_t key = cube_distribution(random_engine); + Id id(key); + bool hasVal = test_map.find(id) != test_map.end(); + bool hasValRef = reference_map.find(id) != reference_map.end(); + ASSERT_EQ(hasVal, hasValRef); + + if (!hasVal) { + if (key % 3 == 0) { + CheckSetResultPair(test_map.emplace(id), true, id); + CheckSetResultPair(test_map.emplace(key), false, id); + } else if (key % 3 == 1) { + // Leaf-hint of questionable quality + auto hint = test_map.find(Id(key - 1)); + CheckSetResult(test_map.emplace_hint(hint, id), test_map.end(), id); + CheckSetResult(test_map.emplace_hint(hint, key), test_map.end(), id); + } else { + auto hint = j % 2 == 0 ? 
test_map.begin() : test_map.end(); + // Bad hint + CheckSetResult(test_map.emplace_hint(hint, id), test_map.end(), id); + CheckSetResult(test_map.emplace_hint(hint, key), test_map.end(), id); + } + test_map._check(); + reference_map.emplace(id); + } + } + + ASSERT_EQ(test_map.size(), reference_map.size()); + for (auto& id : reference_map) { + Id& idMap = *test_map.find(id); + ASSERT_EQ(idMap, id); + } + for (auto& id : test_map) { + const Id& vRef = *reference_map.find(id); + Id& vMap = *test_map.find(id); + ASSERT_EQ(vMap, vRef); + } + } + } +} + +TEST(PhTreeBptHashSetTest, SmokeTestNonUnique) { + SmokeTestSet>(); +} + +TEST(PhTreeBptHashSetTest, SmokeTestSameHash) { + struct DumbHash { + size_t operator()(const Id&) const { + return 42; + } + }; + SmokeTestSet(); +} + +TEST(PhTreeBptHashMapTest, SmokeTestWithTryEmplace) { + const int N = 200; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, N / 2); + + for (int i = 0; i < 10; i++) { + b_plus_tree_hash_map, std::equal_to<>> test_map; + std::map reference_map; + for (int j = 0; j < N; j++) { + size_t val = cube_distribution(random_engine); + bool hasVal = test_map.find(val) != test_map.end(); + bool hasValRef = reference_map.find(val) != reference_map.end(); + ASSERT_EQ(hasVal, hasValRef); + if (!hasVal) { + reference_map.emplace(val, val); + test_map.try_emplace(val, val); + } + ASSERT_EQ(test_map.size(), reference_map.size()); + for (auto entry : reference_map) { + size_t vRef = entry.first; + size_t vMap = test_map.find(vRef)->second; + ASSERT_EQ(vMap, vRef); + } + for (auto entry : test_map) { + size_t v = entry.first; + size_t vRef = reference_map.find(v)->second; + size_t vMap = test_map.find(v)->second; + ASSERT_EQ(vMap, vRef); + } + } + } +} + +template +void SmokeTestWithErase(bool by_iterator) { + const int N = 200; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, N / 2); + + for (int i = 0; i < 10; i++) { + b_plus_tree_hash_map> test_map{}; + std::unordered_map reference_map{}; + std::vector key_list{}; + for (int j = 0; j < N; j++) { + size_t key = cube_distribution(random_engine); + Id id(key); + bool hasVal = test_map.find(id) != test_map.end(); + bool hasValRef = reference_map.find(id) != reference_map.end(); + ASSERT_EQ(hasVal, hasValRef); + reference_map.emplace(id, key); + test_map.try_emplace(id, key); + key_list.emplace_back(key); + } + + int x = 0; + std::shuffle(key_list.begin(), key_list.end(), random_engine); + for (auto key : key_list) { + Id id(key); + // This may try to erase an entry that does not exist! + auto it = test_map.find(id); + if (it == test_map.end()) { + ASSERT_EQ(0u, reference_map.erase(id)); + continue; + } + if (by_iterator) { + auto next = it; + ++next; + auto is_last = next == test_map.end(); + auto next_val = is_last ? 
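            // remember the entry that follows 'it': erase(it) below must return an iterator to exactly that entry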
Id(-1) : next->first; + auto result = test_map.erase(it); + if (is_last) { + ASSERT_EQ(test_map.end(), result); + } else { + ASSERT_NE(test_map.end(), result); + ASSERT_EQ(next_val, result->first); + } + } else { + test_map.erase(id); + } + test_map._check(); + ASSERT_EQ(1u, reference_map.erase(id)); + for (auto& entry : reference_map) { + const Id& vRef = entry.first; + Id& vMap = test_map.find(vRef)->first; + ASSERT_EQ(vMap, vRef); + } + for (auto& entry : test_map) { + Id& v = entry.first; + const Id& vRef = reference_map.find(v)->first; + Id& vMap = test_map.find(v)->first; + ASSERT_EQ(vMap, vRef); + } + ASSERT_EQ(test_map.size(), reference_map.size()); + ++x; + } + } +} + +TEST(PhTreeBptHashMapTest, SmokeTestWithErase) { + SmokeTestWithErase>(true); + SmokeTestWithErase>(false); +} + +TEST(PhTreeBptHashMapTest, SmokeTestWithEraseSameHash) { + struct DumbHash { + size_t operator()(const Id&) const { + return 42; + } + }; + SmokeTestWithErase(true); + SmokeTestWithErase(false); +} diff --git a/test/common/b_plus_tree_map_test.cc b/test/common/b_plus_tree_map_test.cc new file mode 100644 index 00000000..5e83b511 --- /dev/null +++ b/test/common/b_plus_tree_map_test.cc @@ -0,0 +1,181 @@ +/* + * Copyright 2022 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "phtree/common/b_plus_tree_map.h" +#include +#include + +using namespace improbable::phtree; + +TEST(PhTreeBptMapTest, SmokeTest) { + const int max_size = 200; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, max_size - 1); + + for (int i = 0; i < 10; i++) { + b_plus_tree_map test_map; + std::map reference_map; + for (int j = 0; j < 2 * max_size; j++) { + size_t val = cube_distribution(random_engine); + bool hasVal = test_map.find(val) != test_map.end(); + bool hasValRef = reference_map.find(val) != reference_map.end(); + ASSERT_EQ(hasVal, hasValRef); + if (!hasVal) { + reference_map.emplace(val, val); + test_map.emplace(val, val); + test_map._check(); + } + ASSERT_EQ(test_map.size(), reference_map.size()); + for (auto it : reference_map) { + size_t vRef = it.first; + size_t vMap = test_map.find(vRef)->second; + ASSERT_EQ(vMap, vRef); + } + for (auto it : test_map) { + size_t v = it.first; + size_t vRef = reference_map.find(v)->second; + size_t vMap = test_map.find(v)->second; + ASSERT_EQ(vMap, vRef); + } + } + } +} + +TEST(PhTreeBptMapTest, SmokeTestWithTryEmplace) { + const int max_size = 200; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, max_size - 1); + + for (int i = 0; i < 10; i++) { + b_plus_tree_map test_map; + std::map reference_map; + for (int j = 0; j < 2 * max_size; j++) { + size_t val = cube_distribution(random_engine); + bool hasVal = test_map.find(val) != test_map.end(); + bool hasValRef = reference_map.find(val) != reference_map.end(); + ASSERT_EQ(hasVal, hasValRef); + if (!hasVal) { + reference_map.emplace(val, val); + test_map.try_emplace(val, val); + } + ASSERT_EQ(test_map.size(), reference_map.size()); + for (auto it : reference_map) { + size_t vRef = it.first; + size_t vMap = test_map.find(vRef)->second; + ASSERT_EQ(vMap, vRef); + } + for (auto it : test_map) { + size_t v = it.first; + size_t vRef = reference_map.find(v)->second; + size_t vMap = test_map.find(v)->second; + ASSERT_EQ(vMap, vRef); + } + } + } +} + +TEST(PhTreeBptMapTest, SmokeTestWithErase) { + const int max_size = 200; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, max_size - 1); + + for (int i = 0; i < 10; i++) { + b_plus_tree_map test_map{}; + std::unordered_map reference_map{}; + std::vector key_list{}; + for (int j = 0; j < 2 * max_size; j++) { + size_t val = cube_distribution(random_engine); + bool hasVal = test_map.find(val) != test_map.end(); + bool hasValRef = reference_map.find(val) != reference_map.end(); + ASSERT_EQ(hasVal, hasValRef); + if (!hasVal) { + reference_map.emplace(val, val); + test_map.try_emplace(val, val); + key_list.emplace_back(val); + } + } + + std::shuffle(key_list.begin(), key_list.end(), random_engine); + for (auto key : key_list) { + if (key % 2 == 0) { + test_map.erase(key); + } else { + auto it = test_map.find(key); + ASSERT_NE(it, test_map.end()); + ASSERT_EQ(it->second, key); + test_map.erase(it); + } + test_map._check(); + reference_map.erase(key); + for (auto it : reference_map) { + size_t vRef = it.first; + size_t vMap = test_map.find(vRef)->second; + ASSERT_EQ(vMap, vRef); + } + for (auto it : test_map) { + size_t v = it.first; + size_t vRef = reference_map.find(v)->second; + size_t vMap = test_map.find(v)->second; + ASSERT_EQ(vMap, vRef); + } + ASSERT_EQ(test_map.size(), reference_map.size()); + } + } +} + +TEST(PhTreeBptMapTest, SmokeTestLowerBound) { + const int max_size = 200; + 
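    // Cross-check lower_bound() against std::map, including keys that are absent from both containers.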
std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, max_size - 1); + + for (int i = 0; i < 10; i++) { + b_plus_tree_map test_map; + std::map reference_map; + for (int j = 0; j < 2 * max_size; j++) { + size_t val = cube_distribution(random_engine); + bool hasVal = test_map.find(val) != test_map.end(); + bool hasValRef = reference_map.find(val) != reference_map.end(); + ASSERT_EQ(hasVal, hasValRef); + if (!hasVal) { + reference_map.emplace(val, val); + test_map.try_emplace(val, val); + } + ASSERT_EQ(test_map.size(), reference_map.size()); + for (auto it : reference_map) { + size_t vRef = it.first; + size_t vMap = test_map.lower_bound(vRef)->second; + ASSERT_EQ(vMap, vRef); + } + for (auto it : test_map) { + size_t v = it.first; + size_t vRef = reference_map.find(v)->second; + size_t vMap = test_map.lower_bound(v)->second; + ASSERT_EQ(vMap, vRef); + } + for (size_t v = 0; v < max_size + 5; ++v) { + auto itRef = reference_map.lower_bound(v); + auto itMap = test_map.lower_bound(v); + if (itRef == reference_map.end()) { + ASSERT_EQ(itMap, test_map.end()); + } else { + ASSERT_NE(itMap, test_map.end()); + // ASSERT_EQ(v, itRef->second); + ASSERT_EQ(itRef->second, itMap->second); + } + } + } + } +} diff --git a/phtree/common/base_types_test.cc b/test/common/base_types_test.cc similarity index 96% rename from phtree/common/base_types_test.cc rename to test/common/base_types_test.cc index 04a45d6a..389dbf74 100644 --- a/phtree/common/base_types_test.cc +++ b/test/common/base_types_test.cc @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "base_types.h" -#include +#include "phtree/common/base_types.h" +#include #include using namespace improbable::phtree; diff --git a/phtree/common/bits_test.cc b/test/common/bits_test.cc similarity index 95% rename from phtree/common/bits_test.cc rename to test/common/bits_test.cc index e4129bf3..bc64c5cb 100644 --- a/phtree/common/bits_test.cc +++ b/test/common/bits_test.cc @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "bits.h" -#include +#include "phtree/common/bits.h" +#include #include using namespace improbable::phtree; diff --git a/phtree/common/common_test.cc b/test/common/common_test.cc similarity index 88% rename from phtree/common/common_test.cc rename to test/common/common_test.cc index 788c9fd2..eff99aad 100644 --- a/phtree/common/common_test.cc +++ b/test/common/common_test.cc @@ -14,9 +14,9 @@ * limitations under the License. 
*/ -#include "common.h" -#include -#include +#include "phtree/common/common.h" +#include "phtree/converter.h" +#include using namespace improbable::phtree; @@ -30,26 +30,26 @@ TEST(PhTreeCommonTest, NumberOfDivergingBits) { scalar_64_t l_max = std::numeric_limits::max(); bit_width_t x = NumberOfDivergingBits(PhPoint<2>({l1, l1}), PhPoint<2>({l2, l2})); - ASSERT_EQ(64, x); + ASSERT_EQ(64u, x); x = NumberOfDivergingBits(PhPoint<2>({-1, -1}), PhPoint<2>({l_min, l_min})); - ASSERT_EQ(63, x); + ASSERT_EQ(63u, x); x = NumberOfDivergingBits(PhPoint<2>({1, 1}), PhPoint<2>({l_max, l_max})); - ASSERT_EQ(63, x); + ASSERT_EQ(63u, x); x = NumberOfDivergingBits(PhPoint<2>({l1, l2}), PhPoint<2>({l1, l2})); - ASSERT_EQ(0, x); + ASSERT_EQ(0u, x); // PhPointD{679.186, 519.897, 519.897} PhPoint<3> p1{0x4085397c9ffc65e8, 0x40803f2cf7158e9a, 0x40803f2cf7158e9a}; // PhPointD{35.5375, 8.69049, 8.69049} PhPoint<3> p2{0x4041c4ce0e8a359e, 0x40216187a0776fd5, 0x40216187a0776fd5}; x = NumberOfDivergingBits(p1, p2); - ASSERT_EQ(56, x); + ASSERT_EQ(56u, x); // PhPointD{132.406, 219.74, 219.74} PhPoint<3> p20{0x40608cffffe5b480, 0x406b77aff096adc1, 0x406b77aff096adc1}; // PhPointD{679.186, 519.897, 519.897} PhPoint<3> p21{0x4085397c9ffc65e8, 0x40803f2cf7158e9a, 0x40803f2cf7158e9a}; x = NumberOfDivergingBits(p20, p21); - ASSERT_EQ(56, x); + ASSERT_EQ(56u, x); } diff --git a/phtree/common/flat_array_map_test.cc b/test/common/flat_array_map_test.cc similarity index 98% rename from phtree/common/flat_array_map_test.cc rename to test/common/flat_array_map_test.cc index e0250820..618f5254 100644 --- a/phtree/common/flat_array_map_test.cc +++ b/test/common/flat_array_map_test.cc @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "flat_array_map.h" -#include +#include "phtree/common/flat_array_map.h" +#include #include using namespace improbable::phtree; diff --git a/phtree/common/flat_sparse_map_test.cc b/test/common/flat_sparse_map_test.cc similarity index 97% rename from phtree/common/flat_sparse_map_test.cc rename to test/common/flat_sparse_map_test.cc index dcb72bba..99d581d7 100644 --- a/phtree/common/flat_sparse_map_test.cc +++ b/test/common/flat_sparse_map_test.cc @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "flat_sparse_map.h" -#include +#include "phtree/common/flat_sparse_map.h" +#include #include using namespace improbable::phtree; diff --git a/test/common/scripts.cmake b/test/common/scripts.cmake new file mode 100644 index 00000000..f8a8c9a9 --- /dev/null +++ b/test/common/scripts.cmake @@ -0,0 +1,31 @@ +macro(package_add_test TESTNAME) + # create an executable in which the tests will be stored + add_executable(${TESTNAME} ${ARGN}) + # link the Google test infrastructure, mocking library, and a default main function to + # the test executable. Remove g_test_main if writing your own main function. 
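    # GTest::gtest_main provides main(), so the sources passed in ${ARGN} only need to define TEST() cases.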
+ target_link_libraries(${TESTNAME} GTest::gtest_main phtree) + # gtest_discover_tests replaces gtest_add_tests, + # see https://cmake.org/cmake/help/v3.10/module/GoogleTest.html for more options to pass to it + gtest_discover_tests(${TESTNAME} + # set a working directory so your project root so that you can find test data via paths relative to the project root + WORKING_DIRECTORY ${PROJECT_DIR} + PROPERTIES VS_DEBUGGER_WORKING_DIRECTORY "${PROJECT_DIR}" + ) + set_target_properties(${TESTNAME} PROPERTIES FOLDER test) +endmacro() + +macro(package_add_test_main TESTNAME) + # create an executable in which the tests will be stored + add_executable(${TESTNAME} ${ARGN}) + # link the Google test infrastructure, mocking library, and a default main function to + # the test executable. Remove g_test_main if writing your own main function. + target_link_libraries(${TESTNAME} gtest gmock phtree) + # gtest_discover_tests replaces gtest_add_tests, + # see https://cmake.org/cmake/help/v3.10/module/GoogleTest.html for more options to pass to it + gtest_discover_tests(${TESTNAME} + # set a working directory so your project root so that you can find test data via paths relative to the project root + WORKING_DIRECTORY ${PROJECT_DIR} + PROPERTIES VS_DEBUGGER_WORKING_DIRECTORY "${PROJECT_DIR}" + ) + set_target_properties(${TESTNAME} PROPERTIES FOLDER test) +endmacro() \ No newline at end of file diff --git a/phtree/common/converter_test.cc b/test/converter_test.cc similarity index 95% rename from phtree/common/converter_test.cc rename to test/converter_test.cc index c9ede115..667cd221 100644 --- a/phtree/common/converter_test.cc +++ b/test/converter_test.cc @@ -14,9 +14,8 @@ * limitations under the License. */ -#include "converter.h" -#include "common.h" -#include +#include "phtree/converter.h" +#include using namespace improbable::phtree; diff --git a/phtree/common/distance_test.cc b/test/distance_test.cc similarity index 95% rename from phtree/common/distance_test.cc rename to test/distance_test.cc index 0038285a..d0652934 100644 --- a/phtree/common/distance_test.cc +++ b/test/distance_test.cc @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "common.h" -#include +#include "phtree/distance.h" +#include #include using namespace improbable::phtree; diff --git a/phtree/common/filter_test.cc b/test/filter_test.cc similarity index 50% rename from phtree/common/filter_test.cc rename to test/filter_test.cc index 41905421..abc712cd 100644 --- a/phtree/common/filter_test.cc +++ b/test/filter_test.cc @@ -14,14 +14,15 @@ * limitations under the License. 
*/ -#include "common.h" -#include +#include "phtree/filter.h" +#include #include using namespace improbable::phtree; TEST(PhTreeFilterTest, FilterSphereTest) { - FilterSphere, DistanceEuclidean<2>> filter{{5, 3}, 5}; + ConverterNoOp<2, scalar_64_t> conv{}; + FilterSphere filter{{5, 3}, 5, conv, DistanceEuclidean<2>{}}; // root is always valid ASSERT_TRUE(filter.IsNodeValid({0, 0}, 63)); // valid because node encompasses the circle @@ -44,8 +45,9 @@ TEST(PhTreeFilterTest, FilterSphereTest) { ASSERT_FALSE(filter.IsEntryValid({3, 8}, nullptr)); } -TEST(PhTreeFilterTest, BoxFilterTest) { - FilterAABB> filter{{3, 3}, {7, 7}}; +TEST(PhTreeFilterTest, FilterAABBTest) { + ConverterNoOp<2, scalar_64_t> conv{}; + FilterAABB filter{{3, 3}, {7, 7}, conv}; // root is always valid ASSERT_TRUE(filter.IsNodeValid({0, 0}, 63)); // valid because node encompasses the AABB @@ -63,4 +65,62 @@ TEST(PhTreeFilterTest, FilterNoOpSmokeTest) { auto filter = FilterNoOp(); ASSERT_TRUE(filter.IsNodeValid>({3, 7, 2}, 10)); ASSERT_TRUE(filter.IsEntryValid>({3, 7, 2}, 10)); -} \ No newline at end of file +} + +template +void TestAssignability() { + ASSERT_TRUE(std::is_copy_constructible_v); + ASSERT_TRUE(std::is_copy_assignable_v); + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); +} + +TEST(PhTreeFilterTest, FilterAssignableTest) { + using CONV = ConverterIEEE<3>; + using DIST = DistanceEuclidean<3>; + TestAssignability(); + TestAssignability>(); + TestAssignability>(); + TestAssignability>(); + TestAssignability>(); +} + +TEST(PhTreeFilterTest, ConverterAssignableTest) { + TestAssignability>(); + TestAssignability(); +} + +class TestConverter : public ConverterMultiply<2, 1, 1> { + public: + TestConverter() = default; + + TestConverter(const TestConverter&) = delete; + TestConverter(TestConverter&&) = delete; + TestConverter& operator=(const TestConverter&) = delete; + TestConverter& operator=(TestConverter&&) = delete; +}; + +TEST(PhTreeFilterTest, ConstructFilterAABBTest) { + TestConverter conv; + FilterAABB filter1{{3, 3}, {7, 7}, conv}; + ASSERT_TRUE(filter1.IsNodeValid({0, 0}, 63)); + + FilterAABB filter2{{3, 3}, {7, 7}, TestConverter()}; + ASSERT_TRUE(filter2.IsNodeValid({0, 0}, 63)); +} + +TEST(PhTreeFilterTest, ConstructFilterSphereTest) { + DistanceL1<2> dist; + TestConverter conv; + FilterSphere filter1a{{3, 3}, 7, conv}; + ASSERT_TRUE(filter1a.IsNodeValid({0, 0}, 63)); + FilterSphere filter1b{{3, 3}, 7, conv, {}}; + ASSERT_TRUE(filter1b.IsNodeValid({0, 0}, 63)); + FilterSphere filter1c{{3, 3}, 7, conv, dist}; + ASSERT_TRUE(filter1c.IsNodeValid({0, 0}, 63)); + FilterSphere filter1d{{3, 3}, 7, conv, DistanceL1<2>{}}; + ASSERT_TRUE(filter1d.IsNodeValid({0, 0}, 63)); + + FilterSphere filter2{{3, 3}, 7, TestConverter()}; + ASSERT_TRUE(filter2.IsNodeValid({0, 0}, 63)); +} diff --git a/phtree/phtree_box_d_test.cc b/test/phtree_box_d_test.cc similarity index 97% rename from phtree/phtree_box_d_test.cc rename to test/phtree_box_d_test.cc index 8f630be1..ad1782d0 100644 --- a/phtree/phtree_box_d_test.cc +++ b/test/phtree_box_d_test.cc @@ -15,12 +15,14 @@ */ #include "phtree/phtree.h" -#include +#include #include #include using namespace improbable::phtree; +namespace phtree_box_d_test { + class DoubleRng { public: DoubleRng(double minIncl, double maxExcl) : eng(), rnd{minIncl, maxExcl} {} @@ -39,12 +41,10 @@ struct Id { explicit Id(const size_t i) : _i(i){}; - bool operator==(Id& rhs) { + bool operator==(const Id& rhs) const { return _i == rhs._i; } - Id& operator=(Id const& rhs) = 
default; - size_t _i; }; @@ -172,7 +172,7 @@ void SmokeTestBasicOps(size_t N) { PhTreeDebugHelper::CheckConsistency(tree); } -TEST(PhTreeDTest, SmokeTestBasicOps) { +TEST(PhTreeBoxDTest, SmokeTestBasicOps) { SmokeTestBasicOps<1>(100); SmokeTestBasicOps<3>(10000); SmokeTestBasicOps<6>(10000); @@ -181,7 +181,7 @@ TEST(PhTreeDTest, SmokeTestBasicOps) { SmokeTestBasicOps<31>(100); } -TEST(PhTreeDTest, TestDebug) { +TEST(PhTreeBoxDTest, TestDebug) { const dimension_t dim = 3; TestTree tree; size_t N = 1000; @@ -274,7 +274,7 @@ TEST(PhTreeBoxDTest, TestEmplace) { ASSERT_EQ(i + 1, tree.size()); // try add again, this should _not_ replace the existing value - Id id2(-i); + Id id2(i + N); ASSERT_EQ(false, tree.emplace(p, id2).second); ASSERT_EQ(i, tree.emplace(p, id).first._i); ASSERT_EQ(tree.count(p), 1); @@ -432,8 +432,8 @@ TEST(PhTreeBoxDTest, TestUpdateWithEmplace) { PhBoxD pNew( {pOld.min()[0] + delta, pOld.min()[1] + delta, pOld.min()[2] + delta}, {pOld.max()[0] + delta, pOld.max()[1] + delta, pOld.max()[2] + delta}); - int n = tree.erase(pOld); - ASSERT_EQ(1, n); + size_t n = tree.erase(pOld); + ASSERT_EQ(1u, n); tree.emplace(pNew, 42u); ASSERT_EQ(1, tree.count(pNew)); ASSERT_EQ(0, tree.count(pOld)); @@ -461,8 +461,8 @@ TEST(PhTreeBoxDTest, TestUpdateWithEmplaceHint) { PhPointD max{pOld.max()[0] + delta, pOld.max()[1] + delta, pOld.max()[2] + delta}; TestPoint pNew{min, max}; auto iter = tree.find(pOld); - int n = tree.erase(iter); - ASSERT_EQ(1, n); + size_t n = tree.erase(iter); + ASSERT_EQ(1u, n); tree.emplace_hint(iter, pNew, 42u); ASSERT_EQ(1, tree.count(pNew)); if (delta != 0.0) { @@ -486,8 +486,8 @@ TEST(PhTreeBoxDTest, TestEraseByIterator) { for (auto& p : points) { auto iter = tree.find(p); ASSERT_NE(tree.end(), iter); - int count = tree.erase(iter); - ASSERT_EQ(1, count); + size_t count = tree.erase(iter); + ASSERT_EQ(1u, count); ASSERT_EQ(tree.end(), tree.find(p)); i++; } @@ -505,8 +505,8 @@ TEST(PhTreeBoxDTest, TestEraseByIteratorQuery) { for (size_t i = 0; i < N; ++i) { auto iter = tree.begin(); ASSERT_NE(tree.end(), iter); - int count = tree.erase(iter); - ASSERT_EQ(1, count); + size_t count = tree.erase(iter); + ASSERT_EQ(1u, count); } ASSERT_EQ(0, tree.erase(tree.end())); @@ -651,7 +651,7 @@ TEST(PhTreeBoxDTest, TestWindowQueryManyMoving) { size_t nn = 0; for (int i = -120; i < 120; i++) { PhPointD min{i * 10., i * 9., i * 11.}; - PhPointD max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; + PhPointD max{i * 10. + query_length, i * 9. + query_length, i * 11. + query_length}; std::set referenceResult; referenceQuery(points, min, max, referenceResult); @@ -754,3 +754,5 @@ TEST(PhTreeBoxDTest, SmokeTestTreeAPI) { PhTreeBoxD<3, const Id> treeConst; treeConst.emplace(PhBoxD<3>({1, 2, 3}, {4, 5, 6}), Id(1)); } + +} // namespace phtree_box_d_test diff --git a/test/phtree_box_d_test_filter.cc b/test/phtree_box_d_test_filter.cc new file mode 100644 index 00000000..f457421f --- /dev/null +++ b/test/phtree_box_d_test_filter.cc @@ -0,0 +1,636 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "phtree/phtree.h" +#include +#include +#include + +using namespace improbable::phtree; + +namespace phtree_box_d_test_filter { + +template +using TestKey = PhBoxD; + +template +using TestPoint = PhPointD; + +template +using TestTree = PhTreeBoxD; + +class DoubleRng { + public: + DoubleRng(double minIncl, double maxExcl) : eng(), rnd{minIncl, maxExcl} {} + + double next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_real_distribution rnd; +}; + +struct Id { + Id() = default; + + explicit Id(const int i) : _i(i){}; + + bool operator==(const Id& rhs) const { + return _i == rhs._i; + } + + Id(Id const& rhs) = default; + Id(Id&& rhs) = default; + Id& operator=(Id const& rhs) = default; + Id& operator=(Id&& rhs) = default; + + int _i; +}; + +template +void generateCube(std::vector>& points, size_t N) { + DoubleRng rng(-1000, 1000); + auto refTree = std::unordered_map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N; i++) { + auto point = PhPointD{rng.next(), rng.next(), rng.next()}; + auto box = PhBoxD{point, {point[0] + 1, point[1] + 1, point[2] + 1}}; + if (refTree.count(box) != 0) { + i--; + continue; + } + + refTree.emplace(box, i); + points.push_back(box); + } + ASSERT_EQ(refTree.size(), N); + ASSERT_EQ(points.size(), N); +} + +template +void populate(TestTree& tree, std::vector>& points, size_t N) { + generateCube(points, N); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.insert(points[i], i).second); + } + ASSERT_EQ(N, tree.size()); +} + +static int f_default_construct_ = 0; +static int f_construct_ = 0; +static int f_copy_construct_ = 0; +static int f_move_construct_ = 0; +static int f_copy_assign_ = 0; +static int f_move_assign_ = 0; +static int f_destruct_ = 0; + +static void f_reset_id_counters() { + f_default_construct_ = 0; + f_construct_ = 0; + f_copy_construct_ = 0; + f_move_construct_ = 0; + f_copy_assign_ = 0; + f_move_assign_ = 0; + f_destruct_ = 0; +} + +template +struct FilterCount { + FilterCount() : last_known{} { + ++f_default_construct_; + } + + explicit FilterCount(const T i) : last_known{i} { + ++f_construct_; + } + + FilterCount(const FilterCount& other) { + ++f_copy_construct_; + last_known = other.last_known; + } + + FilterCount(FilterCount&& other) noexcept { + ++f_move_construct_; + last_known = other.last_known; + } + + FilterCount& operator=(const FilterCount& other) noexcept { + ++f_copy_assign_; + last_known = other.last_known; + return *this; + } + FilterCount& operator=(FilterCount&& other) noexcept { + ++f_move_assign_; + last_known = other.last_known; + return *this; + } + + ~FilterCount() { + ++f_destruct_; + } + + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint<2 * DIM>&, const T& value) { + last_known = const_cast(value); + return true; + } + [[nodiscard]] constexpr bool IsNodeValid(const PhPoint<2 * DIM>&, int) { + return true; + } + + T last_known; +}; + +template +struct DistanceCount { + DistanceCount() { + ++f_default_construct_; + } + + DistanceCount(const DistanceCount&) { + ++f_copy_construct_; + } + + DistanceCount(DistanceCount&&) noexcept { + ++f_move_construct_; + } + + DistanceCount& operator=(const DistanceCount&) noexcept { + ++f_copy_assign_; + return *this; + } + DistanceCount& operator=(DistanceCount&&) noexcept { + ++f_move_assign_; + return *this; + } + + ~DistanceCount() { + ++f_destruct_; + } + + double operator()(const PhPointD& p1, const 
PhPointD& p2) const { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; ++i) { + double d2 = p1[i] - p2[i]; + sum2 += d2 * d2; + } + return sqrt(sum2); + }; +}; + +static size_t static_id = 0; + +template +struct CallbackCount { + CallbackCount() { + static_id = 0; + ++f_default_construct_; + } + + CallbackCount(const CallbackCount&) { + ++f_copy_construct_; + } + + CallbackCount(CallbackCount&&) noexcept { + ++f_move_construct_; + } + + CallbackCount& operator=(const CallbackCount&) noexcept { + ++f_copy_assign_; + return *this; + } + CallbackCount& operator=(CallbackCount&&) noexcept { + ++f_move_assign_; + return *this; + } + + ~CallbackCount() { + ++f_destruct_; + } + + void operator()(TestKey, Id& t) { + static_id = t._i; + } +}; + +template +struct FilterConst { + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint<2 * DIM>&, const T& value) const { + assert(value._i == 1); + return true; + } + [[nodiscard]] constexpr bool IsNodeValid(const PhPoint<2 * DIM>&, int) const { + return true; + } +}; + +template +struct CallbackConst { + void operator()(const TestKey, const Id& t) const { + static_id = t._i; + } +}; + +[[maybe_unused]] static void print_id_counters() { + std::cout << "dc=" << f_default_construct_ << " c=" << f_construct_ + << " cc=" << f_copy_construct_ << " mc=" << f_move_construct_ + << " ca=" << f_copy_assign_ << " ma=" << f_move_assign_ << " d=" << f_destruct_ + << std::endl; +} + +TEST(PhTreeTest, TestFilterAPI_FOR_EACH) { + // Test edge case: only one entry in tree + TestKey<3> p{{1, 2, 3}, {4, 5, 6}}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + CallbackCount<3> callback; + FilterCount<3, Id> filter{}; + // rvalue + tree.for_each(callback, filter); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // lvalue + tree.for_each(CallbackCount<3>(), FilterCount<3, Id>()); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_LE(1, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + CallbackCount<3> callbackC; + FilterConst<3, Id> filterC; + treeC.for_each(callbackC, filterC); + // rvalue + treeC.for_each(CallbackConst<3>{}, FilterConst<3, Id>()); + f_reset_id_counters(); +} + +TEST(PhTreeTest, TestFilterAPI_FOR_EACH_WQ) { + // Test edge case: only one entry in tree + TestKey<3> p{{1, 2, 3}, {4, 5, 6}}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + TestTree<3, Id>::QueryBox qb{{1, 2, 3}, {4, 5, 6}}; + CallbackCount<3> callback; + FilterCount<3, Id> filter{}; + // lvalue + tree.for_each(qb, callback, filter); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + tree.for_each({{1, 2, 3}, {4, 5, 6}}, CallbackCount<3>{}, FilterCount<3, Id>()); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_LE(1, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + treeC.for_each(qb, callback, filterC); + // rvalue + treeC.for_each({{1, 2, 3}, {4, 5, 6}}, 
CallbackConst<3>(), FilterConst<3, Id>()); + f_reset_id_counters(); +} + +TEST(PhTreeTest, TestFilterAPI_BEGIN) { + // Test edge case: only one entry in tree + TestKey<3> p{{1, 2, 3}, {4, 5, 6}}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + FilterCount<3, Id> filter{}; + // lvalue + ASSERT_EQ(tree.begin(filter)->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + ASSERT_EQ(tree.begin(FilterCount<3, Id>())->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_LE(1, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + ASSERT_EQ(treeC.begin(filterC)->_i, 1); + // rvalue + ASSERT_EQ(treeC.begin(FilterConst<3, Id>())->_i, 1); + f_reset_id_counters(); +} + +TEST(PhTreeTest, TestFilterAPI_WQ) { + // Test edge case: only one entry in tree + TestKey<3> p{{1, 2, 3}, {4, 5, 6}}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + TestTree<3, Id>::QueryBox qb{{1, 2, 3}, {4, 5, 6}}; + FilterCount<3, Id> filter{}; + // lvalue + ASSERT_EQ(tree.begin_query(qb, filter)->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + ASSERT_EQ(tree.begin_query({{1, 2, 3}, {4, 5, 6}}, FilterCount<3, Id>())->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_LE(1, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + ASSERT_EQ(treeC.begin_query(qb, filterC)->_i, 1); + // rvalue + ASSERT_EQ(treeC.begin_query(qb, FilterConst<3, Id>())->_i, 1); + f_reset_id_counters(); +} + +// TEST(PhTreeTest, TestFilterAPI_KNN) { +// // Test edge case: only one entry in tree +// TestKey<3> p{{1, 2, 3}, {4, 5, 6}}; +// auto tree = TestTree<3, Id>(); +// tree.emplace(p, Id{1}); +// +// FilterCount<3, Id> filter{}; +// DistanceCount<3> dist_fn{}; +// // lvalue +// ASSERT_EQ(tree.begin_knn_query(3, {2, 3, 4}, dist_fn, filter)->_i, 1); +// ASSERT_EQ(2, f_construct_ + f_default_construct_); +// ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); +// f_reset_id_counters(); +// +// // rvalue +// ASSERT_EQ(tree.begin_knn_query(3, {2, 3, 4}, DistanceCount<3>{}, FilterCount<3, Id>())->_i, +// 1); ASSERT_EQ(2, f_construct_ + f_default_construct_); ASSERT_LE(0, f_copy_construct_ + +// f_move_construct_ + f_copy_assign_ + f_move_assign_); f_reset_id_counters(); +// +// // rvalue #2 +// auto a = tree.begin_knn_query, FilterCount<3, Id>>(3, {2, 3, 4})->_i; +// ASSERT_EQ(a, 1); +// ASSERT_EQ(2, f_construct_ + f_default_construct_); +// ASSERT_LE(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); +// f_reset_id_counters(); +// +// // const Tree: just test that it compiles +// const TestTree<3, Id>& treeC = tree; +// // lvalue +// FilterConst<3, Id> filterC; +// ASSERT_EQ(treeC.begin_knn_query(3, {2, 3, 4}, dist_fn, filterC)->_i, 1); +// // rvalue +// ASSERT_EQ(treeC.begin_knn_query(3, {2, 3, 4}, DistanceCount<3>{}, FilterConst<3, Id>())->_i, +// 1); f_reset_id_counters(); +// } + +template +double distance(const 
TestPoint& p1, const TestKey& p2) { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; ++i) { + double closest = std::clamp(p1[i], p2.min()[i], p2.max()[i]); + double d2 = p1[i] + closest; + sum2 += d2 * d2; + } + return sqrt(sum2); +}; + +template +void referenceSphereQuery( + std::vector>& points, + TestPoint& center, + double radius, + std::set& result) { + for (size_t i = 0; i < points.size(); i++) { + auto& p = points[i]; + if (distance(center, p) <= radius) { + result.insert(i); + } + } +} + +template +void referenceAABBQuery( + std::vector>& points, + TestPoint& center, + double radius, + std::set& result) { + for (size_t i = 0; i < points.size(); i++) { + auto& p = points[i]; + bool inside = true; + for (dimension_t i = 0; i < DIM; ++i) { + inside &= (p.min()[i] <= center[i] + radius) && (p.max()[i] >= center[i] - radius); + } + + if (inside) { + result.insert(i); + } + } +} + +template +PhBoxD QueryBox(PhPointD& center, double radius) { + typename TestTree::QueryBox query_box{ + {center[0] - radius, center[1] - radius, center[2] - radius}, + {center[0] + radius, center[1] + radius, center[2] + radius}}; + return query_box; +} + +// We use 'int&' because gtest does not compile with assertions in non-void functions. +template +void testSphereQuery(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceSphereQuery(points, center, radius, referenceResult); + + result = 0; + auto filter = FilterBoxSphere(center, radius, tree.converter()); + for (auto it = tree.begin(filter); it != tree.end(); it++) { + auto& x = *it; + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + result++; + } + ASSERT_EQ(referenceResult.size(), result); +} + +template +void testSphereQueryWithBox(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceSphereQuery(points, center, radius, referenceResult); + + result = 0; + auto query_box = QueryBox(center, radius); + auto filter = FilterBoxSphere(center, radius, tree.converter()); + for (auto it = tree.begin_query(query_box, filter); it != tree.end(); it++) { + auto& x = *it; + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + result++; + } + ASSERT_EQ(referenceResult.size(), result); +} + +template +void testSphereQueryForEach(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceSphereQuery(points, center, radius, referenceResult); + + result = 0; + auto filter = FilterBoxSphere(center, radius, tree.converter()); + auto callback = [&result, &referenceResult](TestKey, const size_t& x) { + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + ++result; + }; + tree.for_each(callback, filter); + ASSERT_EQ(referenceResult.size(), result); +} + +template +void testSphereQueryForEachQueryBox(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceSphereQuery(points, center, radius, referenceResult); + + result = 0; + auto query_box = QueryBox(center, radius); + auto filter = FilterBoxSphere(center, radius, tree.converter()); + auto callback = [&result, &referenceResult](TestKey, const size_t& x) { + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + ++result; + }; + 
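    // The window query prunes candidates by AABB overlap; the sphere filter then rejects boxes outside the radius.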
tree.for_each(query_box, callback, filter); + ASSERT_EQ(referenceResult.size(), result); +} + +template +void testAABBQuery(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceAABBQuery(points, center, radius, referenceResult); + + result = 0; + auto query_box = QueryBox(center, radius); + auto filter = FilterBoxAABB(query_box.min(), query_box.max(), tree.converter()); + for (auto it = tree.begin(filter); it != tree.end(); it++) { + auto& x = *it; + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + result++; + } + ASSERT_EQ(referenceResult.size(), result); +} + +template +void Query0(QUERY query) { + TestPoint p{-10000, -10000, -10000}; + int n = 0; + query(p, 0.1, 100, n); + ASSERT_EQ(0, n); +} + +template +void QueryMany(QUERY query) { + TestPoint p{0, 0, 0}; + int n = 0; + query(p, 1000, 1000, n); + ASSERT_GT(n, 400); + ASSERT_LT(n, 800); +} + +template +void QueryManyAABB(QUERY query) { + TestPoint p{0, 0, 0}; + int n = 0; + query(p, 1000, 1000, n); + ASSERT_EQ(n, 1000); +} + +template +void QueryAll(QUERY query) { + TestPoint p{0, 0, 0}; + int n = 0; + query(p, 10000, 1000, n); + ASSERT_EQ(1000, n); +} + +TEST(PhTreeBoxDFilterTest, TestSphereQuery) { + Query0<3>(&testSphereQuery<3>); + QueryMany<3>(&testSphereQuery<3>); + QueryAll<3>(&testSphereQuery<3>); +} + +TEST(PhTreeBoxDFilterTest, TestSphereQueryWithQueryBox) { + Query0<3>(&testSphereQueryWithBox<3>); + QueryMany<3>(&testSphereQueryWithBox<3>); + QueryAll<3>(&testSphereQueryWithBox<3>); +} + +TEST(PhTreeBoxDFilterTest, TestSphereQueryForEach) { + Query0<3>(&testSphereQueryForEach<3>); + QueryMany<3>(&testSphereQueryForEach<3>); + QueryAll<3>(&testSphereQueryForEach<3>); +} + +TEST(PhTreeBoxDFilterTest, TestSphereQueryForEachWithQueryBox) { + Query0<3>(&testSphereQueryForEachQueryBox<3>); + QueryMany<3>(&testSphereQueryForEachQueryBox<3>); + QueryAll<3>(&testSphereQueryForEachQueryBox<3>); +} + +TEST(PhTreeBoxDFilterTest, TestAABBQuery) { + Query0<3>(&testAABBQuery<3>); + QueryManyAABB<3>(&testAABBQuery<3>); + QueryAll<3>(&testAABBQuery<3>); +} + +} // namespace phtree_box_d_test_filter diff --git a/phtree/phtree_box_d_test_query_types.cc b/test/phtree_box_d_test_query_types.cc similarity index 93% rename from phtree/phtree_box_d_test_query_types.cc rename to test/phtree_box_d_test_query_types.cc index c5460665..84a77a83 100644 --- a/phtree/phtree_box_d_test_query_types.cc +++ b/test/phtree_box_d_test_query_types.cc @@ -15,11 +15,13 @@ */ #include "phtree/phtree.h" -#include +#include #include using namespace improbable::phtree; +namespace phtree_box_d_test_query_types { + template using TestPoint = PhBoxD; @@ -60,3 +62,5 @@ TEST(PhTreeBoxDTestQueryTypes, SmokeTestQuery) { q3++; ASSERT_EQ(q3, tree.end()); } + +} // namespace phtree_box_d_test_query_types diff --git a/phtree/phtree_box_f_test.cc b/test/phtree_box_f_test.cc similarity index 97% rename from phtree/phtree_box_f_test.cc rename to test/phtree_box_f_test.cc index 05cfbe55..e95f8334 100644 --- a/phtree/phtree_box_f_test.cc +++ b/test/phtree_box_f_test.cc @@ -15,12 +15,14 @@ */ #include "phtree/phtree.h" -#include +#include #include #include using namespace improbable::phtree; +namespace phtree_box_f_test { + template using TestPoint = PhBoxF; @@ -46,12 +48,10 @@ struct Id { explicit Id(const size_t i) : _i(i){}; - bool operator==(Id& rhs) { + bool operator==(const Id& rhs) const { return _i == rhs._i; } - Id& operator=(Id const& rhs) = default; 
- size_t _i; }; @@ -173,7 +173,7 @@ void SmokeTestBasicOps(size_t N) { PhTreeDebugHelper::CheckConsistency(tree); } -TEST(PhTreeDTest, SmokeTestBasicOps) { +TEST(PhTreeBoxFTest, SmokeTestBasicOps) { SmokeTestBasicOps<1>(100); SmokeTestBasicOps<3>(10000); SmokeTestBasicOps<6>(10000); @@ -182,7 +182,7 @@ TEST(PhTreeDTest, SmokeTestBasicOps) { SmokeTestBasicOps<31>(100); } -TEST(PhTreeDTest, TestDebug) { +TEST(PhTreeBoxFTest, TestDebug) { const dimension_t dim = 3; TestTree tree; size_t N = 1000; @@ -275,7 +275,7 @@ TEST(PhTreeBoxFTest, TestEmplace) { ASSERT_EQ(i + 1, tree.size()); // try add again, this should _not_ replace the existing value - Id id2(-i); + Id id2(i + N); ASSERT_EQ(false, tree.emplace(p, id2).second); ASSERT_EQ(i, tree.emplace(p, id).first._i); ASSERT_EQ(tree.count(p), 1); @@ -368,7 +368,7 @@ void populate( template void populate( TestTree& tree, std::vector>& points, size_t N, double boxLen = 10) { - generateCube(points, N, boxLen); + generateCube(points, N, (float)boxLen); for (size_t i = 0; i < N; i++) { ASSERT_TRUE(tree.emplace(points[i], i + 1).second); } @@ -436,8 +436,8 @@ TEST(PhTreeBoxFTest, TestUpdateWithEmplace) { TestPoint pNew( {pOld.min()[0] + delta, pOld.min()[1] + delta, pOld.min()[2] + delta}, {pOld.max()[0] + delta, pOld.max()[1] + delta, pOld.max()[2] + delta}); - int n = tree.erase(pOld); - ASSERT_EQ(1, n); + size_t n = tree.erase(pOld); + ASSERT_EQ(1u, n); tree.emplace(pNew, 42u); ASSERT_EQ(1, tree.count(pNew)); ASSERT_EQ(0, tree.count(pOld)); @@ -452,7 +452,7 @@ TEST(PhTreeBoxFTest, TestUpdateWithEmplaceHint) { const dimension_t dim = 3; TestTree tree; size_t N = 10000; - std::array deltas{0, 0.1, 1, 10}; + std::array deltas{0.f, 0.1f, 1.f, 10.f}; std::vector> points; populate(tree, points, N); @@ -465,8 +465,8 @@ TEST(PhTreeBoxFTest, TestUpdateWithEmplaceHint) { PhPointF max{pOld.max()[0] + delta, pOld.max()[1] + delta, pOld.max()[2] + delta}; TestPoint pNew{min, max}; auto iter = tree.find(pOld); - int n = tree.erase(iter); - ASSERT_EQ(1, n); + size_t n = tree.erase(iter); + ASSERT_EQ(1u, n); tree.emplace_hint(iter, pNew, 42u); ASSERT_EQ(1, tree.count(pNew)); if (delta != 0.0) { @@ -490,8 +490,8 @@ TEST(PhTreeBoxFTest, TestEraseByIterator) { for (auto& p : points) { auto iter = tree.find(p); ASSERT_NE(tree.end(), iter); - int count = tree.erase(iter); - ASSERT_EQ(1, count); + size_t count = tree.erase(iter); + ASSERT_EQ(1u, count); ASSERT_EQ(tree.end(), tree.find(p)); i++; } @@ -509,8 +509,8 @@ TEST(PhTreeBoxFTest, TestEraseByIteratorQuery) { for (size_t i = 0; i < N; ++i) { auto iter = tree.begin(); ASSERT_NE(tree.end(), iter); - int count = tree.erase(iter); - ASSERT_EQ(1, count); + size_t count = tree.erase(iter); + ASSERT_EQ(1u, count); } ASSERT_EQ(0, tree.erase(tree.end())); @@ -758,3 +758,5 @@ TEST(PhTreeBoxFTest, SmokeTestTreeAPI) { PhTreeBoxF<3, const Id> treeConst; treeConst.emplace(TestPoint<3>({1, 2, 3}, {4, 5, 6}), Id(1)); } + +} // namespace phtree_box_f_test diff --git a/phtree/phtree_d_test.cc b/test/phtree_d_test.cc similarity index 83% rename from phtree/phtree_d_test.cc rename to test/phtree_d_test.cc index 6e966906..c18d559d 100644 --- a/phtree/phtree_d_test.cc +++ b/test/phtree_d_test.cc @@ -15,11 +15,13 @@ */ #include "phtree/phtree.h" -#include +#include #include using namespace improbable::phtree; +namespace phtree_d_test { + template using TestPoint = PhPointD; @@ -42,13 +44,18 @@ class DoubleRng { struct Id { Id() = default; - explicit Id(const int i) : _i(i){}; + explicit Id(const int i) : _i{i} {} + + explicit Id(const 
size_t i) : _i{static_cast(i)} {} - bool operator==(Id& rhs) { + bool operator==(const Id& rhs) const { return _i == rhs._i; } + Id(Id const& rhs) = default; + Id(Id&& rhs) = default; Id& operator=(Id const& rhs) = default; + Id& operator=(Id&& rhs) = default; int _i; }; @@ -284,7 +291,7 @@ TEST(PhTreeDTest, TestEmplace) { ASSERT_EQ(i + 1, tree.size()); // try add again, this should _not_ replace the existing value - Id id2(-i); + Id id2(i + N); ASSERT_EQ(false, tree.emplace(p, id2).second); ASSERT_EQ(i, tree.emplace(p, id).first._i); ASSERT_EQ(tree.count(p), 1); @@ -328,7 +335,7 @@ TEST(PhTreeDTest, TestSquareBrackets) { ASSERT_EQ(0, tree[p]._i); ASSERT_EQ(tree.count(p), 1); if (i % 2 == 0) { - tree[p]._i = i; + tree[p]._i = (int)i; } else { tree[p] = id; } @@ -441,8 +448,8 @@ TEST(PhTreeDTest, TestUpdateWithEmplace) { for (auto& p : points) { auto pOld = p; TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; - int n = tree.erase(pOld); - ASSERT_EQ(1, n); + size_t n = tree.erase(pOld); + ASSERT_EQ(1u, n); tree.emplace(pNew, 42); ASSERT_EQ(1, tree.count(pNew)); ASSERT_EQ(0, tree.count(pOld)); @@ -468,8 +475,8 @@ TEST(PhTreeDTest, TestUpdateWithEmplaceHint) { double delta = deltas[d_n]; TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; auto iter = tree.find(pOld); - int n = tree.erase(iter); - ASSERT_EQ(1, n); + size_t n = tree.erase(iter); + ASSERT_EQ(1u, n); tree.emplace_hint(iter, pNew, 42); ASSERT_EQ(1, tree.count(pNew)); if (delta != 0.0) { @@ -482,6 +489,142 @@ TEST(PhTreeDTest, TestUpdateWithEmplaceHint) { tree.clear(); } +TEST(PhTreeDTest, TestUpdateWithRelocate) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 1, 10, 100}; + std::vector> points; + populate(tree, points, N); + + size_t d_n = 0; + for (int x = 0; x < 10; ++x) { + size_t i = 0; + for (auto& p : points) { + auto pOld = p; + d_n = (d_n + 1) % deltas.size(); + scalar_64_t delta = deltas[d_n]; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + if (delta > 0.0 && tree.find(pNew) != tree.end()) { + // Skip this, there is already another entry + ASSERT_EQ(0, tree.relocate(pOld, pNew)); + } else { + ASSERT_EQ(1, tree.relocate(pOld, pNew)); + if (delta > 0.0) { + // second time fails because value has already been moved + ASSERT_EQ(0, tree.relocate(pOld, pNew)); + } + ASSERT_EQ(Id(i), *tree.find(pNew)); + p = pNew; + } + ++i; + } + PhTreeDebugHelper::CheckConsistency(tree); + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); + + // Check that empty tree works + ASSERT_EQ(0, tree.relocate(points[0], points[1])); + // Check that small tree works + tree.emplace(points[0], 1); + ASSERT_EQ(1, tree.relocate(points[0], points[1])); + ASSERT_EQ(tree.end(), tree.find(points[0])); + ASSERT_EQ(Id(1), *tree.find(points[1])); + ASSERT_EQ(1, tree.size()); + tree.clear(); + + // check that existing destination fails + tree.emplace(points[0], 1); + tree.emplace(points[1], 2); + ASSERT_EQ(0, tree.relocate(points[0], points[1])); +} + +TEST(PhTreeDTest, TestUpdateWithRelocateCorenerCases) { + const dimension_t dim = 3; + TestTree tree; + TestPoint point0{1, 2, 3}; + TestPoint point1{4, 5, 6}; + + // Check that empty tree works + ASSERT_EQ(0, tree.relocate(point0, point1)); + ASSERT_EQ(0, tree.size()); + + // Check that small tree works + tree.emplace(point0, 1); + ASSERT_EQ(1, tree.relocate(point0, point1)); + ASSERT_EQ(tree.end(), tree.find(point0)); + ASSERT_EQ(Id(1), *tree.find(point1)); + ASSERT_EQ(1, tree.size()); + tree.clear(); + + // check that 
existing destination fails + tree.emplace(point0, Id(0)); + tree.emplace(point1, Id(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1)); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source fails + tree.emplace(point1, Id(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1)); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); +} + +TEST(PhTreeDTest, TestUpdateWithRelocateIf) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 1, 10, 100}; + std::vector> points; + populate(tree, points, N); + + size_t d_n = 0; + for (int x = 0; x < 10; ++x) { + size_t i = 0; + size_t done = 0; + auto pred = [](const Id& id) { return id._i % 2 == 0; }; + for (auto& p : points) { + auto pOld = p; + d_n = (d_n + 1) % deltas.size(); + scalar_64_t delta = deltas[d_n]; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + if ((delta > 0.0 && tree.find(pNew) != tree.end()) || (i % 2 != 0)) { + // Skip this, there is already another entry + ASSERT_EQ(0, tree.relocate_if(pOld, pNew, pred)); + } else { + ASSERT_EQ(1, tree.relocate_if(pOld, pNew, pred)); + if (delta > 0.0) { + // second time fails because value has already been moved + ASSERT_EQ(0, tree.relocate_if(pOld, pNew, pred)); + } + ASSERT_EQ(Id(i), *tree.find(pNew)); + p = pNew; + ++done; + } + ++i; + } + ASSERT_GT(done, i * 0.4); + ASSERT_LT(done, i * 0.6); + PhTreeDebugHelper::CheckConsistency(tree); + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); + + // Check that empty tree works + auto pred = [](const Id&) { return true; }; + ASSERT_EQ(0, tree.relocate_if(points[0], points[1], pred)); + // Check that small tree works + tree.emplace(points[0], 1); + ASSERT_EQ(1, tree.relocate_if(points[0], points[1], pred)); + ASSERT_EQ(tree.end(), tree.find(points[0])); + ASSERT_EQ(Id(1), *tree.find(points[1])); + ASSERT_EQ(1, tree.size()); +} + TEST(PhTreeDTest, TestEraseByIterator) { const dimension_t dim = 3; TestTree tree; @@ -493,8 +636,8 @@ TEST(PhTreeDTest, TestEraseByIterator) { for (auto& p : points) { auto iter = tree.find(p); ASSERT_NE(tree.end(), iter); - int count = tree.erase(iter); - ASSERT_EQ(1, count); + size_t count = tree.erase(iter); + ASSERT_EQ(1u, count); ASSERT_EQ(tree.end(), tree.find(p)); i++; } @@ -512,8 +655,8 @@ TEST(PhTreeDTest, TestEraseByIteratorQuery) { for (size_t i = 0; i < N; ++i) { auto iter = tree.begin(); ASSERT_NE(tree.end(), iter); - int count = tree.erase(iter); - ASSERT_EQ(1, count); + size_t count = tree.erase(iter); + ASSERT_EQ(1u, count); } ASSERT_EQ(0, tree.erase(tree.end())); @@ -688,9 +831,9 @@ TEST(PhTreeDTest, TestWindowQueryManyMoving) { double query_length = 200; size_t nn = 0; - for (int i = -120; i < 120; i++) { + for (long i = -120; i < 120; i++) { TestPoint min{i * 10., i * 9., i * 11.}; - TestPoint max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; + TestPoint max{i * 10. + query_length, i * 9. + query_length, i * 11. + query_length}; std::set referenceResult; referenceQuery(points, min, max, referenceResult); @@ -724,7 +867,7 @@ TEST(PhTreeDTest, TestWindowForEachQueryManyMoving) { size_t nn = 0; for (int i = -120; i < 120; i++) { TestPoint min{i * 10., i * 9., i * 11.}; - TestPoint max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; + TestPoint max{i * 10. + query_length, i * 9. + query_length, i * 11. 
+ query_length}; std::set referenceResult; referenceQuery(points, min, max, referenceResult); @@ -1011,3 +1154,5 @@ TEST(PhTreeDTest, SmokeTestTreeAPI) { PhTreeD<3, const Id> treeConst; treeConst.emplace(PhPointD<3>{1, 2, 3}, Id(1)); } + +} // namespace phtree_d_test diff --git a/test/phtree_d_test_copy_move.cc b/test/phtree_d_test_copy_move.cc new file mode 100644 index 00000000..506bc66b --- /dev/null +++ b/test/phtree_d_test_copy_move.cc @@ -0,0 +1,302 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "phtree/phtree.h" +#include +#include + +using namespace improbable::phtree; + +namespace phtree_d_test_copy_move { + +template +using TestPoint = PhPointD; + +template +using TestTree = PhTreeD; + +class DoubleRng { + public: + DoubleRng(double minIncl, double maxExcl) : eng(), rnd{minIncl, maxExcl} {} + + double next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_real_distribution rnd; +}; + +struct IdCopyOnly { + explicit IdCopyOnly(const size_t i) : _i{i} {} + + IdCopyOnly() = default; + IdCopyOnly(const IdCopyOnly& other) = default; + IdCopyOnly(IdCopyOnly&& other) = delete; + // IdCopyOnly& operator=(const IdCopyOnly& other) = default; + IdCopyOnly& operator=(const IdCopyOnly& other) { + _i = other._i; + return *this; + } + IdCopyOnly& operator=(IdCopyOnly&& other) = delete; + ~IdCopyOnly() = default; + + bool operator==(const IdCopyOnly& rhs) const { + return _i == rhs._i; + } + + size_t _i{}; +}; + +struct IdMoveOnly { + explicit IdMoveOnly(const size_t i) : _i{i} {} + + IdMoveOnly() = default; + IdMoveOnly(const IdMoveOnly& other) = delete; + IdMoveOnly(IdMoveOnly&& other) = default; + IdMoveOnly& operator=(const IdMoveOnly& other) = delete; + IdMoveOnly& operator=(IdMoveOnly&& other) = default; + ~IdMoveOnly() = default; + + bool operator==(const IdMoveOnly& rhs) const { + return _i == rhs._i; + } + + size_t _i{}; +}; + +// Assert that copy-ctr is not called even when available +struct IdCopyOrMove { + explicit IdCopyOrMove(const size_t i) : _i{i} {} + + IdCopyOrMove() = default; + IdCopyOrMove(const IdCopyOrMove&) { + assert(false); + } + IdCopyOrMove(IdCopyOrMove&& other) = default; + IdCopyOrMove& operator=(const IdCopyOrMove&) { + assert(false); + } + IdCopyOrMove& operator=(IdCopyOrMove&& other) = default; + ~IdCopyOrMove() = default; + + bool operator==(const IdCopyOrMove& rhs) const { + return _i == rhs._i; + } + + size_t _i{}; +}; + +template +void generateCube(std::vector>& points, size_t N) { + DoubleRng rng(-1000, 1000); + auto refTree = std::map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N; i++) { + TestPoint point{}; + for (dimension_t d = 0; d < DIM; ++d) { + point[d] = rng.next(); + } + if (refTree.count(point) != 0) { + i--; + continue; + } + + refTree.emplace(point, i); + points.push_back(point); + } + ASSERT_EQ(refTree.size(), N); + ASSERT_EQ(points.size(), N); +} + +template +void SmokeTestBasicOps_QueryAndErase(TestTree& tree, 
std::vector>& points) { + size_t N = points.size(); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query({p, p}); + ASSERT_NE(q, tree.end()); + ASSERT_EQ(i, (*q)._i); + q++; + ASSERT_EQ(q, tree.end()); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_knn_query(1, p, DistanceEuclidean()); + ASSERT_NE(q, tree.end()); + ASSERT_EQ(i, (*q)._i); + q++; + ASSERT_EQ(q, tree.end()); + } + + // TODO enable for new relocate functions + // for (size_t i = 0; i < N; i++) { + // TestPoint& p = points.at(i); + // TestPoint pOld = p; + // for (dimension_t d = 0; d < DIM; ++d) { + // p[d] += 10000; + // } + // auto r = tree.relocate(pOld, p); + // ASSERT_EQ(r, 1u); + // } + + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_NE(tree.find(p), tree.end()); + ASSERT_EQ(tree.count(p), 1u); + ASSERT_EQ(i, tree.find(p)->_i); + if (i % 2 == 0) { + ASSERT_EQ(1u, tree.erase(p)); + } else { + auto iter = tree.find(p); + ASSERT_EQ(1u, tree.erase(iter)); + } + + ASSERT_EQ(tree.count(p), 0u); + ASSERT_EQ(tree.end(), tree.find(p)); + ASSERT_EQ(N - i - 1, tree.size()); + + // try remove again + ASSERT_EQ(0u, tree.erase(p)); + ASSERT_EQ(tree.count(p), 0u); + ASSERT_EQ(tree.end(), tree.find(p)); + ASSERT_EQ(N - i - 1, tree.size()); + if (i < N - 1) { + ASSERT_FALSE(tree.empty()); + } + } + ASSERT_EQ(0u, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); +} + +template +void SmokeTestBasicOps(size_t N) { + TestTree tree; + std::vector> points; + generateCube(points, N); + + ASSERT_EQ(0u, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 0u); + ASSERT_EQ(tree.end(), tree.find(p)); + + Id id(i); + if (i % 4 == 0) { + ASSERT_TRUE(tree.try_emplace(p, id).second); + } else if (i % 4 == 1) { + ASSERT_TRUE(tree.emplace(p, id).second); + } else if (i % 4 == 2) { + tree[p] = id; + } else { + ASSERT_TRUE(tree.insert(p, id).second); + } + ASSERT_EQ(tree.count(p), 1u); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try adding it again + ASSERT_FALSE(tree.insert(p, id).second); + ASSERT_FALSE(tree.emplace(p, id).second); + ASSERT_EQ(tree.count(p), 1u); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + ASSERT_FALSE(tree.empty()); + } + + SmokeTestBasicOps_QueryAndErase(tree, points); +} + +TEST(PhTreeDTestCopyMove, SmokeTestBasicOpsCopyOnly) { + SmokeTestBasicOps<1, IdCopyOnly>(100); + SmokeTestBasicOps<3, IdCopyOnly>(100); + SmokeTestBasicOps<6, IdCopyOnly>(100); + SmokeTestBasicOps<10, IdCopyOnly>(100); + SmokeTestBasicOps<20, IdCopyOnly>(100); + SmokeTestBasicOps<63, IdCopyOnly>(100); +} + +template +void SmokeTestBasicOpsMoveOnly(size_t N) { + TestTree tree; + std::vector> points; + generateCube(points, N); + + ASSERT_EQ(0u, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 0u); + ASSERT_EQ(tree.end(), tree.find(p)); + + if (i % 2 == 0) { + ASSERT_TRUE(tree.try_emplace(p, Id(i)).second); + } else if (i % 4 == 1) { + tree[p] = Id(i); + } else { + ASSERT_TRUE(tree.emplace(p, Id(i)).second); + } + ASSERT_EQ(tree.count(p), 1u); + ASSERT_NE(tree.end(), 
tree.find(p)); + ASSERT_EQ(i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try adding it again + ASSERT_FALSE(tree.try_emplace(p, Id(i)).second); + ASSERT_FALSE(tree.emplace(p, Id(i)).second); + ASSERT_EQ(tree.count(p), 1u); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + ASSERT_FALSE(tree.empty()); + } + + SmokeTestBasicOps_QueryAndErase(tree, points); +} + +TEST(PhTreeDTestCopyMove, SmokeTestBasicOpsMoveOnly) { + SmokeTestBasicOpsMoveOnly<1, IdMoveOnly>(100); + SmokeTestBasicOpsMoveOnly<3, IdMoveOnly>(100); + SmokeTestBasicOpsMoveOnly<6, IdMoveOnly>(100); + SmokeTestBasicOpsMoveOnly<10, IdMoveOnly>(100); + SmokeTestBasicOpsMoveOnly<20, IdMoveOnly>(100); + SmokeTestBasicOpsMoveOnly<63, IdMoveOnly>(100); +} + +TEST(PhTreeDTestCopyMove, SmokeTestBasicOpsCopyFails) { + SmokeTestBasicOpsMoveOnly<1, IdCopyOrMove>(100); + SmokeTestBasicOpsMoveOnly<3, IdCopyOrMove>(100); + SmokeTestBasicOpsMoveOnly<6, IdCopyOrMove>(100); + SmokeTestBasicOpsMoveOnly<10, IdCopyOrMove>(100); + SmokeTestBasicOpsMoveOnly<20, IdCopyOrMove>(100); + SmokeTestBasicOpsMoveOnly<63, IdCopyOrMove>(100); +} + +} // namespace phtree_d_test_copy_move diff --git a/phtree/phtree_d_test_custom_key.cc b/test/phtree_d_test_custom_key.cc similarity index 97% rename from phtree/phtree_d_test_custom_key.cc rename to test/phtree_d_test_custom_key.cc index aa293f1d..4a22f54d 100644 --- a/phtree/phtree_d_test_custom_key.cc +++ b/test/phtree_d_test_custom_key.cc @@ -15,11 +15,13 @@ */ #include "phtree/phtree.h" -#include +#include #include using namespace improbable::phtree; +namespace phtree_d_test_custom_key { + static const double MY_MULTIPLIER = 1000000.; /* @@ -106,7 +108,7 @@ class DoubleRng { struct Id { Id() = default; - explicit Id(const int i) : _i(i){}; + explicit Id(const size_t i) : _i{static_cast(i)} {} bool operator==(const Id& rhs) const { return _i == rhs._i; @@ -215,3 +217,5 @@ void SmokeTestBasicOps() { TEST(PhTreeDTestCustomKey, SmokeTestBasicOps) { SmokeTestBasicOps<3>(); } + +} // namespace phtree_d_test_custom_key diff --git a/test/phtree_d_test_filter.cc b/test/phtree_d_test_filter.cc new file mode 100644 index 00000000..86ef2c3f --- /dev/null +++ b/test/phtree_d_test_filter.cc @@ -0,0 +1,484 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "phtree/phtree.h" +#include +#include +#include + +using namespace improbable::phtree; + +namespace phtree_d_test_filter { + +template +using TestPoint = PhPointD; + +template +using TestTree = PhTreeD; + +class DoubleRng { + public: + DoubleRng(double minIncl, double maxExcl) : eng(), rnd{minIncl, maxExcl} {} + + double next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_real_distribution rnd; +}; + +struct Id { + Id() = default; + + explicit Id(const int i) : _i(i){}; + + bool operator==(const Id& rhs) const { + return _i == rhs._i; + } + + Id(Id const& rhs) = default; + Id(Id&& rhs) = default; + Id& operator=(Id const& rhs) = default; + Id& operator=(Id&& rhs) = default; + + int _i; +}; + +template +void generateCube(std::vector>& points, size_t N) { + DoubleRng rng(-1000, 1000); + auto refTree = std::map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N; i++) { + auto point = TestPoint{rng.next(), rng.next(), rng.next()}; + if (refTree.count(point) != 0) { + i--; + continue; + } + + refTree.emplace(point, i); + points.push_back(point); + } + ASSERT_EQ(refTree.size(), N); + ASSERT_EQ(points.size(), N); +} + +template +void populate(TestTree& tree, std::vector>& points, size_t N) { + generateCube(points, N); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.insert(points[i], i).second); + } + ASSERT_EQ(N, tree.size()); +} + +static int f_default_construct_ = 0; +static int f_construct_ = 0; +static int f_copy_construct_ = 0; +static int f_move_construct_ = 0; +static int f_copy_assign_ = 0; +static int f_move_assign_ = 0; +static int f_destruct_ = 0; + +static void f_reset_id_counters() { + f_default_construct_ = 0; + f_construct_ = 0; + f_copy_construct_ = 0; + f_move_construct_ = 0; + f_copy_assign_ = 0; + f_move_assign_ = 0; + f_destruct_ = 0; +} + +template +struct FilterCount { + FilterCount() : last_known{} { + ++f_default_construct_; + } + + explicit FilterCount(const T i) : last_known{i} { + ++f_construct_; + } + + FilterCount(const FilterCount& other) { + ++f_copy_construct_; + last_known = other.last_known; + } + + FilterCount(FilterCount&& other) noexcept { + ++f_move_construct_; + last_known = other.last_known; + } + + FilterCount& operator=(const FilterCount& other) noexcept { + ++f_copy_assign_; + last_known = other.last_known; + return *this; + } + FilterCount& operator=(FilterCount&& other) noexcept { + ++f_move_assign_; + last_known = other.last_known; + return *this; + } + + ~FilterCount() { + ++f_destruct_; + } + + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint&, const T& value) { + last_known = const_cast(value); + return true; + } + [[nodiscard]] constexpr bool IsNodeValid(const PhPoint&, int) { + return true; + } + + T last_known; +}; + +template +struct DistanceCount { + DistanceCount() { + ++f_default_construct_; + } + + DistanceCount(const DistanceCount&) { + ++f_copy_construct_; + } + + DistanceCount(DistanceCount&&) noexcept { + ++f_move_construct_; + } + + DistanceCount& operator=(const DistanceCount&) noexcept { + ++f_copy_assign_; + return *this; + } + DistanceCount& operator=(DistanceCount&&) noexcept { + ++f_move_assign_; + return *this; + } + + ~DistanceCount() { + ++f_destruct_; + } + + double operator()(const PhPointD& p1, const PhPointD& p2) const { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; ++i) { + double d2 = p1[i] - p2[i]; + sum2 += d2 * d2; + } + return sqrt(sum2); + }; +}; + +static size_t static_id = 0; + +template +struct CallbackCount { + 
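+    // Records default/copy/move construction and assignment counts so the for_each tests can verify that callbacks are forwarded without unnecessary copies.
+    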
CallbackCount() { + static_id = 0; + ++f_default_construct_; + } + + CallbackCount(const CallbackCount&) { + ++f_copy_construct_; + } + + CallbackCount(CallbackCount&&) noexcept { + ++f_move_construct_; + } + + CallbackCount& operator=(const CallbackCount&) noexcept { + ++f_copy_assign_; + return *this; + } + CallbackCount& operator=(CallbackCount&&) noexcept { + ++f_move_assign_; + return *this; + } + + ~CallbackCount() { + ++f_destruct_; + } + + void operator()(TestPoint, Id& t) { + static_id = t._i; + } +}; + +template +struct FilterConst { + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint&, const T& value) const { + assert(value._i == 1); + return true; + } + [[nodiscard]] constexpr bool IsNodeValid(const PhPoint&, int) const { + return true; + } +}; + +template +struct CallbackConst { + void operator()(const TestPoint, const Id& t) const { + static_id = t._i; + } +}; + +[[maybe_unused]] static void print_id_counters() { + std::cout << "dc=" << f_default_construct_ << " c=" << f_construct_ + << " cc=" << f_copy_construct_ << " mc=" << f_move_construct_ + << " ca=" << f_copy_assign_ << " ma=" << f_move_assign_ << " d=" << f_destruct_ + << std::endl; +} + +TEST(PhTreeDFilterTest, TestFilterAPI_FOR_EACH) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + CallbackCount<3> callback; + FilterCount<3, Id> filter{}; + // rvalue + tree.for_each(callback, filter); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // lvalue + tree.for_each(CallbackCount<3>(), FilterCount<3, Id>()); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_LE(1, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + CallbackCount<3> callbackC; + FilterConst<3, Id> filterC; + treeC.for_each(callbackC, filterC); + // rvalue + treeC.for_each(CallbackConst<3>{}, FilterConst<3, Id>()); + f_reset_id_counters(); +} + +TEST(PhTreeDFilterTest, TestFilterAPI_FOR_EACH_WQ) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + TestTree<3, Id>::QueryBox qb{{1, 2, 3}, {4, 5, 6}}; + CallbackCount<3> callback; + FilterCount<3, Id> filter{}; + // lvalue + tree.for_each(qb, callback, filter); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + tree.for_each({{1, 2, 3}, {4, 5, 6}}, CallbackCount<3>{}, FilterCount<3, Id>()); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_LE(1, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + treeC.for_each(qb, callback, filterC); + // rvalue + treeC.for_each({{1, 2, 3}, {4, 5, 6}}, CallbackConst<3>(), FilterConst<3, Id>()); + f_reset_id_counters(); +} + +TEST(PhTreeDFilterTest, TestFilterAPI_BEGIN) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + 
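+    // A single named filter: the lvalue call below must construct it exactly once and never copy or move it.
+    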
FilterCount<3, Id> filter{}; + // lvalue + ASSERT_EQ(tree.begin(filter)->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + ASSERT_EQ(tree.begin(FilterCount<3, Id>())->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_LE(1, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + ASSERT_EQ(treeC.begin(filterC)->_i, 1); + // rvalue + ASSERT_EQ(treeC.begin(FilterConst<3, Id>())->_i, 1); + f_reset_id_counters(); +} + +TEST(PhTreeDFilterTest, TestFilterAPI_WQ) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + TestTree<3, Id>::QueryBox qb{{1, 2, 3}, {4, 5, 6}}; + FilterCount<3, Id> filter{}; + // lvalue + ASSERT_EQ(tree.begin_query(qb, filter)->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + ASSERT_EQ(tree.begin_query({{1, 2, 3}, {4, 5, 6}}, FilterCount<3, Id>())->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_LE(1, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + ASSERT_EQ(treeC.begin_query(qb, filterC)->_i, 1); + // rvalue + ASSERT_EQ(treeC.begin_query(qb, FilterConst<3, Id>())->_i, 1); + f_reset_id_counters(); +} + +TEST(PhTreeDFilterTest, TestFilterAPI_KNN) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + FilterCount<3, Id> filter{}; + DistanceCount<3> dist_fn{}; + // lvalue + ASSERT_EQ(tree.begin_knn_query(3, {2, 3, 4}, dist_fn, filter)->_i, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + ASSERT_EQ(tree.begin_knn_query(3, {2, 3, 4}, DistanceCount<3>{}, FilterCount<3, Id>())->_i, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_LE(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue #2 + auto a = tree.begin_knn_query, FilterCount<3, Id>>(3, {2, 3, 4})->_i; + ASSERT_EQ(a, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_LE(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + ASSERT_EQ(treeC.begin_knn_query(3, {2, 3, 4}, dist_fn, filterC)->_i, 1); + // rvalue + ASSERT_EQ(treeC.begin_knn_query(3, {2, 3, 4}, DistanceCount<3>{}, FilterConst<3, Id>())->_i, 1); + f_reset_id_counters(); +} + +template +double distance(const TestPoint& p1, const TestPoint& p2) { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; ++i) { + double d2 = p1[i] - p2[i]; + sum2 += d2 * d2; + } + return sqrt(sum2); +}; + +template +void referenceSphereQuery( + std::vector>& points, + TestPoint& center, + double radius, + std::set& result) { + for (size_t i = 0; i < points.size(); i++) 
{ + auto& p = points[i]; + if (distance(center, p) <= radius) { + result.insert(i); + } + } +} + +// We use 'int&' because gtest does not compile with assertions in non-void functions. +template +void testSphereQuery(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceSphereQuery(points, center, radius, referenceResult); + + result = 0; + auto filter = FilterSphere(center, radius, tree.converter()); + for (auto it = tree.begin(filter); it != tree.end(); it++) { + auto& x = *it; + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + result++; + } + ASSERT_EQ(referenceResult.size(), result); +} + +TEST(PhTreeDFilterTest, TestSphereQuery0) { + const dimension_t dim = 3; + TestPoint p{-10000, -10000, -10000}; + int n = 0; + testSphereQuery(p, 0.1, 100, n); + ASSERT_EQ(0, n); +} + +TEST(PhTreeDFilterTest, TestSphereQueryMany) { + const dimension_t dim = 3; + TestPoint p{0, 0, 0}; + int n = 0; + testSphereQuery(p, 1000, 1000, n); + ASSERT_GT(n, 400); + ASSERT_LT(n, 800); +} + +TEST(PhTreeDFilterTest, TestSphereQueryAll) { + const dimension_t dim = 3; + TestPoint p{0, 0, 0}; + int n = 0; + testSphereQuery(p, 10000, 1000, n); + ASSERT_EQ(1000, n); +} + +} // namespace phtree_d_test_filter diff --git a/phtree/phtree_d_test_preprocessor.cc b/test/phtree_d_test_preprocessor.cc similarity index 94% rename from phtree/phtree_d_test_preprocessor.cc rename to test/phtree_d_test_preprocessor.cc index 7e2e9010..588a2a20 100644 --- a/phtree/phtree_d_test_preprocessor.cc +++ b/test/phtree_d_test_preprocessor.cc @@ -15,11 +15,13 @@ */ #include "phtree/phtree.h" -#include +#include #include using namespace improbable::phtree; +namespace phtree_d_test_preprocessor { + template using TestPoint = PhPointD; @@ -42,14 +44,12 @@ class DoubleRng { struct Id { Id() = default; - explicit Id(const int i) : _i(i){}; + explicit Id(const size_t i) : _i{static_cast(i)} {} - bool operator==(Id& rhs) { + bool operator==(const Id& rhs) const { return _i == rhs._i; } - Id& operator=(Id const& rhs) = default; - int _i; }; @@ -153,3 +153,5 @@ TEST(PhTreeDTestPreprocessor, SmokeTestBasicOps) { SmokeTestBasicOps<10>(); SmokeTestBasicOps<20>(); } + +} // namespace phtree_d_test_preprocessor diff --git a/phtree/phtree_f_test.cc b/test/phtree_f_test.cc similarity index 98% rename from phtree/phtree_f_test.cc rename to test/phtree_f_test.cc index 9e2e3a93..42d6f5dd 100644 --- a/phtree/phtree_f_test.cc +++ b/test/phtree_f_test.cc @@ -15,11 +15,13 @@ */ #include "phtree/phtree.h" -#include +#include #include using namespace improbable::phtree; +namespace phtree_f_test { + template using TestPoint = PhPointF; @@ -43,14 +45,14 @@ class FloatRng { struct Id { Id() = default; - explicit Id(const int i) : _i(i){}; + explicit Id(const int i) : _i{i} {} + + explicit Id(const size_t i) : _i{static_cast(i)} {} - bool operator==(Id& rhs) { + bool operator==(const Id& rhs) const { return _i == rhs._i; } - Id& operator=(Id const& rhs) = default; - int _i; }; @@ -69,7 +71,7 @@ template double distance(const TestPoint& p1, const TestPoint& p2) { double sum2 = 0; for (dimension_t i = 0; i < DIM; i++) { - double d = p1[i] - p2[i]; + double d = (double)(p1[i]) - (double)p2[i]; sum2 += d * d; } return sqrt(sum2); @@ -283,7 +285,7 @@ TEST(PhTreeFTest, TestEmplace) { ASSERT_EQ(i + 1, tree.size()); // try add again, this should _not_ replace the existing value - Id id2(-i); + Id id2(i + N); ASSERT_EQ(false, tree.emplace(p, 
id2).second); ASSERT_EQ(i, tree.emplace(p, id).first._i); ASSERT_EQ(tree.count(p), 1); @@ -327,7 +329,7 @@ TEST(PhTreeFTest, TestSquareBrackets) { ASSERT_EQ(0, tree[p]._i); ASSERT_EQ(tree.count(p), 1); if (i % 2 == 0) { - tree[p]._i = i; + tree[p]._i = (int)i; } else { tree[p] = id; } @@ -443,8 +445,8 @@ TEST(PhTreeFTest, TestUpdateWithEmplace) { static_cast(pOld[0] + delta), static_cast(pOld[1] + delta), static_cast(pOld[2] + delta)}; - int n = tree.erase(pOld); - ASSERT_EQ(1, n); + size_t n = tree.erase(pOld); + ASSERT_EQ(1u, n); tree.emplace(pNew, 42); ASSERT_EQ(1, tree.count(pNew)); ASSERT_EQ(0, tree.count(pOld)); @@ -466,8 +468,8 @@ TEST(PhTreeFTest, TestEraseByIterator) { for (auto& p : points) { auto iter = tree.find(p); ASSERT_NE(tree.end(), iter); - int count = tree.erase(iter); - ASSERT_EQ(1, count); + size_t count = tree.erase(iter); + ASSERT_EQ(1u, count); ASSERT_EQ(tree.end(), tree.find(p)); i++; } @@ -485,8 +487,8 @@ TEST(PhTreeFTest, TestEraseByIteratorQuery) { for (size_t i = 0; i < N; ++i) { auto iter = tree.begin(); ASSERT_NE(tree.end(), iter); - int count = tree.erase(iter); - ASSERT_EQ(1, count); + size_t count = tree.erase(iter); + ASSERT_EQ(1u, count); } ASSERT_EQ(0, tree.erase(tree.end())); @@ -993,3 +995,5 @@ TEST(PhTreeFTest, SmokeTestTreeAPI) { PhTreeF<3, const Id> treeConst; treeConst.emplace(PhPointF<3>{1, 2, 3}, Id(1)); } + +} // namespace phtree_f_test diff --git a/phtree/phtree_multimap_box_d_test.cc b/test/phtree_multimap_box_d_test.cc similarity index 82% rename from phtree/phtree_multimap_box_d_test.cc rename to test/phtree_multimap_box_d_test.cc index d1f19a85..386de516 100644 --- a/phtree/phtree_multimap_box_d_test.cc +++ b/test/phtree_multimap_box_d_test.cc @@ -15,12 +15,14 @@ */ #include "phtree/phtree_multimap.h" -#include +#include #include #include using namespace improbable::phtree; +namespace phtree_multimap_box_d_test { + // Number of entries that have the same coordinate static const size_t NUM_DUPL = 4; static const double WORLD_MIN = -1000; @@ -49,29 +51,30 @@ class DoubleRng { struct Id { Id() = default; - explicit Id(const int i) : _i(i), data_{0} {}; + explicit Id(const size_t i) : _i{static_cast(i)}, data_{0} {} bool operator==(const Id& rhs) const { return _i == rhs._i; } - Id& operator=(Id const& rhs) = default; - int _i; int data_; }; +} // namespace phtree_multimap_box_d_test namespace std { template <> -struct hash { - size_t operator()(const Id& x) const { +struct hash { + size_t operator()(const phtree_multimap_box_d_test::Id& x) const { return std::hash{}(x._i); } }; }; // namespace std +namespace phtree_multimap_box_d_test { + struct PointDistance { - PointDistance(double distance, size_t id) : _distance(distance), _id(id) {} + PointDistance(double distance, size_t id) : _distance(distance), _id(static_cast(id)) {} double _distance; int _id; @@ -99,7 +102,7 @@ void generateCube(std::vector>& points, size_t N, double box_Len points.reserve(N); for (size_t i = 0; i < N / NUM_DUPL; i++) { - // create duplicates, ie. entries with the same coordinates. However, avoid unintentional + // create duplicates, i.e. entries with the same coordinates. However, avoid unintentional // duplicates. 
TestPoint key{}; for (dimension_t d = 0; d < DIM; ++d) { @@ -123,7 +126,6 @@ void generateCube(std::vector>& points, size_t N, double box_Len template void SmokeTestBasicOps(size_t N) { TestTree tree; - std::vector> points; generateCube(points, N); @@ -139,17 +141,19 @@ void SmokeTestBasicOps(size_t N) { } Id id(i); - if (i % 2 == 0) { + if (i % 4 == 0) { ASSERT_TRUE(tree.emplace(p, id).second); - } else { + } else if (i % 4 == 1) { ASSERT_TRUE(tree.insert(p, id).second); + } else { + ASSERT_TRUE(tree.try_emplace(p, id).second); } ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); ASSERT_NE(tree.end(), tree.find(p)); ASSERT_EQ(id._i, tree.find(p, id)->_i); ASSERT_EQ(i + 1, tree.size()); - // try add again + // try adding it again ASSERT_FALSE(tree.insert(p, id).second); ASSERT_FALSE(tree.emplace(p, id).second); ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); @@ -413,6 +417,13 @@ TEST(PhTreeMMBoxDTest, TestFind) { ASSERT_NE(tree.find(p, id), tree.end()); ASSERT_NE(tree.end(), tree.find(p, id)); ASSERT_EQ(tree.find(p, id)->_i, i); + auto iterN = tree.find(points[0]); + size_t n = 0; + while (iterN != tree.end()) { + ++iterN; + ++n; + } + ASSERT_EQ(n, NUM_DUPL); i++; } @@ -488,7 +499,42 @@ TEST(PhTreeMMBoxDTest, TestUpdateWithEmplaceHint) { tree.clear(); } -TEST(PhTreeMMDTest, TestUpdateWithRelocate) { +// TEST(PhTreeMMDTest, TestUpdateWithRelocate) { +// const dimension_t dim = 3; +// TestTree tree; +// size_t N = 10000; +// std::array deltas{0, 0.1, 1, 10}; +// std::vector> points; +// populate(tree, points, N); +// +// for (auto delta : deltas) { +// size_t i = 0; +// for (auto& p : points) { +// auto pOld = p; +// TestPoint pNew; +// if (relocate_to_existing_coordinate) { +// pNew = delta > 0.0 ? points[(i + 17) % N] : pOld; +// } else { +// pNew = {pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; +// } +// PhPointD min{pOld.min()[0] + delta, pOld.min()[1] + delta, pOld.min()[2] + +// delta}; PhPointD max{pOld.max()[0] + delta, pOld.max()[1] + delta, pOld.max()[2] +// + delta}; TestPoint pNew{min, max}; ASSERT_EQ(1, tree.relocate(pOld, pNew, +// Id(i))); if (delta > 0.0) { +// // second time fails because value has already been moved +// ASSERT_EQ(0, tree.relocate(pOld, pNew, Id(i))); +// } +// ASSERT_EQ(Id(i), *tree.find(pNew, Id(i))); +// p = pNew; +// ++i; +// } +// } +// +// ASSERT_EQ(N, tree.size()); +// tree.clear(); +// } + +void TestUpdateWithRelocate(bool relocate_to_existing_coordinate) { const dimension_t dim = 3; TestTree tree; size_t N = 10000; @@ -496,29 +542,91 @@ TEST(PhTreeMMDTest, TestUpdateWithRelocate) { std::vector> points; populate(tree, points, N); - size_t i = 0; - size_t d_n = 0; - for (auto& p : points) { - auto pOld = p; - d_n = (d_n + 1) % deltas.size(); - double delta = deltas[d_n]; - PhPointD min{pOld.min()[0] + delta, pOld.min()[1] + delta, pOld.min()[2] + delta}; - PhPointD max{pOld.max()[0] + delta, pOld.max()[1] + delta, pOld.max()[2] + delta}; - TestPoint pNew{min, max}; - ASSERT_EQ(1, tree.relocate(pOld, pNew, Id(i))); - if (delta > 0.0) { - // second time fails because value has already been moved - ASSERT_EQ(0, tree.relocate(pOld, pNew, Id(i))); + for (auto delta : deltas) { + size_t i = 0; + for (auto& p : points) { + auto pOld = p; + TestPoint pNew; + if (relocate_to_existing_coordinate) { + pNew = delta > 0.0 ? 
points[(i + 17) % N] : pOld; + } else { + PhPointD min{ + pOld.min()[0] + delta, pOld.min()[1] + delta, pOld.min()[2] + delta}; + PhPointD max{ + pOld.max()[0] + delta, pOld.max()[1] + delta, pOld.max()[2] + delta}; + pNew = {min, max}; + } + ASSERT_EQ(1u, tree.relocate(pOld, pNew, Id(i))); + if (pOld != pNew) { + // second time fails because value has already been moved + ASSERT_EQ(0u, tree.relocate(pOld, pNew, Id(i))); + ASSERT_EQ(tree.end(), tree.find(pOld, Id(i))); + } else { + ASSERT_EQ(1u, tree.relocate(pOld, pNew, Id(i))); + } + ASSERT_EQ(Id(i), *tree.find(pNew, Id(i))); + p = pNew; + ++i; } - ASSERT_EQ(Id(i), *tree.find(pNew, Id(i))); - p = pNew; - ++i; + PhTreeDebugHelper::CheckConsistency(tree); } ASSERT_EQ(N, tree.size()); tree.clear(); } +TEST(PhTreeMMDTest, TestUpdateWithRelocateDelta) { + TestUpdateWithRelocate(false); +} + +TEST(PhTreeMMDTest, TestUpdateWithRelocateToExisting) { + TestUpdateWithRelocate(true); +} + +TEST(PhTreeMMDTest, TestUpdateWithRelocateCornerCases) { + const dimension_t dim = 3; + TestTree tree; + TestPoint point0{{1, 2, 3}, {2, 3, 4}}; + TestPoint point1{{2, 3, 4}, {3, 4, 5}}; + + // Check that empty tree works + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(42))); + + // Check that small tree works + tree.emplace(point0, Id(1)); + ASSERT_EQ(1u, tree.relocate(point0, point1, Id(1))); + ASSERT_EQ(tree.end(), tree.find(point0, Id(1))); + ASSERT_EQ(1, tree.find(point1, Id(1))->_i); + ASSERT_EQ(1u, tree.size()); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that existing destination fails + tree.emplace(point0, Id(1)); + tree.emplace(point1, Id(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(1))); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source bucket fails + tree.emplace(point1, Id(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(0))); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source value fails (target bucket exists) + tree.emplace(point0, Id(0)); + tree.emplace(point1, Id(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(2))); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source value fails (target bucket missing) + tree.emplace(point0, Id(0)); + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(2))); + PhTreeDebugHelper::CheckConsistency(tree); +} + TEST(PhTreeMMBoxDTest, TestEraseByIterator) { const dimension_t dim = 3; TestTree tree; @@ -583,12 +691,16 @@ TEST(PhTreeMMBoxDTest, TestExtent) { template struct FilterEvenId { - [[nodiscard]] constexpr bool IsEntryValid(const PhPoint<2 * DIM>&, const T& value) const { - return value._i % 2 == 0; + template + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint<2 * DIM>&, const BucketT&) const { + return true; } [[nodiscard]] constexpr bool IsNodeValid(const PhPoint<2 * DIM>&, int) const { return true; } + [[nodiscard]] constexpr bool IsBucketEntryValid(const PhPoint<2 * DIM>&, const T& value) const { + return value._i % 2 == 0; + } }; TEST(PhTreeMMDTest, TestExtentFilter) { @@ -809,7 +921,7 @@ TEST(PhTreeMMBoxDTest, TestWindowQueryManyMoving) { size_t nn = 0; for (int i = -120; i < 120; i++) { PhPointD min{i * 10., i * 9., i * 11.}; - PhPointD max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; + PhPointD max{i * 10. + query_length, i * 9. + query_length, i * 11. 
+ query_length}; std::set referenceResult; referenceQuery(points, min, max, referenceResult); @@ -905,3 +1017,5 @@ TEST(PhTreeMMBoxDTest, SmokeTestTreeAPI) { treePtr.clear(); delete idPtr; } + +} // namespace phtree_multimap_box_d_test diff --git a/phtree/phtree_multimap_d_test.cc b/test/phtree_multimap_d_test.cc similarity index 80% rename from phtree/phtree_multimap_d_test.cc rename to test/phtree_multimap_d_test.cc index d695ec91..495fa1ff 100644 --- a/phtree/phtree_multimap_d_test.cc +++ b/test/phtree_multimap_d_test.cc @@ -15,11 +15,13 @@ */ #include "phtree/phtree_multimap.h" -#include +#include #include using namespace improbable::phtree; +namespace phtree_multimap_d_test { + // Number of entries that have the same coordinate static const size_t NUM_DUPL = 4; static const double WORLD_MIN = -1000; @@ -29,7 +31,7 @@ template using TestPoint = PhPointD; template -using TestTree = PhTreeMultiMap>; +using TestTree = PhTreeMultiMapD; class DoubleRng { public: @@ -47,36 +49,31 @@ class DoubleRng { struct Id { Id() = default; - explicit Id(const int i) : _i(i), data_{0} {}; + explicit Id(const int i) : _i{i}, data_{0} {} + explicit Id(const size_t i) : _i{static_cast(i)}, data_{0} {} bool operator==(const Id& rhs) const { return _i == rhs._i; } - Id& operator=(Id const& rhs) = default; - int _i; int data_; }; +} namespace std { template <> -struct hash { - size_t operator()(const Id& x) const { +struct hash { + size_t operator()(const phtree_multimap_d_test::Id& x) const { return std::hash{}(x._i); } }; }; // namespace std -struct IdHash { - template - std::size_t operator()(std::pair const& v) const { - return std::hash()(v.size()); - } -}; +namespace phtree_multimap_d_test { struct PointDistance { - PointDistance(double distance, size_t id) : _distance(distance), _id(id) {} + PointDistance(double distance, size_t id) : _distance(distance), _id(static_cast(id)) {} double _distance; int _id; @@ -113,7 +110,7 @@ void generateCube(std::vector>& points, size_t N) { points.reserve(N); for (size_t i = 0; i < N / NUM_DUPL; i++) { - // create duplicates, ie. entries with the same coordinates. However, avoid unintentional + // create duplicates, i.e. entries with the same coordinates. However, avoid unintentional // duplicates. 
TestPoint key{}; for (dimension_t d = 0; d < DIM; ++d) { @@ -136,7 +133,6 @@ void generateCube(std::vector>& points, size_t N) { template void SmokeTestBasicOps(size_t N) { TestTree tree; - std::vector> points; generateCube(points, N); @@ -152,17 +148,19 @@ void SmokeTestBasicOps(size_t N) { } Id id(i); - if (i % 2 == 0) { + if (i % 4 == 0) { ASSERT_TRUE(tree.emplace(p, id).second); - } else { + } else if (i % 4 == 1) { ASSERT_TRUE(tree.insert(p, id).second); + } else { + ASSERT_TRUE(tree.try_emplace(p, id).second); } ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); ASSERT_NE(tree.end(), tree.find(p)); ASSERT_EQ(id._i, tree.find(p, id)->_i); ASSERT_EQ(i + 1, tree.size()); - // try add again + // try adding it again ASSERT_FALSE(tree.insert(p, id).second); ASSERT_FALSE(tree.emplace(p, id).second); ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); @@ -421,6 +419,13 @@ TEST(PhTreeMMDTest, TestFind) { ASSERT_NE(tree.find(p, id), tree.end()); ASSERT_NE(tree.end(), tree.find(p, id)); ASSERT_EQ(tree.find(p, id)->_i, i); + auto iterN = tree.find(points[0]); + size_t n = 0; + while (iterN != tree.end()) { + ++iterN; + ++n; + } + ASSERT_EQ(n, NUM_DUPL); i++; } @@ -490,9 +495,13 @@ TEST(PhTreeMMDTest, TestUpdateWithEmplaceHint) { ASSERT_EQ(N, tree.size()); tree.clear(); + + tree.emplace_hint(tree.end(), {11, 21, 31}, 421); + tree.emplace_hint(tree.begin(), {1, 2, 3}, 42); + ASSERT_EQ(2, tree.size()); } -TEST(PhTreeMMDTest, TestUpdateWithRelocate) { +void TestUpdateWithRelocate(bool relocate_to_existing_coordinate) { const dimension_t dim = 3; TestTree tree; size_t N = 10000; @@ -500,27 +509,87 @@ TEST(PhTreeMMDTest, TestUpdateWithRelocate) { std::vector> points; populate(tree, points, N); - size_t i = 0; - size_t d_n = 0; - for (auto& p : points) { - auto pOld = p; - d_n = (d_n + 1) % deltas.size(); - double delta = deltas[d_n]; - TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; - ASSERT_EQ(1, tree.relocate(pOld, pNew, Id(i))); - if (delta > 0.0) { - // second time fails because value has already been moved - ASSERT_EQ(0, tree.relocate(pOld, pNew, Id(i))); + for (auto delta : deltas) { + size_t i = 0; + for (auto& p : points) { + auto pOld = p; + TestPoint pNew; + if (relocate_to_existing_coordinate) { + pNew = delta > 0.0 ? 
points[(i + 17) % N] : pOld; + } else { + pNew = {pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + } + ASSERT_EQ(1u, tree.relocate(pOld, pNew, Id(i))); + if (pOld != pNew) { + // second time fails because value has already been moved + ASSERT_EQ(0u, tree.relocate(pOld, pNew, Id(i))); + ASSERT_EQ(tree.end(), tree.find(pOld, Id(i))); + } else { + ASSERT_EQ(1u, tree.relocate(pOld, pNew, Id(i))); + } + ASSERT_EQ(Id(i), *tree.find(pNew, Id(i))); + p = pNew; + ++i; } - ASSERT_EQ(Id(i), *tree.find(pNew, Id(i))); - p = pNew; - ++i; + PhTreeDebugHelper::CheckConsistency(tree); } ASSERT_EQ(N, tree.size()); tree.clear(); } +TEST(PhTreeMMDTest, TestUpdateWithRelocateDelta) { + TestUpdateWithRelocate(false); +} + +TEST(PhTreeMMDTest, TestUpdateWithRelocateToExisting) { + TestUpdateWithRelocate(true); +} + +TEST(PhTreeMMDTest, TestUpdateWithRelocateCornerCases) { + const dimension_t dim = 3; + TestTree tree; + TestPoint point0{1, 2, 3}; + TestPoint point1{4, 5, 6}; + + // Check that empty tree works + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(42))); + + // Check that small tree works + tree.emplace(point0, Id(1)); + ASSERT_EQ(1u, tree.relocate(point0, point1, Id(1))); + ASSERT_EQ(tree.end(), tree.find(point0, Id(1))); + ASSERT_EQ(1, tree.find(point1, Id(1))->_i); + ASSERT_EQ(1u, tree.size()); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that existing destination fails + tree.emplace(point0, Id(1)); + tree.emplace(point1, Id(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(1))); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source bucket fails + tree.emplace(point1, Id(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(0))); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source value fails (target bucket exists) + tree.emplace(point0, Id(0)); + tree.emplace(point1, Id(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(2))); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source value fails (target bucket missing) + tree.emplace(point0, Id(0)); + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(2))); + PhTreeDebugHelper::CheckConsistency(tree); +} + TEST(PhTreeMMDTest, TestEraseByIterator) { const dimension_t dim = 3; TestTree tree; @@ -585,12 +654,16 @@ TEST(PhTreeMMDTest, TestExtent) { template struct FilterEvenId { - [[nodiscard]] constexpr bool IsEntryValid(const PhPoint&, const T& value) const { - return value._i % 2 == 0; + template + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint&, const BucketT&) const { + return true; } [[nodiscard]] constexpr bool IsNodeValid(const PhPoint&, int) const { return true; } + [[nodiscard]] constexpr bool IsBucketEntryValid(const PhPoint&, const T& value) const { + return value._i % 2 == 0; + } }; TEST(PhTreeMMDTest, TestExtentFilter) { @@ -782,7 +855,7 @@ TEST(PhTreeMMDTest, TestWindowQueryManyMoving) { size_t nn = 0; for (int i = -120; i < 120; i++) { TestPoint min{i * 10., i * 9., i * 11.}; - TestPoint max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; + TestPoint max{i * 10. + query_length, i * 9. + query_length, i * 11. 
+ query_length}; std::set referenceResult; referenceQuery(points, min, max, referenceResult); @@ -813,7 +886,7 @@ TEST(PhTreeMMDTest, TestWindowForEachQueryManyMoving) { size_t nn = 0; for (int i = -120; i < 120; i++) { TestPoint min{i * 10., i * 9., i * 11.}; - TestPoint max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; + TestPoint max{i * 10. + query_length, i * 9. + query_length, i * 11. + query_length}; std::set referenceResult; referenceQuery(points, min, max, referenceResult); @@ -1102,3 +1175,114 @@ TEST(PhTreeMMDTest, SmokeTestTreeAPI) { treePtr.clear(); delete idPtr; } + +template +void test_tree(TREE& tree) { + PhPointD<3> p{1, 2, 3}; + + // test various operations + tree.emplace(p, Id{2}); + Id id3{3}; + tree.insert(p, id3); + ASSERT_EQ(tree.size(), 3); + ASSERT_EQ(tree.count(p), 3); + ASSERT_EQ(tree.find(p, Id(1))->_i, 1); + ASSERT_EQ(tree.find(p, Id(2))->_i, 2); + ASSERT_EQ(tree.find(p, Id(3))->_i, 3); + + auto q_window = tree.begin_query({p, p}); + std::set wq_result; + wq_result.emplace(q_window->_i); + ++q_window; + wq_result.emplace(q_window->_i); + ++q_window; + wq_result.emplace(q_window->_i); + ++q_window; + ASSERT_EQ(q_window, tree.end()); + ASSERT_EQ(3, wq_result.size()); + + auto q_extent = tree.begin(); + std::set eq_result; + eq_result.emplace(q_extent->_i); + ++q_extent; + eq_result.emplace(q_extent->_i); + ++q_extent; + eq_result.emplace(q_extent->_i); + ++q_extent; + ASSERT_EQ(q_extent, tree.end()); + ASSERT_EQ(3, eq_result.size()); + + auto q_knn = tree.begin_knn_query(10, p, DistanceEuclidean<3>()); + std::set knn_result; + knn_result.emplace(q_knn->_i); + ++q_knn; + knn_result.emplace(q_knn->_i); + ++q_knn; + knn_result.emplace(q_knn->_i); + ++q_knn; + ASSERT_EQ(q_knn, tree.end()); + ASSERT_EQ(3, knn_result.size()); + + ASSERT_EQ(1, tree.erase(p, Id{1})); + ASSERT_EQ(2, tree.size()); + ASSERT_EQ(0, tree.erase(p, Id{1})); + ASSERT_EQ(2, tree.size()); + ASSERT_EQ(1, tree.erase(p, Id{2})); + ASSERT_EQ(1, tree.erase(p, Id{3})); + ASSERT_TRUE(tree.empty()); +} + +TEST(PhTreeMMDTest, TestMoveConstruct) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + PhTreeMultiMapD<3, Id> tree1; + tree1.emplace(p, Id{1}); + + TestTree<3, Id> tree{std::move(tree1)}; + test_tree(tree); + tree.~PhTreeMultiMap(); +} + +TEST(PhTreeMMDTest, TestMoveAssign) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + PhTreeMultiMapD<3, Id> tree1; + tree1.emplace(p, Id{1}); + + TestTree<3, Id> tree{}; + tree = std::move(tree1); + test_tree(tree); + tree.~PhTreeMultiMap(); +} + +TEST(PhTreeMMDTest, TestMovableIterators) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + ASSERT_NE(tree.begin(), tree.end()); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + ASSERT_NE(tree.find(p), tree.end()); + + TestTree<3, Id>::QueryBox qb{{1, 2, 3}, {4, 5, 6}}; + FilterMultiMapAABB filter(p, p, tree.converter()); + ASSERT_TRUE(std::is_move_constructible_v); + // Not movable due to constant fields + // ASSERT_TRUE(std::is_move_assignable_v); + + ASSERT_TRUE(std::is_move_constructible_v()))>); + // Not movable due to constant fields + // ASSERT_TRUE(std::is_move_assignable_v()))>); +} + +} // namespace phtree_multimap_d_test diff --git 
a/test/phtree_multimap_d_test_copy_move.cc b/test/phtree_multimap_d_test_copy_move.cc new file mode 100644 index 00000000..ed3c652a --- /dev/null +++ b/test/phtree_multimap_d_test_copy_move.cc @@ -0,0 +1,330 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "phtree/phtree_multimap.h" +#include +#include + +using namespace improbable::phtree; + +namespace phtree_multimap_d_test_copy_move { + +// Number of entries that have the same coordinate +static const size_t NUM_DUPL = 4; +static const double WORLD_MIN = -1000; +static const double WORLD_MAX = 1000; + +template +using TestPoint = PhPointD; + +template +using TestTree = PhTreeMultiMap>; + +class DoubleRng { + public: + DoubleRng(double minIncl, double maxExcl) : eng(), rnd{minIncl, maxExcl} {} + + double next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_real_distribution rnd; +}; + +struct IdCopyOnly { + explicit IdCopyOnly(const size_t i) : _i{static_cast(i)} {} + + IdCopyOnly() = default; + IdCopyOnly(const IdCopyOnly& other) = default; + IdCopyOnly(IdCopyOnly&& other) = delete; + IdCopyOnly& operator=(const IdCopyOnly& other) = default; + IdCopyOnly& operator=(IdCopyOnly&& other) = delete; + ~IdCopyOnly() = default; + + bool operator==(const IdCopyOnly& rhs) const { + return _i == rhs._i; + } + + int _i{}; + int _data{}; +}; + +struct IdMoveOnly { + explicit IdMoveOnly(const size_t i) : _i{i} {} + + IdMoveOnly() = default; + IdMoveOnly(const IdMoveOnly& other) = delete; + IdMoveOnly(IdMoveOnly&& other) = default; + IdMoveOnly& operator=(const IdMoveOnly& other) = delete; + IdMoveOnly& operator=(IdMoveOnly&& other) = default; + ~IdMoveOnly() = default; + + bool operator==(const IdMoveOnly& rhs) const { + return _i == rhs._i; + } + + size_t _i{}; + int _data{}; +}; + +// Assert that copy-ctr is not called even when available +struct IdCopyOrMove { + explicit IdCopyOrMove(const size_t i) : _i{i} {} + + IdCopyOrMove() = default; + IdCopyOrMove(const IdCopyOrMove&) { + assert(false); + } + IdCopyOrMove(IdCopyOrMove&& other) = default; + IdCopyOrMove& operator=(const IdCopyOrMove&) { + assert(false); + } + IdCopyOrMove& operator=(IdCopyOrMove&& other) = default; + ~IdCopyOrMove() = default; + + bool operator==(const IdCopyOrMove& rhs) const { + return _i == rhs._i; + } + + size_t _i{}; + int _data{}; +}; +} + +namespace std { +template <> +struct hash { + size_t operator()(const phtree_multimap_d_test_copy_move::IdCopyOnly& x) const { + return std::hash{}(x._i); + } +}; +template <> +struct hash { + size_t operator()(const phtree_multimap_d_test_copy_move::IdMoveOnly& x) const { + return std::hash{}(x._i); + } +}; +template <> +struct hash { + size_t operator()(const phtree_multimap_d_test_copy_move::IdCopyOrMove& x) const { + return std::hash{}(x._i); + } +}; +}; // namespace std + +namespace phtree_multimap_d_test_copy_move { + +struct IdHash { + template + std::size_t operator()(std::pair const& v) const { + return 
std::hash()(v.size()); + } +}; + +template +void generateCube(std::vector>& points, size_t N) { + assert(N % NUM_DUPL == 0); + DoubleRng rng(WORLD_MIN, WORLD_MAX); + auto reference_set = std::unordered_map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N / NUM_DUPL; i++) { + // create duplicates, i.e. entries with the same coordinates. However, avoid unintentional + // duplicates. + TestPoint key{}; + for (dimension_t d = 0; d < DIM; ++d) { + key[d] = rng.next(); + } + if (reference_set.count(key) != 0) { + i--; + continue; + } + reference_set.emplace(key, i); + for (size_t dupl = 0; dupl < NUM_DUPL; dupl++) { + auto point = TestPoint(key); + points.push_back(point); + } + } + ASSERT_EQ(reference_set.size(), N / NUM_DUPL); + ASSERT_EQ(points.size(), N); +} + +template +void SmokeTestBasicOps_QueryAndErase(TestTree& tree, std::vector>& points) { + size_t N = points.size(); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query({p, p}); + ASSERT_NE(q, tree.end()); + for (size_t j = 0; j < NUM_DUPL; j++) { + ASSERT_EQ(i / NUM_DUPL, (*q)._i / NUM_DUPL); + q++; + } + ASSERT_EQ(q, tree.end()); + } + + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_NE(tree.find(p), tree.end()); + size_t expected_remaining = (N - i - 1) % NUM_DUPL + 1; + ASSERT_EQ(tree.count(p), expected_remaining); + ASSERT_EQ(i, tree.find(p, id)->_i); + if (i % 2 == 0) { + ASSERT_EQ(1, tree.erase(p, id)); + } else { + auto iter = tree.find(p, id); + ASSERT_EQ(1, tree.erase(iter)); + } + + ASSERT_EQ(tree.count(p), expected_remaining - 1); + if (expected_remaining - 1 == 0) { + ASSERT_EQ(tree.end(), tree.find(p)); + } + ASSERT_EQ(N - i - 1, tree.size()); + + // try remove again + ASSERT_EQ(0, tree.erase(p, id)); + ASSERT_EQ(tree.count(p), expected_remaining - 1); + if (expected_remaining - 1 == 0) { + ASSERT_EQ(tree.end(), tree.find(p)); + } + ASSERT_EQ(N - i - 1, tree.size()); + if (i < N - 1) { + ASSERT_FALSE(tree.empty()); + } + } + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); +} + +template +void SmokeTestBasicOps(size_t N) { + TestTree tree; + std::vector> points; + generateCube(points, N); + + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_LE(tree.count(p), i % NUM_DUPL); + if (i % NUM_DUPL == 0) { + ASSERT_EQ(tree.end(), tree.find(p)); + } + + Id id(i); + if (i % 4 == 0) { + ASSERT_TRUE(tree.emplace(p, id).second); + } else if (i % 4 == 1) { + ASSERT_TRUE(tree.insert(p, id).second); + } else { + ASSERT_TRUE(tree.try_emplace(p, id).second); + } + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, tree.find(p, id)->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try adding it again + ASSERT_FALSE(tree.insert(p, id).second); + ASSERT_FALSE(tree.emplace(p, id).second); + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, tree.find(p, id)->_i); + ASSERT_EQ(i + 1, tree.size()); + ASSERT_FALSE(tree.empty()); + } + + SmokeTestBasicOps_QueryAndErase(tree, points); +} + +TEST(PhTreeMMDTestCopyMove, SmokeTestBasicOps) { + SmokeTestBasicOps<1, IdCopyOnly>(100); + SmokeTestBasicOps<3, IdCopyOnly>(100); + SmokeTestBasicOps<6, IdCopyOnly>(100); + SmokeTestBasicOps<10, IdCopyOnly>(100); + SmokeTestBasicOps<20, 
IdCopyOnly>(100); + SmokeTestBasicOps<63, IdCopyOnly>(100); +} + +template +void SmokeTestBasicOpsMoveOnly(size_t N) { + TestTree tree; + std::vector> points; + generateCube(points, N); + + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_LE(tree.count(p), i % NUM_DUPL); + if (i % NUM_DUPL == 0) { + ASSERT_EQ(tree.end(), tree.find(p)); + } + + if (i % 2 == 0) { + ASSERT_TRUE(tree.emplace(p, Id(i)).second); + } else { + ASSERT_TRUE(tree.try_emplace(p, Id(i)).second); + } + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(i, tree.find(p, Id(i))->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try adding it again + ASSERT_FALSE(tree.try_emplace(p, Id(i)).second); + ASSERT_FALSE(tree.emplace(p, Id(i)).second); + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(i, tree.find(p, Id(i))->_i); + ASSERT_EQ(i + 1, tree.size()); + ASSERT_FALSE(tree.empty()); + } + + SmokeTestBasicOps_QueryAndErase(tree, points); +} + +TEST(PhTreeMMDTestCopyMove, SmokeTestBasicOpsMoveOnly) { + SmokeTestBasicOpsMoveOnly<1, IdMoveOnly>(100); + SmokeTestBasicOpsMoveOnly<3, IdMoveOnly>(100); + SmokeTestBasicOpsMoveOnly<6, IdMoveOnly>(100); + SmokeTestBasicOpsMoveOnly<10, IdMoveOnly>(100); + SmokeTestBasicOpsMoveOnly<20, IdMoveOnly>(100); + SmokeTestBasicOpsMoveOnly<63, IdMoveOnly>(100); +} + +TEST(PhTreeMMDTestCopyMove, SmokeTestBasicOpsCopyFails) { + SmokeTestBasicOpsMoveOnly<1, IdCopyOrMove>(100); + SmokeTestBasicOpsMoveOnly<3, IdCopyOrMove>(100); + SmokeTestBasicOpsMoveOnly<6, IdCopyOrMove>(100); + SmokeTestBasicOpsMoveOnly<10, IdCopyOrMove>(100); + SmokeTestBasicOpsMoveOnly<20, IdCopyOrMove>(100); + SmokeTestBasicOpsMoveOnly<63, IdCopyOrMove>(100); +} + +} // namespace phtree_multimap_d_test_copy_move diff --git a/test/phtree_multimap_d_test_filter.cc b/test/phtree_multimap_d_test_filter.cc new file mode 100644 index 00000000..89b04057 --- /dev/null +++ b/test/phtree_multimap_d_test_filter.cc @@ -0,0 +1,692 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "phtree/phtree_multimap.h" +#include +#include +#include + +using namespace improbable::phtree; + +namespace phtree_multimap_d_test_filter { + +// Number of entries that have the same coordinate +static const size_t NUM_DUPL = 4; +[[maybe_unused]] static const double WORLD_MIN = -1000; +[[maybe_unused]] static const double WORLD_MAX = 1000; + +template +using TestPoint = PhPointD; + +template +using TestTree = PhTreeMultiMap>; + +class DoubleRng { + public: + DoubleRng(double minIncl, double maxExcl) : eng(), rnd{minIncl, maxExcl} {} + + double next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_real_distribution rnd; +}; + +struct Id { + Id() = default; + + explicit Id(const int i) : _i(i){}; + + bool operator==(const Id& rhs) const { + return _i == rhs._i; + } + + Id(Id const& rhs) = default; + Id(Id&& rhs) = default; + Id& operator=(Id const& rhs) = default; + Id& operator=(Id&& rhs) = default; + + int _i; +}; +} // namespace phtree_multimap_d_test_filter + +namespace std { +template <> +struct hash { + size_t operator()(const phtree_multimap_d_test_filter::Id& x) const { + return std::hash{}(x._i); + } +}; +}; // namespace std + +namespace phtree_multimap_d_test_filter { + +struct IdHash { + template + std::size_t operator()(std::pair const& v) const { + return std::hash()(v.size()); + } +}; + +template +void generateCube(std::vector>& points, size_t N) { + assert(N % NUM_DUPL == 0); + DoubleRng rng(WORLD_MIN, WORLD_MAX); + auto reference_set = std::unordered_map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N / NUM_DUPL; i++) { + // create duplicates, ie. entries with the same coordinates. However, avoid unintentional + // duplicates. + TestPoint key{}; + for (dimension_t d = 0; d < DIM; ++d) { + key[d] = rng.next(); + } + if (reference_set.count(key) != 0) { + i--; + continue; + } + reference_set.emplace(key, i); + for (size_t dupl = 0; dupl < NUM_DUPL; dupl++) { + auto point = TestPoint(key); + points.push_back(point); + } + } + ASSERT_EQ(reference_set.size(), N / NUM_DUPL); + ASSERT_EQ(points.size(), N); +} + +template +void populate(TestTree& tree, std::vector>& points, size_t N) { + generateCube(points, N); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.insert(points[i], i).second); + } + ASSERT_EQ(N, tree.size()); +} + +static int f_default_construct_ = 0; +static int f_construct_ = 0; +static int f_copy_construct_ = 0; +static int f_move_construct_ = 0; +static int f_copy_assign_ = 0; +static int f_move_assign_ = 0; +static int f_destruct_ = 0; + +static void f_reset_id_counters() { + f_default_construct_ = 0; + f_construct_ = 0; + f_copy_construct_ = 0; + f_move_construct_ = 0; + f_copy_assign_ = 0; + f_move_assign_ = 0; + f_destruct_ = 0; +} + +template +struct FilterCount { + FilterCount() : last_known{} { + ++f_default_construct_; + } + + explicit FilterCount(const T i) : last_known{i} { + ++f_construct_; + } + + FilterCount(const FilterCount& other) { + ++f_copy_construct_; + last_known = other.last_known; + } + + FilterCount(FilterCount&& other) noexcept { + ++f_move_construct_; + last_known = other.last_known; + } + + FilterCount& operator=(const FilterCount& other) noexcept { + ++f_copy_assign_; + last_known = other.last_known; + return *this; + } + FilterCount& operator=(FilterCount&& other) noexcept { + ++f_move_assign_; + last_known = other.last_known; + return *this; + } + + ~FilterCount() { + ++f_destruct_; + } + + template + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint&, 
const BucketT& bucket) { + assert(!bucket.empty()); + return true; + } + + template + [[nodiscard]] bool IsBucketEntryValid(const PhPoint&, const T2& value) { + last_known = value; + return true; + } + + [[nodiscard]] constexpr bool IsNodeValid(const PhPoint&, int) { + return true; + } + + T last_known; +}; + +template +struct DistanceCount { + DistanceCount() { + ++f_default_construct_; + } + + DistanceCount(const DistanceCount&) { + ++f_copy_construct_; + } + + DistanceCount(DistanceCount&&) noexcept { + ++f_move_construct_; + } + + DistanceCount& operator=(const DistanceCount&) noexcept { + ++f_copy_assign_; + return *this; + } + DistanceCount& operator=(DistanceCount&&) noexcept { + ++f_move_assign_; + return *this; + } + + ~DistanceCount() { + ++f_destruct_; + } + + double operator()(const PhPointD& p1, const PhPointD& p2) const { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; ++i) { + double d2 = p1[i] - p2[i]; + sum2 += d2 * d2; + } + return sqrt(sum2); + }; +}; + +static size_t static_id = 0; + +template +struct CallbackCount { + CallbackCount() { + static_id = 0; + ++f_default_construct_; + } + + CallbackCount(const CallbackCount&) { + ++f_copy_construct_; + } + + CallbackCount(CallbackCount&&) noexcept { + ++f_move_construct_; + } + + CallbackCount& operator=(const CallbackCount&) noexcept { + ++f_copy_assign_; + return *this; + } + CallbackCount& operator=(CallbackCount&&) noexcept { + ++f_move_assign_; + return *this; + } + + ~CallbackCount() { + ++f_destruct_; + } + + void operator()(const TestPoint, const Id& t) { + static_id = t._i; + } +}; + +template +struct FilterConst { + template + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint&, const BucketT&) const { + return true; + } + [[nodiscard]] constexpr bool IsBucketEntryValid(const PhPoint&, const T& value) { + assert(value._i == 1); + return true; + } + [[nodiscard]] constexpr bool IsNodeValid(const PhPoint&, int) const { + return true; + } +}; + +template +struct CallbackConst { + void operator()(const TestPoint, const Id& t) const { + static_id = t._i; + } +}; + +[[maybe_unused]] static void print_id_counters() { + std::cout << "dc=" << f_default_construct_ << " c=" << f_construct_ + << " cc=" << f_copy_construct_ << " mc=" << f_move_construct_ + << " ca=" << f_copy_assign_ << " ma=" << f_move_assign_ << " d=" << f_destruct_ + << std::endl; +} + +/* + * General comment: We are testing several thing here. + * - If we pass lvalue filters/callbacks/... we want to ensure that they do not get copied or + * moved at all. We need to ensure that the lvalue argument is the same instance that is + * used internally by the iterator. + * - If we pass a rvalue filters/callbacks/..., preventing copies/moves is harder. We are testing + * somewhat arbitrarily for a limit of 3 moves/copies per argument. + * - We want to ensure that both rvalue/lvalue arguments work. + * - We also do some limited testing that it works with 'const' trees. 
+ * - Finally, we test separately that the old legacy filters still work + */ + +TEST(PhTreeTest, TestFilterAPI_FOR_EACH) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + CallbackCount<3> callback; + FilterCount<3, Id> filter{}; + // rvalue + tree.for_each(callback, filter); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(filter.last_known._i, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // lvalue + tree.for_each(CallbackCount<3>(), FilterCount<3, Id>()); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_GE(4, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + CallbackCount<3> callbackC; + FilterConst<3, Id> filterC; + treeC.for_each(callbackC, filterC); + // rvalue + treeC.for_each(CallbackConst<3>{}, FilterConst<3, Id>()); + f_reset_id_counters(); +} + +TEST(PhTreeTest, TestFilterAPI_FOR_EACH_WQ) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + TestTree<3, Id>::QueryBox qb{{1, 2, 3}, {4, 5, 6}}; + CallbackCount<3> callback; + FilterCount<3, Id> filter{}; + // lvalue + tree.for_each(qb, callback, filter); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(filter.last_known._i, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + tree.for_each({{1, 2, 3}, {4, 5, 6}}, CallbackCount<3>{}, FilterCount<3, Id>()); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_GE(4, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + treeC.for_each(qb, callback, filterC); + // rvalue + treeC.for_each({{1, 2, 3}, {4, 5, 6}}, CallbackConst<3>(), FilterConst<3, Id>()); + f_reset_id_counters(); +} + +TEST(PhTreeTest, TestFilterAPI_BEGIN) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + FilterCount<3, Id> filter{}; + // lvalue + ASSERT_EQ(tree.begin(filter)->_i, 1); + ASSERT_EQ(filter.last_known._i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_GE(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + ASSERT_EQ(tree.begin(FilterCount<3, Id>())->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_GE(2, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + ASSERT_EQ(treeC.begin(filterC)->_i, 1); + // rvalue + ASSERT_EQ(treeC.begin(FilterConst<3, Id>())->_i, 1); + f_reset_id_counters(); +} + +TEST(PhTreeTest, TestFilterAPI_WQ) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + TestTree<3, Id>::QueryBox qb{{1, 2, 3}, {4, 5, 6}}; + FilterCount<3, Id> filter{}; + // lvalue + 
ASSERT_EQ(tree.begin_query(qb, filter)->_i, 1); + ASSERT_EQ(filter.last_known._i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + ASSERT_EQ(tree.begin_query({{1, 2, 3}, {4, 5, 6}}, FilterCount<3, Id>())->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_GE(2, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + ASSERT_EQ(treeC.begin_query(qb, filterC)->_i, 1); + // rvalue + ASSERT_EQ(treeC.begin_query(qb, FilterConst<3, Id>())->_i, 1); + f_reset_id_counters(); +} + +TEST(PhTreeTest, TestFilterAPI_KNN) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + FilterCount<3, Id> filter{}; + DistanceCount<3> dist_fn{}; + // lvalue + ASSERT_EQ(tree.begin_knn_query(3, {2, 3, 4}, dist_fn, filter)->_i, 1); + ASSERT_EQ(filter.last_known._i, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + ASSERT_EQ(tree.begin_knn_query(3, {2, 3, 4}, DistanceCount<3>{}, FilterCount<3, Id>())->_i, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_GE(2 * 3, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue #2 + auto a = tree.begin_knn_query, FilterCount<3, Id>>(3, {2, 3, 4})->_i; + ASSERT_EQ(a, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_GE(2 * 3, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + ASSERT_EQ(treeC.begin_knn_query(3, {2, 3, 4}, dist_fn, filterC)->_i, 1); + // rvalue + ASSERT_EQ(treeC.begin_knn_query(3, {2, 3, 4}, DistanceCount<3>{}, FilterConst<3, Id>())->_i, 1); + f_reset_id_counters(); +} + +template +double distance(const TestPoint& p1, const TestPoint& p2) { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; ++i) { + double d2 = p1[i] - p2[i]; + sum2 += d2 * d2; + } + return sqrt(sum2); +}; + +template +void referenceSphereQuery( + std::vector>& points, + TestPoint& center, + double radius, + std::set& result) { + for (size_t i = 0; i < points.size(); i++) { + auto& p = points[i]; + if (distance(center, p) <= radius) { + result.insert(i); + } + } +} + +template +void referenceAABBQuery( + std::vector>& points, + TestPoint& center, + double radius, + std::set& result) { + for (size_t i = 0; i < points.size(); i++) { + auto& p = points[i]; + bool inside = true; + for (dimension_t i = 0; i < DIM; ++i) { + inside &= std::abs(p[i] - center[i]) <= radius; + } + + if (inside) { + result.insert(i); + } + } +} + +template +PhBoxD QueryBox(PhPointD& center, double radius) { + typename TestTree::QueryBox query_box{ + {center[0] - radius, center[1] - radius, center[2] - radius}, + {center[0] + radius, center[1] + radius, center[2] + radius}}; + return query_box; +} + +// We use 'int&' because gtest does not compile with assertions in non-void functions. 
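The sphere-query helpers that follow all use the same pattern: build a brute-force reference set with referenceSphereQuery(), then iterate the tree with a FilterMultiMapSphere and check that exactly the reference entries come back. Below is a minimal sketch of that pattern (editor's illustration, not part of this patch). It assumes a PhTreeMultiMapD<3, size_t>, that <set> and <cassert> are available, and the hypothetical function name sketch_sphere_filter_check is mine:

    // Sketch: filtered extent scan vs. brute-force sphere check.
    void sketch_sphere_filter_check() {
        PhTreeMultiMapD<3, size_t> tree;
        std::vector<PhPointD<3>> pts = {{0, 0, 0}, {5, 0, 0}, {50, 0, 0}};
        for (size_t i = 0; i < pts.size(); ++i) {
            tree.emplace(pts[i], i);
        }
        PhPointD<3> center{0, 0, 0};
        double radius = 10;

        // Brute-force reference: indexes of all points within 'radius' of 'center'.
        std::set<size_t> reference;
        for (size_t i = 0; i < pts.size(); ++i) {
            double sum2 = 0;
            for (dimension_t d = 0; d < 3; ++d) {
                double diff = pts[i][d] - center[d];
                sum2 += diff * diff;
            }
            if (sqrt(sum2) <= radius) {
                reference.insert(i);
            }
        }

        // A filtered scan over the whole tree must return exactly the reference set.
        auto filter = FilterMultiMapSphere(center, radius, tree.converter());
        size_t n = 0;
        for (auto it = tree.begin(filter); it != tree.end(); ++it) {
            assert(reference.count(*it) == 1);
            ++n;
        }
        assert(n == reference.size());
    }

The test functions below do the same thing at larger scale and additionally combine the sphere filter with a bounding query box.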
+template +void testSphereQuery(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceSphereQuery(points, center, radius, referenceResult); + + result = 0; + auto filter = FilterMultiMapSphere(center, radius, tree.converter()); + for (auto it = tree.begin(filter); it != tree.end(); it++) { + auto& x = *it; + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + result++; + } + ASSERT_EQ(referenceResult.size(), result); +} + +template +void testSphereQueryWithBox(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceSphereQuery(points, center, radius, referenceResult); + + result = 0; + auto query_box = QueryBox(center, radius); + auto filter = FilterMultiMapSphere(center, radius, tree.converter()); + for (auto it = tree.begin_query(query_box, filter); it != tree.end(); it++) { + auto& x = *it; + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + result++; + } + ASSERT_EQ(referenceResult.size(), result); +} + +template +void testSphereQueryForEach(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceSphereQuery(points, center, radius, referenceResult); + + result = 0; + auto filter = FilterMultiMapSphere(center, radius, tree.converter()); + auto callback = [&result, &referenceResult](PhPointD, const size_t& x) { + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + ++result; + }; + tree.for_each(callback, filter); + ASSERT_EQ(referenceResult.size(), result); +} +template +void testSphereQueryForEachQueryBox(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceSphereQuery(points, center, radius, referenceResult); + + result = 0; + auto query_box = QueryBox(center, radius); + auto filter = FilterMultiMapSphere(center, radius, tree.converter()); + auto callback = [&result, &referenceResult](PhPointD, const size_t& x) { + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + ++result; + }; + tree.for_each(query_box, callback, filter); + ASSERT_EQ(referenceResult.size(), result); +} + +template +void testAABBQuery(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceAABBQuery(points, center, radius, referenceResult); + + result = 0; + auto query_box = QueryBox(center, radius); + auto filter = FilterMultiMapAABB(query_box.min(), query_box.max(), tree.converter()); + for (auto it = tree.begin(filter); it != tree.end(); it++) { + auto& x = *it; + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + result++; + } + ASSERT_EQ(referenceResult.size(), result); +} + +template +void Query0(QUERY query) { + TestPoint p{-10000, -10000, -10000}; + int n = 0; + query(p, 0.1, 100, n); + ASSERT_EQ(0, n); +} + +template +void QueryMany(QUERY query) { + TestPoint p{0, 0, 0}; + int n = 0; + query(p, 1000, 1000, n); + ASSERT_GT(n, 400); + ASSERT_LT(n, 800); +} + +template +void QueryManyAABB(QUERY query) { + TestPoint p{0, 0, 0}; + int n = 0; + query(p, 1000, 1000, n); + ASSERT_EQ(n, 1000); +} + +template +void QueryAll(QUERY query) { + TestPoint p{0, 0, 0}; + int n = 0; + query(p, 10000, 1000, n); + 
ASSERT_EQ(1000, n); +} + +TEST(PhTreeMMDFilterTest, TestSphereQuery) { + Query0<3>(&testSphereQuery<3>); + QueryMany<3>(&testSphereQuery<3>); + QueryAll<3>(&testSphereQuery<3>); +} + +TEST(PhTreeMMDFilterTest, TestSphereQueryWithQueryBox) { + Query0<3>(&testSphereQueryWithBox<3>); + QueryMany<3>(&testSphereQueryWithBox<3>); + QueryAll<3>(&testSphereQueryWithBox<3>); +} + +TEST(PhTreeMMDFilterTest, TestSphereQueryForEach) { + Query0<3>(&testSphereQueryForEach<3>); + QueryMany<3>(&testSphereQueryForEach<3>); + QueryAll<3>(&testSphereQueryForEach<3>); +} + +TEST(PhTreeMMDFilterTest, TestSphereQueryForEachWithQueryBox) { + Query0<3>(&testSphereQueryForEachQueryBox<3>); + QueryMany<3>(&testSphereQueryForEachQueryBox<3>); + QueryAll<3>(&testSphereQueryForEachQueryBox<3>); +} + +TEST(PhTreeMMDFilterTest, TestAABBQuery) { + Query0<3>(&testAABBQuery<3>); + QueryManyAABB<3>(&testAABBQuery<3>); + QueryAll<3>(&testAABBQuery<3>); +} + +} // namespace phtree_multimap_d_test_filter diff --git a/test/phtree_multimap_d_test_unique_ptr_values.cc b/test/phtree_multimap_d_test_unique_ptr_values.cc new file mode 100644 index 00000000..5364804a --- /dev/null +++ b/test/phtree_multimap_d_test_unique_ptr_values.cc @@ -0,0 +1,388 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "phtree/phtree_multimap.h" +#include +#include + +using namespace improbable::phtree; + +namespace phtree_multimap_d_test_unique_ptr_values { + +// Number of entries that have the same coordinate +static const size_t NUM_DUPL = 4; +static const double WORLD_MIN = -1000; +static const double WORLD_MAX = 1000; + +template +using TestPoint = PhPointD; + +class DoubleRng { + public: + DoubleRng(double minIncl, double maxExcl) : eng(), rnd{minIncl, maxExcl} {} + + double next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_real_distribution rnd; +}; + +struct IdObj { + IdObj() = default; + + explicit IdObj(const int i) : _i(i), data_{0} {}; + explicit IdObj(const size_t i) : _i(static_cast(i)), data_{0} {}; + + bool operator==(const IdObj& rhs) const noexcept { + return _i == rhs._i; + } + + int _i; + int data_; +}; + +using Id = std::unique_ptr; +} // namespace phtree_multimap_d_test_unique_ptr_values + +namespace std { +template <> +struct hash { + size_t operator()(const phtree_multimap_d_test_unique_ptr_values::Id& x) const { + return std::hash{}(x->_i); + } +}; +}; // namespace std +struct equal_to_content { + bool operator()( + const phtree_multimap_d_test_unique_ptr_values::Id& x1, + const phtree_multimap_d_test_unique_ptr_values::Id& x2) const { + return (*x1) == (*x2); + } +}; +struct less_content { + bool operator()( + const phtree_multimap_d_test_unique_ptr_values::Id& x1, + const phtree_multimap_d_test_unique_ptr_values::Id& x2) const { + return (*x1)._i < (*x2)._i; + } +}; + +namespace phtree_multimap_d_test_unique_ptr_values { + +template +using TestTree = PhTreeMultiMap< + DIM, + T, + ConverterIEEE, + b_plus_tree_hash_set, equal_to_content>>; +// using TestTree = PhTreeMultiMap, std::unordered_set, +// equal_to_content>>; using TestTree = PhTreeMultiMap, std::set>; + +template +void generateCube(std::vector>& points, size_t N) { + assert(N % NUM_DUPL == 0); + DoubleRng rng(WORLD_MIN, WORLD_MAX); + auto reference_set = std::unordered_map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N / NUM_DUPL; i++) { + // create duplicates, i.e. entries with the same coordinates. However, avoid unintentional + // duplicates. 
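// Editor's note (illustrative, not part of this patch): with NUM_DUPL == 4 this
// loop draws N / 4 distinct keys and pushes each of them four times, so the
// resulting vector looks like  k0 k0 k0 k0 k1 k1 k1 k1 ...  Keys that happen to
// collide with an earlier key are re-drawn (the `i--; continue;` branch), so the
// only duplicates are the intentional ones.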
+ TestPoint key{}; + for (dimension_t d = 0; d < DIM; ++d) { + key[d] = rng.next(); + } + if (reference_set.count(key) != 0) { + i--; + continue; + } + reference_set.emplace(key, i); + for (size_t dupl = 0; dupl < NUM_DUPL; dupl++) { + auto point = TestPoint(key); + points.emplace_back(point); + } + } + ASSERT_EQ(reference_set.size(), N / NUM_DUPL); + ASSERT_EQ(points.size(), N); +} + +template +void SmokeTestBasicOps(int N) { + TestTree tree; + std::vector> points; + generateCube(points, N); + + ASSERT_EQ(0u, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); + + for (int i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_LE(tree.count(p), i % NUM_DUPL); + if (i % NUM_DUPL == 0) { + ASSERT_EQ(tree.end(), tree.find(p)); + } + + Id id2(new IdObj{i}); + // Id id3(new IdObj{i}); + // ASSERT_EQ(id2.get(), id3.get()); + // ASSERT_TRUE(id2 == id3); + // ASSERT_EQ(id2, id3); + if (i % 4 == 0) { + ASSERT_TRUE(tree.emplace(p, std::make_unique(i)).second); + } else if (i % 4 == 1) { + ASSERT_TRUE(tree.emplace(p, new IdObj{i}).second); + } else if (i % 4 == 2) { + ASSERT_TRUE(tree.try_emplace(p, new IdObj{i}).second); + } else { + Id id = std::make_unique(i); + ASSERT_TRUE(tree.emplace(p, std::move(id)).second); + } + Id id = std::make_unique(i); + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(i, (*tree.find(p, id))->_i); + ASSERT_EQ(i + 1u, tree.size()); + + // try adding it again + ASSERT_FALSE(tree.try_emplace(p, std::make_unique(i)).second); + ASSERT_FALSE(tree.emplace(p, std::make_unique(i)).second); + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(i, (*tree.find(p, std::make_unique(i)))->_i); + ASSERT_EQ(i + 1u, tree.size()); + ASSERT_FALSE(tree.empty()); + } + + for (int i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query({p, p}); + ASSERT_NE(q, tree.end()); + for (size_t j = 0; j < NUM_DUPL; j++) { + ASSERT_EQ(i / NUM_DUPL, (*q)->_i / NUM_DUPL); + q++; + } + ASSERT_EQ(q, tree.end()); + } + + PhTreeDebugHelper::CheckConsistency(tree); + + for (int i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_NE(tree.find(p), tree.end()); + size_t expected_remaining = (N - i - 1) % NUM_DUPL + 1; + ASSERT_EQ(tree.count(p), expected_remaining); + ASSERT_EQ(i, (*tree.find(p, std::make_unique(i)))->_i); + if (i % 3 == 0) { + ASSERT_EQ(1u, tree.erase(p, std::make_unique(i))); + } else { + auto iter = tree.find(p, std::make_unique(i)); + ASSERT_EQ(1u, tree.erase(iter)); + } + + ASSERT_EQ(tree.count(p), expected_remaining - 1); + if (expected_remaining - 1 == 0) { + ASSERT_EQ(tree.end(), tree.find(p)); + } + ASSERT_EQ(N - i - 1u, tree.size()); + + // try remove again + ASSERT_EQ(0u, tree.erase(p, std::make_unique(i))); + ASSERT_EQ(tree.count(p), expected_remaining - 1); + if (expected_remaining - 1 == 0) { + ASSERT_EQ(tree.end(), tree.find(p)); + } + ASSERT_EQ(N - i - 1u, tree.size()); + if (i < N - 1) { + ASSERT_FALSE(tree.empty()); + } + } + ASSERT_EQ(0u, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); +} + +TEST(PhTreeMMDTestUniquePtr, SmokeTestBasicOps) { + SmokeTestBasicOps<1>(10000); + SmokeTestBasicOps<3>(10000); + SmokeTestBasicOps<6>(10000); + SmokeTestBasicOps<10>(1000); + SmokeTestBasicOps<20>(100); +} + +template +void populate(TestTree& tree, std::vector>& points, size_t N) { + generateCube(points, N); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.emplace(points[i], 
std::make_unique(i)).second); + } + ASSERT_EQ(N, tree.size()); +} + +TEST(PhTreeMMDTestUniquePtr, TestUpdateWithRelocate) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 1000; + std::array deltas{0, 1, 10, 100}; + std::vector> points; + populate(tree, points, N); + + for (auto delta : deltas) { + int i = 0; + for (auto& p : points) { + auto pOld = p; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + ASSERT_EQ(1u, tree.relocate(pOld, pNew, std::make_unique(i))); + if (delta > 0) { + // second time fails because value has already been moved + ASSERT_EQ(0u, tree.relocate(pOld, pNew, std::make_unique(i))); + } + ASSERT_EQ(i, (*tree.find(pNew, std::make_unique(i)))->_i); + p = pNew; + ++i; + } + PhTreeDebugHelper::CheckConsistency(tree); + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); +} + +TEST(PhTreeMMDTestUniquePtr, TestUpdateWithRelocateCornerCases) { + const dimension_t dim = 3; + TestTree tree; + TestPoint point0{1, 2, 3}; + TestPoint point1{4, 5, 6}; + + // Check that empty tree works + ASSERT_EQ(0u, tree.relocate(point0, point1, std::make_unique(42))); + + // Check that small tree works + tree.emplace(point0, std::make_unique(1)); + ASSERT_EQ(1u, tree.relocate(point0, point1, std::make_unique(1))); + ASSERT_EQ(tree.end(), tree.find(point0, std::make_unique(1))); + ASSERT_EQ(1, (*tree.find(point1, std::make_unique(1)))->_i); + ASSERT_EQ(1u, tree.size()); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that existing destination fails + tree.emplace(point0, std::make_unique(1)); + tree.emplace(point1, std::make_unique(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1, std::make_unique(1))); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source bucket fails + tree.emplace(point1, std::make_unique(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1, std::make_unique(0))); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source value fails (target bucket exists) + tree.emplace(point0, std::make_unique(0)); + tree.emplace(point1, std::make_unique(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1, std::make_unique(2))); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source value fails (target bucket missing) + tree.emplace(point0, std::make_unique(0)); + ASSERT_EQ(0u, tree.relocate(point0, point1, std::make_unique(2))); + PhTreeDebugHelper::CheckConsistency(tree); +} + +TEST(PhTreeMMDTestUniquePtr, TestUpdateWithRelocateIf) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 1, 10, 100}; + std::vector> points; + populate(tree, points, N); + + for (auto delta : deltas) { + size_t done = 0; + for (int i = 0; size_t(i) < N; ++i) { + auto pred = [&i](const Id& id) { return id->_i == i; }; + auto pOld = points[i]; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + ASSERT_EQ(1u, tree.relocate_if(pOld, pNew, pred)); + if (delta > 0) { + // second time fails because value has already been moved + ASSERT_EQ(0u, tree.relocate_if(pOld, pNew, pred)); + } + ASSERT_EQ(i, (*tree.find(pNew, std::make_unique(i)))->_i); + ++done; + points[i] = pNew; + } + ASSERT_EQ(done, N); + PhTreeDebugHelper::CheckConsistency(tree); + } + ASSERT_EQ(N, tree.size()); + tree.clear(); +} + +TEST(PhTreeMMDTestUniquePtr, TestUpdateWithRelocateIfCornerCases) { + const dimension_t dim = 3; + TestTree tree; + TestPoint point0{1, 2, 3}; + TestPoint point1{4, 5, 6}; + auto TRUE = [](const Id&) { return 
true; }; + auto TWO = [](const Id& id) { return id->_i == 2; }; + + // Check that empty tree works + ASSERT_EQ(0u, tree.relocate_if(point0, point1, TRUE)); + + // Check that small tree works + tree.emplace(point0, std::make_unique(1)); + ASSERT_EQ(1u, tree.relocate_if(point0, point1, TRUE)); + ASSERT_EQ(tree.end(), tree.find(point0)); + ASSERT_EQ(1, (*tree.find(point1))->_i); + ASSERT_EQ(1u, tree.size()); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that existing destination fails + tree.emplace(point0, std::make_unique(1)); + tree.emplace(point1, std::make_unique(1)); + ASSERT_EQ(0u, tree.relocate_if(point0, point1, TRUE)); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source bucket fails + tree.emplace(point1, std::make_unique(1)); + ASSERT_EQ(0u, tree.relocate_if(point0, point1, TRUE)); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source value fails (target bucket exists) + tree.emplace(point0, std::make_unique(0)); + tree.emplace(point1, std::make_unique(1)); + ASSERT_EQ(0u, tree.relocate_if(point0, point1, TWO)); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source value fails (target bucket missing) + tree.emplace(point0, std::make_unique(0)); + ASSERT_EQ(0u, tree.relocate_if(point0, point1, TWO)); + PhTreeDebugHelper::CheckConsistency(tree); +} + +} // namespace phtree_multimap_d_test_unique_ptr_values diff --git a/phtree/phtree_test.cc b/test/phtree_test.cc similarity index 70% rename from phtree/phtree_test.cc rename to test/phtree_test.cc index fe323c39..88aa2b40 100644 --- a/phtree/phtree_test.cc +++ b/test/phtree_test.cc @@ -15,11 +15,13 @@ */ #include "phtree/phtree.h" -#include +#include #include using namespace improbable::phtree; +namespace phtree_test { + template using TestPoint = PhPoint; @@ -57,6 +59,13 @@ static void reset_id_counters() { destruct_count_ = 0; } +static void print_id_counters() { + std::cout << "dc=" << default_construct_count_ << " c=" << construct_count_ + << " cc=" << copy_construct_count_ << " mc=" << move_construct_count_ + << " ca=" << copy_assign_count_ << " ma=" << move_assign_count_ + << " d=" << destruct_count_ << std::endl; +} + struct Id { Id() : _i{0} { ++default_construct_count_; @@ -64,7 +73,7 @@ struct Id { explicit Id(const size_t i) : _i{static_cast(i)} { ++construct_count_; - }; + } Id(const Id& other) { ++copy_construct_count_; @@ -76,13 +85,18 @@ struct Id { _i = other._i; } - bool operator==(const Id& rhs) const { + Id& operator=(const Id& other) noexcept { ++copy_assign_count_; - return _i == rhs._i; + _i = other._i; + return *this; } - - bool operator==(Id&& rhs) const { + Id& operator=(Id&& other) noexcept { ++move_assign_count_; + _i = other._i; + return *this; + } + + bool operator==(const Id& rhs) const { return _i == rhs._i; } @@ -90,8 +104,6 @@ struct Id { ++destruct_count_; } - Id& operator=(Id const& rhs) = default; - int _i; }; @@ -110,7 +122,7 @@ template double distance(const TestPoint& p1, const TestPoint& p2) { double sum2 = 0; for (dimension_t i = 0; i < DIM; i++) { - double d = p1[i] - p2[i]; + double d = (double)p1[i] - (double)p2[i]; sum2 += d * d; } return sqrt(sum2); @@ -166,17 +178,19 @@ void SmokeTestBasicOps(size_t N) { ASSERT_EQ(tree.end(), tree.find(p)); Id id(i); - if (i % 2 == 0) { + if (i % 4 == 0) { ASSERT_TRUE(tree.emplace(p, i).second); - } else { + } else if (i % 4 == 1) { ASSERT_TRUE(tree.insert(p, id).second); + } else { + 
ASSERT_TRUE(tree.try_emplace(p, i).second); } ASSERT_EQ(tree.count(p), 1); ASSERT_NE(tree.end(), tree.find(p)); ASSERT_EQ(id._i, tree.find(p)->_i); ASSERT_EQ(i + 1, tree.size()); - // try add again + // try insert/emplace again ASSERT_FALSE(tree.insert(p, id).second); ASSERT_FALSE(tree.emplace(p, id).second); ASSERT_EQ(tree.count(p), 1); @@ -221,7 +235,9 @@ void SmokeTestBasicOps(size_t N) { ASSERT_TRUE(tree.empty()); PhTreeDebugHelper::CheckConsistency(tree); - ASSERT_EQ(construct_count_ + copy_construct_count_ + move_construct_count_, destruct_count_); + // Normal construction and destruction should be symmetric. Move-construction is ignored. + ASSERT_GE(construct_count_ + copy_construct_count_ + move_construct_count_, destruct_count_); + ASSERT_LE(construct_count_ + copy_construct_count_, destruct_count_); // The following assertions exist only as sanity checks and may need adjusting. // There is nothing fundamentally wrong if a change in the implementation violates // any of the following assertions, as long as performance/memory impact is observed. @@ -237,7 +253,10 @@ void SmokeTestBasicOps(size_t N) { // small node require a lot of copying/moving ASSERT_GE(construct_count_ * 3, move_construct_count_); } else { - ASSERT_GE(construct_count_ * 2, move_construct_count_); + if (construct_count_ * 15 < move_construct_count_) { + print_id_counters(); + } + ASSERT_GE(construct_count_ * 15, move_construct_count_); } } @@ -342,7 +361,7 @@ TEST(PhTreeTest, TestEmplace) { ASSERT_EQ(i + 1, tree.size()); // try add again, this should _not_ replace the existing value - Id id2(-i); + Id id2(i + N); ASSERT_EQ(false, tree.emplace(p, id2).second); ASSERT_EQ(i, tree.emplace(p, id).first._i); ASSERT_EQ(tree.count(p), 1); @@ -499,8 +518,8 @@ TEST(PhTreeTest, TestUpdateWithEmplace) { for (auto& p : points) { auto pOld = p; TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; - int n = tree.erase(pOld); - ASSERT_EQ(1, n); + size_t n = tree.erase(pOld); + ASSERT_EQ(1u, n); tree.emplace(pNew, 42); ASSERT_EQ(1, tree.count(pNew)); ASSERT_EQ(0, tree.count(pOld)); @@ -526,8 +545,8 @@ TEST(PhTreeTest, TestUpdateWithEmplaceHint) { int delta = deltas[d_n]; TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; auto iter = tree.find(pOld); - int n = tree.erase(iter); - ASSERT_EQ(1, n); + size_t n = tree.erase(iter); + ASSERT_EQ(1u, n); tree.emplace_hint(iter, pNew, 42); ASSERT_EQ(1, tree.count(pNew)); if (delta != 0.0) { @@ -538,6 +557,147 @@ TEST(PhTreeTest, TestUpdateWithEmplaceHint) { ASSERT_EQ(N, tree.size()); tree.clear(); + + tree.emplace_hint(tree.end(), {11, 21, 31}, 421); + tree.emplace_hint(tree.begin(), {1, 2, 3}, 42); + ASSERT_EQ(2, tree.size()); +} + +TEST(PhTreeTest, TestUpdateWithTryEmplaceHint) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 1, 10, 100}; + std::vector> points; + populate(tree, points, N); + + size_t d_n = 0; + for (auto& p : points) { + auto pOld = p; + d_n = (d_n + 1) % deltas.size(); + int delta = deltas[d_n]; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + auto iter = tree.find(pOld); + size_t n = tree.erase(iter); + ASSERT_EQ(1u, n); + tree.try_emplace(iter, pNew, 42); + ASSERT_EQ(1, tree.count(pNew)); + if (delta != 0.0) { + ASSERT_EQ(0, tree.count(pOld)); + } + p = pNew; + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); + + tree.try_emplace(tree.end(), {11, 21, 31}, 421); + tree.try_emplace(tree.begin(), {1, 2, 3}, 42); + ASSERT_EQ(2, tree.size()); +} + +TEST(PhTreeTest, 
TestUpdateWithRelocate) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 1, 10, 100}; + std::vector> points; + populate(tree, points, N); + + size_t d_n = 0; + for (int x = 0; x < 10; ++x) { + size_t i = 0; + for (auto& p : points) { + auto pOld = p; + d_n = (d_n + 1) % deltas.size(); + scalar_64_t delta = deltas[d_n]; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + if (delta > 0.0 && tree.find(pNew) != tree.end()) { + // Skip this, there is already another entry + ASSERT_EQ(0, tree.relocate(pOld, pNew)); + } else { + ASSERT_EQ(1, tree.relocate(pOld, pNew)); + if (delta > 0.0) { + // second time fails because value has already been moved + ASSERT_EQ(0, tree.relocate(pOld, pNew)); + } + ASSERT_EQ(Id(i), *tree.find(pNew)); + p = pNew; + } + ++i; + } + PhTreeDebugHelper::CheckConsistency(tree); + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); + + // Check that empty tree works + ASSERT_EQ(0, tree.relocate(points[0], points[1])); + // Check that small tree works + tree.emplace(points[0], 1); + ASSERT_EQ(1, tree.relocate(points[0], points[1])); + ASSERT_EQ(tree.end(), tree.find(points[0])); + ASSERT_EQ(Id(1), *tree.find(points[1])); + ASSERT_EQ(1, tree.size()); + tree.clear(); + + // check that existing destination fails + tree.emplace(points[0], 1); + tree.emplace(points[1], 2); + ASSERT_EQ(0, tree.relocate(points[0], points[1])); +} + +TEST(PhTreeTest, TestUpdateWithRelocateIf) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 1, 10, 100}; + std::vector> points; + populate(tree, points, N); + + size_t d_n = 0; + for (int x = 0; x < 10; ++x) { + size_t i = 0; + size_t done = 0; + auto pred = [](const Id& id) { return id._i % 2 == 0; }; + for (auto& p : points) { + auto pOld = p; + d_n = (d_n + 1) % deltas.size(); + scalar_64_t delta = deltas[d_n]; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + if ((delta > 0.0 && tree.find(pNew) != tree.end()) || (i % 2 != 0)) { + // Skip this, there is already another entry + ASSERT_EQ(0, tree.relocate_if(pOld, pNew, pred)); + } else { + ASSERT_EQ(1, tree.relocate_if(pOld, pNew, pred)); + if (delta > 0.0) { + // second time fails because value has already been moved + ASSERT_EQ(0, tree.relocate_if(pOld, pNew, pred)); + } + ASSERT_EQ(Id(i), *tree.find(pNew)); + p = pNew; + ++done; + } + ++i; + } + ASSERT_GT(done, i * 0.4); + ASSERT_LT(done, i * 0.6); + PhTreeDebugHelper::CheckConsistency(tree); + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); + + // Check that empty tree works + auto pred = [](const Id&) { return true; }; + ASSERT_EQ(0, tree.relocate_if(points[0], points[1], pred)); + // Check that small tree works + tree.emplace(points[0], 1); + ASSERT_EQ(1, tree.relocate_if(points[0], points[1], pred)); + ASSERT_EQ(tree.end(), tree.find(points[0])); + ASSERT_EQ(Id(1), *tree.find(points[1])); + ASSERT_EQ(1, tree.size()); } TEST(PhTreeTest, TestEraseByIterator) { @@ -551,10 +711,13 @@ TEST(PhTreeTest, TestEraseByIterator) { for (auto& p : points) { auto iter = tree.find(p); ASSERT_NE(tree.end(), iter); - int count = tree.erase(iter); - ASSERT_EQ(1, count); + size_t count = tree.erase(iter); + ASSERT_EQ(1u, count); ASSERT_EQ(tree.end(), tree.find(p)); i++; + if (i % 100 == 0 || tree.size() < 10) { + PhTreeDebugHelper::CheckConsistency(tree); + } } ASSERT_EQ(0, tree.erase(tree.end())); @@ -570,8 +733,11 @@ TEST(PhTreeTest, TestEraseByIteratorQuery) { for (size_t i = 0; i < N; ++i) { auto iter = tree.begin(); ASSERT_NE(tree.end(), 
iter); - int count = tree.erase(iter); - ASSERT_EQ(1, count); + size_t count = tree.erase(iter); + ASSERT_EQ(1u, count); + if (i % 100 == 0 || tree.size() < 10) { + PhTreeDebugHelper::CheckConsistency(tree); + } } ASSERT_EQ(0, tree.erase(tree.end())); @@ -717,6 +883,32 @@ TEST(PhTreeTest, TestWindowQuery1) { ASSERT_EQ(N, n); } +TEST(PhTreeTest, TestWindowQuery1_WithFilter) { + size_t N = 1000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + struct Counter { + void operator()(TestPoint, Id& t) { + ++n_; + id_ = t; + } + Id id_{}; + size_t n_ = 0; + }; + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Counter callback{}; + FilterAABB filter(p, p, tree.converter()); + tree.for_each(callback, filter); + ASSERT_EQ(i, callback.id_._i); + ASSERT_EQ(1, callback.n_); + } +} + TEST(PhTreeTest, TestWindowQueryMany) { const dimension_t dim = 3; TestPoint min{-100, -100, -100}; @@ -746,7 +938,7 @@ TEST(PhTreeTest, TestWindowQueryManyMoving) { int query_length = 200; size_t nn = 0; - for (int i = -120; i < 120; i++) { + for (std::int64_t i = -120; i < 120; i++) { TestPoint min{i * 10, i * 9, i * 11}; TestPoint max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; std::set referenceResult; @@ -780,7 +972,7 @@ TEST(PhTreeTest, TestWindowForEachManyMoving) { int query_length = 200; size_t nn = 0; - for (int i = -120; i < 120; i++) { + for (std::int64_t i = -120; i < 120; i++) { TestPoint min{i * 10, i * 9, i * 11}; TestPoint max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; std::set referenceResult; @@ -1030,3 +1222,146 @@ TEST(PhTreeTest, SmokeTestPoint1) { ASSERT_EQ(0, tree.size()); ASSERT_TRUE(tree.empty()); } + +template +void test_tree(TREE& tree) { + PhPoint<3> p{1, 2, 3}; + + // test various operations + tree.emplace(p, Id{2}); // already exists + Id id3{3}; + tree.insert(p, id3); // already exists + ASSERT_EQ(tree.size(), 1); + ASSERT_EQ(tree.find(p).second()._i, 1); + ASSERT_EQ(tree[p]._i, 1); + + auto q_window = tree.begin_query({p, p}); + ASSERT_EQ(1, q_window->_i); + ++q_window; + ASSERT_EQ(q_window, tree.end()); + + auto q_extent = tree.begin(); + ASSERT_EQ(1, q_extent->_i); + ++q_extent; + ASSERT_EQ(q_extent, tree.end()); + + auto q_knn = tree.begin_knn_query(10, p, DistanceEuclidean<3>()); + ASSERT_EQ(1, q_knn->_i); + ++q_knn; + ASSERT_EQ(q_knn, tree.end()); + + ASSERT_EQ(1, tree.erase(p)); + ASSERT_EQ(0, tree.size()); + ASSERT_EQ(0, tree.erase(p)); + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); +} + +TEST(PhTreeTest, TestMoveConstruct) { + // Test edge case: only one entry in tree + PhPoint<3> p{1, 2, 3}; + TestTree<3, Id> tree1; + tree1.emplace(p, Id{1}); + + TestTree<3, Id> tree{std::move(tree1)}; + test_tree(tree); + tree.~PhTree(); +} + +TEST(PhTreeTest, TestMoveAssign) { + // Test edge case: only one entry in tree + PhPoint<3> p{1, 2, 3}; + TestTree<3, Id> tree1; + tree1.emplace(p, Id{1}); + + TestTree<3, Id> tree{}; + tree = std::move(tree1); + test_tree(tree); + tree.~PhTree(); +} + +size_t count_pre{0}; +size_t count_post{0}; +size_t count_query{0}; + +template +struct DebugConverterNoOp : public ConverterPointBase { + using BASE = ConverterPointBase; + using Point = typename BASE::KeyExternal; + using PointInternal = typename BASE::KeyInternal; + + constexpr const PointInternal& pre(const Point& point) const { + ++count_pre; + ++const_cast(count_pre_local); + return point; + } + + constexpr const Point& post(const PointInternal& point) const { + 
++count_post; + ++const_cast(count_post_local); + return point; + } + + constexpr const PhBox& pre_query(const PhBox& box) const { + ++count_query; + ++const_cast(count_query_local); + return box; + } + + size_t count_pre_local{0}; + size_t count_post_local{0}; + size_t count_query_local{0}; +}; + +TEST(PhTreeTest, TestMoveAssignCustomConverter) { + // Test edge case: only one entry in tree + PhPoint<3> p{1, 2, 3}; + auto converter = DebugConverterNoOp<3>(); + auto tree1 = PhTree<3, Id, DebugConverterNoOp<3>>(converter); + tree1.emplace(p, Id{1}); + ASSERT_GE(tree1.converter().count_pre_local, 1); + ASSERT_EQ(tree1.converter().count_pre_local, count_pre); + + PhTree<3, Id, DebugConverterNoOp<3>> tree{}; + tree = std::move(tree1); + // Assert that converter got moved (or copied?): + ASSERT_GE(tree.converter().count_pre_local, 1); + ASSERT_EQ(tree.converter().count_pre_local, count_pre); + + test_tree(tree); + ASSERT_GE(tree.converter().count_pre_local, 2); + ASSERT_EQ(tree.converter().count_pre_local, count_pre); + tree.~PhTree(); +} + +TEST(PhTreeTest, TestMovableIterators) { + // Test edge case: only one entry in tree + PhPoint<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + ASSERT_NE(tree.begin(), tree.end()); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + ASSERT_NE(tree.find(p), tree.end()); + + TestTree<3, Id>::QueryBox qb{{1, 2, 3}, {4, 5, 6}}; + FilterEvenId<3, Id> filter{}; + ASSERT_TRUE(std::is_move_constructible_v); + // Not movable due to constant fields + // ASSERT_TRUE(std::is_move_assignable_v); + + ASSERT_TRUE(std::is_move_constructible_v()))>); + // Not movable due to constant fields + // ASSERT_TRUE(std::is_move_assignable_v()))>); +} + +} // namespace phtree_test diff --git a/phtree/phtree_test_const_values.cc b/test/phtree_test_const_values.cc similarity index 97% rename from phtree/phtree_test_const_values.cc rename to test/phtree_test_const_values.cc index 2fcb123e..64dd432d 100644 --- a/phtree/phtree_test_const_values.cc +++ b/test/phtree_test_const_values.cc @@ -15,11 +15,13 @@ */ #include "phtree/phtree.h" -#include +#include #include using namespace improbable::phtree; +namespace phtree_test_const_values { + template using TestPoint = PhPoint; @@ -42,14 +44,12 @@ class IntRng { struct Id { Id() = default; - explicit Id(const int i) : _i(i){}; + explicit Id(const size_t i) : _i{static_cast(i)} {} - bool operator==(Id& rhs) { + bool operator==(const Id& rhs) const { return _i == rhs._i; } - Id& operator=(Id const& rhs) = default; - int _i; }; @@ -68,7 +68,7 @@ template double distance(const TestPoint& p1, const TestPoint& p2) { double sum2 = 0; for (dimension_t i = 0; i < DIM; i++) { - double d = p1[i] - p2[i]; + double d = (double)p1[i] - (double)p2[i]; sum2 += d * d; } return sqrt(sum2); @@ -276,7 +276,7 @@ TEST(PhTreeTestConst, TestEmplace) { ASSERT_EQ(i + 1, tree.size()); // try add again, this should _not_ replace the existing value - Id id2(-i); + Id id2(i + N); ASSERT_EQ(false, tree.emplace(p, id2).second); ASSERT_EQ(i, tree.emplace(p, id).first._i); ASSERT_EQ(tree.count(p), 1); @@ -409,8 +409,8 @@ TEST(PhTreeTestConst, TestUpdateWithEmplace) { for (auto& p : points) { auto pOld = p; TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; - int n = tree.erase(pOld); - ASSERT_EQ(1, n); + size_t n = 
tree.erase(pOld); + ASSERT_EQ(1u, n); tree.emplace(pNew, 42); ASSERT_EQ(1, tree.count(pNew)); ASSERT_EQ(0, tree.count(pOld)); @@ -590,7 +590,7 @@ TEST(PhTreeTestConst, TestWindowQueryManyMoving) { int query_length = 200; size_t nn = 0; - for (int i = -120; i < 120; i++) { + for (std::int64_t i = -120; i < 120; i++) { TestPoint min{i * 10, i * 9, i * 11}; TestPoint max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; std::set referenceResult; @@ -699,3 +699,5 @@ TEST(PhTreeTestConst, TestKnnQuery) { ASSERT_EQ(Nq, n); } } + +} // namespace phtree_test_const_values diff --git a/test/phtree_test_issues.cc b/test/phtree_test_issues.cc new file mode 100644 index 00000000..de9c67f1 --- /dev/null +++ b/test/phtree_test_issues.cc @@ -0,0 +1,208 @@ +/* + * Copyright 2022 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "phtree/phtree.h" +#include "phtree/phtree_multimap.h" +#include +#include +#include +#include + +using namespace improbable::phtree; + +using namespace std; + +namespace phtree_test_issues { + +#if defined(__clang__) || defined(__GNUC__) + +void mem_usage(double& vm_usage, double& resident_set) { + vm_usage = 0.0; + resident_set = 0.0; + ifstream stat_stream("/proc/self/stat", ios_base::in); // get info from proc directory + // create some variables to get info + string pid, comm, state, ppid, pgrp, session, tty_nr; + string tpgid, flags, minflt, cminflt, majflt, cmajflt; + string utime, stime, cutime, cstime, priority, nice; + string O, itrealvalue, starttime; + unsigned long vsize; + long rss; + stat_stream >> pid >> comm >> state >> ppid >> pgrp >> session >> tty_nr >> tpgid >> flags >> + minflt >> cminflt >> majflt >> cmajflt >> utime >> stime >> cutime >> cstime >> priority >> + nice >> O >> itrealvalue >> starttime >> vsize >> rss; // don't care about the rest + stat_stream.close(); + long page_size_kb = sysconf(_SC_PAGE_SIZE) / 1024; // for x86-64 is configured to use 2MB pages + vm_usage = vsize / 1024.0; + resident_set = rss * page_size_kb; +} + +int get_resident_mem_kb() { + double vm, rss; + mem_usage(vm, rss); + return rss; +} + +void print_mem() { + double vm, rss; + mem_usage(vm, rss); + cout << " Virtual Memory: " << vm << " KB" << std::endl + << " Resident set size: " << rss << " KB" << endl; +} + +#elif defined(_MSC_VER) +int get_resident_mem_kb() { + return 0; +} + +void print_mem() { + double vm = 0, rss = 0; + // mem_usage(vm, rss); + cout << " Virtual Memory: " << vm << " KB" << std::endl + << " Resident set size: " << rss << " KB" << endl; +} +#endif + +auto start_timer() { + return std::chrono::steady_clock::now(); +} + +template +void end_timer(T start, const char* prefix) { + auto end = std::chrono::steady_clock::now(); + std::chrono::duration elapsed_seconds1 = end - start; + std::cout << "elapsed time " << prefix << " = " << elapsed_seconds1.count() << " s" + << std::endl; +} + +// Disabled for cmake CI builds because it always fails +#if !defined(SKIP_TEST_MEMORY_LEAKS) +TEST(PhTreeTestIssues, 
TestIssue60) { + // auto tree = PhTreeMultiMapD<2, int>(); + auto tree = PhTreeMultiMapD<2, int, ConverterIEEE<2>, std::set>(); + std::vector> vecPos; + int dim = 1000; + int num = 1000; + + auto start1 = start_timer(); + for (int i = 0; i < num; ++i) { + PhPointD<2> p = {(double)(rand() % dim), (double)(rand() % dim)}; + vecPos.push_back(p); + tree.emplace(p, i); + } + end_timer(start1, "1"); + + // "warm up": relocate() will inevitably allocate a little bit of memory (new nodes etc). + // This warm up allocates this memory before we proceed to leak testing which ensures that the + // memory does not grow. + for (int j = 0; j < 100; ++j) { + for (int i = 0; i < num; ++i) { + PhPointD<2>& p = vecPos[i]; + PhPointD<2> newp = {(double)(rand() % dim), (double)(rand() % dim)}; + tree.relocate(p, newp, i); + p = newp; + } + } + + // Leak testing + print_mem(); + auto start2 = start_timer(); + auto mem_start_2 = get_resident_mem_kb(); + for (int j = 0; j < 100; ++j) { + for (int i = 0; i < num; ++i) { + PhPointD<2>& p = vecPos[i]; + PhPointD<2> newp = {(double)(rand() % dim), (double)(rand() % dim)}; + tree.relocate(p, newp, i); + p = newp; + } + } + end_timer(start2, "2"); + + auto mem_end_2 = get_resident_mem_kb(); + ASSERT_LT(abs(mem_end_2 - mem_start_2), 1); + print_mem(); +} +#endif + +// Disabled for cmake CI builds because it always fails +#if !defined(SKIP_TEST_MEMORY_LEAKS) +TEST(PhTreeTestIssues, TestIssue60_minimal) { + // auto tree = PhTreeMultiMapD<2, int>(); + auto tree = PhTreeMultiMapD<2, int, ConverterIEEE<2>, std::set>(); + std::vector> vecPos; + int dim = 1000; + int num = 1000; + + auto start1 = start_timer(); + for (int i = 0; i < num; ++i) { + PhPointD<2> p = {(double)(rand() % dim), (double)(rand() % dim)}; + vecPos.push_back(p); + tree.emplace(p, i); + } + end_timer(start1, "1"); + + // "warm up": relocate() will inevitably allocate a little bit of memory (new nodes etc). + // This warm up allocates this memory before we proceed to leak testing which ensures that the + // memory does not grow. 
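+    // Note (minimal variant): the leak-testing loop further below relocates each
+    // point by a fixed (+1, +1) offset instead of to a random position, so the
+    // moves stay small while relocate() is still exercised on every entry.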
+ for (int j = 0; j < 100; ++j) { + for (int i = 0; i < num; ++i) { + PhPointD<2>& p = vecPos[i]; + PhPointD<2> newp = {(double)(rand() % dim), (double)(rand() % dim)}; + tree.relocate(p, newp, i); + p = newp; + } + } + + // Leak testing + print_mem(); + auto start2 = start_timer(); + auto mem_start_2 = get_resident_mem_kb(); + for (int j = 0; j < 100; ++j) { + for (int i = 0; i < num; ++i) { + PhPointD<2>& p = vecPos[i]; + PhPointD<2> newp = {p[0] + 1, p[1] + 1}; + tree.relocate(p, newp, i); + p = newp; + } + } + end_timer(start2, "2"); + + auto mem_end_2 = get_resident_mem_kb(); + ASSERT_LT(abs(mem_end_2 - mem_start_2), 1); + print_mem(); +} +#endif + +TEST(PhTreeTestIssues, TestIssue6_3_MAP) { + auto tree = PhTreeD<2, int>(); + std::vector> vecPos; + int dim = 10000; + + int num = 100000; + for (int i = 0; i < num; ++i) { + PhPointD<2> p = {(double)(rand() % dim), (double)(rand() % dim)}; + vecPos.push_back(p); + tree.emplace(p, i); + } + + print_mem(); + for (int i = 0; i < num; ++i) { + PhPointD<2> p = vecPos[i]; + PhPointD<2> newp = {(double)(rand() % dim), (double)(rand() % dim)}; + tree.relocate(p, newp); + } + print_mem(); +} + +} // namespace phtree_test_issues diff --git a/phtree/phtree_test_ptr_values.cc b/test/phtree_test_ptr_values.cc similarity index 97% rename from phtree/phtree_test_ptr_values.cc rename to test/phtree_test_ptr_values.cc index a120ad1b..6368b477 100644 --- a/phtree/phtree_test_ptr_values.cc +++ b/test/phtree_test_ptr_values.cc @@ -15,11 +15,13 @@ */ #include "phtree/phtree.h" -#include +#include #include using namespace improbable::phtree; +namespace phtree_test_ptr_values { + template using TestPoint = PhPoint; @@ -44,12 +46,10 @@ struct Id { explicit Id(const size_t i) : _i((int)i){}; - bool operator==(Id& rhs) const { + bool operator==(const Id& rhs) const { return _i == rhs._i; } - Id& operator=(Id const& rhs) = default; - int _i; }; @@ -68,7 +68,7 @@ template double distance(const TestPoint& p1, const TestPoint& p2) { double sum2 = 0; for (dimension_t i = 0; i < DIM; i++) { - double d = p1[i] - p2[i]; + double d = (double)p1[i] - (double)p2[i]; sum2 += d * d; } return sqrt(sum2); @@ -286,7 +286,7 @@ TEST(PhTreeTestPtr, TestEmplace) { ASSERT_EQ(i + 1, tree.size()); // try add again, this should _not_ replace the existing value - Id* id2 = new Id(-i); + Id* id2 = new Id(i + N); ASSERT_EQ(false, tree.emplace(p, id2).second); ASSERT_EQ(i, tree.emplace(p, id).first->_i); ASSERT_EQ(tree.count(p), 1); @@ -296,11 +296,11 @@ TEST(PhTreeTestPtr, TestEmplace) { tree.emplace(p, id2).first->_i++; ASSERT_EQ(i + 1, tree.emplace(p, id).first->_i); tree.emplace(p, id2).first = id2; - ASSERT_EQ(-i, tree.emplace(p, id).first->_i); + ASSERT_EQ(i + N, tree.emplace(p, id).first->_i); // Replace it with previous value tree.emplace(p, id2).first = id; ASSERT_EQ(i + 1, tree.emplace(p, id).first->_i); - id->_i = i; + id->_i = (int)i; ASSERT_EQ(i, tree.emplace(p, id).first->_i); delete id2; } @@ -334,13 +334,13 @@ TEST(PhTreeTestPtr, TestSquareBrackets) { for (size_t i = 0; i < N; i++) { TestPoint& p = points.at(i); Id* id = new Id(i); - Id* id2 = new Id(-i); + Id* id2 = new Id(i + N); ASSERT_EQ(nullptr, tree[p]); tree[p] = id2; - ASSERT_EQ(-i, tree[p]->_i); + ASSERT_EQ(i + N, tree[p]->_i); ASSERT_EQ(tree.count(p), 1); if (i % 2 == 0) { - tree[p]->_i = i; + tree[p]->_i = (int)i; ASSERT_EQ(i, id2->_i); delete id; } else { @@ -666,9 +666,9 @@ TEST(PhTreeTestPtr, TestWindowQueryManyMoving) { int query_length = 200; size_t nn = 0; - for (int i = -120; i < 120; i++) { - TestPoint 
min{i * 10, i * 9, i * 11}; - TestPoint max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; + for (std::int64_t i = -120; i < 120; i++) { + TestPoint min{i * 10l, i * 9l, i * 11l}; + TestPoint max{i * 10l + query_length, i * 9l + query_length, i * 11l + query_length}; std::set referenceResult; referenceQuery(points, min, max, referenceResult); @@ -782,3 +782,5 @@ TEST(PhTreeTestPtr, TestKnnQuery) { } depopulate(values); } + +} // namespace phtree_test_ptr_values diff --git a/test/phtree_test_unique_ptr_values.cc b/test/phtree_test_unique_ptr_values.cc new file mode 100644 index 00000000..1be2bc0a --- /dev/null +++ b/test/phtree_test_unique_ptr_values.cc @@ -0,0 +1,301 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "phtree/phtree.h" +#include +#include + +using namespace improbable::phtree; + +namespace phtree_test_unique_ptr_values { + +template +using TestPoint = PhPoint; + +template +using TestTree = PhTree; + +class IntRng { + public: + IntRng(int minIncl, int maxExcl) : eng(7), rnd{minIncl, maxExcl} {} + + int next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_int_distribution rnd; +}; + +struct IdObj { + IdObj() = default; + + explicit IdObj(const size_t i) : _i(static_cast(i)){}; + + bool operator==(const IdObj& rhs) const { + return _i == rhs._i; + } + + IdObj& operator=(IdObj const& rhs) = default; + + int _i; +}; + +using Id = std::unique_ptr; + +struct PointDistance { + PointDistance(double distance, size_t id) : _distance(distance), _id(id) {} + + double _distance; + size_t _id; +}; + +bool comparePointDistance(PointDistance& i1, PointDistance& i2) { + return (i1._distance < i2._distance); +} + +template +double distance(const TestPoint& p1, const TestPoint& p2) { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; i++) { + double d = p1[i] - p2[i]; + sum2 += d * d; + } + return sqrt(sum2); +} + +template +double distanceL1(const TestPoint& p1, const TestPoint& p2) { + double sum = 0; + for (dimension_t i = 0; i < DIM; i++) { + sum += std::abs(p1[i] - p2[i]); + } + return sum; +} + +template +void generateCube(std::vector>& points, size_t N) { + IntRng rng(-1000, 1000); + auto refTree = std::map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N; i++) { + auto point = TestPoint{rng.next(), rng.next(), rng.next()}; + if (refTree.count(point) != 0) { + i--; + continue; + } + + refTree.emplace(point, i); + points.push_back(point); + } + ASSERT_EQ(refTree.size(), N); + ASSERT_EQ(points.size(), N); +} + +template +void SmokeTestBasicOps(int N) { + TestTree tree; + std::vector> points; + generateCube(points, N); + + ASSERT_EQ(0u, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); + + for (int i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 0u); + ASSERT_EQ(tree.end(), tree.find(p)); + + if (i % 2 == 0) { + ASSERT_TRUE(tree.emplace(p, std::make_unique(i)).second); + 
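+            // Even i: the unique_ptr is constructed inside the emplace call itself;
+            // odd i (else branch below) emplaces by moving a named Id, so both
+            // emplace paths are covered.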
} else { + Id id = std::make_unique(i); + ASSERT_TRUE(tree.emplace(p, std::move(id)).second); + } + ASSERT_EQ(tree.count(p), 1u); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(i, (*tree.find(p))->_i); + ASSERT_EQ(i + 1u, tree.size()); + + // try adding it again + ASSERT_FALSE(tree.emplace(p, std::make_unique(i)).second); + ASSERT_EQ(tree.count(p), 1u); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(i, (*tree.find(p))->_i); + ASSERT_EQ(i + 1u, tree.size()); + ASSERT_FALSE(tree.empty()); + } + + for (int i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query({p, p}); + ASSERT_NE(q, tree.end()); + ASSERT_EQ(i, (*q)->_i); + q++; + ASSERT_EQ(q, tree.end()); + } + + PhTreeDebugHelper::CheckConsistency(tree); + + for (int i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_NE(tree.find(p), tree.end()); + ASSERT_EQ(tree.count(p), 1u); + ASSERT_EQ(i, (*tree.find(p))->_i); + ASSERT_EQ(1u, tree.erase(p)); + + ASSERT_EQ(tree.count(p), 0u); + ASSERT_EQ(tree.end(), tree.find(p)); + ASSERT_EQ(N - i - 1u, tree.size()); + + // try remove again + ASSERT_EQ(0u, tree.erase(p)); + ASSERT_EQ(tree.count(p), 0u); + ASSERT_EQ(tree.end(), tree.find(p)); + ASSERT_EQ(N - i - 1u, tree.size()); + if (i < N - 1) { + ASSERT_FALSE(tree.empty()); + } + } + ASSERT_EQ(0u, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); +} + +TEST(PhTreeTestUniquePtr, SmokeTestBasicOps) { + SmokeTestBasicOps<3>(10000); + SmokeTestBasicOps<6>(10000); + SmokeTestBasicOps<10>(1000); + SmokeTestBasicOps<20>(100); +} + +template +void populate(TestTree& tree, std::vector>& points, size_t N) { + generateCube(points, N); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.emplace(points[i], std::make_unique(i)).second); + } + ASSERT_EQ(N, tree.size()); +} + +TEST(PhTreeTestUniquePtr, TestUpdateWithRelocate) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 1, 10, 100}; + std::vector> points; + populate(tree, points, N); + + size_t d_n = 0; + for (int x = 0; x < 10; ++x) { + int i = 0; + for (auto& p : points) { + auto pOld = p; + d_n = (d_n + 1) % deltas.size(); + scalar_64_t delta = deltas[d_n]; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + if (delta > 0 && tree.find(pNew) != tree.end()) { + // Skip this, there is already another entry + ASSERT_EQ(0, tree.relocate(pOld, pNew)); + } else { + ASSERT_EQ(1, tree.relocate(pOld, pNew)); + if (delta > 0) { + // second time fails because value has already been moved + ASSERT_EQ(0, tree.relocate(pOld, pNew)); + } + ASSERT_EQ(i, (*tree.find(pNew))->_i); + p = pNew; + } + ++i; + } + PhTreeDebugHelper::CheckConsistency(tree); + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); + + // Check that empty tree works + ASSERT_EQ(0, tree.relocate(points[0], points[1])); + // Check that small tree works + tree.emplace(points[0], std::make_unique(1)); + ASSERT_EQ(1u, tree.relocate(points[0], points[1])); + ASSERT_EQ(tree.end(), tree.find(points[0])); + ASSERT_EQ(1, (*tree.find(points[1]))->_i); + ASSERT_EQ(1u, tree.size()); + tree.clear(); + + // check that existing destination fails + tree.emplace(points[0], std::make_unique(1)); + tree.emplace(points[1], std::make_unique(2)); + ASSERT_EQ(0, tree.relocate(points[0], points[1])); +} + +TEST(PhTreeTestUniquePtr, TestUpdateWithRelocateIf) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 1, 10, 100}; + std::vector> points; + populate(tree, points, N); + + size_t d_n = 0; + for (int x 
= 0; x < 10; ++x) { + int i = 0; + size_t done = 0; + auto pred = [](const Id& id) { return id->_i % 2 == 0; }; + for (auto& p : points) { + auto pOld = p; + d_n = (d_n + 1) % deltas.size(); + scalar_64_t delta = deltas[d_n]; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + if ((delta > 0 && tree.find(pNew) != tree.end()) || (i % 2 != 0)) { + // Skip this, there is already another entry + ASSERT_EQ(0, tree.relocate_if(pOld, pNew, pred)); + } else { + ASSERT_EQ(1, tree.relocate_if(pOld, pNew, pred)); + if (delta > 0) { + // second time fails because value has already been moved + ASSERT_EQ(0, tree.relocate_if(pOld, pNew, pred)); + } + ASSERT_EQ(i, (*tree.find(pNew))->_i); + p = pNew; + ++done; + } + ++i; + } + ASSERT_GT(done, i * 0.4); + ASSERT_LT(done, i * 0.6); + PhTreeDebugHelper::CheckConsistency(tree); + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); + + // Check that empty tree works + auto pred = [](const Id&) { return true; }; + ASSERT_EQ(0, tree.relocate_if(points[0], points[1], pred)); + // Check that small tree works + tree.emplace(points[0], std::make_unique(1)); + ASSERT_EQ(1, tree.relocate_if(points[0], points[1], pred)); + ASSERT_EQ(tree.end(), tree.find(points[0])); + ASSERT_EQ(1, (*tree.find(points[1]))->_i); + ASSERT_EQ(1u, tree.size()); +} + +} // namespace phtree_test_unique_ptr_values diff --git a/tools/bazel b/tools/bazel deleted file mode 100755 index 03324532..00000000 --- a/tools/bazel +++ /dev/null @@ -1,86 +0,0 @@ -#!/usr/bin/env bash - -TOOLS_DIR="$(dirname "$0")" - -source "${TOOLS_DIR}"/../ci/includes/os.sh -source "${TOOLS_DIR}"/../ci/includes/bazel.sh - -# All information required for the script to select or, if necessary, install bazel is contained -# in this code block. -# If a higher version of bazel is required, update `REQUIRED_BAZEL_VERSION` and the -# `REQUIRED_BAZEL_SHA256` values for each platform. -REQUIRED_BAZEL_VERSION="$(getBazelVersion)" -BAZEL_INSTALLATION_DIR="${HOME}/.bazel_installations/${REQUIRED_BAZEL_VERSION}" -if isLinux; then - DOWNLOAD_CMD="wget -q --no-clobber -O bazel" - BAZEL_EXE="bazel-${REQUIRED_BAZEL_VERSION}-linux-x86_64" - - if which clang-10 1>/dev/null; then - # We follow the symlink of clang-10 here to avoid a bug with the LLVM package when combined with -no-canonical-prefixes. - export CC="$(readlink -f "$(which clang-10)")" - else - echo -e "\033[0;33mWarning: You don't seem to have clang-9 correctly installed. Please check README.md to ensure your compiler is set up correctly. Continuing with whatever compiler bazel detects, your mileage might vary.\033[0m" - fi -elif isMacOS; then - DOWNLOAD_CMD="wget -q --no-clobber -O bazel" - BAZEL_EXE="bazel-${REQUIRED_BAZEL_VERSION}-darwin-x86_64" -else - DOWNLOAD_CMD="curl -L -s -o bazel.exe" - # Windows does not have an installer but retrieves the executable directly. - BAZEL_EXE="bazel-${REQUIRED_BAZEL_VERSION}-windows-x86_64.exe" - - export BAZEL_VC="C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC" - if [[ ! -d "$BAZEL_VC" ]]; then - export BAZEL_VC="C:\Program Files (x86)\Microsoft Visual Studio\2019\Professional\VC" - fi - if [[ ! -d "$BAZEL_VC" ]]; then - echo -e "\033[0;33mWarning: You don't seem to have Visual Studio 2019 installed correctly. Continuing with whatever compiler bazel detects, your mileage might vary.\033[0m" - fi -fi - -BAZEL_TARGET_PATH="${BAZEL_INSTALLATION_DIR}/bin/bazel" - -# Check if correct version is already installed. -if [[ -f "${BAZEL_TARGET_PATH}" ]]; then - if [[ ! 
-x "${BAZEL_TARGET_PATH}" ]]; then - echo "ERROR: Bazel executable at '${BAZEL_TARGET_PATH}' does not have execute permission" - stat "${BAZEL_TARGET_PATH}" - exit 1 - fi - BAZEL_SUBCOMMAND="$1" - shift - exec -a "$0" "${BAZEL_TARGET_PATH}" "$BAZEL_SUBCOMMAND" "$@" -fi - -cat << EOM -================================================= -Bazel version ${REQUIRED_BAZEL_VERSION} is not -installed under ~/.bazel_installations - -Installing bazel ${REQUIRED_BAZEL_VERSION} now... -================================================= -EOM - -# Create root directory if needed. -if [[ ! -d "${BAZEL_INSTALLATION_DIR}" ]]; then - echo "Installation directory created." - mkdir -p "${BAZEL_INSTALLATION_DIR}" -fi - -# Install correct bazel version. -# If we don't have a local Bazel install at this point we need to retrieve the right version from GitHub. -mkdir -p "${BAZEL_INSTALLATION_DIR}/bin/tmp" -pushd "${BAZEL_INSTALLATION_DIR}/bin/tmp" -rm bazel 2>/dev/null || true # Remove bazel binary if already present in tmp dir - indicates previous failed download. -echo "Starting download of bazel ${REQUIRED_BAZEL_VERSION}..." -${DOWNLOAD_CMD} "https://github.com/bazelbuild/bazel/releases/download/${REQUIRED_BAZEL_VERSION}/${BAZEL_EXE}" -echo "Download finished." -# Mark downloaded file executable and move out of tmp directory. -chmod a+x "bazel" -mv bazel .. -popd - -echo "Executing downloaded bazel..." -BAZEL_SUBCOMMAND="$1" -shift -exec -a "$0" "${BAZEL_TARGET_PATH}" "$BAZEL_SUBCOMMAND" "$@" diff --git a/tools/build_rules/http.bzl b/tools/build_rules/http.bzl deleted file mode 100644 index 26e5ba2f..00000000 --- a/tools/build_rules/http.bzl +++ /dev/null @@ -1,463 +0,0 @@ -# Copyright 2016 The Bazel Authors. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Rules for downloading files and archives over HTTP. -### Setup -To use these rules, load them in your `WORKSPACE` file as follows: -```python -load( - "@bazel_tools//tools/build_defs/repo:http.bzl", - "http_archive", - "http_file", - "http_jar", -) -``` -These rules are improved versions of the native http rules and will eventually -replace the native rules. -""" - -load( - ":utils.bzl", - "patch", - "read_netrc", - "update_attrs", - "use_netrc", - "workspace_and_buildfile", -) - -# Shared between http_jar, http_file and http_archive. -_AUTH_PATTERN_DOC = """An optional dict mapping host names to custom authorization patterns. -If a URL's host name is present in this dict the value will be used as a pattern when -generating the authorization header for the http request. This enables the use of custom -authorization schemes used in a lot of common cloud storage providers. -The pattern currently supports 2 tokens: <login> and -<password>, which are replaced with their equivalent value -in the netrc file for the same host name. After formatting, the result is set -as the value for the Authorization field of the HTTP request. -Example attribute and netrc for a http download to an oauth2 enabled API using a bearer token: -
-auth_patterns = {
-    "storage.cloudprovider.com": "Bearer <password>"
-}
-
-netrc: -
-machine storage.cloudprovider.com
-        password RANDOM-TOKEN
-
-The final HTTP request would have the following header: -
-Authorization: Bearer RANDOM-TOKEN
-
-""" - -def _get_auth(ctx, urls): - """Given the list of URLs obtain the correct auth dict.""" - if ctx.attr.netrc: - netrc = read_netrc(ctx, ctx.attr.netrc) - return use_netrc(netrc, urls, ctx.attr.auth_patterns) - - if "HOME" in ctx.os.environ and not ctx.os.name.startswith("windows"): - netrcfile = "%s/.netrc" % (ctx.os.environ["HOME"]) - if ctx.execute(["test", "-f", netrcfile]).return_code == 0: - netrc = read_netrc(ctx, netrcfile) - return use_netrc(netrc, urls, ctx.attr.auth_patterns) - - if "USERPROFILE" in ctx.os.environ and ctx.os.name.startswith("windows"): - netrcfile = "%s/.netrc" % (ctx.os.environ["USERPROFILE"]) - if ctx.path(netrcfile).exists: - netrc = read_netrc(ctx, netrcfile) - return use_netrc(netrc, urls, ctx.attr.auth_patterns) - - return {} - -def _http_archive_impl(ctx): - """Implementation of the http_archive rule.""" - if not ctx.attr.url and not ctx.attr.urls: - fail("At least one of url and urls must be provided") - if ctx.attr.build_file and ctx.attr.build_file_content: - fail("Only one of build_file and build_file_content can be provided.") - - all_urls = [] - if ctx.attr.urls: - all_urls = ctx.attr.urls - if ctx.attr.url: - all_urls = [ctx.attr.url] + all_urls - - auth = _get_auth(ctx, all_urls) - - download_info = ctx.download_and_extract( - all_urls, - "", - ctx.attr.sha256, - ctx.attr.type, - ctx.attr.strip_prefix, - canonical_id = ctx.attr.canonical_id, - auth = auth, - ) - workspace_and_buildfile(ctx) - patch(ctx) - - return update_attrs(ctx.attr, _http_archive_attrs.keys(), {"sha256": download_info.sha256}) - -_HTTP_FILE_BUILD = """ -package(default_visibility = ["//visibility:public"]) -filegroup( - name = "file", - srcs = ["{}"], -) -""" - -def _http_file_impl(ctx): - """Implementation of the http_file rule.""" - repo_root = ctx.path(".") - forbidden_files = [ - repo_root, - ctx.path("WORKSPACE"), - ctx.path("BUILD"), - ctx.path("BUILD.bazel"), - ctx.path("file/BUILD"), - ctx.path("file/BUILD.bazel"), - ] - downloaded_file_path = ctx.attr.downloaded_file_path - download_path = ctx.path("file/" + downloaded_file_path) - if download_path in forbidden_files or not str(download_path).startswith(str(repo_root)): - fail("'%s' cannot be used as downloaded_file_path in http_file" % ctx.attr.downloaded_file_path) - auth = _get_auth(ctx, ctx.attr.urls) - download_info = ctx.download( - ctx.attr.urls, - "file/" + downloaded_file_path, - ctx.attr.sha256, - ctx.attr.executable, - canonical_id = ctx.attr.canonical_id, - auth = auth, - ) - ctx.file("WORKSPACE", "workspace(name = \"{name}\")".format(name = ctx.name)) - ctx.file("file/BUILD", _HTTP_FILE_BUILD.format(downloaded_file_path)) - - return update_attrs(ctx.attr, _http_file_attrs.keys(), {"sha256": download_info.sha256}) - -_HTTP_JAR_BUILD = """ -load("@rules_java//java:defs.bzl", "java_import") -package(default_visibility = ["//visibility:public"]) -java_import( - name = 'jar', - jars = ['downloaded.jar'], - visibility = ['//visibility:public'], -) -filegroup( - name = 'file', - srcs = ['downloaded.jar'], - visibility = ['//visibility:public'], -) -""" - -def _http_jar_impl(ctx): - """Implementation of the http_jar rule.""" - all_urls = [] - if ctx.attr.urls: - all_urls = ctx.attr.urls - if ctx.attr.url: - all_urls = [ctx.attr.url] + all_urls - auth = _get_auth(ctx, all_urls) - download_info = ctx.download( - all_urls, - "jar/downloaded.jar", - ctx.attr.sha256, - canonical_id = ctx.attr.canonical_id, - auth = auth, - ) - ctx.file("WORKSPACE", "workspace(name = \"{name}\")".format(name = ctx.name)) - 
ctx.file("jar/BUILD", _HTTP_JAR_BUILD) - return update_attrs(ctx.attr, _http_jar_attrs.keys(), {"sha256": download_info.sha256}) - -_http_archive_attrs = { - "url": attr.string( - doc = - """A URL to a file that will be made available to Bazel. -This must be a file, http or https URL. Redirections are followed. -Authentication is not supported. -This parameter is to simplify the transition from the native http_archive -rule. More flexibility can be achieved by the urls parameter that allows -to specify alternative URLs to fetch from. -""", - ), - "urls": attr.string_list( - doc = - """A list of URLs to a file that will be made available to Bazel. -Each entry must be a file, http or https URL. Redirections are followed. -Authentication is not supported.""", - ), - "sha256": attr.string( - doc = """The expected SHA-256 of the file downloaded. -This must match the SHA-256 of the file downloaded. _It is a security risk -to omit the SHA-256 as remote files can change._ At best omitting this -field will make your build non-hermetic. It is optional to make development -easier but should be set before shipping.""", - ), - "netrc": attr.string( - doc = "Location of the .netrc file to use for authentication", - ), - "auth_patterns": attr.string_dict( - doc = _AUTH_PATTERN_DOC, - ), - "canonical_id": attr.string( - doc = """A canonical id of the archive downloaded. -If specified and non-empty, bazel will not take the archive from cache, -unless it was added to the cache by a request with the same canonical id. -""", - ), - "strip_prefix": attr.string( - doc = """A directory prefix to strip from the extracted files. -Many archives contain a top-level directory that contains all of the useful -files in archive. Instead of needing to specify this prefix over and over -in the `build_file`, this field can be used to strip it from all of the -extracted files. -For example, suppose you are using `foo-lib-latest.zip`, which contains the -directory `foo-lib-1.2.3/` under which there is a `WORKSPACE` file and are -`src/`, `lib/`, and `test/` directories that contain the actual code you -wish to build. Specify `strip_prefix = "foo-lib-1.2.3"` to use the -`foo-lib-1.2.3` directory as your top-level directory. -Note that if there are files outside of this directory, they will be -discarded and inaccessible (e.g., a top-level license file). This includes -files/directories that start with the prefix but are not in the directory -(e.g., `foo-lib-1.2.3.release-notes`). If the specified prefix does not -match a directory in the archive, Bazel will return an error.""", - ), - "type": attr.string( - doc = """The archive type of the downloaded file. -By default, the archive type is determined from the file extension of the -URL. If the file has no extension, you can explicitly specify one of the -following: `"zip"`, `"jar"`, `"war"`, `"tar"`, `"tar.gz"`, `"tgz"`, -`"tar.xz"`, or `tar.bz2`.""", - ), - "patches": attr.label_list( - default = [], - doc = - "A list of files that are to be applied as patches after " + - "extracting the archive. By default, it uses the Bazel-native patch implementation " + - "which doesn't support fuzz match and binary patch, but Bazel will fall back to use " + - "patch command line tool if `patch_tool` attribute is specified or there are " + - "arguments other than `-p` in `patch_args` attribute.", - ), - "patch_tool": attr.string( - default = "", - doc = "The patch(1) utility to use. 
If this is specified, Bazel will use the specifed " + - "patch tool instead of the Bazel-native patch implementation.", - ), - "patch_args": attr.string_list( - default = ["-p0"], - doc = - "The arguments given to the patch tool. Defaults to -p0, " + - "however -p1 will usually be needed for patches generated by " + - "git. If multiple -p arguments are specified, the last one will take effect." + - "If arguments other than -p are specified, Bazel will fall back to use patch " + - "command line tool instead of the Bazel-native patch implementation. When falling " + - "back to patch command line tool and patch_tool attribute is not specified, " + - "`patch` will be used.", - ), - "patch_cmds": attr.string_list( - default = [], - doc = "Sequence of Bash commands to be applied on Linux/Macos after patches are applied.", - ), - "patch_cmds_win": attr.string_list( - default = [], - doc = "Sequence of Powershell commands to be applied on Windows after patches are " + - "applied. If this attribute is not set, patch_cmds will be executed on Windows, " + - "which requires Bash binary to exist.", - ), - "build_file": attr.label( - allow_single_file = True, - doc = - "The file to use as the BUILD file for this repository." + - "This attribute is an absolute label (use '@//' for the main " + - "repo). The file does not need to be named BUILD, but can " + - "be (something like BUILD.new-repo-name may work well for " + - "distinguishing it from the repository's actual BUILD files. " + - "Either build_file or build_file_content can be specified, but " + - "not both.", - ), - "build_file_content": attr.string( - doc = - "The content for the BUILD file for this repository. " + - "Either build_file or build_file_content can be specified, but " + - "not both.", - ), - "workspace_file": attr.label( - doc = - "The file to use as the `WORKSPACE` file for this repository. " + - "Either `workspace_file` or `workspace_file_content` can be " + - "specified, or neither, but not both.", - ), - "workspace_file_content": attr.string( - doc = - "The content for the WORKSPACE file for this repository. " + - "Either `workspace_file` or `workspace_file_content` can be " + - "specified, or neither, but not both.", - ), -} - -http_archive = repository_rule( - implementation = _http_archive_impl, - attrs = _http_archive_attrs, - doc = - """Downloads a Bazel repository as a compressed archive file, decompresses it, -and makes its targets available for binding. -It supports the following file extensions: `"zip"`, `"jar"`, `"war"`, `"tar"`, -`"tar.gz"`, `"tgz"`, `"tar.xz"`, and `tar.bz2`. -Examples: - Suppose the current repository contains the source code for a chat program, - rooted at the directory `~/chat-app`. It needs to depend on an SSL library - which is available from http://example.com/openssl.zip. 
This `.zip` file - contains the following directory structure: - ``` - WORKSPACE - src/ - openssl.cc - openssl.h - ``` - In the local repository, the user creates a `openssl.BUILD` file which - contains the following target definition: - ```python - cc_library( - name = "openssl-lib", - srcs = ["src/openssl.cc"], - hdrs = ["src/openssl.h"], - ) - ``` - Targets in the `~/chat-app` repository can depend on this target if the - following lines are added to `~/chat-app/WORKSPACE`: - ```python - load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") - http_archive( - name = "my_ssl", - urls = ["http://example.com/openssl.zip"], - sha256 = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", - build_file = "@//:openssl.BUILD", - ) - ``` - Then targets would specify `@my_ssl//:openssl-lib` as a dependency. -""", -) - -_http_file_attrs = { - "executable": attr.bool( - doc = "If the downloaded file should be made executable.", - ), - "downloaded_file_path": attr.string( - default = "downloaded", - doc = "Path assigned to the file downloaded", - ), - "sha256": attr.string( - doc = """The expected SHA-256 of the file downloaded. -This must match the SHA-256 of the file downloaded. _It is a security risk -to omit the SHA-256 as remote files can change._ At best omitting this -field will make your build non-hermetic. It is optional to make development -easier but should be set before shipping.""", - ), - "canonical_id": attr.string( - doc = """A canonical id of the archive downloaded. -If specified and non-empty, bazel will not take the archive from cache, -unless it was added to the cache by a request with the same canonical id. -""", - ), - "urls": attr.string_list( - mandatory = True, - doc = """A list of URLs to a file that will be made available to Bazel. -Each entry must be a file, http or https URL. Redirections are followed. -Authentication is not supported.""", - ), - "netrc": attr.string( - doc = "Location of the .netrc file to use for authentication", - ), - "auth_patterns": attr.string_dict( - doc = _AUTH_PATTERN_DOC, - ), -} - -http_file = repository_rule( - implementation = _http_file_impl, - attrs = _http_file_attrs, - doc = - """Downloads a file from a URL and makes it available to be used as a file -group. -Examples: - Suppose you need to have a debian package for your custom rules. This package - is available from http://example.com/package.deb. Then you can add to your - WORKSPACE file: - ```python - load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_file") - http_file( - name = "my_deb", - urls = ["http://example.com/package.deb"], - sha256 = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", - ) - ``` - Targets would specify `@my_deb//file` as a dependency to depend on this file. -""", -) - -_http_jar_attrs = { - "sha256": attr.string( - doc = "The expected SHA-256 of the file downloaded.", - ), - "canonical_id": attr.string( - doc = """A canonical id of the archive downloaded. -If specified and non-empty, bazel will not take the archive from cache, -unless it was added to the cache by a request with the same canonical id. -""", - ), - "url": attr.string( - doc = - "The URL to fetch the jar from. It must end in `.jar`.", - ), - "urls": attr.string_list( - doc = - "A list of URLS the jar can be fetched from. 
They have to end " + - "in `.jar`.", - ), - "netrc": attr.string( - doc = "Location of the .netrc file to use for authentication", - ), - "auth_patterns": attr.string_dict( - doc = _AUTH_PATTERN_DOC, - ), -} - -http_jar = repository_rule( - implementation = _http_jar_impl, - attrs = _http_jar_attrs, - doc = - """Downloads a jar from a URL and makes it available as java_import -Downloaded files must have a .jar extension. -Examples: - Suppose the current repository contains the source code for a chat program, rooted at the - directory `~/chat-app`. It needs to depend on an SSL library which is available from - `http://example.com/openssl-0.2.jar`. - Targets in the `~/chat-app` repository can depend on this target if the following lines are - added to `~/chat-app/WORKSPACE`: - ```python - load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_jar") - http_jar( - name = "my_ssl", - url = "http://example.com/openssl-0.2.jar", - sha256 = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", - ) - ``` - Targets would specify @my_ssl//jar as a dependency to depend on this jar. - You may also reference files on the current system (localhost) by using "file:///path/to/file" - if you are on Unix-based systems. If you're on Windows, use "file:///c:/path/to/file". In both - examples, note the three slashes (`/`) -- the first two slashes belong to `file://` and the third - one belongs to the absolute path to the file. -""", -) diff --git a/tools/build_rules/utils.bzl b/tools/build_rules/utils.bzl deleted file mode 100644 index b2a70051..00000000 --- a/tools/build_rules/utils.bzl +++ /dev/null @@ -1,322 +0,0 @@ -# Copyright 2018 The Bazel Authors. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Utils for manipulating external repositories, once fetched. -### Setup -These utilities are intended to be used by other repository rules. They -can be loaded as follows. -```python -load( - "@bazel_tools//tools/build_defs/repo:utils.bzl", - "workspace_and_buildfile", - "patch", - "update_attrs", -) -``` -""" - -def workspace_and_buildfile(ctx): - """Utility function for writing WORKSPACE and, if requested, a BUILD file. - This rule is intended to be used in the implementation function of a - repository rule. - It assumes the parameters `name`, `build_file`, `build_file_content`, - `workspace_file`, and `workspace_file_content` to be - present in `ctx.attr`; the latter four possibly with value None. - Args: - ctx: The repository context of the repository rule calling this utility - function. 
- """ - if ctx.attr.build_file and ctx.attr.build_file_content: - ctx.fail("Only one of build_file and build_file_content can be provided.") - - if ctx.attr.workspace_file and ctx.attr.workspace_file_content: - ctx.fail("Only one of workspace_file and workspace_file_content can be provided.") - - if ctx.attr.workspace_file: - ctx.file("WORKSPACE", ctx.read(ctx.attr.workspace_file)) - elif ctx.attr.workspace_file_content: - ctx.file("WORKSPACE", ctx.attr.workspace_file_content) - else: - ctx.file("WORKSPACE", "workspace(name = \"{name}\")\n".format(name = ctx.name)) - - if ctx.attr.build_file: - ctx.file("BUILD.bazel", ctx.read(ctx.attr.build_file)) - elif ctx.attr.build_file_content: - ctx.file("BUILD.bazel", ctx.attr.build_file_content) - -def _is_windows(ctx): - return ctx.os.name.lower().find("windows") != -1 - -def _use_native_patch(patch_args): - """If patch_args only contains -p options, we can use the native patch implementation.""" - for arg in patch_args: - if not arg.startswith("-p"): - return False - return True - -def patch(ctx, patches = None, patch_cmds = None, patch_cmds_win = None, patch_tool = None, patch_args = None): - """Implementation of patching an already extracted repository. - This rule is intended to be used in the implementation function of - a repository rule. If the parameters `patches`, `patch_tool`, - `patch_args`, `patch_cmds` and `patch_cmds_win` are not specified - then they are taken from `ctx.attr`. - Args: - ctx: The repository context of the repository rule calling this utility - function. - patches: The patch files to apply. List of strings, Labels, or paths. - patch_cmds: Bash commands to run for patching, passed one at a - time to bash -c. List of strings - patch_cmds_win: Powershell commands to run for patching, passed - one at a time to powershell /c. List of strings. If the - boolean value of this parameter is false, patch_cmds will be - used and this parameter will be ignored. - patch_tool: Path of the patch tool to execute for applying - patches. String. - patch_args: Arguments to pass to the patch tool. List of strings. 
- """ - bash_exe = ctx.os.environ["BAZEL_SH"] if "BAZEL_SH" in ctx.os.environ else "bash" - powershell_exe = ctx.os.environ["BAZEL_POWERSHELL"] if "BAZEL_POWERSHELL" in ctx.os.environ else "powershell.exe" - - if patches == None and hasattr(ctx.attr, "patches"): - patches = ctx.attr.patches - if patches == None: - patches = [] - - if patch_cmds == None and hasattr(ctx.attr, "patch_cmds"): - patch_cmds = ctx.attr.patch_cmds - if patch_cmds == None: - patch_cmds = [] - - if patch_cmds_win == None and hasattr(ctx.attr, "patch_cmds_win"): - patch_cmds_win = ctx.attr.patch_cmds_win - if patch_cmds_win == None: - patch_cmds_win = [] - - if patch_tool == None and hasattr(ctx.attr, "patch_tool"): - patch_tool = ctx.attr.patch_tool - if not patch_tool: - patch_tool = "patch" - native_patch = True - else: - native_patch = False - - if patch_args == None and hasattr(ctx.attr, "patch_args"): - patch_args = ctx.attr.patch_args - if patch_args == None: - patch_args = [] - - if len(patches) > 0 or len(patch_cmds) > 0: - ctx.report_progress("Patching repository") - - if native_patch and _use_native_patch(patch_args): - if patch_args: - strip = int(patch_args[-1][2:]) - else: - strip = 0 - for patchfile in patches: - ctx.patch(patchfile, strip) - else: - for patchfile in patches: - command = "{patchtool} {patch_args} < {patchfile}".format( - patchtool = patch_tool, - patchfile = ctx.path(patchfile), - patch_args = " ".join([ - "'%s'" % arg - for arg in patch_args - ]), - ) - st = ctx.execute([bash_exe, "-c", command]) - if st.return_code: - fail("Error applying patch %s:\n%s%s" % - (str(patchfile), st.stderr, st.stdout)) - - if _is_windows(ctx) and patch_cmds_win: - for cmd in patch_cmds_win: - st = ctx.execute([powershell_exe, "/c", cmd]) - if st.return_code: - fail("Error applying patch command %s:\n%s%s" % - (cmd, st.stdout, st.stderr)) - else: - for cmd in patch_cmds: - st = ctx.execute([bash_exe, "-c", cmd]) - if st.return_code: - fail("Error applying patch command %s:\n%s%s" % - (cmd, st.stdout, st.stderr)) - -def update_attrs(orig, keys, override): - """Utility function for altering and adding the specified attributes to a particular repository rule invocation. - This is used to make a rule reproducible. - Args: - orig: dict of actually set attributes (either explicitly or implicitly) - by a particular rule invocation - keys: complete set of attributes defined on this rule - override: dict of attributes to override or add to orig - Returns: - dict of attributes with the keys from override inserted/updated - """ - result = {} - for key in keys: - if getattr(orig, key) != None: - result[key] = getattr(orig, key) - result["name"] = orig.name - result.update(override) - return result - -def maybe(repo_rule, name, **kwargs): - """Utility function for only adding a repository if it's not already present. - This is to implement safe repositories.bzl macro documented in - https://docs.bazel.build/versions/master/skylark/deploying.html#dependencies. - Args: - repo_rule: repository rule function. - name: name of the repository to create. - **kwargs: remaining arguments that are passed to the repo_rule function. - Returns: - Nothing, defines the repository when needed as a side-effect. - """ - if not native.existing_rule(name): - repo_rule(name = name, **kwargs) - -def read_netrc(ctx, filename): - """Utility function to parse at least a basic .netrc file. - Args: - ctx: The repository context of the repository rule calling this utility - function. 
- filename: the name of the .netrc file to read - Returns: - dict mapping a machine names to a dict with the information provided - about them - """ - contents = ctx.read(filename) - - # Parse the file. This is mainly a token-based update of a simple state - # machine, but we need to keep the line structure to correctly determine - # the end of a `macdef` command. - netrc = {} - currentmachinename = None - currentmachine = {} - macdef = None - currentmacro = "" - cmd = None - for line in contents.splitlines(): - if line.startswith("#"): - # Comments start with #. Ignore these lines. - continue - elif macdef: - # as we're in a macro, just determine if we reached the end. - if line: - currentmacro += line + "\n" - else: - # reached end of macro, add it - currentmachine[macdef] = currentmacro - macdef = None - currentmacro = "" - else: - # Essentially line.split(None) which starlark does not support. - tokens = [ - w.strip() - for w in line.split(" ") - if len(w.strip()) > 0 - ] - for token in tokens: - if cmd: - # we have a command that expects another argument - if cmd == "machine": - # a new machine definition was provided, so save the - # old one, if present - if not currentmachinename == None: - netrc[currentmachinename] = currentmachine - currentmachine = {} - currentmachinename = token - elif cmd == "macdef": - macdef = "macdef %s" % (token,) - # a new macro definition; the documentation says - # "its contents begin with the next .netrc line [...]", - # so should there really be tokens left in the current - # line, they're not part of the macro. - - else: - currentmachine[cmd] = token - cmd = None - elif token in [ - "machine", - "login", - "password", - "account", - "macdef", - ]: - # command takes one argument - cmd = token - elif token == "default": - # defines the default machine; again, store old machine - if not currentmachinename == None: - netrc[currentmachinename] = currentmachine - - # We use the empty string for the default machine, as that - # can never be a valid hostname ("default" could be, in the - # default search domain). - currentmachinename = "" - currentmachine = {} - else: - fail("Unexpected token '%s' while reading %s" % - (token, filename)) - if not currentmachinename == None: - netrc[currentmachinename] = currentmachine - return netrc - -def use_netrc(netrc, urls, patterns): - """Compute an auth dict from a parsed netrc file and a list of URLs. - Args: - netrc: a netrc file already parsed to a dict, e.g., as obtained from - read_netrc - urls: a list of URLs. - patterns: optional dict of url to authorization patterns - Returns: - dict suitable as auth argument for ctx.download; more precisely, the dict - will map all URLs where the netrc file provides login and password to a - dict containing the corresponding login, password and optional authorization pattern, - as well as the mapping of "type" to "basic" or "pattern". - """ - auth = {} - for url in urls: - schemerest = url.split("://", 1) - if len(schemerest) < 2: - continue - if not (schemerest[0] in ["http", "https"]): - # For other protocols, bazel currently does not support - # authentication. So ignore them. 
- continue - host = schemerest[1].split("/")[0].split(":")[0] - if not host in netrc: - continue - authforhost = netrc[host] - if host in patterns: - auth_dict = { - "type": "pattern", - "pattern": patterns[host], - } - - if "login" in authforhost: - auth_dict["login"] = authforhost["login"] - - if "password" in authforhost: - auth_dict["password"] = authforhost["password"] - - auth[url] = auth_dict - elif "login" in authforhost and "password" in authforhost: - auth[url] = { - "type": "basic", - "login": authforhost["login"], - "password": authforhost["password"], - } - - return auth diff --git a/tools/runners/sanitizers/msan/BUILD b/tools/runners/sanitizers/msan/BUILD new file mode 100644 index 00000000..bc7d5f6f --- /dev/null +++ b/tools/runners/sanitizers/msan/BUILD @@ -0,0 +1,9 @@ +package(default_visibility = ["//visibility:private"]) + +sh_binary( + name = "msan", + srcs = ["msan.sh"], + data = [ + "msan-suppressions.txt", + ], +) diff --git a/tools/build_rules/BUILD b/tools/runners/sanitizers/msan/msan-suppressions.txt similarity index 100% rename from tools/build_rules/BUILD rename to tools/runners/sanitizers/msan/msan-suppressions.txt diff --git a/tools/runners/sanitizers/msan/msan.sh b/tools/runners/sanitizers/msan/msan.sh new file mode 100755 index 00000000..c796ac7a --- /dev/null +++ b/tools/runners/sanitizers/msan/msan.sh @@ -0,0 +1 @@ +MSAN_OPTIONS=suppressions="tools/runners/sanitizers/msan/msan-suppressions.txt ${MSAN_OPTIONS}" "${@}"
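
For readers skimming the new issue tests above: the core relocate() usage pattern they stress can be reduced to the short sketch below. It relies only on calls that appear verbatim in this diff (PhTreeMultiMapD, emplace, relocate); the default template arguments, the main() wrapper, and the small loop bounds are illustrative simplifications, not values taken from the tests.

```cpp
// Minimal sketch (not part of the test suite): emplace points into a PH-tree
// multimap and move them with relocate(), as the TestIssue60* tests above do.
#include "phtree/phtree_multimap.h"

#include <cstdlib>
#include <vector>

using namespace improbable::phtree;

int main() {
    auto tree = PhTreeMultiMapD<2, int>();  // default converter and bucket type
    std::vector<PhPointD<2>> positions;
    constexpr int num = 100;  // illustrative size, much smaller than in the tests

    // Insert points at random coordinates; value i identifies each entry.
    for (int i = 0; i < num; ++i) {
        PhPointD<2> p = {(double)(std::rand() % 1000), (double)(std::rand() % 1000)};
        positions.push_back(p);
        tree.emplace(p, i);
    }

    // Move every entry to a nearby coordinate.
    for (int i = 0; i < num; ++i) {
        PhPointD<2>& p = positions[i];
        PhPointD<2> new_p = {p[0] + 1.0, p[1] + 1.0};
        tree.relocate(p, new_p, i);  // (old key, new key, value)
        p = new_p;                   // keep the bookkeeping vector in sync
    }
    return 0;
}
```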