diff --git a/.bazelignore b/.bazelignore new file mode 100644 index 00000000..87508c8e --- /dev/null +++ b/.bazelignore @@ -0,0 +1,4 @@ +#ignore typical cmake build folders +build +out +cmake-build-debug diff --git a/.bazelrc b/.bazelrc index adcf592b..e0af0f7d 100644 --- a/.bazelrc +++ b/.bazelrc @@ -101,9 +101,9 @@ build:ubsan --linkopt="-lubsan" test:ubsan --run_under=//tools/runners/sanitizers/ubsan # MSAN is disabled for now, as there are false positives and we can't suppress them easily. -#build:msan --config=base-sanitizer -#build:msan --copt="-fsanitize=memory" -#build:msan --linkopt="-fsanitize=memory" -#test:msan --run_under=//tools/runners/sanitizers/msan +build:msan --config=base-sanitizer +build:msan --copt="-fsanitize=memory" +build:msan --linkopt="-fsanitize=memory" +test:msan --run_under=//tools/runners/sanitizers/msan build:lint --define linting_only=true diff --git a/.github/workflows/bazel.yml b/.github/workflows/bazel.yml index 030eaaea..24007c44 100644 --- a/.github/workflows/bazel.yml +++ b/.github/workflows/bazel.yml @@ -1,6 +1,6 @@ name: Bazel build -on: [push, pull_request] +on: [push] jobs: build: @@ -16,11 +16,13 @@ jobs: uses: actions/checkout@v2 - name: Setup bazel - # install bazelisk to install the appropriate bazel version - run: | - export PATH=$PATH:$HOME/bin && mkdir -p $HOME/bin - wget https://github.com/bazelbuild/bazelisk/releases/download/v1.5.0/bazelisk-linux-amd64 && chmod +x bazelisk-linux-amd64 && mv bazelisk-linux-amd64 $HOME/bin/bazel - wget https://github.com/bazelbuild/buildtools/releases/download/0.22.0/buildifier && chmod +x buildifier && mv buildifier $HOME/bin/ + uses: bazelbuild/setup-bazelisk@v2 + + - name: Mount bazel cache # Optional + uses: actions/cache@v3 + with: + path: "~/.cache/bazel" + key: bazel - name: Build shell: bash diff --git a/.github/workflows/cmake-windows.yml b/.github/workflows/cmake-windows.yml new file mode 100644 index 00000000..9eb0d3d3 --- /dev/null +++ 
b/.github/workflows/cmake-windows.yml @@ -0,0 +1,33 @@ +name: CMake Windows build + +on: [push] + +env: + BUILD_TYPE: Release + +jobs: + build: + runs-on: windows-latest + + steps: + - uses: actions/checkout@v2 + + - uses: ilammy/msvc-dev-cmd@v1 + + - name: Create Build Environment + run: cmake -E make_directory ${{github.workspace}}\out + + - name: Configure CMake + working-directory: ${{github.workspace}}\out + run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -S ${{github.workspace}} -B ${{github.workspace}}\out -DPHTREE_BUILD_EXAMPLES=ON -DPHTREE_BUILD_TESTS=ON + + - name: Build + working-directory: ${{github.workspace}}\out + # Execute the build. You can specify a specific target with "--target " + run: cmake --build . --config ${env:BUILD_TYPE} + + - name: Test + working-directory: ${{github.workspace}}\out + # Execute tests defined by the CMake configuration. + # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail + run: ctest -C ${env:BUILD_TYPE} diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 22599941..abdea7aa 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -1,6 +1,6 @@ name: CMake build -on: [push, pull_request] +on: [push] env: BUILD_TYPE: Release @@ -23,7 +23,7 @@ jobs: # Note the current convention is to use the -S and -B options here to specify source # and build directories, but this is only available with CMake 3.13 and higher. # The CMake binaries on the Github Actions machines are (as of this writing) 3.12 - run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE + run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DPHTREE_BUILD_ALL=ON - name: Build working-directory: ${{github.workspace}}/build @@ -38,3 +38,8 @@ jobs: # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail # TODO Currently tests are run via bazel only. 
run: ctest -C $BUILD_TYPE + + - name: Example + working-directory: ${{github.workspace}}/build + shell: bash + run: examples/Example diff --git a/.gitignore b/.gitignore index 55098c94..75f038ae 100644 --- a/.gitignore +++ b/.gitignore @@ -4,9 +4,13 @@ !.clang-format !.gitignore !.github +!*.yml bazel-* !bazel-*.sh compile_commands.json perf.data* build +out +CMakeSettings.json +/cmake-build-debug/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 9fd2a904..b74eb8ff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,86 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). ## [Unreleased] -Nothing yet. +### Added +- Nothing. + +### Changed +- Nothing. + +### Removed +- Nothing. + +### Fixed +- Nothing. + +## [1.3.0] - 2022-08-28 +### Added +- Added flag to relocate() to allow short cutting in case of identical keys. + [#68](https://github.com/tzaeschke/phtree-cpp/issues/68) +- Added tested support for move-only and copy-only value objects. + [#56](https://github.com/tzaeschke/phtree-cpp/issues/56) +- Added custom bucket implementation (similar to std::unordered_set). This improves update performance by 5%-20%. + [#44](https://github.com/tzaeschke/phtree-cpp/issues/44) +- Added `PhTree.relocate(old_key, new_key)` and `PhTree.relocate_if(old_key, new_key, predicate)`. + This is **a lot faster** than using other methods. + [#43](https://github.com/tzaeschke/phtree-cpp/issues/43) +- Added try_emplace(key, value) and try_emplace(iter_hint, key, value) + [#40](https://github.com/tzaeschke/phtree-cpp/issues/40) +- Added FilterBoxAABB and FilterSphereAABB as examples for filtering a PH-Tree with box keys + [#33](https://github.com/tzaeschke/phtree-cpp/issues/33) +### Changed +- Moved tests and benchmarks into separate folders. [#67](https://github.com/tzaeschke/phtree-cpp/pull/67) +- Cleaned up unit tests. 
[#54](https://github.com/tzaeschke/phtree-cpp/pull/54) +- Simplified internals of `erase()`. [#47](https://github.com/tzaeschke/phtree-cpp/pull/47) +- Removed internal use of `std::optional()` to slightly reduce memory overhead + [#38](https://github.com/tzaeschke/phtree-cpp/issues/38) +- Removed restrictions on bazel version [#35](https://github.com/tzaeschke/phtree-cpp/issues/35) +- **API BREAKING CHANGE**: API of filters have been changed to be more correct, explicit and flexible. + [#21](https://github.com/tzaeschke/phtree-cpp/issues/21) + - Correctness: Converters and distance functions are not copied unnecessarily anymore. + - Explicit: + Filters *must* have a mandatory parameter for a converter reference. This ensures that the correct + converter is used, probably `tree.converter()`. + - Flexible: + Distance functions can be provided through a universal reference (forwarding reference). + Also, filters are now movable and copyable. + +- **API BREAKING CHANGE**: Allow filtering on buckets in multimaps. Multimap filters have different functions + and function signatures than normal `PhTree` filters. [#26](https://github.com/tzaeschke/phtree-cpp/issues/26) + +### Fixed +- Fixed compiler warnings when compiling with Visual Studio 2019. + [#74](https://github.com/tzaeschke/phtree-cpp/issues/74) +- Fixed cmake to work with Visual Studio 2019. Added tests and benchmarks to cmake. + (benchmarks still do not work with VS at the moment). + [#62](https://github.com/tzaeschke/phtree-cpp/issues/62) +- Fixed compilation problems and a memory leak when compiling with Visual Studio 2019. + (also added `msan` support). [#64](https://github.com/tzaeschke/phtree-cpp/pull/64) + +## [1.2.0] - 2022-04-14 +### Changed +- Bugfix: FilterSphere was not working correctly. [#27](https://github.com/tzaeschke/phtree-cpp/issues/27) +- Potentially **BREAKING CHANGE**: Refactored API of all methods that accept callbacks and filters to + accept universal/forwarding references. 
+ Also changed filters and callback to not require `const` methods. + [#22](https://github.com/tzaeschke/phtree-cpp/issues/22) +- Clean up iterator implementations. [#19](https://github.com/tzaeschke/phtree-cpp/issues/19) +- Make PhTree and PhTreeMultimap movable (move-assign/copy). [#18](https://github.com/tzaeschke/phtree-cpp/issues/18) +- Potentially **BREAKING CHANGE** when using `IsNodeValid()` in provided filters: + Changed `bit_width_t` from `uint16_t` to `uint32_t`. This improves performance of 3D insert/emplace + on small datasets by up to 15%. To avoid compiler warnings, the API of `FilterAABB` and `FilterSphere` + had to be changed to accept `uint32_t` instead of `int`. This may break some implementations. + [#17](https://github.com/tzaeschke/phtree-cpp/pull/17) +- DIM>8 now uses custom b_plus_tree_map instead of std::map. This improves performance for all operations, e.g. + window queries on large datasets are up to 4x faster. Benchmark results can be found in the issue. + [#14](https://github.com/tzaeschke/phtree-cpp/issues/14) +- postfix/infix field moved from Node to Entry. This avoids indirections and improves performance of most + operations by roughly 10% (5-15%). [#11](https://github.com/tzaeschke/phtree-cpp/issues/11) +- Entries now use 'union' to store children. [#9](https://github.com/tzaeschke/phtree-cpp/issues/9) +- Avoid unnecessary find() when removing a node. [#5](https://github.com/tzaeschke/phtree-cpp/issues/5) +- Avoid unnecessary key copy when inserting a node. [#4](https://github.com/tzaeschke/phtree-cpp/issues/4) +- for_each(callback, filter) was traversing too many nodes. [#2](https://github.com/tzaeschke/phtree-cpp/issues/2) +- Build improvements for bazel/cmake ## [1.1.1] - 2022-01-30 ### Changed @@ -70,7 +149,9 @@ Nothing yet. - Nothing. 
-[Unreleased]: https://github.com/improbable-eng/phtree-cpp/compare/v1.1.1...HEAD +[Unreleased]: https://github.com/improbable-eng/phtree-cpp/compare/v1.3.0...HEAD +[1.3.0]: https://github.com/improbable-eng/phtree-cpp/compare/v1.2.0...v1.3.0 +[1.2.0]: https://github.com/improbable-eng/phtree-cpp/compare/v1.1.0...v1.2.0 [1.1.1]: https://github.com/improbable-eng/phtree-cpp/compare/v1.1.0...v1.1.1 [1.1.0]: https://github.com/improbable-eng/phtree-cpp/compare/v1.0.0...v1.1.0 [1.0.1]: https://github.com/improbable-eng/phtree-cpp/compare/v1.0.0...v1.0.1 diff --git a/CMakeLists.txt b/CMakeLists.txt index 18a5da8a..6936ad64 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,19 +1,123 @@ cmake_minimum_required(VERSION 3.14) # set the project name -project(PH_Tree_Main VERSION 1.1.1 +project(phtree VERSION 1.3.0 DESCRIPTION "PH-Tree C++" LANGUAGES CXX) -if(NOT CMAKE_BUILD_TYPE) - set(CMAKE_BUILD_TYPE Release) -endif() + +cmake_policy(SET CMP0077 NEW) + +# --------------------------------------------------------------------------------------- +# Set default build to release +# --------------------------------------------------------------------------------------- +if (NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose Release or Debug" FORCE) +endif () + +# --------------------------------------------------------------------------------------- +# Build options +# --------------------------------------------------------------------------------------- +# example options +option(PHTREE_BUILD_ALL "Build examples, tests and benchmarks" OFF) + +# example options +option(PHTREE_BUILD_EXAMPLES "Build examples" OFF) +#option(PHTREE_BUILD_EXAMPLE_HO "Build header only example" OFF) + +# testing options +option(PHTREE_BUILD_TESTS "Build tests" OFF) +#option(PHTREE_BUILD_TESTS_HO "Build tests using the header only version" OFF) + +# bench options +option(PHTREE_BUILD_BENCHMARKS "Build benchmarks (Requires https://github.com/google/benchmark.git to be 
installed)" OFF) + +# --------------------------------------------------------------------------------------- +# Compiler config +# --------------------------------------------------------------------------------------- +find_program(CCACHE_FOUND ccache) +if (CCACHE_FOUND) + message("CCACHE is found") + set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache) + set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache) +else (CCACHE_FOUND) + message("CCACHE is NOT found") +endif (CCACHE_FOUND) # specify the C++ standard -set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_STANDARD_REQUIRED True) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -Wall -Werror") -set(CMAKE_CXX_FLAGS_RELEASE "-O3") +if (NOT CMAKE_CXX_STANDARD) + set(CMAKE_CXX_STANDARD 17) + set(CMAKE_CXX_STANDARD_REQUIRED ON) +endif () +if (MSVC) + #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /std:c++17 /Wall") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /std:c++17") + set(CMAKE_CXX_FLAGS_RELEASE "/O2") + + if (PHTREE_BUILD_TESTS OR PHTREE_BUILD_ALL) + add_compile_options(/bigobj) + endif () + + # For google benchmark + if (PHTREE_BUILD_BENCHMARKS) # OR PHTREE_BUILD_ALL) + # This still doesn't work. 
This also breaks gtest + # See for example + # https://stackoverflow.com/questions/55376111/how-to-build-and-link-google-benchmark-using-cmake-in-windows + # https://github.com/google/benchmark/issues/1348 + # https://github.com/google/benchmark/issues/639 + # set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS TRUE) + # set(BUILD_SHARED_LIBS TRUE) #=TRUE + # set(BENCHMARK_DOWNLOAD_DEPENDENCIES on) + # set(BENCHMARK_ENABLE_GTEST_TESTS OFF) + endif () +else () + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -Wall -Werror") + if (PHTREE_BUILD_BENCHMARKS OR PHTREE_BUILD_ALL) # PHTREE_BUILD_ALL also builds the benchmarks, which need -pthread + set(CMAKE_CXX_FLAGS_RELEASE "-O3 -mavx -pthread") + else () + set(CMAKE_CXX_FLAGS_RELEASE "-O3 -mavx") + endif () +endif () + +# --------------------------------------------------------------------------------------- +# Build binaries +# --------------------------------------------------------------------------------------- add_subdirectory(phtree) -add_subdirectory(examples) + +if (PHTREE_BUILD_EXAMPLES OR PHTREE_BUILD_ALL) + message(STATUS "Generating examples") + add_subdirectory(examples) +endif () + +if (NOT MSVC AND (PHTREE_BUILD_BENCHMARKS OR PHTREE_BUILD_ALL)) # CMake negation is NOT ('!MSVC' is always false); benchmarks do not build with MSVC yet + message(STATUS "Generating benchmarks") + add_subdirectory(benchmark) +endif () + +if (PHTREE_BUILD_TESTS OR PHTREE_BUILD_ALL) + message(STATUS "Generating tests") + if (FALSE) + add_compile_definitions(GTEST_HAS_ABSL=0) + add_compile_definitions(GTEST_OS_WINDOWS_MOBILE=0) + if (MSVC) + add_compile_definitions(GTEST_OS_WINDOWS_MINGW=0) + endif () + add_compile_definitions(GTEST_OS_LINUX_ANDROID=0) + if (LINUX) + add_compile_definitions(GTEST_OS_LINUX=1) + else () + add_compile_definitions(GTEST_OS_LINUX=0) + endif () + add_compile_definitions( + GTEST_OS_WINDOWS_MOBILE=0 + GTEST_OS_WINDOWS_PHONE=0 + GTEST_OS_WINDOWS_RT=0 + GTEST_OS_ESP8266=0 + GTEST_OS_XTENSA=0) + endif () + + enable_testing() + include(GoogleTest) + add_subdirectory(test) +endif () diff --git a/LICENSE b/LICENSE index e46c5961..13cd100a 100644 --- a/LICENSE +++ b/LICENSE @@ -188,6 +188,7 @@ 
identification within third-party archives. Copyright 2020 Improbable Worlds Limited + Copyright 2022 Tilmann Zäschke Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/README.md b/README.md index fad24140..a8f0256d 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -**Note: for updates please also check the [fork](https://github.com/tzaeschke/phtree-cpp) by the original PH-Tree developer.** +**This is a fork of [Improbable's PH-tree](https://github.com/improbable-eng/phtree-cpp)**. # PH-Tree C++ @@ -35,6 +35,8 @@ More information about PH-Trees (including a Java implementation) is available [ * [Filters](#filters) +* [Filters for MultiMaps](#filters-for-multimaps) + * [Distance Functions](#distance-functions) [Converters](#converters) @@ -49,7 +51,7 @@ More information about PH-Trees (including a Java implementation) is available [ [When to use a PH-Tree](#when-to-use-a-ph-tree) -[Optimising Performance](#optimising-performance) +[Optimising Performance](#optimizing-performance) ### Compiling / Building @@ -107,8 +109,12 @@ auto tree = PhTreeD<3, MyData>(); PhPointD<3> p{1.1, 1.0, 10.}; // Some operations +tree.relocate(p1, p2); // Move an entry from point 1 to point 2 +tree.relocate_if(p1, p2, predicate); // Conditionally move an entry from point 1 to point 2 tree.emplace(p, my_data); tree.emplace_hint(hint, p, my_data); +tree.try_emplace(p, my_data); +tree.try_emplace(hint, p, my_data); tree.insert(p, my_data); tree[p] = my_data; tree.count(p); @@ -120,7 +126,6 @@ tree.empty(); tree.clear(); // Multi-map only -tree.relocate(p_old, p_new, value); tree.estimate_count(query); ``` @@ -128,9 +133,10 @@ tree.estimate_count(query); #### Queries -* For-each over all elements: `tree.fore_each(callback);` +* For-each over all elements: `tree.for_each(callback);` + **Note that `for_each` tends to be 10%-20% faster than using an iterator.** * Iterator over all elements: `auto 
iterator = tree.begin();` -* For-each with box shaped window queries: `tree.fore_each(PhBoxD(min, max), callback);` +* For-each with box shaped window queries: `tree.for_each(PhBoxD(min, max), callback);` * Iterator for box shaped window queries: `auto q = tree.begin_query(PhBoxD(min, max));` * Iterator for _k_ nearest neighbor queries: `auto q = tree.begin_knn_query(k, center_point, distance_function);` * Custom query shapes, such as spheres: `tree.for_each(callback, FilterSphere(center, radius, tree.converter()));` @@ -164,18 +170,18 @@ for (auto it : tree) { ... } -// Iterate over all entries inside of an axis aligned box defined by the two points (1,1,1) and (3,3,3) +// Iterate over all entries inside an axis aligned box defined by the two points (1,1,1) and (3,3,3) for (auto it = tree.begin_query({{1, 1, 1}, {3, 3, 3}}); it != tree.end(); ++it) { ... } // Find 5 nearest neighbors of (1,1,1) -for (auto it = tree.begin_knn_query(5, {1, 1, 1}); it != tree.end(); ++it) { +for (auto it = tree.begin_knn_query(5, {1, 1, 1}, DistanceEuclidean<3>()); it != tree.end(); ++it) { ... } ``` - + ##### Filters @@ -183,7 +189,8 @@ All queries allow specifying an additional filter. The filter is called for ever returned (subject to query constraints) and to every node in the tree that the query decides to traverse (also subject to query constraints). Returning `true` in the filter does not change query behaviour, returning `false` means that the current value or child node is not returned or traversed. An example of a geometric filter can be found -in `phtree/common/filter.h` in `FilterAABB`. +in `phtree/common/filter.h` in `FilterAABB` or `FilterSphere` (for examples with box keys see +`FilterBoxAABB` or `FilterBoxSphere`). ```C++ template @@ -205,6 +212,40 @@ for (auto it = tree.begin_query({1, 1, 1}, {3, 3, 3}, FilterByValueId<3, T>())); } ``` +Note: The filter example works only for the 'map' version of the PH-Tree, such as `PhTree`, `PhTreeD`, ... . 
Filters for +the `PhTreeMultiMap` are discussed in the next section. + + + +#### Filters for MultiMaps + +The `PhTreeMultiMap` requires a different type of filter. In order to function as a multimap, it uses collections +("buckets") as entries for each occupied coordinate. The buckets allow it to store several values per coordinate. When +using a filter, the PH-Tree will check `IsEntryValid` for every *bucket* (this is different from version 1.x.x where it +called `IsEntryValid` for every entry in a bucket but never for the bucket itself). Since 2.0.0 there is a new function +required in every multimap filter: `IsBucketEntryValid`. It is called once for every entry in a bucket if the bucket +passed `IsEntryValid`. An example of a geometric filter can be found in `phtree/common/filter.h` in `FilterMultiMapAABB` +. + +```C++ +template +struct FilterMultiMapByValueId { + template + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint& key, const BucketT& bucket) const { + // Arbitrary example: Only allow keys/buckets with a certain property, e.g. keys that lie within a given sphere. + return check_some_geometric_property_of_key(key); + } + [[nodiscard]] constexpr bool IsBucketEntryValid(const PhPoint& key, const T& value) const { + // Arbitrary example: Only allow values with even values of id_ + return value.id_ % 2 == 0; + } + [[nodiscard]] constexpr bool IsNodeValid(const PhPoint& prefix, int bits_to_ignore) const { + // Allow all nodes + return true; + } +}; +``` + ##### Distance function @@ -250,7 +291,14 @@ double resultung_float = ((double)my_int) / 1000000.; It is obvious that this approach leads to a loss of numerical precision. Moreover, the loss of precision depends on the actual range of the double values and the constant. The chosen constant should probably be as large as possible but small enough such that converted values do not exceed the 64bit limit of `std::int64_t`. 
Note that the PH-Tree provides -several `ConverterMultiply` implementations for point/box and double/float. +several `ConverterMultiply` implementations for point/box and double/float. For example: + +```C++ +// Multiply converter that multiplies by 1'000'000 (and divides by 1). +auto tree = PhTreeD>(); +``` + +You can also write your own converter. For example: ```C++ template @@ -371,10 +419,10 @@ void test() { **Problem**: The PH-Tree appears to be losing updates/insertions. **Solution**: Remember that the PH-Tree is a *map*, keys will not be inserted if an identical key already exists. The -easiest solution is to use one of the `PhTreeMultiMap` implementations. Alternatively, this can be solved by turning the -PH-Tree into a multi-map, for example by using something like `std::map` or `std::set` as member type: -`PhTree<3, std::set>`. The `set` instances can then be used to handle key conflicts by storing multiple -entries for the same key. The logic to handle conflicts must currently be implemented manually by the user. +easiest solution is to use one of the `PhTreeMultiMap` implementations. Alternatively, this can be solved by turning a +`PhTree` into a multi-map, for example by using something like `std::map` or `std::set` as member type: +`PhTree<3, T, CONVERTER, std::set>`. The `set` instances can then be used to handle key conflicts by +storing multiple entries for the same key. The logic to handle conflicts must currently be implemented manually. ---------------------------------- @@ -407,7 +455,7 @@ heavily on the actual dataset, usage patterns, hardware, ... . * Scalability with the number of dimensions. The PH-Tree has been shown to deal "well" with high dimensional data ( 1000k+ dimensions). What does "well" mean? * It works very well for up to 30 (sometimes 50) dimensions. 
**Please note that the C++ implementation has not been - optimised nearly as much as the Java implementation.** + optimized nearly as much as the Java implementation.** * For more dimensions (Java was tested with 1000+ dimensions) the PH-Tree still has excellent insertion/deletion performance. However, the query performance cannot compete with specialised high-dim indexes such as cover-trees or pyramid-trees (these tend to be *very slow* on insertion/deletion though). @@ -426,22 +474,25 @@ heavily on the actual dataset, usage patterns, hardware, ... . * PH-Trees are not very efficient in scenarios where queries tend to return large result sets in the order of 1000 or more. - + -### Optimising Performance +### Optimizing Performance There are numerous ways to improve performance. The following list gives an overview over the possibilities. -1) **Use `for_each` instead of iterators**. This should improve performance of queries by 5%-10%. +1) **Use `for_each` instead of iterators**. This should improve performance of queries by 10%-20%. -2) **Use `emplace_hint` if possible**. When updating the position of an entry, the naive way is to use `erase()` - /`emplace()`. With `emplace_hint`, insertion can avoid navigation to the target node if the insertion coordinate is - close to the removal coordinate. - ```c++ - auto iter = tree.find(old_position); - tree.erase(iter); - tree.emplace_hint(iter, new_position, value); - ``` +2) **Use `relocate()` / `relocate_if()` if possible**. When updating the position of an entry, the naive way is + to use `erase()` / `emplace()`. With `relocate` / `relocate_if()`, insertion can avoid a lot of duplicate + navigation in the tree if the new coordinate is close to the old coordinate. 
+ ```c++ + relocate(old_position, new_position); + relocate_if(old_position, new_position, [](const T& value) { return [true/false]; }); + ``` + The multi-map version relocates all values unless a 'value' is specified to identify the value to be relocated: + ```c++ + relocate(old_position, new_position, value); + ``` 3) **Store pointers instead of large data objects**. For example, use `PhTree<3, MyLargeClass*>` instead of `PhTree<3, MyLargeClass>` if `MyLargeClass` is large. @@ -462,7 +513,7 @@ There are numerous ways to improve performance. The following list gives an over caused by faster operation in the converter itself but by a more compact tree shape. The example shows how to use a converter that multiplies coordinates by 100'000, thus preserving roughly 5 fractional digits: - `PhTreeD>` + `PhTreeD>()` 6) **Use custom key types**. By default, the PH-Tree accepts only coordinates in the form of its own key types, such as `PhPointD`, `PhBoxF` or similar. To avoid conversion from custom types to PH-Tree key types, custom classes can @@ -487,67 +538,72 @@ There are numerous ways to improve performance. The following list gives an over ## Compiling the PH-Tree -This section will guide you through the initial build system and IDE you need to go through in order to build and run -custom versions of the PH-Tree on your machine. +The PH-Tree index itself is a *header only* library; it can be used by simply copying all headers in the `phtree` +folder. +The examples, tests and benchmarks can be built with bazel or cmake. ### Build system & dependencies -PH-Tree can be built with *cmake 3.14* or [Bazel](https://bazel.build) as build system. All code is written in C++ -targeting the C++17 standard. The code has been verified to compile on Linux with Clang 9, 10, 11, 12, and GCC 9, 10, -11, and on Windows with Visual Studio 2019. 
- -#### Ubuntu Linux - -* Installing [clang](https://apt.llvm.org/) - -* Installing [bazel](https://docs.bazel.build/versions/main/install-ubuntu.html) - -* To install [cmake](https://launchpad.net/~hnakamur/+archive/ubuntu/cmake): - +PH-Tree can be built with [Bazel](https://bazel.build) (primary build system) or with +[cmake](https://cmake.org/) *3.14*. +All code is written in C++ targeting the C++17 standard. +The code has been verified to compile on Linux with Clang 11 and GCC 9, and on Windows with Visual Studio 2019 +(except benchmarks, which don't work with VS). +The PH-tree makes use of vectorization, so suggested compilation options for clang/gcc are: ``` -sudo add-apt-repository ppa:hnakamur/libarchive -sudo add-apt-repository ppa:hnakamur/libzstd -sudo add-apt-repository ppa:hnakamur/cmake -sudo apt update -sudo apt install cmake +-O3 -mavx ``` -#### Windows - -To build on Windows, you'll need to have a version of Visual Studio 2019 installed (likely Professional), in addition to -[Bazel](https://docs.bazel.build/versions/master/windows.html) or -[cmake](https://cmake.org/download/). ### Bazel Once you have set up your dependencies, you should be able to build the PH-Tree repository by running: - ``` bazel build ... ``` Similarly, you can run all unit tests with: - ``` bazel test ... ``` +Benchmarks: +``` +bazel run //benchmark:update_mm_d_benchmark --config=benchmark -- --benchmark_counters_tabular=true +``` + + ### cmake - +`cmake` uses `ccache` when available. ``` mkdir build cd build cmake .. cmake --build . +``` + +Run example: +``` +cmake .. -DPHTREE_BUILD_EXAMPLES=ON +cmake --build . ./example/Example ``` +Run tests: +``` +cmake .. -DPHTREE_BUILD_TESTS=ON +cmake --build . +ctest +``` +In addition to the examples (`PHTREE_BUILD_EXAMPLES`) there are also tests (`PHTREE_BUILD_TESTS`) and +benchmarks (`PHTREE_BUILD_BENCHMARKS`). To build all, use `PHTREE_BUILD_ALL`. 
+ ## Further Resources diff --git a/TODO.txt b/TODO.txt new file mode 100644 index 00000000..9bf73e5e --- /dev/null +++ b/TODO.txt @@ -0,0 +1,76 @@ +Fix const-ness +============== +- operator[] should have a const overload +- find() should have a non-const overload +- test: + +TEST(PhTreeTest, SmokeTestConstTree) { + // Test edge case: only one entry in tree + PhPoint<3> p{1, 2, 3}; + TestTree<3, Id> tree1; + tree1.emplace(p, Id{1}); + tree1.emplace(p, Id{2}); + Id id3{3}; + tree1.insert(p, id3); + Id id4{4}; + tree1.insert(p, id4); + const auto& tree = tree1; + ASSERT_EQ(tree.size(), 1); + ASSERT_EQ(tree.find(p).second()._i, 1); + ASSERT_EQ(tree[p]._i, 1); + + auto q_window = tree.begin_query({p, p}); + ASSERT_EQ(1, q_window->_i); + ++q_window; + ASSERT_EQ(q_window, tree.end()); + + auto q_extent = tree.begin(); + ASSERT_EQ(1, q_extent->_i); + ++q_extent; + ASSERT_EQ(q_extent, tree.end()); + + auto q_knn = tree.begin_knn_query(10, p, DistanceEuclidean<3>()); + ASSERT_EQ(1, q_knn->_i); + ++q_knn; + ASSERT_EQ(q_knn, tree.end()); + + ASSERT_EQ(1, tree1.erase(p)); + ASSERT_EQ(0, tree.size()); + ASSERT_EQ(0, tree1.erase(p)); + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); +} + + +b_plus_tree_map - binary search +=============== +Use custom binary search: + + // return BptEntry* ?!?!? 
+ template + [[nodiscard]] auto lower_bound(key_t key, std::vector& data) noexcept { + return std::lower_bound(data.begin(), data.end(), key, [](E& left, const key_t key) { + return left.first < key; + }); + // auto pos = __lower_bound(&*data_leaf_.begin(), &*data_leaf_.end(), key); + // return data_leaf_.begin() + pos; + } + + template + inline auto __lower_bound(const TT* __first, const TT* __last, key_t __val) const noexcept { + const TT* const_first = __first; + auto __len = __last - __first; + + while (__len > 0) { + auto __half = __len >> 1; + const TT* __middle = __first + __half; + if (__middle->first < __val) { + __first = __middle; + ++__first; + __len = __len - __half - 1; + } else + __len = __half; + } + return __first - const_first; + } + diff --git a/WORKSPACE b/WORKSPACE index 0bd3d32b..98b0dce9 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -11,8 +11,7 @@ http_archive( load("@bazel_skylib//lib:versions.bzl", "versions") versions.check( - minimum_bazel_version = "4.2.2", - maximum_bazel_version = "4.2.2", + minimum_bazel_version = "2.0.0", ) # NOTE: We make third_party/ its own bazel workspace because it allows to run `bazel build ...` without @@ -34,17 +33,17 @@ http_archive( http_archive( name = "gbenchmark", - sha256 = "dccbdab796baa1043f04982147e67bb6e118fe610da2c65f88912d73987e700c", - strip_prefix = "benchmark-1.5.2", - url = "https://github.com/google/benchmark/archive/v1.5.2.tar.gz", + sha256 = "6132883bc8c9b0df5375b16ab520fac1a85dc9e4cf5be59480448ece74b278d4", + strip_prefix = "benchmark-1.6.1", + url = "https://github.com/google/benchmark/archive/v1.6.1.tar.gz", ) http_archive( name = "gtest", build_file = "@third_party//gtest:BUILD", - sha256 = "9dc9157a9a1551ec7a7e43daea9a694a0bb5fb8bec81235d8a1e6ef64c716dcb", - strip_prefix = "googletest-release-1.10.0", - url = "https://github.com/google/googletest/archive/release-1.10.0.tar.gz", + sha256 = "b4870bf121ff7795ba20d20bcdd8627b8e088f2d1dab299a031c1034eddc93d5", + strip_prefix = 
"googletest-release-1.11.0", + url = "https://github.com/google/googletest/archive/release-1.11.0.tar.gz", ) # Development environment tooling diff --git a/phtree/benchmark/BUILD b/benchmark/BUILD similarity index 73% rename from phtree/benchmark/BUILD rename to benchmark/BUILD index 95315788..4df29874 100644 --- a/phtree/benchmark/BUILD +++ b/benchmark/BUILD @@ -3,9 +3,6 @@ package(default_visibility = ["//visibility:private"]) cc_library( name = "benchmark", testonly = True, - srcs = [ - "logging.cc", - ], hdrs = [ "benchmark_util.h", "logging.h", @@ -29,7 +26,7 @@ cc_binary( linkstatic = True, deps = [ "//phtree", - "//phtree/benchmark", + ":benchmark", "@gbenchmark//:benchmark", "@spdlog", ], @@ -44,7 +41,7 @@ cc_binary( linkstatic = True, deps = [ "//phtree", - "//phtree/benchmark", + ":benchmark", "@gbenchmark//:benchmark", "@spdlog", ], @@ -59,7 +56,7 @@ cc_binary( linkstatic = True, deps = [ "//phtree", - "//phtree/benchmark", + ":benchmark", "@gbenchmark//:benchmark", "@spdlog", ], @@ -74,7 +71,7 @@ cc_binary( linkstatic = True, deps = [ "//phtree", - "//phtree/benchmark", + ":benchmark", "@gbenchmark//:benchmark", "@spdlog", ], @@ -89,7 +86,7 @@ cc_binary( linkstatic = True, deps = [ "//phtree", - "//phtree/benchmark", + ":benchmark", "@gbenchmark//:benchmark", "@spdlog", ], @@ -104,7 +101,7 @@ cc_binary( linkstatic = True, deps = [ "//phtree", - "//phtree/benchmark", + ":benchmark", "@gbenchmark//:benchmark", "@spdlog", ], @@ -119,7 +116,7 @@ cc_binary( linkstatic = True, deps = [ "//phtree", - "//phtree/benchmark", + ":benchmark", "@gbenchmark//:benchmark", "@spdlog", ], @@ -134,7 +131,7 @@ cc_binary( linkstatic = True, deps = [ "//phtree", - "//phtree/benchmark", + ":benchmark", "@gbenchmark//:benchmark", "@spdlog", ], @@ -149,7 +146,7 @@ cc_binary( linkstatic = True, deps = [ "//phtree", - "//phtree/benchmark", + ":benchmark", "@gbenchmark//:benchmark", "@spdlog", ], @@ -164,7 +161,7 @@ cc_binary( linkstatic = True, deps = [ "//phtree", - 
"//phtree/benchmark", + ":benchmark", "@gbenchmark//:benchmark", "@spdlog", ], @@ -179,7 +176,7 @@ cc_binary( linkstatic = True, deps = [ "//phtree", - "//phtree/benchmark", + ":benchmark", "@gbenchmark//:benchmark", "@spdlog", ], @@ -194,7 +191,7 @@ cc_binary( linkstatic = True, deps = [ "//phtree", - "//phtree/benchmark", + ":benchmark", "@gbenchmark//:benchmark", "@spdlog", ], @@ -209,7 +206,7 @@ cc_binary( linkstatic = True, deps = [ "//phtree", - "//phtree/benchmark", + ":benchmark", "@gbenchmark//:benchmark", "@spdlog", ], @@ -224,7 +221,22 @@ cc_binary( linkstatic = True, deps = [ "//phtree", - "//phtree/benchmark", + ":benchmark", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "query_mm_d_filter_benchmark", + testonly = True, + srcs = [ + "query_mm_d_filter_benchmark.cc", + ], + linkstatic = True, + deps = [ + "//phtree", + ":benchmark", "@gbenchmark//:benchmark", "@spdlog", ], @@ -239,7 +251,7 @@ cc_binary( linkstatic = True, deps = [ "//phtree", - "//phtree/benchmark", + ":benchmark", "@gbenchmark//:benchmark", "@spdlog", ], @@ -254,7 +266,7 @@ cc_binary( linkstatic = True, deps = [ "//phtree", - "//phtree/benchmark", + ":benchmark", "@gbenchmark//:benchmark", "@spdlog", ], @@ -269,7 +281,7 @@ cc_binary( linkstatic = True, deps = [ "//phtree", - "//phtree/benchmark", + ":benchmark", "@gbenchmark//:benchmark", "@spdlog", ], @@ -284,7 +296,7 @@ cc_binary( linkstatic = True, deps = [ "//phtree", - "//phtree/benchmark", + ":benchmark", "@gbenchmark//:benchmark", "@spdlog", ], @@ -299,7 +311,67 @@ cc_binary( linkstatic = True, deps = [ "//phtree", - "//phtree/benchmark", + ":benchmark", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "hd_insert_d_benchmark", + testonly = True, + srcs = [ + "hd_insert_d_benchmark.cc", + ], + linkstatic = True, + deps = [ + "//phtree", + ":benchmark", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "hd_erase_d_benchmark", + testonly = True, + srcs = [ + 
"hd_erase_d_benchmark.cc", + ], + linkstatic = True, + deps = [ + "//phtree", + ":benchmark", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "hd_query_d_benchmark", + testonly = True, + srcs = [ + "hd_query_d_benchmark.cc", + ], + linkstatic = True, + deps = [ + "//phtree", + ":benchmark", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "hd_knn_d_benchmark", + testonly = True, + srcs = [ + "hd_knn_d_benchmark.cc", + ], + linkstatic = True, + deps = [ + "//phtree", + ":benchmark", "@gbenchmark//:benchmark", "@spdlog", ], diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt new file mode 100644 index 00000000..0406329e --- /dev/null +++ b/benchmark/CMakeLists.txt @@ -0,0 +1,57 @@ +cmake_minimum_required(VERSION 3.14) +project(phtree-benchmarks) + +set(BENCHMARK_ENABLE_TESTING OFF) + +include(FetchContent) + +FetchContent_Declare( + googlebenchmark + GIT_REPOSITORY https://github.com/google/benchmark.git + GIT_TAG v1.7.0 +) +FetchContent_MakeAvailable(googlebenchmark) + +FetchContent_Declare( + spdlog + GIT_REPOSITORY https://github.com/gabime/spdlog.git + GIT_TAG v1.10.0 +) +FetchContent_MakeAvailable(spdlog) + +set(INCLUDE_DIR ${CMAKE_SOURCE_DIR}/phtree) +set(INCLUDE_FILES ${INCLUDE_DIR}/phtree.h ${INCLUDE_DIR}/phtree_multimap.h benchmark_util.h logging.h) + +macro(package_add_benchmark TESTNAME) + add_executable(${TESTNAME} ${ARGN} ${INCLUDE_FILES}) + target_link_libraries(${TESTNAME} PRIVATE benchmark::benchmark) + target_link_libraries(${TESTNAME} PRIVATE spdlog::spdlog) + target_include_directories(${TESTNAME} PRIVATE ${PROJECT_SOURCE_DIR}/..) 
+endmacro() + +add_compile_definitions(RUN_HAVE_STD_REGEX=0 RUN_HAVE_POSIX_REGEX=0 COMPILE_HAVE_GNU_POSIX_REGEX=0) + +package_add_benchmark(count_mm_d_benchmark count_mm_d_benchmark.cc) +package_add_benchmark(erase_benchmark erase_benchmark.cc) +package_add_benchmark(erase_d_benchmark erase_d_benchmark.cc) +package_add_benchmark(extent_benchmark extent_benchmark.cc) +package_add_benchmark(extent_benchmark_weird extent_benchmark_weird.cc) +package_add_benchmark(find_benchmark find_benchmark.cc) +package_add_benchmark(hd_erase_d_benchmark hd_erase_d_benchmark.cc) +package_add_benchmark(hd_insert_d_benchmark hd_insert_d_benchmark.cc) +package_add_benchmark(hd_knn_d_benchmark hd_knn_d_benchmark.cc) +package_add_benchmark(hd_query_d_benchmark hd_query_d_benchmark.cc) +package_add_benchmark(insert_benchmark insert_benchmark.cc) +package_add_benchmark(insert_box_d_benchmark insert_box_d_benchmark.cc) +package_add_benchmark(insert_d_benchmark insert_d_benchmark.cc) +package_add_benchmark(knn_d_benchmark knn_d_benchmark.cc) +package_add_benchmark(query_benchmark query_benchmark.cc) +package_add_benchmark(query_box_d_benchmark query_box_d_benchmark.cc) +package_add_benchmark(query_d_benchmark query_d_benchmark.cc) +package_add_benchmark(query_mm_box_d_benchmark query_mm_box_d_benchmark.cc) +package_add_benchmark(query_mm_d_benchmark query_mm_d_benchmark.cc) +package_add_benchmark(query_mm_d_filter_benchmark query_mm_d_filter_benchmark.cc) +package_add_benchmark(update_box_d_benchmark update_box_d_benchmark.cc) +package_add_benchmark(update_d_benchmark update_d_benchmark.cc) +package_add_benchmark(update_mm_box_d_benchmark update_mm_box_d_benchmark.cc) +package_add_benchmark(update_mm_d_benchmark update_mm_d_benchmark.cc) diff --git a/phtree/benchmark/benchmark_util.h b/benchmark/benchmark_util.h similarity index 93% rename from phtree/benchmark/benchmark_util.h rename to benchmark/benchmark_util.h index 5af70367..73069710 100644 --- a/phtree/benchmark/benchmark_util.h +++ 
b/benchmark/benchmark_util.h @@ -81,7 +81,7 @@ auto CreateDataCLUSTER = [](auto& points, }; auto CreateDuplicates = - [](auto& points, size_t num_unique_entries, size_t num_total_entities, std::uint32_t seed) { + [](auto& points, int num_unique_entries, size_t num_total_entities, std::uint32_t seed) { std::default_random_engine random_engine{seed}; std::uniform_int_distribution<> distribution(0, num_unique_entries); for (size_t i = num_unique_entries; i < num_total_entities; ++i) { @@ -91,7 +91,7 @@ auto CreateDuplicates = }; } // namespace -enum TestGenerator { CUBE, CLUSTER }; +enum TestGenerator { CUBE = 4, CLUSTER = 7 }; template auto CreatePointDataMinMax = [](auto& points, @@ -101,11 +101,13 @@ auto CreatePointDataMinMax = [](auto& points, double world_minimum, double world_maximum, double fraction_of_duplicates) { - auto set_coordinate_lambda = [](auto& p, dimension_t dim, auto value) { p[dim] = value; }; + auto set_coordinate_lambda = [](auto& p, dimension_t dim, auto value) { + p[dim] = static_cast < typename std::remove_reference_t>(value); + }; // Create at least 1 unique point // Note that the following point generator is likely, but not guaranteed, to created unique // points. - size_t num_unique_entries = 1 + (num_entities - 1) * (1. - fraction_of_duplicates); + int num_unique_entries = static_cast(1 + (num_entities - 1) * (1. - fraction_of_duplicates)); points.reserve(num_entities); switch (test_generator) { case CUBE: @@ -140,7 +142,7 @@ auto CreateBoxDataMinMax = [](auto& points, // Create at least 1 unique point // Note that the following point generator is likely, but not guaranteed, to created unique // points. - int num_unique_entries = 1 + (num_entities - 1) * (1. - fraction_of_duplicates); + int num_unique_entries = static_cast(1 + (num_entities - 1) * (1. 
- fraction_of_duplicates)); points.reserve(num_entities); switch (test_generator) { case CUBE: diff --git a/phtree/benchmark/count_mm_d_benchmark.cc b/benchmark/count_mm_d_benchmark.cc similarity index 99% rename from phtree/benchmark/count_mm_d_benchmark.cc rename to benchmark/count_mm_d_benchmark.cc index b05987bd..0b426a7d 100644 --- a/phtree/benchmark/count_mm_d_benchmark.cc +++ b/benchmark/count_mm_d_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "benchmark_util.h" -#include "logging.h" +#include "benchmark/benchmark_util.h" +#include "benchmark/logging.h" #include "phtree/phtree.h" #include "phtree/phtree_multimap.h" #include diff --git a/phtree/benchmark/erase_benchmark.cc b/benchmark/erase_benchmark.cc similarity index 88% rename from phtree/benchmark/erase_benchmark.cc rename to benchmark/erase_benchmark.cc index 1e59a6d2..a4ef1de4 100644 --- a/phtree/benchmark/erase_benchmark.cc +++ b/benchmark/erase_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "logging.h" -#include "phtree/benchmark/benchmark_util.h" +#include "benchmark/benchmark_util.h" +#include "benchmark/logging.h" #include "phtree/phtree.h" #include #include @@ -26,6 +26,7 @@ using namespace improbable::phtree::phbenchmark; namespace { const int GLOBAL_MAX = 10000; +using payload_t = std::uint32_t; /* * Benchmark for removing entries. 
@@ -39,11 +40,11 @@ class IndexBenchmark { private: void SetupWorld(benchmark::State& state); - void Insert(benchmark::State& state, PhTree& tree); - void Remove(benchmark::State& state, PhTree& tree); + void Insert(benchmark::State& state, PhTree& tree); + void Remove(benchmark::State& state, PhTree& tree); const TestGenerator data_type_; - const int num_entities_; + const size_t num_entities_; std::default_random_engine random_engine_; std::uniform_int_distribution<> cube_distribution_; @@ -66,7 +67,7 @@ template void IndexBenchmark::Benchmark(benchmark::State& state) { for (auto _ : state) { state.PauseTiming(); - auto* tree = new PhTree(); + auto* tree = new PhTree(); Insert(state, *tree); state.ResumeTiming(); @@ -91,16 +92,16 @@ void IndexBenchmark::SetupWorld(benchmark::State& state) { } template -void IndexBenchmark::Insert(benchmark::State&, PhTree& tree) { - for (int i = 0; i < num_entities_; ++i) { - tree.emplace(points_[i], i); +void IndexBenchmark::Insert(benchmark::State&, PhTree& tree) { + for (size_t i = 0; i < num_entities_; ++i) { + tree.emplace(points_[i], (int)i); } } template -void IndexBenchmark::Remove(benchmark::State& state, PhTree& tree) { - int n = 0; - for (int i = 0; i < num_entities_; ++i) { +void IndexBenchmark::Remove(benchmark::State& state, PhTree& tree) { + size_t n = 0; + for (size_t i = 0; i < num_entities_; ++i) { n += tree.erase(points_[i]); } diff --git a/phtree/benchmark/erase_d_benchmark.cc b/benchmark/erase_d_benchmark.cc similarity index 89% rename from phtree/benchmark/erase_d_benchmark.cc rename to benchmark/erase_d_benchmark.cc index a544a4e0..9be51308 100644 --- a/phtree/benchmark/erase_d_benchmark.cc +++ b/benchmark/erase_d_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "logging.h" -#include "phtree/benchmark/benchmark_util.h" +#include "benchmark/benchmark_util.h" +#include "benchmark/logging.h" #include "phtree/phtree.h" #include #include @@ -26,6 +26,7 @@ using namespace improbable::phtree::phbenchmark; namespace { const int GLOBAL_MAX = 10000; +using payload_t = std::uint32_t; /* * Benchmark for removing entries. @@ -39,11 +40,11 @@ class IndexBenchmark { private: void SetupWorld(benchmark::State& state); - void Insert(benchmark::State& state, PhTreeD& tree); - void Remove(benchmark::State& state, PhTreeD& tree); + void Insert(benchmark::State& state, PhTreeD& tree); + void Remove(benchmark::State& state, PhTreeD& tree); const TestGenerator data_type_; - const int num_entities_; + const size_t num_entities_; std::default_random_engine random_engine_; std::uniform_real_distribution<> cube_distribution_; @@ -66,7 +67,7 @@ template void IndexBenchmark::Benchmark(benchmark::State& state) { for (auto _ : state) { state.PauseTiming(); - auto* tree = new PhTreeD(); + auto* tree = new PhTreeD(); Insert(state, *tree); state.ResumeTiming(); @@ -91,16 +92,16 @@ void IndexBenchmark::SetupWorld(benchmark::State& state) { } template -void IndexBenchmark::Insert(benchmark::State&, PhTreeD& tree) { - for (int i = 0; i < num_entities_; ++i) { +void IndexBenchmark::Insert(benchmark::State&, PhTreeD& tree) { + for (payload_t i = 0; i < num_entities_; ++i) { tree.emplace(points_[i], i); } } template -void IndexBenchmark::Remove(benchmark::State& state, PhTreeD& tree) { - int n = 0; - for (int i = 0; i < num_entities_; ++i) { +void IndexBenchmark::Remove(benchmark::State& state, PhTreeD& tree) { + size_t n = 0; + for (size_t i = 0; i < num_entities_; ++i) { n += tree.erase(points_[i]); } diff --git a/phtree/benchmark/extent_benchmark.cc b/benchmark/extent_benchmark.cc similarity index 95% rename from phtree/benchmark/extent_benchmark.cc rename to benchmark/extent_benchmark.cc index 760a5749..6241c5f7 100644 --- 
a/phtree/benchmark/extent_benchmark.cc +++ b/benchmark/extent_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "logging.h" -#include "phtree/benchmark/benchmark_util.h" +#include "benchmark/benchmark_util.h" +#include "benchmark/logging.h" #include "phtree/phtree.h" #include #include @@ -42,7 +42,7 @@ class IndexBenchmark { void QueryWorld(benchmark::State& state); const TestGenerator data_type_; - const int num_entities_; + const size_t num_entities_; PhTree tree_; std::default_random_engine random_engine_; @@ -73,8 +73,8 @@ template void IndexBenchmark::SetupWorld(benchmark::State& state) { logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); - for (int i = 0; i < num_entities_; ++i) { - tree_.emplace(points_[i], i); + for (size_t i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i], (int)i); } state.counters["total_result_count"] = benchmark::Counter(0); diff --git a/phtree/benchmark/extent_benchmark_weird.cc b/benchmark/extent_benchmark_weird.cc similarity index 97% rename from phtree/benchmark/extent_benchmark_weird.cc rename to benchmark/extent_benchmark_weird.cc index bee6ecb0..cfc26cd2 100644 --- a/phtree/benchmark/extent_benchmark_weird.cc +++ b/benchmark/extent_benchmark_weird.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "logging.h" -#include "phtree/benchmark/benchmark_util.h" +#include "benchmark/benchmark_util.h" +#include "benchmark/logging.h" #include "phtree/phtree.h" #include #include @@ -48,7 +48,7 @@ class IndexBenchmark { void QueryWorld(benchmark::State& state); const TestGenerator data_type_; - const int num_entities_; + const size_t num_entities_; PhTree tree_; std::default_random_engine random_engine_; @@ -81,8 +81,8 @@ template void IndexBenchmark::SetupWorld(benchmark::State& state) { logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); - for (int i = 0; i < num_entities_; ++i) { - tree_.emplace(points_[i], i); + for (size_t i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i], (int)i); } state.counters["total_result_count"] = benchmark::Counter(0); diff --git a/phtree/benchmark/find_benchmark.cc b/benchmark/find_benchmark.cc similarity index 97% rename from phtree/benchmark/find_benchmark.cc rename to benchmark/find_benchmark.cc index 0621dd7b..138e6f90 100644 --- a/phtree/benchmark/find_benchmark.cc +++ b/benchmark/find_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "logging.h" -#include "phtree/benchmark/benchmark_util.h" +#include "benchmark/benchmark_util.h" +#include "benchmark/logging.h" #include "phtree/phtree.h" #include #include @@ -49,7 +49,7 @@ class IndexBenchmark { int QueryWorldFind(benchmark::State& state); const TestGenerator data_type_; - const int num_entities_; + const size_t num_entities_; const QueryType query_type_; PhTree tree_; @@ -102,8 +102,8 @@ template void IndexBenchmark::SetupWorld(benchmark::State& state) { logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); - for (int i = 0; i < num_entities_; ++i) { - tree_.emplace(points_[i], i); + for (size_t i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i], (int)i); } state.counters["total_result_count"] = benchmark::Counter(0); diff --git a/benchmark/hd_erase_d_benchmark.cc b/benchmark/hd_erase_d_benchmark.cc new file mode 100644 index 00000000..87363e51 --- /dev/null +++ b/benchmark/hd_erase_d_benchmark.cc @@ -0,0 +1,146 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "benchmark/benchmark_util.h" +#include "benchmark/logging.h" +#include "phtree/phtree.h" +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const int GLOBAL_MAX = 10000; +using payload_t = std::uint32_t; + +/* + * Benchmark for removing entries. + */ +template +class IndexBenchmark { + public: + IndexBenchmark(benchmark::State& state); + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void Insert(benchmark::State& state, PhTreeD& tree); + void Remove(benchmark::State& state, PhTreeD& tree); + + const TestGenerator data_type_; + const size_t num_entities_; + + std::default_random_engine random_engine_; + std::uniform_real_distribution<> cube_distribution_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark(benchmark::State& state) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, points_(state.range(0)) { + logging::SetupDefaultLogging(); + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + auto* tree = new PhTreeD(); + Insert(state, *tree); + state.ResumeTiming(); + + Remove(state, *tree); + + state.PauseTiming(); + // avoid measuring deallocation + delete tree; + state.ResumeTiming(); + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); + + state.counters["total_remove_count"] = benchmark::Counter(0); + state.counters["remove_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + + logging::info("World setup complete."); +} + +template +void IndexBenchmark::Insert(benchmark::State&, PhTreeD& tree) { + for 
(size_t i = 0; i < num_entities_; ++i) { + tree.emplace(points_[i], (int)i); + } +} + +template +void IndexBenchmark::Remove(benchmark::State& state, PhTreeD& tree) { + size_t n = 0; + for (size_t i = 0; i < num_entities_; ++i) { + n += tree.erase(points_[i]); + } + + state.counters["total_remove_count"] += n; + state.counters["remove_rate"] += n; +} + +} // namespace + +template +void PhTree6D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<6> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree10D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<10> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree20D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<20> benchmark{state}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_generator, num_entities +BENCHMARK_CAPTURE(PhTree6D, ERASE, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree10D, ERASE, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree20D, ERASE, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/benchmark/hd_insert_d_benchmark.cc b/benchmark/hd_insert_d_benchmark.cc new file mode 100644 index 00000000..ecaf37e5 --- /dev/null +++ b/benchmark/hd_insert_d_benchmark.cc @@ -0,0 +1,132 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "benchmark/benchmark_util.h" +#include "benchmark/logging.h" +#include "phtree/phtree.h" +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const double GLOBAL_MAX = 10000; + +/* + * Benchmark for adding entries to the index. + */ +template +class IndexBenchmark { + using Index = PhTreeD; + + public: + explicit IndexBenchmark(benchmark::State& state); + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void Insert(benchmark::State& state, Index& tree); + + const TestGenerator data_type_; + const size_t num_entities_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark(benchmark::State& state) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) +, points_(state.range(0)) { + logging::SetupDefaultLogging(); + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + auto* tree = new Index(); + state.ResumeTiming(); + + Insert(state, *tree); + + // we do this top avoid measuring deallocation + state.PauseTiming(); + delete tree; + state.ResumeTiming(); + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); + + state.counters["total_put_count"] = benchmark::Counter(0); + 
state.counters["put_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + + logging::info("World setup complete."); +} + +template +void IndexBenchmark::Insert(benchmark::State& state, Index& tree) { + for (size_t i = 0; i < num_entities_; ++i) { + PhPointD& p = points_[i]; + tree.emplace(p, (int)i); + } + + state.counters["total_put_count"] += num_entities_; + state.counters["put_rate"] += num_entities_; +} + +} // namespace + +template +void PhTree6D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<6> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree10D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<10> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree20D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<20> benchmark{state}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_generator, num_entities +BENCHMARK_CAPTURE(PhTree6D, INSERT, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree10D, INSERT, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree20D, INSERT, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/benchmark/hd_knn_d_benchmark.cc b/benchmark/hd_knn_d_benchmark.cc new file mode 100644 index 00000000..2f122ea0 --- /dev/null +++ b/benchmark/hd_knn_d_benchmark.cc @@ -0,0 +1,152 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "benchmark/benchmark_util.h" +#include "benchmark/logging.h" +#include "phtree/phtree.h" +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const double GLOBAL_MAX = 10000; +using payload_t = std::uint32_t; + +/* + * Benchmark for k-nearest-neighbour queries. + */ +template +class IndexBenchmark { + public: + IndexBenchmark(benchmark::State& state); + + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void QueryWorld(benchmark::State& state, PhPointD& center); + void CreateQuery(PhPointD& center); + + const TestGenerator data_type_; + const size_t num_entities_; + const size_t knn_result_size_; + + PhTreeD tree_; + std::default_random_engine random_engine_; + std::uniform_real_distribution<> cube_distribution_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark(benchmark::State& state) +: data_type_{static_cast(state.range(2))} +, num_entities_(state.range(0)) +, knn_result_size_(state.range(1)) +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, points_(state.range(0)) { + logging::SetupDefaultLogging(); + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + PhPointD center; + CreateQuery(center); + state.ResumeTiming(); + + QueryWorld(state, center); + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Setting up world 
with {} entities and {} dimensions.", num_entities_, DIM); + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); + for (size_t i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i], (int)i); + } + + state.counters["total_query_count"] = benchmark::Counter(0); + state.counters["query_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["avg_result_count"] = benchmark::Counter(0, benchmark::Counter::kAvgIterations); + + logging::info("World setup complete."); +} + +template +void IndexBenchmark::QueryWorld(benchmark::State& state, PhPointD& center) { + size_t n = 0; + for (auto q = tree_.begin_knn_query(knn_result_size_, center, DistanceEuclidean()); + q != tree_.end(); + ++q) { + ++n; + } + + state.counters["total_query_count"] += 1; + state.counters["query_rate"] += 1; + state.counters["avg_result_count"] += n; +} + +template +void IndexBenchmark::CreateQuery(PhPointD& center) { + for (dimension_t d = 0; d < DIM; ++d) { + center[d] = cube_distribution_(random_engine_) * GLOBAL_MAX; + } +} + +} // namespace + +template +void PhTree6D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<6> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree10D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<10> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree20D(benchmark::State& state, Arguments&&...) 
{ + IndexBenchmark<20> benchmark{state}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_type, num_entities, query_result_size +BENCHMARK_CAPTURE(PhTree6D, KNN, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {1, 10}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree10D, KNN, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {1, 10}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree20D, KNN, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {1, 10}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/benchmark/hd_query_d_benchmark.cc b/benchmark/hd_query_d_benchmark.cc new file mode 100644 index 00000000..61ef219d --- /dev/null +++ b/benchmark/hd_query_d_benchmark.cc @@ -0,0 +1,214 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "benchmark/benchmark_util.h" +#include "benchmark/logging.h" +#include "phtree/phtree.h" +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const double GLOBAL_MAX = 10000; + +enum QueryType { MIN_MAX_ITER, MIN_MAX_FOR_EACH }; + +template +using BoxType = PhBoxD; + +template +using PointType = PhPointD; + +template +using TreeType = PhTreeD; + +/* + * Benchmark for window queries. + */ +template +class IndexBenchmark { + public: + IndexBenchmark(benchmark::State& state, double avg_query_result_size_ = 100); + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void QueryWorld(benchmark::State& state, BoxType& query_box); + void CreateQuery(BoxType& query_box); + + const TestGenerator data_type_; + const size_t num_entities_; + const double avg_query_result_size_; + + constexpr int query_edge_length() { + return (int)(GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. 
/ (double)DIM)); + }; + + TreeType tree_; + std::default_random_engine random_engine_; + std::uniform_real_distribution<> cube_distribution_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark( + benchmark::State& state, double avg_query_result_size) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) +, avg_query_result_size_(avg_query_result_size) +, tree_{} +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, points_(state.range(0)) { + logging::SetupDefaultLogging(); + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + BoxType query_box; + CreateQuery(query_box); + state.ResumeTiming(); + + QueryWorld(state, query_box); + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); + for (size_t i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i], (int)i); + } + + state.counters["total_result_count"] = benchmark::Counter(0); + state.counters["query_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["result_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["avg_result_count"] = benchmark::Counter(0, benchmark::Counter::kAvgIterations); + + logging::info("World setup complete."); +} + +template +struct Counter { + void operator()(PointType, T&) { + ++n_; + } + + size_t n_ = 0; +}; + +template +size_t Count_MMI(TreeType& tree, BoxType& query_box) { + size_t n = 0; + for (auto q = tree.begin_query(query_box); q != tree.end(); ++q) { + ++n; + } + return n; +} + +template +size_t Count_MMFE(TreeType& tree, BoxType& query_box) { + Counter callback; + tree.for_each(query_box, callback); + return callback.n_; +} + +template +void IndexBenchmark::QueryWorld(benchmark::State& state, BoxType& 
query_box) { + size_t n = 0; + switch (QUERY_TYPE) { + case MIN_MAX_ITER: + n = Count_MMI(tree_, query_box); + break; + case MIN_MAX_FOR_EACH: + n = Count_MMFE(tree_, query_box); + break; + } + + state.counters["total_result_count"] += n; + state.counters["query_rate"] += 1; + state.counters["result_rate"] += n; + state.counters["avg_result_count"] += n; +} + +template +void IndexBenchmark::CreateQuery(BoxType& query_box) { + int length = query_edge_length(); + // scale to ensure query lies within boundary + double scale = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; + for (dimension_t d = 0; d < DIM; ++d) { + auto s = cube_distribution_(random_engine_); + s = s * scale; + query_box.min()[d] = s; + query_box.max()[d] = s + length; + } +} + +} // namespace + +template +void PhTree6D_FE(benchmark::State& state, Arguments&&...) { + IndexBenchmark<6, MIN_MAX_FOR_EACH> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree6D_IT(benchmark::State& state, Arguments&&...) { + IndexBenchmark<6, MIN_MAX_ITER> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree10D_IT(benchmark::State& state, Arguments&&...) { + IndexBenchmark<10, MIN_MAX_ITER> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree20D_IT(benchmark::State& state, Arguments&&...) 
{ + IndexBenchmark<20, MIN_MAX_ITER> benchmark{state}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_type, num_entities, query_result_size +BENCHMARK_CAPTURE(PhTree6D_FE, WQ, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree6D_IT, WQ, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree10D_IT, WQ, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree20D_IT, WQ, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/phtree/benchmark/insert_benchmark.cc b/benchmark/insert_benchmark.cc similarity index 93% rename from phtree/benchmark/insert_benchmark.cc rename to benchmark/insert_benchmark.cc index c48e7778..e0880246 100644 --- a/phtree/benchmark/insert_benchmark.cc +++ b/benchmark/insert_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "logging.h" -#include "phtree/benchmark/benchmark_util.h" +#include "benchmark/benchmark_util.h" +#include "benchmark/logging.h" #include "phtree/phtree.h" #include @@ -52,7 +52,7 @@ class IndexBenchmark { void Insert(benchmark::State& state, PhTree& tree); const TestGenerator data_type_; - const int num_entities_; + const size_t num_entities_; const InsertionType insertion_type_; std::vector> points_; }; @@ -99,20 +99,20 @@ template void IndexBenchmark::Insert(benchmark::State& state, PhTree& tree) { switch (insertion_type_) { case INSERT: { - for (int i = 0; i < num_entities_; ++i) { - tree.insert(points_[i], i); + for (size_t i = 0; i < num_entities_; ++i) { + tree.insert(points_[i], (int)i); } break; } case EMPLACE: { - for (int i = 0; i < num_entities_; ++i) { - tree.emplace(points_[i], i); + for (size_t i = 0; i < num_entities_; ++i) { + tree.emplace(points_[i], (int)i); } break; } case SQUARE_BR: { - for (int i = 0; i < num_entities_; ++i) { - tree[points_[i]] = i; + for (size_t i = 0; i < num_entities_; ++i) { + tree[points_[i]] = (int)i; } break; } diff --git a/phtree/benchmark/insert_box_d_benchmark.cc b/benchmark/insert_box_d_benchmark.cc similarity index 95% rename from phtree/benchmark/insert_box_d_benchmark.cc rename to benchmark/insert_box_d_benchmark.cc index 817e848d..34819cb4 100644 --- a/phtree/benchmark/insert_box_d_benchmark.cc +++ b/benchmark/insert_box_d_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "logging.h" -#include "phtree/benchmark/benchmark_util.h" +#include "benchmark/benchmark_util.h" +#include "benchmark/logging.h" #include "phtree/phtree.h" #include @@ -43,7 +43,7 @@ class IndexBenchmark { void Insert(benchmark::State& state, PhTreeBoxD& tree); const TestGenerator data_type_; - const int num_entities_; + const size_t num_entities_; std::vector> boxes_; }; @@ -84,9 +84,9 @@ void IndexBenchmark::SetupWorld(benchmark::State& state) { template void IndexBenchmark::Insert(benchmark::State& state, PhTreeBoxD& tree) { - for (int i = 0; i < num_entities_; ++i) { + for (size_t i = 0; i < num_entities_; ++i) { PhBoxD& p = boxes_[i]; - tree.emplace(p, i); + tree.emplace(p, (int)i); } state.counters["total_put_count"] += num_entities_; diff --git a/phtree/benchmark/insert_d_benchmark.cc b/benchmark/insert_d_benchmark.cc similarity index 92% rename from phtree/benchmark/insert_d_benchmark.cc rename to benchmark/insert_d_benchmark.cc index 7ef06a36..20d9dede 100644 --- a/phtree/benchmark/insert_d_benchmark.cc +++ b/benchmark/insert_d_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "logging.h" -#include "phtree/benchmark/benchmark_util.h" +#include "benchmark/benchmark_util.h" +#include "benchmark/logging.h" #include "phtree/phtree.h" #include @@ -31,6 +31,7 @@ const double GLOBAL_MAX = 10000; */ template class IndexBenchmark { + using Index = PhTreeD; public: IndexBenchmark(benchmark::State& state, TestGenerator data_type, int num_entities); @@ -39,10 +40,10 @@ class IndexBenchmark { private: void SetupWorld(benchmark::State& state); - void Insert(benchmark::State& state, PhTreeD& tree); + void Insert(benchmark::State& state, Index& tree); const TestGenerator data_type_; - const int num_entities_; + const size_t num_entities_; std::vector> points_; }; @@ -58,7 +59,7 @@ template void IndexBenchmark::Benchmark(benchmark::State& state) { for (auto _ : state) { state.PauseTiming(); - auto* tree = new PhTreeD(); + auto* tree = new Index(); state.ResumeTiming(); Insert(state, *tree); @@ -82,10 +83,10 @@ void IndexBenchmark::SetupWorld(benchmark::State& state) { } template -void IndexBenchmark::Insert(benchmark::State& state, PhTreeD& tree) { - for (int i = 0; i < num_entities_; ++i) { +void IndexBenchmark::Insert(benchmark::State& state, Index& tree) { + for (size_t i = 0; i < num_entities_; ++i) { PhPointD& p = points_[i]; - tree.emplace(p, i); + tree.emplace(p, (int)i); } state.counters["total_put_count"] += num_entities_; diff --git a/phtree/benchmark/knn_d_benchmark.cc b/benchmark/knn_d_benchmark.cc similarity index 95% rename from phtree/benchmark/knn_d_benchmark.cc rename to benchmark/knn_d_benchmark.cc index 7c56b852..6e2c0188 100644 --- a/phtree/benchmark/knn_d_benchmark.cc +++ b/benchmark/knn_d_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "logging.h" -#include "phtree/benchmark/benchmark_util.h" +#include "benchmark/benchmark_util.h" +#include "benchmark/logging.h" #include "phtree/phtree.h" #include #include @@ -44,8 +44,8 @@ class IndexBenchmark { void CreateQuery(PhPointD& center); const TestGenerator data_type_; - const int num_entities_; - const double knn_result_size_; + const size_t num_entities_; + const size_t knn_result_size_; PhTreeD tree_; std::default_random_engine random_engine_; @@ -82,8 +82,8 @@ template void IndexBenchmark::SetupWorld(benchmark::State& state) { logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); - for (int i = 0; i < num_entities_; ++i) { - tree_.emplace(points_[i], i); + for (size_t i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i], (int)i); } state.counters["total_result_count"] = benchmark::Counter(0); diff --git a/phtree/benchmark/logging.h b/benchmark/logging.h similarity index 75% rename from phtree/benchmark/logging.h rename to benchmark/logging.h index 14b7ae68..64573099 100644 --- a/phtree/benchmark/logging.h +++ b/benchmark/logging.h @@ -22,11 +22,34 @@ constexpr auto kInternalLoggerName = "internal"; // Sets up spdlog for internal and external. If you need to do some logging before doing this // call, use instead CaptureLogMessagesToBufferSink()/SetupLoggingAndFlushBuffer. -void SetupLogging(std::vector sinks, spdlog::level::level_enum log_level); +void SetupLogging(std::vector sinks, spdlog::level::level_enum log_level) { + auto& console_sink = sinks.emplace_back(std::make_shared()); + console_sink->set_level(log_level); + + // Find the minimum log level, in case one of the sinks passed to us has a lower log level. 
+ const auto& sink_with_lowest_log_level = *std::min_element( + sinks.begin(), + sinks.end(), + [](const spdlog::sink_ptr& a, const spdlog::sink_ptr& b) -> bool { + return a->level() < b->level(); + }); + spdlog::level::level_enum min_log_level = + std::min(sink_with_lowest_log_level->level(), log_level); + + // Create the external logger, worker logger and the internal (default) logger from the same log + // sinks. Each logsink can use `GetLoggerTypeFromMessage` to determine which logger a message + // was logged to. + spdlog::set_default_logger( + std::make_shared(kInternalLoggerName, sinks.begin(), sinks.end())); + spdlog::set_level(min_log_level); + spdlog::flush_on(min_log_level); +} // Sets up default logging typically used for tests/benchmarks. Also used for default // initialization if the logging hasn't been initialized before the first logging line. -void SetupDefaultLogging(); +void SetupDefaultLogging() { + SetupLogging({}, spdlog::level::warn); +} template inline void log( diff --git a/phtree/benchmark/query_benchmark.cc b/benchmark/query_benchmark.cc similarity index 92% rename from phtree/benchmark/query_benchmark.cc rename to benchmark/query_benchmark.cc index b0f50f39..4e3e06f0 100644 --- a/phtree/benchmark/query_benchmark.cc +++ b/benchmark/query_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "logging.h" -#include "phtree/benchmark/benchmark_util.h" +#include "benchmark/benchmark_util.h" +#include "benchmark/logging.h" #include "phtree/phtree.h" #include #include @@ -49,11 +49,11 @@ class IndexBenchmark { void CreateQuery(PhBox& query); const TestGenerator data_type_; - const int num_entities_; + const size_t num_entities_; const double avg_query_result_size_; constexpr int query_endge_length() { - return GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. 
/ (double)DIM); + return (int)(GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. / (double)DIM)); }; PhTree tree_; @@ -94,8 +94,8 @@ template void IndexBenchmark::SetupWorld(benchmark::State& state) { logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); - for (int i = 0; i < num_entities_; ++i) { - tree_.emplace(points_[i], i); + for (size_t i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i], (int)i); } state.counters["total_result_count"] = benchmark::Counter(0); @@ -125,8 +125,8 @@ void IndexBenchmark::CreateQuery(PhBox& query_box) { // scale to ensure query lies within boundary double scale = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; for (dimension_t d = 0; d < DIM; ++d) { - auto s = cube_distribution_(random_engine_); - s = s * scale; + scalar_64_t s = cube_distribution_(random_engine_); + s = (scalar_64_t)(s * scale); query_box.min()[d] = s; query_box.max()[d] = s + length; } diff --git a/phtree/benchmark/query_box_d_benchmark.cc b/benchmark/query_box_d_benchmark.cc similarity index 96% rename from phtree/benchmark/query_box_d_benchmark.cc rename to benchmark/query_box_d_benchmark.cc index ecd736a8..43d646b3 100644 --- a/phtree/benchmark/query_box_d_benchmark.cc +++ b/benchmark/query_box_d_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "logging.h" -#include "phtree/benchmark/benchmark_util.h" +#include "benchmark/benchmark_util.h" +#include "benchmark/logging.h" #include "phtree/phtree.h" #include #include @@ -61,11 +61,11 @@ class IndexBenchmark { void CreateQuery(BoxType& query_box); const TestGenerator data_type_; - const int num_entities_; + const size_t num_entities_; const double avg_query_result_size_; constexpr int query_endge_length() { - return GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. 
/ (double)DIM); + return (int)(GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. / (double)DIM)); }; TreeType tree_; @@ -106,8 +106,8 @@ template void IndexBenchmark::SetupWorld(benchmark::State& state) { logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreateBoxData(boxes_, data_type_, num_entities_, 0, GLOBAL_MAX, BOX_LEN); - for (int i = 0; i < num_entities_; ++i) { - tree_.emplace(boxes_[i], i); + for (size_t i = 0; i < num_entities_; ++i) { + tree_.emplace(boxes_[i], (int)i); } state.counters["total_result_count"] = benchmark::Counter(0); @@ -145,7 +145,7 @@ size_t Count_MMFE(TreeType& tree, BoxType& query_box) { template void IndexBenchmark::QueryWorld(benchmark::State& state, BoxType& query_box) { - int n = 0; + size_t n = 0; switch (QUERY_TYPE) { case MIN_MAX_ITER: n = Count_MMI(tree_, query_box); diff --git a/phtree/benchmark/query_d_benchmark.cc b/benchmark/query_d_benchmark.cc similarity index 96% rename from phtree/benchmark/query_d_benchmark.cc rename to benchmark/query_d_benchmark.cc index 57fd2268..b3ffdb04 100644 --- a/phtree/benchmark/query_d_benchmark.cc +++ b/benchmark/query_d_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "logging.h" -#include "phtree/benchmark/benchmark_util.h" +#include "benchmark/benchmark_util.h" +#include "benchmark/logging.h" #include "phtree/phtree.h" #include #include @@ -60,11 +60,11 @@ class IndexBenchmark { void CreateQuery(BoxType& query_box); const TestGenerator data_type_; - const int num_entities_; + const size_t num_entities_; const double avg_query_result_size_; constexpr int query_endge_length() { - return GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. / (double)DIM); + return (int)(GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. 
/ (double)DIM)); }; TreeType tree_; @@ -106,8 +106,8 @@ template void IndexBenchmark::SetupWorld(benchmark::State& state) { logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); - for (int i = 0; i < num_entities_; ++i) { - tree_.emplace(points_[i], i); + for (size_t i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i], (int)i); } state.counters["total_result_count"] = benchmark::Counter(0); @@ -145,7 +145,7 @@ size_t Count_MMFE(TreeType& tree, BoxType& query_box) { template void IndexBenchmark::QueryWorld(benchmark::State& state, BoxType& query_box) { - int n = 0; + size_t n = 0; switch (QUERY_TYPE) { case MIN_MAX_ITER: n = Count_MMI(tree_, query_box); diff --git a/phtree/benchmark/query_mm_box_d_benchmark.cc b/benchmark/query_mm_box_d_benchmark.cc similarity index 95% rename from phtree/benchmark/query_mm_box_d_benchmark.cc rename to benchmark/query_mm_box_d_benchmark.cc index 538e73d9..68458e25 100644 --- a/phtree/benchmark/query_mm_box_d_benchmark.cc +++ b/benchmark/query_mm_box_d_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "benchmark_util.h" -#include "logging.h" +#include "benchmark/benchmark_util.h" +#include "benchmark/logging.h" #include "phtree/phtree.h" #include "phtree/phtree_multimap.h" #include @@ -144,7 +144,7 @@ struct CounterMultiMap { }; template -typename std::enable_if::type CountEntries( +typename std::enable_if::type CountEntries( TestMap& tree, const Query& query) { CounterTreeWithMap counter{query.box_, 0}; tree.for_each(query.box_, counter); @@ -152,7 +152,7 @@ typename std::enable_if::type CountEnt } template -int CountEntries(TestMap& tree, const Query& query) { +size_t CountEntries(TestMap& tree, const Query& query) { CounterMultiMap counter{query.box_, 0}; tree.for_each(query.box_, counter); return counter.n_; @@ -175,7 +175,7 @@ void IndexBenchmark::SetupWorld(benchmark::State& state) { template void IndexBenchmark::QueryWorld(benchmark::State& state, const Query& query) { - int n = CountEntries(tree_, query); + size_t n = CountEntries(tree_, query); state.counters["query_rate"] += 1; state.counters["result_rate"] += n; diff --git a/phtree/benchmark/query_mm_d_benchmark.cc b/benchmark/query_mm_d_benchmark.cc similarity index 77% rename from phtree/benchmark/query_mm_d_benchmark.cc rename to benchmark/query_mm_d_benchmark.cc index 9e819450..6a609e9a 100644 --- a/phtree/benchmark/query_mm_d_benchmark.cc +++ b/benchmark/query_mm_d_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "benchmark_util.h" -#include "logging.h" +#include "benchmark/benchmark_util.h" +#include "benchmark/logging.h" #include "phtree/phtree.h" #include "phtree/phtree_multimap.h" #include @@ -32,7 +32,7 @@ namespace { const double GLOBAL_MAX = 10000; -enum Scenario { TREE_WITH_MAP, MULTI_MAP }; +enum Scenario { TREE_WITH_MAP, MULTI_MAP, MULTI_MAP_STD }; using TestPoint = PhPointD<3>; using QueryBox = PhBoxD<3>; @@ -52,7 +52,10 @@ template using TestMap = typename std::conditional_t< SCENARIO == TREE_WITH_MAP, PhTreeD>, - PhTreeMultiMapD>>; + typename std::conditional_t< + SCENARIO == MULTI_MAP, + PhTreeMultiMapD, b_plus_tree_hash_set>, + PhTreeMultiMapD, std::unordered_set>>>; template class IndexBenchmark { @@ -120,12 +123,20 @@ void InsertEntry( tree.emplace(point, data); } -bool CheckPosition(const payload_t& entity, const TestPoint& center, double radius) { +template +void InsertEntry( + TestMap& tree, + const PhPointD& point, + const payload_t& data) { + tree.emplace(point, data); +} + +int CheckPosition(const payload_t& entity, const TestPoint& center, double radius) { const auto& point = entity; - double dx = center[0] - point[0]; - double dy = center[1] - point[1]; - double dz = center[2] - point[2]; - return dx * dx + dy * dy + dz * dz <= radius * radius; + bool dx = abs(center[0] - point[0]) <= radius; + bool dy = abs(center[1] - point[1]) <= radius; + bool dz = abs(center[2] - point[2]) <= radius; + return dx && dy && dz ? 
1 : -100000000; } struct CounterTreeWithMap { @@ -150,7 +161,7 @@ struct CounterMultiMap { }; template -typename std::enable_if::type CountEntries( +typename std::enable_if::type CountEntries( TestMap& tree, const Query& query) { CounterTreeWithMap counter{query.center, query.radius, 0}; tree.for_each(query.box, counter); @@ -158,7 +169,14 @@ typename std::enable_if::type CountEnt } template -int CountEntries(TestMap& tree, const Query& query) { +size_t CountEntries(TestMap& tree, const Query& query) { + CounterMultiMap counter{query.center, query.radius, 0}; + tree.for_each(query.box, counter); + return counter.n_; +} + +template +size_t CountEntries(TestMap& tree, const Query& query) { CounterMultiMap counter{query.center, query.radius, 0}; tree.for_each(query.box, counter); return counter.n_; @@ -181,7 +199,7 @@ void IndexBenchmark::SetupWorld(benchmark::State& state) { template void IndexBenchmark::QueryWorld(benchmark::State& state, const Query& query) { - int n = CountEntries(tree_, query); + size_t n = CountEntries(tree_, query); state.counters["query_rate"] += 1; state.counters["result_rate"] += n; @@ -209,11 +227,17 @@ void PhTree3D(benchmark::State& state, Arguments&&... arguments) { } template -void PhTreeMultiMapM3D(benchmark::State& state, Arguments&&... arguments) { +void PhTreeMultiMap3D(benchmark::State& state, Arguments&&... arguments) { IndexBenchmark<3, Scenario::MULTI_MAP> benchmark{state, arguments...}; benchmark.Benchmark(state); } +template +void PhTreeMultiMapStd3D(benchmark::State& state, Arguments&&... 
arguments) { + IndexBenchmark<3, Scenario::MULTI_MAP_STD> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + // index type, scenario name, data_type, num_entities, avg_query_result_size // PhTree BENCHMARK_CAPTURE(PhTree3D, WQ_100, 100.0) @@ -222,7 +246,13 @@ BENCHMARK_CAPTURE(PhTree3D, WQ_100, 100.0) ->Unit(benchmark::kMillisecond); // PhTreeMultiMap -BENCHMARK_CAPTURE(PhTreeMultiMapM3D, WQ_100, 100.0) +BENCHMARK_CAPTURE(PhTreeMultiMap3D, WQ_100, 100.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +// PhTreeMultiMap +BENCHMARK_CAPTURE(PhTreeMultiMapStd3D, WQ_100, 100.0) ->RangeMultiplier(10) ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); diff --git a/benchmark/query_mm_d_filter_benchmark.cc b/benchmark/query_mm_d_filter_benchmark.cc new file mode 100644 index 00000000..e8e5f5ac --- /dev/null +++ b/benchmark/query_mm_d_filter_benchmark.cc @@ -0,0 +1,350 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "benchmark/benchmark_util.h" +#include "benchmark/logging.h" +#include "phtree/phtree.h" +#include "phtree/phtree_multimap.h" +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +/* + * Benchmark for querying entries in multi-map implementations. + * This benchmark uses a SPHERE-shaped query! + */ +namespace { + +const double GLOBAL_MAX = 10000; + +enum Scenario { SPHERE_WQ, SPHERE, WQ, SPHERE_IT_WQ, LEGACY_WQ }; + +using TestPoint = PhPointD<3>; +using QueryBox = PhBoxD<3>; +using payload_t = TestPoint; +using BucketType = std::set; + +struct Query { + QueryBox box{}; + TestPoint center{}; + double radius{}; +}; + +template +using CONVERTER = ConverterIEEE; + +template +using DistanceFn = DistanceEuclidean; + +template +using TestMap = PhTreeMultiMapD>; + +template < + typename CONVERTER = ConverterIEEE<3>, + typename DISTANCE = DistanceEuclidean> +class FilterSphereLegacy { + using KeyExternal = typename CONVERTER::KeyExternal; + using KeyInternal = typename CONVERTER::KeyInternal; + using ScalarInternal = typename CONVERTER::ScalarInternal; + using ScalarExternal = typename CONVERTER::ScalarExternal; + + static constexpr auto DIM = CONVERTER::DimInternal; + + public: + FilterSphereLegacy( + const KeyExternal& center, + const ScalarExternal& radius, + CONVERTER converter = CONVERTER(), + DISTANCE distance_function = DISTANCE()) + : center_external_{center} + , center_internal_{converter.pre(center)} + , radius_{radius} + , converter_{converter} + , distance_function_{distance_function} {}; + + template + [[nodiscard]] bool IsEntryValid(const KeyInternal&, const BucketT&) const { + // We simulate a legacy filter by returning 'true' for all buckets + return true; + } + + template + [[nodiscard]] bool IsBucketEntryValid(const KeyInternal& key, const T&) const { + KeyExternal point = converter_.post(key); + return distance_function_(center_external_, point) <= 
radius_; + } + + /* + * Calculate whether AABB encompassing all possible points in the node intersects with the + * sphere. + */ + [[nodiscard]] bool IsNodeValid(const KeyInternal& prefix, std::uint32_t bits_to_ignore) const { + // we always want to traverse the root node (bits_to_ignore == 64) + + if (bits_to_ignore >= (MAX_BIT_WIDTH - 1)) { + return true; + } + + ScalarInternal node_min_bits = MAX_MASK << bits_to_ignore; + ScalarInternal node_max_bits = ~node_min_bits; + + KeyInternal closest_in_bounds; + for (dimension_t i = 0; i < DIM; ++i) { + // calculate lower and upper bound for dimension for given node + ScalarInternal lo = prefix[i] & node_min_bits; + ScalarInternal hi = prefix[i] | node_max_bits; + + // choose value closest to center for dimension + closest_in_bounds[i] = std::clamp(center_internal_[i], lo, hi); + } + + KeyExternal closest_point = converter_.post(closest_in_bounds); + return distance_function_(center_external_, closest_point) <= radius_; + } + + private: + const KeyExternal center_external_; + const KeyInternal center_internal_; + const ScalarExternal radius_; + const CONVERTER converter_; + const DISTANCE distance_function_; +}; + +template +class IndexBenchmark { + public: + IndexBenchmark(benchmark::State& state, double avg_query_result_size_); + + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void QueryWorld(benchmark::State& state, const Query& query); + void CreateQuery(Query& query); + + const TestGenerator data_type_; + const size_t num_entities_; + const double avg_query_result_size_; + + constexpr double query_endge_length() { + return GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. 
/ (double)DIM); + }; + + TestMap tree_; + std::default_random_engine random_engine_; + std::uniform_real_distribution<> cube_distribution_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark(benchmark::State& state, double avg_query_result_size) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) +, avg_query_result_size_(avg_query_result_size) +, tree_{} +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, points_(num_entities_) { + logging::SetupDefaultLogging(); + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + Query query{}; + for (auto _ : state) { + state.PauseTiming(); + CreateQuery(query); + state.ResumeTiming(); + + QueryWorld(state, query); + } +} + +template +void InsertEntry(TestMap& tree, const PhPointD& point, const payload_t& data) { + tree.emplace(point, data); +} + +bool CheckPosition(const payload_t& entity, const TestPoint& center, double radius) { + const auto& point = entity; + double dx = center[0] - point[0]; + double dy = center[1] - point[1]; + double dz = center[2] - point[2]; + return dx * dx + dy * dy + dz * dz <= radius * radius; +} + +struct CounterCheckPosition { + template + void operator()(const PhPointD<3>& p, const T&) { + n_ += CheckPosition(p, center_, radius_); + } + const TestPoint& center_; + double radius_; + size_t n_; +}; + +struct Counter { + void operator()(const PhPointD<3>&, const payload_t&) { + ++n_; + } + size_t n_; +}; + +template +typename std::enable_if::type CountEntries( + TestMap& tree, const Query& query) { + FilterMultiMapSphere filter{query.center, query.radius, tree.converter(), DistanceFn()}; + Counter counter{0}; + tree.for_each(query.box, counter, filter); + return counter.n_; +} + +template +typename std::enable_if::type CountEntries( + TestMap& tree, const Query& query) { + FilterMultiMapSphere filter{query.center, query.radius, tree.converter(), DistanceFn()}; + Counter counter{0}; + 
tree.for_each(counter, filter); + return counter.n_; +} + +template +typename std::enable_if::type CountEntries( + TestMap& tree, const Query& query) { + CounterCheckPosition counter{query.center, query.radius, 0}; + tree.for_each(query.box, counter); + return counter.n_; +} + +template +typename std::enable_if::type CountEntries( + TestMap& tree, const Query& query) { + FilterMultiMapSphere filter{query.center, query.radius, tree.converter(), DistanceFn()}; + Counter counter{0}; + for (auto it = tree.begin_query(query.box, filter); it != tree.end(); ++it) { + ++counter.n_; + } + return counter.n_; +} + +template +typename std::enable_if::type CountEntries( + TestMap& tree, const Query& query) { + // Legacy: use non-multi-map filter + FilterSphereLegacy filter{query.center, query.radius, tree.converter(), DistanceFn()}; + Counter counter{0}; + tree.for_each(query.box, counter, filter); + return counter.n_; +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + // create data with about 10% duplicate coordinates + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX, 0.8); + for (size_t i = 0; i < num_entities_; ++i) { + InsertEntry(tree_, points_[i], points_[i]); + } + + state.counters["query_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["result_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["avg_result_count"] = benchmark::Counter(0, benchmark::Counter::kAvgIterations); + logging::info("World setup complete."); +} + +template +void IndexBenchmark::QueryWorld(benchmark::State& state, const Query& query) { + size_t n = CountEntries(tree_, query); + + state.counters["query_rate"] += 1; + state.counters["result_rate"] += n; + state.counters["avg_result_count"] += n; +} + +template +void IndexBenchmark::CreateQuery(Query& query) { + double radius = query_endge_length() * 0.5; + for 
(dimension_t d = 0; d < DIM; ++d) { + auto x = cube_distribution_(random_engine_); + query.box.min()[d] = x - radius; + query.box.max()[d] = x + radius; + query.center[d] = x; + } + query.radius = radius; +} + +} // namespace + +template +void PhTree3DSphereWQ(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::SPHERE_WQ> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree3DSphere(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::SPHERE> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree3DWQ(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::WQ> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree3DSphereITWQ(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::SPHERE_IT_WQ> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTree3DLegacyWQ(benchmark::State& state, Arguments&&... 
arguments) { + IndexBenchmark<3, Scenario::LEGACY_WQ> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_type, num_entities, avg_query_result_size +BENCHMARK_CAPTURE(PhTree3DSphereWQ, _100, 100.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3DSphere, _100, 100.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3DWQ, _100, 100.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3DSphereITWQ, _100, 100.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3DLegacyWQ, _100, 100.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/phtree/benchmark/update_box_d_benchmark.cc b/benchmark/update_box_d_benchmark.cc similarity index 62% rename from phtree/benchmark/update_box_d_benchmark.cc rename to benchmark/update_box_d_benchmark.cc index ab825e26..5221c7d9 100644 --- a/phtree/benchmark/update_box_d_benchmark.cc +++ b/benchmark/update_box_d_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "logging.h" -#include "phtree/benchmark/benchmark_util.h" +#include "benchmark/benchmark_util.h" +#include "benchmark/logging.h" #include "phtree/phtree.h" #include @@ -24,12 +24,14 @@ using namespace improbable::phtree::phbenchmark; namespace { -constexpr int UPDATES_PER_ROUND = 1000; +constexpr size_t UPDATES_PER_ROUND = 1000; constexpr double MOVE_DISTANCE = 10; const double GLOBAL_MAX = 10000; const double BOX_LEN = 10; +enum UpdateType { RELOCATE, ERASE_BY_KEY }; + template using BoxType = PhBoxD; @@ -46,14 +48,12 @@ struct UpdateOp { /* * Benchmark for updating the position of entries. */ -template +template class IndexBenchmark { public: IndexBenchmark( benchmark::State& state, - TestGenerator data_type, - int num_entities, - int updates_per_round = UPDATES_PER_ROUND, + size_t updates_per_round = UPDATES_PER_ROUND, double move_distance = MOVE_DISTANCE); void Benchmark(benchmark::State& state); @@ -75,27 +75,23 @@ class IndexBenchmark { std::uniform_int_distribution<> entity_id_distribution_; }; -template -IndexBenchmark::IndexBenchmark( - benchmark::State& state, - TestGenerator data_type, - int num_entities, - int updates_per_round, - double move_distance) -: data_type_{data_type} -, num_entities_(num_entities) +template +IndexBenchmark::IndexBenchmark( + benchmark::State& state, size_t updates_per_round, double move_distance) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) , updates_per_round_(updates_per_round) , move_distance_(move_distance) -, boxes_(num_entities) +, boxes_(num_entities_) , updates_(updates_per_round) , random_engine_{0} -, entity_id_distribution_{0, num_entities - 1} { +, entity_id_distribution_{0, static_cast(num_entities_ - 1)} { logging::SetupDefaultLogging(); SetupWorld(state); } -template -void IndexBenchmark::Benchmark(benchmark::State& state) { +template +void IndexBenchmark::Benchmark(benchmark::State& state) { for (auto _ : state) { state.PauseTiming(); BuildUpdates(); @@ -105,12 
+101,12 @@ void IndexBenchmark::Benchmark(benchmark::State& state) { } } -template -void IndexBenchmark::SetupWorld(benchmark::State& state) { +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreateBoxData(boxes_, data_type_, num_entities_, 0, GLOBAL_MAX, BOX_LEN); for (size_t i = 0; i < num_entities_; ++i) { - tree_.emplace(boxes_[i], i); + tree_.emplace(boxes_[i], (int)i); } state.counters["total_upd_count"] = benchmark::Counter(0); @@ -118,8 +114,8 @@ void IndexBenchmark::SetupWorld(benchmark::State& state) { logging::info("World setup complete."); } -template -void IndexBenchmark::BuildUpdates() { +template +void IndexBenchmark::BuildUpdates() { for (auto& update : updates_) { int box_id = entity_id_distribution_(random_engine_); update.id_ = box_id; @@ -134,14 +130,37 @@ void IndexBenchmark::BuildUpdates() { } template -void IndexBenchmark::UpdateWorld(benchmark::State& state) { - size_t initial_tree_size = tree_.size(); +size_t UpdateByRelocate(TreeType& tree, std::vector>& updates) { size_t n = 0; - for (auto& update : updates_) { - size_t result_erase = tree_.erase(update.old_); - auto result_emplace = tree_.emplace(update.new_, update.id_); + for (auto& update : updates) { + n += tree.relocate(update.old_, update.new_); + } + return n; +} + +template +size_t UpdateByKey(TreeType& tree, std::vector>& updates) { + size_t n = 0; + for (auto& update : updates) { + size_t result_erase = tree.erase(update.old_); + auto result_emplace = tree.emplace(update.new_, update.id_); n += result_erase == 1 && result_emplace.second; } + return n; +} + +template +void IndexBenchmark::UpdateWorld(benchmark::State& state) { + size_t initial_tree_size = tree_.size(); + size_t n = 0; + switch (UPDATE_TYPE) { + case UpdateType::ERASE_BY_KEY: + n = UpdateByKey(tree_, updates_); + break; + case UpdateType::RELOCATE: + n = UpdateByRelocate(tree_, updates_); + break; + } 
if (n != updates_.size()) { logging::error("Invalid update count: {}/{}", updates_.size(), n); @@ -159,37 +178,29 @@ void IndexBenchmark::UpdateWorld(benchmark::State& state) { } // namespace +template +void PhTreeRelocate3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, UpdateType::RELOCATE> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + template void PhTree3D(benchmark::State& state, Arguments&&... arguments) { - IndexBenchmark<3> benchmark{state, arguments...}; + IndexBenchmark<3, UpdateType::ERASE_BY_KEY> benchmark{state, arguments...}; benchmark.Benchmark(state); } // index type, scenario name, data_type, num_entities, updates_per_round, move_distance -// PhTree3D CUBE -BENCHMARK_CAPTURE(PhTree3D, UPDATE_CU_100_of_1K, TestGenerator::CUBE, 1000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, UPDATE_CU_100_of_10K, TestGenerator::CUBE, 10000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, UPDATE_CU_100_of_100K, TestGenerator::CUBE, 100000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, UPDATE_CU_100_of_1M, TestGenerator::CUBE, 1000000) - ->Unit(benchmark::kMillisecond); - -// PhTree3D CLUSTER -BENCHMARK_CAPTURE(PhTree3D, UPDATE_CL_100_of_1K, TestGenerator::CLUSTER, 1000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, UPDATE_CL_100_of_10K, TestGenerator::CLUSTER, 10000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, UPDATE_CL_100_of_100K, TestGenerator::CLUSTER, 100000) +// PhTree with relocate() +BENCHMARK_CAPTURE(PhTreeRelocate3D, UPDATE_1000, UPDATES_PER_ROUND) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTree3D, UPDATE_CL_100_of_1M, TestGenerator::CLUSTER, 1000000) +// PhTree with erase()/emplace() +BENCHMARK_CAPTURE(PhTree3D, UPDATE_1000, UPDATES_PER_ROUND) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, 
{TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); BENCHMARK_MAIN(); diff --git a/phtree/benchmark/update_d_benchmark.cc b/benchmark/update_d_benchmark.cc similarity index 65% rename from phtree/benchmark/update_d_benchmark.cc rename to benchmark/update_d_benchmark.cc index f358c564..bcfd86ff 100644 --- a/phtree/benchmark/update_d_benchmark.cc +++ b/benchmark/update_d_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "logging.h" -#include "phtree/benchmark/benchmark_util.h" +#include "benchmark/benchmark_util.h" +#include "benchmark/logging.h" #include "phtree/phtree.h" #include @@ -29,7 +29,7 @@ std::vector MOVE_DISTANCE = {0, 1.0, 10}; const double GLOBAL_MAX = 10000; -enum UpdateType { ERASE_BY_KEY, ERASE_BY_ITER, EMPLACE_HINT }; +enum UpdateType { RELOCATE, ERASE_BY_KEY, ERASE_BY_ITER, EMPLACE_HINT }; template using PointType = PhPointD; @@ -52,9 +52,7 @@ class IndexBenchmark { public: IndexBenchmark( benchmark::State& state, - TestGenerator data_type, - int num_entities, - int updates_per_round = UPDATES_PER_ROUND, + size_t updates_per_round = UPDATES_PER_ROUND, std::vector move_distance = MOVE_DISTANCE); void Benchmark(benchmark::State& state); @@ -78,19 +76,15 @@ class IndexBenchmark { template IndexBenchmark::IndexBenchmark( - benchmark::State& state, - TestGenerator data_type, - int num_entities, - int updates_per_round, - std::vector move_distance) -: data_type_{data_type} -, num_entities_(num_entities) + benchmark::State& state, size_t updates_per_round, std::vector move_distance) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) , updates_per_round_(updates_per_round) , move_distance_(std::move(move_distance)) -, points_(num_entities) +, points_(num_entities_) , updates_(updates_per_round) , random_engine_{0} -, entity_id_distribution_{0, num_entities - 1} { +, entity_id_distribution_{0, 
static_cast(num_entities_ - 1)} { logging::SetupDefaultLogging(); SetupWorld(state); } @@ -136,6 +130,15 @@ void IndexBenchmark::BuildUpdates() { } } +template +size_t UpdateByRelocate(TreeType& tree, std::vector>& updates) { + size_t n = 0; + for (auto& update : updates) { + n += tree.relocate(update.old_, update.new_); + } + return n; +} + template size_t UpdateByKey(TreeType& tree, std::vector>& updates) { size_t n = 0; @@ -190,6 +193,9 @@ void IndexBenchmark::UpdateWorld(benchmark::State& state) { case UpdateType::EMPLACE_HINT: n = UpdateByIterHint(tree_, updates_); break; + case UpdateType::RELOCATE: + n = UpdateByRelocate(tree_, updates_); + break; } if (n != updates_.size()) { @@ -208,6 +214,12 @@ void IndexBenchmark::UpdateWorld(benchmark::State& state) { } // namespace +template +void PhTreeRelocate3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, UpdateType::RELOCATE> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + template void PhTreeEraseKey3D(benchmark::State& state, Arguments&&... arguments) { IndexBenchmark<3, UpdateType::ERASE_BY_KEY> benchmark{state, arguments...}; @@ -227,83 +239,28 @@ void PhTreeEmplaceHint3D(benchmark::State& state, Arguments&&... 
arguments) { } // index type, scenario name, data_type, num_entities, updates_per_round, move_distance -// PhTree3D CUBE -BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_CU_100_of_1K, TestGenerator::CUBE, 1000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_CU_100_of_10K, TestGenerator::CUBE, 10000) +// PhTree with relocate() +BENCHMARK_CAPTURE(PhTreeRelocate3D, UPDATE_1000, UPDATES_PER_ROUND) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_CU_100_of_100K, TestGenerator::CUBE, 100000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_CU_100_of_1M, TestGenerator::CUBE, 1000000) - ->Unit(benchmark::kMillisecond); - -// PhTree3D CLUSTER -BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_CL_100_of_1K, TestGenerator::CLUSTER, 1000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_CL_100_of_10K, TestGenerator::CLUSTER, 10000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_CL_100_of_100K, TestGenerator::CLUSTER, 100000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_CL_100_of_1M, TestGenerator::CLUSTER, 1000000) - ->Unit(benchmark::kMillisecond); - -// index type, scenario name, data_type, num_entities, updates_per_round, move_distance -// PhTree3D CUBE -BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_CU_100_of_1K, TestGenerator::CUBE, 1000) +// PhTree with erase()/emplace +BENCHMARK_CAPTURE(PhTreeEraseKey3D, UPDATE_1000, UPDATES_PER_ROUND) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_CU_100_of_10K, TestGenerator::CUBE, 10000) +// PhTree with erase(iter) +BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_1000, UPDATES_PER_ROUND) + ->RangeMultiplier(10) + 
->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_CU_100_of_100K, TestGenerator::CUBE, 100000) +// PhTree with emplace_hint() +BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_1000, UPDATES_PER_ROUND) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_CU_100_of_1M, TestGenerator::CUBE, 1000000) - ->Unit(benchmark::kMillisecond); - -// PhTree3D CLUSTER -BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_CL_100_of_1K, TestGenerator::CLUSTER, 1000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_CL_100_of_10K, TestGenerator::CLUSTER, 10000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_CL_100_of_100K, TestGenerator::CLUSTER, 100000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEraseIter3D, UPDATE_CL_100_of_1M, TestGenerator::CLUSTER, 1000000) - ->Unit(benchmark::kMillisecond); - -// index type, scenario name, data_type, num_entities, updates_per_round, move_distance -// PhTree3D CUBE -BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_CU_100_of_1K, TestGenerator::CUBE, 1000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_CU_100_of_10K, TestGenerator::CUBE, 10000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_CU_100_of_100K, TestGenerator::CUBE, 100000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_CU_100_of_1M, TestGenerator::CUBE, 1000000) - ->Unit(benchmark::kMillisecond); - -// PhTree3D CLUSTER -BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_CL_100_of_1K, TestGenerator::CLUSTER, 1000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_CL_100_of_10K, TestGenerator::CLUSTER, 10000) - ->Unit(benchmark::kMillisecond); - 
-BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_CL_100_of_100K, TestGenerator::CLUSTER, 100000) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTreeEmplaceHint3D, UPDATE_CL_100_of_1M, TestGenerator::CLUSTER, 1000000) - ->Unit(benchmark::kMillisecond); BENCHMARK_MAIN(); diff --git a/phtree/benchmark/update_mm_box_d_benchmark.cc b/benchmark/update_mm_box_d_benchmark.cc similarity index 79% rename from phtree/benchmark/update_mm_box_d_benchmark.cc rename to benchmark/update_mm_box_d_benchmark.cc index 13f58b5e..bcff9ad2 100644 --- a/phtree/benchmark/update_mm_box_d_benchmark.cc +++ b/benchmark/update_mm_box_d_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "benchmark_util.h" -#include "logging.h" +#include "benchmark/benchmark_util.h" +#include "benchmark/logging.h" #include "phtree/phtree.h" #include "phtree/phtree_multimap.h" #include @@ -35,7 +35,7 @@ std::vector MOVE_DISTANCE = {0, 1.0, 10}; const double GLOBAL_MAX = 10000; const double BOX_LEN = 100; -enum Scenario { TREE_WITH_MAP, MULTI_MAP }; +enum Scenario { ERASE_EMPLACE, MM_BPT_RELOCATE, MM_SET_RELOCATE }; using payload_t = scalar_64_t; @@ -46,9 +46,16 @@ using CONVERTER = ConverterBoxIEEE; template using TestMap = typename std::conditional_t< - SCENARIO == TREE_WITH_MAP, + SCENARIO == ERASE_EMPLACE, PhTreeBoxD>, - PhTreeMultiMapBoxD>>; + typename std::conditional_t< + SCENARIO == MM_BPT_RELOCATE, + PhTreeMultiMapBoxD< + DIM, + payload_t, + CONVERTER, + b_plus_tree_hash_set>, + PhTreeMultiMapBoxD, std::set>>>; template struct UpdateOp { @@ -112,19 +119,25 @@ void IndexBenchmark::Benchmark(benchmark::State& state) { template void InsertEntry( - TestMap& tree, const PhBoxD& point, payload_t data) { + TestMap& tree, const PhBoxD& point, payload_t data) { BucketType& bucket = tree.emplace(point).first; bucket.emplace(data); } template void InsertEntry( - TestMap& tree, const PhBoxD& point, payload_t data) 
{ + TestMap& tree, const PhBoxD& point, payload_t data) { + tree.emplace(point, data); +} + +template +void InsertEntry( + TestMap& tree, const PhBoxD& point, payload_t data) { tree.emplace(point, data); } template -typename std::enable_if::type UpdateEntry( +typename std::enable_if::type UpdateEntry( TestMap& tree, std::vector>& updates) { size_t n = 0; for (auto& update : updates) { @@ -151,7 +164,7 @@ typename std::enable_if::type Updat } template -typename std::enable_if::type UpdateEntry( +typename std::enable_if::type UpdateEntry( TestMap& tree, std::vector>& updates) { size_t n = 0; for (auto& update : updates) { @@ -202,7 +215,7 @@ void IndexBenchmark::UpdateWorld(benchmark::State& state) { logging::error("Invalid update count: {}/{}", updates_.size(), n); } - if constexpr (SCENARIO == MULTI_MAP) { + if constexpr (SCENARIO == MM_BPT_RELOCATE) { (void)initial_tree_size; if (tree_.size() != num_entities_) { logging::error("Invalid index size after update: {}/{}", tree_.size(), num_entities_); @@ -222,26 +235,38 @@ void IndexBenchmark::UpdateWorld(benchmark::State& state) { } // namespace template -void PhTree3D(benchmark::State& state, Arguments&&... arguments) { - IndexBenchmark<3, Scenario::TREE_WITH_MAP> benchmark{state, arguments...}; +void PhTreeBox3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::ERASE_EMPLACE> benchmark{state, arguments...}; benchmark.Benchmark(state); } template -void PhTreeMultiMap3D(benchmark::State& state, Arguments&&... arguments) { - IndexBenchmark<3, Scenario::MULTI_MAP> benchmark{state, arguments...}; +void PhTreeMultiMapBox3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::MM_BPT_RELOCATE> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTreeMultiMapStdBox3D(benchmark::State& state, Arguments&&... 
arguments) { + IndexBenchmark<3, Scenario::MM_SET_RELOCATE> benchmark{state, arguments...}; benchmark.Benchmark(state); } // index type, scenario name, data_type, num_entities, updates_per_round, move_distance // PhTree -BENCHMARK_CAPTURE(PhTree3D, UPDATE_1000, UPDATES_PER_ROUND) +BENCHMARK_CAPTURE(PhTreeBox3D, UPDATE_1000, UPDATES_PER_ROUND) ->RangeMultiplier(10) ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); // PhTreeMultiMap -BENCHMARK_CAPTURE(PhTreeMultiMap3D, UPDATE_1000, UPDATES_PER_ROUND) +BENCHMARK_CAPTURE(PhTreeMultiMapBox3D, UPDATE_1000, UPDATES_PER_ROUND) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +// PhTreeMultiMap with std::set +BENCHMARK_CAPTURE(PhTreeMultiMapStdBox3D, UPDATE_1000, UPDATES_PER_ROUND) ->RangeMultiplier(10) ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); diff --git a/phtree/benchmark/update_mm_d_benchmark.cc b/benchmark/update_mm_d_benchmark.cc similarity index 71% rename from phtree/benchmark/update_mm_d_benchmark.cc rename to benchmark/update_mm_d_benchmark.cc index f3149403..6957e7c3 100644 --- a/phtree/benchmark/update_mm_d_benchmark.cc +++ b/benchmark/update_mm_d_benchmark.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "benchmark_util.h" -#include "logging.h" +#include "benchmark/benchmark_util.h" +#include "benchmark/logging.h" #include "phtree/phtree.h" #include "phtree/phtree_multimap.h" #include @@ -34,7 +34,7 @@ std::vector MOVE_DISTANCE = {0, 1.0, 10}; const double GLOBAL_MAX = 10000; -enum Scenario { TREE_WITH_MAP, MULTI_MAP }; +enum Scenario { ERASE_EMPLACE, MM_BPT_RELOCATE, MM_SET_RELOCATE, MM_SET_RELOCATE_IF }; using payload_t = scalar_64_t; @@ -48,9 +48,12 @@ using CONVERTER = ConverterIEEE; template using TestMap = typename std::conditional_t< - SCENARIO == TREE_WITH_MAP, + SCENARIO == ERASE_EMPLACE, PhTreeD>, - PhTreeMultiMapD>>; + typename std::conditional_t< + SCENARIO == MM_BPT_RELOCATE, + PhTreeMultiMapD, b_plus_tree_hash_set>, + PhTreeMultiMapD, std::set>>>; template struct UpdateOp { @@ -114,19 +117,25 @@ void IndexBenchmark::Benchmark(benchmark::State& state) { template void InsertEntry( - TestMap& tree, const PointType& point, payload_t data) { + TestMap& tree, const PointType& point, payload_t data) { BucketType& bucket = tree.emplace(point).first; bucket.emplace(data); } template void InsertEntry( - TestMap& tree, const PointType& point, payload_t data) { + TestMap& tree, const PointType& point, payload_t data) { + tree.emplace(point, data); +} + +template +void InsertEntry( + TestMap& tree, const PointType& point, payload_t data) { tree.emplace(point, data); } template -typename std::enable_if::type UpdateEntry( +typename std::enable_if::type UpdateEntry( TestMap& tree, std::vector>& updates) { size_t n = 0; for (auto& update : updates) { @@ -138,8 +147,6 @@ typename std::enable_if::type Updat continue; } - // TODO implement erase_hint or find_hint or something? - // Entry is already inserted, now remove old entry. 
auto iter_old_bucket = tree.find(update.old_); assert(iter_old_bucket != tree.end()); @@ -153,8 +160,10 @@ typename std::enable_if::type Updat } template -typename std::enable_if::type UpdateEntry( - TestMap& tree, std::vector>& updates) { +typename std::enable_if< + SCENARIO == Scenario::MM_BPT_RELOCATE || SCENARIO == Scenario::MM_SET_RELOCATE, + size_t>::type +UpdateEntry(TestMap& tree, std::vector>& updates) { size_t n = 0; for (auto& update : updates) { n += tree.relocate(update.old_, update.new_, update.id_); @@ -162,6 +171,17 @@ typename std::enable_if::type UpdateEnt return n; } +template +typename std::enable_if::type UpdateEntry( + TestMap& tree, std::vector>& updates) { + size_t n = 0; + for (auto& update : updates) { + n += tree.relocate_if( + update.old_, update.new_, [&update](const payload_t& v) { return v == update.id_; }); + } + return n; +} + template void IndexBenchmark::SetupWorld(benchmark::State& state) { logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); @@ -215,26 +235,50 @@ void IndexBenchmark::UpdateWorld(benchmark::State& state) { } // namespace template -void PhTree3D(benchmark::State& state, Arguments&&... arguments) { - IndexBenchmark<3, Scenario::TREE_WITH_MAP> benchmark{state, arguments...}; +void PhTreeMMRelocateIfStdSet3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::MM_SET_RELOCATE_IF> benchmark{state, arguments...}; benchmark.Benchmark(state); } template -void PhTreeMultiMap3D(benchmark::State& state, Arguments&&... arguments) { - IndexBenchmark<3, Scenario::MULTI_MAP> benchmark{state, arguments...}; +void PhTreeMMRelocateBpt3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::MM_BPT_RELOCATE> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTreeMMRelocateStdSet3D(benchmark::State& state, Arguments&&... 
arguments) { + IndexBenchmark<3, Scenario::MM_SET_RELOCATE> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTreeMMEraseEmplace3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::ERASE_EMPLACE> benchmark{state, arguments...}; benchmark.Benchmark(state); } // index type, scenario name, data_type, num_entities, updates_per_round, move_distance -// PhTree -BENCHMARK_CAPTURE(PhTree3D, UPDATE_1000, UPDATES_PER_ROUND) +// PhTreeMultiMap +BENCHMARK_CAPTURE(PhTreeMMRelocateIfStdSet3D, UPDATE_1000, UPDATES_PER_ROUND) ->RangeMultiplier(10) ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); -// PhTreeMultiMap -BENCHMARK_CAPTURE(PhTreeMultiMap3D, UPDATE_1000, UPDATES_PER_ROUND) +// PhTreeMultiMap with b_plus_tree_hash_set +BENCHMARK_CAPTURE(PhTreeMMRelocateBpt3D, UPDATE_1000, UPDATES_PER_ROUND) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +// PhTreeMultiMap with std::set +BENCHMARK_CAPTURE(PhTreeMMRelocateStdSet3D, UPDATE_1000, UPDATES_PER_ROUND) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +// PhTree (manual bucket handling) +BENCHMARK_CAPTURE(PhTreeMMEraseEmplace3D, UPDATE_1000, UPDATES_PER_ROUND) ->RangeMultiplier(10) ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 370887f6..ae1345c4 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,6 +1,10 @@ cmake_minimum_required(VERSION 3.14) -project(Example) +project(phtree-examples) -set(SOURCE_FILES example.cc) -add_executable(Example ${SOURCE_FILES}) -target_link_libraries(Example phtree) +if (WIN32 OR UNIX) + add_executable(Example example.cc) + 
target_include_directories(Example PRIVATE ${PROJECT_SOURCE_DIR}/..) +else () + add_executable(Example example.cc) + target_link_libraries(Example phtree) +endif () \ No newline at end of file diff --git a/examples/example.cc b/examples/example.cc index b0ceb5e9..aecbb049 100644 --- a/examples/example.cc +++ b/examples/example.cc @@ -14,11 +14,48 @@ * limitations under the License. */ -#include "../phtree/phtree.h" +#include "phtree/phtree.h" +#include "phtree/phtree_multimap.h" +#include #include +#include using namespace improbable::phtree; +int relocate_example() { + //auto tree = PhTreeMultiMapD<2, int, ConverterIEEE<2>, std::unordered_set>(); + auto tree = PhTreeMultiMapD<2, int, ConverterMultiply<2, 1, 200>, std::unordered_set>(); + std::vector> vecPos; + int dim = 1000; + + int num = 30000; + for (int i = 0; i < num; ++i) { + PhPointD<2> p = {(double)(rand() % dim), (double)(rand() % dim)}; + vecPos.push_back(p); + tree.emplace(p, i); + } + + long T = 0; + int nT = 0; + while (true) { + auto t1 = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < num; ++i) { + PhPointD<2>& p = vecPos[i]; + PhPointD<2> newp = {p[0] + 1, p[1] + 1}; + tree.relocate(p, newp, i, false); + p = newp; + } + auto t2 = std::chrono::high_resolution_clock::now(); + auto s = std::chrono::duration_cast(t2 - t1); + ++nT; + T += (long)s.count() / 1000; + std::cout << s.count() << " " << (T / nT) + << " msec/num= " << (s.count() / (double)num) << std::endl; + } + + return 0; +} + int main() { std::cout << "PH-Tree example with 3D `double` coordinates." << std::endl; PhPointD<3> p1({1, 1, 1}); @@ -55,4 +92,8 @@ int main() { std::cout << "ID at " << p4b << ": " << tree.find(p4b).second() << std::endl; std::cout << "Done." 
<< std::endl; -} \ No newline at end of file + + //relocate_example(); + + return 0; +} diff --git a/phtree/BUILD b/phtree/BUILD index fe48ccc8..727b2621 100644 --- a/phtree/BUILD +++ b/phtree/BUILD @@ -16,198 +16,3 @@ cc_library( "//phtree/v16", ], ) - -cc_test( - name = "phtree_test", - timeout = "long", - srcs = [ - "phtree_test.cc", - ], - linkstatic = True, - deps = [ - ":phtree", - "//phtree/testing/gtest_main", - ], -) - -cc_test( - name = "phtree_test_const_values", - timeout = "long", - srcs = [ - "phtree_test_const_values.cc", - ], - linkstatic = True, - deps = [ - ":phtree", - "//phtree/testing/gtest_main", - ], -) - -cc_test( - name = "phtree_test_ptr_values", - timeout = "long", - srcs = [ - "phtree_test_ptr_values.cc", - ], - linkstatic = True, - deps = [ - ":phtree", - "//phtree/testing/gtest_main", - ], -) - -cc_test( - name = "phtree_test_unique_ptr_values", - timeout = "long", - srcs = [ - "phtree_test_unique_ptr_values.cc", - ], - linkstatic = True, - deps = [ - ":phtree", - "//phtree/testing/gtest_main", - ], -) - -cc_test( - name = "phtree_multimap_test_move_only_values", - timeout = "long", - srcs = [ - "phtree_test_unique_ptr_values.cc", - ], - linkstatic = True, - deps = [ - ":phtree", - "//phtree/testing/gtest_main", - ], -) - -cc_test( - name = "phtree_d_test", - timeout = "long", - srcs = [ - "phtree_d_test.cc", - ], - linkstatic = True, - deps = [ - ":phtree", - "//phtree/testing/gtest_main", - ], -) - -cc_test( - name = "phtree_d_test_filter", - timeout = "long", - srcs = [ - "phtree_d_test_filter.cc", - ], - linkstatic = True, - deps = [ - ":phtree", - "//phtree/testing/gtest_main", - ], -) - -cc_test( - name = "phtree_d_test_custom_key", - timeout = "long", - srcs = [ - "phtree_d_test_custom_key.cc", - ], - linkstatic = True, - deps = [ - ":phtree", - "//phtree/testing/gtest_main", - ], -) - -cc_test( - name = "phtree_d_test_preprocessor", - timeout = "long", - srcs = [ - "phtree_d_test_preprocessor.cc", - ], - linkstatic = True, - 
deps = [ - ":phtree", - "//phtree/testing/gtest_main", - ], -) - -cc_test( - name = "phtree_multimap_d_test", - timeout = "long", - srcs = [ - "phtree_multimap_d_test.cc", - ], - linkstatic = True, - deps = [ - ":phtree", - "//phtree/testing/gtest_main", - ], -) - -cc_test( - name = "phtree_box_d_test_query_types", - timeout = "long", - srcs = [ - "phtree_box_d_test_query_types.cc", - ], - linkstatic = True, - deps = [ - ":phtree", - "//phtree/testing/gtest_main", - ], -) - -cc_test( - name = "phtree_box_d_test", - timeout = "long", - srcs = [ - "phtree_box_d_test.cc", - ], - linkstatic = True, - deps = [ - ":phtree", - "//phtree/testing/gtest_main", - ], -) - -cc_test( - name = "phtree_multimap_box_d_test", - timeout = "long", - srcs = [ - "phtree_multimap_box_d_test.cc", - ], - linkstatic = True, - deps = [ - ":phtree", - "//phtree/testing/gtest_main", - ], -) - -cc_test( - name = "phtree_f_test", - timeout = "long", - srcs = [ - "phtree_f_test.cc", - ], - linkstatic = True, - deps = [ - ":phtree", - "//phtree/testing/gtest_main", - ], -) - -cc_test( - name = "phtree_box_f_test", - timeout = "long", - srcs = [ - "phtree_box_f_test.cc", - ], - linkstatic = True, - deps = [ - ":phtree", - "//phtree/testing/gtest_main", - ], -) diff --git a/phtree/CMakeLists.txt b/phtree/CMakeLists.txt index 53761cd5..150b1bc8 100644 --- a/phtree/CMakeLists.txt +++ b/phtree/CMakeLists.txt @@ -5,5 +5,8 @@ add_library(phtree STATIC "") add_subdirectory(common) add_subdirectory(v16) +#target_include_directories(phtree PUBLIC phtree) +target_include_directories(phtree PUBLIC ${PROJECT_SOURCE_DIR}/..) 
+ set_target_properties(phtree PROPERTIES LINKER_LANGUAGE CXX) diff --git a/phtree/benchmark/logging.cc b/phtree/benchmark/logging.cc deleted file mode 100644 index 51803f0c..00000000 --- a/phtree/benchmark/logging.cc +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (c) Improbable Worlds Ltd, All Rights Reserved -#include "logging.h" - -namespace improbable::phtree::phbenchmark::logging { - -void SetupDefaultLogging() { - SetupLogging({}, spdlog::level::warn); -} - -void SetupLogging(std::vector sinks, spdlog::level::level_enum log_level) { - auto& console_sink = sinks.emplace_back(std::make_shared()); - console_sink->set_level(log_level); - - // Find the minimum log level, in case one of the sinks passed to us has a lower log level. - const auto& sink_with_lowest_log_level = *std::min_element( - sinks.begin(), - sinks.end(), - [](const spdlog::sink_ptr& a, const spdlog::sink_ptr& b) -> bool { - return a->level() < b->level(); - }); - spdlog::level::level_enum min_log_level = - std::min(sink_with_lowest_log_level->level(), log_level); - - // Create the external logger, worker logger and the internal (default) logger from the same log - // sinks. Each logsink can use `GetLoggerTypeFromMessage` to determine which logger a message - // was logged to. 
- spdlog::set_default_logger( - std::make_shared(kInternalLoggerName, sinks.begin(), sinks.end())); - spdlog::set_level(min_log_level); - spdlog::flush_on(min_log_level); -} - -} // namespace improbable::phtree::phbenchmark::logging diff --git a/phtree/common/BUILD b/phtree/common/BUILD index 7ef3b6bf..b25588b1 100644 --- a/phtree/common/BUILD +++ b/phtree/common/BUILD @@ -11,6 +11,8 @@ cc_library( "distance.h", "filter.h", "flat_array_map.h", + "b_plus_tree_hash_map.h", + "b_plus_tree_map.h", "flat_sparse_map.h", "tree_stats.h", ], @@ -20,107 +22,3 @@ cc_library( deps = [ ], ) - -cc_test( - name = "base_types_test", - timeout = "long", - srcs = [ - "base_types_test.cc", - ], - linkstatic = True, - deps = [ - ":common", - "//phtree/testing/gtest_main", - ], -) - -cc_test( - name = "bits_test", - timeout = "long", - srcs = [ - "bits_test.cc", - ], - linkstatic = True, - deps = [ - ":common", - "//phtree/testing/gtest_main", - ], -) - -cc_test( - name = "common_test", - timeout = "long", - srcs = [ - "common_test.cc", - ], - linkstatic = True, - deps = [ - ":common", - "//phtree/testing/gtest_main", - ], -) - -cc_test( - name = "distance_test", - timeout = "long", - srcs = [ - "distance_test.cc", - ], - linkstatic = True, - deps = [ - ":common", - "//phtree/testing/gtest_main", - ], -) - -cc_test( - name = "filter_test", - timeout = "long", - srcs = [ - "filter_test.cc", - ], - linkstatic = True, - deps = [ - ":common", - "//phtree/testing/gtest_main", - ], -) - -cc_test( - name = "flat_array_map_test", - timeout = "long", - srcs = [ - "flat_array_map_test.cc", - ], - linkstatic = True, - deps = [ - ":common", - "//phtree/testing/gtest_main", - ], -) - -cc_test( - name = "flat_sparse_map_test", - timeout = "long", - srcs = [ - "flat_sparse_map_test.cc", - ], - linkstatic = True, - deps = [ - ":common", - "//phtree/testing/gtest_main", - ], -) - -cc_test( - name = "preprocessor_test", - timeout = "long", - srcs = [ - "converter_test.cc", - ], - linkstatic = True, - 
deps = [ - ":common", - "//phtree/testing/gtest_main", - ], -) diff --git a/phtree/common/b_plus_tree_hash_map.h b/phtree/common/b_plus_tree_hash_map.h new file mode 100644 index 00000000..f3ea6028 --- /dev/null +++ b/phtree/common/b_plus_tree_hash_map.h @@ -0,0 +1,942 @@ +/* + * Copyright 2022 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_COMMON_B_PLUS_TREE_HASH_MAP_H +#define PHTREE_COMMON_B_PLUS_TREE_HASH_MAP_H + +#include "bits.h" +#include +#include +#include + +/* + * PLEASE do not include this file directly, it is included via common.h. + * + * This file contains the B+tree implementation which is used in high-dimensional nodes in + * the PH-Tree. + */ +namespace improbable::phtree { + +/* + * The b_plus_tree_hash_map is a B+tree implementation that uses a hierarchy of horizontally + * connected nodes for fast traversal through all entries. + * + * Behavior + * ======== + * This is a hash set/map. It behaves just like std::unordered_set / std::unordered_map, minus + * some API functions. + * The set/map is ordered by their hash. Entries with identical hash have no specific ordering + * but the order is stable with respect to insertion/removal of other entries. 
+ * + * + * Rationale + * ========= + * This implementation is optimized for small entry count (for the multi-map PH-tree we + * expect small numbers of entries that actually have identical positions), however it should + * scale well with large entry counts (it is a tree, so there is no need for rehashing). + * Benchmarks show 10%-20% performance improvements for relocate() when using this custom set/map. + * + * + * Internals + * ========= + * The individual nodes have at most M entries. + * The tree has O(log n) lookup and O(M log n) insertion/removal time complexity, + * space complexity is O(n). + * + * Tree structure: + * - Inner nodes: have other nodes as children; their key of an entry represents the highest + * key of any subnode in that entry + * - Leaf nodes: have values as children; their key represents the key of a key/value pair + * - Every node is either a leaf (l-node; contains values) or an inner node + * (n-node; contains nodes). + * - "Sibling" nodes refer to the nodes linked by prev_node_ or next_node_. Sibling nodes + * usually have the same parent but may also be children of their parent's siblings. + * + * - Guarantee: All leaf nodes are horizontally connected + * - Inner nodes may or may not be connected. Specifically: + * - New inner nodes will be assigned siblings from the same parent or the parent's sibling + * (if the new node is the first or last node in a parent) + * - There is no guarantee that inner nodes know about their potential sibling (=other inner + * nodes that own bordering values/child-nodes). + * - There is no guarantee that siblings are on the same depth of the tree. 
+ * - The tree is not balanced + * + */ +template , typename PredT = std::equal_to> +class b_plus_tree_hash_set { + class bpt_node_base; + template + class bpt_node_data; + class bpt_node_leaf; + class bpt_node_inner; + class bpt_iterator; + + using hash_t = std::uint32_t; + + using bpt_entry_inner = std::pair; + using bpt_entry_leaf = std::pair; + + using IterT = bpt_iterator; + using NodeT = bpt_node_base; + using NLeafT = bpt_node_leaf; + using NInnerT = bpt_node_inner; + using LeafIteratorT = decltype(std::vector().begin()); + using TreeT = b_plus_tree_hash_set; + + public: + explicit b_plus_tree_hash_set() : root_{new NLeafT(nullptr, nullptr, nullptr)}, size_{0} {}; + + b_plus_tree_hash_set(const b_plus_tree_hash_set& other) : size_{other.size_} { + root_ = other.root_->is_leaf() ? new NLeafT(*other.root_->as_leaf()) + : new NInnerT(*other.root_->as_inner()); + } + + b_plus_tree_hash_set(b_plus_tree_hash_set&& other) noexcept + : root_{other.root_}, size_{other.size_} { + other.root_ = nullptr; + other.size_ = 0; + } + + b_plus_tree_hash_set& operator=(const b_plus_tree_hash_set& other) { + assert(this != &other); + delete root_; + root_ = other.root_->is_leaf() ? 
new NLeafT(*other.root_->as_leaf()) + : new NInnerT(*other.root_->as_inner()); + size_ = other.size_; + return *this; + } + + b_plus_tree_hash_set& operator=(b_plus_tree_hash_set&& other) noexcept { + delete root_; + root_ = other.root_; + other.root_ = nullptr; + size_ = other.size_; + other.size_ = 0; + return *this; + } + + ~b_plus_tree_hash_set() { + delete root_; + root_ = nullptr; + } + + [[nodiscard]] auto find(const T& value) { + auto node = root_; + auto hash = (hash_t)HashT{}(value); + while (!node->is_leaf()) { + node = node->as_inner()->find(hash); + if (node == nullptr) { + return end(); + } + } + return node->as_leaf()->find(hash, value); + } + + [[nodiscard]] auto find(const T& value) const { + return const_cast(*this).find(value); + } + + [[nodiscard]] size_t count(const T& value) const { + return const_cast(*this).find(value) != end(); + } + + [[nodiscard]] auto begin() noexcept { + return IterT(root_); + } + + [[nodiscard]] auto begin() const noexcept { + return IterT(root_); + } + + [[nodiscard]] auto cbegin() const noexcept { + return IterT(root_); + } + + [[nodiscard]] auto end() noexcept { + return IterT(); + } + + [[nodiscard]] auto end() const noexcept { + return IterT(); + } + + template + auto emplace(Args&&... args) { + T t(std::forward(args)...); + hash_t hash = (hash_t)HashT{}(t); + auto node = root_; + while (!node->is_leaf()) { + node = node->as_inner()->find_or_last(hash); + } + return node->as_leaf()->try_emplace(hash, *this, size_, std::move(t)); + } + + template + auto emplace_hint(const IterT& hint, Args&&... args) { + if (empty() || hint.is_end()) { + return emplace(std::forward(args)...).first; + } + assert(hint.node_->is_leaf()); + + T t(std::forward(args)...); + auto hash = (hash_t)HashT{}(t); + auto node = hint.node_->as_leaf(); + + // The following may drop a valid hint but is easy to check. 
+ if (node->data_.begin()->first > hash || (node->data_.end() - 1)->first < hash) { + return emplace(std::move(t)).first; + } + + return node->try_emplace(hash, *this, size_, std::move(t)).first; + } + + size_t erase(const T& value) { + auto node = root_; + auto hash = (hash_t)HashT{}(value); + while (!node->is_leaf()) { + node = node->as_inner()->find(hash); + if (node == nullptr) { + return 0; + } + } + auto n = node->as_leaf()->erase_key(hash, value, *this); + size_ -= n; + return n; + } + + auto erase(const IterT& iterator) { + assert(iterator != end()); + --size_; + auto result = iterator.node_->erase_it(iterator.iter_, *this); + if (result.node_) { + return IterT(static_cast(result.node_), result.iter_); + } + return IterT(); + } + + [[nodiscard]] size_t size() const noexcept { + return size_; + } + + [[nodiscard]] bool empty() const noexcept { + return size_ == 0; + } + + void _check() { + size_t count = 0; + NLeafT* prev_leaf = nullptr; + hash_t known_min = std::numeric_limits::max(); + root_->_check(count, nullptr, prev_leaf, known_min, 0); + assert(count == size()); + } + + private: + class bpt_node_base { + public: + explicit bpt_node_base(bool is_leaf, NInnerT* parent) noexcept + : is_leaf_{is_leaf}, parent_{parent} {} + + virtual ~bpt_node_base() noexcept = default; + + [[nodiscard]] inline bool is_leaf() const noexcept { + return is_leaf_; + } + + [[nodiscard]] inline NInnerT* as_inner() noexcept { + assert(!is_leaf_); + return static_cast(this); + } + + [[nodiscard]] inline NLeafT* as_leaf() noexcept { + assert(is_leaf_); + return static_cast(this); + } + + virtual void _check(size_t&, NInnerT*, NLeafT*&, hash_t&, hash_t) = 0; + + public: + const bool is_leaf_; + NInnerT* parent_; + }; + + template + class bpt_node_data : public bpt_node_base { + using DataIteratorT = decltype(std::vector().begin()); + friend IterT; + + constexpr static size_t M_leaf = 16; + constexpr static size_t M_inner = 16; + // A value >2 requires a code change to move > 1 
entry when merging. + constexpr static size_t M_leaf_min = 2; // std::max((size_t)2, M_leaf >> 2); + constexpr static size_t M_inner_min = 2; // std::max((size_t)2, M_inner >> 2); + constexpr static size_t M_leaf_init = 8; + constexpr static size_t M_inner_init = 4; + + public: + explicit bpt_node_data(bool is_leaf, NInnerT* parent, ThisT* prev, ThisT* next) noexcept + : bpt_node_base(is_leaf, parent), data_{}, prev_node_{prev}, next_node_{next} { + data_.reserve(this->M_init()); + } + + virtual ~bpt_node_data() noexcept = default; + + [[nodiscard]] inline size_t M_min() { + return this->is_leaf_ ? M_leaf_min : M_inner_min; + } + + [[nodiscard]] inline size_t M_max() { + return this->is_leaf_ ? M_leaf : M_inner; + } + + [[nodiscard]] inline size_t M_init() { + return this->is_leaf_ ? M_leaf_init : M_inner_init; + } + + [[nodiscard]] auto lower_bound(hash_t hash) noexcept { + return std::lower_bound( + data_.begin(), data_.end(), hash, [](EntryT& left, const hash_t hash) { + return left.first < hash; + }); + } + + [[nodiscard]] size_t size() const noexcept { + return data_.size(); + } + + struct EraseResult { + bpt_node_data* node_ = nullptr; + DataIteratorT iter_; + }; + + auto erase_entry(DataIteratorT it_to_erase, TreeT& tree) { + using ER = EraseResult; + auto& parent_ = this->parent_; + hash_t max_key_old = data_.back().first; + + auto result = data_.erase(it_to_erase); + bool tail_entry_erased = result == data_.end(); + if (parent_ == nullptr) { + if constexpr (std::is_same_v) { + if (data_.size() < 2) { + auto remaining_node = data_.begin()->second; + data_.begin()->second = nullptr; + remaining_node->parent_ = nullptr; + tree.root_ = remaining_node; + delete this; + } + } + return tail_entry_erased ? ER{} : ER{this, result}; + } + + if (data_.empty()) { + // Nothing to merge, just remove node. This should be rare, i.e. only happens when + // a rare 1-entry node has its last entry removed. 
+ remove_from_siblings(); + parent_->remove_node(max_key_old, this, tree); + return next_node_ == nullptr ? ER{} : ER{next_node_, next_node_->data_.begin()}; + } + + if (data_.size() < this->M_min()) { + // merge + if (prev_node_ != nullptr && prev_node_->data_.size() < this->M_max()) { + remove_from_siblings(); + auto& prev_data = prev_node_->data_; + if constexpr (std::is_same_v) { + prev_data.emplace_back(std::move(data_[0])); + } else { + data_[0].second->parent_ = prev_node_; + prev_data.emplace_back(std::move(data_[0])); + data_[0].second = nullptr; + } + auto prev_node = prev_node_; // create copy because (this) will be deleted + auto next_node = next_node_; // create copy because (this) will be deleted + parent_->remove_node(max_key_old, this, tree); + if (prev_node->parent_ != nullptr) { + hash_t old1 = (prev_data.end() - 2)->first; + hash_t new1 = (prev_data.end() - 1)->first; + prev_node->parent_->update_key(old1, new1, prev_node); + } + if (!tail_entry_erased) { + return ER{prev_node, --prev_data.end()}; + } + return next_node == nullptr ? ER{} : ER{next_node, next_node->data_.begin()}; + } else if (next_node_ != nullptr && next_node_->data_.size() < this->M_max()) { + remove_from_siblings(); + auto* next_node = next_node_; + auto& next_data = next_node_->data_; + if constexpr (std::is_same_v) { + next_data.emplace(next_data.begin(), std::move(data_[0])); + } else { + data_[0].second->parent_ = next_node_; + next_data.emplace(next_data.begin(), std::move(data_[0])); + data_[0].second = nullptr; + } + parent_->remove_node(max_key_old, this, tree); + if (tail_entry_erased) { + return ER{next_node, next_data.begin() + 1}; + } + return next_node == nullptr ? ER() : ER{next_node, next_data.begin()}; + } + // This node is too small but there is nothing we can do. + } + if (tail_entry_erased) { + parent_->update_key(max_key_old, data_.back().first, this); + return next_node_ == nullptr ? 
ER() : ER{next_node_, next_node_->data_.begin()}; + } + return ER{this, result}; + } + + /* + * Check whether a split is required and, if so, perform it. + * It returns the node to which the new entry should be added. + */ + ThisT* check_split(hash_t key_to_add, TreeT& tree) { + if (data_.size() < this->M_max()) { + if (this->parent_ != nullptr && key_to_add > data_.back().first) { + this->parent_->update_key(data_.back().first, key_to_add, this); + } + return static_cast(this); + } + return this->split_node(key_to_add, tree); + } + + void _check_data(NInnerT* parent, hash_t known_max) { + (void)parent; + (void)known_max; + // assert(parent_ == nullptr || data_.size() >= M_min); + assert(this->parent_ == parent); + if (this->data_.empty()) { + assert(parent == nullptr); + return; + } + assert(this->parent_ == nullptr || known_max == this->data_.back().first); + } + + private: + ThisT* split_node(hash_t key_to_add, TreeT& tree) { + auto max_key = data_.back().first; + if (this->parent_ == nullptr) { + auto* new_parent = new NInnerT(nullptr, nullptr, nullptr); + new_parent->emplace_back(max_key, this); + tree.root_ = new_parent; + this->parent_ = new_parent; + } + + // create new node + auto* node2 = new ThisT(this->parent_, static_cast(this), next_node_); + if (next_node_ != nullptr) { + next_node_->prev_node_ = node2; + } + next_node_ = node2; + + // populate new node + // TODO Optimize populating new node: move 1st part, insert new value, move 2nd part...? 
+ auto split_pos = this->M_max() >> 1; + node2->data_.insert( + node2->data_.end(), + std::make_move_iterator(data_.begin() + split_pos), + std::make_move_iterator(data_.end())); + data_.erase(data_.begin() + split_pos, data_.end()); + + if constexpr (std::is_same_v) { + for (auto& e : node2->data_) { + e.second->parent_ = node2; + } + } + + // Add node to parent + auto split_key = data_.back().first; + this->parent_->update_key_and_add_node( + max_key, split_key, std::max(max_key, key_to_add), this, node2, tree); + + // Return node for insertion of new value + return key_to_add > split_key ? node2 : static_cast(this); + } + + void remove_from_siblings() { + if (next_node_ != nullptr) { + next_node_->prev_node_ = prev_node_; + } + if (prev_node_ != nullptr) { + prev_node_->next_node_ = next_node_; + } + } + + public: + std::vector data_; + ThisT* prev_node_; + ThisT* next_node_; + }; + + class bpt_node_leaf : public bpt_node_data { + public: + explicit bpt_node_leaf(NInnerT* parent, NLeafT* prev, NLeafT* next) noexcept + : bpt_node_data(true, parent, prev, next) {} + + ~bpt_node_leaf() noexcept = default; + + [[nodiscard]] IterT find(hash_t hash, const T& value) noexcept { + PredT equals{}; + IterT iter_full(this, this->lower_bound(hash)); + while (!iter_full.is_end() && iter_full.hash() == hash) { + if (equals(*iter_full, value)) { + return iter_full; + } + ++iter_full; + } + return IterT(); + } + + [[nodiscard]] auto lower_bound_value(hash_t hash, const T& value) noexcept { + PredT equals{}; + IterT iter_full(this, this->lower_bound(hash)); + while (!iter_full.is_end() && iter_full.hash() == hash) { + if (equals(*iter_full, value)) { + break; + } + ++iter_full; + } + return iter_full; + } + + auto try_emplace(hash_t hash, TreeT& tree, size_t& entry_count, T&& t) { + auto it = this->lower_bound(hash); + if (it != this->data_.end() && it->first == hash) { + // Hash collision ! 
+ PredT equals{}; + IterT full_iter(this, it); + while (!full_iter.is_end() && full_iter.hash() == hash) { + if (equals(*full_iter, t)) { + return std::make_pair(full_iter, false); + } + ++full_iter; + } + } + ++entry_count; + auto old_pos = it - this->data_.begin(); + auto dest = this->check_split(hash, tree); + if (dest != this) { + // The insertion pos in `dest` can be calculated: + it = dest->data_.begin() + (old_pos - this->data_.size()); + } + auto it2 = dest->data_.emplace(it, hash, std::move(t)); + return std::make_pair(IterT(dest, it2), true); + } + + bool erase_key(hash_t hash, const T& value, TreeT& tree) { + auto iter = this->lower_bound_value(hash, value); + if (!iter.is_end() && PredT{}(*iter, value)) { + iter.node_->erase_entry(iter.iter_, tree); + return true; + } + return false; + } + + auto erase_it(LeafIteratorT iter, TreeT& tree) { + return this->erase_entry(iter, tree); + } + + void _check( + size_t& count, + NInnerT* parent, + NLeafT*& prev_leaf, + hash_t& known_min, + hash_t known_max) { + this->_check_data(parent, known_max); + + assert(prev_leaf == this->prev_node_); + for (auto& e : this->data_) { + assert(count == 0 || e.first >= known_min); + assert(this->parent_ == nullptr || e.first <= known_max); + ++count; + known_min = e.first; + } + prev_leaf = this; + } + }; + + class bpt_node_inner : public bpt_node_data { + public: + explicit bpt_node_inner(NInnerT* parent, NInnerT* prev, NInnerT* next) noexcept + : bpt_node_data(false, parent, prev, next) {} + + ~bpt_node_inner() noexcept { + for (auto& e : this->data_) { + if (e.second != nullptr) { + delete e.second; + } + } + } + + [[nodiscard]] auto lower_bound_node(hash_t hash, const NodeT* node) noexcept { + auto it = this->lower_bound(hash); + while (it != this->data_.end() && it->first == hash) { + if (it->second == node) { + return it; + } + ++it; + } + return this->data_.end(); + } + + [[nodiscard]] NodeT* find(hash_t hash) noexcept { + auto it = this->lower_bound(hash); + return it 
!= this->data_.end() ? it->second : nullptr; + } + + [[nodiscard]] NodeT* find_or_last(hash_t hash) noexcept { + auto it = this->lower_bound(hash); + return it != this->data_.end() ? it->second : this->data_.back().second; + } + + void emplace_back(hash_t hash, NodeT* node) { + this->data_.emplace_back(hash, node); + } + + void _check( + size_t& count, + NInnerT* parent, + NLeafT*& prev_leaf, + hash_t& known_min, + hash_t known_max) { + this->_check_data(parent, known_max); + + assert(this->parent_ == nullptr || known_max == this->data_.back().first); + auto prev_key = this->data_[0].first; + int n = 0; + for (auto& e : this->data_) { + assert(n == 0 || e.first >= prev_key); + e.second->_check(count, this, prev_leaf, known_min, e.first); + assert(this->parent_ == nullptr || e.first <= known_max); + prev_key = e.first; + ++n; + } + } + + void update_key(hash_t old_key, hash_t new_key, NodeT* node) { + if (old_key == new_key) { + return; // This can happen due to multiple entries with same hash. + } + assert(new_key != old_key); + auto it = this->lower_bound_node(old_key, node); + assert(it != this->data_.end()); + assert(it->first == old_key); + it->first = new_key; + if (this->parent_ != nullptr && ++it == this->data_.end()) { + this->parent_->update_key(old_key, new_key, this); + } + } + + /* + * This method does two things: + * - It changes the key of the node (node 1) at 'key1_old' to 'key1_new'. 
+ * - It inserts a new node (node 2) after 'new_key1' with value 'key2' + * Invariants: + * - Node1: key1_old >= key1_new; Node 1 vs 2: key2 >= new_key1 + */ + void update_key_and_add_node( + hash_t key1_old, + hash_t key1_new, + hash_t key2, + NodeT* child1, + NodeT* child2, + TreeT& tree) { + auto it = this->lower_bound_node(key1_old, child1); + assert(key2 >= key1_new && key1_old >= key1_new && it != this->data_.end()); + + auto old_pos = it - this->data_.begin(); // required for MSVC + auto dest = this->check_split(key2, tree); + child2->parent_ = dest; + if (this != dest && this->data_.back().second == child1) { + it->first = key1_new; + dest->data_.emplace(dest->data_.begin(), key2, child2); + } else { + // child1 & 2 in same node + if (this != dest) { + it = old_pos - this->data_.size() + dest->data_.begin(); + } + it->first = key1_new; + ++it; + dest->data_.emplace(it, key2, child2); + } + + // The following alternative code works, but I don't understand why! + // auto dest = this->check_split(key2, tree); + // auto it = dest->lower_bound_node(key1_old, child1); + // assert(key2 >= key1_new && key1_old >= key1_new && it != + // dest->data_.end()); + // it->first = key1_new; + // ++it; + // child2->parent_ = dest; + // dest->data_.emplace(it, key2, child2); + } + + void remove_node(hash_t key_remove, NodeT* node, TreeT& tree) { + auto it_to_erase = this->lower_bound(key_remove); + while (it_to_erase != this->data_.end() && it_to_erase->first == key_remove) { + if (it_to_erase->second == node) { + delete it_to_erase->second; + this->erase_entry(it_to_erase, tree); + return; + } + ++it_to_erase; + } + assert(false && "Node not found!"); + } + }; + + class bpt_iterator { + using EntryT = typename b_plus_tree_hash_set::bpt_entry_leaf; + friend b_plus_tree_hash_set; + + public: + using iterator_category = std::forward_iterator_tag; + using value_type = T; + using difference_type = std::ptrdiff_t; + using pointer = T*; + using reference = T&; + + // Arbitrary 
position iterator + explicit bpt_iterator(NLeafT* node, LeafIteratorT it) noexcept + : node_{it == node->data_.end() ? nullptr : node} + , iter_{node_ == nullptr ? LeafIteratorT{} : it} { + assert(node->is_leaf_ && "just for consistency, insist that we iterate leaves only "); + } + + // begin() iterator + explicit bpt_iterator(NodeT* node) noexcept { + assert(node->parent_ == nullptr && "must start with root node"); + // move iterator to first value + while (!node->is_leaf_) { + node = node->as_inner()->data_[0].second; + } + node_ = node->as_leaf(); + + if (node_->size() == 0) { + node_ = nullptr; + iter_ = {}; + return; + } + iter_ = node_->data_.begin(); + } + + // end() iterator + bpt_iterator() noexcept : node_{nullptr}, iter_{} {} + + auto& operator*() const noexcept { + assert(AssertNotEnd()); + return const_cast(iter_->second); + } + + auto* operator->() const noexcept { + assert(AssertNotEnd()); + return const_cast(&iter_->second); + } + + auto& operator++() noexcept { + assert(AssertNotEnd()); + ++iter_; + if (iter_ == node_->data_.end()) { + // this may be a nullptr -> end of data + node_ = node_->next_node_; + iter_ = node_ != nullptr ? 
node_->data_.begin() : LeafIteratorT{}; + } + return *this; + } + + auto operator++(int) const noexcept { + IterT iterator(*this); + ++(*this); + return iterator; + } + + friend bool operator==(const IterT& left, const IterT& right) noexcept { + return left.node_ == right.node_ && left.iter_ == right.iter_; + } + + friend bool operator!=(const IterT& left, const IterT& right) noexcept { + return !(left == right); + } + + // TODO private + bool is_end() const noexcept { + return node_ == nullptr; + } + + private: + [[nodiscard]] inline bool AssertNotEnd() const noexcept { + return node_ != nullptr; + } + + hash_t hash() { + return iter_->first; + } + + NLeafT* node_; + LeafIteratorT iter_; + }; + + private: + NodeT* root_; + size_t size_; +}; + +template < + typename KeyT, + typename ValueT, + typename HashT = std::hash, + typename PredT = std::equal_to> +class b_plus_tree_hash_map { + class iterator; + using IterT = iterator; + using EntryT = std::pair; + + public: + b_plus_tree_hash_map() : map_{} {}; + + b_plus_tree_hash_map(const b_plus_tree_hash_map&) = default; + b_plus_tree_hash_map(b_plus_tree_hash_map&&) noexcept = default; + b_plus_tree_hash_map& operator=(const b_plus_tree_hash_map&) = default; + b_plus_tree_hash_map& operator=(b_plus_tree_hash_map&&) noexcept = default; + ~b_plus_tree_hash_map() = default; + + auto begin() const { + return IterT(map_.begin()); + } + + auto end() const { + return IterT(map_.end()); + } + + auto find(const KeyT& key) const { + return IterT(map_.find(EntryT{key, {}})); + } + + auto count(const KeyT& key) const { + return map_.count(EntryT{key, {}}); + } + + template + auto emplace(Args&&... args) { + return try_emplace(std::forward(args)...); + } + + template + auto emplace_hint(const IterT& hint, Args&&... args) { + return try_emplace(hint, std::forward(args)...); + } + + template + auto try_emplace(const KeyT& key, Args&&... 
args) { + auto result = map_.emplace(key, std::forward(args)...); + return std::make_pair(iterator(result.first), result.second); + } + + template + auto try_emplace(const IterT& hint, const KeyT& key, Args&&... args) { + auto result = map_.emplace_hint(hint.map_iter_, key, std::forward(args)...); + return IterT(result); + } + + auto erase(const KeyT& key) { + return map_.erase({key, {}}); + } + + auto erase(const IterT& iterator) { + return IterT(map_.erase(iterator.map_iter_)); + } + + auto size() const { + return map_.size(); + } + + auto empty() const { + return map_.empty(); + } + + void _check() { + map_._check(); + } + + private: + struct EntryHashT { + size_t operator()(const EntryT& x) const { + return HashT{}(x.first); + } + }; + + struct EntryEqualsT { + bool operator()(const EntryT& x, const EntryT& y) const { + return PredT{}(x.first, y.first); + } + }; + + class iterator { + using T = EntryT; + using MapIterType = + decltype(std::declval>() + .begin()); + friend b_plus_tree_hash_map; + + public: + using iterator_category = std::forward_iterator_tag; + using value_type = T; + using difference_type = std::ptrdiff_t; + using pointer = T*; + using reference = T&; + + explicit iterator(MapIterType map_iter) noexcept : map_iter_{map_iter} {} + + // end() iterator + iterator() noexcept : map_iter_{} {} + + auto& operator*() const noexcept { + return *map_iter_; + } + + auto* operator->() const noexcept { + return &*map_iter_; + } + + auto& operator++() noexcept { + ++map_iter_; + return *this; + } + + auto operator++(int) noexcept { + IterT iterator(*this); + ++(*this); + return iterator; + } + + friend bool operator==(const IterT& left, const IterT& right) noexcept { + return left.map_iter_ == right.map_iter_; + } + + friend bool operator!=(const IterT& left, const IterT& right) noexcept { + return !(left == right); + } + + private: + MapIterType map_iter_; + }; + + b_plus_tree_hash_set map_; +}; + +} // namespace improbable::phtree + +#endif // 
PHTREE_COMMON_B_PLUS_TREE_HASH_MAP_H diff --git a/phtree/common/b_plus_tree_map.h b/phtree/common/b_plus_tree_map.h new file mode 100644 index 00000000..a9705e0a --- /dev/null +++ b/phtree/common/b_plus_tree_map.h @@ -0,0 +1,677 @@ +/* + * Copyright 2022 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_COMMON_B_PLUS_TREE_H +#define PHTREE_COMMON_B_PLUS_TREE_H + +#include "bits.h" +#include +#include +#include + +/* + * PLEASE do not include this file directly, it is included via common.h. + * + * This file contains the B+tree implementation which is used in high-dimensional nodes in + * the PH-Tree. + */ +namespace improbable::phtree { + +/* + * The b_plus_tree_map is a B+tree implementation that uses a hierarchy of horizontally + * connected nodes for fast traversal through all entries. + * + * Behavior: + * This is a key-value map. Keys are unique, so for every key there is at most one entry. + * + * The individual nodes have at most M entries. + * The tree has O(log n) lookup and O(M log n) insertion/removal time complexity, + * space complexity is O(n). + * + * Tree structure: + * - Inner nodes: have other nodes as children; their key of an entry represents the highest + * key of any subnode in that entry + * - Leaf nodes: have values as children; their key represents the key of a key/value pair + * - Every node is either a leaf (l-node; contains values) or an inner node + * (n-node; contains nodes). 
+ * - "Sibling" nodes refer to the nodes linked by prev_node_ or next_node_. Sibling nodes + * usually have the same parent but may also be children of their parent's siblings. + * + * - Guarantee: All leaf nodes are horizontally connected + * - Inner nodes may or may not be connected. Specifically: + * - New inner nodes will be assigned siblings from the same parent or the parent's sibling + * (if the new node is the first or last node in a parent) + * - There is no guarantee that inner nodes know about their potential sibling (=other inner + * nodes that own bordering values/child-nodes). + * - There is no guarantee that siblings are on the same depth of the tree. + * - The tree is not balanced + * + * TODO since this is a "map" (with 1:1 mapping of key:value), we could optimize splitting and + * merging by trying to reduce `dead space` + * (space between key1 and key2 that exceeds (key2 - key1)). + */ +template +class b_plus_tree_map { + class bpt_node_base; + template + class bpt_node_data; + class bpt_node_leaf; + class bpt_node_inner; + class bpt_iterator; + + using key_t = std::uint64_t; + + using bpt_entry_inner = std::pair; + using bpt_entry_leaf = std::pair; + + using IterT = bpt_iterator; + using NodeT = bpt_node_base; + using NLeafT = bpt_node_leaf; + using NInnerT = bpt_node_inner; + using LeafIteratorT = decltype(std::vector().begin()); + using TreeT = b_plus_tree_map; + + public: + explicit b_plus_tree_map() : root_{new NLeafT(nullptr, nullptr, nullptr)}, size_{0} {}; + + b_plus_tree_map(const b_plus_tree_map& other) : size_{other.size_} { + root_ = other.root_->is_leaf() ? new NLeafT(*other.root_->as_leaf()) + : new NInnerT(*other.root_->as_inner()); + } + + b_plus_tree_map(b_plus_tree_map&& other) noexcept : root_{other.root_}, size_{other.size_} { + other.root_ = nullptr; + other.size_ = 0; + } + + b_plus_tree_map& operator=(const b_plus_tree_map& other) { + assert(this != &other); + delete root_; + root_ = other.root_->is_leaf() ? 
new NLeafT(*other.root_->as_leaf()) + : new NInnerT(*other.root_->as_inner()); + size_ = other.size_; + return *this; + } + + b_plus_tree_map& operator=(b_plus_tree_map&& other) noexcept { + delete root_; + root_ = other.root_; + other.root_ = nullptr; + size_ = other.size_; + other.size_ = 0; + return *this; + } + + ~b_plus_tree_map() { + delete root_; + root_ = nullptr; + } + + [[nodiscard]] auto find(key_t key) noexcept { + auto node = root_; + while (!node->is_leaf()) { + node = node->as_inner()->find(key); + if (node == nullptr) { + return end(); + } + } + return node->as_leaf()->find(key); + } + + [[nodiscard]] auto find(key_t key) const noexcept { + return const_cast(*this).find(key); + } + + [[nodiscard]] auto lower_bound(key_t key) noexcept { + auto node = root_; + while (!node->is_leaf()) { + node = node->as_inner()->find(key); + if (node == nullptr) { + return end(); + } + } + return node->as_leaf()->lower_bound_as_iter(key); + } + + [[nodiscard]] auto begin() noexcept { + return IterT(root_); + } + + [[nodiscard]] auto begin() const noexcept { + return IterT(root_); + } + + [[nodiscard]] auto cbegin() const noexcept { + return IterT(root_); + } + + [[nodiscard]] auto end() noexcept { + return IterT(); + } + + [[nodiscard]] auto end() const noexcept { + return IterT(); + } + + template + auto emplace(Args&&... args) { + return try_emplace(std::forward(args)...); + } + + template + auto try_emplace(key_t key, Args&&... 
args) { + auto node = root_; + while (!node->is_leaf()) { + node = node->as_inner()->find_or_last(key); + } + return node->as_leaf()->try_emplace(key, *this, size_, std::forward(args)...); + } + + void erase(key_t key) { + auto node = root_; + while (!node->is_leaf()) { + node = node->as_inner()->find(key); + if (node == nullptr) { + return; + } + } + size_ -= node->as_leaf()->erase_key(key, *this); + } + + void erase(const IterT& iterator) { + assert(iterator != end()); + --size_; + iterator.node_->erase_it(iterator.iter_, *this); + } + + [[nodiscard]] size_t size() const noexcept { + return size_; + } + + void _check() { + size_t count = 0; + NLeafT* prev_leaf = nullptr; + key_t known_min = std::numeric_limits::max(); + root_->_check(count, nullptr, prev_leaf, known_min, 0); + assert(count == size()); + } + + private: + class bpt_node_base { + public: + explicit bpt_node_base(bool is_leaf, NInnerT* parent) noexcept + : is_leaf_{is_leaf}, parent_{parent} {} + + virtual ~bpt_node_base() noexcept = default; + + [[nodiscard]] inline bool is_leaf() const noexcept { + return is_leaf_; + } + + [[nodiscard]] inline NInnerT* as_inner() noexcept { + assert(!is_leaf_); + return static_cast(this); + } + + [[nodiscard]] inline NLeafT* as_leaf() noexcept { + assert(is_leaf_); + return static_cast(this); + } + + virtual void _check(size_t&, NInnerT*, NLeafT*&, key_t&, key_t) = 0; + + public: + const bool is_leaf_; + NInnerT* parent_; + }; + + template + class bpt_node_data : public bpt_node_base { + using DataIteratorT = decltype(std::vector().begin()); + friend IterT; + + constexpr static size_t M_leaf = std::min(size_t(16), COUNT_MAX); + // Default MAX is 32. Special case for small COUNT with smaller inner leaf or + // trees with a single inner leaf. '*2' is added because leaf filling is not compact. + constexpr static size_t M_inner = std::min(size_t(16), COUNT_MAX / M_leaf * 2); + // TODO This could be improved but requires a code change to move > 1 entry when merging. 
+ constexpr static size_t M_leaf_min = 2; // std::max((size_t)2, M_leaf >> 2); + constexpr static size_t M_inner_min = 2; // std::max((size_t)2, M_inner >> 2); + // There is no point in allocating more leaf space than the max amount of entries. + constexpr static size_t M_leaf_init = std::min(size_t(8), COUNT_MAX); + constexpr static size_t M_inner_init = 4; + + public: + explicit bpt_node_data(bool is_leaf, NInnerT* parent, ThisT* prev, ThisT* next) noexcept + : bpt_node_base(is_leaf, parent), data_{}, prev_node_{prev}, next_node_{next} { + data_.reserve(this->M_init()); + } + + virtual ~bpt_node_data() noexcept = default; + + [[nodiscard]] inline size_t M_min() { + return this->is_leaf_ ? M_leaf_min : M_inner_min; + } + + [[nodiscard]] inline size_t M_max() { + return this->is_leaf_ ? M_leaf : M_inner; + } + + [[nodiscard]] inline size_t M_init() { + return this->is_leaf_ ? M_leaf_init : M_inner_init; + } + + [[nodiscard]] auto lower_bound(key_t key) noexcept { + return std::lower_bound( + data_.begin(), data_.end(), key, [](EntryT& left, const key_t key) { + return left.first < key; + }); + } + + [[nodiscard]] size_t size() const noexcept { + return data_.size(); + } + + void erase_entry(DataIteratorT it_to_erase, TreeT& tree) { + auto& parent_ = this->parent_; + key_t max_key_old = data_.back().first; + + size_t pos_to_erase = it_to_erase - data_.begin(); + data_.erase(it_to_erase); + if (parent_ == nullptr) { + if constexpr (std::is_same_v) { + if (data_.size() < 2) { + auto remaining_node = data_.begin()->second; + data_.begin()->second = nullptr; + remaining_node->parent_ = nullptr; + tree.root_ = remaining_node; + delete this; + } + } + return; + } + + if (data_.empty()) { + // Nothing to merge, just remove node. This should be rare, i.e. only happens when + // a rare 1-entry node has its last entry removed. 
+ remove_from_siblings(); + parent_->remove_node(max_key_old, tree); + return; + } + + if (data_.size() < this->M_min()) { + // merge + if (prev_node_ != nullptr && prev_node_->data_.size() < this->M_max()) { + remove_from_siblings(); + auto& prev_data = prev_node_->data_; + if constexpr (std::is_same_v) { + prev_data.emplace_back(std::move(data_[0])); + } else { + data_[0].second->parent_ = prev_node_; + prev_data.emplace_back(std::move(data_[0])); + data_[0].second = nullptr; + } + auto prev_node = prev_node_; // create copy because (this) will be deleted + parent_->remove_node(max_key_old, tree); + if (prev_node->parent_ != nullptr) { + key_t old1 = (prev_data.end() - 2)->first; + key_t new1 = (prev_data.end() - 1)->first; + prev_node->parent_->update_key(old1, new1); + } + return; + } else if (next_node_ != nullptr && next_node_->data_.size() < this->M_max()) { + remove_from_siblings(); + auto& next_data = next_node_->data_; + if constexpr (std::is_same_v) { + next_data.emplace(next_data.begin(), std::move(data_[0])); + } else { + data_[0].second->parent_ = next_node_; + next_data.emplace(next_data.begin(), std::move(data_[0])); + data_[0].second = nullptr; + } + parent_->remove_node(max_key_old, tree); + return; + } + // This node is too small but there is nothing we can do. 
+ } + if (pos_to_erase == data_.size()) { + parent_->update_key(max_key_old, data_.back().first); + } + } + + auto check_split(key_t key, TreeT& tree, size_t& pos_in_out) { + if (data_.size() < this->M_max()) { + if (this->parent_ != nullptr && key > data_.back().first) { + this->parent_->update_key(data_.back().first, key); + } + return static_cast(this); + } + + ThisT* dest = this->split_node(key, tree); + if (dest != this) { + // The insertion pos in node2 can be calculated: + pos_in_out = pos_in_out - data_.size(); + } + return dest; + } + + void _check_data(NInnerT* parent, key_t known_max) { + (void)parent; + (void)known_max; + // assert(parent_ == nullptr || data_.size() >= M_min); + assert(this->parent_ == parent); + if (this->data_.empty()) { + assert(parent == nullptr); + return; + } + assert(this->parent_ == nullptr || known_max == this->data_.back().first); + } + + private: + ThisT* split_node(key_t key_to_add, TreeT& tree) { + auto max_key = data_.back().first; + if (this->parent_ == nullptr) { + auto* new_parent = new NInnerT(nullptr, nullptr, nullptr); + new_parent->emplace_back(max_key, this); + tree.root_ = new_parent; + this->parent_ = new_parent; + } + + // create new node + auto* node2 = new ThisT(this->parent_, static_cast(this), next_node_); + if (next_node_ != nullptr) { + next_node_->prev_node_ = node2; + } + next_node_ = node2; + + // populate new node + // TODO Optimize populating new node: move 1st part, insert new value, move 2nd part...? 
+ auto split_pos = this->M_max() >> 1; + node2->data_.insert( + node2->data_.end(), + std::make_move_iterator(data_.begin() + split_pos), + std::make_move_iterator(data_.end())); + data_.erase(data_.begin() + split_pos, data_.end()); + + if constexpr (std::is_same_v) { + for (auto& e : node2->data_) { + e.second->parent_ = node2; + } + } + + // Add node to parent + auto split_key = data_.back().first; + this->parent_->update_key_and_add_node( + max_key, split_key, std::max(max_key, key_to_add), node2, tree); + + // Return node for insertion of new value + return key_to_add > split_key ? node2 : static_cast(this); + } + + void remove_from_siblings() { + if (next_node_ != nullptr) { + next_node_->prev_node_ = prev_node_; + } + if (prev_node_ != nullptr) { + prev_node_->next_node_ = next_node_; + } + } + + protected: + std::vector data_; + ThisT* prev_node_; + ThisT* next_node_; + }; + + class bpt_node_leaf : public bpt_node_data { + public: + explicit bpt_node_leaf(NInnerT* parent, NLeafT* prev, NLeafT* next) noexcept + : bpt_node_data(true, parent, prev, next) {} + + ~bpt_node_leaf() noexcept = default; + + [[nodiscard]] IterT find(key_t key) noexcept { + auto it = this->lower_bound(key); + if (it != this->data_.end() && it->first == key) { + return IterT(this, it); + } + return IterT(); + } + + [[nodiscard]] IterT lower_bound_as_iter(key_t key) noexcept { + auto it = this->lower_bound(key); + if (it != this->data_.end()) { + return IterT(this, it); + } + return IterT(); + } + + template + auto try_emplace(key_t key, TreeT& tree, size_t& entry_count, Args&&... 
args) { + auto it = this->lower_bound(key); + if (it != this->data_.end() && it->first == key) { + return std::make_pair(IterT(this, it), false); + } + ++entry_count; + + size_t pos = it - this->data_.begin(); // Must be done before split because of MSVC + auto dest = this->check_split(key, tree, pos); + auto x = dest->data_.emplace( + dest->data_.begin() + pos, + std::piecewise_construct, + std::forward_as_tuple(key), + std::forward_as_tuple(std::forward(args)...)); + return std::make_pair(IterT(this, x), true); + } + + bool erase_key(key_t key, TreeT& tree) { + auto it = this->lower_bound(key); + if (it != this->data_.end() && it->first == key) { + this->erase_entry(it, tree); + return true; + } + return false; + } + + void erase_it(LeafIteratorT iter, TreeT& tree) { + this->erase_entry(iter, tree); + } + + void _check( + size_t& count, NInnerT* parent, NLeafT*& prev_leaf, key_t& known_min, key_t known_max) { + this->_check_data(parent, known_max); + + assert(prev_leaf == this->prev_node_); + for (auto& e : this->data_) { + assert(count == 0 || e.first > known_min); + assert(this->parent_ == nullptr || e.first <= known_max); + ++count; + known_min = e.first; + } + prev_leaf = this; + } + }; + + class bpt_node_inner : public bpt_node_data { + public: + explicit bpt_node_inner(NInnerT* parent, NInnerT* prev, NInnerT* next) noexcept + : bpt_node_data(false, parent, prev, next) {} + + ~bpt_node_inner() noexcept { + for (auto& e : this->data_) { + if (e.second != nullptr) { + delete e.second; + } + } + } + + [[nodiscard]] NodeT* find(key_t key) noexcept { + auto it = this->lower_bound(key); + return it != this->data_.end() ? it->second : nullptr; + } + + [[nodiscard]] NodeT* find_or_last(key_t key) noexcept { + auto it = this->lower_bound(key); + return it != this->data_.end() ? 
it->second : this->data_.back().second; + } + + void emplace_back(key_t key, NodeT* node) { + this->data_.emplace_back(key, node); + } + + void _check( + size_t& count, NInnerT* parent, NLeafT*& prev_leaf, key_t& known_min, key_t known_max) { + this->_check_data(parent, known_max); + + assert(this->parent_ == nullptr || known_max == this->data_.back().first); + auto prev_key = this->data_[0].first; + int n = 0; + for (auto& e : this->data_) { + assert(n == 0 || e.first > prev_key); + e.second->_check(count, this, prev_leaf, known_min, e.first); + assert(this->parent_ == nullptr || e.first <= known_max); + prev_key = e.first; + ++n; + } + } + + void update_key(key_t old_key, key_t new_key) { + assert(new_key != old_key); + auto it = this->lower_bound(old_key); + assert(it != this->data_.end()); + assert(it->first == old_key); + it->first = new_key; + if (this->parent_ != nullptr && ++it == this->data_.end()) { + this->parent_->update_key(old_key, new_key); + } + } + + /* + * This method does two things: + * - It changes the key of the node (node 1) at 'key1_old' to 'key1_new'. 
+ * - It inserts a new node (node 2) after 'new_key1' with value 'key2' + * Invariants: + * - Node1: key1_old > key1_new; Node 1 vs 2: key2 > new_key1 + */ + void update_key_and_add_node( + key_t key1_old, key_t key1_new, key_t key2, NodeT* child2, TreeT& tree) { + assert(key2 > key1_new); + assert(key1_old >= key1_new); + auto it2 = this->lower_bound(key1_old) + 1; + + size_t pos = it2 - this->data_.begin(); // Must be done before split because of MSVC + auto dest = this->check_split(key2, tree, pos); + // check_split() guarantees that child2 is in the same node as child1 + assert(pos > 0); + dest->data_[pos - 1].first = key1_new; + child2->parent_ = dest; + dest->data_.emplace(dest->data_.begin() + pos, key2, child2); + } + + void remove_node(key_t key_remove, TreeT& tree) { + auto it_to_erase = this->lower_bound(key_remove); + delete it_to_erase->second; + this->erase_entry(it_to_erase, tree); + } + }; + + class bpt_iterator { + using EntryT = typename b_plus_tree_map::bpt_entry_leaf; + friend b_plus_tree_map; + + public: + using iterator_category = std::forward_iterator_tag; + using value_type = T; + using difference_type = std::ptrdiff_t; + using pointer = T*; + using reference = T&; + + // Arbitrary position iterator + explicit bpt_iterator(NLeafT* node, LeafIteratorT it) noexcept : node_{node}, iter_{it} { + assert(node->is_leaf_ && "just for consistency, insist that we iterate leaves only "); + } + + // begin() iterator + explicit bpt_iterator(NodeT* node) noexcept { + assert(node->parent_ == nullptr && "must start with root node"); + // move iterator to first value + while (!node->is_leaf_) { + node = node->as_inner()->data_[0].second; + } + node_ = node->as_leaf(); + + if (node_->size() == 0) { + node_ = nullptr; + iter_ = {}; + return; + } + iter_ = node_->data_.begin(); + } + + // end() iterator + bpt_iterator() noexcept : node_{nullptr}, iter_{} {} + + auto& operator*() const noexcept { + assert(AssertNotEnd()); + return const_cast(*iter_); + } + + 
auto* operator->() const noexcept { + assert(AssertNotEnd()); + return const_cast(&*iter_); + } + + auto& operator++() noexcept { + assert(AssertNotEnd()); + ++iter_; + if (iter_ == node_->data_.end()) { + // this may be a nullptr -> end of data + node_ = node_->next_node_; + iter_ = node_ != nullptr ? node_->data_.begin() : LeafIteratorT{}; + } + return *this; + } + + auto operator++(int) const noexcept { + IterT iterator(*this); + ++(*this); + return iterator; + } + + friend bool operator==(const IterT& left, const IterT& right) noexcept { + return left.node_ == right.node_ && left.iter_ == right.iter_; + } + + friend bool operator!=(const IterT& left, const IterT& right) noexcept { + return !(left == right); + } + + private: + [[nodiscard]] inline bool AssertNotEnd() const noexcept { + return node_ != nullptr; + } + + NLeafT* node_; + LeafIteratorT iter_; + }; + + private: + NodeT* root_; + size_t size_; +}; +} // namespace improbable::phtree + +#endif // PHTREE_COMMON_B_PLUS_TREE_H diff --git a/phtree/common/base_types.h b/phtree/common/base_types.h index 5ad77ea2..a95a721b 100644 --- a/phtree/common/base_types.h +++ b/phtree/common/base_types.h @@ -40,8 +40,10 @@ using scalar_64_t = int64_t; using scalar_32_t = int32_t; using scalar_16_t = int16_t; -// Bits in a coordinate (usually a double or long has 64 bits, so uint_8 suffices) -using bit_width_t = uint16_t; +// Bits in a coordinate (usually a double or long has 64 bits, so uint_8 suffices). +// However, uint32_t turned out to be faster, probably due to fewer cycles required for 32bit +// instructions (8bit/16bit tend to require more cycles, see CPU tables available on the web). +using bit_width_t = uint32_t; // Number of bit for 'scalar_64_t' or 'scalar_32_t'. Note that 'digits' does _not_ include sign bit, // so e.g. int64_t has 63 `digits`, however we need all bits, i.e. 64. 
template @@ -109,6 +111,10 @@ class PhBox { return min_ == other.min_ && max_ == other.max_; } + auto operator!=(const PhBox& other) const -> bool { + return !(*this == other); + } + private: Point min_; Point max_; diff --git a/phtree/common/common.h b/phtree/common/common.h index 2912c8ec..ce6fd286 100644 --- a/phtree/common/common.h +++ b/phtree/common/common.h @@ -23,6 +23,7 @@ #include "distance.h" #include "filter.h" #include "flat_array_map.h" +#include "b_plus_tree_map.h" #include "flat_sparse_map.h" #include "tree_stats.h" #include diff --git a/phtree/common/converter.h b/phtree/common/converter.h index 012c0454..f913edf8 100644 --- a/phtree/common/converter.h +++ b/phtree/common/converter.h @@ -90,7 +90,7 @@ class ScalarConverterMultiply { public: static scalar_64_t pre(double value) { - return value * MULTIPLY; + return static_cast(value * MULTIPLY); } static double post(scalar_64_t value) { @@ -98,7 +98,7 @@ class ScalarConverterMultiply { } static scalar_32_t pre(float value) { - return value * MULTIPLY; + return static_cast(value * MULTIPLY); } static float post(scalar_32_t value) { @@ -126,7 +126,9 @@ class ConverterBase { using KeyExternal = KEY_EXTERNAL; using KeyInternal = PhPoint; using QueryBoxExternal = QUERY_POINT_EXTERNAL; - using QueryBoxInternal = PhBox; + using QueryBoxInternal = PhBox; + using QueryPointExternal = PhPoint; + using QueryPointInternal = PhPoint; }; /* @@ -174,6 +176,8 @@ template < typename CONVERT = ScalarConverterIEEE> class SimplePointConverter : public ConverterPointBase { using BASE = ConverterPointBase; + + public: using Point = typename BASE::KeyExternal; using PointInternal = typename BASE::KeyInternal; using QueryBox = typename BASE::QueryBoxExternal; @@ -215,9 +219,14 @@ template < typename CONVERT = ScalarConverterIEEE> class SimpleBoxConverter : public ConverterBoxBase { using BASE = ConverterBoxBase; + + public: using Box = typename BASE::KeyExternal; using PointInternal = typename BASE::KeyInternal; using 
QueryBox = typename BASE::QueryBoxExternal; + using QueryBoxInternal = typename BASE::QueryBoxInternal; + using QueryPoint = typename BASE::QueryPointExternal; + using QueryPointInternal = typename BASE::QueryPointInternal; static_assert(std::is_same>::value); static_assert(std::is_same>::value); @@ -243,7 +252,7 @@ class SimpleBoxConverter : public ConverterBoxBase out; + QueryBoxInternal out; auto& min = out.min(); auto& max = out.max(); for (dimension_t i = 0; i < DIM; ++i) { @@ -253,6 +262,22 @@ class SimpleBoxConverter : public ConverterBoxBase static void CheckConsistency(const TREE& tree) { tree.GetInternalTree().GetDebugHelper().CheckConsistency(); + tree.CheckConsistencyExternal(); } /* diff --git a/phtree/common/filter.h b/phtree/common/filter.h index 46eacee3..fe11000c 100644 --- a/phtree/common/filter.h +++ b/phtree/common/filter.h @@ -46,12 +46,18 @@ namespace improbable::phtree { * This function is called for every key/value pair that the query encounters. The function * should return 'true' iff the key/value should be added to the query result. * The parameters are the key and value of the key/value pair. + * NOTE: WHen using a MultiMap, 'T' becomes the type of the 'bucket', i.e. the type of the + * container that holds multiple entries for a given coordinate. * - bool IsNodeValid(const PhPoint& prefix, int bits_to_ignore); * This function is called for every node that the query encounters. The function should * return 'true' if the node should be traversed and searched for potential results. * The parameters are the prefix of the node and the number of least significant bits of the * prefix that can (and should) be ignored. The bits of the prefix that should be ignored can * have any value. + * + * - bool IsBucketEntryValid(const KeyT& key, const ValueT& value); + * This is only used/required for MultiMaps, implementations for a normal PhTree are ignored. + * In case of a MultiMap, this method is called for every entry in a bucket (see above). 
*/ /* @@ -60,11 +66,11 @@ namespace improbable::phtree { struct FilterNoOp { /* * @param key The key/coordinate of the entry. - * @param value The value of the entry. + * @param value The value of the entry. For MultiMaps, this is a container of values. * @returns This default implementation always returns `true`. */ - template - constexpr bool IsEntryValid(const KEY& /*key*/, const T& /*value*/) const { + template + constexpr bool IsEntryValid(const KeyT& /*key*/, const ValueT& /*value*/) const noexcept { return true; } @@ -76,8 +82,21 @@ struct FilterNoOp { * bits_to_ignore is 64-10=54. * @returns This default implementation always returns `true`. */ - template - constexpr bool IsNodeValid(const KEY& /*prefix*/, int /*bits_to_ignore*/) const { + template + constexpr bool IsNodeValid(const KeyT& /*prefix*/, int /*bits_to_ignore*/) const noexcept { + return true; + } + + /* + * This is checked once for every entry in a bucket. The method is called once a call to + * 'IsEntryValid` for the same bucket has returned 'true'. A typical implementation + * simply returns `true` or checks some values of the entry. + * @param key The key/coordinate of the bucket entry. + * @param value The value of the entry. + * @returns This default implementation always returns `true`. + */ + template + constexpr bool IsBucketEntryValid(const KeyT& /*key*/, const ValueT& /*value*/) const noexcept { return true; } }; @@ -86,19 +105,16 @@ struct FilterNoOp { * The AABB filter can be used to query a point tree for an axis aligned bounding box (AABB). * The result is equivalent to that of the 'begin_query(...)' function. 
*/ -template > +template class FilterAABB { using KeyExternal = typename CONVERTER::KeyExternal; using KeyInternal = typename CONVERTER::KeyInternal; using ScalarInternal = typename CONVERTER::ScalarInternal; - static constexpr auto DIM = CONVERTER::DimInternal; public: FilterAABB( - const KeyExternal& min_include, - const KeyExternal& max_include, - CONVERTER converter = CONVERTER()) + const KeyExternal& min_include, const KeyExternal& max_include, const CONVERTER& converter) : min_external_{min_include} , max_external_{max_include} , min_internal_{converter.pre(min_include)} @@ -111,13 +127,13 @@ class FilterAABB { void set(const KeyExternal& min_include, const KeyExternal& max_include) { min_external_ = min_include; max_external_ = max_include; - min_internal_ = converter_.pre(min_include); - max_internal_ = converter_.pre(max_include); + min_internal_ = converter_.get().pre(min_include); + max_internal_ = converter_.get().pre(max_include); } template [[nodiscard]] bool IsEntryValid(const KeyInternal& key, const T& /*value*/) const { - auto point = converter_.post(key); + auto point = converter_.get().post(key); for (dimension_t i = 0; i < DIM; ++i) { if (point[i] < min_external_[i] || point[i] > max_external_[i]) { return false; @@ -126,7 +142,7 @@ class FilterAABB { return true; } - [[nodiscard]] bool IsNodeValid(const KeyInternal& prefix, int bits_to_ignore) const { + [[nodiscard]] bool IsNodeValid(const KeyInternal& prefix, std::uint32_t bits_to_ignore) const { // Let's assume that we always want to traverse the root node (bits_to_ignore == 64) if (bits_to_ignore >= (MAX_BIT_WIDTH - 1)) { return true; @@ -144,42 +160,39 @@ class FilterAABB { } private: - const KeyExternal min_external_; - const KeyExternal max_external_; - const KeyInternal min_internal_; - const KeyInternal max_internal_; - const CONVERTER converter_; + KeyExternal min_external_; + KeyExternal max_external_; + KeyInternal min_internal_; + KeyInternal max_internal_; + std::reference_wrapper 
converter_; }; /* * The sphere filter can be used to query a point tree for a sphere. */ -template < - typename CONVERTER = ConverterIEEE<3>, - typename DISTANCE = DistanceEuclidean> +template class FilterSphere { using KeyExternal = typename CONVERTER::KeyExternal; using KeyInternal = typename CONVERTER::KeyInternal; using ScalarInternal = typename CONVERTER::ScalarInternal; - using ScalarExternal = typename CONVERTER::ScalarExternal; - static constexpr auto DIM = CONVERTER::DimInternal; public: + template > FilterSphere( const KeyExternal& center, - const ScalarExternal& radius, - CONVERTER converter = CONVERTER(), - DISTANCE distance_function = DISTANCE()) + const double radius, + const CONVERTER& converter, + DIST&& distance_function = DIST()) : center_external_{center} , center_internal_{converter.pre(center)} , radius_{radius} , converter_{converter} - , distance_function_{distance_function} {}; + , distance_function_(std::forward(distance_function)){}; template [[nodiscard]] bool IsEntryValid(const KeyInternal& key, const T&) const { - KeyExternal point = converter_.post(key); + KeyExternal point = converter_.get().post(key); return distance_function_(center_external_, point) <= radius_; } @@ -187,7 +200,7 @@ class FilterSphere { * Calculate whether AABB encompassing all possible points in the node intersects with the * sphere. 
*/ - [[nodiscard]] bool IsNodeValid(const KeyInternal& prefix, int bits_to_ignore) const { + [[nodiscard]] bool IsNodeValid(const KeyInternal& prefix, std::uint32_t bits_to_ignore) const { // we always want to traverse the root node (bits_to_ignore == 64) if (bits_to_ignore >= (MAX_BIT_WIDTH - 1)) { @@ -207,17 +220,204 @@ class FilterSphere { closest_in_bounds[i] = std::clamp(center_internal_[i], lo, hi); } - KeyExternal closest_point = converter_.post(closest_in_bounds); + KeyExternal closest_point = converter_.get().post(closest_in_bounds); return distance_function_(center_external_, closest_point) <= radius_; } private: - const KeyExternal center_external_; - const KeyExternal center_internal_; - const ScalarExternal radius_; - const CONVERTER converter_; - const DISTANCE distance_function_; + KeyExternal center_external_; + KeyInternal center_internal_; + double radius_; + std::reference_wrapper converter_; + DISTANCE distance_function_; }; +// deduction guide +template < + typename CONV, + typename DIST = DistanceEuclidean, + typename P = typename CONV::KeyExternal> +FilterSphere(const P&, double, const CONV&, DIST&& fn = DIST()) -> FilterSphere; + +/* + * AABB filter for box keys. + * It detects all boxes that overlap partially or fully with the query box. + */ +template +class FilterBoxAABB { + using KeyInternal = typename CONVERTER::KeyInternal; + using ScalarInternal = typename CONVERTER::ScalarInternal; + using QueryPoint = typename CONVERTER::QueryPointExternal; + using QueryPointInternal = typename CONVERTER::QueryPointInternal; + static constexpr auto DIM = CONVERTER::DimExternal; + + public: + FilterBoxAABB( + const QueryPoint& min_include, const QueryPoint& max_include, const CONVERTER& converter) + : min_internal_{converter.pre_query(min_include)} + , max_internal_{converter.pre_query(max_include)} + , converter_{converter} {}; + + /* + * This function allows resizing/shifting the AABB while iterating over the tree. 
+ */ + void set(const QueryPoint& min_include, const QueryPoint& max_include) { + min_internal_ = converter_.get().pre_query(min_include); + max_internal_ = converter_.get().pre_query(max_include); + } + + template + [[nodiscard]] bool IsEntryValid(const KeyInternal& key, const T& /*value*/) const { + for (dimension_t i = 0; i < DIM; ++i) { + if (key[i + DIM] < min_internal_[i] || key[i] > max_internal_[i]) { + return false; + } + } + return true; + } + + [[nodiscard]] bool IsNodeValid(const KeyInternal& prefix, std::uint32_t bits_to_ignore) const { + // Let's assume that we always want to traverse the root node (bits_to_ignore == 64) + if (bits_to_ignore >= (MAX_BIT_WIDTH - 1)) { + return true; + } + ScalarInternal node_min_bits = MAX_MASK << bits_to_ignore; + ScalarInternal node_max_bits = ~node_min_bits; + + for (dimension_t i = 0; i < DIM; ++i) { + if ((prefix[i] | node_max_bits) < min_internal_[i] || + (prefix[i + DIM] & node_min_bits) > max_internal_[i]) { + return false; + } + } + return true; + } + + private: + QueryPointInternal min_internal_; + QueryPointInternal max_internal_; + std::reference_wrapper converter_; +}; + +/* + * The box sphere filter can be used to query a PH-Tree for boxes that intersect with a sphere. 
+ */ +template +class FilterBoxSphere { + using KeyInternal = typename CONVERTER::KeyInternal; + using ScalarInternal = typename CONVERTER::ScalarInternal; + using QueryPoint = typename CONVERTER::QueryPointExternal; + using QueryPointInternal = typename CONVERTER::QueryPointInternal; + static constexpr auto DIM = CONVERTER::DimExternal; + + public: + template > + FilterBoxSphere( + const QueryPoint& center, + const double radius, + const CONVERTER& converter, + DIST&& distance_function = DIST()) + : center_external_{center} + , center_internal_{converter.pre_query(center)} + , radius_{radius} + , converter_{converter} + , distance_function_(std::forward(distance_function)){}; + + template + [[nodiscard]] bool IsEntryValid(const KeyInternal& key, const T&) const { + QueryPointInternal closest_in_bounds; + for (dimension_t i = 0; i < DIM; ++i) { + // choose value closest to center for each dimension + closest_in_bounds[i] = std::clamp(center_internal_[i], key[i], key[i + DIM]); + } + QueryPoint closest_point = converter_.get().post_query(closest_in_bounds); + return distance_function_(center_external_, closest_point) <= radius_; + } + + /* + * Calculate whether AABB of all possible points in the node intersects with the sphere. 
+ */ + [[nodiscard]] bool IsNodeValid(const KeyInternal& prefix, std::uint32_t bits_to_ignore) const { + // we always want to traverse the root node (bits_to_ignore == 64) + + if (bits_to_ignore >= (MAX_BIT_WIDTH - 1)) { + return true; + } + + ScalarInternal node_min_bits = MAX_MASK << bits_to_ignore; + ScalarInternal node_max_bits = ~node_min_bits; + + QueryPointInternal closest_in_bounds; + for (dimension_t i = 0; i < DIM; ++i) { + // calculate lower and upper bound for dimension for given node + ScalarInternal lo = prefix[i] & node_min_bits; + ScalarInternal hi = prefix[i + DIM] | node_max_bits; + + // choose value closest to center for dimension + closest_in_bounds[i] = std::clamp(center_internal_[i], lo, hi); + } + + QueryPoint closest_point = converter_.get().post_query(closest_in_bounds); + return distance_function_(center_external_, closest_point) <= radius_; + } + + private: + QueryPoint center_external_; + QueryPointInternal center_internal_; + double radius_; + std::reference_wrapper converter_; + DISTANCE distance_function_; +}; +// deduction guide +template < + typename CONV, + typename DIST = DistanceEuclidean, + typename P = typename CONV::KeyExternal> +FilterBoxSphere(const P&, double, const CONV&, DIST&& fn = DIST()) -> FilterBoxSphere; + +/* + * AABB filter for MultiMaps. + */ +template +class FilterMultiMapAABB : public FilterAABB { + using Key = typename CONVERTER::KeyExternal; + using KeyInternal = typename CONVERTER::KeyInternal; + + public: + FilterMultiMapAABB(const Key& min_include, const Key& max_include, CONVERTER& converter) + : FilterAABB(min_include, max_include, converter){}; + + template + [[nodiscard]] inline bool IsBucketEntryValid(const KeyInternal&, const ValueT&) const noexcept { + return true; + } +}; + +/* + * Sphere filter for MultiMaps. 
+ */ +template +class FilterMultiMapSphere : public FilterSphere { + using Key = typename CONVERTER::KeyExternal; + using KeyInternal = typename CONVERTER::KeyInternal; + + public: + template > + FilterMultiMapSphere( + const Key& center, double radius, const CONVERTER& converter, DIST&& dist_fn = DIST()) + : FilterSphere(center, radius, converter, std::forward(dist_fn)){}; + + template + [[nodiscard]] inline bool IsBucketEntryValid(const KeyInternal&, const ValueT&) const noexcept { + return true; + } +}; +// deduction guide +template < + typename CONV, + typename DIST = DistanceEuclidean, + typename P = typename CONV::KeyExternal> +FilterMultiMapSphere(const P&, double, const CONV&, DIST&& fn = DIST()) + -> FilterMultiMapSphere; } // namespace improbable::phtree diff --git a/phtree/common/filter_test.cc b/phtree/common/filter_test.cc deleted file mode 100644 index 41905421..00000000 --- a/phtree/common/filter_test.cc +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright 2020 Improbable Worlds Limited - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "common.h" -#include -#include - -using namespace improbable::phtree; - -TEST(PhTreeFilterTest, FilterSphereTest) { - FilterSphere, DistanceEuclidean<2>> filter{{5, 3}, 5}; - // root is always valid - ASSERT_TRUE(filter.IsNodeValid({0, 0}, 63)); - // valid because node encompasses the circle - ASSERT_TRUE(filter.IsNodeValid({1, 1}, 10)); - // valid because circle encompasses the node - ASSERT_TRUE(filter.IsNodeValid({5, 5}, 2)); - // valid because circle encompasses the node AABB - ASSERT_TRUE(filter.IsNodeValid({7, 7}, 1)); - // valid because circle touches the edge of the node AABB - ASSERT_TRUE(filter.IsNodeValid({5, 9}, 1)); - // valid because circle cuts edge of node AABB - ASSERT_TRUE(filter.IsNodeValid({12, 7}, 3)); - ASSERT_TRUE(filter.IsNodeValid({10, 7}, 2)); - // invalid because node is just outside the circle - ASSERT_FALSE(filter.IsNodeValid({5, 10}, 1)); - ASSERT_FALSE(filter.IsNodeValid({12, 12}, 3)); - - ASSERT_TRUE(filter.IsEntryValid({3, 7}, nullptr)); - ASSERT_TRUE(filter.IsEntryValid({5, 8}, nullptr)); - ASSERT_FALSE(filter.IsEntryValid({3, 8}, nullptr)); -} - -TEST(PhTreeFilterTest, BoxFilterTest) { - FilterAABB> filter{{3, 3}, {7, 7}}; - // root is always valid - ASSERT_TRUE(filter.IsNodeValid({0, 0}, 63)); - // valid because node encompasses the AABB - ASSERT_TRUE(filter.IsNodeValid({1, 1}, 10)); - // valid - ASSERT_TRUE(filter.IsNodeValid({7, 7}, 1)); - // invalid - ASSERT_FALSE(filter.IsNodeValid({88, 5}, 1)); - - ASSERT_TRUE(filter.IsEntryValid({3, 7}, nullptr)); - ASSERT_FALSE(filter.IsEntryValid({2, 8}, nullptr)); -} - -TEST(PhTreeFilterTest, FilterNoOpSmokeTest) { - auto filter = FilterNoOp(); - ASSERT_TRUE(filter.IsNodeValid>({3, 7, 2}, 10)); - ASSERT_TRUE(filter.IsEntryValid>({3, 7, 2}, 10)); -} \ No newline at end of file diff --git a/phtree/common/flat_sparse_map.h b/phtree/common/flat_sparse_map.h index 3c264223..f822d3d8 100644 --- a/phtree/common/flat_sparse_map.h +++ b/phtree/common/flat_sparse_map.h @@ -32,7 
+32,7 @@ namespace improbable::phtree { namespace { template -using PhFlatMapPair = std::pair; +using PhSparseMapPair = std::pair; using index_t = std::int32_t; } // namespace @@ -46,7 +46,9 @@ using index_t = std::int32_t; template class sparse_map { public: - explicit sparse_map() : data_{} {}; + explicit sparse_map() : data_{} { + data_.reserve(4); + } [[nodiscard]] auto find(size_t key) { auto it = lower_bound(key); @@ -66,14 +68,14 @@ class sparse_map { [[nodiscard]] auto lower_bound(size_t key) { return std::lower_bound( - data_.begin(), data_.end(), key, [](PhFlatMapPair& left, const size_t key) { + data_.begin(), data_.end(), key, [](PhSparseMapPair& left, const size_t key) { return left.first < key; }); } [[nodiscard]] auto lower_bound(size_t key) const { return std::lower_bound( - data_.cbegin(), data_.cend(), key, [](const PhFlatMapPair& left, const size_t key) { + data_.cbegin(), data_.cend(), key, [](const PhSparseMapPair& left, const size_t key) { return left.first < key; }); } @@ -115,7 +117,7 @@ class sparse_map { } } - void erase(const typename std::vector>::iterator& iterator) { + void erase(const typename std::vector>::iterator& iterator) { data_.erase(iterator); } @@ -149,7 +151,7 @@ class sparse_map { } } - std::vector> data_; + std::vector> data_; }; } // namespace improbable::phtree diff --git a/phtree/phtree.h b/phtree/phtree.h index 54dfd2dd..b7d30695 100644 --- a/phtree/phtree.h +++ b/phtree/phtree.h @@ -32,8 +32,6 @@ namespace improbable::phtree { template > class PhTree { friend PhTreeDebugHelper; - using KeyInternal = typename CONVERTER::KeyInternal; - using QueryBox = typename CONVERTER::QueryBoxExternal; using Key = typename CONVERTER::KeyExternal; static constexpr dimension_t DimInternal = CONVERTER::DimInternal; @@ -42,7 +40,17 @@ class PhTree { typename std::conditional<(DIM == DimInternal), QueryPoint, QueryIntersect>::type; public: - explicit PhTree(CONVERTER converter = CONVERTER()) : tree_{converter}, converter_{converter} {} + 
// Unless specified otherwise this is just PhBox + using QueryBox = typename CONVERTER::QueryBoxExternal; + + template + explicit PhTree(CONV&& converter = CONV()) : tree_{&converter_}, converter_{converter} {} + + PhTree(const PhTree& other) = delete; + PhTree& operator=(const PhTree& other) = delete; + PhTree(PhTree&& other) noexcept = default; + PhTree& operator=(PhTree&& other) noexcept = default; + ~PhTree() noexcept = default; /* * Attempts to build and insert a key and a value into the tree. @@ -60,7 +68,7 @@ class PhTree { */ template std::pair emplace(const Key& key, Args&&... args) { - return tree_.emplace(converter_.pre(key), std::forward(args)...); + return tree_.try_emplace(converter_.pre(key), std::forward(args)...); } /* @@ -80,7 +88,7 @@ class PhTree { */ template std::pair emplace_hint(const ITERATOR& iterator, const Key& key, Args&&... args) { - return tree_.emplace_hint(iterator, converter_.pre(key), std::forward(args)...); + return tree_.try_emplace(iterator, converter_.pre(key), std::forward(args)...); } /* @@ -93,6 +101,22 @@ class PhTree { return tree_.insert(converter_.pre(key), value); } + /* + * See emplace(). + */ + template + std::pair try_emplace(const Key& key, Args&&... args) { + return tree_.try_emplace(converter_.pre(key), std::forward(args)...); + } + + /* + * See emplace_hint(). + */ + template + std::pair try_emplace(const ITERATOR& iterator, const Key& key, Args&&... args) { + return tree_.try_emplace(iterator, converter_.pre(key), std::forward(args)...); + } + /* * @return the value stored at position 'key'. If no such value exists, one is added to the tree * and returned. @@ -147,6 +171,44 @@ class PhTree { return tree_.erase(iterator); } + /* + * This function attempts to remove a 'value' from 'old_key' and reinsert it for 'new_key'. 
+ * + * The function will report _success_ in the following cases: + * - the value was removed from the old position and reinserted at the new position + * - the old position and new position refer to the same bucket. + * + * The function will report _failure_ in the following cases: + * - The value was already present in the new position + * - The value was not present in the old position + * + * This method will _not_ remove the value from the old position if it is already present at the + * new position. + * + * @param old_key The old position + * @param new_key The new position + * @return '1' if the 'value' was moved, otherwise '0'. + */ + auto relocate(const Key& old_key, const Key& new_key) { + return tree_.relocate_if( + converter_.pre(old_key), converter_.pre(new_key), [](const T&) { return true; }); + } + + /* + * Relocate (move) an entry from one position to another, subject to a predicate. + * + * @param old_key The old position + * @param new_key The new position + * @param predicate The predicate is called for every value before it is relocated. + * If the predicate returns 'false', the relocation is aborted. + * @return '1' if the 'value' was moved, otherwise '0'. + */ + template + auto relocate_if(const Key& old_key, const Key& new_key, PRED&& predicate) { + return tree_.relocate_if( + converter_.pre(old_key), converter_.pre(new_key), std::forward(predicate)); + } + /* * Iterates over all entries in the tree. The optional filter allows filtering entries and nodes * (=sub-trees) before returning / traversing them. By default all entries are returned. Filter @@ -158,9 +220,9 @@ class PhTree { * sub-nodes before they are returned or traversed. Any filter function must follow the * signature of the default 'FilterNoOp`. 
*/ - template - void for_each(CALLBACK_FN& callback, FILTER filter = FILTER()) const { - tree_.for_each(callback, filter); + template + void for_each(CALLBACK&& callback, FILTER&& filter = FILTER()) const { + tree_.for_each(std::forward(callback), std::forward(filter)); } /* @@ -175,15 +237,18 @@ class PhTree { * signature of the default 'FilterNoOp`. */ template < - typename CALLBACK_FN, + typename CALLBACK, typename FILTER = FilterNoOp, typename QUERY_TYPE = DEFAULT_QUERY_TYPE> void for_each( QueryBox query_box, - CALLBACK_FN& callback, - FILTER filter = FILTER(), + CALLBACK&& callback, + FILTER&& filter = FILTER(), QUERY_TYPE query_type = QUERY_TYPE()) const { - tree_.for_each(query_type(converter_.pre_query(query_box)), callback, filter); + tree_.for_each( + query_type(converter_.pre_query(query_box)), + std::forward(callback), + std::forward(filter)); } /* @@ -194,8 +259,8 @@ class PhTree { * @return an iterator over all (filtered) entries in the tree, */ template - auto begin(FILTER filter = FILTER()) const { - return tree_.begin(filter); + auto begin(FILTER&& filter = FILTER()) const { + return tree_.begin(std::forward(filter)); } /* @@ -211,9 +276,10 @@ class PhTree { template auto begin_query( const QueryBox& query_box, - FILTER filter = FILTER(), + FILTER&& filter = FILTER(), QUERY_TYPE query_type = DEFAULT_QUERY_TYPE()) const { - return tree_.begin_query(query_type(converter_.pre_query(query_box)), filter); + return tree_.begin_query( + query_type(converter_.pre_query(query_box)), std::forward(filter)); } /* @@ -238,18 +304,21 @@ class PhTree { auto begin_knn_query( size_t min_results, const Key& center, - DISTANCE distance_function = DISTANCE(), - FILTER filter = FILTER()) const { + DISTANCE&& distance_function = DISTANCE(), + FILTER&& filter = FILTER()) const { // We use pre() instead of pre_query() here because, strictly speaking, we want to // find the nearest neighbors of a (fictional) key, which may as well be a box. 
return tree_.begin_knn_query( - min_results, converter_.pre(center), distance_function, filter); + min_results, + converter_.pre(center), + std::forward(distance_function), + std::forward(filter)); } /* * @return An iterator representing the tree's 'end'. */ - const auto& end() const { + auto end() const { return tree_.end(); } @@ -287,6 +356,14 @@ class PhTree { return tree_; } + void CheckConsistencyExternal() const { + [[maybe_unused]] size_t n = 0; + for ([[maybe_unused]] const auto& entry : tree_) { + ++n; + } + assert(n == size()); + } + v16::PhTreeV16 tree_; CONVERTER converter_; }; diff --git a/phtree/phtree_d_test_filter.cc b/phtree/phtree_d_test_filter.cc deleted file mode 100644 index f5470190..00000000 --- a/phtree/phtree_d_test_filter.cc +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright 2020 Improbable Worlds Limited - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "phtree/phtree.h" -#include -#include -#include - -using namespace improbable::phtree; - -template -using TestPoint = PhPointD; - -template -using TestTree = PhTreeD; - -class DoubleRng { - public: - DoubleRng(double minIncl, double maxExcl) : eng(), rnd{minIncl, maxExcl} {} - - double next() { - return rnd(eng); - } - - private: - std::default_random_engine eng; - std::uniform_real_distribution rnd; -}; - -template -void generateCube(std::vector>& points, size_t N) { - DoubleRng rng(-1000, 1000); - auto refTree = std::map, size_t>(); - - points.reserve(N); - for (size_t i = 0; i < N; i++) { - auto point = TestPoint{rng.next(), rng.next(), rng.next()}; - if (refTree.count(point) != 0) { - i--; - continue; - } - - refTree.emplace(point, i); - points.push_back(point); - } - ASSERT_EQ(refTree.size(), N); - ASSERT_EQ(points.size(), N); -} - -template -void populate(TestTree& tree, std::vector>& points, size_t N) { - generateCube(points, N); - for (size_t i = 0; i < N; i++) { - ASSERT_TRUE(tree.insert(points[i], i).second); - } - ASSERT_EQ(N, tree.size()); -} diff --git a/phtree/phtree_multimap.h b/phtree/phtree_multimap.h index 75540f9f..09447b99 100644 --- a/phtree/phtree_multimap.h +++ b/phtree/phtree_multimap.h @@ -17,6 +17,7 @@ #ifndef PHTREE_PHTREE_MULTIMAP_H #define PHTREE_PHTREE_MULTIMAP_H +#include "common/b_plus_tree_hash_map.h" #include "common/common.h" #include "v16/phtree_v16.h" #include @@ -56,8 +57,11 @@ class IteratorBase { friend PHTREE; using T = typename PHTREE::ValueType; + protected: + using BucketIterType = typename PHTREE::BucketIterType; + public: - explicit IteratorBase() noexcept : current_value_ptr_{nullptr}, is_finished_{false} {} + explicit IteratorBase() noexcept : current_value_ptr_{nullptr} {} T& operator*() const noexcept { assert(current_value_ptr_); @@ -71,26 +75,16 @@ class IteratorBase { friend bool operator==( const IteratorBase& left, const IteratorBase& right) noexcept { - // Note: The following compares pointers 
to Entry objects (actually: their values T) - // so it should be _fast_ and return 'true' only for identical entries. - static_assert(std::is_pointer_v); - return (left.is_finished_ && right.Finished()) || - (!left.is_finished_ && !right.Finished() && - left.current_value_ptr_ == right.current_value_ptr_); + return left.current_value_ptr_ == right.current_value_ptr_; } friend bool operator!=( const IteratorBase& left, const IteratorBase& right) noexcept { - return !(left == right); + return left.current_value_ptr_ != right.current_value_ptr_; } protected: - [[nodiscard]] bool Finished() const noexcept { - return is_finished_; - } - void SetFinished() noexcept { - is_finished_ = true; current_value_ptr_ = nullptr; } @@ -100,41 +94,21 @@ class IteratorBase { private: const T* current_value_ptr_; - bool is_finished_; }; -template +template class IteratorNormal : public IteratorBase { friend PHTREE; - using BucketIterType = typename PHTREE::BucketIterType; - using PhTreeIterEndType = typename PHTREE::EndType; + using BucketIterType = typename IteratorBase::BucketIterType; public: - explicit IteratorNormal(const PhTreeIterEndType& iter_ph_end) noexcept - : IteratorBase() - , iter_ph_end_{iter_ph_end} - , iter_ph_{iter_ph_end} - , iter_bucket_{} - , filter_{} { - this->SetFinished(); - } + explicit IteratorNormal() noexcept : IteratorBase(), iter_ph_{}, iter_bucket_{} {} - // Why are we passing two iterators by reference + std::move? 
- // See: https://abseil.io/tips/117 - IteratorNormal( - const PhTreeIterEndType& iter_ph_end, - ITERATOR_PH iter_ph, - BucketIterType iter_bucket, - const FILTER filter = FILTER()) noexcept + template + IteratorNormal(ITER_PH&& iter_ph, BucketIterType&& iter_bucket) noexcept : IteratorBase() - , iter_ph_end_{iter_ph_end} - , iter_ph_{std::move(iter_ph)} - , iter_bucket_{std::move(iter_bucket)} - , filter_{filter} { - if (iter_ph == iter_ph_end) { - this->SetFinished(); - return; - } + , iter_ph_{std::forward(iter_ph)} + , iter_bucket_{std::forward(iter_bucket)} { FindNextElement(); } @@ -168,17 +142,18 @@ class IteratorNormal : public IteratorBase { private: void FindNextElement() { - while (iter_ph_ != iter_ph_end_) { + while (!iter_ph_.IsEnd()) { while (iter_bucket_ != iter_ph_->end()) { // We filter only entries here, nodes are filtered elsewhere - if (filter_.IsEntryValid(iter_ph_.GetCurrentResult()->GetKey(), *iter_bucket_)) { + if (iter_ph_.__Filter().IsBucketEntryValid( + iter_ph_.GetEntry()->GetKey(), *iter_bucket_)) { this->SetCurrentValue(&(*iter_bucket_)); return; } ++iter_bucket_; } ++iter_ph_; - if (iter_ph_ != iter_ph_end_) { + if (!iter_ph_.IsEnd()) { iter_bucket_ = iter_ph_->begin(); } } @@ -186,24 +161,17 @@ class IteratorNormal : public IteratorBase { this->SetFinished(); } - PhTreeIterEndType& iter_ph_end_; ITERATOR_PH iter_ph_; BucketIterType iter_bucket_; - FILTER filter_; }; -template -class IteratorKnn : public IteratorNormal { - using BucketIterType = typename PHTREE::BucketIterType; - using PhTreeIterEndType = typename PHTREE::EndType; - +template +class IteratorKnn : public IteratorNormal { public: - IteratorKnn( - const PhTreeIterEndType& iter_ph_end, - const ITERATOR_PH iter_ph, - BucketIterType iter_bucket, - const FILTER filter) noexcept - : IteratorNormal(iter_ph_end, iter_ph, iter_bucket, filter) {} + template + IteratorKnn(ITER_PH&& iter_ph, BucketIterType&& iter_bucket) noexcept + : IteratorNormal( + std::forward(iter_ph), 
std::forward(iter_bucket)) {} [[nodiscard]] double distance() const noexcept { return this->GetIteratorOfPhTree().distance(); @@ -219,24 +187,32 @@ template < dimension_t DIM, typename T, typename CONVERTER = ConverterNoOp, - typename BUCKET = std::unordered_set, + typename BUCKET = b_plus_tree_hash_set, bool POINT_KEYS = true, typename DEFAULT_QUERY_TYPE = QueryPoint> class PhTreeMultiMap { - friend PhTreeDebugHelper; using KeyInternal = typename CONVERTER::KeyInternal; - using QueryBox = typename CONVERTER::QueryBoxExternal; using Key = typename CONVERTER::KeyExternal; static constexpr dimension_t DimInternal = CONVERTER::DimInternal; using PHTREE = PhTreeMultiMap; - - public: using ValueType = T; using BucketIterType = decltype(std::declval().begin()); - using EndType = decltype(std::declval>().end()); + using EndType = decltype(std::declval>().end()); + + friend PhTreeDebugHelper; + friend IteratorBase; + + public: + using QueryBox = typename CONVERTER::QueryBoxExternal; explicit PhTreeMultiMap(CONVERTER converter = CONVERTER()) - : tree_{converter}, converter_{converter}, size_{0} {} + : tree_{&converter_}, converter_{converter}, size_{0} {} + + PhTreeMultiMap(const PhTreeMultiMap& other) = delete; + PhTreeMultiMap& operator=(const PhTreeMultiMap& other) = delete; + PhTreeMultiMap(PhTreeMultiMap&& other) noexcept = default; + PhTreeMultiMap& operator=(PhTreeMultiMap&& other) noexcept = default; + ~PhTreeMultiMap() noexcept = default; /* * Attempts to build and insert a key and a value into the tree. @@ -254,7 +230,7 @@ class PhTreeMultiMap { */ template std::pair emplace(const Key& key, Args&&... args) { - auto& outer_iter = tree_.emplace(converter_.pre(key)).first; + auto& outer_iter = tree_.try_emplace(converter_.pre(key)).first; auto bucket_iter = outer_iter.emplace(std::forward(args)...); size_ += bucket_iter.second ? 
1 : 0; return {const_cast(*bucket_iter.first), bucket_iter.second}; @@ -269,7 +245,7 @@ class PhTreeMultiMap { * to erase() and if no other modifications occurred. * The following is valid: * - * // Move value from key1 to key2 + * // Move value from key1 to key2 (if you don't want to use relocate() ). * auto iter = tree.find(key1); * auto value = iter.second(); // The value may become invalid in erase() * erase(iter); @@ -277,7 +253,7 @@ class PhTreeMultiMap { */ template std::pair emplace_hint(const ITERATOR& iterator, const Key& key, Args&&... args) { - auto result_ph = tree_.emplace_hint(iterator.GetIteratorOfPhTree(), converter_.pre(key)); + auto result_ph = tree_.try_emplace(iterator.GetIteratorOfPhTree(), converter_.pre(key)); auto& bucket = result_ph.first; if (result_ph.second) { // new bucket @@ -306,6 +282,22 @@ class PhTreeMultiMap { return emplace(key, value); } + /* + * See emplace(). + */ + template + std::pair try_emplace(const Key& key, Args&&... args) { + return emplace(key, std::forward(args)...); + } + + /* + * See emplace_hint(). + */ + template + std::pair try_emplace(const ITERATOR& iterator, const Key& key, Args&&... args) { + return emplace_hint(iterator, key, std::forward(args)...); + } + /* * @return '1', if a value is associated with the provided key, otherwise '0'. */ @@ -337,16 +329,11 @@ class PhTreeMultiMap { * See std::unordered_multimap::find(). 
* * @param key the key to look up - * @return an iterator that points either to the the first value associated with the key or + * @return an iterator that points either to the first value associated with the key or * to {@code end()} if no value was found */ auto find(const Key& key) const { - auto outer_iter = tree_.find(converter_.pre(key)); - if (outer_iter == tree_.end()) { - return CreateIterator(tree_.end(), bucket_dummy_end_); - } - auto bucket_iter = outer_iter.second().begin(); - return CreateIterator(outer_iter, bucket_iter); + return CreateIterator(tree_.find(converter_.pre(key))); } /* @@ -358,12 +345,7 @@ class PhTreeMultiMap { * or to {@code end()} if the key/value pair was found */ auto find(const Key& key, const T& value) const { - auto outer_iter = tree_.find(converter_.pre(key)); - if (outer_iter == tree_.end()) { - return CreateIterator(tree_.end(), bucket_dummy_end_); - } - auto bucket_iter = outer_iter.second().find(value); - return CreateIterator(outer_iter, bucket_iter); + return CreateIteratorFind(tree_.find(converter_.pre(key)), value); } /* @@ -388,7 +370,7 @@ class PhTreeMultiMap { /* * See std::map::erase(). Removes any entry located at the provided iterator. * - * This function uses the iterator to directly erase the entry so it is usually faster than + * This function uses the iterator to directly erase the entry, so it is usually faster than * erase(key, value). * * @return '1' if a value was found, otherwise '0'. @@ -416,66 +398,145 @@ class PhTreeMultiMap { /* * This function attempts to remove the 'value' from 'old_key' and reinsert it for 'new_key'. * - * The relocate will report _success_ in the following cases: + * The relocate function will report _success_ in the following cases: * - the value was removed from the old position and reinserted at the new position - * - the position and new position refer to the same bucket. + * - the old position and new position are identical. 
* - * The relocate will report_failure_ in the following cases: + * The relocate function will report _failure_ in the following cases: * - The value was already present in the new position * - The value was not present in the old position * - * This method will _always_ attempt to insert the value at the new position even if the value - * was not found at the old position. - * This method will _not_ remove the value from the old position if it is already present at the - * new position. + * In case of _failure_, this function guarantees that the tree remains unchanged + * or is returned to its original state (i.e. before the function was called). * * @param old_key The old position * @param new_key The new position - * @param always_erase Setting this flag to 'true' ensures that the value is removed from - * the old position even if it is already present at the new position. This may double the - * execution cost of this method. The default is 'false'. + * @param value The value that needs to be relocated. The relocate() method used the value's + * '==' operator to identify the entry that should be moved. + * @param count_equals This setting toggles whether a relocate() between two identical keys + * should be counted as 'success' and return '1'. The function may still return '0' + * in case the keys are not in the index. + * Background: the intuitively correct behavior is to return '1' for identical + * (exising) keys. However, avoiding this check can considerably speed up + * relocate() calls, especially when using a ConverterMultiply. + * * @return '1' if a value was found and reinserted, otherwise '0'. 
*/ - size_t relocate( - const Key& old_key, const Key& new_key, const T& value, bool always_erase = false) { - // Be smart: insert first, if the target-map already contains the entry we can avoid erase() - auto new_key_pre = converter_.pre(new_key); - auto& new_bucket = tree_.emplace(new_key_pre).first; - auto new_result = new_bucket.emplace(value); - if (!new_result.second) { - // Entry is already in correct place -> abort - // Return '1' if old/new refer to the same bucket, otherwise '0' - if (converter_.pre(old_key) == new_key_pre) { - return 1; - } - if (!always_erase) { - // Abort, unless we insist on erase() - return 0; + template + size_t relocate(const Key& old_key, const Key& new_key, T2&& value, bool count_equals = true) { + auto pair = tree_._find_or_create_two_mm( + converter_.pre(old_key), converter_.pre(new_key), count_equals); + auto& iter_old = pair.first; + auto& iter_new = pair.second; + + if (iter_old.IsEnd()) { + return 0; + } + auto iter_old_value = iter_old->find(value); + if (iter_old_value == iter_old->end()) { + if (iter_new->empty()) { + tree_.erase(iter_new); } + return 0; } - auto old_outer_iter = tree_.find(converter_.pre(old_key)); - if (old_outer_iter == tree_.end()) { - // No entry for old_key -> fail - return 0; + // Are we inserting in same node and same quadrant? Or are the keys equal? 
+ if (iter_old == iter_new) { + assert(old_key == new_key); + return 1; } - auto old_bucket_iter = old_outer_iter->find(value); - if (old_bucket_iter == old_outer_iter->end()) { + assert(iter_old_value != iter_old->end()); + if (!iter_new->emplace(std::move(*iter_old_value)).second) { return 0; } - old_outer_iter->erase(old_bucket_iter); - // clean up - if (old_outer_iter->empty()) { - tree_.erase(old_outer_iter); + iter_old->erase(iter_old_value); + if (iter_old->empty()) { + [[maybe_unused]] auto found = tree_.erase(iter_old); + assert(found); } return 1; } + /* + * This function attempts to remove the 'value' from 'old_key' and reinsert it for 'new_key'. + * + * The relocate function will report _success_ in the following cases: + * - the value was removed from the old position and reinserted at the new position + * - the old position and new position are identical. + * + * The relocate function will report _failure_ in the following cases: + * - The value was already present in the new position + * - The value was not present in the old position + * + * In case of _failure_, this function guarantees that the tree remains unchanged + * or is returned to its original state (i.e. before the function was called). + * + * @param old_key The old position + * @param new_key The new position + * @param predicate The predicate that is used for every value at position old_key to evaluate + * whether it should be relocated to new_key. + * @param count_equals This setting toggles whether a relocate() between two identical keys + * should be counted as 'success' and return '1'. The function may still return '0' + * in case the keys are not in the index. + * Background: the intuitively correct behavior is to return '1' for identical + * (existing) keys. However, avoiding this check can considerably speed up + * relocate() calls, especially when using a ConverterMultiply. + * + * @return the number of values that were relocated. 
+ */ + template + size_t relocate_if( + const Key& old_key, const Key& new_key, PREDICATE&& predicate, bool count_equals = true) { + auto pair = tree_._find_or_create_two_mm( + converter_.pre(old_key), converter_.pre(new_key), count_equals); + auto& iter_old = pair.first; + auto& iter_new = pair.second; + + if (iter_old.IsEnd()) { + assert(iter_new.IsEnd() || !iter_new->empty()); // Otherwise remove iter_new + return 0; + } + + // Are we inserting in same node and same quadrant? Or are the keys equal? + if (iter_old == iter_new) { + assert(old_key == new_key); + return 1; + } + + size_t n = 0; + auto it = iter_old->begin(); + while (it != iter_old->end()) { + if (predicate(*it) && iter_new->emplace(std::move(*it)).second) { + it = iter_old->erase(it); + ++n; + } else { + ++it; + } + } + + if (iter_old->empty()) { + [[maybe_unused]] auto found = tree_.erase(iter_old); + assert(found); + } else if (iter_new->empty()) { + [[maybe_unused]] auto found = tree_.erase(iter_new); + assert(found); + } + return n; + } + + /* + * Relocates all values from one coordinate to another. + * Returns an iterator pointing to the relocated data (or end(), if the relocation failed). + */ + auto relocate_all(const Key& old_key, const Key& new_key) { + return tree_.relocate(old_key, new_key); + } + /* * Iterates over all entries in the tree. The optional filter allows filtering entries and nodes - * (=sub-trees) before returning / traversing them. By default all entries are returned. Filter + * (=sub-trees) before returning / traversing them. By default, all entries are returned. Filter * functions must implement the same signature as the default 'FilterNoOp'. * * @param callback The callback function to be called for every entry that matches the filter. @@ -485,10 +546,12 @@ class PhTreeMultiMap { * follow the signature of the default 'FilterNoOp`. * The default 'FilterNoOp` filter matches all entries. 
*/ - template - void for_each(CALLBACK_FN& callback, FILTER filter = FILTER()) const { - CallbackWrapper inner_callback{callback, filter, converter_}; - tree_.for_each(inner_callback, WrapFilter(filter)); + template + void for_each(CALLBACK&& callback, FILTER&& filter = FILTER()) const { + tree_.for_each( + NoOpCallback{}, + WrapCallbackFilter{ + std::forward(callback), std::forward(filter), converter_}); } /* @@ -505,35 +568,30 @@ class PhTreeMultiMap { * The default 'FilterNoOp` filter matches all entries. */ template < - typename CALLBACK_FN, + typename CALLBACK, typename FILTER = FilterNoOp, typename QUERY_TYPE = DEFAULT_QUERY_TYPE> void for_each( QueryBox query_box, - CALLBACK_FN& callback, - const FILTER& filter = FILTER(), + CALLBACK&& callback, + FILTER&& filter = FILTER(), QUERY_TYPE query_type = QUERY_TYPE()) const { - CallbackWrapper inner_callback{callback, filter, converter_}; - tree_.for_each( - query_type(converter_.pre_query(query_box)), inner_callback, WrapFilter(filter)); + tree_.template for_each>( + query_type(converter_.pre_query(query_box)), + {}, + {std::forward(callback), std::forward(filter), converter_}); } /* * Iterates over all entries in the tree. The optional filter allows filtering entries and nodes - * (=sub-trees) before returning / traversing them. By default all entries are returned. Filter + * (=sub-trees) before returning / traversing them. By default, all entries are returned. Filter * functions must implement the same signature as the default 'FilterNoOp'. 
* * @return an iterator over all (filtered) entries in the tree, */ template - auto begin(FILTER filter = FILTER()) const { - auto outer_iter = tree_.begin(WrapFilter(filter)); - if (outer_iter == tree_.end()) { - return CreateIterator(outer_iter, bucket_dummy_end_, filter); - } - auto bucket_iter = outer_iter.second().begin(); - assert(bucket_iter != outer_iter.second().end()); - return CreateIterator(outer_iter, bucket_iter, filter); + auto begin(FILTER&& filter = FILTER()) const { + return CreateIterator(tree_.begin(std::forward(filter))); } /* @@ -549,16 +607,10 @@ class PhTreeMultiMap { template auto begin_query( const QueryBox& query_box, - FILTER filter = FILTER(), - QUERY_TYPE query_type = QUERY_TYPE()) const { - auto outer_iter = - tree_.begin_query(query_type(converter_.pre_query(query_box)), WrapFilter(filter)); - if (outer_iter == tree_.end()) { - return CreateIterator(outer_iter, bucket_dummy_end_, filter); - } - auto bucket_iter = outer_iter.second().begin(); - assert(bucket_iter != outer_iter.second().end()); - return CreateIterator(outer_iter, bucket_iter, filter); + FILTER&& filter = FILTER(), + QUERY_TYPE&& query_type = QUERY_TYPE()) const { + return CreateIterator(tree_.begin_query( + query_type(converter_.pre_query(query_box)), std::forward(filter))); } /* @@ -583,25 +635,22 @@ class PhTreeMultiMap { auto begin_knn_query( size_t min_results, const Key& center, - DISTANCE distance_function = DISTANCE(), - FILTER filter = FILTER()) const { + DISTANCE&& distance_function = DISTANCE(), + FILTER&& filter = FILTER()) const { // We use pre() instead of pre_query() here because, strictly speaking, we want to // find the nearest neighbors of a (fictional) key, which may as well be a box. 
- auto outer_iter = tree_.begin_knn_query( - min_results, converter_.pre(center), distance_function, WrapFilter(filter)); - if (outer_iter == tree_.end()) { - return CreateIteratorKnn(outer_iter, bucket_dummy_end_, filter); - } - auto bucket_iter = outer_iter.second().begin(); - assert(bucket_iter != outer_iter.second().end()); - return CreateIteratorKnn(outer_iter, bucket_iter, filter); + return CreateIteratorKnn(tree_.begin_knn_query( + min_results, + converter_.pre(center), + std::forward(distance_function), + std::forward(filter))); } /* * @return An iterator representing the tree's 'end'. */ - const auto& end() const { - return the_end_; + auto end() const { + return IteratorNormal{}; } /* @@ -639,64 +688,87 @@ class PhTreeMultiMap { return tree_; } - template - auto CreateIterator( - OUTER_ITER outer_iter, BucketIterType bucket_iter, FILTER filter = FILTER()) const { - return IteratorNormal( - tree_.end(), std::move(outer_iter), std::move(bucket_iter), filter); + void CheckConsistencyExternal() const { + size_t n = 0; + for (const auto& bucket : tree_) { + assert(!bucket.empty()); + n += bucket.size(); + } + assert(n == size_); + } + + template + auto CreateIteratorFind(OUTER_ITER&& outer_iter, const T& value) const { + auto bucket_iter = + outer_iter == tree_.end() ? BucketIterType{} : outer_iter.second().find(value); + return IteratorNormal( + std::forward(outer_iter), std::move(bucket_iter)); } - template - auto CreateIteratorKnn( - OUTER_ITER outer_iter, BucketIterType bucket_iter, FILTER filter = FILTER()) const { - return IteratorKnn( - tree_.end(), std::move(outer_iter), std::move(bucket_iter), filter); + template + auto CreateIterator(OUTER_ITER&& outer_iter) const { + auto bucket_iter = + outer_iter == tree_.end() ? 
BucketIterType{} : outer_iter.second().begin(); + return IteratorNormal( + std::forward(outer_iter), std::move(bucket_iter)); } - template - static auto WrapFilter(FILTER filter) { - // We always have two iterators, one that traverses the PH-Tree and one that traverses the - // bucket. Using the FilterWrapper we create a new Filter for the PH-Tree iterator. This new - // filter checks only if nodes are valid. It cannot check whether buckets are valid. - // The original filter is then used when we iterate over the entries of a bucket. At this - // point, we do not need to check IsNodeValid anymore for each entry (see `IteratorNormal`). - struct FilterWrapper { - [[nodiscard]] constexpr bool IsEntryValid(const KeyInternal&, const BUCKET&) const { - // This filter is checked in the Iterator. - return true; - } - [[nodiscard]] constexpr bool IsNodeValid( - const KeyInternal& prefix, int bits_to_ignore) const { - return filter_.IsNodeValid(prefix, bits_to_ignore); - } - FILTER filter_; - }; - return FilterWrapper{filter}; + template + auto CreateIteratorKnn(OUTER_ITER&& outer_iter) const { + auto bucket_iter = + outer_iter == tree_.end() ? BucketIterType{} : outer_iter.second().begin(); + return IteratorKnn( + std::forward(outer_iter), std::move(bucket_iter)); } - template - struct CallbackWrapper { + /* + * This wrapper wraps the Filter and Callback such that the callback is called for every + * entry in any bucket that matches the user defined IsEntryValid(). + */ + template + class WrapCallbackFilter { + public: /* - * The CallbackWrapper ensures that we call the callback on each entry of the bucket. - * The vanilla PH-Tree call it only on the bucket itself. + * We always have two iterators, one that traverses the PH-Tree and returns 'buckets', the + * other iterator traverses the returned buckets. + * The wrapper ensures that the callback is called for every entry in a bucket. 
*/ - void operator()(const Key& key, const BUCKET& bucket) const { - auto internal_key = converter_.pre(key); - for (auto& entry : bucket) { - if (filter_.IsEntryValid(internal_key, entry)) { - callback_(key, entry); + template + WrapCallbackFilter(CB&& callback, F&& filter, const CONVERTER& converter) + : callback_{std::forward(callback)} + , filter_{std::forward(filter)} + , converter_{converter} {} + + [[nodiscard]] inline bool IsEntryValid( + const KeyInternal& internal_key, const BUCKET& bucket) { + if (filter_.IsEntryValid(internal_key, bucket)) { + auto key = converter_.post(internal_key); + for (auto& entry : bucket) { + if (filter_.IsBucketEntryValid(internal_key, entry)) { + callback_(key, entry); + } } } + // Return false. We already called the callback. + return false; } - CALLBACK_FN& callback_; - const FILTER filter_; + + [[nodiscard]] inline bool IsNodeValid(const KeyInternal& prefix, int bits_to_ignore) { + return filter_.IsNodeValid(prefix, bits_to_ignore); + } + + private: + CALLBACK callback_; + FILTER filter_; const CONVERTER& converter_; }; + struct NoOpCallback { + constexpr void operator()(const Key&, const BUCKET&) const noexcept {} + }; + v16::PhTreeV16 tree_; CONVERTER converter_; - IteratorNormal the_end_{tree_.end()}; - BucketIterType bucket_dummy_end_; size_t size_; }; @@ -710,14 +782,14 @@ template < dimension_t DIM, typename T, typename CONVERTER = ConverterIEEE, - typename BUCKET = std::unordered_set> + typename BUCKET = b_plus_tree_hash_set> using PhTreeMultiMapD = PhTreeMultiMap; template < dimension_t DIM, typename T, typename CONVERTER_BOX, - typename BUCKET = std::unordered_set> + typename BUCKET = b_plus_tree_hash_set> using PhTreeMultiMapBox = PhTreeMultiMap; /** @@ -730,7 +802,7 @@ template < dimension_t DIM, typename T, typename CONVERTER_BOX = ConverterBoxIEEE, - typename BUCKET = std::unordered_set> + typename BUCKET = b_plus_tree_hash_set> using PhTreeMultiMapBoxD = PhTreeMultiMapBox; } // namespace improbable::phtree 
diff --git a/phtree/phtree_test_unique_ptr_values.cc b/phtree/phtree_test_unique_ptr_values.cc deleted file mode 100644 index 7d7d6716..00000000 --- a/phtree/phtree_test_unique_ptr_values.cc +++ /dev/null @@ -1,184 +0,0 @@ -/* - * Copyright 2020 Improbable Worlds Limited - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "phtree/phtree.h" -#include -#include - -using namespace improbable::phtree; - -template -using TestPoint = PhPoint; - -template -using TestTree = PhTree; - -class IntRng { - public: - IntRng(int minIncl, int maxExcl) : eng(7), rnd{minIncl, maxExcl} {} - - int next() { - return rnd(eng); - } - - private: - std::default_random_engine eng; - std::uniform_int_distribution rnd; -}; - -struct IdObj { - IdObj() = default; - - explicit IdObj(const size_t i) : _i(static_cast(i)){}; - - bool operator==(IdObj& rhs) { - return _i == rhs._i; - } - - IdObj& operator=(IdObj const& rhs) = default; - - int _i; -}; - -using Id = std::unique_ptr; - -struct PointDistance { - PointDistance(double distance, size_t id) : _distance(distance), _id(id) {} - - double _distance; - size_t _id; -}; - -bool comparePointDistance(PointDistance& i1, PointDistance& i2) { - return (i1._distance < i2._distance); -} - -template -double distance(const TestPoint& p1, const TestPoint& p2) { - double sum2 = 0; - for (dimension_t i = 0; i < DIM; i++) { - double d = p1[i] - p2[i]; - sum2 += d * d; - } - return sqrt(sum2); -} - -template -double distanceL1(const 
TestPoint& p1, const TestPoint& p2) { - double sum = 0; - for (dimension_t i = 0; i < DIM; i++) { - sum += std::abs(p1[i] - p2[i]); - } - return sum; -} - -template -void generateCube(std::vector>& points, size_t N) { - IntRng rng(-1000, 1000); - auto refTree = std::map, size_t>(); - - points.reserve(N); - for (size_t i = 0; i < N; i++) { - auto point = TestPoint{rng.next(), rng.next(), rng.next()}; - if (refTree.count(point) != 0) { - i--; - continue; - } - - refTree.emplace(point, i); - points.push_back(point); - } - ASSERT_EQ(refTree.size(), N); - ASSERT_EQ(points.size(), N); -} - -template -void SmokeTestBasicOps(size_t N) { - TestTree tree; - std::vector> points; - generateCube(points, N); - - ASSERT_EQ(0, tree.size()); - ASSERT_TRUE(tree.empty()); - PhTreeDebugHelper::CheckConsistency(tree); - - for (size_t i = 0; i < N; i++) { - TestPoint& p = points.at(i); - ASSERT_EQ(tree.count(p), 0); - ASSERT_EQ(tree.end(), tree.find(p)); - - if (i % 2 == 0) { - ASSERT_TRUE(tree.emplace(p, std::make_unique(i)).second); - } else { - Id id = std::make_unique(i); - ASSERT_TRUE(tree.emplace(p, std::move(id)).second); - } - ASSERT_EQ(tree.count(p), 1); - ASSERT_NE(tree.end(), tree.find(p)); - ASSERT_EQ(i, (*tree.find(p))->_i); - ASSERT_EQ(i + 1, tree.size()); - - // try add again - ASSERT_FALSE(tree.emplace(p, std::make_unique(i)).second); - ASSERT_EQ(tree.count(p), 1); - ASSERT_NE(tree.end(), tree.find(p)); - ASSERT_EQ(i, (*tree.find(p))->_i); - ASSERT_EQ(i + 1, tree.size()); - ASSERT_FALSE(tree.empty()); - } - - for (size_t i = 0; i < N; i++) { - TestPoint& p = points.at(i); - auto q = tree.begin_query({p, p}); - ASSERT_NE(q, tree.end()); - ASSERT_EQ(i, (*q)->_i); - q++; - ASSERT_EQ(q, tree.end()); - } - - PhTreeDebugHelper::CheckConsistency(tree); - - for (size_t i = 0; i < N; i++) { - TestPoint& p = points.at(i); - ASSERT_NE(tree.find(p), tree.end()); - ASSERT_EQ(tree.count(p), 1); - ASSERT_EQ(i, (*tree.find(p))->_i); - ASSERT_EQ(1, tree.erase(p)); - - 
ASSERT_EQ(tree.count(p), 0); - ASSERT_EQ(tree.end(), tree.find(p)); - ASSERT_EQ(N - i - 1, tree.size()); - - // try remove again - ASSERT_EQ(0, tree.erase(p)); - ASSERT_EQ(tree.count(p), 0); - ASSERT_EQ(tree.end(), tree.find(p)); - ASSERT_EQ(N - i - 1, tree.size()); - if (i < N - 1) { - ASSERT_FALSE(tree.empty()); - } - } - ASSERT_EQ(0, tree.size()); - ASSERT_TRUE(tree.empty()); - PhTreeDebugHelper::CheckConsistency(tree); -} - -TEST(PhTreeTestUniquePtr, SmokeTestBasicOps) { - SmokeTestBasicOps<3>(10000); - SmokeTestBasicOps<6>(10000); - SmokeTestBasicOps<10>(1000); - SmokeTestBasicOps<20>(100); -} diff --git a/phtree/v16/BUILD b/phtree/v16/BUILD index b44b14a1..caf9f902 100644 --- a/phtree/v16/BUILD +++ b/phtree/v16/BUILD @@ -13,7 +13,7 @@ cc_library( "iterator_full.h", "iterator_hc.h", "iterator_knn_hs.h", - "iterator_simple.h", + "iterator_with_parent.h", "node.h", "phtree_v16.h", ], diff --git a/phtree/v16/CMakeLists.txt b/phtree/v16/CMakeLists.txt index 1aa65630..871de932 100644 --- a/phtree/v16/CMakeLists.txt +++ b/phtree/v16/CMakeLists.txt @@ -9,6 +9,6 @@ target_sources(phtree iterator_full.h iterator_hc.h iterator_knn_hs.h - iterator_simple.h + iterator_with_parent.h phtree_v16.h ) diff --git a/phtree/v16/debug_helper_v16.h b/phtree/v16/debug_helper_v16.h index 85ef92d9..5252fd53 100644 --- a/phtree/v16/debug_helper_v16.h +++ b/phtree/v16/debug_helper_v16.h @@ -30,11 +30,10 @@ class PhTreeV16; template class DebugHelperV16 : public PhTreeDebugHelper::DebugHelper { - using KeyT = PhPoint; - using NodeT = Node; + using EntryT = Entry; public: - DebugHelperV16(const NodeT& root, size_t size) : root_{root}, size_{size} {} + DebugHelperV16(const EntryT& root, size_t size) : root_{root}, size_{size} {} /* * Depending on the detail parameter this returns: @@ -57,7 +56,7 @@ class DebugHelperV16 : public PhTreeDebugHelper::DebugHelper { ToStringPlain(os, root_); break; case Enum::tree: - ToStringTree(os, 0, root_, KeyT{}, true); + ToStringTree(os, 0, root_, 
MAX_BIT_WIDTH, true); break; } return os.str(); @@ -70,7 +69,7 @@ class DebugHelperV16 : public PhTreeDebugHelper::DebugHelper { */ [[nodiscard]] PhTreeStats GetStats() const override { PhTreeStats stats; - root_.GetStats(stats); + root_.GetNode().GetStats(stats, root_); return stats; } @@ -78,19 +77,19 @@ class DebugHelperV16 : public PhTreeDebugHelper::DebugHelper { * Checks the consistency of the tree. This function requires assertions to be enabled. */ void CheckConsistency() const override { - assert(size_ == root_.CheckConsistency()); + assert(size_ == root_.GetNode().CheckConsistency(root_)); } private: - void ToStringPlain(std::ostringstream& os, const NodeT& node) const { - for (auto& it : node.Entries()) { - const auto& o = it.second; + void ToStringPlain(std::ostringstream& os, const EntryT& entry) const { + for (auto& it : entry.GetNode().Entries()) { + const auto& child = it.second; // inner node? - if (o.IsNode()) { - ToStringPlain(os, o.GetNode()); + if (child.IsNode()) { + ToStringPlain(os, child); } else { - os << o.GetKey(); - os << " v=" << (o.IsValue() ? "T" : "null") << std::endl; + os << child.GetKey(); + os << " v=" << (child.IsValue() ? 
"T" : "null") << std::endl; } } } @@ -98,50 +97,53 @@ class DebugHelperV16 : public PhTreeDebugHelper::DebugHelper { void ToStringTree( std::ostringstream& sb, bit_width_t current_depth, - const NodeT& node, - const KeyT& prefix, + const EntryT& entry, + const bit_width_t parent_postfix_len, bool printValue) const { std::string ind = "*"; for (bit_width_t i = 0; i < current_depth; ++i) { ind += "-"; } - sb << ind << "il=" << node.GetInfixLen() << " pl=" << node.GetPostfixLen() - << " ec=" << node.GetEntryCount() << " inf=["; + const auto& node = entry.GetNode(); + const auto infix_len = entry.GetNodeInfixLen(parent_postfix_len); + const auto postfix_len = entry.GetNodePostfixLen(); + sb << ind << "il=" << infix_len << " pl=" << postfix_len << " ec=" << node.GetEntryCount() + << " inf=["; // for a leaf node, the existence of a sub just indicates that the value exists. - if (node.GetInfixLen() > 0) { - bit_mask_t mask = MAX_MASK << node.GetInfixLen(); + if (infix_len > 0) { + bit_mask_t mask = MAX_MASK << infix_len; mask = ~mask; - mask <<= node.GetPostfixLen() + 1; + mask <<= (std::uint64_t)postfix_len + 1; for (dimension_t i = 0; i < DIM; ++i) { - sb << ToBinary(prefix[i] & mask) << ","; + sb << ToBinary(entry.GetKey()[i] & mask) << ","; } } - current_depth += node.GetInfixLen(); + current_depth += infix_len; sb << "] " - << "Node___il=" << node.GetInfixLen() << ";pl=" << node.GetPostfixLen() + << "Node___il=" << infix_len << ";pl=" << postfix_len << ";size=" << node.Entries().size() << std::endl; // To clean previous postfixes. 
for (auto& it : node.Entries()) { - const auto& o = it.second; + const auto& child = it.second; hc_pos_t hcPos = it.first; - if (o.IsNode()) { + if (child.IsNode()) { sb << ind << "# " << hcPos << " Node: " << std::endl; - ToStringTree(sb, current_depth + 1, o.GetNode(), o.GetKey(), printValue); + ToStringTree(sb, current_depth + 1, child, postfix_len, printValue); } else { // post-fix - sb << ind << ToBinary(o.GetKey()); + sb << ind << ToBinary(child.GetKey()); sb << " hcPos=" << hcPos; if (printValue) { - sb << " v=" << (o.IsValue() ? "T" : "null"); + sb << " v=" << (child.IsValue() ? "T" : "null"); } sb << std::endl; } } } - const NodeT& root_; + const EntryT& root_; const size_t size_; }; } // namespace improbable::phtree::v16 diff --git a/phtree/v16/entry.h b/phtree/v16/entry.h index 1c8610fc..6b2a2dbf 100644 --- a/phtree/v16/entry.h +++ b/phtree/v16/entry.h @@ -17,11 +17,10 @@ #ifndef PHTREE_V16_ENTRY_H #define PHTREE_V16_ENTRY_H -#include "../../phtree/common/common.h" +#include "phtree/common/common.h" #include "node.h" #include #include -#include namespace improbable::phtree::v16 { @@ -29,8 +28,8 @@ template class Node; /* - * Nodes in the PH-Tree contain up to 2^DIM PhEntries, one in each geometric quadrant. - * PhEntries can contain two types of data: + * Nodes in the PH-Tree contain up to 2^DIM Entries, one in each geometric quadrant. + * Entries can contain two types of data: * - A key/value pair (value of type T) * - A prefix/child-node pair, where prefix is the prefix of the child node and the * child node is contained in a unique_ptr. @@ -41,87 +40,204 @@ class Entry { using ValueT = std::remove_const_t; using NodeT = Node; + enum { + VALUE = 0, + NODE = 1, + EMPTY = 2, + }; + public: /* * Construct entry with existing node. 
*/ - Entry(const KeyT& k, std::unique_ptr&& node_ptr) - : kd_key_{k}, node_{std::move(node_ptr)}, value_{std::nullopt} {} + Entry(const KeyT& k, std::unique_ptr&& node_ptr, bit_width_t postfix_len) noexcept + : kd_key_{k} + , node_{std::move(node_ptr)} + , union_type_{NODE} + , postfix_len_{static_cast(postfix_len)} {} /* - * Construct entry with a new node. + * Construct entry with existing T (T is not movable). */ - Entry(bit_width_t infix_len, bit_width_t postfix_len) - : kd_key_(), node_{std::make_unique(infix_len, postfix_len)}, value_{std::nullopt} {} + template + Entry( + const KeyT& k, + ValueT2&& value, + typename std::enable_if_t, int>::type = 0) noexcept + : kd_key_{k}, value_(value), union_type_{VALUE}, postfix_len_{0} {} /* - * Construct entry with existing T. + * Construct entry with existing T (T must be movable). */ - Entry(const KeyT& k, std::optional&& value) - : kd_key_{k}, node_{nullptr}, value_{std::move(value)} {} + template + Entry( + const KeyT& k, + ValueT2&& value, + typename std::enable_if_t, int>::type = 0) noexcept + : kd_key_{k}, value_(std::forward(value)), union_type_{VALUE}, postfix_len_{0} {} /* - * Construct entry with new T or moved T. + * Construct entry with new T or copied T (T is not movable). */ - template - explicit Entry(const KeyT& k, Args&&... args) - : kd_key_{k}, node_{nullptr}, value_{std::in_place, std::forward(args)...} {} + template < + typename ValueT2 = ValueT, + typename = std::enable_if_t>> + explicit Entry(const KeyT& k, const ValueT& value) noexcept + : kd_key_{k}, value_(value), union_type_{VALUE}, postfix_len_{0} {} + + /* + * Construct entry with new T or copied T (T is not movable, using T's default constructor). + */ + template < + typename ValueT2 = ValueT, + typename = std::enable_if_t>> + explicit Entry(const KeyT& k) noexcept + : kd_key_{k}, value_(), union_type_{VALUE}, postfix_len_{0} {} + + /* + * Construct entry with new T or moved T (T must be movable). + */ + template < + typename... 
Args, + typename ValueT2 = ValueT, + typename = std::enable_if_t>> + explicit Entry(const KeyT& k, Args&&... args) noexcept + : kd_key_{k}, value_(std::forward(args)...), union_type_{VALUE}, postfix_len_{0} {} + + Entry(const Entry& other) = delete; + Entry& operator=(const Entry& other) = delete; + + Entry(Entry&& other) noexcept + : kd_key_{std::move(other.kd_key_)}, union_type_{std::move(other.union_type_)} { + postfix_len_ = std::move(other.postfix_len_); + AssignUnion(std::move(other)); + } + + Entry& operator=(Entry&& other) noexcept { + kd_key_ = std::move(other.kd_key_); + postfix_len_ = std::move(other.postfix_len_); + DestroyUnion(); + AssignUnion(std::move(other)); + return *this; + } + + ~Entry() noexcept { + DestroyUnion(); + } [[nodiscard]] const KeyT& GetKey() const { return kd_key_; } [[nodiscard]] bool IsValue() const { - return value_.has_value(); + return union_type_ == VALUE; } [[nodiscard]] bool IsNode() const { - return node_.get() != nullptr; + return union_type_ == NODE; } [[nodiscard]] T& GetValue() const { - assert(IsValue()); - return const_cast(*value_); + assert(union_type_ == VALUE); + return const_cast(value_); } [[nodiscard]] NodeT& GetNode() const { - assert(IsNode()); + assert(union_type_ == NODE); return *node_; } - void SetNode(std::unique_ptr&& node) { - assert(!IsNode()); - node_ = std::move(node); - value_.reset(); + void SetKey(const KeyT& key) noexcept { + kd_key_ = key; + } + + void SetNode(std::unique_ptr&& node, bit_width_t postfix_len) noexcept { + postfix_len_ = static_cast(postfix_len); + DestroyUnion(); + union_type_ = NODE; + new (&node_) std::unique_ptr{std::move(node)}; + assert(!node); } - [[nodiscard]] std::optional&& ExtractValue() { + [[nodiscard]] bit_width_t GetNodePostfixLen() const noexcept { + assert(IsNode()); + return postfix_len_; + } + + [[nodiscard]] bit_width_t GetNodeInfixLen(bit_width_t parent_postfix_len) const noexcept { + assert(IsNode()); + return parent_postfix_len - GetNodePostfixLen() - 1; + 
} + + [[nodiscard]] bool HasNodeInfix(bit_width_t parent_postfix_len) const noexcept { + assert(IsNode()); + return parent_postfix_len - GetNodePostfixLen() - 1 > 0; + } + + [[nodiscard]] ValueT&& ExtractValue() noexcept { assert(IsValue()); return std::move(value_); } - [[nodiscard]] std::unique_ptr&& ExtractNode() { + [[nodiscard]] std::unique_ptr&& ExtractNode() noexcept { assert(IsNode()); + // Moving the node somewhere else means we should remove it here: + union_type_ = EMPTY; return std::move(node_); } void ReplaceNodeWithDataFromEntry(Entry&& other) { assert(IsNode()); - kd_key_ = other.GetKey(); + // 'other' may be referenced from the local node, so we need to do move(other) + // before destructing the local node. + auto node = std::move(node_); + union_type_ = EMPTY; + *this = std::move(other); + node.reset(); + } - if (other.IsNode()) { - node_ = std::move(other.node_); + private: + void AssignUnion(Entry&& other) noexcept { + union_type_ = std::move(other.union_type_); + if (union_type_ == NODE) { + new (&node_) std::unique_ptr{std::move(other.node_)}; + } else if (union_type_ == VALUE) { + if constexpr (std::is_move_constructible_v) { + new (&value_) ValueT{std::move(other.value_)}; + } else { + new (&value_) ValueT{other.value_}; + } } else { - value_ = std::move(other.value_); - node_.reset(); + assert(false && "Assigning from an EMPTY variant is a waste of time."); } } - private: + void DestroyUnion() noexcept { + if (union_type_ == VALUE) { + value_.~ValueT(); + } else if (union_type_ == NODE) { + node_.~unique_ptr(); + } else { + assert(union_type_ == EMPTY); + } + union_type_ = EMPTY; + } + KeyT kd_key_; - std::unique_ptr node_; - std::optional value_; + union { + std::unique_ptr node_; + ValueT value_; + }; + std::uint16_t union_type_; + // The length (number of bits) of post fixes (the part of the coordinate that is 'below' the + // current node). 
If a variable prefix_len would refer to the number of bits in this node's + // prefix, and if we assume 64 bit values, the following would always hold: + // prefix_len + 1 + postfix_len = 64. + // The '+1' accounts for the 1 bit that is represented by the local node's hypercube, + // i.e. the same bit that is used to create the lookup keys in entries_. + std::uint16_t postfix_len_; }; + } // namespace improbable::phtree::v16 #endif // PHTREE_V16_ENTRY_H diff --git a/phtree/v16/for_each.h b/phtree/v16/for_each.h index aee3d157..7a97b537 100644 --- a/phtree/v16/for_each.h +++ b/phtree/v16/for_each.h @@ -18,7 +18,7 @@ #define PHTREE_V16_FOR_EACH_H #include "../common/common.h" -#include "iterator_simple.h" +#include "iterator_with_parent.h" namespace improbable::phtree::v16 { @@ -26,47 +26,43 @@ namespace improbable::phtree::v16 { * Iterates over the whole tree. Entries and child nodes that are rejected by the Filter are not * traversed or returned. */ -template +template class ForEach { static constexpr dimension_t DIM = CONVERT::DimInternal; - using KeyExternal = typename CONVERT::KeyExternal; using KeyInternal = typename CONVERT::KeyInternal; using SCALAR = typename CONVERT::ScalarInternal; using EntryT = Entry; - using NodeT = Node; public: - ForEach(const CONVERT& converter, CALLBACK_FN& callback, FILTER filter) - : converter_{converter}, callback_{callback}, filter_(std::move(filter)) {} + template + ForEach(const CONVERT* converter, CB&& callback, F&& filter) + : converter_{converter} + , callback_{std::forward(callback)} + , filter_(std::forward(filter)) {} - void run(const EntryT& root) { - assert(root.IsNode()); - TraverseNode(root.GetKey(), root.GetNode()); - } - - private: - void TraverseNode(const KeyInternal& key, const NodeT& node) { - auto iter = node.Entries().begin(); - auto end = node.Entries().end(); + void Traverse(const EntryT& entry) { + assert(entry.IsNode()); + auto& entries = entry.GetNode().Entries(); + auto iter = entries.begin(); + auto 
end = entries.end(); for (; iter != end; ++iter) { const auto& child = iter->second; const auto& child_key = child.GetKey(); if (child.IsNode()) { - const auto& child_node = child.GetNode(); - if (filter_.IsNodeValid(key, node.GetPostfixLen() + 1)) { - TraverseNode(child_key, child_node); + if (filter_.IsNodeValid(child_key, child.GetNodePostfixLen() + 1)) { + Traverse(child); } } else { T& value = child.GetValue(); - if (filter_.IsEntryValid(key, value)) { - callback_(converter_.post(child_key), value); + if (filter_.IsEntryValid(child_key, value)) { + callback_(converter_->post(child_key), value); } } } } - CONVERT converter_; - CALLBACK_FN& callback_; + const CONVERT* converter_; + CALLBACK callback_; FILTER filter_; }; } // namespace improbable::phtree::v16 diff --git a/phtree/v16/for_each_hc.h b/phtree/v16/for_each_hc.h index d870debc..203969a4 100644 --- a/phtree/v16/for_each_hc.h +++ b/phtree/v16/for_each_hc.h @@ -18,7 +18,7 @@ #define PHTREE_V16_FOR_EACH_HC_H #include "../common/common.h" -#include "iterator_simple.h" +#include "iterator_with_parent.h" namespace improbable::phtree::v16 { @@ -33,40 +33,36 @@ namespace improbable::phtree::v16 { * For details see "Efficient Z-Ordered Traversal of Hypercube Indexes" by T. Zäschke, M.C. Norrie, * 2017. 
*/ -template +template class ForEachHC { static constexpr dimension_t DIM = CONVERT::DimInternal; - using KeyExternal = typename CONVERT::KeyExternal; using KeyInternal = typename CONVERT::KeyInternal; using SCALAR = typename CONVERT::ScalarInternal; using EntryT = Entry; - using NodeT = Node; public: + template ForEachHC( const KeyInternal& range_min, const KeyInternal& range_max, - const CONVERT& converter, - CALLBACK_FN& callback, - FILTER filter) + const CONVERT* converter, + CB&& callback, + F&& filter) : range_min_{range_min} , range_max_{range_max} , converter_{converter} - , callback_{callback} - , filter_(std::move(filter)) {} + , callback_{std::forward(callback)} + , filter_(std::forward(filter)) {} - void run(const EntryT& root) { - assert(root.IsNode()); - TraverseNode(root.GetKey(), root.GetNode()); - } - - private: - void TraverseNode(const KeyInternal& key, const NodeT& node) { + void Traverse(const EntryT& entry) { + assert(entry.IsNode()); hc_pos_t mask_lower = 0; hc_pos_t mask_upper = 0; - CalcLimits(node.GetPostfixLen(), key, mask_lower, mask_upper); - auto iter = node.Entries().lower_bound(mask_lower); - auto end = node.Entries().end(); + CalcLimits(entry.GetNodePostfixLen(), entry.GetKey(), mask_lower, mask_upper); + auto& entries = entry.GetNode().Entries(); + auto postfix_len = entry.GetNodePostfixLen(); + auto iter = entries.lower_bound(mask_lower); + auto end = entries.end(); for (; iter != end && iter->first <= mask_upper; ++iter) { auto child_hc_pos = iter->first; // Use bit-mask magic to check whether we are in a valid quadrant. 
@@ -75,29 +71,30 @@ class ForEachHC { const auto& child = iter->second; const auto& child_key = child.GetKey(); if (child.IsNode()) { - const auto& child_node = child.GetNode(); - if (CheckNode(child_key, child_node)) { - TraverseNode(child_key, child_node); + if (CheckNode(child, postfix_len)) { + Traverse(child); } } else { T& value = child.GetValue(); if (IsInRange(child_key, range_min_, range_max_) && - ApplyFilter(child_key, value)) { - callback_(converter_.post(child_key), value); + filter_.IsEntryValid(child_key, value)) { + callback_(converter_->post(child_key), value); } } } } } - bool CheckNode(const KeyInternal& key, const NodeT& node) const { + bool CheckNode(const EntryT& entry, bit_width_t parent_postfix_len) { + const KeyInternal& key = entry.GetKey(); // Check if the node overlaps with the query box. // An infix with len=0 implies that at least part of the child node overlaps with the query, // otherwise the bit mask checking would have returned 'false'. - if (node.GetInfixLen() > 0) { + // Putting it differently, if the infix has len=0, then there is no point in validating it. + if (entry.HasNodeInfix(parent_postfix_len)) { // Mask for comparing the prefix with the query boundaries. 
- assert(node.GetPostfixLen() + 1 < MAX_BIT_WIDTH); - SCALAR comparison_mask = MAX_MASK << (node.GetPostfixLen() + 1); + assert(entry.GetNodePostfixLen() + 1 < MAX_BIT_WIDTH); + SCALAR comparison_mask = MAX_MASK << (entry.GetNodePostfixLen() + 1); for (dimension_t dim = 0; dim < DIM; ++dim) { SCALAR prefix = key[dim] & comparison_mask; if (prefix > range_max_[dim] || prefix < (range_min_[dim] & comparison_mask)) { @@ -105,15 +102,7 @@ class ForEachHC { } } } - return ApplyFilter(key, node); - } - - [[nodiscard]] bool ApplyFilter(const KeyInternal& key, const NodeT& node) const { - return filter_.IsNodeValid(key, node.GetPostfixLen() + 1); - } - - [[nodiscard]] bool ApplyFilter(const KeyInternal& key, const T& value) const { - return filter_.IsEntryValid(key, value); + return filter_.IsNodeValid(key, entry.GetNodePostfixLen() + 1); } void CalcLimits( @@ -180,8 +169,8 @@ class ForEachHC { const KeyInternal range_min_; const KeyInternal range_max_; - CONVERT converter_; - CALLBACK_FN& callback_; + const CONVERT* converter_; + CALLBACK callback_; FILTER filter_; }; } // namespace improbable::phtree::v16 diff --git a/phtree/v16/iterator_base.h b/phtree/v16/iterator_base.h index 50ac8708..5a99c4c8 100644 --- a/phtree/v16/iterator_base.h +++ b/phtree/v16/iterator_base.h @@ -22,132 +22,100 @@ namespace improbable::phtree::v16 { -template -class PhTreeV16; - /* * Base class for all PH-Tree iterators. 
*/ -template +template class IteratorBase { - protected: - static constexpr dimension_t DIM = CONVERT::DimInternal; - using KeyInternal = typename CONVERT::KeyInternal; - using SCALAR = typename CONVERT::ScalarInternal; - using EntryT = Entry; - friend PhTreeV16; - public: - explicit IteratorBase(const CONVERT& converter) - : current_result_{nullptr} - , current_node_{} - , parent_node_{} - , is_finished_{false} - , converter_{converter} - , filter_{FILTER()} {} - - explicit IteratorBase(const CONVERT& converter, FILTER filter) - : current_result_{nullptr} - , current_node_{} - , parent_node_{} - , is_finished_{false} - , converter_{converter} - , filter_(std::move(filter)) {} - - T& operator*() const { - assert(current_result_); - return current_result_->GetValue(); - } + explicit IteratorBase() noexcept : current_entry_{nullptr} {} + explicit IteratorBase(const EntryT* current_entry) noexcept : current_entry_{current_entry} {} - T* operator->() const { - assert(current_result_); - return ¤t_result_->GetValue(); + inline auto& operator*() const noexcept { + assert(current_entry_); + return current_entry_->GetValue(); } - template - friend bool operator==( - const IteratorBase& left, - const IteratorBase& right) { - // Note: The following compares pointers to Entry objects so it should be - // a) fast (i.e. 
not comparing contents of entries) - // b) return `false` when comparing apparently identical entries from different PH-Trees (as - // intended) - return (left.is_finished_ && right.Finished()) || - (!left.is_finished_ && !right.Finished() && - left.current_result_ == right.GetCurrentResult()); + inline auto* operator->() const noexcept { + assert(current_entry_); + return ¤t_entry_->GetValue(); } - template - friend bool operator!=( - const IteratorBase& left, - const IteratorBase& right) { - return !(left == right); + inline friend bool operator==( + const IteratorBase& left, const IteratorBase& right) noexcept { + return left.current_entry_ == right.current_entry_; } - auto first() const { - return converter_.post(current_result_->GetKey()); + inline friend bool operator!=( + const IteratorBase& left, const IteratorBase& right) noexcept { + return left.current_entry_ != right.current_entry_; } - T& second() const { - return current_result_->GetValue(); + auto& second() const { + return current_entry_->GetValue(); } - [[nodiscard]] bool Finished() const { - return is_finished_; + [[nodiscard]] inline bool IsEnd() const noexcept { + return current_entry_ == nullptr; } - const EntryT* GetCurrentResult() const { - return current_result_; + inline EntryT* GetEntry() const noexcept { + return const_cast(current_entry_); } protected: void SetFinished() { - is_finished_ = true; - current_result_ = nullptr; + current_entry_ = nullptr; } - [[nodiscard]] bool ApplyFilter(const EntryT& entry) const { - return entry.IsNode() - ? 
filter_.IsNodeValid(entry.GetKey(), entry.GetNode().GetPostfixLen() + 1) - : filter_.IsEntryValid(entry.GetKey(), entry.GetValue()); + void SetCurrentResult(const EntryT* current_entry) { + current_entry_ = current_entry; } - void SetCurrentResult(const EntryT* current_result) { - current_result_ = current_result; - } + protected: + const EntryT* current_entry_; +}; - void SetCurrentNodeEntry(const EntryT* current_node) { - assert(!current_node || current_node->IsNode()); - current_node_ = current_node; - } +template +using IteratorEnd = IteratorBase; + +template +class IteratorWithFilter +: public IteratorBase> { + protected: + static constexpr dimension_t DIM = CONVERT::DimInternal; + using KeyInternal = typename CONVERT::KeyInternal; + using SCALAR = typename CONVERT::ScalarInternal; + using EntryT = Entry; + + public: + template + explicit IteratorWithFilter(const CONVERT* converter, F&& filter) noexcept + : IteratorBase(nullptr), converter_{converter}, filter_{std::forward(filter)} {} - void SetParentNodeEntry(const EntryT* parent_node) { - assert(!parent_node || parent_node->IsNode()); - parent_node_ = parent_node; + explicit IteratorWithFilter(const EntryT* current_entry, const CONVERT* converter) noexcept + : IteratorBase(current_entry), converter_{converter}, filter_{FILTER()} {} + + auto first() const { + return converter_->post(this->current_entry_->GetKey()); } - auto post(const KeyInternal& point) { - return converter_.post(point); + auto& __Filter() { + return filter_; } - private: - /* - * The parent entry contains the parent node. The parent node is the node ABOVE the current node - * which contains the current entry. - */ - const EntryT* GetCurrentNodeEntry() const { - return current_node_; + protected: + [[nodiscard]] bool ApplyFilter(const EntryT& entry) { + return entry.IsNode() ? 
filter_.IsNodeValid(entry.GetKey(), entry.GetNodePostfixLen() + 1) + : filter_.IsEntryValid(entry.GetKey(), entry.GetValue()); } - const EntryT* GetParentNodeEntry() const { - return parent_node_; + auto post(const KeyInternal& point) { + return converter_->post(point); } - const EntryT* current_result_; - const EntryT* current_node_; - const EntryT* parent_node_; - bool is_finished_; - const CONVERT& converter_; + private: + const CONVERT* converter_; FILTER filter_; }; diff --git a/phtree/v16/iterator_full.h b/phtree/v16/iterator_full.h index b60be035..37531a63 100644 --- a/phtree/v16/iterator_full.h +++ b/phtree/v16/iterator_full.h @@ -26,32 +26,35 @@ template class Node; template -class IteratorFull : public IteratorBase { +class IteratorFull : public IteratorWithFilter { static constexpr dimension_t DIM = CONVERT::DimInternal; using SCALAR = typename CONVERT::ScalarInternal; using NodeT = Node; - using EntryT = typename IteratorBase::EntryT; + using EntryT = typename IteratorWithFilter::EntryT; public: - IteratorFull(const EntryT& root, const CONVERT& converter, FILTER filter) - : IteratorBase(converter, filter), stack_{}, stack_size_{0} { + template + IteratorFull(const EntryT& root, const CONVERT* converter, F&& filter) + : IteratorWithFilter(converter, std::forward(filter)) + , stack_{} + , stack_size_{0} { PrepareAndPush(root.GetNode()); FindNextElement(); } - IteratorFull& operator++() { + IteratorFull& operator++() noexcept { FindNextElement(); return *this; } - IteratorFull operator++(int) { + IteratorFull operator++(int) noexcept { IteratorFull iterator(*this); ++(*this); return iterator; } private: - void FindNextElement() { + void FindNextElement() noexcept { while (!IsEmpty()) { auto* p = &Peek(); while (*p != PeekEnd()) { @@ -82,22 +85,22 @@ class IteratorFull : public IteratorBase { return stack_[stack_size_ - 1].first; } - auto& Peek() { + auto& Peek() noexcept { assert(stack_size_ > 0); return stack_[stack_size_ - 1].first; } - auto& PeekEnd() { 
+ auto& PeekEnd() noexcept { assert(stack_size_ > 0); return stack_[stack_size_ - 1].second; } - auto& Pop() { + auto& Pop() noexcept { assert(stack_size_ > 0); return stack_[--stack_size_].first; } - bool IsEmpty() { + bool IsEmpty() noexcept { return stack_size_ == 0; } diff --git a/phtree/v16/iterator_hc.h b/phtree/v16/iterator_hc.h index 2485550c..64a67a94 100644 --- a/phtree/v16/iterator_hc.h +++ b/phtree/v16/iterator_hc.h @@ -18,7 +18,7 @@ #define PHTREE_V16_ITERATOR_HC_H #include "../common/common.h" -#include "iterator_simple.h" +#include "iterator_with_parent.h" namespace improbable::phtree::v16 { @@ -42,44 +42,45 @@ class NodeIterator; * 2017. */ template -class IteratorHC : public IteratorBase { +class IteratorHC : public IteratorWithFilter { static constexpr dimension_t DIM = CONVERT::DimInternal; using KeyInternal = typename CONVERT::KeyInternal; using SCALAR = typename CONVERT::ScalarInternal; - using EntryT = typename IteratorBase::EntryT; + using EntryT = typename IteratorWithFilter::EntryT; public: + template IteratorHC( const EntryT& root, const KeyInternal& range_min, const KeyInternal& range_max, - const CONVERT& converter, - FILTER filter) - : IteratorBase(converter, filter) + const CONVERT* converter, + F&& filter) + : IteratorWithFilter(converter, std::forward(filter)) , stack_size_{0} , range_min_{range_min} , range_max_{range_max} { + stack_.reserve(8); PrepareAndPush(root); FindNextElement(); } - IteratorHC& operator++() { + IteratorHC& operator++() noexcept { FindNextElement(); return *this; } - IteratorHC operator++(int) { + IteratorHC operator++(int) noexcept { IteratorHC iterator(*this); ++(*this); return iterator; } private: - void FindNextElement() { - assert(!this->Finished()); + void FindNextElement() noexcept { while (!IsEmpty()) { auto* p = &Peek(); - const EntryT* current_result = nullptr; + const EntryT* current_result; while ((current_result = p->Increment(range_min_, range_max_))) { if (this->ApplyFilter(*current_result)) { 
if (current_result->IsNode()) { @@ -97,28 +98,31 @@ class IteratorHC : public IteratorBase { this->SetFinished(); } - auto& PrepareAndPush(const EntryT& entry) { - assert(stack_size_ < stack_.size() - 1); + auto& PrepareAndPush(const EntryT& entry) noexcept { + if (stack_.size() < stack_size_ + 1) { + stack_.emplace_back(); + } + assert(stack_size_ < stack_.size()); auto& ni = stack_[stack_size_++]; - ni.init(range_min_, range_max_, entry.GetNode(), entry.GetKey()); + ni.Init(range_min_, range_max_, entry); return ni; } - auto& Peek() { + auto& Peek() noexcept { assert(stack_size_ > 0); return stack_[stack_size_ - 1]; } - auto& Pop() { + auto& Pop() noexcept { assert(stack_size_ > 0); return stack_[--stack_size_]; } - bool IsEmpty() { + bool IsEmpty() noexcept { return stack_size_ == 0; } - std::array, MAX_BIT_WIDTH> stack_; + std::vector> stack_; size_t stack_size_; const KeyInternal range_min_; const KeyInternal range_max_; @@ -129,15 +133,17 @@ template class NodeIterator { using KeyT = PhPoint; using EntryT = Entry; - using NodeT = Node; + using EntriesT = EntryMap; public: - NodeIterator() : iter_{}, node_{nullptr}, mask_lower_{0}, mask_upper_(0) {} + NodeIterator() : iter_{}, entries_{nullptr}, mask_lower_{0}, mask_upper_{0}, postfix_len_{0} {} - void init(const KeyT& range_min, const KeyT& range_max, const NodeT& node, const KeyT& prefix) { - node_ = &node; - CalcLimits(node.GetPostfixLen(), range_min, range_max, prefix); + void Init(const KeyT& range_min, const KeyT& range_max, const EntryT& entry) { + auto& node = entry.GetNode(); + CalcLimits(entry.GetNodePostfixLen(), range_min, range_max, entry.GetKey()); iter_ = node.Entries().lower_bound(mask_lower_); + entries_ = &node.Entries(); + postfix_len_ = entry.GetNodePostfixLen(); } /* @@ -145,7 +151,7 @@ class NodeIterator { * @return TRUE iff a matching element was found. 
*/ const EntryT* Increment(const KeyT& range_min, const KeyT& range_max) { - while (iter_ != node_->Entries().end() && iter_->first <= mask_upper_) { + while (iter_ != entries_->end() && iter_->first <= mask_upper_) { if (IsPosValid(iter_->first)) { const auto* be = &iter_->second; if (CheckEntry(*be, range_min, range_max)) { @@ -163,16 +169,16 @@ class NodeIterator { return IsInRange(candidate.GetKey(), range_min, range_max); } - auto& node = candidate.GetNode(); // Check if node-prefix allows sub-node to contain any useful values. // An infix with len=0 implies that at least part of the child node overlaps with the query. - if (node.GetInfixLen() == 0) { + // Putting it differently, if the infix has len=0, then there is no point in validating it. + if (!candidate.HasNodeInfix(postfix_len_)) { return true; } // Mask for comparing the prefix with the query boundaries. - assert(node.GetPostfixLen() + 1 < MAX_BIT_WIDTH); - SCALAR comparison_mask = MAX_MASK << (node.GetPostfixLen() + 1); + assert(candidate.GetNodePostfixLen() + 1 < MAX_BIT_WIDTH); + SCALAR comparison_mask = MAX_MASK << (candidate.GetNodePostfixLen() + 1); auto& key = candidate.GetKey(); for (dimension_t dim = 0; dim < DIM; ++dim) { SCALAR in = key[dim] & comparison_mask; @@ -184,7 +190,7 @@ class NodeIterator { } private: - [[nodiscard]] bool IsPosValid(hc_pos_t key) const { + [[nodiscard]] inline bool IsPosValid(hc_pos_t key) const noexcept { return ((key | mask_lower_) & mask_upper_) == key; } @@ -254,9 +260,10 @@ class NodeIterator { private: EntryIteratorC iter_; - const NodeT* node_; + EntriesT* entries_; hc_pos_t mask_lower_; hc_pos_t mask_upper_; + bit_width_t postfix_len_; }; } // namespace } // namespace improbable::phtree::v16 diff --git a/phtree/v16/iterator_knn_hs.h b/phtree/v16/iterator_knn_hs.h index 3c30f7d6..ca8aac80 100644 --- a/phtree/v16/iterator_knn_hs.h +++ b/phtree/v16/iterator_knn_hs.h @@ -44,29 +44,30 @@ struct CompareEntryDistByDistance { } // namespace template -class 
IteratorKnnHS : public IteratorBase { +class IteratorKnnHS : public IteratorWithFilter { static constexpr dimension_t DIM = CONVERT::DimInternal; using KeyExternal = typename CONVERT::KeyExternal; using KeyInternal = typename CONVERT::KeyInternal; using SCALAR = typename CONVERT::ScalarInternal; - using EntryT = typename IteratorBase::EntryT; + using EntryT = typename IteratorWithFilter::EntryT; using EntryDistT = EntryDist; public: + template explicit IteratorKnnHS( const EntryT& root, size_t min_results, const KeyInternal& center, - const CONVERT& converter, - DISTANCE dist, - FILTER filter) - : IteratorBase(converter, filter) + const CONVERT* converter, + DIST&& dist, + F&& filter) + : IteratorWithFilter(converter, std::forward(filter)) , center_{center} - , center_post_{converter.post(center)} + , center_post_{converter->post(center)} , current_distance_{std::numeric_limits::max()} , num_found_results_(0) , num_requested_results_(min_results) - , distance_(std::move(dist)) { + , distance_(std::forward(dist)) { if (min_results <= 0 || root.GetNode().GetEntryCount() == 0) { this->SetFinished(); return; @@ -81,12 +82,12 @@ class IteratorKnnHS : public IteratorBase { return current_distance_; } - IteratorKnnHS& operator++() { + IteratorKnnHS& operator++() noexcept { FindNextElement(); return *this; } - IteratorKnnHS operator++(int) { + IteratorKnnHS operator++(int) noexcept { IteratorKnnHS iterator(*this); ++(*this); return iterator; @@ -96,7 +97,7 @@ class IteratorKnnHS : public IteratorBase { void FindNextElement() { while (num_found_results_ < num_requested_results_ && !queue_.empty()) { auto& candidate = queue_.top(); - auto o = candidate.second; + auto* o = candidate.second; if (!o->IsNode()) { // data entry ++num_found_results_; @@ -114,8 +115,7 @@ class IteratorKnnHS : public IteratorBase { auto& e2 = entry.second; if (this->ApplyFilter(e2)) { if (e2.IsNode()) { - auto& sub = e2.GetNode(); - double d = DistanceToNode(e2.GetKey(), sub.GetPostfixLen() + 1); + 
double d = DistanceToNode(e2.GetKey(), e2.GetNodePostfixLen() + 1); queue_.emplace(d, &e2); } else { double d = distance_(center_post_, this->post(e2.GetKey())); @@ -129,7 +129,7 @@ class IteratorKnnHS : public IteratorBase { current_distance_ = std::numeric_limits::max(); } - double DistanceToNode(const KeyInternal& prefix, int bits_to_ignore) { + double DistanceToNode(const KeyInternal& prefix, std::uint32_t bits_to_ignore) { assert(bits_to_ignore < MAX_BIT_WIDTH); SCALAR mask_min = MAX_MASK << bits_to_ignore; SCALAR mask_max = ~mask_min; @@ -154,8 +154,8 @@ class IteratorKnnHS : public IteratorBase { double current_distance_; std::priority_queue, CompareEntryDistByDistance> queue_; - int num_found_results_; - int num_requested_results_; + size_t num_found_results_; + size_t num_requested_results_; DISTANCE distance_; }; diff --git a/phtree/v16/iterator_simple.h b/phtree/v16/iterator_with_parent.h similarity index 57% rename from phtree/v16/iterator_simple.h rename to phtree/v16/iterator_with_parent.h index 815979a7..e9347609 100644 --- a/phtree/v16/iterator_simple.h +++ b/phtree/v16/iterator_with_parent.h @@ -23,45 +23,49 @@ namespace improbable::phtree::v16 { template -class IteratorSimple : public IteratorBase { +class IteratorWithParent : public IteratorWithFilter { static constexpr dimension_t DIM = CONVERT::DimInternal; using SCALAR = typename CONVERT::ScalarInternal; - using EntryT = typename IteratorBase::EntryT; + using EntryT = typename IteratorWithFilter::EntryT; + friend PhTreeV16; public: - explicit IteratorSimple(const CONVERT& converter) : IteratorBase(converter) { - this->SetFinished(); - } - - explicit IteratorSimple( + explicit IteratorWithParent( const EntryT* current_result, const EntryT* current_node, const EntryT* parent_node, - CONVERT converter) - : IteratorBase(converter) { - if (current_result) { - this->SetCurrentResult(current_result); - this->SetCurrentNodeEntry(current_node); - this->SetParentNodeEntry(parent_node); - } else { - 
this->SetFinished(); - } - } + const CONVERT* converter) noexcept + : IteratorWithFilter(current_result, converter) + , current_node_{current_node} + , parent_node_{parent_node} {} - IteratorSimple& operator++() { + IteratorWithParent& operator++() { this->SetFinished(); return *this; } - IteratorSimple operator++(int) { - IteratorSimple iterator(*this); + IteratorWithParent operator++(int) { + IteratorWithParent iterator(*this); ++(*this); return iterator; } -}; -template -using IteratorEnd = IteratorSimple; + private: + /* + * The parent entry contains the parent node. The parent node is the node ABOVE the current node + * which contains the current entry. + */ + EntryT* GetNodeEntry() const { + return const_cast(current_node_); + } + + EntryT* GetParentNodeEntry() const { + return const_cast(parent_node_); + } + + const EntryT* current_node_; + const EntryT* parent_node_; +}; } // namespace improbable::phtree::v16 diff --git a/phtree/v16/node.h b/phtree/v16/node.h index 6994bca0..4a2aa451 100644 --- a/phtree/v16/node.h +++ b/phtree/v16/node.h @@ -26,67 +26,32 @@ namespace improbable::phtree::v16 { /* - * We provide different implementations of the node's internal entry set: + * We provide different implementations of the node's internal entry set. + * All implementations are equivalent to "std::map" which can be used as + * a plugin example for verification. + * * - `array_map` is the fastest, but has O(2^DIM) space complexity. This can be very wasteful * because many nodes may have only 2 entries. * Also, iteration depends on some bit operations and is also O(DIM) per step if the CPU/compiler * does not support CTZ (count trailing bits). * - `sparse_map` is slower, but requires only O(n) memory (n = number of entries/children). * However, insertion/deletion is O(n), i.e. O(2^DIM) time complexity in the worst case. - * - 'std::map` is the least efficient for small node sizes but scales best with larger nodes and - * dimensionality. Remember that n_max = 2^DIM. 
+ * - 'b_plus_tree_map` is the least efficient for small node sizes but scales best with larger + * nodes and dimensionality. Remember that n_max = 2^DIM. */ template using EntryMap = typename std::conditional< DIM <= 3, array_map, - typename std::conditional, std::map>::type>::type; + typename std:: + conditional, b_plus_tree_map>:: + type>::type; template using EntryIterator = decltype(EntryMap().begin()); template using EntryIteratorC = decltype(EntryMap().cbegin()); -namespace { - -/* - * Takes a construct of parent_node -> child_node, ie the child_node is owned by parent_node. - * This function also assumes that the child_node contains only one entry. - * - * This function takes the remaining entry from the child node and inserts it into the parent_node - * where it replaces (and implicitly deletes) the child_node. - * @param prefix_of_child_in_parent This specifies the position of child_node inside the - * parent_node. We only need the relevant bits at the level of the parent_node. This means we can - * use any key of any node or entry that is, or used to be) inside the child_node, because they all - * share the same prefix. This includes the key of the child_node itself. - * @param child_node The node to be removed from the parent node. - * @param parent_node Current owner of the child node. - */ -template -void MergeIntoParent(Node& child_node, Node& parent) { - assert(child_node.GetEntryCount() == 1); - // At this point we have found an entry that needs to be removed. We also know that we need to - // remove the child node because it contains at most one other entry and it is not the root - // node. 
- auto map_entry = child_node.Entries().begin(); - auto& entry = map_entry->second; - - auto hc_pos_in_parent = CalcPosInArray(entry.GetKey(), parent.GetPostfixLen()); - auto& parent_entry = parent.Entries().find(hc_pos_in_parent)->second; - - if (entry.IsNode()) { - // connect sub to parent - auto& sub2 = entry.GetNode(); - bit_width_t new_infix_len = child_node.GetInfixLen() + 1 + sub2.GetInfixLen(); - sub2.SetInfixLen(new_infix_len); - } - - // Now move the single entry into the parent, the position in the parent is the same as the - // child_node. - parent_entry.ReplaceNodeWithDataFromEntry(std::move(entry)); -} -} // namespace - /* * A node of the PH-Tree. It contains up to 2^DIM entries, each entry being either a leaf with data * of type T or a child node (both are of the variant type Entry). @@ -112,11 +77,7 @@ class Node { using EntryT = Entry; public: - Node(bit_width_t infix_len, bit_width_t postfix_len) - : postfix_len_(postfix_len), infix_len_(infix_len), entries_{} { - assert(infix_len_ < MAX_BIT_WIDTH); - assert(infix_len >= 0); - } + Node() : entries_{} {} // Nodes should never be copied! Node(const Node&) = delete; @@ -128,14 +89,6 @@ class Node { return entries_.size(); } - [[nodiscard]] bit_width_t GetInfixLen() const { - return infix_len_; - } - - [[nodiscard]] bit_width_t GetPostfixLen() const { - return postfix_len_; - } - /* * Attempts to emplace an entry in this node. * The behavior is analogous to std::map::emplace(), i.e. if there is already a value with the @@ -164,56 +117,65 @@ class Node { * @param args Constructor arguments for creating a value T that can be inserted for the key. */ template - EntryT* Emplace(bool& is_inserted, const KeyT& key, Args&&... args) { - hc_pos_t hc_pos = CalcPosInArray(key, GetPostfixLen()); + EntryT& Emplace(bool& is_inserted, const KeyT& key, bit_width_t postfix_len, Args&&... 
args) { + hc_pos_t hc_pos = CalcPosInArray(key, postfix_len); auto emplace_result = entries_.try_emplace(hc_pos, key, std::forward(args)...); auto& entry = emplace_result.first->second; // Return if emplace succeed, i.e. there was no entry. if (emplace_result.second) { is_inserted = true; - return &entry; + return entry; } - return HandleCollision(entry, is_inserted, key, std::forward(args)...); + return HandleCollision(entry, is_inserted, key, postfix_len, std::forward(args)...); } /* * Returns the value (T or Node) if the entry exists and matches the key. Child nodes are * _not_ traversed. * @param key The key of the entry - * @param parent parent node + * @param parent The parent node * @return The sub node or null. */ - const EntryT* Find(const KeyT& key) const { - hc_pos_t hc_pos = CalcPosInArray(key, GetPostfixLen()); + const EntryT* Find(const KeyT& key, bit_width_t postfix_len) const { + hc_pos_t hc_pos = CalcPosInArray(key, postfix_len); const auto& entry = entries_.find(hc_pos); - if (entry != entries_.end() && DoesEntryMatch(entry->second, key)) { + if (entry != entries_.end() && DoesEntryMatch(entry->second, key, postfix_len)) { return &entry->second; } return nullptr; } + EntryT* Find(const KeyT& key, bit_width_t postfix_len) { + return const_cast(static_cast(this)->Find(key, postfix_len)); + } + /* * Attempts to erase a key/value pair. * This function is not recursive, if the 'key' leads to a child node, the child node * is returned and nothing is removed. * * @param key The key of the key/value pair to be erased - * @param parent The parent node of the current node (=nullptr) if this is the root node. + * @param parent_entry The parent node of the current node (=nullptr) if this is the root node. + * @param allow_move_into_parent Whether the node can be merged into the parent if only 1 + * entry is left. * @param found This is and output parameter and will be set to 'true' if a value was removed. 
* @return A child node if the provided key leads to a child node. */ - Node* Erase(const KeyT& key, Node* parent, bool& found) { - hc_pos_t hc_pos = CalcPosInArray(key, GetPostfixLen()); + EntryT* Erase(const KeyT& key, EntryT* parent_entry, bool allow_move_into_parent, bool& found) { + auto postfix_len = parent_entry->GetNodePostfixLen(); + hc_pos_t hc_pos = CalcPosInArray(key, postfix_len); auto it = entries_.find(hc_pos); - if (it != entries_.end() && DoesEntryMatch(it->second, key)) { + if (it != entries_.end() && DoesEntryMatch(it->second, key, postfix_len)) { if (it->second.IsNode()) { - return &it->second.GetNode(); + return &it->second; } entries_.erase(it); found = true; - if (parent && GetEntryCount() == 1) { - MergeIntoParent(*this, *parent); + if (allow_move_into_parent && GetEntryCount() == 1) { + // We take the remaining entry from the current node and inserts it into the + // parent_entry where it replaces (and implicitly deletes) the current node. + parent_entry->ReplaceNodeWithDataFromEntry(std::move(entries_.begin()->second)); // WARNING: (this) is deleted here, do not refer to it beyond this point. 
} } @@ -228,23 +190,23 @@ class Node { return entries_; } - void GetStats(PhTreeStats& stats, bit_width_t current_depth = 0) const { + void GetStats( + PhTreeStats& stats, const EntryT& current_entry, bit_width_t current_depth = 0) const { size_t num_children = entries_.size(); ++stats.n_nodes_; - ++stats.infix_hist_[GetInfixLen()]; ++stats.node_depth_hist_[current_depth]; ++stats.node_size_log_hist_[32 - CountLeadingZeros(std::uint32_t(num_children))]; stats.n_total_children_ += num_children; - - current_depth += GetInfixLen(); stats.q_total_depth_ += current_depth; for (auto& entry : entries_) { auto& child = entry.second; if (child.IsNode()) { + auto child_infix_len = child.GetNodeInfixLen(current_entry.GetNodePostfixLen()); + ++stats.infix_hist_[child_infix_len]; auto& sub = child.GetNode(); - sub.GetStats(stats, current_depth + 1); + sub.GetStats(stats, child, current_depth + 1 + child_infix_len); } else { ++stats.q_n_post_fix_n_[current_depth]; ++stats.size_; @@ -252,11 +214,9 @@ class Node { } } - size_t CheckConsistency(bit_width_t current_depth = 0) const { + size_t CheckConsistency(const EntryT& current_entry, bit_width_t current_depth = 0) const { // Except for a root node if the tree has <2 entries. 
assert(entries_.size() >= 2 || current_depth == 0); - - current_depth += GetInfixLen(); size_t num_entries_local = 0; size_t num_entries_children = 0; for (auto& entry : entries_) { @@ -264,8 +224,12 @@ class Node { if (child.IsNode()) { auto& sub = child.GetNode(); // Check node consistency - assert(sub.GetInfixLen() + 1 + sub.GetPostfixLen() == GetPostfixLen()); - num_entries_children += sub.CheckConsistency(current_depth + 1); + auto sub_infix_len = child.GetNodeInfixLen(current_entry.GetNodePostfixLen()); + assert( + sub_infix_len + 1 + child.GetNodePostfixLen() == + current_entry.GetNodePostfixLen()); + num_entries_children += + sub.CheckConsistency(child, current_depth + 1 + sub_infix_len); } else { ++num_entries_local; } @@ -273,12 +237,6 @@ class Node { return num_entries_local + num_entries_children; } - void SetInfixLen(bit_width_t newInfLen) { - assert(newInfLen < MAX_BIT_WIDTH); - assert(newInfLen >= 0); - infix_len_ = newInfLen; - } - private: template auto& WriteValue(hc_pos_t hc_pos, const KeyT& new_key, Args&&... args) { @@ -287,10 +245,8 @@ class Node { void WriteEntry(hc_pos_t hc_pos, EntryT& entry) { if (entry.IsNode()) { - auto& node = entry.GetNode(); - bit_width_t new_subnode_infix_len = postfix_len_ - node.postfix_len_ - 1; - node.SetInfixLen(new_subnode_infix_len); - entries_.try_emplace(hc_pos, entry.GetKey(), entry.ExtractNode()); + auto postfix_len = entry.GetNodePostfixLen(); + entries_.try_emplace(hc_pos, entry.GetKey(), entry.ExtractNode(), postfix_len); } else { entries_.try_emplace(hc_pos, entry.GetKey(), entry.ExtractValue()); } @@ -311,18 +267,21 @@ class Node { * an entry with the exact same key as new_key, so insertion has failed. */ template - auto* HandleCollision( - EntryT& existing_entry, bool& is_inserted, const KeyT& new_key, Args&&... args) { + auto& HandleCollision( + EntryT& existing_entry, + bool& is_inserted, + const KeyT& new_key, + bit_width_t current_postfix_len, + Args&&... 
args) { assert(!is_inserted); // We have two entries in the same location (local pos). // Now we need to compare the keys. // If they are identical, we simply return the entry for further traversal. if (existing_entry.IsNode()) { - auto& sub_node = existing_entry.GetNode(); - if (sub_node.GetInfixLen() > 0) { + if (existing_entry.HasNodeInfix(current_postfix_len)) { bit_width_t max_conflicting_bits = NumberOfDivergingBits(new_key, existing_entry.GetKey()); - if (max_conflicting_bits > sub_node.GetPostfixLen() + 1) { + if (max_conflicting_bits > existing_entry.GetNodePostfixLen() + 1) { is_inserted = true; return InsertSplit( existing_entry, new_key, max_conflicting_bits, std::forward(args)...); @@ -339,31 +298,27 @@ class Node { } // perfect match -> return existing } - return &existing_entry; + return existing_entry; } template - auto* InsertSplit( + auto& InsertSplit( EntryT& current_entry, const KeyT& new_key, bit_width_t max_conflicting_bits, Args&&... args) { - const auto current_key = current_entry.GetKey(); - - // determine length of infix - bit_width_t new_local_infix_len = GetPostfixLen() - max_conflicting_bits; bit_width_t new_postfix_len = max_conflicting_bits - 1; - auto new_sub_node = std::make_unique(new_local_infix_len, new_postfix_len); + auto new_sub_node = std::make_unique(); hc_pos_t pos_sub_1 = CalcPosInArray(new_key, new_postfix_len); - hc_pos_t pos_sub_2 = CalcPosInArray(current_key, new_postfix_len); + hc_pos_t pos_sub_2 = CalcPosInArray(current_entry.GetKey(), new_postfix_len); // Move key/value into subnode new_sub_node->WriteEntry(pos_sub_2, current_entry); auto& new_entry = new_sub_node->WriteValue(pos_sub_1, new_key, std::forward(args)...); // Insert new node into local node - current_entry.SetNode(std::move(new_sub_node)); - return &new_entry; + current_entry.SetNode(std::move(new_sub_node), new_postfix_len); + return new_entry; } /* @@ -374,11 +329,11 @@ class Node { * @return 'true' iff the relevant part of the key matches (prefix for 
nodes, whole key for * other entries). */ - bool DoesEntryMatch(const EntryT& entry, const KeyT& key) const { + bool DoesEntryMatch( + const EntryT& entry, const KeyT& key, const bit_width_t parent_postfix_len) const { if (entry.IsNode()) { - const auto& sub = entry.GetNode(); - if (sub.GetInfixLen() > 0) { - const bit_mask_t mask = MAX_MASK << (sub.GetPostfixLen() + 1); + if (entry.HasNodeInfix(parent_postfix_len)) { + const bit_mask_t mask = MAX_MASK << (entry.GetNodePostfixLen() + 1); return KeyEquals(entry.GetKey(), key, mask); } return true; @@ -386,16 +341,6 @@ class Node { return entry.GetKey() == key; } - // The length (number of bits) of post fixes (the part of the coordinate that is 'below' the - // current node). If a variable prefix_len would refer to the number of bits in this node's - // prefix, and if we assume 64 bit values, the following would always hold: - // prefix_len + 1 + postfix_len = 64. - // The '+1' accounts for the 1 bit that is represented by the local node's hypercube, - // ie. the same bit that is used to create the lookup keys in entries_. - bit_width_t postfix_len_; - // The number of bits between this node and the parent node. For 64bit keys possible values - // range from 0 to 62. 
- bit_width_t infix_len_; EntryMap entries_; }; diff --git a/phtree/v16/phtree_v16.h b/phtree/v16/phtree_v16.h index 103b7870..1f49ef69 100644 --- a/phtree/v16/phtree_v16.h +++ b/phtree/v16/phtree_v16.h @@ -23,7 +23,7 @@ #include "iterator_full.h" #include "iterator_hc.h" #include "iterator_knn_hs.h" -#include "iterator_simple.h" +#include "iterator_with_parent.h" #include "node.h" namespace improbable::phtree::v16 { @@ -57,8 +57,8 @@ class PhTreeV16 { using ScalarExternal = typename CONVERT::ScalarExternal; using ScalarInternal = typename CONVERT::ScalarInternal; using KeyT = typename CONVERT::KeyInternal; - using NodeT = Node; using EntryT = Entry; + using NodeT = Node; public: static_assert(!std::is_reference::value, "Reference type value are not supported."); @@ -69,12 +69,17 @@ class PhTreeV16 { std::is_arithmetic::value, "ScalarExternal must be an arithmetic type"); static_assert(DIM >= 1 && DIM <= 63, "This PH-Tree supports between 1 and 63 dimensions"); - PhTreeV16(CONVERT& converter = ConverterNoOp()) + explicit PhTreeV16(CONVERT* converter) : num_entries_{0} - , root_{0, MAX_BIT_WIDTH - 1} - , the_end_{converter} + , root_{{}, std::make_unique(), MAX_BIT_WIDTH - 1} , converter_{converter} {} + PhTreeV16(const PhTreeV16& other) = delete; + PhTreeV16& operator=(const PhTreeV16& other) = delete; + PhTreeV16(PhTreeV16&& other) noexcept = default; + PhTreeV16& operator=(PhTreeV16&& other) noexcept = default; + ~PhTreeV16() noexcept = default; + /* * Attempts to build and insert a key and a value into the tree. * @@ -90,19 +95,19 @@ class PhTreeV16 { * entry instead of inserting a new one. */ template - std::pair emplace(const KeyT& key, Args&&... args) { + std::pair try_emplace(const KeyT& key, Args&&... 
args) { auto* current_entry = &root_; bool is_inserted = false; while (current_entry->IsNode()) { - current_entry = - current_entry->GetNode().Emplace(is_inserted, key, std::forward(args)...); + current_entry = ¤t_entry->GetNode().Emplace( + is_inserted, key, current_entry->GetNodePostfixLen(), std::forward(args)...); } num_entries_ += is_inserted; return {current_entry->GetValue(), is_inserted}; } /* - * The emplace_hint() method uses an iterator as hint for insertion. + * The try_emplace(hint, key, value) method uses an iterator as hint for insertion. * The hint is ignored if it is not useful or is equal to end(). * * Iterators should normally not be used after the tree has been modified. As an exception to @@ -114,41 +119,45 @@ class PhTreeV16 { * auto iter = tree.find(key1); * auto value = iter.second(); // The value may become invalid in erase() * erase(iter); - * emplace_hint(iter, key2, value); // the iterator can still be used as hint here + * try_emplace(iter, key2, value); // the iterator can still be used as hint here */ template - std::pair emplace_hint(const ITERATOR& iterator, const KeyT& key, Args&&... args) { - // This function can be used to insert a value close to a known value - // or close to a recently removed value. The hint can only be used if the new key is - // inside one of the nodes provided by the hint iterator. - // The idea behind using the 'parent' is twofold: - // - The 'parent' node is one level above the iterator position, it therefore is spatially - // larger and has a better probability of containing the new position, allowing for - // fast track emplace. - // - Using 'parent' allows a scenario where the iterator was previously used with - // erase(iterator). This is safe because erase() will never erase the 'parent' node. 
- - if (!iterator.GetParentNodeEntry()) { - // No hint available, use standard emplace() - return emplace(key, std::forward(args)...); - } - - auto* parent_entry = iterator.GetParentNodeEntry(); - if (NumberOfDivergingBits(key, parent_entry->GetKey()) > - parent_entry->GetNode().GetPostfixLen() + 1) { - // replace higher up in the tree - return emplace(key, std::forward(args)...); - } - - // replace in node - auto* current_entry = parent_entry; - bool is_inserted = false; - while (current_entry->IsNode()) { - current_entry = - current_entry->GetNode().Emplace(is_inserted, key, std::forward(args)...); + std::pair try_emplace(const ITERATOR& iterator, const KeyT& key, Args&&... args) { + if constexpr (!std::is_same_v>) { + return try_emplace(key, std::forward(args)...); + } else { + // This function can be used to insert a value close to a known value + // or close to a recently removed value. The hint can only be used if the new key is + // inside one of the nodes provided by the hint iterator. + // The idea behind using the 'parent' is twofold: + // - The 'parent' node is one level above the iterator position, it is spatially + // larger and has a better probability of containing the new position, allowing for + // fast track try_emplace. + // - Using 'parent' allows a scenario where the iterator was previously used with + // erase(iterator). This is safe because erase() will never erase the 'parent' node. 
+ + if (!iterator.GetParentNodeEntry()) { + // No hint available, use standard try_emplace() + return try_emplace(key, std::forward(args)...); + } + + auto* parent_entry = iterator.GetParentNodeEntry(); + if (NumberOfDivergingBits(key, parent_entry->GetKey()) > + parent_entry->GetNodePostfixLen() + 1) { + // replace higher up in the tree + return try_emplace(key, std::forward(args)...); + } + + // replace in node + auto* entry = parent_entry; + bool is_inserted = false; + while (entry->IsNode()) { + entry = &entry->GetNode().Emplace( + is_inserted, key, entry->GetNodePostfixLen(), std::forward(args)...); + } + num_entries_ += is_inserted; + return {entry->GetValue(), is_inserted}; } - num_entries_ += is_inserted; - return {current_entry->GetValue(), is_inserted}; } /* @@ -158,7 +167,7 @@ class PhTreeV16 { * insertion) and a bool denoting whether the insertion took place. */ std::pair insert(const KeyT& key, const T& value) { - return emplace(key, value); + return try_emplace(key, value); } /* @@ -166,7 +175,7 @@ class PhTreeV16 { * and returned. */ T& operator[](const KeyT& key) { - return emplace(key).first; + return try_emplace(key).first; } /* @@ -180,7 +189,7 @@ class PhTreeV16 { } auto* current_entry = &root_; while (current_entry && current_entry->IsNode()) { - current_entry = current_entry->GetNode().Find(key); + current_entry = current_entry->GetNode().Find(key, current_entry->GetNodePostfixLen()); } return current_entry ? 
1 : 0; } @@ -194,20 +203,16 @@ class PhTreeV16 { * was found */ auto find(const KeyT& key) const { - if (empty()) { - return IteratorSimple(converter_); - } - const EntryT* current_entry = &root_; const EntryT* current_node = nullptr; const EntryT* parent_node = nullptr; while (current_entry && current_entry->IsNode()) { parent_node = current_node; current_node = current_entry; - current_entry = current_entry->GetNode().Find(key); + current_entry = current_entry->GetNode().Find(key, current_entry->GetNodePostfixLen()); } - return IteratorSimple(current_entry, current_node, parent_node, converter_); + return IteratorWithParent(current_entry, current_node, parent_node, converter_); } /* @@ -216,13 +221,12 @@ class PhTreeV16 { * @return '1' if a value was found, otherwise '0'. */ size_t erase(const KeyT& key) { - auto* current_node = &root_.GetNode(); - NodeT* parent_node = nullptr; + auto* entry = &root_; + // We do not want the root entry to be modified. The reason is simply that a lot of the + // code in this class becomes simpler if we can assume the root entry to contain a node. bool found = false; - while (current_node) { - auto* child_node = current_node->Erase(key, parent_node, found); - parent_node = current_node; - current_node = child_node; + while (entry) { + entry = entry->GetNode().Erase(key, entry, entry != &root_, found); } num_entries_ -= found; return found; @@ -231,8 +235,6 @@ class PhTreeV16 { /* * See std::map::erase(). Removes any value at the given iterator location. * - * - * * WARNING * While this is guaranteed to work correctly, only iterators returned from find() * will result in erase(iterator) being faster than erase(key). 
@@ -242,30 +244,192 @@ class PhTreeV16 { */ template size_t erase(const ITERATOR& iterator) { - if (iterator.Finished()) { + if (iterator.IsEnd()) { + return 0; + } + if constexpr (std::is_same_v>) { + const auto& iter_rich = static_cast&>(iterator); + if (!iter_rich.GetNodeEntry() || iter_rich.GetNodeEntry() == &root_) { + // Do _not_ use the root entry, see erase(key). Start searching from the top. + return erase(iter_rich.GetEntry()->GetKey()); + } + bool found = false; + EntryT* entry = iter_rich.GetNodeEntry(); + // The loop is a safeguard for find_two_mm which may return slightly wrong iterators. + while (entry != nullptr) { + entry = entry->GetNode().Erase(iter_rich.GetEntry()->GetKey(), entry, true, found); + } + num_entries_ -= found; + return found; + } + // There may be no entry because not every iterator sets it. + return erase(iterator.GetEntry()->GetKey()); + } + + /* + * Relocate (move) an entry from one position to another, subject to a predicate. + * + * @param old_key + * @param new_key + * @param pred + * + * @return '1' if an entry was found at old_key and relocated to new_key, otherwise '0' (no entry + * at old_key, the predicate returned false, or the insertion at new_key did not take place). + */ + template + size_t relocate_if(const KeyT& old_key, const KeyT& new_key, PRED&& pred) { + auto pair = _find_two(old_key, new_key); + auto& iter_old = pair.first; + auto& iter_new = pair.second; + + if (iter_old.IsEnd() || !pred(*iter_old)) { return 0; } - if (!iterator.GetParentNodeEntry()) { - // Why may there be no parent? - // - we are in the root node - // - the iterator did not set this value - // In either case, we need to start searching from the top. - return erase(iterator.GetCurrentResult()->GetKey()); + // Are we inserting in same node and same quadrant? Or are the keys equal? 
+ if (iter_old == iter_new) { + iter_old.GetEntry()->SetKey(new_key); + return 1; + } + + bool is_inserted = false; + auto* new_parent = iter_new.GetNodeEntry(); + new_parent->GetNode().Emplace( + is_inserted, new_key, new_parent->GetNodePostfixLen(), std::move(*iter_old)); + if (!is_inserted) { + return 0; + } + + // Erase old value. See comments in try_emplace(iterator) for details. + EntryT* old_node_entry = iter_old.GetNodeEntry(); + if (iter_old.GetParentNodeEntry() == iter_new.GetNodeEntry()) { + // In this case the old_node_entry may have been invalidated by the previous insertion. + old_node_entry = iter_old.GetParentNodeEntry(); } bool found = false; - assert(iterator.GetCurrentNodeEntry() && iterator.GetCurrentNodeEntry()->IsNode()); - iterator.GetCurrentNodeEntry()->GetNode().Erase( - iterator.GetCurrentResult()->GetKey(), - &iterator.GetParentNodeEntry()->GetNode(), - found); + while (old_node_entry) { + old_node_entry = old_node_entry->GetNode().Erase( + old_key, old_node_entry, old_node_entry != &root_, found); + } + assert(found); + return 1; + } - num_entries_ -= found; - return found; + /* + * Tries to locate two entries that are 'close' to each other. + * + * Special behavior: + * - returns end() if old_key does not exist; + */ + auto _find_two(const KeyT& old_key, const KeyT& new_key) { + using Iter = IteratorWithParent; + bit_width_t n_diverging_bits = NumberOfDivergingBits(old_key, new_key); + + const EntryT* current_entry = &root_; // An entry. 
+ const EntryT* old_node_entry = nullptr; // Node that contains entry to be removed + const EntryT* old_node_entry_parent = nullptr; // Parent of the old_node_entry + const EntryT* new_node_entry = nullptr; // Node that will contain new entry + // Find node for removal + while (current_entry && current_entry->IsNode()) { + old_node_entry_parent = old_node_entry; + old_node_entry = current_entry; + auto postfix_len = old_node_entry->GetNodePostfixLen(); + if (postfix_len + 1 >= n_diverging_bits) { + new_node_entry = old_node_entry; + } + current_entry = current_entry->GetNode().Find(old_key, postfix_len); + } + const EntryT* old_entry = current_entry; // Entry to be removed + + // Can we stop already? + if (old_entry == nullptr) { + auto iter = Iter(nullptr, nullptr, nullptr, converter_); + return std::make_pair(iter, iter); // old_key not found! + } + + // Are we inserting in same node and same quadrant? Or are the keys equal? + assert(old_node_entry != nullptr); + if (n_diverging_bits == 0 || old_node_entry->GetNodePostfixLen() >= n_diverging_bits) { + auto iter = Iter(old_entry, old_node_entry, old_node_entry_parent, converter_); + return std::make_pair(iter, iter); + } + + // Find node for insertion + auto new_entry = new_node_entry; + while (new_entry && new_entry->IsNode()) { + new_node_entry = new_entry; + new_entry = new_entry->GetNode().Find(new_key, new_entry->GetNodePostfixLen()); + } + + auto iter1 = Iter(old_entry, old_node_entry, old_node_entry_parent, converter_); + auto iter2 = Iter(new_entry, new_node_entry, nullptr, converter_); + return std::make_pair(iter1, iter2); + } + + /* + * Tries to locate two entries that are 'close' to each other. + * + * Special behavior: + * - returns end() if old_key does not exist; + * - CREATES the destination entry if it does not exist! 
+ */ + auto _find_or_create_two_mm(const KeyT& old_key, const KeyT& new_key, bool count_equals) { + using Iter = IteratorWithParent; + bit_width_t n_diverging_bits = NumberOfDivergingBits(old_key, new_key); + + if (!count_equals && n_diverging_bits == 0) { + auto iter = Iter(nullptr, nullptr, nullptr, converter_); + return std::make_pair(iter, iter); + } + + const EntryT* new_entry = &root_; // An entry. + const EntryT* old_node_entry = nullptr; // Node that contains entry to be removed + const EntryT* new_node_entry = nullptr; // Node that will contain new entry + // Find the deepest common parent node for removal and insertion + bool is_inserted = false; + while (new_entry && new_entry->IsNode() && + new_entry->GetNodePostfixLen() + 1 >= n_diverging_bits) { + new_node_entry = new_entry; + auto postfix_len = new_entry->GetNodePostfixLen(); + new_entry = &new_entry->GetNode().Emplace(is_inserted, new_key, postfix_len); + } + old_node_entry = new_node_entry; + + // Find node for insertion + while (new_entry->IsNode()) { + new_node_entry = new_entry; + new_entry = + &new_entry->GetNode().Emplace(is_inserted, new_key, new_entry->GetNodePostfixLen()); + } + num_entries_ += is_inserted; + assert(new_entry != nullptr); + + auto* old_entry = old_node_entry; + while (old_entry && old_entry->IsNode()) { + old_node_entry = old_entry; + old_entry = old_entry->GetNode().Find(old_key, old_entry->GetNodePostfixLen()); + } + + // Does old_entry exist? + if (old_entry == nullptr) { + auto iter = Iter(nullptr, nullptr, nullptr, converter_); + return std::make_pair(iter, iter); // old_key not found! + } + + // Are we inserting in same node and same quadrant? Or are the keys equal? + if (n_diverging_bits == 0) { + auto iter = Iter(old_entry, old_node_entry, nullptr, converter_); + return std::make_pair(iter, iter); + } + + auto iter1 = Iter(old_entry, old_node_entry, nullptr, converter_); + // TODO Note: Emplace() may return a sub-child so new_node_entry may be a grandparent! 
+ auto iter2 = Iter(new_entry, new_node_entry, nullptr, converter_); + return std::make_pair(iter1, iter2); } /* * Iterates over all entries in the tree. The optional filter allows filtering entries and nodes - * (=sub-trees) before returning / traversing them. By default all entries are returned. Filter + * (=sub-trees) before returning / traversing them. By default, all entries are returned. Filter * functions must implement the same signature as the default 'FilterNoOp'. * * @param callback The callback function to be called for every entry that matches the query. @@ -274,9 +438,18 @@ class PhTreeV16 { * sub-nodes before they are returned or traversed. Any filter function must follow the * signature of the default 'FilterNoOp`. */ - template - void for_each(CALLBACK_FN& callback, FILTER filter = FILTER()) const { - ForEach(converter_, callback, filter).run(root_); + template + void for_each(CALLBACK&& callback, FILTER&& filter = FILTER()) { + ForEach( + converter_, std::forward(callback), std::forward(filter)) + .Traverse(root_); + } + + template + void for_each(CALLBACK&& callback, FILTER&& filter = FILTER()) const { + ForEach( + converter_, std::forward(callback), std::forward(filter)) + .Traverse(root_); } /* @@ -289,14 +462,19 @@ class PhTreeV16 { * sub-nodes before they are returned or traversed. Any filter function must follow the * signature of the default 'FilterNoOp`. 
*/ - template + template void for_each( - const PhBox& query_box, - CALLBACK_FN& callback, - FILTER filter = FILTER()) const { - ForEachHC( - query_box.min(), query_box.max(), converter_, callback, filter) - .run(root_); + // TODO check copy elision + const PhBox query_box, + CALLBACK&& callback, + FILTER&& filter = FILTER()) const { + ForEachHC( + query_box.min(), + query_box.max(), + converter_, + std::forward(callback), + std::forward(filter)) + .Traverse(root_); } /* @@ -307,8 +485,8 @@ class PhTreeV16 { * @return an iterator over all (filtered) entries in the tree, */ template - auto begin(FILTER filter = FILTER()) const { - return IteratorFull(root_, converter_, filter); + auto begin(FILTER&& filter = FILTER()) const { + return IteratorFull(root_, converter_, std::forward(filter)); } /* @@ -321,9 +499,10 @@ class PhTreeV16 { * @return Result iterator. */ template - auto begin_query(const PhBox& query_box, FILTER filter = FILTER()) const { + auto begin_query( + const PhBox& query_box, FILTER&& filter = FILTER()) const { return IteratorHC( - root_, query_box.min(), query_box.max(), converter_, filter); + root_, query_box.min(), query_box.max(), converter_, std::forward(filter)); } /* @@ -344,17 +523,22 @@ class PhTreeV16 { auto begin_knn_query( size_t min_results, const KeyT& center, - DISTANCE distance_function = DISTANCE(), - FILTER filter = FILTER()) const { + DISTANCE&& distance_function = DISTANCE(), + FILTER&& filter = FILTER()) const { return IteratorKnnHS( - root_, min_results, center, converter_, distance_function, filter); + root_, + min_results, + center, + converter_, + std::forward(distance_function), + std::forward(filter)); } /* * @return An iterator representing the tree's 'end'. 
*/ - const auto& end() const { - return the_end_; + auto end() const { + return IteratorEnd(); } /* @@ -362,7 +546,7 @@ class PhTreeV16 { */ void clear() { num_entries_ = 0; - root_ = EntryT(0, MAX_BIT_WIDTH - 1); + root_ = EntryT({}, std::make_unique(), MAX_BIT_WIDTH - 1); } /* @@ -384,16 +568,15 @@ class PhTreeV16 { * This function is only for debugging. */ auto GetDebugHelper() const { - return DebugHelperV16(root_.GetNode(), num_entries_); + return DebugHelperV16(root_, num_entries_); } private: size_t num_entries_; - // Contract: root_ contains a Node with 0 or more entries (the root node is the only Node + // Contract: root_ contains a Node with 0 or more entries. The root node is the only Node // that is allowed to have less than two entries. EntryT root_; - IteratorEnd the_end_; - CONVERT converter_; + CONVERT* converter_; }; } // namespace improbable::phtree::v16 diff --git a/test/BUILD b/test/BUILD new file mode 100644 index 00000000..3191aefe --- /dev/null +++ b/test/BUILD @@ -0,0 +1,262 @@ +package(default_visibility = ["//visibility:private"]) + +cc_test( + name = "phtree_test", + timeout = "long", + srcs = [ + "phtree_test.cc", + ], + linkstatic = True, + deps = [ + "//phtree:phtree", + "//test/testing/gtest_main", + ], +) + +cc_test( + name = "phtree_test_const_values", + timeout = "long", + srcs = [ + "phtree_test_const_values.cc", + ], + linkstatic = True, + deps = [ + "//phtree:phtree", + "//test/testing/gtest_main", + ], +) + +cc_test( + name = "phtree_test_ptr_values", + timeout = "long", + srcs = [ + "phtree_test_ptr_values.cc", + ], + linkstatic = True, + deps = [ + "//phtree:phtree", + "//test/testing/gtest_main", + ], +) + +cc_test( + name = "phtree_test_unique_ptr_values", + timeout = "long", + srcs = [ + "phtree_test_unique_ptr_values.cc", + ], + linkstatic = True, + deps = [ + "//phtree:phtree", + "//test/testing/gtest_main", + ], +) + +cc_test( + name = "phtree_multimap_d_test_unique_ptr_values", + timeout = "long", + srcs = [ + 
"phtree_multimap_d_test_unique_ptr_values.cc", + ], + linkstatic = True, + deps = [ + "//phtree:phtree", + "//test/testing/gtest_main", + ], +) + +cc_test( + name = "phtree_d_test", + timeout = "long", + srcs = [ + "phtree_d_test.cc", + ], + linkstatic = True, + deps = [ + "//phtree:phtree", + "//test/testing/gtest_main", + ], +) + +cc_test( + name = "phtree_d_test_filter", + timeout = "long", + srcs = [ + "phtree_d_test_filter.cc", + ], + linkstatic = True, + deps = [ + "//phtree:phtree", + "//test/testing/gtest_main", + ], +) + +cc_test( + name = "phtree_box_d_test_filter", + timeout = "long", + srcs = [ + "phtree_box_d_test_filter.cc", + ], + linkstatic = True, + deps = [ + "//phtree:phtree", + "//test/testing/gtest_main", + ], +) + +cc_test( + name = "phtree_multimap_d_test_filter", + timeout = "long", + srcs = [ + "phtree_multimap_d_test_filter.cc", + ], + linkstatic = True, + deps = [ + "//phtree:phtree", + "//test/testing/gtest_main", + ], +) + +cc_test( + name = "phtree_d_test_copy_move", + timeout = "long", + srcs = [ + "phtree_d_test_copy_move.cc", + ], + linkstatic = True, + deps = [ + "//phtree:phtree", + "//test/testing/gtest_main", + ], +) + +cc_test( + name = "phtree_multimap_d_test_copy_move", + timeout = "long", + srcs = [ + "phtree_multimap_d_test_copy_move.cc", + ], + linkstatic = True, + deps = [ + "//phtree:phtree", + "//test/testing/gtest_main", + ], +) + +cc_test( + name = "phtree_d_test_custom_key", + timeout = "long", + srcs = [ + "phtree_d_test_custom_key.cc", + ], + linkstatic = True, + deps = [ + "//phtree:phtree", + "//test/testing/gtest_main", + ], +) + +cc_test( + name = "phtree_d_test_preprocessor", + timeout = "long", + srcs = [ + "phtree_d_test_preprocessor.cc", + ], + linkstatic = True, + deps = [ + "//phtree:phtree", + "//test/testing/gtest_main", + ], +) + +cc_test( + name = "phtree_multimap_d_test", + timeout = "long", + srcs = [ + "phtree_multimap_d_test.cc", + ], + linkstatic = True, + deps = [ + "//phtree:phtree", + 
"//test/testing/gtest_main", + ], +) + +cc_test( + name = "phtree_box_d_test_query_types", + timeout = "long", + srcs = [ + "phtree_box_d_test_query_types.cc", + ], + linkstatic = True, + deps = [ + "//phtree:phtree", + "//test/testing/gtest_main", + ], +) + +cc_test( + name = "phtree_box_d_test", + timeout = "long", + srcs = [ + "phtree_box_d_test.cc", + ], + linkstatic = True, + deps = [ + "//phtree:phtree", + "//test/testing/gtest_main", + ], +) + +cc_test( + name = "phtree_multimap_box_d_test", + timeout = "long", + srcs = [ + "phtree_multimap_box_d_test.cc", + ], + linkstatic = True, + deps = [ + "//phtree:phtree", + "//test/testing/gtest_main", + ], +) + +cc_test( + name = "phtree_f_test", + timeout = "long", + srcs = [ + "phtree_f_test.cc", + ], + linkstatic = True, + deps = [ + "//phtree:phtree", + "//test/testing/gtest_main", + ], +) + +cc_test( + name = "phtree_box_f_test", + timeout = "long", + srcs = [ + "phtree_box_f_test.cc", + ], + linkstatic = True, + deps = [ + "//phtree:phtree", + "//test/testing/gtest_main", + ], +) + +cc_test( + name = "phtree_test_issues", + timeout = "long", + srcs = [ + "phtree_test_issues.cc", + ], + linkstatic = True, + deps = [ + "//phtree:phtree", + "//test/testing/gtest_main", + ], +) + diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt new file mode 100644 index 00000000..3484ccb3 --- /dev/null +++ b/test/CMakeLists.txt @@ -0,0 +1,61 @@ +cmake_minimum_required(VERSION 3.14) +project(phtree-tests LANGUAGES CXX) + +# Avoids LNK2038 Error with MSVC +set(gtest_force_shared_crt on) + +include(FetchContent) +include(common/scripts.cmake) + +FetchContent_Declare( + googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG release-1.12.1 +) +FetchContent_MakeAvailable(googletest) +add_library(GTest::GTest INTERFACE IMPORTED) +target_link_libraries(GTest::GTest INTERFACE gtest_main) + +# The next line is optional, but keeps your CACHE cleaner: +mark_as_advanced( + BUILD_GMOCK BUILD_GTEST 
BUILD_SHARED_LIBS + gmock_build_tests gtest_build_samples gtest_build_tests + gtest_disable_pthreads gtest_force_shared_crt gtest_hide_internal_symbols +) + +# If you are interested in keeping IDEs that support folders clean, I would also add these lines: +set_target_properties(gtest PROPERTIES FOLDER extern) +set_target_properties(gtest_main PROPERTIES FOLDER extern) + + +# package_add_test(phtree_all_test phtree_test.cc phtree_d_test.cc phtree_f_test.cc) +package_add_test(phtree_test phtree_test.cc) +package_add_test(phtree_test_const_values phtree_test_const_values.cc) +package_add_test(phtree_test_issues phtree_test_issues.cc) +target_compile_definitions(phtree_test_issues PUBLIC SKIP_TEST_MEMORY_LEAKS=ON) +package_add_test(phtree_test_ptr_values phtree_test_ptr_values.cc) +package_add_test(phtree_test_unique_ptr_values phtree_test_unique_ptr_values.cc) + +package_add_test(phtree_f_test phtree_f_test.cc) + +package_add_test(phtree_d_test phtree_d_test.cc) +package_add_test(phtree_d_test_copy_move phtree_d_test_copy_move.cc) +package_add_test(phtree_d_test_custom_key phtree_d_test_custom_key.cc) +package_add_test(phtree_d_test_filter phtree_d_test_filter.cc) +package_add_test(phtree_d_test_preprocessor phtree_d_test_preprocessor.cc) + +package_add_test(phtree_box_f_test phtree_box_f_test.cc) + +package_add_test(phtree_box_d_test phtree_box_d_test.cc) +package_add_test(phtree_box_d_test_filter phtree_box_d_test_filter.cc) +package_add_test(phtree_box_d_test_query_types phtree_box_d_test_query_types.cc) + +package_add_test(phtree_multimap_d_test phtree_multimap_d_test.cc) +package_add_test(phtree_multimap_d_test_copy_move phtree_multimap_d_test_copy_move.cc) +package_add_test(phtree_multimap_d_test_filter phtree_multimap_d_test_filter.cc) +package_add_test(phtree_multimap_d_test_unique_ptr_values phtree_multimap_d_test_unique_ptr_values.cc) + +package_add_test(phtree_multimap_box_d_test phtree_multimap_box_d_test.cc) + +add_subdirectory(common) + diff --git 
a/test/common/BUILD b/test/common/BUILD new file mode 100644 index 00000000..01452079 --- /dev/null +++ b/test/common/BUILD @@ -0,0 +1,131 @@ +package(default_visibility = ["//visibility:private"]) + +cc_test( + name = "base_types_test", + timeout = "long", + srcs = [ + "base_types_test.cc", + ], + linkstatic = True, + deps = [ + "//phtree/common:common", + "//test/testing/gtest_main", + ], +) + +cc_test( + name = "bits_test", + timeout = "long", + srcs = [ + "bits_test.cc", + ], + linkstatic = True, + deps = [ + "//phtree/common:common", + "//test/testing/gtest_main", + ], +) + +cc_test( + name = "common_test", + timeout = "long", + srcs = [ + "common_test.cc", + ], + linkstatic = True, + deps = [ + "//phtree/common:common", + "//test/testing/gtest_main", + ], +) + +cc_test( + name = "distance_test", + timeout = "long", + srcs = [ + "distance_test.cc", + ], + linkstatic = True, + deps = [ + "//phtree/common:common", + "//test/testing/gtest_main", + ], +) + +cc_test( + name = "filter_test", + timeout = "long", + srcs = [ + "filter_test.cc", + ], + linkstatic = True, + deps = [ + "//phtree/common:common", + "//test/testing/gtest_main", + ], +) + +cc_test( + name = "flat_array_map_test", + timeout = "long", + srcs = [ + "flat_array_map_test.cc", + ], + linkstatic = True, + deps = [ + "//phtree/common:common", + "//test/testing/gtest_main", + ], +) + +cc_test( + name = "b_plus_tree_hash_map_test", + timeout = "long", + srcs = [ + "b_plus_tree_hash_map_test.cc", + ], + linkstatic = True, + deps = [ + "//phtree/common:common", + "//test/testing/gtest_main", + ], +) + +cc_test( + name = "b_plus_tree_map_test", + timeout = "long", + srcs = [ + "b_plus_tree_map_test.cc", + ], + linkstatic = True, + deps = [ + "//phtree/common:common", + "//test/testing/gtest_main", + ], +) + +cc_test( + name = "flat_sparse_map_test", + timeout = "long", + srcs = [ + "flat_sparse_map_test.cc", + ], + linkstatic = True, + deps = [ + "//phtree/common:common", + "//test/testing/gtest_main", + 
], +) + +cc_test( + name = "preprocessor_test", + timeout = "long", + srcs = [ + "converter_test.cc", + ], + linkstatic = True, + deps = [ + "//phtree/common:common", + "//test/testing/gtest_main", + ], +) diff --git a/test/common/CMakeLists.txt b/test/common/CMakeLists.txt new file mode 100644 index 00000000..63bc8c9a --- /dev/null +++ b/test/common/CMakeLists.txt @@ -0,0 +1,14 @@ +include(scripts.cmake) + +package_add_test(b_plus_tree_hash_map_test b_plus_tree_hash_map_test.cc) +package_add_test(b_plus_tree_map_test b_plus_tree_map_test.cc) +package_add_test(base_types_test base_types_test.cc) +package_add_test(bits_test bits_test.cc) +package_add_test(common_test common_test.cc) + +package_add_test(converter_test converter_test.cc) + +package_add_test(distance_test distance_test.cc) +package_add_test(filter_test filter_test.cc) +package_add_test(flat_array_map_test flat_array_map_test.cc) +package_add_test(flat_sparse_map_test flat_sparse_map_test.cc) diff --git a/test/common/b_plus_tree_hash_map_test.cc b/test/common/b_plus_tree_hash_map_test.cc new file mode 100644 index 00000000..5d74ae7a --- /dev/null +++ b/test/common/b_plus_tree_hash_map_test.cc @@ -0,0 +1,386 @@ +/* + * Copyright 2022 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "phtree/common/b_plus_tree_hash_map.h" +#include +#include +#include + +using namespace improbable::phtree; + +static int default_construct_count_ = 0; +static int construct_count_ = 0; +static int copy_construct_count_ = 0; +static int move_construct_count_ = 0; +static int copy_assign_count_ = 0; +static int move_assign_count_ = 0; +static int destruct_count_ = 0; + +[[maybe_unused]] static void reset_id_counters() { + default_construct_count_ = 0; + construct_count_ = 0; + copy_construct_count_ = 0; + move_construct_count_ = 0; + copy_assign_count_ = 0; + move_assign_count_ = 0; + destruct_count_ = 0; +} + +[[maybe_unused]] static void print_id_counters() { + std::cout << "dc=" << default_construct_count_ << " c=" << construct_count_ + << " cc=" << copy_construct_count_ << " mc=" << move_construct_count_ + << " ca=" << copy_assign_count_ << " ma=" << move_assign_count_ + << " d=" << destruct_count_ << std::endl; +} + +struct Id { + Id() : _i{0} { + ++default_construct_count_; + } + + explicit Id(const size_t i) : _i{static_cast(i)} { + ++construct_count_; + } + + explicit Id(const int i) : _i{i} { + ++construct_count_; + } + + Id(const Id& other) { + ++copy_construct_count_; + _i = other._i; + } + + Id(Id&& other) noexcept { + ++move_construct_count_; + _i = other._i; + } + + Id& operator=(const Id& other) noexcept { + ++copy_assign_count_; + _i = other._i; + return *this; + } + Id& operator=(Id&& other) noexcept { + ++move_assign_count_; + _i = other._i; + return *this; + } + + bool operator==(const Id& rhs) const { + return _i == rhs._i; + } + + ~Id() { + ++destruct_count_; + } + + int _i; +}; + +namespace std { +template <> +struct hash { + size_t operator()(const Id& x) const { + return std::hash{}(x._i % 10); + } +}; +}; // namespace std + +template +void CheckMapResult(const R& result, END end, const K& key, const V& val) { + ASSERT_NE(result, end); + ASSERT_EQ(result->first, key); + ASSERT_EQ(result->second, val); +} + +template +void 
CheckMapResultPair(const R& result, bool expected_success, const K& key, const V& val) { + assert(result.second == expected_success); + ASSERT_EQ(result.second, expected_success); + ASSERT_EQ(result.first->first, key); + ASSERT_EQ(result.first->second, val); +} + +template +void CheckSetResult(const R& result, END end, const K& key) { + ASSERT_NE(result, end); + ASSERT_EQ(*result, key); +} + +template +void CheckSetResultPair(const R& result, bool expected_success, const K& key) { + assert(result.second == expected_success); + ASSERT_EQ(result.second, expected_success); + ASSERT_EQ(*result.first, key); +} + +template +void SmokeTestMap() { + const int N = 300; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, N / 2); + + size_t val = 0; + for (int i = 0; i < 10; i++) { + b_plus_tree_hash_map> test_map; + std::unordered_map reference_map; + for (int j = 0; j < N; j++) { + size_t key = cube_distribution(random_engine); + Id id(key); + bool hasVal = test_map.find(id) != test_map.end(); + bool hasValRef = reference_map.find(id) != reference_map.end(); + ASSERT_EQ(hasVal, hasValRef); + + if (!hasVal) { + if (key % 6 == 0) { + CheckMapResultPair(test_map.emplace(id, val), true, id, val); + CheckMapResultPair(test_map.emplace(id, val), false, id, val); + } else if (key % 6 == 1) { + CheckMapResultPair(test_map.try_emplace(id, val), true, id, val); + CheckMapResultPair(test_map.try_emplace(id, val), false, id, val); + } else if (key % 6 == 2) { + // Leaf-hint of questionable quality + auto hint = test_map.find(Id(key - 1)); + CheckMapResult(test_map.try_emplace(hint, id, val), test_map.end(), id, val); + CheckMapResult(test_map.try_emplace(hint, id, val), test_map.end(), id, val); + } else if (key % 6 == 3) { + auto hint = j % 2 == 0 ? 
test_map.begin() : test_map.end(); + // Bad hint + CheckMapResult(test_map.try_emplace(hint, id, val), test_map.end(), id, val); + CheckMapResult(test_map.try_emplace(hint, id, val), test_map.end(), id, val); + } else if (key % 6 == 4) { + // Leaf-hint of questionable quality + auto hint = test_map.find(Id(key - 1)); + CheckMapResult(test_map.emplace_hint(hint, id, val), test_map.end(), id, val); + CheckMapResult(test_map.emplace_hint(hint, id, val), test_map.end(), id, val); + } else { + auto hint = j % 2 == 0 ? test_map.begin() : test_map.end(); + // Bad hint + CheckMapResult(test_map.emplace_hint(hint, id, val), test_map.end(), id, val); + CheckMapResult(test_map.emplace_hint(hint, id, val), test_map.end(), id, val); + } + test_map._check(); + reference_map.emplace(id, val); + } + + ASSERT_EQ(test_map.size(), reference_map.size()); + for (auto& entry : reference_map) { + const Id& kRef = entry.first; + size_t vMap = test_map.find(kRef)->second; + ASSERT_EQ(vMap, entry.second); + ASSERT_TRUE(test_map.count(kRef)); + } + for (auto& entry : test_map) { + Id& k = entry.first; + size_t vRef = reference_map.find(k)->second; + size_t vMap = test_map.find(k)->second; + ASSERT_EQ(vMap, vRef); + } + ++val; + } + } +} + +TEST(PhTreeBptHashMapTest, SmokeTestNonUnique) { + SmokeTestMap>(); +} + +TEST(PhTreeBptHashMapTest, SmokeTestSameHash) { + struct DumbHash { + size_t operator()(const Id&) const { + return 42; + } + }; + SmokeTestMap(); +} + +template +void SmokeTestSet() { + const int N = 200; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, N / 2); + + for (int i = 0; i < 10; i++) { + b_plus_tree_hash_set test_map; + std::unordered_set reference_map; + for (int j = 0; j < N; j++) { + { + size_t key = cube_distribution(random_engine); + Id id(key); + bool hasVal = test_map.find(id) != test_map.end(); + bool hasValRef = reference_map.find(id) != reference_map.end(); + ASSERT_EQ(hasVal, hasValRef); + + if (!hasVal) { + 
if (key % 3 == 0) { + CheckSetResultPair(test_map.emplace(id), true, id); + CheckSetResultPair(test_map.emplace(key), false, id); + } else if (key % 3 == 1) { + // Leaf-hint of questionable quality + auto hint = test_map.find(Id(key - 1)); + CheckSetResult(test_map.emplace_hint(hint, id), test_map.end(), id); + CheckSetResult(test_map.emplace_hint(hint, key), test_map.end(), id); + } else { + auto hint = j % 2 == 0 ? test_map.begin() : test_map.end(); + // Bad hint + CheckSetResult(test_map.emplace_hint(hint, id), test_map.end(), id); + CheckSetResult(test_map.emplace_hint(hint, key), test_map.end(), id); + } + test_map._check(); + reference_map.emplace(id); + } + } + + ASSERT_EQ(test_map.size(), reference_map.size()); + for (auto& id : reference_map) { + Id& idMap = *test_map.find(id); + ASSERT_EQ(idMap, id); + } + for (auto& id : test_map) { + const Id& vRef = *reference_map.find(id); + Id& vMap = *test_map.find(id); + ASSERT_EQ(vMap, vRef); + } + } + } +} + +TEST(PhTreeBptHashSetTest, SmokeTestNonUnique) { + SmokeTestSet>(); +} + +TEST(PhTreeBptHashSetTest, SmokeTestSameHash) { + struct DumbHash { + size_t operator()(const Id&) const { + return 42; + } + }; + SmokeTestSet(); +} + +TEST(PhTreeBptHashMapTest, SmokeTestWithTryEmplace) { + const int N = 200; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, N / 2); + + for (int i = 0; i < 10; i++) { + b_plus_tree_hash_map, std::equal_to<>> test_map; + std::map reference_map; + for (int j = 0; j < N; j++) { + size_t val = cube_distribution(random_engine); + bool hasVal = test_map.find(val) != test_map.end(); + bool hasValRef = reference_map.find(val) != reference_map.end(); + ASSERT_EQ(hasVal, hasValRef); + if (!hasVal) { + reference_map.emplace(val, val); + test_map.try_emplace(val, val); + } + ASSERT_EQ(test_map.size(), reference_map.size()); + for (auto entry : reference_map) { + size_t vRef = entry.first; + size_t vMap = test_map.find(vRef)->second; + 
ASSERT_EQ(vMap, vRef); + } + for (auto entry : test_map) { + size_t v = entry.first; + size_t vRef = reference_map.find(v)->second; + size_t vMap = test_map.find(v)->second; + ASSERT_EQ(vMap, vRef); + } + } + } +} + +template +void SmokeTestWithErase(bool by_iterator) { + const int N = 200; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, N / 2); + + for (int i = 0; i < 10; i++) { + b_plus_tree_hash_map> test_map{}; + std::unordered_map reference_map{}; + std::vector key_list{}; + for (int j = 0; j < N; j++) { + size_t key = cube_distribution(random_engine); + Id id(key); + bool hasVal = test_map.find(id) != test_map.end(); + bool hasValRef = reference_map.find(id) != reference_map.end(); + ASSERT_EQ(hasVal, hasValRef); + reference_map.emplace(id, key); + test_map.try_emplace(id, key); + key_list.emplace_back(key); + } + + int x = 0; + std::shuffle(key_list.begin(), key_list.end(), random_engine); + for (auto key : key_list) { + Id id(key); + // This may try to erase an entry that does not exist! + auto it = test_map.find(id); + if (it == test_map.end()) { + ASSERT_EQ(0u, reference_map.erase(id)); + continue; + } + if (by_iterator) { + auto next = it; + ++next; + auto is_last = next == test_map.end(); + auto next_val = is_last ? 
Id(-1) : next->first; + auto result = test_map.erase(it); + if (is_last) { + ASSERT_EQ(test_map.end(), result); + } else { + ASSERT_NE(test_map.end(), result); + ASSERT_EQ(next_val, result->first); + } + } else { + test_map.erase(id); + } + test_map._check(); + ASSERT_EQ(1u, reference_map.erase(id)); + for (auto& entry : reference_map) { + const Id& vRef = entry.first; + Id& vMap = test_map.find(vRef)->first; + ASSERT_EQ(vMap, vRef); + } + for (auto& entry : test_map) { + Id& v = entry.first; + const Id& vRef = reference_map.find(v)->first; + Id& vMap = test_map.find(v)->first; + ASSERT_EQ(vMap, vRef); + } + ASSERT_EQ(test_map.size(), reference_map.size()); + ++x; + } + } +} + +TEST(PhTreeBptHashMapTest, SmokeTestWithErase) { + SmokeTestWithErase>(true); + SmokeTestWithErase>(false); +} + +TEST(PhTreeBptHashMapTest, SmokeTestWithEraseSameHash) { + struct DumbHash { + size_t operator()(const Id&) const { + return 42; + } + }; + SmokeTestWithErase(true); + SmokeTestWithErase(false); +} diff --git a/test/common/b_plus_tree_map_test.cc b/test/common/b_plus_tree_map_test.cc new file mode 100644 index 00000000..5e83b511 --- /dev/null +++ b/test/common/b_plus_tree_map_test.cc @@ -0,0 +1,181 @@ +/* + * Copyright 2022 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "phtree/common/b_plus_tree_map.h" +#include +#include + +using namespace improbable::phtree; + +TEST(PhTreeBptMapTest, SmokeTest) { + const int max_size = 200; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, max_size - 1); + + for (int i = 0; i < 10; i++) { + b_plus_tree_map test_map; + std::map reference_map; + for (int j = 0; j < 2 * max_size; j++) { + size_t val = cube_distribution(random_engine); + bool hasVal = test_map.find(val) != test_map.end(); + bool hasValRef = reference_map.find(val) != reference_map.end(); + ASSERT_EQ(hasVal, hasValRef); + if (!hasVal) { + reference_map.emplace(val, val); + test_map.emplace(val, val); + test_map._check(); + } + ASSERT_EQ(test_map.size(), reference_map.size()); + for (auto it : reference_map) { + size_t vRef = it.first; + size_t vMap = test_map.find(vRef)->second; + ASSERT_EQ(vMap, vRef); + } + for (auto it : test_map) { + size_t v = it.first; + size_t vRef = reference_map.find(v)->second; + size_t vMap = test_map.find(v)->second; + ASSERT_EQ(vMap, vRef); + } + } + } +} + +TEST(PhTreeBptMapTest, SmokeTestWithTryEmplace) { + const int max_size = 200; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, max_size - 1); + + for (int i = 0; i < 10; i++) { + b_plus_tree_map test_map; + std::map reference_map; + for (int j = 0; j < 2 * max_size; j++) { + size_t val = cube_distribution(random_engine); + bool hasVal = test_map.find(val) != test_map.end(); + bool hasValRef = reference_map.find(val) != reference_map.end(); + ASSERT_EQ(hasVal, hasValRef); + if (!hasVal) { + reference_map.emplace(val, val); + test_map.try_emplace(val, val); + } + ASSERT_EQ(test_map.size(), reference_map.size()); + for (auto it : reference_map) { + size_t vRef = it.first; + size_t vMap = test_map.find(vRef)->second; + ASSERT_EQ(vMap, vRef); + } + for (auto it : test_map) { + size_t v = it.first; + size_t vRef = 
reference_map.find(v)->second; + size_t vMap = test_map.find(v)->second; + ASSERT_EQ(vMap, vRef); + } + } + } +} + +TEST(PhTreeBptMapTest, SmokeTestWithErase) { + const int max_size = 200; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, max_size - 1); + + for (int i = 0; i < 10; i++) { + b_plus_tree_map test_map{}; + std::unordered_map reference_map{}; + std::vector key_list{}; + for (int j = 0; j < 2 * max_size; j++) { + size_t val = cube_distribution(random_engine); + bool hasVal = test_map.find(val) != test_map.end(); + bool hasValRef = reference_map.find(val) != reference_map.end(); + ASSERT_EQ(hasVal, hasValRef); + if (!hasVal) { + reference_map.emplace(val, val); + test_map.try_emplace(val, val); + key_list.emplace_back(val); + } + } + + std::shuffle(key_list.begin(), key_list.end(), random_engine); + for (auto key : key_list) { + if (key % 2 == 0) { + test_map.erase(key); + } else { + auto it = test_map.find(key); + ASSERT_NE(it, test_map.end()); + ASSERT_EQ(it->second, key); + test_map.erase(it); + } + test_map._check(); + reference_map.erase(key); + for (auto it : reference_map) { + size_t vRef = it.first; + size_t vMap = test_map.find(vRef)->second; + ASSERT_EQ(vMap, vRef); + } + for (auto it : test_map) { + size_t v = it.first; + size_t vRef = reference_map.find(v)->second; + size_t vMap = test_map.find(v)->second; + ASSERT_EQ(vMap, vRef); + } + ASSERT_EQ(test_map.size(), reference_map.size()); + } + } +} + +TEST(PhTreeBptMapTest, SmokeTestLowerBound) { + const int max_size = 200; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, max_size - 1); + + for (int i = 0; i < 10; i++) { + b_plus_tree_map test_map; + std::map reference_map; + for (int j = 0; j < 2 * max_size; j++) { + size_t val = cube_distribution(random_engine); + bool hasVal = test_map.find(val) != test_map.end(); + bool hasValRef = reference_map.find(val) != reference_map.end(); + 
ASSERT_EQ(hasVal, hasValRef); + if (!hasVal) { + reference_map.emplace(val, val); + test_map.try_emplace(val, val); + } + ASSERT_EQ(test_map.size(), reference_map.size()); + for (auto it : reference_map) { + size_t vRef = it.first; + size_t vMap = test_map.lower_bound(vRef)->second; + ASSERT_EQ(vMap, vRef); + } + for (auto it : test_map) { + size_t v = it.first; + size_t vRef = reference_map.find(v)->second; + size_t vMap = test_map.lower_bound(v)->second; + ASSERT_EQ(vMap, vRef); + } + for (size_t v = 0; v < max_size + 5; ++v) { + auto itRef = reference_map.lower_bound(v); + auto itMap = test_map.lower_bound(v); + if (itRef == reference_map.end()) { + ASSERT_EQ(itMap, test_map.end()); + } else { + ASSERT_NE(itMap, test_map.end()); + // ASSERT_EQ(v, itRef->second); + ASSERT_EQ(itRef->second, itMap->second); + } + } + } + } +} diff --git a/phtree/common/base_types_test.cc b/test/common/base_types_test.cc similarity index 96% rename from phtree/common/base_types_test.cc rename to test/common/base_types_test.cc index 04a45d6a..389dbf74 100644 --- a/phtree/common/base_types_test.cc +++ b/test/common/base_types_test.cc @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "base_types.h" -#include +#include "phtree/common/base_types.h" +#include #include using namespace improbable::phtree; diff --git a/phtree/common/bits_test.cc b/test/common/bits_test.cc similarity index 95% rename from phtree/common/bits_test.cc rename to test/common/bits_test.cc index e4129bf3..bc64c5cb 100644 --- a/phtree/common/bits_test.cc +++ b/test/common/bits_test.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "bits.h" -#include +#include "phtree/common/bits.h" +#include #include using namespace improbable::phtree; diff --git a/phtree/common/common_test.cc b/test/common/common_test.cc similarity index 96% rename from phtree/common/common_test.cc rename to test/common/common_test.cc index 788c9fd2..0a2657d8 100644 --- a/phtree/common/common_test.cc +++ b/test/common/common_test.cc @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "common.h" -#include +#include "phtree/common/common.h" +#include #include using namespace improbable::phtree; diff --git a/phtree/common/converter_test.cc b/test/common/converter_test.cc similarity index 93% rename from phtree/common/converter_test.cc rename to test/common/converter_test.cc index c9ede115..a2859904 100644 --- a/phtree/common/converter_test.cc +++ b/test/common/converter_test.cc @@ -14,9 +14,9 @@ * limitations under the License. */ -#include "converter.h" -#include "common.h" -#include +#include "phtree/common/converter.h" +#include "phtree/common/common.h" +#include using namespace improbable::phtree; diff --git a/phtree/common/distance_test.cc b/test/common/distance_test.cc similarity index 95% rename from phtree/common/distance_test.cc rename to test/common/distance_test.cc index 0038285a..eb44a93e 100644 --- a/phtree/common/distance_test.cc +++ b/test/common/distance_test.cc @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "common.h" -#include +#include "phtree/common/common.h" +#include #include using namespace improbable::phtree; diff --git a/test/common/filter_test.cc b/test/common/filter_test.cc new file mode 100644 index 00000000..d18d8bcd --- /dev/null +++ b/test/common/filter_test.cc @@ -0,0 +1,126 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "phtree/common/common.h" +#include +#include + +using namespace improbable::phtree; + +TEST(PhTreeFilterTest, FilterSphereTest) { + ConverterNoOp<2, scalar_64_t> conv{}; + FilterSphere filter{{5, 3}, 5, conv, DistanceEuclidean<2>{}}; + // root is always valid + ASSERT_TRUE(filter.IsNodeValid({0, 0}, 63)); + // valid because node encompasses the circle + ASSERT_TRUE(filter.IsNodeValid({1, 1}, 10)); + // valid because circle encompasses the node + ASSERT_TRUE(filter.IsNodeValid({5, 5}, 2)); + // valid because circle encompasses the node AABB + ASSERT_TRUE(filter.IsNodeValid({7, 7}, 1)); + // valid because circle touches the edge of the node AABB + ASSERT_TRUE(filter.IsNodeValid({5, 9}, 1)); + // valid because circle cuts edge of node AABB + ASSERT_TRUE(filter.IsNodeValid({12, 7}, 3)); + ASSERT_TRUE(filter.IsNodeValid({10, 7}, 2)); + // invalid because node is just outside the circle + ASSERT_FALSE(filter.IsNodeValid({5, 10}, 1)); + ASSERT_FALSE(filter.IsNodeValid({12, 12}, 3)); + + ASSERT_TRUE(filter.IsEntryValid({3, 7}, nullptr)); + ASSERT_TRUE(filter.IsEntryValid({5, 8}, nullptr)); + ASSERT_FALSE(filter.IsEntryValid({3, 8}, nullptr)); +} + +TEST(PhTreeFilterTest, FilterAABBTest) { + ConverterNoOp<2, scalar_64_t> conv{}; + FilterAABB filter{{3, 3}, {7, 7}, conv}; + // root is always valid + ASSERT_TRUE(filter.IsNodeValid({0, 0}, 63)); + // valid because node encompasses the AABB + ASSERT_TRUE(filter.IsNodeValid({1, 1}, 10)); + // valid + ASSERT_TRUE(filter.IsNodeValid({7, 7}, 1)); + // invalid + ASSERT_FALSE(filter.IsNodeValid({88, 
5}, 1)); + + ASSERT_TRUE(filter.IsEntryValid({3, 7}, nullptr)); + ASSERT_FALSE(filter.IsEntryValid({2, 8}, nullptr)); +} + +TEST(PhTreeFilterTest, FilterNoOpSmokeTest) { + auto filter = FilterNoOp(); + ASSERT_TRUE(filter.IsNodeValid>({3, 7, 2}, 10)); + ASSERT_TRUE(filter.IsEntryValid>({3, 7, 2}, 10)); +} + +template +void TestAssignability() { + ASSERT_TRUE(std::is_copy_constructible_v); + ASSERT_TRUE(std::is_copy_assignable_v); + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); +} + +TEST(PhTreeFilterTest, FilterAssignableTest) { + using CONV = ConverterIEEE<3>; + using DIST = DistanceEuclidean<3>; + TestAssignability(); + TestAssignability>(); + TestAssignability>(); + TestAssignability>(); + TestAssignability>(); +} + +TEST(PhTreeFilterTest, ConverterAssignableTest) { + TestAssignability>(); + TestAssignability(); +} + +class TestConverter : public ConverterMultiply<2, 1, 1> { + public: + TestConverter() = default; + + TestConverter(const TestConverter&) = delete; + TestConverter(TestConverter&&) = delete; + TestConverter& operator=(const TestConverter&) = delete; + TestConverter& operator=(TestConverter&&) = delete; +}; + +TEST(PhTreeFilterTest, ConstructFilterAABBTest) { + TestConverter conv; + FilterAABB filter1{{3, 3}, {7, 7}, conv}; + ASSERT_TRUE(filter1.IsNodeValid({0, 0}, 63)); + + FilterAABB filter2{{3, 3}, {7, 7}, TestConverter()}; + ASSERT_TRUE(filter2.IsNodeValid({0, 0}, 63)); +} + +TEST(PhTreeFilterTest, ConstructFilterSphereTest) { + DistanceL1<2> dist; + TestConverter conv; + FilterSphere filter1a{{3, 3}, 7, conv}; + ASSERT_TRUE(filter1a.IsNodeValid({0, 0}, 63)); + FilterSphere filter1b{{3, 3}, 7, conv, {}}; + ASSERT_TRUE(filter1b.IsNodeValid({0, 0}, 63)); + FilterSphere filter1c{{3, 3}, 7, conv, dist}; + ASSERT_TRUE(filter1c.IsNodeValid({0, 0}, 63)); + FilterSphere filter1d{{3, 3}, 7, conv, DistanceL1<2>{}}; + ASSERT_TRUE(filter1d.IsNodeValid({0, 0}, 63)); + + FilterSphere filter2{{3, 3}, 7, TestConverter()}; + 
ASSERT_TRUE(filter2.IsNodeValid({0, 0}, 63)); +} diff --git a/phtree/common/flat_array_map_test.cc b/test/common/flat_array_map_test.cc similarity index 98% rename from phtree/common/flat_array_map_test.cc rename to test/common/flat_array_map_test.cc index e0250820..618f5254 100644 --- a/phtree/common/flat_array_map_test.cc +++ b/test/common/flat_array_map_test.cc @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "flat_array_map.h" -#include +#include "phtree/common/flat_array_map.h" +#include #include using namespace improbable::phtree; diff --git a/phtree/common/flat_sparse_map_test.cc b/test/common/flat_sparse_map_test.cc similarity index 97% rename from phtree/common/flat_sparse_map_test.cc rename to test/common/flat_sparse_map_test.cc index dcb72bba..99d581d7 100644 --- a/phtree/common/flat_sparse_map_test.cc +++ b/test/common/flat_sparse_map_test.cc @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "flat_sparse_map.h" -#include +#include "phtree/common/flat_sparse_map.h" +#include #include using namespace improbable::phtree; diff --git a/test/common/scripts.cmake b/test/common/scripts.cmake new file mode 100644 index 00000000..012bb4fa --- /dev/null +++ b/test/common/scripts.cmake @@ -0,0 +1,16 @@ +macro(package_add_test TESTNAME) + # create an executable in which the tests will be stored + add_executable(${TESTNAME} ${ARGN}) + # link the Google test infrastructure, mocking library, and a default main function to + # the test executable. Remove g_test_main if writing your own main function. + target_link_libraries(${TESTNAME} gtest gmock gtest_main) + target_include_directories(${TESTNAME} PRIVATE ${PROJECT_SOURCE_DIR}/..) 
+ # gtest_discover_tests replaces gtest_add_tests, + # see https://cmake.org/cmake/help/v3.10/module/GoogleTest.html for more options to pass to it + gtest_discover_tests(${TESTNAME} + # set a working directory so your project root so that you can find test data via paths relative to the project root + WORKING_DIRECTORY ${PROJECT_DIR} + PROPERTIES VS_DEBUGGER_WORKING_DIRECTORY "${PROJECT_DIR}" + ) + set_target_properties(${TESTNAME} PROPERTIES FOLDER test) +endmacro() diff --git a/phtree/phtree_box_d_test.cc b/test/phtree_box_d_test.cc similarity index 97% rename from phtree/phtree_box_d_test.cc rename to test/phtree_box_d_test.cc index 8f630be1..cf4c3955 100644 --- a/phtree/phtree_box_d_test.cc +++ b/test/phtree_box_d_test.cc @@ -15,7 +15,7 @@ */ #include "phtree/phtree.h" -#include +#include #include #include @@ -39,12 +39,10 @@ struct Id { explicit Id(const size_t i) : _i(i){}; - bool operator==(Id& rhs) { + bool operator==(const Id& rhs) const { return _i == rhs._i; } - Id& operator=(Id const& rhs) = default; - size_t _i; }; @@ -172,7 +170,7 @@ void SmokeTestBasicOps(size_t N) { PhTreeDebugHelper::CheckConsistency(tree); } -TEST(PhTreeDTest, SmokeTestBasicOps) { +TEST(PhTreeBoxDTest, SmokeTestBasicOps) { SmokeTestBasicOps<1>(100); SmokeTestBasicOps<3>(10000); SmokeTestBasicOps<6>(10000); @@ -181,7 +179,7 @@ TEST(PhTreeDTest, SmokeTestBasicOps) { SmokeTestBasicOps<31>(100); } -TEST(PhTreeDTest, TestDebug) { +TEST(PhTreeBoxDTest, TestDebug) { const dimension_t dim = 3; TestTree tree; size_t N = 1000; @@ -274,7 +272,7 @@ TEST(PhTreeBoxDTest, TestEmplace) { ASSERT_EQ(i + 1, tree.size()); // try add again, this should _not_ replace the existing value - Id id2(-i); + Id id2(i + N); ASSERT_EQ(false, tree.emplace(p, id2).second); ASSERT_EQ(i, tree.emplace(p, id).first._i); ASSERT_EQ(tree.count(p), 1); @@ -432,8 +430,8 @@ TEST(PhTreeBoxDTest, TestUpdateWithEmplace) { PhBoxD pNew( {pOld.min()[0] + delta, pOld.min()[1] + delta, pOld.min()[2] + delta}, {pOld.max()[0] + 
delta, pOld.max()[1] + delta, pOld.max()[2] + delta}); - int n = tree.erase(pOld); - ASSERT_EQ(1, n); + size_t n = tree.erase(pOld); + ASSERT_EQ(1u, n); tree.emplace(pNew, 42u); ASSERT_EQ(1, tree.count(pNew)); ASSERT_EQ(0, tree.count(pOld)); @@ -461,8 +459,8 @@ TEST(PhTreeBoxDTest, TestUpdateWithEmplaceHint) { PhPointD max{pOld.max()[0] + delta, pOld.max()[1] + delta, pOld.max()[2] + delta}; TestPoint pNew{min, max}; auto iter = tree.find(pOld); - int n = tree.erase(iter); - ASSERT_EQ(1, n); + size_t n = tree.erase(iter); + ASSERT_EQ(1u, n); tree.emplace_hint(iter, pNew, 42u); ASSERT_EQ(1, tree.count(pNew)); if (delta != 0.0) { @@ -486,8 +484,8 @@ TEST(PhTreeBoxDTest, TestEraseByIterator) { for (auto& p : points) { auto iter = tree.find(p); ASSERT_NE(tree.end(), iter); - int count = tree.erase(iter); - ASSERT_EQ(1, count); + size_t count = tree.erase(iter); + ASSERT_EQ(1u, count); ASSERT_EQ(tree.end(), tree.find(p)); i++; } @@ -505,8 +503,8 @@ TEST(PhTreeBoxDTest, TestEraseByIteratorQuery) { for (size_t i = 0; i < N; ++i) { auto iter = tree.begin(); ASSERT_NE(tree.end(), iter); - int count = tree.erase(iter); - ASSERT_EQ(1, count); + size_t count = tree.erase(iter); + ASSERT_EQ(1u, count); } ASSERT_EQ(0, tree.erase(tree.end())); @@ -651,7 +649,7 @@ TEST(PhTreeBoxDTest, TestWindowQueryManyMoving) { size_t nn = 0; for (int i = -120; i < 120; i++) { PhPointD min{i * 10., i * 9., i * 11.}; - PhPointD max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; + PhPointD max{i * 10. + query_length, i * 9. + query_length, i * 11. 
+ query_length}; std::set referenceResult; referenceQuery(points, min, max, referenceResult); diff --git a/test/phtree_box_d_test_filter.cc b/test/phtree_box_d_test_filter.cc new file mode 100644 index 00000000..93fac118 --- /dev/null +++ b/test/phtree_box_d_test_filter.cc @@ -0,0 +1,632 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "phtree/phtree.h" +#include +#include +#include + +using namespace improbable::phtree; + +template +using TestKey = PhBoxD; + +template +using TestPoint = PhPointD; + +template +using TestTree = PhTreeBoxD; + +class DoubleRng { + public: + DoubleRng(double minIncl, double maxExcl) : eng(), rnd{minIncl, maxExcl} {} + + double next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_real_distribution rnd; +}; + +struct Id { + Id() = default; + + explicit Id(const int i) : _i(i){}; + + bool operator==(const Id& rhs) const { + return _i == rhs._i; + } + + Id(Id const& rhs) = default; + Id(Id&& rhs) = default; + Id& operator=(Id const& rhs) = default; + Id& operator=(Id&& rhs) = default; + + int _i; +}; + +template +void generateCube(std::vector>& points, size_t N) { + DoubleRng rng(-1000, 1000); + auto refTree = std::unordered_map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N; i++) { + auto point = PhPointD{rng.next(), rng.next(), rng.next()}; + auto box = PhBoxD{point, {point[0] + 1, point[1] + 1, point[2] + 
1}}; + if (refTree.count(box) != 0) { + i--; + continue; + } + + refTree.emplace(box, i); + points.push_back(box); + } + ASSERT_EQ(refTree.size(), N); + ASSERT_EQ(points.size(), N); +} + +template +void populate(TestTree& tree, std::vector>& points, size_t N) { + generateCube(points, N); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.insert(points[i], i).second); + } + ASSERT_EQ(N, tree.size()); +} + +static int f_default_construct_ = 0; +static int f_construct_ = 0; +static int f_copy_construct_ = 0; +static int f_move_construct_ = 0; +static int f_copy_assign_ = 0; +static int f_move_assign_ = 0; +static int f_destruct_ = 0; + +static void f_reset_id_counters() { + f_default_construct_ = 0; + f_construct_ = 0; + f_copy_construct_ = 0; + f_move_construct_ = 0; + f_copy_assign_ = 0; + f_move_assign_ = 0; + f_destruct_ = 0; +} + +template +struct FilterCount { + FilterCount() : last_known{} { + ++f_default_construct_; + } + + explicit FilterCount(const T i) : last_known{i} { + ++f_construct_; + } + + FilterCount(const FilterCount& other) { + ++f_copy_construct_; + last_known = other.last_known; + } + + FilterCount(FilterCount&& other) noexcept { + ++f_move_construct_; + last_known = other.last_known; + } + + FilterCount& operator=(const FilterCount& other) noexcept { + ++f_copy_assign_; + last_known = other.last_known; + return *this; + } + FilterCount& operator=(FilterCount&& other) noexcept { + ++f_move_assign_; + last_known = other.last_known; + return *this; + } + + ~FilterCount() { + ++f_destruct_; + } + + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint<2 * DIM>&, const T& value) { + last_known = const_cast(value); + return true; + } + [[nodiscard]] constexpr bool IsNodeValid(const PhPoint<2 * DIM>&, int) { + return true; + } + + T last_known; +}; + +template +struct DistanceCount { + DistanceCount() { + ++f_default_construct_; + } + + DistanceCount(const DistanceCount&) { + ++f_copy_construct_; + } + + DistanceCount(DistanceCount&&) noexcept { + 
++f_move_construct_; + } + + DistanceCount& operator=(const DistanceCount&) noexcept { + ++f_copy_assign_; + return *this; + } + DistanceCount& operator=(DistanceCount&&) noexcept { + ++f_move_assign_; + return *this; + } + + ~DistanceCount() { + ++f_destruct_; + } + + double operator()(const PhPointD& p1, const PhPointD& p2) const { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; ++i) { + double d2 = p1[i] - p2[i]; + sum2 += d2 * d2; + } + return sqrt(sum2); + }; +}; + +static size_t static_id = 0; + +template +struct CallbackCount { + CallbackCount() { + static_id = 0; + ++f_default_construct_; + } + + CallbackCount(const CallbackCount&) { + ++f_copy_construct_; + } + + CallbackCount(CallbackCount&&) noexcept { + ++f_move_construct_; + } + + CallbackCount& operator=(const CallbackCount&) noexcept { + ++f_copy_assign_; + return *this; + } + CallbackCount& operator=(CallbackCount&&) noexcept { + ++f_move_assign_; + return *this; + } + + ~CallbackCount() { + ++f_destruct_; + } + + void operator()(TestKey, Id& t) { + static_id = t._i; + } +}; + +template +struct FilterConst { + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint<2 * DIM>&, const T& value) const { + assert(value._i == 1); + return true; + } + [[nodiscard]] constexpr bool IsNodeValid(const PhPoint<2 * DIM>&, int) const { + return true; + } +}; + +template +struct CallbackConst { + void operator()(const TestKey, const Id& t) const { + static_id = t._i; + } +}; + +[[maybe_unused]] static void print_id_counters() { + std::cout << "dc=" << f_default_construct_ << " c=" << f_construct_ + << " cc=" << f_copy_construct_ << " mc=" << f_move_construct_ + << " ca=" << f_copy_assign_ << " ma=" << f_move_assign_ << " d=" << f_destruct_ + << std::endl; +} + +TEST(PhTreeTest, TestFilterAPI_FOR_EACH) { + // Test edge case: only one entry in tree + TestKey<3> p{{1, 2, 3}, {4, 5, 6}}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + CallbackCount<3> callback; + FilterCount<3, Id> filter{}; + // 
rvalue + tree.for_each(callback, filter); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // lvalue + tree.for_each(CallbackCount<3>(), FilterCount<3, Id>()); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_LE(1, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + CallbackCount<3> callbackC; + FilterConst<3, Id> filterC; + treeC.for_each(callbackC, filterC); + // rvalue + treeC.for_each(CallbackConst<3>{}, FilterConst<3, Id>()); + f_reset_id_counters(); +} + +TEST(PhTreeTest, TestFilterAPI_FOR_EACH_WQ) { + // Test edge case: only one entry in tree + TestKey<3> p{{1, 2, 3}, {4, 5, 6}}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + TestTree<3, Id>::QueryBox qb{{1, 2, 3}, {4, 5, 6}}; + CallbackCount<3> callback; + FilterCount<3, Id> filter{}; + // lvalue + tree.for_each(qb, callback, filter); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + tree.for_each({{1, 2, 3}, {4, 5, 6}}, CallbackCount<3>{}, FilterCount<3, Id>()); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_LE(1, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + treeC.for_each(qb, callback, filterC); + // rvalue + treeC.for_each({{1, 2, 3}, {4, 5, 6}}, CallbackConst<3>(), FilterConst<3, Id>()); + f_reset_id_counters(); +} + +TEST(PhTreeTest, TestFilterAPI_BEGIN) { + // Test edge case: 
only one entry in tree + TestKey<3> p{{1, 2, 3}, {4, 5, 6}}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + FilterCount<3, Id> filter{}; + // lvalue + ASSERT_EQ(tree.begin(filter)->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + ASSERT_EQ(tree.begin(FilterCount<3, Id>())->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_LE(1, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + ASSERT_EQ(treeC.begin(filterC)->_i, 1); + // rvalue + ASSERT_EQ(treeC.begin(FilterConst<3, Id>())->_i, 1); + f_reset_id_counters(); +} + +TEST(PhTreeTest, TestFilterAPI_WQ) { + // Test edge case: only one entry in tree + TestKey<3> p{{1, 2, 3}, {4, 5, 6}}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + TestTree<3, Id>::QueryBox qb{{1, 2, 3}, {4, 5, 6}}; + FilterCount<3, Id> filter{}; + // lvalue + ASSERT_EQ(tree.begin_query(qb, filter)->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + ASSERT_EQ(tree.begin_query({{1, 2, 3}, {4, 5, 6}}, FilterCount<3, Id>())->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_LE(1, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + ASSERT_EQ(treeC.begin_query(qb, filterC)->_i, 1); + // rvalue + ASSERT_EQ(treeC.begin_query(qb, FilterConst<3, Id>())->_i, 1); + f_reset_id_counters(); +} + +// TEST(PhTreeTest, TestFilterAPI_KNN) { +// // Test edge case: only one entry 
in tree +// TestKey<3> p{{1, 2, 3}, {4, 5, 6}}; +// auto tree = TestTree<3, Id>(); +// tree.emplace(p, Id{1}); +// +// FilterCount<3, Id> filter{}; +// DistanceCount<3> dist_fn{}; +// // lvalue +// ASSERT_EQ(tree.begin_knn_query(3, {2, 3, 4}, dist_fn, filter)->_i, 1); +// ASSERT_EQ(2, f_construct_ + f_default_construct_); +// ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); +// f_reset_id_counters(); +// +// // rvalue +// ASSERT_EQ(tree.begin_knn_query(3, {2, 3, 4}, DistanceCount<3>{}, FilterCount<3, Id>())->_i, +// 1); ASSERT_EQ(2, f_construct_ + f_default_construct_); ASSERT_LE(0, f_copy_construct_ + +// f_move_construct_ + f_copy_assign_ + f_move_assign_); f_reset_id_counters(); +// +// // rvalue #2 +// auto a = tree.begin_knn_query, FilterCount<3, Id>>(3, {2, 3, 4})->_i; +// ASSERT_EQ(a, 1); +// ASSERT_EQ(2, f_construct_ + f_default_construct_); +// ASSERT_LE(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); +// f_reset_id_counters(); +// +// // const Tree: just test that it compiles +// const TestTree<3, Id>& treeC = tree; +// // lvalue +// FilterConst<3, Id> filterC; +// ASSERT_EQ(treeC.begin_knn_query(3, {2, 3, 4}, dist_fn, filterC)->_i, 1); +// // rvalue +// ASSERT_EQ(treeC.begin_knn_query(3, {2, 3, 4}, DistanceCount<3>{}, FilterConst<3, Id>())->_i, +// 1); f_reset_id_counters(); +// } + +template +double distance(const TestPoint& p1, const TestKey& p2) { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; ++i) { + double closest = std::clamp(p1[i], p2.min()[i], p2.max()[i]); + double d2 = p1[i] - closest; + sum2 += d2 * d2; + } + return sqrt(sum2); +}; + +template +void referenceSphereQuery( + std::vector>& points, + TestPoint& center, + double radius, + std::set& result) { + for (size_t i = 0; i < points.size(); i++) { + auto& p = points[i]; + if (distance(center, p) <= radius) { + result.insert(i); + } + } +} + +template +void referenceAABBQuery( + std::vector>& points, + TestPoint& 
center, + double radius, + std::set& result) { + for (size_t i = 0; i < points.size(); i++) { + auto& p = points[i]; + bool inside = true; + for (dimension_t i = 0; i < DIM; ++i) { + inside &= (p.min()[i] <= center[i] + radius) && (p.max()[i] >= center[i] - radius); + } + + if (inside) { + result.insert(i); + } + } +} + +template +PhBoxD QueryBox(PhPointD& center, double radius) { + typename TestTree::QueryBox query_box{ + {center[0] - radius, center[1] - radius, center[2] - radius}, + {center[0] + radius, center[1] + radius, center[2] + radius}}; + return query_box; +} + +// We use 'int&' because gtest does not compile with assertions in non-void functions. +template +void testSphereQuery(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceSphereQuery(points, center, radius, referenceResult); + + result = 0; + auto filter = FilterBoxSphere(center, radius, tree.converter()); + for (auto it = tree.begin(filter); it != tree.end(); it++) { + auto& x = *it; + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + result++; + } + ASSERT_EQ(referenceResult.size(), result); +} + +template +void testSphereQueryWithBox(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceSphereQuery(points, center, radius, referenceResult); + + result = 0; + auto query_box = QueryBox(center, radius); + auto filter = FilterBoxSphere(center, radius, tree.converter()); + for (auto it = tree.begin_query(query_box, filter); it != tree.end(); it++) { + auto& x = *it; + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + result++; + } + ASSERT_EQ(referenceResult.size(), result); +} + +template +void testSphereQueryForEach(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + 
std::set referenceResult; + referenceSphereQuery(points, center, radius, referenceResult); + + result = 0; + auto filter = FilterBoxSphere(center, radius, tree.converter()); + auto callback = [&result, &referenceResult](TestKey, const size_t& x) { + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + ++result; + }; + tree.for_each(callback, filter); + ASSERT_EQ(referenceResult.size(), result); +} + +template +void testSphereQueryForEachQueryBox(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceSphereQuery(points, center, radius, referenceResult); + + result = 0; + auto query_box = QueryBox(center, radius); + auto filter = FilterBoxSphere(center, radius, tree.converter()); + auto callback = [&result, &referenceResult](TestKey, const size_t& x) { + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + ++result; + }; + tree.for_each(query_box, callback, filter); + ASSERT_EQ(referenceResult.size(), result); +} + +template +void testAABBQuery(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceAABBQuery(points, center, radius, referenceResult); + + result = 0; + auto query_box = QueryBox(center, radius); + auto filter = FilterBoxAABB(query_box.min(), query_box.max(), tree.converter()); + for (auto it = tree.begin(filter); it != tree.end(); it++) { + auto& x = *it; + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + result++; + } + ASSERT_EQ(referenceResult.size(), result); +} + +template +void Query0(QUERY query) { + TestPoint p{-10000, -10000, -10000}; + int n = 0; + query(p, 0.1, 100, n); + ASSERT_EQ(0, n); +} + +template +void QueryMany(QUERY query) { + TestPoint p{0, 0, 0}; + int n = 0; + query(p, 1000, 1000, n); + ASSERT_GT(n, 400); + ASSERT_LT(n, 800); +} + +template +void QueryManyAABB(QUERY query) { + 
TestPoint p{0, 0, 0}; + int n = 0; + query(p, 1000, 1000, n); + ASSERT_EQ(n, 1000); +} + +template +void QueryAll(QUERY query) { + TestPoint p{0, 0, 0}; + int n = 0; + query(p, 10000, 1000, n); + ASSERT_EQ(1000, n); +} + +TEST(PhTreeBoxDFilterTest, TestSphereQuery) { + Query0<3>(&testSphereQuery<3>); + QueryMany<3>(&testSphereQuery<3>); + QueryAll<3>(&testSphereQuery<3>); +} + +TEST(PhTreeBoxDFilterTest, TestSphereQueryWithQueryBox) { + Query0<3>(&testSphereQueryWithBox<3>); + QueryMany<3>(&testSphereQueryWithBox<3>); + QueryAll<3>(&testSphereQueryWithBox<3>); +} + +TEST(PhTreeBoxDFilterTest, TestSphereQueryForEach) { + Query0<3>(&testSphereQueryForEach<3>); + QueryMany<3>(&testSphereQueryForEach<3>); + QueryAll<3>(&testSphereQueryForEach<3>); +} + +TEST(PhTreeBoxDFilterTest, TestSphereQueryForEachWithQueryBox) { + Query0<3>(&testSphereQueryForEachQueryBox<3>); + QueryMany<3>(&testSphereQueryForEachQueryBox<3>); + QueryAll<3>(&testSphereQueryForEachQueryBox<3>); +} + +TEST(PhTreeBoxDFilterTest, TestAABBQuery) { + Query0<3>(&testAABBQuery<3>); + QueryManyAABB<3>(&testAABBQuery<3>); + QueryAll<3>(&testAABBQuery<3>); +} \ No newline at end of file diff --git a/phtree/phtree_box_d_test_query_types.cc b/test/phtree_box_d_test_query_types.cc similarity index 98% rename from phtree/phtree_box_d_test_query_types.cc rename to test/phtree_box_d_test_query_types.cc index c5460665..fea0cd99 100644 --- a/phtree/phtree_box_d_test_query_types.cc +++ b/test/phtree_box_d_test_query_types.cc @@ -15,7 +15,7 @@ */ #include "phtree/phtree.h" -#include +#include #include using namespace improbable::phtree; diff --git a/phtree/phtree_box_f_test.cc b/test/phtree_box_f_test.cc similarity index 97% rename from phtree/phtree_box_f_test.cc rename to test/phtree_box_f_test.cc index 05cfbe55..c8546528 100644 --- a/phtree/phtree_box_f_test.cc +++ b/test/phtree_box_f_test.cc @@ -15,7 +15,7 @@ */ #include "phtree/phtree.h" -#include +#include #include #include @@ -46,12 +46,10 @@ struct Id { 
explicit Id(const size_t i) : _i(i){}; - bool operator==(Id& rhs) { + bool operator==(const Id& rhs) const { return _i == rhs._i; } - Id& operator=(Id const& rhs) = default; - size_t _i; }; @@ -173,7 +171,7 @@ void SmokeTestBasicOps(size_t N) { PhTreeDebugHelper::CheckConsistency(tree); } -TEST(PhTreeDTest, SmokeTestBasicOps) { +TEST(PhTreeBoxFTest, SmokeTestBasicOps) { SmokeTestBasicOps<1>(100); SmokeTestBasicOps<3>(10000); SmokeTestBasicOps<6>(10000); @@ -182,7 +180,7 @@ TEST(PhTreeDTest, SmokeTestBasicOps) { SmokeTestBasicOps<31>(100); } -TEST(PhTreeDTest, TestDebug) { +TEST(PhTreeBoxFTest, TestDebug) { const dimension_t dim = 3; TestTree tree; size_t N = 1000; @@ -275,7 +273,7 @@ TEST(PhTreeBoxFTest, TestEmplace) { ASSERT_EQ(i + 1, tree.size()); // try add again, this should _not_ replace the existing value - Id id2(-i); + Id id2(i + N); ASSERT_EQ(false, tree.emplace(p, id2).second); ASSERT_EQ(i, tree.emplace(p, id).first._i); ASSERT_EQ(tree.count(p), 1); @@ -368,7 +366,7 @@ void populate( template void populate( TestTree& tree, std::vector>& points, size_t N, double boxLen = 10) { - generateCube(points, N, boxLen); + generateCube(points, N, (float)boxLen); for (size_t i = 0; i < N; i++) { ASSERT_TRUE(tree.emplace(points[i], i + 1).second); } @@ -436,8 +434,8 @@ TEST(PhTreeBoxFTest, TestUpdateWithEmplace) { TestPoint pNew( {pOld.min()[0] + delta, pOld.min()[1] + delta, pOld.min()[2] + delta}, {pOld.max()[0] + delta, pOld.max()[1] + delta, pOld.max()[2] + delta}); - int n = tree.erase(pOld); - ASSERT_EQ(1, n); + size_t n = tree.erase(pOld); + ASSERT_EQ(1u, n); tree.emplace(pNew, 42u); ASSERT_EQ(1, tree.count(pNew)); ASSERT_EQ(0, tree.count(pOld)); @@ -452,7 +450,7 @@ TEST(PhTreeBoxFTest, TestUpdateWithEmplaceHint) { const dimension_t dim = 3; TestTree tree; size_t N = 10000; - std::array deltas{0, 0.1, 1, 10}; + std::array deltas{0.f, 0.1f, 1.f, 10.f}; std::vector> points; populate(tree, points, N); @@ -465,8 +463,8 @@ TEST(PhTreeBoxFTest, 
TestUpdateWithEmplaceHint) { PhPointF max{pOld.max()[0] + delta, pOld.max()[1] + delta, pOld.max()[2] + delta}; TestPoint pNew{min, max}; auto iter = tree.find(pOld); - int n = tree.erase(iter); - ASSERT_EQ(1, n); + size_t n = tree.erase(iter); + ASSERT_EQ(1u, n); tree.emplace_hint(iter, pNew, 42u); ASSERT_EQ(1, tree.count(pNew)); if (delta != 0.0) { @@ -490,8 +488,8 @@ TEST(PhTreeBoxFTest, TestEraseByIterator) { for (auto& p : points) { auto iter = tree.find(p); ASSERT_NE(tree.end(), iter); - int count = tree.erase(iter); - ASSERT_EQ(1, count); + size_t count = tree.erase(iter); + ASSERT_EQ(1u, count); ASSERT_EQ(tree.end(), tree.find(p)); i++; } @@ -509,8 +507,8 @@ TEST(PhTreeBoxFTest, TestEraseByIteratorQuery) { for (size_t i = 0; i < N; ++i) { auto iter = tree.begin(); ASSERT_NE(tree.end(), iter); - int count = tree.erase(iter); - ASSERT_EQ(1, count); + size_t count = tree.erase(iter); + ASSERT_EQ(1u, count); } ASSERT_EQ(0, tree.erase(tree.end())); diff --git a/phtree/phtree_d_test.cc b/test/phtree_d_test.cc similarity index 83% rename from phtree/phtree_d_test.cc rename to test/phtree_d_test.cc index 6e966906..8894c6fd 100644 --- a/phtree/phtree_d_test.cc +++ b/test/phtree_d_test.cc @@ -15,7 +15,7 @@ */ #include "phtree/phtree.h" -#include +#include #include using namespace improbable::phtree; @@ -42,13 +42,18 @@ class DoubleRng { struct Id { Id() = default; - explicit Id(const int i) : _i(i){}; + explicit Id(const int i) : _i{i} {} - bool operator==(Id& rhs) { + explicit Id(const size_t i) : _i{static_cast(i)} {} + + bool operator==(const Id& rhs) const { return _i == rhs._i; } + Id(Id const& rhs) = default; + Id(Id&& rhs) = default; Id& operator=(Id const& rhs) = default; + Id& operator=(Id&& rhs) = default; int _i; }; @@ -284,7 +289,7 @@ TEST(PhTreeDTest, TestEmplace) { ASSERT_EQ(i + 1, tree.size()); // try add again, this should _not_ replace the existing value - Id id2(-i); + Id id2(i + N); ASSERT_EQ(false, tree.emplace(p, id2).second); ASSERT_EQ(i, 
tree.emplace(p, id).first._i); ASSERT_EQ(tree.count(p), 1); @@ -328,7 +333,7 @@ TEST(PhTreeDTest, TestSquareBrackets) { ASSERT_EQ(0, tree[p]._i); ASSERT_EQ(tree.count(p), 1); if (i % 2 == 0) { - tree[p]._i = i; + tree[p]._i = (int)i; } else { tree[p] = id; } @@ -441,8 +446,8 @@ TEST(PhTreeDTest, TestUpdateWithEmplace) { for (auto& p : points) { auto pOld = p; TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; - int n = tree.erase(pOld); - ASSERT_EQ(1, n); + size_t n = tree.erase(pOld); + ASSERT_EQ(1u, n); tree.emplace(pNew, 42); ASSERT_EQ(1, tree.count(pNew)); ASSERT_EQ(0, tree.count(pOld)); @@ -468,8 +473,8 @@ TEST(PhTreeDTest, TestUpdateWithEmplaceHint) { double delta = deltas[d_n]; TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; auto iter = tree.find(pOld); - int n = tree.erase(iter); - ASSERT_EQ(1, n); + size_t n = tree.erase(iter); + ASSERT_EQ(1u, n); tree.emplace_hint(iter, pNew, 42); ASSERT_EQ(1, tree.count(pNew)); if (delta != 0.0) { @@ -482,6 +487,142 @@ TEST(PhTreeDTest, TestUpdateWithEmplaceHint) { tree.clear(); } +TEST(PhTreeDTest, TestUpdateWithRelocate) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 1, 10, 100}; + std::vector> points; + populate(tree, points, N); + + size_t d_n = 0; + for (int x = 0; x < 10; ++x) { + size_t i = 0; + for (auto& p : points) { + auto pOld = p; + d_n = (d_n + 1) % deltas.size(); + scalar_64_t delta = deltas[d_n]; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + if (delta > 0.0 && tree.find(pNew) != tree.end()) { + // Skip this, there is already another entry + ASSERT_EQ(0, tree.relocate(pOld, pNew)); + } else { + ASSERT_EQ(1, tree.relocate(pOld, pNew)); + if (delta > 0.0) { + // second time fails because value has already been moved + ASSERT_EQ(0, tree.relocate(pOld, pNew)); + } + ASSERT_EQ(Id(i), *tree.find(pNew)); + p = pNew; + } + ++i; + } + PhTreeDebugHelper::CheckConsistency(tree); + } + + ASSERT_EQ(N, tree.size()); + 
tree.clear(); + + // Check that empty tree works + ASSERT_EQ(0, tree.relocate(points[0], points[1])); + // Check that small tree works + tree.emplace(points[0], 1); + ASSERT_EQ(1, tree.relocate(points[0], points[1])); + ASSERT_EQ(tree.end(), tree.find(points[0])); + ASSERT_EQ(Id(1), *tree.find(points[1])); + ASSERT_EQ(1, tree.size()); + tree.clear(); + + // check that existing destination fails + tree.emplace(points[0], 1); + tree.emplace(points[1], 2); + ASSERT_EQ(0, tree.relocate(points[0], points[1])); +} + +TEST(PhTreeDTest, TestUpdateWithRelocateCorenerCases) { + const dimension_t dim = 3; + TestTree tree; + TestPoint point0{1, 2, 3}; + TestPoint point1{4, 5, 6}; + + // Check that empty tree works + ASSERT_EQ(0, tree.relocate(point0, point1)); + ASSERT_EQ(0, tree.size()); + + // Check that small tree works + tree.emplace(point0, 1); + ASSERT_EQ(1, tree.relocate(point0, point1)); + ASSERT_EQ(tree.end(), tree.find(point0)); + ASSERT_EQ(Id(1), *tree.find(point1)); + ASSERT_EQ(1, tree.size()); + tree.clear(); + + // check that existing destination fails + tree.emplace(point0, Id(0)); + tree.emplace(point1, Id(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1)); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source fails + tree.emplace(point1, Id(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1)); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); +} + +TEST(PhTreeDTest, TestUpdateWithRelocateIf) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 1, 10, 100}; + std::vector> points; + populate(tree, points, N); + + size_t d_n = 0; + for (int x = 0; x < 10; ++x) { + size_t i = 0; + size_t done = 0; + auto pred = [](const Id& id) { return id._i % 2 == 0; }; + for (auto& p : points) { + auto pOld = p; + d_n = (d_n + 1) % deltas.size(); + scalar_64_t delta = deltas[d_n]; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + if ((delta > 0.0 && tree.find(pNew) != 
tree.end()) || (i % 2 != 0)) { + // Skip this, there is already another entry + ASSERT_EQ(0, tree.relocate_if(pOld, pNew, pred)); + } else { + ASSERT_EQ(1, tree.relocate_if(pOld, pNew, pred)); + if (delta > 0.0) { + // second time fails because value has already been moved + ASSERT_EQ(0, tree.relocate_if(pOld, pNew, pred)); + } + ASSERT_EQ(Id(i), *tree.find(pNew)); + p = pNew; + ++done; + } + ++i; + } + ASSERT_GT(done, i * 0.4); + ASSERT_LT(done, i * 0.6); + PhTreeDebugHelper::CheckConsistency(tree); + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); + + // Check that empty tree works + auto pred = [](const Id&) { return true; }; + ASSERT_EQ(0, tree.relocate_if(points[0], points[1], pred)); + // Check that small tree works + tree.emplace(points[0], 1); + ASSERT_EQ(1, tree.relocate_if(points[0], points[1], pred)); + ASSERT_EQ(tree.end(), tree.find(points[0])); + ASSERT_EQ(Id(1), *tree.find(points[1])); + ASSERT_EQ(1, tree.size()); +} + TEST(PhTreeDTest, TestEraseByIterator) { const dimension_t dim = 3; TestTree tree; @@ -493,8 +634,8 @@ TEST(PhTreeDTest, TestEraseByIterator) { for (auto& p : points) { auto iter = tree.find(p); ASSERT_NE(tree.end(), iter); - int count = tree.erase(iter); - ASSERT_EQ(1, count); + size_t count = tree.erase(iter); + ASSERT_EQ(1u, count); ASSERT_EQ(tree.end(), tree.find(p)); i++; } @@ -512,8 +653,8 @@ TEST(PhTreeDTest, TestEraseByIteratorQuery) { for (size_t i = 0; i < N; ++i) { auto iter = tree.begin(); ASSERT_NE(tree.end(), iter); - int count = tree.erase(iter); - ASSERT_EQ(1, count); + size_t count = tree.erase(iter); + ASSERT_EQ(1u, count); } ASSERT_EQ(0, tree.erase(tree.end())); @@ -688,9 +829,9 @@ TEST(PhTreeDTest, TestWindowQueryManyMoving) { double query_length = 200; size_t nn = 0; - for (int i = -120; i < 120; i++) { + for (long i = -120; i < 120; i++) { TestPoint min{i * 10., i * 9., i * 11.}; - TestPoint max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; + TestPoint max{i * 10. + query_length, i * 9. 
+ query_length, i * 11. + query_length}; std::set referenceResult; referenceQuery(points, min, max, referenceResult); @@ -724,7 +865,7 @@ TEST(PhTreeDTest, TestWindowForEachQueryManyMoving) { size_t nn = 0; for (int i = -120; i < 120; i++) { TestPoint min{i * 10., i * 9., i * 11.}; - TestPoint max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; + TestPoint max{i * 10. + query_length, i * 9. + query_length, i * 11. + query_length}; std::set referenceResult; referenceQuery(points, min, max, referenceResult); diff --git a/test/phtree_d_test_copy_move.cc b/test/phtree_d_test_copy_move.cc new file mode 100644 index 00000000..c20fcf68 --- /dev/null +++ b/test/phtree_d_test_copy_move.cc @@ -0,0 +1,298 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "phtree/phtree.h" +#include +#include + +using namespace improbable::phtree; + +template +using TestPoint = PhPointD; + +template +using TestTree = PhTreeD; + +class DoubleRng { + public: + DoubleRng(double minIncl, double maxExcl) : eng(), rnd{minIncl, maxExcl} {} + + double next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_real_distribution rnd; +}; + +struct IdCopyOnly { + explicit IdCopyOnly(const size_t i) : _i{i} {} + + IdCopyOnly() = default; + IdCopyOnly(const IdCopyOnly& other) = default; + IdCopyOnly(IdCopyOnly&& other) = delete; + // IdCopyOnly& operator=(const IdCopyOnly& other) = default; + IdCopyOnly& operator=(const IdCopyOnly& other) { + _i = other._i; + return *this; + } + IdCopyOnly& operator=(IdCopyOnly&& other) = delete; + ~IdCopyOnly() = default; + + bool operator==(const IdCopyOnly& rhs) const { + return _i == rhs._i; + } + + size_t _i{}; +}; + +struct IdMoveOnly { + explicit IdMoveOnly(const size_t i) : _i{i} {} + + IdMoveOnly() = default; + IdMoveOnly(const IdMoveOnly& other) = delete; + IdMoveOnly(IdMoveOnly&& other) = default; + IdMoveOnly& operator=(const IdMoveOnly& other) = delete; + IdMoveOnly& operator=(IdMoveOnly&& other) = default; + ~IdMoveOnly() = default; + + bool operator==(const IdMoveOnly& rhs) const { + return _i == rhs._i; + } + + size_t _i{}; +}; + +// Assert that copy-ctr is not called even when available +struct IdCopyOrMove { + explicit IdCopyOrMove(const size_t i) : _i{i} {} + + IdCopyOrMove() = default; + IdCopyOrMove(const IdCopyOrMove&) { + assert(false); + } + IdCopyOrMove(IdCopyOrMove&& other) = default; + IdCopyOrMove& operator=(const IdCopyOrMove&) { + assert(false); + } + IdCopyOrMove& operator=(IdCopyOrMove&& other) = default; + ~IdCopyOrMove() = default; + + bool operator==(const IdCopyOrMove& rhs) const { + return _i == rhs._i; + } + + size_t _i{}; +}; + +template +void generateCube(std::vector>& points, size_t N) { + DoubleRng rng(-1000, 1000); + 
auto refTree = std::map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N; i++) { + TestPoint point{}; + for (dimension_t d = 0; d < DIM; ++d) { + point[d] = rng.next(); + } + if (refTree.count(point) != 0) { + i--; + continue; + } + + refTree.emplace(point, i); + points.push_back(point); + } + ASSERT_EQ(refTree.size(), N); + ASSERT_EQ(points.size(), N); +} + +template +void SmokeTestBasicOps_QueryAndErase(TestTree& tree, std::vector>& points) { + size_t N = points.size(); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query({p, p}); + ASSERT_NE(q, tree.end()); + ASSERT_EQ(i, (*q)._i); + q++; + ASSERT_EQ(q, tree.end()); + } + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_knn_query(1, p, DistanceEuclidean()); + ASSERT_NE(q, tree.end()); + ASSERT_EQ(i, (*q)._i); + q++; + ASSERT_EQ(q, tree.end()); + } + + // TODO enable for new relocate functions + // for (size_t i = 0; i < N; i++) { + // TestPoint& p = points.at(i); + // TestPoint pOld = p; + // for (dimension_t d = 0; d < DIM; ++d) { + // p[d] += 10000; + // } + // auto r = tree.relocate(pOld, p); + // ASSERT_EQ(r, 1u); + // } + + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_NE(tree.find(p), tree.end()); + ASSERT_EQ(tree.count(p), 1u); + ASSERT_EQ(i, tree.find(p)->_i); + if (i % 2 == 0) { + ASSERT_EQ(1u, tree.erase(p)); + } else { + auto iter = tree.find(p); + ASSERT_EQ(1u, tree.erase(iter)); + } + + ASSERT_EQ(tree.count(p), 0u); + ASSERT_EQ(tree.end(), tree.find(p)); + ASSERT_EQ(N - i - 1, tree.size()); + + // try remove again + ASSERT_EQ(0u, tree.erase(p)); + ASSERT_EQ(tree.count(p), 0u); + ASSERT_EQ(tree.end(), tree.find(p)); + ASSERT_EQ(N - i - 1, tree.size()); + if (i < N - 1) { + ASSERT_FALSE(tree.empty()); + } + } + ASSERT_EQ(0u, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); +} + +template +void 
SmokeTestBasicOps(size_t N) { + TestTree tree; + std::vector> points; + generateCube(points, N); + + ASSERT_EQ(0u, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 0u); + ASSERT_EQ(tree.end(), tree.find(p)); + + Id id(i); + if (i % 4 == 0) { + ASSERT_TRUE(tree.try_emplace(p, id).second); + } else if (i % 4 == 1) { + ASSERT_TRUE(tree.emplace(p, id).second); + } else if (i % 4 == 2) { + tree[p] = id; + } else { + ASSERT_TRUE(tree.insert(p, id).second); + } + ASSERT_EQ(tree.count(p), 1u); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try adding it again + ASSERT_FALSE(tree.insert(p, id).second); + ASSERT_FALSE(tree.emplace(p, id).second); + ASSERT_EQ(tree.count(p), 1u); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + ASSERT_FALSE(tree.empty()); + } + + SmokeTestBasicOps_QueryAndErase(tree, points); +} + +TEST(PhTreeDTestCopyMove, SmokeTestBasicOpsCopyOnly) { + SmokeTestBasicOps<1, IdCopyOnly>(100); + SmokeTestBasicOps<3, IdCopyOnly>(100); + SmokeTestBasicOps<6, IdCopyOnly>(100); + SmokeTestBasicOps<10, IdCopyOnly>(100); + SmokeTestBasicOps<20, IdCopyOnly>(100); + SmokeTestBasicOps<63, IdCopyOnly>(100); +} + +template +void SmokeTestBasicOpsMoveOnly(size_t N) { + TestTree tree; + std::vector> points; + generateCube(points, N); + + ASSERT_EQ(0u, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 0u); + ASSERT_EQ(tree.end(), tree.find(p)); + + if (i % 2 == 0) { + ASSERT_TRUE(tree.try_emplace(p, Id(i)).second); + } else if (i % 4 == 1) { + tree[p] = Id(i); + } else { + ASSERT_TRUE(tree.emplace(p, Id(i)).second); + } + ASSERT_EQ(tree.count(p), 1u); + ASSERT_NE(tree.end(), 
tree.find(p)); + ASSERT_EQ(i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try adding it again + ASSERT_FALSE(tree.try_emplace(p, Id(i)).second); + ASSERT_FALSE(tree.emplace(p, Id(i)).second); + ASSERT_EQ(tree.count(p), 1u); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(i, tree.find(p)->_i); + ASSERT_EQ(i + 1, tree.size()); + ASSERT_FALSE(tree.empty()); + } + + SmokeTestBasicOps_QueryAndErase(tree, points); +} + +TEST(PhTreeDTestCopyMove, SmokeTestBasicOpsMoveOnly) { + SmokeTestBasicOpsMoveOnly<1, IdMoveOnly>(100); + SmokeTestBasicOpsMoveOnly<3, IdMoveOnly>(100); + SmokeTestBasicOpsMoveOnly<6, IdMoveOnly>(100); + SmokeTestBasicOpsMoveOnly<10, IdMoveOnly>(100); + SmokeTestBasicOpsMoveOnly<20, IdMoveOnly>(100); + SmokeTestBasicOpsMoveOnly<63, IdMoveOnly>(100); +} + +TEST(PhTreeDTestCopyMove, SmokeTestBasicOpsCopyFails) { + SmokeTestBasicOpsMoveOnly<1, IdCopyOrMove>(100); + SmokeTestBasicOpsMoveOnly<3, IdCopyOrMove>(100); + SmokeTestBasicOpsMoveOnly<6, IdCopyOrMove>(100); + SmokeTestBasicOpsMoveOnly<10, IdCopyOrMove>(100); + SmokeTestBasicOpsMoveOnly<20, IdCopyOrMove>(100); + SmokeTestBasicOpsMoveOnly<63, IdCopyOrMove>(100); +} diff --git a/phtree/phtree_d_test_custom_key.cc b/test/phtree_d_test_custom_key.cc similarity index 98% rename from phtree/phtree_d_test_custom_key.cc rename to test/phtree_d_test_custom_key.cc index aa293f1d..914b66f5 100644 --- a/phtree/phtree_d_test_custom_key.cc +++ b/test/phtree_d_test_custom_key.cc @@ -15,7 +15,7 @@ */ #include "phtree/phtree.h" -#include +#include #include using namespace improbable::phtree; @@ -106,7 +106,7 @@ class DoubleRng { struct Id { Id() = default; - explicit Id(const int i) : _i(i){}; + explicit Id(const size_t i) : _i{static_cast(i)} {} bool operator==(const Id& rhs) const { return _i == rhs._i; diff --git a/test/phtree_d_test_filter.cc b/test/phtree_d_test_filter.cc new file mode 100644 index 00000000..551e343b --- /dev/null +++ b/test/phtree_d_test_filter.cc @@ -0,0 +1,481 @@ +/* + * 
Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "phtree/phtree.h" +#include +#include +#include + +using namespace improbable::phtree; + +template +using TestPoint = PhPointD; + +template +using TestTree = PhTreeD; + +class DoubleRng { + public: + DoubleRng(double minIncl, double maxExcl) : eng(), rnd{minIncl, maxExcl} {} + + double next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_real_distribution rnd; +}; + +struct Id { + Id() = default; + + explicit Id(const int i) : _i(i){}; + + bool operator==(const Id& rhs) const { + return _i == rhs._i; + } + + Id(Id const& rhs) = default; + Id(Id&& rhs) = default; + Id& operator=(Id const& rhs) = default; + Id& operator=(Id&& rhs) = default; + + int _i; +}; + +template +void generateCube(std::vector>& points, size_t N) { + DoubleRng rng(-1000, 1000); + auto refTree = std::map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N; i++) { + auto point = TestPoint{rng.next(), rng.next(), rng.next()}; + if (refTree.count(point) != 0) { + i--; + continue; + } + + refTree.emplace(point, i); + points.push_back(point); + } + ASSERT_EQ(refTree.size(), N); + ASSERT_EQ(points.size(), N); +} + +template +void populate(TestTree& tree, std::vector>& points, size_t N) { + generateCube(points, N); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.insert(points[i], i).second); + } + ASSERT_EQ(N, tree.size()); +} + 
+static int f_default_construct_ = 0; +static int f_construct_ = 0; +static int f_copy_construct_ = 0; +static int f_move_construct_ = 0; +static int f_copy_assign_ = 0; +static int f_move_assign_ = 0; +static int f_destruct_ = 0; + +static void f_reset_id_counters() { + f_default_construct_ = 0; + f_construct_ = 0; + f_copy_construct_ = 0; + f_move_construct_ = 0; + f_copy_assign_ = 0; + f_move_assign_ = 0; + f_destruct_ = 0; +} + +template +struct FilterCount { + FilterCount() : last_known{} { + ++f_default_construct_; + } + + explicit FilterCount(const T i) : last_known{i} { + ++f_construct_; + } + + FilterCount(const FilterCount& other) { + ++f_copy_construct_; + last_known = other.last_known; + } + + FilterCount(FilterCount&& other) noexcept { + ++f_move_construct_; + last_known = other.last_known; + } + + FilterCount& operator=(const FilterCount& other) noexcept { + ++f_copy_assign_; + last_known = other.last_known; + return *this; + } + FilterCount& operator=(FilterCount&& other) noexcept { + ++f_move_assign_; + last_known = other.last_known; + return *this; + } + + ~FilterCount() { + ++f_destruct_; + } + + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint&, const T& value) { + last_known = const_cast(value); + return true; + } + [[nodiscard]] constexpr bool IsNodeValid(const PhPoint&, int) { + return true; + } + + T last_known; +}; + +template +struct DistanceCount { + DistanceCount() { + ++f_default_construct_; + } + + DistanceCount(const DistanceCount&) { + ++f_copy_construct_; + } + + DistanceCount(DistanceCount&&) noexcept { + ++f_move_construct_; + } + + DistanceCount& operator=(const DistanceCount&) noexcept { + ++f_copy_assign_; + return *this; + } + DistanceCount& operator=(DistanceCount&&) noexcept { + ++f_move_assign_; + return *this; + } + + ~DistanceCount() { + ++f_destruct_; + } + + double operator()(const PhPointD& p1, const PhPointD& p2) const { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; ++i) { + double d2 = p1[i] - p2[i]; + 
sum2 += d2 * d2; + } + return sqrt(sum2); + }; +}; + +static size_t static_id = 0; + +template +struct CallbackCount { + CallbackCount() { + static_id = 0; + ++f_default_construct_; + } + + CallbackCount(const CallbackCount&) { + ++f_copy_construct_; + } + + CallbackCount(CallbackCount&&) noexcept { + ++f_move_construct_; + } + + CallbackCount& operator=(const CallbackCount&) noexcept { + ++f_copy_assign_; + return *this; + } + CallbackCount& operator=(CallbackCount&&) noexcept { + ++f_move_assign_; + return *this; + } + + ~CallbackCount() { + ++f_destruct_; + } + + void operator()(TestPoint, Id& t) { + static_id = t._i; + } +}; + +template +struct FilterConst { + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint&, const T& value) const { + assert(value._i == 1); + return true; + } + [[nodiscard]] constexpr bool IsNodeValid(const PhPoint&, int) const { + return true; + } +}; + +template +struct CallbackConst { + void operator()(const TestPoint, const Id& t) const { + static_id = t._i; + } +}; + +[[maybe_unused]] +static void print_id_counters() { + std::cout << "dc=" << f_default_construct_ << " c=" << f_construct_ + << " cc=" << f_copy_construct_ << " mc=" << f_move_construct_ + << " ca=" << f_copy_assign_ << " ma=" << f_move_assign_ << " d=" << f_destruct_ + << std::endl; +} + +TEST(PhTreeDFilterTest, TestFilterAPI_FOR_EACH) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + CallbackCount<3> callback; + FilterCount<3, Id> filter{}; + // rvalue + tree.for_each(callback, filter); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // lvalue + tree.for_each(CallbackCount<3>(), FilterCount<3, Id>()); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_LE(1, f_copy_construct_ + f_move_construct_ + 
f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + CallbackCount<3> callbackC; + FilterConst<3, Id> filterC; + treeC.for_each(callbackC, filterC); + // rvalue + treeC.for_each(CallbackConst<3>{}, FilterConst<3, Id>()); + f_reset_id_counters(); +} + +TEST(PhTreeDFilterTest, TestFilterAPI_FOR_EACH_WQ) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + TestTree<3, Id>::QueryBox qb{{1, 2, 3}, {4, 5, 6}}; + CallbackCount<3> callback; + FilterCount<3, Id> filter{}; + // lvalue + tree.for_each(qb, callback, filter); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + tree.for_each({{1, 2, 3}, {4, 5, 6}}, CallbackCount<3>{}, FilterCount<3, Id>()); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_LE(1, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + treeC.for_each(qb, callback, filterC); + // rvalue + treeC.for_each({{1, 2, 3}, {4, 5, 6}}, CallbackConst<3>(), FilterConst<3, Id>()); + f_reset_id_counters(); +} + +TEST(PhTreeDFilterTest, TestFilterAPI_BEGIN) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + FilterCount<3, Id> filter{}; + // lvalue + ASSERT_EQ(tree.begin(filter)->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + ASSERT_EQ(tree.begin(FilterCount<3, Id>())->_i, 1); + ASSERT_EQ(1, 
f_construct_ + f_default_construct_); + ASSERT_LE(1, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + ASSERT_EQ(treeC.begin(filterC)->_i, 1); + // rvalue + ASSERT_EQ(treeC.begin(FilterConst<3, Id>())->_i, 1); + f_reset_id_counters(); +} + +TEST(PhTreeDFilterTest, TestFilterAPI_WQ) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + TestTree<3, Id>::QueryBox qb{{1, 2, 3}, {4, 5, 6}}; + FilterCount<3, Id> filter{}; + // lvalue + ASSERT_EQ(tree.begin_query(qb, filter)->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + ASSERT_EQ(tree.begin_query({{1, 2, 3}, {4, 5, 6}}, FilterCount<3, Id>())->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_LE(1, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + ASSERT_EQ(treeC.begin_query(qb, filterC)->_i, 1); + // rvalue + ASSERT_EQ(treeC.begin_query(qb, FilterConst<3, Id>())->_i, 1); + f_reset_id_counters(); +} + +TEST(PhTreeDFilterTest, TestFilterAPI_KNN) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + FilterCount<3, Id> filter{}; + DistanceCount<3> dist_fn{}; + // lvalue + ASSERT_EQ(tree.begin_knn_query(3, {2, 3, 4}, dist_fn, filter)->_i, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + ASSERT_EQ(tree.begin_knn_query(3, 
{2, 3, 4}, DistanceCount<3>{}, FilterCount<3, Id>())->_i, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_LE(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue #2 + auto a = tree.begin_knn_query, FilterCount<3, Id>>(3, {2, 3, 4})->_i; + ASSERT_EQ(a, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_LE(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + ASSERT_EQ(treeC.begin_knn_query(3, {2, 3, 4}, dist_fn, filterC)->_i, 1); + // rvalue + ASSERT_EQ(treeC.begin_knn_query(3, {2, 3, 4}, DistanceCount<3>{}, FilterConst<3, Id>())->_i, 1); + f_reset_id_counters(); +} + +template +double distance(const TestPoint& p1, const TestPoint& p2) { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; ++i) { + double d2 = p1[i] - p2[i]; + sum2 += d2 * d2; + } + return sqrt(sum2); +}; + +template +void referenceSphereQuery( + std::vector>& points, + TestPoint& center, + double radius, + std::set& result) { + for (size_t i = 0; i < points.size(); i++) { + auto& p = points[i]; + if (distance(center, p) <= radius) { + result.insert(i); + } + } +} + +// We use 'int&' because gtest does not compile with assertions in non-void functions. 
+template +void testSphereQuery(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceSphereQuery(points, center, radius, referenceResult); + + result = 0; + auto filter = FilterSphere(center, radius, tree.converter()); + for (auto it = tree.begin(filter); it != tree.end(); it++) { + auto& x = *it; + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + result++; + } + ASSERT_EQ(referenceResult.size(), result); +} + +TEST(PhTreeDFilterTest, TestSphereQuery0) { + const dimension_t dim = 3; + TestPoint p{-10000, -10000, -10000}; + int n = 0; + testSphereQuery(p, 0.1, 100, n); + ASSERT_EQ(0, n); +} + +TEST(PhTreeDFilterTest, TestSphereQueryMany) { + const dimension_t dim = 3; + TestPoint p{0, 0, 0}; + int n = 0; + testSphereQuery(p, 1000, 1000, n); + ASSERT_GT(n, 400); + ASSERT_LT(n, 800); +} + +TEST(PhTreeDFilterTest, TestSphereQueryAll) { + const dimension_t dim = 3; + TestPoint p{0, 0, 0}; + int n = 0; + testSphereQuery(p, 10000, 1000, n); + ASSERT_EQ(1000, n); +} diff --git a/phtree/phtree_d_test_preprocessor.cc b/test/phtree_d_test_preprocessor.cc similarity index 96% rename from phtree/phtree_d_test_preprocessor.cc rename to test/phtree_d_test_preprocessor.cc index 7e2e9010..d01c891c 100644 --- a/phtree/phtree_d_test_preprocessor.cc +++ b/test/phtree_d_test_preprocessor.cc @@ -15,7 +15,7 @@ */ #include "phtree/phtree.h" -#include +#include #include using namespace improbable::phtree; @@ -42,14 +42,12 @@ class DoubleRng { struct Id { Id() = default; - explicit Id(const int i) : _i(i){}; + explicit Id(const size_t i) : _i{static_cast(i)} {} - bool operator==(Id& rhs) { + bool operator==(const Id& rhs) const { return _i == rhs._i; } - Id& operator=(Id const& rhs) = default; - int _i; }; diff --git a/phtree/phtree_f_test.cc b/test/phtree_f_test.cc similarity index 98% rename from phtree/phtree_f_test.cc rename to test/phtree_f_test.cc index 
9e2e3a93..c7d593c9 100644 --- a/phtree/phtree_f_test.cc +++ b/test/phtree_f_test.cc @@ -15,7 +15,7 @@ */ #include "phtree/phtree.h" -#include +#include #include using namespace improbable::phtree; @@ -43,14 +43,14 @@ class FloatRng { struct Id { Id() = default; - explicit Id(const int i) : _i(i){}; + explicit Id(const int i) : _i{i} {} - bool operator==(Id& rhs) { + explicit Id(const size_t i) : _i{static_cast(i)} {} + + bool operator==(const Id& rhs) const { return _i == rhs._i; } - Id& operator=(Id const& rhs) = default; - int _i; }; @@ -69,7 +69,7 @@ template double distance(const TestPoint& p1, const TestPoint& p2) { double sum2 = 0; for (dimension_t i = 0; i < DIM; i++) { - double d = p1[i] - p2[i]; + double d = (double)(p1[i]) - (double)p2[i]; sum2 += d * d; } return sqrt(sum2); @@ -283,7 +283,7 @@ TEST(PhTreeFTest, TestEmplace) { ASSERT_EQ(i + 1, tree.size()); // try add again, this should _not_ replace the existing value - Id id2(-i); + Id id2(i + N); ASSERT_EQ(false, tree.emplace(p, id2).second); ASSERT_EQ(i, tree.emplace(p, id).first._i); ASSERT_EQ(tree.count(p), 1); @@ -327,7 +327,7 @@ TEST(PhTreeFTest, TestSquareBrackets) { ASSERT_EQ(0, tree[p]._i); ASSERT_EQ(tree.count(p), 1); if (i % 2 == 0) { - tree[p]._i = i; + tree[p]._i = (int)i; } else { tree[p] = id; } @@ -443,8 +443,8 @@ TEST(PhTreeFTest, TestUpdateWithEmplace) { static_cast(pOld[0] + delta), static_cast(pOld[1] + delta), static_cast(pOld[2] + delta)}; - int n = tree.erase(pOld); - ASSERT_EQ(1, n); + size_t n = tree.erase(pOld); + ASSERT_EQ(1u, n); tree.emplace(pNew, 42); ASSERT_EQ(1, tree.count(pNew)); ASSERT_EQ(0, tree.count(pOld)); @@ -466,8 +466,8 @@ TEST(PhTreeFTest, TestEraseByIterator) { for (auto& p : points) { auto iter = tree.find(p); ASSERT_NE(tree.end(), iter); - int count = tree.erase(iter); - ASSERT_EQ(1, count); + size_t count = tree.erase(iter); + ASSERT_EQ(1u, count); ASSERT_EQ(tree.end(), tree.find(p)); i++; } @@ -485,8 +485,8 @@ TEST(PhTreeFTest, TestEraseByIteratorQuery) { 
for (size_t i = 0; i < N; ++i) { auto iter = tree.begin(); ASSERT_NE(tree.end(), iter); - int count = tree.erase(iter); - ASSERT_EQ(1, count); + size_t count = tree.erase(iter); + ASSERT_EQ(1u, count); } ASSERT_EQ(0, tree.erase(tree.end())); diff --git a/phtree/phtree_multimap_box_d_test.cc b/test/phtree_multimap_box_d_test.cc similarity index 83% rename from phtree/phtree_multimap_box_d_test.cc rename to test/phtree_multimap_box_d_test.cc index d1f19a85..e34d1206 100644 --- a/phtree/phtree_multimap_box_d_test.cc +++ b/test/phtree_multimap_box_d_test.cc @@ -15,7 +15,7 @@ */ #include "phtree/phtree_multimap.h" -#include +#include #include #include @@ -49,14 +49,12 @@ class DoubleRng { struct Id { Id() = default; - explicit Id(const int i) : _i(i), data_{0} {}; + explicit Id(const size_t i) : _i{static_cast(i)}, data_{0} {} bool operator==(const Id& rhs) const { return _i == rhs._i; } - Id& operator=(Id const& rhs) = default; - int _i; int data_; }; @@ -71,7 +69,7 @@ struct hash { }; // namespace std struct PointDistance { - PointDistance(double distance, size_t id) : _distance(distance), _id(id) {} + PointDistance(double distance, size_t id) : _distance(distance), _id(static_cast(id)) {} double _distance; int _id; @@ -99,7 +97,7 @@ void generateCube(std::vector>& points, size_t N, double box_Len points.reserve(N); for (size_t i = 0; i < N / NUM_DUPL; i++) { - // create duplicates, ie. entries with the same coordinates. However, avoid unintentional + // create duplicates, i.e. entries with the same coordinates. However, avoid unintentional // duplicates. 
TestPoint key{}; for (dimension_t d = 0; d < DIM; ++d) { @@ -123,7 +121,6 @@ void generateCube(std::vector>& points, size_t N, double box_Len template void SmokeTestBasicOps(size_t N) { TestTree tree; - std::vector> points; generateCube(points, N); @@ -139,17 +136,19 @@ void SmokeTestBasicOps(size_t N) { } Id id(i); - if (i % 2 == 0) { + if (i % 4 == 0) { ASSERT_TRUE(tree.emplace(p, id).second); - } else { + } else if (i % 4 == 1) { ASSERT_TRUE(tree.insert(p, id).second); + } else { + ASSERT_TRUE(tree.try_emplace(p, id).second); } ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); ASSERT_NE(tree.end(), tree.find(p)); ASSERT_EQ(id._i, tree.find(p, id)->_i); ASSERT_EQ(i + 1, tree.size()); - // try add again + // try adding it again ASSERT_FALSE(tree.insert(p, id).second); ASSERT_FALSE(tree.emplace(p, id).second); ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); @@ -413,6 +412,13 @@ TEST(PhTreeMMBoxDTest, TestFind) { ASSERT_NE(tree.find(p, id), tree.end()); ASSERT_NE(tree.end(), tree.find(p, id)); ASSERT_EQ(tree.find(p, id)->_i, i); + auto iterN = tree.find(points[0]); + size_t n = 0; + while (iterN != tree.end()) { + ++iterN; + ++n; + } + ASSERT_EQ(n, NUM_DUPL); i++; } @@ -488,7 +494,42 @@ TEST(PhTreeMMBoxDTest, TestUpdateWithEmplaceHint) { tree.clear(); } -TEST(PhTreeMMDTest, TestUpdateWithRelocate) { +// TEST(PhTreeMMDTest, TestUpdateWithRelocate) { +// const dimension_t dim = 3; +// TestTree tree; +// size_t N = 10000; +// std::array deltas{0, 0.1, 1, 10}; +// std::vector> points; +// populate(tree, points, N); +// +// for (auto delta : deltas) { +// size_t i = 0; +// for (auto& p : points) { +// auto pOld = p; +// TestPoint pNew; +// if (relocate_to_existing_coordinate) { +// pNew = delta > 0.0 ? 
points[(i + 17) % N] : pOld; +// } else { +// pNew = {pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; +// } +// PhPointD min{pOld.min()[0] + delta, pOld.min()[1] + delta, pOld.min()[2] + +// delta}; PhPointD max{pOld.max()[0] + delta, pOld.max()[1] + delta, pOld.max()[2] +// + delta}; TestPoint pNew{min, max}; ASSERT_EQ(1, tree.relocate(pOld, pNew, +// Id(i))); if (delta > 0.0) { +// // second time fails because value has already been moved +// ASSERT_EQ(0, tree.relocate(pOld, pNew, Id(i))); +// } +// ASSERT_EQ(Id(i), *tree.find(pNew, Id(i))); +// p = pNew; +// ++i; +// } +// } +// +// ASSERT_EQ(N, tree.size()); +// tree.clear(); +// } + +void TestUpdateWithRelocate(bool relocate_to_existing_coordinate) { const dimension_t dim = 3; TestTree tree; size_t N = 10000; @@ -496,29 +537,91 @@ TEST(PhTreeMMDTest, TestUpdateWithRelocate) { std::vector> points; populate(tree, points, N); - size_t i = 0; - size_t d_n = 0; - for (auto& p : points) { - auto pOld = p; - d_n = (d_n + 1) % deltas.size(); - double delta = deltas[d_n]; - PhPointD min{pOld.min()[0] + delta, pOld.min()[1] + delta, pOld.min()[2] + delta}; - PhPointD max{pOld.max()[0] + delta, pOld.max()[1] + delta, pOld.max()[2] + delta}; - TestPoint pNew{min, max}; - ASSERT_EQ(1, tree.relocate(pOld, pNew, Id(i))); - if (delta > 0.0) { - // second time fails because value has already been moved - ASSERT_EQ(0, tree.relocate(pOld, pNew, Id(i))); + for (auto delta : deltas) { + size_t i = 0; + for (auto& p : points) { + auto pOld = p; + TestPoint pNew; + if (relocate_to_existing_coordinate) { + pNew = delta > 0.0 ? 
points[(i + 17) % N] : pOld; + } else { + PhPointD min{ + pOld.min()[0] + delta, pOld.min()[1] + delta, pOld.min()[2] + delta}; + PhPointD max{ + pOld.max()[0] + delta, pOld.max()[1] + delta, pOld.max()[2] + delta}; + pNew = {min, max}; + } + ASSERT_EQ(1u, tree.relocate(pOld, pNew, Id(i))); + if (pOld != pNew) { + // second time fails because value has already been moved + ASSERT_EQ(0u, tree.relocate(pOld, pNew, Id(i))); + ASSERT_EQ(tree.end(), tree.find(pOld, Id(i))); + } else { + ASSERT_EQ(1u, tree.relocate(pOld, pNew, Id(i))); + } + ASSERT_EQ(Id(i), *tree.find(pNew, Id(i))); + p = pNew; + ++i; } - ASSERT_EQ(Id(i), *tree.find(pNew, Id(i))); - p = pNew; - ++i; + PhTreeDebugHelper::CheckConsistency(tree); } ASSERT_EQ(N, tree.size()); tree.clear(); } +TEST(PhTreeMMDTest, TestUpdateWithRelocateDelta) { + TestUpdateWithRelocate(false); +} + +TEST(PhTreeMMDTest, TestUpdateWithRelocateToExisting) { + TestUpdateWithRelocate(true); +} + +TEST(PhTreeMMDTest, TestUpdateWithRelocateCornerCases) { + const dimension_t dim = 3; + TestTree tree; + TestPoint point0{{1, 2, 3}, {2, 3, 4}}; + TestPoint point1{{2, 3, 4}, {3, 4, 5}}; + + // Check that empty tree works + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(42))); + + // Check that small tree works + tree.emplace(point0, Id(1)); + ASSERT_EQ(1u, tree.relocate(point0, point1, Id(1))); + ASSERT_EQ(tree.end(), tree.find(point0, Id(1))); + ASSERT_EQ(1, tree.find(point1, Id(1))->_i); + ASSERT_EQ(1u, tree.size()); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that existing destination fails + tree.emplace(point0, Id(1)); + tree.emplace(point1, Id(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(1))); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source bucket fails + tree.emplace(point1, Id(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(0))); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source value fails 
(target bucket exists) + tree.emplace(point0, Id(0)); + tree.emplace(point1, Id(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(2))); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source value fails (target bucket missing) + tree.emplace(point0, Id(0)); + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(2))); + PhTreeDebugHelper::CheckConsistency(tree); +} + TEST(PhTreeMMBoxDTest, TestEraseByIterator) { const dimension_t dim = 3; TestTree tree; @@ -583,12 +686,16 @@ TEST(PhTreeMMBoxDTest, TestExtent) { template struct FilterEvenId { - [[nodiscard]] constexpr bool IsEntryValid(const PhPoint<2 * DIM>&, const T& value) const { - return value._i % 2 == 0; + template + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint<2 * DIM>&, const BucketT&) const { + return true; } [[nodiscard]] constexpr bool IsNodeValid(const PhPoint<2 * DIM>&, int) const { return true; } + [[nodiscard]] constexpr bool IsBucketEntryValid(const PhPoint<2 * DIM>&, const T& value) const { + return value._i % 2 == 0; + } }; TEST(PhTreeMMDTest, TestExtentFilter) { @@ -809,7 +916,7 @@ TEST(PhTreeMMBoxDTest, TestWindowQueryManyMoving) { size_t nn = 0; for (int i = -120; i < 120; i++) { PhPointD min{i * 10., i * 9., i * 11.}; - PhPointD max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; + PhPointD max{i * 10. + query_length, i * 9. + query_length, i * 11. 
+ query_length}; std::set referenceResult; referenceQuery(points, min, max, referenceResult); diff --git a/phtree/phtree_multimap_d_test.cc b/test/phtree_multimap_d_test.cc similarity index 81% rename from phtree/phtree_multimap_d_test.cc rename to test/phtree_multimap_d_test.cc index d695ec91..001a1207 100644 --- a/phtree/phtree_multimap_d_test.cc +++ b/test/phtree_multimap_d_test.cc @@ -15,7 +15,7 @@ */ #include "phtree/phtree_multimap.h" -#include +#include #include using namespace improbable::phtree; @@ -29,7 +29,7 @@ template using TestPoint = PhPointD; template -using TestTree = PhTreeMultiMap>; +using TestTree = PhTreeMultiMapD; class DoubleRng { public: @@ -47,14 +47,13 @@ class DoubleRng { struct Id { Id() = default; - explicit Id(const int i) : _i(i), data_{0} {}; + explicit Id(const int i) : _i{i}, data_{0} {} + explicit Id(const size_t i) : _i{static_cast(i)}, data_{0} {} bool operator==(const Id& rhs) const { return _i == rhs._i; } - Id& operator=(Id const& rhs) = default; - int _i; int data_; }; @@ -68,15 +67,8 @@ struct hash { }; }; // namespace std -struct IdHash { - template - std::size_t operator()(std::pair const& v) const { - return std::hash()(v.size()); - } -}; - struct PointDistance { - PointDistance(double distance, size_t id) : _distance(distance), _id(id) {} + PointDistance(double distance, size_t id) : _distance(distance), _id(static_cast(id)) {} double _distance; int _id; @@ -113,7 +105,7 @@ void generateCube(std::vector>& points, size_t N) { points.reserve(N); for (size_t i = 0; i < N / NUM_DUPL; i++) { - // create duplicates, ie. entries with the same coordinates. However, avoid unintentional + // create duplicates, i.e. entries with the same coordinates. However, avoid unintentional // duplicates. 
TestPoint key{}; for (dimension_t d = 0; d < DIM; ++d) { @@ -136,7 +128,6 @@ void generateCube(std::vector>& points, size_t N) { template void SmokeTestBasicOps(size_t N) { TestTree tree; - std::vector> points; generateCube(points, N); @@ -152,17 +143,19 @@ void SmokeTestBasicOps(size_t N) { } Id id(i); - if (i % 2 == 0) { + if (i % 4 == 0) { ASSERT_TRUE(tree.emplace(p, id).second); - } else { + } else if (i % 4 == 1) { ASSERT_TRUE(tree.insert(p, id).second); + } else { + ASSERT_TRUE(tree.try_emplace(p, id).second); } ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); ASSERT_NE(tree.end(), tree.find(p)); ASSERT_EQ(id._i, tree.find(p, id)->_i); ASSERT_EQ(i + 1, tree.size()); - // try add again + // try adding it again ASSERT_FALSE(tree.insert(p, id).second); ASSERT_FALSE(tree.emplace(p, id).second); ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); @@ -421,6 +414,13 @@ TEST(PhTreeMMDTest, TestFind) { ASSERT_NE(tree.find(p, id), tree.end()); ASSERT_NE(tree.end(), tree.find(p, id)); ASSERT_EQ(tree.find(p, id)->_i, i); + auto iterN = tree.find(points[0]); + size_t n = 0; + while (iterN != tree.end()) { + ++iterN; + ++n; + } + ASSERT_EQ(n, NUM_DUPL); i++; } @@ -490,9 +490,13 @@ TEST(PhTreeMMDTest, TestUpdateWithEmplaceHint) { ASSERT_EQ(N, tree.size()); tree.clear(); + + tree.emplace_hint(tree.end(), {11, 21, 31}, 421); + tree.emplace_hint(tree.begin(), {1, 2, 3}, 42); + ASSERT_EQ(2, tree.size()); } -TEST(PhTreeMMDTest, TestUpdateWithRelocate) { +void TestUpdateWithRelocate(bool relocate_to_existing_coordinate) { const dimension_t dim = 3; TestTree tree; size_t N = 10000; @@ -500,27 +504,87 @@ TEST(PhTreeMMDTest, TestUpdateWithRelocate) { std::vector> points; populate(tree, points, N); - size_t i = 0; - size_t d_n = 0; - for (auto& p : points) { - auto pOld = p; - d_n = (d_n + 1) % deltas.size(); - double delta = deltas[d_n]; - TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; - ASSERT_EQ(1, tree.relocate(pOld, pNew, Id(i))); - if (delta > 0.0) { - // second time 
fails because value has already been moved - ASSERT_EQ(0, tree.relocate(pOld, pNew, Id(i))); + for (auto delta : deltas) { + size_t i = 0; + for (auto& p : points) { + auto pOld = p; + TestPoint pNew; + if (relocate_to_existing_coordinate) { + pNew = delta > 0.0 ? points[(i + 17) % N] : pOld; + } else { + pNew = {pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + } + ASSERT_EQ(1u, tree.relocate(pOld, pNew, Id(i))); + if (pOld != pNew) { + // second time fails because value has already been moved + ASSERT_EQ(0u, tree.relocate(pOld, pNew, Id(i))); + ASSERT_EQ(tree.end(), tree.find(pOld, Id(i))); + } else { + ASSERT_EQ(1u, tree.relocate(pOld, pNew, Id(i))); + } + ASSERT_EQ(Id(i), *tree.find(pNew, Id(i))); + p = pNew; + ++i; } - ASSERT_EQ(Id(i), *tree.find(pNew, Id(i))); - p = pNew; - ++i; + PhTreeDebugHelper::CheckConsistency(tree); } ASSERT_EQ(N, tree.size()); tree.clear(); } +TEST(PhTreeMMDTest, TestUpdateWithRelocateDelta) { + TestUpdateWithRelocate(false); +} + +TEST(PhTreeMMDTest, TestUpdateWithRelocateToExisting) { + TestUpdateWithRelocate(true); +} + +TEST(PhTreeMMDTest, TestUpdateWithRelocateCornerCases) { + const dimension_t dim = 3; + TestTree tree; + TestPoint point0{1, 2, 3}; + TestPoint point1{4, 5, 6}; + + // Check that empty tree works + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(42))); + + // Check that small tree works + tree.emplace(point0, Id(1)); + ASSERT_EQ(1u, tree.relocate(point0, point1, Id(1))); + ASSERT_EQ(tree.end(), tree.find(point0, Id(1))); + ASSERT_EQ(1, tree.find(point1, Id(1))->_i); + ASSERT_EQ(1u, tree.size()); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that existing destination fails + tree.emplace(point0, Id(1)); + tree.emplace(point1, Id(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(1))); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source bucket fails + tree.emplace(point1, Id(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(0))); + 
PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source value fails (target bucket exists) + tree.emplace(point0, Id(0)); + tree.emplace(point1, Id(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(2))); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source value fails (target bucket missing) + tree.emplace(point0, Id(0)); + ASSERT_EQ(0u, tree.relocate(point0, point1, Id(2))); + PhTreeDebugHelper::CheckConsistency(tree); +} + TEST(PhTreeMMDTest, TestEraseByIterator) { const dimension_t dim = 3; TestTree tree; @@ -585,12 +649,16 @@ TEST(PhTreeMMDTest, TestExtent) { template struct FilterEvenId { - [[nodiscard]] constexpr bool IsEntryValid(const PhPoint&, const T& value) const { - return value._i % 2 == 0; + template + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint&, const BucketT&) const { + return true; } [[nodiscard]] constexpr bool IsNodeValid(const PhPoint&, int) const { return true; } + [[nodiscard]] constexpr bool IsBucketEntryValid(const PhPoint&, const T& value) const { + return value._i % 2 == 0; + } }; TEST(PhTreeMMDTest, TestExtentFilter) { @@ -782,7 +850,7 @@ TEST(PhTreeMMDTest, TestWindowQueryManyMoving) { size_t nn = 0; for (int i = -120; i < 120; i++) { TestPoint min{i * 10., i * 9., i * 11.}; - TestPoint max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; + TestPoint max{i * 10. + query_length, i * 9. + query_length, i * 11. + query_length}; std::set referenceResult; referenceQuery(points, min, max, referenceResult); @@ -813,7 +881,7 @@ TEST(PhTreeMMDTest, TestWindowForEachQueryManyMoving) { size_t nn = 0; for (int i = -120; i < 120; i++) { TestPoint min{i * 10., i * 9., i * 11.}; - TestPoint max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; + TestPoint max{i * 10. + query_length, i * 9. + query_length, i * 11. 
+ query_length}; std::set referenceResult; referenceQuery(points, min, max, referenceResult); @@ -1102,3 +1170,112 @@ TEST(PhTreeMMDTest, SmokeTestTreeAPI) { treePtr.clear(); delete idPtr; } + +template +void test_tree(TREE& tree) { + PhPointD<3> p{1, 2, 3}; + + // test various operations + tree.emplace(p, Id{2}); + Id id3{3}; + tree.insert(p, id3); + ASSERT_EQ(tree.size(), 3); + ASSERT_EQ(tree.count(p), 3); + ASSERT_EQ(tree.find(p, Id(1))->_i, 1); + ASSERT_EQ(tree.find(p, Id(2))->_i, 2); + ASSERT_EQ(tree.find(p, Id(3))->_i, 3); + + auto q_window = tree.begin_query({p, p}); + std::set wq_result; + wq_result.emplace(q_window->_i); + ++q_window; + wq_result.emplace(q_window->_i); + ++q_window; + wq_result.emplace(q_window->_i); + ++q_window; + ASSERT_EQ(q_window, tree.end()); + ASSERT_EQ(3, wq_result.size()); + + auto q_extent = tree.begin(); + std::set eq_result; + eq_result.emplace(q_extent->_i); + ++q_extent; + eq_result.emplace(q_extent->_i); + ++q_extent; + eq_result.emplace(q_extent->_i); + ++q_extent; + ASSERT_EQ(q_extent, tree.end()); + ASSERT_EQ(3, eq_result.size()); + + auto q_knn = tree.begin_knn_query(10, p, DistanceEuclidean<3>()); + std::set knn_result; + knn_result.emplace(q_knn->_i); + ++q_knn; + knn_result.emplace(q_knn->_i); + ++q_knn; + knn_result.emplace(q_knn->_i); + ++q_knn; + ASSERT_EQ(q_knn, tree.end()); + ASSERT_EQ(3, knn_result.size()); + + ASSERT_EQ(1, tree.erase(p, Id{1})); + ASSERT_EQ(2, tree.size()); + ASSERT_EQ(0, tree.erase(p, Id{1})); + ASSERT_EQ(2, tree.size()); + ASSERT_EQ(1, tree.erase(p, Id{2})); + ASSERT_EQ(1, tree.erase(p, Id{3})); + ASSERT_TRUE(tree.empty()); +} + +TEST(PhTreeMMDTest, TestMoveConstruct) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + PhTreeMultiMapD<3, Id> tree1; + tree1.emplace(p, Id{1}); + + TestTree<3, Id> tree{std::move(tree1)}; + test_tree(tree); + tree.~PhTreeMultiMap(); +} + +TEST(PhTreeMMDTest, TestMoveAssign) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 
3}; + PhTreeMultiMapD<3, Id> tree1; + tree1.emplace(p, Id{1}); + + TestTree<3, Id> tree{}; + tree = std::move(tree1); + test_tree(tree); + tree.~PhTreeMultiMap(); +} + +TEST(PhTreeMMDTest, TestMovableIterators) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + ASSERT_NE(tree.begin(), tree.end()); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + ASSERT_NE(tree.find(p), tree.end()); + + TestTree<3, Id>::QueryBox qb{{1, 2, 3}, {4, 5, 6}}; + FilterMultiMapAABB filter(p, p, tree.converter()); + ASSERT_TRUE(std::is_move_constructible_v); + // Not movable due to constant fields + // ASSERT_TRUE(std::is_move_assignable_v); + + ASSERT_TRUE(std::is_move_constructible_v()))>); + // Not movable due to constant fields + // ASSERT_TRUE(std::is_move_assignable_v()))>); +} \ No newline at end of file diff --git a/test/phtree_multimap_d_test_copy_move.cc b/test/phtree_multimap_d_test_copy_move.cc new file mode 100644 index 00000000..49f307e9 --- /dev/null +++ b/test/phtree_multimap_d_test_copy_move.cc @@ -0,0 +1,323 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "phtree/phtree_multimap.h" +#include +#include + +using namespace improbable::phtree; + +// Number of entries that have the same coordinate +static const size_t NUM_DUPL = 4; +static const double WORLD_MIN = -1000; +static const double WORLD_MAX = 1000; + +template +using TestPoint = PhPointD; + +template +using TestTree = PhTreeMultiMap>; + +class DoubleRng { + public: + DoubleRng(double minIncl, double maxExcl) : eng(), rnd{minIncl, maxExcl} {} + + double next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_real_distribution rnd; +}; + +struct IdCopyOnly { + explicit IdCopyOnly(const size_t i) : _i{static_cast(i)} {} + + IdCopyOnly() = default; + IdCopyOnly(const IdCopyOnly& other) = default; + IdCopyOnly(IdCopyOnly&& other) = delete; + IdCopyOnly& operator=(const IdCopyOnly& other) = default; + IdCopyOnly& operator=(IdCopyOnly&& other) = delete; + ~IdCopyOnly() = default; + + bool operator==(const IdCopyOnly& rhs) const { + return _i == rhs._i; + } + + int _i{}; + int _data{}; +}; + +struct IdMoveOnly { + explicit IdMoveOnly(const size_t i) : _i{i} {} + + IdMoveOnly() = default; + IdMoveOnly(const IdMoveOnly& other) = delete; + IdMoveOnly(IdMoveOnly&& other) = default; + IdMoveOnly& operator=(const IdMoveOnly& other) = delete; + IdMoveOnly& operator=(IdMoveOnly&& other) = default; + ~IdMoveOnly() = default; + + bool operator==(const IdMoveOnly& rhs) const { + return _i == rhs._i; + } + + size_t _i{}; + int _data{}; +}; + +// Assert that copy-ctr is not called even when available +struct IdCopyOrMove { + explicit IdCopyOrMove(const size_t i) : _i{i} {} + + IdCopyOrMove() = default; + IdCopyOrMove(const IdCopyOrMove&) { + assert(false); + } + IdCopyOrMove(IdCopyOrMove&& other) = default; + IdCopyOrMove& operator=(const IdCopyOrMove&) { + assert(false); + } + IdCopyOrMove& operator=(IdCopyOrMove&& other) = default; + ~IdCopyOrMove() = default; + + bool operator==(const IdCopyOrMove& rhs) const { + return _i 
== rhs._i; + } + + size_t _i{}; + int _data{}; +}; + +namespace std { +template <> +struct hash { + size_t operator()(const IdCopyOnly& x) const { + return std::hash{}(x._i); + } +}; +template <> +struct hash { + size_t operator()(const IdMoveOnly& x) const { + return std::hash{}(x._i); + } +}; +template <> +struct hash { + size_t operator()(const IdCopyOrMove& x) const { + return std::hash{}(x._i); + } +}; +}; // namespace std + +struct IdHash { + template + std::size_t operator()(std::pair const& v) const { + return std::hash()(v.size()); + } +}; + +template +void generateCube(std::vector>& points, size_t N) { + assert(N % NUM_DUPL == 0); + DoubleRng rng(WORLD_MIN, WORLD_MAX); + auto reference_set = std::unordered_map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N / NUM_DUPL; i++) { + // create duplicates, i.e. entries with the same coordinates. However, avoid unintentional + // duplicates. + TestPoint key{}; + for (dimension_t d = 0; d < DIM; ++d) { + key[d] = rng.next(); + } + if (reference_set.count(key) != 0) { + i--; + continue; + } + reference_set.emplace(key, i); + for (size_t dupl = 0; dupl < NUM_DUPL; dupl++) { + auto point = TestPoint(key); + points.push_back(point); + } + } + ASSERT_EQ(reference_set.size(), N / NUM_DUPL); + ASSERT_EQ(points.size(), N); +} + +template +void SmokeTestBasicOps_QueryAndErase(TestTree& tree, std::vector>& points) { + size_t N = points.size(); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query({p, p}); + ASSERT_NE(q, tree.end()); + for (size_t j = 0; j < NUM_DUPL; j++) { + ASSERT_EQ(i / NUM_DUPL, (*q)._i / NUM_DUPL); + q++; + } + ASSERT_EQ(q, tree.end()); + } + + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Id id(i); + ASSERT_NE(tree.find(p), tree.end()); + size_t expected_remaining = (N - i - 1) % NUM_DUPL + 1; + ASSERT_EQ(tree.count(p), expected_remaining); + ASSERT_EQ(i, tree.find(p, id)->_i); + 
if (i % 2 == 0) { + ASSERT_EQ(1, tree.erase(p, id)); + } else { + auto iter = tree.find(p, id); + ASSERT_EQ(1, tree.erase(iter)); + } + + ASSERT_EQ(tree.count(p), expected_remaining - 1); + if (expected_remaining - 1 == 0) { + ASSERT_EQ(tree.end(), tree.find(p)); + } + ASSERT_EQ(N - i - 1, tree.size()); + + // try remove again + ASSERT_EQ(0, tree.erase(p, id)); + ASSERT_EQ(tree.count(p), expected_remaining - 1); + if (expected_remaining - 1 == 0) { + ASSERT_EQ(tree.end(), tree.find(p)); + } + ASSERT_EQ(N - i - 1, tree.size()); + if (i < N - 1) { + ASSERT_FALSE(tree.empty()); + } + } + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); +} + +template +void SmokeTestBasicOps(size_t N) { + TestTree tree; + std::vector> points; + generateCube(points, N); + + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_LE(tree.count(p), i % NUM_DUPL); + if (i % NUM_DUPL == 0) { + ASSERT_EQ(tree.end(), tree.find(p)); + } + + Id id(i); + if (i % 4 == 0) { + ASSERT_TRUE(tree.emplace(p, id).second); + } else if (i % 4 == 1) { + ASSERT_TRUE(tree.insert(p, id).second); + } else { + ASSERT_TRUE(tree.try_emplace(p, id).second); + } + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, tree.find(p, id)->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try adding it again + ASSERT_FALSE(tree.insert(p, id).second); + ASSERT_FALSE(tree.emplace(p, id).second); + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(id._i, tree.find(p, id)->_i); + ASSERT_EQ(i + 1, tree.size()); + ASSERT_FALSE(tree.empty()); + } + + SmokeTestBasicOps_QueryAndErase(tree, points); +} + +TEST(PhTreeMMDTestCopyMove, SmokeTestBasicOps) { + SmokeTestBasicOps<1, IdCopyOnly>(100); + SmokeTestBasicOps<3, IdCopyOnly>(100); + SmokeTestBasicOps<6, 
IdCopyOnly>(100); + SmokeTestBasicOps<10, IdCopyOnly>(100); + SmokeTestBasicOps<20, IdCopyOnly>(100); + SmokeTestBasicOps<63, IdCopyOnly>(100); +} + +template +void SmokeTestBasicOpsMoveOnly(size_t N) { + TestTree tree; + std::vector> points; + generateCube(points, N); + + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_LE(tree.count(p), i % NUM_DUPL); + if (i % NUM_DUPL == 0) { + ASSERT_EQ(tree.end(), tree.find(p)); + } + + if (i % 2 == 0) { + ASSERT_TRUE(tree.emplace(p, Id(i)).second); + } else { + ASSERT_TRUE(tree.try_emplace(p, Id(i)).second); + } + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(i, tree.find(p, Id(i))->_i); + ASSERT_EQ(i + 1, tree.size()); + + // try adding it again + ASSERT_FALSE(tree.try_emplace(p, Id(i)).second); + ASSERT_FALSE(tree.emplace(p, Id(i)).second); + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(i, tree.find(p, Id(i))->_i); + ASSERT_EQ(i + 1, tree.size()); + ASSERT_FALSE(tree.empty()); + } + + SmokeTestBasicOps_QueryAndErase(tree, points); +} + +TEST(PhTreeMMDTestCopyMove, SmokeTestBasicOpsMoveOnly) { + SmokeTestBasicOpsMoveOnly<1, IdMoveOnly>(100); + SmokeTestBasicOpsMoveOnly<3, IdMoveOnly>(100); + SmokeTestBasicOpsMoveOnly<6, IdMoveOnly>(100); + SmokeTestBasicOpsMoveOnly<10, IdMoveOnly>(100); + SmokeTestBasicOpsMoveOnly<20, IdMoveOnly>(100); + SmokeTestBasicOpsMoveOnly<63, IdMoveOnly>(100); +} + +TEST(PhTreeMMDTestCopyMove, SmokeTestBasicOpsCopyFails) { + SmokeTestBasicOpsMoveOnly<1, IdCopyOrMove>(100); + SmokeTestBasicOpsMoveOnly<3, IdCopyOrMove>(100); + SmokeTestBasicOpsMoveOnly<6, IdCopyOrMove>(100); + SmokeTestBasicOpsMoveOnly<10, IdCopyOrMove>(100); + SmokeTestBasicOpsMoveOnly<20, IdCopyOrMove>(100); + SmokeTestBasicOpsMoveOnly<63, IdCopyOrMove>(100); +} diff --git 
a/test/phtree_multimap_d_test_filter.cc b/test/phtree_multimap_d_test_filter.cc new file mode 100644 index 00000000..0fc5576d --- /dev/null +++ b/test/phtree_multimap_d_test_filter.cc @@ -0,0 +1,685 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "phtree/phtree_multimap.h" +#include +#include +#include + +using namespace improbable::phtree; + +// Number of entries that have the same coordinate +static const size_t NUM_DUPL = 4; +[[maybe_unused]] static const double WORLD_MIN = -1000; +[[maybe_unused]] static const double WORLD_MAX = 1000; + +template +using TestPoint = PhPointD; + +template +using TestTree = PhTreeMultiMap>; + +class DoubleRng { + public: + DoubleRng(double minIncl, double maxExcl) : eng(), rnd{minIncl, maxExcl} {} + + double next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_real_distribution rnd; +}; + +struct Id { + Id() = default; + + explicit Id(const int i) : _i(i){}; + + bool operator==(const Id& rhs) const { + return _i == rhs._i; + } + + Id(Id const& rhs) = default; + Id(Id&& rhs) = default; + Id& operator=(Id const& rhs) = default; + Id& operator=(Id&& rhs) = default; + + int _i; +}; + +namespace std { +template <> +struct hash { + size_t operator()(const Id& x) const { + return std::hash{}(x._i); + } +}; +}; // namespace std + +struct IdHash { + template + std::size_t operator()(std::pair const& v) const { + return 
std::hash()(v.size()); + } +}; + +template +void generateCube(std::vector>& points, size_t N) { + assert(N % NUM_DUPL == 0); + DoubleRng rng(WORLD_MIN, WORLD_MAX); + auto reference_set = std::unordered_map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N / NUM_DUPL; i++) { + // create duplicates, ie. entries with the same coordinates. However, avoid unintentional + // duplicates. + TestPoint key{}; + for (dimension_t d = 0; d < DIM; ++d) { + key[d] = rng.next(); + } + if (reference_set.count(key) != 0) { + i--; + continue; + } + reference_set.emplace(key, i); + for (size_t dupl = 0; dupl < NUM_DUPL; dupl++) { + auto point = TestPoint(key); + points.push_back(point); + } + } + ASSERT_EQ(reference_set.size(), N / NUM_DUPL); + ASSERT_EQ(points.size(), N); +} + +template +void populate(TestTree& tree, std::vector>& points, size_t N) { + generateCube(points, N); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.insert(points[i], i).second); + } + ASSERT_EQ(N, tree.size()); +} + +static int f_default_construct_ = 0; +static int f_construct_ = 0; +static int f_copy_construct_ = 0; +static int f_move_construct_ = 0; +static int f_copy_assign_ = 0; +static int f_move_assign_ = 0; +static int f_destruct_ = 0; + +static void f_reset_id_counters() { + f_default_construct_ = 0; + f_construct_ = 0; + f_copy_construct_ = 0; + f_move_construct_ = 0; + f_copy_assign_ = 0; + f_move_assign_ = 0; + f_destruct_ = 0; +} + +template +struct FilterCount { + FilterCount() : last_known{} { + ++f_default_construct_; + } + + explicit FilterCount(const T i) : last_known{i} { + ++f_construct_; + } + + FilterCount(const FilterCount& other) { + ++f_copy_construct_; + last_known = other.last_known; + } + + FilterCount(FilterCount&& other) noexcept { + ++f_move_construct_; + last_known = other.last_known; + } + + FilterCount& operator=(const FilterCount& other) noexcept { + ++f_copy_assign_; + last_known = other.last_known; + return *this; + } + FilterCount& operator=(FilterCount&& 
other) noexcept { + ++f_move_assign_; + last_known = other.last_known; + return *this; + } + + ~FilterCount() { + ++f_destruct_; + } + + template + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint&, const BucketT& bucket) { + assert(!bucket.empty()); + return true; + } + + template + [[nodiscard]] bool IsBucketEntryValid(const PhPoint&, const T2& value) { + last_known = value; + return true; + } + + [[nodiscard]] constexpr bool IsNodeValid(const PhPoint&, int) { + return true; + } + + T last_known; +}; + +template +struct DistanceCount { + DistanceCount() { + ++f_default_construct_; + } + + DistanceCount(const DistanceCount&) { + ++f_copy_construct_; + } + + DistanceCount(DistanceCount&&) noexcept { + ++f_move_construct_; + } + + DistanceCount& operator=(const DistanceCount&) noexcept { + ++f_copy_assign_; + return *this; + } + DistanceCount& operator=(DistanceCount&&) noexcept { + ++f_move_assign_; + return *this; + } + + ~DistanceCount() { + ++f_destruct_; + } + + double operator()(const PhPointD& p1, const PhPointD& p2) const { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; ++i) { + double d2 = p1[i] - p2[i]; + sum2 += d2 * d2; + } + return sqrt(sum2); + }; +}; + +static size_t static_id = 0; + +template +struct CallbackCount { + CallbackCount() { + static_id = 0; + ++f_default_construct_; + } + + CallbackCount(const CallbackCount&) { + ++f_copy_construct_; + } + + CallbackCount(CallbackCount&&) noexcept { + ++f_move_construct_; + } + + CallbackCount& operator=(const CallbackCount&) noexcept { + ++f_copy_assign_; + return *this; + } + CallbackCount& operator=(CallbackCount&&) noexcept { + ++f_move_assign_; + return *this; + } + + ~CallbackCount() { + ++f_destruct_; + } + + void operator()(const TestPoint, const Id& t) { + static_id = t._i; + } +}; + +template +struct FilterConst { + template + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint&, const BucketT&) const { + return true; + } + [[nodiscard]] constexpr bool IsBucketEntryValid(const 
PhPoint&, const T& value) { + assert(value._i == 1); + return true; + } + [[nodiscard]] constexpr bool IsNodeValid(const PhPoint&, int) const { + return true; + } +}; + +template +struct CallbackConst { + void operator()(const TestPoint, const Id& t) const { + static_id = t._i; + } +}; + +[[maybe_unused]] static void print_id_counters() { + std::cout << "dc=" << f_default_construct_ << " c=" << f_construct_ + << " cc=" << f_copy_construct_ << " mc=" << f_move_construct_ + << " ca=" << f_copy_assign_ << " ma=" << f_move_assign_ << " d=" << f_destruct_ + << std::endl; +} + +/* + * General comment: We are testing several thing here. + * - If we pass lvalue filters/callbacks/... we want to ensure that they do not get copied or + * moved at all. We need to ensure that the lvalue argument is the same instance that is + * used internally by the iterator. + * - If we pass a rvalue filters/callbacks/..., preventing copies/moves is harder. We are testing + * somewhat arbitrarily for a limit of 3 moves/copies per argument. + * - We want to ensure that both rvalue/lvalue arguments work. + * - We also do some limited testing that it works with 'const' trees. 
+ * - Finally, we test separately that the old legacy filters still work + */ + +TEST(PhTreeTest, TestFilterAPI_FOR_EACH) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + CallbackCount<3> callback; + FilterCount<3, Id> filter{}; + // rvalue + tree.for_each(callback, filter); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(filter.last_known._i, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // lvalue + tree.for_each(CallbackCount<3>(), FilterCount<3, Id>()); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_GE(4, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + CallbackCount<3> callbackC; + FilterConst<3, Id> filterC; + treeC.for_each(callbackC, filterC); + // rvalue + treeC.for_each(CallbackConst<3>{}, FilterConst<3, Id>()); + f_reset_id_counters(); +} + +TEST(PhTreeTest, TestFilterAPI_FOR_EACH_WQ) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + TestTree<3, Id>::QueryBox qb{{1, 2, 3}, {4, 5, 6}}; + CallbackCount<3> callback; + FilterCount<3, Id> filter{}; + // lvalue + tree.for_each(qb, callback, filter); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(filter.last_known._i, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + tree.for_each({{1, 2, 3}, {4, 5, 6}}, CallbackCount<3>{}, FilterCount<3, Id>()); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_GE(4, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); 
+ f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + treeC.for_each(qb, callback, filterC); + // rvalue + treeC.for_each({{1, 2, 3}, {4, 5, 6}}, CallbackConst<3>(), FilterConst<3, Id>()); + f_reset_id_counters(); +} + +TEST(PhTreeTest, TestFilterAPI_BEGIN) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + FilterCount<3, Id> filter{}; + // lvalue + ASSERT_EQ(tree.begin(filter)->_i, 1); + ASSERT_EQ(filter.last_known._i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_GE(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + ASSERT_EQ(tree.begin(FilterCount<3, Id>())->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_GE(2, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + ASSERT_EQ(treeC.begin(filterC)->_i, 1); + // rvalue + ASSERT_EQ(treeC.begin(FilterConst<3, Id>())->_i, 1); + f_reset_id_counters(); +} + +TEST(PhTreeTest, TestFilterAPI_WQ) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + TestTree<3, Id>::QueryBox qb{{1, 2, 3}, {4, 5, 6}}; + FilterCount<3, Id> filter{}; + // lvalue + ASSERT_EQ(tree.begin_query(qb, filter)->_i, 1); + ASSERT_EQ(filter.last_known._i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + ASSERT_EQ(tree.begin_query({{1, 2, 3}, {4, 5, 6}}, FilterCount<3, Id>())->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_GE(2, f_copy_construct_ + 
f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + ASSERT_EQ(treeC.begin_query(qb, filterC)->_i, 1); + // rvalue + ASSERT_EQ(treeC.begin_query(qb, FilterConst<3, Id>())->_i, 1); + f_reset_id_counters(); +} + +TEST(PhTreeTest, TestFilterAPI_KNN) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + FilterCount<3, Id> filter{}; + DistanceCount<3> dist_fn{}; + // lvalue + ASSERT_EQ(tree.begin_knn_query(3, {2, 3, 4}, dist_fn, filter)->_i, 1); + ASSERT_EQ(filter.last_known._i, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + ASSERT_EQ(tree.begin_knn_query(3, {2, 3, 4}, DistanceCount<3>{}, FilterCount<3, Id>())->_i, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_GE(2 * 3, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue #2 + auto a = tree.begin_knn_query, FilterCount<3, Id>>(3, {2, 3, 4})->_i; + ASSERT_EQ(a, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_GE(2 * 3, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + ASSERT_EQ(treeC.begin_knn_query(3, {2, 3, 4}, dist_fn, filterC)->_i, 1); + // rvalue + ASSERT_EQ(treeC.begin_knn_query(3, {2, 3, 4}, DistanceCount<3>{}, FilterConst<3, Id>())->_i, 1); + f_reset_id_counters(); +} + +template +double distance(const TestPoint& p1, const TestPoint& p2) { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; ++i) { + double d2 = p1[i] - p2[i]; + sum2 += d2 * d2; + } + return sqrt(sum2); +}; 
+ +template +void referenceSphereQuery( + std::vector>& points, + TestPoint& center, + double radius, + std::set& result) { + for (size_t i = 0; i < points.size(); i++) { + auto& p = points[i]; + if (distance(center, p) <= radius) { + result.insert(i); + } + } +} + +template +void referenceAABBQuery( + std::vector>& points, + TestPoint& center, + double radius, + std::set& result) { + for (size_t i = 0; i < points.size(); i++) { + auto& p = points[i]; + bool inside = true; + for (dimension_t i = 0; i < DIM; ++i) { + inside &= std::abs(p[i] - center[i]) <= radius; + } + + if (inside) { + result.insert(i); + } + } +} + +template +PhBoxD QueryBox(PhPointD& center, double radius) { + typename TestTree::QueryBox query_box{ + {center[0] - radius, center[1] - radius, center[2] - radius}, + {center[0] + radius, center[1] + radius, center[2] + radius}}; + return query_box; +} + +// We use 'int&' because gtest does not compile with assertions in non-void functions. +template +void testSphereQuery(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceSphereQuery(points, center, radius, referenceResult); + + result = 0; + auto filter = FilterMultiMapSphere(center, radius, tree.converter()); + for (auto it = tree.begin(filter); it != tree.end(); it++) { + auto& x = *it; + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + result++; + } + ASSERT_EQ(referenceResult.size(), result); +} + +template +void testSphereQueryWithBox(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceSphereQuery(points, center, radius, referenceResult); + + result = 0; + auto query_box = QueryBox(center, radius); + auto filter = FilterMultiMapSphere(center, radius, tree.converter()); + for (auto it = tree.begin_query(query_box, filter); it != tree.end(); it++) { + 
auto& x = *it; + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + result++; + } + ASSERT_EQ(referenceResult.size(), result); +} + +template +void testSphereQueryForEach(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceSphereQuery(points, center, radius, referenceResult); + + result = 0; + auto filter = FilterMultiMapSphere(center, radius, tree.converter()); + auto callback = [&result, &referenceResult](PhPointD, const size_t& x) { + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + ++result; + }; + tree.for_each(callback, filter); + ASSERT_EQ(referenceResult.size(), result); +} +template +void testSphereQueryForEachQueryBox(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceSphereQuery(points, center, radius, referenceResult); + + result = 0; + auto query_box = QueryBox(center, radius); + auto filter = FilterMultiMapSphere(center, radius, tree.converter()); + auto callback = [&result, &referenceResult](PhPointD, const size_t& x) { + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + ++result; + }; + tree.for_each(query_box, callback, filter); + ASSERT_EQ(referenceResult.size(), result); +} + +template +void testAABBQuery(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceAABBQuery(points, center, radius, referenceResult); + + result = 0; + auto query_box = QueryBox(center, radius); + auto filter = FilterMultiMapAABB(query_box.min(), query_box.max(), tree.converter()); + for (auto it = tree.begin(filter); it != tree.end(); it++) { + auto& x = *it; + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + result++; + } + ASSERT_EQ(referenceResult.size(), result); +} + +template 
+void Query0(QUERY query) { + TestPoint p{-10000, -10000, -10000}; + int n = 0; + query(p, 0.1, 100, n); + ASSERT_EQ(0, n); +} + +template +void QueryMany(QUERY query) { + TestPoint p{0, 0, 0}; + int n = 0; + query(p, 1000, 1000, n); + ASSERT_GT(n, 400); + ASSERT_LT(n, 800); +} + +template +void QueryManyAABB(QUERY query) { + TestPoint p{0, 0, 0}; + int n = 0; + query(p, 1000, 1000, n); + ASSERT_EQ(n, 1000); +} + +template +void QueryAll(QUERY query) { + TestPoint p{0, 0, 0}; + int n = 0; + query(p, 10000, 1000, n); + ASSERT_EQ(1000, n); +} + +TEST(PhTreeMMDFilterTest, TestSphereQuery) { + Query0<3>(&testSphereQuery<3>); + QueryMany<3>(&testSphereQuery<3>); + QueryAll<3>(&testSphereQuery<3>); +} + +TEST(PhTreeMMDFilterTest, TestSphereQueryWithQueryBox) { + Query0<3>(&testSphereQueryWithBox<3>); + QueryMany<3>(&testSphereQueryWithBox<3>); + QueryAll<3>(&testSphereQueryWithBox<3>); +} + +TEST(PhTreeMMDFilterTest, TestSphereQueryForEach) { + Query0<3>(&testSphereQueryForEach<3>); + QueryMany<3>(&testSphereQueryForEach<3>); + QueryAll<3>(&testSphereQueryForEach<3>); +} + +TEST(PhTreeMMDFilterTest, TestSphereQueryForEachWithQueryBox) { + Query0<3>(&testSphereQueryForEachQueryBox<3>); + QueryMany<3>(&testSphereQueryForEachQueryBox<3>); + QueryAll<3>(&testSphereQueryForEachQueryBox<3>); +} + +TEST(PhTreeMMDFilterTest, TestAABBQuery) { + Query0<3>(&testAABBQuery<3>); + QueryManyAABB<3>(&testAABBQuery<3>); + QueryAll<3>(&testAABBQuery<3>); +} diff --git a/test/phtree_multimap_d_test_unique_ptr_values.cc b/test/phtree_multimap_d_test_unique_ptr_values.cc new file mode 100644 index 00000000..28c31c2f --- /dev/null +++ b/test/phtree_multimap_d_test_unique_ptr_values.cc @@ -0,0 +1,377 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "phtree/phtree_multimap.h" +#include +#include + +using namespace improbable::phtree; + +// Number of entries that have the same coordinate +static const size_t NUM_DUPL = 4; +static const double WORLD_MIN = -1000; +static const double WORLD_MAX = 1000; + +template +using TestPoint = PhPointD; + +class DoubleRng { + public: + DoubleRng(double minIncl, double maxExcl) : eng(), rnd{minIncl, maxExcl} {} + + double next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_real_distribution rnd; +}; + +struct IdObj { + IdObj() = default; + + explicit IdObj(const int i) : _i(i), data_{0} {}; + explicit IdObj(const size_t i) : _i(static_cast(i)), data_{0} {}; + + bool operator==(const IdObj& rhs) const noexcept { + return _i == rhs._i; + } + + int _i; + int data_; +}; + +using Id = std::unique_ptr; + +namespace std { +template <> +struct hash { + size_t operator()(const Id& x) const { + return std::hash{}(x->_i); + } +}; +}; // namespace std +struct equal_to_content { + bool operator()(const Id& x1, const Id& x2) const { + return (*x1) == (*x2); + } +}; +struct less_content { + bool operator()(const Id& x1, const Id& x2) const { + return (*x1)._i < (*x2)._i; + } +}; + +template +using TestTree = PhTreeMultiMap< + DIM, + T, + ConverterIEEE, + b_plus_tree_hash_set, equal_to_content>>; +// using TestTree = PhTreeMultiMap, std::unordered_set, +// equal_to_content>>; using TestTree = PhTreeMultiMap, std::set>; + +template +void generateCube(std::vector>& points, size_t N) { + assert(N % NUM_DUPL == 0); + 
DoubleRng rng(WORLD_MIN, WORLD_MAX); + auto reference_set = std::unordered_map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N / NUM_DUPL; i++) { + // create duplicates, i.e. entries with the same coordinates. However, avoid unintentional + // duplicates. + TestPoint key{}; + for (dimension_t d = 0; d < DIM; ++d) { + key[d] = rng.next(); + } + if (reference_set.count(key) != 0) { + i--; + continue; + } + reference_set.emplace(key, i); + for (size_t dupl = 0; dupl < NUM_DUPL; dupl++) { + auto point = TestPoint(key); + points.emplace_back(point); + } + } + ASSERT_EQ(reference_set.size(), N / NUM_DUPL); + ASSERT_EQ(points.size(), N); +} + +template +void SmokeTestBasicOps(int N) { + TestTree tree; + std::vector> points; + generateCube(points, N); + + ASSERT_EQ(0u, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); + + for (int i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_LE(tree.count(p), i % NUM_DUPL); + if (i % NUM_DUPL == 0) { + ASSERT_EQ(tree.end(), tree.find(p)); + } + + Id id2(new IdObj{i}); + // Id id3(new IdObj{i}); + // ASSERT_EQ(id2.get(), id3.get()); + // ASSERT_TRUE(id2 == id3); + // ASSERT_EQ(id2, id3); + if (i % 4 == 0) { + ASSERT_TRUE(tree.emplace(p, std::make_unique(i)).second); + } else if (i % 4 == 1) { + ASSERT_TRUE(tree.emplace(p, new IdObj{i}).second); + } else if (i % 4 == 2) { + ASSERT_TRUE(tree.try_emplace(p, new IdObj{i}).second); + } else { + Id id = std::make_unique(i); + ASSERT_TRUE(tree.emplace(p, std::move(id)).second); + } + Id id = std::make_unique(i); + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(i, (*tree.find(p, id))->_i); + ASSERT_EQ(i + 1u, tree.size()); + + // try adding it again + ASSERT_FALSE(tree.try_emplace(p, std::make_unique(i)).second); + ASSERT_FALSE(tree.emplace(p, std::make_unique(i)).second); + ASSERT_EQ(tree.count(p), i % NUM_DUPL + 1); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(i, 
(*tree.find(p, std::make_unique(i)))->_i); + ASSERT_EQ(i + 1u, tree.size()); + ASSERT_FALSE(tree.empty()); + } + + for (int i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query({p, p}); + ASSERT_NE(q, tree.end()); + for (size_t j = 0; j < NUM_DUPL; j++) { + ASSERT_EQ(i / NUM_DUPL, (*q)->_i / NUM_DUPL); + q++; + } + ASSERT_EQ(q, tree.end()); + } + + PhTreeDebugHelper::CheckConsistency(tree); + + for (int i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_NE(tree.find(p), tree.end()); + size_t expected_remaining = (N - i - 1) % NUM_DUPL + 1; + ASSERT_EQ(tree.count(p), expected_remaining); + ASSERT_EQ(i, (*tree.find(p, std::make_unique(i)))->_i); + if (i % 3 == 0) { + ASSERT_EQ(1u, tree.erase(p, std::make_unique(i))); + } else { + auto iter = tree.find(p, std::make_unique(i)); + ASSERT_EQ(1u, tree.erase(iter)); + } + + ASSERT_EQ(tree.count(p), expected_remaining - 1); + if (expected_remaining - 1 == 0) { + ASSERT_EQ(tree.end(), tree.find(p)); + } + ASSERT_EQ(N - i - 1u, tree.size()); + + // try remove again + ASSERT_EQ(0u, tree.erase(p, std::make_unique(i))); + ASSERT_EQ(tree.count(p), expected_remaining - 1); + if (expected_remaining - 1 == 0) { + ASSERT_EQ(tree.end(), tree.find(p)); + } + ASSERT_EQ(N - i - 1u, tree.size()); + if (i < N - 1) { + ASSERT_FALSE(tree.empty()); + } + } + ASSERT_EQ(0u, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); +} + +TEST(PhTreeMMDTestUniquePtr, SmokeTestBasicOps) { + SmokeTestBasicOps<1>(10000); + SmokeTestBasicOps<3>(10000); + SmokeTestBasicOps<6>(10000); + SmokeTestBasicOps<10>(1000); + SmokeTestBasicOps<20>(100); +} + +template +void populate(TestTree& tree, std::vector>& points, size_t N) { + generateCube(points, N); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.emplace(points[i], std::make_unique(i)).second); + } + ASSERT_EQ(N, tree.size()); +} + +TEST(PhTreeMMDTestUniquePtr, TestUpdateWithRelocate) { + const dimension_t dim = 3; + TestTree 
tree; + size_t N = 1000; + std::array deltas{0, 1, 10, 100}; + std::vector> points; + populate(tree, points, N); + + for (auto delta : deltas) { + int i = 0; + for (auto& p : points) { + auto pOld = p; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + ASSERT_EQ(1u, tree.relocate(pOld, pNew, std::make_unique(i))); + if (delta > 0) { + // second time fails because value has already been moved + ASSERT_EQ(0u, tree.relocate(pOld, pNew, std::make_unique(i))); + } + ASSERT_EQ(i, (*tree.find(pNew, std::make_unique(i)))->_i); + p = pNew; + ++i; + } + PhTreeDebugHelper::CheckConsistency(tree); + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); +} + +TEST(PhTreeMMDTestUniquePtr, TestUpdateWithRelocateCornerCases) { + const dimension_t dim = 3; + TestTree tree; + TestPoint point0{1, 2, 3}; + TestPoint point1{4, 5, 6}; + + // Check that empty tree works + ASSERT_EQ(0u, tree.relocate(point0, point1, std::make_unique(42))); + + // Check that small tree works + tree.emplace(point0, std::make_unique(1)); + ASSERT_EQ(1u, tree.relocate(point0, point1, std::make_unique(1))); + ASSERT_EQ(tree.end(), tree.find(point0, std::make_unique(1))); + ASSERT_EQ(1, (*tree.find(point1, std::make_unique(1)))->_i); + ASSERT_EQ(1u, tree.size()); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that existing destination fails + tree.emplace(point0, std::make_unique(1)); + tree.emplace(point1, std::make_unique(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1, std::make_unique(1))); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source bucket fails + tree.emplace(point1, std::make_unique(1)); + ASSERT_EQ(0u, tree.relocate(point0, point1, std::make_unique(0))); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source value fails (target bucket exists) + tree.emplace(point0, std::make_unique(0)); + tree.emplace(point1, std::make_unique(1)); + ASSERT_EQ(0u, tree.relocate(point0, 
point1, std::make_unique(2))); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source value fails (target bucket missing) + tree.emplace(point0, std::make_unique(0)); + ASSERT_EQ(0u, tree.relocate(point0, point1, std::make_unique(2))); + PhTreeDebugHelper::CheckConsistency(tree); +} + +TEST(PhTreeMMDTestUniquePtr, TestUpdateWithRelocateIf) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 1, 10, 100}; + std::vector> points; + populate(tree, points, N); + + for (auto delta : deltas) { + size_t done = 0; + for (int i = 0; size_t(i) < N; ++i) { + auto pred = [&i](const Id& id) { return id->_i == i; }; + auto pOld = points[i]; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + ASSERT_EQ(1u, tree.relocate_if(pOld, pNew, pred)); + if (delta > 0) { + // second time fails because value has already been moved + ASSERT_EQ(0u, tree.relocate_if(pOld, pNew, pred)); + } + ASSERT_EQ(i, (*tree.find(pNew, std::make_unique(i)))->_i); + ++done; + points[i] = pNew; + } + ASSERT_EQ(done, N); + PhTreeDebugHelper::CheckConsistency(tree); + } + ASSERT_EQ(N, tree.size()); + tree.clear(); +} + +TEST(PhTreeMMDTestUniquePtr, TestUpdateWithRelocateIfCornerCases) { + const dimension_t dim = 3; + TestTree tree; + TestPoint point0{1, 2, 3}; + TestPoint point1{4, 5, 6}; + auto TRUE = [](const Id&) { return true; }; + auto TWO = [](const Id& id) { return id->_i == 2; }; + + // Check that empty tree works + ASSERT_EQ(0u, tree.relocate_if(point0, point1, TRUE)); + + // Check that small tree works + tree.emplace(point0, std::make_unique(1)); + ASSERT_EQ(1u, tree.relocate_if(point0, point1, TRUE)); + ASSERT_EQ(tree.end(), tree.find(point0)); + ASSERT_EQ(1, (*tree.find(point1))->_i); + ASSERT_EQ(1u, tree.size()); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that existing destination fails + tree.emplace(point0, std::make_unique(1)); + tree.emplace(point1, std::make_unique(1)); 
+ ASSERT_EQ(0u, tree.relocate_if(point0, point1, TRUE)); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source bucket fails + tree.emplace(point1, std::make_unique(1)); + ASSERT_EQ(0u, tree.relocate_if(point0, point1, TRUE)); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source value fails (target bucket exists) + tree.emplace(point0, std::make_unique(0)); + tree.emplace(point1, std::make_unique(1)); + ASSERT_EQ(0u, tree.relocate_if(point0, point1, TWO)); + PhTreeDebugHelper::CheckConsistency(tree); + tree.clear(); + + // check that missing source value fails (target bucket missing) + tree.emplace(point0, std::make_unique(0)); + ASSERT_EQ(0u, tree.relocate_if(point0, point1, TWO)); + PhTreeDebugHelper::CheckConsistency(tree); +} diff --git a/phtree/phtree_test.cc b/test/phtree_test.cc similarity index 70% rename from phtree/phtree_test.cc rename to test/phtree_test.cc index fe323c39..51a1d5b5 100644 --- a/phtree/phtree_test.cc +++ b/test/phtree_test.cc @@ -15,7 +15,7 @@ */ #include "phtree/phtree.h" -#include +#include #include using namespace improbable::phtree; @@ -57,6 +57,13 @@ static void reset_id_counters() { destruct_count_ = 0; } +static void print_id_counters() { + std::cout << "dc=" << default_construct_count_ << " c=" << construct_count_ + << " cc=" << copy_construct_count_ << " mc=" << move_construct_count_ + << " ca=" << copy_assign_count_ << " ma=" << move_assign_count_ + << " d=" << destruct_count_ << std::endl; +} + struct Id { Id() : _i{0} { ++default_construct_count_; @@ -64,7 +71,7 @@ struct Id { explicit Id(const size_t i) : _i{static_cast(i)} { ++construct_count_; - }; + } Id(const Id& other) { ++copy_construct_count_; @@ -76,13 +83,18 @@ struct Id { _i = other._i; } - bool operator==(const Id& rhs) const { + Id& operator=(const Id& other) noexcept { ++copy_assign_count_; - return _i == rhs._i; + _i = other._i; + return *this; } - - bool operator==(Id&& rhs) const 
{ + Id& operator=(Id&& other) noexcept { ++move_assign_count_; + _i = other._i; + return *this; + } + + bool operator==(const Id& rhs) const { return _i == rhs._i; } @@ -90,8 +102,6 @@ struct Id { ++destruct_count_; } - Id& operator=(Id const& rhs) = default; - int _i; }; @@ -110,7 +120,7 @@ template double distance(const TestPoint& p1, const TestPoint& p2) { double sum2 = 0; for (dimension_t i = 0; i < DIM; i++) { - double d = p1[i] - p2[i]; + double d = (double)p1[i] - (double)p2[i]; sum2 += d * d; } return sqrt(sum2); @@ -166,17 +176,19 @@ void SmokeTestBasicOps(size_t N) { ASSERT_EQ(tree.end(), tree.find(p)); Id id(i); - if (i % 2 == 0) { + if (i % 4 == 0) { ASSERT_TRUE(tree.emplace(p, i).second); - } else { + } else if (i % 4 == 1) { ASSERT_TRUE(tree.insert(p, id).second); + } else { + ASSERT_TRUE(tree.try_emplace(p, i).second); } ASSERT_EQ(tree.count(p), 1); ASSERT_NE(tree.end(), tree.find(p)); ASSERT_EQ(id._i, tree.find(p)->_i); ASSERT_EQ(i + 1, tree.size()); - // try add again + // try insert/emplace again ASSERT_FALSE(tree.insert(p, id).second); ASSERT_FALSE(tree.emplace(p, id).second); ASSERT_EQ(tree.count(p), 1); @@ -221,7 +233,9 @@ void SmokeTestBasicOps(size_t N) { ASSERT_TRUE(tree.empty()); PhTreeDebugHelper::CheckConsistency(tree); - ASSERT_EQ(construct_count_ + copy_construct_count_ + move_construct_count_, destruct_count_); + // Normal construction and destruction should be symmetric. Move-construction is ignored. + ASSERT_GE(construct_count_ + copy_construct_count_ + move_construct_count_, destruct_count_); + ASSERT_LE(construct_count_ + copy_construct_count_, destruct_count_); // The following assertions exist only as sanity checks and may need adjusting. // There is nothing fundamentally wrong if a change in the implementation violates // any of the following assertions, as long as performance/memory impact is observed. 
@@ -237,7 +251,10 @@ void SmokeTestBasicOps(size_t N) { // small node require a lot of copying/moving ASSERT_GE(construct_count_ * 3, move_construct_count_); } else { - ASSERT_GE(construct_count_ * 2, move_construct_count_); + if (construct_count_ * 15 < move_construct_count_) { + print_id_counters(); + } + ASSERT_GE(construct_count_ * 15, move_construct_count_); } } @@ -342,7 +359,7 @@ TEST(PhTreeTest, TestEmplace) { ASSERT_EQ(i + 1, tree.size()); // try add again, this should _not_ replace the existing value - Id id2(-i); + Id id2(i + N); ASSERT_EQ(false, tree.emplace(p, id2).second); ASSERT_EQ(i, tree.emplace(p, id).first._i); ASSERT_EQ(tree.count(p), 1); @@ -499,8 +516,8 @@ TEST(PhTreeTest, TestUpdateWithEmplace) { for (auto& p : points) { auto pOld = p; TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; - int n = tree.erase(pOld); - ASSERT_EQ(1, n); + size_t n = tree.erase(pOld); + ASSERT_EQ(1u, n); tree.emplace(pNew, 42); ASSERT_EQ(1, tree.count(pNew)); ASSERT_EQ(0, tree.count(pOld)); @@ -526,8 +543,8 @@ TEST(PhTreeTest, TestUpdateWithEmplaceHint) { int delta = deltas[d_n]; TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; auto iter = tree.find(pOld); - int n = tree.erase(iter); - ASSERT_EQ(1, n); + size_t n = tree.erase(iter); + ASSERT_EQ(1u, n); tree.emplace_hint(iter, pNew, 42); ASSERT_EQ(1, tree.count(pNew)); if (delta != 0.0) { @@ -538,6 +555,147 @@ TEST(PhTreeTest, TestUpdateWithEmplaceHint) { ASSERT_EQ(N, tree.size()); tree.clear(); + + tree.emplace_hint(tree.end(), {11, 21, 31}, 421); + tree.emplace_hint(tree.begin(), {1, 2, 3}, 42); + ASSERT_EQ(2, tree.size()); +} + +TEST(PhTreeTest, TestUpdateWithTryEmplaceHint) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 1, 10, 100}; + std::vector> points; + populate(tree, points, N); + + size_t d_n = 0; + for (auto& p : points) { + auto pOld = p; + d_n = (d_n + 1) % deltas.size(); + int delta = deltas[d_n]; + TestPoint pNew{pOld[0] + 
delta, pOld[1] + delta, pOld[2] + delta}; + auto iter = tree.find(pOld); + size_t n = tree.erase(iter); + ASSERT_EQ(1u, n); + tree.try_emplace(iter, pNew, 42); + ASSERT_EQ(1, tree.count(pNew)); + if (delta != 0.0) { + ASSERT_EQ(0, tree.count(pOld)); + } + p = pNew; + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); + + tree.try_emplace(tree.end(), {11, 21, 31}, 421); + tree.try_emplace(tree.begin(), {1, 2, 3}, 42); + ASSERT_EQ(2, tree.size()); +} + +TEST(PhTreeTest, TestUpdateWithRelocate) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 1, 10, 100}; + std::vector> points; + populate(tree, points, N); + + size_t d_n = 0; + for (int x = 0; x < 10; ++x) { + size_t i = 0; + for (auto& p : points) { + auto pOld = p; + d_n = (d_n + 1) % deltas.size(); + scalar_64_t delta = deltas[d_n]; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + if (delta > 0.0 && tree.find(pNew) != tree.end()) { + // Skip this, there is already another entry + ASSERT_EQ(0, tree.relocate(pOld, pNew)); + } else { + ASSERT_EQ(1, tree.relocate(pOld, pNew)); + if (delta > 0.0) { + // second time fails because value has already been moved + ASSERT_EQ(0, tree.relocate(pOld, pNew)); + } + ASSERT_EQ(Id(i), *tree.find(pNew)); + p = pNew; + } + ++i; + } + PhTreeDebugHelper::CheckConsistency(tree); + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); + + // Check that empty tree works + ASSERT_EQ(0, tree.relocate(points[0], points[1])); + // Check that small tree works + tree.emplace(points[0], 1); + ASSERT_EQ(1, tree.relocate(points[0], points[1])); + ASSERT_EQ(tree.end(), tree.find(points[0])); + ASSERT_EQ(Id(1), *tree.find(points[1])); + ASSERT_EQ(1, tree.size()); + tree.clear(); + + // check that existing destination fails + tree.emplace(points[0], 1); + tree.emplace(points[1], 2); + ASSERT_EQ(0, tree.relocate(points[0], points[1])); +} + +TEST(PhTreeTest, TestUpdateWithRelocateIf) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 
10000; + std::array deltas{0, 1, 10, 100}; + std::vector> points; + populate(tree, points, N); + + size_t d_n = 0; + for (int x = 0; x < 10; ++x) { + size_t i = 0; + size_t done = 0; + auto pred = [](const Id& id) { return id._i % 2 == 0; }; + for (auto& p : points) { + auto pOld = p; + d_n = (d_n + 1) % deltas.size(); + scalar_64_t delta = deltas[d_n]; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + if ((delta > 0.0 && tree.find(pNew) != tree.end()) || (i % 2 != 0)) { + // Skip this, there is already another entry + ASSERT_EQ(0, tree.relocate_if(pOld, pNew, pred)); + } else { + ASSERT_EQ(1, tree.relocate_if(pOld, pNew, pred)); + if (delta > 0.0) { + // second time fails because value has already been moved + ASSERT_EQ(0, tree.relocate_if(pOld, pNew, pred)); + } + ASSERT_EQ(Id(i), *tree.find(pNew)); + p = pNew; + ++done; + } + ++i; + } + ASSERT_GT(done, i * 0.4); + ASSERT_LT(done, i * 0.6); + PhTreeDebugHelper::CheckConsistency(tree); + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); + + // Check that empty tree works + auto pred = [](const Id&) { return true; }; + ASSERT_EQ(0, tree.relocate_if(points[0], points[1], pred)); + // Check that small tree works + tree.emplace(points[0], 1); + ASSERT_EQ(1, tree.relocate_if(points[0], points[1], pred)); + ASSERT_EQ(tree.end(), tree.find(points[0])); + ASSERT_EQ(Id(1), *tree.find(points[1])); + ASSERT_EQ(1, tree.size()); } TEST(PhTreeTest, TestEraseByIterator) { @@ -551,10 +709,13 @@ TEST(PhTreeTest, TestEraseByIterator) { for (auto& p : points) { auto iter = tree.find(p); ASSERT_NE(tree.end(), iter); - int count = tree.erase(iter); - ASSERT_EQ(1, count); + size_t count = tree.erase(iter); + ASSERT_EQ(1u, count); ASSERT_EQ(tree.end(), tree.find(p)); i++; + if (i % 100 == 0 || tree.size() < 10) { + PhTreeDebugHelper::CheckConsistency(tree); + } } ASSERT_EQ(0, tree.erase(tree.end())); @@ -570,8 +731,11 @@ TEST(PhTreeTest, TestEraseByIteratorQuery) { for (size_t i = 0; i < N; ++i) { auto iter = 
tree.begin(); ASSERT_NE(tree.end(), iter); - int count = tree.erase(iter); - ASSERT_EQ(1, count); + size_t count = tree.erase(iter); + ASSERT_EQ(1u, count); + if (i % 100 == 0 || tree.size() < 10) { + PhTreeDebugHelper::CheckConsistency(tree); + } } ASSERT_EQ(0, tree.erase(tree.end())); @@ -717,6 +881,32 @@ TEST(PhTreeTest, TestWindowQuery1) { ASSERT_EQ(N, n); } +TEST(PhTreeTest, TestWindowQuery1_WithFilter) { + size_t N = 1000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + struct Counter { + void operator()(TestPoint, Id& t) { + ++n_; + id_ = t; + } + Id id_{}; + size_t n_ = 0; + }; + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Counter callback{}; + FilterAABB filter(p, p, tree.converter()); + tree.for_each(callback, filter); + ASSERT_EQ(i, callback.id_._i); + ASSERT_EQ(1, callback.n_); + } +} + TEST(PhTreeTest, TestWindowQueryMany) { const dimension_t dim = 3; TestPoint min{-100, -100, -100}; @@ -746,7 +936,7 @@ TEST(PhTreeTest, TestWindowQueryManyMoving) { int query_length = 200; size_t nn = 0; - for (int i = -120; i < 120; i++) { + for (std::int64_t i = -120; i < 120; i++) { TestPoint min{i * 10, i * 9, i * 11}; TestPoint max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; std::set referenceResult; @@ -780,7 +970,7 @@ TEST(PhTreeTest, TestWindowForEachManyMoving) { int query_length = 200; size_t nn = 0; - for (int i = -120; i < 120; i++) { + for (std::int64_t i = -120; i < 120; i++) { TestPoint min{i * 10, i * 9, i * 11}; TestPoint max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; std::set referenceResult; @@ -1030,3 +1220,144 @@ TEST(PhTreeTest, SmokeTestPoint1) { ASSERT_EQ(0, tree.size()); ASSERT_TRUE(tree.empty()); } + +template +void test_tree(TREE& tree) { + PhPoint<3> p{1, 2, 3}; + + // test various operations + tree.emplace(p, Id{2}); // already exists + Id id3{3}; + tree.insert(p, id3); // already exists + ASSERT_EQ(tree.size(), 
1); + ASSERT_EQ(tree.find(p).second()._i, 1); + ASSERT_EQ(tree[p]._i, 1); + + auto q_window = tree.begin_query({p, p}); + ASSERT_EQ(1, q_window->_i); + ++q_window; + ASSERT_EQ(q_window, tree.end()); + + auto q_extent = tree.begin(); + ASSERT_EQ(1, q_extent->_i); + ++q_extent; + ASSERT_EQ(q_extent, tree.end()); + + auto q_knn = tree.begin_knn_query(10, p, DistanceEuclidean<3>()); + ASSERT_EQ(1, q_knn->_i); + ++q_knn; + ASSERT_EQ(q_knn, tree.end()); + + ASSERT_EQ(1, tree.erase(p)); + ASSERT_EQ(0, tree.size()); + ASSERT_EQ(0, tree.erase(p)); + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); +} + +TEST(PhTreeTest, TestMoveConstruct) { + // Test edge case: only one entry in tree + PhPoint<3> p{1, 2, 3}; + TestTree<3, Id> tree1; + tree1.emplace(p, Id{1}); + + TestTree<3, Id> tree{std::move(tree1)}; + test_tree(tree); + tree.~PhTree(); +} + +TEST(PhTreeTest, TestMoveAssign) { + // Test edge case: only one entry in tree + PhPoint<3> p{1, 2, 3}; + TestTree<3, Id> tree1; + tree1.emplace(p, Id{1}); + + TestTree<3, Id> tree{}; + tree = std::move(tree1); + test_tree(tree); + tree.~PhTree(); +} + +size_t count_pre{0}; +size_t count_post{0}; +size_t count_query{0}; + +template +struct DebugConverterNoOp : public ConverterPointBase { + using BASE = ConverterPointBase; + using Point = typename BASE::KeyExternal; + using PointInternal = typename BASE::KeyInternal; + + constexpr const PointInternal& pre(const Point& point) const { + ++count_pre; + ++const_cast(count_pre_local); + return point; + } + + constexpr const Point& post(const PointInternal& point) const { + ++count_post; + ++const_cast(count_post_local); + return point; + } + + constexpr const PhBox& pre_query(const PhBox& box) const { + ++count_query; + ++const_cast(count_query_local); + return box; + } + + size_t count_pre_local{0}; + size_t count_post_local{0}; + size_t count_query_local{0}; +}; + +TEST(PhTreeTest, TestMoveAssignCustomConverter) { + // Test edge case: only one entry in tree + PhPoint<3> p{1, 2, 
3}; + auto converter = DebugConverterNoOp<3>(); + auto tree1 = PhTree<3, Id, DebugConverterNoOp<3>>(converter); + tree1.emplace(p, Id{1}); + ASSERT_GE(tree1.converter().count_pre_local, 1); + ASSERT_EQ(tree1.converter().count_pre_local, count_pre); + + PhTree<3, Id, DebugConverterNoOp<3>> tree{}; + tree = std::move(tree1); + // Assert that converter got moved (or copied?): + ASSERT_GE(tree.converter().count_pre_local, 1); + ASSERT_EQ(tree.converter().count_pre_local, count_pre); + + test_tree(tree); + ASSERT_GE(tree.converter().count_pre_local, 2); + ASSERT_EQ(tree.converter().count_pre_local, count_pre); + tree.~PhTree(); +} + +TEST(PhTreeTest, TestMovableIterators) { + // Test edge case: only one entry in tree + PhPoint<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + ASSERT_NE(tree.begin(), tree.end()); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + ASSERT_NE(tree.find(p), tree.end()); + + TestTree<3, Id>::QueryBox qb{{1, 2, 3}, {4, 5, 6}}; + FilterEvenId<3, Id> filter{}; + ASSERT_TRUE(std::is_move_constructible_v); + // Not movable due to constant fields + // ASSERT_TRUE(std::is_move_assignable_v); + + ASSERT_TRUE(std::is_move_constructible_v()))>); + // Not movable due to constant fields + // ASSERT_TRUE(std::is_move_assignable_v()))>); +} \ No newline at end of file diff --git a/phtree/phtree_test_const_values.cc b/test/phtree_test_const_values.cc similarity index 98% rename from phtree/phtree_test_const_values.cc rename to test/phtree_test_const_values.cc index 2fcb123e..bcce72bc 100644 --- a/phtree/phtree_test_const_values.cc +++ b/test/phtree_test_const_values.cc @@ -15,7 +15,7 @@ */ #include "phtree/phtree.h" -#include +#include #include using namespace improbable::phtree; @@ -42,14 +42,12 @@ class IntRng { 
struct Id { Id() = default; - explicit Id(const int i) : _i(i){}; + explicit Id(const size_t i) : _i{static_cast(i)} {} - bool operator==(Id& rhs) { + bool operator==(const Id& rhs) const { return _i == rhs._i; } - Id& operator=(Id const& rhs) = default; - int _i; }; @@ -68,7 +66,7 @@ template double distance(const TestPoint& p1, const TestPoint& p2) { double sum2 = 0; for (dimension_t i = 0; i < DIM; i++) { - double d = p1[i] - p2[i]; + double d = (double)p1[i] - (double)p2[i]; sum2 += d * d; } return sqrt(sum2); @@ -276,7 +274,7 @@ TEST(PhTreeTestConst, TestEmplace) { ASSERT_EQ(i + 1, tree.size()); // try add again, this should _not_ replace the existing value - Id id2(-i); + Id id2(i + N); ASSERT_EQ(false, tree.emplace(p, id2).second); ASSERT_EQ(i, tree.emplace(p, id).first._i); ASSERT_EQ(tree.count(p), 1); @@ -409,8 +407,8 @@ TEST(PhTreeTestConst, TestUpdateWithEmplace) { for (auto& p : points) { auto pOld = p; TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; - int n = tree.erase(pOld); - ASSERT_EQ(1, n); + size_t n = tree.erase(pOld); + ASSERT_EQ(1u, n); tree.emplace(pNew, 42); ASSERT_EQ(1, tree.count(pNew)); ASSERT_EQ(0, tree.count(pOld)); @@ -590,7 +588,7 @@ TEST(PhTreeTestConst, TestWindowQueryManyMoving) { int query_length = 200; size_t nn = 0; - for (int i = -120; i < 120; i++) { + for (std::int64_t i = -120; i < 120; i++) { TestPoint min{i * 10, i * 9, i * 11}; TestPoint max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; std::set referenceResult; diff --git a/test/phtree_test_issues.cc b/test/phtree_test_issues.cc new file mode 100644 index 00000000..a8f2b9d8 --- /dev/null +++ b/test/phtree_test_issues.cc @@ -0,0 +1,203 @@ +/* + * Copyright 2022 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "phtree/phtree.h" +#include "phtree/phtree_multimap.h" +#include +#include +#include +#include + +using namespace improbable::phtree; + + +using namespace std; + +#if defined(__clang__) || defined(__GNUC__) + +void mem_usage(double &vm_usage, double &resident_set) { + vm_usage = 0.0; + resident_set = 0.0; + ifstream stat_stream("/proc/self/stat", ios_base::in); //get info from proc directory + //create some variables to get info + string pid, comm, state, ppid, pgrp, session, tty_nr; + string tpgid, flags, minflt, cminflt, majflt, cmajflt; + string utime, stime, cutime, cstime, priority, nice; + string O, itrealvalue, starttime; + unsigned long vsize; + long rss; + stat_stream >> pid >> comm >> state >> ppid >> pgrp >> session >> tty_nr + >> tpgid >> flags >> minflt >> cminflt >> majflt >> cmajflt + >> utime >> stime >> cutime >> cstime >> priority >> nice + >> O >> itrealvalue >> starttime >> vsize >> rss; // don't care about the rest + stat_stream.close(); + long page_size_kb = sysconf(_SC_PAGE_SIZE) / 1024; // for x86-64 is configured to use 2MB pages + vm_usage = vsize / 1024.0; + resident_set = rss * page_size_kb; +} + +int get_resident_mem_kb() { + double vm, rss; + mem_usage(vm, rss); + return rss; +} + +void print_mem() { + double vm, rss; + mem_usage(vm, rss); + cout << " Virtual Memory: " << vm << " KB" << std::endl << " Resident set size: " << rss << " KB" << endl; +} + +#elif defined(_MSC_VER) +int get_resident_mem_kb() { + return 0; +} + +void print_mem() { + double vm = 0, rss = 0; + //mem_usage(vm, rss); + cout << " 
Virtual Memory: " << vm << " KB" << std::endl << " Resident set size: " << rss << " KB" << endl; +} +#endif + +auto start_timer() { + return std::chrono::steady_clock::now(); +} + +template +void end_timer(T start, const char *prefix) { + auto end = std::chrono::steady_clock::now(); + std::chrono::duration elapsed_seconds1 = end - start; + std::cout << "elapsed time " << prefix << " = " << elapsed_seconds1.count() << " s" << std::endl; +} + +// Disabled for cmake CI builds because it always fails +#if !defined(SKIP_TEST_MEMORY_LEAKS) +TEST(PhTreeTestIssues, TestIssue60) { + //auto tree = PhTreeMultiMapD<2, int>(); + auto tree = PhTreeMultiMapD<2, int, ConverterIEEE<2>, std::set>(); + std::vector> vecPos; + int dim = 1000; + int num = 1000; + + auto start1 = start_timer(); + for (int i = 0; i < num; ++i) { + PhPointD<2> p = {(double) (rand() % dim), (double) (rand() % dim)}; + vecPos.push_back(p); + tree.emplace(p, i); + } + end_timer(start1, "1"); + + // "warm up": relocate() will inevitably allocate a little bit of memory (new nodes etc). + // This warm up allocates this memory before we proceed to leak testing which ensures that the memory does not grow. 
+ for (int j = 0; j < 100; ++j) { + for (int i = 0; i < num; ++i) { + PhPointD<2> &p = vecPos[i]; + PhPointD<2> newp = {(double) (rand() % dim), (double) (rand() % dim)}; + tree.relocate(p, newp, i); + p = newp; + } + } + + // Leak testing + print_mem(); + auto start2 = start_timer(); + auto mem_start_2 = get_resident_mem_kb(); + for (int j = 0; j < 100; ++j) { + for (int i = 0; i < num; ++i) { + PhPointD<2> &p = vecPos[i]; + PhPointD<2> newp = {(double) (rand() % dim), (double) (rand() % dim)}; + tree.relocate(p, newp, i); + p = newp; + } + } + end_timer(start2, "2"); + + auto mem_end_2 = get_resident_mem_kb(); + ASSERT_LT(abs(mem_end_2 - mem_start_2), 1); + print_mem(); +} +#endif + +// Disabled for cmake CI builds because it always fails +#if !defined(SKIP_TEST_MEMORY_LEAKS) +TEST(PhTreeTestIssues, TestIssue60_minimal) { + //auto tree = PhTreeMultiMapD<2, int>(); + auto tree = PhTreeMultiMapD<2, int, ConverterIEEE<2>, std::set>(); + std::vector> vecPos; + int dim = 1000; + int num = 1000; + + auto start1 = start_timer(); + for (int i = 0; i < num; ++i) { + PhPointD<2> p = {(double) (rand() % dim), (double) (rand() % dim)}; + vecPos.push_back(p); + tree.emplace(p, i); + } + end_timer(start1, "1"); + + // "warm up": relocate() will inevitably allocate a little bit of memory (new nodes etc). + // This warm up allocates this memory before we proceed to leak testing which ensures that the memory does not grow. 
+ for (int j = 0; j < 100; ++j) { + for (int i = 0; i < num; ++i) { + PhPointD<2> &p = vecPos[i]; + PhPointD<2> newp = {(double) (rand() % dim), (double) (rand() % dim)}; + tree.relocate(p, newp, i); + p = newp; + } + } + + // Leak testing + print_mem(); + auto start2 = start_timer(); + auto mem_start_2 = get_resident_mem_kb(); + for (int j = 0; j < 100; ++j) { + for (int i = 0; i < num; ++i) { + PhPointD<2> &p = vecPos[i]; + PhPointD<2> newp = {p[0] + 1, p[1] + 1}; + tree.relocate(p, newp, i); + p = newp; + } + } + end_timer(start2, "2"); + + auto mem_end_2 = get_resident_mem_kb(); + ASSERT_LT(abs(mem_end_2 - mem_start_2), 1); + print_mem(); +} +#endif + +TEST(PhTreeTestIssues, TestIssue6_3_MAP) { + auto tree = PhTreeD<2, int>(); + std::vector> vecPos; + int dim = 10000; + + int num = 100000; + for (int i = 0; i < num; ++i) { + PhPointD<2> p = {(double) (rand() % dim), (double) (rand() % dim)}; + vecPos.push_back(p); + tree.emplace(p, i); + } + + print_mem(); + for (int i = 0; i < num; ++i) { + PhPointD<2> p = vecPos[i]; + PhPointD<2> newp = {(double) (rand() % dim), (double) (rand() % dim)}; + tree.relocate(p, newp); + } + print_mem(); +} + + diff --git a/phtree/phtree_test_ptr_values.cc b/test/phtree_test_ptr_values.cc similarity index 97% rename from phtree/phtree_test_ptr_values.cc rename to test/phtree_test_ptr_values.cc index a120ad1b..9ab74cf3 100644 --- a/phtree/phtree_test_ptr_values.cc +++ b/test/phtree_test_ptr_values.cc @@ -15,7 +15,7 @@ */ #include "phtree/phtree.h" -#include +#include #include using namespace improbable::phtree; @@ -44,12 +44,10 @@ struct Id { explicit Id(const size_t i) : _i((int)i){}; - bool operator==(Id& rhs) const { + bool operator==(const Id& rhs) const { return _i == rhs._i; } - Id& operator=(Id const& rhs) = default; - int _i; }; @@ -68,7 +66,7 @@ template double distance(const TestPoint& p1, const TestPoint& p2) { double sum2 = 0; for (dimension_t i = 0; i < DIM; i++) { - double d = p1[i] - p2[i]; + double d = (double)p1[i] 
- (double)p2[i]; sum2 += d * d; } return sqrt(sum2); @@ -286,7 +284,7 @@ TEST(PhTreeTestPtr, TestEmplace) { ASSERT_EQ(i + 1, tree.size()); // try add again, this should _not_ replace the existing value - Id* id2 = new Id(-i); + Id* id2 = new Id(i + N); ASSERT_EQ(false, tree.emplace(p, id2).second); ASSERT_EQ(i, tree.emplace(p, id).first->_i); ASSERT_EQ(tree.count(p), 1); @@ -296,11 +294,11 @@ TEST(PhTreeTestPtr, TestEmplace) { tree.emplace(p, id2).first->_i++; ASSERT_EQ(i + 1, tree.emplace(p, id).first->_i); tree.emplace(p, id2).first = id2; - ASSERT_EQ(-i, tree.emplace(p, id).first->_i); + ASSERT_EQ(i + N, tree.emplace(p, id).first->_i); // Replace it with previous value tree.emplace(p, id2).first = id; ASSERT_EQ(i + 1, tree.emplace(p, id).first->_i); - id->_i = i; + id->_i = (int)i; ASSERT_EQ(i, tree.emplace(p, id).first->_i); delete id2; } @@ -334,13 +332,13 @@ TEST(PhTreeTestPtr, TestSquareBrackets) { for (size_t i = 0; i < N; i++) { TestPoint& p = points.at(i); Id* id = new Id(i); - Id* id2 = new Id(-i); + Id* id2 = new Id(i + N); ASSERT_EQ(nullptr, tree[p]); tree[p] = id2; - ASSERT_EQ(-i, tree[p]->_i); + ASSERT_EQ(i + N, tree[p]->_i); ASSERT_EQ(tree.count(p), 1); if (i % 2 == 0) { - tree[p]->_i = i; + tree[p]->_i = (int)i; ASSERT_EQ(i, id2->_i); delete id; } else { @@ -666,9 +664,9 @@ TEST(PhTreeTestPtr, TestWindowQueryManyMoving) { int query_length = 200; size_t nn = 0; - for (int i = -120; i < 120; i++) { - TestPoint min{i * 10, i * 9, i * 11}; - TestPoint max{i * 10 + query_length, i * 9 + query_length, i * 11 + query_length}; + for (std::int64_t i = -120; i < 120; i++) { + TestPoint min{i * 10l, i * 9l, i * 11l}; + TestPoint max{i * 10l + query_length, i * 9l + query_length, i * 11l + query_length}; std::set referenceResult; referenceQuery(points, min, max, referenceResult); diff --git a/test/phtree_test_unique_ptr_values.cc b/test/phtree_test_unique_ptr_values.cc new file mode 100644 index 00000000..6a790304 --- /dev/null +++ 
b/test/phtree_test_unique_ptr_values.cc @@ -0,0 +1,297 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "phtree/phtree.h" +#include +#include + +using namespace improbable::phtree; + +template +using TestPoint = PhPoint; + +template +using TestTree = PhTree; + +class IntRng { + public: + IntRng(int minIncl, int maxExcl) : eng(7), rnd{minIncl, maxExcl} {} + + int next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_int_distribution rnd; +}; + +struct IdObj { + IdObj() = default; + + explicit IdObj(const size_t i) : _i(static_cast(i)){}; + + bool operator==(const IdObj& rhs) const { + return _i == rhs._i; + } + + IdObj& operator=(IdObj const& rhs) = default; + + int _i; +}; + +using Id = std::unique_ptr; + +struct PointDistance { + PointDistance(double distance, size_t id) : _distance(distance), _id(id) {} + + double _distance; + size_t _id; +}; + +bool comparePointDistance(PointDistance& i1, PointDistance& i2) { + return (i1._distance < i2._distance); +} + +template +double distance(const TestPoint& p1, const TestPoint& p2) { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; i++) { + double d = p1[i] - p2[i]; + sum2 += d * d; + } + return sqrt(sum2); +} + +template +double distanceL1(const TestPoint& p1, const TestPoint& p2) { + double sum = 0; + for (dimension_t i = 0; i < DIM; i++) { + sum += std::abs(p1[i] - p2[i]); + } + return sum; +} + 
+template +void generateCube(std::vector>& points, size_t N) { + IntRng rng(-1000, 1000); + auto refTree = std::map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N; i++) { + auto point = TestPoint{rng.next(), rng.next(), rng.next()}; + if (refTree.count(point) != 0) { + i--; + continue; + } + + refTree.emplace(point, i); + points.push_back(point); + } + ASSERT_EQ(refTree.size(), N); + ASSERT_EQ(points.size(), N); +} + +template +void SmokeTestBasicOps(int N) { + TestTree tree; + std::vector> points; + generateCube(points, N); + + ASSERT_EQ(0u, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); + + for (int i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_EQ(tree.count(p), 0u); + ASSERT_EQ(tree.end(), tree.find(p)); + + if (i % 2 == 0) { + ASSERT_TRUE(tree.emplace(p, std::make_unique(i)).second); + } else { + Id id = std::make_unique(i); + ASSERT_TRUE(tree.emplace(p, std::move(id)).second); + } + ASSERT_EQ(tree.count(p), 1u); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(i, (*tree.find(p))->_i); + ASSERT_EQ(i + 1u, tree.size()); + + // try adding it again + ASSERT_FALSE(tree.emplace(p, std::make_unique(i)).second); + ASSERT_EQ(tree.count(p), 1u); + ASSERT_NE(tree.end(), tree.find(p)); + ASSERT_EQ(i, (*tree.find(p))->_i); + ASSERT_EQ(i + 1u, tree.size()); + ASSERT_FALSE(tree.empty()); + } + + for (int i = 0; i < N; i++) { + TestPoint& p = points.at(i); + auto q = tree.begin_query({p, p}); + ASSERT_NE(q, tree.end()); + ASSERT_EQ(i, (*q)->_i); + q++; + ASSERT_EQ(q, tree.end()); + } + + PhTreeDebugHelper::CheckConsistency(tree); + + for (int i = 0; i < N; i++) { + TestPoint& p = points.at(i); + ASSERT_NE(tree.find(p), tree.end()); + ASSERT_EQ(tree.count(p), 1u); + ASSERT_EQ(i, (*tree.find(p))->_i); + ASSERT_EQ(1u, tree.erase(p)); + + ASSERT_EQ(tree.count(p), 0u); + ASSERT_EQ(tree.end(), tree.find(p)); + ASSERT_EQ(N - i - 1u, tree.size()); + + // try remove again + ASSERT_EQ(0u, tree.erase(p)); + 
ASSERT_EQ(tree.count(p), 0u); + ASSERT_EQ(tree.end(), tree.find(p)); + ASSERT_EQ(N - i - 1u, tree.size()); + if (i < N - 1) { + ASSERT_FALSE(tree.empty()); + } + } + ASSERT_EQ(0u, tree.size()); + ASSERT_TRUE(tree.empty()); + PhTreeDebugHelper::CheckConsistency(tree); +} + +TEST(PhTreeTestUniquePtr, SmokeTestBasicOps) { + SmokeTestBasicOps<3>(10000); + SmokeTestBasicOps<6>(10000); + SmokeTestBasicOps<10>(1000); + SmokeTestBasicOps<20>(100); +} + +template +void populate(TestTree& tree, std::vector>& points, size_t N) { + generateCube(points, N); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.emplace(points[i], std::make_unique(i)).second); + } + ASSERT_EQ(N, tree.size()); +} + +TEST(PhTreeTestUniquePtr, TestUpdateWithRelocate) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 1, 10, 100}; + std::vector> points; + populate(tree, points, N); + + size_t d_n = 0; + for (int x = 0; x < 10; ++x) { + int i = 0; + for (auto& p : points) { + auto pOld = p; + d_n = (d_n + 1) % deltas.size(); + scalar_64_t delta = deltas[d_n]; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + if (delta > 0 && tree.find(pNew) != tree.end()) { + // Skip this, there is already another entry + ASSERT_EQ(0, tree.relocate(pOld, pNew)); + } else { + ASSERT_EQ(1, tree.relocate(pOld, pNew)); + if (delta > 0) { + // second time fails because value has already been moved + ASSERT_EQ(0, tree.relocate(pOld, pNew)); + } + ASSERT_EQ(i, (*tree.find(pNew))->_i); + p = pNew; + } + ++i; + } + PhTreeDebugHelper::CheckConsistency(tree); + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); + + // Check that empty tree works + ASSERT_EQ(0, tree.relocate(points[0], points[1])); + // Check that small tree works + tree.emplace(points[0], std::make_unique(1)); + ASSERT_EQ(1u, tree.relocate(points[0], points[1])); + ASSERT_EQ(tree.end(), tree.find(points[0])); + ASSERT_EQ(1, (*tree.find(points[1]))->_i); + ASSERT_EQ(1u, tree.size()); + tree.clear(); + + 
// check that existing destination fails + tree.emplace(points[0], std::make_unique(1)); + tree.emplace(points[1], std::make_unique(2)); + ASSERT_EQ(0, tree.relocate(points[0], points[1])); +} + +TEST(PhTreeTestUniquePtr, TestUpdateWithRelocateIf) { + const dimension_t dim = 3; + TestTree tree; + size_t N = 10000; + std::array deltas{0, 1, 10, 100}; + std::vector> points; + populate(tree, points, N); + + size_t d_n = 0; + for (int x = 0; x < 10; ++x) { + int i = 0; + size_t done = 0; + auto pred = [](const Id& id) { return id->_i % 2 == 0; }; + for (auto& p : points) { + auto pOld = p; + d_n = (d_n + 1) % deltas.size(); + scalar_64_t delta = deltas[d_n]; + TestPoint pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta}; + if ((delta > 0 && tree.find(pNew) != tree.end()) || (i % 2 != 0)) { + // Skip this, there is already another entry + ASSERT_EQ(0, tree.relocate_if(pOld, pNew, pred)); + } else { + ASSERT_EQ(1, tree.relocate_if(pOld, pNew, pred)); + if (delta > 0) { + // second time fails because value has already been moved + ASSERT_EQ(0, tree.relocate_if(pOld, pNew, pred)); + } + ASSERT_EQ(i, (*tree.find(pNew))->_i); + p = pNew; + ++done; + } + ++i; + } + ASSERT_GT(done, i * 0.4); + ASSERT_LT(done, i * 0.6); + PhTreeDebugHelper::CheckConsistency(tree); + } + + ASSERT_EQ(N, tree.size()); + tree.clear(); + + // Check that empty tree works + auto pred = [](const Id&) { return true; }; + ASSERT_EQ(0, tree.relocate_if(points[0], points[1], pred)); + // Check that small tree works + tree.emplace(points[0], std::make_unique(1)); + ASSERT_EQ(1, tree.relocate_if(points[0], points[1], pred)); + ASSERT_EQ(tree.end(), tree.find(points[0])); + ASSERT_EQ(1, (*tree.find(points[1]))->_i); + ASSERT_EQ(1u, tree.size()); +} diff --git a/phtree/testing/BUILD b/test/testing/BUILD similarity index 100% rename from phtree/testing/BUILD rename to test/testing/BUILD diff --git a/phtree/testing/gtest_main/BUILD b/test/testing/gtest_main/BUILD similarity index 100% rename from 
phtree/testing/gtest_main/BUILD rename to test/testing/gtest_main/BUILD diff --git a/phtree/testing/gtest_main/gtest_main.cc b/test/testing/gtest_main/gtest_main.cc similarity index 95% rename from phtree/testing/gtest_main/gtest_main.cc rename to test/testing/gtest_main/gtest_main.cc index 1e11ab41..6f44e64a 100644 --- a/phtree/testing/gtest_main/gtest_main.cc +++ b/test/testing/gtest_main/gtest_main.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include +#include int main(int argc, char** argv) { testing::InitGoogleMock(&argc, argv); diff --git a/tools/runners/sanitizers/msan/BUILD b/tools/runners/sanitizers/msan/BUILD new file mode 100644 index 00000000..bc7d5f6f --- /dev/null +++ b/tools/runners/sanitizers/msan/BUILD @@ -0,0 +1,9 @@ +package(default_visibility = ["//visibility:private"]) + +sh_binary( + name = "msan", + srcs = ["msan.sh"], + data = [ + "msan-suppressions.txt", + ], +) diff --git a/tools/runners/sanitizers/msan/msan-suppressions.txt b/tools/runners/sanitizers/msan/msan-suppressions.txt new file mode 100644 index 00000000..e69de29b diff --git a/tools/runners/sanitizers/msan/msan.sh b/tools/runners/sanitizers/msan/msan.sh new file mode 100755 index 00000000..c796ac7a --- /dev/null +++ b/tools/runners/sanitizers/msan/msan.sh @@ -0,0 +1 @@ +MSAN_OPTIONS=suppressions="tools/runners/sanitizers/msan/msan-suppressions.txt ${MSAN_OPTIONS}" "${@}"