diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2420c0d6..36850fa0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -4,16 +4,23 @@ on: [push, pull_request] jobs: build: - runs-on: ubuntu-18.04 + runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v2 + - name: Check Java codestyle + run: | + cd java + mvn spotless:check + - name: Get cmake uses: lukka/get-cmake@v3.19.2 - name: Install packages - run: sudo apt install make clang-format-9 pkg-config g++ autoconf libtool asciidoctor libkmod-dev libudev-dev uuid-dev libjson-c-dev libkeyutils-dev pandoc libhwloc-dev libgflags-dev libtext-diff-perl bash-completion systemd wget git + run: | + sudo apt update + sudo apt install make clang-format-9 pkg-config g++ autoconf libtool asciidoctor libkmod-dev libudev-dev uuid-dev libjson-c-dev libkeyutils-dev pandoc libhwloc-dev libgflags-dev libtext-diff-perl bash-completion systemd wget git - name: Install ndctl run: | diff --git a/CMakeLists.txt b/CMakeLists.txt index 3b5a73ab..b4b701c3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,14 +8,18 @@ set(KVDK_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR}) include(${KVDK_ROOT_DIR}/cmake/functions.cmake) include(GNUInstallDirs) -set(CMAKE_CXX_STANDARD 11) +# set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD 17) option(COVERAGE "code coverage" OFF) +option(KVDK_ENABLE_VHASH "Enable experimental VHash in KVDK" ON) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mrdseed -mrdrnd -mclwb -mclflushopt") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") if (CMAKE_BUILD_TYPE STREQUAL "Release") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2") elseif (CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo") elseif (CMAKE_BUILD_TYPE STREQUAL "MinSizeRel") elseif (CMAKE_BUILD_TYPE STREQUAL "Debug") @@ -48,6 +52,7 @@ endif() set(SOURCES engine/c/kvdk_basic_op.cpp engine/c/kvdk_batch.cpp + engine/c/kvdk_transaction.cpp engine/c/kvdk_hash.cpp engine/c/kvdk_list.cpp engine/c/kvdk_sorted.cpp @@ -68,6 +73,7 @@ set(SOURCES engine/hash_collection/hash_list.cpp engine/list_collection/list.cpp engine/write_batch_impl.cpp + engine/transaction_impl.cpp engine/dram_allocator.cpp engine/pmem_allocator/pmem_allocator.cpp engine/thread_manager.cpp @@ -75,7 +81,19 @@ set(SOURCES engine/data_record.cpp engine/dl_list.cpp engine/version/old_records_cleaner.cpp + ) + +if (KVDK_ENABLE_VHASH) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mlzcnt -mbmi -mavx512bw -mavx512vl") + add_compile_definitions(KVDK_ENABLE_VHASH) + set(SOURCES + ${SOURCES} + engine/kv_engine_vhash.cpp + engine/experimental/vhash_kv.cpp + engine/experimental/vhash.cpp + engine/experimental/vhash_group.cpp ) +endif() # .so library add_library(engine SHARED ${SOURCES}) diff --git a/README.md b/README.md index 37cec6f0..decdead5 100644 --- a/README.md +++ b/README.md @@ -5,19 +5,18 @@ `KVDK` (Key-Value Development Kit) is a key-value store library implemented in C++ language. It is designed for supporting DRAM, Optane persistent memory and CXL memory pool. It also demonstrates several optimization methods for high performance with tiered memory. Besides providing the basic APIs of key-value store, it offers several advanced features, like read-modify-write, checkpoint, etc. 
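For orientation, here is a minimal usage sketch of the C++ API the README refers to. It is not part of this patch; the PMem path and config values are placeholders, and `Engine::Open`'s argument list follows the change to `engine/engine.cpp` later in this diff.

```c++
#include <cassert>
#include <cstdio>
#include <string>

#include "kvdk/engine.hpp"

int main() {
  kvdk::Configs configs;
  configs.pmem_file_size = 1ULL << 30;  // placeholder: 1 GB PMem file
  kvdk::Engine* engine = nullptr;
  // "/mnt/pmem0/kvdk" is a placeholder path on a PMem-backed filesystem
  kvdk::Status s =
      kvdk::Engine::Open("/mnt/pmem0/kvdk", &engine, configs, stdout);
  assert(s == kvdk::Status::Ok);

  s = engine->Put("foo", "bar");   // basic KV write
  assert(s == kvdk::Status::Ok);
  std::string value;
  s = engine->Get("foo", &value);  // basic KV read
  assert(s == kvdk::Status::Ok && value == "bar");

  delete engine;  // closes the instance
  return 0;
}
```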
## Features +* Rich data types + * string, sorted, hash, list, hash * Basic KV operations - * string get/set/update/delete -* Sorted KV operations - * sorted string get/set/update/scan/delete -* Rich value types - * list, hash + * get/put/update/delete/scan * Read-Modify-Write * Support TTL * Atomic Batch Write * Snapshot based Scan * Consistent Dump & Restore to/from storage +* Consistent Checkpoint +* Transaction * C/C++/Java APIs -* Support Transaction (coming soon) # Limitations * The maximum supported key-value size is 64KB-4GB. diff --git a/benchmark/bench.cpp b/benchmark/bench.cpp index 93cf6031..ba891cd0 100644 --- a/benchmark/bench.cpp +++ b/benchmark/bench.cpp @@ -78,7 +78,7 @@ DEFINE_bool( "Populate pmem space while creating a new instance. This can improve write " "performance in runtime, but will take long time to init the instance"); -DEFINE_int32(max_access_threads, 32, "Max access threads of the instance"); +DEFINE_uint64(max_access_threads, 64, "Max access threads of the instance"); DEFINE_uint64(space, (256ULL << 30), "Max usable PMem space of the instance"); @@ -122,12 +122,28 @@ std::vector> latencies; std::vector random_engines; std::vector ranges; -enum class DataType { String, Sorted, Hashes, List, Blackhole } bench_data_type; +enum class DataType { + String, + Sorted, + Hashes, + List, + VHash, + Blackhole +} bench_data_type; enum class KeyDistribution { Range, Uniform, Zipf } key_dist; enum class ValueSizeDistribution { Constant, Uniform } vsz_dist; +void LaunchNThreads(int n_thread, std::function func, + int id_start = 0) { + std::vector ts; + for (int i = id_start; i < id_start + n_thread; i++) { + ts.emplace_back(std::thread(func, i)); + } + for (auto& t : ts) t.join(); +} + std::uint64_t generate_key(size_t tid) { static std::uint64_t max_key = FLAGS_existing_keys_ratio == 0 ? 
UINT64_MAX @@ -164,6 +180,24 @@ size_t generate_value_size(size_t tid) { } } +#ifdef KVDK_ENABLE_VHASH +void FillVHash(size_t tid) { + std::string key(8, ' '); + for (size_t i = 0; i < FLAGS_num_kv / FLAGS_num_collection; ++i) { + std::uint64_t num = ranges[tid].gen(); + std::uint64_t cid = num % FLAGS_num_collection; + memcpy(&key[0], &num, 8); + StringView value = StringView(value_pool.data(), generate_value_size(tid)); + + Status s = engine->VHashPut(collections[cid], key, value); + + if (s != Status::Ok) { + throw std::runtime_error{"VHashPut error"}; + } + } +} +#endif + void DBWrite(int tid) { std::string key(8, ' '); std::unique_ptr batch; @@ -230,6 +264,14 @@ void DBWrite(int tid) { s = engine->ListPushFront(collections[cid], value); break; } + case DataType::VHash: { +#ifdef KVDK_ENABLE_VHASH + s = engine->VHashPut(collections[cid], key, value); +#else + s = Status::NotSupported; +#endif + break; + } case DataType::Blackhole: { s = Status::Ok; break; @@ -313,6 +355,20 @@ void DBScan(int tid) { engine->HashIteratorRelease(iter); break; } + case DataType::VHash: { + auto iter = engine->VHashIteratorCreate(collections[cid]); + if (!iter) throw std::runtime_error{"Fail creating VHashIterator"}; + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + key = iter->Key(); + value_sink = iter->Value(); + ++operations; + if (operations > operations_counted + 1000) { + read_ops += (operations - operations_counted); + operations_counted = operations; + } + } + break; + } case DataType::Blackhole: { operations += 1024; read_ops.fetch_add(1024); @@ -366,6 +422,14 @@ void DBRead(int tid) { s = engine->ListPopBack(collections[cid], &value_sink); break; } + case DataType::VHash: { +#ifdef KVDK_ENABLE_VHASH + s = engine->VHashGet(collections[cid], key, &value_sink); +#else + s = Status::NotSupported; +#endif + break; + } case DataType::Blackhole: { s = Status::Ok; break; @@ -412,6 +476,8 @@ void ProcessBenchmarkConfigs() { bench_data_type = DataType::Hashes; } else if (FLAGS_type == "list") { bench_data_type = DataType::List; + } else if (FLAGS_type == "vhash") { + bench_data_type = DataType::VHash; } else if (FLAGS_type == "blackhole") { bench_data_type = DataType::Blackhole; } else { @@ -425,6 +491,7 @@ void ProcessBenchmarkConfigs() { } case DataType::Hashes: case DataType::List: + case DataType::VHash: case DataType::Sorted: { collections.resize(FLAGS_num_collection); for (size_t i = 0; i < FLAGS_num_collection; i++) { @@ -437,6 +504,9 @@ void ProcessBenchmarkConfigs() { if (FLAGS_batch_size > 0 && (bench_data_type == DataType::List)) { throw std::invalid_argument{R"(List does not support batch write.)"}; } + if (FLAGS_batch_size > 0 && (bench_data_type == DataType::VHash)) { + throw std::invalid_argument{R"(VHash does not support batch write.)"}; + } // Check for scan flag switch (bench_data_type) { @@ -458,10 +528,11 @@ void ProcessBenchmarkConfigs() { random_engines.resize(FLAGS_threads); if (FLAGS_fill) { + assert(bench_data_type != DataType::VHash && "VHash don't need fill"); assert(FLAGS_read_ratio == 0); key_dist = KeyDistribution::Range; - operations_per_thread = FLAGS_num_kv / FLAGS_max_access_threads + 1; - for (int i = 0; i < FLAGS_max_access_threads; i++) { + operations_per_thread = FLAGS_num_kv / FLAGS_threads + 1; + for (size_t i = 0; i < FLAGS_threads; i++) { ranges.emplace_back(i * operations_per_thread, (i + 1) * operations_per_thread); } @@ -475,6 +546,14 @@ void ProcessBenchmarkConfigs() { throw std::invalid_argument{"Invalid key distribution"}; } } + if (bench_data_type 
== DataType::VHash) {
+    // VHash needs fill for read and update benchmarks
+    operations_per_thread = FLAGS_num_kv / FLAGS_max_access_threads + 1;
+    for (size_t i = 0; i < FLAGS_max_access_threads; i++) {
+      ranges.emplace_back(i * operations_per_thread,
+                          (i + 1) * operations_per_thread);
+    }
+  }

   if (FLAGS_value_size_distribution == "constant") {
     vsz_dist = ValueSizeDistribution::Constant;
@@ -535,7 +614,6 @@ int main(int argc, char** argv) {
           throw std::runtime_error{"Fail to create Sorted collection"};
         }
       }
-      engine->ReleaseAccessThread();
       break;
     }
     case DataType::Hashes: {
@@ -545,7 +623,6 @@ int main(int argc, char** argv) {
           throw std::runtime_error{"Fail to create Hashset"};
         }
       }
-      engine->ReleaseAccessThread();
       break;
     }
     case DataType::List: {
@@ -555,7 +632,24 @@ int main(int argc, char** argv) {
           throw std::runtime_error{"Fail to create List"};
         }
       }
-      engine->ReleaseAccessThread();
+      break;
+    }
+    case DataType::VHash: {
+#ifdef KVDK_ENABLE_VHASH
+      for (auto col : collections) {
+        Status s =
+            engine->VHashCreate(col, FLAGS_num_kv / FLAGS_num_collection);
+        if (s != Status::Ok) {
+          throw std::runtime_error{"Fail to create VHash"};
+        }
+      }
+      if (!FLAGS_fill) {
+        LaunchNThreads(FLAGS_threads, FillVHash);
+      }
+#else
+      throw std::runtime_error{"VHash not supported!"};
+#endif
+      break;
     }
     default: {
diff --git a/doc/benchmark.md b/doc/benchmark.md
index 887e4508..34311427 100644
--- a/doc/benchmark.md
+++ b/doc/benchmark.md
@@ -4,6 +4,15 @@ To test performance of KVDK, you can run our benchmark tool "bench", the tool is
 You can manually run individual benchmarks following the examples shown below, or simply run our basic benchmark script "scripts/run_benchmark.py" to test all the basic read/write performance.
+To run the script, you should first build KVDK, then run:
+
+```
+scripts/run_benchmark.py [data_type] [key distribution]
+```
+
+data_type: which data type to benchmark; it can be string/sorted/hash/list/blackhole/all
+
+key distribution: distribution of keys in the benchmark workloads; it can be random/zipf/all
 ## Fill data to new instance
 To test performance, we need to first fill key-value pairs to the KVDK instance. Since KVDK does not support cross-socket access yet, we need to bind the bench program to a NUMA node:
@@ -20,7 +29,7 @@ Explanation of arguments:
   -space: PMem space allocated to the KVDK instance.
-  -max_access_threads: Max concurrent access threads of the KVDK instance, set it to the number of the hyper-threads for performance consideration.
+  -max_access_threads: Max concurrent access threads in the KVDK instance; set it to the number of hyper-threads for best performance. You can call KVDK APIs from any number of threads, but if you run more parallel threads than max_access_threads, performance will degrade due to synchronization cost.
   -type: Type of key-value pairs to benchmark, it can be "string", "hash" or "sorted".
diff --git a/doc/user_doc.md b/doc/user_doc.md
index 809a8e32..c982c499 100644
--- a/doc/user_doc.md
+++ b/doc/user_doc.md
@@ -1,9 +1,9 @@
 KVDK
 =======
-KVDK(Key-Value Development Kit) is a Key-Value store for Persistent memory(PMem).
+KVDK (Key-Value Development Kit) is a Key-Value store for Persistent Memory (PMem).
-KVDK supports both sorted and unsorted KV-Pairs.
+KVDK supports basic read and write operations on both sorted and unsorted KV-Pairs. It also supports advanced features such as **backup**, **checkpoint**, **expire key**, **atomic batch write** and **transactions**.
Code snippets in this user document are from `./examples/tutorial/cpp_api_tutorial.cpp`, which is built as `./build/examples/tutorial/cpp_api_tutorial`.
@@ -70,7 +70,7 @@ int main()
 `kvdk::Status` indicates status of KVDK function calls.
 Functions return `kvdk::Status::Ok` if such a function call is a success. If exceptions are raised during function calls,
 other `kvdk::Status` is returned,
-such as `kvdk::Status::MemoryOverflow`.
+such as `kvdk::Status::MemoryOverflow` when there is not enough memory to allocate.
 ## Close a KVDK instance
@@ -97,26 +97,45 @@ int main()
 ```
 ## Data types
-KVDK currently supports string type for both keys and values.
-### Strings
-All keys and values in a KVDK instance are strings.
+KVDK currently supports raw string, sorted collection, hash collection and list data types.
+
+### Raw String
+
+All keys and values in a KVDK instance are strings. You can directly store or read key-value pairs in the global namespace, which is accessible via Get, Put, Delete and Modify operations; we call these string type data in KVDK.
 Keys are limited to have a maximum size of 64KB.
-A string value can be at max 64MB in length by default. The maximum length can be configured when initializing a KVDK instance.
+A value can be at most 64MB in length by default. The maximum length can be configured when initializing a KVDK instance.
+
+### Collections
+
+Instead of using raw strings, you can organize key-value pairs into collections; each collection has its own namespace.
+
+Currently we have three types of collections:
+
+#### Sorted Collection
+
+KV pairs in a Sorted Collection are stored in a certain order (lexicographical order by default). They can be iterated forward or backward starting from an arbitrary point (at a key or between two keys) by an iterator. They can also be directly accessed via SortedGet, SortedPut, SortedDelete operations.
+
+#### Hash Collection
+
+A Hash Collection is like Raw String with its own namespace; you can access KV pairs via HashGet, HashPut, HashDelete and HashModify operations.
+
+In the current version, the performance of operations on a hash collection is similar to a sorted collection, which is much slower than raw string, so we recommend preferring raw string or sorted collection.
+
+#### List
-## Collections
-All Key-Value pairs(KV-Pairs) are organized into collections.
+A List is a list of string elements. You can access elements at the front or back via ListPushFront, ListPushBack, ListPopFront and ListPopBack, or operate on elements by index via ListInsertAt, ListInsertBefore, ListInsertAfter and ListErase. Notice that operations by index take O(n) time, while operations on the front and back only take O(1). A brief usage sketch follows at the end of this section.
-There is an anonymous global collection with KV-Pairs directly accessible via Get, Put, Delete operations. The anonymous global collection is unsorted.
+### Namespace
-Users can also create named collections.
+Each collection has its own namespace, so you can store the same key in every collection. However, collection names and raw string keys share the same namespace, so you can't use the same name for a collection and a string key; otherwise an error status (Status::WrongType) will be returned.
-KVDK currently supports sorted named collections. Users can iterate forward or backward starting from an arbitrary point(at a key or between two keys) by an iterator. Elements can also be directly accessed via SortedGet, SortedPut, SortedDelete operations.
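The List operations described above are not covered by the examples that follow, so here is a minimal sketch in the style of the tutorial code. `ListCreate` is an assumption made by analogy with `SortedCreate`; consult `examples/tutorial` for the authoritative API.

```c++
int main()
{
  ... Open a KVDK instance as described in "Open a KVDK instance" ...

  {
    std::string list{"my_list"};
    // Assumption: like sorted collections, a list must be created first.
    kvdk::Status status = engine->ListCreate(list);
    assert(status == kvdk::Status::Ok);

    status = engine->ListPushBack(list, "elem1");   // my_list: [elem1]
    assert(status == kvdk::Status::Ok);
    status = engine->ListPushFront(list, "elem0");  // my_list: [elem0, elem1]
    assert(status == kvdk::Status::Ok);

    std::string elem;
    status = engine->ListPopFront(list, &elem);     // my_list: [elem1]
    assert(status == kvdk::Status::Ok && elem == "elem0");
  }

  ... Close KVDK instance as described in "Close a KVDK Instance" ...
}
```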
+## API Examples
-## Reads and Writes in Anonymous Global Collection
+### Reads and Writes with String type
-A KVDK instance provides Get, Put, Delete methods to query/modify/delete entries.
+A KVDK instance provides Get, Put, Delete methods to query/modify/delete raw string KVs.
 The following code performs a series of Get, Put and Delete operations.
@@ -125,7 +144,7 @@ int main()
 {
   ... Open a KVDK instance as described in "Open a KVDK instance" ...

-  // Reads and Writes on Anonymous Global Collection
+  // Reads and Writes String KV
   {
     std::string key1{"key1"};
     std::string key2{"key2"};
@@ -173,11 +192,11 @@ int main()
 }
 ```
-## Reads and Writes in a Named Collection
+### Reads and Writes in a Sorted Collection
 A KVDK instance provides SortedGet, SortedPut, SortedDelete methods to query/modify/delete sorted entries.
-The following code performs a series of SortedGet, SortedPut and SortedDelete operations, which also initialize a named collection implicitly.
+The following code performs a series of SortedGet, SortedPut and SortedDelete operations on a sorted collection.
 ```c++
 int main()
@@ -194,9 +213,13 @@ int main()
     std::string value2{"value2"};
     std::string v;

+    // You must create sorted collections before performing any operations on them
+    status = engine->SortedCreate(collection1);
+    assert(status == kvdk::Status::Ok);
+    status = engine->SortedCreate(collection2);
+    assert(status == kvdk::Status::Ok);
+
     // Insert key1-value1 into "my_collection_1".
-    // Implicitly create a collection named "my_collection_1" in which
-    // key1-value1 is stored.
     status = engine->SortedPut(collection1, key1, value1);
     assert(status == kvdk::Status::Ok);
@@ -206,8 +229,6 @@ int main()
     assert(v == value1);

     // Insert key1-value2 into "my_collection_2".
-    // Implicitly create a collection named "my_collection_2" in which
-    // key1-value2 is stored.
     status = engine->SortedPut(collection2, key1, value2);
     assert(status == kvdk::Status::Ok);
@@ -236,8 +257,13 @@ int main()
     status = engine->SortedDelete(collection1, key1);
     assert(status == kvdk::Status::Ok);

-    printf("Successfully performed SortedGet, SortedPut, SortedDelete operations on named "
-           "collections.\n");
+    // Destroy sorted collections
+    status = engine->SortedDestroy(collection1);
+    assert(status == kvdk::Status::Ok);
+    status = engine->SortedDestroy(collection2);
+    assert(status == kvdk::Status::Ok);
+
+    printf("Successfully performed SortedGet, SortedPut, SortedDelete operations.\n");
   }

   ... Do something else with KVDK instance ...
 }
 ```
-## Iterating a Named Collection
-The following example demonstrates how to iterate through a named collection. It also demonstrates how to iterate through a range defined by Key.
+### Iterating a Sorted Collection
+The following example demonstrates how to iterate through a sorted collection on a consistent view of its data. It also demonstrates how to iterate through a range defined by Key.
 ```c++
 int main()
 {
   ... Open a KVDK instance as described in "Open a KVDK instance" ...

-  // Iterating a Sorted Named Collection
+  // Iterating a Sorted Collection
   {
     std::string sorted_collection{"my_sorted_collection"};
+    engine->SortedCreate(sorted_collection);
     // Create toy keys and values.
     std::vector<std::pair<std::string, std::string>> kv_pairs;
     for (int i = 0; i < 10; ++i) {
@@ -282,7 +309,9 @@ int main()

     // Sort kv_pairs for checking the order of "my_sorted_collection".
     std::sort(kv_pairs.begin(), kv_pairs.end());

-    // Iterate through collection "my_sorted_collection"
+    // Iterate through collection "my_sorted_collection". The iterator is
+    // created on a consistent view of the data, i.e. modifications made
+    // after the iterator was created won't be observed.
     auto iter = engine->SortedIteratorCreate(sorted_collection);
     iter->SeekToFirst();
     {
@@ -320,7 +349,7 @@ int main()
       }
     }

-    printf("Successfully iterated through a sorted named collections.\n");
+    printf("Successfully iterated through a sorted collection.\n");
     engine->SortedIteratorRelease(iter);
   }
@@ -330,7 +359,7 @@ int main()
 }
 ```
-## Atomic Updates
+### Atomic Updates
 KVDK supports organizing a series of Put, Delete operations into a `kvdk::WriteBatch` object as an atomic operation. If KVDK fails to apply the `kvdk::WriteBatch` object as a whole, i.e. the system shuts down while applying the batch, it will roll back to the state right before applying the `kvdk::WriteBatch`.
 ```c++
@@ -387,7 +416,12 @@ A KVDK instance can be accessed by multiple read and write threads safely. Synch
 Users can configure KVDK to adapt to their system environment by setting up a `kvdk::Configs` object and passing it to 'kvdk::Engine::Open' when initializing a KVDK instance.
 ### Max Access Threads
-Maximum number of access threads is specified by `kvdk::Configs::max_access_threads`. Defaulted to 48. It's recommended to set this number to the number of threads provided by CPU.
+The maximum number of internal access threads in KVDK is specified by `kvdk::Configs::max_access_threads`. It defaults to 64. It's recommended to set this number to the number of hardware threads provided by the CPU.
+
+You can call KVDK APIs from any number of threads, but if you run more parallel threads than max_access_threads, performance will degrade due to synchronization cost.
+
+### Clean Threads
+KVDK reclaims the space of updated/deleted data in the background with a dynamic number of clean threads. You can specify the maximum number of clean threads with `kvdk::Configs::clean_threads`. It defaults to 8; you can configure more clean threads for delete-intensive workloads to avoid running out of space.
 ### PMem File Size
 `kvdk::Configs::pmem_file_size` specifies the space allocated to a KVDK instance. Defaulted to 2^38Bytes = 256GB.
@@ -418,3 +452,7 @@ Specified by `kvdk::Configs::hash_bucket_num`. Greater number will improve perfo
 ### Buckets per Slot
 Specified by `kvdk::Configs::num_buckets_per_slot`. Smaller number will improve performance by reducing lock contentions and improving caching at the cost of greater DRAM space. Please read Architecture Documentation for details before tuning this parameter.
+
+## Advanced features and more APIs
+
+Please read `examples/tutorial` for more APIs and advanced features of KVDK.
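This patch adds a `Transaction` interface (`kvdk/transaction.hpp`) and its C bindings (`engine/c/kvdk_transaction.cpp`), but the user doc does not yet show an example. Below is a minimal sketch using only the calls visible in this diff (`TransactionCreate`, `StringPut`, `Commit`, `Rollback`); exact signatures may differ, so treat it as illustrative rather than authoritative.

```c++
int main()
{
  ... Open a KVDK instance as described in "Open a KVDK instance" ...

  {
    // engine->TransactionCreate() is what KVDKTransactionCreate() wraps in the C API
    auto txn = engine->TransactionCreate();
    assert(txn != nullptr);

    kvdk::Status status = txn->StringPut("txn_key1", "value1");
    assert(status == kvdk::Status::Ok);
    status = txn->StringPut("txn_key2", "value2");
    assert(status == kvdk::Status::Ok);

    // Either both writes become visible, or neither does.
    status = txn->Commit();
    if (status != kvdk::Status::Ok) {
      txn->Rollback();  // discard staged writes on failure
    }
  }

  ... Close KVDK instance as described in "Close a KVDK Instance" ...
}
```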
diff --git a/engine/allocator.hpp b/engine/allocator.hpp index fdef669d..15290aaa 100644 --- a/engine/allocator.hpp +++ b/engine/allocator.hpp @@ -28,4 +28,20 @@ class Allocator { virtual SpaceEntry Allocate(uint64_t size) = 0; virtual void Free(const SpaceEntry& entry) = 0; }; + +class IVolatileAllocator { + public: + virtual void* Allocate(size_t bytes) = 0; + virtual void Deallocate(void* addr, size_t bytes) = 0; +}; + +class CharAllocator final : public IVolatileAllocator { + void* Allocate(size_t n) final { + void* mem = ::malloc(n); + if (mem == nullptr) throw std::bad_alloc{}; + return mem; + } + void Deallocate(void* addr, size_t) final { ::free(addr); } +}; + } // namespace KVDK_NAMESPACE \ No newline at end of file diff --git a/engine/c/kvdk_basic_op.cpp b/engine/c/kvdk_basic_op.cpp index 81b2bfdb..03ec0586 100644 --- a/engine/c/kvdk_basic_op.cpp +++ b/engine/c/kvdk_basic_op.cpp @@ -96,10 +96,6 @@ KVDKStatus KVDKRestore(const char* name, const char* backup_log, return s; } -void KVDKReleaseAccessThread(KVDKEngine* engine) { - engine->rep->ReleaseAccessThread(); -} - KVDKSnapshot* KVDKGetSnapshot(KVDKEngine* engine, int make_checkpoint) { KVDKSnapshot* snapshot = new KVDKSnapshot; snapshot->rep = engine->rep->GetSnapshot(make_checkpoint); diff --git a/engine/c/kvdk_c.hpp b/engine/c/kvdk_c.hpp index 01aba7cf..318cb623 100644 --- a/engine/c/kvdk_c.hpp +++ b/engine/c/kvdk_c.hpp @@ -12,6 +12,7 @@ #include "kvdk/engine.h" #include "kvdk/engine.hpp" #include "kvdk/iterator.hpp" +#include "kvdk/transaction.hpp" #include "kvdk/write_batch.hpp" using kvdk::StringView; @@ -23,6 +24,7 @@ using kvdk::ListIterator; using kvdk::Snapshot; using kvdk::SortedCollectionConfigs; using kvdk::SortedIterator; +using kvdk::Transaction; using kvdk::WriteBatch; using kvdk::WriteOptions; @@ -40,6 +42,10 @@ struct KVDKWriteBatch { std::unique_ptr rep; }; +struct KVDKTransaction { + std::unique_ptr rep; +}; + struct KVDKSortedIterator { SortedIterator* rep; }; diff --git a/engine/c/kvdk_transaction.cpp b/engine/c/kvdk_transaction.cpp new file mode 100644 index 00000000..9d5d3816 --- /dev/null +++ b/engine/c/kvdk_transaction.cpp @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2021-2022 Intel Corporation + */ + +#include "kvdk_c.hpp" + +extern "C" { +KVDKTransaction* KVDKTransactionCreate(KVDKEngine* engine) { + KVDKTransaction* txn = new KVDKTransaction{}; + txn->rep = engine->rep->TransactionCreate(); + return txn; +} +void KVDKTransactionDestory(KVDKTransaction* txn) { delete txn; } +KVDKStatus KVDKTransactionStringPut(KVDKTransaction* txn, char const* key_data, + size_t key_len, char const* val_data, + size_t val_len) { + return txn->rep->StringPut(std::string(key_data, key_len), + std::string(val_data, val_len)); +} +KVDKStatus KVDKTransactionStringDelete(KVDKTransaction* txn, + char const* key_data, size_t key_len) { + return txn->rep->StringDelete(std::string(key_data, key_len)); +} +KVDKStatus KVDKTransactionSortedPut(KVDKTransaction* txn, + char const* collection, + size_t collection_len, char const* key_data, + size_t key_len, char const* val_data, + size_t val_len) { + return txn->rep->SortedPut(std::string(collection, collection_len), + std::string(key_data, key_len), + std::string(val_data, val_len)); +} + +KVDKStatus KVDKTransactionSortedDelete(KVDKTransaction* txn, + char const* collection, + size_t collection_len, + char const* key_data, size_t key_len) { + return txn->rep->SortedDelete(std::string(collection, collection_len), + std::string(key_data, key_len)); +} 
+KVDKStatus KVDKTransactionHashPut(KVDKTransaction* txn, char const* collection, + size_t collection_len, char const* key_data, + size_t key_len, char const* val_data, + size_t val_len) { + return txn->rep->HashPut(std::string(collection, collection_len), + std::string(key_data, key_len), + std::string(val_data, val_len)); +} +KVDKStatus KVDKTransactionHashDelete(KVDKTransaction* txn, + char const* collection, + size_t collection_len, + char const* key_data, size_t key_len) { + return txn->rep->HashDelete(std::string(collection, collection_len), + std::string(key_data, key_len)); +} +KVDKStatus KVDKTransactionCommit(KVDKTransaction* txn) { + return txn->rep->Commit(); +} +void KVDKTransactionRollback(KVDKTransaction* txn) { + return txn->rep->Rollback(); +} + +} // extern "C" diff --git a/engine/data_record.hpp b/engine/data_record.hpp index 3ca296ff..b8ea2283 100644 --- a/engine/data_record.hpp +++ b/engine/data_record.hpp @@ -17,11 +17,11 @@ namespace KVDK_NAMESPACE { enum RecordType : uint8_t { Empty = 0, String = (1 << 0), - SortedHeader = (1 << 1), + SortedRecord = (1 << 1), SortedElem = (1 << 2), - HashHeader = (1 << 3), + HashRecord = (1 << 3), HashElem = (1 << 4), - ListHeader = (1 << 5), + ListRecord = (1 << 5), ListElem = (1 << 6), }; @@ -35,8 +35,8 @@ enum class RecordStatus : uint8_t { }; const uint8_t ExpirableRecordType = - (RecordType::String | RecordType::SortedHeader | RecordType::HashHeader | - RecordType::ListHeader); + (RecordType::String | RecordType::SortedRecord | RecordType::HashRecord | + RecordType::ListRecord); const uint8_t PrimaryRecordType = ExpirableRecordType; @@ -44,8 +44,8 @@ const uint8_t ElemType = (RecordType::SortedElem | RecordType::HashElem | RecordType::ListElem); const uint8_t CollectionType = - (RecordType::SortedHeader | RecordType::HashHeader | - RecordType::ListHeader); + (RecordType::SortedRecord | RecordType::HashRecord | + RecordType::ListRecord); struct DataHeader { DataHeader() = default; @@ -365,9 +365,9 @@ struct DLRecord { prev(_prev), next(_next), expired_time(_expired_time) { - kvdk_assert(_type & (RecordType::SortedElem | RecordType::SortedHeader | - RecordType::HashElem | RecordType::HashHeader | - RecordType::ListElem | RecordType::ListHeader), + kvdk_assert(_type & (RecordType::SortedElem | RecordType::SortedRecord | + RecordType::HashElem | RecordType::HashRecord | + RecordType::ListElem | RecordType::ListRecord), ""); memcpy(data, _key.data(), _key.size()); memcpy(data + _key.size(), _value.data(), _value.size()); diff --git a/engine/dram_allocator.cpp b/engine/dram_allocator.cpp index 67ea8e75..87fdcfdf 100644 --- a/engine/dram_allocator.cpp +++ b/engine/dram_allocator.cpp @@ -13,31 +13,34 @@ void ChunkBasedAllocator::Free(const SpaceEntry&) { } SpaceEntry ChunkBasedAllocator::Allocate(uint64_t size) { + kvdk_assert(ThreadManager::ThreadID() >= 0, ""); SpaceEntry entry; + auto& tc = dalloc_thread_cache_[ThreadManager::ThreadID() % + dalloc_thread_cache_.size()]; if (size > chunk_size_) { void* addr = aligned_alloc(64, size); if (addr != nullptr) { entry.size = chunk_size_; entry.offset = addr2offset(addr); - dalloc_thread_cache_[access_thread.id].allocated_chunks.push_back(addr); + tc.allocated_chunks.push_back(addr); } return entry; } - if (dalloc_thread_cache_[access_thread.id].usable_bytes < size) { + if (tc.usable_bytes < size) { void* addr = aligned_alloc(64, chunk_size_); if (addr == nullptr) { return entry; } - dalloc_thread_cache_[access_thread.id].chunk_addr = (char*)addr; - 
dalloc_thread_cache_[access_thread.id].usable_bytes = chunk_size_; - dalloc_thread_cache_[access_thread.id].allocated_chunks.push_back(addr); + tc.chunk_addr = (char*)addr; + tc.usable_bytes = chunk_size_; + tc.allocated_chunks.push_back(addr); } entry.size = size; - entry.offset = addr2offset(dalloc_thread_cache_[access_thread.id].chunk_addr); - dalloc_thread_cache_[access_thread.id].chunk_addr += size; - dalloc_thread_cache_[access_thread.id].usable_bytes -= size; + entry.offset = addr2offset(tc.chunk_addr); + tc.chunk_addr += size; + tc.usable_bytes -= size; return entry; } } // namespace KVDK_NAMESPACE \ No newline at end of file diff --git a/engine/engine.cpp b/engine/engine.cpp index 1f2bf1bd..9ba3bf3a 100644 --- a/engine/engine.cpp +++ b/engine/engine.cpp @@ -7,15 +7,15 @@ #include "kv_engine.hpp" namespace KVDK_NAMESPACE { -Status Engine::Open(const std::string& name, Engine** engine_ptr, +Status Engine::Open(const StringView name, Engine** engine_ptr, const Configs& configs, FILE* log_file) { GlobalLogger.Init(log_file, configs.log_level); Status s = KVEngine::Open(name, engine_ptr, configs); return s; } -Status Engine::Restore(const std::string& engine_path, - const std::string& backup_file, Engine** engine_ptr, +Status Engine::Restore(const StringView engine_path, + const StringView backup_file, Engine** engine_ptr, const Configs& configs, FILE* log_file) { GlobalLogger.Init(log_file, configs.log_level); Status s = KVEngine::Restore(engine_path, backup_file, engine_ptr, configs); diff --git a/engine/experimental/hashptr_map.hpp b/engine/experimental/hashptr_map.hpp new file mode 100644 index 00000000..db659bb8 --- /dev/null +++ b/engine/experimental/hashptr_map.hpp @@ -0,0 +1,1282 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../macros.hpp" +#define dynamic_assert kvdk_assert + +/// TODO: allocator for aligned allocation to work under C++11 + +namespace KVDK_NAMESPACE { + +// Internal helpers +namespace { +// Templates to convert between a pointer and its compact representation +// If we use a cache-backed pool allocator, or a allocator with reference count, +// which defines it's own pointer class, we need these converters +// to convert the pointer and std::uint64_t. 
+template ::pointer> +struct cvt_helper + : public std::enable_if< + std::is_convertible().to_pointer( + std::declval())), + Pointer>::value && + std::is_convertible().to_rep( + std::declval())), + std::uint64_t>::value, + T>::type {}; + +template ::pointer, + typename = void> +Pointer to_pointer(std::uint64_t rep, Alloc const&) { + return reinterpret_cast(rep); +} + +template ::pointer, + typename = void> +std::uint64_t to_rep(Pointer ptr, Alloc const&) { + return reinterpret_cast(ptr); +} + +// template::pointer> auto to_pointer(std::uint64_t rep, +// Alloc const& alloc) -> typename cvt_helper::type +// { +// return alloc.to_pointer(rep); +// } + +// template::pointer> auto to_rep(Pointer const& ptr, Alloc +// const& alloc) -> typename cvt_helper::type +// { +// return alloc.to_rep(ptr); +// } + +std::uint64_t reverse_bits(std::uint64_t u64) { + u64 = (u64 & 0xFFFFFFFF00000000) >> 32 | (u64 & 0x00000000FFFFFFFF) << 32; + u64 = (u64 & 0xFFFF0000FFFF0000) >> 16 | (u64 & 0x0000FFFF0000FFFF) << 16; + u64 = (u64 & 0xFF00FF00FF00FF00) >> 8 | (u64 & 0x00FF00FF00FF00FF) << 8; + u64 = (u64 & 0xF0F0F0F0F0F0F0F0) >> 4 | (u64 & 0x0F0F0F0F0F0F0F0F) << 4; + u64 = (u64 & 0xCCCCCCCCCCCCCCCC) >> 2 | (u64 & 0x3333333333333333) << 2; + u64 = (u64 & 0xAAAAAAAAAAAAAAAA) >> 1 | (u64 & 0x5555555555555555) << 1; + return u64; +} + +void mm_pause() { + constexpr size_t NPause = 32; + for (size_t i = 0; i < NPause; i++) { + _mm_pause(); + } +} + +template +struct maybe_add_pointer + : public std::conditional::value, + typename std::add_pointer::type, Func> {}; + +} // namespace + +// A hashptr_map consists of multiple buckets. +// Each Bucket is a single linked list of node +// Each node consists of 1 control_field and field_cnt - 1 storage_field. +template , + typename KeyEqual = std::equal_to, size_t field_cnt = 8, + size_t embed_cnt = 4, size_t max_scale = 32, bool PML5 = false, + typename Allocator = std::allocator> +class hashptr_map { + private: + static_assert(field_cnt == 8 || field_cnt == 16, ""); + static_assert(((embed_cnt - 1) & embed_cnt) == 0, + "embed_cnt must be power of 2"); + static_assert(max_scale <= 64, ""); + + using Hash = std::uint64_t; + static_assert(std::is_convertible()( + std::declval())), + Hash>::value, + ""); + + using field_rep = std::uint64_t; + using control_rep = field_rep; + using storage_rep = field_rep; + using hash_rep = std::uint64_t; + using tag_type = + typename std::conditional::type; + using mask_type = std::uint16_t; + + class node; + using node_alloc = + typename std::allocator_traits::template rebind_alloc; + using node_alloc_traits = + typename std::allocator_traits::template rebind_traits; + using node_pointer = typename node_alloc_traits::pointer; + + // constants for bit manipulation in nodes + // A node consists of multiple 64-bit fields, one control field and several + // storage fields. The control field holds the pointer to next node, and bits + // for lock and rehash. A storage field holds a user pointer to some data, a + // tag extracted from hash value and a bit for rehash. + static_assert(sizeof(field_rep) == sizeof(void*), "64-bit system required"); + static_assert(sizeof(Hash) == 8, "64-bit hash required"); + + // Actual bits used by a pointer in a 4-level or 5-level paging system + static constexpr size_t pointer_bits = PML5 ? 
56 : 48; + + // Bits [63:56]/[63:48] are reused by a node to store meta information + static constexpr field_rep meta_mask = + ((~0UL >> pointer_bits) << pointer_bits); + + // Bits [55:0]/[47:0] are untouched + static constexpr field_rep ptr_mask = ~meta_mask; + + // Bit 63 is used as a mark for rehash + static constexpr field_rep mark_bit = (0x1UL << 63); + + // For the storage unit, + // bits [62:56]/[62:48] are used to store a tag. + // The tag consists high 7/15 bits inside a hash value + static constexpr storage_rep tag_mask = (meta_mask & ~mark_bit); + + // For the control unit of a node, + // bits [62:56]/[62:48] are used as a read-write lock. + static constexpr control_rep lock_mask = (meta_mask & ~mark_bit); + static constexpr control_rep slock_val = (0x1UL << pointer_bits); + static constexpr control_rep elock_bit = (0x1UL << 62); + + class tagged_pointer { + private: + storage_rep u64{0}; + + public: + explicit tagged_pointer(nullptr_t) {} + + explicit tagged_pointer(tag_type tag, Pointer p) + : u64{reinterpret_cast(p)} { + dynamic_assert(!(u64 & meta_mask), "Upper bits in Pointer not zeros!"); + u64 |= (static_cast(tag) << pointer_bits); + dynamic_assert(!(u64 & mark_bit), "Invalid tag!"); + } + + explicit tagged_pointer(storage_rep rep) : u64{rep} { u64 &= ~mark_bit; } + + tagged_pointer(tagged_pointer const&) = default; + tagged_pointer(tagged_pointer&&) = default; + + tag_type tag() const { + return static_cast((u64 & tag_mask) >> pointer_bits); + } + + Pointer pointer() const { + return reinterpret_cast(u64 & ptr_mask); + } + + storage_rep rep() const { return u64; } + }; + + class alignas(64) node { + private: + std::atomic meta{}; + std::array, field_cnt - 1> data{}; + + public: + node() : node{false} {} + + explicit node(bool mark) { + if (!mark) return; + meta_mark_flip(); + for (size_t i = 0; i < data.size(); i++) entry_mark_flip(i); + } + + // Access Entrys by indexing + void set_entry(size_t index, tagged_pointer tp) { + dynamic_assert((data.at(index).load() & ~mark_bit) == 0, + "Entry already set!"); + + storage_rep rep = tp.rep(); + rep |= data.at(index).load() & mark_bit; + data.at(index).store(rep); + } + + void set_entry(size_t index, tag_type tag, Pointer p) { + set_entry(index, tagged_pointer{tag, p}); + } + + void set_entry(size_t index, Pointer p) { + dynamic_assert((data.at(index).load() & ptr_mask) != 0, + "Entry not set yet!"); + storage_rep rep = tagged_pointer{0, p}.rep(); + rep |= data.at(index).load() & meta_mask; + data.at(index).store(rep); + } + + void erase_entry(size_t index) { + dynamic_assert((data.at(index).load() & ptr_mask) != 0, + "Entry not set yet!"); + storage_rep rep = tagged_pointer{nullptr}.rep(); + rep |= data.at(index).load() & mark_bit; + data.at(index).store(rep); + } + + tagged_pointer load_entry(size_t index) { + return tagged_pointer{data.at(index).load()}; + } + + void entry_mark_flip(size_t index) { data.at(index).fetch_xor(mark_bit); } + + bool entry_mark(size_t index) { return data.at(index).load() & mark_bit; } + + // Every node can be locked, + // but only the first node in a bucket is locked + // for exclusive/shared access of the bucket + bool try_lock() { + control_rep old = meta.load(); + if ((old & elock_bit) || + !meta.compare_exchange_strong(old, old | elock_bit)) + return false; + return true; + } + + void lock() { + while (!try_lock()) { + mm_pause(); + } + } + + void unlock() { + dynamic_assert((meta.load() & lock_mask) == elock_bit, + "Unlock a lock not locked yet!"); + meta.fetch_xor(elock_bit); + } + + bool 
try_lock_shared() { + field_rep old = meta.load(); + if ((old & elock_bit) || (old & lock_mask) + slock_val >= mark_bit || + !meta.compare_exchange_strong(old, old + slock_val)) { + return false; + } + return true; + } + + void lock_shared() { + while (!try_lock_shared()) { + mm_pause(); + } + } + + void unlock_shared() { + dynamic_assert( + (meta.load() & lock_mask) != 0 && !(meta.load() & elock_bit), + "Unlock a lock yet locked!"); + meta.fetch_sub(slock_val); + } + + void meta_mark_flip() { meta.fetch_xor(mark_bit); } + + bool meta_mark() { return meta.load() & mark_bit; } + + node_pointer next_node(node_alloc const& alloc) const { + control_rep u64 = meta.load(); + u64 &= ptr_mask; + return to_pointer(u64, alloc); + } + + node_pointer append_new_node(node_alloc& alloc) { + node_pointer p_new_node = node_alloc_traits::allocate(alloc, 1); + node_alloc_traits::construct(alloc, p_new_node, meta_mark()); + control_rep rep = to_rep(p_new_node, alloc); + + dynamic_assert(next_node(alloc) == nullptr, "Not last node!"); + dynamic_assert(!(rep & meta_mask), ""); + + meta.fetch_or(rep); + return p_new_node; + } + + static void remove_appended_nodes(node_pointer node, node_alloc& alloc) { + node_pointer next = node->next_node(alloc); + if (next == nullptr) return; + + remove_appended_nodes(next, alloc); + node->meta.fetch_and(meta_mask); + node_alloc_traits::destroy(alloc, next); + node_alloc_traits::deallocate(alloc, next, 1); + } + + // Unlink, destroy and deallocate next node if it is empty + static bool remove_empty_nodes(node_pointer node, node_alloc& alloc) { + node_pointer next = node->next_node(alloc); + + if (next == nullptr) return true; + if (!remove_empty_nodes(next, alloc)) return false; + + for (size_t i = 0; i < field_cnt - 1; i++) + if (next->load_entry(i).pointer() != nullptr) return false; + + remove_appended_nodes(node, alloc); + return true; + } + + template ::type = true> + mask_type match_tag_impl(tag_type tag) const { + static_assert(sizeof(node) / sizeof(__m512i) == 1 || + sizeof(node) / sizeof(__m512i) == 2, + ""); + union { + __m128i m128; + std::array mask{}; + } results{}; + + // 0b1000 0000 + constexpr std::uint64_t high_mask = 0x8080808080808080; + // 0b0100 0000 + constexpr std::uint64_t low_mask = 0x4040404040404040; + __m512i low_tag = _mm512_set1_epi8(static_cast(tag)); + __m512i high_tag = _mm512_set1_epi8(static_cast(tag >> 8)); + __m512i high_tag2 = + _mm512_set1_epi8(static_cast(tag >> 8) | (mark_bit >> 56)); + + { + __m512i m512_0 = _mm512_load_si512(&meta); + results.mask[0] = + (_mm512_mask_cmpeq_epi8_mask(low_mask, low_tag, m512_0) | + _mm512_mask_cmpeq_epi8_mask(high_mask, high_tag, m512_0) | + _mm512_mask_cmpeq_epi8_mask(high_mask, high_tag2, m512_0)); + } + if (sizeof(node) / sizeof(__m512i) == 2) { + __m512i m512_1 = + _mm512_load_si512(&data[sizeof(__m512i) / sizeof(Pointer) - 1]); + results.mask[1] = + (_mm512_mask_cmpeq_epi8_mask(low_mask, low_tag, m512_1) | + _mm512_mask_cmpeq_epi8_mask(high_mask, high_tag, m512_1) | + _mm512_mask_cmpeq_epi8_mask(high_mask, high_tag2, m512_1)); + } + // 0b1100 0000 + constexpr char compress_mask = static_cast(0xC0); + mask_type compressed = + _mm_cmpeq_epi8_mask(results.m128, _mm_set1_epi8(compress_mask)); + + return (compressed & match_nonempty()); + } + + template ::type = true> + mask_type match_tag_impl(tag_type tag) const { + /// TODO: test this on a machine with real five-level paging + static_assert(sizeof(node) / sizeof(__m512i) == 1 || + sizeof(node) / sizeof(__m512i) == 2, + ""); + union { + __m128i m128; 
+ std::array mask{}; + } results{}; + + // 0b1000 0000 + constexpr std::uint64_t mask = 0x8080808080808080; + __m512i my_tag = _mm512_set1_epi8(static_cast(tag)); + __m512i my_tag2 = _mm512_set1_epi8(static_cast(tag) | mark_bit); + { + __m512i m512_0 = _mm512_load_si512(&meta); + results.mask[0] = _mm512_mask_cmpeq_epi8_mask(mask, my_tag, m512_0) | + _mm512_mask_cmpeq_epi8_mask(mask, my_tag2, m512_0); + } + if (sizeof(node) / sizeof(__m512i) == 2) { + __m512i m512_1 = + _mm512_load_si512(&data[sizeof(__m512i) / sizeof(Pointer) - 1]); + results.mask[1] = _mm512_mask_cmpeq_epi8_mask(mask, my_tag, m512_1) | + _mm512_mask_cmpeq_epi8_mask(mask, my_tag2, m512_1); + } + // 0b1000 0000 + constexpr char compress_mask = static_cast(0xB0); + mask_type compressed = + _mm_cmpeq_epi8_mask(results.m128, _mm_set1_epi8(compress_mask)); + + return (compressed & match_nonempty()); + } + + mask_type match_tag(tag_type tag) const { + return match_tag_impl(tag); + } + + mask_type match_empty() const { + static_assert(sizeof(node) / sizeof(__m512i) == 1 || + sizeof(node) / sizeof(__m512i) == 2, + ""); + std::array empty{}; + + __m512i m512_0 = _mm512_load_si512(&meta); + empty[0] = _mm512_cmpeq_epu64_mask(_mm512_set1_epi64(0L), m512_0) | + _mm512_cmpeq_epu64_mask(_mm512_set1_epi64(mark_bit), m512_0); + if (sizeof(node) / sizeof(__m512i) == 2) { + __m512i m512_1 = + _mm512_load_si512(&data[sizeof(__m512i) / sizeof(Pointer) - 1]); + empty[1] = _mm512_cmpeq_epu64_mask(_mm512_set1_epi64(0L), m512_1) | + _mm512_cmpeq_epu64_mask(_mm512_set1_epi64(mark_bit), m512_1); + } + + return ((empty[0] | (empty[1] << 8)) & 0xFFFE); + } + + mask_type match_nonempty() const { return (~match_empty() & 0xFFFE); } + + static size_t consume_mask(mask_type& match_result) { + size_t tz = __tzcnt_u16(match_result); + match_result ^= (0x0001 << tz); + return tz - 1; + } + + // for debugging + friend std::ostream& operator<<(std::ostream& out, node const& node) { + out << std::bitset<64>(node.meta.load()) << "\n"; + for (size_t i = 0; i < field_cnt - 1; i++) { + out << std::bitset(node.data[i].tag()) << "\t" + << node.data[i].pointer() << "\n"; + } + return out; + } + + private: + /// TODO: use this helper function to improve readibility + static char match_epi8_in_epi64(__m512i m512, std::uint8_t u8, + size_t bias) { + dynamic_assert(bias <= 7, ""); + union { + __m128i m128; + std::array u64; + } result; + long long mask = (0x0101010101010101 << bias); + result.u64[0] = + _mm512_mask_cmpeq_epi8_mask(mask, m512, _mm512_set1_epi8(u8)); + return _mm_cmpgt_epi8_mask(result.m128, _mm_set1_epi8(0)); + } + }; + + static_assert(sizeof(node) == field_cnt * sizeof(void*), ""); + static_assert(sizeof(node) % 64 == 0, ""); + static_assert(alignof(node) % 64 == 0, ""); + + struct entry_pointer { + node_pointer np{nullptr}; + size_t idx{field_cnt - 1}; + + entry_pointer() = default; + entry_pointer(node_pointer p, size_t i) : np{p}, idx{i} {} + static entry_pointer next(entry_pointer ep, node_alloc const& a) { + dynamic_assert(ep.np != nullptr && ep.idx < field_cnt - 1, ""); + ++ep.idx; + if (ep.idx < field_cnt - 1) return ep; + ep.np = ep.np->next_node(a); + ep.idx = (ep.np != nullptr) ? 
0 : field_cnt - 1; + return ep; + } + }; + + struct bucket_tuple { + std::uint64_t old_svar; + size_t b_cnt; + node_pointer bucket1; + node_pointer bucket2; + node_pointer my_bucket; + + std::unique_lock guard1; + std::unique_lock guard2; + + bucket_tuple() = default; + bucket_tuple(std::uint64_t s, size_t cnt, node_pointer b1, node_pointer b2, + node_pointer mb, + std::unique_lock&& g1 = std::unique_lock{}, + std::unique_lock&& g2 = std::unique_lock{}) + : old_svar{s}, + b_cnt{cnt}, + bucket1{b1}, + bucket2{b2}, + my_bucket{mb}, + guard1{std::move(g1)}, + guard2{std::move(g2)} {} + bucket_tuple(bucket_tuple const&) = delete; + bucket_tuple(bucket_tuple&&) = default; + bucket_tuple& operator=(bucket_tuple const&) = delete; + bucket_tuple& operator=(bucket_tuple&&) = default; + }; + + class accessor { + private: + friend hashptr_map; + friend class iterator; + + // help end relocation + hashptr_map* map; + + // buckets that the key may fall in + bucket_tuple tuple; + + // Location of looked-up key + entry_pointer loc{}; + + public: + explicit accessor(hashptr_map& m) : map{&m} {} + accessor(accessor const&) = delete; + accessor& operator=(accessor const&) = delete; + accessor(accessor&&) = default; + accessor& operator=(accessor&&) = default; + ~accessor() { + { + std::unique_lock g1{}; + std::unique_lock g2{}; + tuple.guard2.swap(g2); + tuple.guard1.swap(g1); + } + map->help_end_relocation(); + } + + Pointer pointer() const { + return (loc.np == nullptr) ? nullptr + : loc.np->load_entry(loc.idx).pointer(); + } + + void set_pointer(Pointer p) { + if (p == nullptr) { + erase(); + } else if (loc.np == nullptr) { + // Since relocation may have been done by lookup(), + // we must guarantee Pointer p is inserted to the correct bucket. + Hash h = map->hasher(map->extract(p)); + loc = map->allocate_empty_entry(tuple.my_bucket); + loc.np->set_entry(loc.idx, map->make_tag(h), p); + } else { + Pointer old_p = loc.np->load_entry(loc.idx).pointer(); + dynamic_assert(map->equal(map->extract(p), map->extract(old_p)), ""); + loc.np->set_entry(loc.idx, p); + } + } + + void erase() { + if (loc.np != nullptr) loc.np->erase_entry(loc.idx); + } + + private: + explicit accessor(hashptr_map& m, bucket_tuple&& t, entry_pointer l) + : map{&m}, tuple{std::move(t)}, loc{l} {} + }; + + public: + class lockless_t {}; + static constexpr lockless_t lockless{}; + class acquire_lock_t {}; + static constexpr acquire_lock_t acquire_lock{}; + + // We iterate through hashmap by + class iterator { + public: + iterator() = delete; + iterator(iterator const&) = delete; + iterator& operator=(iterator const&) = delete; + iterator(iterator&&) = default; + iterator& operator=(iterator&&) = default; + + iterator& operator++() { + acc.loc = entry_pointer::next(acc.loc, acc.map->alloc); + while (true) { + acc.loc = skip_visited(acc.loc); + // Found unvisited entry + if (acc.loc.np != nullptr) return *this; + if (acc.tuple.my_bucket == acc.tuple.bucket1) { + acc.tuple.my_bucket = acc.tuple.bucket2; + acc.loc = skip_visited(entry_pointer{acc.tuple.my_bucket, 0}); + if (acc.loc.np != nullptr) return *this; + } + // All entries visited, reset lock and goto next bucket + inc_hash(); + acc.tuple = bucket_tuple{}; + // inc_hash() overflow, end of iteration + if (hash == Hash{}) break; + + acc.tuple = acc.map->load_and_lock_buckets(hash); + acc.tuple.my_bucket = acc.tuple.bucket1; + acc.loc = entry_pointer{acc.tuple.my_bucket, 0}; + } + *this = iterator{*acc.map}; + return *this; + } + + // No copy because iterator holds locks. 
+ iterator operator++(int) = delete; + + // Only two end iterators are equal + // Two iterators cannot be equal as they acquire locks on buckets + bool operator==(iterator const& rhs) const { + return (acc.loc.np == nullptr) && (rhs.acc.loc.np == nullptr); + } + + bool operator!=(iterator const& rhs) const { return !operator==(rhs); } + + accessor& operator*() { return acc; } + + accessor const& operator*() const { return acc; } + + accessor* operator->() { return &acc; } + + accessor const* operator->() const { return &acc; } + + private: + friend hashptr_map; + + accessor acc; + Hash hash; + Hash rev_hash; + Hash rev_lim; + + // End iterator + iterator(hashptr_map& map) : acc{map}, hash{}, rev_hash{} {} + + iterator(hashptr_map& map, Hash h, Hash lim = -1UL) + : acc{map}, + hash{h}, + rev_hash{reverse_bits(h)}, + rev_lim{reverse_bits(lim)} { + while (true) { + acc.tuple = acc.map->load_and_lock_buckets(hash); + acc.tuple.my_bucket = acc.tuple.bucket1; + acc.loc = skip_visited(entry_pointer{acc.tuple.my_bucket, 0}); + // Found unvisited entry in bucket1 + if (acc.loc.np != nullptr) return; + acc.tuple.my_bucket = acc.tuple.bucket2; + acc.loc = skip_visited(entry_pointer{acc.tuple.my_bucket, 0}); + // Found unvisited entry in bucket2 + if (acc.loc.np != nullptr) return; + + // All entries visited, reset lock and goto next bucket + inc_hash(); + acc.tuple = bucket_tuple{}; + // inc_hash() overflow, end of iteration + if (hash == Hash{}) break; + } + *this = iterator{map}; + } + + // For 2^N buckets, low N bits determines the bucket + // To increment the bucket index, + // we need to increment the (N-1)th highest bit in reversed hash, + // and clear all lower bits. + // Must be called when iterator holds a valid accessor! + void inc_hash() { + std::uint64_t v = reverse_bits(acc.tuple.b_cnt >> 1); + rev_hash &= ~(v - 1); + rev_hash += v; + hash = reverse_bits(rev_hash); + } + + /// TODO: examine if we may miss some entries + bool visited(entry_pointer pos) { + dynamic_assert(pos.np != nullptr, ""); + Pointer ptr = pos.np->load_entry(pos.idx).pointer(); + Hash h = acc.map->hasher(acc.map->extract(ptr)); + h = reverse_bits(h); + return (h < rev_hash) || (rev_lim < h); + } + + // Seek first non-empty entry in the bucket from pos. + entry_pointer skip_empty(entry_pointer pos) { + if (pos.np == nullptr) return entry_pointer{}; + if (pos.np->load_entry(pos.idx).pointer() != nullptr) return pos; + pos = entry_pointer::next(pos, acc.map->alloc); + return skip_empty(pos); + } + + // Seek first non-empty entry not visited yet in the bucket from pos. 
+ entry_pointer skip_visited(entry_pointer pos) { + pos = skip_empty(pos); + if (pos.np == nullptr) return entry_pointer{}; + if (!visited(pos)) return pos; + return skip_visited(entry_pointer::next(pos, acc.map->alloc)); + } + }; + + hashptr_map(size_t buckets, KeyExtract const& ext, + HashFunc const& hash = HashFunc{}, + KeyEqual const& eq = KeyEqual{}, Allocator const& a = Allocator{}) + : alloc{a}, hasher{hash}, equal{eq}, extract{ext} { + node_blocks[0] = &embedded_block[0]; + size_t b_cnt = bucket_count(); + while (b_cnt < buckets) { + node_pointer new_block = node_alloc_traits::allocate(alloc, b_cnt); + for (size_t i = 0; i < b_cnt; i++) + node_alloc_traits::construct(alloc, new_block + i, false); + node_blocks[block_index(b_cnt)] = new_block; + active_blocks.fetch_add(active_blocks.load() + 1); + b_cnt = bucket_count(); + } + } + + ~hashptr_map() { + for (size_t i = 1; i < node_blocks.size(); i++) { + node_pointer old_block = node_blocks[i]; + if (old_block == nullptr) continue; + size_t cnt = (embed_cnt << i) >> 1; + for (size_t i = 0; i < cnt; i++) { + node_pointer np = old_block + i; + node::remove_appended_nodes(np, alloc); + node_alloc_traits::destroy(alloc, np); + } + node_alloc_traits::deallocate(alloc, old_block, cnt); + } + for (size_t i = 0; i < embedded_block.size(); i++) + node::remove_appended_nodes(&embedded_block.at(i), alloc); + } + + Pointer lookup(Key const& key, lockless_t) const { + Hash h = hasher(key); + tag_type tag = make_tag(h); + while (true) { + Pointer p = nullptr; + bucket_tuple tuple = load_buckets(h); + if (!is_relocating(tuple.old_svar)) { + p = search_backward(key, tag, tuple.bucket1); + } else { + // Relocation from bucket1 to bucket2 + using std::swap; + if (is_shrk_proc(tuple.old_svar)) swap(tuple.bucket1, tuple.bucket2); + + // Look up the moved-from bucket first, then moved-to bucket. + p = search_backward(key, tag, tuple.bucket1); + p = (p != nullptr) ? p : search_backward(key, tag, tuple.bucket2); + } + + // svar not incremented, lookup result is valid, return. + if (tuple.old_svar == svar.load()) return p; + } + } + + // deref_lim should be larger than 1. + // 1 is merely enough to maintain load factor at current level + // if we keep inserting and try_double_capacity(). + accessor lookup(Key const& key, acquire_lock_t, size_t deref_lim = 4) { + Hash h = hasher(key); + bucket_tuple tuple = load_and_lock_buckets(h); + + // Help relocation + if (is_relocating(tuple.old_svar)) { + if (tuple.bucket1->meta_mark() != mark_of(tuple.old_svar)) + deref_lim -= + is_exp_proc(tuple.old_svar) + ? relocate_bucket(tuple.bucket2, tuple.bucket1, deref_lim) + : relocate_bucket(tuple.bucket1, tuple.bucket2, deref_lim); + relocate_global(deref_lim, tuple.old_svar); + } + + // Actual lookup, done after relocation to prevent loc from + // being invalidated. 
+ tag_type tag = make_tag(h); + entry_pointer loc{}; + if (!is_relocating(tuple.old_svar)) { + loc = search_forward(key, tag, tuple.bucket1); + } else { + loc = search_forward(key, tag, tuple.bucket1); + if (loc.np == nullptr) loc = search_forward(key, tag, tuple.bucket2); + } + return accessor{*this, std::move(tuple), loc}; + } + + iterator begin() { return iterator{*this, 0}; } + + iterator end() { return iterator{*this}; } + + // Estimate load factor by checking embedded nodes + double approx_load_factor() const { + size_t cnt = 0; + for (size_t i = 0; i < embed_cnt; i++) + cnt += entry_count(const_cast(&embedded_block[i])); + return static_cast(cnt) / ((field_cnt - 1) * embed_cnt); + } + + // Triggers rehash by write threads. + // Return false if rehash is under progress. + bool try_double_capacity() { + std::uint64_t old_svar = svar.load(); + if (old_svar % 8 != 0 || + !svar.compare_exchange_strong(old_svar, old_svar + 1)) + return false; + + old_svar = svar.load(); + size_t cnt = bucket_count(); + node_pointer new_block = node_alloc_traits::allocate(alloc, cnt); + for (size_t i = 0; i < cnt; i++) { + // Newly allocated buckets do not need relocation. + // For convenience, the new bucket has same mark as its conjugated bucket, + // the mark is cleared after scan + node_alloc_traits::construct(alloc, new_block + i, !mark_of(old_svar)); + } + // activate new block + node_blocks[block_index(cnt)] = new_block; + dynamic_assert(svar.load() % 4 == 1, ""); + pvar.store(0UL); + svar.fetch_add(1UL); + return true; + } + + // Triggers rehash by write threads. + // Return false if rehash is under progress. + bool try_halve_capacity() { + std::uint64_t old_svar = svar.load(); + if (old_svar % 8 != 0 || + !svar.compare_exchange_strong(old_svar, old_svar + 5)) { + return false; + } + if (active_blocks.load() == 0x0001UL) { + // Cannot shrink embedded block + svar.fetch_add(3U); + return false; + } + // The last block is logically deactivated. + // A bucket pair forms a logical bucket internally. + active_blocks.fetch_sub((active_blocks.load() + 1) >> 1); + dynamic_assert(svar.load() % 4 == 1, ""); + pvar.store(0UL); + svar.fetch_add(1); + return true; + } + + size_t help_relocate() { + size_t old_svar = svar.load(); + if (!is_relocating(old_svar)) return 0; + + size_t n = relocate_global(-1UL, svar.load()); + help_end_relocation(); + return n; + } + + // Logical count of buckets + // During relocation, a pair of conjugated buckets are considered as + // a logical bucket. + // After shrinkage, the higher half of buckets are removed. + // After expansion, the pair is seperated. + size_t bucket_count() const { + std::uint64_t active = active_blocks.load(); + return embed_cnt * (active + 1) / 2; + } + + private: + static tag_type make_tag(Hash h) { return (h >> (pointer_bits + 1)); } + + enum class stage : std::uint64_t { + // No rehashing + stable = 0, + + // Initializing expansion, allocate space + exp_init = 1, + // Processing expansion, relocating entries + exp_proc = 2, + // Finalizing expansion + exp_fin = 3, + + // Block rehashing + blocking = 4, + + // Initializing shrinkage + shrk_init = 5, + // Processing shrinkage, relocating entries + shrk_proc = 6, + // Finalizing shrinkage, modify node_block pointers + shrk_fin = 7 + }; + // Initialization and finalizing stage are ignored by accessors. + // Accessors help relocation during processing stage. 
+ + static stage stage_of(std::uint64_t old_svar) { + return static_cast(old_svar % 8); + } + + // Mark of svar is flipped each time + // try_halve_capacity() or try_double_capacity() is called. + // Relocation will check buckets and flip their marks accordingly. + static bool mark_of(std::uint64_t old_svar) { + return ((old_svar + 7) / 8) % 2 == 1; + } + + static bool is_relocating(std::uint64_t old_svar) { + return (is_exp_proc(old_svar) || is_shrk_proc(old_svar)); + } + + static bool is_exp_proc(std::uint64_t old_svar) { + return (stage_of(old_svar) == stage::exp_proc); + } + + static bool is_shrk_proc(std::uint64_t old_svar) { + return (stage_of(old_svar) == stage::shrk_proc); + } + + static size_t block_index(size_t bucket_index) { + return 64 - __lzcnt64(bucket_index / embed_cnt); + } + + static size_t block_offset(size_t bucket_index) { + return bucket_index - ((1UL << block_index(bucket_index)) >> 1) * embed_cnt; + } + + node_pointer locate_bucket(size_t idx) const { + size_t block_idx = block_index(idx); + size_t block_off = block_offset(idx); + node_pointer base = node_blocks[block_idx]; + return base + block_off; + } + + // Load a consistent triplet of + bucket_tuple load_buckets(Hash h) const { + while (true) { + std::uint64_t old_svar = svar.load(); + size_t b_cnt = bucket_count(); + node_pointer b1 = locate_bucket(h % b_cnt); + node_pointer b2 = locate_bucket(h % b_cnt + b_cnt); + + /// TODO: Investigate this carefully!!!! + node_pointer b = + is_exp_proc(old_svar) ? locate_bucket(h % (b_cnt * 2)) : b1; + // Initializing stages and finalizing stages are dangerous zones! + /// TODO: Investigate extremely carefully and optimize. + if (old_svar != svar.load() || old_svar % 2 != 0) { + mm_pause(); + continue; + } + if (!is_relocating(old_svar)) + return bucket_tuple{old_svar, b_cnt, b1, nullptr, b}; + else + return bucket_tuple{old_svar, b_cnt, b1, b2, b}; + } + } + + bucket_tuple load_and_lock_buckets(Hash h) { + while (true) { + std::uint64_t old_svar = svar.load(); + size_t b_cnt = bucket_count(); + node_pointer b1 = locate_bucket(h % b_cnt); + node_pointer b2 = locate_bucket(h % b_cnt + b_cnt); + + node_pointer mb = + is_exp_proc(old_svar) ? locate_bucket(h % (b_cnt * 2)) : b1; + if (!is_relocating(old_svar)) { + /// TODO: mm_pause()? + std::unique_lock g1{*b1, std::try_to_lock}; + if (!g1.owns_lock()) continue; + if (old_svar != svar.load() || old_svar % 2 != 0) continue; + + return bucket_tuple{old_svar, b_cnt, b1, nullptr, mb, std::move(g1)}; + } else { + std::unique_lock g1{*b1, std::try_to_lock}; + if (!g1.owns_lock()) continue; + std::unique_lock g2{*b2, std::try_to_lock}; + if (!g2.owns_lock()) continue; + if (old_svar != svar.load() || old_svar % 2 != 0) continue; + + return bucket_tuple{old_svar, b_cnt, b1, b2, + mb, std::move(g1), std::move(g2)}; + } + } + } + + entry_pointer allocate_empty_entry(node_pointer np) { + dynamic_assert(np != nullptr, ""); + for (size_t i = 0; i < field_cnt - 1; i++) + if (np->load_entry(i).pointer() == nullptr) return entry_pointer{np, i}; + np = (np->next_node(alloc) == nullptr) ? 
np->append_new_node(alloc) + : np->next_node(alloc); + return allocate_empty_entry(np); + } + + size_t entry_count(node_pointer node) const { + if (node == nullptr) return 0; + size_t cnt = 0; + for (size_t i = 0; i < field_cnt - 1; i++) + if (node->load_entry(i).pointer() != nullptr) ++cnt; + return cnt + entry_count(node->next_node(alloc)); + } + + void print(node_pointer node) { + std::cout << std::endl; + if (node == nullptr) return; + for (size_t i = 0; i < field_cnt - 1; i++) { + Pointer p = node->load_entry(i).pointer(); + if (p == nullptr) + std::cout << "nullptr\t"; + else + std::cout << extract(p) << "\t"; + } + print(node->next_node(alloc)); + } + + static void relocate(node_pointer dst_node, size_t dst_idx, + node_pointer src_node, size_t src_idx) { + tagged_pointer tp = src_node->load_entry(src_idx); + dynamic_assert(tp.pointer() != nullptr, ""); + // Copy then erase, so that lockless readers will not miss the entry + dst_node->set_entry(dst_idx, tp); + src_node->erase_entry(src_idx); + } + + void compress(node_pointer bkt) { + std::deque empties; + node_pointer np = bkt; + while (np != nullptr) { + for (size_t i = 0; i < field_cnt - 1; i++) { + tagged_pointer tp = np->load_entry(i); + if (tp.pointer() == nullptr) { + // Empty entry, reserved for compaction + empties.emplace_back(np, i); + continue; + } + + if (empties.empty()) continue; + + // Relocate + auto dst = empties.front(); + empties.pop_front(); + relocate(dst.np, dst.idx, np, i); + empties.emplace_back(np, i); + } + np = np->next_node(alloc); + } + node::remove_empty_nodes(bkt, alloc); + } + + // Recursively flip marks in a bucket that won't need relocation + // During shrinkage, lower half of buckets does not need relocation, + // but their mark has to be flipped. + void flip_mark(node_pointer node) { + if (node == nullptr) return; + + flip_mark(node->next_node(alloc)); + + for (size_t i = 0; i < field_cnt - 1; i++) node->entry_mark_flip(i); + + node->meta_mark_flip(); + return; + } + + // Scan and relocate entries in sub-chain led by src_node in src_bucket + // It's guaranteed that the mark of dst_bucket and src_bucket are flipped + // together. 
+ size_t relocate_helper(node_pointer dst_bucket, node_pointer src_bucket, + node_pointer src_node, size_t deref_lim) { + if (src_node == nullptr) return 0; + + std::uint64_t old_svar = svar.load(); + dynamic_assert(is_relocating(old_svar), ""); + bool mark = mark_of(old_svar); + dynamic_assert(src_bucket->meta_mark() != mark, ""); + dynamic_assert(dst_bucket->meta_mark() != mark, ""); + + // The node already scanned and relocated + if (src_node->meta_mark() == mark) return 0; + + size_t deref_cnt = relocate_helper(dst_bucket, src_bucket, + src_node->next_node(alloc), deref_lim); + deref_lim -= deref_cnt; + dynamic_assert(old_svar == svar.load(), ""); + if (deref_lim == 0) return deref_cnt; + + size_t b_cnt = bucket_count(); + + for (size_t i = 0; i < field_cnt - 1; i++) { + if (deref_lim == 0) return deref_cnt; + + // Already relocated, skip + if (src_node->entry_mark(i) == mark) continue; + + src_node->entry_mark_flip(i); + tagged_pointer tp = src_node->load_entry(i); + + // Empty entry, skip + if (tp.pointer() == nullptr) continue; + + // extract() is the potential bottleneck + --deref_lim; + ++deref_cnt; + Hash h = hasher(extract(tp.pointer())); + + // No need to relocate, skip + if (h % (b_cnt * 2) < b_cnt) continue; + + // Relocation necessary, find an empty entry + entry_pointer dst = allocate_empty_entry(dst_bucket); + relocate(dst.np, dst.idx, src_node, i); + + dynamic_assert(old_svar == svar.load(), ""); + } + // Whole node scanned + // When src_node is the leading node, + // flip the mark means the whole bucket is relocated. + src_node->meta_mark_flip(); + + // Relocation is done + if (src_bucket == src_node) { + compress(src_bucket); + compress(dst_bucket); + flip_mark(dst_bucket); + } + + dynamic_assert(old_svar == svar.load(), ""); + return deref_cnt; + } + + size_t relocate_bucket(node_pointer dst_bucket, node_pointer src_bucket, + size_t deref_lim) { + return relocate_helper(dst_bucket, src_bucket, src_bucket, deref_lim); + } + + void help_end_relocation() { + std::uint64_t old_svar = svar.load(); + if (!is_relocating(old_svar)) return; + + // Not done yet. + size_t b_cnt = bucket_count(); + if (pvar.load() != b_cnt) return; + + // Only one thread is allowed to continue + if (!pvar.compare_exchange_strong(b_cnt, -1UL)) return; + if (is_exp_proc(old_svar)) { + // End of expansion + svar.fetch_add(1); + active_blocks.fetch_add(active_blocks.load() + 1); + svar.fetch_add(5); + } else { + dynamic_assert(is_shrk_proc(old_svar), ""); + // End of shrinkage + svar.fetch_add(1UL); + node_pointer old_block = node_blocks[block_index(b_cnt)]; + node_blocks[block_index(b_cnt)] = nullptr; + svar.fetch_add(1UL); + std::this_thread::sleep_for(std::chrono::milliseconds{1000}); + for (size_t i = 0; i < b_cnt; i++) { + node_pointer np = old_block + i; + node::remove_appended_nodes(np, alloc); + node_alloc_traits::destroy(alloc, np); + } + node_alloc_traits::deallocate(alloc, old_block, b_cnt); + } + } + + // When bucket associated with accessor is relocated, + // accessor helps relocate entries in other buckets. + // Since locks are acquired inside, we have to pass old_svar from outside + /// TODO: can we load old_svar inside? 
+ size_t relocate_global(size_t deref_lim, std::uint64_t old_svar) { + // Logical bucket count + size_t b_cnt = bucket_count(); + size_t deref_cnt = 0; + + for (size_t idx = pvar.load(); idx < b_cnt && deref_lim != 0; idx++) { + // Loading a new bucket pair also counts as a dereference + deref_lim--; + bucket_tuple tuple = load_buckets(idx); + if (old_svar != tuple.old_svar) return deref_cnt; + std::unique_lock guard1{*tuple.bucket1, std::try_to_lock}; + if (!guard1.owns_lock()) { + idx += __rdtsc() % (b_cnt - idx); + continue; + } + std::unique_lock guard2{*tuple.bucket2, std::try_to_lock}; + if (!guard2.owns_lock()) continue; + + if (tuple.bucket1->meta_mark() != mark_of(old_svar)) { + // Both buckets locked, + // meta_mark indicates necessity for relocation + // svar not incremented + dynamic_assert(is_relocating(old_svar), ""); + size_t cnt = + is_exp_proc(old_svar) + ? relocate_bucket(tuple.bucket2, tuple.bucket1, deref_lim) + : relocate_bucket(tuple.bucket1, tuple.bucket2, deref_lim); + deref_lim -= cnt; + deref_cnt += cnt; + } + + // Relocated bucket pair, refresh relocation progress + // pvar is protected by lock on bucket indexed by idx1 + if (tuple.bucket1->meta_mark() == mark_of(old_svar)) + if (idx == pvar.load()) pvar.fetch_add(1U); + } + return deref_cnt; + } + + // Recursively traverse a bucket backwards. + // Since compaction only moves entry from back of a bucket to front, + // we avoid missing an entry during rehash. + Pointer search_backward(Key const& key, tag_type tag, node_pointer np) const { + if (np == nullptr) return nullptr; + + Pointer p = search_backward(key, tag, np->next_node(alloc)); + if (p != nullptr) return p; + + // Next node does not exist or does not contain entry of interest. + // Lookup key in current node. + for (size_t i = 0; i < field_cnt - 1; i++) { + size_t idx = field_cnt - 2 - i; + tagged_pointer tp = np->load_entry(idx); + + if (tp.pointer() == nullptr || tp.tag() != tag) continue; + if (!equal(extract(tp.pointer()), key)) continue; + return tp.pointer(); + } + return nullptr; + } + + // Traverse a bucket in normal order. + entry_pointer search_forward(Key const& key, tag_type tag, + node_pointer node) { + if (node == nullptr) return entry_pointer{}; + + mask_type match = node->match_tag(tag); + for (size_t idx = node::consume_mask(match); idx < field_cnt - 1; + idx = node::consume_mask(match)) + if (equal(extract(node->load_entry(idx).pointer()), key)) + return entry_pointer{node, idx}; + + return search_forward(key, tag, node->next_node(alloc)); + } + + private: + node_alloc alloc; + // Nodes are organized into blocks. + // First embed_cnt nodes are put in embedded_block, + // and when rehashed, new nodes are allocated into node_blocks. 
+  // node_blocks[0] points to embedded_block and
+  // node_blocks[k] contains embed_cnt * 2^(k-1) nodes
+  std::array embedded_block{};
+  std::array node_blocks{};
+  // Initially only embedded_block is active
+  std::atomic_uint64_t active_blocks{1U};
+
+  // State variable indicating whether the map is shrinking or expanding
+  std::atomic_uint64_t svar{0UL};
+  // Progress variable indicating relocation progress:
+  // 0 at start, -1 when finished
+  std::atomic pvar{-1UL};
+
+  typename maybe_add_pointer::type hasher;
+  typename maybe_add_pointer::type equal;
+  typename maybe_add_pointer::type extract;
+};
+
+} // namespace KVDK_NAMESPACE
diff --git a/engine/experimental/vhash.cpp b/engine/experimental/vhash.cpp
new file mode 100644
index 00000000..24c4fa70
--- /dev/null
+++ b/engine/experimental/vhash.cpp
@@ -0,0 +1,129 @@
+#include "vhash.hpp"
+
+namespace KVDK_NAMESPACE {
+
+bool VHash::Get(StringView key, StringView& value) {
+  VHashKV* kvp = hpmap.lookup(key, hpmap.lockless);
+  if (kvp == nullptr) return false;
+  value = kvp->Value();
+  return true;
+}
+
+void VHash::Put(StringView key, StringView value) {
+  VHashKV* new_kv = kvb.NewKV(key, value);
+  VHashKV* old_kv = nullptr;
+  {
+    auto acc = hpmap.lookup(key, hpmap.acquire_lock);
+    old_kv = acc.pointer();
+    acc.set_pointer(new_kv);
+  }
+  if (old_kv == nullptr) sz.fetch_add(1LL);
+  kvb.Recycle(old_kv);
+}
+
+void VHash::Delete(StringView key) {
+  VHashKV* old_kv = nullptr;
+  {
+    auto acc = hpmap.lookup(key, hpmap.acquire_lock);
+    old_kv = acc.pointer();
+    acc.erase();
+  }
+  if (old_kv != nullptr) sz.fetch_sub(1LL);
+  kvb.Recycle(old_kv);
+}
+
+void VHash::deleteAll() {
+  kvdk_assert(ref_cnt.load() == 0UL, "Iterator outlives VHash!");
+  for (auto iter = hpmap.begin(); iter != hpmap.end(); ++iter) {
+    VHashKV* old_kv = iter->pointer();
+    iter->erase();
+    // It's safe to call Delete() instead of Recycle() here,
+    // as deleteAll() is called by the cleaner.
+    kvb.Delete(old_kv);
+    sz.fetch_sub(1LL);
+  }
+  kvdk_assert(sz.load() == 0LL, "");
+}
+
+bool VHash::Modify(StringView key, VHash::ModifyFunc modify, void* cb_args,
+                   VHash::Cleanup cleanup) {
+  VHashKV* old_kv = nullptr;
+  ModifyOperation op;
+  {
+    auto acc = hpmap.lookup(key, hpmap.acquire_lock);
+    old_kv = acc.pointer();
+    StringView new_value;
+    StringView old_value;
+    old_value = old_kv ? old_kv->Value() : old_value;
+    op = modify(old_kv ? &old_value : nullptr, new_value, cb_args);
+    switch (op) {
+      case ModifyOperation::Write: {
+        VHashKV* new_kv = kvb.NewKV(key, new_value);
+        acc.set_pointer(new_kv);
+        if (old_kv == nullptr) sz.fetch_add(1LL);
+        kvb.Recycle(old_kv);
+        break;
+      }
+      case ModifyOperation::Delete: {
+        kvdk_assert(old_kv != nullptr, "Invalid callback!");
+        acc.erase();
+        sz.fetch_sub(1LL);
+        kvb.Recycle(old_kv);
+        break;
+      }
+      case ModifyOperation::Noop:
+      case ModifyOperation::Abort: {
+        break;
+      }
+    }
+    cleanup(new_value);
+  }
+  return (op != ModifyOperation::Abort);
+}
+
+void VHash::Iterator::SeekToFirst() { pos = owner.hpmap.begin(); }
+
+void VHash::Iterator::Next() {
+  if (Valid()) ++pos;
+}
+
+bool VHash::Iterator::Valid() const { return (pos != owner.hpmap.end()); }
+
+std::string VHash::Iterator::Key() const {
+  VHashKV* kv = pos->pointer();
+  StringView key = kv->Key();
+  return std::string(key.data(), key.size());
+}
+
+std::string VHash::Iterator::Value() const {
+  VHashKV* kv = pos->pointer();
+  StringView value = kv->Value();
+  return std::string(value.data(), value.size());
+}
+
+std::unique_ptr<VHashIterator> VHash::MakeIterator() {
+  // Initialized to the end() iterator without acquiring a lock.
+  return std::unique_ptr<VHashIterator>{new Iterator{*this, hpmap.end()}};
+}
+
+VHashBuilder::VHashBuilder(OldRecordsCleaner& c) : cleaner{c} {
+  cleaner.RegisterDelayDeleter(*this);
+}
+
+VHash* VHashBuilder::NewVHash(StringView name, VHashKVBuilder& kvb,
+                              size_t capacity) {
+  return new VHash{name, kvb, capacity};
+}
+
+void VHashBuilder::Recycle(VHash* vhash) {
+  if (vhash == nullptr) return;
+  cleaner.DelayDelete(*this, vhash);
+}
+
+void VHashBuilder::Delete(void* vhash) {
+  VHash* vh = static_cast<VHash*>(vhash);
+  vh->deleteAll();
+  delete vh;
+}
+
+} // namespace KVDK_NAMESPACE
diff --git a/engine/experimental/vhash.hpp b/engine/experimental/vhash.hpp
new file mode 100644
index 00000000..c50d84ea
--- /dev/null
+++ b/engine/experimental/vhash.hpp
@@ -0,0 +1,101 @@
+#pragma once
+
+#include
+#include
+#include
+
+#include "../version/old_records_cleaner.hpp"
+#include "hashptr_map.hpp"
+#include "kvdk/iterator.hpp"
+#include "vhash_kv.hpp"
+
+namespace KVDK_NAMESPACE {
+/// TODO: Support dynamically choosing an allocator when creating a VHash.
+/// Currently VHash allocates KVs via VHashKVBuilder,
+/// which can bind to other allocators,
+/// but VHashBuilder does not support a custom allocator for
+/// the hashptr_map and name.
+class VHash {
+ private:
+  hashptr_map hpmap;
+  VHashKVBuilder& kvb;
+  std::atomic_int64_t sz{0LL};
+  std::string name;
+  // Number of iterators alive.
+  // This is a temporary workaround for an iterator outliving its VHash.
+  /// TODO: Access token in KVDK for iterators
+  std::atomic_uint64_t ref_cnt{0ULL};
+
+ public:
+  VHash(StringView n, VHashKVBuilder& b, size_t capacity)
+      : hpmap{capacity / 4, VHashKV::ExtractKey},
+        kvb{b},
+        name{n.data(), n.size()} {}
+
+  ~VHash() { deleteAll(); }
+
+  StringView Name() const { return name; }
+
+  static StringView ExtractName(VHash* vhash) { return vhash->Name(); }
+
+  size_t Size() const { return sz.load(); }
+
+  /// TODO: Reserve() API for performance.
+  // void Reserve(size_t n);
+
+  bool Get(StringView key, StringView& value);
+
+  void Put(StringView key, StringView value);
+
+  void Delete(StringView key);
+
+  // Cleanup is for cleaning up memory allocated by ModifyFunc.
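As a reader aid (not part of the patch): judging from how VHash::Modify() invokes the callbacks in vhash.cpp above, ModifyFunc receives a pointer to the old value (or nullptr), an output StringView for the new value, and the user argument, while Cleanup is handed the new value afterwards so the caller can release any buffer the callback allocated. A hypothetical increment-a-counter call might look like the sketch below; the exact typedefs follow on the next lines.

    // Hypothetical caller-side sketch; callback signatures are inferred from
    // how VHash::Modify() invokes them in vhash.cpp above.
    #include <string>
    #include "vhash.hpp"

    void IncrementCounter(VHash* vhash) {
      std::string buf;  // owns the bytes the new value points at
      auto incr = [&buf](StringView const* old_val, StringView& new_val, void*) {
        long n = old_val ? std::stol(std::string{old_val->data(), old_val->size()}) : 0;
        buf = std::to_string(n + 1);
        new_val = StringView{buf.data(), buf.size()};
        return ModifyOperation::Write;
      };
      // buf lives until Modify() returns (NewKV copies the bytes),
      // so the cleanup callback has nothing to free here.
      vhash->Modify("counter", incr, nullptr, [](StringView) {});
    }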
+ using ModifyFunc = + std::function; + using Cleanup = std::function; + bool Modify(StringView key, ModifyFunc modify, void* cb_args, + Cleanup cleanup); + + class Iterator : public VHashIterator { + public: + void SeekToFirst() final; + void Next() final; + bool Valid() const final; + std::string Key() const final; + std::string Value() const final; + virtual ~Iterator() { owner.ref_cnt--; } + + private: + friend VHash; + using rep = typename decltype(hpmap)::iterator; + VHash& owner; + rep pos; + Iterator(VHash& o, rep&& p) : owner{o}, pos{std::move(p)} { + owner.ref_cnt++; + } + }; + + std::unique_ptr MakeIterator(); + + private: + friend class VHashBuilder; + // Called by VHashBuilder::Delete() to Delete all VHashKVs inside it. + void deleteAll(); +}; + +class VHashBuilder : public IDeleter { + private: + OldRecordsCleaner& cleaner; + + public: + VHashBuilder(OldRecordsCleaner& c); + + // Called by VHashGroup to create a VHash. + VHash* NewVHash(StringView name, VHashKVBuilder& b, size_t capacity = 16); + + void Recycle(VHash* vhash); + + void Delete(void* vhash) final; +}; + +} // namespace KVDK_NAMESPACE diff --git a/engine/experimental/vhash_group.cpp b/engine/experimental/vhash_group.cpp new file mode 100644 index 00000000..d9193961 --- /dev/null +++ b/engine/experimental/vhash_group.cpp @@ -0,0 +1,31 @@ +#include "vhash_group.hpp" + +namespace KVDK_NAMESPACE { + +VHashGroup::~VHashGroup() { + for (auto iter = hpmap.begin(); iter != hpmap.end(); ++iter) + delete iter->pointer(); +} + +bool VHashGroup::Create(StringView name, size_t capacity) { + auto acc = hpmap.lookup(name, hpmap.acquire_lock); + if (acc.pointer() != nullptr) return false; + VHash* vhash = vhb.NewVHash(name, kvb, capacity); + acc.set_pointer(vhash); + return true; +} + +bool VHashGroup::Destroy(StringView name) { + auto acc = hpmap.lookup(name, hpmap.acquire_lock); + VHash* vhash = acc.pointer(); + if (vhash == nullptr) return false; + acc.erase(); + vhb.Recycle(vhash); + return true; +} + +VHash* VHashGroup::Get(StringView name) { + return hpmap.lookup(name, hpmap.lockless); +} + +} // namespace KVDK_NAMESPACE diff --git a/engine/experimental/vhash_group.hpp b/engine/experimental/vhash_group.hpp new file mode 100644 index 00000000..80a38491 --- /dev/null +++ b/engine/experimental/vhash_group.hpp @@ -0,0 +1,37 @@ +#pragma once + +#include + +#include "../alias.hpp" +#include "hashptr_map.hpp" +#include "vhash.hpp" + +namespace KVDK_NAMESPACE { + +// A VHashGroup contains VHashes that share the same memory allocator for kvs. +/// TODO: Add hpmap_alloc to allocate memory for hashptr_maps. 
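To show how these pieces are meant to fit together (illustrative only, not part of the patch): kv_engine.hpp below wires a VHashGroup to a CharAllocator and the OldRecordsCleaner, and callers then address named VHashes through the group. A minimal sketch, assuming references to an allocator and cleaner are already available:

    #include "vhash_group.hpp"

    void Demo(IVolatileAllocator& alloc, OldRecordsCleaner& cleaner) {
      VHashGroup group{alloc, cleaner};
      group.Create("cache", 1024);           // create a named VHash with an initial capacity
      if (VHash* vh = group.Get("cache")) {  // lockless lookup by name
        vh->Put("k", "v");
        StringView v;
        if (vh->Get("k", v)) {
          // v references memory owned by the VHash; copy it out if it must
          // outlive subsequent writes.
        }
      }
      group.Destroy("cache");                // handed to OldRecordsCleaner for delayed deletion
    }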
+class VHashGroup { + private: + IVolatileAllocator& kv_alloc; + OldRecordsCleaner& cleaner; + VHashKVBuilder kvb{kv_alloc, cleaner}; + VHashBuilder vhb{cleaner}; + + hashptr_map hpmap{ + 4, VHash::ExtractName}; + std::atomic_int64_t sz{0LL}; + + public: + VHashGroup(IVolatileAllocator& a, OldRecordsCleaner& c) + : kv_alloc{a}, cleaner{c} {} + + ~VHashGroup(); + + bool Create(StringView name, size_t capacity); + + bool Destroy(StringView name); + + VHash* Get(StringView name); +}; + +} // namespace KVDK_NAMESPACE diff --git a/engine/experimental/vhash_kv.cpp b/engine/experimental/vhash_kv.cpp new file mode 100644 index 00000000..46993724 --- /dev/null +++ b/engine/experimental/vhash_kv.cpp @@ -0,0 +1,29 @@ +#include "vhash_kv.hpp" + +#include "../version/old_records_cleaner.hpp" + +namespace KVDK_NAMESPACE { + +VHashKVBuilder::VHashKVBuilder(IVolatileAllocator& a, OldRecordsCleaner& c) + : alloc{a}, cleaner{c} { + c.RegisterDelayDeleter(*this); +} + +VHashKV* VHashKVBuilder::NewKV(StringView key, StringView value) { + void* dst = alloc.Allocate(sizeof(VHashKV) + key.size() + value.size()); + new (dst) VHashKV{key, value}; + return static_cast(dst); +} + +void VHashKVBuilder::Recycle(VHashKV* kv) { + if (kv == nullptr) return; + cleaner.DelayDelete(*this, kv); +} + +void VHashKVBuilder::Delete(void* obj) { + VHashKV* kv = static_cast(obj); + kv->~VHashKV(); + alloc.Deallocate(kv, sizeof(VHashKV) + kv->Key().size() + kv->Value().size()); +} + +} // namespace KVDK_NAMESPACE diff --git a/engine/experimental/vhash_kv.hpp b/engine/experimental/vhash_kv.hpp new file mode 100644 index 00000000..5b02c4d8 --- /dev/null +++ b/engine/experimental/vhash_kv.hpp @@ -0,0 +1,55 @@ +#pragma once + +#include + +#include "../alias.hpp" +#include "../allocator.hpp" +#include "../macros.hpp" +#include "../version/old_records_cleaner.hpp" + +namespace KVDK_NAMESPACE { + +/// TODO: Add timestamp field for MVCC if necessary +class VHashKV { + private: + std::uint32_t key_sz; + std::uint32_t value_sz; + char data[]; + + public: + VHashKV(StringView key, StringView value) { + kvdk_assert(key.size() <= std::numeric_limits::max(), ""); + kvdk_assert(value.size() <= std::numeric_limits::max(), ""); + key_sz = static_cast(key.size()); + value_sz = static_cast(value.size()); + memcpy(data, key.data(), key.size()); + memcpy(data + key_sz, value.data(), value.size()); + } + + StringView Key() const { return StringView{data, key_sz}; } + + StringView Value() const { return StringView{data + key_sz, value_sz}; } + + static StringView ExtractKey(VHashKV* kvp) { return kvp->Key(); } +}; + +class VHashKVBuilder : public IDeleter { + private: + IVolatileAllocator& alloc; + OldRecordsCleaner& cleaner; + + public: + VHashKVBuilder(IVolatileAllocator& a, OldRecordsCleaner& c); + VHashKVBuilder(VHashKVBuilder const&) = delete; + VHashKVBuilder(VHashKVBuilder&&) = default; + + VHashKV* NewKV(StringView key, StringView value); + + // Recycle VHashKV to OldRecordsCleaner for later deletion. + void Recycle(VHashKV* kv); + + // Called by OldRecordsCleaner to delete KV. 
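A side note on the deletion path (illustrative, not part of the patch): because VHashKV stores both lengths in its header ahead of the flexible data[] array, the deleter can recompute exactly how many bytes NewKV() allocated, as the sketch below spells out.

    // Mirrors the size arithmetic used by VHashKVBuilder::NewKV()/Delete() above.
    #include "vhash_kv.hpp"

    size_t VHashKVBytes(VHashKV const* kv) {
      // header (key_sz + value_sz) followed by the key bytes, then the value bytes
      return sizeof(VHashKV) + kv->Key().size() + kv->Value().size();
    }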
+ void Delete(void* kv) final; +}; + +} // namespace KVDK_NAMESPACE diff --git a/engine/hash_collection/hash_list.cpp b/engine/hash_collection/hash_list.cpp index e10bd1e6..fa65ab41 100644 --- a/engine/hash_collection/hash_list.cpp +++ b/engine/hash_collection/hash_list.cpp @@ -218,7 +218,7 @@ HashList::WriteResult HashList::SetExpireTime(ExpireTimeType expired_time, } DLRecord* pmem_record = DLRecord::PersistDLRecord( pmem_allocator_->offset2addr_checked(space.offset), space.size, timestamp, - RecordType::HashHeader, RecordStatus::Normal, + RecordType::HashRecord, RecordStatus::Normal, pmem_allocator_->addr2offset_checked(header), header->prev, header->next, header->Key(), header->Value(), expired_time); bool success = dl_list_.Replace(header, pmem_record); @@ -265,7 +265,7 @@ CollectionIDType HashList::FetchID(const DLRecord* record) { switch (record->GetRecordType()) { case RecordType::HashElem: return ExtractID(record->Key()); - case RecordType::HashHeader: + case RecordType::HashRecord: return DecodeID(record->Value()); default: GlobalLogger.Error("Wrong record type %u in HashListID", diff --git a/engine/hash_collection/hash_list.hpp b/engine/hash_collection/hash_list.hpp index 86425417..033bf691 100644 --- a/engine/hash_collection/hash_list.hpp +++ b/engine/hash_collection/hash_list.hpp @@ -82,6 +82,17 @@ class HashList : public Collection { // Notice: the deleting key should already been locked by engine WriteResult Delete(const StringView& key, TimestampType timestamp); + // Modify value of "key" in the hash list + // + // Args: + // * modify_func: customized function to modify existing value of key. See + // definition of ModifyFunc (types.hpp) for more details. + // * modify_args: customized arguments of modify_func. + // + // Return: + // Status::Ok if modify success. + // Status::Abort if modify function abort modifying. + // Return other non-Ok status on any error. 
WriteResult Modify(const StringView key, ModifyFunc modify_func, void* modify_args, TimestampType timestamp); @@ -162,7 +173,7 @@ class HashList : public Collection { static bool MatchType(const DLRecord* record) { RecordType type = record->GetRecordType(); - return type == RecordType::HashElem || type == RecordType::HashHeader; + return type == RecordType::HashElem || type == RecordType::HashRecord; } private: diff --git a/engine/hash_collection/rebuilder.hpp b/engine/hash_collection/rebuilder.hpp index 8388478f..1211f895 100644 --- a/engine/hash_collection/rebuilder.hpp +++ b/engine/hash_collection/rebuilder.hpp @@ -23,14 +23,13 @@ class HashListRebuilder { }; HashListRebuilder(PMEMAllocator* pmem_allocator, HashTable* hash_table, - LockTable* lock_table, ThreadManager* thread_manager, - uint64_t num_rebuild_threads, const CheckPoint& checkpoint) + LockTable* lock_table, uint64_t num_rebuild_threads, + const CheckPoint& checkpoint) : recovery_utils_(pmem_allocator), rebuilder_thread_cache_(num_rebuild_threads), pmem_allocator_(pmem_allocator), hash_table_(hash_table), lock_table_(lock_table), - thread_manager_(thread_manager), num_rebuild_threads_(num_rebuild_threads), checkpoint_(checkpoint) {} @@ -122,11 +121,6 @@ class HashListRebuilder { bool recoverToCheckPoint() { return checkpoint_.Valid(); } Status initRebuildLists() { - Status s = thread_manager_->MaybeInitThread(access_thread); - if (s != Status::Ok) { - return s; - } - // Keep headers with same id together for recognize outdated ones auto cmp = [](const DLRecord* header1, const DLRecord* header2) { auto id1 = HashList::FetchID(header1); @@ -209,7 +203,7 @@ class HashListRebuilder { if (!outdated) { auto lookup_result = hash_table_->Insert( - collection_name, RecordType::HashHeader, RecordStatus::Normal, + collection_name, RecordType::HashRecord, RecordStatus::Normal, hlist.get(), PointerType::HashList); switch (lookup_result.s) { case Status::Ok: { @@ -258,11 +252,8 @@ class HashListRebuilder { } Status rebuildIndex(HashList* hlist) { - Status s = thread_manager_->MaybeInitThread(access_thread); - if (s != Status::Ok) { - return s; - } - defer(thread_manager_->Release(access_thread)); + this_thread.id = next_tid_.fetch_add(1); + size_t num_elems = 0; auto iter = hlist->GetDLList()->GetRecordIterator(); @@ -314,9 +305,10 @@ class HashListRebuilder { } void addUnlinkedRecord(DLRecord* pmem_record) { - assert(access_thread.id >= 0); - rebuilder_thread_cache_[access_thread.id].unlinked_records.push_back( - pmem_record); + kvdk_assert(ThreadManager::ThreadID() >= 0, ""); + rebuilder_thread_cache_[ThreadManager::ThreadID() % + rebuilder_thread_cache_.size()] + .unlinked_records.push_back(pmem_record); } void cleanInvalidRecords() { @@ -354,7 +346,6 @@ class HashListRebuilder { PMEMAllocator* pmem_allocator_; HashTable* hash_table_; LockTable* lock_table_; - ThreadManager* thread_manager_; const size_t num_rebuild_threads_; CheckPoint checkpoint_; SpinMutex lock_; @@ -363,5 +354,10 @@ class HashListRebuilder { std::unordered_map> rebuild_hlists_; CollectionIDType max_recovered_id_; + + // We manually allocate recovery thread id for no conflict in multi-thread + // recovering + // Todo: do not hard code + std::atomic next_tid_{0}; }; } // namespace KVDK_NAMESPACE \ No newline at end of file diff --git a/engine/hash_table.cpp b/engine/hash_table.cpp index e6ec2a0e..5cc5986f 100644 --- a/engine/hash_table.cpp +++ b/engine/hash_table.cpp @@ -174,10 +174,10 @@ HashTable::LookupResult HashTable::Insert(const StringView& key, } Status 
HashTable::allocateEntry(HashBucketIterator& bucket_iter) { - kvdk_assert(bucket_iter.hash_table_ == this && - bucket_iter.entry_idx_ == - hash_bucket_entries_[bucket_iter.bucket_idx_], - "Only allocate new hash entry at end of hash bucket"); + kvdk_assert(bucket_iter.hash_table_ == this, ""); + kvdk_assert( + bucket_iter.entry_idx_ == hash_bucket_entries_[bucket_iter.bucket_idx_], + "Only allocate new hash entry at end of hash bucket"); assert(bucket_iter.bucket_ptr_ != nullptr); if (hash_bucket_entries_[bucket_iter.bucket_idx_] > 0 && hash_bucket_entries_[bucket_iter.bucket_idx_] % kNumEntryPerBucket == 0) { diff --git a/engine/hash_table.hpp b/engine/hash_table.hpp index 6dff0666..a0d6d6fd 100644 --- a/engine/hash_table.hpp +++ b/engine/hash_table.hpp @@ -184,6 +184,8 @@ class HashTable { return std::unique_lock{*getHint(key).spin}; } + SpinMutex* GetLock(StringView const& key) { return getHint(key).spin; } + HashTableIterator GetIterator(uint64_t start_slot_idx, uint64_t end_slot_idx); size_t GetSlotsNum() { return slots_.size(); } diff --git a/engine/kv_engine.cpp b/engine/kv_engine.cpp index 4f3af3ab..cb7cc794 100644 --- a/engine/kv_engine.cpp +++ b/engine/kv_engine.cpp @@ -57,11 +57,13 @@ KVEngine::~KVEngine() { GlobalLogger.Info("Instance closed\n"); } -Status KVEngine::Open(const std::string& name, Engine** engine_ptr, +Status KVEngine::Open(const StringView engine_path, Engine** engine_ptr, const Configs& configs) { - GlobalLogger.Info("Opening kvdk instance from %s ...\n", name.c_str()); + std::string engine_path_str(string_view_2_string(engine_path)); + GlobalLogger.Info("Opening kvdk instance from %s ...\n", + engine_path_str.c_str()); KVEngine* engine = new KVEngine(configs); - Status s = engine->init(name, configs); + Status s = engine->init(engine_path_str, configs); if (s == Status::Ok) { s = engine->restoreExistingData(); } @@ -76,16 +78,18 @@ Status KVEngine::Open(const std::string& name, Engine** engine_ptr, return s; } -Status KVEngine::Restore(const std::string& engine_path, - const std::string& backup_log, Engine** engine_ptr, +Status KVEngine::Restore(const StringView engine_path, + const StringView backup_log, Engine** engine_ptr, const Configs& configs) { + std::string engine_path_str(string_view_2_string(engine_path)); + std::string backup_log_str(string_view_2_string(backup_log)); GlobalLogger.Info( "Restoring kvdk instance from backup log %s to engine path %s\n", - backup_log.c_str(), engine_path.c_str()); + backup_log_str.c_str(), engine_path_str.c_str()); KVEngine* engine = new KVEngine(configs); - Status s = engine->init(engine_path, configs); + Status s = engine->init(engine_path_str, configs); if (s == Status::Ok) { - s = engine->restoreDataFromBackup(backup_log); + s = engine->restoreDataFromBackup(backup_log_str); } if (s == Status::Ok) { @@ -94,7 +98,7 @@ Status KVEngine::Restore(const std::string& engine_path, engine->ReportPMemUsage(); } else { GlobalLogger.Error("Restore kvdk instance from backup log %s failed: %d\n", - backup_log.c_str(), s); + backup_log_str.c_str(), s); delete engine; } return s; @@ -158,12 +162,12 @@ Status KVEngine::init(const std::string& name, const Configs& configs) { return Status::IOError; } - db_file_ = data_file(); + data_file_ = data_file(); configs_ = configs; } else { configs_ = configs; - db_file_ = name; + data_file_ = name; // The devdax mode need to execute the shell scripts/init_devdax.sh, // then a fsdax model namespace will be created and the @@ -185,18 +189,16 @@ Status KVEngine::init(const std::string& name, 
const Configs& configs) { } pmem_allocator_.reset(PMEMAllocator::NewPMEMAllocator( - db_file_, configs_.pmem_file_size, configs_.pmem_segment_blocks, + data_file_, configs_.pmem_file_size, configs_.pmem_segment_blocks, configs_.pmem_block_size, configs_.max_access_threads, configs_.populate_pmem_space, configs_.use_devdax_mode, &version_controller_)); - thread_manager_.reset(new (std::nothrow) - ThreadManager(configs_.max_access_threads)); hash_table_.reset(HashTable::NewHashTable( configs_.hash_bucket_num, configs_.num_buckets_per_slot, pmem_allocator_.get(), configs_.max_access_threads)); dllist_locks_.reset(new LockTable{1UL << 20}); if (pmem_allocator_ == nullptr || hash_table_ == nullptr || - thread_manager_ == nullptr || dllist_locks_ == nullptr) { + dllist_locks_ == nullptr) { GlobalLogger.Error("Init kvdk basic components error\n"); return Status::Abort; } @@ -208,16 +210,16 @@ Status KVEngine::init(const std::string& name, const Configs& configs) { } Status KVEngine::restoreData() { - Status s = maybeInitAccessThread(); - if (s != Status::Ok) { - return s; - } + this_thread.id = next_recovery_tid_.fetch_add(1); + EngineThreadCache& engine_thread_cache = - engine_thread_cache_[access_thread.id]; + engine_thread_cache_[ThreadManager::ThreadID() % + configs_.max_access_threads]; SpaceEntry segment_recovering; DataEntry data_entry_cached; uint64_t cnt = 0; + Status s; while (true) { if (segment_recovering.size == 0) { if (!pmem_allocator_->FetchSegment(&segment_recovering)) { @@ -248,11 +250,11 @@ Status KVEngine::restoreData() { switch (data_entry_cached.meta.type) { case RecordType::SortedElem: - case RecordType::SortedHeader: + case RecordType::SortedRecord: case RecordType::String: - case RecordType::HashHeader: + case RecordType::HashRecord: case RecordType::HashElem: - case RecordType::ListHeader: + case RecordType::ListRecord: case RecordType::ListElem: { if (data_entry_cached.meta.status == RecordStatus::Dirty) { data_entry_cached.meta.type = RecordType::Empty; @@ -303,7 +305,7 @@ Status KVEngine::restoreData() { s = restoreSortedElem(static_cast(recovering_pmem_record)); break; } - case RecordType::SortedHeader: { + case RecordType::SortedRecord: { s = restoreSortedHeader(static_cast(recovering_pmem_record)); break; } @@ -313,7 +315,7 @@ Status KVEngine::restoreData() { data_entry_cached); break; } - case RecordType::ListHeader: { + case RecordType::ListRecord: { s = listRestoreList(static_cast(recovering_pmem_record)); break; } @@ -321,7 +323,7 @@ Status KVEngine::restoreData() { s = listRestoreElem(static_cast(recovering_pmem_record)); break; } - case RecordType::HashHeader: { + case RecordType::HashRecord: { s = restoreHashHeader(static_cast(recovering_pmem_record)); break; } @@ -342,7 +344,6 @@ Status KVEngine::restoreData() { } } restored_.fetch_add(cnt); - ReleaseAccessThread(); return s; } @@ -353,11 +354,11 @@ bool KVEngine::validateRecord(void* data_record) { case RecordType::String: { return static_cast(data_record)->Validate(); } - case RecordType::SortedHeader: + case RecordType::SortedRecord: case RecordType::SortedElem: - case RecordType::HashHeader: + case RecordType::HashRecord: case RecordType::HashElem: - case RecordType::ListHeader: + case RecordType::ListRecord: case RecordType::ListElem: { return static_cast(data_record)->Validate(); } @@ -425,7 +426,7 @@ Status KVEngine::Backup(const pmem::obj::string_view backup_log, } break; } - case RecordType::SortedHeader: { + case RecordType::SortedRecord: { DLRecord* header = 
slot_iter->GetIndex().skiplist->HeaderRecord(); while (header != nullptr && header->GetTimestamp() > backup_ts) { header = @@ -433,7 +434,7 @@ Status KVEngine::Backup(const pmem::obj::string_view backup_log, } if (header && header->GetRecordStatus() == RecordStatus::Normal && !header->HasExpired()) { - s = backup.Append(RecordType::SortedHeader, header->Key(), + s = backup.Append(RecordType::SortedRecord, header->Key(), header->Value(), header->GetExpireTime()); if (s == Status::Ok) { // Append skiplist elems following the header @@ -455,7 +456,7 @@ Status KVEngine::Backup(const pmem::obj::string_view backup_log, } break; } - case RecordType::HashHeader: { + case RecordType::HashRecord: { DLRecord* header = slot_iter->GetIndex().hlist->HeaderRecord(); while (header != nullptr && header->GetTimestamp() > backup_ts) { header = @@ -463,7 +464,7 @@ Status KVEngine::Backup(const pmem::obj::string_view backup_log, } if (header && header->GetRecordStatus() == RecordStatus::Normal && !header->HasExpired()) { - s = backup.Append(RecordType::HashHeader, header->Key(), + s = backup.Append(RecordType::HashRecord, header->Key(), header->Value(), header->GetExpireTime()); if (s == Status::Ok) { // Append hlist elems following the header @@ -484,7 +485,7 @@ Status KVEngine::Backup(const pmem::obj::string_view backup_log, } break; } - case RecordType::ListHeader: { + case RecordType::ListRecord: { DLRecord* header = slot_iter->GetIndex().list->HeaderRecord(); while (header != nullptr && header->GetTimestamp() > backup_ts) { header = @@ -492,7 +493,7 @@ Status KVEngine::Backup(const pmem::obj::string_view backup_log, } if (header && header->GetRecordStatus() == RecordStatus::Normal && !header->HasExpired()) { - s = backup.Append(RecordType::ListHeader, header->Key(), + s = backup.Append(RecordType::ListRecord, header->Key(), header->Value(), header->GetExpireTime()); if (s == Status::Ok) { // Append hlist elems following the header @@ -548,13 +549,8 @@ Status KVEngine::initOrRestoreCheckpoint() { Status KVEngine::restoreDataFromBackup(const std::string& backup_log) { // TODO: make this multi-thread - Status s = maybeInitAccessThread(); - if (s != Status::Ok) { - return s; - } - defer(ReleaseAccessThread()); BackupLog backup; - s = backup.Open(backup_log); + Status s = backup.Open(backup_log); if (s != Status::Ok) { return s; } @@ -580,7 +576,7 @@ Status KVEngine::restoreDataFromBackup(const std::string& backup_log) { iter->Next(); break; } - case RecordType::SortedHeader: { + case RecordType::SortedRecord: { // Maybe reuse id? 
std::shared_ptr skiplist = nullptr; if (!expired) { @@ -620,7 +616,7 @@ Status KVEngine::restoreDataFromBackup(const std::string& backup_log) { } break; } - case RecordType::HashHeader: { + case RecordType::HashRecord: { std::shared_ptr hlist = nullptr; if (!expired) { s = buildHashlist(record.key, hlist); @@ -653,7 +649,7 @@ Status KVEngine::restoreDataFromBackup(const std::string& backup_log) { } break; } - case RecordType::ListHeader: { + case RecordType::ListRecord: { std::shared_ptr list = nullptr; if (!expired) { s = buildList(record.key, list); @@ -715,20 +711,15 @@ Status KVEngine::restoreDataFromBackup(const std::string& backup_log) { } Status KVEngine::restoreExistingData() { - access_thread.id = 0; - defer(access_thread.id = -1); - sorted_rebuilder_.reset(new SortedCollectionRebuilder( this, configs_.opt_large_sorted_collection_recovery, configs_.max_access_threads, *persist_checkpoint_)); - hash_rebuilder_.reset( - new HashListRebuilder(pmem_allocator_.get(), hash_table_.get(), - dllist_locks_.get(), thread_manager_.get(), - configs_.max_access_threads, *persist_checkpoint_)); - list_rebuilder_.reset( - new ListRebuilder(pmem_allocator_.get(), hash_table_.get(), - dllist_locks_.get(), thread_manager_.get(), - configs_.max_access_threads, *persist_checkpoint_)); + hash_rebuilder_.reset(new HashListRebuilder( + pmem_allocator_.get(), hash_table_.get(), dllist_locks_.get(), + configs_.max_access_threads, *persist_checkpoint_)); + list_rebuilder_.reset(new ListRebuilder( + pmem_allocator_.get(), hash_table_.get(), dllist_locks_.get(), + configs_.max_access_threads, *persist_checkpoint_)); Status s = batchWriteRollbackLogs(); if (s != Status::Ok) { @@ -883,12 +874,13 @@ Status KVEngine::checkConfigs(const Configs& configs) { } Status KVEngine::maybeInitBatchLogFile() { - auto& tc = engine_thread_cache_[access_thread.id]; + kvdk_assert(ThreadManager::ThreadID() >= 0, ""); + auto work_id = ThreadManager::ThreadID() % configs_.max_access_threads; + auto& tc = engine_thread_cache_[work_id]; if (tc.batch_log == nullptr) { int is_pmem; size_t mapped_len; - std::string log_file_name = - batch_log_dir_ + std::to_string(access_thread.id); + std::string log_file_name = batch_log_dir_ + std::to_string(work_id); void* addr = pmem_map_file(log_file_name.c_str(), BatchWriteLog::MaxBytes(), PMEM_FILE_CREATE, 0666, &mapped_len, &is_pmem); if (addr == NULL) { @@ -902,26 +894,30 @@ Status KVEngine::maybeInitBatchLogFile() { } Status KVEngine::BatchWrite(std::unique_ptr const& batch) { - WriteBatchImpl const* batch_impl = - dynamic_cast(batch.get()); + const WriteBatchImpl* batch_impl = + dynamic_cast(batch.get()); if (batch_impl == nullptr) { return Status::InvalidArgument; } - return batchWriteImpl(*batch_impl); + return batchWriteImpl(*batch_impl, true); +} + +Status KVEngine::CommitTransaction(TransactionImpl* txn) { + const WriteBatchImpl* batch = txn->GetBatch(); + kvdk_assert(batch != nullptr, ""); + return batchWriteImpl(*batch, + false /* key should already been locked by txn */); } -Status KVEngine::batchWriteImpl(WriteBatchImpl const& batch) { +Status KVEngine::batchWriteImpl(WriteBatchImpl const& batch, bool lock_key) { if (batch.Size() > BatchWriteLog::Capacity()) { return Status::InvalidBatchSize; } - Status s = maybeInitAccessThread(); - if (s != Status::Ok) { - return s; - } + auto thread_holder = AcquireAccessThread(); - s = maybeInitBatchLogFile(); + Status s = maybeInitBatchLogFile(); if (s != Status::Ok) { return s; } @@ -943,7 +939,7 @@ Status 
KVEngine::batchWriteImpl(WriteBatchImpl const& batch) { // Lookup Skiplists and Hashes for further operations for (auto const& sorted_op : batch.SortedOps()) { - auto res = lookupKey(sorted_op.collection, RecordType::SortedHeader); + auto res = lookupKey(sorted_op.collection, RecordType::SortedRecord); /// TODO: this is a temporary work-around /// We cannot lock both key and field, which may trigger deadlock. /// However, if a collection is created and a field is inserted, @@ -972,14 +968,16 @@ Status KVEngine::batchWriteImpl(WriteBatchImpl const& batch) { // Keys/internal keys to be locked on HashTable std::vector keys_to_lock; - for (auto const& string_op : batch.StringOps()) { - keys_to_lock.push_back(string_op.key); - } - for (auto const& arg : sorted_args) { - keys_to_lock.push_back(arg.skiplist->InternalKey(arg.key)); - } - for (auto const& arg : hash_args) { - keys_to_lock.push_back(arg.hlist->InternalKey(arg.key)); + if (lock_key) { + for (auto const& string_op : batch.StringOps()) { + keys_to_lock.push_back(string_op.key); + } + for (auto const& arg : sorted_args) { + keys_to_lock.push_back(arg.skiplist->InternalKey(arg.key)); + } + for (auto const& arg : hash_args) { + keys_to_lock.push_back(arg.hlist->InternalKey(arg.key)); + } } auto guard = hash_table_->RangeLock(keys_to_lock); @@ -1045,7 +1043,8 @@ Status KVEngine::batchWriteImpl(WriteBatchImpl const& batch) { // Preparation done. Persist BatchLog for rollback. BatchWriteLog log; log.SetTimestamp(bw_token.Timestamp()); - auto& tc = engine_thread_cache_[access_thread.id]; + auto& tc = engine_thread_cache_[ThreadManager::ThreadID() % + configs_.max_access_threads]; for (auto& args : string_args) { if (args.space.size == 0) { continue; @@ -1227,10 +1226,10 @@ Status KVEngine::batchWriteRollbackLogs() { return Status::Ok; } -Status KVEngine::GetTTL(const StringView str, TTLType* ttl_time) { +Status KVEngine::GetTTL(const StringView key, TTLType* ttl_time) { *ttl_time = kInvalidTTL; - auto ul = hash_table_->AcquireLock(str); - auto res = lookupKey(str, ExpirableRecordType); + auto ul = hash_table_->AcquireLock(key); + auto res = lookupKey(key, ExpirableRecordType); if (res.s == Status::Ok) { ExpireTimeType expire_time; @@ -1267,7 +1266,7 @@ Status KVEngine::TypeOf(StringView key, ValueType* type) { if (res.s == Status::Ok) { switch (res.entry_ptr->GetIndexType()) { case PointerType::Skiplist: { - *type = ValueType::SortedSet; + *type = ValueType::SortedCollection; break; } case PointerType::List: { @@ -1275,7 +1274,7 @@ Status KVEngine::TypeOf(StringView key, ValueType* type) { break; } case PointerType::HashList: { - *type = ValueType::HashSet; + *type = ValueType::HashCollection; break; } case PointerType::StringRecord: { @@ -1290,11 +1289,8 @@ Status KVEngine::TypeOf(StringView key, ValueType* type) { return res.s == Status::Outdated ? 
Status::NotFound : res.s; } -Status KVEngine::Expire(const StringView str, TTLType ttl_time) { - Status s = maybeInitAccessThread(); - if (s != Status::Ok) { - return s; - } +Status KVEngine::Expire(const StringView key, TTLType ttl_time) { + auto thread_holder = AcquireAccessThread(); int64_t base_time = TimeUtils::millisecond_time(); if (!TimeUtils::CheckTTL(ttl_time, base_time)) { @@ -1302,10 +1298,10 @@ Status KVEngine::Expire(const StringView str, TTLType ttl_time) { } ExpireTimeType expired_time = TimeUtils::TTLToExpireTime(ttl_time, base_time); - auto ul = hash_table_->AcquireLock(str); + auto ul = hash_table_->AcquireLock(key); auto snapshot_holder = version_controller_.GetLocalSnapshotHolder(); // TODO: maybe have a wrapper function(lookupKeyAndMayClean). - auto lookup_result = lookupKey(str, ExpirableRecordType); + auto lookup_result = lookupKey(key, ExpirableRecordType); if (lookup_result.s == Status::Outdated) { return Status::NotFound; } @@ -1317,7 +1313,7 @@ Status KVEngine::Expire(const StringView str, TTLType ttl_time) { ul.unlock(); version_controller_.ReleaseLocalSnapshot(); lookup_result.s = Modify( - str, + key, [](const std::string* old_val, std::string* new_val, void*) { new_val->assign(*old_val); return ModifyOperation::Write; @@ -1400,17 +1396,17 @@ HashTable::LookupResult KVEngine::lookupKey(StringView key, uint8_t type_mask) { : Status::Ok; break; } - case RecordType::SortedHeader: + case RecordType::SortedRecord: result.s = result.entry.GetIndex().skiplist->HasExpired() ? Status::Outdated : Status::Ok; break; - case RecordType::ListHeader: + case RecordType::ListRecord: result.s = result.entry.GetIndex().list->HasExpired() ? Status::Outdated : Status::Ok; break; - case RecordType::HashHeader: { + case RecordType::HashRecord: { result.s = result.entry.GetIndex().hlist->HasExpired() ? Status::Outdated : Status::Ok; diff --git a/engine/kv_engine.hpp b/engine/kv_engine.hpp index 8b48be29..04673c62 100644 --- a/engine/kv_engine.hpp +++ b/engine/kv_engine.hpp @@ -35,11 +35,16 @@ #include "sorted_collection/skiplist.hpp" #include "structures.hpp" #include "thread_manager.hpp" +#include "transaction_impl.hpp" #include "utils/utils.hpp" #include "version/old_records_cleaner.hpp" #include "version/version_controller.hpp" #include "write_batch_impl.hpp" +#ifdef KVDK_ENABLE_VHASH +#include "experimental/vhash_group.hpp" +#endif + namespace KVDK_NAMESPACE { class KVEngine : public Engine { friend class SortedCollectionRebuilder; @@ -47,19 +52,19 @@ class KVEngine : public Engine { public: ~KVEngine(); - static Status Open(const std::string& name, Engine** engine_ptr, + static Status Open(const StringView engine_path, Engine** engine_ptr, const Configs& configs); - static Status Restore(const std::string& engine_path, - const std::string& backup_log, Engine** engine_ptr, + static Status Restore(const StringView engine_path, + const StringView backup_log, Engine** engine_ptr, const Configs& configs); - Snapshot* GetSnapshot(bool make_checkpoint) override; + Snapshot* GetSnapshot(bool make_checkpoint) final; Status Backup(const pmem::obj::string_view backup_log, - const Snapshot* snapshot) override; + const Snapshot* snapshot) final; - void ReleaseSnapshot(const Snapshot* snapshot) override { + void ReleaseSnapshot(const Snapshot* snapshot) final { { std::lock_guard lg(checkpoint_lock_); persist_checkpoint_->MaybeRelease( @@ -76,38 +81,38 @@ class KVEngine : public Engine { // 1. Expire assumes that str is not duplicated among all types, which is not // implemented yet // 2. 
Expire is not compatible with checkpoint for now - Status Expire(const StringView str, TTLType ttl_time) override; + Status Expire(const StringView key, TTLType ttl_time) final; // Get time to expire of str // // Notice: // Expire assumes that str is not duplicated among all types, which is not // implemented yet - Status GetTTL(const StringView str, TTLType* ttl_time) override; + Status GetTTL(const StringView key, TTLType* ttl_time) final; Status TypeOf(StringView key, ValueType* type) final; // String - Status Get(const StringView key, std::string* value) override; + Status Get(const StringView key, std::string* value) final; Status Put(const StringView key, const StringView value, - const WriteOptions& write_options) override; - Status Delete(const StringView key) override; + const WriteOptions& write_options) final; + Status Delete(const StringView key) final; Status Modify(const StringView key, ModifyFunc modify_func, void* modify_args, - const WriteOptions& options) override; + const WriteOptions& options) final; // Sorted Status SortedCreate(const StringView collection_name, - const SortedCollectionConfigs& configs) override; - Status SortedDestroy(const StringView collection_name) override; - Status SortedSize(const StringView collection, size_t* size) override; + const SortedCollectionConfigs& configs) final; + Status SortedDestroy(const StringView collection_name) final; + Status SortedSize(const StringView collection, size_t* size) final; Status SortedGet(const StringView collection, const StringView user_key, - std::string* value) override; + std::string* value) final; Status SortedPut(const StringView collection, const StringView user_key, - const StringView value) override; + const StringView value) final; Status SortedDelete(const StringView collection, - const StringView user_key) override; + const StringView user_key) final; SortedIterator* SortedIteratorCreate(const StringView collection, - Snapshot* snapshot, Status* s) override; - void SortedIteratorRelease(SortedIterator* sorted_iterator) override; + Snapshot* snapshot, Status* s) final; + void SortedIteratorRelease(SortedIterator* sorted_iterator) final; // List Status ListCreate(StringView key) final; @@ -156,6 +161,20 @@ class KVEngine : public Engine { Status* s) final; void HashIteratorRelease(HashIterator*) final; +#ifdef KVDK_ENABLE_VHASH + // Volatile Hash + Status VHashCreate(StringView key, size_t capacity) final; + Status VHashDestroy(StringView key) final; + Status VHashSize(StringView key, size_t* len) final; + Status VHashGet(StringView key, StringView field, std::string* value) final; + Status VHashPut(StringView key, StringView field, StringView value) final; + Status VHashDelete(StringView key, StringView field) final; + Status VHashModify(StringView key, StringView field, ModifyFunc modify_func, + void* cb_args) final; + std::unique_ptr VHashIteratorCreate(StringView key, + Status* s) final; +#endif + // BatchWrite // It takes 3 stages // Stage 1: Preparation @@ -173,7 +192,19 @@ class KVEngine : public Engine { return std::unique_ptr{new WriteBatchImpl{}}; } - void ReleaseAccessThread() override { access_thread.Release(); } + std::unique_ptr TransactionCreate() final { + return std::unique_ptr(new TransactionImpl(this)); + } + + // Call this function before doing collection related transaction to avoid + // collection be destroyed during transaction + std::unique_ptr + AcquireCollectionTransactionLock() { + return std::unique_ptr( + new CollectionTransactionCV::TransactionToken(&ct_cv_)); + } + + 
Status CommitTransaction(TransactionImpl* txn); // For test cases const std::unordered_map>& @@ -189,7 +220,8 @@ class KVEngine : public Engine { friend Cleaner; KVEngine(const Configs& configs) - : engine_thread_cache_(configs.max_access_threads), + : access_thread_cv_(configs.max_access_threads), + engine_thread_cache_(configs.max_access_threads), cleaner_thread_cache_(configs.max_access_threads), version_controller_(configs.max_access_threads), old_records_cleaner_(this, configs.max_access_threads), @@ -222,6 +254,42 @@ class KVEngine : public Engine { SpinMutex mtx; }; + struct AccessThreadCV { + public: + struct Holder { + public: + Holder(AccessThreadCV* cv) : cv_(cv) { cv->Acquire(); } + ~Holder() { cv_->Release(); } + + private: + AccessThreadCV* cv_; + }; + + private: + void Acquire() { + std::unique_lock ul(spin_); + while (holder_id_ != ThreadManager::ThreadID() && holder_id_ != -1) { + cv_.wait(ul); + } + holder_id_ = ThreadManager::ThreadID(); + } + + void Release() { + std::unique_lock ul(spin_); + holder_id_ = -1; + cv_.notify_one(); + } + + int64_t holder_id_ = -1; + SpinMutex spin_; + std::condition_variable_any cv_; + }; + + AccessThreadCV::Holder AcquireAccessThread() { + return AccessThreadCV::Holder(&access_thread_cv_[ThreadManager::ThreadID() % + access_thread_cv_.size()]); + } + bool checkKeySize(const StringView& key) { return key.size() <= UINT16_MAX; } bool checkValueSize(const StringView& value) { @@ -234,15 +302,12 @@ class KVEngine : public Engine { Status hashGetImpl(const StringView& key, std::string* value, uint16_t type_mask); - inline Status maybeInitAccessThread() { - return thread_manager_->MaybeInitThread(access_thread); - } - bool registerComparator(const StringView& collection_name, Comparator comp_func) { return comparators_.RegisterComparator(collection_name, comp_func); } + private: // Look up a first level key in hash table(e.g. collections or string, not // collection elems), the first level key should be unique among all types // @@ -294,6 +359,14 @@ class KVEngine : public Engine { hash_table_->Insert(ret, type, status, addr, pointerType(type)); } + // Call this function before create or destroy a collection to avoid + // collection be destroyed during a related transaction + std::unique_ptr + acquireCollectionCreateOrDestroyLock() { + return std::unique_ptr( + new CollectionTransactionCV::CollectionToken(&ct_cv_)); + } + template static constexpr RecordType collectionType() { static_assert(std::is_same::value || @@ -301,11 +374,11 @@ class KVEngine : public Engine { std::is_same::value, "Invalid type!"); return std::is_same::value - ? RecordType::SortedHeader + ? RecordType::SortedRecord : std::is_same::value - ? RecordType::ListHeader + ? RecordType::ListRecord : std::is_same::value - ? RecordType::HashHeader + ? 
RecordType::HashRecord : RecordType::Empty; } @@ -321,13 +394,13 @@ class KVEngine : public Engine { kvdk_assert(false, "Not supported!"); return PointerType::Invalid; } - case RecordType::SortedHeader: { + case RecordType::SortedRecord: { return PointerType::Skiplist; } - case RecordType::ListHeader: { + case RecordType::ListRecord: { return PointerType::List; } - case RecordType::HashHeader: { + case RecordType::HashRecord: { return PointerType::HashList; } case RecordType::HashElem: { @@ -398,7 +471,7 @@ class KVEngine : public Engine { Status persistOrRecoverImmutableConfigs(); - Status batchWriteImpl(WriteBatchImpl const& batch); + Status batchWriteImpl(WriteBatchImpl const& batch, bool lock_key); Status batchWriteRollbackLogs(); @@ -560,6 +633,8 @@ class KVEngine : public Engine { void terminateBackgroundWorks(); + Array access_thread_cv_; + Array engine_thread_cache_; Array cleaner_thread_cache_; @@ -585,8 +660,7 @@ class KVEngine : public Engine { std::string dir_; std::string batch_log_dir_; - std::string db_file_; - std::shared_ptr thread_manager_; + std::string data_file_; std::unique_ptr pmem_allocator_; Configs configs_; bool closing_{false}; @@ -599,6 +673,11 @@ class KVEngine : public Engine { OldRecordsCleaner old_records_cleaner_; Cleaner cleaner_; +#ifdef KVDK_ENABLE_VHASH + CharAllocator char_alloc_; + VHashGroup vhashes_{char_alloc_, old_records_cleaner_}; +#endif + ComparatorTable comparators_; struct BackgroundWorkSignals { @@ -619,6 +698,12 @@ class KVEngine : public Engine { BackgroundWorkSignals bg_work_signals_; std::atomic round_robin_id_{0}; + + CollectionTransactionCV ct_cv_; + // We manually allocate recovery thread id for no conflict in multi-thread + // recovering + // Todo: do not hard code + std::atomic next_recovery_tid_{0}; }; } // namespace KVDK_NAMESPACE diff --git a/engine/kv_engine_cleaner.cpp b/engine/kv_engine_cleaner.cpp index ce53a6a8..b5f17b5f 100644 --- a/engine/kv_engine_cleaner.cpp +++ b/engine/kv_engine_cleaner.cpp @@ -44,8 +44,9 @@ template void KVEngine::removeAndCacheOutdatedVersion(T* record) { static_assert(std::is_same::value || std::is_same::value); - kvdk_assert(access_thread.id >= 0, ""); - auto& tc = cleaner_thread_cache_[access_thread.id]; + kvdk_assert(ThreadManager::ThreadID() >= 0, ""); + auto& tc = cleaner_thread_cache_[ThreadManager::ThreadID() % + configs_.max_access_threads]; if (std::is_same::value) { StringRecord* old_record = removeOutDatedVersion( (StringRecord*)record, version_controller_.GlobalOldestSnapshotTs()); @@ -87,8 +88,9 @@ void KVEngine::cleanOutdatedRecordImpl(T* old_record) { } void KVEngine::tryCleanCachedOutdatedRecord() { - kvdk_assert(access_thread.id >= 0, ""); - auto& tc = cleaner_thread_cache_[access_thread.id]; + kvdk_assert(ThreadManager::ThreadID() >= 0, ""); + auto& tc = cleaner_thread_cache_[ThreadManager::ThreadID() % + configs_.max_access_threads]; // Regularly update local oldest snapshot thread_local uint64_t round = 0; if (++round % kForegroundUpdateSnapshotInterval == 0) { @@ -158,7 +160,7 @@ void KVEngine::purgeAndFreeDLRecords( } break; } - case RecordType::SortedHeader: { + case RecordType::SortedRecord: { if (record_status != RecordStatus::Outdated && !pmem_record->HasExpired()) { entries.emplace_back( @@ -181,7 +183,7 @@ void KVEngine::purgeAndFreeDLRecords( } break; } - case RecordType::HashHeader: { + case RecordType::HashRecord: { if (record_status != RecordStatus::Outdated && !pmem_record->HasExpired()) { entries.emplace_back( @@ -204,7 +206,7 @@ void 
KVEngine::purgeAndFreeDLRecords( } break; } - case RecordType::ListHeader: { + case RecordType::ListRecord: { if (record_status != RecordStatus::Outdated && !pmem_record->HasExpired()) { entries.emplace_back( @@ -758,7 +760,7 @@ void Cleaner::FetchOutdatedCollections( auto skiplist_iter = outdated_collections_.skiplists.begin(); if (skiplist_iter->second < min_snapshot_ts) { outdated_collection = skiplist_iter->first; - record_type = RecordType::SortedHeader; + record_type = RecordType::SortedRecord; outdated_collections_.skiplists.erase(skiplist_iter); } } @@ -767,7 +769,7 @@ void Cleaner::FetchOutdatedCollections( auto list_iter = outdated_collections_.lists.begin(); if (list_iter->second < min_snapshot_ts) { outdated_collection = list_iter->first; - record_type = RecordType::ListHeader; + record_type = RecordType::ListRecord; outdated_collections_.lists.erase(list_iter); } } @@ -776,7 +778,7 @@ void Cleaner::FetchOutdatedCollections( auto hash_list_iter = outdated_collections_.hashlists.begin(); if (hash_list_iter->second < min_snapshot_ts) { outdated_collection = hash_list_iter->first; - record_type = RecordType::HashHeader; + record_type = RecordType::HashRecord; outdated_collections_.hashlists.erase(hash_list_iter); } } diff --git a/engine/kv_engine_hash.cpp b/engine/kv_engine_hash.cpp index 705b4de6..ca144cd9 100644 --- a/engine/kv_engine_hash.cpp +++ b/engine/kv_engine_hash.cpp @@ -7,10 +7,7 @@ namespace KVDK_NAMESPACE { Status KVEngine::HashCreate(StringView collection) { - Status s = maybeInitAccessThread(); - if (s != Status::Ok) { - return s; - } + auto thread_holder = AcquireAccessThread(); if (!checkKeySize(collection)) { return Status::InvalidDataSize; @@ -25,9 +22,10 @@ Status KVEngine::buildHashlist(const StringView& collection, auto ul = hash_table_->AcquireLock(collection); auto holder = version_controller_.GetLocalSnapshotHolder(); TimestampType new_ts = holder.Timestamp(); - auto lookup_result = lookupKey(collection, RecordType::HashHeader); + auto lookup_result = lookupKey(collection, RecordType::HashRecord); if (lookup_result.s == Status::NotFound || lookup_result.s == Status::Outdated) { + auto create_token = acquireCollectionCreateOrDestroyLock(); DLRecord* existing_header = lookup_result.s == Outdated ? 
lookup_result.entry.GetIndex().hlist->HeaderRecord() @@ -43,7 +41,7 @@ Status KVEngine::buildHashlist(const StringView& collection, // header point to itself DLRecord* pmem_record = DLRecord::PersistDLRecord( pmem_allocator_->offset2addr_checked(space.offset), space.size, new_ts, - RecordType::HashHeader, RecordStatus::Normal, + RecordType::HashRecord, RecordStatus::Normal, pmem_allocator_->addr2offset(existing_header), space.offset, space.offset, collection, value_str); hlist = std::make_shared(pmem_record, collection, id, @@ -51,7 +49,7 @@ Status KVEngine::buildHashlist(const StringView& collection, dllist_locks_.get()); kvdk_assert(hlist != nullptr, ""); addHashlistToMap(hlist); - insertKeyOrElem(lookup_result, RecordType::HashHeader, RecordStatus::Normal, + insertKeyOrElem(lookup_result, RecordType::HashRecord, RecordStatus::Normal, hlist.get()); return Status::Ok; } else { @@ -60,11 +58,7 @@ Status KVEngine::buildHashlist(const StringView& collection, } Status KVEngine::HashDestroy(StringView collection) { - auto s = maybeInitAccessThread(); - defer(ReleaseAccessThread()); - if (s != Status::Ok) { - return s; - } + auto thread_holder = AcquireAccessThread(); if (!checkKeySize(collection)) { return Status::InvalidDataSize; @@ -74,10 +68,11 @@ Status KVEngine::HashDestroy(StringView collection) { auto snapshot_holder = version_controller_.GetLocalSnapshotHolder(); auto new_ts = snapshot_holder.Timestamp(); HashList* hlist; - s = hashListFind(collection, &hlist); + Status s = hashListFind(collection, &hlist); if (s == Status::Ok) { + auto destroy = acquireCollectionCreateOrDestroyLock(); DLRecord* header = hlist->HeaderRecord(); - kvdk_assert(header->GetRecordType() == RecordType::HashHeader, ""); + kvdk_assert(header->GetRecordType() == RecordType::HashRecord, ""); StringView value = header->Value(); auto request_size = DLRecord::RecordSize(collection, value); SpaceEntry space = pmem_allocator_->Allocate(request_size); @@ -86,12 +81,12 @@ Status KVEngine::HashDestroy(StringView collection) { } DLRecord* pmem_record = DLRecord::PersistDLRecord( pmem_allocator_->offset2addr_checked(space.offset), space.size, new_ts, - RecordType::HashHeader, RecordStatus::Outdated, + RecordType::HashRecord, RecordStatus::Outdated, pmem_allocator_->addr2offset_checked(header), header->prev, header->next, collection, value); bool success = hlist->Replace(header, pmem_record); kvdk_assert(success, "existing header should be linked on its hlist"); - hash_table_->Insert(collection, RecordType::HashHeader, + hash_table_->Insert(collection, RecordType::HashRecord, RecordStatus::Outdated, hlist, PointerType::HashList); { std::unique_lock hlist_lock(hlists_mu_); @@ -105,9 +100,7 @@ Status KVEngine::HashSize(StringView collection, size_t* len) { if (!checkKeySize(collection)) { return Status::InvalidDataSize; } - if (maybeInitAccessThread() != Status::Ok) { - return Status::TooManyAccessThreads; - } + auto thread_holder = AcquireAccessThread(); auto token = version_controller_.GetLocalSnapshotHolder(); HashList* hlist; @@ -121,17 +114,13 @@ Status KVEngine::HashSize(StringView collection, size_t* len) { Status KVEngine::HashGet(StringView collection, StringView key, std::string* value) { - Status s = maybeInitAccessThread(); - - if (s != Status::Ok) { - return s; - } + auto thread_holder = AcquireAccessThread(); // Hold current snapshot in this thread auto holder = version_controller_.GetLocalSnapshotHolder(); HashList* hlist; - s = hashListFind(collection, &hlist); + Status s = hashListFind(collection, &hlist); if 
(s == Status::Ok) { s = hlist->Get(key, value); } @@ -140,17 +129,13 @@ Status KVEngine::HashGet(StringView collection, StringView key, Status KVEngine::HashPut(StringView collection, StringView key, StringView value) { - Status s = maybeInitAccessThread(); - - if (s != Status::Ok) { - return s; - } + auto thread_holder = AcquireAccessThread(); // Hold current snapshot in this thread auto holder = version_controller_.GetLocalSnapshotHolder(); HashList* hlist; - s = hashListFind(collection, &hlist); + Status s = hashListFind(collection, &hlist); if (s == Status::Ok) { std::string collection_key(hlist->InternalKey(key)); if (!checkKeySize(collection_key) || !checkValueSize(value)) { @@ -172,17 +157,13 @@ Status KVEngine::HashPut(StringView collection, StringView key, } Status KVEngine::HashDelete(StringView collection, StringView key) { - Status s = maybeInitAccessThread(); - - if (s != Status::Ok) { - return s; - } + auto thread_holder = AcquireAccessThread(); // Hold current snapshot in this thread auto holder = version_controller_.GetLocalSnapshotHolder(); HashList* hlist; - s = hashListFind(collection, &hlist); + Status s = hashListFind(collection, &hlist); if (s == Status::Ok) { std::string collection_key(hlist->InternalKey(key)); if (!checkKeySize(collection_key)) { @@ -204,15 +185,12 @@ Status KVEngine::HashDelete(StringView collection, StringView key) { Status KVEngine::HashModify(StringView collection, StringView key, ModifyFunc modify_func, void* cb_args) { - Status s = maybeInitAccessThread(); - if (s != Status::Ok) { - return s; - } + auto thread_holder = AcquireAccessThread(); // Hold current snapshot in this thread auto holder = version_controller_.GetLocalSnapshotHolder(); HashList* hlist; - s = hashListFind(collection, &hlist); + Status s = hashListFind(collection, &hlist); if (s == Status::Ok) { std::string internal_key(hlist->InternalKey(key)); auto ul = hash_table_->AcquireLock(internal_key); @@ -272,7 +250,7 @@ void KVEngine::HashIteratorRelease(HashIterator* hash_iter) { Status KVEngine::hashListFind(StringView collection, HashList** hlist) { // Callers should acquire the access token or snapshot. // Lockless lookup for the collection - auto result = lookupKey(collection, RecordType::HashHeader); + auto result = lookupKey(collection, RecordType::HashRecord); if (result.s == Status::Outdated) { return Status::NotFound; } diff --git a/engine/kv_engine_list.cpp b/engine/kv_engine_list.cpp index 0427f525..695a7779 100644 --- a/engine/kv_engine_list.cpp +++ b/engine/kv_engine_list.cpp @@ -4,10 +4,7 @@ namespace KVDK_NAMESPACE { Status KVEngine::ListCreate(StringView list_name) { - Status s = maybeInitAccessThread(); - if (s != Status::Ok) { - return s; - } + auto thread_holder = AcquireAccessThread(); if (!checkKeySize(list_name)) { return Status::InvalidDataSize; @@ -22,9 +19,10 @@ Status KVEngine::buildList(const StringView& list_name, auto ul = hash_table_->AcquireLock(list_name); auto holder = version_controller_.GetLocalSnapshotHolder(); TimestampType new_ts = holder.Timestamp(); - auto lookup_result = lookupKey(list_name, RecordType::ListHeader); + auto lookup_result = lookupKey(list_name, RecordType::ListRecord); if (lookup_result.s == Status::NotFound || lookup_result.s == Status::Outdated) { + auto create_token = acquireCollectionCreateOrDestroyLock(); DLRecord* existing_header = lookup_result.s == Outdated ? 
lookup_result.entry.GetIndex().hlist->HeaderRecord() @@ -40,14 +38,14 @@ Status KVEngine::buildList(const StringView& list_name, // header point to itself DLRecord* pmem_record = DLRecord::PersistDLRecord( pmem_allocator_->offset2addr_checked(space.offset), space.size, new_ts, - RecordType::ListHeader, RecordStatus::Normal, + RecordType::ListRecord, RecordStatus::Normal, pmem_allocator_->addr2offset(existing_header), space.offset, space.offset, list_name, value_str); list = std::make_shared(pmem_record, list_name, id, pmem_allocator_.get(), dllist_locks_.get()); kvdk_assert(list != nullptr, ""); addListToMap(list); - insertKeyOrElem(lookup_result, RecordType::ListHeader, RecordStatus::Normal, + insertKeyOrElem(lookup_result, RecordType::ListRecord, RecordStatus::Normal, list.get()); return Status::Ok; } else { @@ -59,9 +57,7 @@ Status KVEngine::ListDestroy(StringView collection) { if (!checkKeySize(collection)) { return Status::InvalidDataSize; } - if (maybeInitAccessThread() != Status::Ok) { - return Status::TooManyAccessThreads; - } + auto thread_holder = AcquireAccessThread(); auto ul = hash_table_->AcquireLock(collection); auto snapshot_holder = version_controller_.GetLocalSnapshotHolder(); @@ -69,8 +65,9 @@ Status KVEngine::ListDestroy(StringView collection) { List* list; Status s = listFind(collection, &list); if (s == Status::Ok) { + auto destroy_token = acquireCollectionCreateOrDestroyLock(); DLRecord* header = list->HeaderRecord(); - kvdk_assert(header->GetRecordType() == RecordType::ListHeader, ""); + kvdk_assert(header->GetRecordType() == RecordType::ListRecord, ""); StringView value = header->Value(); auto request_size = DLRecord::RecordSize(collection, value); SpaceEntry space = pmem_allocator_->Allocate(request_size); @@ -79,12 +76,12 @@ Status KVEngine::ListDestroy(StringView collection) { } DLRecord* pmem_record = DLRecord::PersistDLRecord( pmem_allocator_->offset2addr_checked(space.offset), space.size, new_ts, - RecordType::ListHeader, RecordStatus::Outdated, + RecordType::ListRecord, RecordStatus::Outdated, pmem_allocator_->addr2offset_checked(header), header->prev, header->next, collection, value); bool success = list->Replace(header, pmem_record); kvdk_assert(success, "existing header should be linked on its list"); - hash_table_->Insert(collection, RecordType::ListHeader, + hash_table_->Insert(collection, RecordType::ListRecord, RecordStatus::Outdated, list, PointerType::List); { std::unique_lock list_lock(lists_mu_); @@ -98,9 +95,7 @@ Status KVEngine::ListSize(StringView list_name, size_t* sz) { if (!checkKeySize(list_name)) { return Status::InvalidDataSize; } - if (maybeInitAccessThread() != Status::Ok) { - return Status::TooManyAccessThreads; - } + auto thread_holder = AcquireAccessThread(); auto token = version_controller_.GetLocalSnapshotHolder(); @@ -118,9 +113,7 @@ Status KVEngine::ListPushFront(StringView collection, StringView elem) { if (!checkKeySize(collection) || !checkValueSize(elem)) { return Status::InvalidDataSize; } - if (maybeInitAccessThread() != Status::Ok) { - return Status::TooManyAccessThreads; - } + auto thread_holder = AcquireAccessThread(); auto token = version_controller_.GetLocalSnapshotHolder(); List* list; @@ -137,9 +130,7 @@ Status KVEngine::ListPushBack(StringView list_name, StringView elem) { if (!checkKeySize(list_name) || !checkValueSize(elem)) { return Status::InvalidDataSize; } - if (maybeInitAccessThread() != Status::Ok) { - return Status::TooManyAccessThreads; - } + auto thread_holder = AcquireAccessThread(); auto token = 
version_controller_.GetLocalSnapshotHolder(); List* list; @@ -156,9 +147,7 @@ Status KVEngine::ListPopFront(StringView list_name, std::string* elem) { if (!checkKeySize(list_name)) { return Status::InvalidDataSize; } - if (maybeInitAccessThread() != Status::Ok) { - return Status::TooManyAccessThreads; - } + auto thread_holder = AcquireAccessThread(); auto token = version_controller_.GetLocalSnapshotHolder(); List* list; @@ -189,9 +178,7 @@ Status KVEngine::ListPopBack(StringView list_name, std::string* elem) { if (!checkKeySize(list_name)) { return Status::InvalidDataSize; } - if (maybeInitAccessThread() != Status::Ok) { - return Status::TooManyAccessThreads; - } + auto thread_holder = AcquireAccessThread(); auto token = version_controller_.GetLocalSnapshotHolder(); List* list; @@ -242,11 +229,9 @@ Status KVEngine::ListBatchPushFront(StringView list_name, return Status::InvalidDataSize; } } - Status s = maybeInitAccessThread(); - if (s != Status::Ok) { - return s; - } - s = maybeInitBatchLogFile(); + auto thread_holder = AcquireAccessThread(); + + Status s = maybeInitBatchLogFile(); if (s != Status::Ok) { return s; } @@ -272,11 +257,9 @@ Status KVEngine::ListBatchPushBack(StringView list_name, return Status::InvalidDataSize; } } - Status s = maybeInitAccessThread(); - if (s != Status::Ok) { - return s; - } - s = maybeInitBatchLogFile(); + auto thread_holder = AcquireAccessThread(); + + Status s = maybeInitBatchLogFile(); if (s != Status::Ok) { return s; } @@ -288,11 +271,9 @@ Status KVEngine::ListBatchPopFront(StringView list_name, size_t n, if (!checkKeySize(list_name)) { return Status::InvalidDataSize; } - Status s = maybeInitAccessThread(); - if (s != Status::Ok) { - return s; - } - s = maybeInitBatchLogFile(); + auto thread_holder = AcquireAccessThread(); + + Status s = maybeInitBatchLogFile(); if (s != Status::Ok) { return s; } @@ -304,11 +285,8 @@ Status KVEngine::ListBatchPopBack(StringView list_name, size_t n, if (!checkKeySize(list_name)) { return Status::InvalidDataSize; } - Status s = maybeInitAccessThread(); - if (s != Status::Ok) { - return s; - } - s = maybeInitBatchLogFile(); + auto thread_holder = AcquireAccessThread(); + Status s = maybeInitBatchLogFile(); if (s != Status::Ok) { return s; } @@ -321,12 +299,9 @@ Status KVEngine::ListMove(StringView src, ListPos src_pos, StringView dst, if (!checkKeySize(src) || !checkKeySize(dst)) { return Status::InvalidDataSize; } - Status s = maybeInitAccessThread(); - if (s != Status::Ok) { - return s; - } + auto thread_holder = AcquireAccessThread(); - s = maybeInitBatchLogFile(); + Status s = maybeInitBatchLogFile(); if (s != Status::Ok) { return s; } @@ -390,7 +365,8 @@ Status KVEngine::ListMove(StringView src, ListPos src_pos, StringView dst, log.ListDelete(pop_args.spaces[0].offset); log.ListEmplace(push_args.spaces[0].offset); - auto& tc = engine_thread_cache_[access_thread.id]; + auto& tc = engine_thread_cache_[ThreadManager::ThreadID() % + configs_.max_access_threads]; log.EncodeTo(tc.batch_log); BatchWriteLog::MarkProcessing(tc.batch_log); @@ -410,9 +386,8 @@ Status KVEngine::ListInsertAt(StringView list_name, StringView elem, if (!checkValueSize(elem)) { return Status::InvalidDataSize; } - if (maybeInitAccessThread() != Status::Ok) { - return Status::TooManyAccessThreads; - } + auto thread_holder = AcquireAccessThread(); + auto token = version_controller_.GetLocalSnapshotHolder(); List* list; Status s = listFind(list_name, &list); @@ -429,9 +404,7 @@ Status KVEngine::ListInsertBefore(StringView list_name, StringView elem, if 
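ListMove above stages both halves of the move (one ListDelete, one ListEmplace) in a batch write log, encodes it into the per-slot tc.batch_log buffer and marks it processing before the PMem records are written, so a crash in the middle leaves a log that recovery can roll back. A simplified, self-contained model of that lifecycle; the stage names and struct layout are illustrative, not the engine's BatchWriteLog encoding:

#include <cstdint>
#include <vector>

enum class LogStage : uint8_t { Initializing, Processing, Committed };

struct MiniBatchLog {
  LogStage stage = LogStage::Initializing;
  std::vector<uint64_t> deleted_offsets;   // offsets of records this batch removes
  std::vector<uint64_t> emplaced_offsets;  // offsets of records this batch adds

  void MarkProcessing() { stage = LogStage::Processing; }  // persist before data writes
  void MarkCommitted() { stage = LogStage::Committed; }    // persist after data writes
  bool NeedsRollback() const { return stage == LogStage::Processing; }
};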
(!checkValueSize(elem)) { return Status::InvalidDataSize; } - if (maybeInitAccessThread() != Status::Ok) { - return Status::TooManyAccessThreads; - } + auto thread_holder = AcquireAccessThread(); auto token = version_controller_.GetLocalSnapshotHolder(); List* list; @@ -451,9 +424,7 @@ Status KVEngine::ListInsertAfter(StringView collection, StringView elem, if (!checkValueSize(elem)) { return Status::InvalidDataSize; } - if (maybeInitAccessThread() != Status::Ok) { - return Status::TooManyAccessThreads; - } + auto thread_holder = AcquireAccessThread(); auto token = version_controller_.GetLocalSnapshotHolder(); List* list; @@ -469,9 +440,7 @@ Status KVEngine::ListInsertAfter(StringView collection, StringView elem, Status KVEngine::ListErase(StringView list_name, long index, std::string* elem) { - if (maybeInitAccessThread() != Status::Ok) { - return Status::TooManyAccessThreads; - } + auto thread_holder = AcquireAccessThread(); auto token = version_controller_.GetLocalSnapshotHolder(); List* list; @@ -499,9 +468,7 @@ Status KVEngine::ListErase(StringView list_name, long index, // Replace the element at pos Status KVEngine::ListReplace(StringView collection, long index, StringView elem) { - if (maybeInitAccessThread() != Status::Ok) { - return Status::TooManyAccessThreads; - } + auto thread_holder = AcquireAccessThread(); auto token = version_controller_.GetLocalSnapshotHolder(); List* list; @@ -563,7 +530,7 @@ Status KVEngine::listRestoreList(DLRecord* pmp_record) { } Status KVEngine::listFind(StringView list_name, List** list) { - auto result = lookupKey(list_name, RecordType::ListHeader); + auto result = lookupKey(list_name, RecordType::ListRecord); if (result.s == Status::Outdated) { return Status::NotFound; } @@ -597,7 +564,8 @@ Status KVEngine::listBatchPushImpl(StringView list_name, ListPos pos, log.ListEmplace(space.offset); } - auto& tc = engine_thread_cache_[access_thread.id]; + auto& tc = engine_thread_cache_[ThreadManager::ThreadID() % + configs_.max_access_threads]; log.EncodeTo(tc.batch_log); BatchWriteLog::MarkProcessing(tc.batch_log); @@ -630,7 +598,8 @@ Status KVEngine::listBatchPopImpl(StringView list_name, ListPos pos, size_t n, log.ListDelete(space.offset); } - auto& tc = engine_thread_cache_[access_thread.id]; + auto& tc = engine_thread_cache_[ThreadManager::ThreadID() % + configs_.max_access_threads]; log.EncodeTo(tc.batch_log); BatchWriteLog::MarkProcessing(tc.batch_log); diff --git a/engine/kv_engine_sorted.cpp b/engine/kv_engine_sorted.cpp index ab2d573d..9dc801ed 100644 --- a/engine/kv_engine_sorted.cpp +++ b/engine/kv_engine_sorted.cpp @@ -8,11 +8,7 @@ namespace KVDK_NAMESPACE { Status KVEngine::SortedCreate(const StringView collection_name, const SortedCollectionConfigs& s_configs) { - Status s = maybeInitAccessThread(); - defer(ReleaseAccessThread()); - if (s != Status::Ok) { - return s; - } + auto thread_holder = AcquireAccessThread(); if (!checkKeySize(collection_name)) { return Status::InvalidDataSize; @@ -30,8 +26,9 @@ Status KVEngine::buildSkiplist(const StringView& collection_name, auto holder = version_controller_.GetLocalSnapshotHolder(); TimestampType new_ts = holder.Timestamp(); auto lookup_result = - lookupKey(collection_name, RecordType::SortedHeader); + lookupKey(collection_name, RecordType::SortedRecord); if (lookup_result.s == NotFound || lookup_result.s == Outdated) { + auto create_token = acquireCollectionCreateOrDestroyLock(); DLRecord* existing_header = lookup_result.s == Outdated ? 
lookup_result.entry.GetIndex().skiplist->HeaderRecord() @@ -56,7 +53,7 @@ Status KVEngine::buildSkiplist(const StringView& collection_name, // header point to itself DLRecord* pmem_record = DLRecord::PersistDLRecord( pmem_allocator_->offset2addr(space_entry.offset), space_entry.size, - new_ts, RecordType::SortedHeader, RecordStatus::Normal, + new_ts, RecordType::SortedRecord, RecordStatus::Normal, pmem_allocator_->addr2offset(existing_header), space_entry.offset, space_entry.offset, collection_name, value_str); @@ -65,7 +62,7 @@ Status KVEngine::buildSkiplist(const StringView& collection_name, pmem_allocator_.get(), hash_table_.get(), dllist_locks_.get(), s_configs.index_with_hashtable); addSkiplistToMap(skiplist); - insertKeyOrElem(lookup_result, RecordType::SortedHeader, + insertKeyOrElem(lookup_result, RecordType::SortedRecord, RecordStatus::Normal, skiplist.get()); } else { return lookup_result.s == Status::Ok ? Status::Existed : lookup_result.s; @@ -74,20 +71,18 @@ Status KVEngine::buildSkiplist(const StringView& collection_name, } Status KVEngine::SortedDestroy(const StringView collection_name) { - auto s = maybeInitAccessThread(); - defer(ReleaseAccessThread()); - if (s != Status::Ok) { - return s; - } + auto thread_holder = AcquireAccessThread(); + auto ul = hash_table_->AcquireLock(collection_name); auto snapshot_holder = version_controller_.GetLocalSnapshotHolder(); auto new_ts = snapshot_holder.Timestamp(); auto lookup_result = - lookupKey(collection_name, RecordType::SortedHeader); + lookupKey(collection_name, RecordType::SortedRecord); if (lookup_result.s == Status::Ok) { + auto destroy_token = acquireCollectionCreateOrDestroyLock(); Skiplist* skiplist = lookup_result.entry.GetIndex().skiplist; DLRecord* header = skiplist->HeaderRecord(); - assert(header->GetRecordType() == RecordType::SortedHeader); + assert(header->GetRecordType() == RecordType::SortedRecord); StringView value = header->Value(); auto request_size = sizeof(DLRecord) + collection_name.size() + value.size(); @@ -97,14 +92,14 @@ Status KVEngine::SortedDestroy(const StringView collection_name) { } DLRecord* pmem_record = DLRecord::PersistDLRecord( pmem_allocator_->offset2addr_checked(space_entry.offset), - space_entry.size, new_ts, RecordType::SortedHeader, + space_entry.size, new_ts, RecordType::SortedRecord, RecordStatus::Outdated, pmem_allocator_->addr2offset_checked(header), header->prev, header->next, collection_name, value, 0); bool success = Skiplist::Replace(header, pmem_record, skiplist->HeaderNode(), pmem_allocator_.get(), dllist_locks_.get()); kvdk_assert(success, "existing header should be linked on its skiplist"); - insertKeyOrElem(lookup_result, RecordType::SortedHeader, + insertKeyOrElem(lookup_result, RecordType::SortedRecord, RecordStatus::Outdated, skiplist); { std::unique_lock skiplist_lock(skiplists_mu_); @@ -118,16 +113,12 @@ Status KVEngine::SortedDestroy(const StringView collection_name) { } Status KVEngine::SortedSize(const StringView collection, size_t* size) { - Status s = maybeInitAccessThread(); - - if (s != Status::Ok) { - return s; - } + auto thread_holder = AcquireAccessThread(); auto holder = version_controller_.GetLocalSnapshotHolder(); Skiplist* skiplist = nullptr; - auto ret = lookupKey(collection, RecordType::SortedHeader); + auto ret = lookupKey(collection, RecordType::SortedRecord); if (ret.s != Status::Ok) { return ret.s == Status::Outdated ? 
Status::NotFound : ret.s; } @@ -141,17 +132,13 @@ Status KVEngine::SortedSize(const StringView collection, size_t* size) { Status KVEngine::SortedGet(const StringView collection, const StringView user_key, std::string* value) { - Status s = maybeInitAccessThread(); - - if (s != Status::Ok) { - return s; - } + auto thread_holder = AcquireAccessThread(); // Hold current snapshot in this thread auto holder = version_controller_.GetLocalSnapshotHolder(); Skiplist* skiplist = nullptr; - auto ret = lookupKey(collection, RecordType::SortedHeader); + auto ret = lookupKey(collection, RecordType::SortedRecord); if (ret.s != Status::Ok) { return ret.s == Status::Outdated ? Status::NotFound : ret.s; } @@ -166,16 +153,13 @@ Status KVEngine::SortedGet(const StringView collection, Status KVEngine::SortedPut(const StringView collection, const StringView user_key, const StringView value) { - Status s = maybeInitAccessThread(); - if (s != Status::Ok) { - return s; - } + auto thread_holder = AcquireAccessThread(); auto snapshot_holder = version_controller_.GetLocalSnapshotHolder(); Skiplist* skiplist = nullptr; - auto ret = lookupKey(collection, RecordType::SortedHeader); + auto ret = lookupKey(collection, RecordType::SortedRecord); if (ret.s != Status::Ok) { return ret.s == Status::Outdated ? Status::NotFound : ret.s; } @@ -188,18 +172,17 @@ Status KVEngine::SortedPut(const StringView collection, Status KVEngine::SortedDelete(const StringView collection, const StringView user_key) { - Status s = maybeInitAccessThread(); - if (s != Status::Ok) { - return s; - } + auto thread_holder = AcquireAccessThread(); + // Hold current snapshot in this thread auto holder = version_controller_.GetLocalSnapshotHolder(); Skiplist* skiplist = nullptr; - auto ret = lookupKey(collection, RecordType::SortedHeader); + auto ret = lookupKey(collection, RecordType::SortedRecord); if (ret.s != Status::Ok) { - return (ret.s == Status::Outdated || ret.s == Status::NotFound) ? Status::Ok - : ret.s; + return (ret.s == Status::Outdated || ret.s == Status::NotFound) + ? Status::NotFound + : ret.s; } kvdk_assert(ret.entry.GetIndexType() == PointerType::Skiplist, @@ -217,7 +200,7 @@ SortedIterator* KVEngine::SortedIteratorCreate(const StringView collection, snapshot = GetSnapshot(false); } // find collection - auto res = lookupKey(collection, RecordType::SortedHeader); + auto res = lookupKey(collection, RecordType::SortedRecord); if (s != nullptr) { *s = (res.s == Status::Outdated) ? 
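Note the behaviour change in SortedDelete above: deleting from a missing or outdated collection now returns Status::NotFound where it previously returned Status::Ok. A caller that wants to keep treating the delete as an idempotent no-op can translate the status itself; the helper below is a hypothetical example written against the public kvdk API:

#include "kvdk/engine.hpp"

using kvdk::Engine;
using kvdk::Status;
using kvdk::StringView;

// Treat "collection or key does not exist" as a successful no-op delete.
Status DeleteIfPresent(Engine* engine, const StringView collection,
                       const StringView key) {
  Status s = engine->SortedDelete(collection, key);
  return s == Status::NotFound ? Status::Ok : s;
}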
Status::NotFound : res.s; } diff --git a/engine/kv_engine_string.cpp b/engine/kv_engine_string.cpp index bc2b4b87..ef372e58 100644 --- a/engine/kv_engine_string.cpp +++ b/engine/kv_engine_string.cpp @@ -14,10 +14,7 @@ Status KVEngine::Modify(const StringView key, ModifyFunc modify_func, return Status::InvalidArgument; } - Status s = maybeInitAccessThread(); - if (s != Status::Ok) { - return s; - } + auto thread_holder = AcquireAccessThread(); auto ul = hash_table_->AcquireLock(key); auto holder = version_controller_.GetLocalSnapshotHolder(); @@ -106,10 +103,7 @@ Status KVEngine::Modify(const StringView key, ModifyFunc modify_func, Status KVEngine::Put(const StringView key, const StringView value, const WriteOptions& options) { - Status s = maybeInitAccessThread(); - if (s != Status::Ok) { - return s; - } + auto thread_holder = AcquireAccessThread(); if (!checkKeySize(key) || !checkValueSize(value)) { return Status::InvalidDataSize; @@ -119,11 +113,7 @@ Status KVEngine::Put(const StringView key, const StringView value, } Status KVEngine::Get(const StringView key, std::string* value) { - Status s = maybeInitAccessThread(); - - if (s != Status::Ok) { - return s; - } + auto thread_holder = AcquireAccessThread(); if (!checkKeySize(key)) { return Status::InvalidDataSize; @@ -144,11 +134,7 @@ Status KVEngine::Get(const StringView key, std::string* value) { } Status KVEngine::Delete(const StringView key) { - Status s = maybeInitAccessThread(); - - if (s != Status::Ok) { - return s; - } + auto thread_holder = AcquireAccessThread(); if (!checkKeySize(key)) { return Status::InvalidDataSize; diff --git a/engine/kv_engine_vhash.cpp b/engine/kv_engine_vhash.cpp new file mode 100644 index 00000000..52a977ee --- /dev/null +++ b/engine/kv_engine_vhash.cpp @@ -0,0 +1,140 @@ +#include "kv_engine.hpp" +#include "macros.hpp" + +namespace KVDK_NAMESPACE { + +Status KVEngine::VHashCreate(StringView key, size_t capacity) KVDK_TRY { + if (!checkKeySize(key)) return Status::InvalidDataSize; + + auto thread_access = AcquireAccessThread(); + return vhashes_.Create(key, capacity) ? Status::Ok : Status::Existed; +} +KVDK_HANDLE_EXCEPTIONS + +Status KVEngine::VHashDestroy(StringView key) KVDK_TRY { + if (!checkKeySize(key)) return Status::InvalidDataSize; + + auto thread_access = AcquireAccessThread(); + return vhashes_.Destroy(key) ? 
Status::Ok : Status::NotFound; +} +KVDK_HANDLE_EXCEPTIONS + +Status KVEngine::VHashSize(StringView key, size_t* len) KVDK_TRY { + if (!checkKeySize(key)) return Status::InvalidDataSize; + + auto thread_access = AcquireAccessThread(); + auto token = version_controller_.GetLocalSnapshotHolder(); + + VHash* vhash = vhashes_.Get(key); + if (vhash == nullptr) return Status::NotFound; + *len = vhash->Size(); + return Status::Ok; +} +KVDK_HANDLE_EXCEPTIONS + +Status KVEngine::VHashGet(StringView key, StringView field, + std::string* value) KVDK_TRY { + if (!checkKeySize(key) || !checkKeySize(field)) + return Status::InvalidDataSize; + + auto thread_access = AcquireAccessThread(); + auto token = version_controller_.GetLocalSnapshotHolder(); + + VHash* vhash = vhashes_.Get(key); + if (vhash == nullptr) return Status::NotFound; + + StringView val; + if (!vhash->Get(field, val)) return Status::NotFound; + value->assign(val.data(), val.size()); + return Status::Ok; +} +KVDK_HANDLE_EXCEPTIONS + +Status KVEngine::VHashPut(StringView key, StringView field, + StringView value) KVDK_TRY { + if (!checkKeySize(key) || !checkKeySize(field) || !checkValueSize(value)) + return Status::InvalidDataSize; + + auto thread_access = AcquireAccessThread(); + auto token = version_controller_.GetLocalSnapshotHolder(); + + VHash* vhash = vhashes_.Get(key); + if (vhash == nullptr) return Status::NotFound; + + vhash->Put(field, value); + return Status::Ok; +} +KVDK_HANDLE_EXCEPTIONS + +Status KVEngine::VHashDelete(StringView key, StringView field) KVDK_TRY { + if (!checkKeySize(key) || !checkKeySize(field)) + return Status::InvalidDataSize; + + auto thread_access = AcquireAccessThread(); + auto token = version_controller_.GetLocalSnapshotHolder(); + + VHash* vhash = vhashes_.Get(key); + if (vhash == nullptr) return Status::NotFound; + vhash->Delete(field); + return Status::Ok; +} +KVDK_HANDLE_EXCEPTIONS + +Status KVEngine::VHashModify(StringView key, StringView field, + ModifyFunc modify_func, void* cb_args) KVDK_TRY { + if (!checkKeySize(key) || !checkKeySize(field)) + return Status::InvalidDataSize; + + std::string old_value; + std::string new_value; + auto modify = [&](StringView const* old_val, StringView& new_val, + void* args) { + if (old_val != nullptr) old_value.assign(old_val->data(), old_val->size()); + ModifyOperation op = + modify_func(old_val ? &old_value : nullptr, &new_value, args); + new_val = new_value; + return op; + }; + + auto cleanup = [&](StringView) { return; }; + + auto thread_access = AcquireAccessThread(); + auto token = version_controller_.GetLocalSnapshotHolder(); + VHash* vhash = vhashes_.Get(key); + if (vhash == nullptr) return Status::NotFound; + + return (vhash->Modify(field, modify, cb_args, cleanup)) ? Status::Ok + : Status::Abort; +} +KVDK_HANDLE_EXCEPTIONS + +std::unique_ptr KVEngine::VHashIteratorCreate(StringView key, + Status* s) try { + Status sink; + s = (s != nullptr) ? s : &sink; + *s = Status::NotFound; + + if (!checkKeySize(key)) return nullptr; + + auto thread_access = AcquireAccessThread(); + /// TODO: iterator should hold an access token to keep VHash valid. + auto token = version_controller_.GetLocalSnapshotHolder(); + + VHash* vhash = vhashes_.Get(key); + if (vhash == nullptr) return nullptr; + + *s = Status::Ok; + return vhash->MakeIterator(); +} catch (std::exception const& ex) { + Status sink; + s = (s != nullptr) ? s : &sink; + *s = ExceptionToStatus(ex); + return nullptr; +} catch (...) { + Status sink; + s = (s != nullptr) ? 
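VHashModify above adapts the caller's ModifyFunc by copying the old value into a std::string before invoking it, so the callback works with plain strings. For reference, a callback that keeps a decimal counter in the field might look like the sketch below; it assumes the ModifyOperation/ModifyFunc declarations come in through the public kvdk headers and is not part of the patch:

#include <string>
#include "kvdk/engine.hpp"  // assumed to bring in ModifyOperation/ModifyFunc

using kvdk::ModifyOperation;

ModifyOperation IncrementCounter(const std::string* old_value,
                                 std::string* new_value, void*) {
  long long v = 0;
  if (old_value != nullptr) {
    try {
      v = std::stoll(*old_value);
    } catch (...) {
      return ModifyOperation::Abort;  // existing value is not a number
    }
  }
  *new_value = std::to_string(v + 1);
  return ModifyOperation::Write;
}

It would be passed as engine->VHashModify(key, field, IncrementCounter, nullptr), matching the signature introduced above.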
s : &sink; + *s = Status::Abort; + return nullptr; +} + +} // namespace KVDK_NAMESPACE diff --git a/engine/list_collection/list.cpp b/engine/list_collection/list.cpp index cbe98fad..1098a839 100644 --- a/engine/list_collection/list.cpp +++ b/engine/list_collection/list.cpp @@ -17,7 +17,7 @@ List::WriteResult List::SetExpireTime(ExpireTimeType expired_time, } DLRecord* pmem_record = DLRecord::PersistDLRecord( pmem_allocator_->offset2addr_checked(space.offset), space.size, timestamp, - RecordType::ListHeader, RecordStatus::Normal, + RecordType::ListRecord, RecordStatus::Normal, pmem_allocator_->addr2offset_checked(header), header->prev, header->next, header->Key(), header->Value(), expired_time); bool success = dl_list_.Replace(header, pmem_record); diff --git a/engine/list_collection/list.hpp b/engine/list_collection/list.hpp index 16613c0b..999c660b 100644 --- a/engine/list_collection/list.hpp +++ b/engine/list_collection/list.hpp @@ -128,7 +128,7 @@ class List : public Collection { switch (record->GetRecordType()) { case RecordType::ListElem: return ExtractID(record->Key()); - case RecordType::ListHeader: + case RecordType::ListRecord: return DecodeID(record->Value()); default: GlobalLogger.Error("Wrong record type %u in ListID", @@ -140,7 +140,7 @@ class List : public Collection { static bool MatchType(const DLRecord* record) { RecordType type = record->GetRecordType(); - return type == RecordType::ListElem || type == RecordType::ListHeader; + return type == RecordType::ListElem || type == RecordType::ListRecord; } private: diff --git a/engine/list_collection/rebuilder.hpp b/engine/list_collection/rebuilder.hpp index 3f0b49f2..5f93dfa6 100644 --- a/engine/list_collection/rebuilder.hpp +++ b/engine/list_collection/rebuilder.hpp @@ -22,14 +22,13 @@ class ListRebuilder { }; ListRebuilder(PMEMAllocator* pmem_allocator, HashTable* hash_table, - LockTable* lock_table, ThreadManager* thread_manager, - uint64_t num_rebuild_threads, const CheckPoint& checkpoint) + LockTable* lock_table, uint64_t num_rebuild_threads, + const CheckPoint& checkpoint) : recovery_utils_(pmem_allocator), rebuilder_thread_cache_(num_rebuild_threads), pmem_allocator_(pmem_allocator), hash_table_(hash_table), lock_table_(lock_table), - thread_manager_(thread_manager), num_rebuild_threads_(num_rebuild_threads), checkpoint_(checkpoint) {} @@ -173,7 +172,7 @@ class ListRebuilder { if (!outdated) { auto lookup_result = hash_table_->Insert( - collection_name, RecordType::ListHeader, RecordStatus::Normal, + collection_name, RecordType::ListRecord, RecordStatus::Normal, list.get(), PointerType::List); switch (lookup_result.s) { case Status::Ok: { @@ -222,12 +221,10 @@ class ListRebuilder { } Status rebuildIndex(List* list) { + this_thread.id = next_tid_.fetch_add(1); + auto ul = list->AcquireLock(); - Status s = thread_manager_->MaybeInitThread(access_thread); - if (s != Status::Ok) { - return s; - } - defer(thread_manager_->Release(access_thread)); + auto iter = list->GetDLList()->GetRecordIterator(); iter->SeekToFirst(); while (iter->Valid()) { @@ -253,9 +250,10 @@ class ListRebuilder { } void addUnlinkedRecord(DLRecord* pmem_record) { - assert(access_thread.id >= 0); - rebuilder_thread_cache_[access_thread.id].unlinked_records.push_back( - pmem_record); + kvdk_assert(ThreadManager::ThreadID() >= 0, ""); + rebuilder_thread_cache_[ThreadManager::ThreadID() % + rebuilder_thread_cache_.size()] + .unlinked_records.push_back(pmem_record); } void cleanInvalidRecords() { @@ -296,12 +294,16 @@ class ListRebuilder { PMEMAllocator* 
pmem_allocator_; HashTable* hash_table_; LockTable* lock_table_; - ThreadManager* thread_manager_; const size_t num_rebuild_threads_; CheckPoint checkpoint_; SpinMutex lock_; std::unordered_map> invalid_lists_; std::unordered_map> rebuild_lists_; CollectionIDType max_recovered_id_; + + // We manually allocate recovery thread id for no conflict in multi-thread + // recovering + // Todo: do not hard code + std::atomic next_tid_{0}; }; } // namespace KVDK_NAMESPACE \ No newline at end of file diff --git a/engine/macros.hpp b/engine/macros.hpp index b40c546e..641c8522 100644 --- a/engine/macros.hpp +++ b/engine/macros.hpp @@ -4,6 +4,8 @@ #include #include +#include "kvdk/types.hpp" + #define to_hex(x) \ std::hex << std::setfill('0') << std::setw(sizeof(decltype(x)) * 2) << x \ << std::dec @@ -20,3 +22,25 @@ ":\t" + std::string{msg}}; \ } \ } + +namespace KVDK_NAMESPACE { +inline Status ExceptionToStatus(std::exception const& ex) { + if (dynamic_cast(&ex)) return Status::MemoryOverflow; + if (dynamic_cast(&ex)) return Status::OutOfRange; + if (dynamic_cast(&ex)) + return Status::InvalidArgument; + + return Status::Abort; +} + +#define KVDK_TRY try + +#define KVDK_HANDLE_EXCEPTIONS \ + catch (std::exception const& ex) { \ + return ExceptionToStatus(ex); \ + } \ + catch (...) { \ + return Status::Abort; \ + } + +} // namespace KVDK_NAMESPACE diff --git a/engine/pmem_allocator/free_list.cpp b/engine/pmem_allocator/free_list.cpp index e1aba2a5..88c16db6 100644 --- a/engine/pmem_allocator/free_list.cpp +++ b/engine/pmem_allocator/free_list.cpp @@ -280,7 +280,8 @@ void Freelist::Push(const SpaceEntry& entry) { auto b_offset = entry.offset / block_size_; space_map_.Set(b_offset, b_size); - auto& flist_thread_cache = flist_thread_cache_[access_thread.id]; + auto& flist_thread_cache = flist_thread_cache_[ThreadManager::ThreadID() % + flist_thread_cache_.size()]; if (b_size >= flist_thread_cache.small_entry_offsets_.size()) { size_t size_index = blockSizeIndex(b_size); std::lock_guard lg( @@ -387,7 +388,8 @@ void Freelist::MoveCachedEntriesToPool() { bool Freelist::getSmallEntry(uint32_t size, SpaceEntry* space_entry) { auto b_size = size / block_size_; - auto& flist_thread_cache = flist_thread_cache_[access_thread.id]; + auto& flist_thread_cache = flist_thread_cache_[ThreadManager::ThreadID() % + flist_thread_cache_.size()]; for (uint32_t i = b_size; i < flist_thread_cache.small_entry_offsets_.size(); i++) { search_entry: @@ -418,7 +420,8 @@ bool Freelist::getSmallEntry(uint32_t size, SpaceEntry* space_entry) { bool Freelist::getLargeEntry(uint32_t size, SpaceEntry* space_entry) { auto b_size = size / block_size_; - auto& flist_thread_cache = flist_thread_cache_[access_thread.id]; + auto& flist_thread_cache = flist_thread_cache_[ThreadManager::ThreadID() % + flist_thread_cache_.size()]; auto size_index = blockSizeIndex(b_size); for (size_t i = size_index; i < flist_thread_cache.large_entries_.size(); diff --git a/engine/pmem_allocator/pmem_allocator.cpp b/engine/pmem_allocator/pmem_allocator.cpp index 119c482f..f7307c97 100644 --- a/engine/pmem_allocator/pmem_allocator.cpp +++ b/engine/pmem_allocator/pmem_allocator.cpp @@ -35,7 +35,7 @@ void PMEMAllocator::Free(const SpaceEntry& space_entry) { if (space_entry.size > 0) { assert(space_entry.size % block_size_ == 0); free_list_.Push(space_entry); - LogDeallocation(access_thread.id, space_entry.size); + LogDeallocation(ThreadManager::ThreadID(), space_entry.size); } } @@ -237,7 +237,8 @@ SpaceEntry PMEMAllocator::Allocate(uint64_t size) { if (aligned_size 
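In the new macros.hpp above, the dynamic_cast calls (and several #include directives) have lost their angle-bracketed contents in this copy of the patch. Judging by the statuses returned, the intended mapping is plausibly the one below; the concrete exception types are an inference, not something the garbled hunk confirms:

#include <new>
#include <stdexcept>
#include "kvdk/types.hpp"

using kvdk::Status;

// Likely reconstruction: map well-known standard exceptions onto KVDK
// statuses, falling back to Abort for anything unrecognized.
inline Status ExceptionToStatus(std::exception const& ex) {
  if (dynamic_cast<std::bad_alloc const*>(&ex)) return Status::MemoryOverflow;
  if (dynamic_cast<std::out_of_range const*>(&ex)) return Status::OutOfRange;
  if (dynamic_cast<std::invalid_argument const*>(&ex))
    return Status::InvalidArgument;
  return Status::Abort;
}

The KVDK_TRY / KVDK_HANDLE_EXCEPTIONS pair then wraps each VHash entry point so that an exception escaping the body is converted to a Status instead of propagating to the caller.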
> segment_size_) { return space_entry; } - auto& palloc_thread_cache = palloc_thread_cache_[access_thread.id]; + auto& palloc_thread_cache = palloc_thread_cache_[ThreadManager::ThreadID() % + palloc_thread_cache_.size()]; while (palloc_thread_cache.segment_entry.size < aligned_size) { // allocate from free list space if (palloc_thread_cache.free_entry.size >= aligned_size) { @@ -260,13 +261,14 @@ SpaceEntry PMEMAllocator::Allocate(uint64_t size) { } palloc_thread_cache.free_entry.size -= aligned_size; palloc_thread_cache.free_entry.offset += aligned_size; - LogAllocation(access_thread.id, aligned_size); + LogAllocation(ThreadManager::ThreadID(), aligned_size); return space_entry; } if (palloc_thread_cache.free_entry.size > 0) { // Not a true free - LogAllocation(access_thread.id, palloc_thread_cache.free_entry.size); + LogAllocation(ThreadManager::ThreadID(), + palloc_thread_cache.free_entry.size); Free(palloc_thread_cache.free_entry); palloc_thread_cache.free_entry.size = 0; } @@ -276,7 +278,8 @@ SpaceEntry PMEMAllocator::Allocate(uint64_t size) { continue; } - LogAllocation(access_thread.id, palloc_thread_cache.segment_entry.size); + LogAllocation(ThreadManager::ThreadID(), + palloc_thread_cache.segment_entry.size); Free(palloc_thread_cache.segment_entry); // allocate a new segment, add remainning space of the old one // to the free list @@ -293,7 +296,7 @@ SpaceEntry PMEMAllocator::Allocate(uint64_t size) { persistSpaceEntry(space_entry.offset, space_entry.size); palloc_thread_cache.segment_entry.offset += space_entry.size; palloc_thread_cache.segment_entry.size -= space_entry.size; - LogAllocation(access_thread.id, space_entry.size); + LogAllocation(ThreadManager::ThreadID(), space_entry.size); return space_entry; } diff --git a/engine/pmem_allocator/pmem_allocator.hpp b/engine/pmem_allocator/pmem_allocator.hpp index a60cbb69..4a608ae3 100644 --- a/engine/pmem_allocator/pmem_allocator.hpp +++ b/engine/pmem_allocator/pmem_allocator.hpp @@ -112,25 +112,27 @@ class PMEMAllocator : public Allocator { void BatchFree(const std::vector& entries) { if (entries.size() > 0) { uint64_t freed = free_list_.BatchPush(entries); - LogDeallocation(access_thread.id, freed); + LogDeallocation(ThreadManager::ThreadID(), freed); } } - void LogAllocation(int tid, size_t sz) { + void LogAllocation(int64_t tid, size_t sz) { if (tid == -1) { global_allocated_size_.fetch_add(sz); } else { assert(tid >= 0); - palloc_thread_cache_[tid].allocated_sz += sz; + palloc_thread_cache_[tid % palloc_thread_cache_.size()].allocated_sz += + sz; } } - void LogDeallocation(int tid, size_t sz) { + void LogDeallocation(int64_t tid, size_t sz) { if (tid == -1) { global_allocated_size_.fetch_sub(sz); } else { assert(tid >= 0); - palloc_thread_cache_[tid].allocated_sz -= sz; + palloc_thread_cache_[tid % palloc_thread_cache_.size()].allocated_sz -= + sz; } } diff --git a/engine/sorted_collection/rebuilder.cpp b/engine/sorted_collection/rebuilder.cpp index 2c868bf3..4b27b691 100644 --- a/engine/sorted_collection/rebuilder.cpp +++ b/engine/sorted_collection/rebuilder.cpp @@ -40,7 +40,7 @@ SortedCollectionRebuilder::RebuildResult SortedCollectionRebuilder::Rebuild() { } Status SortedCollectionRebuilder::AddHeader(DLRecord* header_record) { - assert(header_record->GetRecordType() == RecordType::SortedHeader); + assert(header_record->GetRecordType() == RecordType::SortedRecord); bool linked_record = recovery_utils_.CheckAndRepairLinkage(header_record); if (!linked_record) { @@ -74,7 +74,8 @@ Status 
SortedCollectionRebuilder::AddElement(DLRecord* record) { } } else { if (segment_based_rebuild_ && - ++rebuilder_thread_cache_[access_thread.id] + ++rebuilder_thread_cache_[ThreadManager::ThreadID() % + rebuilder_thread_cache_.size()] .visited_skiplists[Skiplist::FetchID(record)] % kRestoreSkiplistStride == 0 && @@ -117,10 +118,6 @@ Status SortedCollectionRebuilder::Rollback( Status SortedCollectionRebuilder::initRebuildLists() { PMEMAllocator* pmem_allocator = kv_engine_->pmem_allocator_.get(); - Status s = kv_engine_->maybeInitAccessThread(); - if (s != Status::Ok) { - return s; - } // Keep headers with same id together for recognize outdated ones auto cmp = [](const DLRecord* header1, const DLRecord* header2) { @@ -245,7 +242,7 @@ Status SortedCollectionRebuilder::initRebuildLists() { } } linked_headers_.clear(); - return s; + return Status::Ok; } Status SortedCollectionRebuilder::segmentBasedIndexRebuild() { @@ -253,11 +250,7 @@ Status SortedCollectionRebuilder::segmentBasedIndexRebuild() { std::vector> fs; auto rebuild_segments_index = [&]() -> Status { - Status s = this->kv_engine_->maybeInitAccessThread(); - if (s != Status::Ok) { - return s; - } - defer(this->kv_engine_->ReleaseAccessThread()); + this_thread.id = next_tid_.fetch_add(1); for (auto iter = this->recovery_segments_.begin(); iter != this->recovery_segments_.end(); iter++) { if (!iter->second.visited) { @@ -467,6 +460,8 @@ void SortedCollectionRebuilder::linkSegmentDramNodes(SkiplistNode* start_node, } Status SortedCollectionRebuilder::linkHighDramNodes(Skiplist* skiplist) { + this_thread.id = next_tid_.fetch_add(1); + Splice splice(skiplist); for (uint8_t i = 1; i <= kMaxHeight; i++) { splice.prevs[i] = skiplist->HeaderNode(); @@ -492,12 +487,7 @@ Status SortedCollectionRebuilder::linkHighDramNodes(Skiplist* skiplist) { } Status SortedCollectionRebuilder::rebuildSkiplistIndex(Skiplist* skiplist) { - Status s = kv_engine_->maybeInitAccessThread(); - if (s != Status::Ok) { - return s; - } - defer(kv_engine_->ReleaseAccessThread()); - + this_thread.id = next_tid_.fetch_add(1); size_t num_elems = 0; Splice splice(skiplist); @@ -649,12 +639,12 @@ Status SortedCollectionRebuilder::insertHashIndex(const StringView& key, RecordType::SortedElem, ""); } else if (index_type == PointerType::Skiplist) { - record_type = RecordType::SortedHeader; + record_type = RecordType::SortedRecord; record_status = static_cast(index_ptr)->HeaderRecord()->GetRecordStatus(); kvdk_assert( static_cast(index_ptr)->HeaderRecord()->GetRecordType() == - RecordType::SortedHeader, + RecordType::SortedRecord, ""); } else { kvdk_assert(false, "Wrong type in sorted collection rebuilder"); diff --git a/engine/sorted_collection/rebuilder.hpp b/engine/sorted_collection/rebuilder.hpp index feafba84..278ed7a1 100644 --- a/engine/sorted_collection/rebuilder.hpp +++ b/engine/sorted_collection/rebuilder.hpp @@ -108,9 +108,10 @@ class SortedCollectionRebuilder { PointerType index_type); void addUnlinkedRecord(DLRecord* pmem_record) { - assert(access_thread.id >= 0); - rebuilder_thread_cache_[access_thread.id].unlinked_records.push_back( - pmem_record); + assert(ThreadManager::ThreadID() >= 0); + rebuilder_thread_cache_[ThreadManager::ThreadID() % + rebuilder_thread_cache_.size()] + .unlinked_records.push_back(pmem_record); } struct ThreadCache { @@ -140,6 +141,12 @@ class SortedCollectionRebuilder { CollectionIDType max_recovered_id_ = 0; // Select elements as a segment start point for segment based rebuild every // kRestoreSkiplistStride elements per skiplist + + // 
We manually allocate recovery thread id for no conflict in multi-thread + // recovering + // Todo: do not hard code + std::atomic next_tid_{0}; + const uint64_t kRestoreSkiplistStride = 10000; }; } // namespace KVDK_NAMESPACE \ No newline at end of file diff --git a/engine/sorted_collection/skiplist.cpp b/engine/sorted_collection/skiplist.cpp index 90d01d12..67c0f961 100644 --- a/engine/sorted_collection/skiplist.cpp +++ b/engine/sorted_collection/skiplist.cpp @@ -65,7 +65,7 @@ Skiplist::WriteResult Skiplist::SetExpireTime(ExpireTimeType expired_time, } DLRecord* pmem_record = DLRecord::PersistDLRecord( pmem_allocator_->offset2addr_checked(space_entry.offset), - space_entry.size, timestamp, RecordType::SortedHeader, + space_entry.size, timestamp, RecordType::SortedRecord, RecordStatus::Normal, pmem_allocator_->addr2offset_checked(header), header->prev, header->next, header->Key(), header->Value(), expired_time); bool success = Skiplist::Replace(header, pmem_record, HeaderNode(), @@ -310,10 +310,10 @@ bool Skiplist::lockInsertPosition(const StringView& inserting_key, auto check_order = [&]() { bool res = /*check next*/ (next_record->GetRecordType() == - RecordType::SortedHeader || + RecordType::SortedRecord || Compare(inserting_key, UserKey(next_record)) <= 0) && /*check prev*/ (prev_record->GetRecordType() == - RecordType::SortedHeader || + RecordType::SortedRecord || Compare(inserting_key, UserKey(prev_record)) > 0); return res; }; diff --git a/engine/sorted_collection/skiplist.hpp b/engine/sorted_collection/skiplist.hpp index 588bc7cf..a069ca48 100644 --- a/engine/sorted_collection/skiplist.hpp +++ b/engine/sorted_collection/skiplist.hpp @@ -305,7 +305,7 @@ class Skiplist : public Collection { static bool MatchType(DLRecord* record) { RecordType type = record->GetRecordType(); - return type == RecordType::SortedElem || type == RecordType::SortedHeader; + return type == RecordType::SortedElem || type == RecordType::SortedRecord; } // Remove a dl record from its skiplist by unlinking @@ -377,7 +377,7 @@ class Skiplist : public Collection { case RecordType::SortedElem: return ExtractID(record->Key()); break; - case RecordType::SortedHeader: + case RecordType::SortedRecord: return DecodeID(record->Value()); default: GlobalLogger.Error("Wrong record type %u in SkiplistID", diff --git a/engine/thread_manager.cpp b/engine/thread_manager.cpp index 1bbea1c7..a6510fbc 100644 --- a/engine/thread_manager.cpp +++ b/engine/thread_manager.cpp @@ -8,47 +8,44 @@ namespace KVDK_NAMESPACE { -void Thread::Release() { - assert(id == -1 || thread_manager != nullptr); - if (thread_manager) { - thread_manager->Release(*this); - thread_manager = nullptr; +constexpr size_t kMaxRecycleID = 1024; + +std::shared_ptr ThreadManager::manager_(new ThreadManager); + +Thread::~Thread() { + if (manager != nullptr) { + manager->Release(*this); } - id = -1; } -Thread::~Thread() { Release(); } - -Status ThreadManager::MaybeInitThread(Thread& t) { +void ThreadManager::MaybeInitThread(Thread& t) { if (t.id < 0) { - if (!usable_id_.empty()) { + if (!recycle_id_.empty()) { std::lock_guard lg(spin_); - if (!usable_id_.empty()) { - auto it = usable_id_.begin(); + if (!recycle_id_.empty()) { + auto it = recycle_id_.begin(); + t.manager = shared_from_this(); t.id = *it; - usable_id_.erase(it); - t.thread_manager = shared_from_this(); - return Status::Ok; + recycle_id_.erase(it); + return; } } int id = ids_.fetch_add(1, std::memory_order_relaxed); - if (static_cast(id) >= max_threads_) { - return Status::TooManyAccessThreads; - } + 
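During recovery the rebuilders no longer register with ThreadManager; each rebuild task stamps this_thread.id from the private next_tid_ counter above so that concurrent recovery threads land on distinct cache slots. The pattern in isolation (names and the counter type are illustrative):

#include <atomic>
#include <cstdint>
#include <vector>

std::atomic<int64_t> next_recovery_tid{0};
thread_local int64_t recovery_tid = -1;

void RebuildTask(std::vector<int>& per_slot_counters) {
  // Claim a dense id for this recovery task, mirroring
  // this_thread.id = next_tid_.fetch_add(1) in the rebuilders above.
  recovery_tid = next_recovery_tid.fetch_add(1);
  per_slot_counters[recovery_tid % per_slot_counters.size()]++;
}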
t.manager = shared_from_this(); t.id = id; - t.thread_manager = shared_from_this(); } - return Status::Ok; } -void ThreadManager::Release(const Thread& t) { - if (t.id >= 0) { - assert(static_cast(t.id) < max_threads_); +void ThreadManager::Release(Thread& t) { + if (t.manager.get() == this && t.id >= 0 && + recycle_id_.size() < kMaxRecycleID) { std::lock_guard lg(spin_); - usable_id_.insert(t.id); + recycle_id_.insert(t.id); } + t.id = -1; + t.manager = nullptr; } -thread_local Thread access_thread; +thread_local Thread this_thread; } // namespace KVDK_NAMESPACE diff --git a/engine/thread_manager.hpp b/engine/thread_manager.hpp index 748987dc..741f4122 100644 --- a/engine/thread_manager.hpp +++ b/engine/thread_manager.hpp @@ -17,27 +17,32 @@ class ThreadManager; struct Thread { public: - Thread() : id(-1), thread_manager(nullptr) {} + Thread() : id(-1), manager(nullptr) {} + int64_t id; + std::shared_ptr manager; + ~Thread(); - void Release(); - int id; - std::shared_ptr thread_manager; }; +extern thread_local Thread this_thread; + class ThreadManager : public std::enable_shared_from_this { public: - ThreadManager(uint32_t max_threads) : max_threads_(max_threads), ids_(0) {} - Status MaybeInitThread(Thread& t); - - void Release(const Thread& t); + static ThreadManager* Get() { return manager_.get(); } + static int64_t ThreadID() { + Get()->MaybeInitThread(this_thread); + return this_thread.id; + } + void MaybeInitThread(Thread& t); + void Release(Thread& t); private: - uint32_t max_threads_; - std::atomic ids_; - std::unordered_set usable_id_; + ThreadManager() : ids_(0), recycle_id_(), spin_() {} + + static std::shared_ptr manager_; + std::atomic ids_; + std::unordered_set recycle_id_; SpinMutex spin_; }; -extern thread_local Thread access_thread; - } // namespace KVDK_NAMESPACE \ No newline at end of file diff --git a/engine/transaction_impl.cpp b/engine/transaction_impl.cpp new file mode 100644 index 00000000..51bcde49 --- /dev/null +++ b/engine/transaction_impl.cpp @@ -0,0 +1,269 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2021 Intel Corporation + */ + +#include "transaction_impl.hpp" + +#include "kv_engine.hpp" + +namespace KVDK_NAMESPACE { +// To avoid deadlock in transaction, we abort locking key in +// random(kLockTimeoutMicrosecondsMin,kLockTimeoutMicrosecondsMax) and +// return Timeout in write operations +constexpr int64_t kLockTimeoutMicrosecondsMin = 5000; +constexpr int64_t kLockTimeoutMicrosecondsMax = 15000; + +TransactionImpl::TransactionImpl(KVEngine* engine) + : engine_(engine), timeout_(randomTimeout()) { + kvdk_assert(engine_ != nullptr, ""); + batch_.reset( + dynamic_cast(engine_->WriteBatchCreate().release())); + kvdk_assert(batch_ != nullptr, ""); +} + +TransactionImpl::~TransactionImpl() { Rollback(); } + +Status TransactionImpl::StringPut(const StringView key, + const StringView value) { + auto hash_table = engine_->GetHashTable(); + if (!tryLock(hash_table->GetLock(key))) { + status_ = Status::Timeout; + return status_; + } + + batch_->StringPut(key, value); + return Status::Ok; +} + +Status TransactionImpl::StringDelete(const StringView key) { + auto hash_table = engine_->GetHashTable(); + if (!tryLock(hash_table->GetLock(key))) { + status_ = Status::Timeout; + return status_; + } + + batch_->StringDelete(key); + return Status::Ok; +} + +Status TransactionImpl::StringGet(const StringView key, std::string* value) { + auto op = batch_->StringGet(key); + if (op != nullptr) { + if (op->op == WriteOp::Delete) { + return Status::NotFound; + } else { 
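The reworked thread_manager above turns ThreadManager into a process-wide singleton: an id is assigned lazily the first time ThreadID() is called on a thread, the hard max_threads_ cap (and with it the TooManyAccessThreads error) goes away, and up to kMaxRecycleID released ids are recycled when threads exit. A condensed standalone model of that scheme, simplified to a plain mutex and static state:

#include <cstdint>
#include <mutex>
#include <unordered_set>

class IdManager {
 public:
  // Lazily assigns an id on first use; the id is released (and possibly
  // recycled) when the thread exits.
  static int64_t CurrentId() {
    thread_local Holder holder;
    return holder.id;
  }

 private:
  struct Holder {
    int64_t id = Acquire();
    ~Holder() { Release(id); }
  };

  static int64_t Acquire() {
    std::lock_guard<std::mutex> lg(mu_);
    if (!recycled_.empty()) {
      int64_t id = *recycled_.begin();
      recycled_.erase(recycled_.begin());
      return id;
    }
    return next_++;
  }

  static void Release(int64_t id) {
    std::lock_guard<std::mutex> lg(mu_);
    if (recycled_.size() < 1024) recycled_.insert(id);  // cap, like kMaxRecycleID
  }

  static std::mutex mu_;
  static std::unordered_set<int64_t> recycled_;
  static int64_t next_;
};

std::mutex IdManager::mu_;
std::unordered_set<int64_t> IdManager::recycled_;
int64_t IdManager::next_ = 0;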
+ value->assign(op->value); + return Status::Ok; + } + } else { + auto hash_table = engine_->GetHashTable(); + if (!tryLock(hash_table->GetLock(key))) { + status_ = Status::Timeout; + return status_; + } + return engine_->Get(key, value); + } +} + +Status TransactionImpl::SortedPut(const StringView collection, + const StringView key, + const StringView value) { + acquireCollectionTransaction(); + auto hash_table = engine_->GetHashTable(); + auto lookup_result = + hash_table->Lookup(collection, RecordType::SortedRecord); + if (lookup_result.s != Status::Ok) { + kvdk_assert(lookup_result.s == Status::NotFound, ""); + return lookup_result.s; + } + Skiplist* skiplist = lookup_result.entry.GetIndex().skiplist; + auto internal_key = skiplist->InternalKey(key); + if (!tryLock(hash_table->GetLock(internal_key))) { + status_ = Status::Timeout; + return status_; + } + + batch_->SortedPut(collection, key, value); + return Status::Ok; +} + +Status TransactionImpl::SortedDelete(const StringView collection, + const StringView key) { + acquireCollectionTransaction(); + auto hash_table = engine_->GetHashTable(); + auto lookup_result = + hash_table->Lookup(collection, RecordType::SortedRecord); + if (lookup_result.s != Status::Ok) { + kvdk_assert(lookup_result.s == Status::NotFound, ""); + return lookup_result.s; + } + Skiplist* skiplist = lookup_result.entry.GetIndex().skiplist; + auto internal_key = skiplist->InternalKey(key); + if (!tryLock(hash_table->GetLock(internal_key))) { + status_ = Status::Timeout; + return status_; + } + + batch_->SortedDelete(collection, key); + return Status::Ok; +} + +Status TransactionImpl::SortedGet(const StringView collection, + const StringView key, std::string* value) { + auto op = batch_->SortedGet(collection, key); + if (op != nullptr) { + if (op->op == WriteOp::Delete) { + return Status::NotFound; + } else { + value->assign(op->value); + return Status::Ok; + } + } else { + acquireCollectionTransaction(); + auto hash_table = engine_->GetHashTable(); + auto lookup_result = + hash_table->Lookup(collection, RecordType::SortedRecord); + if (lookup_result.s != Status::Ok) { + kvdk_assert(lookup_result.s == Status::NotFound, ""); + return lookup_result.s; + } + Skiplist* skiplist = lookup_result.entry.GetIndex().skiplist; + auto internal_key = skiplist->InternalKey(key); + if (!tryLock(hash_table->GetLock(internal_key))) { + status_ = Status::Timeout; + return status_; + } + + return skiplist->Get(key, value); + } +} + +Status TransactionImpl::HashPut(const StringView collection, + const StringView key, const StringView value) { + acquireCollectionTransaction(); + auto hash_table = engine_->GetHashTable(); + auto lookup_result = + hash_table->Lookup(collection, RecordType::HashRecord); + if (lookup_result.s != Status::Ok) { + kvdk_assert(lookup_result.s == Status::NotFound, ""); + return lookup_result.s; + } + HashList* hlist = lookup_result.entry.GetIndex().hlist; + auto internal_key = hlist->InternalKey(key); + if (!tryLock(hash_table->GetLock(internal_key))) { + status_ = Status::Timeout; + return status_; + } + + batch_->HashPut(collection, key, value); + return Status::Ok; +} + +Status TransactionImpl::HashDelete(const StringView collection, + const StringView key) { + acquireCollectionTransaction(); + auto hash_table = engine_->GetHashTable(); + auto lookup_result = + hash_table->Lookup(collection, RecordType::HashRecord); + if (lookup_result.s != Status::Ok) { + kvdk_assert(lookup_result.s == Status::NotFound, ""); + return lookup_result.s; + } + HashList* hlist = 
lookup_result.entry.GetIndex().hlist; + auto internal_key = hlist->InternalKey(key); + if (!tryLock(hash_table->GetLock(internal_key))) { + status_ = Status::Timeout; + return status_; + } + + batch_->HashDelete(collection, key); + return Status::Ok; +} + +Status TransactionImpl::HashGet(const StringView collection, + const StringView key, std::string* value) { + auto op = batch_->HashGet(collection, key); + if (op != nullptr) { + if (op->op == WriteOp::Delete) { + return Status::NotFound; + } else { + value->assign(op->value); + return Status::Ok; + } + } else { + acquireCollectionTransaction(); + auto hash_table = engine_->GetHashTable(); + auto lookup_result = + hash_table->Lookup(collection, RecordType::HashRecord); + if (lookup_result.s != Status::Ok) { + kvdk_assert(lookup_result.s == Status::NotFound, ""); + return lookup_result.s; + } + HashList* hlist = lookup_result.entry.GetIndex().hlist; + auto internal_key = hlist->InternalKey(key); + if (!tryLock(hash_table->GetLock(internal_key))) { + status_ = Status::Timeout; + return status_; + } + + return hlist->Get(key, value); + } +} + +void TransactionImpl::acquireCollectionTransaction() { + if (ct_token_ == nullptr) { + ct_token_ = engine_->AcquireCollectionTransactionLock(); + } +} + +bool TransactionImpl::tryLock(SpinMutex* spin) { + auto iter = locked_.find(spin); + if (iter == locked_.end()) { + if (tryLockImpl(spin)) { + locked_.insert(spin); + return true; + } + return false; + } else { + return true; + } +} + +bool TransactionImpl::tryLockImpl(SpinMutex* spin) { + auto now = TimeUtils::microseconds_time(); + while (!spin->try_lock()) { + if (TimeUtils::microseconds_time() - now > timeout_) { + return false; + } + } + return true; +} + +Status TransactionImpl::Commit() { + Status s = engine_->CommitTransaction(this); + Rollback(); + return s; +} + +void TransactionImpl::Rollback() { + for (SpinMutex* s : locked_) { + s->unlock(); + } + locked_.clear(); + string_kv_.clear(); + sorted_kv_.clear(); + hash_kv_.clear(); + batch_->Clear(); + ct_token_ = nullptr; +} + +void TransactionImpl::SetLockTimeout(int64_t microseconds) { + timeout_ = microseconds; +} + +int64_t TransactionImpl::randomTimeout() { + return fast_random_64() % + (kLockTimeoutMicrosecondsMax - kLockTimeoutMicrosecondsMin) + + kLockTimeoutMicrosecondsMin; +} +} // namespace KVDK_NAMESPACE \ No newline at end of file diff --git a/engine/transaction_impl.hpp b/engine/transaction_impl.hpp new file mode 100644 index 00000000..00ef3bf6 --- /dev/null +++ b/engine/transaction_impl.hpp @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2021 Intel Corporation + */ + +#pragma once + +#include +#include +#include + +#include "alias.hpp" +#include "kvdk/transaction.hpp" +#include "write_batch_impl.hpp" + +namespace KVDK_NAMESPACE { + +class KVEngine; + +// Collections of in processing transaction should not be created or +// destroyed, we use this for communication between collection related +// transactions and collection create/destroy threads +class CollectionTransactionCV { + public: + struct TransactionToken { + TransactionToken(CollectionTransactionCV* cv) : cv_(cv) { + kvdk_assert(cv_ != nullptr, ""); + cv_->AcquireTransaction(); + } + + TransactionToken(const TransactionToken&) = delete; + TransactionToken(TransactionToken&&) = delete; + + ~TransactionToken() { cv_->FinishTransaction(); } + + private: + CollectionTransactionCV* cv_; + }; + + struct CollectionToken { + CollectionToken(CollectionTransactionCV* cv) : cv_(cv) { + kvdk_assert(cv_ 
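tryLockImpl above spins on SpinMutex::try_lock until a per-transaction deadline expires, and the deadline is randomized between 5 ms and 15 ms so that two transactions blocking on each other's keys give up at different times instead of retrying in lockstep; the caller then reports Status::Timeout and can roll back. The same idea against std::mutex and std::chrono (KVDK's SpinMutex, TimeUtils and fast_random_64 are replaced here, which is an assumption of this sketch):

#include <chrono>
#include <cstdint>
#include <mutex>
#include <random>

// Spin on try_lock until either the lock is taken or the deadline passes.
inline bool TryLockFor(std::mutex& mu, std::chrono::microseconds timeout) {
  auto deadline = std::chrono::steady_clock::now() + timeout;
  while (!mu.try_lock()) {
    if (std::chrono::steady_clock::now() > deadline) return false;
  }
  return true;
}

// Randomized timeout in [5000, 15000] microseconds, mirroring the patch's bounds.
inline std::chrono::microseconds RandomTimeout() {
  static thread_local std::mt19937_64 rng{std::random_device{}()};
  std::uniform_int_distribution<int64_t> dist(5000, 15000);
  return std::chrono::microseconds(dist(rng));
}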
!= nullptr, ""); + cv_->AcquireCollection(); + } + + ~CollectionToken() { cv_->FinishCollection(); } + + CollectionToken(const CollectionToken&) = delete; + CollectionToken(CollectionToken&&) = delete; + + private: + CollectionTransactionCV* cv_; + }; + + CollectionTransactionCV() = default; + + CollectionTransactionCV(const CollectionTransactionCV&) = delete; + + void AcquireTransaction() { + std::unique_lock ul(spin_); + while (processing_collection_ > 0) { + cv_.wait(ul); + } + kvdk_assert(processing_collection_ == 0, ""); + processing_transaction_++; + } + + void AcquireCollection() { + std::unique_lock ul(spin_); + // collection create/destroy has higher priority than txn as it's faster, + // so we add processing cnt here to forbit hungry + processing_collection_++; + while (processing_transaction_ > 0) { + cv_.wait(ul); + } + kvdk_assert(processing_transaction_ == 0, ""); + } + + void FinishTransaction() { + std::unique_lock ul(spin_); + if (--processing_transaction_ == 0) { + cv_.notify_all(); + } + } + + void FinishCollection() { + std::unique_lock ul(spin_); + if (--processing_collection_ == 0) { + cv_.notify_all(); + } + } + + private: + std::condition_variable_any cv_; + SpinMutex spin_; + int processing_collection_ = 0; + int processing_transaction_ = 0; +}; + +class TransactionImpl final : public Transaction { + public: + TransactionImpl(KVEngine* engine); + ~TransactionImpl() final; + Status StringPut(const StringView key, const StringView value) final; + Status StringDelete(const StringView key) final; + Status StringGet(const StringView key, std::string* value) final; + Status SortedPut(const StringView collection, const StringView key, + const StringView value) final; + Status SortedDelete(const StringView collection, const StringView key) final; + Status SortedGet(const StringView collection, const StringView key, + std::string* value) final; + Status HashPut(const StringView collection, const StringView key, + const StringView value) final; + Status HashDelete(const StringView collection, const StringView key) final; + Status HashGet(const StringView collection, const StringView key, + std::string* value) final; + Status Commit() final; + void Rollback() final; + Status InternalStatus() final { return status_; } + + // This used by kv engine + WriteBatchImpl* GetBatch() { return batch_.get(); } + + // Set lock time out while acquiring a key lock in transaction, if <0, + // operations will immediately return timeout while failed to lock a key + void SetLockTimeout(int64_t micro_seconds); + + private: + struct KVOp { + WriteOp op; + std::string value; + }; + + bool tryLock(SpinMutex* spin); + bool tryLockImpl(SpinMutex* spin); + void acquireCollectionTransaction(); + int64_t randomTimeout(); + + KVEngine* engine_; + Status status_; + std::unordered_map> + sorted_kv_; + std::unordered_map> + hash_kv_; + std::unordered_map string_kv_; + std::unique_ptr batch_; + // TODO use std::unique_lock + std::unordered_set locked_; + std::unique_ptr ct_token_; + int64_t timeout_; +}; +} // namespace KVDK_NAMESPACE \ No newline at end of file diff --git a/engine/utils/utils.hpp b/engine/utils/utils.hpp index 39cd6062..ed501b81 100644 --- a/engine/utils/utils.hpp +++ b/engine/utils/utils.hpp @@ -155,7 +155,7 @@ inline int compare_string_view(const StringView& src, auto size = std::min(src.size(), target.size()); for (uint32_t i = 0; i < size; i++) { if (src[i] != target[i]) { - return src[i] - target[i]; + return (unsigned char)src[i] - (unsigned char)target[i]; } } return src.size() - 
target.size(); @@ -512,6 +512,8 @@ inline UnixTimeType unix_time(void) { inline int64_t millisecond_time() { return unix_time() / 1000; } +inline int64_t microseconds_time() { return unix_time(); } + inline bool CheckIsExpired(ExpireTimeType expired_time) { if (expired_time >= 0 && expired_time <= millisecond_time()) { return true; diff --git a/engine/version/old_records_cleaner.cpp b/engine/version/old_records_cleaner.cpp index 53c981de..7dc9d25d 100644 --- a/engine/version/old_records_cleaner.cpp +++ b/engine/version/old_records_cleaner.cpp @@ -9,6 +9,59 @@ namespace KVDK_NAMESPACE { +OldRecordsCleaner::~OldRecordsCleaner() { + // Clean twice to ensure all resources are released. + TryGlobalClean(); + TryGlobalClean(); + for (size_t i = 0; i < cleaner_thread_cache_.size(); i++) { + auto& tc = cleaner_thread_cache_[i]; + std::lock_guard lg(tc.old_records_lock); + if (!tc.pending_free_space_entries.empty()) + GlobalLogger.Error("PendingFree leaked!"); + for (auto const& q : tc.local_queues_) { + if (!q.empty()) GlobalLogger.Error("DRAM leaked!"); + } + } + if (!global_pending_free_space_entries_.empty()) + GlobalLogger.Error("PendingFree leaked!"); +} + +void OldRecordsCleaner::RegisterDelayDeleter(IDeleter& deleter) { + size_t idx = global_queues_.size(); + delay_deleters_[&deleter] = idx; + global_queues_.emplace_back(); + for (size_t i = 0; i < cleaner_thread_cache_.size(); i++) + cleaner_thread_cache_[i].local_queues_.emplace_back(); +} + +void OldRecordsCleaner::DelayDelete(IDeleter& deleter, void* obj) { + kvdk_assert(this_thread.id >= 0, ""); + auto& tc = cleaner_thread_cache_[ThreadManager::ThreadID() % + cleaner_thread_cache_.size()]; + std::lock_guard guard{tc.old_records_lock}; + + TimestampType ts = kv_engine_->version_controller_.GetCurrentTimestamp(); + + size_t idx = delay_deleters_.at(&deleter); + tc.local_queues_[idx].emplace_back(ts, obj); + + constexpr size_t kMaxFreePending = 16; + tryPurge(deleter, tc.local_queues_[idx], kMaxFreePending); +} + +void OldRecordsCleaner::tryPurge(IDeleter& deleter, PendingQueue& pending_kvs, + size_t lim) { + maybeUpdateOldestSnapshot(); + TimestampType acc_ts = + kv_engine_->version_controller_.LocalOldestSnapshotTS(); + for (size_t i = 0; i < lim && !pending_kvs.empty(); i++) { + auto const& pair = pending_kvs.front(); + if (pair.first >= acc_ts) break; + deleter.Delete(pair.second); + pending_kvs.pop_front(); + } +} + void OldRecordsCleaner::TryGlobalClean() { std::vector space_to_free; // Update recorded oldest snapshot up to state so we can know which records @@ -32,6 +85,11 @@ void OldRecordsCleaner::TryGlobalClean() { } cleaner_thread_cache.pending_free_space_entries.clear(); } + + std::lock_guard lg(cleaner_thread_cache.old_records_lock); + for (auto const& del_idx : delay_deleters_) + tryPurge(*del_idx.first, + cleaner_thread_cache.local_queues_[del_idx.second], -1U); } auto iter = global_pending_free_space_entries_.begin(); diff --git a/engine/version/old_records_cleaner.hpp b/engine/version/old_records_cleaner.hpp index bb75edc9..5d832532 100644 --- a/engine/version/old_records_cleaner.hpp +++ b/engine/version/old_records_cleaner.hpp @@ -5,6 +5,8 @@ #include #include +#include +#include #include #include "../alias.hpp" @@ -19,6 +21,12 @@ namespace KVDK_NAMESPACE { class KVEngine; +class IDeleter { + public: + // Called by OldRecordsCleaner for actual deletion. 
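To make the new delay-delete hook concrete, here is a minimal sketch of a component plugging into it. DummyRecord, DummyRecordDeleter, cleaner, dummy_deleter and old_record are hypothetical names used only for illustration; only IDeleter, RegisterDelayDeleter() and DelayDelete() come from this patch.

#include <string>

struct DummyRecord {            // hypothetical payload type
  std::string data;
};

class DummyRecordDeleter final : public IDeleter {
 public:
  // Invoked by OldRecordsCleaner once no live snapshot can still see obj.
  void Delete(void* obj) final { delete static_cast<DummyRecord*>(obj); }
};

// During single-threaded engine initialization:
//   cleaner.RegisterDelayDeleter(dummy_deleter);
// When an old version becomes unreachable for new readers:
//   cleaner.DelayDelete(dummy_deleter, old_record);
// Actual freeing happens later in tryPurge()/TryGlobalClean(), once the
// recorded oldest snapshot timestamp has advanced past the record's timestamp.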
+ virtual void Delete(void* obj) = 0; +}; + // OldRecordsCleaner is used to clean old version PMem records of kvdk // // To support multi-version machenism and consistent backup of kvdk, @@ -31,23 +39,40 @@ class OldRecordsCleaner { assert(kv_engine_ != nullptr); } + ~OldRecordsCleaner(); + + // Warning: not thread safe. Must be called during kv_engine initialization. + void RegisterDelayDeleter(IDeleter& deleter); + + void DelayDelete(IDeleter& deleter, void* obj); + // Try to clean global old records void TryGlobalClean(); private: + using PendingQueue = std::deque>; + struct CleanerThreadCache { std::deque pending_free_space_entries{}; + std::vector local_queues_; SpinMutex old_records_lock; }; const uint64_t kLimitCachedDeleteRecords = 1000000; void maybeUpdateOldestSnapshot(); + // Try purging some entries from a locked PendingQueue with associated + // deleter. + void tryPurge(IDeleter& deleter, PendingQueue& pending_kvs, size_t lim); + KVEngine* kv_engine_; Array cleaner_thread_cache_; std::deque global_pending_free_space_entries_; + + std::unordered_map delay_deleters_; + std::vector global_queues_; }; } // namespace KVDK_NAMESPACE diff --git a/engine/version/version_controller.hpp b/engine/version/version_controller.hpp index 7c714d32..97f1b50f 100644 --- a/engine/version/version_controller.hpp +++ b/engine/version/version_controller.hpp @@ -82,7 +82,9 @@ class VersionController { public: LocalSnapshotHolder(VersionController* o) : owner_{o} { owner_->HoldLocalSnapshot(); - ts_ = owner_->version_thread_cache_[access_thread.id] + ts_ = owner_ + ->version_thread_cache_[ThreadManager::ThreadID() % + owner_->version_thread_cache_.size()] .holding_snapshot.timestamp; }; LocalSnapshotHolder(LocalSnapshotHolder const&) = delete; @@ -145,7 +147,9 @@ class VersionController { BatchWriteToken(VersionController* o) : owner_{o}, ts_{owner_->GetCurrentTimestamp()} { ts_ = owner_->GetCurrentTimestamp(); - auto& tc = owner_->version_thread_cache_[access_thread.id]; + auto& tc = + owner_->version_thread_cache_[ThreadManager::ThreadID() % + owner_->version_thread_cache_.size()]; kvdk_assert(tc.batch_write_ts == kMaxTimestamp, ""); tc.batch_write_ts = ts_; } @@ -158,7 +162,9 @@ class VersionController { } ~BatchWriteToken() { if (owner_ != nullptr) { - auto& tc = owner_->version_thread_cache_[access_thread.id]; + auto& tc = + owner_->version_thread_cache_[ThreadManager::ThreadID() % + owner_->version_thread_cache_.size()]; tc.batch_write_ts = kMaxTimestamp; } } @@ -196,23 +202,23 @@ class VersionController { } inline void HoldLocalSnapshot() { - kvdk_assert(access_thread.id >= 0 && static_cast(access_thread.id) < - version_thread_cache_.size(), + kvdk_assert(ThreadManager::ThreadID() >= 0, "Uninitialized thread in NewLocalSnapshot"); - kvdk_assert( - version_thread_cache_[access_thread.id].holding_snapshot.timestamp == - kMaxTimestamp, - "Previous LocalSnapshot not released yet!"); - version_thread_cache_[access_thread.id].holding_snapshot.timestamp = - GetCurrentTimestamp(); + kvdk_assert(version_thread_cache_[ThreadManager::ThreadID() % + version_thread_cache_.size()] + .holding_snapshot.timestamp == kMaxTimestamp, + "Previous LocalSnapshot not released yet!"); + version_thread_cache_[ThreadManager::ThreadID() % + version_thread_cache_.size()] + .holding_snapshot.timestamp = GetCurrentTimestamp(); } inline void ReleaseLocalSnapshot() { - kvdk_assert(access_thread.id >= 0 && static_cast(access_thread.id) < - version_thread_cache_.size(), + kvdk_assert(ThreadManager::ThreadID() >= 0, "Uninitialized 
thread in ReleaseLocalSnapshot"); - version_thread_cache_[access_thread.id].holding_snapshot.timestamp = - kMaxTimestamp; + version_thread_cache_[ThreadManager::ThreadID() % + version_thread_cache_.size()] + .holding_snapshot.timestamp = kMaxTimestamp; } inline const SnapshotImpl& GetLocalSnapshot(size_t thread_num) { @@ -222,10 +228,11 @@ class VersionController { } inline const SnapshotImpl& GetLocalSnapshot() { - kvdk_assert(access_thread.id >= 0 && static_cast(access_thread.id) < - version_thread_cache_.size(), + kvdk_assert(ThreadManager::ThreadID() >= 0, "Uninitialized thread in GetLocalSnapshot"); - return version_thread_cache_[access_thread.id].holding_snapshot; + return version_thread_cache_[ThreadManager::ThreadID() % + version_thread_cache_.size()] + .holding_snapshot; } // Create a new global snapshot diff --git a/engine/write_batch_impl.hpp b/engine/write_batch_impl.hpp index 86a2cc57..7ce49045 100644 --- a/engine/write_batch_impl.hpp +++ b/engine/write_batch_impl.hpp @@ -14,17 +14,25 @@ namespace KVDK_NAMESPACE { -struct Splice; - class WriteBatchImpl final : public WriteBatch { public: struct StringOp { + StringOp(WriteOp o, const StringView& k, const StringView& v) + : op(o), key(string_view_2_string(k)), value(string_view_2_string(v)) {} + WriteOp op; std::string key; std::string value; }; struct SortedOp { + SortedOp(WriteOp o, const StringView& c, const StringView& k, + const StringView& v) + : op(o), + collection(string_view_2_string(c)), + key(string_view_2_string(k)), + value(string_view_2_string(v)) {} + WriteOp op; std::string collection; std::string key; @@ -32,6 +40,13 @@ class WriteBatchImpl final : public WriteBatch { }; struct HashOp { + HashOp(WriteOp o, const StringView& c, const StringView& k, + const StringView& v) + : op(o), + collection(string_view_2_string(c)), + key(string_view_2_string(k)), + value(string_view_2_string(v)) {} + WriteOp op; std::string collection; std::string key; @@ -59,44 +74,77 @@ class WriteBatchImpl final : public WriteBatch { } }; - void StringPut(std::string const& key, std::string const& value) final { + void StringPut(const StringView key, const StringView value) final { StringOp op{WriteOp::Put, key, value}; string_ops_.erase(op); string_ops_.insert(op); } - void StringDelete(std::string const& key) final { + void StringDelete(const StringView key) final { StringOp op{WriteOp::Delete, key, std::string{}}; string_ops_.erase(op); string_ops_.insert(op); } - void SortedPut(std::string const& key, std::string const& field, - std::string const& value) final { - SortedOp op{WriteOp::Put, key, field, value}; + void SortedPut(const StringView collection, const StringView key, + const StringView value) final { + SortedOp op{WriteOp::Put, collection, key, value}; sorted_ops_.erase(op); sorted_ops_.insert(op); } - void SortedDelete(std::string const& key, std::string const& field) final { - SortedOp op{WriteOp::Delete, key, field, std::string{}}; + void SortedDelete(const StringView collection, const StringView key) final { + SortedOp op{WriteOp::Delete, collection, key, std::string{}}; sorted_ops_.erase(op); sorted_ops_.insert(op); } - void HashPut(std::string const& key, std::string const& field, - std::string const& value) final { - HashOp op{WriteOp::Put, key, field, value}; + void HashPut(const StringView collection, const StringView key, + const StringView value) final { + HashOp op{WriteOp::Put, collection, key, value}; hash_ops_.erase(op); hash_ops_.insert(op); } - void HashDelete(std::string const& key, std::string const& field) 
final { - HashOp op{WriteOp::Delete, key, field, std::string{}}; + void HashDelete(const StringView collection, const StringView key) final { + HashOp op{WriteOp::Delete, collection, key, std::string{}}; hash_ops_.erase(op); hash_ops_.insert(op); } + // Get a string op from this batch + // if key not exist in this batch, return nullptr + const StringOp* StringGet(const StringView key) { + StringOp op{WriteOp::Put, key, ""}; + auto iter = string_ops_.find(op); + if (iter == string_ops_.end()) { + return nullptr; + } + return &(*iter); + } + + // Get a sorted op from this batch + // if collection key not exist in this batch, return nullptr + const SortedOp* SortedGet(const StringView collection, const StringView key) { + SortedOp op{WriteOp::Put, collection, key, ""}; + auto iter = sorted_ops_.find(op); + if (iter == sorted_ops_.end()) { + return nullptr; + } + return &(*iter); + } + + // Get a hash op from this batch + // if the collection key not exist in this batch, return nullptr + const HashOp* HashGet(const StringView collection, const StringView key) { + HashOp op{WriteOp::Put, collection, key, ""}; + auto iter = hash_ops_.find(op); + if (iter == hash_ops_.end()) { + return nullptr; + } + return &(*iter); + } + void Clear() final { string_ops_.clear(); sorted_ops_.clear(); diff --git a/examples/kvredis/redis b/examples/kvredis/redis new file mode 160000 index 00000000..4930d19e --- /dev/null +++ b/examples/kvredis/redis @@ -0,0 +1 @@ +Subproject commit 4930d19e70c391750479951022e207e19111eb55 diff --git a/examples/tutorial/c_api_tutorial.c b/examples/tutorial/c_api_tutorial.c index 91c712d8..df5d7fc5 100644 --- a/examples/tutorial/c_api_tutorial.c +++ b/examples/tutorial/c_api_tutorial.c @@ -26,7 +26,7 @@ static int StrCmp(const char* a, size_t alen, const char* b, size_t blen) { return r; } -void AnonymousCollectionExample(KVDKEngine* kvdk_engine) { +void StringExample(KVDKEngine* kvdk_engine) { const char* key1 = "key1"; const char* key2 = "key2"; const char* value1 = "value1"; @@ -301,7 +301,7 @@ void CompFuncForSortedCollectionExample(KVDKEngine* kvdk_engine) { KVDKDestroySortedCollectionConfigs(s_configs); } -void BatchWriteAnonCollectionExample(KVDKEngine* kvdk_engine) { +void BatchWriteStringExample(KVDKEngine* kvdk_engine) { const char* key1 = "key1"; const char* key2 = "key2"; const char* value1 = "value1"; @@ -324,12 +324,52 @@ void BatchWriteAnonCollectionExample(KVDKEngine* kvdk_engine) { assert(s == Ok); cmp = StrCmp(read_v2, read_v2_len, value2, strlen(value2)); assert(cmp == 0); - printf("Successfully performed BatchWrite on anonymous global collection.\n"); + printf("Successfully performed BatchWrite on String.\n"); KVDKWriteBatchDestory(kvdk_wb); free(read_v1); free(read_v2); } +void TransactionStringExample(KVDKEngine* kvdk_engine) { + const char* receiver = "Jack"; + const char* payer = "Tom"; + const char* payer_balance = "10"; + const char* receiver_balance = "0"; + KVDKWriteOptions* write_option = KVDKCreateWriteOptions(); + + KVDKPut(kvdk_engine, payer, strlen(payer), payer_balance, + strlen(payer_balance), write_option); + KVDKPut(kvdk_engine, receiver, strlen(receiver), receiver_balance, + strlen(receiver_balance), write_option); + + KVDKTransaction* txn = KVDKTransactionCreate(kvdk_engine); + assert(txn != NULL); + KVDKStatus s = + KVDKTransactionStringPut(txn, payer, strlen(payer), "0", strlen("0")); + assert(s == Ok); + s = KVDKTransactionStringPut(txn, receiver, strlen(receiver), "10", + strlen("10")); + assert(s == Ok); + s = KVDKTransactionCommit(txn); + 
assert(s == Ok); + + char* val; + size_t val_len; + s = KVDKGet(kvdk_engine, payer, strlen(payer), &val_len, &val); + assert(s == Ok); + assert(val_len == strlen("0")); + assert(memcmp(val, "0", val_len) == 0); + free(val); + s = KVDKGet(kvdk_engine, receiver, strlen(receiver), &val_len, &val); + assert(s == Ok); + assert(val_len == strlen("10")); + assert(memcmp(val, "10", val_len) == 0); + free(val); + + KVDKTransactionDestory(txn); + printf("Successfully performed Transaction on String.\n"); +} + void HashesCollectionExample(KVDKEngine* kvdk_engine) { const char* nums[10] = {"9", "5", "2", "0", "7", "3", "1", "8", "6", "4"}; const char* hash_collection = "hash_collection"; @@ -739,7 +779,7 @@ int main() { ModifyExample(kvdk_engine); // Anonymous Global Collection Example - AnonymousCollectionExample(kvdk_engine); + StringExample(kvdk_engine); // Named Sorted Collection Example SortedCollectionExample(kvdk_engine); @@ -749,8 +789,11 @@ int main() { CompFuncForSortedCollectionExample(kvdk_engine); - // BatchWrite on Anonymous Global Collection Example - BatchWriteAnonCollectionExample(kvdk_engine); + // BatchWrite on String Example + BatchWriteStringExample(kvdk_engine); + + // Transaction on String Example + TransactionStringExample(kvdk_engine); // Hashes Collection Example HashesCollectionExample(kvdk_engine); diff --git a/include/kvdk/configs.hpp b/include/kvdk/configs.hpp index 1bf89b0a..0dca292c 100644 --- a/include/kvdk/configs.hpp +++ b/include/kvdk/configs.hpp @@ -19,9 +19,6 @@ enum class LogLevel : uint8_t { None, }; -// A snapshot indicates a immutable view of a KVDK engine at a certain time -struct Snapshot {}; - // Configs of created sorted collection // For correctness of encoding, please add new config field in the end of the // existing fields @@ -31,11 +28,15 @@ struct SortedCollectionConfigs { }; struct Configs { - // Max number of access threads to read/write data to kvdk instance. + // TODO: rename to concurrent internal threads + // + // Max number of concurrent threads read/write the kvdk instance internally. + // Set it to the number of the hyper-threads to get best performance // - // Notice that the allocated resources of a access thread would be released - // only if the thread exited or call KVEngine::ReleaseAccessThread(). - uint64_t max_access_threads = 48; + // Notice: you can call KVDK API with any number of threads, but if your + // parallel threads more than max_access_threads, the performance will be + // degraded due to synchronization cost + uint64_t max_access_threads = 64; // Size of PMem space to store KV data, this is not scalable in current // edition. 
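As a side note on the max_access_threads change above, a minimal sketch of how a caller might size it when opening an engine. The PMem path is a placeholder and the public namespace is assumed to be kvdk; only Configs::max_access_threads and Engine::Open() are taken from this patch.

#include <cstdio>
#include <thread>

#include "kvdk/engine.hpp"

int main() {
  kvdk::Configs configs;
  // One internal slot per hardware thread; more caller threads still work,
  // they just share slots and pay extra synchronization cost.
  configs.max_access_threads = std::thread::hardware_concurrency();

  kvdk::Engine* engine = nullptr;
  kvdk::Status s =
      kvdk::Engine::Open("/mnt/pmem0/kvdk_demo", &engine, configs, stdout);
  if (s != kvdk::Status::Ok) {
    return 1;
  }
  // ... use the engine ...
  delete engine;  // closing the instance is just deleting the pointer
  return 0;
}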
diff --git a/include/kvdk/engine.h b/include/kvdk/engine.h index e4879732..41c4fae0 100644 --- a/include/kvdk/engine.h +++ b/include/kvdk/engine.h @@ -18,6 +18,7 @@ typedef struct KVDKEngine KVDKEngine; typedef struct KVDKConfigs KVDKConfigs; typedef struct KVDKWriteOptions KVDKWriteOptions; typedef struct KVDKWriteBatch KVDKWriteBatch; +typedef struct KVDKTransaction KVDKTransaction; typedef struct KVDKSortedIterator KVDKSortedIterator; typedef struct KVDKListIterator KVDKListIterator; typedef struct KVDKHashIterator KVDKHashIterator; @@ -63,7 +64,6 @@ extern KVDKStatus KVDKBackup(KVDKEngine* engine, const char* backup_path, extern KVDKStatus KVDKRestore(const char* name, const char* backup_log, const KVDKConfigs* config, FILE* log_file, KVDKEngine** engine); -extern void KVDKReleaseAccessThread(KVDKEngine* engine); extern void KVDKCloseEngine(KVDKEngine* engine); extern void KVDKRemovePMemContents(const char* name); extern KVDKSnapshot* KVDKGetSnapshot(KVDKEngine* engine, int make_checkpoint); @@ -102,7 +102,38 @@ extern void KVDKWriteBatchHashDelete(KVDKWriteBatch* batch, extern KVDKStatus KVDKBatchWrite(KVDKEngine* engine, KVDKWriteBatch const* batch); -// For Anonymous Global Collection +// For Transactions +extern KVDKTransaction* KVDKTransactionCreate(KVDKEngine* engine); +extern void KVDKTransactionDestory(KVDKTransaction* txn); +extern KVDKStatus KVDKTransactionStringPut(KVDKTransaction* txn, + char const* key_data, size_t key_len, + char const* val_data, + size_t val_len); +extern KVDKStatus KVDKTransactionStringDelete(KVDKTransaction* txn, + char const* key_data, + size_t key_len); +extern KVDKStatus KVDKTransactionSortedPut( + KVDKTransaction* txn, char const* collection, size_t collection_len, + char const* key_data, size_t key_len, char const* val_data, size_t val_len); +extern KVDKStatus KVDKTransactionSortedDelete(KVDKTransaction* txn, + char const* collection, + size_t collection_len, + char const* key_data, + size_t key_len); +extern KVDKStatus KVDKTransactionHashPut(KVDKTransaction* txn, + char const* collection, + size_t collection_len, + char const* key_data, size_t key_len, + char const* val_data, size_t val_len); +extern KVDKStatus KVDKTransactionHashDelete(KVDKTransaction* txn, + char const* collection, + size_t collection_len, + char const* key_data, + size_t key_len); +extern KVDKStatus KVDKTransactionCommit(KVDKTransaction* txn); +extern void KVDKTransactionRollback(KVDKTransaction* txn); + +// For String KV extern KVDKStatus KVDKGet(KVDKEngine* engine, const char* key, size_t key_len, size_t* val_len, char** val); extern KVDKStatus KVDKPut(KVDKEngine* engine, const char* key, size_t key_len, diff --git a/include/kvdk/engine.hpp b/include/kvdk/engine.hpp index fcc1880b..ca77a517 100644 --- a/include/kvdk/engine.hpp +++ b/include/kvdk/engine.hpp @@ -10,6 +10,8 @@ #include "comparator.hpp" #include "configs.hpp" #include "iterator.hpp" +#include "snapshot.hpp" +#include "transaction.hpp" #include "types.hpp" #include "write_batch.hpp" @@ -21,7 +23,7 @@ class Engine { public: // Open a new KVDK instance or restore a existing KVDK instance // - // Para: + // Args: // * engine_path: indicates the dir path that persist the instance // * engine_ptr: store the pointer to restored instance // * configs: engine configs4 @@ -31,12 +33,12 @@ class Engine { // Return Status::Ok on sucess, return other status for any error // // To close the instance, just delete *engine_ptr. 
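For the C++ counterpart of the transaction API declared above (see include/kvdk/transaction.hpp later in this patch), a hedged sketch of the intended commit/rollback flow. It assumes an already opened engine pointer named engine and a kvdk namespace; the only member names used (TransactionCreate, StringPut, Commit, Rollback, Status::Timeout) are introduced by this patch.

std::unique_ptr<kvdk::Transaction> txn = engine->TransactionCreate();
kvdk::Status s = txn->StringPut("Tom", "0");
if (s == kvdk::Status::Ok) {
  s = txn->StringPut("Jack", "10");
}
if (s == kvdk::Status::Ok) {
  s = txn->Commit();   // both writes become visible atomically
} else {
  txn->Rollback();     // e.g. Status::Timeout from lock contention; releases held locks
}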
- static Status Open(const std::string& engine_path, Engine** engine_ptr, + static Status Open(const StringView engine_path, Engine** engine_ptr, const Configs& configs, FILE* log_file = stdout); // Restore a KVDK instance from a backup log file. // - // Para: + // Args: // * engine_path: indicates the dir path that persist the instance // * backup_log: the backup log file restored from // * engine_ptr: store the pointer to restored instance @@ -48,20 +50,47 @@ class Engine { // // Notice: // "engine_path" should be an empty dir - static Status Restore(const std::string& engine_path, - const std::string& backup_log, Engine** engine_ptr, + static Status Restore(const StringView engine_path, + const StringView backup_log, Engine** engine_ptr, const Configs& configs, FILE* log_file = stdout); + // Get type of key, it can be String, SortedCollection, HashCollection or List + // + // Return: + // Status::Ok and store type to "*type" on success + // Status::NotFound if key does not exist virtual Status TypeOf(StringView key, ValueType* type) = 0; - // Insert a STRING-type KV to set "key" to hold "value", return Ok on - // successful persistence, return non-Ok on any error. + // Insert a STRING-type KV to set "key" to hold "value". + // + // Args: + // *options: customized write options + // + // Return: + // Status Ok on success . + // Status::WrongType if key exists but is a collection. + // Status::PMemOverflow/Status::MemoryOverflow if PMem/DRAM exhausted. virtual Status Put(const StringView key, const StringView value, const WriteOptions& options = WriteOptions()) = 0; + // Search the STRING-type KV of "key" in the kvdk instance. + // + // Return: + // Return Status::Ok and store the corresponding value to *value on success. + // Return Status::NotFound if the "key" does not exist. + virtual Status Get(const StringView key, std::string* value) = 0; + + // Remove STRING-type KV of "key". + // + // Return: + // Status::Ok on success or the "key" did not exist + // Status::WrongType if key exists but is a collection type + // Status::PMemOverflow if PMem exhausted + virtual Status Delete(const StringView key) = 0; + // Modify value of existing key in the engine // - // Para: + // Args: // * modify_func: customized function to modify existing value of key. See // definition of ModifyFunc (types.hpp) for more details. // * modify_args: customized arguments of modify_func. @@ -74,43 +103,77 @@ class Engine { void* modify_args, const WriteOptions& options = WriteOptions()) = 0; + // Atomically do a batch of operations (Put or Delete) to the instance, these + // operations either all succeed, or all fail. The data will be rollbacked if + // the instance crash during a batch write + // + // Return: + // Status::Ok on success + // Status::NotFound if a collection operated by the batch does not exist + // Status::PMemOverflow/Status::MemoryOverflow if PMem/DRAM exhausted + // + // Notice: + // BatchWrite has no isolation guaranteed, if you need it, you should use + // Transaction API virtual Status BatchWrite(std::unique_ptr const& batch) = 0; + // Create a write batch for BatchWrite operation virtual std::unique_ptr WriteBatchCreate() = 0; - // Search the STRING-type KV of "key" and store the corresponding value to - // *value on success. If the "key" does not exist, return NotFound. - virtual Status Get(const StringView key, std::string* value) = 0; - - // Search the STRING-type or Collection and store the corresponding expired + // Start a transaction on the kvdk instance. 
+ // + // The transaction is implemented with pessimistic locking, so any operation + // may conflict with other access thread and compete locks. A transaction + // operation will return Status::Timeout in this case to avoid dead lock. + // + // Return: + // Return a pointer to transaction struct for doing transactions. + // + // Notice: + // 1. A thread should not call normal write APIs while a transaction of + // it has not been committed or rollbacked, otherwise the thread may deadlock + // as the transaction may holding locks required by normal write operations. + // 2. Once the transaction is committed or rollbacked, you can start next + // transaction on this struct. + // 3. Commit or Rollback the transaction as soon as possible to release locks + // it holds. + virtual std::unique_ptr TransactionCreate() = 0; + + // Search the STRING-type or Collection and get the corresponding expired // time to *expired_time on success. - /* - * @param ttl_time. - * If the key is persist, ttl_time is INT64_MAX and Status::Ok; - * If the key is expired or does not exist, ttl_time is 0 and return - * Status::NotFound. - */ - virtual Status GetTTL(const StringView str, int64_t* ttl_time) = 0; - - /* Put the STRING-type or Collection type expired time. - * @param ttl_time is negetive or positive number. - * If ttl_time == INT64_MAX, the key is persistent; - * If ttl_time <=0, the key is expired immediately; - */ - virtual Status Expire(const StringView str, int64_t ttl_time) = 0; + // + // Args: + // * key: STRING-type key or collection name to search. + // * ttl_time: store TTL result. + // + // Return: + // Status::Ok and store ttl time to *ttl_time on success + // Status::NotFound if key is expired or does not exist + virtual Status GetTTL(const StringView key, int64_t* ttl_time) = 0; - // Remove STRING-type KV of "key". - // Return Ok on success or the "key" did not exist, return non-Ok on any - // error. - virtual Status Delete(const StringView key) = 0; + // Set ttl_time for STRING-type or Collection type data + // + // Args: + // * key: STRING-type key or collection name to set ttl_time. + // * ttl_time: ttl time to set. if ttl_time == kPersistTTL, the name will not + // be expired. If ttl_time <=0, the name is expired immediately. + // + // Return: + // Status::Ok on success. + // Status::NotFound if key does not exist. + virtual Status Expire(const StringView key, int64_t ttl_time) = 0; - // Create a sorted collection with configs + // Create a empty sorted collection with configs. 
You should always create + // collection before you do any operations on it + // + // Args: + // * configs: customized config of creating collection + // // Return: // Status::Ok on success // Status::Existed if sorted collection already existed // Status::WrongType if collection existed but not a sorted collection - // Status::PMemOverflow if PMem exhausted - // Status::MemoryOverflow if DRAM exhausted + // Status::PMemOverflow/Status::MemoryOverflow if PMem/DRAM exhausted virtual Status SortedCreate( const StringView collection, const SortedCollectionConfigs& configs = SortedCollectionConfigs()) = 0; @@ -124,24 +187,36 @@ class Engine { // Get number of elements in a sorted collection // - // Return Ok on success, return NotFound if collection not exist + // Return: + // Status::Ok on success + // Status::NotFound if collection not exist virtual Status SortedSize(const StringView collection, size_t* size) = 0; - // Insert a SORTED-type KV to set "key" of sorted collection "collection" - // to hold "value", if "collection" not exist, it will be created, return - // Ok on successful persistence, return non-Ok on any error. + // Insert a KV to set "key" in sorted collection "collection" + // to hold "value" + // Return: + // Status::Ok on success. + // Status::NotFound if collection not exist. + // Status::WrongType if collection exists but is not a sorted collection. + // Status::PMemOverflow/Status::MemoryOverflow if PMem/DRAM exhausted. virtual Status SortedPut(const StringView collection, const StringView key, const StringView value) = 0; - // Search the SORTED-type KV of "key" in sorted collection "collection" - // and store the corresponding value to *value on success. If the - // "collection"/"key" did not exist, return NotFound. + // Search the KV of "key" in sorted collection "collection" + // + // Return: + // Status::Ok and store the corresponding value to *value on success. + // Status::NotFound If the "collection" or "key" does not exist. virtual Status SortedGet(const StringView collection, const StringView key, std::string* value) = 0; - // Remove SORTED-type KV of "key" in the sorted collection "collection". - // Return Ok on success or the "collection"/"key" did not exist, return non-Ok - // on any error. + // Remove KV of "key" in the sorted collection "collection". + // + // Return: + // Status::Ok on success or key not existed in collection + // Status::NotFound if collection not exist + // Status::WrongType if collection exists but is not a sorted collection. + // Status::PMemOverflow if PMem exhausted. virtual Status SortedDelete(const StringView collection, const StringView key) = 0; @@ -149,65 +224,65 @@ class Engine { // Create an empty List. // Return: - // Status::WrongType if list name is not a List. - // Status::Existed if a List named list already exists. - // Status::PMemOverflow if PMem exhausted. - // Status::Ok if successfully created the List. + // Status::WrongType if list name existed but is not a List. + // Status::Existed if a List named list already exists. + // Status::PMemOverflow if PMem exhausted. + // Status::Ok if successfully created the List. virtual Status ListCreate(StringView list) = 0; // Destroy a List associated with key // Return: - // Status::WrongType if list name is not a List. - // Status::Ok if successfully destroyed the List. - // Status::NotFound if list does not exist. + // Status::WrongType if list name is not a List. 
+ // Status::Ok if successfully destroyed the List or the List not existed + // Status::PMemOverflow if PMem exhausted virtual Status ListDestroy(StringView list) = 0; // Total elements in List. // Return: - // Status::InvalidDataSize if list name is too long - // Status::WrongType if list name is not a List. - // Status::NotFound if list does not exist or has expired. - // Status::Ok and length of List if List exists. + // Status::InvalidDataSize if list name is too long + // Status::WrongType if list name is not a List. + // Status::NotFound if list does not exist or has expired. + // Status::Ok and length of List if List exists. virtual Status ListSize(StringView list, size_t* sz) = 0; // Push element as first element of List // Return: - // Status::InvalidDataSize if list name or elem is too long - // Status::WrongType if list name is not a List. - // Status::PMemOverflow if PMem exhausted. - // Status::Ok if operation succeeded. + // Status::InvalidDataSize if list name or elem is too long + // Status::WrongType if list name is not a List. + // Status::PMemOverflow if PMem exhausted. + // Status::Ok if operation succeeded. virtual Status ListPushFront(StringView list, StringView elem) = 0; // Push element as last element of List // Return: - // Status::InvalidDataSize if list name or elem is too long - // Status::WrongType if list name in the instance is not a List. - // Status::PMemOverflow if PMem exhausted. - // Status::Ok if operation succeeded. + // Status::InvalidDataSize if list name or elem is too long + // Status::WrongType if list name in the instance is not a List. + // Status::PMemOverflow if PMem exhausted. + // Status::Ok if operation succeeded. virtual Status ListPushBack(StringView list, StringView elem) = 0; // Pop first element of list // Return: - // Status::InvalidDataSize if list name is too long - // Status::WrongType if list is not a List. - // Status::NotFound if list does not exist or has expired. - // Status::Ok and element if operation succeeded. + // Status::InvalidDataSize if list name is too long + // Status::WrongType if list is not a List. + // Status::NotFound if list does not exist or has expired. + // Status::Ok and element if operation succeeded. virtual Status ListPopFront(StringView list, std::string* elem) = 0; // Pop last element of List // Return: - // Status::InvalidDataSize if list name is too long - // Status::WrongType if list is not a List. - // Status::NotFound if list does not exist or has expired. - // Status::Ok and element if operation succeeded. + // Status::InvalidDataSize if list name is too long + // Status::WrongType if list is not a List. + // Status::NotFound if list does not exist or has expired. + // Status::Ok and element if operation succeeded. virtual Status ListPopBack(StringView list, std::string* elem) = 0; // Push multiple elements to the front of List // Return: - // Status::InvalidDataSize if list name or elem is too long - // Status::WrongType if list is not a List. - // Status::PMemOverflow if PMem exhausted. - // Status::Ok if operation succeeded. + // Status::InvalidDataSize if list name or elem is too long + // Status::WrongType if list is not a List. + // Status::PMemOverflow if PMem exhausted. + // Status::Ok if operation succeeded. 
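A hedged sketch of the List API documented above, assuming an opened engine pointer named engine and a kvdk namespace; the collection and element names are illustrative only.

engine->ListCreate("mylist");
engine->ListPushBack("mylist", "a");
engine->ListPushFront("mylist", "b");   // list is now: b, a
std::string elem;
if (engine->ListPopFront("mylist", &elem) == kvdk::Status::Ok) {
  // elem == "b"
}
size_t sz = 0;
engine->ListSize("mylist", &sz);        // sz == 1
engine->ListDestroy("mylist");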
virtual Status ListBatchPushFront(StringView list, std::vector const& elems) = 0; virtual Status ListBatchPushFront(StringView list, @@ -215,10 +290,10 @@ class Engine { // Push multiple elements to the back of List // Return: - // Status::InvalidDataSize if list or elem is too long - // Status::WrongType if list name is not a List. - // Status::PMemOverflow if PMem exhausted. - // Status::Ok if operation succeeded. + // Status::InvalidDataSize if list or elem is too long + // Status::WrongType if list name is not a List. + // Status::PMemOverflow if PMem exhausted. + // Status::Ok if operation succeeded. virtual Status ListBatchPushBack(StringView list, std::vector const& elems) = 0; virtual Status ListBatchPushBack(StringView list, @@ -226,19 +301,19 @@ class Engine { // Pop first N element of List // Return: - // Status::InvalidDataSize if list is too long - // Status::WrongType if list is not a List. - // Status::NotFound if list does not exist or has expired. - // Status::Ok and element if operation succeeded. + // Status::InvalidDataSize if list is too long + // Status::WrongType if list is not a List. + // Status::NotFound if list does not exist or has expired. + // Status::Ok and element if operation succeeded. virtual Status ListBatchPopFront(StringView list, size_t n, std::vector* elems) = 0; // Pop last N element of List // Return: - // Status::InvalidDataSize if list is too long - // Status::WrongType if list is not a List. - // Status::NotFound if list does not exist or has expired. - // Status::Ok and element if operation succeeded. + // Status::InvalidDataSize if list is too long + // Status::WrongType if list is not a List. + // Status::NotFound if list does not exist or has expired. + // Status::Ok and element if operation succeeded. virtual Status ListBatchPopBack(StringView list, size_t n, std::vector* elems) = 0; @@ -246,9 +321,9 @@ class Engine { // src_pos and dst_pos can only be 0, indicating List front, // or -1, indicating List back. // Return: - // Status::WrongType if src or dst is not a List. - // Status::NotFound if list or element not exist - // Status::Ok and moved element if operation succeeded. + // Status::WrongType if src or dst is not a List. + // Status::NotFound if list or element not exist + // Status::Ok and moved element if operation succeeded. virtual Status ListMove(StringView src_list, ListPos src_pos, StringView dst_list, ListPos dst_pos, std::string* elem) = 0; @@ -256,49 +331,58 @@ class Engine { // Insert a element to a list at index, the index can be positive or // negative // Return: - // Status::InvalidDataSize if list name is too large - // Status::WrongType if collection is not a list - // Status::NotFound if collection not found or index is beyond list size - // Status::Ok if operation succeeded + // Status::InvalidDataSize if list name is too large + // Status::WrongType if collection is not a list + // Status::NotFound if collection not found or index is beyond list size + // Status::Ok if operation succeeded virtual Status ListInsertAt(StringView list, StringView elem, long index) = 0; // Insert an element before element "pos" in list "collection" // Return: - // Status::InvalidDataSize if elem is too long. - // Status::PMemOverflow if PMem exhausted. - // Status::NotFound if List of the ListIterator has expired or been - // deleted, or "pos" not exist in the list - // Status::Ok if operation succeeded. + // Status::InvalidDataSize if elem is too long. + // Status::PMemOverflow if PMem exhausted. 
+ // Status::NotFound if List of the ListIterator has expired or been + // deleted, or "pos" not exist in the list + // Status::Ok if operation succeeded. virtual Status ListInsertBefore(StringView list, StringView elem, StringView pos) = 0; // Insert an element after element "pos" in list "collection" // Return: - // Status::InvalidDataSize if elem is too long. - // Status::PMemOverflow if PMem exhausted. - // Status::NotFound if List of the ListIterator has expired or been - // deleted, or "pos" not exist in the list - // Status::Ok if operation succeeded. + // Status::InvalidDataSize if elem is too long. + // Status::PMemOverflow if PMem exhausted. + // Status::NotFound if List of the ListIterator has expired or been + // deleted, or "pos" not exist in the list + // Status::Ok if operation succeeded. virtual Status ListInsertAfter(StringView list, StringView elem, StringView pos) = 0; // Remove the element at index // Return: - // Status::NotFound if the index beyond list size. - // Status::Ok if operation succeeded, and store removed elem in "elem" + // Status::NotFound if the index beyond list size. + // Status::Ok if operation succeeded, and store removed elem in "elem" virtual Status ListErase(StringView list, long index, std::string* elem) = 0; // Replace the element at index // Return: - // Status::InvalidDataSize if elem is too long - // Status::NotFound if if the index beyond list size. - // Status::Ok if operation succeeded. + // Status::InvalidDataSize if elem is too long + // Status::NotFound if if the index beyond list size. + // Status::Ok if operation succeeded. virtual Status ListReplace(StringView list, long index, StringView elem) = 0; // Create a KV iterator on list "list", which is able to iterate all elems in - // the list at "snapshot" version, if snapshot is nullptr, then a - // internal snapshot will be created at current version and the iterator will - // be created on it + // the list + // + // Args: + // * snapshot: iterator will iterate all elems a t "snapshot" version, if + // snapshot is nullptr, then a internal snapshot will be created at current + // version and the iterator will be created on it + // * status: store operation status if not null + // + // Return: + // Return A pointer to iterator on success. + // Return nullptr if list not exist or any other errors, and store error + // status to "status" // // Notice: // 1. Iterator will be invalid after the passed snapshot is released @@ -307,20 +391,76 @@ class Engine { virtual ListIterator* ListIteratorCreate(StringView list, Snapshot* snapshot = nullptr, Status* status = nullptr) = 0; + // Release a ListIterator and its holding resources virtual void ListIteratorRelease(ListIterator*) = 0; /// Hash APIs /////////////////////////////////////////////////////////////// + // Create a empty hash collection. 
You should always create collection before + // you do any operations on it + // + // Return: + // Status::Ok on success + // Status::Existed if hash collection already existed + // Status::WrongType if collection existed but not a hash collection + // Status::PMemOverflow/Status::MemoryOverflow if PMem/DRAM exhausted virtual Status HashCreate(StringView collection) = 0; + + // Destroy a hash collection + // Return: + // Status::Ok on success + // Status::WrongType if collection existed but not a hash collection + // Status::PMemOverflow if PMem exhausted virtual Status HashDestroy(StringView collection) = 0; + + // Get number of elements in a hash collection + // + // Return: + // Status::Ok on success + // Status::NotFound if collection not exist virtual Status HashSize(StringView collection, size_t* len) = 0; + + // Search the KV of "key" in hash collection "collection" + // + // Return: + // Status::Ok and store the corresponding value to *value on success. + // Status::NotFound If the "collection" or "key" does not exist. virtual Status HashGet(StringView collection, StringView key, std::string* value) = 0; + + // Insert a KV to set "key" in hash collection "collection" + // to hold "value" + // Return: + // Status::Ok on success. + // Status::NotFound if collection not exist. + // Status::WrongType if collection exists but is not a hash collection. + // Status::PMemOverflow/Status::MemoryOverflow if PMem/DRAM exhausted. virtual Status HashPut(StringView collection, StringView key, StringView value) = 0; + + // Remove KV of "key" in the hash collection "collection". + // + // Return: + // Status::Ok on success or key not existed in collection + // Status::NotFound if collection not exist + // Status::WrongType if collection exists but is not a hash collection. + // Status::PMemOverflow if PMem exhausted. virtual Status HashDelete(StringView collection, StringView key) = 0; + + // Modify value of a existing key in a hash collection + // + // Args: + // * modify_func: customized function to modify existing value of key. See + // definition of ModifyFunc (types.hpp) for more details. + // * modify_args: customized arguments of modify_func. + // + // Return: + // Return Status::Ok if modify success. + // Return Status::Abort if modify function abort modifying. + // Return other non-Ok status on any error. 
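A hedged sketch of the hash-collection API documented above, again assuming an opened engine pointer named engine and a kvdk namespace; collection and key names are illustrative only.

engine->HashCreate("numbers");
engine->HashPut("numbers", "k1", "1");
std::string val;
if (engine->HashGet("numbers", "k1", &val) == kvdk::Status::Ok) {
  // val == "1"
}
engine->HashDelete("numbers", "k1");   // Ok even if "k1" was already removed
engine->HashDestroy("numbers");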
virtual Status HashModify(StringView collection, StringView key, ModifyFunc modify_func, void* cb_args) = 0; + // Create a KV iterator on hash collection "collection", which is able to // iterate all elems in the collection at "snapshot" version, if snapshot is // nullptr, then a internal snapshot will be created at current version and @@ -335,6 +475,23 @@ class Engine { Status* s = nullptr) = 0; virtual void HashIteratorRelease(HashIterator*) = 0; + /// Volatile Hash APIs ////////////////////////////////////////////////////// + +#ifdef KVDK_ENABLE_VHASH + virtual Status VHashCreate(StringView key, size_t capacity = (1UL << 20)) = 0; + virtual Status VHashDestroy(StringView key) = 0; + virtual Status VHashSize(StringView key, size_t* len) = 0; + virtual Status VHashGet(StringView key, StringView field, + std::string* value) = 0; + virtual Status VHashPut(StringView key, StringView field, + StringView value) = 0; + virtual Status VHashDelete(StringView key, StringView field) = 0; + virtual Status VHashModify(StringView key, StringView field, + ModifyFunc modify_func, void* cb_args) = 0; + virtual std::unique_ptr VHashIteratorCreate( + StringView key, Status* s = nullptr) = 0; +#endif + /// Other /////////////////////////////////////////////////////////////////// // Get a snapshot of the instance at this moment. @@ -356,9 +513,18 @@ class Engine { virtual void ReleaseSnapshot(const Snapshot*) = 0; // Create a KV iterator on sorted collection "collection", which is able to - // sequentially iterate all KVs in the "collection" at "snapshot" version, if + // sequentially iterate all KVs in the "collection". + // + // Args: + // * snapshot: iterator will iterate all elems a t "snapshot" version, if // snapshot is nullptr, then a internal snapshot will be created at current // version and the iterator will be created on it + // * status: store operation status if not null + // + // Return: + // Return A pointer to iterator on success. + // Return nullptr if collection not exist or any other errors, and store error + // status to "status" // // Notice: // 1. Iterator will be invalid after the passed snapshot is released @@ -368,17 +534,14 @@ class Engine { Snapshot* snapshot = nullptr, Status* s = nullptr) = 0; - // Release a sorted iterator + // Release a sorted iterator and its holding resouces virtual void SortedIteratorRelease(SortedIterator*) = 0; - // Release resources occupied by this access thread so new thread can take - // part. New write requests of this thread need to re-request write resources. 
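A hedged sketch of the experimental volatile-hash (VHash) API declared above. It assumes the build enables KVDK_ENABLE_VHASH (see the new CMake option) and an opened engine pointer named engine; only names declared in this patch are used.

#ifdef KVDK_ENABLE_VHASH
engine->VHashCreate("vh", 1UL << 10);   // capacity is a sizing hint
engine->VHashPut("vh", "field", "value");
std::string v;
if (engine->VHashGet("vh", "field", &v) == kvdk::Status::Ok) {
  // v == "value"
}
size_t n = 0;
engine->VHashSize("vh", &n);
engine->VHashDelete("vh", "field");
engine->VHashDestroy("vh");
#endif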
- virtual void ReleaseAccessThread() = 0; - // Register a customized comparator to the engine on runtime // - // Return true on success, return false if a comparator of comparator_name - // already existed + // Return: + // Return true on success + // Return false if a comparator of comparator_name already existed virtual bool registerComparator(const StringView& comparator_name, Comparator) = 0; diff --git a/include/kvdk/iterator.hpp b/include/kvdk/iterator.hpp index 10e7e981..2eeab302 100644 --- a/include/kvdk/iterator.hpp +++ b/include/kvdk/iterator.hpp @@ -79,4 +79,21 @@ class HashIterator { virtual ~HashIterator() = default; }; +#ifdef KVDK_ENABLE_VHASH +class VHashIterator { + public: + virtual void SeekToFirst() = 0; + + virtual void Next() = 0; + + virtual bool Valid() const = 0; + + virtual std::string Key() const = 0; + + virtual std::string Value() const = 0; + + virtual ~VHashIterator() = default; +}; +#endif + } // namespace KVDK_NAMESPACE \ No newline at end of file diff --git a/include/kvdk/snapshot.hpp b/include/kvdk/snapshot.hpp new file mode 100644 index 00000000..91b29622 --- /dev/null +++ b/include/kvdk/snapshot.hpp @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2021 Intel Corporation + */ + +#pragma once + +namespace KVDK_NAMESPACE { + +// A snapshot indicates a immutable view of a KVDK engine at a certain time +struct Snapshot {}; + +} // namespace KVDK_NAMESPACE \ No newline at end of file diff --git a/include/kvdk/transaction.hpp b/include/kvdk/transaction.hpp new file mode 100644 index 00000000..9a02978c --- /dev/null +++ b/include/kvdk/transaction.hpp @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2021 Intel Corporation + */ + +#pragma once + +#include + +#include "types.hpp" + +namespace KVDK_NAMESPACE { +// This struct is used to do transaction operations. A transaction struct is +// assotiated with a kvdk instance +class Transaction { + public: + // Put a STRING-type KV to the transaction + // + // Return: + // Status::Ok on success + // Status::Timeout on conflict and long-time lock contention + virtual Status StringPut(const StringView key, const StringView value) = 0; + // Delete a STRING-type key to the transaction + // + // Return: + // Status::Ok on success + // Status::Timeout on conflict and long-time lock contention + virtual Status StringDelete(const StringView key) = 0; + // Get value of a STRING-type KV. It will first get from the transaction + // operations (Put/Delete), then the kvdk instance of the transaction + // + // Return: + // Status::Ok on success and store value to "*value" + // Status::NotFound if key not existed or be deleted by this transaction + // Status::Timeout on conflict and long-time lock contention + virtual Status StringGet(const StringView key, std::string* value) = 0; + // Put a KV of sorted collection to the transaction + // + // Return: + // Status::Ok on success + // Status::NotFound if collection does not exist + // Status::Timeout on conflict and long-time lock contention + virtual Status SortedPut(const StringView collection, const StringView key, + const StringView value) = 0; + // Delete a KV from sorted collection to the trnasaction. + // + // Return: + // Status::Ok on success + // Status::NotFound if collection does not exist + // Status::Timeout on conflict and long-time lock contention + virtual Status SortedDelete(const StringView collection, + const StringView key) = 0; + // Get value of a KV from sorted collection. 
It will first get from the + // transaction operations (Put/Delete), then the kvdk instance of the + // transaction + // + // Return: + // Status::Ok and store value to "*value" on success + // Status::NotFound if collection or key does not exist + // Status::Timeout on conflict and long-time lock contention + virtual Status SortedGet(const StringView collection, const StringView key, + std::string* value) = 0; + // Put a KV of hash collection to the transaction + // + // Return: + // Status::Ok on success + // Status::NotFound if collection does not exist + // Status::Timeout on conflict and long-time lock contention + virtual Status HashPut(const StringView collection, const StringView key, + const StringView value) = 0; + // Delete a KV from hash collection to the transaction + // + // Return: + // Status::Ok on success + // Status::NotFound if collection does not exist + // Status::Timeout on conflict and long-time lock contention + virtual Status HashDelete(const StringView collection, + const StringView key) = 0; + + // Get value of a KV from hash collection. It will first get from the + // transaction operations (Put/Delete), then the kvdk instance of the + // transaction + // + // Return: + // Status::Ok and store value to "*value" on success + // Status::NotFound if collection or key does not exist + // Status::Timeout on conflict and long-time lock contention + virtual Status HashGet(const StringView collection, const StringView key, + std::string* value) = 0; + + // Commit all operations of the transaction to the kvdk instance, and + // release all locks it holds + // + // Return: + // Status::Ok on success, all operations will be persistent on the instance + // Status::PMemOverflow/Status::MemoryOverflow if PMem/DRAM exhausted + virtual Status Commit() = 0; + + // Rollback all operations of the transaction, release locks it holds + virtual void Rollback() = 0; + + // Return status of the last transaction operation + virtual Status InternalStatus() = 0; + + virtual ~Transaction() = default; +}; +} // namespace KVDK_NAMESPACE \ No newline at end of file diff --git a/include/kvdk/types.h b/include/kvdk/types.h index 8150154d..f7a1b637 100644 --- a/include/kvdk/types.h +++ b/include/kvdk/types.h @@ -43,8 +43,8 @@ typedef void (*KVDKFreeFunc)(void*); #define KVDK_TYPES(GEN) \ GEN(String) \ - GEN(SortedSet) \ - GEN(HashSet) \ + GEN(SortedCollection) \ + GEN(HashCollection) \ GEN(List) typedef enum { KVDK_TYPES(GENERATE_ENUM) } KVDKValueType; @@ -65,12 +65,12 @@ __attribute__((unused)) static char const* KVDKValueTypeString[] = { GEN(NotSupported) \ GEN(PMemMapFileError) \ GEN(InvalidBatchSize) \ - GEN(TooManyAccessThreads) \ GEN(InvalidDataSize) \ GEN(InvalidArgument) \ GEN(IOError) \ GEN(InvalidConfiguration) \ GEN(Fail) \ + GEN(Timeout) \ GEN(Abort) typedef enum { KVDK_STATUS(GENERATE_ENUM) } KVDKStatus; diff --git a/include/kvdk/write_batch.hpp b/include/kvdk/write_batch.hpp index 63b9949d..b156dff1 100644 --- a/include/kvdk/write_batch.hpp +++ b/include/kvdk/write_batch.hpp @@ -12,17 +12,18 @@ namespace KVDK_NAMESPACE { class WriteBatch { public: - virtual void StringPut(std::string const& key, std::string const& value) = 0; - virtual void StringDelete(std::string const& key) = 0; - - virtual void SortedPut(std::string const& key, std::string const& field, - std::string const& value) = 0; - virtual void SortedDelete(std::string const& key, - std::string const& field) = 0; - - virtual void HashPut(std::string const& key, std::string const& field, - std::string const& value) = 0; - 
virtual void HashDelete(std::string const& key, std::string const& field) = 0; + virtual void StringPut(const StringView key, const StringView value) = 0; + virtual void StringDelete(const StringView key) = 0; + + virtual void SortedPut(const StringView collection, const StringView key, + const StringView value) = 0; + virtual void SortedDelete(const StringView collection, + const StringView key) = 0; + + virtual void HashPut(const StringView collection, const StringView key, + const StringView value) = 0; + virtual void HashDelete(const StringView collection, + const StringView key) = 0; virtual void Clear() = 0; diff --git a/java/benchmark/src/main/java/io/pmem/kvdk/benchmark/KVDKBenchmark.java b/java/benchmark/src/main/java/io/pmem/kvdk/benchmark/KVDKBenchmark.java index 0ee0d558..b672f6f6 100644 --- a/java/benchmark/src/main/java/io/pmem/kvdk/benchmark/KVDKBenchmark.java +++ b/java/benchmark/src/main/java/io/pmem/kvdk/benchmark/KVDKBenchmark.java @@ -275,7 +275,6 @@ private void createSortedCollections() throws KVDKException { closeableObjects.add(nameHandle); kvdkEngine.sortedCreate(nameHandle); } - kvdkEngine.releaseAccessThread(); } private void startTasks() { diff --git a/java/kvdkjni/engine.cc b/java/kvdkjni/engine.cc index b3bf7871..324319bc 100644 --- a/java/kvdkjni/engine.cc +++ b/java/kvdkjni/engine.cc @@ -436,14 +436,3 @@ void Java_io_pmem_kvdk_Engine_batchWrite(JNIEnv* env, jobject, KVDK_NAMESPACE::KVDKExceptionJni::ThrowNew(env, s); } } - -/* - * Class: io_pmem_kvdk_Engine - * Method: releaseAccessThread - * Signature: (JJ)V - */ -void Java_io_pmem_kvdk_Engine_releaseAccessThread(JNIEnv*, jobject, - jlong engine_handle) { - auto* engine = reinterpret_cast(engine_handle); - engine->ReleaseAccessThread(); -} diff --git a/java/kvdkjni/kvdkjni.h b/java/kvdkjni/kvdkjni.h index 3c32ed57..fa6b9ee7 100644 --- a/java/kvdkjni/kvdkjni.h +++ b/java/kvdkjni/kvdkjni.h @@ -116,20 +116,18 @@ class StatusJni : public JavaClass { return 0xA; case KVDK_NAMESPACE::Status::InvalidBatchSize: return 0xB; - case KVDK_NAMESPACE::Status::TooManyAccessThreads: - return 0xC; case KVDK_NAMESPACE::Status::InvalidDataSize: - return 0xD; + return 0xC; case KVDK_NAMESPACE::Status::InvalidArgument: - return 0xE; + return 0xD; case KVDK_NAMESPACE::Status::IOError: - return 0xF; + return 0xE; case KVDK_NAMESPACE::Status::InvalidConfiguration: - return 0x10; + return 0xF; case KVDK_NAMESPACE::Status::Fail: - return 0x11; + return 0x10; case KVDK_NAMESPACE::Status::Abort: - return 0x12; + return 0x11; default: return 0x7F; // undefined } diff --git a/java/src/main/java/io/pmem/kvdk/Engine.java b/java/src/main/java/io/pmem/kvdk/Engine.java index 38e3893c..ec952aa9 100644 --- a/java/src/main/java/io/pmem/kvdk/Engine.java +++ b/java/src/main/java/io/pmem/kvdk/Engine.java @@ -278,10 +278,6 @@ public void batchWrite(WriteBatch batch) throws KVDKException { batchWrite(nativeHandle_, batch.getNativeHandle()); } - public void releaseAccessThread() { - releaseAccessThread(nativeHandle_); - } - // Native methods @Override protected final native void closeInternal(long handle); @@ -347,8 +343,6 @@ private native void sortedDelete( private native void batchWrite(long engineHandle, long batchHandle); - private native void releaseAccessThread(long engineHandle); - private enum LibraryState { NOT_LOADED, LOADING, diff --git a/java/src/main/java/io/pmem/kvdk/Status.java b/java/src/main/java/io/pmem/kvdk/Status.java index b6fec1b7..728a2e48 100644 --- a/java/src/main/java/io/pmem/kvdk/Status.java +++ 
b/java/src/main/java/io/pmem/kvdk/Status.java @@ -61,13 +61,12 @@ public enum Code { NotSupported((byte) 0x9), PMemMapFileError((byte) 0xA), InvalidBatchSize((byte) 0xB), - TooManyAccessThreads((byte) 0xC), - InvalidDataSize((byte) 0xD), - InvalidArgument((byte) 0xE), - IOError((byte) 0xF), - InvalidConfiguration((byte) 0x10), - Fail((byte) 0x11), - Abort((byte) 0x12), + InvalidDataSize((byte) 0xC), + InvalidArgument((byte) 0xD), + IOError((byte) 0xE), + InvalidConfiguration((byte) 0xF), + Fail((byte) 0x10), + Abort((byte) 0x11), Undefined((byte) 0x7F); private final byte value; diff --git a/java/src/test/java/io/pmem/kvdk/EngineTest.java b/java/src/test/java/io/pmem/kvdk/EngineTest.java index ea6cab42..73683ee4 100644 --- a/java/src/test/java/io/pmem/kvdk/EngineTest.java +++ b/java/src/test/java/io/pmem/kvdk/EngineTest.java @@ -90,8 +90,13 @@ public void testSortedCollection() throws KVDKException { // destroy destroyed sorted collection: OK kvdkEngine.sortedDestroy(nameHandle); - // delete on destroyed sorted collection: OK - kvdkEngine.sortedDelete(nameHandle, key.getBytes()); + // delete on destroyed sorted collection: Not OK + try { + kvdkEngine.sortedDelete(nameHandle, key.getBytes()); + } catch (KVDKException ex) { + // should be NotFound + assertEquals(ex.getStatus().getCode(), Code.NotFound); + } // put on destroyed sorted collection: Not OK try { diff --git a/java/src/test/java/io/pmem/kvdk/EngineTestBase.java b/java/src/test/java/io/pmem/kvdk/EngineTestBase.java index 2cbef86c..76ef6326 100644 --- a/java/src/test/java/io/pmem/kvdk/EngineTestBase.java +++ b/java/src/test/java/io/pmem/kvdk/EngineTestBase.java @@ -34,7 +34,6 @@ public void init() throws KVDKException, IOException { @After public void teardown() { - kvdkEngine.releaseAccessThread(); kvdkEngine.close(); } } diff --git a/scripts/benchmark_impl.py b/scripts/benchmark_impl.py index 30a009ee..e9ddcb72 100644 --- a/scripts/benchmark_impl.py +++ b/scripts/benchmark_impl.py @@ -141,6 +141,7 @@ def run_benchmark( timestamp) os.system("mkdir -p {}".format(report_path)) + populate_on_fill = populate_on_fill if (data_type != "vhash") else 0 # run benchmarks print("Run benchmarks for data type :{}, value size distribution: {}".format( data_type, value_size_distribution)) diff --git a/scripts/run_benchmark.py b/scripts/run_benchmark.py index ce8990f2..7c601b8e 100644 --- a/scripts/run_benchmark.py +++ b/scripts/run_benchmark.py @@ -8,13 +8,14 @@ exec = "numactl --cpunodebind={0} --membind={0} {1}".format(numanode, bin) num_thread = 64 -value_sizes = [120] +value_sizes = [24] # constant: value size always be "value_size", # random: value size uniformly distributed in [1, value_size] value_size_distributions = ['constant'] timeout = 30 # For operations other than fill -populate_on_fill = 1 # For fill only -pmem_size = 384 * 1024 * 1024 * 1024 # we need enough space to test insert +populate_on_fill = 0 # For fill only +# pmem_size = 384 * 1024 * 1024 * 1024 # we need enough space to test insert +pmem_size = 64 * 1024 * 1024 * 1024 num_collection = 16 benchmarks = [ @@ -44,8 +45,10 @@ data_types = ['list'] elif sys.argv[1] == 'blackhole': data_types = ['blackhole'] + elif sys.argv[1] == 'vhash': + data_types = ['vhash'] elif sys.argv[1] == 'all': - data_types = ['blackhole', 'string', 'sorted', 'hash', 'list'] + data_types = ['blackhole', 'string', 'sorted', 'hash', 'list', 'vhash'] else: print(usage) exit(1) diff --git a/tests/allocator.hpp b/tests/allocator.hpp index fca45946..a1d02ff7 100644 --- a/tests/allocator.hpp +++ 
b/tests/allocator.hpp @@ -22,7 +22,6 @@ class AllocatorAdaptor { public: virtual op_alloc_info wrapped_malloc(uint64_t size) = 0; virtual op_alloc_info wrapped_free(op_alloc_info* data) = 0; - virtual void InitThread() {} virtual ~AllocatorAdaptor(void) {} }; @@ -45,7 +44,6 @@ class PMemAllocatorWrapper : public AllocatorAdaptor { void InitPMemAllocator(const std::string& pmem_path, uint64_t pmem_size, uint64_t num_segment_blocks, uint32_t block_size, uint32_t num_write_threads) { - thread_manager_.reset(new ThreadManager(num_write_threads)); pmem_alloc_ = PMEMAllocator::NewPMEMAllocator( pmem_path, pmem_size, num_segment_blocks, block_size, num_write_threads, true, false, nullptr); @@ -71,10 +69,6 @@ class PMemAllocatorWrapper : public AllocatorAdaptor { } } - void InitThread() override { - thread_manager_->MaybeInitThread(access_thread); - } - ~PMemAllocatorWrapper(void) { closing_ = true; // background thread exit; @@ -88,5 +82,4 @@ class PMemAllocatorWrapper : public AllocatorAdaptor { PMEMAllocator* pmem_alloc_; bool closing_ = false; std::vector background; - std::shared_ptr thread_manager_; }; diff --git a/tests/pmem_allocator_bench.cpp b/tests/pmem_allocator_bench.cpp index f767a250..4457eecd 100644 --- a/tests/pmem_allocator_bench.cpp +++ b/tests/pmem_allocator_bench.cpp @@ -98,7 +98,6 @@ class AllocatorBench { std::vector> records(num_thread); double elapesd_time = 0; auto RandomBench = [&](uint64_t id) { - allocator->InitThread(); std::vector work_sets(iter_num); std::chrono::system_clock::time_point to_begin = std::chrono::high_resolution_clock::now(); @@ -125,8 +124,6 @@ class AllocatorBench { std::cout << "Total execute time: " << std::fixed << std::setprecision(5) << elapesd_time << " seconds\n"; - // Clear all memory to avoid memory leak - allocator->InitThread(); for (auto record : records) { for (auto r : record) { allocator->wrapped_free(&r); diff --git a/tests/test_pmem_allocator.cpp b/tests/test_pmem_allocator.cpp index dbcf10b5..4f644c09 100644 --- a/tests/test_pmem_allocator.cpp +++ b/tests/test_pmem_allocator.cpp @@ -27,7 +27,6 @@ class EnginePMemAllocatorTest : public testing::Test { protected: Engine* engine = nullptr; Configs configs; - std::shared_ptr thread_manager_; std::string pmem_path; virtual void SetUp() override { @@ -38,16 +37,14 @@ class EnginePMemAllocatorTest : public testing::Test { int res __attribute__((unused)) = system(cmd); } - void RemovePathAndReleaseThread() { - // Release thread. - access_thread.Release(); + void RemovePath() { // delete db_path. char cmd[1024]; sprintf(cmd, "rm -rf %s\n", pmem_path.c_str()); int res __attribute__((unused)) = system(cmd); } - virtual void TearDown() { RemovePathAndReleaseThread(); } + virtual void TearDown() { RemovePath(); } }; TEST_F(EnginePMemAllocatorTest, TestBasicAlloc) { @@ -61,8 +58,6 @@ TEST_F(EnginePMemAllocatorTest, TestBasicAlloc) { for (auto num_segment_block : num_segment_blocks) { for (auto block_size : block_sizes) { for (auto num_thread : num_threads) { - // init pmem allocator and thread_manager. - thread_manager_.reset(new ThreadManager(num_thread)); PMEMAllocator* pmem_alloc = PMEMAllocator::NewPMEMAllocator( pmem_path, pmem_size, num_segment_block, block_size, num_thread, true, false, nullptr); @@ -76,7 +71,6 @@ TEST_F(EnginePMemAllocatorTest, TestBasicAlloc) { // Test function: allocate all pmem, and free all under multi-threaded // scenario. 
auto TestPmemAlloc = [&](size_t) { - thread_manager_->MaybeInitThread(access_thread); std::vector records; for (uint64_t j = 0; j < num_segment_block; ++j) { auto space_entry = pmem_alloc->Allocate(alloc_size); @@ -92,10 +86,7 @@ TEST_F(EnginePMemAllocatorTest, TestBasicAlloc) { Freelist* free_list = pmem_alloc->GetFreeList(); free_list->MoveCachedEntriesToPool(); free_list->MergeSpaceInPool(); - access_thread.Release(); - // Then allocate all pmem. - thread_manager_->MaybeInitThread(access_thread); int alloc_cnt = 0; while (true) { SpaceEntry space_entry = pmem_alloc->Allocate(alloc_size); @@ -105,7 +96,7 @@ TEST_F(EnginePMemAllocatorTest, TestBasicAlloc) { } ASSERT_EQ(pmem_size / block_size, alloc_cnt); delete pmem_alloc; - RemovePathAndReleaseThread(); + RemovePath(); } } } @@ -117,7 +108,6 @@ TEST_F(EnginePMemAllocatorTest, TestPMemFragmentation) { uint64_t num_segment_block = 1024; uint64_t block_size = 64; std::vector alloc_size{8 * 64, 8 * 64, 16 * 64, 32 * 64}; - thread_manager_.reset(new ThreadManager(num_thread)); PMEMAllocator* pmem_alloc = PMEMAllocator::NewPMEMAllocator( pmem_path, pmem_size, num_segment_block, block_size, num_thread, true, false, nullptr); @@ -127,13 +117,11 @@ TEST_F(EnginePMemAllocatorTest, TestPMemFragmentation) { * | 8 | 8 | 16 | 32 | 8 | 8 | 16 | 32 | 8 | 8 | 16 | 32 | 8 | 8 | 16 | 32 | */ std::vector records(num_thread); - thread_manager_->MaybeInitThread(access_thread); for (uint32_t i = 0; i < records.size(); ++i) { SpaceEntry space_entry = pmem_alloc->Allocate(alloc_size[i % 4]); records[i] = space_entry; ASSERT_NE(space_entry.size, 0); } - access_thread.Release(); /* Allocated pmem status: * | null | null | null | 32 | null | null | null | 32 | null | null | null @@ -141,19 +129,16 @@ TEST_F(EnginePMemAllocatorTest, TestPMemFragmentation) { */ // Notice threads (more than one) may share the same list of space pool. 
auto TestPmemFree = [&](uint64_t id) { - thread_manager_->MaybeInitThread(access_thread); if ((id + 1) % 4 != 0) { pmem_alloc->Free(records[id]); } }; - access_thread.Release(); LaunchNThreads(num_thread, TestPmemFree); Freelist* free_list = pmem_alloc->GetFreeList(); free_list->MoveCachedEntriesToPool(); free_list->MergeSpaceInPool(); // Test merge free memory - thread_manager_->MaybeInitThread(access_thread); for (uint32_t id = 0; id < num_thread / 4; ++id) { SpaceEntry space_entry = pmem_alloc->Allocate(alloc_size[3]); ASSERT_NE(space_entry.size, 0); @@ -168,13 +153,11 @@ TEST_F(EnginePMemAllocatorTest, TestPMemAllocFreeList) { uint64_t block_size = 64; uint64_t pmem_size = num_segment_block * block_size * num_thread; std::deque records; - thread_manager_.reset(new ThreadManager(num_thread)); PMEMAllocator* pmem_alloc = PMEMAllocator::NewPMEMAllocator( pmem_path, pmem_size, num_segment_block, block_size, num_thread, true, false, nullptr); ASSERT_NE(pmem_alloc, nullptr); - thread_manager_->MaybeInitThread(access_thread); // allocate 1024 bytes records.push_back(pmem_alloc->Allocate(1024ULL)); ASSERT_EQ(pmem_alloc->PMemUsageInBytes(), 1024LL); diff --git a/tests/tests.cpp b/tests/tests.cpp index f9be5ae5..0422ad57 100644 --- a/tests/tests.cpp +++ b/tests/tests.cpp @@ -56,7 +56,7 @@ class EngineBasicTest : public testing::Test { configs.pmem_segment_blocks = 8 * 1024; // For faster test, no interval so it would not block engine closing configs.background_work_interval = 0.1; - configs.max_access_threads = 1; + configs.max_access_threads = 8; db_path = FLAGS_path; backup_path = FLAGS_path + "_backup"; backup_log = FLAGS_path + ".backup"; @@ -119,9 +119,6 @@ class EngineBasicTest : public testing::Test { // Return the current configuration. Configs CurrentConfigs() { switch (config_option_) { - case MultiThread: - configs.max_access_threads = 16; - break; case OptRestore: configs.opt_large_sorted_collection_recovery = true; break; @@ -300,7 +297,6 @@ class EngineBasicTest : public testing::Test { ASSERT_EQ(val, got_val); ASSERT_EQ(DeleteFunc(collection, key), Status::Ok); ASSERT_EQ(GetFunc(collection, key, &got_val), Status::NotFound); - engine->ReleaseAccessThread(); } void testDestroy(const std::string& collection, DestroyFunc DestroyFunc, @@ -315,7 +311,7 @@ class EngineBasicTest : public testing::Test { ASSERT_EQ(DestroyFunc(collection), Status::Ok); ASSERT_EQ(PutFunc(collection, key, val), Status::NotFound); ASSERT_EQ(GetFunc(collection, key, &got_val), Status::NotFound); - ASSERT_EQ(DeleteFunc(collection, key), Status::Ok); + ASSERT_EQ(DeleteFunc(collection, key), Status::NotFound); } void createBasicOperationTest(const std::string& collection, @@ -359,6 +355,8 @@ class EngineBasicTest : public testing::Test { class BatchWriteTest : public EngineBasicTest {}; +class TrasactionTest : public EngineBasicTest {}; + TEST_F(EngineBasicTest, TestUniqueKey) { std::string sorted_collection("sorted_collection"); std::string hash_collection("unordered_collection"); @@ -475,8 +473,8 @@ TEST_F(EngineBasicTest, TypeOfKey) { ASSERT_EQ(Engine::Open(db_path.c_str(), &engine, configs, stdout), Status::Ok); std::unordered_map key_types; - for (auto type : {ValueType::String, ValueType::HashSet, ValueType::List, - ValueType::SortedSet}) { + for (auto type : {ValueType::String, ValueType::HashCollection, + ValueType::List, ValueType::SortedCollection}) { std::string key = KVDKValueTypeString[type]; key_types[key] = type; ValueType type_resp; @@ -485,7 +483,7 @@ TEST_F(EngineBasicTest, TypeOfKey) { 
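The test edits above (tests/allocator.hpp, tests/pmem_allocator_bench.cpp and tests/test_pmem_allocator.cpp) drop the per-thread registration that used to go through ThreadManager::MaybeInitThread() and access_thread.Release(). A minimal sketch of the resulting pattern, reusing pmem_alloc and LaunchNThreads from those tests; illustrative only.

// Any plain thread can now allocate and free directly, with no per-thread
// init/release calls around the PMEMAllocator.
auto worker = [&](uint64_t) {
  SpaceEntry entry = pmem_alloc->Allocate(64);  // block-sized allocation
  if (entry.size != 0) {
    pmem_alloc->Free(entry);
  }
};
LaunchNThreads(4, worker);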
ASSERT_EQ(engine->Put(key, ""), Status::Ok); break; } - case ValueType::HashSet: { + case ValueType::HashCollection: { ASSERT_EQ(engine->HashCreate(key), Status::Ok); break; } @@ -493,7 +491,7 @@ TEST_F(EngineBasicTest, TypeOfKey) { ASSERT_EQ(engine->ListCreate(key), Status::Ok); break; } - case ValueType::SortedSet: { + case ValueType::SortedCollection: { ASSERT_EQ(engine->SortedCreate(key), Status::Ok); break; } @@ -505,33 +503,10 @@ TEST_F(EngineBasicTest, TypeOfKey) { delete engine; } -TEST_F(EngineBasicTest, TestThreadManager) { - int max_access_threads = 1; - configs.max_access_threads = max_access_threads; - ASSERT_EQ(Engine::Open(db_path.c_str(), &engine, configs, stdout), - Status::Ok); - std::string key("k"); - std::string val("value"); - ASSERT_EQ(engine->Put(key, val, WriteOptions()), Status::Ok); - - // Reach max access threads - auto s = std::async(&Engine::Put, engine, key, val, WriteOptions()); - ASSERT_EQ(s.get(), Status::TooManyAccessThreads); - // Manually release access thread - engine->ReleaseAccessThread(); - s = std::async(&Engine::Put, engine, key, val, WriteOptions()); - ASSERT_EQ(s.get(), Status::Ok); - // Release access thread on thread exits - s = std::async(&Engine::Put, engine, key, val, WriteOptions()); - ASSERT_EQ(s.get(), Status::Ok); - delete engine; -} - // Test iterator/backup/checkpoint on a snapshot TEST_F(EngineBasicTest, TestBasicSnapshot) { uint32_t num_threads = 16; int count = 100; - configs.max_access_threads = num_threads; ASSERT_EQ(Engine::Open(db_path.c_str(), &engine, configs, stdout), Status::Ok); @@ -545,7 +520,6 @@ TEST_F(EngineBasicTest, TestBasicSnapshot) { ASSERT_EQ(engine->SortedCreate(sorted_collection), Status::Ok); ASSERT_EQ(engine->HashCreate(hash_collection), Status::Ok); ASSERT_EQ(engine->ListCreate(list), Status::Ok); - engine->ReleaseAccessThread(); bool snapshot_done(false); std::atomic_uint64_t set_finished_threads(0); @@ -572,7 +546,6 @@ TEST_F(EngineBasicTest, TestBasicSnapshot) { } // Wait snapshot done set_finished_threads.fetch_add(1); - engine->ReleaseAccessThread(); { std::unique_lock ul(spin); while (!snapshot_done) { @@ -625,7 +598,6 @@ TEST_F(EngineBasicTest, TestBasicSnapshot) { ASSERT_EQ(engine->SortedCreate(sorted_collection_after_snapshot), Status::Ok); ASSERT_EQ(engine->HashCreate(hash_collection_after_snapshot), Status::Ok); ASSERT_EQ(engine->ListCreate(list_after_snapshot), Status::Ok); - engine->ReleaseAccessThread(); { std::lock_guard ul(spin); snapshot_done = true; @@ -865,7 +837,6 @@ TEST_F(EngineBasicTest, TestStringModify) { int num_threads = 16; int ops_per_thread = 1000; uint64_t incr_by = 5; - configs.max_access_threads = num_threads; ASSERT_EQ(Engine::Open(db_path.c_str(), &engine, configs, stdout), Status::Ok); @@ -873,7 +844,6 @@ TEST_F(EngineBasicTest, TestStringModify) { std::string wrong_value_key = "wrong_value"; ASSERT_EQ(engine->Put(wrong_value_key, std::string(10, 'a')), Status::Ok); - engine->ReleaseAccessThread(); auto TestModify = [&](int) { IncNArgs args{5, 0}; @@ -897,7 +867,6 @@ TEST_F(EngineBasicTest, TestStringModify) { TEST_F(BatchWriteTest, Sorted) { size_t num_threads = 1; - configs.max_access_threads = num_threads + 1; for (int index_with_hashtable : {0, 1}) { ASSERT_EQ(Engine::Open(db_path.c_str(), &engine, configs, stdout), Status::Ok); @@ -979,7 +948,6 @@ TEST_F(BatchWriteTest, Sorted) { TEST_F(BatchWriteTest, String) { size_t num_threads = 16; - configs.max_access_threads = num_threads; ASSERT_EQ(Engine::Open(db_path.c_str(), &engine, configs, stdout), Status::Ok); size_t 
batch_size = 100; @@ -1050,7 +1018,6 @@ TEST_F(BatchWriteTest, String) { TEST_F(BatchWriteTest, Hash) { size_t num_threads = 16; - configs.max_access_threads = num_threads + 1; ASSERT_EQ(Engine::Open(db_path.c_str(), &engine, configs, stdout), Status::Ok); size_t batch_size = 100; @@ -1198,7 +1165,6 @@ TEST_F(EngineBasicTest, TestSeek) { TEST_F(EngineBasicTest, TestStringRestore) { size_t num_threads = 16; - configs.max_access_threads = num_threads; ASSERT_EQ(Engine::Open(db_path.c_str(), &engine, configs, stdout), Status::Ok); // insert and delete some keys, then re-insert some deleted keys @@ -1278,7 +1244,6 @@ TEST_F(EngineBasicTest, TestStringLargeValue) { TEST_F(EngineBasicTest, TestSortedRestore) { size_t num_threads = 16; - configs.max_access_threads = num_threads; for (int opt_large_sorted_collection_recovery : {0, 1}) { for (int index_with_hashtable : {0, 1}) { SortedCollectionConfigs s_configs; @@ -1337,7 +1302,6 @@ TEST_F(EngineBasicTest, TestSortedRestore) { GlobalLogger.Debug( "Restore with opt_large_sorted_collection_restore: %d\n", opt_large_sorted_collection_recovery); - configs.max_access_threads = num_threads; configs.opt_large_sorted_collection_recovery = opt_large_sorted_collection_recovery; // reopen and restore engine and try gets @@ -1454,7 +1418,6 @@ TEST_F(EngineBasicTest, TestSortedRestore) { TEST_F(EngineBasicTest, TestMultiThreadSortedRestore) { size_t num_threads = 16; size_t num_collections = 16; - configs.max_access_threads = num_threads; configs.opt_large_sorted_collection_recovery = true; ASSERT_EQ(Engine::Open(db_path.c_str(), &engine, configs, stdout), Status::Ok); @@ -1521,7 +1484,6 @@ TEST_F(EngineBasicTest, TestMultiThreadSortedRestore) { TEST_F(EngineBasicTest, TestList) { size_t num_threads = 1; size_t count = 1000; - configs.max_access_threads = num_threads + 1; ASSERT_EQ(Engine::Open(db_path.c_str(), &engine, configs, stdout), Status::Ok); std::vector> elems_vec(num_threads); @@ -1792,7 +1754,6 @@ TEST_F(EngineBasicTest, TestList) { TEST_F(EngineBasicTest, TestHash) { size_t num_threads = 1; size_t count = 1000; - configs.max_access_threads = num_threads + 1; ASSERT_EQ(Engine::Open(db_path.c_str(), &engine, configs, stdout), Status::Ok); std::string key{"Hash"}; @@ -1935,10 +1896,98 @@ TEST_F(EngineBasicTest, TestHash) { delete engine; } +TEST_F(EngineBasicTest, TestVHash) { + size_t num_threads = 1; + size_t count = 1000; + configs.max_access_threads = num_threads + 1; + ASSERT_EQ(Engine::Open(db_path.c_str(), &engine, configs, stdout), + Status::Ok); + std::string key{"VHash"}; + ASSERT_EQ(engine->VHashCreate(key), Status::Ok); + ASSERT_EQ(engine->VHashDestroy(key), Status::Ok); + ASSERT_EQ(engine->VHashCreate(key), Status::Ok); + using umap = std::unordered_map; + std::vector local_copies(num_threads); + std::mutex mu; + + auto VPut = [&](size_t tid) { + umap& local_copy = local_copies[tid]; + for (size_t j = 0; j < count; j++) { + std::string field{std::to_string(tid) + "_" + GetRandomString(10)}; + std::string value{GetRandomString(120)}; + ASSERT_EQ(engine->VHashPut(key, field, value), Status::Ok); + local_copy[field] = value; + } + }; + + auto VGet = [&](size_t tid) { + umap const& local_copy = local_copies[tid]; + for (auto const& kv : local_copy) { + std::string resp; + ASSERT_EQ(engine->VHashGet(key, kv.first, &resp), Status::Ok); + ASSERT_EQ(resp, kv.second) << "Field:\t" << kv.first << "\n"; + } + }; + + auto VDelete = [&](size_t tid) { + umap& local_copy = local_copies[tid]; + std::string sink; + for (size_t i = 0; i < count / 2; i++) 
{ + auto iter = local_copy.begin(); + ASSERT_EQ(engine->VHashDelete(key, iter->first), Status::Ok); + ASSERT_EQ(engine->VHashGet(key, iter->first, &sink), Status::NotFound); + local_copy.erase(iter); + } + }; + + auto VSize = [&](size_t) { + size_t len = 0; + ASSERT_EQ(engine->VHashSize(key, &len), Status::Ok); + size_t cnt = 0; + for (size_t tid = 0; tid < num_threads; tid++) { + cnt += local_copies[tid].size(); + } + ASSERT_EQ(len, cnt); + }; + + auto VIterate = [&](size_t) { + umap combined; + for (size_t tid = 0; tid < num_threads; tid++) { + umap const& local_copy = local_copies[tid]; + for (auto const& kv : local_copy) { + combined[kv.first] = kv.second; + } + } + + auto iter = engine->VHashIteratorCreate(key); + + ASSERT_NE(iter, nullptr); + size_t cnt = 0; + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + ++cnt; + ASSERT_EQ(combined[iter->Key()], iter->Value()); + } + ASSERT_EQ(cnt, combined.size()); + }; + + for (size_t i = 0; i < 3; i++) { + LaunchNThreads(num_threads, VPut); + LaunchNThreads(num_threads, VGet); + LaunchNThreads(num_threads, VDelete); + LaunchNThreads(num_threads, VIterate); + LaunchNThreads(num_threads, VSize); + LaunchNThreads(num_threads, VPut); + LaunchNThreads(num_threads, VGet); + LaunchNThreads(num_threads, VDelete); + LaunchNThreads(num_threads, VIterate); + LaunchNThreads(num_threads, VSize); + } + delete engine; +} + TEST_F(EngineBasicTest, TestStringHotspot) { size_t n_thread_reading = 16; size_t n_thread_writing = 16; - configs.max_access_threads = n_thread_writing + n_thread_reading; ASSERT_EQ(Engine::Open(db_path.c_str(), &engine, configs, stdout), Status::Ok); @@ -1948,7 +1997,6 @@ TEST_F(EngineBasicTest, TestStringHotspot) { std::string val2(1023, 'b'); ASSERT_EQ(engine->Put(key, val1), Status::Ok); - engine->ReleaseAccessThread(); auto EvenWriteOddRead = [&](uint32_t id) { for (size_t i = 0; i < count; i++) { @@ -1990,7 +2038,6 @@ TEST_F(EngineBasicTest, TestStringHotspot) { TEST_F(EngineBasicTest, TestSortedHotspot) { size_t n_thread_reading = 16; size_t n_thread_writing = 16; - configs.max_access_threads = n_thread_writing + n_thread_reading; ASSERT_EQ(Engine::Open(db_path.c_str(), &engine, configs, stdout), Status::Ok); @@ -2004,7 +2051,6 @@ TEST_F(EngineBasicTest, TestSortedHotspot) { for (const std::string& key : keys) { ASSERT_EQ(engine->SortedPut(collection_name, key, val1), Status::Ok); - engine->ReleaseAccessThread(); auto EvenWriteOddRead = [&](uint32_t id) { for (size_t i = 0; i < count; i++) { @@ -2050,7 +2096,6 @@ TEST_F(EngineBasicTest, TestSortedHotspot) { TEST_F(EngineBasicTest, TestSortedCustomCompareFunction) { using kvpair = std::pair; size_t num_threads = 16; - configs.max_access_threads = num_threads; ASSERT_EQ(Engine::Open(db_path.c_str(), &engine, configs, stdout), Status::Ok); @@ -2156,8 +2201,7 @@ TEST_F(EngineBasicTest, TestSortedCustomCompareFunction) { } TEST_F(EngineBasicTest, TestHashTableIterator) { - size_t threads = 32; - configs.max_access_threads = threads; + size_t num_threads = 32; ASSERT_EQ(Engine::Open(db_path.c_str(), &engine, configs, stdout), Status::Ok); std::string collection_name = "sortedcollection"; @@ -2173,7 +2217,7 @@ TEST_F(EngineBasicTest, TestHashTableIterator) { Status::Ok); } }; - LaunchNThreads(threads, MixedPut); + LaunchNThreads(num_threads, MixedPut); auto test_kvengine = static_cast(engine); auto hash_table = test_kvengine->GetHashTable(); @@ -2224,15 +2268,12 @@ TEST_F(EngineBasicTest, TestHashTableIterator) { } hashtable_iter.Next(); } - ASSERT_EQ(total_entry_num, threads + 
1); + ASSERT_EQ(total_entry_num, num_threads + 1); } delete engine; } TEST_F(EngineBasicTest, TestExpireAPI) { - size_t n_thread_reading = 1; - size_t n_thread_writing = 1; - configs.max_access_threads = n_thread_writing + n_thread_reading; ASSERT_EQ(Engine::Open(db_path.c_str(), &engine, configs, stdout), Status::Ok); @@ -2390,7 +2431,6 @@ TEST_F(EngineBasicTest, TestExpireAPI) { TEST_F(EngineBasicTest, TestbackgroundDestroyCollections) { size_t n_thread_writing = 16; - configs.max_access_threads = n_thread_writing; ASSERT_EQ(Engine::Open(db_path.c_str(), &engine, configs, stdout), Status::Ok); TTLType ttl = 1000; // 1s @@ -2458,13 +2498,404 @@ TEST_F(EngineBasicTest, TestbackgroundDestroyCollections) { delete engine; } +TEST_F(TrasactionTest, TransactionBasic) { + size_t num_threads = 32; + configs.max_access_threads = num_threads; + ASSERT_EQ(Engine::Open(db_path, &engine, configs, stdout), Status::Ok); + int amount = num_threads; + int transfer_amount = 1; + std::vector<std::string> accounts{"Jack", "Tom"}; + int round = 10000; + std::string sorted_bank{"sorted_bank"}; + std::string hash_bank{"hash_bank"}; + + ASSERT_EQ(engine->SortedCreate(sorted_bank), Status::Ok); + ASSERT_EQ(engine->HashCreate(hash_bank), Status::Ok); + + auto string_transfer_txn = [&](size_t) { + int cnt = round; + while (cnt--) { + std::string payer; + std::string receiver; + if (fast_random_64() % 2 == 0) { + payer = accounts[0]; + receiver = accounts[1]; + } else { + payer = accounts[1]; + receiver = accounts[0]; + } + + std::string payer_balance; + std::string receiver_balance; + bool exist = true; + + auto txn = engine->TransactionCreate(); + Status s = txn->StringGet(payer, &payer_balance); + if (s == Status::Ok) { + s = txn->StringGet(receiver, &receiver_balance); + if (s == Status::Timeout) { + txn->Rollback(); + continue; + } else { + ASSERT_EQ(s, Status::Ok); + } + } else if (s == Status::NotFound) { + exist = false; + s = txn->StringGet(receiver, &receiver_balance); + if (s == Status::Timeout) { + txn->Rollback(); + continue; + } else { + ASSERT_EQ(s, Status::NotFound); + payer_balance = std::to_string(amount); + receiver_balance = std::to_string(amount); + // Key is locked in previous get, so no conflict + ASSERT_EQ(txn->StringPut(payer, payer_balance), Status::Ok); + ASSERT_EQ(txn->StringPut(receiver, receiver_balance), Status::Ok); + } + } else { + ASSERT_EQ(s, Status::Timeout); + txn->Rollback(); + continue; + } + ASSERT_EQ(std::stoi(payer_balance) + std::stoi(receiver_balance), + 2 * amount); + if (std::stoi(payer_balance) == 0) { + txn->Rollback(); + continue; + } + + // Do transfer + std::string payer_balance_after_transfer = + std::to_string(std::stoi(payer_balance) - transfer_amount); + std::string receiver_balance_after_transfer = + std::to_string(std::stoi(receiver_balance) + transfer_amount); + ASSERT_EQ(txn->StringPut(payer, payer_balance_after_transfer), + Status::Ok); + ASSERT_EQ(txn->StringPut(receiver, receiver_balance_after_transfer), + Status::Ok); + std::string value; + // Uncommitted changes should be visible to this transaction, but invisible + // outside the transaction + ASSERT_EQ(txn->StringGet(payer, &value), Status::Ok); + ASSERT_EQ(value, payer_balance_after_transfer); + ASSERT_EQ(txn->StringGet(receiver, &value), Status::Ok); + ASSERT_EQ(value, receiver_balance_after_transfer); + ASSERT_EQ(std::stoi(payer_balance_after_transfer) + + std::stoi(receiver_balance_after_transfer), + 2 * amount); + if (exist) { + ASSERT_EQ(engine->Get(payer, &value), Status::Ok); + ASSERT_EQ(value,
payer_balance); + ASSERT_EQ(engine->Get(receiver, &value), Status::Ok); + ASSERT_EQ(value, receiver_balance); + } else { + ASSERT_EQ(engine->Get(payer, &value), Status::NotFound); + ASSERT_EQ(engine->Get(receiver, &value), Status::NotFound); + } + ASSERT_EQ(txn->Commit(), Status::Ok); + break; + } + }; + + auto sorted_transfer_txn = [&](size_t) { + int cnt = round; + while (cnt--) { + std::string payer; + std::string receiver; + if (fast_random_64() % 2 == 0) { + payer = accounts[0]; + receiver = accounts[1]; + } else { + payer = accounts[1]; + receiver = accounts[0]; + } + + std::string payer_balance; + std::string receiver_balance; + bool exist = true; + + auto txn = engine->TransactionCreate(); + Status s = txn->SortedGet(sorted_bank, payer, &payer_balance); + if (s == Status::Ok) { + s = txn->SortedGet(sorted_bank, receiver, &receiver_balance); + if (s == Status::Timeout) { + txn->Rollback(); + continue; + } else { + ASSERT_EQ(s, Status::Ok); + } + } else if (s == Status::NotFound) { + exist = false; + s = txn->SortedGet(sorted_bank, receiver, &receiver_balance); + if (s == Status::Timeout) { + txn->Rollback(); + continue; + } else { + ASSERT_EQ(s, Status::NotFound); + payer_balance = std::to_string(amount); + receiver_balance = std::to_string(amount); + // Key is locked in previous get, so no conflict + ASSERT_EQ(txn->SortedPut(sorted_bank, payer, payer_balance), + Status::Ok); + ASSERT_EQ(txn->SortedPut(sorted_bank, receiver, receiver_balance), + Status::Ok); + } + } else { + ASSERT_EQ(s, Status::Timeout); + txn->Rollback(); + continue; + } + ASSERT_EQ(std::stoi(payer_balance) + std::stoi(receiver_balance), + 2 * amount); + if (std::stoi(payer_balance) == 0) { + txn->Rollback(); + continue; + } + + // Do transfer + std::string payer_balance_after_transfer = + std::to_string(std::stoi(payer_balance) - transfer_amount); + std::string receiver_balance_after_transfer = + std::to_string(std::stoi(receiver_balance) + transfer_amount); + ASSERT_EQ( + txn->SortedPut(sorted_bank, payer, payer_balance_after_transfer), + Status::Ok); + ASSERT_EQ(txn->SortedPut(sorted_bank, receiver, + receiver_balance_after_transfer), + Status::Ok); + std::string value; + // Uncommitted changes should be visible to this transaction, but invisible + // outside the transaction + ASSERT_EQ(txn->SortedGet(sorted_bank, payer, &value), Status::Ok); + ASSERT_EQ(value, payer_balance_after_transfer); + ASSERT_EQ(txn->SortedGet(sorted_bank, receiver, &value), Status::Ok); + ASSERT_EQ(value, receiver_balance_after_transfer); + ASSERT_EQ(std::stoi(payer_balance_after_transfer) + + std::stoi(receiver_balance_after_transfer), + 2 * amount); + if (exist) { + ASSERT_EQ(engine->SortedGet(sorted_bank, payer, &value), Status::Ok); + ASSERT_EQ(value, payer_balance); + ASSERT_EQ(engine->SortedGet(sorted_bank, receiver, &value), Status::Ok); + ASSERT_EQ(value, receiver_balance); + } else { + ASSERT_EQ(engine->SortedGet(sorted_bank, payer, &value), + Status::NotFound); + ASSERT_EQ(engine->SortedGet(sorted_bank, receiver, &value), + Status::NotFound); + } + ASSERT_EQ(txn->Commit(), Status::Ok); + break; + } + }; + + auto hash_transfer_txn = [&](size_t) { + int cnt = round; + while (cnt--) { + std::string payer; + std::string receiver; + if (fast_random_64() % 2 == 0) { + payer = accounts[0]; + receiver = accounts[1]; + } else { + payer = accounts[1]; + receiver = accounts[0]; + } + + std::string payer_balance; + std::string receiver_balance; + bool exist = true; + + auto txn = engine->TransactionCreate(); + Status s =
txn->HashGet(hash_bank, payer, &payer_balance); + if (s == Status::Ok) { + s = txn->HashGet(hash_bank, receiver, &receiver_balance); + if (s == Status::Timeout) { + txn->Rollback(); + continue; + } else { + ASSERT_EQ(s, Status::Ok); + } + } else if (s == Status::NotFound) { + exist = false; + s = txn->HashGet(hash_bank, receiver, &receiver_balance); + if (s == Status::Timeout) { + txn->Rollback(); + continue; + } else { + ASSERT_EQ(s, Status::NotFound); + payer_balance = std::to_string(amount); + receiver_balance = std::to_string(amount); + // Key is locked in previous get, so no conflict + ASSERT_EQ(txn->HashPut(hash_bank, payer, payer_balance), Status::Ok); + ASSERT_EQ(txn->HashPut(hash_bank, receiver, receiver_balance), + Status::Ok); + } + } else { + ASSERT_EQ(s, Status::Timeout); + txn->Rollback(); + continue; + } + ASSERT_EQ(std::stoi(payer_balance) + std::stoi(receiver_balance), + 2 * amount); + if (std::stoi(payer_balance) == 0) { + txn->Rollback(); + continue; + } + + // Do transfer + std::string payer_balance_after_transfer = + std::to_string(std::stoi(payer_balance) - transfer_amount); + std::string receiver_balance_after_transfer = + std::to_string(std::stoi(receiver_balance) + transfer_amount); + ASSERT_EQ(txn->HashPut(hash_bank, payer, payer_balance_after_transfer), + Status::Ok); + ASSERT_EQ( + txn->HashPut(hash_bank, receiver, receiver_balance_after_transfer), + Status::Ok); + std::string value; + // Uncommitted changes should be visible to this transaction, but invisible + // outside the transaction + ASSERT_EQ(txn->HashGet(hash_bank, payer, &value), Status::Ok); + ASSERT_EQ(value, payer_balance_after_transfer); + ASSERT_EQ(txn->HashGet(hash_bank, receiver, &value), Status::Ok); + ASSERT_EQ(value, receiver_balance_after_transfer); + ASSERT_EQ(std::stoi(payer_balance_after_transfer) + + std::stoi(receiver_balance_after_transfer), + 2 * amount); + if (exist) { + ASSERT_EQ(engine->HashGet(hash_bank, payer, &value), Status::Ok); + ASSERT_EQ(value, payer_balance); + ASSERT_EQ(engine->HashGet(hash_bank, receiver, &value), Status::Ok); + ASSERT_EQ(value, receiver_balance); + } else { + ASSERT_EQ(engine->HashGet(hash_bank, payer, &value), Status::NotFound); + ASSERT_EQ(engine->HashGet(hash_bank, receiver, &value), + Status::NotFound); + } + ASSERT_EQ(txn->Commit(), Status::Ok); + break; + } + }; + + auto not_existed_collection_txn = [&]() { + std::string not_exist_collection("not_exist"); + std::string key("key"); + std::string val("val"); + std::string got_val; + auto txn = engine->TransactionCreate(); + ASSERT_TRUE(txn != nullptr); + ASSERT_EQ(txn->SortedPut(not_exist_collection, key, val), Status::NotFound); + ASSERT_EQ(txn->SortedDelete(not_exist_collection, key), Status::NotFound); + ASSERT_EQ(txn->SortedGet(not_exist_collection, key, &got_val), + Status::NotFound); + + ASSERT_EQ(txn->HashPut(not_exist_collection, key, val), Status::NotFound); + ASSERT_EQ(txn->HashDelete(not_exist_collection, key), Status::NotFound); + ASSERT_EQ(txn->HashGet(not_exist_collection, key, &got_val), + Status::NotFound); + }; + + LaunchNThreads(num_threads, string_transfer_txn); + LaunchNThreads(num_threads, sorted_transfer_txn); + LaunchNThreads(num_threads, hash_transfer_txn); + std::vector<std::string> balances{2}; + ASSERT_EQ(engine->Get(accounts[0], &balances[0]), Status::Ok); + ASSERT_EQ(engine->Get(accounts[1], &balances[1]), Status::Ok); + ASSERT_EQ(std::stoi(balances[0]) + std::stoi(balances[1]), 2 * amount); + ASSERT_EQ(engine->SortedGet(sorted_bank, accounts[0], &balances[0]), + Status::Ok); +
ASSERT_EQ(engine->SortedGet(sorted_bank, accounts[1], &balances[1]), + Status::Ok); + ASSERT_EQ(std::stoi(balances[0]) + std::stoi(balances[1]), 2 * amount); + ASSERT_EQ(engine->HashGet(hash_bank, accounts[0], &balances[0]), Status::Ok); + ASSERT_EQ(engine->HashGet(hash_bank, accounts[1], &balances[1]), Status::Ok); + ASSERT_EQ(std::stoi(balances[0]) + std::stoi(balances[1]), 2 * amount); + + not_existed_collection_txn(); + + delete engine; +} + +TEST_F(TrasactionTest, Conflict) { + configs.max_access_threads = 3; + ASSERT_EQ(Engine::Open(db_path, &engine, configs, stdout), Status::Ok); + std::string sorted_collection{"sorted_collection"}; + std::string hash_collection{"hash_collection"}; + std::string key{"key"}; + std::string val("val"); + ASSERT_EQ(engine->SortedCreate(sorted_collection), Status::Ok); + ASSERT_EQ(engine->HashCreate(hash_collection), Status::Ok); + + Status expected_return = Status::Timeout; + + auto operation_thread = [&](size_t) { + std::string got_val; + auto txn = engine->TransactionCreate(); + ASSERT_TRUE(txn != nullptr); + ASSERT_EQ(txn->StringPut(key, val), expected_return); + ASSERT_EQ(txn->StringGet(key, &got_val), expected_return); + if (expected_return == Status::Ok) { + ASSERT_EQ(got_val, val); + } + ASSERT_EQ(txn->StringDelete(key), expected_return); + txn->Rollback(); + + ASSERT_EQ(txn->SortedPut(sorted_collection, key, val), expected_return); + ASSERT_EQ(txn->SortedGet(sorted_collection, key, &got_val), + expected_return); + if (expected_return == Status::Ok) { + ASSERT_EQ(got_val, val); + } + ASSERT_EQ(txn->SortedDelete(sorted_collection, key), expected_return); + txn->Rollback(); + + ASSERT_EQ(txn->HashPut(hash_collection, key, val), expected_return); + ASSERT_EQ(txn->HashGet(hash_collection, key, &got_val), expected_return); + if (expected_return == Status::Ok) { + ASSERT_EQ(got_val, val); + } + ASSERT_EQ(txn->HashDelete(hash_collection, key), expected_return); + txn->Rollback(); + }; + + auto txn = engine->TransactionCreate(); + ASSERT_EQ(txn->StringPut(key, val), Status::Ok); + ASSERT_EQ(txn->SortedPut(sorted_collection, key, val), Status::Ok); + ASSERT_EQ(txn->HashPut(hash_collection, key, val), Status::Ok); + LaunchNThreads(1, operation_thread); + ASSERT_EQ(txn->Commit(), Status::Ok); + + std::string got_val; + ASSERT_EQ(txn->StringGet(key, &got_val), Status::Ok); + ASSERT_EQ(got_val, val); + ASSERT_EQ(txn->SortedGet(sorted_collection, key, &got_val), Status::Ok); + ASSERT_EQ(got_val, val); + ASSERT_EQ(txn->HashGet(hash_collection, key, &got_val), Status::Ok); + ASSERT_EQ(got_val, val); + LaunchNThreads(1, operation_thread); + ASSERT_EQ(txn->Commit(), Status::Ok); + + expected_return = Status::Ok; + LaunchNThreads(1, operation_thread); + + expected_return = Status::Timeout; + ASSERT_EQ(txn->StringDelete(key), Status::Ok); + ASSERT_EQ(txn->SortedDelete(sorted_collection, key), Status::Ok); + ASSERT_EQ(txn->HashDelete(hash_collection, key), Status::Ok); + LaunchNThreads(1, operation_thread); + ASSERT_EQ(txn->Commit(), Status::Ok); + + delete engine; +} + // ========================= Sync Point ====================================== #if KVDK_DEBUG_LEVEL > 0 TEST_F(BatchWriteTest, SortedRollback) { size_t num_threads = 1; - configs.max_access_threads = num_threads + 1; for (int index_with_hashtable : {0, 1}) { // Test crash before commit SyncPoint::GetInstance()->EnableCrashPoint( @@ -2587,7 +3018,6 @@ TEST_F(BatchWriteTest, StringRollBack) { // another thread may reuse this id and the old batch log file // is overwritten. 
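The Conflict test above shows that operations on keys currently locked by another transaction return Status::Timeout. A compact retry pattern built on that behaviour might look like the sketch below; the helper name and retry policy are illustrative, not part of this patch.

// Sketch: retry a transactional put when the key is locked elsewhere.
Status PutWithRetry(Engine* engine, const std::string& key,
                    const std::string& value, int max_retries) {
  for (int i = 0; i < max_retries; i++) {
    auto txn = engine->TransactionCreate();
    Status s = txn->StringPut(key, value);
    if (s == Status::Ok) {
      return txn->Commit();
    }
    txn->Rollback();
    if (s != Status::Timeout) {
      return s;  // unexpected error, do not retry
    }
    // Status::Timeout: another transaction holds the lock; retry
  }
  return Status::Timeout;
}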
size_t num_threads = 1; - configs.max_access_threads = num_threads; ASSERT_EQ(Engine::Open(db_path.c_str(), &engine, configs, stdout), Status::Ok); size_t batch_size = 100; @@ -2662,7 +3092,6 @@ TEST_F(BatchWriteTest, StringRollBack) { TEST_F(BatchWriteTest, HashRollback) { size_t num_threads = 1; - configs.max_access_threads = num_threads + 1; ASSERT_EQ(Engine::Open(db_path.c_str(), &engine, configs, stdout), Status::Ok); size_t batch_size = 100; @@ -2751,7 +3180,6 @@ TEST_F(BatchWriteTest, ListBatchOperationRollback) { *((std::atomic_bool*)close_reclaimer) = true; return; }); - configs.max_access_threads = 1; ASSERT_EQ(Engine::Open(db_path.c_str(), &engine, configs, stdout), Status::Ok); size_t count = 100; @@ -2864,7 +3292,6 @@ TEST_F(BatchWriteTest, ListBatchOperationRollback) { // Then Repair TEST_F(EngineBasicTest, TestSortedRecoverySyncPointCaseOne) { Configs test_config = configs; - test_config.max_access_threads = 16; std::atomic update_num(1); int cnt = 20; @@ -2955,7 +3382,6 @@ TEST_F(EngineBasicTest, TestSortedRecoverySyncPointCaseTwo) { }); SyncPoint::GetInstance()->EnableProcessing(); - test_config.max_access_threads = 16; ASSERT_EQ(Engine::Open(db_path.c_str(), &engine, test_config, stdout), Status::Ok); @@ -3017,7 +3443,6 @@ TEST_F(EngineBasicTest, TestSortedRecoverySyncPointCaseTwo) { // thread2: iter TEST_F(EngineBasicTest, TestSortedSyncPoint) { Configs test_config = configs; - test_config.max_access_threads = 16; ASSERT_EQ(Engine::Open(db_path.c_str(), &engine, test_config, stdout), Status::Ok); std::vector ths; @@ -3072,8 +3497,6 @@ TEST_F(EngineBasicTest, TestSortedSyncPoint) { } TEST_F(EngineBasicTest, TestHashTableRangeIter) { - uint64_t threads = 16; - configs.max_access_threads = threads; ASSERT_EQ(Engine::Open(db_path.c_str(), &engine, configs, stdout), Status::Ok); std::string key = "stringkey"; @@ -3130,7 +3553,6 @@ TEST_F(EngineBasicTest, TestBackGroundCleaner) { }); SyncPoint::GetInstance()->EnableProcessing(); - configs.max_access_threads = 16; ASSERT_EQ(Engine::Open(db_path.c_str(), &engine, configs, stdout), Status::Ok); @@ -3342,8 +3764,6 @@ TEST_F(EngineBasicTest, TestBackGroundIterNoHashIndexSkiplist) { {{"KVEngine::BackgroundCleaner::IterSkiplist::UnlinkDeleteRecord", "KVEngine::SkiplistNoHashIndex::Put"}}); SyncPoint::GetInstance()->EnableProcessing(); - uint64_t threads = 16; - configs.max_access_threads = threads; ASSERT_EQ(Engine::Open(db_path.c_str(), &engine, configs, stdout), Status::Ok); std::string collection_name = "Skiplist_with_hash_index"; @@ -3432,7 +3852,6 @@ TEST_F(EngineBasicTest, TestDynamicCleaner) { return; }); SyncPoint::GetInstance()->EnableProcessing(); - configs.max_access_threads = 32; configs.hash_bucket_num = 256; configs.clean_threads = 8; ASSERT_EQ(Engine::Open(db_path.c_str(), &engine, configs, stdout),