diff --git a/CMakeLists.txt b/CMakeLists.txt index b3f08248c..826fca47b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -981,7 +981,7 @@ endfunction() file_glob_recurse(FILES_NEED_FORMAT DIRECTORIES "src" "modules" "python" "test" "benchmark" PATTERNS ".*\\.(cc|cpp|h|hpp|vineyard-mod)$" - EXCLUDE_PATTERNS "(.*\\.vineyard.h$)|(modules/kv-state-cache/radix-tree/ra.*)" + EXCLUDE_PATTERNS "(.*\\.vineyard.h$)|(.*modules/kv-state-cache/radix-tree/radix\\.(cc|h)$)" ) # the `memcpy.h` is borrowed from external project diff --git a/modules/kv-state-cache/radix-tree/radix-tree.cc b/modules/kv-state-cache/radix-tree/radix-tree.cc index c528f44c8..45b8f1e85 100644 --- a/modules/kv-state-cache/radix-tree/radix-tree.cc +++ b/modules/kv-state-cache/radix-tree/radix-tree.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include "radix-tree.h" +#include "kv-state-cache/radix-tree/radix-tree.h" #include "common/util/base64.h" #include "common/util/logging.h" @@ -21,7 +21,7 @@ limitations under the License. 
#include "zstd/lib/zstd.h" -using namespace vineyard; +using namespace vineyard; // NOLINT(build/namespaces) RadixTree::RadixTree(int cacheCapacity) { this->tree = raxNew(); @@ -52,8 +52,8 @@ RadixTree::~RadixTree() { raxNode* dataNode = raxFindAndReturnDataNode(this->tree, rootToken.data(), rootToken.size(), NULL, false); if (dataNode != nullptr) { - delete (DataWrapper*) dataNode->custom_data; - delete (DataWrapper*) raxGetData(dataNode); + delete reinterpret_cast(dataNode->custom_data); + delete reinterpret_cast(raxGetData(dataNode)); } raxFree(this->tree); @@ -99,7 +99,7 @@ std::shared_ptr RadixTree::InsertInternal( raxNode* dataNode = NULL; int retval = raxInsertAndReturnDataNode( this->tree, insertTokensArray, insertTokensArrayLen, dummyData, - (void**) &dataNode, (void**) &oldData); + reinterpret_cast(&dataNode), reinterpret_cast(&oldData)); if (dataNode == NULL) { throw std::runtime_error("Insert token list failed"); return NULL; @@ -140,8 +140,8 @@ std::shared_ptr RadixTree::InsertInternal( if (subTreeNode == nullptr) { return std::make_shared(dummyData, nullptr); } - return std::make_shared(dummyData, - (DataWrapper*) subTreeNode->custom_data); + return std::make_shared( + dummyData, reinterpret_cast(subTreeNode->custom_data)); } void RadixTree::DeleteInternal(std::vector tokens, @@ -162,10 +162,10 @@ void RadixTree::DeleteInternal(std::vector tokens, nodeIsSubTree = true; } int retval = raxRemove(this->tree, deleteTokensArray, deleteTokensArrayLen, - (void**) &oldData); + reinterpret_cast(&oldData)); if (retval == 1) { evictedNode = std::make_shared( - oldData, (DataWrapper*) subTreeNode->custom_data); + oldData, reinterpret_cast(subTreeNode->custom_data)); nodeCount--; if (nodeIsSubTree) { evictedNode->cleanTreeData = true; @@ -193,8 +193,9 @@ std::shared_ptr RadixTree::QueryInternal(std::vector key) { return NULL; } - return std::make_shared((DataWrapper*) raxGetData(dataNode), - (DataWrapper*) subTreeNode->custom_data); + return std::make_shared( + 
reinterpret_cast(raxGetData(dataNode)), + reinterpret_cast(subTreeNode->custom_data)); } std::string RadixTree::Serialize() { @@ -241,10 +242,13 @@ std::string RadixTree::Serialize() { serializedStr += subTreeSizeOSS.str() + "|"; // convert data to hex string - char* bytes = (char*) ((DataWrapper*) dataList[index])->data; + char* bytes = reinterpret_cast( + (reinterpret_cast(dataList[index]))->data); std::ostringstream dataOSS; - for (int i = 0; i < ((DataWrapper*) dataList[index])->dataLength; i++) { + for (int i = 0; + i < (reinterpret_cast(dataList[index]))->dataLength; + i++) { dataOSS << std::hex << std::setw(2) << std::setfill('0') << static_cast(static_cast(bytes[i])); } @@ -264,17 +268,22 @@ std::string RadixTree::Serialize() { serializedStr += "|"; // convert custom data to hex string - char* bytes = (char*) ((DataWrapper*) subTreeDataList[index])->data; + char* bytes = reinterpret_cast( + (reinterpret_cast(subTreeDataList[index]))->data); std::ostringstream dataOSS; - LOG(INFO) << "data length:" - << ((DataWrapper*) subTreeDataList[index])->dataLength; - for (int i = 0; i < ((DataWrapper*) subTreeDataList[index])->dataLength; + LOG(INFO) + << "data length:" + << (reinterpret_cast(subTreeDataList[index]))->dataLength; + for (int i = 0; + i < + (reinterpret_cast(subTreeDataList[index]))->dataLength; ++i) { dataOSS << std::hex << std::setw(2) << std::setfill('0') << static_cast(static_cast(bytes[i])); } - LOG(INFO) << "data:" << ((DataWrapper*) subTreeDataList[index])->data; + LOG(INFO) << "data:" + << (reinterpret_cast(subTreeDataList[index]))->data; LOG(INFO) << "data oss:" << dataOSS.str(); serializedStr += dataOSS.str() + "\n"; } @@ -282,10 +291,11 @@ std::string RadixTree::Serialize() { // use ZSTD to compress the serialized string size_t srcSize = serializedStr.size(); - std::string compressedStr(srcSize, '\0'); - int compressedSize = - ZSTD_compress((void*) (compressedStr.c_str()), compressedStr.length(), - serializedStr.c_str(), srcSize, 3); + size_t 
dstSize = ZSTD_compressBound(srcSize); + std::string compressedStr(dstSize + 1, '\0'); + LOG(INFO) << "src size:" << srcSize << " dst size:" << dstSize; + int compressedSize = ZSTD_compress(compressedStr.data(), compressedStr.size(), + serializedStr.c_str(), srcSize, 3); if (ZSTD_isError(compressedSize)) { LOG(ERROR) << "ZSTD compression failed: " << ZSTD_getErrorName(compressedSize); @@ -293,9 +303,10 @@ std::string RadixTree::Serialize() { int cacheCapacity = this->cacheCapacity - 1; std::string result = - std::string((char*) &srcSize, sizeof(int)) + - std::string((char*) &cacheCapacity, sizeof(int)) + - std::string((char*) &(this->tree->head->numnodes), sizeof(uint32_t)) + + std::string(reinterpret_cast(&compressedSize), sizeof(int)) + + std::string(reinterpret_cast(&cacheCapacity), sizeof(int)) + + std::string(reinterpret_cast(&(this->tree->head->numnodes)), + sizeof(uint32_t)) + compressedStr; return result; @@ -304,16 +315,23 @@ std::string RadixTree::Serialize() { std::shared_ptr RadixTree::Deserialize(std::string data) { LOG(INFO) << "Deserialize......"; // use LZ4 to decompress the serialized string - int srcSize = *(int*) data.c_str(); + int compressedSize = *reinterpret_cast(data.data()); data.erase(0, sizeof(int)); - int cacheCapacity = *(int*) data.c_str(); + int cacheCapacity = *reinterpret_cast(data.data()); data.erase(0, sizeof(int)); - int rootNumNodes = *(uint32_t*) data.c_str(); + int rootNumNodes = *reinterpret_cast(data.data()); data.erase(0, sizeof(uint32_t)); - std::string decompressedStr(srcSize, '\0'); + int ds = ZSTD_getFrameContentSize(data.c_str(), data.size()); + if (ds == ZSTD_CONTENTSIZE_ERROR) { + LOG(ERROR) << "Error: not a valid compressed frame"; + } else if (ds == ZSTD_CONTENTSIZE_UNKNOWN) { + LOG(ERROR) + << "Error: original size unknown. 
Use streaming decompression instead."; + } + + std::string decompressedStr(ds + 1, '\0'); int decompressedSize = - ZSTD_decompress((void*) (decompressedStr.c_str()), decompressedStr.size(), - data.c_str(), srcSize); + ZSTD_decompress(decompressedStr.data(), ds, data.c_str(), compressedSize); if (ZSTD_isError(decompressedSize)) { LOG(ERROR) << "ZSTD decompression failed: " << ZSTD_getErrorName(decompressedSize); @@ -338,7 +356,6 @@ std::shared_ptr RadixTree::Deserialize(std::string data) { line.pop_back(); continue; } - LOG(INFO) << "data line:" << line << std::endl; std::istringstream lineStream(line); std::string tokenListPart, timestampPart, dataPart, subTreeSizePart; @@ -357,7 +374,7 @@ std::shared_ptr RadixTree::Deserialize(std::string data) { } } if (!std::getline(lineStream, dataPart)) { - LOG(INFO) << "data length is 0"; + LOG(ERROR) << "data length is 0"; } std::istringstream keyStream(tokenListPart); @@ -371,17 +388,14 @@ std::shared_ptr RadixTree::Deserialize(std::string data) { if (isMainTree) { std::istringstream timestampStream(timestampPart); if (!(timestampStream >> std::hex >> timestamp)) { - LOG(INFO) << "Invalid timestamp format."; - throw std::runtime_error("Invalid timestamp format."); + LOG(ERROR) << "Invalid timestamp format."; } std::istringstream subTreeSizeStream(subTreeSizePart); uint32_t subTreeSize; if (!(subTreeSizeStream >> std::hex >> subTreeSize)) { - LOG(INFO) << "Invalid sub tree size format."; - throw std::runtime_error("Invalid sub tree size format."); + LOG(ERROR) << "Invalid sub tree size format."; } - LOG(INFO) << "Deserialize sub tree size:" << subTreeSize; subTreeSizeList.push_back(subTreeSize); } @@ -455,8 +469,8 @@ std::shared_ptr RadixTree::Deserialize(std::string data) { // TBD // check retval raxInsertAndReturnDataNode(radixTree->tree, insertTokensArray, - insertTokensArrayLen, data, (void**) &dataNode, - NULL); + insertTokensArrayLen, data, + reinterpret_cast(&dataNode), NULL); if (dataNode == NULL) { throw 
std::runtime_error("Insert token list failed"); @@ -517,7 +531,7 @@ std::vector> RadixTree::SplitInternal( treeData->dataLength = 0; subTreeRootNode->custom_data = treeData; header = std::make_shared( - (DataWrapper*) raxGetData(subTreeRootNode), treeData); + reinterpret_cast(raxGetData(subTreeRootNode)), treeData); return TraverseTreeWithoutSubTree(subTreeRootNode); } @@ -536,8 +550,8 @@ std::vector> RadixTree::TraverseTreeWithoutSubTree( LOG(INFO) << "data node list:" << dataNodeList.size(); for (size_t i = 0; i < dataNodeList.size(); i++) { nodes.push_back(std::make_shared( - (DataWrapper*) raxGetData(dataNodeList[i]), - (DataWrapper*) dataNodeList[i]->custom_data)); + reinterpret_cast(raxGetData(dataNodeList[i])), + reinterpret_cast(dataNodeList[i]->custom_data))); } return nodes; } @@ -555,8 +569,9 @@ void RadixTree::ClearSubtreeData(void* data) { std::shared_ptr RadixTree::GetRootNode() { raxNode* node = raxFindAndReturnDataNode(this->tree, rootToken.data(), rootToken.size(), NULL); - return std::make_shared((DataWrapper*) raxGetData(node), - (DataWrapper*) node->custom_data); + return std::make_shared( + reinterpret_cast(raxGetData(node)), + reinterpret_cast(node->custom_data)); } void RadixTree::MergeTree(std::shared_ptr tree_1, @@ -591,7 +606,8 @@ std::set RadixTree::GetAllNodeData() { if (node->isnull) { continue; } - nodeDataSet.insert(((DataWrapper*) raxGetData(node))->data); + nodeDataSet.insert( + (reinterpret_cast(raxGetData(node)))->data); } return nodeDataSet; -} \ No newline at end of file +} diff --git a/modules/kv-state-cache/radix-tree/radix-tree.h b/modules/kv-state-cache/radix-tree/radix-tree.h index 8a2d5a959..23774ed02 100644 --- a/modules/kv-state-cache/radix-tree/radix-tree.h +++ b/modules/kv-state-cache/radix-tree/radix-tree.h @@ -13,21 +13,22 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -#ifndef RADIX_TREE_H -#define RADIX_TREE_H +#ifndef MODULES_KV_STATE_CACHE_RADIX_TREE_RADIX_TREE_H_ +#define MODULES_KV_STATE_CACHE_RADIX_TREE_RADIX_TREE_H_ -#include "radix.h" - -#include "common/util/base64.h" -#include "common/util/logging.h" +#include "kv-state-cache/radix-tree/radix.h" #include #include #include #include +#include #include -using namespace vineyard; +#include "common/util/base64.h" +#include "common/util/logging.h" + +using namespace vineyard; // NOLINT(build/namespaces) struct DataWrapper { void* data; @@ -75,7 +76,7 @@ class RadixTree : public std::enable_shared_from_this { std::vector tokens, std::shared_ptr& header); public: - RadixTree(int cacheCapacity); + RadixTree(int cacheCapacity); // NOLINT(runtime/explicit) ~RadixTree(); @@ -117,4 +118,4 @@ class RadixTree : public std::enable_shared_from_this { std::set GetAllNodeData(); }; -#endif +#endif // MODULES_KV_STATE_CACHE_RADIX_TREE_RADIX_TREE_H_ diff --git a/modules/kv-state-cache/radix-tree/radix.h b/modules/kv-state-cache/radix-tree/radix.h index 57da727cc..cbfebf2ba 100644 --- a/modules/kv-state-cache/radix-tree/radix.h +++ b/modules/kv-state-cache/radix-tree/radix.h @@ -33,13 +33,13 @@ #include #include -#include -#include -#include -#include #include +#include +#include +#include #include #include +#include /* Representation of a radix tree as implemented in this file, that contains * the token lists [1, 2, 3], [1, 2, 3, 4, 5, 6] and [1, 2, 3, 6, 7, 8] after @@ -110,9 +110,9 @@ typedef struct raxNode { uint32_t size : 26; /* Number of children, or compressed string len. */ uint32_t numnodes; /* Number of the child nodes */ uint32_t numele; /* Number of elements inside this node. 
*/ - uint64_t timestamp; /* Timestamps of the node */ + uint64_t timestamp; /* Timestamps of the node */ uint32_t sub_tree_size; /* Number of nodes in the sub tree */ - void *custom_data; + void* custom_data; /* Data layout is as follows: * * If node is not compressed we have 'size' bytes, one for each children @@ -196,9 +196,10 @@ typedef struct raxIterator { size_t key_len; /* Current key length. */ size_t key_max; /* Max key len the current key buffer can hold. */ int key_static_tokens[RAX_ITER_STATIC_LEN]; - bool add_to_subtree_list; /* Whether to add the current node to the subtree list. */ - std::vector> *subtree_list; /* List of subtrees. */ - std::vector *subtree_data_list; /* List of subtrees' data. */ + bool add_to_subtree_list; /* Whether to add the current node to the subtree + list. */ + std::vector>* subtree_list; /* List of subtrees. */ + std::vector* subtree_data_list; /* List of subtrees' data. */ raxNode* node; /* Current node. Only for unsafe iteration. */ raxStack stack; /* Stack used for unsafe iteration. */ raxNodeCallback node_cb; /* Optional node callback. Normally set to NULL. */ @@ -209,19 +210,24 @@ extern void* raxNotFound; /* Exported API. 
*/ rax* raxNew(void); -int raxInsert(rax *rax, int *s, size_t len, void *data, void **old, bool set_timestamp = true); +int raxInsert(rax* rax, int* s, size_t len, void* data, void** old, + bool set_timestamp = true); int raxTryInsert(rax* rax, int* s, size_t len, void* data, void** old); int raxInsertAndReturnDataNode(rax* rax, int* s, size_t len, void* data, void** node, void** old); -int raxRemove(rax* rax, int* s, size_t len, void** old, bool set_timestamp = true); +int raxRemove(rax* rax, int* s, size_t len, void** old, + bool set_timestamp = true); void* raxFind(rax* rax, int* s, size_t len); -raxNode* raxFindAndReturnDataNode(rax* rax, int* s, size_t len, raxNode** sub_tree_node = NULL, bool set_timestamp = true); -void raxSetSubtree(raxNode *n); -void raxSetSubtreeAllocated(raxNode *node); -void raxSetSubtreeNotNull(raxNode *node); -int raxFindNodeWithParent(rax *rax, int *s, size_t len, void **node, void **parent); +raxNode* raxFindAndReturnDataNode(rax* rax, int* s, size_t len, + raxNode** sub_tree_node = NULL, + bool set_timestamp = true); +void raxSetSubtree(raxNode* n); +void raxSetSubtreeAllocated(raxNode* node); +void raxSetSubtreeNotNull(raxNode* node); +int raxFindNodeWithParent(rax* rax, int* s, size_t len, void** node, + void** parent); void raxFree(rax* rax); -void raxFreeWithCallback(rax *rax, void (*free_callback)(raxNode *)); +void raxFreeWithCallback(rax* rax, void (*free_callback)(raxNode*)); void raxStart(raxIterator* it, rax* rt); int raxSeek(raxIterator* it, const char* op, int* ele, size_t len); int raxNext(raxIterator* it); @@ -232,25 +238,30 @@ void raxStop(raxIterator* it); int raxEOF(raxIterator* it); void raxShow(rax* rax); uint64_t raxSize(rax* rax); -void raxSetCustomData(raxNode *n, void *data); -void *raxGetCustomData(raxNode *n); +void raxSetCustomData(raxNode* n, void* data); +void* raxGetCustomData(raxNode* n); unsigned long raxTouch(raxNode* n); void raxSetDebugMsg(int onoff); void raxTraverse(raxNode* rax, std::vector>& 
dataNodeList); -void raxTraverseSubTree(raxNode* n, std::vector &dataNodeList); -raxNode *raxSplit(rax *rax, int *s, size_t len, std::vector& key); -void raxSerialize(rax* root, std::vector>& tokenList, std::vector& dataList, std::vector ×tampsList, - std::vector> *subtreeList, std::vector *subtreeNodeList); +void raxTraverseSubTree(raxNode* n, std::vector& dataNodeList); +raxNode* raxSplit(rax* rax, int* s, size_t len, std::vector& key); +void raxSerialize(rax* root, std::vector>& tokenList, + std::vector& dataList, + std::vector& timestampsList, + std::vector>* subtreeList, + std::vector* subtreeNodeList); /* Internal API. May be used by the node callback in order to access rax nodes * in a low level way, so this function is exported as well. */ -void raxSetData(raxNode *n, void *data); -void *raxGetData(raxNode *n); -int raxFindNode(rax *rax, int *s, size_t len, void **node); -void raxFindLastRecentNode(raxNode *node, std::vector& key); -void mergeTree(rax* first_tree, rax* second_tree, std::vector>& evicted_tokens, std::set>& insert_tokens, int max_node); -void testIteRax(rax *tree); +void raxSetData(raxNode* n, void* data); +void* raxGetData(raxNode* n); +int raxFindNode(rax* rax, int* s, size_t len, void** node); +void raxFindLastRecentNode(raxNode* node, std::vector& key); +void mergeTree(rax* first_tree, rax* second_tree, + std::vector>& evicted_tokens, + std::set>& insert_tokens, int max_node); +void testIteRax(rax* tree); raxNode* raxGetFirstChildPtr(raxNode* node); // raxNode *raxSetSubTreeAndReturnDataNode(rax *rax, int *s, size_t len); #endif diff --git a/modules/kv-state-cache/radix-tree/rax_malloc.h b/modules/kv-state-cache/radix-tree/rax_malloc.h index e9d5d5d7b..c62af522e 100644 --- a/modules/kv-state-cache/radix-tree/rax_malloc.h +++ b/modules/kv-state-cache/radix-tree/rax_malloc.h @@ -35,9 +35,10 @@ * the include of your alternate allocator if needed (not needed in order * to use the default libc allocator). 
*/ -#ifndef RAX_ALLOC_H -#define RAX_ALLOC_H +#ifndef MODULES_KV_STATE_CACHE_RADIX_TREE_RAX_MALLOC_H_ +#define MODULES_KV_STATE_CACHE_RADIX_TREE_RAX_MALLOC_H_ #define rax_malloc malloc #define rax_realloc realloc #define rax_free free -#endif + +#endif // MODULES_KV_STATE_CACHE_RADIX_TREE_RAX_MALLOC_H_