Skip to content

Commit

Permalink
Enhance the implementation of llm cache (#1913)
Browse files Browse the repository at this point in the history
Signed-off-by: Tao He <[email protected]>
  • Loading branch information
sighingnow authored Jun 18, 2024
1 parent 7a29347 commit 3ddde84
Show file tree
Hide file tree
Showing 37 changed files with 1,677 additions and 1,490 deletions.
96 changes: 35 additions & 61 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -856,49 +856,54 @@ if(BUILD_VINEYARD_CLIENT)
list(APPEND VINEYARD_INSTALL_LIBS vineyard_client)
endif()

# Bring in the bundled pybind11 once, before any Python extension module is
# declared. The extension targets themselves are declared further down with
# `pybind11_add_module()`.
if(BUILD_VINEYARD_PYTHON_BINDINGS)
  set(PYBIND11_PYTHON_VERSION 3)
  # Only enabled for CMake >= 3.27.
  # NOTE(review): per the pybind11 docs this switches pybind11 to the
  # FindPython module; confirm the reason for the 3.27 cut-off.
  if(NOT (CMAKE_VERSION VERSION_LESS "3.27"))
    set(PYBIND11_FINDPYTHON ON)
  endif()
  add_subdirectory_static(thirdparty/pybind11)
endif()

# setup_pybind11_module(<target> <relpath>)
#
# Applies the build settings shared by the pybind11 extension modules to
# `target`:
#   - debug info, linkage against `vineyard_client`, pybind11 include path;
#   - output into "${CMAKE_BINARY_DIR}/shared-lib";
#   - `-static` when building PyPI packages with Clang, `-Os` otherwise;
#   - an install RPATH that contains the loader-relative directory `relpath`.
#
# Arguments:
#   target   name of an already-declared extension module target.
#   relpath  path, relative to the module's final location, that is added to
#            the RPATH (as "$ORIGIN/<relpath>/" or "@loader_path/<relpath>/").
#
# Example:
#   setup_pybind11_module(_C ${RELATIVE_BUILD_PATH})
macro(setup_pybind11_module target relpath)
  target_add_debuginfo(${target})
  target_link_libraries(${target} PRIVATE vineyard_client)
  target_include_directories(${target} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/pybind11/include")
  target_compile_options(${target} PRIVATE -Wno-unused-value)
  set_target_properties(${target} PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/shared-lib")
  if(UNIX AND NOT APPLE)
    target_add_link_options(${target} PRIVATE OPTIONS -Wl,--exclude-libs=ALL)
  endif()
  if(BUILD_VINEYARD_PYPI_PACKAGES AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
    target_compile_options(${target} PRIVATE -static)
    target_add_link_options(${target} PRIVATE OPTIONS -static)
  else()
    target_compile_options(${target} PRIVATE -Os)
    target_add_link_options(${target} PRIVATE OPTIONS -Os)
  endif()

  # The two branches differ only in the RPATH syntax: ':' separators with
  # $ORIGIN on the non-Apple UNIX branch, ';' separators with @loader_path
  # on the Apple branch.
  if(UNIX AND NOT APPLE)
    set_target_properties(${target} PROPERTIES
      BUILD_WITH_INSTALL_RPATH TRUE
      INSTALL_RPATH_USE_LINK_PATH TRUE
      INSTALL_RPATH ".:\$ORIGIN:\$ORIGIN/${relpath}/:${CMAKE_INSTALL_PREFIX}/lib:${CMAKE_INSTALL_PREFIX}/lib64:${INSTALL_RPATH}")
  endif()
  if(APPLE)
    set_target_properties(${target} PROPERTIES
      BUILD_WITH_INSTALL_RPATH TRUE
      INSTALL_RPATH_USE_LINK_PATH TRUE
      INSTALL_RPATH ".;@loader_path;@loader_path/${relpath}/;${CMAKE_INSTALL_PREFIX}/lib;${CMAKE_INSTALL_PREFIX}/lib64;${INSTALL_RPATH}")
  endif()
endmacro()

if(BUILD_VINEYARD_PYTHON_BINDINGS)
pybind11_add_module(_C MODULE "python/client.cc"
"python/core.cc"
"python/error.cc"
"python/pybind11_docs.cc"
"python/pybind11_utils.cc"
"python/vineyard.cc")
file(RELATIVE_PATH RELATIVE_BUILD_PATH "${PROJECT_SOURCE_DIR}/python/vineyard" "${CMAKE_BINARY_DIR}/shared-lib")
setup_pybind11_module(_C ${RELATIVE_BUILD_PATH})
add_custom_target(vineyard_client_python
ALL
COMMAND cp "$<TARGET_FILE:_C>" "${PROJECT_SOURCE_DIR}/python/vineyard/"
Expand All @@ -909,47 +914,16 @@ if(BUILD_VINEYARD_PYTHON_BINDINGS)
endif()

if(BUILD_VINEYARD_PYTHON_BINDINGS AND BUILD_VINEYARD_LLM_CACHE)
set(PYBIND11_PYTHON_VERSION 3)
if(NOT (CMAKE_VERSION VERSION_LESS "3.27"))
set(PYBIND11_FINDPYTHON ON)
endif()
file(GLOB PYTHON_BIND_FILES "python/vineyard/llm/kv_state_cache.cc")
pybind11_add_module(llm_C MODULE ${PYTHON_BIND_FILES})
# make sure `vineyard_llm_cache` has been built.
add_dependencies(llm_C vineyard_llm_cache)
target_link_libraries(llm_C PRIVATE vineyard_client vineyard_llm_cache)
target_include_directories(llm_C PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/pybind11/include")
target_compile_options(llm_C PRIVATE -Wno-unused-value)
set_target_properties(llm_C PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/shared-lib")
if(UNIX AND NOT APPLE)
target_add_link_options(llm_C PRIVATE OPTIONS -Wl,--exclude-libs=ALL)
endif()
if(BUILD_VINEYARD_PYPI_PACKAGES AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
target_compile_options(llm_C PRIVATE -static)
target_add_link_options(llm_C PRIVATE OPTIONS -static)
else()
target_compile_options(llm_C PRIVATE -Os)
target_add_link_options(llm_C PRIVATE OPTIONS -Os)
endif()

pybind11_add_module(_llm_C MODULE "python/vineyard/llm/cache.cc")
file(RELATIVE_PATH RELATIVE_BUILD_PATH "${PROJECT_SOURCE_DIR}/python/vineyard/llm" "${CMAKE_BINARY_DIR}/shared-lib")
if(UNIX AND NOT APPLE)
set_target_properties(llm_C PROPERTIES
BUILD_WITH_INSTALL_RPATH TRUE
INSTALL_RPATH_USE_LINK_PATH TRUE
INSTALL_RPATH ".:\$ORIGIN:\$ORIGIN/${RELATIVE_BUILD_PATH}/:${CMAKE_INSTALL_PREFIX}/lib:${CMAKE_INSTALL_PREFIX}/lib64:${INSTALL_RPATH}")
endif()
if(APPLE)
set_target_properties(llm_C PROPERTIES
BUILD_WITH_INSTALL_RPATH TRUE
INSTALL_RPATH_USE_LINK_PATH TRUE
INSTALL_RPATH ".;@loader_path;@loader_path/${RELATIVE_BUILD_PATH}/;${CMAKE_INSTALL_PREFIX}/lib;${CMAKE_INSTALL_PREFIX}/lib64;${INSTALL_RPATH}")
endif()

setup_pybind11_module(_llm_C ${RELATIVE_BUILD_PATH})
# make sure `vineyard_llm_cache` has been built.
add_dependencies(_llm_C vineyard_llm_cache)
target_link_libraries(_llm_C PRIVATE vineyard_client vineyard_llm_cache)
add_custom_target(vineyard_llm_python
ALL
COMMAND cp "$<TARGET_FILE:llm_C>" "${PROJECT_SOURCE_DIR}/python/vineyard/llm/"
DEPENDS llm_C
COMMAND cp "$<TARGET_FILE:_llm_C>" "${PROJECT_SOURCE_DIR}/python/vineyard/llm/"
DEPENDS _llm_C
COMMENT "Copying llm kv cache python extensions."
VERBATIM)
add_dependencies(vineyard_llm_python vineyard_client_python)
Expand Down
43 changes: 37 additions & 6 deletions modules/basic/ds/arrow_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,6 @@ limitations under the License.
#include "arrow/api.h"
#include "arrow/compute/api.h"
#include "arrow/ipc/api.h" // IWYU pragma: keep
#include "boost/algorithm/string/classification.hpp"
#include "boost/algorithm/string/join.hpp"
#include "boost/algorithm/string/split.hpp"

#include "client/ds/blob.h"
#include "client/ds/remote_blob.h"
Expand All @@ -34,6 +31,41 @@ limitations under the License.

namespace vineyard {

namespace detail {

// Concatenates the elements of `srcs` into one string, placing `sep` between
// every pair of adjacent elements. An empty `srcs` yields an empty string.
static inline std::string string_join(std::vector<std::string> const& srcs,
                                      std::string const& sep) {
  std::string joined;
  bool is_first = true;
  for (auto const& piece : srcs) {
    // The separator goes in front of every element except the first one.
    if (!is_first) {
      joined += sep;
    }
    joined += piece;
    is_first = false;
  }
  return joined;
}

// Splits `content` at every character that occurs in `patterns` and appends
// the resulting non-empty tokens to `rs`, in order of appearance.
//
// - `rs` is appended to, not cleared.
// - Empty tokens (leading, trailing or consecutive delimiters) are skipped.
// - If `patterns` is empty, a non-empty `content` is appended as one token.
//
// Each iteration resumes the scan one position past the delimiter it just
// found; without that step the scan would stay on the same delimiter and
// never terminate.
static inline void string_split(std::vector<std::string>& rs,
                                std::string const& content,
                                std::string const& patterns) {
  size_t begin = 0;
  while (begin < content.size()) {
    size_t end = content.find_first_of(patterns, begin);
    if (end == std::string::npos) {
      end = content.size();
    }
    if (begin < end) {
      rs.emplace_back(content.substr(begin, end - begin));
    }
    // Step over the delimiter (or past the end, which ends the loop).
    begin = end + 1;
  }
}

} // namespace detail

std::shared_ptr<arrow::DataType> FromAnyType(AnyType type) {
switch (type) {
case AnyType::Undefined:
Expand Down Expand Up @@ -1439,7 +1471,7 @@ Status ConsolidateColumns(const std::shared_ptr<arrow::Table>& table,
std::string const& consolidated_column_name,
std::shared_ptr<arrow::Table>& out) {
// check the types of columns that will be consolidated
std::string column_names_joined = boost::algorithm::join(column_names, ",");
std::string column_names_joined = detail::string_join(column_names, ",");
auto schema = table->schema();
std::shared_ptr<arrow::DataType> dtype = nullptr;
std::vector<int> column_indexes;
Expand Down Expand Up @@ -1514,8 +1546,7 @@ Status ConsolidateColumns(const std::shared_ptr<arrow::Table>& table,
}

std::vector<std::string> consolidate_columns_vec;
boost::algorithm::split(consolidate_columns_vec, consolidate_columns,
boost::is_any_of(",;"));
detail::string_split(consolidate_columns_vec, consolidate_columns, ",;");
return ConsolidateColumns(table, consolidate_columns_vec, "", out);
}

Expand Down
7 changes: 3 additions & 4 deletions modules/llm-cache/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,9 @@ target_link_libraries(vineyard_llm_cache PRIVATE libzstd_static ${GLOG_LIBRARIES
target_link_libraries(vineyard_llm_cache PUBLIC vineyard_client)

# install bundled thirdparty: rax and MurmurHash3
install(DIRECTORY
${PROJECT_SOURCE_DIR}/thirdparty/rax
${PROJECT_SOURCE_DIR}/thirdparty/MurmurHash3
${PROJECT_SOURCE_DIR}/thirdparty/cityhash
install(DIRECTORY ${PROJECT_SOURCE_DIR}/thirdparty/rax
${PROJECT_SOURCE_DIR}/thirdparty/MurmurHash3
${PROJECT_SOURCE_DIR}/thirdparty/cityhash
DESTINATION include/vineyard/contrib # target directory
FILES_MATCHING # install only matched files
PATTERN "*.h" # select header files
Expand Down
16 changes: 8 additions & 8 deletions modules/llm-cache/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ In this section, we will compare the two methods in terms of latency and suitabl

## Usage

We provide [C++](https://github.com/v6d-io/v6d/blob/main/modules/llm-cache/ds/kv_state_cache_manager.h) and [Python](https://github.com/v6d-io/v6d/blob/main/python/vineyard/llm/__init__.py) APIs for Vineyard LLM KV Cache. Based on the inference framework, you can use the corresponding API to integrate the Vineyard LLM KV Cache.
We provide [C++](https://github.com/v6d-io/v6d/blob/main/modules/llm-cache/ds/kv_cache_manager.h) and [Python](https://github.com/v6d-io/v6d/blob/main/python/vineyard/llm/__init__.py) APIs for Vineyard LLM KV Cache. Based on the inference framework, you can use the corresponding API to integrate the Vineyard LLM KV Cache.

### C++ API

Expand Down Expand Up @@ -90,10 +90,10 @@ $ ./build/bin/vineyardd --socket /tmp/vineyard_test.sock
Then open another terminal to run the vineyard llm kv cache test.

```bash
$ ./bin/kv_state_cache_test --client-num 1 --vineyard-ipc-sockets /tmp/vineyard_test.sock
$ ./bin/kv_cache_test --client-num 1 --vineyard-ipc-sockets /tmp/vineyard_test.sock
```

For more information about how to use the C++ API, you can refer to the [C++ API implementation](https://github.com/v6d-io/v6d/blob/main/modules/llm-cache/ds/kv_state_cache_manager.cc) and the [related tests](https://github.com/v6d-io/v6d/tree/main/modules/llm-cache/tests).
For more information about how to use the C++ API, you can refer to the [C++ API implementation](https://github.com/v6d-io/v6d/blob/main/modules/llm-cache/ds/kv_cache_manager.cc) and the [related tests](https://github.com/v6d-io/v6d/tree/main/modules/llm-cache/tests).


### Python API
Expand Down Expand Up @@ -165,8 +165,8 @@ vineyard_cache_config = VineyardCacheConfig(
)
cache = KVCache(
cache_config=vineyard_cache_config,
tensor_bytes=16, # should be the same as the nbytes of the tensor
cache_capacity=10,
tensor_nbytes=16, # should be the same as the nbytes of the tensor
cache_capacity=1024,
layer=2,
)

Expand Down Expand Up @@ -248,13 +248,13 @@ from vineyard.llm.config import VineyardCacheConfig

file_cache_config = FileCacheConfig(
chunk_size=2,
split_number=2,
hash_chunk_size=2,
root="/tmp/vineyard/llm_cache",
)
cache = KVCache(
cache_config=file_cache_config,
tensor_bytes=16, # should be the same as the nbytes of the tensor
cache_capacity=10,
tensor_nbytes=16, # should be the same as the nbytes of the tensor
cache_capacity=1024,
layer=2,
)

Expand Down
22 changes: 11 additions & 11 deletions modules/llm-cache/ds/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,32 +49,32 @@ struct VineyardCacheConfig : public KVCacheConfig {
};

struct FileCacheConfig : public KVCacheConfig {
int batchSize;
int splitNumber;
int chunkSize;
int hashChunkSize;
std::string root;
FilesystemType filesystemType;
int clientGCInterval; // second
int ttl; // second
int gcInterval; // in seconds
int ttl; // in seconds
bool enbaleGlobalGC;
int globalGCInterval; // second
int globalTTL; // second
int globalGCInterval; // in seconds
int globalTTL; // in seconds

// Default gc interval is 30 minutes and default global gc interval is 3
// hours.
FileCacheConfig(int tensorByte = 10, int cacheCapacity = 10, int layer = 1,
int batchSize = 4, int splitNumber = 2,
int chunkSize = 4, int hashChunkSize = 2,
std::string root = "/tmp/llm_cache/",
FilesystemType filesystemType = LOCAL,
int clientGCInterval = 30 * 60, int ttl = 30 * 60,
int gcInterval = 30 * 60, int ttl = 30 * 60,
bool enbaleGlobalGC = false,
int globalGCInterval = 3 * 60 * 60,
int globalTTL = 3 * 60 * 60)
: KVCacheConfig{tensorByte, cacheCapacity, layer} {
this->root = root;
this->batchSize = batchSize;
this->splitNumber = splitNumber;
this->chunkSize = chunkSize;
this->hashChunkSize = hashChunkSize;
this->filesystemType = filesystemType;
this->clientGCInterval = clientGCInterval;
this->gcInterval = gcInterval;
this->ttl = ttl;
this->enbaleGlobalGC = enbaleGlobalGC;
this->globalGCInterval = globalGCInterval;
Expand Down
Loading

0 comments on commit 3ddde84

Please sign in to comment.