Skip to content

Commit

Permalink
Refactor the GPU shared memory interfaces (#1585)
Browse files Browse the repository at this point in the history
Related issue number
--------------------

Fixes #1582, refactor the CMake script to minimize the dependency set as well.

Signed-off-by: Tao He <[email protected]>
  • Loading branch information
sighingnow authored Sep 29, 2023
1 parent d79f2e1 commit dc44bbd
Show file tree
Hide file tree
Showing 31 changed files with 592 additions and 832 deletions.
89 changes: 56 additions & 33 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,14 @@ if(POLICY CMP0069)
cmake_policy(SET CMP0069 NEW)
set(CMAKE_POLICY_DEFAULT_CMP0069 NEW)
endif()
if(POLICY CMP0104)
cmake_policy(SET CMP0104 OLD)
set(CMAKE_POLICY_DEFAULT_CMP0104 OLD)
endif()
if(POLICY CMP0146)
cmake_policy(SET CMP0146 OLD)
set(CMAKE_POLICY_DEFAULT_CMP0146 OLD)
endif()

project(vineyard LANGUAGES C CXX VERSION ${VINEYARD_VERSION})

Expand All @@ -32,10 +40,12 @@ option(VINEYARD_USE_LTO "Using IPO/LTO support for link-time optimization" OFF)
option(USE_LIBUNWIND "Using libunwind to retrieve the stack backtrace when exception occurs" ON)
option(USE_INCLUDE_WHAT_YOU_USE "Simplify the intra-module dependencies with iwyu" OFF)
option(USE_JSON_DIAGNOSTICS "Using json diagnostics to check the validity of metadata" OFF)
option(USE_GPU "Using gpu support" OFF)
option(USE_CUDA "Enabling GPU (CUDA) support" OFF)

option(BUILD_VINEYARD_SERVER "Build vineyard's server" ON)
option(BUILD_VINEYARD_SERVER_REDIS "Enable redis as the metadata backend" OFF)
option(BUILD_VINEYARD_SERVER_ETCD "Enable etcd as the metadata backend" ON)
option(BUILD_VINEYARD_SERVER_SPILLING "Enable spilling functionalities in vineyardd" ON)
option(BUILD_VINEYARD_CLIENT "Build vineyard's client" ON)
option(BUILD_VINEYARD_CLIENT_VERBOSE "Build vineyard's client with the most verbose logs" OFF)
option(BUILD_VINEYARD_PYTHON_BINDINGS "Build vineyard's python bindings" ON)
Expand Down Expand Up @@ -310,7 +320,7 @@ macro(find_boost)
set(Boost_USE_STATIC_LIBS ON CACHE BOOL "Use static version of boost libraries.")
set(Boost_USE_STATIC_RUNTIME ON CACHE BOOL "Use static version of boost runtimes.")
endif()
find_package(Boost COMPONENTS system)
find_package(Boost COMPONENTS system filesystem)
# Make boost::property_tree thread safe.
add_compile_options(-DBOOST_SPIRIT_THREADSAFE)
# Don't depends on the boost::system library.
Expand Down Expand Up @@ -473,11 +483,9 @@ macro(find_pthread)
endmacro()

macro(find_cuda)
#find cuda
enable_language(CUDA)
include(CheckLanguage)
check_language(CUDA)
find_package(CUDA REQUIRED)
# find cuda runtime library
set(CUDA_USE_STATIC_CUDA_RUNTIME ON)
find_package(CUDA REQUIRED)
endmacro(find_cuda)

macro(find_zstd)
Expand Down Expand Up @@ -649,16 +657,23 @@ endif()

# build vineyardd
if(BUILD_VINEYARD_SERVER)
find_apache_arrow()
find_etcd_cpp_apiv3()
find_gflags()
find_mimalloc()
find_openssl_libraries(REQUIRED)

if(BUILD_VINEYARD_SERVER_REDIS)
find_hiredis()
find_redis_plus_plus()
endif()
if(BUILD_VINEYARD_SERVER_ETCD)
find_etcd_cpp_apiv3()
find_openssl_libraries(REQUIRED)
endif()
if(BUILD_VINEYARD_SERVER_SPILLING)
find_apache_arrow()
endif()
if(USE_CUDA)
find_cuda()
endif()

file(GLOB_RECURSE SERVER_SRC_FILES "src/server/*.cc"
"src/common/compression/*.cc"
Expand All @@ -674,32 +689,13 @@ if(BUILD_VINEYARD_SERVER)
target_compile_options(vineyardd PRIVATE -DBUILD_VINEYARDD)
target_link_libraries(vineyardd PRIVATE ${Boost_LIBRARIES}
${CMAKE_DL_LIBS}
${CPPREST_LIB}
${ETCD_CPP_LIBRARIES}
${GFLAGS_LIBRARIES}
${GLOG_LIBRARIES}
${OPENSSL_LIBRARIES}
libzstd_static
mimalloc-static
)
target_include_directories(vineyardd PRIVATE ${ARROW_INCLUDE_DIR}
${GFLAGS_INCLUDE_DIR}
)
if(ARROW_SHARED_LIB)
target_link_libraries(vineyardd PRIVATE ${ARROW_SHARED_LIB})
else()
target_link_libraries(vineyardd PRIVATE ${ARROW_STATIC_LIB})
endif()

if(USE_GPU)
enable_language(CUDA)
find_package(CUDA REQUIRED)
add_definitions(-DENABLE_GPU)
include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
target_link_libraries(vineyardd PUBLIC ${CUDA_LIBRARIES})
endif()
target_include_directories(vineyardd PRIVATE ${GFLAGS_INCLUDE_DIR})

target_include_directories(vineyardd PRIVATE ${ETCD_CPP_INCLUDE_DIR})
if(BUILD_VINEYARD_SERVER_REDIS)
target_compile_options(vineyardd PRIVATE -DBUILD_VINEYARDD_REDIS)
target_include_directories(vineyardd PRIVATE ${HIREDIS_INCLUDE_DIR}
Expand All @@ -709,6 +705,29 @@ if(BUILD_VINEYARD_SERVER)
${REDIS_PLUS_PLUS_LIBRARIES}
)
endif()
if(BUILD_VINEYARD_SERVER_ETCD)
target_compile_options(vineyardd PRIVATE -DBUILD_VINEYARDD_ETCD)
target_include_directories(vineyardd PRIVATE ${ETCD_CPP_INCLUDE_DIR})
target_link_libraries(vineyardd PRIVATE ${CPPREST_LIB}
${ETCD_CPP_LIBRARIES}
${OPENSSL_LIBRARIES}
)
endif()
if(BUILD_VINEYARD_SERVER_SPILLING)
target_compile_options(vineyardd PRIVATE -DBUILD_VINEYARDD_SPILLING)
target_include_directories(vineyardd PRIVATE ${ARROW_INCLUDE_DIR})
if(ARROW_SHARED_LIB)
target_link_libraries(vineyardd PRIVATE ${ARROW_SHARED_LIB})
else()
target_link_libraries(vineyardd PRIVATE ${ARROW_STATIC_LIB})
endif()
endif()

if(USE_CUDA)
target_compile_options(vineyardd PRIVATE -DENABLE_CUDA)
target_include_directories(vineyardd PRIVATE ${CUDA_INCLUDE_DIRS})
target_link_libraries(vineyardd PRIVATE ${CUDA_LIBRARIES})
endif()

if(${LIBUNWIND_FOUND})
target_link_libraries(vineyardd PRIVATE ${LIBUNWIND_LIBRARIES})
Expand Down Expand Up @@ -771,9 +790,13 @@ if(BUILD_VINEYARD_CLIENT)
Threads::Threads
)
target_link_libraries(vineyard_client PRIVATE libzstd_static)
if(USE_GPU)
find_package(CUDA REQUIRED)
target_link_libraries(vineyard_client PUBLIC ${CUDA_LIBRARIES})

if(USE_CUDA)
find_cuda()

target_compile_options(vineyard_client PRIVATE -DENABLE_CUDA)
target_include_directories(vineyard_client PRIVATE ${CUDA_INCLUDE_DIRS})
target_link_libraries(vineyard_client PRIVATE ${CUDA_LIBRARIES})
endif()

# make sure `vineyard_internal_registry` been built.
Expand Down
200 changes: 43 additions & 157 deletions docs/tutorials/data-processing/gpu-memory-sharing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,8 @@ kinds of implementation (GPU, PPU, etc.) as the abstraction to share GPU memory
between different processes, as well as sharing memory between the host and
device.

The unified memory abstraction provides the following utilities:

- Accessing the memory from GPU or CPU using an unified abstraction: the
:code:`GPUData()` and :code:`CPUData()`;
- Synchronizing the memory between GPU and CPU: the :code:`syncFromCPU()` and
:code:`syncFromGPU()`.
The unified memory abstraction is able to automatically synchronize the memory
between host and devices by leveraging the RAII mechanism of C++.

Example
~~~~~~~
Expand All @@ -45,48 +41,66 @@ Example

ObjectID object_id;
Payload object;
std::shared_ptr<GPUUnifiedAddress> gua = nullptr;
RETURN_ON_ERROR(client.CreateGPUBuffer(data_size(), object_id, object, gua));
std::shared_ptr<MutableBuffer> buffer = nullptr;
RETURN_ON_ERROR(client.CreateGPUBuffer(data_size(), object_id, object, buffer));

CHECK(!buffer->is_cpu());
CHECK(buffer->is_mutable());

- Write data to the GPU buffer:
The result buffer's data :code:`buffer->mutable_data()` is a GPU memory pointer,
which can be directly passed to GPU kernels, e.g.,

.. code:: c++

void* gpu_ptr = nullptr;
RETURN_ON_ERROR(gua->GPUData(&gpu_ptr));
cudaMemcpy(gpu_ptr, data, data_size(), cudaMemcpyHostToDevice);
printKernel<<<1, 1>>>(buffer->data());

or, copy to CPU memory first and then synchronize to GPU memory (like CUDA unified memory):
- Composing the buffer content from host code like Unified Memory:

.. code:: c++

void* cpu_ptr = nullptr;
RETURN_ON_ERROR(gua->CPUData(&cpu_ptr));
memcpy(cpu_ptr, data, data_size());
RETURN_ON_ERROR(gua->syncFromCPU());
{
CUDABufferMirror mirror(*buffer, false);
memcpy(mirror.mutable_data(), "hello world", 12);
}
Here the :code:`mirror`'s :code:`data()` and :code:`mutable_data()` are host memory pointers
allocated using the :code:`cudaHostAlloc()` API. When :code:`CUDABufferMirror` destructing,
the host memory will be copied back to the GPU memory automatically.

The second argument of :code:`CUDABufferMirror` indicates whether the initial memory of the
GPU buffer needs to be copied to the host memory. Defaults to :code:`false`.

- Accessing the GPU buffer from another process:

.. code:: c++

ObjectID object_id = ...;
std::shared_ptr<GPUUnifiedAddress> gua = nullptr;
RETURN_ON_ERROR(client.GetGPUBuffer(object_id, true, gua));
std::shared_ptr<Buffer> buffer = nullptr;
RETURN_ON_ERROR(client.GetGPUBuffer(object_id, true, buffer));
CHECK(!buffer->is_cpu());
CHECK(!buffer->is_mutable());

void* gpu_ptr = nullptr;
RETURN_ON_ERROR(gua->GPUData(&gpu_ptr));
The result buffer's data :code:`buffer->data()` is a GPU memory pointer, which can be directly
passed to GPU kernels, e.g.,

.. code:: c++

printKernel<<<1, 1>>>(buffer->data());

- Accessing the shared GPU buffer from CPU:

.. code:: c++

ObjectID object_id = ...;
std::shared_ptr<GPUUnifiedAddress> gua = nullptr;
RETURN_ON_ERROR(client.GetGPUBuffer(object_id, true, gua));
{
CUDABufferMirror mirror(*buffer, true);
printf("CPU data from GPU is: %s\n",
reinterpret_cast<const char*>(mirror.data()));
}

void* cpu_ptr = nullptr;
RETURN_ON_ERROR(gua->CPUData(&cpu_ptr));
RETURN_ON_ERROR(gua->syncFromGPU());
Using the :code:`CUDABufferMirror` to access the GPU buffer from CPU, the mirror's :code:`data()`
is a host memory pointer allocated using the :code:`cudaHostAlloc()` API. For immutable :code:`Buffer`,
the second argument of :code:`CUDABufferMirror` must be :code:`true`, and the GPU memory will be
copied to the host memory when the mirror is constructed.

- Freeing the shared GPU buffer:

Expand All @@ -95,133 +109,5 @@ Example
ObjectID object_id = ...;
RETURN_ON_ERROR(client.DelData(object_id));

:code:`UnifiedMemory` APIs
~~~~~~~~~~~~~~~~~~~~~~~~~~

The complete :code:`UnifiedMemory` APIs are defined as:

.. code:: c++

class GPUUnifiedAddress {
/**
* @brief get the cpu memory address
*
* @param ptr the return cpu data address
* @return GUAError_t the error type
*/
GUAError_t CPUData(void** ptr);

/**
* @brief get the gpu memory address
*
* @param ptr the return gpu data address
* @return GUAError_t the error type
*/
GUAError_t GPUData(void** ptr);

/**
* @brief sync data from CPU related to this gua
*
* @return GUAError_t the error type
*/
GUAError_t syncFromCPU();
/**
* @brief sync data from GPU related to this gua
*
* @return GUAError_t the error type
*/
GUAError_t syncFromGPU();
/**
* @brief Malloc memory related to this gua if needed.
*
* @param size the memory size to be allocated
* @param ptr the memory address on cpu or GPU
* @param is_GPU allocate on GPU
* @return GUAError_t the error type
*/
GUAError_t ManagedMalloc(size_t size, void** ptr, bool is_GPU = false);
/**
* @brief Free the memory
*
*/
void ManagedFree();
/**
* @brief GUA to json
*
*/
void GUAToJSON();
/**
* @brief Get the Ipc Handle object
*
* @param handle the returned handle
* @return GUAError_t the error type
*/
GUAError_t getIpcHandle(cudaIpcMemHandle_t& handle);
/**
* @brief Set the IpcHandle of this GUA
*
* @param handle
*/
void setIpcHandle(cudaIpcMemHandle_t handle);
/**
* @brief Get the IpcHandle of this GUA as vector
*
* @return std::vector<int64_t>
*/
std::vector<int64_t> getIpcHandleVec();
/**
* @brief Set the IpcHandle vector of this GUA
*
* @param handle_vec
*/
void setIpcHandleVec(std::vector<int64_t> handle_vec);
/**
* @brief Set the GPU Mem Ptr object
*
* @param ptr
*/
void setGPUMemPtr(void* ptr);

/**
* @brief return the GPU memory pointer
*
* @return void* the GPU-side memory address
*/
void* getGPUMemPtr();
/**
* @brief Set the Cpu Mem Ptr object
*
* @param ptr
*/
void setCPUMemPtr(void* ptr);

/**
* @brief Get the Cpu Mem Ptr object
*
* @return void*
*/
void* getCPUMemPtr();

/**
* @brief Get the Size object
*
* @return int64_t
*/
int64_t getSize();
/**
* @brief Set the Size object
*
* @param data_size
*/
void setSize(int64_t data_size);
};
For complete example about GPU memory sharing, please refer to
`gpumalloc_test.cu <https://github.com/v6d-io/v6d/blob/main/test/gpumalloc_test.cu>`_
Loading

0 comments on commit dc44bbd

Please sign in to comment.