From 36f70980a330855b9586cbbee8bbbc6382582dea Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Tue, 31 Dec 2024 14:54:21 +0000 Subject: [PATCH] chore: clean up cudaLaunchKernel --- codegen/manual_client.cpp | 54 ++++++++++++--------------------------- 1 file changed, 17 insertions(+), 37 deletions(-) diff --git a/codegen/manual_client.cpp b/codegen/manual_client.cpp index a47cb8c..64a7aae 100644 --- a/codegen/manual_client.cpp +++ b/codegen/manual_client.cpp @@ -329,34 +329,20 @@ cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, void **args, size_t sharedMem, cudaStream_t stream) { cudaError_t return_value; + cudaError_t memcpy_return; - cuda_memcpy_unified_ptrs(0, cudaMemcpyHostToDevice); + memcpy_return = cuda_memcpy_unified_ptrs(0, cudaMemcpyHostToDevice); + if (memcpy_return != cudaSuccess) + return memcpy_return; // Start the RPC request int request_id = rpc_start_request(0, RPC_cudaLaunchKernel); - if (request_id < 0) { - return cudaErrorDevicesUnavailable; - } - - if (rpc_write(0, &func, sizeof(const void *)) < 0) { - return cudaErrorDevicesUnavailable; - } - - if (rpc_write(0, &gridDim, sizeof(dim3)) < 0) { - return cudaErrorDevicesUnavailable; - } - - if (rpc_write(0, &blockDim, sizeof(dim3)) < 0) { - return cudaErrorDevicesUnavailable; - } - - if (rpc_write(0, &sharedMem, sizeof(size_t)) < 0) { - return cudaErrorDevicesUnavailable; - } - - if (rpc_write(0, &stream, sizeof(cudaStream_t)) < 0) { + if (request_id < 0 || rpc_write(0, &func, sizeof(const void *)) < 0 || + rpc_write(0, &gridDim, sizeof(dim3)) < 0 || + rpc_write(0, &blockDim, sizeof(dim3)) < 0 || + rpc_write(0, &sharedMem, sizeof(size_t)) < 0 || + rpc_write(0, &stream, sizeof(cudaStream_t)) < 0) return cudaErrorDevicesUnavailable; - } Function *f = nullptr; for (auto &function : functions) @@ -372,15 +358,13 @@ cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, return cudaErrorDevicesUnavailable; } - if (rpc_wait_for_response(0) < 0) { - return cudaErrorDevicesUnavailable; - } - - if (rpc_end_response(0, &return_value) < 0) { + if (rpc_wait_for_response(0) < 0 || rpc_end_response(0, &return_value) < 0) { return cudaErrorDevicesUnavailable; } - cuda_memcpy_unified_ptrs(0, cudaMemcpyDeviceToHost); + memcpy_return = cuda_memcpy_unified_ptrs(0, cudaMemcpyDeviceToHost); + if (memcpy_return != cudaSuccess) + return memcpy_return; return return_value; } @@ -436,8 +420,6 @@ void parse_ptx_string(void *fatCubin, const char *ptx_string, i++; if (ptx_string[i] == '(') { - std::cout << "found function args" << std::endl; - // parse out the args-list for (; arg_count < MAX_ARGS; arg_count++) { int arg_size = 0; @@ -463,7 +445,6 @@ void parse_ptx_string(void *fatCubin, const char *ptx_string, i++; if (ptx_string[i] == '.') { - std::cout << "found arg type" << std::endl; // read the type, ignoring if it's not a valid type int type_size = get_type_size(ptx_string + (++i)); if (type_size == 0) @@ -673,10 +654,10 @@ extern "C" void __cudaRegisterFunction(void **fatCubinHandle, function.host_func = hostFun; } -extern "C" { -void __cudaRegisterVar(void **fatCubinHandle, char *hostVar, - char *deviceAddress, const char *deviceName, int ext, - size_t size, int constant, int global) { +extern "C" void __cudaRegisterVar(void **fatCubinHandle, char *hostVar, + char *deviceAddress, const char *deviceName, + int ext, size_t size, int constant, + int global) { void *return_value; std::cout << "calling __cudaRegisterVar" << std::endl; @@ -754,7 +735,6 @@ void __cudaRegisterVar(void **fatCubinHandle, char *hostVar, return; } } -} cudaError_t cudaFree(void *devPtr) { cudaError_t return_value;