chore: clean up cudaLaunchKernel #70

Merged 1 commit on Dec 31, 2024
codegen/manual_client.cpp: 54 changes (17 additions & 37 deletions)
@@ -329,34 +329,20 @@ cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
                             void **args, size_t sharedMem,
                             cudaStream_t stream) {
   cudaError_t return_value;
+  cudaError_t memcpy_return;
 
-  cuda_memcpy_unified_ptrs(0, cudaMemcpyHostToDevice);
+  memcpy_return = cuda_memcpy_unified_ptrs(0, cudaMemcpyHostToDevice);
+  if (memcpy_return != cudaSuccess)
+    return memcpy_return;
 
   // Start the RPC request
   int request_id = rpc_start_request(0, RPC_cudaLaunchKernel);
-  if (request_id < 0) {
-    return cudaErrorDevicesUnavailable;
-  }
-
-  if (rpc_write(0, &func, sizeof(const void *)) < 0) {
-    return cudaErrorDevicesUnavailable;
-  }
-
-  if (rpc_write(0, &gridDim, sizeof(dim3)) < 0) {
-    return cudaErrorDevicesUnavailable;
-  }
-
-  if (rpc_write(0, &blockDim, sizeof(dim3)) < 0) {
-    return cudaErrorDevicesUnavailable;
-  }
-
-  if (rpc_write(0, &sharedMem, sizeof(size_t)) < 0) {
-    return cudaErrorDevicesUnavailable;
-  }
-
-  if (rpc_write(0, &stream, sizeof(cudaStream_t)) < 0) {
+  if (request_id < 0 || rpc_write(0, &func, sizeof(const void *)) < 0 ||
+      rpc_write(0, &gridDim, sizeof(dim3)) < 0 ||
+      rpc_write(0, &blockDim, sizeof(dim3)) < 0 ||
+      rpc_write(0, &sharedMem, sizeof(size_t)) < 0 ||
+      rpc_write(0, &stream, sizeof(cudaStream_t)) < 0)
     return cudaErrorDevicesUnavailable;
-  }
 
   Function *f = nullptr;
   for (auto &function : functions)
@@ -372,15 +358,13 @@ cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
     return cudaErrorDevicesUnavailable;
   }
 
-  if (rpc_wait_for_response(0) < 0) {
-    return cudaErrorDevicesUnavailable;
-  }
-
-  if (rpc_end_response(0, &return_value) < 0) {
+  if (rpc_wait_for_response(0) < 0 || rpc_end_response(0, &return_value) < 0) {
     return cudaErrorDevicesUnavailable;
   }
 
-  cuda_memcpy_unified_ptrs(0, cudaMemcpyDeviceToHost);
+  memcpy_return = cuda_memcpy_unified_ptrs(0, cudaMemcpyDeviceToHost);
+  if (memcpy_return != cudaSuccess)
+    return memcpy_return;
 
   return return_value;
 }
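The consolidated condition is behavior-preserving because `||` in C++ evaluates left to right and short-circuits: each `rpc_write` still runs in its original order, and the first failure skips the remaining writes and returns `cudaErrorDevicesUnavailable`, exactly like the unrolled if/return chain it replaces. The hunks also stop discarding the result of `cuda_memcpy_unified_ptrs` and propagate it to the caller. A standalone sketch (not code from this PR) illustrating the short-circuit ordering:

```cpp
#include <cstdio>

// Stand-in for an rpc_write-style call: prints that it ran and returns rc.
static int write_step(const char *name, int rc) {
  std::printf("%s ran\n", name);
  return rc;
}

int main() {
  // '||' evaluates strictly left to right and stops at the first failing
  // operand, so later "writes" are skipped exactly as the unrolled
  // if/return chain in the old code would have skipped them.
  if (write_step("func", 0) < 0 || write_step("gridDim", -1) < 0 ||
      write_step("blockDim", 0) < 0) {
    std::printf("error path taken\n");
  }
  // Output: "func ran", "gridDim ran", "error path taken"; blockDim is skipped.
  return 0;
}
```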
@@ -436,8 +420,6 @@ void parse_ptx_string(void *fatCubin, const char *ptx_string,
       i++;
 
       if (ptx_string[i] == '(') {
-        std::cout << "found function args" << std::endl;
-
         // parse out the args-list
         for (; arg_count < MAX_ARGS; arg_count++) {
           int arg_size = 0;
@@ -463,7 +445,6 @@ void parse_ptx_string(void *fatCubin, const char *ptx_string,
           i++;
 
           if (ptx_string[i] == '.') {
-            std::cout << "found arg type" << std::endl;
             // read the type, ignoring if it's not a valid type
             int type_size = get_type_size(ptx_string + (++i));
             if (type_size == 0)
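Both parse_ptx_string hunks only drop debug prints; the parsing logic is unchanged: once the parameter list's '(' is found, each argument's '.'-prefixed type token is handed to `get_type_size`, and a return of 0 means the token is skipped as invalid. The diff does not show `get_type_size` itself; the following is a hypothetical sketch of such a lookup, assuming it maps PTX fundamental type names to byte widths:

```cpp
#include <cstddef>
#include <cstring>

// Hypothetical sketch of a get_type_size-style helper; the real
// implementation in this repo is not shown in the diff. It maps a PTX
// parameter type token ("u32", "f64", ...; the parser has already stepped
// past the '.') to its size in bytes, and returns 0 for anything it does
// not recognize, matching the "ignore invalid types" behaviour above.
static size_t get_type_size_sketch(const char *s) {
  struct { const char *name; size_t size; } table[] = {
      {"u8", 1},  {"s8", 1},  {"b8", 1},
      {"u16", 2}, {"s16", 2}, {"b16", 2}, {"f16", 2},
      {"u32", 4}, {"s32", 4}, {"b32", 4}, {"f32", 4},
      {"u64", 8}, {"s64", 8}, {"b64", 8}, {"f64", 8},
  };
  for (const auto &entry : table)
    if (std::strncmp(s, entry.name, std::strlen(entry.name)) == 0)
      return entry.size;
  return 0; // unknown token: caller skips it
}
```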
@@ -673,10 +654,10 @@ extern "C" void __cudaRegisterFunction(void **fatCubinHandle,
   function.host_func = hostFun;
 }
 
-extern "C" {
-void __cudaRegisterVar(void **fatCubinHandle, char *hostVar,
-                       char *deviceAddress, const char *deviceName, int ext,
-                       size_t size, int constant, int global) {
+extern "C" void __cudaRegisterVar(void **fatCubinHandle, char *hostVar,
+                                  char *deviceAddress, const char *deviceName,
+                                  int ext, size_t size, int constant,
+                                  int global) {
   void *return_value;
 
   std::cout << "calling __cudaRegisterVar" << std::endl;
@@ -754,7 +735,6 @@ void __cudaRegisterVar(void **fatCubinHandle, char *hostVar,
     return;
   }
 }
-}
 
 cudaError_t cudaFree(void *devPtr) {
   cudaError_t return_value;