Skip to content

Commit

Permalink
cuda: add test for device linking
Browse files Browse the repository at this point in the history
  • Loading branch information
SoapGentoo committed May 7, 2024
1 parent 3a73c28 commit c2a7aca
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 0 deletions.
5 changes: 5 additions & 0 deletions test cases/cuda/17 separate compilation linking/b.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#include "b.h"

/* Definition of the device-global array that b.h declares `extern`. */
__device__ int g[N];

/*
 * Device function compiled in its own translation unit so that callers in
 * other .cu files can only reach it through device linking.
 *
 * Adds one to the element of g selected by the calling thread's threadIdx.x.
 * No bounds check is performed: the caller must guarantee threadIdx.x < N.
 */
__device__ void bar(void)
{
    const unsigned int slot = threadIdx.x;

    g[slot] += 1;
}
5 changes: 5 additions & 0 deletions test cases/cuda/17 separate compilation linking/b.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/*
 * Declarations shared between b.cu (which defines them) and main.cu (which
 * uses them from a different translation unit).
 */
#ifndef DEVICE_LINKING_B_H
#define DEVICE_LINKING_B_H

/* Element count of g; main.cu also uses it as the thread-block size. */
#define N 8

/* Device-global array, defined in b.cu. */
extern __device__ int g[N];

/* Increments g[threadIdx.x]; defined in b.cu. */
extern __device__ void bar(void);

#endif /* DEVICE_LINKING_B_H */
44 changes: 44 additions & 0 deletions test cases/cuda/17 separate compilation linking/main.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#include <stdio.h>

#include "b.h"

/*
 * Kernel: writes the block's thread indices into g[] in reverse order, then
 * calls bar(), which lives in a separately compiled translation unit (b.cu).
 *
 * Launch layout: only threadIdx.x / blockDim.x are consulted; main() launches
 * it as a single 1D block of N threads. Uses N ints of static shared memory.
 *
 * Both a[] and g[] hold exactly N elements, so the number of participating
 * threads is clamped to N; threads beyond that do nothing except take part in
 * the barrier. For blockDim.x <= N the behaviour is unchanged.
 */
__global__ void foo(void)
{
    __shared__ int a[N];

    const unsigned int limit = N;
    const unsigned int tid = threadIdx.x;
    const unsigned int count = blockDim.x < limit ? blockDim.x : limit;
    const bool active = tid < count;

    if (active) {
        a[tid] = tid;
    }

    /* Kept outside the branch so that every thread of the block reaches it. */
    __syncthreads();

    if (active) {
        /* Read the value stored by the mirrored thread. */
        g[tid] = a[count - tid - 1];

        /* Cross-translation-unit device call: increments g[tid]. */
        bar();
    }
}

/*
 * Host driver for the device-linking test.
 *
 * Launches foo on one block of N threads, copies the device array g back to
 * the host and checks the sum of its elements.
 *
 * Returns 0 only when the sum matches; returns 1 on any CUDA error or on a
 * wrong sum, so that a test harness looking at the exit status sees the
 * failure.
 */
int main(void)
{
    /*
     * foo stores (N - 1 - i) in g[i] and bar adds 1 to it, so the total is
     * N * (N - 1) / 2 + N, i.e. 36 for N == 8.
     */
    const int expected = N * (N - 1) / 2 + N;
    unsigned int i;
    int *dg, hg[N];
    int sum = 0;
    cudaError_t err;

    foo<<<1, N>>>();

    /* A kernel launch returns nothing; launch errors must be queried. */
    err = cudaGetLastError();
    if (err != cudaSuccess) {
        printf("couldn't launch the kernel: %s\n", cudaGetErrorString(err));
        return 1;
    }

    if (cudaGetSymbolAddress((void**)&dg, g) != cudaSuccess) {
        printf("couldn't get the symbol addr\n");
        return 1;
    }
    /* Blocking copy on the default stream: runs after foo has finished. */
    if (cudaMemcpy(hg, dg, N * sizeof(int), cudaMemcpyDeviceToHost) != cudaSuccess) {
        printf("couldn't memcpy\n");
        return 1;
    }

    for (i = 0; i < N; i++) {
        sum += hg[i];
    }

    if (sum != expected) {
        printf("FAILED (%d)\n", sum);
        return 1;
    }

    printf("PASSED\n");
    return 0;
}
19 changes: 19 additions & 0 deletions test cases/cuda/17 separate compilation linking/meson.build
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# example here is inspired by Nvidia's blog post:
# https://developer.nvidia.com/blog/separate-compilation-linking-cuda-device-code/
# code:
# https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#examples

# Build definition for the CUDA separate-compilation / device-linking test.
project('device linking', ['cpp', 'cuda'], version : '1.0.0')

# Handle to the CUDA compiler and Meson's (unstable) CUDA helper module.
nvcc = meson.get_compiler('cuda')
cuda = import('unstable-cuda')

# Ask the CUDA module for the architecture flags matching this nvcc version.
# NOTE(review): 'detected' appears to pin the architecture list to 8.0 instead
# of probing the build machine's GPU - confirm against the module docs.
arch_flags = cuda.nvcc_arch_flags(nvcc.version(), 'Auto', detected : ['8.0'])

# Log the toolchain details to ease debugging of test failures.
message('NVCC version: ' + nvcc.version())
message('NVCC flags: ' + ' '.join(arch_flags))

# Test device linking with -dc (--device-c), which nvcc documents as equivalent
# to `--relocatable-device-code=true --compile`.
# b.cu goes into a static library and main.cu into the executable, so the
# device symbols `g` and `bar` have to be resolved across the two at link time.
lib = static_library('devicefuncs', ['b.cu'], cuda_args : ['-dc'] + arch_flags)
# The same arch flags are also passed to the link step via link_args.
exe = executable('app', 'main.cu', cuda_args : ['-dc'] + arch_flags, link_with : lib, link_args : arch_flags)
# The test result is determined by the exit status of the executable.
test('cudatest', exe)

0 comments on commit c2a7aca

Please sign in to comment.