-
Notifications
You must be signed in to change notification settings - Fork 1
/
vector_sum_hip_nvidia.nim
59 lines (46 loc) · 1.74 KB
/
vector_sum_hip_nvidia.nim
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# This example builds HIP code for NVIDIA GPUs.
# vector_sum_hip_nvidia.nims is set up to use hipcc with HIP_PLATFORM=nvidia to build for the GPU.
# Requires nim >= 2.1.9.
# Build and run: HIP_PLATFORM=nvidia nim cpp -r examples/vector_sum_hip_nvidia.nim
# Only use hip or hippo functions (hippo functions map to hip).
# Requires `--passC:"-isystem \"/opt/rocm/include\""`; it is unclear why that directory is not included automatically.
import hippo
# Number of elements in each vector; also used as the kernel's bounds guard
# and as the grid size passed to the launch in main.
const N: int32 = 10
# GPU kernel: stores a[i] + b[i] into c[i], where i is the x index of the
# block executing it. a, b and c are read/written as arrays of cint.
proc addKernel(a, b, c: ptr[cint]) {.hippoGlobal.} =
  let idx = blockIdx.x
  # blocks whose index lies outside the N elements have nothing to do
  if idx >= N.uint:
    return
  # view the raw pointers as unchecked arrays so they can be indexed
  let
    lhs = cast[ptr UncheckedArray[cint]](a)
    rhs = cast[ptr UncheckedArray[cint]](b)
    sums = cast[ptr UncheckedArray[cint]](c)
  sums[idx] = lhs[idx] + rhs[idx]
proc main() =
  # Host driver: builds two N-element inputs, adds them on the GPU with
  # addKernel, then prints one "a + b = c" line per element.
  var
    a, b, c: array[N, int32]    # host buffers; c receives the result
    gpuA, gpuB, gpuC: pointer   # device buffers

  # prepare the host inputs: a[i] = -i, b[i] = i*i
  for idx in 0..<N:
    a[idx] = -idx
    b[idx] = idx * idx

  # reserve room for N int32 values per buffer on the device
  handleError hipMalloc(addr gpuA, sizeof(int32)*N)
  handleError hipMalloc(addr gpuB, sizeof(int32)*N)
  handleError hipMalloc(addr gpuC, sizeof(int32)*N)

  # upload both inputs
  handleError hipMemcpy(gpuA, addr a[0], sizeof(int32)*N, hipMemcpyHostToDevice)
  handleError hipMemcpy(gpuB, addr b[0], sizeof(int32)*N, hipMemcpyHostToDevice)

  # run the kernel on a grid of N blocks (the kernel indexes by blockIdx.x)
  hippoLaunchKernel(
    addKernel,
    gridDim = newDim3(N.uint32),
    args = (gpuA, gpuB, gpuC)
  )

  # download the sums into c
  handleError hipMemcpy(addr c[0], gpuC, sizeof(int32)*N, hipMemcpyDeviceToHost)

  # print every element-wise sum
  for idx in 0..<N:
    echo(a[idx], " + ", b[idx], " = ", c[idx])

  # release the device buffers
  handleError hipFree(gpuA)
  handleError hipFree(gpuB)
  handleError hipFree(gpuC)
# run the example only when this file is compiled as the main module
when isMainModule:
  main()