Skip to content
This repository was archived by the owner on Mar 20, 2023. It is now read-only.

Commit 9ebd22d

Browse files
committed
Cleanup
1 parent df32d6f commit 9ebd22d

File tree

3 files changed

+22
-28
lines changed

3 files changed

+22
-28
lines changed

coreneuron/gpu/nrn_acc_manager.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -670,9 +670,8 @@ void delete_ivoc_vect_from_device(IvocVect& vec) {
670670
if (n) {
671671
cnrn_target_delete(vec.data(), n);
672672
}
673-
// cnrn_target_delete(&vec);
674673
#else
675-
(void) vec;
674+
static_cast<void>(vec);
676675
#endif
677676
}
678677

@@ -1336,8 +1335,6 @@ void init_gpu() {
13361335
std::cout << " Info : " << num_devices_per_node << " GPUs shared by " << local_size
13371336
<< " ranks per node\n";
13381337
}
1339-
1340-
init_nrnran123();
13411338
}
13421339

13431340
void nrn_VecPlay_copyto_device(NrnThread* nt, void** d_vecplay) {

coreneuron/gpu/nrn_acc_manager.hpp

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,16 @@
11
/*
22
# =============================================================================
3-
# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL
3+
# Copyright (c) 2016 - 2022 Blue Brain Project/EPFL
44
#
55
# See top-level LICENSE file for details.
66
# =============================================================================
77
*/
8-
9-
#ifndef _nrn_device_manager_
10-
#define _nrn_device_manager_
11-
12-
#include "coreneuron/sim/multicore.hpp"
8+
#pragma once
139

1410
namespace coreneuron {
11+
struct Memb_list;
12+
struct NrnThread;
13+
struct NetSendBuffer_t;
1514
void setup_nrnthreads_on_device(NrnThread* threads, int nthreads);
1615
void delete_nrnthreads_on_device(NrnThread* threads, int nthreads);
1716
void update_nrnthreads_on_host(NrnThread* threads, int nthreads);
@@ -24,6 +23,4 @@ void update_net_send_buffer_on_host(NrnThread* nt, NetSendBuffer_t* nsb);
2423

2524
void update_weights_from_gpu(NrnThread* threads, int nthreads);
2625
void init_gpu();
27-
void init_nrnran123();
2826
} // namespace coreneuron
29-
#endif // _nrn_device_manager_

coreneuron/utils/randoms/nrnran123.cpp

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,16 @@
2121
#include <unordered_map>
2222
#endif
2323

24+
#ifdef __CUDACC__
2425
#include <nv/target>
26+
#endif
2527

2628
// Defining these attributes seems to help nvc++ in OpenMP target offload mode.
2729
#if defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \
2830
defined(_OPENMP) && defined(__CUDACC__)
2931
#define CORENRN_HOST_DEVICE __host__ __device__
3032
#elif defined(__CUDACC__)
33+
// This is necessary to make the new CUDA-syntax-in-.cpp version compile
3134
#define CORENRN_HOST_DEVICE __host__ __device__
3235
#else
3336
#define CORENRN_HOST_DEVICE
@@ -88,20 +91,24 @@ using random123_allocator = coreneuron::unified_allocator<coreneuron::nrnran123_
8891
OMP_Mutex g_instance_count_mutex;
8992
std::size_t g_instance_count{};
9093

91-
// not sure quite how nvc++ handles these, not sure we actually need the 2
92-
// different names?
9394
philox4x32_key_t g_k{};
95+
#ifdef __CUDACC__
96+
// Not 100% clear we need a different name (g_k_dev) here in addition to g_k,
97+
// but it's clearer and the overhead cannot be high (if it exists).
9498
__constant__ __device__ philox4x32_key_t g_k_dev{};
9599
// noinline to force "CUDA" not "acc routine seq" behaviour :shrug:
96100
__attribute__((noinline)) philox4x32_key_t& global_state() {
97101
if target (nv::target::is_device) {
98-
// printf("dev: &g_k=%p [seed %d]\n", &g_k_dev, g_k_dev.v[0]);
99102
return g_k_dev;
100103
} else {
101-
// printf("host: &g_k=%p [seed %d]\n", &g_k, g_k.v[0]);
102104
return g_k;
103105
}
104106
}
107+
#else
108+
philox4x32_key_t& global_state() {
109+
return g_k;
110+
}
111+
#endif
105112

106113
constexpr double SHIFT32 = 1.0 / 4294967297.0; /* 1/(2^32 + 1) */
107114

@@ -114,14 +121,6 @@ CORENRN_HOST_DEVICE philox4x32_ctr_t philox4x32_helper(coreneuron::nrnran123_Sta
114121
} // namespace
115122

116123
namespace coreneuron {
117-
void init_nrnran123() {
118-
// if(coreneuron::gpu_enabled()) {
119-
// // TODO only do this if it isn't already present?
120-
// auto& g_k = global_state();
121-
// nrn_pragma_acc(enter data copyin(g_k))
122-
// }
123-
}
124-
125124
std::size_t nrnran123_instance_count() {
126125
return g_instance_count;
127126
}
@@ -216,6 +215,7 @@ void nrnran123_set_globalindex(uint32_t gix) {
216215
if (g_k.v[0] != gix) {
217216
g_k.v[0] = gix;
218217
if (coreneuron::gpu_enabled()) {
218+
#ifdef __CUDACC__
219219
{
220220
auto const code = cudaMemcpyToSymbol(g_k_dev, &g_k, sizeof(g_k));
221221
assert(code == cudaSuccess);
@@ -224,10 +224,10 @@ void nrnran123_set_globalindex(uint32_t gix) {
224224
auto const code = cudaDeviceSynchronize();
225225
assert(code == cudaSuccess);
226226
}
227-
std::cout << "trying to read g_k_dev from host..." << std::endl;
228-
std::cout << g_k_dev.v[0] << std::endl;
229-
// nrn_pragma_acc(update device(g_k))
230-
// nrn_pragma_omp(target update to(g_k))
227+
#else
228+
nrn_pragma_acc(update device(g_k))
229+
nrn_pragma_omp(target update to(g_k))
230+
#endif
231231
}
232232
}
233233
}

0 commit comments

Comments (0)