Skip to content
This repository has been archived by the owner on Mar 20, 2023. It is now read-only.

Commit

Permalink
Cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
olupton committed Apr 26, 2022
1 parent df32d6f commit 9ebd22d
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 28 deletions.
5 changes: 1 addition & 4 deletions coreneuron/gpu/nrn_acc_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -670,9 +670,8 @@ void delete_ivoc_vect_from_device(IvocVect& vec) {
if (n) {
cnrn_target_delete(vec.data(), n);
}
// cnrn_target_delete(&vec);
#else
(void) vec;
static_cast<void>(vec);
#endif
}

Expand Down Expand Up @@ -1336,8 +1335,6 @@ void init_gpu() {
std::cout << " Info : " << num_devices_per_node << " GPUs shared by " << local_size
<< " ranks per node\n";
}

init_nrnran123();
}

void nrn_VecPlay_copyto_device(NrnThread* nt, void** d_vecplay) {
Expand Down
13 changes: 5 additions & 8 deletions coreneuron/gpu/nrn_acc_manager.hpp
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
/*
# =============================================================================
# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL
# Copyright (c) 2016 - 2022 Blue Brain Project/EPFL
#
# See top-level LICENSE file for details.
# =============================================================================
*/

#ifndef _nrn_device_manager_
#define _nrn_device_manager_

#include "coreneuron/sim/multicore.hpp"
#pragma once

namespace coreneuron {
struct Memb_list;
struct NrnThread;
struct NetSendBuffer_t;
void setup_nrnthreads_on_device(NrnThread* threads, int nthreads);
void delete_nrnthreads_on_device(NrnThread* threads, int nthreads);
void update_nrnthreads_on_host(NrnThread* threads, int nthreads);
Expand All @@ -24,6 +23,4 @@ void update_net_send_buffer_on_host(NrnThread* nt, NetSendBuffer_t* nsb);

void update_weights_from_gpu(NrnThread* threads, int nthreads);
void init_gpu();
void init_nrnran123();
} // namespace coreneuron
#endif // _nrn_device_manager_
32 changes: 16 additions & 16 deletions coreneuron/utils/randoms/nrnran123.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,16 @@
#include <unordered_map>
#endif

#ifdef __CUDACC__
#include <nv/target>
#endif

// Defining these attributes seems to help nvc++ in OpenMP target offload mode.
#if defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \
defined(_OPENMP) && defined(__CUDACC__)
#define CORENRN_HOST_DEVICE __host__ __device__
#elif defined(__CUDACC__)
// This is necessary to make the new CUDA-syntax-in-.cpp version compile
#define CORENRN_HOST_DEVICE __host__ __device__
#else
#define CORENRN_HOST_DEVICE
Expand Down Expand Up @@ -88,20 +91,24 @@ using random123_allocator = coreneuron::unified_allocator<coreneuron::nrnran123_
OMP_Mutex g_instance_count_mutex;
std::size_t g_instance_count{};

// not sure quite how nvc++ handles these, not sure we actually need the 2
// different names?
philox4x32_key_t g_k{};
#ifdef __CUDACC__
// Device-side copy of the global Philox key. A separate symbol (g_k_dev) is
// kept in CUDA __constant__ memory in addition to the host-side g_k; the two
// are kept in sync elsewhere (via cudaMemcpyToSymbol). Not 100% clear a
// different name is strictly required, but it's clearer and the overhead
// cannot be high (if it exists).
__constant__ __device__ philox4x32_key_t g_k_dev{};
// Returns a reference to whichever copy of the global key is valid for the
// code currently executing (device -> g_k_dev, host -> g_k).
// noinline to force "CUDA" not "acc routine seq" behaviour when this is
// compiled for OpenACC/OpenMP offload. NOTE(review): `if target` is the
// nvc++/CUDA <nv/target> compile-path dispatch extension, not standard C++.
__attribute__((noinline)) philox4x32_key_t& global_state() {
    if target (nv::target::is_device) {
        // printf("dev: &g_k=%p [seed %d]\n", &g_k_dev, g_k_dev.v[0]);
        return g_k_dev;
    } else {
        // printf("host: &g_k=%p [seed %d]\n", &g_k, g_k.v[0]);
        return g_k;
    }
}
#else
// Host-only build: there is a single copy of the key.
philox4x32_key_t& global_state() {
    return g_k;
}
#endif

constexpr double SHIFT32 = 1.0 / 4294967297.0; /* 1/(2^32 + 1) */

Expand All @@ -114,14 +121,6 @@ CORENRN_HOST_DEVICE philox4x32_ctr_t philox4x32_helper(coreneuron::nrnran123_Sta
} // namespace

namespace coreneuron {
// Intentionally a no-op: earlier drafts copied the global Philox key to the
// device here (see the commented-out body); that responsibility now lives
// with nrnran123_set_globalindex. Kept as an empty stub for its callers.
void init_nrnran123() {
    // if(coreneuron::gpu_enabled()) {
    //     // TODO only do this if it isn't already present?
    //     auto& g_k = global_state();
    //     nrn_pragma_acc(enter data copyin(g_k))
    // }
}

// Returns the current number of live nrnran123 stream instances, as tracked
// by the file-level counter g_instance_count (updated under
// g_instance_count_mutex elsewhere in this file). The read itself is
// unsynchronized, so the value is only a snapshot.
std::size_t nrnran123_instance_count() {
    return g_instance_count;
}
Expand Down Expand Up @@ -216,6 +215,7 @@ void nrnran123_set_globalindex(uint32_t gix) {
if (g_k.v[0] != gix) {
g_k.v[0] = gix;
if (coreneuron::gpu_enabled()) {
#ifdef __CUDACC__
{
auto const code = cudaMemcpyToSymbol(g_k_dev, &g_k, sizeof(g_k));
assert(code == cudaSuccess);
Expand All @@ -224,10 +224,10 @@ void nrnran123_set_globalindex(uint32_t gix) {
auto const code = cudaDeviceSynchronize();
assert(code == cudaSuccess);
}
std::cout << "trying to read g_k_dev from host..." << std::endl;
std::cout << g_k_dev.v[0] << std::endl;
// nrn_pragma_acc(update device(g_k))
// nrn_pragma_omp(target update to(g_k))
#else
nrn_pragma_acc(update device(g_k))
nrn_pragma_omp(target update to(g_k))
#endif
}
}
}
Expand Down

0 comments on commit 9ebd22d

Please sign in to comment.