Skip to content

Commit

Permalink
print computeMode
Browse files Browse the repository at this point in the history
  • Loading branch information
cwpearson committed Feb 10, 2020
1 parent 2508ba8 commit 036267a
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 5 deletions.
9 changes: 4 additions & 5 deletions include/stencil/local_domain.cuh
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#pragma once

#include <iostream>
#include <mpi.h>

#include "stencil/cuda_runtime.hpp"
#include "stencil/dim3.hpp"
Expand Down Expand Up @@ -45,13 +44,13 @@ public:

~LocalDomain() {

int rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
// int rank;
// MPI_Comm_rank(MPI_COMM_WORLD, &rank);

std::cerr << "rank=" << rank << " ~LocalDomain(): device=" << dev_ << "\n";
// std::cerr << "rank=" << rank << " ~LocalDomain(): device=" << dev_ << "\n";
CUDA_RUNTIME(cudaSetDevice(dev_));
for (auto p : currDataPtrs_) {
std::cerr << "rank=" << rank << " ~LocalDomain(): cudaFree " << uintptr_t(p) << "\n";
// std::cerr << "rank=" << rank << " ~LocalDomain(): cudaFree " << uintptr_t(p) << "\n";
CUDA_RUNTIME(cudaFree(p));
}
CUDA_RUNTIME(cudaFree(devCurrDataPtrs_));
Expand Down
16 changes: 16 additions & 0 deletions include/stencil/stencil.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,22 @@ class DistributedDomain {
CUDA_RUNTIME(cudaGetDeviceCount(&deviceCount));
std::cerr << "[" << rank_ << "] cudaGetDeviceCount= " << deviceCount << "\n";

/*
Possible values of cudaDeviceProp.computeMode, which is printed for each device below:
cudaComputeModeDefault = 0
Default compute mode (Multiple threads can use cudaSetDevice() with this device)
cudaComputeModeExclusive = 1
Compute-exclusive-thread mode (Only one thread in one process will be able to use cudaSetDevice() with this device)
cudaComputeModeProhibited = 2
Compute-prohibited mode (No threads can use cudaSetDevice() with this device)
cudaComputeModeExclusiveProcess = 3
Compute-exclusive-process mode (Many threads in one process will be able to use cudaSetDevice() with this device)
*/
cudaDeviceProp prop;
for (int i = 0; i < deviceCount; ++i) {
CUDA_RUNTIME(cudaGetDeviceProperties(&prop, i));
std::cerr << "[" << rank_ << "] cudaDeviceProp.computeMode=" << prop.computeMode << "\n";
}

// Determine GPUs this DistributedDomain is responsible for
if (gpus_.empty()) {
// if fewer colocated ranks than GPUs, round-robin GPUs to ranks
Expand Down

0 comments on commit 036267a

Please sign in to comment.