Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bindings for cuPy pointers #4459

Merged
merged 4 commits into from
Feb 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions bindings/Python/py11Engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,39 @@ void Engine::Put(Variable variable, const pybind11::array &array, const Mode lau
}
}

#ifdef ADIOS2_HAVE_CUDA
/**
 * Writes the data of a variable from a buffer given as a raw integer address.
 *
 * The Python example shipped with this change obtains the address from a CuPy array
 * through `array.data.ptr`. The address is reinterpreted as `const T *`, where T is the
 * C++ type matching variable.Type(), and forwarded to the typed core engine Put.
 *
 * NOTE(review): `long` is narrower than a pointer on LLP64 targets (e.g. 64-bit Windows);
 * confirm whether such targets need to be supported before relying on this signature.
 *
 * @param variable handle of the variable to write
 * @param array buffer address as an integer; it is not validated in this function
 * @param launch mode forwarded unchanged to the core engine Put
 * @throws std::invalid_argument when the variable type is neither Struct nor one of the
 * types enumerated by ADIOS2_FOREACH_NUMPY_TYPE_1ARG
 */
void Engine::Put(Variable variable, long array, const Mode launch)
{
    helper::CheckForNullptr(m_Engine, "for engine, in call to Engine::Put a cupy pointer");
    helper::CheckForNullptr(variable.m_VariableBase,
                            "for variable, in call to Engine::Put a cupy pointer");

    const adios2::DataType type = helper::GetDataTypeFromString(variable.Type());

    if (type == adios2::DataType::Struct)
    {
        // not supported
        // NOTE(review): this branch returns without writing and without reporting an
        // error; confirm that a silent no-op is the intended behavior for Struct.
    }
#define declare_type(T)                                                                        \
    else if (type == helper::GetDataType<T>())                                                 \
    {                                                                                          \
        m_Engine->Put(*dynamic_cast<core::Variable<T> *>(variable.m_VariableBase),             \
                      reinterpret_cast<const T *>(array), launch);                             \
    }
    ADIOS2_FOREACH_NUMPY_TYPE_1ARG(declare_type)
#undef declare_type
    else
    {
        throw std::invalid_argument("ERROR: for variable " + variable.Name() +
                                    " cupy pointer type " + variable.Type() +
                                    " is not supported (found type " + ToString(type) +
                                    ") or "
                                    "is not memory contiguous"
                                    ", in call to Put\n");
    }
}
#endif

void Engine::Put(Variable variable, const std::vector<int64_t> &ints, const Mode launch)
{
helper::CheckForNullptr(m_Engine, "in call to Engine::Put list of ints");
Expand Down Expand Up @@ -170,6 +203,38 @@ void Engine::Get(Variable variable, pybind11::array &array, const Mode launch)
}
}

#ifdef ADIOS2_HAVE_CUDA
/**
 * Reads the data of a variable into a buffer given as a raw integer address.
 *
 * The Python example shipped with this change obtains the address from a CuPy array
 * through `array.data.ptr`. The address is reinterpreted as `T *`, where T is the C++
 * type matching variable.Type(), and forwarded to the typed core engine Get.
 *
 * NOTE(review): `long` is narrower than a pointer on LLP64 targets (e.g. 64-bit Windows);
 * confirm whether such targets need to be supported before relying on this signature.
 *
 * @param variable handle of the variable to read
 * @param array destination buffer address as an integer; it is not validated in this
 * function
 * @param launch mode forwarded unchanged to the core engine Get
 * @throws std::invalid_argument when the variable type is neither Struct nor one of the
 * types enumerated by ADIOS2_FOREACH_NUMPY_TYPE_1ARG
 */
void Engine::Get(Variable variable, long array, const Mode launch)
{
    helper::CheckForNullptr(m_Engine, "for engine, in call to Engine::Get a cupy pointer");
    helper::CheckForNullptr(variable.m_VariableBase,
                            "for variable, in call to Engine::Get a cupy pointer");

    const adios2::DataType type = helper::GetDataTypeFromString(variable.Type());

    if (type == adios2::DataType::Struct)
    {
        // not supported
        // NOTE(review): this branch returns without filling the buffer and without
        // reporting an error; confirm that a silent no-op is intended for Struct.
    }
#define declare_type(T)                                                                        \
    else if (type == helper::GetDataType<T>())                                                 \
    {                                                                                          \
        m_Engine->Get(*dynamic_cast<core::Variable<T> *>(variable.m_VariableBase),             \
                      reinterpret_cast<T *>(array), launch);                                   \
    }
    ADIOS2_FOREACH_NUMPY_TYPE_1ARG(declare_type)
#undef declare_type
    else
    {
        // The adjacent literals are concatenated by the compiler, so the separating
        // space before "3)" has to be written explicitly.
        throw std::invalid_argument("ERROR: in variable " + variable.Name() + " of type " +
                                    variable.Type() +
                                    ", cupy pointer type is 1) not supported, 2) a type mismatch or "
                                    "3) is not memory contiguous"
                                    ", in call to Get\n");
    }
}
#endif

std::string Engine::Get(Variable variable, const Mode launch)
{
std::string string;
Expand Down
6 changes: 6 additions & 0 deletions bindings/Python/py11Engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,19 @@ class Engine
const Mode launch = Mode::Deferred);
void Put(Variable variable, const std::vector<double> &doubles,
const Mode launch = Mode::Deferred);
#ifdef ADIOS2_HAVE_CUDA
/**
 * Put overload that takes the data buffer as a raw integer address instead of an
 * array object; only declared when ADIOS2_HAVE_CUDA is defined.
 * NOTE(review): the default launch mode here is Mode::Deferred, while the Python
 * binding registers this overload with launch defaulting to Mode::Sync.
 */
void Put(Variable variable, long array, const Mode launch = Mode::Deferred);
#endif
void Put(Variable variable, const std::vector<std::complex<double>> &complexes,
const Mode launch = Mode::Deferred);
void Put(Variable variable, const std::string &string);
void PerformPuts();
void PerformDataWrite();

void Get(Variable variable, pybind11::array &array, const Mode launch = Mode::Deferred);
#ifdef ADIOS2_HAVE_CUDA
/**
 * Get overload that takes the destination buffer as a raw integer address instead of
 * an array object; only declared when ADIOS2_HAVE_CUDA is defined.
 * NOTE(review): the default launch mode here is Mode::Deferred, while the Python
 * binding registers this overload with launch defaulting to Mode::Sync.
 */
void Get(Variable variable, long array, const Mode launch = Mode::Deferred);
#endif
std::string Get(Variable variable, const Mode launch = Mode::Deferred);

void PerformGets();
Expand Down
31 changes: 31 additions & 0 deletions bindings/Python/py11Variable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,37 @@ Dims Variable::Shape(const size_t step) const
return shape;
}

#ifdef ADIOS2_HAVE_CUDA
void Variable::SetMemorySpace(const MemorySpace memSpace)
{
m_VariableBase->SetMemorySpace(memSpace);
}

/**
 * Queries the shape of the wrapped variable for a given memory space and step.
 *
 * The wrapped base pointer is downcast to the typed core variable matching its stored
 * data type and the call is forwarded to that variable's Shape(step, memSpace).
 *
 * @param memSpace memory space forwarded to the core variable
 * @param step step forwarded to the core variable
 * @return the dimensions reported by the core variable; an empty Dims when the type is
 * Struct or is not one of the types enumerated by ADIOS2_FOREACH_STDTYPE_1ARG
 */
Dims Variable::Shape(const MemorySpace memSpace, const size_t step) const
{
    helper::CheckForNullptr(m_VariableBase, "in call to Variable::Shape");

    Dims dims;
    const adios2::DataType dataType = m_VariableBase->m_Type;

    if (dataType == adios2::DataType::Struct)
    {
        // not supported
    }
#define shape_for_type(T)                                                                      \
    else if (dataType == adios2::helper::GetDataType<T>())                                     \
    {                                                                                          \
        const auto *typedVariable =                                                            \
            dynamic_cast<const adios2::core::Variable<T> *>(m_VariableBase);                   \
        dims = typedVariable->Shape(step, memSpace);                                           \
    }
    ADIOS2_FOREACH_STDTYPE_1ARG(shape_for_type)
#undef shape_for_type

    return dims;
}

#endif
Dims Variable::Start() const
{
helper::CheckForNullptr(m_VariableBase, "in call to Variable::Start");
Expand Down
5 changes: 5 additions & 0 deletions bindings/Python/py11Variable.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,11 @@ class Variable
* @return shape vector
*/
Dims Shape(const size_t step = adios2::EngineCurrentStep) const;
#ifdef ADIOS2_HAVE_CUDA
/**
 * Inspects the shape for a given memory space; only declared when ADIOS2_HAVE_CUDA
 * is defined
 * @param memSpace memory space the shape is requested for
 * @param step step to inspect, defaults to adios2::EngineCurrentStep
 * @return shape vector
 */
Dims Shape(const MemorySpace memSpace, const size_t step = adios2::EngineCurrentStep) const;

/**
 * Sets the memory space of the variable; only declared when ADIOS2_HAVE_CUDA is
 * defined
 * @param memSpace memory space to set
 */
void SetMemorySpace(const MemorySpace memSpace);
#endif

/**
* Inspects current start point
Expand Down
39 changes: 38 additions & 1 deletion bindings/Python/py11glue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,12 @@ PYBIND11_MODULE(ADIOS2_PYTHON_MODULE_NAME, m)
.value("StoreData", adios2::DerivedVarType::StoreData)
.export_values();

#ifdef ADIOS2_HAVE_CUDA
pybind11::enum_<adios2::MemorySpace>(m, "MemorySpace")
.value("Host", adios2::MemorySpace::Host)
.value("GPU", adios2::MemorySpace::GPU);
#endif

pybind11::class_<adios2::Accuracy>(m, "Accuracy")
.def(pybind11::init<double, double, bool>())
.def_readwrite("error", &adios2::Accuracy::error)
Expand Down Expand Up @@ -396,8 +402,21 @@ PYBIND11_MODULE(ADIOS2_PYTHON_MODULE_NAME, m)
.def("Type", &adios2::py11::Variable::Type)
.def("Sizeof", &adios2::py11::Variable::Sizeof)
.def("ShapeID", &adios2::py11::Variable::ShapeID)
.def("Shape", &adios2::py11::Variable::Shape,
.def("Shape",
(adios2::Dims(adios2::py11::Variable::*)(const size_t) const) &
adios2::py11::Variable::Shape,
pybind11::arg("step") = adios2::EngineCurrentStep)
#ifdef ADIOS2_HAVE_CUDA
.def("Shape",
(adios2::Dims(adios2::py11::Variable::*)(const adios2::MemorySpace, const size_t)
const) &
adios2::py11::Variable::Shape,
pybind11::arg("memSpace"), pybind11::arg("step") = adios2::EngineCurrentStep)
.def("SetMemorySpace",
(void(adios2::py11::Variable::*)(const adios2::MemorySpace)) &
adios2::py11::Variable::SetMemorySpace,
pybind11::arg("memSpace"))
#endif
.def("Start", &adios2::py11::Variable::Start)
.def("Count", &adios2::py11::Variable::Count)
.def("Steps", &adios2::py11::Variable::Steps)
Expand Down Expand Up @@ -495,6 +514,15 @@ PYBIND11_MODULE(ADIOS2_PYTHON_MODULE_NAME, m)
pybind11::arg("variable"), pybind11::arg("floats"),
pybind11::arg("launch") = adios2::Mode::Sync)

#ifdef ADIOS2_HAVE_CUDA
.def("Put",
(void(adios2::py11::Engine::*)(adios2::py11::Variable variable, long,
const adios2::Mode launch)) &
adios2::py11::Engine::Put,
pybind11::arg("variable"), pybind11::arg("cypyPointer"),
pybind11::arg("launch") = adios2::Mode::Sync)
#endif

.def("Put",
(void(adios2::py11::Engine::*)(adios2::py11::Variable,
const std::vector<std::complex<double>> &,
Expand All @@ -514,6 +542,15 @@ PYBIND11_MODULE(ADIOS2_PYTHON_MODULE_NAME, m)
pybind11::arg("variable"), pybind11::arg("array"),
pybind11::arg("launch") = adios2::Mode::Deferred)

#ifdef ADIOS2_HAVE_CUDA
.def("Get",
(void(adios2::py11::Engine::*)(adios2::py11::Variable variable, long,
const adios2::Mode launch)) &
adios2::py11::Engine::Get,
pybind11::arg("variable"), pybind11::arg("cupyPointer"),
pybind11::arg("launch") = adios2::Mode::Sync)
#endif

.def("Get",
(std::string(adios2::py11::Engine::*)(adios2::py11::Variable,
const adios2::Mode launch)) &
Expand Down
73 changes: 73 additions & 0 deletions examples/hello/bpStepsWriteReadCuda/bpStepsWriteReadCuda.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import numpy as np
import cupy as cp
from adios2 import FileReader
import adios2.bindings as adios2

def write_array(fileName, nSteps, gpuArray, cpuArray):
    """Write a CPU array and a GPU array to `fileName` for `nSteps` steps.

    Each step puts `cpuArray` as the array object itself and `gpuArray` as its
    raw device pointer (`gpuArray.data.ptr`). After each step the local GPU
    data is doubled and the local CPU data is incremented by one.
    """
    adiosObj = adios2.ADIOS()
    writerIO = adiosObj.DeclareIO("cupyWriter")

    # cpuArray is handed to both DefineVariable calls so that it defines the
    # type of both variables (float32 in this example)
    gpuDims = gpuArray.shape
    gpuVar = writerIO.DefineVariable(
        "gpuArray", cpuArray, gpuDims, [0] * len(gpuDims), gpuDims)
    # setting the memory space to GPU is optional
    gpuVar.SetMemorySpace(adios2.MemorySpace.GPU)

    cpuDims = cpuArray.shape
    cpuVar = writerIO.DefineVariable(
        "cpuArray", cpuArray, cpuDims, [0] * len(cpuDims), cpuDims)

    # one output step per simulation step, holding both arrays
    stream = writerIO.Open(fileName, adios2.Mode.Write)
    for _ in range(nSteps):
        stream.BeginStep()
        stream.Put(cpuVar, cpuArray)
        stream.Put(gpuVar, gpuArray.data.ptr)
        stream.EndStep()
        # new arrays are created for the next step; the caller's arrays are
        # not modified in place
        gpuArray = gpuArray * 2
        cpuArray = cpuArray + 1
    stream.Close()

    print("Write to file %s: %s data from GPU and %s data from CPU" % (
        fileName, gpuArray.shape, cpuArray.shape))

def read_array(fileName, nSteps):
    """Read and print the "gpuArray" and "cpuArray" variables for `nSteps` steps.

    For every step the GPU variable is read into a freshly allocated CuPy
    buffer through its raw device pointer, and the CPU variable is read into
    a NumPy buffer. Both buffers are allocated as float32, matching the data
    written by this example.
    """
    adios = adios2.ADIOS()
    ioReader = adios.DeclareIO("cupyReader")
    rStream = ioReader.Open(fileName, adios2.Mode.Read)
    for step in range(nSteps):
        rStream.BeginStep()
        # prepare input buffers
        gpuVar = ioReader.InquireVariable("gpuArray")
        cpuVar = ioReader.InquireVariable("cpuArray")
        cpuBuffer = np.zeros(cpuVar.Shape(), dtype=np.float32)
        gpuShape = gpuVar.Shape(adios2.MemorySpace.GPU)
        gpuBuffer = cp.zeros(gpuShape, dtype=np.float32)
        # select the whole variable: the start offset gets one zero per
        # dimension so the selection is valid for any rank, not only 2-D
        gpuVar.SetSelection([[0] * len(gpuShape), gpuShape])
        # populate data
        rStream.Get(gpuVar, gpuBuffer.data.ptr)
        rStream.Get(cpuVar, cpuBuffer)
        # the buffers are only printed after EndStep has been called
        rStream.EndStep()
        print("Step %d: read GPU data\n %s" % (step, gpuBuffer))
        print("Step %d: read CPU data\n %s" % (step, cpuBuffer))
    rStream.Close()


if __name__ == '__main__':
    # host-side data used by the example
    cpuArray = np.array([[0, 1.0, 2.0], [3.0, 4.0, 5.0]], dtype=np.float32)
    # device-side copy of the same values
    gpuArray = cp.asarray(cpuArray)
    print("Array allocation: ", gpuArray.device)

    # report the state of CuPy's default device and pinned memory pools
    devicePool = cp.get_default_memory_pool()
    pinnedPool = cp.get_default_pinned_memory_pool()
    print("Bytes required to store the gpu array", gpuArray.nbytes)
    print("Bytes allocated on the device memory pool", devicePool.total_bytes())
    print("Bytes used on the device memory pool", devicePool.used_bytes())
    print("Blocks allocated on the pinned memory pool (The allocated pinned"
          " memory is released just after the transfer is complete)",
          pinnedPool.n_free_blocks())

    # write the arrays for a couple of steps, then read them back
    stepCount = 2
    bpFile = "StepsWriteReadCuPy.bp"
    write_array(bpFile, stepCount, gpuArray, cpuArray)
    read_array(bpFile, stepCount)
Loading