diff --git a/bindings/Python/py11Engine.cpp b/bindings/Python/py11Engine.cpp
index 090c18f6d9..a93aa204e1 100644
--- a/bindings/Python/py11Engine.cpp
+++ b/bindings/Python/py11Engine.cpp
@@ -78,6 +78,42 @@ void Engine::Put(Variable variable, const pybind11::array &array, const Mode lau
     }
 }
 
+#ifdef ADIOS2_HAVE_CUDA
+// Put from a raw device address (e.g. cupy's `array.data.ptr`) passed as an integer.
+// NOTE(review): `long` is only 32 bits wide on LLP64 platforms (64-bit Windows), so a
+// 64-bit device address would not fit there - confirm the supported platforms.
+void Engine::Put(Variable variable, long array, const Mode launch)
+{
+    helper::CheckForNullptr(m_Engine, "in call to Engine::Put a cupy pointer");
+    helper::CheckForNullptr(variable.m_VariableBase,
+                            "for variable, in call to Engine::Put a cupy pointer");
+
+    const adios2::DataType type = helper::GetDataTypeFromString(variable.Type());
+
+    if (type == adios2::DataType::Struct)
+    {
+        // not supported
+    }
+#define declare_type(T)                                                                    \
+    else if (type == helper::GetDataType<T>())                                             \
+    {                                                                                      \
+        m_Engine->Put(*dynamic_cast<core::Variable<T> *>(variable.m_VariableBase),         \
+                      reinterpret_cast<const T *>(array), launch);                         \
+    }
+    ADIOS2_FOREACH_NUMPY_TYPE_1ARG(declare_type)
+#undef declare_type
+    else
+    {
+        throw std::invalid_argument("ERROR: for variable " + variable.Name() +
+                                    " cupy pointer type " + variable.Type() +
+                                    " is not supported (found type " + ToString(type) +
+                                    ") or "
+                                    "is not memory contiguous "
+                                    ", in call to Put\n");
+    }
+}
+#endif
+
 void Engine::Put(Variable variable, const std::vector<int64_t> &ints, const Mode launch)
 {
     helper::CheckForNullptr(m_Engine, "in call to Engine::Put list of ints");
@@ -170,6 +206,39 @@ void Engine::Get(Variable variable, pybind11::array &array, const Mode launch)
     }
 }
 
+#ifdef ADIOS2_HAVE_CUDA
+// Get into a raw device address (e.g. cupy's `array.data.ptr`) passed as an integer.
+void Engine::Get(Variable variable, long array, const Mode launch)
+{
+    helper::CheckForNullptr(m_Engine, "for engine, in call to Engine::Get a cupy pointer");
+    helper::CheckForNullptr(variable.m_VariableBase,
+                            "for variable, in call to Engine::Get a cupy pointer");
+
+    const adios2::DataType type = helper::GetDataTypeFromString(variable.Type());
+
+    if (type == adios2::DataType::Struct)
+    {
+        // not supported
+    }
+#define declare_type(T)                                                                    \
+    else if (type == helper::GetDataType<T>())                                             \
+    {                                                                                      \
+        m_Engine->Get(*dynamic_cast<core::Variable<T> *>(variable.m_VariableBase),         \
+                      reinterpret_cast<T *>(array), launch);                               \
+    }
+    ADIOS2_FOREACH_NUMPY_TYPE_1ARG(declare_type)
+#undef declare_type
+    else
+    {
+        throw std::invalid_argument("ERROR: in variable " + variable.Name() + " of type " +
+                                    variable.Type() +
+                                    ", cupy pointer type is 1) not supported, 2) a type mismatch or"
+                                    " 3) is not memory contiguous "
+                                    ", in call to Get\n");
+    }
+}
+#endif
+
 std::string Engine::Get(Variable variable, const Mode launch)
 {
     std::string string;
diff --git a/bindings/Python/py11Engine.h b/bindings/Python/py11Engine.h
index 617efd1d55..ec7323c504 100644
--- a/bindings/Python/py11Engine.h
+++ b/bindings/Python/py11Engine.h
@@ -53,6 +53,9 @@ class Engine
              const Mode launch = Mode::Deferred);
     void Put(Variable variable, const std::vector<double> &doubles,
              const Mode launch = Mode::Deferred);
+#ifdef ADIOS2_HAVE_CUDA
+    void Put(Variable variable, long array, const Mode launch = Mode::Deferred);
+#endif
     void Put(Variable variable, const std::vector<std::complex<double>> &complexes,
              const Mode launch = Mode::Deferred);
     void Put(Variable variable, const std::string &string);
@@ -60,6 +63,9 @@ class Engine
     void PerformDataWrite();
 
     void Get(Variable variable, pybind11::array &array, const Mode launch = Mode::Deferred);
+#ifdef ADIOS2_HAVE_CUDA
+    void Get(Variable variable, long array, const Mode launch = Mode::Deferred);
+#endif
     std::string Get(Variable variable, const Mode launch = Mode::Deferred);
 
     void PerformGets();
diff --git a/bindings/Python/py11Variable.cpp b/bindings/Python/py11Variable.cpp
index bc677f0012..55fe28c608 100644
--- a/bindings/Python/py11Variable.cpp
+++ b/bindings/Python/py11Variable.cpp
@@ -173,6 +173,38 @@ Dims Variable::Shape(const size_t step) const
     return shape;
 }
 
+#ifdef ADIOS2_HAVE_CUDA
+void Variable::SetMemorySpace(const MemorySpace memSpace)
+{
+    helper::CheckForNullptr(m_VariableBase, "in call to Variable::SetMemorySpace");
+    m_VariableBase->SetMemorySpace(memSpace);
+}
+
+Dims Variable::Shape(const MemorySpace memSpace, const size_t step) const
+{
+    helper::CheckForNullptr(m_VariableBase, "in call to Variable::Shape");
+
+    const adios2::DataType typeCpp = m_VariableBase->m_Type;
+    Dims shape;
+
+    if (typeCpp == adios2::DataType::Struct)
+    {
+        // not supported
+    }
+#define declare_template_instantiation(T)                                                  \
+    else if (typeCpp == adios2::helper::GetDataType<T>())                                  \
+    {                                                                                      \
+        const adios2::core::Variable<T> *variable =                                        \
+            dynamic_cast<const adios2::core::Variable<T> *>(m_VariableBase);               \
+        shape = variable->Shape(step, memSpace);                                           \
+    }
+    ADIOS2_FOREACH_STDTYPE_1ARG(declare_template_instantiation)
+#undef declare_template_instantiation
+
+    return shape;
+}
+
+#endif
 Dims Variable::Start() const
 {
     helper::CheckForNullptr(m_VariableBase, "in call to Variable::Start");
diff --git a/bindings/Python/py11Variable.h b/bindings/Python/py11Variable.h
index 6407799561..5afdba97cd 100644
--- a/bindings/Python/py11Variable.h
+++ b/bindings/Python/py11Variable.h
@@ -74,6 +74,11 @@ class Variable
      * @return shape vector
      */
     Dims Shape(const size_t step = adios2::EngineCurrentStep) const;
+#ifdef ADIOS2_HAVE_CUDA
+    Dims Shape(const MemorySpace memSpace, const size_t step = adios2::EngineCurrentStep) const;
+
+    void SetMemorySpace(const MemorySpace memSpace);
+#endif
 
     /**
      * Inspects current start point
diff --git a/bindings/Python/py11glue.cpp b/bindings/Python/py11glue.cpp
index a6ccdcab42..7aa442d672 100644
--- a/bindings/Python/py11glue.cpp
+++ b/bindings/Python/py11glue.cpp
@@ -134,6 +134,12 @@ PYBIND11_MODULE(ADIOS2_PYTHON_MODULE_NAME, m)
         .value("StoreData", adios2::DerivedVarType::StoreData)
         .export_values();
 
+#ifdef ADIOS2_HAVE_CUDA
+    pybind11::enum_<adios2::MemorySpace>(m, "MemorySpace")
+        .value("Host", adios2::MemorySpace::Host)
+        .value("GPU", adios2::MemorySpace::GPU);
+#endif
+
     pybind11::class_<adios2::Accuracy>(m, "Accuracy")
         .def(pybind11::init<double, double, bool>())
         .def_readwrite("error", &adios2::Accuracy::error)
@@ -396,8 +402,21 @@ PYBIND11_MODULE(ADIOS2_PYTHON_MODULE_NAME, m)
         .def("Type", &adios2::py11::Variable::Type)
         .def("Sizeof", &adios2::py11::Variable::Sizeof)
         .def("ShapeID", &adios2::py11::Variable::ShapeID)
-        .def("Shape", &adios2::py11::Variable::Shape,
+        .def("Shape",
+             (adios2::Dims(adios2::py11::Variable::*)(const size_t) const) &
+                 adios2::py11::Variable::Shape,
              pybind11::arg("step") = adios2::EngineCurrentStep)
+#ifdef ADIOS2_HAVE_CUDA
+        .def("Shape",
+             (adios2::Dims(adios2::py11::Variable::*)(const adios2::MemorySpace, const size_t)
+                  const) &
+                 adios2::py11::Variable::Shape,
+             pybind11::arg("memSpace"), pybind11::arg("step") = adios2::EngineCurrentStep)
+        .def("SetMemorySpace",
+             (void(adios2::py11::Variable::*)(const adios2::MemorySpace)) &
+                 adios2::py11::Variable::SetMemorySpace,
+             pybind11::arg("memSpace"))
+#endif
         .def("Start", &adios2::py11::Variable::Start)
         .def("Count", &adios2::py11::Variable::Count)
         .def("Steps", &adios2::py11::Variable::Steps)
@@ -495,6 +514,17 @@ PYBIND11_MODULE(ADIOS2_PYTHON_MODULE_NAME, m)
              pybind11::arg("variable"), pybind11::arg("floats"),
              pybind11::arg("launch") = adios2::Mode::Sync)
 
+#ifdef ADIOS2_HAVE_CUDA
+        // NOTE(review): "cypyPointer" looks like a typo of "cupyPointer" (the name used by
+        // the Get overload below); kept unchanged because it is a Python keyword name.
+        .def("Put",
+             (void(adios2::py11::Engine::*)(adios2::py11::Variable variable, long,
+                                            const adios2::Mode launch)) &
+                 adios2::py11::Engine::Put,
+             pybind11::arg("variable"), pybind11::arg("cypyPointer"),
+             pybind11::arg("launch") = adios2::Mode::Sync)
+#endif
+
         .def("Put",
              (void(adios2::py11::Engine::*)(adios2::py11::Variable,
                                             const std::vector<std::complex<double>> &,
@@ -514,6 +544,15 @@ PYBIND11_MODULE(ADIOS2_PYTHON_MODULE_NAME, m)
              pybind11::arg("variable"), pybind11::arg("array"),
              pybind11::arg("launch") = adios2::Mode::Deferred)
 
+#ifdef ADIOS2_HAVE_CUDA
+        .def("Get",
+             (void(adios2::py11::Engine::*)(adios2::py11::Variable variable, long,
+                                            const adios2::Mode launch)) &
+                 adios2::py11::Engine::Get,
+             pybind11::arg("variable"), pybind11::arg("cupyPointer"),
+             pybind11::arg("launch") = adios2::Mode::Sync)
+#endif
+
         .def("Get",
              (std::string(adios2::py11::Engine::*)(adios2::py11::Variable,
                                                    const adios2::Mode launch)) &
diff --git a/examples/hello/bpStepsWriteReadCuda/bpStepsWriteReadCuda.py b/examples/hello/bpStepsWriteReadCuda/bpStepsWriteReadCuda.py
new file mode 100644
index 0000000000..3d4618c6b1
--- /dev/null
+++ b/examples/hello/bpStepsWriteReadCuda/bpStepsWriteReadCuda.py
@@ -0,0 +1,75 @@
+import numpy as np
+import cupy as cp
+from adios2 import FileReader
+import adios2.bindings as adios2
+
+def write_array(fileName, nSteps, gpuArray, cpuArray):
+    """Write nSteps steps of a numpy array and a cupy array (by device pointer) to fileName."""
+    adios = adios2.ADIOS()
+    ioWriter = adios.DeclareIO("cupyWriter")
+    # define adios variables, the cpuArray is used for both variables to
+    # define the type of the variables (float32 in this case)
+    gpuVar = ioWriter.DefineVariable("gpuArray", cpuArray, gpuArray.shape,
+                                     [0] * len(gpuArray.shape), gpuArray.shape)
+    # optionally the memory space can be set to GPU
+    gpuVar.SetMemorySpace(adios2.MemorySpace.GPU)
+    cpuVar = ioWriter.DefineVariable("cpuArray", cpuArray, cpuArray.shape,
+                                     [0] * len(cpuArray.shape), cpuArray.shape)
+
+    # write both cpu and gpu arrays for each simulation step
+    wStream = ioWriter.Open(fileName, adios2.Mode.Write)
+    for step in range(nSteps):
+        # write buffers
+        wStream.BeginStep()
+        wStream.Put(cpuVar, cpuArray)
+        wStream.Put(gpuVar, gpuArray.data.ptr)
+        wStream.EndStep()
+        # update buffers
+        gpuArray = gpuArray * 2
+        cpuArray = cpuArray + 1
+    wStream.Close()
+    print("Write to file %s: %s data from GPU and %s data from CPU" % (
+        fileName, gpuArray.shape, cpuArray.shape))
+
+def read_array(fileName, nSteps):
+    """Read nSteps steps of both arrays back from fileName and print them."""
+    adios = adios2.ADIOS()
+    ioReader = adios.DeclareIO("cupyReader")
+    rStream = ioReader.Open(fileName, adios2.Mode.Read)
+    for step in range(nSteps):
+        rStream.BeginStep()
+        # prepare input buffers
+        gpuVar = ioReader.InquireVariable("gpuArray")
+        cpuVar = ioReader.InquireVariable("cpuArray")
+        cpuBuffer = np.zeros(cpuVar.Shape(), dtype=np.float32)
+        gpuShape = gpuVar.Shape(adios2.MemorySpace.GPU)
+        gpuBuffer = cp.zeros(gpuShape, dtype=np.float32)
+        gpuVar.SetSelection([(0, 0), gpuShape])
+        # populate data
+        rStream.Get(gpuVar, gpuBuffer.data.ptr)
+        rStream.Get(cpuVar, cpuBuffer)
+        rStream.EndStep()
+        print("Step %d: read GPU data\n %s" % (step, gpuBuffer))
+        print("Step %d: read CPU data\n %s" % (step, cpuBuffer))
+    rStream.Close()
+
+
+if __name__ == '__main__':
+    # define simulation host data
+    cpuArray = np.array([[0, 1.0, 2.0], [3.0, 4.0, 5.0]], dtype=np.float32)
+    # copy the data on the device
+    gpuArray = cp.asarray(cpuArray)
+    print("Array allocation: ", gpuArray.device)
+
+    mempool = cp.get_default_memory_pool()
+    pinned_mempool = cp.get_default_pinned_memory_pool()
+    print("Bytes required to store the gpu array", gpuArray.nbytes)
+    print("Bytes allocated on the device memory pool", mempool.total_bytes())
+    print("Bytes used on the device memory pool", mempool.used_bytes())
+    print("Blocks allocated on the pinned memory pool (The allocated pinned"
+          " memory is released just after the transfer is complete)",
+          pinned_mempool.n_free_blocks())
+
+    nSteps = 2
+    write_array("StepsWriteReadCuPy.bp", nSteps, gpuArray, cpuArray)
+    read_array("StepsWriteReadCuPy.bp", nSteps)