From bdaddae45749fc19825ad73797c7d5e6ea5fb92d Mon Sep 17 00:00:00 2001 From: Felix Wittwer Date: Sun, 11 Feb 2024 22:17:59 -0800 Subject: [PATCH 01/17] add dummy dlpack api --- dlpack/LICENSE | 201 +++++++++++++++++++++ dlpack/dlpack.h | 233 +++++++++++++++++++++++++ simtbx/diffBragg/src/diffBragg_ext.cpp | 209 ++++++++++++++++++++++ 3 files changed, 643 insertions(+) create mode 100644 dlpack/LICENSE create mode 100644 dlpack/dlpack.h diff --git a/dlpack/LICENSE b/dlpack/LICENSE new file mode 100644 index 0000000000..20a9c8a7b4 --- /dev/null +++ b/dlpack/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2017 by Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/dlpack/dlpack.h b/dlpack/dlpack.h new file mode 100644 index 0000000000..ef6960b23a --- /dev/null +++ b/dlpack/dlpack.h @@ -0,0 +1,233 @@ +/*! + * Copyright (c) 2017 by Contributors + * \file dlpack.h + * \brief The common header of DLPack. + */ +#ifndef DLPACK_DLPACK_H_ +#define DLPACK_DLPACK_H_ + +/** + * \brief Compatibility with C++ + */ +#ifdef __cplusplus +#define DLPACK_EXTERN_C extern "C" +#else +#define DLPACK_EXTERN_C +#endif + +/*! \brief The current version of dlpack */ +#define DLPACK_VERSION 70 + +/*! \brief The current ABI version of dlpack */ +#define DLPACK_ABI_VERSION 1 + +/*! \brief DLPACK_DLL prefix for windows */ +#ifdef _WIN32 +#ifdef DLPACK_EXPORTS +#define DLPACK_DLL __declspec(dllexport) +#else +#define DLPACK_DLL __declspec(dllimport) +#endif +#else +#define DLPACK_DLL +#endif + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +/*! + * \brief The device type in DLDevice. + */ +#ifdef __cplusplus +typedef enum : int32_t { +#else +typedef enum { +#endif + /*! \brief CPU device */ + kDLCPU = 1, + /*! \brief CUDA GPU device */ + kDLCUDA = 2, + /*! + * \brief Pinned CUDA CPU memory by cudaMallocHost + */ + kDLCUDAHost = 3, + /*! \brief OpenCL devices. */ + kDLOpenCL = 4, + /*! \brief Vulkan buffer for next generation graphics. */ + kDLVulkan = 7, + /*! \brief Metal for Apple GPU. */ + kDLMetal = 8, + /*! \brief Verilog simulator buffer */ + kDLVPI = 9, + /*! \brief ROCm GPUs for AMD GPUs */ + kDLROCM = 10, + /*! + * \brief Pinned ROCm CPU memory allocated by hipMallocHost + */ + kDLROCMHost = 11, + /*! + * \brief Reserved extension device type, + * used for quickly test extension device + * The semantics can differ depending on the implementation. + */ + kDLExtDev = 12, + /*! + * \brief CUDA managed/unified memory allocated by cudaMallocManaged + */ + kDLCUDAManaged = 13, + /*! + * \brief Unified shared memory allocated on a oneAPI non-partititioned + * device. Call to oneAPI runtime is required to determine the device + * type, the USM allocation type and the sycl context it is bound to. + * + */ + kDLOneAPI = 14, + /*! \brief GPU support for next generation WebGPU standard. */ + kDLWebGPU = 15, + /*! \brief Qualcomm Hexagon DSP */ + kDLHexagon = 16, +} DLDeviceType; + +/*! + * \brief A Device for Tensor and operator. + */ +// NB: This is the only difference from +// https://github.com/dmlc/dlpack/blob/v0.7/include/dlpack/dlpack.h Required to +// allow forward declaration of DLDevice. +typedef struct DLDevice_ { + /*! \brief The device type used in the device. */ + DLDeviceType device_type; + /*! + * \brief The device index. + * For vanilla CPU memory, pinned memory, or managed memory, this is set to 0. + */ + int32_t device_id; +} DLDevice; + +/*! + * \brief The type code options DLDataType. + */ +typedef enum { + /*! \brief signed integer */ + kDLInt = 0U, + /*! \brief unsigned integer */ + kDLUInt = 1U, + /*! \brief IEEE floating point */ + kDLFloat = 2U, + /*! + * \brief Opaque handle type, reserved for testing purposes. + * Frameworks need to agree on the handle data type for the exchange to be + * well-defined. + */ + kDLOpaqueHandle = 3U, + /*! \brief bfloat16 */ + kDLBfloat = 4U, + /*! + * \brief complex number + * (C/C++/Python layout: compact struct per complex number) + */ + kDLComplex = 5U, +} DLDataTypeCode; + +/*! + * \brief The data type the tensor can hold. The data type is assumed to follow + * the native endian-ness. An explicit error message should be raised when + * attempting to export an array with non-native endianness + * + * Examples + * - float: type_code = 2, bits = 32, lanes=1 + * - float4(vectorized 4 float): type_code = 2, bits = 32, lanes=4 + * - int8: type_code = 0, bits = 8, lanes=1 + * - std::complex: type_code = 5, bits = 64, lanes = 1 + */ +typedef struct { + /*! + * \brief Type code of base types. + * We keep it uint8_t instead of DLDataTypeCode for minimal memory + * footprint, but the value should be one of DLDataTypeCode enum values. + * */ + uint8_t code; + /*! + * \brief Number of bits, common choices are 8, 16, 32. + */ + uint8_t bits; + /*! \brief Number of lanes in the type, used for vector types. */ + uint16_t lanes; +} DLDataType; + +/*! + * \brief Plain C Tensor object, does not manage memory. + */ +typedef struct { + /*! + * \brief The data pointer points to the allocated data. This will be CUDA + * device pointer or cl_mem handle in OpenCL. It may be opaque on some device + * types. This pointer is always aligned to 256 bytes as in CUDA. The + * `byte_offset` field should be used to point to the beginning of the data. + * + * Note that as of Nov 2021, multiply libraries (CuPy, PyTorch, TensorFlow, + * TVM, perhaps others) do not adhere to this 256 byte aligment requirement + * on CPU/CUDA/ROCm, and always use `byte_offset=0`. This must be fixed + * (after which this note will be updated); at the moment it is recommended + * to not rely on the data pointer being correctly aligned. + * + * For given DLTensor, the size of memory required to store the contents of + * data is calculated as follows: + * + * \code{.c} + * static inline size_t GetDataSize(const DLTensor* t) { + * size_t size = 1; + * for (tvm_index_t i = 0; i < t->ndim; ++i) { + * size *= t->shape[i]; + * } + * size *= (t->dtype.bits * t->dtype.lanes + 7) / 8; + * return size; + * } + * \endcode + */ + void* data; + /*! \brief The device of the tensor */ + DLDevice device; + /*! \brief Number of dimensions */ + int32_t ndim; + /*! \brief The data type of the pointer*/ + DLDataType dtype; + /*! \brief The shape of the tensor */ + int64_t* shape; + /*! + * \brief strides of the tensor (in number of elements, not bytes) + * can be NULL, indicating tensor is compact and row-majored. + */ + int64_t* strides; + /*! \brief The offset in bytes to the beginning pointer to data */ + uint64_t byte_offset; +} DLTensor; + +/*! + * \brief C Tensor object, manage memory of DLTensor. This data structure is + * intended to facilitate the borrowing of DLTensor by another framework. It is + * not meant to transfer the tensor. When the borrowing framework doesn't need + * the tensor, it should call the deleter to notify the host that the resource + * is no longer needed. + */ +typedef struct DLManagedTensor { + /*! \brief DLTensor which is being memory managed */ + DLTensor dl_tensor; + /*! \brief the context of the original host framework of DLManagedTensor in + * which DLManagedTensor is used in the framework. It can also be NULL. + */ + void* manager_ctx; + /*! \brief Destructor signature void (*)(void*) - this should be called + * to destruct manager_ctx which holds the DLManagedTensor. It can be NULL + * if there is no way for the caller to provide a reasonable destructor. + * The destructors deletes the argument self as well. + */ + void (*deleter)(struct DLManagedTensor* self); +} DLManagedTensor; +#ifdef __cplusplus +} // DLPACK_EXTERN_C +#endif +#endif // DLPACK_DLPACK_H_ diff --git a/simtbx/diffBragg/src/diffBragg_ext.cpp b/simtbx/diffBragg/src/diffBragg_ext.cpp index 47d500ca77..35eb280737 100644 --- a/simtbx/diffBragg/src/diffBragg_ext.cpp +++ b/simtbx/diffBragg/src/diffBragg_ext.cpp @@ -4,6 +4,7 @@ #include #include #include +#include using namespace boost::python; namespace simtbx{ @@ -512,6 +513,203 @@ namespace boost_python { namespace { Kokkos::initialize(Kokkos::InitializationSettings() .set_device_id(dev)); } + + void PrintDLTensorParameters( PyObject* capsule ) { + auto tensor = static_cast(PyCapsule_GetPointer(capsule, "dltensor")); + if (tensor == nullptr) { + std::cerr << "The input DLTensor is null." << std::endl; + return; + } + + // Print the number of dimensions. + std::cout << "Number of dimensions (ndim): " << tensor->ndim << std::endl; + + // Print the shape of the tensor. + std::cout << "Shape: ["; + for (int i = 0; i < tensor->ndim; ++i) { + std::cout << tensor->shape[i]; + if (i < tensor->ndim - 1) { + std::cout << ", "; + } + } + std::cout << "]" << std::endl; + + // Print the data type of the tensor. + std::cout << "Data type (dtype): "; + switch (tensor->dtype.code) { + case kDLInt: std::cout << "Int"; break; + case kDLUInt: std::cout << "UInt"; break; + case kDLFloat: std::cout << "Float"; break; + case kDLBfloat: std::cout << "BFloat"; break; + default: std::cout << "Unknown"; break; + } + std::cout << " (" << int(tensor->dtype.bits) << " bits, " << tensor->dtype.lanes << " lanes)" << std::endl; + + // Print the device context (device type and device id). + std::cout << "Device context: "; + switch (tensor->device.device_type) { + case kDLCPU: std::cout << "CPU"; break; + case kDLCUDA: std::cout << "CUDA"; break; + case kDLCUDAHost: std::cout << "CUDA Host"; break; + case kDLOpenCL: std::cout << "OpenCL"; break; + case kDLVulkan: std::cout << "Vulkan"; break; + case kDLMetal: std::cout << "Metal"; break; + case kDLVPI: std::cout << "VPI"; break; + case kDLROCM: std::cout << "ROCM"; break; + default: std::cout << "Unknown"; break; + } + std::cout << ", Device ID: " << tensor->device.device_id << std::endl; + + // Print the strides of the tensor, if available. + if (tensor->strides != nullptr) { + std::cout << "Strides: ["; + for (int i = 0; i < tensor->ndim; ++i) { + std::cout << tensor->strides[i]; + if (i < tensor->ndim - 1) { + std::cout << ", "; + } + } + std::cout << "]" << std::endl; + } else { + std::cout << "Strides: [Contiguous in memory]" << std::endl; + } + + // Print the byte offset of the tensor data. + std::cout << "Byte Offset: " << tensor->byte_offset << std::endl; +} + + template + class KokkosViewToDLPack { + public: + KokkosViewToDLPack(ViewType view) : view_(view) {} + + torch::Tensor convertToDLPack() { + // Convert the Kokkos view to DLPack + DLManagedTensor* dlpackTensor = convertToDLPack(); + + // Convert the DLPack tensor to PyTorch + torch::Tensor tensor = torch::from_dlpack(dlpackTensor); + + // Free the DLPack tensor memory + delete[] dlpackTensor->dl_tensor.shape; + delete dlpackTensor; + + return tensor; + } + + private: + ViewType view_; + + DLManagedTensor* convertToDLPack() { + // Get the Kokkos view size and dimensions + size_t numDims = ViewType::rank; + size_t* shape = new size_t[numDims]; + for (size_t i = 0; i < numDims; i++) { + shape[i] = view_.extent(i); + } + + // Create a DLPack tensor + DLManagedTensor* dlpackTensor = new DLManagedTensor; + dlpackTensor->dl_tensor.data = view_.data(); + dlpackTensor->dl_tensor.ctx = const_cast(view_.impl_map().template device_data()); + dlpackTensor->dl_tensor.ndim = numDims; + dlpackTensor->dl_tensor.dtype = getDLPackDataType(); + dlpackTensor->dl_tensor.shape = shape; + dlpackTensor->dl_tensor.strides = nullptr; + dlpackTensor->dl_tensor.byte_offset = 0; + dlpackTensor->manager_ctx = nullptr; + dlpackTensor->deleter = [](DLManagedTensor* tensor) { delete[] tensor->dl_tensor.shape; }; + + return dlpackTensor; + } + + DLDataType getDLPackDataType() { + DLDataType dtype; + dtype.code = getDLPackTypeCode(); + dtype.bits = sizeof(typename ViewType::value_type) * 8; + dtype.lanes = 1; + return dtype; + } + + DLDataTypeCode getDLPackTypeCode() { + using ValueType = typename ViewType::value_type; + if (std::is_same::value) { + return kDLFloat; + } else if (std::is_same::value) { + return kDLFloat; + } else if (std::is_same::value) { + return kDLInt; + } else if (std::is_same::value) { + return kDLUInt; + } else if (std::is_same::value) { + return kDLBool; + } else { + // Unsupported data type + throw std::runtime_error("Unsupported data type for DLPack conversion"); + } + } + }; + + +struct DLPackAPI { + + double container[50]; + + PyObject* dlpack() { + // Get the Kokkos view size and dimensions + size_t numDims = 1; + int64_t* shape = new int64_t[numDims]; + for (size_t i = 0; i < numDims; i++) { + shape[i] = 50; + } + + // Create a DLPack tensor + DLManagedTensor* dlpackTensor = new DLManagedTensor; + dlpackTensor->dl_tensor.data = static_cast(&container); + dlpackTensor->dl_tensor.device.device_type = DLDeviceType::kDLCPU; + dlpackTensor->dl_tensor.device.device_id = 0; + dlpackTensor->dl_tensor.ndim = numDims; + dlpackTensor->dl_tensor.dtype = getDLPackDataType(); + dlpackTensor->dl_tensor.shape = shape; + dlpackTensor->dl_tensor.strides = nullptr; + dlpackTensor->dl_tensor.byte_offset = 0; + dlpackTensor->manager_ctx = nullptr; + dlpackTensor->deleter = [](DLManagedTensor* tensor) { + std::cout << "Blob" << std::endl; + delete[] tensor->dl_tensor.shape; + }; + + // Create a PyCapsule with the DLPack tensor + PyObject* capsule = PyCapsule_New(dlpackTensor, "dltensor", nullptr); + + return capsule; + } + + DLDataType getDLPackDataType() { + DLDataType dtype; + dtype.code = kDLFloat; + dtype.bits = sizeof(double) * 8; + dtype.lanes = 1; + return dtype; + } + + void print_hello() { + std::cout << "Hello Python!" << std::endl; + } + + void print_container() { + std::cout << "C = [ "; + for (int i=0; i<50; i++) { + std::cout << container[i] << " "; + } + std::cout << "]" << std::endl; + } + + boost::python::tuple dlpack_device() { + return boost::python::make_tuple(static_cast(DLDeviceType::kDLCPU), 0); + } + + }; #endif void diffBragg_init_module() { @@ -529,8 +727,19 @@ namespace boost_python { namespace { def("initialize_kokkos", initialize_kokkos, "the sole argument `dev` (an int from 0 to Ngpu-1) is passed to Kokkos::initialize()"); + + def("print_dlpack",PrintDLTensorParameters,"Print information about a dlpack"); + + class_("KokkosViewToDLPack", no_init) + .def(init("DLPack init")) + .def("__dlpack__", &DLPackAPI::dlpack, "Part of DLPack API") + .def("__dlpack_device__", &DLPackAPI::dlpack_device, "Part of DLPack API") + .def("hello", &DLPackAPI::print_hello, "Dummy test function") + .def("print", &DLPackAPI::print_container, "Print container") + ; #endif + class_ > ("diffBragg", no_init) /* constructor that takes a dxtbx detector and beam model */ From ffc0ce1987f9a19dc0fae13c1bed2351d6d7b81c Mon Sep 17 00:00:00 2001 From: Felix Wittwer Date: Mon, 12 Feb 2024 15:19:11 -0800 Subject: [PATCH 02/17] DLPack interface trial for diffBragg --- kokkostbx/kokkos_dlpack.h | 167 +++++++++++++++++++++++ simtbx/diffBragg/src/diffBragg.cpp | 10 ++ simtbx/diffBragg/src/diffBragg.h | 4 +- simtbx/diffBragg/src/diffBraggKOKKOS.cpp | 4 + simtbx/diffBragg/src/diffBraggKOKKOS.h | 3 + simtbx/diffBragg/src/diffBragg_ext.cpp | 137 ++++++++++--------- 6 files changed, 257 insertions(+), 68 deletions(-) create mode 100644 kokkostbx/kokkos_dlpack.h diff --git a/kokkostbx/kokkos_dlpack.h b/kokkostbx/kokkos_dlpack.h new file mode 100644 index 0000000000..1f47a31220 --- /dev/null +++ b/kokkostbx/kokkos_dlpack.h @@ -0,0 +1,167 @@ +#ifndef KOKKOS_DLPACK_H +#define KOKKOS_DLPACK_H +#include +#include + +namespace kokkostbx { + +template +DLDataTypeCode getDLPackTypeCode() { + using ValueType = typename Kokkos::View::value_type; + if (std::is_same::value) { + return kDLFloat; + } else if (std::is_same::value) { + return kDLFloat; + } else if (std::is_same::value) { + return kDLInt; + } else if (std::is_same::value) { + return kDLUInt; + // } else if (std::is_same::value) { + // return kDLBool; + } else { + // Unsupported data type + throw std::runtime_error("Unsupported data type for DLPack conversion"); + } +} + +template +DLDataType getDLPackDataType() { + DLDataType dtype; + dtype.code = getDLPackTypeCode(); + dtype.bits = sizeof(typename Kokkos::View::value_type) * 8; + dtype.lanes = 1; + return dtype; +} + +template +DLDevice getDLPackDevice() { + DLDevice dl_device; + if (std::is_same::value) { + dl_device = {kDLCPU, 0}; + } +#ifdef KOKKOS_ENABLE_CUDA + else if (std::is_same::value) { + dl_device = {kDLCUDA, 0}; + } else if (std::is_same::value) { + dl_device = {kDLCUDAManaged, 0}; + } else if (std::is_same::value) { + dl_device = {kDLCUDAHost, 0}; + } +#endif +#ifdef KOKKOS_ENABLE_HIP + else if (std::is_same::value) { + dl_device = {kDLROCM, 0}; + } else if (std::is_same::value) { + dl_device = {kDLROCMHost, 0}; + } +#endif + else { + // Extend to other device types as needed + throw std::runtime_error("Unsupported Kokkos device type for DLPack conversion."); + } + return dl_device; +} + +template +DLManagedTensor* view_to_dlpack(Kokkos::View& view) { + // Get the Kokkos view size and dimensions + constexpr size_t numDims = Kokkos::View::rank; + int64_t* shape = new int64_t[numDims]; + for (size_t i = 0; i < numDims; i++) { + shape[i] = view.extent(i); + } + + // Create a DLPack tensor + DLManagedTensor* dlpackTensor = new DLManagedTensor; + dlpackTensor->dl_tensor.data = view.data(); + // dlpackTensor->dl_tensor.device.device_type = DLDeviceType::kDLCPU; + // dlpackTensor->dl_tensor.device.device_id = 0; + dlpackTensor->dl_tensor.device = getDLPackDevice(); + dlpackTensor->dl_tensor.ndim = numDims; + dlpackTensor->dl_tensor.dtype = getDLPackDataType(); + dlpackTensor->dl_tensor.shape = shape; + dlpackTensor->dl_tensor.strides = nullptr; + dlpackTensor->dl_tensor.byte_offset = 0; + dlpackTensor->manager_ctx = nullptr; + dlpackTensor->deleter = [](DLManagedTensor* tensor) { + std::cout << "Blob" << std::endl; + delete[] tensor->dl_tensor.shape; + }; + return dlpackTensor; +} + +// template +// class KokkosViewToDLPack { +// public: +// KokkosViewToDLPack(ViewType view) : view_(view) {} + +// torch::Tensor convertToDLPack() { +// // Convert the Kokkos view to DLPack +// DLManagedTensor* dlpackTensor = convertToDLPack(); + +// // Convert the DLPack tensor to PyTorch +// torch::Tensor tensor = torch::from_dlpack(dlpackTensor); + +// // Free the DLPack tensor memory +// delete[] dlpackTensor->dl_tensor.shape; +// delete dlpackTensor; + +// return tensor; +// } + +// private: +// ViewType view_; + +// DLManagedTensor* convertToDLPack() { +// // Get the Kokkos view size and dimensions +// size_t numDims = ViewType::rank; +// size_t* shape = new size_t[numDims]; +// for (size_t i = 0; i < numDims; i++) { +// shape[i] = view_.extent(i); +// } + +// // Create a DLPack tensor +// DLManagedTensor* dlpackTensor = new DLManagedTensor; +// dlpackTensor->dl_tensor.data = view_.data(); +// dlpackTensor->dl_tensor.ctx = const_cast(view_.impl_map().template device_data()); +// dlpackTensor->dl_tensor.ndim = numDims; +// dlpackTensor->dl_tensor.dtype = getDLPackDataType(); +// dlpackTensor->dl_tensor.shape = shape; +// dlpackTensor->dl_tensor.strides = nullptr; +// dlpackTensor->dl_tensor.byte_offset = 0; +// dlpackTensor->manager_ctx = nullptr; +// dlpackTensor->deleter = [](DLManagedTensor* tensor) { delete[] tensor->dl_tensor.shape; }; + +// return dlpackTensor; +// } + +// DLDataType getDLPackDataType() { +// DLDataType dtype; +// dtype.code = getDLPackTypeCode(); +// dtype.bits = sizeof(typename ViewType::value_type) * 8; +// dtype.lanes = 1; +// return dtype; +// } + +// DLDataTypeCode getDLPackTypeCode() { +// using ValueType = typename ViewType::value_type; +// if (std::is_same::value) { +// return kDLFloat; +// } else if (std::is_same::value) { +// return kDLFloat; +// } else if (std::is_same::value) { +// return kDLInt; +// } else if (std::is_same::value) { +// return kDLUInt; +// } else if (std::is_same::value) { +// return kDLBool; +// } else { +// // Unsupported data type +// throw std::runtime_error("Unsupported data type for DLPack conversion"); +// } +// } +// }; + +} + +#endif // KOKKOS_DLPACK_H diff --git a/simtbx/diffBragg/src/diffBragg.cpp b/simtbx/diffBragg/src/diffBragg.cpp index a42cc5b960..51988d2bdc 100644 --- a/simtbx/diffBragg/src/diffBragg.cpp +++ b/simtbx/diffBragg/src/diffBragg.cpp @@ -1507,6 +1507,16 @@ boost::python::tuple diffBragg::get_ncells_derivative_pixels(){ return derivative_pixels; } +#ifdef DIFFBRAGG_HAVE_KOKKOS +PyObject* diffBragg::get_d_Ncells_images() { + + if (diffBragg_runner == nullptr) { + return nullptr; + } + return PyCapsule_New(diffBragg_runner->get_d_Ncells_images(), "dltensor", nullptr); +} +#endif + boost::python::tuple diffBragg::get_diffuse_gamma_derivative_pixels(){ SCITBX_ASSERT(db_flags.refine_diffuse); int Npix_total = first_deriv_imgs.diffuse_gamma.size() / 3; diff --git a/simtbx/diffBragg/src/diffBragg.h b/simtbx/diffBragg/src/diffBragg.h index c26d0d019e..c27567ca03 100644 --- a/simtbx/diffBragg/src/diffBragg.h +++ b/simtbx/diffBragg/src/diffBragg.h @@ -170,7 +170,6 @@ class diffBragg: public nanoBragg{ inline void kokkos_free() { diffBragg_runner.reset(); } // allocate when needed to avoid problems with kokkos initialization when cuda/kokkos isn't used std::shared_ptr diffBragg_runner{}; - // diffBraggKOKKOS diffBragg_runner; #endif inline void gpu_free(){ @@ -238,6 +237,9 @@ class diffBragg: public nanoBragg{ af::flex_double get_raw_pixels_roi(); boost::python::tuple get_fp_fdp_derivative_pixels(); boost::python::tuple get_ncells_derivative_pixels(); +#ifdef DIFFBRAGG_HAVE_KOKKOS + PyObject* get_d_Ncells_images(); +#endif boost::python::tuple get_diffuse_gamma_derivative_pixels(); boost::python::tuple get_diffuse_sigma_derivative_pixels(); boost::python::tuple get_ncells_def_derivative_pixels(); diff --git a/simtbx/diffBragg/src/diffBraggKOKKOS.cpp b/simtbx/diffBragg/src/diffBraggKOKKOS.cpp index 273088f765..3420469ed8 100644 --- a/simtbx/diffBragg/src/diffBraggKOKKOS.cpp +++ b/simtbx/diffBragg/src/diffBraggKOKKOS.cpp @@ -43,6 +43,10 @@ uint32_t combine_refinement_flags(flags& db_flags) { return refine_flag; } +DLManagedTensor* diffBraggKOKKOS::get_d_Ncells_images() { + return kokkostbx::view_to_dlpack(m_d_Ncells_images); +} + void diffBraggKOKKOS::diffBragg_sum_over_steps_kokkos( int Npix_to_model, std::vector& panels_fasts_slows, diff --git a/simtbx/diffBragg/src/diffBraggKOKKOS.h b/simtbx/diffBragg/src/diffBraggKOKKOS.h index 3aa07ae27b..a88ef5d4d3 100644 --- a/simtbx/diffBragg/src/diffBraggKOKKOS.h +++ b/simtbx/diffBragg/src/diffBraggKOKKOS.h @@ -7,6 +7,7 @@ #include "kokkostbx/kokkos_types.h" #include "kokkostbx/kokkos_utils.h" +#include "kokkostbx/kokkos_dlpack.h" #include "simtbx/diffBragg/src/util.h" #include "simtbx/diffBragg/src/util_kokkos.h" #include "simtbx/diffBragg/src/diffBragg_refine_flag.h" @@ -147,6 +148,8 @@ class diffBraggKOKKOS { cuda_flags& db_cu_flags, // diffBragg_kokkosPointers& kp, timer_variables& TIMERS); + + DLManagedTensor* get_d_Ncells_images(); }; #endif diff --git a/simtbx/diffBragg/src/diffBragg_ext.cpp b/simtbx/diffBragg/src/diffBragg_ext.cpp index 35eb280737..43906ce078 100644 --- a/simtbx/diffBragg/src/diffBragg_ext.cpp +++ b/simtbx/diffBragg/src/diffBragg_ext.cpp @@ -578,77 +578,77 @@ namespace boost_python { namespace { std::cout << "Byte Offset: " << tensor->byte_offset << std::endl; } - template - class KokkosViewToDLPack { - public: - KokkosViewToDLPack(ViewType view) : view_(view) {} + // template + // class KokkosViewToDLPack { + // public: + // KokkosViewToDLPack(ViewType view) : view_(view) {} - torch::Tensor convertToDLPack() { - // Convert the Kokkos view to DLPack - DLManagedTensor* dlpackTensor = convertToDLPack(); + // torch::Tensor convertToDLPack() { + // // Convert the Kokkos view to DLPack + // DLManagedTensor* dlpackTensor = convertToDLPack(); - // Convert the DLPack tensor to PyTorch - torch::Tensor tensor = torch::from_dlpack(dlpackTensor); + // // Convert the DLPack tensor to PyTorch + // torch::Tensor tensor = torch::from_dlpack(dlpackTensor); - // Free the DLPack tensor memory - delete[] dlpackTensor->dl_tensor.shape; - delete dlpackTensor; + // // Free the DLPack tensor memory + // delete[] dlpackTensor->dl_tensor.shape; + // delete dlpackTensor; - return tensor; - } + // return tensor; + // } - private: - ViewType view_; - - DLManagedTensor* convertToDLPack() { - // Get the Kokkos view size and dimensions - size_t numDims = ViewType::rank; - size_t* shape = new size_t[numDims]; - for (size_t i = 0; i < numDims; i++) { - shape[i] = view_.extent(i); - } - - // Create a DLPack tensor - DLManagedTensor* dlpackTensor = new DLManagedTensor; - dlpackTensor->dl_tensor.data = view_.data(); - dlpackTensor->dl_tensor.ctx = const_cast(view_.impl_map().template device_data()); - dlpackTensor->dl_tensor.ndim = numDims; - dlpackTensor->dl_tensor.dtype = getDLPackDataType(); - dlpackTensor->dl_tensor.shape = shape; - dlpackTensor->dl_tensor.strides = nullptr; - dlpackTensor->dl_tensor.byte_offset = 0; - dlpackTensor->manager_ctx = nullptr; - dlpackTensor->deleter = [](DLManagedTensor* tensor) { delete[] tensor->dl_tensor.shape; }; - - return dlpackTensor; - } - - DLDataType getDLPackDataType() { - DLDataType dtype; - dtype.code = getDLPackTypeCode(); - dtype.bits = sizeof(typename ViewType::value_type) * 8; - dtype.lanes = 1; - return dtype; - } - - DLDataTypeCode getDLPackTypeCode() { - using ValueType = typename ViewType::value_type; - if (std::is_same::value) { - return kDLFloat; - } else if (std::is_same::value) { - return kDLFloat; - } else if (std::is_same::value) { - return kDLInt; - } else if (std::is_same::value) { - return kDLUInt; - } else if (std::is_same::value) { - return kDLBool; - } else { - // Unsupported data type - throw std::runtime_error("Unsupported data type for DLPack conversion"); - } - } - }; + // private: + // ViewType view_; + + // DLManagedTensor* convertToDLPack() { + // // Get the Kokkos view size and dimensions + // size_t numDims = ViewType::rank; + // size_t* shape = new size_t[numDims]; + // for (size_t i = 0; i < numDims; i++) { + // shape[i] = view_.extent(i); + // } + + // // Create a DLPack tensor + // DLManagedTensor* dlpackTensor = new DLManagedTensor; + // dlpackTensor->dl_tensor.data = view_.data(); + // dlpackTensor->dl_tensor.ctx = const_cast(view_.impl_map().template device_data()); + // dlpackTensor->dl_tensor.ndim = numDims; + // dlpackTensor->dl_tensor.dtype = getDLPackDataType(); + // dlpackTensor->dl_tensor.shape = shape; + // dlpackTensor->dl_tensor.strides = nullptr; + // dlpackTensor->dl_tensor.byte_offset = 0; + // dlpackTensor->manager_ctx = nullptr; + // dlpackTensor->deleter = [](DLManagedTensor* tensor) { delete[] tensor->dl_tensor.shape; }; + + // return dlpackTensor; + // } + + // DLDataType getDLPackDataType() { + // DLDataType dtype; + // dtype.code = getDLPackTypeCode(); + // dtype.bits = sizeof(typename ViewType::value_type) * 8; + // dtype.lanes = 1; + // return dtype; + // } + + // DLDataTypeCode getDLPackTypeCode() { + // using ValueType = typename ViewType::value_type; + // if (std::is_same::value) { + // return kDLFloat; + // } else if (std::is_same::value) { + // return kDLFloat; + // } else if (std::is_same::value) { + // return kDLInt; + // } else if (std::is_same::value) { + // return kDLUInt; + // } else if (std::is_same::value) { + // return kDLBool; + // } else { + // // Unsupported data type + // throw std::runtime_error("Unsupported data type for DLPack conversion"); + // } + // } + // }; struct DLPackAPI { @@ -710,6 +710,7 @@ struct DLPackAPI { } }; + #endif void diffBragg_init_module() { @@ -730,6 +731,8 @@ struct DLPackAPI { def("print_dlpack",PrintDLTensorParameters,"Print information about a dlpack"); + // def("get_d_Ncells_images", &get_dlpack, "Return DLPackTensor for d_Ncells_images; pot. on GPU") + class_("KokkosViewToDLPack", no_init) .def(init("DLPack init")) .def("__dlpack__", &DLPackAPI::dlpack, "Part of DLPack API") @@ -787,7 +790,7 @@ struct DLPackAPI { .def("set_ncells_values", &simtbx::nanoBragg::diffBragg::set_ncells_values, "set Ncells values as a 3-tuple (Na, Nb, Nc)") .def("get_ncells_values", &simtbx::nanoBragg::diffBragg::get_ncells_values, "get Ncells values as a 3-tuple (Na, Nb, Nc)") - + .def("get_d_Ncells_images", &simtbx::nanoBragg::diffBragg::get_d_Ncells_images, "get DLPackTensor for d_Ncells_images; pot. on GPU") .def("add_diffBragg_spots_full", &simtbx::nanoBragg::diffBragg::add_diffBragg_spots_full, "forward model and gradients at every pixel") From 59f5d5d78e9d629e0fdfa1d7ebb7ffb9c685b0e6 Mon Sep 17 00:00:00 2001 From: Felix Wittwer Date: Wed, 14 Feb 2024 11:19:27 -0800 Subject: [PATCH 03/17] Fix missing destructor --- simtbx/diffBragg/src/diffBragg_ext.cpp | 87 +++++--------------------- 1 file changed, 15 insertions(+), 72 deletions(-) diff --git a/simtbx/diffBragg/src/diffBragg_ext.cpp b/simtbx/diffBragg/src/diffBragg_ext.cpp index 43906ce078..1245730aa7 100644 --- a/simtbx/diffBragg/src/diffBragg_ext.cpp +++ b/simtbx/diffBragg/src/diffBragg_ext.cpp @@ -578,78 +578,21 @@ namespace boost_python { namespace { std::cout << "Byte Offset: " << tensor->byte_offset << std::endl; } - // template - // class KokkosViewToDLPack { - // public: - // KokkosViewToDLPack(ViewType view) : view_(view) {} - - // torch::Tensor convertToDLPack() { - // // Convert the Kokkos view to DLPack - // DLManagedTensor* dlpackTensor = convertToDLPack(); - - // // Convert the DLPack tensor to PyTorch - // torch::Tensor tensor = torch::from_dlpack(dlpackTensor); - - // // Free the DLPack tensor memory - // delete[] dlpackTensor->dl_tensor.shape; - // delete dlpackTensor; - - // return tensor; - // } - - // private: - // ViewType view_; - - // DLManagedTensor* convertToDLPack() { - // // Get the Kokkos view size and dimensions - // size_t numDims = ViewType::rank; - // size_t* shape = new size_t[numDims]; - // for (size_t i = 0; i < numDims; i++) { - // shape[i] = view_.extent(i); - // } - - // // Create a DLPack tensor - // DLManagedTensor* dlpackTensor = new DLManagedTensor; - // dlpackTensor->dl_tensor.data = view_.data(); - // dlpackTensor->dl_tensor.ctx = const_cast(view_.impl_map().template device_data()); - // dlpackTensor->dl_tensor.ndim = numDims; - // dlpackTensor->dl_tensor.dtype = getDLPackDataType(); - // dlpackTensor->dl_tensor.shape = shape; - // dlpackTensor->dl_tensor.strides = nullptr; - // dlpackTensor->dl_tensor.byte_offset = 0; - // dlpackTensor->manager_ctx = nullptr; - // dlpackTensor->deleter = [](DLManagedTensor* tensor) { delete[] tensor->dl_tensor.shape; }; - - // return dlpackTensor; - // } - - // DLDataType getDLPackDataType() { - // DLDataType dtype; - // dtype.code = getDLPackTypeCode(); - // dtype.bits = sizeof(typename ViewType::value_type) * 8; - // dtype.lanes = 1; - // return dtype; - // } - - // DLDataTypeCode getDLPackTypeCode() { - // using ValueType = typename ViewType::value_type; - // if (std::is_same::value) { - // return kDLFloat; - // } else if (std::is_same::value) { - // return kDLFloat; - // } else if (std::is_same::value) { - // return kDLInt; - // } else if (std::is_same::value) { - // return kDLUInt; - // } else if (std::is_same::value) { - // return kDLBool; - // } else { - // // Unsupported data type - // throw std::runtime_error("Unsupported data type for DLPack conversion"); - // } - // } - // }; +void dlpack_destructor(PyObject* capsule) { + if (!PyCapsule_IsValid(capsule, "dltensor")) { + std::cout << "Muh0 " << PyCapsule_GetPointer(capsule, "used_dltensor") << std::endl; + return; + } + // If the capsule has not been used, we need to delete it + std::cout << "Muh1" << std::endl; + DLManagedTensor* dlpackTensor = static_cast(PyCapsule_GetPointer(capsule, "dltensor")); + std::cout << "Muh2" << std::endl; + dlpackTensor->deleter(dlpackTensor); + std::cout << "Muh3" << std::endl; + delete dlpackTensor; + std::cout << "Muh4" << std::endl; +} struct DLPackAPI { @@ -680,7 +623,7 @@ struct DLPackAPI { }; // Create a PyCapsule with the DLPack tensor - PyObject* capsule = PyCapsule_New(dlpackTensor, "dltensor", nullptr); + PyObject* capsule = PyCapsule_New(dlpackTensor, "dltensor", dlpack_destructor); return capsule; } From e1085661de1af65d07652fed0eb64373d9dadc99 Mon Sep 17 00:00:00 2001 From: Felix Wittwer Date: Thu, 15 Feb 2024 10:21:49 -0800 Subject: [PATCH 04/17] Fix device id --- kokkostbx/kokkos_dlpack.h | 91 +++--------------------------- simtbx/diffBragg/src/diffBragg.cpp | 28 ++++++++- 2 files changed, 35 insertions(+), 84 deletions(-) diff --git a/kokkostbx/kokkos_dlpack.h b/kokkostbx/kokkos_dlpack.h index 1f47a31220..eb00e79fda 100644 --- a/kokkostbx/kokkos_dlpack.h +++ b/kokkostbx/kokkos_dlpack.h @@ -35,31 +35,31 @@ DLDataType getDLPackDataType() { template DLDevice getDLPackDevice() { - DLDevice dl_device; + const int device_id = std::max(0, Kokkos::device_id()); // convert host id from -1 to 0 + if (std::is_same::value) { - dl_device = {kDLCPU, 0}; + return {kDLCPU, device_id}; } #ifdef KOKKOS_ENABLE_CUDA else if (std::is_same::value) { - dl_device = {kDLCUDA, 0}; + return {kDLCUDA, device_id}; } else if (std::is_same::value) { - dl_device = {kDLCUDAManaged, 0}; + return {kDLCUDAManaged, device_id}; } else if (std::is_same::value) { - dl_device = {kDLCUDAHost, 0}; + return {kDLCUDAHost, device_id}; } #endif #ifdef KOKKOS_ENABLE_HIP else if (std::is_same::value) { - dl_device = {kDLROCM, 0}; + return {kDLROCM, device_id}; } else if (std::is_same::value) { - dl_device = {kDLROCMHost, 0}; + return {kDLROCMHost, device_id}; } #endif else { // Extend to other device types as needed throw std::runtime_error("Unsupported Kokkos device type for DLPack conversion."); } - return dl_device; } template @@ -74,8 +74,6 @@ DLManagedTensor* view_to_dlpack(Kokkos::View& view) { // Create a DLPack tensor DLManagedTensor* dlpackTensor = new DLManagedTensor; dlpackTensor->dl_tensor.data = view.data(); - // dlpackTensor->dl_tensor.device.device_type = DLDeviceType::kDLCPU; - // dlpackTensor->dl_tensor.device.device_id = 0; dlpackTensor->dl_tensor.device = getDLPackDevice(); dlpackTensor->dl_tensor.ndim = numDims; dlpackTensor->dl_tensor.dtype = getDLPackDataType(); @@ -84,84 +82,11 @@ DLManagedTensor* view_to_dlpack(Kokkos::View& view) { dlpackTensor->dl_tensor.byte_offset = 0; dlpackTensor->manager_ctx = nullptr; dlpackTensor->deleter = [](DLManagedTensor* tensor) { - std::cout << "Blob" << std::endl; delete[] tensor->dl_tensor.shape; }; return dlpackTensor; } -// template -// class KokkosViewToDLPack { -// public: -// KokkosViewToDLPack(ViewType view) : view_(view) {} - -// torch::Tensor convertToDLPack() { -// // Convert the Kokkos view to DLPack -// DLManagedTensor* dlpackTensor = convertToDLPack(); - -// // Convert the DLPack tensor to PyTorch -// torch::Tensor tensor = torch::from_dlpack(dlpackTensor); - -// // Free the DLPack tensor memory -// delete[] dlpackTensor->dl_tensor.shape; -// delete dlpackTensor; - -// return tensor; -// } - -// private: -// ViewType view_; - -// DLManagedTensor* convertToDLPack() { -// // Get the Kokkos view size and dimensions -// size_t numDims = ViewType::rank; -// size_t* shape = new size_t[numDims]; -// for (size_t i = 0; i < numDims; i++) { -// shape[i] = view_.extent(i); -// } - -// // Create a DLPack tensor -// DLManagedTensor* dlpackTensor = new DLManagedTensor; -// dlpackTensor->dl_tensor.data = view_.data(); -// dlpackTensor->dl_tensor.ctx = const_cast(view_.impl_map().template device_data()); -// dlpackTensor->dl_tensor.ndim = numDims; -// dlpackTensor->dl_tensor.dtype = getDLPackDataType(); -// dlpackTensor->dl_tensor.shape = shape; -// dlpackTensor->dl_tensor.strides = nullptr; -// dlpackTensor->dl_tensor.byte_offset = 0; -// dlpackTensor->manager_ctx = nullptr; -// dlpackTensor->deleter = [](DLManagedTensor* tensor) { delete[] tensor->dl_tensor.shape; }; - -// return dlpackTensor; -// } - -// DLDataType getDLPackDataType() { -// DLDataType dtype; -// dtype.code = getDLPackTypeCode(); -// dtype.bits = sizeof(typename ViewType::value_type) * 8; -// dtype.lanes = 1; -// return dtype; -// } - -// DLDataTypeCode getDLPackTypeCode() { -// using ValueType = typename ViewType::value_type; -// if (std::is_same::value) { -// return kDLFloat; -// } else if (std::is_same::value) { -// return kDLFloat; -// } else if (std::is_same::value) { -// return kDLInt; -// } else if (std::is_same::value) { -// return kDLUInt; -// } else if (std::is_same::value) { -// return kDLBool; -// } else { -// // Unsupported data type -// throw std::runtime_error("Unsupported data type for DLPack conversion"); -// } -// } -// }; - } #endif // KOKKOS_DLPACK_H diff --git a/simtbx/diffBragg/src/diffBragg.cpp b/simtbx/diffBragg/src/diffBragg.cpp index 51988d2bdc..3913481057 100644 --- a/simtbx/diffBragg/src/diffBragg.cpp +++ b/simtbx/diffBragg/src/diffBragg.cpp @@ -1508,12 +1508,38 @@ boost::python::tuple diffBragg::get_ncells_derivative_pixels(){ } #ifdef DIFFBRAGG_HAVE_KOKKOS +void dlpack_destructor(PyObject* capsule) { + if (!PyCapsule_IsValid(capsule, "dltensor")) { + return; + } + + // If the capsule has not been used, we need to delete it + DLManagedTensor* dlpackTensor = static_cast(PyCapsule_GetPointer(capsule, "dltensor")); + dlpackTensor->deleter(dlpackTensor); + delete dlpackTensor; +} + +// template +// struct DLPackAPI { +// PyObject* dlpack() { +// if (diffBragg::diffBragg_runner == nullptr) { +// return nullptr; +// } +// return PyCapsule_New(function(), "dltensor", dlpack_destructor); +// } + +// boost::python::tuple dlpack_device() { +// auto device = kokkostbx::getDLPackDevice(); +// return boost::python::make_tuple(static_cast(device.device_type), device.device_id); +// } +// }; + PyObject* diffBragg::get_d_Ncells_images() { if (diffBragg_runner == nullptr) { return nullptr; } - return PyCapsule_New(diffBragg_runner->get_d_Ncells_images(), "dltensor", nullptr); + return PyCapsule_New(diffBragg_runner->get_d_Ncells_images(), "dltensor", dlpack_destructor); } #endif From 8a148357781a8e63cec6fa8ccc3919943be3e555 Mon Sep 17 00:00:00 2001 From: Felix Wittwer Date: Thu, 15 Feb 2024 16:37:46 -0800 Subject: [PATCH 05/17] Add DLPack API for diffBragg results --- simtbx/diffBragg/src/diffBragg.cpp | 123 ++++++++++++++++++----- simtbx/diffBragg/src/diffBragg.h | 22 ++++ simtbx/diffBragg/src/diffBraggKOKKOS.cpp | 92 ++++++++++++++++- simtbx/diffBragg/src/diffBraggKOKKOS.h | 21 ++++ simtbx/diffBragg/src/diffBragg_ext.cpp | 47 ++++++++- 5 files changed, 276 insertions(+), 29 deletions(-) diff --git a/simtbx/diffBragg/src/diffBragg.cpp b/simtbx/diffBragg/src/diffBragg.cpp index 3913481057..4aa58e692f 100644 --- a/simtbx/diffBragg/src/diffBragg.cpp +++ b/simtbx/diffBragg/src/diffBragg.cpp @@ -7,6 +7,7 @@ #include #include #include +#include "diffBragg.h" namespace np=boost::python::numpy; @@ -1509,37 +1510,111 @@ boost::python::tuple diffBragg::get_ncells_derivative_pixels(){ #ifdef DIFFBRAGG_HAVE_KOKKOS void dlpack_destructor(PyObject* capsule) { - if (!PyCapsule_IsValid(capsule, "dltensor")) { - return; - } + if (!PyCapsule_IsValid(capsule, "dltensor")) { + return; + } - // If the capsule has not been used, we need to delete it - DLManagedTensor* dlpackTensor = static_cast(PyCapsule_GetPointer(capsule, "dltensor")); - dlpackTensor->deleter(dlpackTensor); - delete dlpackTensor; + // If the capsule has not been used, we need to delete it + DLManagedTensor* dlpackTensor = static_cast(PyCapsule_GetPointer(capsule, "dltensor")); + dlpackTensor->deleter(dlpackTensor); + delete dlpackTensor; } -// template -// struct DLPackAPI { -// PyObject* dlpack() { -// if (diffBragg::diffBragg_runner == nullptr) { -// return nullptr; -// } -// return PyCapsule_New(function(), "dltensor", dlpack_destructor); -// } - -// boost::python::tuple dlpack_device() { -// auto device = kokkostbx::getDLPackDevice(); -// return boost::python::make_tuple(static_cast(device.device_type), device.device_id); -// } -// }; - -PyObject* diffBragg::get_d_Ncells_images() { +// Fun with pointer-to-member-functions +PyObject* diffBragg::PyCapsule_Wrapper( DLManagedTensor* (diffBraggKOKKOS::*func)()) { if (diffBragg_runner == nullptr) { return nullptr; } - return PyCapsule_New(diffBragg_runner->get_d_Ncells_images(), "dltensor", dlpack_destructor); + return PyCapsule_New((*diffBragg_runner.*func)(), "dltensor", dlpack_destructor); +} + +PyObject* diffBragg::get_floatimage() { + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_floatimage); +} + +PyObject* diffBragg::get_wavelenimage() { + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_wavelenimage); +} + +PyObject* diffBragg::get_d_diffuse_gamma_images() { + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_diffuse_gamma_images); +} + +PyObject* diffBragg::get_d_diffuse_sigma_images() { + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_diffuse_sigma_images); +} + +PyObject* diffBragg::get_d_Umat_images() { + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_Umat_images); +} + +PyObject* diffBragg::get_d2_Umat_images() { + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d2_Umat_images); +} + +PyObject* diffBragg::get_d_Bmat_images() { + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_Bmat_images); +} + +PyObject* diffBragg::get_d2_Bmat_images() { + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d2_Bmat_images); +} + +PyObject* diffBragg::get_d_Ncells_images() { + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_Ncells_images); +} + +PyObject* diffBragg::get_d2_Ncells_images() { + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d2_Ncells_images); +} + +PyObject* diffBragg::get_d_fcell_images() { + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_fcell_images); +} + +PyObject* diffBragg::get_d2_fcell_images() { + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d2_fcell_images); +} + +PyObject* diffBragg::get_d_eta_images() { + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_eta_images); +} + +PyObject* diffBragg::get_d2_eta_images() { + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d2_eta_images); +} + +PyObject* diffBragg::get_d_lambda_images() { + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_lambda_images); +} + +PyObject* diffBragg::get_d2_lambda_images() { + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d2_lambda_images); +} + +PyObject* diffBragg::get_d_panel_rot_images() { + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_panel_rot_images); +} + +PyObject* diffBragg::get_d2_panel_rot_images() { + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d2_panel_rot_images); +} + +PyObject* diffBragg::get_d_panel_orig_images() { + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_panel_orig_images); +} + +PyObject* diffBragg::get_d2_panel_orig_images() { + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d2_panel_orig_images); +} + +PyObject* diffBragg::get_d_fp_fdp_images() { + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_fp_fdp_images); +} + +PyObject* diffBragg::get_Fhkl_scale_deriv() { + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_Fhkl_scale_deriv); } #endif diff --git a/simtbx/diffBragg/src/diffBragg.h b/simtbx/diffBragg/src/diffBragg.h index c27567ca03..599e55a3d0 100644 --- a/simtbx/diffBragg/src/diffBragg.h +++ b/simtbx/diffBragg/src/diffBragg.h @@ -238,7 +238,29 @@ class diffBragg: public nanoBragg{ boost::python::tuple get_fp_fdp_derivative_pixels(); boost::python::tuple get_ncells_derivative_pixels(); #ifdef DIFFBRAGG_HAVE_KOKKOS + PyObject* PyCapsule_Wrapper(DLManagedTensor* (diffBraggKOKKOS::*func)()); + PyObject* get_floatimage(); + PyObject* get_wavelenimage(); + PyObject* get_d_diffuse_gamma_images(); + PyObject* get_d_diffuse_sigma_images(); + PyObject* get_d_Umat_images(); + PyObject* get_d2_Umat_images(); + PyObject* get_d_Bmat_images(); + PyObject* get_d2_Bmat_images(); PyObject* get_d_Ncells_images(); + PyObject* get_d2_Ncells_images(); + PyObject* get_d_fcell_images(); + PyObject* get_d2_fcell_images(); + PyObject* get_d_eta_images(); + PyObject* get_d2_eta_images(); + PyObject* get_d_lambda_images(); + PyObject* get_d2_lambda_images(); + PyObject* get_d_panel_rot_images(); + PyObject* get_d2_panel_rot_images(); + PyObject* get_d_panel_orig_images(); + PyObject* get_d2_panel_orig_images(); + PyObject* get_d_fp_fdp_images(); + PyObject* get_Fhkl_scale_deriv(); #endif boost::python::tuple get_diffuse_gamma_derivative_pixels(); boost::python::tuple get_diffuse_sigma_derivative_pixels(); diff --git a/simtbx/diffBragg/src/diffBraggKOKKOS.cpp b/simtbx/diffBragg/src/diffBraggKOKKOS.cpp index 3420469ed8..016f684d05 100644 --- a/simtbx/diffBragg/src/diffBraggKOKKOS.cpp +++ b/simtbx/diffBragg/src/diffBraggKOKKOS.cpp @@ -43,10 +43,6 @@ uint32_t combine_refinement_flags(flags& db_flags) { return refine_flag; } -DLManagedTensor* diffBraggKOKKOS::get_d_Ncells_images() { - return kokkostbx::view_to_dlpack(m_d_Ncells_images); -} - void diffBraggKOKKOS::diffBragg_sum_over_steps_kokkos( int Npix_to_model, std::vector& panels_fasts_slows, @@ -643,3 +639,91 @@ void diffBraggKOKKOS::diffBragg_sum_over_steps_kokkos( Kokkos::Tools::popRegion(); } + +DLManagedTensor* diffBraggKOKKOS::get_floatimage() { + return kokkostbx::view_to_dlpack(m_floatimage); +} + +DLManagedTensor* diffBraggKOKKOS::get_wavelenimage() { + return kokkostbx::view_to_dlpack(m_wavelenimage); +} + +DLManagedTensor* diffBraggKOKKOS::get_d_diffuse_gamma_images() { + return kokkostbx::view_to_dlpack(m_d_diffuse_gamma_images); +} + +DLManagedTensor* diffBraggKOKKOS::get_d_diffuse_sigma_images() { + return kokkostbx::view_to_dlpack(m_d_diffuse_sigma_images); +} + +DLManagedTensor* diffBraggKOKKOS::get_d_Umat_images() { + return kokkostbx::view_to_dlpack(m_d_Umat_images); +} + +DLManagedTensor* diffBraggKOKKOS::get_d2_Umat_images() { + return kokkostbx::view_to_dlpack(m_d2_Umat_images); +} + +DLManagedTensor* diffBraggKOKKOS::get_d_Bmat_images() { + return kokkostbx::view_to_dlpack(m_d_Bmat_images); +} + +DLManagedTensor* diffBraggKOKKOS::get_d2_Bmat_images() { + return kokkostbx::view_to_dlpack(m_d2_Bmat_images); +} + +DLManagedTensor* diffBraggKOKKOS::get_d_Ncells_images() { + return kokkostbx::view_to_dlpack(m_d_Ncells_images); +} + +DLManagedTensor* diffBraggKOKKOS::get_d2_Ncells_images() { + return kokkostbx::view_to_dlpack(m_d2_Ncells_images); +} + +DLManagedTensor* diffBraggKOKKOS::get_d_fcell_images() { + return kokkostbx::view_to_dlpack(m_d_fcell_images); +} + +DLManagedTensor* diffBraggKOKKOS::get_d2_fcell_images() { + return kokkostbx::view_to_dlpack(m_d2_fcell_images); +} + +DLManagedTensor* diffBraggKOKKOS::get_d_eta_images() { + return kokkostbx::view_to_dlpack(m_d_eta_images); +} + +DLManagedTensor* diffBraggKOKKOS::get_d2_eta_images() { + return kokkostbx::view_to_dlpack(m_d2_eta_images); +} + +DLManagedTensor* diffBraggKOKKOS::get_d_lambda_images() { + return kokkostbx::view_to_dlpack(m_d_lambda_images); +} + +DLManagedTensor* diffBraggKOKKOS::get_d2_lambda_images() { + return kokkostbx::view_to_dlpack(m_d2_lambda_images); +} + +DLManagedTensor* diffBraggKOKKOS::get_d_panel_rot_images() { + return kokkostbx::view_to_dlpack(m_d_panel_rot_images); +} + +DLManagedTensor* diffBraggKOKKOS::get_d2_panel_rot_images() { + return kokkostbx::view_to_dlpack(m_d2_panel_rot_images); +} + +DLManagedTensor* diffBraggKOKKOS::get_d_panel_orig_images() { + return kokkostbx::view_to_dlpack(m_d_panel_orig_images); +} + +DLManagedTensor* diffBraggKOKKOS::get_d2_panel_orig_images() { + return kokkostbx::view_to_dlpack(m_d2_panel_orig_images); +} + +DLManagedTensor* diffBraggKOKKOS::get_d_fp_fdp_images() { + return kokkostbx::view_to_dlpack(m_d_fp_fdp_images); +} + +DLManagedTensor* diffBraggKOKKOS::get_Fhkl_scale_deriv() { + return kokkostbx::view_to_dlpack(m_Fhkl_scale_deriv); +} diff --git a/simtbx/diffBragg/src/diffBraggKOKKOS.h b/simtbx/diffBragg/src/diffBraggKOKKOS.h index a88ef5d4d3..78ed67a505 100644 --- a/simtbx/diffBragg/src/diffBraggKOKKOS.h +++ b/simtbx/diffBragg/src/diffBraggKOKKOS.h @@ -149,7 +149,28 @@ class diffBraggKOKKOS { // diffBragg_kokkosPointers& kp, timer_variables& TIMERS); + DLManagedTensor* get_floatimage(); + DLManagedTensor* get_wavelenimage(); + DLManagedTensor* get_d_diffuse_gamma_images(); + DLManagedTensor* get_d_diffuse_sigma_images(); + DLManagedTensor* get_d_Umat_images(); + DLManagedTensor* get_d2_Umat_images(); + DLManagedTensor* get_d_Bmat_images(); + DLManagedTensor* get_d2_Bmat_images(); DLManagedTensor* get_d_Ncells_images(); + DLManagedTensor* get_d2_Ncells_images(); + DLManagedTensor* get_d_fcell_images(); + DLManagedTensor* get_d2_fcell_images(); + DLManagedTensor* get_d_eta_images(); + DLManagedTensor* get_d2_eta_images(); + DLManagedTensor* get_d_lambda_images(); + DLManagedTensor* get_d2_lambda_images(); + DLManagedTensor* get_d_panel_rot_images(); + DLManagedTensor* get_d2_panel_rot_images(); + DLManagedTensor* get_d_panel_orig_images(); + DLManagedTensor* get_d2_panel_orig_images(); + DLManagedTensor* get_d_fp_fdp_images(); + DLManagedTensor* get_Fhkl_scale_deriv(); }; #endif diff --git a/simtbx/diffBragg/src/diffBragg_ext.cpp b/simtbx/diffBragg/src/diffBragg_ext.cpp index 1245730aa7..84aad9b7de 100644 --- a/simtbx/diffBragg/src/diffBragg_ext.cpp +++ b/simtbx/diffBragg/src/diffBragg_ext.cpp @@ -733,7 +733,6 @@ struct DLPackAPI { .def("set_ncells_values", &simtbx::nanoBragg::diffBragg::set_ncells_values, "set Ncells values as a 3-tuple (Na, Nb, Nc)") .def("get_ncells_values", &simtbx::nanoBragg::diffBragg::get_ncells_values, "get Ncells values as a 3-tuple (Na, Nb, Nc)") - .def("get_d_Ncells_images", &simtbx::nanoBragg::diffBragg::get_d_Ncells_images, "get DLPackTensor for d_Ncells_images; pot. on GPU") .def("add_diffBragg_spots_full", &simtbx::nanoBragg::diffBragg::add_diffBragg_spots_full, "forward model and gradients at every pixel") @@ -1093,6 +1092,52 @@ struct DLPackAPI { make_function(&set_beams,dcp()), "list of dxtbx::Beam objects corresponding to each zero-divergence and monochromatic x-ray point source in the numerical simulation ") +#ifdef DIFFBRAGG_HAVE_KOKKOS + .def("get_floatimage", &simtbx::nanoBragg::diffBragg::get_floatimage, "get DLPackTensor for floatimage; pot. on GPU") + + .def("get_wavelenimage", &simtbx::nanoBragg::diffBragg::get_wavelenimage, "get DLPackTensor for wavelenimage; pot. on GPU") + + .def("get_d_diffuse_gamma_images", &simtbx::nanoBragg::diffBragg::get_d_diffuse_gamma_images, "get DLPackTensor for d_diffuse_gamma_images; pot. on GPU") + + .def("get_d_diffuse_sigma_images", &simtbx::nanoBragg::diffBragg::get_d_diffuse_sigma_images, "get DLPackTensor for d_diffuse_sigma_images; pot. on GPU") + + .def("get_d_Umat_images", &simtbx::nanoBragg::diffBragg::get_d_Umat_images, "get DLPackTensor for d_Umat_images; pot. on GPU") + + .def("get_d2_Umat_images", &simtbx::nanoBragg::diffBragg::get_d2_Umat_images, "get DLPackTensor for d2_Umat_images; pot. on GPU") + + .def("get_d_Bmat_images", &simtbx::nanoBragg::diffBragg::get_d_Bmat_images, "get DLPackTensor for d_Bmat_images; pot. on GPU") + + .def("get_d2_Bmat_images", &simtbx::nanoBragg::diffBragg::get_d2_Bmat_images, "get DLPackTensor for d2_Bmat_images; pot. on GPU") + + .def("get_d_Ncells_images", &simtbx::nanoBragg::diffBragg::get_d_Ncells_images, "get DLPackTensor for d_Ncells_images; pot. on GPU") + + .def("get_d2_Ncells_images", &simtbx::nanoBragg::diffBragg::get_d2_Ncells_images, "get DLPackTensor for d2_Ncells_images; pot. on GPU") + + .def("get_d_fcell_images", &simtbx::nanoBragg::diffBragg::get_d_fcell_images, "get DLPackTensor for d_fcell_images; pot. on GPU") + + .def("get_d2_fcell_images", &simtbx::nanoBragg::diffBragg::get_d2_fcell_images, "get DLPackTensor for d2_fcell_images; pot. on GPU") + + .def("get_d_eta_images", &simtbx::nanoBragg::diffBragg::get_d_eta_images, "get DLPackTensor for d_eta_images; pot. on GPU") + + .def("get_d2_eta_images", &simtbx::nanoBragg::diffBragg::get_d2_eta_images, "get DLPackTensor for d2_eta_images; pot. on GPU") + + .def("get_d_lambda_images", &simtbx::nanoBragg::diffBragg::get_d_lambda_images, "get DLPackTensor for d_lambda_images; pot. on GPU") + + .def("get_d2_lambda_images", &simtbx::nanoBragg::diffBragg::get_d2_lambda_images, "get DLPackTensor for d2_lambda_images; pot. on GPU") + + .def("get_d_panel_rot_images", &simtbx::nanoBragg::diffBragg::get_d_panel_rot_images, "get DLPackTensor for d_panel_rot_images; pot. on GPU") + + .def("get_d2_panel_rot_images", &simtbx::nanoBragg::diffBragg::get_d2_panel_rot_images, "get DLPackTensor for d2_panel_rot_images; pot. on GPU") + + .def("get_d_panel_orig_images", &simtbx::nanoBragg::diffBragg::get_d_panel_orig_images, "get DLPackTensor for d_panel_orig_images; pot. on GPU") + + .def("get_d2_panel_orig_images", &simtbx::nanoBragg::diffBragg::get_d2_panel_orig_images, "get DLPackTensor for d2_panel_orig_images; pot. on GPU") + + .def("get_d_fp_fdp_images", &simtbx::nanoBragg::diffBragg::get_d_fp_fdp_images, "get DLPackTensor for d_fp_fdp_images; pot. on GPU") + + .def("get_Fhkl_scale_deriv", &simtbx::nanoBragg::diffBragg::get_Fhkl_scale_deriv, "get DLPackTensor for Fhkl_scale_deriv; pot. on GPU") +#endif + ; // end of diffBragg extention } // end of diffBragg_init_module From db99d1d10fcbe1b2381c23a853f581540276f674 Mon Sep 17 00:00:00 2001 From: Felix Wittwer Date: Thu, 15 Feb 2024 16:39:59 -0800 Subject: [PATCH 06/17] clean clutter --- kokkostbx/kokkos_dlpack.h | 4 ++-- simtbx/diffBragg/src/diffBragg.cpp | 2 +- simtbx/diffBragg/src/diffBragg_ext.cpp | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/kokkostbx/kokkos_dlpack.h b/kokkostbx/kokkos_dlpack.h index eb00e79fda..dedddf6264 100644 --- a/kokkostbx/kokkos_dlpack.h +++ b/kokkostbx/kokkos_dlpack.h @@ -54,7 +54,7 @@ DLDevice getDLPackDevice() { return {kDLROCM, device_id}; } else if (std::is_same::value) { return {kDLROCMHost, device_id}; - } + } #endif else { // Extend to other device types as needed @@ -75,7 +75,7 @@ DLManagedTensor* view_to_dlpack(Kokkos::View& view) { DLManagedTensor* dlpackTensor = new DLManagedTensor; dlpackTensor->dl_tensor.data = view.data(); dlpackTensor->dl_tensor.device = getDLPackDevice(); - dlpackTensor->dl_tensor.ndim = numDims; + dlpackTensor->dl_tensor.ndim = numDims; dlpackTensor->dl_tensor.dtype = getDLPackDataType(); dlpackTensor->dl_tensor.shape = shape; dlpackTensor->dl_tensor.strides = nullptr; diff --git a/simtbx/diffBragg/src/diffBragg.cpp b/simtbx/diffBragg/src/diffBragg.cpp index 4aa58e692f..4ede77192d 100644 --- a/simtbx/diffBragg/src/diffBragg.cpp +++ b/simtbx/diffBragg/src/diffBragg.cpp @@ -1526,7 +1526,7 @@ PyObject* diffBragg::PyCapsule_Wrapper( DLManagedTensor* (diffBraggKOKKOS::*func if (diffBragg_runner == nullptr) { return nullptr; } - return PyCapsule_New((*diffBragg_runner.*func)(), "dltensor", dlpack_destructor); + return PyCapsule_New((*diffBragg_runner.*func)(), "dltensor", dlpack_destructor); } PyObject* diffBragg::get_floatimage() { diff --git a/simtbx/diffBragg/src/diffBragg_ext.cpp b/simtbx/diffBragg/src/diffBragg_ext.cpp index 84aad9b7de..1cac964785 100644 --- a/simtbx/diffBragg/src/diffBragg_ext.cpp +++ b/simtbx/diffBragg/src/diffBragg_ext.cpp @@ -611,7 +611,7 @@ struct DLPackAPI { dlpackTensor->dl_tensor.data = static_cast(&container); dlpackTensor->dl_tensor.device.device_type = DLDeviceType::kDLCPU; dlpackTensor->dl_tensor.device.device_id = 0; - dlpackTensor->dl_tensor.ndim = numDims; + dlpackTensor->dl_tensor.ndim = numDims; dlpackTensor->dl_tensor.dtype = getDLPackDataType(); dlpackTensor->dl_tensor.shape = shape; dlpackTensor->dl_tensor.strides = nullptr; @@ -634,7 +634,7 @@ struct DLPackAPI { dtype.bits = sizeof(double) * 8; dtype.lanes = 1; return dtype; - } + } void print_hello() { std::cout << "Hello Python!" << std::endl; From 62c95849865768ca28b8516c2520505919d6443b Mon Sep 17 00:00:00 2001 From: Felix Wittwer Date: Thu, 15 Feb 2024 17:45:32 -0800 Subject: [PATCH 07/17] clean clutter --- libtbx/citations.params | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libtbx/citations.params b/libtbx/citations.params index 3b10bf44aa..0a4a3b2d0a 100644 --- a/libtbx/citations.params +++ b/libtbx/citations.params @@ -484,12 +484,12 @@ citation { article_id = mmseqs2b authors = Mirdita M, Steinegger M, Söding J title = MMseqs2 desktop and local web server app for fast interactive sequence searches - journal = Bioinformatics + journal = Bioinformatics volume = 35 - pages = 2856-2858 + pages = 2856-2858 year = 2019 doi_id = 10.1093/bioinformatics/bty1057 - pmid = 30615063 + pmid = 30615063 } citation { article_id = mmseqs2 From c4c702023bd248ae5d868c81b9c22d97ea1dd52e Mon Sep 17 00:00:00 2001 From: Felix Wittwer Date: Tue, 20 Feb 2024 11:16:20 -0800 Subject: [PATCH 08/17] add kokkos_device function --- simtbx/diffBragg/src/diffBragg_ext.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/simtbx/diffBragg/src/diffBragg_ext.cpp b/simtbx/diffBragg/src/diffBragg_ext.cpp index 1cac964785..dd1fe393b8 100644 --- a/simtbx/diffBragg/src/diffBragg_ext.cpp +++ b/simtbx/diffBragg/src/diffBragg_ext.cpp @@ -504,6 +504,22 @@ namespace boost_python { namespace { return boost::python::make_tuple(diffBragg.pythony_indices,diffBragg.pythony_amplitudes); } +std::string kokkos_device() { + std::string backend = "cpu:0"; +#ifdef DIFFBRAGG_HAVE_KOKKOS + if (Kokkos::is_finalized()) { + throw std::runtime_error("Error: Kokkos has been finalized.\n"); + } + if (!Kokkos::is_initialized()) { + throw std::runtime_error("Error: Kokkos not initialized.\n"); + } +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) + backend = "cuda:" + std::to_string( Kokkos::device_id() ); +#endif +#endif + return backend; +} + #ifdef DIFFBRAGG_HAVE_KOKKOS void finalize_kokkos(){ Kokkos::finalize(); @@ -672,6 +688,8 @@ struct DLPackAPI { def("initialize_kokkos", initialize_kokkos, "the sole argument `dev` (an int from 0 to Ngpu-1) is passed to Kokkos::initialize()"); + def("kokkos_device", kokkos_device, "returns kokkos device for use in PyTorch"); + def("print_dlpack",PrintDLTensorParameters,"Print information about a dlpack"); // def("get_d_Ncells_images", &get_dlpack, "Return DLPackTensor for d_Ncells_images; pot. on GPU") From b8de3fb3104a1246b6776e4a44cacec067f3e071 Mon Sep 17 00:00:00 2001 From: Felix Wittwer Date: Tue, 5 Mar 2024 10:32:33 -0800 Subject: [PATCH 09/17] update diffBragg:model() to use pytorch --- simtbx/diffBragg/hopper_utils.py | 42 +++++++++++++++----------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/simtbx/diffBragg/hopper_utils.py b/simtbx/diffBragg/hopper_utils.py index 5acd3ced80..43f844e17b 100644 --- a/simtbx/diffBragg/hopper_utils.py +++ b/simtbx/diffBragg/hopper_utils.py @@ -2,6 +2,7 @@ import time import os import json +import torch from dials.algorithms.shoebox import MaskCode from copy import deepcopy from dials.model.data import Shoebox @@ -24,7 +25,7 @@ from simtbx.diffBragg import utils from simtbx.diffBragg.refiners.parameters import RangedParameter, Parameters, PositiveParameter from simtbx.diffBragg.attr_list import NB_BEAM_ATTRS, NB_CRYST_ATTRS, DIFFBRAGG_ATTRS -from simtbx.diffBragg import psf +from simtbx.diffBragg import psf, kokkos_device try: from line_profiler import LineProfiler @@ -1570,7 +1571,7 @@ def model(x, Mod, SIM, compute_grad=True, dont_rescale_gradient=False, update_s J = None if compute_grad: # This should be all params save the Fhkl params - J = np.zeros((nparam-SIM.Num_ASU*SIM.num_Fhkl_channels, npix)) # gradients + J = torch.zeros((nparam-SIM.Num_ASU*SIM.num_Fhkl_channels, npix), device=kokkos_device()) # gradients model_pix = None #TODO check roiScales mode and if its broken, git rid of it! @@ -1581,7 +1582,7 @@ def model(x, Mod, SIM, compute_grad=True, dont_rescale_gradient=False, update_s if not Mod.params.fix.perRoiScale: perRoiParams = [Mod.P["scale_roi%d" % roi_id] for roi_id in Mod.roi_id_unique] perRoiScaleFactors = [p.get_val(x[p.xpos]) for p in perRoiParams] - roiScalesPerPix = np.zeros(npix) + roiScalesPerPix = torch.zeros(npix, device=kokkos_device()) for i_roi, roi_id in enumerate(Mod.roi_id_unique): slc = Mod.roi_id_slices[roi_id][0] roiScalesPerPix[slc] = perRoiScaleFactors[i_roi] @@ -1615,8 +1616,7 @@ def model(x, Mod, SIM, compute_grad=True, dont_rescale_gradient=False, update_s SIM.D.add_diffBragg_spots(pfs) - pix_noRoiScale = SIM.D.raw_pixels_roi[:npix] - pix_noRoiScale = pix_noRoiScale.as_numpy_array() + pix_noRoiScale = torch.from_dlpack(SIM.D.get_floatimage())[:npix] pix = pix_noRoiScale * roiScalesPerPix @@ -1635,15 +1635,15 @@ def model(x, Mod, SIM, compute_grad=True, dont_rescale_gradient=False, update_s if RotXYZ_params[0].refine: for i_rot in range(3): - rot_grad = scale * SIM.D.get_derivative_pixels(ROTXYZ_IDS[i_rot]).as_numpy_array()[:npix] + rot_grad = scale * torch.from_dlpack(SIM.D.get_d_Umat_images())[:npix] rot_p = RotXYZ_params[i_rot] rot_grad = rot_p.get_deriv(x[rot_p.xpos], rot_grad) J[rot_p.xpos] += rot_grad if Nabc_params[0].refine: - Nabc_grads = SIM.D.get_ncells_derivative_pixels() + Nabc_grads = scale * torch.from_dlpack(SIM.D.get_d_Ncells_images())[:3*npix] for i_n in range(3): - N_grad = scale*(Nabc_grads[i_n][:npix].as_numpy_array()) + N_grad = Nabc_grads[i_n*npix:(i_n+1)*npix] p = Nabc_params[i_n] N_grad = p.get_deriv(x[p.xpos], N_grad) J[p.xpos] += N_grad @@ -1651,53 +1651,51 @@ def model(x, Mod, SIM, compute_grad=True, dont_rescale_gradient=False, update_s break if Ndef_params[0].refine: - Ndef_grads = SIM.D.get_ncells_def_derivative_pixels() + Ndef_grads = scale * torch.from_dlpack(SIM.D.get_d_Ncells_images())[3*npix:] for i_n in range(3): - N_grad = scale * (Ndef_grads[i_n][:npix].as_numpy_array()) + N_grad = Ndef_grads[i_n*npix:(i_n+1)*npix] p = Ndef_params[i_n] N_grad = p.get_deriv(x[p.xpos], N_grad) J[p.xpos] += N_grad if SIM.D.use_diffuse: for t in ['gamma','sigma']: - diffuse_grads = getattr(SIM.D, "get_diffuse_%s_derivative_pixels" % t)() + diffuse_grads = scale * torch.from_dlpack( getattr(SIM.D, "get_d_diffuse_%s_images" % t)() ) if diffuse_params_lookup[t][0].refine: for i_diff in range(3): - diff_grad = scale*(diffuse_grads[i_diff][:npix].as_numpy_array()) + diff_grad = diffuse_grads[i_diff*npix:(i_diff+1)*npix] p = diffuse_params_lookup[t][i_diff] diff_grad = p.get_deriv(x[p.xpos], diff_grad) J[p.xpos] += diff_grad if eta_params[0].refine: - if SIM.D.has_anisotropic_mosaic_spread: - eta_derivs = SIM.D.get_aniso_eta_deriv_pixels() - else: - eta_derivs = [SIM.D.get_derivative_pixels(ETA_ID)] + eta_derivs = scale * torch.from_dlpack(SIM.D.get_d_eta_images()) num_eta = 3 if SIM.D.has_anisotropic_mosaic_spread else 1 for i_eta in range(num_eta): p = eta_params[i_eta] - eta_grad = scale * (eta_derivs[i_eta][:npix].as_numpy_array()) + eta_grad = eta_derivs[i_eta*npix:(i_eta+1)*npix] eta_grad = p.get_deriv(x[p.xpos], eta_grad) J[p.xpos] += eta_grad if ucell_params[0].refine: + ucell_grads = scale * torch.from_dlpack(SIM.D.get_d_Umat_images()) for i_ucell in range(nucell): p = ucell_params[i_ucell] - deriv = scale*SIM.D.get_derivative_pixels(UCELL_ID_OFFSET+i_ucell).as_numpy_array()[:npix] + deriv = ucell_grads[i_ucell*npix: (i_ucell+1)*npix] deriv = p.get_deriv(x[p.xpos], deriv) J[p.xpos] += deriv if DetZ.refine: - d = SIM.D.get_derivative_pixels(DETZ_ID).as_numpy_array()[:npix] + d = torch.from_dlpack(SIM.D.get_d_panel_orig_images())[npix:2*npix] d = DetZ.get_deriv(x[DetZ.xpos], d) J[DetZ.xpos] += d if Mod.P["lambda_offset"].refine: - lambda_derivs = SIM.D.get_lambda_derivative_pixels() + lambda_derivs = torch.from_dlpack(SIM.D.get_d_lambda_images()) lambda_param_names = "lambda_offset", "lambda_scale" - for d,name in zip(lambda_derivs, lambda_param_names): + for i_lmbd,name in enumerate(lambda_param_names): p = Mod.P[name] - d = d.as_numpy_array()[:npix] + d = lambda_derivs[i_lmbd*npix:(i_lmbd+1)*npix] d = p.get_deriv(x[p.xpos], d) J[p.xpos] += d From 9187961ff5820bd93f132a5d89ffdf4c5a8a88a2 Mon Sep 17 00:00:00 2001 From: Felix Wittwer Date: Tue, 5 Mar 2024 14:46:10 -0800 Subject: [PATCH 10/17] Update diffBragg:DataModeler to use pytorch --- simtbx/diffBragg/hopper_utils.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/simtbx/diffBragg/hopper_utils.py b/simtbx/diffBragg/hopper_utils.py index 43f844e17b..a3440ec0d8 100644 --- a/simtbx/diffBragg/hopper_utils.py +++ b/simtbx/diffBragg/hopper_utils.py @@ -693,16 +693,17 @@ def data_to_one_dim(self, img_data, is_trusted, background): self.all_q_perpix = np.array(all_q_perpix) pan_fast_slow = np.ascontiguousarray((np.vstack([all_pid, all_fast, all_slow]).T).ravel()) self.pan_fast_slow = flex.size_t(pan_fast_slow) - self.all_background = np.array(all_background) + self.all_background = torch.tensor(all_background, device=kokkos_device()) self.roi_id = np.array(roi_id) - self.all_data = np.array(all_data) + self.all_data = torch.tensor(all_data, device=kokkos_device()) if np.allclose(all_sigma_rdout, self.nominal_sigma_rdout): self.all_sigma_rdout = self.nominal_sigma_rdout else: self.all_sigma_rdout = np.array(all_sigma_rdout) - self.all_sigmas = np.array(all_sigmas) + self.all_sigmas = torch.tensor(all_sigmas, device=kokkos_device()) # note rare chance for sigmas to be nan if the args of sqrt is below 0 - self.all_trusted = np.logical_and(np.array(all_trusted), ~np.isnan(all_sigmas)) + all_trusted = torch.tensor(all_trusted, device=kokkos_device()) + self.all_trusted = torch.logical_and(all_trusted, ~torch.isnan(self.all_sigmas)) if self.params.roi.skip_roi_with_negative_bg: # Dont include pixels whose background model is below 0 @@ -1834,7 +1835,7 @@ def __call__(self, x, *args, **kwargs): self.old_J = J self.iteration += 1 self.g = g - return f + return f.cpu() def target_func(x, udpate_terms, mod, SIM, compute_grad=True, return_all_zscores=False): @@ -1905,14 +1906,14 @@ def target_func(x, udpate_terms, mod, SIM, compute_grad=True, return_all_zscores V = model_pix + sigma_rdout**2 # TODO:what if V is allowed to be negative? The logarithm/sqrt will explore below resid_square = resid**2 - fLogLike = (.5*(np.log(2*np.pi*V) + resid_square / V)) + fLogLike = (.5*(torch.log(2*torch.pi*V) + resid_square / V)) if params.roi.allow_overlapping_spots: fLogLike /= mod.all_freq fLogLike = fLogLike[trusted].sum() # negative log Likelihood target # width of z-score should decrease as refinement proceeds - zscore_per = resid/np.sqrt(V) - zscore_sigma = np.std(zscore_per[trusted]) + zscore_per = resid/torch.sqrt(V) + zscore_sigma = torch.std(zscore_per[trusted]) restraint_terms = {} if params.use_restraints: From 1ccab2873ceadf4d13fba2a53ca8f69299e77f60 Mon Sep 17 00:00:00 2001 From: Felix Wittwer Date: Wed, 6 Mar 2024 14:14:04 -0800 Subject: [PATCH 11/17] more update hopper_utils to use PyTorch --- simtbx/diffBragg/hopper_utils.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/simtbx/diffBragg/hopper_utils.py b/simtbx/diffBragg/hopper_utils.py index a3440ec0d8..1c885d6102 100644 --- a/simtbx/diffBragg/hopper_utils.py +++ b/simtbx/diffBragg/hopper_utils.py @@ -1679,7 +1679,7 @@ def model(x, Mod, SIM, compute_grad=True, dont_rescale_gradient=False, update_s J[p.xpos] += eta_grad if ucell_params[0].refine: - ucell_grads = scale * torch.from_dlpack(SIM.D.get_d_Umat_images()) + ucell_grads = scale * torch.from_dlpack(SIM.D.get_d_Bmat_images()) for i_ucell in range(nucell): p = ucell_params[i_ucell] deriv = ucell_grads[i_ucell*npix: (i_ucell+1)*npix] @@ -1835,7 +1835,7 @@ def __call__(self, x, *args, **kwargs): self.old_J = J self.iteration += 1 self.g = g - return f.cpu() + return f def target_func(x, udpate_terms, mod, SIM, compute_grad=True, return_all_zscores=False): @@ -1909,11 +1909,11 @@ def target_func(x, udpate_terms, mod, SIM, compute_grad=True, return_all_zscores fLogLike = (.5*(torch.log(2*torch.pi*V) + resid_square / V)) if params.roi.allow_overlapping_spots: fLogLike /= mod.all_freq - fLogLike = fLogLike[trusted].sum() # negative log Likelihood target + fLogLike = fLogLike[trusted].sum().item() # negative log Likelihood target # width of z-score should decrease as refinement proceeds zscore_per = resid/torch.sqrt(V) - zscore_sigma = torch.std(zscore_per[trusted]) + zscore_sigma = torch.std(zscore_per[trusted]).item() restraint_terms = {} if params.use_restraints: @@ -2188,12 +2188,12 @@ def get_new_xycalcs(Modeler, new_exp, old_refl_tag="dials"): for i_roi in range(len(bragg_subimg)): ref_idx = Modeler.refls_idx[i_roi] - #assert ref_idx==i_roi - if np.any(bragg_subimg[i_roi] > 0): + if torch.any(bragg_subimg[i_roi] > 0): I = bragg_subimg[i_roi] - assert np.all(I>=0) - Y, X = np.indices(bragg_subimg[i_roi].shape) + assert torch.all(I>=0) + ny, nx = bragg_subimg[i_roi].shape + X, Y = torch.meshgrid(torch.arange(nx, device=kokkos_device()), torch.arange(ny, device=kokkos_device()), indexing='xy') x1, _, y1, _ = Modeler.rois[i_roi] com_x, com_y, _ = new_refls[ref_idx]["xyzobs.px.value"] @@ -2206,11 +2206,13 @@ def get_new_xycalcs(Modeler, new_exp, old_refl_tag="dials"): except IndexError: continue - X += x1 - Y += y1 + X = X + x1 + Y = Y + y1 Isum = I.sum() xcom = (X * I).sum() / Isum + .5 + xcom = xcom.item() ycom = (Y * I).sum() / Isum + .5 + ycom = ycom.item() com = xcom, ycom, 0 pid = Modeler.pids[i_roi] From d3907bd913e524b3905efaccbb11b31a3ccf4f2f Mon Sep 17 00:00:00 2001 From: Felix Wittwer Date: Wed, 6 Mar 2024 17:29:08 -0800 Subject: [PATCH 12/17] Fix for pytorch in hopper --- simtbx/diffBragg/hopper_utils.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/simtbx/diffBragg/hopper_utils.py b/simtbx/diffBragg/hopper_utils.py index 1c885d6102..50e0acaadb 100644 --- a/simtbx/diffBragg/hopper_utils.py +++ b/simtbx/diffBragg/hopper_utils.py @@ -684,10 +684,10 @@ def data_to_one_dim(self, img_data, is_trusted, background): x1, x2, y1, y2 = self.rois[i_roi] freq = pixel_counter[pid, y1:y2, x1:x2].ravel() all_freq += list(freq) - self.all_freq = np.array(all_freq, np.int32) # if no overlapping pixels, this should be an array of 1's + self.all_freq = torch.tensor(all_freq, dtype=torch.int32, device=kokkos_device()) # if no overlapping pixels, this should be an array of 1's if not self.params.roi.allow_overlapping_spots: - if not np.all(self.all_freq==1): - print(set(self.all_freq)) + if not torch.all(self.all_freq==1): + print(set(self.all_freq.cpu().numpy())) raise ValueError("There are overlapping regions of interest, despite the command to not allow overlaps") self.all_q_perpix = np.array(all_q_perpix) @@ -1635,8 +1635,9 @@ def model(x, Mod, SIM, compute_grad=True, dont_rescale_gradient=False, update_s J[G.xpos] += scale_grad if RotXYZ_params[0].refine: + rot_grads = scale * torch.from_dlpack(SIM.D.get_d_Umat_images()) for i_rot in range(3): - rot_grad = scale * torch.from_dlpack(SIM.D.get_d_Umat_images())[:npix] + rot_grad = rot_grads[i_rot*npix:(i_rot+1)*npix] rot_p = RotXYZ_params[i_rot] rot_grad = rot_p.get_deriv(x[rot_p.xpos], rot_grad) J[rot_p.xpos] += rot_grad From 5a2893517295240cb2af2f5c46e191f1c5aa1ff6 Mon Sep 17 00:00:00 2001 From: Felix Wittwer Date: Mon, 11 Mar 2024 08:48:11 -0700 Subject: [PATCH 13/17] updates to hopper_utils for pytorch --- simtbx/diffBragg/hopper_ensemble_utils.py | 9 +++-- simtbx/diffBragg/hopper_utils.py | 49 +++++++++++------------ 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/simtbx/diffBragg/hopper_ensemble_utils.py b/simtbx/diffBragg/hopper_ensemble_utils.py index f15f608d10..16976e754a 100644 --- a/simtbx/diffBragg/hopper_ensemble_utils.py +++ b/simtbx/diffBragg/hopper_ensemble_utils.py @@ -5,6 +5,7 @@ import socket import logging import os +import torch import numpy as np from scipy.optimize import basinhopping @@ -105,7 +106,7 @@ def target_func(x, modelers): f = 0 # target functional g = np.zeros(modelers.num_total_modelers * num_shot_params) - g_fhkl = np.zeros(num_fhkl_params) + g_fhkl = torch.zeros(num_fhkl_params) zscore_sigs = [] fcell_params = x[-num_fhkl_params:] for ii, i_shot in enumerate(modelers): @@ -126,13 +127,13 @@ def target_func(x, modelers): # data contributions to target function V = model_pix + shot_modeler.all_sigma_rdout**2 resid_square = resid**2 - shot_fLogLike = (.5*(np.log(2*np.pi*V) + resid_square / V)) + shot_fLogLike = (.5*(torch.log(2*np.pi*V) + resid_square / V)) if shot_modeler.params.roi.allow_overlapping_spots: shot_fLogLike /= shot_modeler.all_freq shot_fLogLike = shot_fLogLike[shot_modeler.all_trusted].sum() # negative log Likelihood target f += shot_fLogLike - zscore_sig = np.std((resid / np.sqrt(V))[shot_modeler.all_trusted]) + zscore_sig = torch.std((resid / torch.sqrt(V))[shot_modeler.all_trusted]).item() zscore_sigs.append(zscore_sig) # get this shots contribution to the gradient @@ -145,7 +146,7 @@ def target_func(x, modelers): for name in shot_modeler.non_fhkl_params: p = shot_modeler.P[name] Jac_p = Jac[p.xpos] - shot_g[p.xpos] += (Jac_p[shot_modeler.all_trusted] * common_grad_term).sum() + shot_g[p.xpos] += (Jac_p[shot_modeler.all_trusted] * common_grad_term).sum().item() np.add.at(g, shot_x_slice, shot_g) spot_scale_p = shot_modeler.P["G_xtal0"] diff --git a/simtbx/diffBragg/hopper_utils.py b/simtbx/diffBragg/hopper_utils.py index 50e0acaadb..e3138da67c 100644 --- a/simtbx/diffBragg/hopper_utils.py +++ b/simtbx/diffBragg/hopper_utils.py @@ -730,18 +730,18 @@ def dump_gathered_to_refl(self, output_name, do_xyobs_sanity_check=False): roi_sel = self.roi_id==i_roi x1, x2, y1, y2 = self.rois[i_roi] roi_shape = y2-y1, x2-x1 - roi_img = self.all_data[roi_sel].reshape(roi_shape).astype(np.float32) #NOTE this has already been converted to photon units - roi_bg = self.all_background[roi_sel].reshape(roi_shape).astype(np.float32) + roi_img = self.all_data[roi_sel].reshape(roi_shape).float() #NOTE this has already been converted to photon units + roi_bg = self.all_background[roi_sel].reshape(roi_shape).float() sb = Shoebox((x1, x2, y1, y2, 0, 1)) sb.allocate() - sb.data = flex.float(np.ascontiguousarray(roi_img[None])) - sb.background = flex.float(np.ascontiguousarray(roi_bg[None])) + sb.data = flex.float(roi_img[None].cpu().contiguous().numpy()) + sb.background = flex.float(roi_bg[None].cpu().contiguous().numpy()) - dials_mask = np.zeros(roi_img.shape).astype(np.int32) + dials_mask = torch.zeros(roi_img.shape, device=kokkos_device()).int() mask = self.all_trusted[roi_sel].reshape(roi_shape) dials_mask[mask] = dials_mask[mask] + MaskCode.Valid - sb.mask = flex.int(np.ascontiguousarray(dials_mask[None])) + sb.mask = flex.int(dials_mask[None].cpu().contiguous().numpy()) # quick sanity test if do_xyobs_sanity_check: @@ -1250,7 +1250,7 @@ def save_up(self, x, SIM, rank=0, i_shot=0, SIM.D.force_cpu = True MAIN_LOGGER.info("Getting Fhkl errors (forcing CPUkernel usage)... might take some time") Fhkl_scale_errors = SIM.D.add_Fhkl_gradients( - self.pan_fast_slow, resid, V, self.all_trusted, self.all_freq, + self.pan_fast_slow, resid.cpu().numpy(), V.cpu().numpy(), self.all_trusted.cpu().numpy(), self.all_freq.cpu().numpy(), SIM.num_Fhkl_channels, G, track=True, errors=True) SIM.D.force_gpu = force_cpu # ------------ @@ -1338,21 +1338,22 @@ def save_up(self, x, SIM, rank=0, i_shot=0, fit = model_subimg[i_roi] trust = trusted_subimg[i_roi] if sigma_rdout_subimg is not None: - sig = np.sqrt(fit + sigma_rdout_subimg[i_roi] ** 2) + sig = torch.sqrt(fit + sigma_rdout_subimg[i_roi] ** 2) else: - sig = np.sqrt(fit + Modeler.nominal_sigma_rdout ** 2) + sig = torch.sqrt(fit + Modeler.nominal_sigma_rdout ** 2) Z = (dat - fit) / sig sigmaZ = np.nan - if np.any(trust): - sigmaZ = Z[trust].std() + if torch.any(trust): + sigmaZ = Z[trust].std().item() sigmaZs.append(sigmaZ) if bragg_subimg[0] is not None: - if np.any(bragg_subimg[i_roi] > 0): + if torch.any(bragg_subimg[i_roi] > 0): ref_idx = Modeler.refls_idx[i_roi] ref = Modeler.refls[ref_idx] I = bragg_subimg[i_roi] - Y, X = np.indices(bragg_subimg[i_roi].shape) + ny, nx = bragg_subimg[i_roi].shape + Y, X = torch.meshgrid(torch.arange(ny, device=kokkos_device()), torch.arange(nx, device=kokkos_device()), indexing='ij') x1, x2, y1, y2 = Modeler.rois[i_roi] com_x, com_y, _ = ref["xyzobs.px.value"] com_x = int(com_x - x1 - 0.5) @@ -1363,11 +1364,11 @@ def save_up(self, x, SIM, rank=0, i_shot=0, continue except IndexError: continue - X += x1 - Y += y1 - Isum = I.sum() - xcom = (X * I).sum() / Isum - ycom = (Y * I).sum() / Isum + X = X + x1 + Y = Y + y1 + Isum = I.sum().item() + xcom = (X * I).sum().item() / Isum + ycom = (Y * I).sum().item() / Isum com = xcom + .5, ycom + .5, 0 new_xycalcs[ref_idx] = com if not Modeler.params.fix.perRoiScale: @@ -2041,8 +2042,8 @@ def target_func(x, udpate_terms, mod, SIM, compute_grad=True, return_all_zscores if SIM.refining_Fhkl: spot_scale_p = mod.P["G_xtal0"] G = spot_scale_p.get_val(x[spot_scale_p.xpos]) - fhkl_grad = SIM.D.add_Fhkl_gradients(pfs, resid, V, trusted, - mod.all_freq, SIM.num_Fhkl_channels, G) + fhkl_grad = SIM.D.add_Fhkl_gradients(pfs, resid.cpu().numpy(), V.cpu().numpy(), trusted.cpu().numpy(), + mod.all_freq.cpu().numpy(), SIM.num_Fhkl_channels, G) if params.betas.Fhkl is not None: for i_chan in range(SIM.num_Fhkl_channels): @@ -2209,11 +2210,9 @@ def get_new_xycalcs(Modeler, new_exp, old_refl_tag="dials"): X = X + x1 Y = Y + y1 - Isum = I.sum() - xcom = (X * I).sum() / Isum + .5 - xcom = xcom.item() - ycom = (Y * I).sum() / Isum + .5 - ycom = ycom.item() + Isum = I.sum().item() + xcom = (X * I).sum().item() / Isum + .5 + ycom = (Y * I).sum().item() / Isum + .5 com = xcom, ycom, 0 pid = Modeler.pids[i_roi] From ae85f39b04fc9da70e404a1bdbcd48dcf91bef32 Mon Sep 17 00:00:00 2001 From: Felix Wittwer Date: Thu, 14 Mar 2024 14:04:44 -0700 Subject: [PATCH 14/17] Add host_transfer flag to toggle D2H copies --- simtbx/diffBragg/attr_list.py | 1 + simtbx/diffBragg/hopper_utils.py | 8 + simtbx/diffBragg/src/diffBragg.cpp | 205 +++++++++++++++-------- simtbx/diffBragg/src/diffBragg.h | 5 +- simtbx/diffBragg/src/diffBraggKOKKOS.cpp | 96 +++++------ simtbx/diffBragg/src/diffBragg_ext.cpp | 12 +- simtbx/diffBragg/src/util.h | 45 +++++ 7 files changed, 244 insertions(+), 128 deletions(-) diff --git a/simtbx/diffBragg/attr_list.py b/simtbx/diffBragg/attr_list.py index f961528cd0..fdf7d65459 100644 --- a/simtbx/diffBragg/attr_list.py +++ b/simtbx/diffBragg/attr_list.py @@ -29,6 +29,7 @@ 'fluence', 'flux', 'has_anisotropic_mosaic_spread', + 'host_transfer', 'interpolate', 'isotropic_ncells', 'lambda_coefficients', diff --git a/simtbx/diffBragg/hopper_utils.py b/simtbx/diffBragg/hopper_utils.py index e3138da67c..5c647f6729 100644 --- a/simtbx/diffBragg/hopper_utils.py +++ b/simtbx/diffBragg/hopper_utils.py @@ -2081,6 +2081,11 @@ def refine(exp, ref, params, spec=None, gpu_device=None, return_modeler=False, b SIM = get_simulator_for_data_modelers(Modeler) Modeler.set_parameters_for_experiment(best=best) SIM.D.device_Id = gpu_device + old_transfer = None + if os.environ.get("DIFFBRAGG_USE_KOKKOS") is not None: + if SIM.D.host_transfer == True: + old_transfer = True + SIM.D.host_transfer = False nparam = len(Modeler.P) if SIM.refining_Fhkl: @@ -2108,6 +2113,9 @@ def refine(exp, ref, params, spec=None, gpu_device=None, return_modeler=False, b if free_mem: Modeler.clean_up(SIM) + if old_transfer is not None: + SIM.D.host_transfer = old_transfer + if return_modeler: return new_exp, new_refl, Modeler, SIM, x diff --git a/simtbx/diffBragg/src/diffBragg.cpp b/simtbx/diffBragg/src/diffBragg.cpp index 4ede77192d..3bd9821444 100644 --- a/simtbx/diffBragg/src/diffBragg.cpp +++ b/simtbx/diffBragg/src/diffBragg.cpp @@ -364,6 +364,7 @@ diffBragg::diffBragg(const dxtbx::model::Detector& detector, const dxtbx::model: O_reference <<0,0,0; + host_transfer = true; update_oversample_during_refinement = true; oversample_omega = true; only_save_omega_kahn = false; @@ -1508,7 +1509,6 @@ boost::python::tuple diffBragg::get_ncells_derivative_pixels(){ return derivative_pixels; } -#ifdef DIFFBRAGG_HAVE_KOKKOS void dlpack_destructor(PyObject* capsule) { if (!PyCapsule_IsValid(capsule, "dltensor")) { return; @@ -1530,7 +1530,12 @@ PyObject* diffBragg::PyCapsule_Wrapper( DLManagedTensor* (diffBraggKOKKOS::*func } PyObject* diffBragg::get_floatimage() { - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_floatimage); +#ifdef DIFFBRAGG_HAVE_KOKKOS + if (use_gpu || getenv("DIFFBRAGG_USE_KOKKOS")!=NULL){ + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_floatimage); + } +#endif + return PyCapsule_New(array_to_dlpack(raw_pixels_roi.begin(), Npix_to_model), "dltensor", dlpack_destructor); } PyObject* diffBragg::get_wavelenimage() { @@ -1546,7 +1551,12 @@ PyObject* diffBragg::get_d_diffuse_sigma_images() { } PyObject* diffBragg::get_d_Umat_images() { - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_Umat_images); +#ifdef DIFFBRAGG_HAVE_KOKKOS + if (use_gpu || getenv("DIFFBRAGG_USE_KOKKOS")!=NULL){ + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_Umat_images); + } +#endif + return PyCapsule_New(array_to_dlpack(first_deriv_imgs.Umat.data(), 3*Npix_to_model), "dltensor", dlpack_destructor); } PyObject* diffBragg::get_d2_Umat_images() { @@ -1554,15 +1564,25 @@ PyObject* diffBragg::get_d2_Umat_images() { } PyObject* diffBragg::get_d_Bmat_images() { - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_Bmat_images); -} +#ifdef DIFFBRAGG_HAVE_KOKKOS + if (use_gpu || getenv("DIFFBRAGG_USE_KOKKOS")!=NULL){ + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_Bmat_images); + } +#endif + return PyCapsule_New(array_to_dlpack(first_deriv_imgs.Bmat.data(), 6*Npix_to_model), "dltensor", dlpack_destructor); +} PyObject* diffBragg::get_d2_Bmat_images() { return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d2_Bmat_images); } PyObject* diffBragg::get_d_Ncells_images() { - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_Ncells_images); +#ifdef DIFFBRAGG_HAVE_KOKKOS + if (use_gpu || getenv("DIFFBRAGG_USE_KOKKOS")!=NULL){ + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_Ncells_images); + } +#endif + return PyCapsule_New(array_to_dlpack(first_deriv_imgs.Ncells.data(), 6*Npix_to_model), "dltensor", dlpack_destructor); } PyObject* diffBragg::get_d2_Ncells_images() { @@ -1570,7 +1590,12 @@ PyObject* diffBragg::get_d2_Ncells_images() { } PyObject* diffBragg::get_d_fcell_images() { - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_fcell_images); +#ifdef DIFFBRAGG_HAVE_KOKKOS + if (use_gpu || getenv("DIFFBRAGG_USE_KOKKOS")!=NULL){ + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_fcell_images); + } +#endif + return PyCapsule_New(array_to_dlpack(first_deriv_imgs.fcell.data(), Npix_to_model), "dltensor", dlpack_destructor); } PyObject* diffBragg::get_d2_fcell_images() { @@ -1578,7 +1603,12 @@ PyObject* diffBragg::get_d2_fcell_images() { } PyObject* diffBragg::get_d_eta_images() { - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_eta_images); +#ifdef DIFFBRAGG_HAVE_KOKKOS + if (use_gpu || getenv("DIFFBRAGG_USE_KOKKOS")!=NULL){ + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_eta_images); + } +#endif + return PyCapsule_New(array_to_dlpack(first_deriv_imgs.eta.data(), first_deriv_imgs.eta.size()), "dltensor", dlpack_destructor); } PyObject* diffBragg::get_d2_eta_images() { @@ -1586,7 +1616,12 @@ PyObject* diffBragg::get_d2_eta_images() { } PyObject* diffBragg::get_d_lambda_images() { - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_lambda_images); +#ifdef DIFFBRAGG_HAVE_KOKKOS + if (use_gpu || getenv("DIFFBRAGG_USE_KOKKOS")!=NULL){ + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_lambda_images); + } +#endif + return PyCapsule_New(array_to_dlpack(first_deriv_imgs.lambda.data(), 2*Npix_to_model), "dltensor", dlpack_destructor); } PyObject* diffBragg::get_d2_lambda_images() { @@ -1594,7 +1629,12 @@ PyObject* diffBragg::get_d2_lambda_images() { } PyObject* diffBragg::get_d_panel_rot_images() { - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_panel_rot_images); +#ifdef DIFFBRAGG_HAVE_KOKKOS + if (use_gpu || getenv("DIFFBRAGG_USE_KOKKOS")!=NULL){ + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_panel_rot_images); + } +#endif + return PyCapsule_New(array_to_dlpack(first_deriv_imgs.panel_rot.data(), 3*Npix_to_model), "dltensor", dlpack_destructor); } PyObject* diffBragg::get_d2_panel_rot_images() { @@ -1602,7 +1642,12 @@ PyObject* diffBragg::get_d2_panel_rot_images() { } PyObject* diffBragg::get_d_panel_orig_images() { - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_panel_orig_images); +#ifdef DIFFBRAGG_HAVE_KOKKOS + if (use_gpu || getenv("DIFFBRAGG_USE_KOKKOS")!=NULL){ + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_panel_orig_images); + } +#endif + return PyCapsule_New(array_to_dlpack(first_deriv_imgs.panel_orig.data(), 3*Npix_to_model), "dltensor", dlpack_destructor); } PyObject* diffBragg::get_d2_panel_orig_images() { @@ -1610,13 +1655,23 @@ PyObject* diffBragg::get_d2_panel_orig_images() { } PyObject* diffBragg::get_d_fp_fdp_images() { - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_fp_fdp_images); +#ifdef DIFFBRAGG_HAVE_KOKKOS + if (use_gpu || getenv("DIFFBRAGG_USE_KOKKOS")!=NULL){ + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_fp_fdp_images); + } +#endif + return PyCapsule_New(array_to_dlpack(first_deriv_imgs.fp_fdp.data(), 2*Npix_to_model), "dltensor", dlpack_destructor); } PyObject* diffBragg::get_Fhkl_scale_deriv() { - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_Fhkl_scale_deriv); -} +#ifdef DIFFBRAGG_HAVE_KOKKOS + if (use_gpu || getenv("DIFFBRAGG_USE_KOKKOS")!=NULL){ + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_fp_fdp_images); + } #endif + return PyCapsule_New(array_to_dlpack(first_deriv_imgs.fp_fdp.data(), first_deriv_imgs.fp_fdp.size()), "dltensor", dlpack_destructor); +} + boost::python::tuple diffBragg::get_diffuse_gamma_derivative_pixels(){ SCITBX_ASSERT(db_flags.refine_diffuse); @@ -1958,7 +2013,6 @@ void diffBragg::add_diffBragg_spots(const af::shared& panels_fasts_slows Npix_to_model = panels_fasts_slows.size()/3; SCITBX_ASSERT(Npix_to_model <= Npix_total); - double * floatimage_roi = raw_pixels_roi.begin(); diffBragg_rot_mats(); /* make sure we are normalizing with the right number of sub-steps */ @@ -2050,6 +2104,7 @@ void diffBragg::add_diffBragg_spots(const af::shared& panels_fasts_slows db_flags.refine_fp_fdp = fp_fdp_managers[0]->refine_me; db_flags.use_lambda_coefficients = use_lambda_coefficients; db_flags.oversample_omega = oversample_omega; + db_flags.host_transfer = host_transfer; db_flags.printout_fpixel = printout_fpixel; db_flags.printout_spixel = printout_spixel; db_flags.verbose = verbose; @@ -2278,81 +2333,84 @@ void diffBragg::add_diffBragg_spots(const af::shared& panels_fasts_slows gettimeofday(&t1,0 ); - for (int i_pix=0; i_pix< Npix_to_model; i_pix++){ - floatimage_roi[i_pix] = image[i_pix]; + if (db_flags.host_transfer) { + double * floatimage_roi = raw_pixels_roi.begin(); + for (int i_pix=0; i_pix< Npix_to_model; i_pix++){ + floatimage_roi[i_pix] = image[i_pix]; - for (int i_rot=0; i_rot<3; i_rot++){ - if (rot_managers[i_rot]->refine_me){ - int idx = i_rot*Npix_to_model + i_pix; - rot_managers[i_rot]->increment_image(i_pix, first_deriv_imgs.Umat[idx], second_deriv_imgs.Umat[idx], compute_curvatures); + for (int i_rot=0; i_rot<3; i_rot++){ + if (rot_managers[i_rot]->refine_me){ + int idx = i_rot*Npix_to_model + i_pix; + rot_managers[i_rot]->increment_image(i_pix, first_deriv_imgs.Umat[idx], second_deriv_imgs.Umat[idx], compute_curvatures); + } } - } - for (int i_uc=0; i_uc<6; i_uc++){ - if (ucell_managers[i_uc]->refine_me){ - int idx = i_uc*Npix_to_model + i_pix; - ucell_managers[i_uc]->increment_image(i_pix, first_deriv_imgs.Bmat[idx], second_deriv_imgs.Bmat[idx], compute_curvatures); + for (int i_uc=0; i_uc<6; i_uc++){ + if (ucell_managers[i_uc]->refine_me){ + int idx = i_uc*Npix_to_model + i_pix; + ucell_managers[i_uc]->increment_image(i_pix, first_deriv_imgs.Bmat[idx], second_deriv_imgs.Bmat[idx], compute_curvatures); + } } - } - if (Ncells_managers[0]->refine_me){ - Ncells_managers[0]->increment_image(i_pix, first_deriv_imgs.Ncells[i_pix], second_deriv_imgs.Ncells[i_pix], compute_curvatures); - if (! isotropic_ncells){ - int idx= Npix_to_model+i_pix; - Ncells_managers[1]->increment_image(i_pix, first_deriv_imgs.Ncells[idx], second_deriv_imgs.Ncells[idx], compute_curvatures); - idx = 2*Npix_to_model + i_pix; - Ncells_managers[2]->increment_image(i_pix, first_deriv_imgs.Ncells[idx], second_deriv_imgs.Ncells[idx], compute_curvatures); + if (Ncells_managers[0]->refine_me){ + Ncells_managers[0]->increment_image(i_pix, first_deriv_imgs.Ncells[i_pix], second_deriv_imgs.Ncells[i_pix], compute_curvatures); + if (! isotropic_ncells){ + int idx= Npix_to_model+i_pix; + Ncells_managers[1]->increment_image(i_pix, first_deriv_imgs.Ncells[idx], second_deriv_imgs.Ncells[idx], compute_curvatures); + idx = 2*Npix_to_model + i_pix; + Ncells_managers[2]->increment_image(i_pix, first_deriv_imgs.Ncells[idx], second_deriv_imgs.Ncells[idx], compute_curvatures); + } } - } - if (refine_Ncells_def){ - for (int i_nc =3; i_nc < 6; i_nc++){ - int idx= i_nc*Npix_to_model+i_pix; - Ncells_managers[i_nc]->increment_image(i_pix, first_deriv_imgs.Ncells[idx], second_deriv_imgs.Ncells[idx], compute_curvatures); + if (refine_Ncells_def){ + for (int i_nc =3; i_nc < 6; i_nc++){ + int idx= i_nc*Npix_to_model+i_pix; + Ncells_managers[i_nc]->increment_image(i_pix, first_deriv_imgs.Ncells[idx], second_deriv_imgs.Ncells[idx], compute_curvatures); + } } - } - if (fcell_managers[0]->refine_me){ - int idx= i_pix; - fcell_managers[0]->increment_image(i_pix, first_deriv_imgs.fcell[idx], second_deriv_imgs.fcell[idx], compute_curvatures); - } + if (fcell_managers[0]->refine_me){ + int idx= i_pix; + fcell_managers[0]->increment_image(i_pix, first_deriv_imgs.fcell[idx], second_deriv_imgs.fcell[idx], compute_curvatures); + } - if (eta_managers[0]->refine_me){ - eta_managers[0]->increment_image(i_pix, first_deriv_imgs.eta[i_pix], second_deriv_imgs.eta[i_pix], compute_curvatures); - if (modeling_anisotropic_mosaic_spread){ - if (verbose && i_pix==0)printf("copying aniso eta derivatives\n"); - for(int i_eta=1; i_eta < 3; i_eta++){ - int idx = i_eta*Npix_to_model+i_pix; - eta_managers[i_eta]->increment_image(i_pix, first_deriv_imgs.eta[idx], second_deriv_imgs.eta[idx], compute_curvatures); + if (eta_managers[0]->refine_me){ + eta_managers[0]->increment_image(i_pix, first_deriv_imgs.eta[i_pix], second_deriv_imgs.eta[i_pix], compute_curvatures); + if (modeling_anisotropic_mosaic_spread){ + if (verbose && i_pix==0)printf("copying aniso eta derivatives\n"); + for(int i_eta=1; i_eta < 3; i_eta++){ + int idx = i_eta*Npix_to_model+i_pix; + eta_managers[i_eta]->increment_image(i_pix, first_deriv_imgs.eta[idx], second_deriv_imgs.eta[idx], compute_curvatures); + } } } - } - for(int i_lam=0; i_lam < 2; i_lam++){ - if (lambda_managers[i_lam]->refine_me){ - int idx= Npix_to_model*i_lam + i_pix; - lambda_managers[i_lam]->increment_image(i_pix, first_deriv_imgs.lambda[idx], second_deriv_imgs.lambda[idx], compute_curvatures); + for(int i_lam=0; i_lam < 2; i_lam++){ + if (lambda_managers[i_lam]->refine_me){ + int idx= Npix_to_model*i_lam + i_pix; + lambda_managers[i_lam]->increment_image(i_pix, first_deriv_imgs.lambda[idx], second_deriv_imgs.lambda[idx], compute_curvatures); + } } - } - for(int i_pan=0; i_pan <3; i_pan++){ - int i_rot = pan_rot_ids[i_pan]; - if (panels[i_rot]->refine_me){ - int idx = Npix_to_model*i_pan + i_pix; - panels[i_rot]->increment_image(i_pix, first_deriv_imgs.panel_rot[idx], second_deriv_imgs.panel_rot[idx], compute_curvatures); - } + for(int i_pan=0; i_pan <3; i_pan++){ + int i_rot = pan_rot_ids[i_pan]; + if (panels[i_rot]->refine_me){ + int idx = Npix_to_model*i_pan + i_pix; + panels[i_rot]->increment_image(i_pix, first_deriv_imgs.panel_rot[idx], second_deriv_imgs.panel_rot[idx], compute_curvatures); + } - int i_orig = pan_orig_ids[i_pan]; - if(panels[i_orig]->refine_me){ - int idx= Npix_to_model*i_pan + i_pix; - panels[i_orig]->increment_image(i_pix, first_deriv_imgs.panel_orig[idx], second_deriv_imgs.panel_orig[idx], compute_curvatures); + int i_orig = pan_orig_ids[i_pan]; + if(panels[i_orig]->refine_me){ + int idx= Npix_to_model*i_pan + i_pix; + panels[i_orig]->increment_image(i_pix, first_deriv_imgs.panel_orig[idx], second_deriv_imgs.panel_orig[idx], compute_curvatures); + } } - } - if (fp_fdp_managers[0]->refine_me) - fp_fdp_managers[0]->increment_image(i_pix, first_deriv_imgs.fp_fdp[i_pix], 0, compute_curvatures); - if (fp_fdp_managers[1]->refine_me) - fp_fdp_managers[1]->increment_image(i_pix, first_deriv_imgs.fp_fdp[i_pix+Npix_to_model], 0, compute_curvatures); + if (fp_fdp_managers[0]->refine_me) + fp_fdp_managers[0]->increment_image(i_pix, first_deriv_imgs.fp_fdp[i_pix], 0, compute_curvatures); + if (fp_fdp_managers[1]->refine_me) + fp_fdp_managers[1]->increment_image(i_pix, first_deriv_imgs.fp_fdp[i_pix+Npix_to_model], 0, compute_curvatures); - } // END of flex array update + } // END of flex array update + } delete[] db_steps.subS_pos; delete[] db_steps.subF_pos; @@ -2368,7 +2426,6 @@ void diffBragg::add_diffBragg_spots(const af::shared& panels_fasts_slows TIMERS.timings += 1; // only increment timings at the end of the add_diffBragg_spots call } - if(verbose) printf("done with pixel loop\n"); } // END of add_diffBragg_spots diff --git a/simtbx/diffBragg/src/diffBragg.h b/simtbx/diffBragg/src/diffBragg.h index 599e55a3d0..882a150870 100644 --- a/simtbx/diffBragg/src/diffBragg.h +++ b/simtbx/diffBragg/src/diffBragg.h @@ -237,7 +237,7 @@ class diffBragg: public nanoBragg{ af::flex_double get_raw_pixels_roi(); boost::python::tuple get_fp_fdp_derivative_pixels(); boost::python::tuple get_ncells_derivative_pixels(); -#ifdef DIFFBRAGG_HAVE_KOKKOS + PyObject* PyCapsule_Wrapper(DLManagedTensor* (diffBraggKOKKOS::*func)()); PyObject* get_floatimage(); PyObject* get_wavelenimage(); @@ -261,7 +261,7 @@ class diffBragg: public nanoBragg{ PyObject* get_d2_panel_orig_images(); PyObject* get_d_fp_fdp_images(); PyObject* get_Fhkl_scale_deriv(); -#endif + boost::python::tuple get_diffuse_gamma_derivative_pixels(); boost::python::tuple get_diffuse_sigma_derivative_pixels(); boost::python::tuple get_ncells_def_derivative_pixels(); @@ -311,6 +311,7 @@ class diffBragg: public nanoBragg{ bool update_oversample_during_refinement; bool oversample_omega; bool only_save_omega_kahn; + bool host_transfer; // miller array void quick_Fcell_update(boost::python::tuple const& value); diff --git a/simtbx/diffBragg/src/diffBraggKOKKOS.cpp b/simtbx/diffBragg/src/diffBraggKOKKOS.cpp index 016f684d05..4656835311 100644 --- a/simtbx/diffBragg/src/diffBraggKOKKOS.cpp +++ b/simtbx/diffBragg/src/diffBraggKOKKOS.cpp @@ -581,56 +581,58 @@ void diffBraggKOKKOS::diffBragg_sum_over_steps_kokkos( gettimeofday(&t1, 0); // COPY BACK FROM DEVICE Kokkos::Tools::pushRegion("COPY BACK FROM DEVICE"); - kokkostbx::transfer_kokkos2vector(floatimage, m_floatimage); + if (db_flags.host_transfer) { + kokkostbx::transfer_kokkos2vector(floatimage, m_floatimage); - if (db_flags.wavelength_img) { - kokkostbx::transfer_kokkos2vector(d_image.wavelength, m_wavelenimage); - } - if (db_flags.refine_fcell) { - kokkostbx::transfer_kokkos2vector(d_image.fcell, m_d_fcell_images); - kokkostbx::transfer_kokkos2vector(d2_image.fcell, m_d2_fcell_images); - } - if (db_flags.Fhkl_gradient_mode){ - if (db_flags.Fhkl_errors_mode){ - kokkostbx::transfer_kokkos2vector(d_image.Fhkl_hessian, m_Fhkl_scale_deriv); + if (db_flags.wavelength_img) { + kokkostbx::transfer_kokkos2vector(d_image.wavelength, m_wavelenimage); } - else{ - kokkostbx::transfer_kokkos2vector(d_image.Fhkl_scale_deriv, m_Fhkl_scale_deriv); + if (db_flags.refine_fcell) { + kokkostbx::transfer_kokkos2vector(d_image.fcell, m_d_fcell_images); + kokkostbx::transfer_kokkos2vector(d2_image.fcell, m_d2_fcell_images); + } + if (db_flags.Fhkl_gradient_mode){ + if (db_flags.Fhkl_errors_mode){ + kokkostbx::transfer_kokkos2vector(d_image.Fhkl_hessian, m_Fhkl_scale_deriv); + } + else{ + kokkostbx::transfer_kokkos2vector(d_image.Fhkl_scale_deriv, m_Fhkl_scale_deriv); + } + } + if (std::count(db_flags.refine_Umat.begin(), db_flags.refine_Umat.end(), true) > 0) { + kokkostbx::transfer_kokkos2vector(d_image.Umat, m_d_Umat_images); + kokkostbx::transfer_kokkos2vector(d2_image.Umat, m_d2_Umat_images); + } + if (std::count(db_flags.refine_panel_rot.begin(), db_flags.refine_panel_rot.end(), true) > 0) { + kokkostbx::transfer_kokkos2vector(d_image.panel_rot, m_d_panel_rot_images); + } + if (std::count(db_flags.refine_panel_origin.begin(), db_flags.refine_panel_origin.end(), true) > + 0) { + kokkostbx::transfer_kokkos2vector(d_image.panel_orig, m_d_panel_orig_images); + } + if (db_flags.refine_eta) { + kokkostbx::transfer_kokkos2vector(d_image.eta, m_d_eta_images); + kokkostbx::transfer_kokkos2vector(d2_image.eta, m_d2_eta_images); + } + if (std::count(db_flags.refine_Ncells.begin(), db_flags.refine_Ncells.end(), true) > 0 || + db_flags.refine_Ncells_def) { + kokkostbx::transfer_kokkos2vector(d_image.Ncells, m_d_Ncells_images); + kokkostbx::transfer_kokkos2vector(d2_image.Ncells, m_d2_Ncells_images); + } + if (db_flags.refine_diffuse) { + kokkostbx::transfer_kokkos2vector(d_image.diffuse_gamma, m_d_diffuse_gamma_images); + kokkostbx::transfer_kokkos2vector(d_image.diffuse_sigma, m_d_diffuse_sigma_images); + } + if (std::count(db_flags.refine_Bmat.begin(), db_flags.refine_Bmat.end(), true) > 0) { + kokkostbx::transfer_kokkos2vector(d_image.Bmat, m_d_Bmat_images); + kokkostbx::transfer_kokkos2vector(d2_image.Bmat, m_d2_Bmat_images); + } + if (std::count(db_flags.refine_lambda.begin(), db_flags.refine_lambda.end(), true) > 0) { + kokkostbx::transfer_kokkos2vector(d_image.lambda, m_d_lambda_images); + } + if (db_flags.refine_fp_fdp) { + kokkostbx::transfer_kokkos2vector(d_image.fp_fdp, m_d_fp_fdp_images); } - } - if (std::count(db_flags.refine_Umat.begin(), db_flags.refine_Umat.end(), true) > 0) { - kokkostbx::transfer_kokkos2vector(d_image.Umat, m_d_Umat_images); - kokkostbx::transfer_kokkos2vector(d2_image.Umat, m_d2_Umat_images); - } - if (std::count(db_flags.refine_panel_rot.begin(), db_flags.refine_panel_rot.end(), true) > 0) { - kokkostbx::transfer_kokkos2vector(d_image.panel_rot, m_d_panel_rot_images); - } - if (std::count(db_flags.refine_panel_origin.begin(), db_flags.refine_panel_origin.end(), true) > - 0) { - kokkostbx::transfer_kokkos2vector(d_image.panel_orig, m_d_panel_orig_images); - } - if (db_flags.refine_eta) { - kokkostbx::transfer_kokkos2vector(d_image.eta, m_d_eta_images); - kokkostbx::transfer_kokkos2vector(d2_image.eta, m_d2_eta_images); - } - if (std::count(db_flags.refine_Ncells.begin(), db_flags.refine_Ncells.end(), true) > 0 || - db_flags.refine_Ncells_def) { - kokkostbx::transfer_kokkos2vector(d_image.Ncells, m_d_Ncells_images); - kokkostbx::transfer_kokkos2vector(d2_image.Ncells, m_d2_Ncells_images); - } - if (db_flags.refine_diffuse) { - kokkostbx::transfer_kokkos2vector(d_image.diffuse_gamma, m_d_diffuse_gamma_images); - kokkostbx::transfer_kokkos2vector(d_image.diffuse_sigma, m_d_diffuse_sigma_images); - } - if (std::count(db_flags.refine_Bmat.begin(), db_flags.refine_Bmat.end(), true) > 0) { - kokkostbx::transfer_kokkos2vector(d_image.Bmat, m_d_Bmat_images); - kokkostbx::transfer_kokkos2vector(d2_image.Bmat, m_d2_Bmat_images); - } - if (std::count(db_flags.refine_lambda.begin(), db_flags.refine_lambda.end(), true) > 0) { - kokkostbx::transfer_kokkos2vector(d_image.lambda, m_d_lambda_images); - } - if (db_flags.refine_fp_fdp) { - kokkostbx::transfer_kokkos2vector(d_image.fp_fdp, m_d_fp_fdp_images); } Kokkos::Tools::popRegion(); diff --git a/simtbx/diffBragg/src/diffBragg_ext.cpp b/simtbx/diffBragg/src/diffBragg_ext.cpp index dd1fe393b8..feafb15c49 100644 --- a/simtbx/diffBragg/src/diffBragg_ext.cpp +++ b/simtbx/diffBragg/src/diffBragg_ext.cpp @@ -507,11 +507,8 @@ namespace boost_python { namespace { std::string kokkos_device() { std::string backend = "cpu:0"; #ifdef DIFFBRAGG_HAVE_KOKKOS - if (Kokkos::is_finalized()) { - throw std::runtime_error("Error: Kokkos has been finalized.\n"); - } - if (!Kokkos::is_initialized()) { - throw std::runtime_error("Error: Kokkos not initialized.\n"); + if (Kokkos::is_finalized() || !Kokkos::is_initialized()) { + return backend; } #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) backend = "cuda:" + std::to_string( Kokkos::device_id() ); @@ -879,6 +876,11 @@ struct DLPackAPI { make_setter(&simtbx::nanoBragg::diffBragg::oversample_omega,dcp()), "whether to use an average solid angle correction per pixel, or one at the sub pixel level") + .add_property("host_transfer", + make_getter(&simtbx::nanoBragg::diffBragg::host_transfer,rbv()), + make_setter(&simtbx::nanoBragg::diffBragg::host_transfer,dcp()), + "whether to transfer results from device to host") + .add_property("force_cpu", make_getter(&simtbx::nanoBragg::diffBragg::force_cpu,rbv()), make_setter(&simtbx::nanoBragg::diffBragg::force_cpu,dcp()), diff --git a/simtbx/diffBragg/src/util.h b/simtbx/diffBragg/src/util.h index ec197c1dc9..b95ffebd4f 100644 --- a/simtbx/diffBragg/src/util.h +++ b/simtbx/diffBragg/src/util.h @@ -10,6 +10,8 @@ #include #include +#include "dlpack/dlpack.h" + #ifndef CUDAREAL #define CUDAREAL double #endif @@ -29,6 +31,48 @@ inline void easy_time(double& timer, struct timeval& t, bool recording){ timer += time; } +template +DLDataTypeCode getDLPackTypeCode() { + if (std::is_same::value) { + return kDLFloat; + } else if (std::is_same::value) { + return kDLFloat; + } else if (std::is_same::value) { + return kDLInt; + } else if (std::is_same::value) { + return kDLUInt; + // } else if (std::is_same::value) { + // return kDLBool; + } else { + // Unsupported data type + throw std::runtime_error("Unsupported data type for DLPack conversion"); + } +} + +template +DLManagedTensor* array_to_dlpack(DataType* pointer, int64_t length) { + + int64_t* shape = new int64_t[1]; + shape[0] = length; + + // Create a DLPack tensor + DLManagedTensor* dlpackTensor = new DLManagedTensor; + dlpackTensor->dl_tensor.data = static_cast(pointer); + dlpackTensor->dl_tensor.device = {kDLCPU, 0}; + dlpackTensor->dl_tensor.dtype.code = getDLPackTypeCode(); + dlpackTensor->dl_tensor.dtype.bits = sizeof(DataType) * 8; + dlpackTensor->dl_tensor.dtype.lanes = 1; + dlpackTensor->dl_tensor.ndim = 1; + dlpackTensor->dl_tensor.shape = shape; + dlpackTensor->dl_tensor.strides = nullptr; + dlpackTensor->dl_tensor.byte_offset = 0; + dlpackTensor->manager_ctx = nullptr; + dlpackTensor->deleter = [](DLManagedTensor* tensor) { + delete[] tensor->dl_tensor.shape; + }; + return dlpackTensor; +} + struct timer_variables{ double add_spots_pre=0; // times the initializations for add spots kernel double add_spots_post=0; // times the copies that occur after add spots kernel @@ -124,6 +168,7 @@ struct flags{ bool isotropic_ncells = false; // one mosaic domain parameter bool complex_miller = false; // is the miller array complex (such thet Fhkl_linear and Fhkl2_linear are both defined) bool no_Nabc_scale = false; // no Nabc prefactor + bool host_transfer = true; // transfer data after add_diffbragg_spots bool refine_diffuse = false; // flag for computing diffuse gradients std::vector refine_Bmat; // Bmatrix std::vector refine_Ncells; // mosaic domain size From fff52bfc65f488f387b9ccde99d8d1959a19f625 Mon Sep 17 00:00:00 2001 From: Felix Wittwer Date: Fri, 15 Mar 2024 09:31:50 -0700 Subject: [PATCH 15/17] Update hopper_utils_ensemble --- simtbx/diffBragg/hopper_ensemble_utils.py | 10 ++++++---- simtbx/diffBragg/hopper_utils.py | 3 ++- simtbx/diffBragg/src/diffBragg.cpp | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/simtbx/diffBragg/hopper_ensemble_utils.py b/simtbx/diffBragg/hopper_ensemble_utils.py index 16976e754a..c3e0d712aa 100644 --- a/simtbx/diffBragg/hopper_ensemble_utils.py +++ b/simtbx/diffBragg/hopper_ensemble_utils.py @@ -87,7 +87,7 @@ def __call__(self, x, *args, **kwargs): if modelers.SIM.D.record_timings: modelers.SIM.D.show_timings() - return f + return f.item() def target_func(x, modelers): @@ -152,8 +152,10 @@ def target_func(x, modelers): spot_scale_p = shot_modeler.P["G_xtal0"] G = spot_scale_p.get_val(x[spot_scale_p.xpos]) g_fhkl += modelers.SIM.D.add_Fhkl_gradients( - shot_modeler.pan_fast_slow, resid, V, shot_modeler.all_trusted, - shot_modeler.all_freq, modelers.SIM.num_Fhkl_channels, G) + shot_modeler.pan_fast_slow, resid.cpu().numpy(), V.cpu().numpy(), shot_modeler.all_trusted.cpu().numpy(), + shot_modeler.all_freq.cpu().numpy(), modelers.SIM.num_Fhkl_channels, G) + if not modelers.SIM.D.host_transfer: + g_fhkl += torch.from_dlpack(modelers.SIM.D.get_Fhkl_scale_deriv()) # add up target and gradients across all ranks f = COMM.bcast(COMM.reduce(f)) @@ -496,7 +498,7 @@ def save_up(self, x, ref_iter=None): if i_shot % 100==0: MAIN_LOGGER.info("Getting Fhkl errors for shot %d/%d ... " % (i_shot+1, self.num_modelers)) Fhkl_scale_hessian += self.SIM.D.add_Fhkl_gradients( - mod.pan_fast_slow, resid, V, mod.all_trusted, mod.all_freq, + mod.pan_fast_slow, resid.cpu().numpy(), V.cpu().numpy(), mod.all_trusted.cpu().numpy(), mod.all_freq.cpu().numpy(), self.SIM.num_Fhkl_channels, G, track=False, errors=True) # ------------ diff --git a/simtbx/diffBragg/hopper_utils.py b/simtbx/diffBragg/hopper_utils.py index 5c647f6729..9b6a74eb37 100644 --- a/simtbx/diffBragg/hopper_utils.py +++ b/simtbx/diffBragg/hopper_utils.py @@ -2044,6 +2044,8 @@ def target_func(x, udpate_terms, mod, SIM, compute_grad=True, return_all_zscores G = spot_scale_p.get_val(x[spot_scale_p.xpos]) fhkl_grad = SIM.D.add_Fhkl_gradients(pfs, resid.cpu().numpy(), V.cpu().numpy(), trusted.cpu().numpy(), mod.all_freq.cpu().numpy(), SIM.num_Fhkl_channels, G) + if not SIM.D.host_transfer: + fhkl_grad = torch.from_dlpack(SIM.D.get_Fhkl_scale_deriv()).cpu().numpy() if params.betas.Fhkl is not None: for i_chan in range(SIM.num_Fhkl_channels): @@ -2058,7 +2060,6 @@ def target_func(x, udpate_terms, mod, SIM, compute_grad=True, return_all_zscores gnorm = np.linalg.norm(g) - debug_s = "F=%10.7g sigZ=%10.7g (Fracs of F: %s), |g|=%10.7g" \ % (f, zscore_sigma, restraint_debug_s, gnorm) diff --git a/simtbx/diffBragg/src/diffBragg.cpp b/simtbx/diffBragg/src/diffBragg.cpp index 3bd9821444..3499b5f7db 100644 --- a/simtbx/diffBragg/src/diffBragg.cpp +++ b/simtbx/diffBragg/src/diffBragg.cpp @@ -1666,7 +1666,7 @@ PyObject* diffBragg::get_d_fp_fdp_images() { PyObject* diffBragg::get_Fhkl_scale_deriv() { #ifdef DIFFBRAGG_HAVE_KOKKOS if (use_gpu || getenv("DIFFBRAGG_USE_KOKKOS")!=NULL){ - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_fp_fdp_images); + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_Fhkl_scale_deriv); } #endif return PyCapsule_New(array_to_dlpack(first_deriv_imgs.fp_fdp.data(), first_deriv_imgs.fp_fdp.size()), "dltensor", dlpack_destructor); From 08b2d320ca6f652dc51eeee176a8c1a86e6ce506 Mon Sep 17 00:00:00 2001 From: Felix Wittwer Date: Fri, 15 Mar 2024 14:23:22 -0700 Subject: [PATCH 16/17] add missing get_dlpack functions and refactor --- simtbx/diffBragg/src/diffBragg.cpp | 117 ++++++++++------------------- simtbx/diffBragg/src/diffBragg.h | 3 +- 2 files changed, 42 insertions(+), 78 deletions(-) diff --git a/simtbx/diffBragg/src/diffBragg.cpp b/simtbx/diffBragg/src/diffBragg.cpp index 3499b5f7db..c332bf5e92 100644 --- a/simtbx/diffBragg/src/diffBragg.cpp +++ b/simtbx/diffBragg/src/diffBragg.cpp @@ -1522,154 +1522,117 @@ void dlpack_destructor(PyObject* capsule) { // Fun with pointer-to-member-functions -PyObject* diffBragg::PyCapsule_Wrapper( DLManagedTensor* (diffBraggKOKKOS::*func)()) { - if (diffBragg_runner == nullptr) { - return nullptr; +PyObject* diffBragg::PyCapsule_Wrapper( DLManagedTensor* (diffBraggKOKKOS::*func)(), image_type &vec) { +#ifdef DIFFBRAGG_HAVE_KOKKOS + if (use_gpu || getenv("DIFFBRAGG_USE_KOKKOS")!=NULL){ + if (diffBragg_runner == nullptr) { + return nullptr; + } + return PyCapsule_New((*diffBragg_runner.*func)(), "dltensor", dlpack_destructor); } - return PyCapsule_New((*diffBragg_runner.*func)(), "dltensor", dlpack_destructor); +#endif + return PyCapsule_New(array_to_dlpack(vec.data(), vec.size()), "dltensor", dlpack_destructor); } PyObject* diffBragg::get_floatimage() { #ifdef DIFFBRAGG_HAVE_KOKKOS - if (use_gpu || getenv("DIFFBRAGG_USE_KOKKOS")!=NULL){ - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_floatimage); + if (use_gpu || getenv("DIFFBRAGG_USE_KOKKOS")!=NULL) { + if (diffBragg_runner == nullptr) { + return nullptr; + } + return PyCapsule_New(diffBragg_runner->get_floatimage(), "dltensor", dlpack_destructor); } #endif return PyCapsule_New(array_to_dlpack(raw_pixels_roi.begin(), Npix_to_model), "dltensor", dlpack_destructor); } PyObject* diffBragg::get_wavelenimage() { - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_wavelenimage); + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_wavelenimage, first_deriv_imgs.wavelength); } PyObject* diffBragg::get_d_diffuse_gamma_images() { - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_diffuse_gamma_images); + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_diffuse_gamma_images, first_deriv_imgs.diffuse_gamma); } PyObject* diffBragg::get_d_diffuse_sigma_images() { - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_diffuse_sigma_images); + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_diffuse_sigma_images, first_deriv_imgs.diffuse_sigma); } PyObject* diffBragg::get_d_Umat_images() { -#ifdef DIFFBRAGG_HAVE_KOKKOS - if (use_gpu || getenv("DIFFBRAGG_USE_KOKKOS")!=NULL){ - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_Umat_images); - } -#endif - return PyCapsule_New(array_to_dlpack(first_deriv_imgs.Umat.data(), 3*Npix_to_model), "dltensor", dlpack_destructor); + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_Umat_images, first_deriv_imgs.Umat); } PyObject* diffBragg::get_d2_Umat_images() { - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d2_Umat_images); + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d2_Umat_images, second_deriv_imgs.Umat); } PyObject* diffBragg::get_d_Bmat_images() { -#ifdef DIFFBRAGG_HAVE_KOKKOS - if (use_gpu || getenv("DIFFBRAGG_USE_KOKKOS")!=NULL){ - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_Bmat_images); - } -#endif - return PyCapsule_New(array_to_dlpack(first_deriv_imgs.Bmat.data(), 6*Npix_to_model), "dltensor", dlpack_destructor); + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_Bmat_images, first_deriv_imgs.Bmat); } PyObject* diffBragg::get_d2_Bmat_images() { - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d2_Bmat_images); + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d2_Bmat_images, second_deriv_imgs.Bmat); } PyObject* diffBragg::get_d_Ncells_images() { -#ifdef DIFFBRAGG_HAVE_KOKKOS - if (use_gpu || getenv("DIFFBRAGG_USE_KOKKOS")!=NULL){ - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_Ncells_images); - } -#endif - return PyCapsule_New(array_to_dlpack(first_deriv_imgs.Ncells.data(), 6*Npix_to_model), "dltensor", dlpack_destructor); + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_Ncells_images, first_deriv_imgs.Ncells); } PyObject* diffBragg::get_d2_Ncells_images() { - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d2_Ncells_images); + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d2_Ncells_images, second_deriv_imgs.Ncells); } PyObject* diffBragg::get_d_fcell_images() { -#ifdef DIFFBRAGG_HAVE_KOKKOS - if (use_gpu || getenv("DIFFBRAGG_USE_KOKKOS")!=NULL){ - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_fcell_images); - } -#endif - return PyCapsule_New(array_to_dlpack(first_deriv_imgs.fcell.data(), Npix_to_model), "dltensor", dlpack_destructor); + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_fcell_images, first_deriv_imgs.fcell); } PyObject* diffBragg::get_d2_fcell_images() { - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d2_fcell_images); + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d2_fcell_images, second_deriv_imgs.fcell); } PyObject* diffBragg::get_d_eta_images() { -#ifdef DIFFBRAGG_HAVE_KOKKOS - if (use_gpu || getenv("DIFFBRAGG_USE_KOKKOS")!=NULL){ - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_eta_images); - } -#endif - return PyCapsule_New(array_to_dlpack(first_deriv_imgs.eta.data(), first_deriv_imgs.eta.size()), "dltensor", dlpack_destructor); + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_eta_images, first_deriv_imgs.eta); } PyObject* diffBragg::get_d2_eta_images() { - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d2_eta_images); + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d2_eta_images, second_deriv_imgs.eta); } PyObject* diffBragg::get_d_lambda_images() { -#ifdef DIFFBRAGG_HAVE_KOKKOS - if (use_gpu || getenv("DIFFBRAGG_USE_KOKKOS")!=NULL){ - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_lambda_images); - } -#endif - return PyCapsule_New(array_to_dlpack(first_deriv_imgs.lambda.data(), 2*Npix_to_model), "dltensor", dlpack_destructor); + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_lambda_images, first_deriv_imgs.lambda); } PyObject* diffBragg::get_d2_lambda_images() { - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d2_lambda_images); + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d2_lambda_images, second_deriv_imgs.lambda); } PyObject* diffBragg::get_d_panel_rot_images() { -#ifdef DIFFBRAGG_HAVE_KOKKOS - if (use_gpu || getenv("DIFFBRAGG_USE_KOKKOS")!=NULL){ - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_panel_rot_images); - } -#endif - return PyCapsule_New(array_to_dlpack(first_deriv_imgs.panel_rot.data(), 3*Npix_to_model), "dltensor", dlpack_destructor); + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_panel_rot_images, first_deriv_imgs.panel_rot); } PyObject* diffBragg::get_d2_panel_rot_images() { - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d2_panel_rot_images); + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d2_panel_rot_images, second_deriv_imgs.panel_rot); } PyObject* diffBragg::get_d_panel_orig_images() { -#ifdef DIFFBRAGG_HAVE_KOKKOS - if (use_gpu || getenv("DIFFBRAGG_USE_KOKKOS")!=NULL){ - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_panel_orig_images); - } -#endif - return PyCapsule_New(array_to_dlpack(first_deriv_imgs.panel_orig.data(), 3*Npix_to_model), "dltensor", dlpack_destructor); + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_panel_orig_images, first_deriv_imgs.panel_orig); } PyObject* diffBragg::get_d2_panel_orig_images() { - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d2_panel_orig_images); + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d2_panel_orig_images, second_deriv_imgs.panel_orig); } PyObject* diffBragg::get_d_fp_fdp_images() { -#ifdef DIFFBRAGG_HAVE_KOKKOS - if (use_gpu || getenv("DIFFBRAGG_USE_KOKKOS")!=NULL){ - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_fp_fdp_images); - } -#endif - return PyCapsule_New(array_to_dlpack(first_deriv_imgs.fp_fdp.data(), 2*Npix_to_model), "dltensor", dlpack_destructor); + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_fp_fdp_images, first_deriv_imgs.fp_fdp); } PyObject* diffBragg::get_Fhkl_scale_deriv() { -#ifdef DIFFBRAGG_HAVE_KOKKOS - if (use_gpu || getenv("DIFFBRAGG_USE_KOKKOS")!=NULL){ - return PyCapsule_Wrapper(&diffBraggKOKKOS::get_Fhkl_scale_deriv); - } -#endif - return PyCapsule_New(array_to_dlpack(first_deriv_imgs.fp_fdp.data(), first_deriv_imgs.fp_fdp.size()), "dltensor", dlpack_destructor); + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_Fhkl_scale_deriv, first_deriv_imgs.Fhkl_scale_deriv); +} + +PyObject* diffBragg::get_Fhkl_hessian() { + // Fhkl_scale_deriv is overloaded, depending on Fhkl_errors_mode + return PyCapsule_Wrapper(&diffBraggKOKKOS::get_Fhkl_scale_deriv, first_deriv_imgs.Fhkl_hessian); } diff --git a/simtbx/diffBragg/src/diffBragg.h b/simtbx/diffBragg/src/diffBragg.h index 882a150870..501f83b58b 100644 --- a/simtbx/diffBragg/src/diffBragg.h +++ b/simtbx/diffBragg/src/diffBragg.h @@ -238,7 +238,7 @@ class diffBragg: public nanoBragg{ boost::python::tuple get_fp_fdp_derivative_pixels(); boost::python::tuple get_ncells_derivative_pixels(); - PyObject* PyCapsule_Wrapper(DLManagedTensor* (diffBraggKOKKOS::*func)()); + PyObject* PyCapsule_Wrapper(DLManagedTensor* (diffBraggKOKKOS::*func)(), image_type &vec); PyObject* get_floatimage(); PyObject* get_wavelenimage(); PyObject* get_d_diffuse_gamma_images(); @@ -261,6 +261,7 @@ class diffBragg: public nanoBragg{ PyObject* get_d2_panel_orig_images(); PyObject* get_d_fp_fdp_images(); PyObject* get_Fhkl_scale_deriv(); + PyObject* get_Fhkl_hessian(); boost::python::tuple get_diffuse_gamma_derivative_pixels(); boost::python::tuple get_diffuse_sigma_derivative_pixels(); From 27560804df40db64290592d5ffdd0567c9b19585 Mon Sep 17 00:00:00 2001 From: Felix Wittwer Date: Fri, 15 Mar 2024 14:25:10 -0700 Subject: [PATCH 17/17] Clean clutter --- simtbx/diffBragg/src/diffBragg.cpp | 8 ++++---- simtbx/diffBragg/src/util.h | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/simtbx/diffBragg/src/diffBragg.cpp b/simtbx/diffBragg/src/diffBragg.cpp index c332bf5e92..f4b29476d4 100644 --- a/simtbx/diffBragg/src/diffBragg.cpp +++ b/simtbx/diffBragg/src/diffBragg.cpp @@ -1528,7 +1528,7 @@ PyObject* diffBragg::PyCapsule_Wrapper( DLManagedTensor* (diffBraggKOKKOS::*func if (diffBragg_runner == nullptr) { return nullptr; } - return PyCapsule_New((*diffBragg_runner.*func)(), "dltensor", dlpack_destructor); + return PyCapsule_New((*diffBragg_runner.*func)(), "dltensor", dlpack_destructor); } #endif return PyCapsule_New(array_to_dlpack(vec.data(), vec.size()), "dltensor", dlpack_destructor); @@ -1539,8 +1539,8 @@ PyObject* diffBragg::get_floatimage() { if (use_gpu || getenv("DIFFBRAGG_USE_KOKKOS")!=NULL) { if (diffBragg_runner == nullptr) { return nullptr; - } - return PyCapsule_New(diffBragg_runner->get_floatimage(), "dltensor", dlpack_destructor); + } + return PyCapsule_New(diffBragg_runner->get_floatimage(), "dltensor", dlpack_destructor); } #endif return PyCapsule_New(array_to_dlpack(raw_pixels_roi.begin(), Npix_to_model), "dltensor", dlpack_destructor); @@ -1568,7 +1568,7 @@ PyObject* diffBragg::get_d2_Umat_images() { PyObject* diffBragg::get_d_Bmat_images() { return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d_Bmat_images, first_deriv_imgs.Bmat); -} +} PyObject* diffBragg::get_d2_Bmat_images() { return PyCapsule_Wrapper(&diffBraggKOKKOS::get_d2_Bmat_images, second_deriv_imgs.Bmat); diff --git a/simtbx/diffBragg/src/util.h b/simtbx/diffBragg/src/util.h index b95ffebd4f..5de97ce4fd 100644 --- a/simtbx/diffBragg/src/util.h +++ b/simtbx/diffBragg/src/util.h @@ -51,7 +51,7 @@ DLDataTypeCode getDLPackTypeCode() { template DLManagedTensor* array_to_dlpack(DataType* pointer, int64_t length) { - + int64_t* shape = new int64_t[1]; shape[0] = length; @@ -61,8 +61,8 @@ DLManagedTensor* array_to_dlpack(DataType* pointer, int64_t length) { dlpackTensor->dl_tensor.device = {kDLCPU, 0}; dlpackTensor->dl_tensor.dtype.code = getDLPackTypeCode(); dlpackTensor->dl_tensor.dtype.bits = sizeof(DataType) * 8; - dlpackTensor->dl_tensor.dtype.lanes = 1; - dlpackTensor->dl_tensor.ndim = 1; + dlpackTensor->dl_tensor.dtype.lanes = 1; + dlpackTensor->dl_tensor.ndim = 1; dlpackTensor->dl_tensor.shape = shape; dlpackTensor->dl_tensor.strides = nullptr; dlpackTensor->dl_tensor.byte_offset = 0;