Skip to content

replace pybind11 -> nanobind #9

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ jobs:
test:
if: github.event_name != 'pull_request' || !contains('OWNER,MEMBER,COLLABORATOR', github.event.pull_request.author_association)
name: py${{ matrix.python }}
runs-on: ubuntu-${{ matrix.python == 3.7 && '22.04' || 'latest' }}
runs-on: ubuntu-latest
strategy:
matrix:
python: [3.7, 3.12]
python: [3.8, 3.12]
steps:
- uses: actions/checkout@v4
with: {fetch-depth: 0}
Expand All @@ -47,14 +47,14 @@ jobs:
git clean -Xdf
pip install build
python -m build -n -w \
-Ccmake.define.CMAKE_CXX_FLAGS="-Wall -Wextra -Wpedantic -Werror -Wno-missing-field-initializers -Wno-unused-parameter -Wno-cast-function-type"
-Ccmake.define.CMAKE_CXX_FLAGS="-Wall -Wextra -Werror -Wno-missing-field-initializers -Wno-unused-parameter -Wno-cast-function-type"
cuda:
if: github.event_name != 'pull_request' || !contains('OWNER,MEMBER,COLLABORATOR', github.event.pull_request.author_association)
name: CUDA py${{ matrix.python }}
runs-on: [self-hosted, cuda, python]
strategy:
matrix:
python: [3.7, 3.12]
python: [3.8, 3.12]
steps:
- uses: actions/checkout@v4
with: {fetch-depth: 0}
Expand All @@ -79,7 +79,7 @@ jobs:
git clean -Xdf
pip install build
python -m build -n -w \
-Ccmake.define.CMAKE_CXX_FLAGS="-Wall -Wextra -Wpedantic -Werror -Wno-missing-field-initializers -Wno-unused-parameter -Wno-cast-function-type" \
-Ccmake.define.CMAKE_CXX_FLAGS="-Wall -Wextra -Werror -Wno-missing-field-initializers -Wno-unused-parameter -Wno-cast-function-type" \
-Ccmake.define.CMAKE_CUDA_ARCHITECTURES=all
- name: Post Run setup-python
run: setup-python -p${{ matrix.python }} -Dr
Expand Down
2 changes: 1 addition & 1 deletion CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ authors:
identifiers:
- type: doi
value: 10.5281/zenodo.7013340
keywords: [Python, C, C++, buffer, vector, array, CUDA, CPython, pybind11, extensions, API]
keywords: [Python, C, C++, buffer, vector, array, CUDA, CPython, nanobind, extensions, API]
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ print("\0".join(c["build-system"]["requires"] + c["project"]["dependencies"] + c
git clean -Xdf
pip install --no-build-isolation --no-deps -t . -U -v . \
-Ccmake.define.CUVEC_DEBUG=1 \
-Ccmake.define.CMAKE_CXX_FLAGS="-Wall -Wextra -Wpedantic -Werror -Wno-missing-field-initializers -Wno-unused-parameter -Wno-cast-function-type"
-Ccmake.define.CMAKE_CXX_FLAGS="-Wall -Wextra -Werror -Wno-missing-field-initializers -Wno-unused-parameter -Wno-cast-function-type"
git restore numcu/src # undo deletion of sources
```

Expand Down
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Install

Requirements:

- Python 3.7 or greater (e.g. via `Anaconda or Miniconda <https://docs.conda.io/projects/conda/en/latest/user-guide/install/download.html#anaconda-or-miniconda>`_, or via ``python3-dev``)
- Python 3.8 or greater (e.g. via `Anaconda or Miniconda <https://docs.conda.io/projects/conda/en/latest/user-guide/install/download.html#anaconda-or-miniconda>`_, or via ``python3-dev``)
- (optional) `CUDA SDK/Toolkit <https://developer.nvidia.com/cuda-downloads>`_ (including drivers for an NVIDIA GPU)

* note that if the CUDA SDK/Toolkit is installed *after* NumCu, then NumCu must be re-installed to enable CUDA support
Expand Down
2 changes: 1 addition & 1 deletion docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ pip install numcu

Requirements:

- Python 3.7 or greater (e.g. via [Anaconda or Miniconda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/download.html#anaconda-or-miniconda) or via `python3-dev`)
- Python 3.8 or greater (e.g. via [Anaconda or Miniconda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/download.html#anaconda-or-miniconda) or via `python3-dev`)
- (optional) [CUDA SDK/Toolkit](https://developer.nvidia.com/cuda-downloads) (including drivers for an NVIDIA GPU)
+ note that if the CUDA SDK/Toolkit is installed *after* NumCu, then NumCu must be re-installed to enable CUDA support

Expand Down
7 changes: 3 additions & 4 deletions numcu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
endif()
cmake_policy(SET CMP0104 NEW) # CMAKE_CUDA_ARCHITECTURES
find_package(Python COMPONENTS Interpreter Development.Module REQUIRED)
find_package(pybind11 CONFIG REQUIRED)
find_package(nanobind CONFIG REQUIRED)
if(NOT NUMCU_CUDA_OPTIONAL)
find_package(CUDAToolkit REQUIRED)
enable_language(CUDA)
Expand All @@ -33,8 +33,7 @@ else()
endif()
execute_process(
COMMAND "${Python_EXECUTABLE}" -c "import cuvec; print(cuvec.include_path)"
OUTPUT_VARIABLE CUVEC_INCLUDE_DIRS
OUTPUT_STRIP_TRAILING_WHITESPACE)
OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE CUVEC_INCLUDE_DIRS)
if("${CUVEC_INCLUDE_DIRS}" STREQUAL "")
message(WARNING "Could not find cuvec includes")
else()
Expand Down Expand Up @@ -71,7 +70,7 @@ file(GLOB SRC LIST_DIRECTORIES false "src/*.cu")
include_directories(${Python_INCLUDE_DIRS})
include_directories(${CUVEC_INCLUDE_DIRS})

pybind11_add_module(${PROJECT_NAME} MODULE WITH_SOABI ${SRC})
nanobind_add_module(${PROJECT_NAME} ${SRC})
add_library(AMYPAD::${PROJECT_NAME} ALIAS ${PROJECT_NAME})
target_include_directories(${PROJECT_NAME} PUBLIC
"$<BUILD_INTERFACE:${${CMAKE_PROJECT_NAME}_INCLUDE_DIRS}>"
Expand Down
2 changes: 1 addition & 1 deletion numcu/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
from .lib import add, div, mul

p = resources.files('numcu').resolve()
# for C++/CUDA/pybind11 includes
# for C++/CUDA includes
include_path = p / 'include'
# for use in `cmake -DCMAKE_PREFIX_PATH=...`
cmake_prefix = p / 'cmake'
44 changes: 9 additions & 35 deletions numcu/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,26 +24,6 @@ def get_namespace(*xs, default=cu):
return default # backwards compatibility


def check_cuvec(a, shape, dtype, xp=cu):
"""Asserts that CuVec `a` is of `shape` & `dtype`"""
if not isinstance(a, xp.CuVec):
raise TypeError(f"must be a {xp.CuVec}")
elif np.dtype(a.dtype) != np.dtype(dtype):
raise TypeError(f"dtype must be {dtype}: got {a.dtype}")
elif a.shape != shape:
raise IndexError(f"shape must be {shape}: got {a.shape}")


def check_similar(*arrays, allow_none=True):
"""Asserts that all arrays are `CuVec`s of the same `shape` & `dtype`"""
arrs = tuple(filter(lambda x: x is not None, arrays))
if not allow_none and len(arrays) != len(arrs):
raise TypeError("must not be None")
shape, dtype, xp = arrs[0].shape, arrs[0].dtype, get_namespace(*arrs)
for a in arrs:
check_cuvec(a, shape, dtype, xp)


def div(numerator, divisor, default=FLOAT_MAX, output=None, dev_id=0, sync=True):
"""
Elementwise `output = numerator / divisor if divisor else default`
Expand All @@ -59,12 +39,10 @@ def div(numerator, divisor, default=FLOAT_MAX, output=None, dev_id=0, sync=True)
res = np.divide(numerator, divisor, out=output)
res[np.isnan(res)] = default
return res
assert numerator.size == divisor.size
cu.dev_set(dev_id)
xp = get_namespace(numerator, divisor, output)
numerator = xp.asarray(numerator, 'float32')
divisor = xp.asarray(divisor, 'float32')
output = xp.zeros_like(numerator) if output is None else xp.asarray(output, 'float32')
check_similar(numerator, divisor, output)
if output is None:
output = get_namespace(numerator, divisor, output).zeros_like(numerator)
ext.div(numerator, divisor, output, default=default)
if sync: cu.dev_sync()
return output
Expand All @@ -81,12 +59,10 @@ def mul(a, b, output=None, dev_id=0, sync=True):
sync(bool): whether to `cudaDeviceSynchronize()` after GPU operations.
"""
if dev_id is False: return np.multiply(a, b, out=output)
assert a.size == b.size
cu.dev_set(dev_id)
xp = get_namespace(a, b, output)
a = xp.asarray(a, 'float32')
b = xp.asarray(b, 'float32')
output = xp.zeros_like(a) if output is None else xp.asarray(output, 'float32')
check_similar(a, b, output)
if output is None:
output = get_namespace(a, b, output).zeros_like(a)
ext.mul(a, b, output)
if sync: cu.dev_sync()
return output
Expand All @@ -103,12 +79,10 @@ def add(a, b, output=None, dev_id=0, sync=True):
sync(bool): whether to `cudaDeviceSynchronize()` after GPU operations.
"""
if dev_id is False: return np.add(a, b, out=output)
assert a.size == b.size
cu.dev_set(dev_id)
xp = get_namespace(a, b, output)
a = xp.asarray(a, 'float32')
b = xp.asarray(b, 'float32')
output = xp.zeros_like(a) if output is None else xp.asarray(output, 'float32')
check_similar(a, b, output)
if output is None:
output = get_namespace(a, b, output).zeros_like(a)
ext.add(a, b, output)
if sync: cu.dev_sync()
return output
28 changes: 12 additions & 16 deletions numcu/src/numcu.cu
Original file line number Diff line number Diff line change
Expand Up @@ -4,35 +4,31 @@
* Copyright (2022) Casper da Costa-Luis
*/
#include "elemwise.h" // div, mul, add
#include <pybind11/pybind11.h> // pybind11
#include <nanobind/nanobind.h> // nanobind, NB_MODULE
#include <nanobind/ndarray.h> // ndarray
#include <pycuvec.cuh> // CUDA_PyErr

namespace py = pybind11;
namespace nb = nanobind;
template <typename T> using Arr = const nb::ndarray<T>;

template <typename T>
void elem_div(py::buffer num, py::buffer den, py::buffer dst, T zeroDivDefault) {
py::buffer_info src_num = num.request(), src_den = den.request(), dst_out = dst.request(true);
div(static_cast<T *>(dst_out.ptr), static_cast<T *>(src_num.ptr), static_cast<T *>(src_den.ptr),
dst_out.size, zeroDivDefault);
void elem_div(Arr<const T> &num, Arr<const T> &den, Arr<T> &dst, T zeroDivDefault) {
div(dst.data(), num.data(), den.data(), dst.size(), zeroDivDefault);
if (CUDA_PyErr()) throw std::runtime_error("CUDA kernel");
}

template <typename T> void elem_mul(py::buffer a, py::buffer b, py::buffer dst) {
py::buffer_info src_a = a.request(), src_b = b.request(), dst_out = dst.request(true);
mul(static_cast<T *>(dst_out.ptr), static_cast<T *>(src_a.ptr), static_cast<T *>(src_b.ptr),
dst_out.size);
template <typename T> void elem_mul(Arr<const T> &a, Arr<const T> &b, Arr<T> &dst) {
mul(dst.data(), a.data(), b.data(), dst.size());
if (CUDA_PyErr()) throw std::runtime_error("CUDA kernel");
}

template <typename T> void elem_add(py::buffer a, py::buffer b, py::buffer dst) {
py::buffer_info src_a = a.request(), src_b = b.request(), dst_out = dst.request(true);
add(static_cast<T *>(dst_out.ptr), static_cast<T *>(src_a.ptr), static_cast<T *>(src_b.ptr),
dst_out.size);
template <typename T> void elem_add(Arr<const T> &a, Arr<const T> &b, Arr<T> &dst) {
add(dst.data(), a.data(), b.data(), dst.size());
if (CUDA_PyErr()) throw std::runtime_error("CUDA kernel");
}

using namespace pybind11::literals;
PYBIND11_MODULE(numcu, m) {
using namespace nb::literals;
NB_MODULE(numcu, m) {
m.doc() = "NumCu external module.";
m.def("div", &elem_div<float>, "Elementwise division.", "numerator"_a, "divisor"_a, "output"_a,
"default"_a = FLOAT_MAX);
Expand Down
7 changes: 3 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[build-system]
requires = ["setuptools_scm>=7", "scikit-build-core[pyproject]>=0.5", "cuvec>=2.11.0", "pybind11"]
requires = ["setuptools_scm>=7", "scikit-build-core[pyproject]>=0.5", "cuvec>=2.11.0", "nanobind"]
build-backend = "scikit_build_core.build"

[tool.scikit-build]
Expand All @@ -26,8 +26,8 @@ dynamic = ["version"]
authors = [{name = "Casper da Costa-Luis", email = "[email protected]"}]
description = "Numerical CUDA-based Python library built on CuVec"
readme = "README.rst"
requires-python = ">=3.7"
keywords = ["Python", "C", "C++", "buffer", "vector", "array", "CUDA", "CPython", "pybind11", "extensions", "API"]
requires-python = ">=3.8"
keywords = ["Python", "C", "C++", "buffer", "vector", "array", "CUDA", "CPython", "nanobind", "extensions", "API"]
license = {text = "MPL-2.0"}
classifiers = [
"Development Status :: 5 - Production/Stable",
Expand All @@ -42,7 +42,6 @@ classifiers = [
"Programming Language :: C",
"Programming Language :: C++",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
Expand Down