diff --git a/cuvec/CMakeLists.txt b/cuvec/CMakeLists.txt index 198e58d..909acd4 100644 --- a/cuvec/CMakeLists.txt +++ b/cuvec/CMakeLists.txt @@ -9,7 +9,7 @@ option(CUVEC_CUDA_OPTIONAL "Make CUDA optional rather than forced" ON) cmake_policy(PUSH) cmake_policy(SET CMP0074 NEW) # _ROOT hints for find_package cmake_policy(SET CMP0104 NEW) # CMAKE_CUDA_ARCHITECTURES -find_package(Python3 COMPONENTS Interpreter Development REQUIRED) # NumPy +find_package(Python3 COMPONENTS Interpreter Development.Module REQUIRED) # NumPy if(NOT CUVEC_CUDA_OPTIONAL) find_package(CUDAToolkit REQUIRED) enable_language(CUDA) diff --git a/cuvec/include/cuvec.i b/cuvec/include/cuvec.i index 701c848..b60efce 100644 --- a/cuvec/include/cuvec.i +++ b/cuvec/include/cuvec.i @@ -3,11 +3,10 @@ * for external use via `%include "cuvec.i"`. */ %include "std_vector.i" - %{ #include "cuvec.cuh" // SwigCuVec %} - +/// expose definitions template struct SwigCuVec { CuVec vec; std::vector shape; diff --git a/cuvec/include/pycuvec.cuh b/cuvec/include/pycuvec.cuh index f308b1e..d7843aa 100644 --- a/cuvec/include/pycuvec.cuh +++ b/cuvec/include/pycuvec.cuh @@ -116,7 +116,7 @@ template void PyCuVec_dealloc(PyCuVec *self) { self->shape.shrink_to_fit(); self->strides.clear(); self->strides.shrink_to_fit(); - Py_TYPE(self)->tp_free((PyObject *)self); + Py_TYPE(self)->tp_free(self); } /// __name__ template const char *PyCuVec_t_str() { @@ -300,7 +300,7 @@ template PyCuVec *asPyCuVec(PyCuVec *o) { // return cuvec o = (PyCuVec *)PyObject_GetAttrString((PyObject *)o, "cuvec"); if (!o) return NULL; - Py_DECREF((PyObject *)o); + Py_DECREF(o); } return o; } diff --git a/cuvec/src/example_mod/example_mod.cu b/cuvec/src/example_mod/example_mod.cu index ba81694..fc59f05 100644 --- a/cuvec/src/example_mod/example_mod.cu +++ b/cuvec/src/example_mod/example_mod.cu @@ -49,7 +49,7 @@ static PyObject *increment2d_f(PyObject *self, PyObject *args, PyObject *kwargs) PyErr_SetString(PyExc_IndexError, "`output` must be same shape as `src`"); return NULL; } - Py_INCREF((PyObject *)dst); // anticipating returning + Py_INCREF(dst); // anticipating returning } else { dst = PyCuVec_zeros_like(src); if (!dst) return NULL; diff --git a/cuvec/swigcuvec.py b/cuvec/swigcuvec.py index 7dd5f16..f2a45c6 100644 --- a/cuvec/swigcuvec.py +++ b/cuvec/swigcuvec.py @@ -181,11 +181,11 @@ def asarray(arr, dtype=None, order=None, ownership: str = 'warning') -> CuVec: >>> res = asarray(some_swig_api_func(..., output=getattr(out, 'cuvec', None))) `res.cuvec` and `out.cuvec` are now the same yet garbage collected separately (dangling ptr). - Instead, use: - >>> res = some_swig_api_func(..., output=getattr(out, 'cuvec', None)) - >>> res = out if hasattr(out, 'cuvec') else asarray(res) - NB: `asarray()` is safe if the raw cuvec was created in C++/SWIG, e.g.: - >>> res = asarray(some_swig_api_func(..., output=None), ownership='debug') + Instead, use the `retarray` helper: + >>> raw = some_swig_api_func(..., output=getattr(out, 'cuvec', None)) + >>> res = retarray(raw, out) + NB: `asarray()`/`retarray()` are safe if the raw cuvec was created in C++/SWIG, e.g.: + >>> res = retarray(some_swig_api_func(..., output=None)) """ if is_raw_cuvec(arr): ownership = ownership.lower() @@ -198,3 +198,14 @@ def asarray(arr, dtype=None, order=None, ownership: str = 'warning') -> CuVec: if dtype is None or res.dtype == np.dtype(dtype): return CuVec(np.asanyarray(res, order=order)) return CuVec(np.asanyarray(arr, dtype=dtype, order=order)) + + +def retarray(raw, out: Optional[CuVec] = None): + """ + Returns `out if hasattr(out, 'cuvec') else asarray(raw, ownership='debug')`. + See `asarray` for explanation. + Args: + raw: a raw CuVec (returned by C++/SWIG function). + out: preallocated output array. + """ + return out if hasattr(out, 'cuvec') else asarray(raw, ownership='debug') diff --git a/docs/index.md b/docs/index.md index 2e78c31..0a754fb 100644 --- a/docs/index.md +++ b/docs/index.md @@ -153,7 +153,7 @@ The following involve no memory copies. === "**SWIG API** to **Python**" ```py import cuvec, my_custom_lib - arr = cuvec.swigcuvec.asarray(my_custom_lib.some_swig_api_func()) + arr = cuvec.swigcuvec.retarray(my_custom_lib.some_swig_api_func()) ``` === "**SWIG API** to **C++**" @@ -191,7 +191,7 @@ Python: import cuvec.swigcuvec as cuvec, numpy, mymod arr = cuvec.zeros((1337, 42, 7), "float32") assert all(numpy.mean(arr, axis=(0, 1)) == 0) - print(cuvec.asarray(mymod.myfunc(arr.cuvec)).sum()) + print(cuvec.retarray(mymod.myfunc(arr.cuvec)).sum()) ``` C++: @@ -391,7 +391,7 @@ See also [NumCu](https://github.com/AMYPAD/NumCu), a minimal stand-alone Python ## External Projects === "Python" - Python objects (`arr`, returned by `cuvec.zeros()`, `cuvec.asarray()`, or `cuvec.copy()`) contain all the attributes of a `numpy.ndarray`. Additionally, `arr.cuvec` implements the [buffer protocol](https://docs.python.org/3/c-api/buffer.html), while `arr.__cuda_array_interface__` provides [compatibility with other libraries](https://numba.readthedocs.io/en/latest/cuda/cuda_array_interface.html) such as Numba, CuPy, PyTorch, PyArrow, and RAPIDS. + Python objects (`arr`, returned by `cuvec.zeros()`, `cuvec.asarray()`, or `cuvec.copy()`) contain all the attributes of a `numpy.ndarray`. Additionally, `arr.cuvec` implements the [buffer protocol](https://docs.python.org/3/c-api/buffer.html), while `arr.__cuda_array_interface__` (and `arr.__array_interface__`) provide [compatibility with other libraries](https://numba.readthedocs.io/en/latest/cuda/cuda_array_interface.html) such as Numba, CuPy, PyTorch, PyArrow, and RAPIDS. When using the SWIG alternative module, `arr.cuvec` is a wrapper around `SwigCuVec *`. diff --git a/pyproject.toml b/pyproject.toml index e476125..9d9d6f9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [build-system] requires = ["setuptools>=42", "wheel", "setuptools_scm[toml]>=3.4", "miutil[cuda]>=0.4.0", - "scikit-build>=0.11.0", "cmake>=3.18", "ninja"] + "scikit-build>=0.11.0", "cmake>=3.18", "ninja", "swig"] build-backend = "setuptools.build_meta" [tool.setuptools_scm] diff --git a/setup.py b/setup.py index a9ddd87..8b307c9 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ log = logging.getLogger("cuvec.setup") build_ver = ".".join(__version__.split('.')[:3]).split(".dev")[0] -cmake_args = [f"-DCUVEC_BUILD_VERSION={build_ver}", f"-DPython3_ROOT_DIR={sys.prefix}"] +cmake_args = [f"-DCUVEC_BUILD_VERSION={build_ver}"] try: from miutil import cuinfo nvcc_arch_raw = map(cuinfo.compute_capability, range(cuinfo.num_devices())) diff --git a/tests/test_perf.py b/tests/test_perf.py index 865f295..ef4f3e8 100644 --- a/tests/test_perf.py +++ b/tests/test_perf.py @@ -11,7 +11,7 @@ try: # alternative to `cu` # `example_swig` is defined in ../cuvec/src/example_swig/ - from cuvec import example_swig + from cuvec import example_swig # type: ignore # yapf: disable from cuvec import swigcuvec as sw except ImportError: sw, example_swig = None, None # type: ignore # yapf: disable @@ -57,6 +57,7 @@ def test_inner(*args, **kwargs): def test_perf(cu, ex, shape=(1337, 42), quiet=False, return_time=False): if cu is None: skip("SWIG not available") + retarray = getattr(cu, 'retarray', cu.asarray) overhead = np.mean([_time_overhead() for _ in range(100)]) t = {} t['create src'], src = timer(cu.zeros)(shape, "float32") @@ -68,10 +69,10 @@ def test_perf(cu, ex, shape=(1337, 42), quiet=False, return_time=False): if not quiet: t['warmup'], res = timer(ex.increment2d_f)(src.cuvec, None, True) - t['> create dst'], t['> kernel'] = cu.asarray(res)[0, :2] + t['> create dst'], t['> kernel'] = retarray(res)[0, :2] t['call ext'], res = timer(ex.increment2d_f)(src.cuvec, None, True) t['- create dst'], t['- kernel'] = None, None - t['view'], dst = timer(cu.asarray)(res) + t['view'], dst = timer(retarray)(res) t['- create dst'], t['- kernel'] = dst[0, :2] if not quiet: diff --git a/tests/test_swigcuvec.py b/tests/test_swigcuvec.py index 21cbec3..a2aee94 100644 --- a/tests/test_swigcuvec.py +++ b/tests/test_swigcuvec.py @@ -158,5 +158,12 @@ def test_increment(): a[:] = 0 assert (a == 0).all() - res = cu.asarray(increment2d_f(a.cuvec), ownership='debug') - assert (res == 1).all() + b = cu.retarray(increment2d_f(a.cuvec)) + assert (b == 1).all() + + c = cu.retarray(increment2d_f(b.cuvec, a.cuvec), a) + assert (a == 2).all() + assert c.cuvec == a.cuvec + assert (c == a).all() + assert str(c.swvec) == str(a.swvec) + assert np.asarray(c.swvec).data == np.asarray(a.swvec).data