Add void* to tabulate_tensor kernel (#753)

sclaus2 · Claus Susanne · mscroggs · web-flow · commit 238a28932685 · 2025-03-18T14:45:41.000Z
* add void* to tabulate_tensor

* try to trigger CI

* try to trigger CI

* run ruff

* rename user_data custom_data

* add numba functions to obtain empty void* and conversion of numpy array to void*

* fix ruff check

* add line to remove noqa

* expand comment for numba intrinsic function

* add test to use a struct in C-function similar to tabulate_tensor using void*

* changes to custom data test for CI

* specify void* branch for dolfinx test in github actions

* trying to set dolfinx refs for ffcx testing for pull request

* incorporate review suggestions

* add void* argument to test_ds_prisms

---------

Co-authored-by: Claus Susanne &lt;sclaus@Clauss-MacBook-Pro.local&gt;
Co-authored-by: Matthew Scroggs &lt;matthew.w.scroggs@gmail.com&gt;
diff --git a/.github/workflows/dolfinx-tests.yml b/.github/workflows/dolfinx-tests.yml
@@ -10,7 +10,7 @@ on:
     inputs:
       dolfinx_ref:
         description: "DOLFINx branch or tag"
-        default: "main"
+        default: "sclaus2/add-void-to-kernels"
         type: string
       basix_ref:
         description: "Basix branch or tag"
@@ -54,7 +54,7 @@ jobs:
         with:
           path: ./dolfinx
           repository: FEniCS/dolfinx
-          ref: main
+          ref: sclaus2/add-void-to-kernels
       - name: Get DOLFINx source (specified branch/tag)
         if: github.event_name == 'workflow_dispatch'
         uses: actions/checkout@v4
diff --git a/ffcx/codegeneration/C/expressions_template.py b/ffcx/codegeneration/C/expressions_template.py
@@ -23,7 +23,8 @@
                                     const {scalar_type}* restrict c,
                                     const {geom_type}* restrict coordinate_dofs,
                                     const int* restrict entity_local_index,
-                                    const uint8_t* restrict quadrature_permutation)
+                                    const uint8_t* restrict quadrature_permutation,
+                                    void* custom_data)
 {{
 {tabulate_expression}
 }}
diff --git a/ffcx/codegeneration/C/integrals_template.py b/ffcx/codegeneration/C/integrals_template.py
@@ -16,7 +16,8 @@
                                     const {scalar_type}* restrict c,
                                     const {geom_type}* restrict coordinate_dofs,
                                     const int* restrict entity_local_index,
-                                    const uint8_t* restrict quadrature_permutation)
+                                    const uint8_t* restrict quadrature_permutation,
+                                    void* custom_data)
 {{
 {tabulate_tensor}
 }}
diff --git a/ffcx/codegeneration/ufcx.h b/ffcx/codegeneration/ufcx.h
@@ -86,11 +86,15 @@ extern "C"
   /// For integrals not on interior facets, this argument has no effect and a
   /// null pointer can be passed. For interior facets the array will have size 2
   /// (one permutation for each cell adjacent to the facet).
+  /// @param[in] custom_data Custom user data passed to the tabulate function. 
+  /// For example, a struct with additional data needed for the tabulate function.
+  /// See the implementation of runtime integrals for further details.
   typedef void(ufcx_tabulate_tensor_float32)(
       float* restrict A, const float* restrict w, const float* restrict c,
       const float* restrict coordinate_dofs,
       const int* restrict entity_local_index,
-      const uint8_t* restrict quadrature_permutation);
+      const uint8_t* restrict quadrature_permutation,
+      void* custom_data);
 
   /// Tabulate integral into tensor A with compiled
   /// quadrature rule and double precision
@@ -100,7 +104,8 @@ extern "C"
       double* restrict A, const double* restrict w, const double* restrict c,
       const double* restrict coordinate_dofs,
       const int* restrict entity_local_index,
-      const uint8_t* restrict quadrature_permutation);
+      const uint8_t* restrict quadrature_permutation,
+      void* custom_data);
 
 #ifndef __STDC_NO_COMPLEX__
   /// Tabulate integral into tensor A with compiled
@@ -111,7 +116,8 @@ extern "C"
       float _Complex* restrict A, const float _Complex* restrict w,
       const float _Complex* restrict c, const float* restrict coordinate_dofs,
       const int* restrict entity_local_index,
-      const uint8_t* restrict quadrature_permutation);
+      const uint8_t* restrict quadrature_permutation,
+      void* custom_data);
 #endif // __STDC_NO_COMPLEX__
 
 #ifndef __STDC_NO_COMPLEX__
@@ -123,7 +129,8 @@ extern "C"
       double _Complex* restrict A, const double _Complex* restrict w,
       const double _Complex* restrict c, const double* restrict coordinate_dofs,
       const int* restrict entity_local_index,
-      const uint8_t* restrict quadrature_permutation);
+      const uint8_t* restrict quadrature_permutation,
+      void* custom_data);
 #endif // __STDC_NO_COMPLEX__
 
   typedef struct ufcx_integral
diff --git a/ffcx/codegeneration/utils.py b/ffcx/codegeneration/utils.py
@@ -10,6 +10,11 @@
 import numpy as np
 import numpy.typing as npt
 
+try:
+    import numba
+except ImportError:
+    numba = None
+
 
 def dtype_to_c_type(dtype: typing.Union[npt.DTypeLike, str]) -> str:
     """For a NumPy dtype, return the corresponding C type.
@@ -80,6 +85,79 @@ def numba_ufcx_kernel_signature(dtype: npt.DTypeLike, xdtype: npt.DTypeLike):
             types.CPointer(from_dtype(xdtype)),
             types.CPointer(types.intc),
             types.CPointer(types.uint8),
+            types.CPointer(types.void),
         )
     except ImportError as e:
         raise e
+
+
+if numba is not None:
+
+    @numba.extending.intrinsic
+    def empty_void_pointer(typingctx):
+        """Custom intrinsic to return an empty void* pointer.
+
+        This function creates a void pointer initialized to null (0).
+        This is used to pass a nullptr to the UFCx tabulate_tensor interface.
+
+        Args:
+            typingctx: The typing context.
+
+        Returns:
+            A Numba signature and a code generation function that returns a void pointer.
+        """
+
+        def codegen(context, builder, signature, args):
+            null_ptr = context.get_constant(numba.types.voidptr, 0)
+            return null_ptr
+
+        sig = numba.types.voidptr()
+        return sig, codegen
+
+    @numba.extending.intrinsic
+    def get_void_pointer(typingctx, arr):
+        """Custom intrinsic to get a void* pointer from a NumPy array.
+
+        This function takes a NumPy array and returns a void pointer to the array's data.
+        This is used to pass custom data organised in a NumPy array
+        to the UFCx tabulate_tensor interface.
+
+        Args:
+            typingctx: The typing context.
+            arr: The NumPy array to get the void pointer to the first element from.
+            In a multi-dimensional NumPy array, the memory is laid out in a contiguous
+            block of memory, see
+            https://numpy.org/doc/stable/reference/arrays.ndarray.html#internal-memory-layout-of-an-ndarray
+
+        Returns:
+            sig: A Numba signature, which specifies the numba type (here voidptr),
+            codegen: A code generation function, which returns the LLVM IR to cast
+            the raw data pointer to the first element of the of the contiguous block of memory
+            of the NumPy array to void*.
+        """
+        if not isinstance(arr, numba.types.Array):
+            raise TypeError("Expected a NumPy array")
+
+        def codegen(context, builder, signature, args):
+            """Generate LLVM IR code to convert a NumPy array to a void* pointer.
+
+            This function generates the necessary LLVM IR instructions to:
+            1. Allocate memory for the array on the stack.
+            2. Cast the allocated memory to a void* pointer.
+
+            Args:
+                context: The LLVM context.
+                builder: The LLVM IR builder.
+                signature: The function signature.
+                args: The input arguments (NumPy array).
+
+            Returns:
+                A void* pointer to the array's data.
+            """
+            [arr] = args
+            raw_ptr = numba.core.cgutils.alloca_once_value(builder, arr)
+            void_ptr = builder.bitcast(raw_ptr, context.get_value_type(numba.types.voidptr))
+            return void_ptr
+
+        sig = numba.types.voidptr(arr)
+        return sig, codegen
diff --git a/test/test_add_mode.py b/test/test_add_mode.py
@@ -86,6 +86,7 @@ def test_additive_facet_integral(dtype, compile_args):
             ffi.cast(f"{c_xtype} *", coords.ctypes.data),
             ffi.cast("int *", facets.ctypes.data),
             ffi.cast("uint8_t *", perm.ctypes.data),
+            ffi.NULL,
         )
         assert np.isclose(A.sum(), np.sqrt(12) * (i + 1))
 
@@ -158,6 +159,7 @@ def test_additive_cell_integral(dtype, compile_args):
         ffi.cast(f"{c_xtype} *", coords.ctypes.data),
         ffi.NULL,
         ffi.NULL,
+        ffi.NULL,
     )
 
     A0 = np.array(A)
@@ -169,6 +171,7 @@ def test_additive_cell_integral(dtype, compile_args):
             ffi.cast(f"{c_xtype} *", coords.ctypes.data),
             ffi.NULL,
             ffi.NULL,
+            ffi.NULL,
         )
 
         assert np.all(np.isclose(A, (i + 2) * A0))
diff --git a/test/test_custom_data.py b/test/test_custom_data.py
@@ -0,0 +1,118 @@
+# Copyright (C) 2024 Susanne Claus
+#
+# This file is part of FFCx. (https://www.fenicsproject.org)
+#
+# SPDX-License-Identifier:    LGPL-3.0-or-later
+
+import numpy as np
+import pytest
+
+
+def test_tabulate_tensor_integral_add_values():
+    pytest.importorskip("cffi")
+
+    from cffi import FFI
+
+    # Define custom tabulate tensor function in C with a struct
+    # Step 1: Define the function in C and set up the CFFI builder
+    ffibuilder = FFI()
+    ffibuilder.set_source(
+        "_cffi_kernelA",
+        r"""
+        typedef struct {
+            size_t size;
+            double* values;
+        } cell_data;
+
+        void tabulate_tensor_integral_add_values(double* restrict A,
+                                                const double* restrict w,
+                                                const double* restrict c,
+                                                const double* restrict coordinate_dofs,
+                                                const int* restrict entity_local_index,
+                                                const uint8_t* restrict quadrature_permutation,
+                                                void* custom_data)
+        {
+            // Cast the void* custom_data to cell_data*
+            cell_data* custom_data_ptr = (cell_data*)custom_data;
+
+            // Access the custom data
+            size_t size = custom_data_ptr->size;
+            double* values = custom_data_ptr->values;
+
+            // Use the values in your computations
+            for (size_t i = 0; i < size; i++) {
+                A[0] += values[i];
+            }
+        }
+        """,
+    )
+    ffibuilder.cdef(
+        """
+        typedef struct {
+            size_t size;
+            double* values;
+        } cell_data;
+
+        void tabulate_tensor_integral_add_values(double* restrict A,
+                                                const double* restrict w,
+                                                const double* restrict c,
+                                                const double* restrict coordinate_dofs,
+                                                const int* restrict entity_local_index,
+                                                const uint8_t* restrict quadrature_permutation,
+                                                void* custom_data);
+        """
+    )
+
+    # Step 2: Compile the C code
+    ffibuilder.compile(verbose=True)
+
+    # Step 3: Import the compiled library
+    from _cffi_kernelA import ffi, lib
+
+    # Define cell data
+    values = np.array([2.0, 1.0], dtype=np.float64)
+    size = len(values)
+    expected_result = np.array([3.0], dtype=np.float64)
+
+    # Define the input arguments
+    A = np.zeros(1, dtype=np.float64)
+    w = np.array([1.0], dtype=np.float64)
+    c = np.array([0.0], dtype=np.float64)
+    coordinate_dofs = np.array(
+        [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0], dtype=np.float64
+    )
+    entity_local_index = np.array([0], dtype=np.int32)
+    quadrature_permutation = np.array([0], dtype=np.uint8)
+
+    # Cast the arguments to the appropriate C types
+    A_ptr = ffi.cast("double*", A.ctypes.data)
+    w_ptr = ffi.cast("double*", w.ctypes.data)
+    c_ptr = ffi.cast("double*", c.ctypes.data)
+    coordinate_dofs_ptr = ffi.cast("double*", coordinate_dofs.ctypes.data)
+    entity_local_index_ptr = ffi.cast("int*", entity_local_index.ctypes.data)
+    quadrature_permutation_ptr = ffi.cast("uint8_t*", quadrature_permutation.ctypes.data)
+
+    # Use ffi.from_buffer to create a CFFI pointer from the NumPy array
+    values_ptr = ffi.cast("double*", values.ctypes.data)
+
+    # Allocate memory for the struct
+    custom_data = ffi.new("cell_data*")
+    custom_data.size = size
+    custom_data.values = values_ptr
+
+    # Cast the struct to void*
+    custom_data_ptr = ffi.cast("void*", custom_data)
+
+    # Call the function
+    lib.tabulate_tensor_integral_add_values(
+        A_ptr,
+        w_ptr,
+        c_ptr,
+        coordinate_dofs_ptr,
+        entity_local_index_ptr,
+        quadrature_permutation_ptr,
+        custom_data_ptr,
+    )
+
+    # Assert the result
+    np.testing.assert_allclose(A, expected_result, rtol=1e-5)
diff --git a/test/test_jit_expression.py b/test/test_jit_expression.py
@@ -64,6 +64,7 @@ def test_matvec(compile_args):
         ffi.cast(f"{c_xtype} *", coords.ctypes.data),
         ffi.cast("int *", entity_index.ctypes.data),
         ffi.cast("uint8_t *", quad_perm.ctypes.data),
+        ffi.NULL,
     )
 
     # Check the computation against correct NumPy value
@@ -133,6 +134,7 @@ def test_rank1(compile_args):
         ffi.cast(f"{c_xtype} *", coords.ctypes.data),
         ffi.cast("int *", entity_index.ctypes.data),
         ffi.cast("uint8_t *", quad_perm.ctypes.data),
+        ffi.NULL,
     )
 
     f = np.array([[1.0, 2.0, 3.0], [-4.0, -5.0, 6.0]])
@@ -203,6 +205,7 @@ def test_elimiate_zero_tables_tensor(compile_args):
         ffi.cast(f"{c_xtype} *", coords.ctypes.data),
         ffi.cast("int *", entity_index.ctypes.data),
         ffi.cast("uint8_t *", quad_perm.ctypes.data),
+        ffi.NULL,
     )
 
     def exact_expr(x):
@@ -261,6 +264,7 @@ def test_grad_constant(compile_args):
         ffi.cast(f"{c_xtype} *", coords.ctypes.data),
         ffi.cast("int *", entity_index.ctypes.data),
         ffi.cast("uint8_t *", quad_perm.ctypes.data),
+        ffi.NULL,
     )
 
     assert output[0] == pytest.approx(consts[1] * 2 * points[0, 0])
@@ -316,6 +320,7 @@ def test_facet_expression(compile_args):
             ffi.cast(f"{c_xtype} *", coords.ctypes.data),
             ffi.cast("int *", entity_index.ctypes.data),
             ffi.cast("uint8_t *", quad_perm.ctypes.data),
+            ffi.NULL,
         )
         # Assert that facet normal is perpendicular to tangent
         assert np.isclose(np.dot(output, tangent), 0)
@@ -366,6 +371,7 @@ def check_expression(expression_class, output_shape, entity_values, reference_va
                 ffi_data["coords"],
                 ffi_data["entity_index"],
                 ffi_data["quad_perm"],
+                ffi.NULL,
             )
             np.testing.assert_allclose(output, ref_val)
 
@@ -430,5 +436,6 @@ def test_facet_geometry_expressions_3D(compile_args):
         ffi.cast(f"{c_xtype} *", coords.ctypes.data),
         ffi.cast("int *", entity_index.ctypes.data),
         ffi.cast("uint8_t *", quad_perm.ctypes.data),
+        ffi.NULL,
     )
     np.testing.assert_allclose(output, np.asarray(ref_fev)[:3, :])
diff --git a/test/test_jit_forms.py b/test/test_jit_forms.py
diff --git a/test/test_submesh.py b/test/test_submesh.py
diff --git a/test/test_tensor_product.py b/test/test_tensor_product.py

Original file line number	Diff line number	Diff line change
`@@ -86,6 +86,7 @@ def test_additive_facet_integral(dtype, compile_args):`
`86`	`86`	`ffi.cast(f"{c_xtype} *", coords.ctypes.data),`
`87`	`87`	`ffi.cast("int *", facets.ctypes.data),`
`88`	`88`	`ffi.cast("uint8_t *", perm.ctypes.data),`
	`89`	`+ ffi.NULL,`
`89`	`90`	`)`
`90`	`91`	`assert np.isclose(A.sum(), np.sqrt(12) * (i + 1))`
`91`	`92`
`@@ -158,6 +159,7 @@ def test_additive_cell_integral(dtype, compile_args):`
`158`	`159`	`ffi.cast(f"{c_xtype} *", coords.ctypes.data),`
`159`	`160`	`ffi.NULL,`
`160`	`161`	`ffi.NULL,`
	`162`	`+ ffi.NULL,`
`161`	`163`	`)`
`162`	`164`
`163`	`165`	`A0 = np.array(A)`
`@@ -169,6 +171,7 @@ def test_additive_cell_integral(dtype, compile_args):`
`169`	`171`	`ffi.cast(f"{c_xtype} *", coords.ctypes.data),`
`170`	`172`	`ffi.NULL,`
`171`	`173`	`ffi.NULL,`
	`174`	`+ ffi.NULL,`
`172`	`175`	`)`
`173`	`176`
`174`	`177`	`assert np.all(np.isclose(A, (i + 2) * A0))`