// Reconstructed from the patch to src/lib/cpp/cpu_seq/morphology.cc; the
// definition lives inside `namespace cpu_seq`, whose braces are outside this block.
// NOTE(review): the template parameter list was lost in the patch text;
// <typename Op, uint32_t neutral> is inferred from the `Op op;` and `neutral`
// uses in the original body - confirm against the repository.

/**
 * Spherical 3D morphology on a bit-packed mask (1 bit per voxel).
 *
 * Layout: voxel x of a row is stored in word x / 32 at bit (31 - x % 32),
 * i.e. most significant bit first. `N` and `strides` are expressed in voxels
 * (bits), not in words.
 *
 * Every output voxel becomes the fold, with `Op`, of all input voxels that lie
 * inside the volume and within `radius` (Euclidean) of it. Voxels outside the
 * volume are skipped, which is the same as treating them as the neutral element.
 *
 * @tparam Op       Bitwise functor applied to whole words, e.g. std::bit_or<uint32_t>
 *                  for dilation or std::bit_and<uint32_t> for erosion. It must be
 *                  associative and commutative with `neutral` as its identity.
 * @tparam neutral  Identity of `Op`; any non-zero value means "bit set".
 * @param voxels    Input mask, packed as described above.
 * @param radius    Sphere radius in voxels (>= 0).
 * @param N         Volume shape {Nz, Ny, Nx} in voxels.
 * @param strides   Voxel strides {z, y, x}. Requires strides[2] == 1 and
 *                  strides[0], strides[1] to be multiples of 32 so that each row
 *                  starts on a word boundary.
 * @param result    Output mask with the same layout as `voxels`. Every word that
 *                  holds voxels of the volume is overwritten; it must not alias
 *                  `voxels`.
 */
template <typename Op, uint32_t neutral>
void morphology_3d_sphere_bitpacked(
        const uint32_t *voxels,
        const int64_t radius,
        const int64_t N[3],
        const int64_t strides[3],
        uint32_t *result) {
    assert(radius >= 0 && "radius must be non-negative");
    assert(strides[2] == 1 && strides[0] % 32 == 0 && strides[1] % 32 == 0 &&
           "rows must be contiguous and start on a 32-bit word boundary");

    const Op op{};
    const uint32_t all_ones = ~uint32_t{0};
    // Every lane of a word carries the identity, so masked-out lanes never
    // influence the fold.
    const uint32_t neutral_word = neutral != 0 ? all_ones : uint32_t{0};
    const int64_t sqradius = radius * radius;
    const int64_t Nz = N[0];
    const int64_t Ny = N[1];
    const int64_t Nx = N[2];
    const int64_t words_per_row = (Nx + 31) / 32;

    // One iteration owns exactly one output word, so no two threads ever write
    // to the same word.
    #pragma omp parallel for collapse(3)
    for (int64_t z = 0; z < Nz; z++) {
        for (int64_t y = 0; y < Ny; y++) {
            for (int64_t xw = 0; xw < words_per_row; xw++) {
                const int64_t x_begin = xw * 32;
                const int64_t lanes = Nx - x_begin < 32 ? Nx - x_begin : 32;

                // Clamp the z and y extent of the sphere to the volume.
                const int64_t dz_lo = -(radius < z ? radius : z);
                const int64_t dz_hi = radius < Nz - 1 - z ? radius : Nz - 1 - z;
                const int64_t dy_lo = -(radius < y ? radius : y);
                const int64_t dy_hi = radius < Ny - 1 - y ? radius : Ny - 1 - y;

                // acc[lane] folds everything seen so far for output voxel x_begin + lane.
                uint32_t acc[32];
                for (int64_t lane = 0; lane < 32; lane++) {
                    acc[lane] = neutral_word;
                }

                for (int64_t dz = dz_lo; dz <= dz_hi; dz++) {
                    // Half-width of the sphere in x for the current (dz, dy);
                    // -1 means the row misses the sphere. It is carried from one
                    // dy to the next, so adjusting it is cheap.
                    int64_t half_width = -1;
                    for (int64_t dy = dy_lo; dy <= dy_hi; dy++) {
                        const int64_t rest = sqradius - dz * dz - dy * dy;
                        while (half_width >= 0 && half_width * half_width > rest) {
                            half_width--;
                        }
                        while ((half_width + 1) * (half_width + 1) <= rest) {
                            half_width++;
                        }
                        if (half_width < 0) {
                            continue;
                        }

                        const uint32_t *row =
                            voxels + ((z + dz) * strides[0] + (y + dy) * strides[1]) / 32;

                        for (int64_t lane = 0; lane < lanes; lane++) {
                            const int64_t x = x_begin + lane;
                            // In this row the sphere is the contiguous span [first, last].
                            const int64_t first = x - half_width < 0 ? 0 : x - half_width;
                            const int64_t last = x + half_width >= Nx ? Nx - 1 : x + half_width;

                            for (int64_t word = first / 32; word <= last / 32; word++) {
                                const int64_t word_x = word * 32;
                                const int64_t lo = first > word_x ? first - word_x : 0;
                                const int64_t hi = last < word_x + 31 ? last - word_x : 31;
                                // MSB-first mask of positions lo..hi; both shift
                                // amounts stay within 0..31.
                                const uint32_t mask = (all_ones >> lo) & (all_ones << (31 - hi));
                                const uint32_t lanes_in =
                                    (row[word] & mask) | (neutral_word & ~mask);
                                acc[lane] = static_cast<uint32_t>(op(acc[lane], lanes_in));
                            }
                        }
                    }
                }

                // Fold the 32 lanes of each accumulator into one bit. Folding with
                // rotations visits every lane exactly once and shifts in no foreign bits.
                uint32_t out = 0;
                for (int64_t lane = 0; lane < lanes; lane++) {
                    uint32_t folded = acc[lane];
                    for (int shift = 16; shift > 0; shift /= 2) {
                        const uint32_t rotated = (folded >> shift) | (folded << (32 - shift));
                        folded = static_cast<uint32_t>(op(folded, rotated));
                    }
                    out |= (folded & uint32_t{1}) << (31 - lane);
                }

                // Plain store: stale content of `result` is replaced, not OR-ed into.
                result[(z * strides[0] + y * strides[1]) / 32 + xw] = out;
            }
        }
    }
}
py::array_t np_result) { + const int64_t N[3], + const int64_t strides[3], + uint32_t *result); + +template +void morphology_3d_sphere_bitpacked_wrapper( + const py::array_t &np_voxels, + const int64_t radius, + py::array_t np_result) { auto voxels_info = np_voxels.request(), result_info = np_result.request(); - int64_t Nz = voxels_info.shape[0], Ny = voxels_info.shape[1], Nx = voxels_info.shape[2]; + int64_t Nz = voxels_info.shape[0], Ny = voxels_info.shape[1], Nx = voxels_info.shape[2] * 32; int64_t N[3] = {Nz, Ny, Nx}; int64_t strides[3] = {Ny*Nx, Nx, 1}; - const mask_type *voxels = static_cast(voxels_info.ptr); - mask_type *result = static_cast(result_info.ptr); + const uint32_t *voxels = static_cast(voxels_info.ptr); + uint32_t *result = static_cast(result_info.ptr); - NS::morphology_3d_sphere_alt(voxels, radius, N, strides, result); + NS::morphology_3d_sphere_bitpacked(voxels, radius, N, strides, result); } +//template +//void morphology_3d_sphere_wrapper_alt( +// const py::array_t &np_voxels, +// const int64_t radius, +// py::array_t np_result) { +// auto +// voxels_info = np_voxels.request(), +// result_info = np_result.request(); +// +// int64_t Nz = voxels_info.shape[0], Ny = voxels_info.shape[1], Nx = voxels_info.shape[2]; +// int64_t N[3] = {Nz, Ny, Nx}; +// int64_t strides[3] = {Ny*Nx, Nx, 1}; +// +// const mask_type *voxels = static_cast(voxels_info.ptr); +// mask_type *result = static_cast(result_info.ptr); +// +// NS::morphology_3d_sphere_alt(voxels, radius, N, strides, result); +//} + } // namespace python_api PYBIND11_MODULE(morphology, m) { m.doc() = "Morphology operations."; // optional module docstring m.def("dilate_3d_sphere", &python_api::morphology_3d_sphere_wrapper, false>, py::arg("np_voxels"), py::arg("radius"), py::arg("np_result").noconvert()); - m.def("dilate_3d_sphere_alt", &python_api::morphology_3d_sphere_wrapper_alt, false>, py::arg("np_voxels"), py::arg("radius"), py::arg("np_result").noconvert()); + 
m.def("dilate_3d_sphere_bitpacked", &python_api::morphology_3d_sphere_bitpacked_wrapper, 0>, py::arg("np_voxels"), py::arg("radius"), py::arg("np_result").noconvert()); + //m.def("dilate_3d_sphere_alt", &python_api::morphology_3d_sphere_wrapper_alt, false>, py::arg("np_voxels"), py::arg("radius"), py::arg("np_result").noconvert()); m.def("erode_3d_sphere", &python_api::morphology_3d_sphere_wrapper, true>, py::arg("np_voxels"), py::arg("radius"), py::arg("np_result").noconvert()); - m.def("erode_3d_sphere_alt", &python_api::morphology_3d_sphere_wrapper_alt, true>, py::arg("np_voxels"), py::arg("radius"), py::arg("np_result").noconvert()); + //m.def("erode_3d_sphere_alt", &python_api::morphology_3d_sphere_wrapper_alt, true>, py::arg("np_voxels"), py::arg("radius"), py::arg("np_result").noconvert()); } \ No newline at end of file