From 5213426f9b7c912f8abea096855853e50f4e875e Mon Sep 17 00:00:00 2001 From: "Lee L. Schloesser" Date: Wed, 14 Aug 2024 14:15:23 -0400 Subject: [PATCH] Adds support for CUDA MHT 16-bit demosaicking --- modules/cudaimgproc/src/color.cpp | 40 ++++++++++++++------- modules/cudaimgproc/src/cuda/debayer.cu | 48 ++++++++++++++++--------- 2 files changed, 60 insertions(+), 28 deletions(-) diff --git a/modules/cudaimgproc/src/color.cpp b/modules/cudaimgproc/src/color.cpp index 5adfa6cda6c..eea3c34cabd 100644 --- a/modules/cudaimgproc/src/color.cpp +++ b/modules/cudaimgproc/src/color.cpp @@ -70,8 +70,8 @@ namespace cv { namespace cuda { template void Bayer2BGR_16u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); - template - void MHCdemosaic(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream); + template + void MHCdemosaic(PtrStepSz src, int2 sourceOffset, PtrStepSz dst, int2 firstRed, cudaStream_t stream); } }} @@ -2129,7 +2129,7 @@ void cv::cuda::demosaicing(InputArray _src, OutputArray _dst, int code, int dcn, GpuMat src = _src.getGpuMat(); const int depth = _src.depth(); - CV_Assert( depth == CV_8U ); + CV_Assert( depth == CV_8U || depth == CV_16U); CV_Assert( src.channels() == 1 ); CV_Assert( dcn == 3 || dcn == 4 ); @@ -2141,16 +2141,27 @@ void cv::cuda::demosaicing(InputArray _src, OutputArray _dst, int code, int dcn, Size wholeSize; Point ofs; src.locateROI(wholeSize, ofs); - PtrStepSzb srcWhole(wholeSize.height, wholeSize.width, src.datastart, src.step); const int2 firstRed = make_int2(code == COLOR_BayerRG2BGR_MHT || code == COLOR_BayerGB2BGR_MHT ? 0 : 1, code == COLOR_BayerRG2BGR_MHT || code == COLOR_BayerGR2BGR_MHT ? 0 : 1); - if (dcn == 3) - cv::cuda::device::MHCdemosaic<3>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream)); - else - cv::cuda::device::MHCdemosaic<4>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream)); - + if (dcn == 3) { + if (depth == CV_8U) { + PtrStepSzb srcWhole(wholeSize.height, wholeSize.width, src.datastart, src.step); + cv::cuda::device::MHCdemosaic<3, uchar>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream)); + } else { + PtrStepSz srcWhole(wholeSize.height, wholeSize.width, src.ptr(), src.step); + cv::cuda::device::MHCdemosaic<3, ushort>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream)); + } + } else { + if (depth == CV_8U) { + PtrStepSzb srcWhole(wholeSize.height, wholeSize.width, src.datastart, src.step); + cv::cuda::device::MHCdemosaic<4, uchar>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream)); + } else { + PtrStepSz srcWhole(wholeSize.height, wholeSize.width, src.ptr(), src.step); + cv::cuda::device::MHCdemosaic<4, ushort>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream)); + } + } break; } @@ -2159,7 +2170,7 @@ void cv::cuda::demosaicing(InputArray _src, OutputArray _dst, int code, int dcn, GpuMat src = _src.getGpuMat(); const int depth = _src.depth(); - CV_Assert( depth == CV_8U ); + CV_Assert( depth == CV_8U || depth == CV_16U); _dst.create(_src.size(), CV_MAKE_TYPE(depth, 1)); GpuMat dst = _dst.getGpuMat(); @@ -2169,12 +2180,17 @@ void cv::cuda::demosaicing(InputArray _src, OutputArray _dst, int code, int dcn, Size wholeSize; Point ofs; src.locateROI(wholeSize, ofs); - PtrStepSzb srcWhole(wholeSize.height, wholeSize.width, src.datastart, src.step); const int2 firstRed = make_int2(code == COLOR_BayerRG2BGR_MHT || code == COLOR_BayerGB2BGR_MHT ? 0 : 1, code == COLOR_BayerRG2BGR_MHT || code == COLOR_BayerGR2BGR_MHT ? 0 : 1); - cv::cuda::device::MHCdemosaic<1>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream)); + if (depth == CV_8U) { + PtrStepSzb srcWhole(wholeSize.height, wholeSize.width, src.datastart, src.step); + cv::cuda::device::MHCdemosaic<1, uchar>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream)); + } else { + PtrStepSz srcWhole(wholeSize.height, wholeSize.width, src.ptr(), src.step); + cv::cuda::device::MHCdemosaic<1, ushort>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream)); + } break; } diff --git a/modules/cudaimgproc/src/cuda/debayer.cu b/modules/cudaimgproc/src/cuda/debayer.cu index bfe4b6f5ea8..dfd3b9aa11d 100644 --- a/modules/cudaimgproc/src/cuda/debayer.cu +++ b/modules/cudaimgproc/src/cuda/debayer.cu @@ -390,6 +390,17 @@ namespace cv { namespace cuda { namespace device // // ported to CUDA + template __device__ + typename TypeVec::vec_type make_3(Depth x, Depth y, Depth z); + + template<> __device__ TypeVec::vec_type make_3(uchar x, uchar y, uchar z) { + return make_uchar3(x, y, z); + } + + template<> __device__ TypeVec::vec_type make_3(ushort x, ushort y, ushort z) { + return make_ushort3(x, y, z); + } + template __global__ void MHCdemosaic(PtrStepSz dst, Ptr2D src, const int2 firstRed) { @@ -506,34 +517,36 @@ namespace cv { namespace cuda { namespace device alternate.x = (x + firstRed.x) % 2; alternate.y = (y + firstRed.y) % 2; - // in BGR sequence; - uchar3 pixelColor = + typedef typename VecTraits::elem_type SrcElemType; + typedef typename TypeVec::vec_type SrcType; + + SrcType pixelColor = (alternate.y == 0) ? ((alternate.x == 0) ? - make_uchar3(saturate_cast(PATTERN.y), saturate_cast(PATTERN.x), saturate_cast(C)) : - make_uchar3(saturate_cast(PATTERN.w), saturate_cast(C), saturate_cast(PATTERN.z))) : + make_3(saturate_cast(PATTERN.y), saturate_cast(PATTERN.x), saturate_cast(C)) : + make_3(saturate_cast(PATTERN.w), saturate_cast(C), saturate_cast(PATTERN.z))) : ((alternate.x == 0) ? - make_uchar3(saturate_cast(PATTERN.z), saturate_cast(C), saturate_cast(PATTERN.w)) : - make_uchar3(saturate_cast(C), saturate_cast(PATTERN.x), saturate_cast(PATTERN.y))); + make_3(saturate_cast(PATTERN.z), saturate_cast(C), saturate_cast(PATTERN.w)) : + make_3(saturate_cast(C), saturate_cast(PATTERN.x), saturate_cast(PATTERN.y))); dst(y, x) = toDst(pixelColor); } - template - void MHCdemosaic(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream) + template + void MHCdemosaic(PtrStepSz src, int2 sourceOffset, PtrStepSz dst, int2 firstRed, cudaStream_t stream) { - typedef typename TypeVec::vec_type dst_t; + typedef typename TypeVec::vec_type dst_t; const dim3 block(32, 8); const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y)); if (sourceOffset.x || sourceOffset.y) { - cv::cudev::TextureOff texSrc(src, sourceOffset.y, sourceOffset.x); - MHCdemosaic><<>>((PtrStepSz)dst, texSrc, firstRed); + cv::cudev::TextureOff texSrc(src, sourceOffset.y, sourceOffset.x); + MHCdemosaic><<>>((PtrStepSz)dst, texSrc, firstRed); } else { - cv::cudev::Texture texSrc(src); - MHCdemosaic><<>>((PtrStepSz)dst, texSrc, firstRed); + cv::cudev::Texture texSrc(src); + MHCdemosaic><<>>((PtrStepSz)dst, texSrc, firstRed); } cudaSafeCall( cudaGetLastError() ); @@ -542,9 +555,12 @@ namespace cv { namespace cuda { namespace device cudaSafeCall( cudaDeviceSynchronize() ); } - template void MHCdemosaic<1>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream); - template void MHCdemosaic<3>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream); - template void MHCdemosaic<4>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream); + template void MHCdemosaic<1, uchar>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream); + template void MHCdemosaic<3, uchar>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream); + template void MHCdemosaic<4, uchar>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream); + template void MHCdemosaic<1, ushort>(PtrStepSz src, int2 sourceOffset, PtrStepSz dst, int2 firstRed, cudaStream_t stream); + template void MHCdemosaic<3, ushort>(PtrStepSz src, int2 sourceOffset, PtrStepSz dst, int2 firstRed, cudaStream_t stream); + template void MHCdemosaic<4, ushort>(PtrStepSz src, int2 sourceOffset, PtrStepSz dst, int2 firstRed, cudaStream_t stream); }}} #endif /* CUDA_DISABLER */