Skip to content

Commit

Permalink
Adds support for CUDA MHT 16-bit demosaicking
Browse files Browse the repository at this point in the history
  • Loading branch information
Lee L. Schloesser authored and Lee L. Schloesser committed Aug 14, 2024
1 parent 2413f86 commit 5213426
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 28 deletions.
40 changes: 28 additions & 12 deletions modules/cudaimgproc/src/color.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ namespace cv { namespace cuda {
template <int cn>
void Bayer2BGR_16u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);

template <int cn>
void MHCdemosaic(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream);
template <int cn, typename Depth>
void MHCdemosaic(PtrStepSz<Depth> src, int2 sourceOffset, PtrStepSz<Depth> dst, int2 firstRed, cudaStream_t stream);
}
}}

Expand Down Expand Up @@ -2129,7 +2129,7 @@ void cv::cuda::demosaicing(InputArray _src, OutputArray _dst, int code, int dcn,
GpuMat src = _src.getGpuMat();
const int depth = _src.depth();

CV_Assert( depth == CV_8U );
CV_Assert( depth == CV_8U || depth == CV_16U);
CV_Assert( src.channels() == 1 );
CV_Assert( dcn == 3 || dcn == 4 );

Expand All @@ -2141,16 +2141,27 @@ void cv::cuda::demosaicing(InputArray _src, OutputArray _dst, int code, int dcn,
Size wholeSize;
Point ofs;
src.locateROI(wholeSize, ofs);
PtrStepSzb srcWhole(wholeSize.height, wholeSize.width, src.datastart, src.step);

const int2 firstRed = make_int2(code == COLOR_BayerRG2BGR_MHT || code == COLOR_BayerGB2BGR_MHT ? 0 : 1,
code == COLOR_BayerRG2BGR_MHT || code == COLOR_BayerGR2BGR_MHT ? 0 : 1);

if (dcn == 3)
cv::cuda::device::MHCdemosaic<3>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream));
else
cv::cuda::device::MHCdemosaic<4>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream));

if (dcn == 3) {
if (depth == CV_8U) {
PtrStepSzb srcWhole(wholeSize.height, wholeSize.width, src.datastart, src.step);
cv::cuda::device::MHCdemosaic<3, uchar>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream));
} else {
PtrStepSz<ushort> srcWhole(wholeSize.height, wholeSize.width, src.ptr<ushort>(), src.step);
cv::cuda::device::MHCdemosaic<3, ushort>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream));
}
} else {
if (depth == CV_8U) {
PtrStepSzb srcWhole(wholeSize.height, wholeSize.width, src.datastart, src.step);
cv::cuda::device::MHCdemosaic<4, uchar>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream));
} else {
PtrStepSz<ushort> srcWhole(wholeSize.height, wholeSize.width, src.ptr<ushort>(), src.step);
cv::cuda::device::MHCdemosaic<4, ushort>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream));
}
}
break;
}

Expand All @@ -2159,7 +2170,7 @@ void cv::cuda::demosaicing(InputArray _src, OutputArray _dst, int code, int dcn,
GpuMat src = _src.getGpuMat();
const int depth = _src.depth();

CV_Assert( depth == CV_8U );
CV_Assert( depth == CV_8U || depth == CV_16U);

_dst.create(_src.size(), CV_MAKE_TYPE(depth, 1));
GpuMat dst = _dst.getGpuMat();
Expand All @@ -2169,12 +2180,17 @@ void cv::cuda::demosaicing(InputArray _src, OutputArray _dst, int code, int dcn,
Size wholeSize;
Point ofs;
src.locateROI(wholeSize, ofs);
PtrStepSzb srcWhole(wholeSize.height, wholeSize.width, src.datastart, src.step);

const int2 firstRed = make_int2(code == COLOR_BayerRG2BGR_MHT || code == COLOR_BayerGB2BGR_MHT ? 0 : 1,
code == COLOR_BayerRG2BGR_MHT || code == COLOR_BayerGR2BGR_MHT ? 0 : 1);

cv::cuda::device::MHCdemosaic<1>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream));
if (depth == CV_8U) {
PtrStepSzb srcWhole(wholeSize.height, wholeSize.width, src.datastart, src.step);
cv::cuda::device::MHCdemosaic<1, uchar>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream));
} else {
PtrStepSz<ushort> srcWhole(wholeSize.height, wholeSize.width, src.ptr<ushort>(), src.step);
cv::cuda::device::MHCdemosaic<1, ushort>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream));
}

break;
}
Expand Down
48 changes: 32 additions & 16 deletions modules/cudaimgproc/src/cuda/debayer.cu
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,17 @@ namespace cv { namespace cuda { namespace device
//
// ported to CUDA

template<typename Depth> __device__
typename TypeVec<Depth, 3>::vec_type make_3(Depth x, Depth y, Depth z);

template<> __device__ TypeVec<uchar, 3>::vec_type make_3<uchar>(uchar x, uchar y, uchar z) {
return make_uchar3(x, y, z);
}

template<> __device__ TypeVec<ushort, 3>::vec_type make_3<ushort>(ushort x, ushort y, ushort z) {
return make_ushort3(x, y, z);
}

template <typename DstType, class Ptr2D>
__global__ void MHCdemosaic(PtrStepSz<DstType> dst, Ptr2D src, const int2 firstRed)
{
Expand Down Expand Up @@ -506,34 +517,36 @@ namespace cv { namespace cuda { namespace device
alternate.x = (x + firstRed.x) % 2;
alternate.y = (y + firstRed.y) % 2;

// in BGR sequence;
uchar3 pixelColor =
typedef typename VecTraits<DstType>::elem_type SrcElemType;
typedef typename TypeVec<SrcElemType, 3>::vec_type SrcType;

SrcType pixelColor =
(alternate.y == 0) ?
((alternate.x == 0) ?
make_uchar3(saturate_cast<uchar>(PATTERN.y), saturate_cast<uchar>(PATTERN.x), saturate_cast<uchar>(C)) :
make_uchar3(saturate_cast<uchar>(PATTERN.w), saturate_cast<uchar>(C), saturate_cast<uchar>(PATTERN.z))) :
make_3<SrcElemType>(saturate_cast<SrcElemType>(PATTERN.y), saturate_cast<SrcElemType>(PATTERN.x), saturate_cast<SrcElemType>(C)) :
make_3<SrcElemType>(saturate_cast<SrcElemType>(PATTERN.w), saturate_cast<SrcElemType>(C), saturate_cast<SrcElemType>(PATTERN.z))) :
((alternate.x == 0) ?
make_uchar3(saturate_cast<uchar>(PATTERN.z), saturate_cast<uchar>(C), saturate_cast<uchar>(PATTERN.w)) :
make_uchar3(saturate_cast<uchar>(C), saturate_cast<uchar>(PATTERN.x), saturate_cast<uchar>(PATTERN.y)));
make_3<SrcElemType>(saturate_cast<SrcElemType>(PATTERN.z), saturate_cast<SrcElemType>(C), saturate_cast<SrcElemType>(PATTERN.w)) :
make_3<SrcElemType>(saturate_cast<SrcElemType>(C), saturate_cast<SrcElemType>(PATTERN.x), saturate_cast<SrcElemType>(PATTERN.y)));

dst(y, x) = toDst<DstType>(pixelColor);
}

template <int cn>
void MHCdemosaic(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream)
template <int cn, typename Depth>
void MHCdemosaic(PtrStepSz<Depth> src, int2 sourceOffset, PtrStepSz<Depth> dst, int2 firstRed, cudaStream_t stream)
{
typedef typename TypeVec<uchar, cn>::vec_type dst_t;
typedef typename TypeVec<Depth, cn>::vec_type dst_t;

const dim3 block(32, 8);
const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));

if (sourceOffset.x || sourceOffset.y) {
cv::cudev::TextureOff<uchar> texSrc(src, sourceOffset.y, sourceOffset.x);
MHCdemosaic<dst_t, cv::cudev::TextureOffPtr<uchar>><<<grid, block, 0, stream>>>((PtrStepSz<dst_t>)dst, texSrc, firstRed);
cv::cudev::TextureOff<Depth> texSrc(src, sourceOffset.y, sourceOffset.x);
MHCdemosaic<dst_t, cv::cudev::TextureOffPtr<Depth>><<<grid, block, 0, stream>>>((PtrStepSz<dst_t>)dst, texSrc, firstRed);
}
else {
cv::cudev::Texture<uchar> texSrc(src);
MHCdemosaic<dst_t, cv::cudev::TexturePtr<uchar>><<<grid, block, 0, stream>>>((PtrStepSz<dst_t>)dst, texSrc, firstRed);
cv::cudev::Texture<Depth> texSrc(src);
MHCdemosaic<dst_t, cv::cudev::TexturePtr<Depth>><<<grid, block, 0, stream>>>((PtrStepSz<dst_t>)dst, texSrc, firstRed);
}

cudaSafeCall( cudaGetLastError() );
Expand All @@ -542,9 +555,12 @@ namespace cv { namespace cuda { namespace device
cudaSafeCall( cudaDeviceSynchronize() );
}

template void MHCdemosaic<1>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream);
template void MHCdemosaic<3>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream);
template void MHCdemosaic<4>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream);
template void MHCdemosaic<1, uchar>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream);
template void MHCdemosaic<3, uchar>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream);
template void MHCdemosaic<4, uchar>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream);
template void MHCdemosaic<1, ushort>(PtrStepSz<ushort> src, int2 sourceOffset, PtrStepSz<ushort> dst, int2 firstRed, cudaStream_t stream);
template void MHCdemosaic<3, ushort>(PtrStepSz<ushort> src, int2 sourceOffset, PtrStepSz<ushort> dst, int2 firstRed, cudaStream_t stream);
template void MHCdemosaic<4, ushort>(PtrStepSz<ushort> src, int2 sourceOffset, PtrStepSz<ushort> dst, int2 firstRed, cudaStream_t stream);
}}}

#endif /* CUDA_DISABLER */

0 comments on commit 5213426

Please sign in to comment.