From c0d10878d4ff2677f82bf70d3b5afa115c4a48cb Mon Sep 17 00:00:00 2001 From: Vincent Rabaud Date: Tue, 10 Dec 2024 11:37:27 +0100 Subject: [PATCH] Get cudalegacy to compile with clang CUDA and without CUDA --- .../cudalegacy/include/opencv2/cudalegacy.hpp | 2 ++ modules/cudalegacy/src/NCV.cpp | 3 ++ .../cudalegacy/src/cuda/NCVBroxOpticalFlow.cu | 2 -- .../src/cuda/NCVHaarObjectDetection.cu | 4 +-- .../src/cuda/NCVPixelOperations.hpp | 4 +-- modules/cudalegacy/src/cuda/NPP_staging.cu | 32 +++++++++---------- modules/cudalegacy/src/cuda/needle_map.cu | 6 ++-- modules/cudalegacy/src/precomp.hpp | 2 ++ 8 files changed, 29 insertions(+), 26 deletions(-) diff --git a/modules/cudalegacy/include/opencv2/cudalegacy.hpp b/modules/cudalegacy/include/opencv2/cudalegacy.hpp index ace8548e35d..8230eaa2171 100644 --- a/modules/cudalegacy/include/opencv2/cudalegacy.hpp +++ b/modules/cudalegacy/include/opencv2/cudalegacy.hpp @@ -44,11 +44,13 @@ #define OPENCV_CUDALEGACY_HPP #include "opencv2/core/cuda.hpp" +#if defined (HAVE_CUDA) && !defined (CUDA_DISABLER) #include "opencv2/cudalegacy/NCV.hpp" #include "opencv2/cudalegacy/NPP_staging.hpp" #include "opencv2/cudalegacy/NCVPyramid.hpp" #include "opencv2/cudalegacy/NCVHaarObjectDetection.hpp" #include "opencv2/cudalegacy/NCVBroxOpticalFlow.hpp" +#endif #include "opencv2/video/background_segm.hpp" /** diff --git a/modules/cudalegacy/src/NCV.cpp b/modules/cudalegacy/src/NCV.cpp index ddb7003fad0..bad767d0c35 100644 --- a/modules/cudalegacy/src/NCV.cpp +++ b/modules/cudalegacy/src/NCV.cpp @@ -42,6 +42,8 @@ #include "precomp.hpp" +#if defined (HAVE_CUDA) && !defined (CUDA_DISABLER) + //============================================================================== // // Error handling helpers @@ -886,3 +888,4 @@ NCVStatus ncvDrawRects_32u_host(Ncv32u *h_dst, { return drawRectsWrapperHost(h_dst, dstStride, dstWidth, dstHeight, h_rects, numRects, color); } +#endif diff --git a/modules/cudalegacy/src/cuda/NCVBroxOpticalFlow.cu b/modules/cudalegacy/src/cuda/NCVBroxOpticalFlow.cu index 3a527a010c3..d37ed9850f1 100644 --- a/modules/cudalegacy/src/cuda/NCVBroxOpticalFlow.cu +++ b/modules/cudalegacy/src/cuda/NCVBroxOpticalFlow.cu @@ -695,8 +695,6 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc, //prepare image pyramid ImagePyramid pyr(desc.number_of_outer_iterations); - cudaChannelFormatDesc channel_desc = cudaCreateChannelDesc(); - float scale = 1.0f; //cuda arrays for frames diff --git a/modules/cudalegacy/src/cuda/NCVHaarObjectDetection.cu b/modules/cudalegacy/src/cuda/NCVHaarObjectDetection.cu index 9760bcee523..b2a20798b64 100644 --- a/modules/cudalegacy/src/cuda/NCVHaarObjectDetection.cu +++ b/modules/cudalegacy/src/cuda/NCVHaarObjectDetection.cu @@ -193,7 +193,7 @@ __global__ void applyHaarClassifierAnchorParallel(cv::cudev::TexturePtr if (tbDoAtomicCompaction) bInactiveThread = true; else return; } - if (!tbDoAtomicCompaction || tbDoAtomicCompaction && !bInactiveThread) + if (!tbDoAtomicCompaction || (tbDoAtomicCompaction && !bInactiveThread)) { outMaskVal = d_inMask[maskOffset]; y_offs = outMaskVal >> 16; @@ -210,7 +210,7 @@ __global__ void applyHaarClassifierAnchorParallel(cv::cudev::TexturePtr if (tbDoAtomicCompaction) bInactiveThread = true; else return; } - if (!tbDoAtomicCompaction || tbDoAtomicCompaction && !bInactiveThread) + if (!tbDoAtomicCompaction || (tbDoAtomicCompaction && !bInactiveThread)) { maskOffset = y_offs * mask2Dstride + x_offs; diff --git a/modules/cudalegacy/src/cuda/NCVPixelOperations.hpp b/modules/cudalegacy/src/cuda/NCVPixelOperations.hpp index 3d570c5faac..fcebf576d98 100644 --- a/modules/cudalegacy/src/cuda/NCVPixelOperations.hpp +++ b/modules/cudalegacy/src/cuda/NCVPixelOperations.hpp @@ -84,7 +84,7 @@ template<> struct TConvVec2Base {typedef Ncv64f TBase;}; template<> struct TConvVec2Base {typedef Ncv64f TBase;}; template<> struct TConvVec2Base {typedef Ncv64f TBase;}; -#define NC(T) (sizeof(T) / sizeof(TConvVec2Base::TBase)) +#define NC(T) (sizeof(T) / sizeof(typename TConvVec2Base::TBase)) template struct TConvBase2Vec; template<> struct TConvBase2Vec {typedef uchar1 TVec;}; @@ -115,7 +115,7 @@ template inline __host__ __device__ void _TDemoteClampNN(Tin &a, N template inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv32u &out) {out = (Ncv32u)CLAMP(a+0.5f, 0, UINT_MAX);} template inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv32f &out) {out = (Ncv32f)a;} -template inline Tout _pixMakeZero(); +template inline __host__ __device__ Tout _pixMakeZero(); template<> inline __host__ __device__ uchar1 _pixMakeZero() {return make_uchar1(0);} template<> inline __host__ __device__ uchar3 _pixMakeZero() {return make_uchar3(0,0,0);} template<> inline __host__ __device__ uchar4 _pixMakeZero() {return make_uchar4(0,0,0,0);} diff --git a/modules/cudalegacy/src/cuda/NPP_staging.cu b/modules/cudalegacy/src/cuda/NPP_staging.cu index b7a24ee0360..36df5645a62 100644 --- a/modules/cudalegacy/src/cuda/NPP_staging.cu +++ b/modules/cudalegacy/src/cuda/NPP_staging.cu @@ -85,26 +85,24 @@ const Ncv32u NUM_SCAN_THREADS = 256; const Ncv32u LOG2_NUM_SCAN_THREADS = 8; -template +template struct _scanElemOp { - template - static inline __host__ __device__ T_out scanElemOp(T_in elem) - { - return scanElemOp( elem, Int2Type<(int)tbDoSqr>() ); - } - -private: - - template struct Int2Type { enum { value = v }; }; + static __host__ __device__ T_out scanElemOp(T_in elem); +}; - static inline __host__ __device__ T_out scanElemOp(T_in elem, Int2Type<0>) - { - return (T_out)elem; +template +struct _scanElemOp +{ + static inline __host__ __device__ T_out scanElemOp(T_in elem) { + return (T_out)(elem); } +}; - static inline __host__ __device__ T_out scanElemOp(T_in elem, Int2Type<1>) - { +template +struct _scanElemOp +{ + static inline __host__ __device__ T_out scanElemOp(T_in elem) { return (T_out)(elem*elem); } }; @@ -177,7 +175,7 @@ __global__ void scanRows(cv::cudev::TexturePtr tex8u, T_in *d_src, Ncv32u Ncv32u curElemOffs = offsetX + threadIdx.x; T_out curScanElem; - T_in curElem; + T_in curElem = 0; T_out curElemMod; if (curElemOffs < srcWidth) @@ -185,7 +183,7 @@ __global__ void scanRows(cv::cudev::TexturePtr tex8u, T_in *d_src, Ncv32u //load elements curElem = readElem(tex8u, d_src, texOffs, srcStride, curElemOffs); } - curElemMod = _scanElemOp::scanElemOp(curElem); + curElemMod = _scanElemOp::scanElemOp(curElem); //inclusive scan curScanElem = cv::cudev::blockScanInclusive(curElemMod, shmem, threadIdx.x); diff --git a/modules/cudalegacy/src/cuda/needle_map.cu b/modules/cudalegacy/src/cuda/needle_map.cu index a98b17cafed..c5297281025 100644 --- a/modules/cudalegacy/src/cuda/needle_map.cu +++ b/modules/cudalegacy/src/cuda/needle_map.cu @@ -76,19 +76,19 @@ namespace cv { namespace cuda { namespace device // now add the column sums const uint X = threadIdx.x; - if (X | 0xfe == 0xfe) // bit 0 is 0 + if (X | (0xfe == 0xfe)) // bit 0 is 0 { u_col_sum[threadIdx.x] += u_col_sum[threadIdx.x + 1]; v_col_sum[threadIdx.x] += v_col_sum[threadIdx.x + 1]; } - if (X | 0xfe == 0xfc) // bits 0 & 1 == 0 + if (X | (0xfe == 0xfc)) // bits 0 & 1 == 0 { u_col_sum[threadIdx.x] += u_col_sum[threadIdx.x + 2]; v_col_sum[threadIdx.x] += v_col_sum[threadIdx.x + 2]; } - if (X | 0xf8 == 0xf8) + if (X | (0xf8 == 0xf8)) { u_col_sum[threadIdx.x] += u_col_sum[threadIdx.x + 4]; v_col_sum[threadIdx.x] += v_col_sum[threadIdx.x + 4]; diff --git a/modules/cudalegacy/src/precomp.hpp b/modules/cudalegacy/src/precomp.hpp index e87cc8620c9..4524e04b9cb 100644 --- a/modules/cudalegacy/src/precomp.hpp +++ b/modules/cudalegacy/src/precomp.hpp @@ -80,6 +80,8 @@ #endif #include "opencv2/core/private.cuda.hpp" +#if defined (HAVE_CUDA) && !defined (CUDA_DISABLER) #include "opencv2/cudalegacy/private.hpp" +#endif #endif /* __OPENCV_PRECOMP_H__ */