Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Get cudalegacy to compile with clang CUDA and without CUDA #3843

Merged
merged 1 commit into from
Dec 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions modules/cudalegacy/include/opencv2/cudalegacy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,13 @@
#define OPENCV_CUDALEGACY_HPP

#include "opencv2/core/cuda.hpp"
#if defined (HAVE_CUDA) && !defined (CUDA_DISABLER)
#include "opencv2/cudalegacy/NCV.hpp"
#include "opencv2/cudalegacy/NPP_staging.hpp"
#include "opencv2/cudalegacy/NCVPyramid.hpp"
#include "opencv2/cudalegacy/NCVHaarObjectDetection.hpp"
#include "opencv2/cudalegacy/NCVBroxOpticalFlow.hpp"
#endif
#include "opencv2/video/background_segm.hpp"

/**
Expand Down
3 changes: 3 additions & 0 deletions modules/cudalegacy/src/NCV.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@

#include "precomp.hpp"

#if defined (HAVE_CUDA) && !defined (CUDA_DISABLER)

//==============================================================================
//
// Error handling helpers
Expand Down Expand Up @@ -886,3 +888,4 @@ NCVStatus ncvDrawRects_32u_host(Ncv32u *h_dst,
{
return drawRectsWrapperHost(h_dst, dstStride, dstWidth, dstHeight, h_rects, numRects, color);
}
#endif
2 changes: 0 additions & 2 deletions modules/cudalegacy/src/cuda/NCVBroxOpticalFlow.cu
Original file line number Diff line number Diff line change
Expand Up @@ -695,8 +695,6 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc,
//prepare image pyramid
ImagePyramid pyr(desc.number_of_outer_iterations);

cudaChannelFormatDesc channel_desc = cudaCreateChannelDesc<float>();

float scale = 1.0f;

//cuda arrays for frames
Expand Down
4 changes: 2 additions & 2 deletions modules/cudalegacy/src/cuda/NCVHaarObjectDetection.cu
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ __global__ void applyHaarClassifierAnchorParallel(cv::cudev::TexturePtr<Ncv32u>
if (tbDoAtomicCompaction) bInactiveThread = true; else return;
}

if (!tbDoAtomicCompaction || tbDoAtomicCompaction && !bInactiveThread)
if (!tbDoAtomicCompaction || (tbDoAtomicCompaction && !bInactiveThread))
{
outMaskVal = d_inMask[maskOffset];
y_offs = outMaskVal >> 16;
Expand All @@ -210,7 +210,7 @@ __global__ void applyHaarClassifierAnchorParallel(cv::cudev::TexturePtr<Ncv32u>
if (tbDoAtomicCompaction) bInactiveThread = true; else return;
}

if (!tbDoAtomicCompaction || tbDoAtomicCompaction && !bInactiveThread)
if (!tbDoAtomicCompaction || (tbDoAtomicCompaction && !bInactiveThread))
{
maskOffset = y_offs * mask2Dstride + x_offs;

Expand Down
4 changes: 2 additions & 2 deletions modules/cudalegacy/src/cuda/NCVPixelOperations.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ template<> struct TConvVec2Base<double1> {typedef Ncv64f TBase;};
template<> struct TConvVec2Base<double3> {typedef Ncv64f TBase;};
template<> struct TConvVec2Base<double4> {typedef Ncv64f TBase;};

#define NC(T) (sizeof(T) / sizeof(TConvVec2Base<T>::TBase))
#define NC(T) (sizeof(T) / sizeof(typename TConvVec2Base<T>::TBase))

template<typename TBase, Ncv32u NC> struct TConvBase2Vec;
template<> struct TConvBase2Vec<Ncv8u, 1> {typedef uchar1 TVec;};
Expand Down Expand Up @@ -115,7 +115,7 @@ template<typename Tin> inline __host__ __device__ void _TDemoteClampNN(Tin &a, N
template<typename Tin> inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv32u &out) {out = (Ncv32u)CLAMP(a+0.5f, 0, UINT_MAX);}
template<typename Tin> inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv32f &out) {out = (Ncv32f)a;}

template<typename Tout> inline Tout _pixMakeZero();
template<typename Tout> inline __host__ __device__ Tout _pixMakeZero();
template<> inline __host__ __device__ uchar1 _pixMakeZero<uchar1>() {return make_uchar1(0);}
template<> inline __host__ __device__ uchar3 _pixMakeZero<uchar3>() {return make_uchar3(0,0,0);}
template<> inline __host__ __device__ uchar4 _pixMakeZero<uchar4>() {return make_uchar4(0,0,0,0);}
Expand Down
32 changes: 15 additions & 17 deletions modules/cudalegacy/src/cuda/NPP_staging.cu
Original file line number Diff line number Diff line change
Expand Up @@ -85,26 +85,24 @@ const Ncv32u NUM_SCAN_THREADS = 256;
const Ncv32u LOG2_NUM_SCAN_THREADS = 8;


template<class T_in, class T_out>
template<class T_in, class T_out, bool tbDoSqr>
struct _scanElemOp
{
template<bool tbDoSqr>
static inline __host__ __device__ T_out scanElemOp(T_in elem)
{
return scanElemOp( elem, Int2Type<(int)tbDoSqr>() );
}

private:

template <int v> struct Int2Type { enum { value = v }; };
static __host__ __device__ T_out scanElemOp(T_in elem);
};

static inline __host__ __device__ T_out scanElemOp(T_in elem, Int2Type<0>)
{
return (T_out)elem;
template<class T_in, class T_out>
struct _scanElemOp<T_in, T_out, false>
{
static inline __host__ __device__ T_out scanElemOp(T_in elem) {
return (T_out)(elem);
}
};

static inline __host__ __device__ T_out scanElemOp(T_in elem, Int2Type<1>)
{
template<class T_in, class T_out>
struct _scanElemOp<T_in, T_out, true>
{
static inline __host__ __device__ T_out scanElemOp(T_in elem) {
return (T_out)(elem*elem);
}
};
Expand Down Expand Up @@ -177,15 +175,15 @@ __global__ void scanRows(cv::cudev::TexturePtr<Ncv8u> tex8u, T_in *d_src, Ncv32u
Ncv32u curElemOffs = offsetX + threadIdx.x;
T_out curScanElem;

T_in curElem;
T_in curElem = 0;
T_out curElemMod;

if (curElemOffs < srcWidth)
{
//load elements
curElem = readElem<T_in>(tex8u, d_src, texOffs, srcStride, curElemOffs);
}
curElemMod = _scanElemOp<T_in, T_out>::scanElemOp<tbDoSqr>(curElem);
curElemMod = _scanElemOp<T_in, T_out, tbDoSqr>::scanElemOp(curElem);

//inclusive scan
curScanElem = cv::cudev::blockScanInclusive<NUM_SCAN_THREADS>(curElemMod, shmem, threadIdx.x);
Expand Down
6 changes: 3 additions & 3 deletions modules/cudalegacy/src/cuda/needle_map.cu
Original file line number Diff line number Diff line change
Expand Up @@ -76,19 +76,19 @@ namespace cv { namespace cuda { namespace device
// now add the column sums
const uint X = threadIdx.x;

if (X | 0xfe == 0xfe) // bit 0 is 0
if (X | (0xfe == 0xfe)) // bit 0 is 0
{
u_col_sum[threadIdx.x] += u_col_sum[threadIdx.x + 1];
v_col_sum[threadIdx.x] += v_col_sum[threadIdx.x + 1];
}

if (X | 0xfe == 0xfc) // bits 0 & 1 == 0
if (X | (0xfe == 0xfc)) // bits 0 & 1 == 0
{
u_col_sum[threadIdx.x] += u_col_sum[threadIdx.x + 2];
v_col_sum[threadIdx.x] += v_col_sum[threadIdx.x + 2];
}

if (X | 0xf8 == 0xf8)
if (X | (0xf8 == 0xf8))
{
u_col_sum[threadIdx.x] += u_col_sum[threadIdx.x + 4];
v_col_sum[threadIdx.x] += v_col_sum[threadIdx.x + 4];
Expand Down
2 changes: 2 additions & 0 deletions modules/cudalegacy/src/precomp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@
#endif

#include "opencv2/core/private.cuda.hpp"
#if defined (HAVE_CUDA) && !defined (CUDA_DISABLER)
#include "opencv2/cudalegacy/private.hpp"
#endif

#endif /* __OPENCV_PRECOMP_H__ */