Skip to content

Commit 1334d92

Browse files
committed
cudaimgproc: update for CUDA 8.0 and fix out of bounds memory error
1 parent c7602a8 commit 1334d92

File tree

4 files changed

+54
-21
lines changed

4 files changed

+54
-21
lines changed

modules/cudaimgproc/include/opencv2/cudaimgproc.hpp

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -800,10 +800,21 @@ enum MomentsOrder {
800800
@param order Order of largest moments to calculate with lower order moments requiring less computation.
801801
@returns number of image moments.
802802
803-
@sa cuda::moments, cuda::spatialMoments, cuda::MomentsOrder
803+
@sa cuda::spatialMoments, cuda::moments, cuda::MomentsOrder
804804
*/
805805
CV_EXPORTS_W int numMoments(const MomentsOrder order);
806806

807+
/** @brief Converts the spatial image moments returned from cuda::spatialMoments to cv::Moments.
808+
@param spatialMoments Spatial moments returned from cuda::spatialMoments.
809+
@param order Order used when calculating image moments with cuda::spatialMoments.
810+
@param momentsType Precision used when calculating image moments with cuda::spatialMoments.
811+
812+
@returns cv::Moments.
813+
814+
@sa cuda::spatialMoments, cuda::moments, cuda::cvtToMoments, cuda::numMoments, cuda::MomentsOrder
815+
*/
816+
CV_EXPORTS_W Moments cvtToMoments(Mat spatialMoments, const MomentsOrder order, const int momentsType);
817+
807818
/** @brief Calculates all of the spatial moments up to the 3rd order of a rasterized shape.
808819
809820
Asynchronous version of cuda::moments() which only calculates the spatial (not centralized or normalized) moments, up to the 3rd order, of a rasterized shape.
@@ -813,24 +824,25 @@ Each moment is returned as a column entry in the 1D \a moments array.
813824
@param [out] moments 1D array with each column entry containing a spatial image moment.
814825
@param binaryImage If it is true, all non-zero image pixels are treated as 1's.
815826
@param order Order of largest moments to calculate with lower order moments requiring less computation.
816-
@param momentsType Precision to use when calculating moments. Available types are `CV_32F` and `CV_64F` with the performance of `CV_32F` an order of magnitude greater than `CV_64F`. If the image is small the accuracy from `CV_32F` can be equal or very close to `CV_64F`.
827+
@param momentsType Precision to use when calculating moments. Available types are \ref CV_32F and \ref CV_64F with the performance of \ref CV_32F an order of magnitude greater than \ref CV_64F. If the image is small the accuracy from \ref CV_32F can be equal or very close to \ref CV_64F.
817828
@param stream Stream for the asynchronous version.
818829
819-
@note For maximum performance pre-allocate a 1D GpuMat for \a moments of the correct type and size large enough to store the all the image moments of up to the desired \a order. e.g. With \a order === MomentsOrder::SECOND_ORDER_MOMENTS and \a momentsType == `CV_32F` \a moments can be allocated as
830+
@note For maximum performance pre-allocate a 1D GpuMat for \a moments of the correct type and size large enough to store all the image moments of up to the desired \a order. e.g. With \a order == MomentsOrder::SECOND_ORDER_MOMENTS and \a momentsType == \ref CV_32F \a moments can be allocated as
820831
```
821832
GpuMat momentsDevice(1,numMoments(MomentsOrder::SECOND_ORDER_MOMENTS),CV_32F)
822833
```
823-
The central and normalized moments can easily be calculated on the host by downloading the \a moments array and using the cv::Moments constructor. e.g.
834+
The central and normalized moments can easily be calculated on the host by downloading the \a moments array and using the cuda::cvtToMoments helper function. e.g.
824835
```
825-
HostMem momentsHostMem(1, numMoments(MomentsOrder::SECOND_ORDER_MOMENTS), CV_32F);
826-
momentsDevice.download(momentsHostMem, stream);
836+
HostMem spatialMomentsHostMem(1, numMoments(MomentsOrder::SECOND_ORDER_MOMENTS), CV_32F);
837+
momentsDevice.download(spatialMomentsHostMem, stream);
827838
stream.waitForCompletion();
828-
Mat momentsMat = momentsHostMem.createMatHeader();
829-
cv::Moments cvMoments(momentsMat.at<float>(0), momentsMat.at<float>(1), momentsMat.at<float>(2), momentsMat.at<float>(3), momentsMat.at<float>(4), momentsMat.at<float>(5), momentsMat.at<float>(6), momentsMat.at<float>(7), momentsMat.at<float>(8), momentsMat.at<float>(9));
839+
Mat spatialMoments = spatialMomentsHostMem.createMatHeader();
840+
cv::Moments cvMoments = cvtToMoments(spatialMoments, MomentsOrder::SECOND_ORDER_MOMENTS, CV_32F);
830841
```
842+
831843
see the \a CUDA_TEST_P(Moments, Async) test inside opencv_contrib_source_code/modules/cudaimgproc/test/test_moments.cpp for an example.
832844
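@note This function returns nothing; the spatial moments are written to \a moments and can be converted to cv::Moments with cuda::cvtToMoments.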
833-
@sa cuda::moments
845+
@sa cuda::moments, cuda::cvtToMoments, cuda::numMoments, cuda::MomentsOrder
834846
*/
835847
CV_EXPORTS_W void spatialMoments(InputArray src, OutputArray moments, const bool binaryImage = false, const MomentsOrder order = MomentsOrder::THIRD_ORDER_MOMENTS, const int momentsType = CV_64F, Stream& stream = Stream::Null());
836848

@@ -842,7 +854,7 @@ results are returned in the structure cv::Moments.
842854
@param src Raster image (single-channel 2D array).
843855
@param binaryImage If it is true, all non-zero image pixels are treated as 1's.
844856
@param order Order of largest moments to calculate with lower order moments requiring less computation.
845-
@param momentsType Precision to use when calculating moments. Available types are `CV_32F` and `CV_64F` with the performance of `CV_32F` an order of magnitude greater than `CV_64F`. If the image is small the accuracy from `CV_32F` can be equal or very close to `CV_64F`.
857+
@param momentsType Precision to use when calculating moments. Available types are \ref CV_32F and \ref CV_64F with the performance of \ref CV_32F an order of magnitude greater than \ref CV_64F. If the image is small the accuracy from \ref CV_32F can be equal or very close to \ref CV_64F.
846858
847859
@note For maximum performance use the asynchronous version cuda::spatialMoments() as this version internally allocates and deallocates both GpuMat and HostMem to respectively perform the calculation on the device and download the result to the host.
848860
The costly HostMem allocation cannot be avoided however the GpuMat device allocation can be by using BufferPool, e.g.
@@ -852,7 +864,7 @@ The costly HostMem allocation cannot be avoided however the GpuMat device alloca
852864
```
853865
see the \a CUDA_TEST_P(Moments, Accuracy) test inside opencv_contrib_source_code/modules/cudaimgproc/test/test_moments.cpp for an example.
854866
@returns cv::Moments.
855-
@sa cuda::spatialMoments
867+
@sa cuda::spatialMoments, cuda::cvtToMoments, cuda::numMoments, cuda::MomentsOrder
856868
*/
857869
CV_EXPORTS_W Moments moments(InputArray src, const bool binaryImage = false, const MomentsOrder order = MomentsOrder::THIRD_ORDER_MOMENTS, const int momentsType = CV_64F);
858870

modules/cudaimgproc/src/cuda/moments.cu

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,22 @@ constexpr int blockSizeY = 16;
1616
template <typename T>
1717
__device__ T butterflyWarpReduction(T value) {
1818
for (int i = 16; i >= 1; i /= 2)
19+
#if (CUDART_VERSION >= 9000)
1920
value += __shfl_xor_sync(0xffffffff, value, i, 32);
21+
#else
22+
value += __shfl_xor(value, i, 32);
23+
#endif
2024
return value;
2125
}
2226

2327
template <typename T>
2428
__device__ T butterflyHalfWarpReduction(T value) {
2529
for (int i = 8; i >= 1; i /= 2)
26-
value += __shfl_xor_sync(0xffff, value, i, 32);
30+
#if (CUDART_VERSION >= 9000)
31+
value += __shfl_xor_sync(0xffff, value, i, 16);
32+
#else
33+
value += __shfl_xor(value, i, 16);
34+
#endif
2735
return value;
2836
}
2937

modules/cudaimgproc/src/moments.cpp

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,25 @@ int cv::cuda::numMoments(const MomentsOrder order) {
1212
return order == MomentsOrder::FIRST_ORDER_MOMENTS ? device::imgproc::n1 : order == MomentsOrder::SECOND_ORDER_MOMENTS ? device::imgproc::n12 : device::imgproc::n123;
1313
}
1414

15+
template<typename T>
16+
cv::Moments cvtToMomentsT(Mat spatialMoments, const MomentsOrder order) {
17+
switch (order) {
18+
case MomentsOrder::FIRST_ORDER_MOMENTS:
19+
return Moments(spatialMoments.at<T>(0), spatialMoments.at<T>(1), spatialMoments.at<T>(2), 0, 0, 0, 0, 0, 0, 0);
20+
case MomentsOrder::SECOND_ORDER_MOMENTS:
21+
return Moments(spatialMoments.at<T>(0), spatialMoments.at<T>(1), spatialMoments.at<T>(2), spatialMoments.at<T>(3), spatialMoments.at<T>(4), spatialMoments.at<T>(5), 0, 0, 0, 0);
22+
default:
23+
return Moments(spatialMoments.at<T>(0), spatialMoments.at<T>(1), spatialMoments.at<T>(2), spatialMoments.at<T>(3), spatialMoments.at<T>(4), spatialMoments.at<T>(5), spatialMoments.at<T>(6), spatialMoments.at<T>(7), spatialMoments.at<T>(8), spatialMoments.at<T>(9));
24+
}
25+
}
26+
27+
cv::Moments cv::cuda::cvtToMoments(Mat spatialMoments, const MomentsOrder order, const int momentsType) {
28+
if (momentsType == CV_32F)
29+
return cvtToMomentsT<float>(spatialMoments, order);
30+
else
31+
return cvtToMomentsT<double>(spatialMoments, order);
32+
}
33+
1534
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
1635
Moments cv::cuda::moments(InputArray src, const bool binary, const MomentsOrder order, const int momentsType) { throw_no_cuda(); }
1736
void spatialMoments(InputArray src, OutputArray moments, const bool binary, const MomentsOrder order, const int momentsType, Stream& stream) { throw_no_cuda(); }
@@ -53,15 +72,12 @@ void cv::cuda::spatialMoments(InputArray src, OutputArray moments, const bool bi
5372
}
5473

5574
Moments cv::cuda::moments(InputArray src, const bool binary, const MomentsOrder order, const int momentsType) {
56-
Stream& stream = Stream::Null();
75+
Stream stream;
5776
HostMem dst;
5877
spatialMoments(src, dst, binary, order, momentsType, stream);
5978
stream.waitForCompletion();
6079
Mat moments = dst.createMatHeader();
61-
if(momentsType == CV_32F)
62-
return Moments(moments.at<float>(0), moments.at<float>(1), moments.at<float>(2), moments.at<float>(3), moments.at<float>(4), moments.at<float>(5), moments.at<float>(6), moments.at<float>(7), moments.at<float>(8), moments.at<float>(9));
63-
else
64-
return Moments(moments.at<double>(0), moments.at<double>(1), moments.at<double>(2), moments.at<double>(3), moments.at<double>(4), moments.at<double>(5), moments.at<double>(6), moments.at<double>(7), moments.at<double>(8), moments.at<double>(9));
80+
return cvtToMoments(moments, order, momentsType);
6581
}
6682

6783
#endif /* !defined (HAVE_CUDA) */

modules/cudaimgproc/test/test_moments.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -101,10 +101,7 @@ CUDA_TEST_P(Moments, Async)
101101
HostMem momentsHost(1, nMoments, momentsType);
102102
momentsDevice.download(momentsHost, stream);
103103
stream.waitForCompletion();
104-
Mat momentsHost64F = momentsHost.createMatHeader();
105-
if (momentsType == CV_32F)
106-
momentsHost.createMatHeader().convertTo(momentsHost64F, CV_64F);
107-
const cv::Moments moments = cv::Moments(momentsHost64F.at<double>(0), momentsHost64F.at<double>(1), momentsHost64F.at<double>(2), momentsHost64F.at<double>(3), momentsHost64F.at<double>(4), momentsHost64F.at<double>(5), momentsHost64F.at<double>(6), momentsHost64F.at<double>(7), momentsHost64F.at<double>(8), momentsHost64F.at<double>(9));
104+
const cv::Moments moments = cvtToMoments(momentsHost.createMatHeader(), order, momentsType);
108105
Mat imgHostAdjustedType = imgHost(roi);
109106
if (imgType != CV_8U && imgType != CV_32F)
110107
imgHost(roi).convertTo(imgHostAdjustedType, CV_32F);

0 commit comments

Comments
 (0)