diff --git a/apps/DensifyPointCloud/DensifyPointCloud.cpp b/apps/DensifyPointCloud/DensifyPointCloud.cpp
index 82b7d5daa..7637e60bb 100644
--- a/apps/DensifyPointCloud/DensifyPointCloud.cpp
+++ b/apps/DensifyPointCloud/DensifyPointCloud.cpp
@@ -109,7 +109,7 @@ bool Application::Initialize(size_t argc, LPCTSTR* argv)
 			), "verbosity level")
 		#endif
 		#ifdef _USE_CUDA
-		("cuda-device", boost::program_options::value(&CUDA::desiredDeviceID)->default_value(-1), "CUDA device number to be used for depth-map estimation (-2 - CPU processing, -1 - best GPU, >=0 - device index)")
+		("cuda-device", boost::program_options::value(&SEACAVE::CUDA::desiredDeviceID)->default_value(-1), "CUDA device number to be used for depth-map estimation (-2 - CPU processing, -1 - best GPU, >=0 - device index)")
 		#endif
 		;
diff --git a/apps/ReconstructMesh/ReconstructMesh.cpp b/apps/ReconstructMesh/ReconstructMesh.cpp
index f7c0f6d83..e4fc7bf6a 100644
--- a/apps/ReconstructMesh/ReconstructMesh.cpp
+++ b/apps/ReconstructMesh/ReconstructMesh.cpp
@@ -118,7 +118,7 @@ bool Application::Initialize(size_t argc, LPCTSTR* argv)
 			), "verbosity level")
 		#endif
 		#ifdef _USE_CUDA
-		("cuda-device", boost::program_options::value(&CUDA::desiredDeviceID)->default_value(-1), "CUDA device number to be used to reconstruct the mesh (-2 - CPU processing, -1 - best GPU, >=0 - device index)")
+		("cuda-device", boost::program_options::value(&SEACAVE::CUDA::desiredDeviceID)->default_value(-1), "CUDA device number to be used to reconstruct the mesh (-2 - CPU processing, -1 - best GPU, >=0 - device index)")
 		#endif
 		;
diff --git a/apps/RefineMesh/RefineMesh.cpp b/apps/RefineMesh/RefineMesh.cpp
index 3aa6230c5..88c2dc05b 100644
--- a/apps/RefineMesh/RefineMesh.cpp
+++ b/apps/RefineMesh/RefineMesh.cpp
@@ -108,7 +108,7 @@ bool Application::Initialize(size_t argc, LPCTSTR* argv)
 			), "verbosity level")
 		#endif
 		#ifdef _USE_CUDA
-		("cuda-device", boost::program_options::value(&CUDA::desiredDeviceID)->default_value(-2), "CUDA device number to be used for mesh refinement (-2 - CPU processing, -1 - best GPU, >=0 - device index)")
+		("cuda-device", boost::program_options::value(&SEACAVE::CUDA::desiredDeviceID)->default_value(-2), "CUDA device number to be used for mesh refinement (-2 - CPU processing, -1 - best GPU, >=0 - device index)")
 		#endif
 		;
@@ -229,7 +229,7 @@ int main(int argc, LPCTSTR* argv)
 	}
 	TD_TIMER_START();
 	#ifdef _USE_CUDA
-	if (CUDA::desiredDeviceID < -1 ||
+	if (SEACAVE::CUDA::desiredDeviceID < -1 ||
 		!scene.RefineMeshCUDA(OPT::nResolutionLevel, OPT::nMinResolution, OPT::nMaxViews,
 			OPT::fDecimateMesh, OPT::nCloseHoles, OPT::nEnsureEdgeSize, OPT::nMaxFaceArea,
diff --git a/apps/TextureMesh/TextureMesh.cpp b/apps/TextureMesh/TextureMesh.cpp
index 703e4b1a8..75cb0f50d 100644
--- a/apps/TextureMesh/TextureMesh.cpp
+++ b/apps/TextureMesh/TextureMesh.cpp
@@ -110,7 +110,7 @@ bool Application::Initialize(size_t argc, LPCTSTR* argv)
 			), "verbosity level")
 		#endif
 		#ifdef _USE_CUDA
-		("cuda-device", boost::program_options::value(&CUDA::desiredDeviceID)->default_value(-1), "CUDA device number to be used to texture the mesh (-2 - CPU processing, -1 - best GPU, >=0 - device index)")
+		("cuda-device", boost::program_options::value(&SEACAVE::CUDA::desiredDeviceID)->default_value(-1), "CUDA device number to be used to texture the mesh (-2 - CPU processing, -1 - best GPU, >=0 - device index)")
 		#endif
 		;
diff --git a/libs/Common/Config.h b/libs/Common/Config.h
index d5f7dddec..e22f946a7 100644
--- a/libs/Common/Config.h
+++ b/libs/Common/Config.h
@@ -206,9 +206,6 @@
 # define FORCEINLINE inline
 #endif
 
-#ifndef _SUPPORT_CPP11
-# define constexpr inline
-#endif
 #ifdef _SUPPORT_CPP17
 # undef MAYBEUNUSED
 # define MAYBEUNUSED [[maybe_unused]]
@@ -224,37 +221,36 @@
 
 #ifdef _MSC_VER
 #define _DEBUGINFO
-#define _CRTDBG_MAP_ALLOC //enable this to show also the filename (DEBUG_NEW should also be defined in each file)
+#define _CRTDBG_MAP_ALLOC //enable this to show also the filename (DEBUG_NEW should also be defined in each file)
 #include
 #include
 #ifdef _INC_CRTDBG
-#define ASSERT(exp) {if (!(exp) && 1 == _CrtDbgReport(_CRT_ASSERT, __FILE__, __LINE__, NULL, #exp)) _CrtDbgBreak();}
+#define ASSERT(exp, ...) {if (!(exp) && 1 == _CrtDbgReport(_CRT_ASSERT, __FILE__, __LINE__, NULL, #exp)) _CrtDbgBreak();}
 #else
-#define ASSERT(exp) {if (!(exp)) __debugbreak();}
+#define ASSERT(exp, ...) {if (!(exp)) __debugbreak();}
 #endif // _INC_CRTDBG
 #define TRACE(...) {TCHAR buffer[2048]; _sntprintf(buffer, 2048, __VA_ARGS__); OutputDebugString(buffer);}
 #else // _MSC_VER
 #include
-#define ASSERT(exp) assert(exp)
+#define ASSERT(exp, ...) assert(exp)
 #define TRACE(...)
 #endif // _MSC_VER
 #else
 #ifdef _RELEASE
-#define ASSERT(exp)
+#define ASSERT(exp, ...)
 #else
 #ifdef _MSC_VER
-#define ASSERT(exp) {if (!(exp)) __debugbreak();}
+#define ASSERT(exp, ...) {if (!(exp)) __debugbreak();}
 #else // _MSC_VER
-#define ASSERT(exp) {if (!(exp)) __builtin_trap();}
+#define ASSERT(exp, ...) {if (!(exp)) __builtin_trap();}
 #endif // _MSC_VER
 #endif
 #define TRACE(...)
 #endif // _DEBUG
 
-#define ASSERTM(exp, msg) ASSERT(exp)
 
 namespace SEACAVE_ASSERT {
diff --git a/libs/Common/FastDelegate.h b/libs/Common/FastDelegate.h
index 26a565bfb..72206cb35 100644
--- a/libs/Common/FastDelegate.h
+++ b/libs/Common/FastDelegate.h
@@ -26,12 +26,6 @@
 #include
 #include
 
-// VC work around for constexpr and noexcept: VC2013 and below do not support these 2 keywords
-#if defined(_MSC_VER) && (_MSC_VER <= 1800)
-#define constexpr const
-#define noexcept throw()
-#endif
-
 namespace fastdelegate {
diff --git a/libs/Common/UtilCUDA.h b/libs/Common/UtilCUDA.h
index d04b04717..6cdbb86a3 100644
--- a/libs/Common/UtilCUDA.h
+++ b/libs/Common/UtilCUDA.h
@@ -67,8 +67,8 @@ inline CUresult __reportCudaError(CUresult result, LPCSTR errorMessage) {
 	ASSERT("CudaError" == NULL);
 	return result;
 }
-#define reportCudaError(val) CUDA::__reportCudaError(val, #val)
-#define checkCudaError(val) { const CUresult ret(CUDA::__reportCudaError(val, #val)); if (ret != CUDA_SUCCESS) return ret; }
+#define reportCudaError(val) SEACAVE::CUDA::__reportCudaError(val, #val)
+#define checkCudaError(val) { const CUresult ret(SEACAVE::CUDA::__reportCudaError(val, #val)); if (ret != CUDA_SUCCESS) return ret; }
 
 // outputs the proper CUDA error code and abort in the event that a CUDA host call returns an error
 inline void __ensureCudaResult(CUresult result, LPCSTR errorMessage) {
@@ -77,7 +77,7 @@ inline void __ensureCudaResult(CUresult result, LPCSTR errorMessage) {
 	ASSERT("CudaAbort" == NULL);
 	exit(EXIT_FAILURE);
 }
-#define ensureCudaResult(val) CUDA::__ensureCudaResult(val, #val)
+#define ensureCudaResult(val) SEACAVE::CUDA::__ensureCudaResult(val, #val)
 /*----------------------------------------------------------------*/
 
 // rounds up addr to the align boundary
diff --git a/libs/Common/UtilCUDADevice.h b/libs/Common/UtilCUDADevice.h
index 46933650d..40791f8a1 100644
--- a/libs/Common/UtilCUDADevice.h
+++ b/libs/Common/UtilCUDADevice.h
@@ -24,6 +24,12 @@
 
 // D E F I N E S ///////////////////////////////////////////////////
 
+#if __CUDA_ARCH__ > 0
+#define __CDC__CUDA__ARCH__ 1
+#else
+#undef __CDC__CUDA__ARCH__
+#endif
+
 #ifndef VERBOSE
 #define DEFINE_VERBOSE 1
 #define VERBOSE(...) fprintf(stderr, __VA_ARGS__)
diff --git a/libs/MVS/CameraCUDA.h b/libs/MVS/CameraCUDA.h
new file mode 100644
index 000000000..c4260ebf4
--- /dev/null
+++ b/libs/MVS/CameraCUDA.h
@@ -0,0 +1,171 @@
+/*
+* CameraCUDA.h
+*
+* Copyright (c) 2014-2024 SEACAVE
+*
+* Author(s):
+*
+*      cDc
+*
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see .
+*
+*
+* Additional Terms:
+*
+*      You are required to preserve legal notices and author attributions in
+*      that material or in the Appropriate Legal Notices displayed by works
+*      containing it.
+*/
+
+#ifndef _MVS_CAMERACUDA_H_
+#define _MVS_CAMERACUDA_H_
+
+
+// I N C L U D E S /////////////////////////////////////////////////
+
+#define _USE_MATH_DEFINES
+#include
+#include
+#include
+#include
+#include
+
+// Eigen
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#include
+
+// CUDA toolkit
+#include
+#include
+#include
+#include
+#include
+
+#include "../Common/UtilCUDADevice.h"
+
+
+// D E F I N E S ///////////////////////////////////////////////////
+
+
+// S T R U C T S ///////////////////////////////////////////////////
+
+namespace MVS {
+
+namespace CUDA {
+
+typedef Eigen::Matrix Point2i;
+typedef Eigen::Matrix Point2;
+typedef Eigen::Matrix Point3;
+typedef Eigen::Matrix Point4;
+typedef Eigen::Matrix Matrix3;
+
+// Linear camera model
+struct LinearCameraModel {
+	Point2 f; // focal length
+	Point2 p; // principal point
+
+	__host__ __device__ LinearCameraModel() {}
+	__host__ __device__ LinearCameraModel(float fx, float fy, float cx, float cy) :
+		f(fx, fy), p(cx, cy) {}
+	__host__ __device__ LinearCameraModel(const Matrix3& K) :
+		f(K(0,0), K(1,1)), p(K(0,2), K(1,2)) { ASSERT(K(0,1) == 0); }
+
+	__host__ __device__ inline Matrix3 K() const {
+		Matrix3 M; M <<
+			f.x(), 0, p.x(),
+			0, f.y(), p.y(),
+			0, 0, 1;
+		return M;
+	}
+
+	// transform a point in image space to camera space
+	__host__ __device__ inline Point2 NormalizePoint(const Point2& x) const {
+		return Point2(
+			(x.x() - p.x()) / f.x(),
+			(x.y() - p.y()) / f.y());
+	}
+
+	// project a point in camera space to image space
+	__host__ __device__ inline Point2 TransformPointC2I(const Point3& X) const {
+		return Point2(
+			f.x() * X.x() / X.z() + p.x(),
+			f.y() * X.y() / X.z() + p.y());
+	}
+
+	// back-project a point in image space to camera space
+	__host__ __device__ inline Point3 TransformPointI2C(const Point2& x, const float depth = 1.f) const {
+		return Point3(
+			depth * (x.x() - p.x()) / f.x(),
+			depth * (x.y() - p.y()) / f.y(),
+			depth);
+	}
+
+	// compute camera ray direction for the given pixel
+	__host__ __device__ inline Point3 ViewDirection(const Point2i& x) const {
+		return TransformPointI2C(x.cast()).normalized();
+	}
+};
+/*----------------------------------------------------------------*/
+
+// Camera pose
+struct Pose {
+	Matrix3 R; // rotation matrix
+	Point3 C; // camera center
+
+	__host__ __device__ Pose() {}
+	__host__ __device__ Pose(const Matrix3& R, const Point3& C) :
+		R(R), C(C) {}
+
+	// transform a 3D point from world space to camera space
+	__host__ __device__ inline Point3 TransformPointW2C(const Point3& X) const {
+		return R * (X - C);
+	}
+
+	// transform a 3D point in camera space to world space
+	__host__ __device__ inline Point3 TransformPointC2W(const Point3& X) const {
+		return R.transpose() * X + C;
+	}
+};
+/*----------------------------------------------------------------*/
+
+// Camera view
+struct Camera {
+	LinearCameraModel model;
+	Pose pose;
+	Point2i size;
+
+	__host__ __device__ Camera() {}
+	__host__ __device__ Camera(const LinearCameraModel& model, const Pose& pose, int width=0, int height=0) :
+		model(model), pose(pose), size(width, height) {}
+	__host__ __device__ Camera(const Matrix3& K, const Matrix3& R, const Point3& C, int width=0, int height=0) :
+		model(K), pose(R, C), size(width, height) {}
+
+	// project a 3D point in world space to image space
+	__host__ __device__ inline Point2 TransformPointW2I(const Point3& X) const {
+		return model.TransformPointC2I(pose.TransformPointW2C(X));
+	}
+
+	// back-project a point in image space to 3D point in world space
+	__host__ __device__ inline Point3 TransformPointI2W(const Point2& x, const float depth = 1.f) const {
+		return pose.TransformPointC2W(model.TransformPointI2C(x, depth));
+	}
+};
+/*----------------------------------------------------------------*/
+
+} // namespace CUDA
+
+} // namespace MVS
+
+#endif // _MVS_CAMERACUDA_H_
diff --git a/libs/MVS/Mesh.h b/libs/MVS/Mesh.h
index 648504994..8e6df53f0 100644
--- a/libs/MVS/Mesh.h
+++ b/libs/MVS/Mesh.h
@@ -137,13 +137,13 @@ class MVS_API Mesh
 	Image8U3Arr texturesDiffuse; // textures containing the diffuse color (optional)
 
 	#ifdef _USE_CUDA
-	static CUDA::KernelRT kernelComputeFaceNormal;
+	static SEACAVE::CUDA::KernelRT kernelComputeFaceNormal;
 	#endif
 
public:
 	#ifdef _USE_CUDA
 	inline Mesh() {
-		InitKernels(CUDA::desiredDeviceID);
+		InitKernels(SEACAVE::CUDA::desiredDeviceID);
 	}
 	#endif
diff --git a/libs/MVS/PatchMatchCUDA.cpp b/libs/MVS/PatchMatchCUDA.cpp
index ea12344de..5677f64b5 100644
--- a/libs/MVS/PatchMatchCUDA.cpp
+++ b/libs/MVS/PatchMatchCUDA.cpp
@@ -1,5 +1,5 @@
 /*
-* PatchMatchCUDA.cpp
+* PatchMatch.cpp
 *
 * Copyright (c) 2014-2021 SEACAVE
 *
@@ -35,7 +35,6 @@
 
 #ifdef _USE_CUDA
 
-using namespace MVS;
 
 
 // D E F I N E S ///////////////////////////////////////////////////
 
@@ -43,19 +42,23 @@ using namespace MVS;
 
 // S T R U C T S /////////////////////////////////////////////////
 
-PatchMatchCUDA::PatchMatchCUDA(int device)
+namespace MVS {
+
+namespace CUDA {
+
+PatchMatch::PatchMatch(int device)
 {
 	// initialize CUDA device if needed
-	if (CUDA::devices.IsEmpty())
-		CUDA::initDevice(device);
+	if (SEACAVE::CUDA::devices.IsEmpty())
+		SEACAVE::CUDA::initDevice(device);
 }
 
-PatchMatchCUDA::~PatchMatchCUDA()
+PatchMatch::~PatchMatch()
 {
 	Release();
 }
 
-void PatchMatchCUDA::Release()
+void PatchMatch::Release()
 {
 	if (images.empty())
 		return;
@@ -80,7 +83,7 @@ void PatchMatchCUDA::Release()
 	ReleaseCUDA();
 }
 
-void PatchMatchCUDA::ReleaseCUDA()
+void PatchMatch::ReleaseCUDA()
 {
 	cudaFree(cudaTextureImages);
 	cudaFree(cudaCameras);
@@ -94,7 +97,7 @@ void PatchMatchCUDA::ReleaseCUDA()
 	delete[] depthNormalEstimates;
 }
 
-void PatchMatchCUDA::Init(bool bGeomConsistency)
+void PatchMatch::Init(bool bGeomConsistency)
 {
 	if (bGeomConsistency) {
 		params.bGeomConsistency = true;
@@ -105,7 +108,7 @@
 	}
 }
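The MVS::CUDA::LinearCameraModel/Pose/Camera types introduced in CameraCUDA.h above replace the free device helpers (ProjectPoint, BackProjectPointCamera, BackProjectPoint, ViewDirection) that the PatchMatchCUDA.cu hunks below delete. A minimal host-side sketch of how the new types compose, assuming nvcc compilation, that the elided Eigen typedef arguments resolve to the int/float vector types used throughout, and with made-up intrinsics and pose values:

// Illustrative only; not part of the patch.
#include "CameraCUDA.h"
#include <cstdio>

int main() {
	using namespace MVS::CUDA;
	const LinearCameraModel model(1000.f, 1000.f, 960.f, 540.f); // fx, fy, cx, cy
	const Pose pose(Matrix3::Identity(), Point3(0.f, 0.f, -2.f)); // rotation R, camera center C
	const Camera camera(model, pose, 1920, 1080);                 // width, height
	const Point3 X(0.1f, -0.2f, 1.5f);                            // world point
	const Point3 Xc = pose.TransformPointW2C(X);                  // camera space; depth = Xc.z()
	const Point2 x = camera.TransformPointW2I(X);                 // project to pixel coordinates
	const Point3 Xb = camera.TransformPointI2W(x, Xc.z());        // back-project at the same depth
	std::printf("round-trip error: %g\n", (X - Xb).norm());       // expected ~0
	return 0;
}

Projecting and then back-projecting at the recovered depth is the identity, which is why the kernels below can swap the old free functions for these members one-for-one.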
-void PatchMatchCUDA::AllocatePatchMatchCUDA(const cv::Mat1f& image)
+void PatchMatch::AllocatePatchMatchCUDA(const cv::Mat1f& image)
 {
 	const size_t num_images = images.size();
 	CUDA_CHECK(cudaMalloc((void**)&cudaTextureImages, sizeof(cudaTextureObject_t) * num_images));
@@ -122,7 +125,7 @@ void PatchMatchCUDA::AllocatePatchMatchCUDA(const cv::Mat1f& image)
 	CUDA_CHECK(cudaMalloc((void**)&cudaRandStates, sizeof(curandState) * size));
 }
 
-void PatchMatchCUDA::AllocateImageCUDA(size_t i, const cv::Mat1f& image, bool bInitImage, bool bInitDepthMap)
+void PatchMatch::AllocateImageCUDA(size_t i, const cv::Mat1f& image, bool bInitImage, bool bInitDepthMap)
 {
 	const cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
@@ -171,7 +174,7 @@ void PatchMatchCUDA::AllocateImageCUDA(size_t i, const cv::Mat1f& image, bool bI
 	}
 }
 
-void PatchMatchCUDA::EstimateDepthMap(DepthData& depthData)
+void PatchMatch::EstimateDepthMap(DepthData& depthData)
 {
 	TD_TIMER_STARTD();
@@ -253,13 +256,12 @@ void PatchMatchCUDA::EstimateDepthMap(DepthData& depthData)
 	for (IIndex i = 0; i < numImages; ++i) {
 		const DepthData::ViewData& view = depthData.images[i];
-		Image32F image = view.image;
-		Camera camera;
-		camera.K = Eigen::Map(view.camera.K.val).cast();
-		camera.R = Eigen::Map(view.camera.R.val).cast();
-		camera.C = Eigen::Map(view.camera.C.ptr()).cast();
-		camera.height = image.rows;
-		camera.width = image.cols;
+		const Image32F image = view.image;
+		const Camera camera(
+			Eigen::Map(view.camera.K.val).cast(),
+			Eigen::Map(view.camera.R.val).cast(),
+			Eigen::Map(view.camera.C.ptr()).cast(),
+			image.cols, image.rows);
 		// store camera and image
 		if (i == 0 && (prevNumImages < numImages || images[0].size() != image.size())) {
 			// allocate/reallocate PatchMatch CUDA memory
@@ -416,4 +418,8 @@ void PatchMatchCUDA::EstimateDepthMap(DepthData& depthData)
 }
 /*----------------------------------------------------------------*/
 
+} // namespace CUDA
+
+} // namespace MVS
+
 #endif // _USE_CUDA
diff --git a/libs/MVS/PatchMatchCUDA.cu b/libs/MVS/PatchMatchCUDA.cu
index 26a9c15d6..4531f8456 100644
--- a/libs/MVS/PatchMatchCUDA.cu
+++ b/libs/MVS/PatchMatchCUDA.cu
@@ -43,32 +43,29 @@
 // patch stepping
 #define nSizeStep 2
 
-using namespace MVS;
 
-typedef Eigen::Matrix Point2i;
-typedef Eigen::Matrix Point2;
-typedef Eigen::Matrix Point3;
-typedef Eigen::Matrix Point4;
-typedef Eigen::Matrix Matrix3;
+namespace MVS {
+
+namespace CUDA {
 
 #define ImagePixels cudaTextureObject_t
 #define RandState curandState
 
 // square the given value
-__device__ inline constexpr float Square(float v) {
+__device__ constexpr float Square(float v) {
 	return v * v;
 }
 
 // set/check a bit
-__device__ inline constexpr void SetBit(unsigned& input, unsigned i) {
+__device__ constexpr void SetBit(unsigned& input, unsigned i) {
 	input |= (1u << i);
 }
-__device__ inline constexpr int IsBitSet(unsigned input, unsigned i) {
+__device__ constexpr int IsBitSet(unsigned input, unsigned i) {
 	return (input >> i) & 1u;
 }
 
 // swap the given values
-__device__ inline constexpr void Swap(float& v0, float& v1) {
+__device__ constexpr void Swap(float& v0, float& v1) {
 	const float tmp = v0;
 	v0 = v1;
 	v1 = tmp;
@@ -82,27 +79,6 @@ __device__ inline Point2i Idx2Point(int idx, int width) {
 	return Point2i(idx % width, idx / width);
 }
 
-// project and back-project a 3D point
-__device__ inline Point2 ProjectPoint(const PatchMatchCUDA::Camera& camera, const Point3& X) {
-	const Point3 x = camera.K * camera.R * (X - camera.C);
-	return x.hnormalized();
-}
-__device__ inline Point3 BackProjectPointCamera(const PatchMatchCUDA::Camera& camera, const Point2& p, const float depth = 1.f) {
-	return Point3(
-		depth * (p.x() - camera.K(0,2)) / camera.K(0,0),
-		depth * (p.y() - camera.K(1,2)) / camera.K(1,1),
-		depth);
-}
-__device__ inline Point3 BackProjectPoint(const PatchMatchCUDA::Camera& camera, const Point2& p, const float depth) {
-	const Point3 camX = BackProjectPointCamera(camera, p, depth);
-	return camera.R.transpose() * camX + camera.C;
-}
-
-// compute camera ray direction for the given pixel
-__device__ inline Point3 ViewDirection(const PatchMatchCUDA::Camera& camera, const Point2i& p) {
-	return BackProjectPointCamera(camera, p.cast()).normalized();
-}
-
 // sort the given values array using bubble sort algorithm
 __device__ inline void Sort(const float* values, float* sortedValues, int n) {
 	for (int i = 0; i < n; ++i)
@@ -149,7 +125,7 @@
 
 // generate a random normal
-__device__ inline Point3 GenerateRandomNormal(const PatchMatchCUDA::Camera& camera, const Point2i& p, RandState* randState)
+__device__ inline Point3 GenerateRandomNormal(const CUDA::Camera& camera, const Point2i& p, RandState* randState)
 {
 	float q1, q2, s;
 	do {
@@ -163,16 +139,16 @@
 		2.f * q2 * sq,
 		1.f - 2.f * s);
 
-	const Point3 viewDirection = ViewDirection(camera, p);
+	const Point3 viewDirection = camera.model.ViewDirection(p);
 	if (normal.dot(viewDirection) > 0.f)
 		normal = -normal;
 	return normal.normalized();
 }
 
 // randomly perturb a normal
-__device__ inline Point3 GeneratePerturbedNormal(const PatchMatchCUDA::Camera& camera, const Point2i& p, const Point3& normal, RandState* randState, const float perturbation)
+__device__ inline Point3 GeneratePerturbedNormal(const CUDA::Camera& camera, const Point2i& p, const Point3& normal, RandState* randState, const float perturbation)
 {
-	const Point3 viewDirection = ViewDirection(camera, p);
+	const Point3 viewDirection = camera.model.ViewDirection(p);
 	const float a1 = (curand_uniform(randState) - 0.5f) * perturbation;
 	const float a2 = (curand_uniform(randState) - 0.5f) * perturbation;
@@ -203,7 +179,7 @@
 }
 
 // randomly perturb a normal
-__device__ inline float GeneratePerturbedDepth(float depth, RandState* randState, const float perturbation, const PatchMatchCUDA::Params& params)
+__device__ inline float GeneratePerturbedDepth(float depth, RandState* randState, const float perturbation, const PatchMatch::Params& params)
 {
 	const float depthMinPerturbed = (1.f - perturbation) * depth;
 	const float depthMaxPerturbed = (1.f + perturbation) * depth;
@@ -215,34 +191,34 @@
 }
 
 // interpolate given pixel's estimate to the current position
-__device__ inline float InterpolatePixel(const PatchMatchCUDA::Camera& camera, const Point2i& p, const Point2i& np, float depth, const Point3& normal, const PatchMatchCUDA::Params& params)
+__device__ inline float InterpolatePixel(const CUDA::Camera& camera, const Point2i& p, const Point2i& np, float depth, const Point3& normal, const PatchMatch::Params& params)
 {
 	float depthNew;
 	if (p.x() == np.x()) {
-		const float nx1 = (p.y() - camera.K(1,2)) / camera.K(1,1);
+		const float nx1 = (p.y() - camera.model.p.y()) / camera.model.f.y();
 		const float denom = normal.z() + nx1 * normal.y();
 		if (abs(denom) < FLT_EPSILON)
 			return depth;
-		const float x1 = (np.y() - camera.K(1,2)) / camera.K(1,1);
+		const float x1 = (np.y() - camera.model.p.y()) / camera.model.f.y();
 		const float nom = depth * (normal.z() + x1 * normal.y());
 		depthNew = nom / denom;
 	} else if (p.y() == np.y()) {
-		const float nx1 = (p.x() - camera.K(0,2)) / camera.K(0,0);
+		const float nx1 = (p.x() - camera.model.p.x()) / camera.model.f.x();
 		const float denom = normal.z() + nx1 * normal.x();
 		if (abs(denom) < FLT_EPSILON)
 			return depth;
-		const float x1 = (np.x() - camera.K(0,2)) / camera.K(0,0);
+		const float x1 = (np.x() - camera.model.p.x()) / camera.model.f.x();
 		const float nom = depth * (normal.z() + x1 * normal.x());
 		depthNew = nom / denom;
 	} else {
-		const float planeD = normal.dot(BackProjectPointCamera(camera, np.cast(), depth));
-		depthNew = planeD / normal.dot(BackProjectPointCamera(camera, p.cast()));
+		const float planeD = normal.dot(camera.model.TransformPointI2C(np.cast(), depth));
+		depthNew = planeD / normal.dot(camera.model.TransformPointI2C(p.cast()));
 	}
 	return (depthNew >= params.fDepthMin && depthNew <= params.fDepthMax) ? depthNew : depth;
 }
 
 // compute normal to the surface given the 4 neighbors
-__device__ inline Point3 ComputeDepthGradient(const Matrix3& K, float depth, const Point2i& pos, const Point4& ndepth) {
+__device__ inline Point3 ComputeDepthGradient(const LinearCameraModel& model, float depth, const Point2i& pos, const Point4& ndepth) {
 	constexpr float2 nposg[4] = {{0,-1}, {0,1}, {-1,0}, {1,0}};
 	Point2 dg(0,0);
 	// add neighbor depths at the gradient locations
@@ -252,19 +228,19 @@ __device__ inline Point3 ComputeDepthGradient(const Matrix3& K, float depth, con
 	const Point2 d = dg*0.5f;
 	// compute normal from depth gradient
 	return Point3(
-		K(0,0)*d.x(),
-		K(1,1)*d.y(),
-		(K(0,2)-pos.x())*d.x()+(K(1,2)-pos.y())*d.y()-depth).normalized();
+		model.f.x()*d.x(),
+		model.f.y()*d.y(),
+		(model.p.x()-pos.x())*d.x()+(model.p.y()-pos.y())*d.y()-depth).normalized();
 }
 
 // compose tho homography matrix that transforms a point from reference to source camera through the given plane
-__device__ inline Matrix3 ComputeHomography(const PatchMatchCUDA::Camera& refCamera, const PatchMatchCUDA::Camera& trgCamera, const Point2& p, const Point4& plane)
+__device__ inline Matrix3 ComputeHomography(const CUDA::Camera& refCamera, const CUDA::Camera& trgCamera, const Point2& p, const Point4& plane)
 {
-	const Point3 X = BackProjectPointCamera(refCamera, p, plane.w());
+	const Point3 X = refCamera.model.TransformPointI2C(p, plane.w());
 	const Point3 normal = plane.topLeftCorner<3,1>();
-	const Point3 t = (refCamera.C - trgCamera.C) / (normal.dot(X));
-	const Matrix3 H = trgCamera.R * (refCamera.R.transpose() + t*normal.transpose());
-	return trgCamera.K * H * refCamera.K.inverse();
+	const Point3 t = (refCamera.pose.C - trgCamera.pose.C) / (normal.dot(X));
+	const Matrix3 H = trgCamera.pose.R * (refCamera.pose.R.transpose() + t*normal.transpose());
+	return trgCamera.model.K() * H * refCamera.model.K().inverse();
 }
 
 // weight a neighbor texel based on color similarity and distance to the center texel
@@ -278,31 +254,31 @@ __device__ inline float ComputeBilateralWeight(int xDist, int yDist, float pix,
 }
 
 // compute the geometric consistency weight
-__device__ inline float GeometricConsistencyWeight(const ImagePixels depthImage, const PatchMatchCUDA::Camera& refCamera, const PatchMatchCUDA::Camera& trgCamera, const Point4& plane, const Point2i& p)
+__device__ inline float GeometricConsistencyWeight(const ImagePixels depthImage, const CUDA::Camera& refCamera, const CUDA::Camera& trgCamera, const Point4& plane, const Point2i& p)
 {
 	if (depthImage == NULL)
 		return 0.f;
 	constexpr float maxDist = 4.f;
-	const Point3 forwardPoint = BackProjectPoint(refCamera, p.cast(), plane.w());
-	const Point2 trgPt = ProjectPoint(trgCamera, forwardPoint);
+	const Point3 forwardPoint = refCamera.TransformPointI2W(p.cast(), plane.w());
+	const Point2 trgPt = trgCamera.TransformPointW2I(forwardPoint);
 	const float trgDepth = tex2D(depthImage, trgPt.x() + 0.5f, trgPt.y() + 0.5f);
 	if (trgDepth == 0.f)
 		return maxDist;
-	const Point3 trgX = BackProjectPoint(trgCamera, trgPt, trgDepth);
-	const Point2 backwardPoint = ProjectPoint(refCamera, trgX);
+	const Point3 trgX = trgCamera.TransformPointI2W(trgPt, trgDepth);
+	const Point2 backwardPoint = refCamera.TransformPointW2I(trgX);
 	const Point2 diff = p.cast() - backwardPoint;
 	const float dist = diff.norm();
 	return min(maxDist, sqrt(dist*(dist+2.f)));
 }
 
 // compute photometric score using weighted ZNCC
-__device__ float ScorePlane(const ImagePixels refImage, const PatchMatchCUDA::Camera& refCamera, const ImagePixels trgImage, const PatchMatchCUDA::Camera& trgCamera, const Point2i& p, const Point4& plane, const float lowDepth, const PatchMatchCUDA::Params& params)
+__device__ float ScorePlane(const ImagePixels refImage, const CUDA::Camera& refCamera, const ImagePixels trgImage, const CUDA::Camera& trgCamera, const Point2i& p, const Point4& plane, const float lowDepth, const PatchMatch::Params& params)
 {
 	constexpr float maxCost = 1.2f;
 	Matrix3 H = ComputeHomography(refCamera, trgCamera, p.cast(), plane);
 	const Point2 pt = (H * p.cast().homogeneous()).hnormalized();
-	if (pt.x() >= trgCamera.width || pt.x() < 0.f || pt.y() >= trgCamera.height || pt.y() < 0.f)
+	if (pt.x() >= trgCamera.size.x() || pt.x() < 0.f || pt.y() >= trgCamera.size.y() || pt.y() < 0.f)
 		return maxCost;
 	Point3 X = H * Point2(p.x()-nSizeHalfWindow, p.y()-nSizeHalfWindow).homogeneous();
 	Point3 baseX(X);
@@ -358,7 +334,7 @@ __device__ float ScorePlane(const ImagePixels refImage, const PatchMatchCUDA::Ca
 }
 
 // compute photometric score for all neighbor images
-__device__ inline void MultiViewScorePlane(const ImagePixels *images, const ImagePixels* depthImages, const PatchMatchCUDA::Camera* cameras, const Point2i& p, const Point4& plane, const float lowDepth, float* costVector, const PatchMatchCUDA::Params& params)
+__device__ inline void MultiViewScorePlane(const ImagePixels *images, const ImagePixels* depthImages, const CUDA::Camera* cameras, const Point2i& p, const Point4& plane, const float lowDepth, float* costVector, const PatchMatch::Params& params)
 {
 	for (int imgId = 1; imgId <= params.nNumViews; ++imgId)
 		costVector[imgId-1] = ScorePlane(images[0], cameras[0], images[imgId], cameras[imgId], p, plane, lowDepth, params);
@@ -367,7 +343,7 @@
 		costVector[imgId] += 0.1f * GeometricConsistencyWeight(depthImages[imgId], cameras[0], cameras[imgId+1], plane, p);
 }
 // same as above, but interpolate the plane to current pixel position
-__device__ inline float MultiViewScoreNeighborPlane(const ImagePixels* images, const ImagePixels* depthImages, const PatchMatchCUDA::Camera* cameras, const Point2i& p, const Point2i& np, Point4 plane, const float lowDepth, float* costVector, const PatchMatchCUDA::Params& params)
+__device__ inline float MultiViewScoreNeighborPlane(const ImagePixels* images, const ImagePixels* depthImages, const CUDA::Camera* cameras, const Point2i& p, const Point2i& np, Point4 plane, const float lowDepth, float* costVector, const PatchMatch::Params& params)
 {
 	plane.w() = InterpolatePixel(cameras[0], p, np, plane.w(), plane.topLeftCorner<3,1>(), params);
 	MultiViewScorePlane(images, depthImages, cameras, p, plane, lowDepth, costVector, params);
@@ -386,10 +362,10 @@ __device__ inline float AggregateMultiViewScores(const unsigned* viewWeights, co
 
 // propagate and refine the plane estimate for the current pixel employing the asymmetric approach described in:
 // "Multi-View Stereo with Asymmetric Checkerboard Propagation and Multi-Hypothesis Joint View Selection", 2018
-__device__ void ProcessPixel(const ImagePixels* images, const ImagePixels* depthImages, const PatchMatchCUDA::Camera* cameras, Point4* planes, const float* lowDepths, float* costs, RandState* randStates, unsigned* selectedViews, const Point2i& p, const PatchMatchCUDA::Params& params, const int iter)
+__device__ void ProcessPixel(const ImagePixels* images, const ImagePixels* depthImages, const CUDA::Camera* cameras, Point4* planes, const float* lowDepths, float* costs, RandState* randStates, unsigned* selectedViews, const Point2i& p, const PatchMatch::Params& params, const int iter)
 {
-	int width = cameras[0].width;
-	int height = cameras[0].height;
+	const int width = cameras[0].size.x();
+	const int height = cameras[0].size.y();
 	if (p.x() >= width || p.y() >= height)
 		return;
 	const int idx = Point2Idx(p, width);
@@ -527,7 +503,7 @@ __device__ void ProcessPixel(const ImagePixels* images, const ImagePixels* depth
 			planes[neighborPositions[2]].w(),
 			planes[neighborPositions[3]].w()
 		);
-		surfaceNormal = ComputeDepthGradient(cameras[0].K, depth, p, ndepths);
+		surfaceNormal = ComputeDepthGradient(cameras[0].model, depth, p, ndepths);
 		numValidPlanes = 4;
 	}
 	constexpr int numPlanes = 4;
@@ -547,10 +523,10 @@ __device__ void ProcessPixel(const ImagePixels* images, const ImagePixels* depth
 }
 
 // compute the score of the current plane estimate
-__device__ void InitializePixelScore(const ImagePixels *images, const ImagePixels* depthImages, const PatchMatchCUDA::Camera* cameras, Point4* planes, const float* lowDepths, float* costs, RandState* randStates, unsigned* selectedViews, const Point2i& p, const PatchMatchCUDA::Params params)
+__device__ void InitializePixelScore(const ImagePixels *images, const ImagePixels* depthImages, const CUDA::Camera* cameras, Point4* planes, const float* lowDepths, float* costs, RandState* randStates, unsigned* selectedViews, const Point2i& p, const PatchMatch::Params params)
 {
-	const int width = cameras[0].width;
-	const int height = cameras[0].height;
+	const int width = cameras[0].size.x();
+	const int height = cameras[0].size.y();
 	if (p.x() >= width || p.y() >= height)
 		return;
 	const int idx = Point2Idx(p, width);
@@ -566,7 +542,7 @@ __device__ void InitializePixelScore(const ImagePixels *images, const ImagePixel
 		// generate random plane
 		plane.topLeftCorner<3,1>() = GenerateRandomNormal(cameras[0], p, randState);
 		plane.w() = curand_uniform(randState) * (params.fDepthMax - params.fDepthMin) + params.fDepthMin;
-	} else if (plane.topLeftCorner<3,1>().dot(ViewDirection(cameras[0], p)) >= 0.f) {
+	} else if (plane.topLeftCorner<3,1>().dot(cameras[0].model.ViewDirection(p)) >= 0.f) {
 		// generate random normal
 		plane.topLeftCorner<3,1>() = GenerateRandomNormal(cameras[0], p, randState);
 	}
@@ -587,20 +563,20 @@ __device__ void InitializePixelScore(const ImagePixels *images, const ImagePixel
 			SetBit(selectedView, imgId);
 	costs[idx] = cost / params.nInitTopK;
 }
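For reference, the plane-induced homography assembled in ComputeHomography above is the standard two-view warp. With the convention x_cam = R (X − C) used by Pose::TransformPointW2C, a reference pixel back-projected to the point X on the plane with unit normal n maps into the target view through

$$ H \;=\; K_t\, R_t \left( R_r^{\top} + \frac{(C_r - C_t)\, n^{\top}}{n^{\top} X} \right) K_r^{-1} $$

which is exactly trgCamera.model.K() * trgCamera.pose.R * (refCamera.pose.R.transpose() + t * normal.transpose()) * refCamera.model.K().inverse() with t = (C_r − C_t) / (n · X) as computed in the hunk above.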
 
-__global__ void InitializeScore(const cudaTextureObject_t* textureImages, const cudaTextureObject_t* textureDepths, const PatchMatchCUDA::Camera* cameras, Point4* planes, const float* lowDepths, float* costs, curandState* randStates, unsigned* selectedViews, const PatchMatchCUDA::Params params)
+__global__ void InitializeScore(const cudaTextureObject_t* textureImages, const cudaTextureObject_t* textureDepths, const CUDA::Camera* cameras, Point4* planes, const float* lowDepths, float* costs, curandState* randStates, unsigned* selectedViews, const PatchMatch::Params params)
 {
 	const Point2i p = Point2i(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y);
 	InitializePixelScore((const ImagePixels*)textureImages, (const ImagePixels*)textureDepths, cameras, planes, lowDepths, costs, (RandState*)randStates, selectedViews, p, params);
 }
 
 // traverse image in a back/red checkerboard pattern
-__global__ void BlackPixelProcess(const cudaTextureObject_t* textureImages, const cudaTextureObject_t* textureDepths, const PatchMatchCUDA::Camera* cameras, Point4* planes, const float* lowDepths, float* costs, curandState* randStates, unsigned* selectedViews, const PatchMatchCUDA::Params params, const int iter)
+__global__ void BlackPixelProcess(const cudaTextureObject_t* textureImages, const cudaTextureObject_t* textureDepths, const CUDA::Camera* cameras, Point4* planes, const float* lowDepths, float* costs, curandState* randStates, unsigned* selectedViews, const PatchMatch::Params params, const int iter)
 {
 	Point2i p = Point2i(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y);
 	p.y() = p.y() * 2 + (threadIdx.x % 2 == 0 ? 0 : 1);
 	ProcessPixel((const ImagePixels*)textureImages, (const ImagePixels*)textureDepths, cameras, planes, lowDepths, costs, (RandState*)randStates, selectedViews, p, params, iter);
 }
-__global__ void RedPixelProcess(const cudaTextureObject_t* textureImages, const cudaTextureObject_t* textureDepths, const PatchMatchCUDA::Camera* cameras, Point4* planes, const float* lowDepths, float* costs, curandState* randStates, unsigned* selectedViews, const PatchMatchCUDA::Params params, const int iter)
+__global__ void RedPixelProcess(const cudaTextureObject_t* textureImages, const cudaTextureObject_t* textureDepths, const CUDA::Camera* cameras, Point4* planes, const float* lowDepths, float* costs, curandState* randStates, unsigned* selectedViews, const PatchMatch::Params params, const int iter)
 {
 	Point2i p = Point2i(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y);
 	p.y() = p.y() * 2 + (threadIdx.x % 2 == 0 ? 1 : 0);
@@ -608,7 +584,7 @@ __global__ void RedPixelProcess(const cudaTextureObject_t* textureImages, const
 }
 
 // filter depth/normals
-__global__ void FilterPlanes(Point4* planes, float* costs, unsigned* selectedViews, int width, int height, const PatchMatchCUDA::Params params)
+__global__ void FilterPlanes(Point4* planes, float* costs, unsigned* selectedViews, int width, int height, const PatchMatch::Params params)
 {
 	const Point2i p = Point2i(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y);
 	if (p.x() >= width || p.y() >= height)
 		return;
@@ -626,10 +602,10 @@ __global__ void FilterPlanes(Point4* planes, float* costs, unsigned* selectedVie
 /*----------------------------------------------------------------*/
 
-__host__ void PatchMatchCUDA::RunCUDA(float* ptrCostMap, uint32_t* ptrViewsMap)
+__host__ void PatchMatch::RunCUDA(float* ptrCostMap, uint32_t* ptrViewsMap)
 {
-	const unsigned width = cameras[0].width;
-	const unsigned height = cameras[0].height;
+	const unsigned width = cameras[0].size.x();
+	const unsigned height = cameras[0].size.y();
 	constexpr unsigned BLOCK_W = 32;
 	constexpr unsigned BLOCK_H = (BLOCK_W / 2);
@@ -660,3 +636,7 @@ __host__ void PatchMatchCUDA::RunCUDA(float* ptrCostMap, uint32_t* ptrViewsMap)
 	cudaDeviceSynchronize();
 }
 /*----------------------------------------------------------------*/
+
+} // namespace CUDA
+
+} // namespace MVS
diff --git a/libs/MVS/PatchMatchCUDA.inl b/libs/MVS/PatchMatchCUDA.inl
index 349cc33d7..c1db82ab9 100644
--- a/libs/MVS/PatchMatchCUDA.inl
+++ b/libs/MVS/PatchMatchCUDA.inl
@@ -35,23 +35,7 @@
 
 // I N C L U D E S /////////////////////////////////////////////////
 
-#define _USE_MATH_DEFINES
-#include
-#include
-#include
-#include
-#include
-
-// Eigen
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
-#include
-
-// CUDA toolkit
-#include
-#include
-#include
-#include
-#include
+#include "CameraCUDA.h"
 
 // OpenCV
 #include
@@ -65,20 +49,12 @@
 
 namespace MVS {
 
-#if __CUDA_ARCH__ > 0
-#define __CDC__CUDA__ARCH__ 1
-#else
-#undef __CDC__CUDA__ARCH__
-#endif
-
 struct DepthData;
 
-class PatchMatchCUDA {
-public:
-	typedef Eigen::Matrix Point3;
-	typedef Eigen::Matrix Point4;
-	typedef Eigen::Matrix Matrix3;
+namespace CUDA {
+
+class PatchMatch {
+public:
 	struct Params {
 		int nNumViews = 5;
 		int nEstimationIters = 3;
@@ -90,17 +66,9 @@ public:
 		float fThresholdKeepCost = 0;
 	};
 
-	struct Camera {
-		Matrix3 K;
-		Matrix3 R;
-		Point3 C;
-		int height;
-		int width;
-	};
-
 public:
-	PatchMatchCUDA(int device=0);
-	~PatchMatchCUDA();
+	PatchMatch(int device=0);
+	~PatchMatch();
 
 	void Init(bool bGeomConsistency);
 	void Release();
@@ -138,6 +106,8 @@ public:
 };
 /*----------------------------------------------------------------*/
 
+} // namespace CUDA
+
 } // namespace MVS
 
 #endif // _MVS_PATCHMATCHCUDA_INL_
diff --git a/libs/MVS/SceneDensify.cpp b/libs/MVS/SceneDensify.cpp
index 9a1f1bf1b..f137feebc 100644
--- a/libs/MVS/SceneDensify.cpp
+++ b/libs/MVS/SceneDensify.cpp
@@ -2080,9 +2080,9 @@ bool Scene::ComputeDepthMaps(DenseDepthMapData& data)
 
 	#ifdef _USE_CUDA
 	// initialize CUDA
-	if (CUDA::desiredDeviceID >= -1 && data.nFusionMode >= 0) {
-		data.depthMaps.pmCUDA = new PatchMatchCUDA(CUDA::desiredDeviceID);
-		if (CUDA::devices.IsEmpty())
+	if (SEACAVE::CUDA::desiredDeviceID >= -1 && data.nFusionMode >= 0) {
+		data.depthMaps.pmCUDA = new MVS::CUDA::PatchMatch(SEACAVE::CUDA::desiredDeviceID);
+		if (SEACAVE::CUDA::devices.IsEmpty())
 			data.depthMaps.pmCUDA.Release();
 		else
 			data.depthMaps.pmCUDA->Init(false);
diff --git a/libs/MVS/SceneDensify.h b/libs/MVS/SceneDensify.h
index 7934ed8e9..97fc9cbbf 100644
--- a/libs/MVS/SceneDensify.h
+++ b/libs/MVS/SceneDensify.h
@@ -45,7 +45,9 @@ namespace MVS {
 // Forward declarations
 class MVS_API Scene;
 #ifdef _USE_CUDA
-class PatchMatchCUDA;
+namespace CUDA {
+class PatchMatch;
+} // namespace CUDA
 #endif // _USE_CUDA
 
 // structure used to compute all depth-maps
@@ -90,7 +92,7 @@ class MVS_API DepthMapsData
 
 	#ifdef _USE_CUDA
 	// used internally to estimate the depth-maps using CUDA
-	CAutoPtr pmCUDA;
+	CAutoPtr pmCUDA;
 	#endif // _USE_CUDA
 };
 /*----------------------------------------------------------------*/
diff --git a/libs/MVS/SceneRefineCUDA.cpp b/libs/MVS/SceneRefineCUDA.cpp
index 5fee76e0f..a8c671e4e 100644
--- a/libs/MVS/SceneRefineCUDA.cpp
+++ b/libs/MVS/SceneRefineCUDA.cpp
@@ -1951,10 +1951,10 @@ class MeshRefineCUDA {
 	struct View {
 		Image32F imageHost; // store temporarily the image pixels
 		Image8U::Size size;
-		CUDA::ArrayRT16F image;
-		CUDA::MemDevice depthMap;
-		CUDA::MemDevice faceMap;
-		CUDA::MemDevice baryMap;
+		SEACAVE::CUDA::ArrayRT16F image;
+		SEACAVE::CUDA::MemDevice depthMap;
+		SEACAVE::CUDA::MemDevice faceMap;
+		SEACAVE::CUDA::MemDevice baryMap;
 		inline View() {}
 		inline View(View&) {}
 	};
@@ -2000,8 +2000,8 @@ class MeshRefineCUDA {
 	void ImageMeshWarp(
 		const Camera& cameraA, const Camera& cameraB, const Image8U::Size& size,
 		uint32_t idxImageA, uint32_t idxImageB);
-	void ComputeLocalVariance(const CUDA::ArrayRT16F& image, const Image8U::Size& size,
-		CUDA::MemDevice& imageMean, CUDA::MemDevice& imageVar);
+	void ComputeLocalVariance(const SEACAVE::CUDA::ArrayRT16F& image, const Image8U::Size& size,
+		SEACAVE::CUDA::MemDevice& imageMean, SEACAVE::CUDA::MemDevice& imageVar);
 	void ComputeLocalZNCC(const Image8U::Size& size);
 	void ComputePhotometricGradient(const Camera& cameraA, const Camera& cameraB, const Image8U::Size& size,
 		uint32_t idxImageA, uint32_t idxImageB, uint32_t numVertices, float RegularizationScale);
@@ -2023,45 +2023,45 @@ class MeshRefineCUDA {
 	ViewsArr views; // views' data
 	PairIdxArr pairs; // image pairs used to refine the mesh
 
-	CUDA::ModuleRTPtr module;
-	CUDA::KernelRT kernelProjectMesh;
-	CUDA::KernelRT kernelCrossCheckProjection;
-	CUDA::KernelRT kernelImageMeshWarp;
-	CUDA::KernelRT kernelComputeImageMean;
-	CUDA::KernelRT kernelComputeImageVar;
-	CUDA::KernelRT kernelComputeImageCov;
-	CUDA::KernelRT kernelComputeImageZNCC;
-	CUDA::KernelRT kernelComputeImageDZNCC;
-	CUDA::KernelRT kernelComputePhotometricGradient;
-	CUDA::KernelRT kernelUpdatePhotoGradNorm;
-	CUDA::KernelRT kernelComputeSmoothnessGradient;
-	CUDA::KernelRT kernelCombineGradients;
-	CUDA::KernelRT kernelCombineAllGradients;
-
-	CUDA::MemDevice vertices;
-	CUDA::MemDevice vertexVertices;
-	CUDA::MemDevice faces;
-	CUDA::MemDevice faceNormals;
-	CUDA::TextureRT16F texImageRef;
-	CUDA::SurfaceRT16F surfImageRef;
-	CUDA::SurfaceRT16F surfImageProjRef;
-	CUDA::MemDevice mask;
-	CUDA::MemDevice imageMeanA;
-	CUDA::MemDevice imageVarA;
-	CUDA::ArrayRT16F imageAB;
-	CUDA::MemDevice imageMeanAB;
-	CUDA::MemDevice imageVarAB;
-	CUDA::MemDevice imageCov;
-	CUDA::MemDevice imageZNCC;
-	CUDA::MemDevice imageDZNCC;
-	CUDA::MemDevice photoGrad;
-	CUDA::MemDevice photoGradNorm;
-	CUDA::MemDevice photoGradPixels;
-	CUDA::MemDevice vertexVerticesCont;
-	CUDA::MemDevice vertexVerticesSizes;
-	CUDA::MemDevice vertexVerticesPointers;
-	CUDA::MemDevice smoothGrad1;
-	CUDA::MemDevice smoothGrad2;
+	SEACAVE::CUDA::ModuleRTPtr module;
+	SEACAVE::CUDA::KernelRT kernelProjectMesh;
+	SEACAVE::CUDA::KernelRT kernelCrossCheckProjection;
+	SEACAVE::CUDA::KernelRT kernelImageMeshWarp;
+	SEACAVE::CUDA::KernelRT kernelComputeImageMean;
+	SEACAVE::CUDA::KernelRT kernelComputeImageVar;
+	SEACAVE::CUDA::KernelRT kernelComputeImageCov;
+	SEACAVE::CUDA::KernelRT kernelComputeImageZNCC;
+	SEACAVE::CUDA::KernelRT kernelComputeImageDZNCC;
+	SEACAVE::CUDA::KernelRT kernelComputePhotometricGradient;
+	SEACAVE::CUDA::KernelRT kernelUpdatePhotoGradNorm;
+	SEACAVE::CUDA::KernelRT kernelComputeSmoothnessGradient;
+	SEACAVE::CUDA::KernelRT kernelCombineGradients;
+	SEACAVE::CUDA::KernelRT kernelCombineAllGradients;
+
+	SEACAVE::CUDA::MemDevice vertices;
+	SEACAVE::CUDA::MemDevice vertexVertices;
+	SEACAVE::CUDA::MemDevice faces;
+	SEACAVE::CUDA::MemDevice faceNormals;
+	SEACAVE::CUDA::TextureRT16F texImageRef;
+	SEACAVE::CUDA::SurfaceRT16F surfImageRef;
+	SEACAVE::CUDA::SurfaceRT16F surfImageProjRef;
+	SEACAVE::CUDA::MemDevice mask;
+	SEACAVE::CUDA::MemDevice imageMeanA;
+	SEACAVE::CUDA::MemDevice imageVarA;
+	SEACAVE::CUDA::ArrayRT16F imageAB;
+	SEACAVE::CUDA::MemDevice imageMeanAB;
+	SEACAVE::CUDA::MemDevice imageVarAB;
+	SEACAVE::CUDA::MemDevice imageCov;
+	SEACAVE::CUDA::MemDevice imageZNCC;
+	SEACAVE::CUDA::MemDevice imageDZNCC;
+	SEACAVE::CUDA::MemDevice photoGrad;
+	SEACAVE::CUDA::MemDevice photoGradNorm;
+	SEACAVE::CUDA::MemDevice photoGradPixels;
+	SEACAVE::CUDA::MemDevice vertexVerticesCont;
+	SEACAVE::CUDA::MemDevice vertexVerticesSizes;
+	SEACAVE::CUDA::MemDevice vertexVerticesPointers;
+	SEACAVE::CUDA::MemDevice smoothGrad1;
+	SEACAVE::CUDA::MemDevice smoothGrad2;
 
 	enum { HalfSize = 2 }; // half window size used to compute ZNCC
 };
@@ -2076,7 +2076,7 @@ MeshRefineCUDA::MeshRefineCUDA(Scene& _scene, unsigned _nAlternatePair, float _w
 	scene(_scene), images(_scene.images)
 {
-	if (!InitKernels(CUDA::desiredDeviceID))
+	if (!InitKernels(SEACAVE::CUDA::desiredDeviceID))
 		return;
 	// keep only best neighbor views for each image
 	std::unordered_set mapPairs;
@@ -2110,13 +2110,13 @@ bool MeshRefineCUDA::InitKernels(int device)
 	STATIC_ASSERT(sizeof(CameraCUDA) == 176);
 
 	// initialize CUDA device if needed
-	if (CUDA::devices.IsEmpty() && CUDA::initDevice(device) != CUDA_SUCCESS)
+	if (SEACAVE::CUDA::devices.IsEmpty() && SEACAVE::CUDA::initDevice(device) != CUDA_SUCCESS)
 		return false;
 
 	// initialize CUDA kernels
 	if (module != NULL && module->IsValid())
 		return true;
-	module = new CUDA::ModuleRT(g_szMeshRefineModule);
+	module = new SEACAVE::CUDA::ModuleRT(g_szMeshRefineModule);
 	if (!module->IsValid()) {
 		module.Release();
 		return false;
 	}
@@ -2615,8 +2615,8 @@ void MeshRefineCUDA::ImageMeshWarp(
 }
 
 // compute local variance for each image pixel
-void MeshRefineCUDA::ComputeLocalVariance(const CUDA::ArrayRT16F& image, const Image8U::Size& size,
-	CUDA::MemDevice& imageMean, CUDA::MemDevice& imageVar)
+void MeshRefineCUDA::ComputeLocalVariance(const SEACAVE::CUDA::ArrayRT16F& image, const Image8U::Size& size,
+	SEACAVE::CUDA::MemDevice& imageMean, SEACAVE::CUDA::MemDevice& imageVar)
 {
 	surfImageRef.Bind(image);
 	reportCudaError(kernelComputeImageMean(size,