diff --git a/apps/DensifyPointCloud/DensifyPointCloud.cpp b/apps/DensifyPointCloud/DensifyPointCloud.cpp
index 82b7d5daa..7637e60bb 100644
--- a/apps/DensifyPointCloud/DensifyPointCloud.cpp
+++ b/apps/DensifyPointCloud/DensifyPointCloud.cpp
@@ -109,7 +109,7 @@ bool Application::Initialize(size_t argc, LPCTSTR* argv)
 			), "verbosity level")
 		#endif
 		#ifdef _USE_CUDA
-		("cuda-device", boost::program_options::value(&CUDA::desiredDeviceID)->default_value(-1), "CUDA device number to be used for depth-map estimation (-2 - CPU processing, -1 - best GPU, >=0 - device index)")
+		("cuda-device", boost::program_options::value(&SEACAVE::CUDA::desiredDeviceID)->default_value(-1), "CUDA device number to be used for depth-map estimation (-2 - CPU processing, -1 - best GPU, >=0 - device index)")
 		#endif
 		;
diff --git a/apps/ReconstructMesh/ReconstructMesh.cpp b/apps/ReconstructMesh/ReconstructMesh.cpp
index f7c0f6d83..e4fc7bf6a 100644
--- a/apps/ReconstructMesh/ReconstructMesh.cpp
+++ b/apps/ReconstructMesh/ReconstructMesh.cpp
@@ -118,7 +118,7 @@ bool Application::Initialize(size_t argc, LPCTSTR* argv)
 			), "verbosity level")
 		#endif
 		#ifdef _USE_CUDA
-		("cuda-device", boost::program_options::value(&CUDA::desiredDeviceID)->default_value(-1), "CUDA device number to be used to reconstruct the mesh (-2 - CPU processing, -1 - best GPU, >=0 - device index)")
+		("cuda-device", boost::program_options::value(&SEACAVE::CUDA::desiredDeviceID)->default_value(-1), "CUDA device number to be used to reconstruct the mesh (-2 - CPU processing, -1 - best GPU, >=0 - device index)")
 		#endif
 		;
diff --git a/apps/RefineMesh/RefineMesh.cpp b/apps/RefineMesh/RefineMesh.cpp
index 3aa6230c5..88c2dc05b 100644
--- a/apps/RefineMesh/RefineMesh.cpp
+++ b/apps/RefineMesh/RefineMesh.cpp
@@ -108,7 +108,7 @@ bool Application::Initialize(size_t argc, LPCTSTR* argv)
 			), "verbosity level")
 		#endif
 		#ifdef _USE_CUDA
-		("cuda-device", boost::program_options::value(&CUDA::desiredDeviceID)->default_value(-2), "CUDA device number to be used for mesh refinement (-2 - CPU processing, -1 - best GPU, >=0 - device index)")
+		("cuda-device", boost::program_options::value(&SEACAVE::CUDA::desiredDeviceID)->default_value(-2), "CUDA device number to be used for mesh refinement (-2 - CPU processing, -1 - best GPU, >=0 - device index)")
 		#endif
 		;
@@ -229,7 +229,7 @@ int main(int argc, LPCTSTR* argv)
 	}
 	TD_TIMER_START();
 	#ifdef _USE_CUDA
-	if (CUDA::desiredDeviceID < -1 ||
+	if (SEACAVE::CUDA::desiredDeviceID < -1 ||
 		!scene.RefineMeshCUDA(OPT::nResolutionLevel, OPT::nMinResolution, OPT::nMaxViews,
 			OPT::fDecimateMesh, OPT::nCloseHoles, OPT::nEnsureEdgeSize, OPT::nMaxFaceArea,
diff --git a/apps/TextureMesh/TextureMesh.cpp b/apps/TextureMesh/TextureMesh.cpp
index 703e4b1a8..75cb0f50d 100644
--- a/apps/TextureMesh/TextureMesh.cpp
+++ b/apps/TextureMesh/TextureMesh.cpp
@@ -110,7 +110,7 @@ bool Application::Initialize(size_t argc, LPCTSTR* argv)
 			), "verbosity level")
 		#endif
 		#ifdef _USE_CUDA
-		("cuda-device", boost::program_options::value(&CUDA::desiredDeviceID)->default_value(-1), "CUDA device number to be used to texture the mesh (-2 - CPU processing, -1 - best GPU, >=0 - device index)")
+		("cuda-device", boost::program_options::value(&SEACAVE::CUDA::desiredDeviceID)->default_value(-1), "CUDA device number to be used to texture the mesh (-2 - CPU processing, -1 - best GPU, >=0 - device index)")
 		#endif
 		;
diff --git a/libs/Common/Config.h b/libs/Common/Config.h
index d5f7dddec..e22f946a7 100644
--- a/libs/Common/Config.h
+++ b/libs/Common/Config.h
@@ -206,9 +206,6 @@
 # define FORCEINLINE inline
 #endif
 
-#ifndef _SUPPORT_CPP11
-# define constexpr inline
-#endif
 #ifdef _SUPPORT_CPP17
 # undef MAYBEUNUSED
 # define MAYBEUNUSED [[maybe_unused]]
@@ -224,37 +221,36 @@
 
 #ifdef _MSC_VER
 #define _DEBUGINFO
-#define _CRTDBG_MAP_ALLOC //enable this to show also the filename (DEBUG_NEW should also be defined in each file)
+#define _CRTDBG_MAP_ALLOC //enable this to show also the filename (DEBUG_NEW should also be defined in each file)
 #include
 #include
 #ifdef _INC_CRTDBG
-#define ASSERT(exp) {if (!(exp) && 1 == _CrtDbgReport(_CRT_ASSERT, __FILE__, __LINE__, NULL, #exp)) _CrtDbgBreak();}
+#define ASSERT(exp, ...) {if (!(exp) && 1 == _CrtDbgReport(_CRT_ASSERT, __FILE__, __LINE__, NULL, #exp)) _CrtDbgBreak();}
 #else
-#define ASSERT(exp) {if (!(exp)) __debugbreak();}
+#define ASSERT(exp, ...) {if (!(exp)) __debugbreak();}
 #endif // _INC_CRTDBG
 #define TRACE(...) {TCHAR buffer[2048]; _sntprintf(buffer, 2048, __VA_ARGS__); OutputDebugString(buffer);}
 #else // _MSC_VER
 #include
-#define ASSERT(exp) assert(exp)
+#define ASSERT(exp, ...) assert(exp)
 #define TRACE(...)
 #endif // _MSC_VER
 #else
 #ifdef _RELEASE
-#define ASSERT(exp)
+#define ASSERT(exp, ...)
 #else
 #ifdef _MSC_VER
-#define ASSERT(exp) {if (!(exp)) __debugbreak();}
+#define ASSERT(exp, ...) {if (!(exp)) __debugbreak();}
 #else // _MSC_VER
-#define ASSERT(exp) {if (!(exp)) __builtin_trap();}
+#define ASSERT(exp, ...) {if (!(exp)) __builtin_trap();}
 #endif // _MSC_VER
 #endif
 #define TRACE(...)
 #endif // _DEBUG
 
-#define ASSERTM(exp, msg) ASSERT(exp)
 
 namespace SEACAVE_ASSERT {
diff --git a/libs/Common/FastDelegate.h b/libs/Common/FastDelegate.h
index 26a565bfb..72206cb35 100644
--- a/libs/Common/FastDelegate.h
+++ b/libs/Common/FastDelegate.h
@@ -26,12 +26,6 @@
 #include
 #include
 
-// VC work around for constexpr and noexcept: VC2013 and below do not support these 2 keywords
-#if defined(_MSC_VER) && (_MSC_VER <= 1800)
-#define constexpr const
-#define noexcept throw()
-#endif
-
 namespace fastdelegate {
diff --git a/libs/Common/UtilCUDA.h b/libs/Common/UtilCUDA.h
index d04b04717..6cdbb86a3 100644
--- a/libs/Common/UtilCUDA.h
+++ b/libs/Common/UtilCUDA.h
@@ -67,8 +67,8 @@ inline CUresult __reportCudaError(CUresult result, LPCSTR errorMessage) {
 	ASSERT("CudaError" == NULL);
 	return result;
 }
-#define reportCudaError(val) CUDA::__reportCudaError(val, #val)
-#define checkCudaError(val) { const CUresult ret(CUDA::__reportCudaError(val, #val)); if (ret != CUDA_SUCCESS) return ret; }
+#define reportCudaError(val) SEACAVE::CUDA::__reportCudaError(val, #val)
+#define checkCudaError(val) { const CUresult ret(SEACAVE::CUDA::__reportCudaError(val, #val)); if (ret != CUDA_SUCCESS) return ret; }
 
 // outputs the proper CUDA error code and abort in the event that a CUDA host call returns an error
 inline void __ensureCudaResult(CUresult result, LPCSTR errorMessage) {
@@ -77,7 +77,7 @@ inline void __ensureCudaResult(CUresult result, LPCSTR errorMessage) {
 	ASSERT("CudaAbort" == NULL);
 	exit(EXIT_FAILURE);
 }
-#define ensureCudaResult(val) CUDA::__ensureCudaResult(val, #val)
+#define ensureCudaResult(val) SEACAVE::CUDA::__ensureCudaResult(val, #val)
 /*----------------------------------------------------------------*/
 
 // rounds up addr to the align boundary
diff --git a/libs/Common/UtilCUDADevice.h b/libs/Common/UtilCUDADevice.h
index 46933650d..40791f8a1 100644
--- a/libs/Common/UtilCUDADevice.h
+++ b/libs/Common/UtilCUDADevice.h
@@ -24,6 +24,12 @@
 
 // D E F I N E S ///////////////////////////////////////////////////
 
+#if __CUDA_ARCH__ > 0
+#define __CDC__CUDA__ARCH__ 1
+#else
+#undef __CDC__CUDA__ARCH__
+#endif
+
 #ifndef VERBOSE
 #define DEFINE_VERBOSE 1
 #define VERBOSE(...) fprintf(stderr, __VA_ARGS__)
diff --git a/libs/MVS/CameraCUDA.h b/libs/MVS/CameraCUDA.h
new file mode 100644
index 000000000..c4260ebf4
--- /dev/null
+++ b/libs/MVS/CameraCUDA.h
@@ -0,0 +1,171 @@
+/*
+* CameraCUDA.h
+*
+* Copyright (c) 2014-2024 SEACAVE
+*
+* Author(s):
+*
+*      cDc
+*
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see .
+*
+*
+* Additional Terms:
+*
+*      You are required to preserve legal notices and author attributions in
+*      that material or in the Appropriate Legal Notices displayed by works
+*      containing it.
+*/
+
+#ifndef _MVS_CAMERACUDA_H_
+#define _MVS_CAMERACUDA_H_
+
+
+// I N C L U D E S /////////////////////////////////////////////////
+
+#define _USE_MATH_DEFINES
+#include
+#include
+#include
+#include
+#include
+
+// Eigen
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#include
+
+// CUDA toolkit
+#include
+#include
+#include
+#include
+#include
+
+#include "../Common/UtilCUDADevice.h"
+
+
+// D E F I N E S ///////////////////////////////////////////////////
+
+
+// S T R U C T S ///////////////////////////////////////////////////
+
+namespace MVS {
+
+namespace CUDA {
+
+typedef Eigen::Matrix Point2i;
+typedef Eigen::Matrix Point2;
+typedef Eigen::Matrix Point3;
+typedef Eigen::Matrix Point4;
+typedef Eigen::Matrix Matrix3;
+
+// Linear camera model
+struct LinearCameraModel {
+	Point2 f; // focal length
+	Point2 p; // principal point
+
+	__host__ __device__ LinearCameraModel() {}
+	__host__ __device__ LinearCameraModel(float fx, float fy, float cx, float cy) :
+		f(fx, fy), p(cx, cy) {}
+	__host__ __device__ LinearCameraModel(const Matrix3& K) :
+		f(K(0,0), K(1,1)), p(K(0,2), K(1,2)) { ASSERT(K(0,1) == 0); }
+
+	__host__ __device__ inline Matrix3 K() const {
+		Matrix3 M; M <<
+			f.x(), 0, p.x(),
+			0, f.y(), p.y(),
+			0, 0, 1;
+		return M;
+	}
+
+	// transform a point in image space to camera space
+	__host__ __device__ inline Point2 NormalizePoint(const Point2& x) const {
+		return Point2(
+			(x.x() - p.x()) / f.x(),
+			(x.y() - p.y()) / f.y());
+	}
+
+	// project a point in camera space to image space
+	__host__ __device__ inline Point2 TransformPointC2I(const Point3& X) const {
+		return Point2(
+			f.x() * X.x() / X.z() + p.x(),
+			f.y() * X.y() / X.z() + p.y());
+	}
+
+	// back-project a point in image space to camera space
+	__host__ __device__ inline Point3 TransformPointI2C(const Point2& x, const float depth = 1.f) const {
+		return Point3(
+			depth * (x.x() - p.x()) / f.x(),
+			depth * (x.y() - p.y()) / f.y(),
+			depth);
+	}
+
+	// compute camera ray direction for the given pixel
+	__host__ __device__ inline Point3 ViewDirection(const Point2i& x) const {
+		return TransformPointI2C(x.cast()).normalized();
+	}
+};
+/*----------------------------------------------------------------*/
+
+// Camera pose
+struct Pose {
+	Matrix3 R; // rotation matrix
+	Point3 C; // camera center
+
+	__host__ __device__ Pose() {}
+	__host__ __device__ Pose(const Matrix3& R, const Point3& C) :
+		R(R), C(C) {}
+
+	// transform a 3D point from world space to camera space
+	__host__ __device__ inline Point3 TransformPointW2C(const Point3& X) const {
+		return R * (X - C);
+	}
+
+	// transform a 3D point in camera space to world space
+	__host__ __device__ inline Point3 TransformPointC2W(const Point3& X) const {
+		return R.transpose() * X + C;
+	}
+};
+/*----------------------------------------------------------------*/
+
+// Camera view
+struct Camera {
+	LinearCameraModel model;
+	Pose pose;
+	Point2i size;
+
+	__host__ __device__ Camera() {}
+	__host__ __device__ Camera(const LinearCameraModel& model, const Pose& pose, int width=0, int height=0) :
+		model(model), pose(pose), size(width, height) {}
+	__host__ __device__ Camera(const Matrix3& K, const Matrix3& R, const Point3& C, int width=0, int height=0) :
+		model(K), pose(R, C), size(width, height) {}
+
+	// project a 3D point in world space to image space
+	__host__ __device__ inline Point2 TransformPointW2I(const Point3& X) const {
+		return model.TransformPointC2I(pose.TransformPointW2C(X));
+	}
+
+	// back-project a point in image space to 3D point in world space
+	__host__ __device__ inline Point3 TransformPointI2W(const Point2& x, const float depth = 1.f) const {
+		return pose.TransformPointC2W(model.TransformPointI2C(x, depth));
+	}
+};
+/*----------------------------------------------------------------*/
+
+} // namespace CUDA
+
+} // namespace MVS
+
+#endif // _MVS_CAMERACUDA_H_
diff --git a/libs/MVS/Mesh.h b/libs/MVS/Mesh.h
index 648504994..8e6df53f0 100644
--- a/libs/MVS/Mesh.h
+++ b/libs/MVS/Mesh.h
@@ -137,13 +137,13 @@ class MVS_API Mesh
 	Image8U3Arr texturesDiffuse; // textures containing the diffuse color (optional)
 
 	#ifdef _USE_CUDA
-	static CUDA::KernelRT kernelComputeFaceNormal;
+	static SEACAVE::CUDA::KernelRT kernelComputeFaceNormal;
 	#endif
 
public:
 	#ifdef _USE_CUDA
 	inline Mesh() {
-		InitKernels(CUDA::desiredDeviceID);
+		InitKernels(SEACAVE::CUDA::desiredDeviceID);
 	}
 	#endif
diff --git a/libs/MVS/PatchMatchCUDA.cpp b/libs/MVS/PatchMatchCUDA.cpp
index ea12344de..5677f64b5 100644
--- a/libs/MVS/PatchMatchCUDA.cpp
+++ b/libs/MVS/PatchMatchCUDA.cpp
@@ -1,5 +1,5 @@
 /*
-* PatchMatchCUDA.cpp
+* PatchMatch.cpp
 *
 * Copyright (c) 2014-2021 SEACAVE
 *
@@ -35,7 +35,6 @@
 
 #ifdef _USE_CUDA
 
-using namespace MVS;
 
 
 // D E F I N E S ///////////////////////////////////////////////////
 
@@ -43,19 +42,23 @@ using namespace MVS;
 
 // S T R U C T S /////////////////////////////////////////////////
 
-PatchMatchCUDA::PatchMatchCUDA(int device)
+namespace MVS {
+
+namespace CUDA {
+
+PatchMatch::PatchMatch(int device)
 {
 	// initialize CUDA device if needed
-	if (CUDA::devices.IsEmpty())
-		CUDA::initDevice(device);
+	if (SEACAVE::CUDA::devices.IsEmpty())
+		SEACAVE::CUDA::initDevice(device);
 }
 
-PatchMatchCUDA::~PatchMatchCUDA()
+PatchMatch::~PatchMatch()
 {
 	Release();
 }
 
-void PatchMatchCUDA::Release()
+void PatchMatch::Release()
 {
 	if (images.empty())
 		return;
@@ -80,7 +83,7 @@ void PatchMatchCUDA::Release()
 	ReleaseCUDA();
 }
 
-void PatchMatchCUDA::ReleaseCUDA()
+void PatchMatch::ReleaseCUDA()
 {
 	cudaFree(cudaTextureImages);
 	cudaFree(cudaCameras);
@@ -94,7 +97,7 @@ void PatchMatchCUDA::ReleaseCUDA()
 	delete[] depthNormalEstimates;
 }
 
-void PatchMatchCUDA::Init(bool bGeomConsistency)
+void PatchMatch::Init(bool bGeomConsistency)
 {
 	if (bGeomConsistency) {
 		params.bGeomConsistency = true;
@@ -105,7 +108,7 @@
 	}
 }
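The MVS::CUDA::LinearCameraModel/Pose/Camera types introduced in CameraCUDA.h above replace the free device helpers (ProjectPoint, BackProjectPointCamera, BackProjectPoint, ViewDirection) that the PatchMatchCUDA.cu hunks below delete. A minimal host-side sketch of how the new types compose, assuming nvcc compilation, that the elided Eigen typedef arguments resolve to the int/float vector types used throughout, and with made-up intrinsics and pose values:

// Illustrative only; not part of the patch.
#include "CameraCUDA.h"
#include <cstdio>

int main() {
	using namespace MVS::CUDA;
	const LinearCameraModel model(1000.f, 1000.f, 960.f, 540.f); // fx, fy, cx, cy
	const Pose pose(Matrix3::Identity(), Point3(0.f, 0.f, -2.f)); // rotation R, camera center C
	const Camera camera(model, pose, 1920, 1080);                 // width, height
	const Point3 X(0.1f, -0.2f, 1.5f);                            // world point
	const Point3 Xc = pose.TransformPointW2C(X);                  // camera space; depth = Xc.z()
	const Point2 x = camera.TransformPointW2I(X);                 // project to pixel coordinates
	const Point3 Xb = camera.TransformPointI2W(x, Xc.z());        // back-project at the same depth
	std::printf("round-trip error: %g\n", (X - Xb).norm());       // expected ~0
	return 0;
}

Projecting and then back-projecting at the recovered depth is the identity, which is why the kernels below can swap the old free functions for these members one-for-one.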
-void PatchMatchCUDA::AllocatePatchMatchCUDA(const cv::Mat1f& image)
+void PatchMatch::AllocatePatchMatchCUDA(const cv::Mat1f& image)
 {
 	const size_t num_images = images.size();
 	CUDA_CHECK(cudaMalloc((void**)&cudaTextureImages, sizeof(cudaTextureObject_t) * num_images));
@@ -122,7 +125,7 @@ void PatchMatchCUDA::AllocatePatchMatchCUDA(const cv::Mat1f& image)
 	CUDA_CHECK(cudaMalloc((void**)&cudaRandStates, sizeof(curandState) * size));
 }
 
-void PatchMatchCUDA::AllocateImageCUDA(size_t i, const cv::Mat1f& image, bool bInitImage, bool bInitDepthMap)
+void PatchMatch::AllocateImageCUDA(size_t i, const cv::Mat1f& image, bool bInitImage, bool bInitDepthMap)
 {
 	const cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
@@ -171,7 +174,7 @@ void PatchMatchCUDA::AllocateImageCUDA(size_t i, const cv::Mat1f& image, bool bI
 	}
 }
 
-void PatchMatchCUDA::EstimateDepthMap(DepthData& depthData)
+void PatchMatch::EstimateDepthMap(DepthData& depthData)
 {
 	TD_TIMER_STARTD();
@@ -253,13 +256,12 @@ void PatchMatchCUDA::EstimateDepthMap(DepthData& depthData)
 	for (IIndex i = 0; i < numImages; ++i) {
 		const DepthData::ViewData& view = depthData.images[i];
-		Image32F image = view.image;
-		Camera camera;
-		camera.K = Eigen::Map(view.camera.K.val).cast();
-		camera.R = Eigen::Map(view.camera.R.val).cast();
-		camera.C = Eigen::Map(view.camera.C.ptr()).cast();
-		camera.height = image.rows;
-		camera.width = image.cols;
+		const Image32F image = view.image;
+		const Camera camera(
+			Eigen::Map(view.camera.K.val).cast(),
+			Eigen::Map(view.camera.R.val).cast(),
+			Eigen::Map(view.camera.C.ptr()).cast(),
+			image.cols, image.rows);
 		// store camera and image
 		if (i == 0 && (prevNumImages < numImages || images[0].size() != image.size())) {
 			// allocate/reallocate PatchMatch CUDA memory
@@ -416,4 +418,8 @@ void PatchMatchCUDA::EstimateDepthMap(DepthData& depthData)
 }
 /*----------------------------------------------------------------*/
 
+} // namespace CUDA
+
+} // namespace MVS
+
 #endif // _USE_CUDA
diff --git a/libs/MVS/PatchMatchCUDA.cu b/libs/MVS/PatchMatchCUDA.cu
index 26a9c15d6..4531f8456 100644
--- a/libs/MVS/PatchMatchCUDA.cu
+++ b/libs/MVS/PatchMatchCUDA.cu
@@ -43,32 +43,29 @@
 // patch stepping
 #define nSizeStep 2
 
-using namespace MVS;
 
-typedef Eigen::Matrix Point2i;
-typedef Eigen::Matrix Point2;
-typedef Eigen::Matrix Point3;
-typedef Eigen::Matrix Point4;
-typedef Eigen::Matrix Matrix3;
+namespace MVS {
+
+namespace CUDA {
 
 #define ImagePixels cudaTextureObject_t
 #define RandState curandState
 
 // square the given value
-__device__ inline constexpr float Square(float v) {
+__device__ constexpr float Square(float v) {
 	return v * v;
 }
 
 // set/check a bit
-__device__ inline constexpr void SetBit(unsigned& input, unsigned i) {
+__device__ constexpr void SetBit(unsigned& input, unsigned i) {
 	input |= (1u << i);
 }
-__device__ inline constexpr int IsBitSet(unsigned input, unsigned i) {
+__device__ constexpr int IsBitSet(unsigned input, unsigned i) {
 	return (input >> i) & 1u;
 }
 
 // swap the given values
-__device__ inline constexpr void Swap(float& v0, float& v1) {
+__device__ constexpr void Swap(float& v0, float& v1) {
 	const float tmp = v0;
 	v0 = v1;
 	v1 = tmp;
@@ -82,27 +79,6 @@ __device__ inline Point2i Idx2Point(int idx, int width) {
 	return Point2i(idx % width, idx / width);
 }
 
-// project and back-project a 3D point
-__device__ inline Point2 ProjectPoint(const PatchMatchCUDA::Camera& camera, const Point3& X) {
-	const Point3 x = camera.K * camera.R * (X - camera.C);
-	return x.hnormalized();
-}
-__device__ inline Point3 BackProjectPointCamera(const PatchMatchCUDA::Camera& camera, const Point2& p, const float depth = 1.f) {
-	return Point3(
-		depth * (p.x() - camera.K(0,2)) / camera.K(0,0),
-		depth * (p.y() - camera.K(1,2)) / camera.K(1,1),
-		depth);
-}
-__device__ inline Point3 BackProjectPoint(const PatchMatchCUDA::Camera& camera, const Point2& p, const float depth) {
-	const Point3 camX = BackProjectPointCamera(camera, p, depth);
-	return camera.R.transpose() * camX + camera.C;
-}
-
-// compute camera ray direction for the given pixel
-__device__ inline Point3 ViewDirection(const PatchMatchCUDA::Camera& camera, const Point2i& p) {
-	return BackProjectPointCamera(camera, p.cast()).normalized();
-}
-
 // sort the given values array using bubble sort algorithm
 __device__ inline void Sort(const float* values, float* sortedValues, int n) {
 	for (int i = 0; i < n; ++i)
@@ -149,7 +125,7 @@
 
 // generate a random normal
-__device__ inline Point3 GenerateRandomNormal(const PatchMatchCUDA::Camera& camera, const Point2i& p, RandState* randState)
+__device__ inline Point3 GenerateRandomNormal(const CUDA::Camera& camera, const Point2i& p, RandState* randState)
 {
 	float q1, q2, s;
 	do {
@@ -163,16 +139,16 @@
 		2.f * q2 * sq,
 		1.f - 2.f * s);
 
-	const Point3 viewDirection = ViewDirection(camera, p);
+	const Point3 viewDirection = camera.model.ViewDirection(p);
 	if (normal.dot(viewDirection) > 0.f)
 		normal = -normal;
 	return normal.normalized();
 }
 
 // randomly perturb a normal
-__device__ inline Point3 GeneratePerturbedNormal(const PatchMatchCUDA::Camera& camera, const Point2i& p, const Point3& normal, RandState* randState, const float perturbation)
+__device__ inline Point3 GeneratePerturbedNormal(const CUDA::Camera& camera, const Point2i& p, const Point3& normal, RandState* randState, const float perturbation)
 {
-	const Point3 viewDirection = ViewDirection(camera, p);
+	const Point3 viewDirection = camera.model.ViewDirection(p);
 	const float a1 = (curand_uniform(randState) - 0.5f) * perturbation;
 	const float a2 = (curand_uniform(randState) - 0.5f) * perturbation;
@@ -203,7 +179,7 @@
 }
 
 // randomly perturb a normal
-__device__ inline float GeneratePerturbedDepth(float depth, RandState* randState, const float perturbation, const PatchMatchCUDA::Params& params)
+__device__ inline float GeneratePerturbedDepth(float depth, RandState* randState, const float perturbation, const PatchMatch::Params& params)
 {
 	const float depthMinPerturbed = (1.f - perturbation) * depth;
 	const float depthMaxPerturbed = (1.f + perturbation) * depth;
@@ -215,34 +191,34 @@
 }
 
 // interpolate given pixel's estimate to the current position
-__device__ inline float InterpolatePixel(const PatchMatchCUDA::Camera& camera, const Point2i& p, const Point2i& np, float depth, const Point3& normal, const PatchMatchCUDA::Params& params)
+__device__ inline float InterpolatePixel(const CUDA::Camera& camera, const Point2i& p, const Point2i& np, float depth, const Point3& normal, const PatchMatch::Params& params)
 {
 	float depthNew;
 	if (p.x() == np.x()) {
-		const float nx1 = (p.y() - camera.K(1,2)) / camera.K(1,1);
+		const float nx1 = (p.y() - camera.model.p.y()) / camera.model.f.y();
 		const float denom = normal.z() + nx1 * normal.y();
 		if (abs(denom) < FLT_EPSILON)
 			return depth;
-		const float x1 = (np.y() - camera.K(1,2)) / camera.K(1,1);
+		const float x1 = (np.y() - camera.model.p.y()) / camera.model.f.y();
 		const float nom = depth * (normal.z() + x1 * normal.y());
 		depthNew = nom / denom;
 	} else if (p.y() == np.y()) {
-		const float nx1 = (p.x() - camera.K(0,2)) / camera.K(0,0);
+		const float nx1 = (p.x() - camera.model.p.x()) / camera.model.f.x();
 		const float denom = normal.z() + nx1 * normal.x();
 		if (abs(denom) < FLT_EPSILON)
 			return depth;
-		const float x1 = (np.x() - camera.K(0,2)) / camera.K(0,0);
+		const float x1 = (np.x() - camera.model.p.x()) / camera.model.f.x();
 		const float nom = depth * (normal.z() + x1 * normal.x());
 		depthNew = nom / denom;
 	} else {
-		const float planeD = normal.dot(BackProjectPointCamera(camera, np.cast(), depth));
-		depthNew = planeD / normal.dot(BackProjectPointCamera(camera, p.cast()));
+		const float planeD = normal.dot(camera.model.TransformPointI2C(np.cast(), depth));
+		depthNew = planeD / normal.dot(camera.model.TransformPointI2C(p.cast()));
 	}
 	return (depthNew >= params.fDepthMin && depthNew <= params.fDepthMax) ? depthNew : depth;
 }
 
 // compute normal to the surface given the 4 neighbors
-__device__ inline Point3 ComputeDepthGradient(const Matrix3& K, float depth, const Point2i& pos, const Point4& ndepth) {
+__device__ inline Point3 ComputeDepthGradient(const LinearCameraModel& model, float depth, const Point2i& pos, const Point4& ndepth) {
 	constexpr float2 nposg[4] = {{0,-1}, {0,1}, {-1,0}, {1,0}};
 	Point2 dg(0,0);
 	// add neighbor depths at the gradient locations
@@ -252,19 +228,19 @@ __device__ inline Point3 ComputeDepthGradient(const Matrix3& K, float depth, con
 	const Point2 d = dg*0.5f;
 	// compute normal from depth gradient
 	return Point3(
-		K(0,0)*d.x(),
-		K(1,1)*d.y(),
-		(K(0,2)-pos.x())*d.x()+(K(1,2)-pos.y())*d.y()-depth).normalized();
+		model.f.x()*d.x(),
+		model.f.y()*d.y(),
+		(model.p.x()-pos.x())*d.x()+(model.p.y()-pos.y())*d.y()-depth).normalized();
 }
 
 // compose tho homography matrix that transforms a point from reference to source camera through the given plane
-__device__ inline Matrix3 ComputeHomography(const PatchMatchCUDA::Camera& refCamera, const PatchMatchCUDA::Camera& trgCamera, const Point2& p, const Point4& plane)
+__device__ inline Matrix3 ComputeHomography(const CUDA::Camera& refCamera, const CUDA::Camera& trgCamera, const Point2& p, const Point4& plane)
 {
-	const Point3 X = BackProjectPointCamera(refCamera, p, plane.w());
+	const Point3 X = refCamera.model.TransformPointI2C(p, plane.w());
 	const Point3 normal = plane.topLeftCorner<3,1>();
-	const Point3 t = (refCamera.C - trgCamera.C) / (normal.dot(X));
-	const Matrix3 H = trgCamera.R * (refCamera.R.transpose() + t*normal.transpose());
-	return trgCamera.K * H * refCamera.K.inverse();
+	const Point3 t = (refCamera.pose.C - trgCamera.pose.C) / (normal.dot(X));
+	const Matrix3 H = trgCamera.pose.R * (refCamera.pose.R.transpose() + t*normal.transpose());
+	return trgCamera.model.K() * H * refCamera.model.K().inverse();
 }
 
 // weight a neighbor texel based on color similarity and distance to the center texel
@@ -278,31 +254,31 @@ __device__ inline float ComputeBilateralWeight(int xDist, int yDist, float pix,
 }
 
 // compute the geometric consistency weight
-__device__ inline float GeometricConsistencyWeight(const ImagePixels depthImage, const PatchMatchCUDA::Camera& refCamera, const PatchMatchCUDA::Camera& trgCamera, const Point4& plane, const Point2i& p)
+__device__ inline float GeometricConsistencyWeight(const ImagePixels depthImage, const CUDA::Camera& refCamera, const CUDA::Camera& trgCamera, const Point4& plane, const Point2i& p)
 {
 	if (depthImage == NULL)
 		return 0.f;
 	constexpr float maxDist = 4.f;
-	const Point3 forwardPoint = BackProjectPoint(refCamera, p.cast(), plane.w());
-	const Point2 trgPt = ProjectPoint(trgCamera, forwardPoint);
+	const Point3 forwardPoint = refCamera.TransformPointI2W(p.cast(), plane.w());
+	const Point2 trgPt = trgCamera.TransformPointW2I(forwardPoint);
 	const float trgDepth = tex2D(depthImage, trgPt.x() + 0.5f, trgPt.y() + 0.5f);
 	if (trgDepth == 0.f)
 		return maxDist;
-	const Point3 trgX = BackProjectPoint(trgCamera, trgPt, trgDepth);
-	const Point2 backwardPoint = ProjectPoint(refCamera, trgX);
+	const Point3 trgX = trgCamera.TransformPointI2W(trgPt, trgDepth);
+	const Point2 backwardPoint = refCamera.TransformPointW2I(trgX);
 	const Point2 diff = p.cast() - backwardPoint;
 	const float dist = diff.norm();
 	return min(maxDist, sqrt(dist*(dist+2.f)));
 }
 
 // compute photometric score using weighted ZNCC
-__device__ float ScorePlane(const ImagePixels refImage, const PatchMatchCUDA::Camera& refCamera, const ImagePixels trgImage, const PatchMatchCUDA::Camera& trgCamera, const Point2i& p, const Point4& plane, const float lowDepth, const PatchMatchCUDA::Params& params)
+__device__ float ScorePlane(const ImagePixels refImage, const CUDA::Camera& refCamera, const ImagePixels trgImage, const CUDA::Camera& trgCamera, const Point2i& p, const Point4& plane, const float lowDepth, const PatchMatch::Params& params)
 {
 	constexpr float maxCost = 1.2f;
 	Matrix3 H = ComputeHomography(refCamera, trgCamera, p.cast(), plane);
 	const Point2 pt = (H * p.cast().homogeneous()).hnormalized();
-	if (pt.x() >= trgCamera.width || pt.x() < 0.f || pt.y() >= trgCamera.height || pt.y() < 0.f)
+	if (pt.x() >= trgCamera.size.x() || pt.x() < 0.f || pt.y() >= trgCamera.size.y() || pt.y() < 0.f)
 		return maxCost;
 	Point3 X = H * Point2(p.x()-nSizeHalfWindow, p.y()-nSizeHalfWindow).homogeneous();
 	Point3 baseX(X);
@@ -358,7 +334,7 @@ __device__ float ScorePlane(const ImagePixels refImage, const PatchMatchCUDA::Ca
 }
 
 // compute photometric score for all neighbor images
-__device__ inline void MultiViewScorePlane(const ImagePixels *images, const ImagePixels* depthImages, const PatchMatchCUDA::Camera* cameras, const Point2i& p, const Point4& plane, const float lowDepth, float* costVector, const PatchMatchCUDA::Params& params)
+__device__ inline void MultiViewScorePlane(const ImagePixels *images, const ImagePixels* depthImages, const CUDA::Camera* cameras, const Point2i& p, const Point4& plane, const float lowDepth, float* costVector, const PatchMatch::Params& params)
 {
 	for (int imgId = 1; imgId <= params.nNumViews; ++imgId)
 		costVector[imgId-1] = ScorePlane(images[0], cameras[0], images[imgId], cameras[imgId], p, plane, lowDepth, params);
@@ -367,7 +343,7 @@
 		costVector[imgId] += 0.1f * GeometricConsistencyWeight(depthImages[imgId], cameras[0], cameras[imgId+1], plane, p);
 }
 // same as above, but interpolate the plane to current pixel position
-__device__ inline float MultiViewScoreNeighborPlane(const ImagePixels* images, const ImagePixels* depthImages, const PatchMatchCUDA::Camera* cameras, const Point2i& p, const Point2i& np, Point4 plane, const float lowDepth, float* costVector, const PatchMatchCUDA::Params& params)
+__device__ inline float MultiViewScoreNeighborPlane(const ImagePixels* images, const ImagePixels* depthImages, const CUDA::Camera* cameras, const Point2i& p, const Point2i& np, Point4 plane, const float lowDepth, float* costVector, const PatchMatch::Params& params)
 {
 	plane.w() = InterpolatePixel(cameras[0], p, np, plane.w(), plane.topLeftCorner<3,1>(), params);
 	MultiViewScorePlane(images, depthImages, cameras, p, plane, lowDepth, costVector, params);
@@ -386,10 +362,10 @@ __device__ inline float AggregateMultiViewScores(const unsigned* viewWeights, co
 
 // propagate and refine the plane estimate for the current pixel employing the asymmetric approach described in:
 // "Multi-View Stereo with Asymmetric Checkerboard Propagation and Multi-Hypothesis Joint View Selection", 2018
-__device__ void ProcessPixel(const ImagePixels* images, const ImagePixels* depthImages, const PatchMatchCUDA::Camera* cameras, Point4* planes, const float* lowDepths, float* costs, RandState* randStates, unsigned* selectedViews, const Point2i& p, const PatchMatchCUDA::Params& params, const int iter)
+__device__ void ProcessPixel(const ImagePixels* images, const ImagePixels* depthImages, const CUDA::Camera* cameras, Point4* planes, const float* lowDepths, float* costs, RandState* randStates, unsigned* selectedViews, const Point2i& p, const PatchMatch::Params& params, const int iter)
 {
-	int width = cameras[0].width;
-	int height = cameras[0].height;
+	const int width = cameras[0].size.x();
+	const int height = cameras[0].size.y();
 	if (p.x() >= width || p.y() >= height)
 		return;
 	const int idx = Point2Idx(p, width);
@@ -527,7 +503,7 @@ __device__ void ProcessPixel(const ImagePixels* images, const ImagePixels* depth
 			planes[neighborPositions[2]].w(),
 			planes[neighborPositions[3]].w()
 		);
-		surfaceNormal = ComputeDepthGradient(cameras[0].K, depth, p, ndepths);
+		surfaceNormal = ComputeDepthGradient(cameras[0].model, depth, p, ndepths);
 		numValidPlanes = 4;
 	}
 	constexpr int numPlanes = 4;
@@ -547,10 +523,10 @@ __device__ void ProcessPixel(const ImagePixels* images, const ImagePixels* depth
 }
 
 // compute the score of the current plane estimate
-__device__ void InitializePixelScore(const ImagePixels *images, const ImagePixels* depthImages, const PatchMatchCUDA::Camera* cameras, Point4* planes, const float* lowDepths, float* costs, RandState* randStates, unsigned* selectedViews, const Point2i& p, const PatchMatchCUDA::Params params)
+__device__ void InitializePixelScore(const ImagePixels *images, const ImagePixels* depthImages, const CUDA::Camera* cameras, Point4* planes, const float* lowDepths, float* costs, RandState* randStates, unsigned* selectedViews, const Point2i& p, const PatchMatch::Params params)
 {
-	const int width = cameras[0].width;
-	const int height = cameras[0].height;
+	const int width = cameras[0].size.x();
+	const int height = cameras[0].size.y();
 	if (p.x() >= width || p.y() >= height)
 		return;
 	const int idx = Point2Idx(p, width);
@@ -566,7 +542,7 @@ __device__ void InitializePixelScore(const ImagePixels *images, const ImagePixel
 		// generate random plane
 		plane.topLeftCorner<3,1>() = GenerateRandomNormal(cameras[0], p, randState);
 		plane.w() = curand_uniform(randState) * (params.fDepthMax - params.fDepthMin) + params.fDepthMin;
-	} else if (plane.topLeftCorner<3,1>().dot(ViewDirection(cameras[0], p)) >= 0.f) {
+	} else if (plane.topLeftCorner<3,1>().dot(cameras[0].model.ViewDirection(p)) >= 0.f) {
 		// generate random normal
 		plane.topLeftCorner<3,1>() = GenerateRandomNormal(cameras[0], p, randState);
 	}
@@ -587,20 +563,20 @@ __device__ void InitializePixelScore(const ImagePixels *images, const ImagePixel
 			SetBit(selectedView, imgId);
 	costs[idx] = cost / params.nInitTopK;
 }
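For reference, the plane-induced homography assembled in ComputeHomography above is the standard two-view warp. With the convention x_cam = R (X − C) used by Pose::TransformPointW2C, a reference pixel back-projected to the point X on the plane with unit normal n maps into the target view through

$$ H \;=\; K_t\, R_t \left( R_r^{\top} + \frac{(C_r - C_t)\, n^{\top}}{n^{\top} X} \right) K_r^{-1} $$

which is exactly trgCamera.model.K() * trgCamera.pose.R * (refCamera.pose.R.transpose() + t * normal.transpose()) * refCamera.model.K().inverse() with t = (C_r − C_t) / (n · X) as computed in the hunk above.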
 
-__global__ void InitializeScore(const cudaTextureObject_t* textureImages, const cudaTextureObject_t* textureDepths, const PatchMatchCUDA::Camera* cameras, Point4* planes, const float* lowDepths, float* costs, curandState* randStates, unsigned* selectedViews, const PatchMatchCUDA::Params params)
+__global__ void InitializeScore(const cudaTextureObject_t* textureImages, const cudaTextureObject_t* textureDepths, const CUDA::Camera* cameras, Point4* planes, const float* lowDepths, float* costs, curandState* randStates, unsigned* selectedViews, const PatchMatch::Params params)
 {
 	const Point2i p = Point2i(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y);
 	InitializePixelScore((const ImagePixels*)textureImages, (const ImagePixels*)textureDepths, cameras, planes, lowDepths, costs, (RandState*)randStates, selectedViews, p, params);
 }
 
 // traverse image in a back/red checkerboard pattern
-__global__ void BlackPixelProcess(const cudaTextureObject_t* textureImages, const cudaTextureObject_t* textureDepths, const PatchMatchCUDA::Camera* cameras, Point4* planes, const float* lowDepths, float* costs, curandState* randStates, unsigned* selectedViews, const PatchMatchCUDA::Params params, const int iter)
+__global__ void BlackPixelProcess(const cudaTextureObject_t* textureImages, const cudaTextureObject_t* textureDepths, const CUDA::Camera* cameras, Point4* planes, const float* lowDepths, float* costs, curandState* randStates, unsigned* selectedViews, const PatchMatch::Params params, const int iter)
 {
 	Point2i p = Point2i(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y);
 	p.y() = p.y() * 2 + (threadIdx.x % 2 == 0 ? 0 : 1);
 	ProcessPixel((const ImagePixels*)textureImages, (const ImagePixels*)textureDepths, cameras, planes, lowDepths, costs, (RandState*)randStates, selectedViews, p, params, iter);
 }
-__global__ void RedPixelProcess(const cudaTextureObject_t* textureImages, const cudaTextureObject_t* textureDepths, const PatchMatchCUDA::Camera* cameras, Point4* planes, const float* lowDepths, float* costs, curandState* randStates, unsigned* selectedViews, const PatchMatchCUDA::Params params, const int iter)
+__global__ void RedPixelProcess(const cudaTextureObject_t* textureImages, const cudaTextureObject_t* textureDepths, const CUDA::Camera* cameras, Point4* planes, const float* lowDepths, float* costs, curandState* randStates, unsigned* selectedViews, const PatchMatch::Params params, const int iter)
 {
 	Point2i p = Point2i(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y);
 	p.y() = p.y() * 2 + (threadIdx.x % 2 == 0 ? 1 : 0);
@@ -608,7 +584,7 @@ __global__ void RedPixelProcess(const cudaTextureObject_t* textureImages, const
 }
 
 // filter depth/normals
-__global__ void FilterPlanes(Point4* planes, float* costs, unsigned* selectedViews, int width, int height, const PatchMatchCUDA::Params params)
+__global__ void FilterPlanes(Point4* planes, float* costs, unsigned* selectedViews, int width, int height, const PatchMatch::Params params)
 {
 	const Point2i p = Point2i(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y);
 	if (p.x() >= width || p.y() >= height)
 		return;
@@ -626,10 +602,10 @@ __global__ void FilterPlanes(Point4* planes, float* costs, unsigned* selectedVie
 /*----------------------------------------------------------------*/
 
-__host__ void PatchMatchCUDA::RunCUDA(float* ptrCostMap, uint32_t* ptrViewsMap)
+__host__ void PatchMatch::RunCUDA(float* ptrCostMap, uint32_t* ptrViewsMap)
 {
-	const unsigned width = cameras[0].width;
-	const unsigned height = cameras[0].height;
+	const unsigned width = cameras[0].size.x();
+	const unsigned height = cameras[0].size.y();
 	constexpr unsigned BLOCK_W = 32;
 	constexpr unsigned BLOCK_H = (BLOCK_W / 2);
@@ -660,3 +636,7 @@ __host__ void PatchMatchCUDA::RunCUDA(float* ptrCostMap, uint32_t* ptrViewsMap)
 	cudaDeviceSynchronize();
 }
 /*----------------------------------------------------------------*/
+
+} // namespace CUDA
+
+} // namespace MVS
diff --git a/libs/MVS/PatchMatchCUDA.inl b/libs/MVS/PatchMatchCUDA.inl
index 349cc33d7..c1db82ab9 100644
--- a/libs/MVS/PatchMatchCUDA.inl
+++ b/libs/MVS/PatchMatchCUDA.inl
@@ -35,23 +35,7 @@
 
 // I N C L U D E S /////////////////////////////////////////////////
 
-#define _USE_MATH_DEFINES
-#include
-#include
-#include
-#include
-#include
-
-// Eigen
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
-#include
-
-// CUDA toolkit
-#include
-#include
-#include
-#include
-#include
+#include "CameraCUDA.h"
 
 // OpenCV
 #include
@@ -65,20 +49,12 @@
 
 namespace MVS {
 
-#if __CUDA_ARCH__ > 0
-#define __CDC__CUDA__ARCH__ 1
-#else
-#undef __CDC__CUDA__ARCH__
-#endif
-
 struct DepthData;
 
-class PatchMatchCUDA {
-public:
-	typedef Eigen::Matrix Point3;
-	typedef Eigen::Matrix Point4;
-	typedef Eigen::Matrix Matrix3;
+namespace CUDA {
+
+class PatchMatch {
+public:
 	struct Params {
 		int nNumViews = 5;
 		int nEstimationIters = 3;
@@ -90,17 +66,9 @@ public:
 		float fThresholdKeepCost = 0;
 	};
 
-	struct Camera {
-		Matrix3 K;
-		Matrix3 R;
-		Point3 C;
-		int height;
-		int width;
-	};
-
 public:
-	PatchMatchCUDA(int device=0);
-	~PatchMatchCUDA();
+	PatchMatch(int device=0);
+	~PatchMatch();
 
 	void Init(bool bGeomConsistency);
 	void Release();
@@ -138,6 +106,8 @@ public:
 };
 /*----------------------------------------------------------------*/
 
+} // namespace CUDA
+
 } // namespace MVS
 
 #endif // _MVS_PATCHMATCHCUDA_INL_
diff --git a/libs/MVS/SceneDensify.cpp b/libs/MVS/SceneDensify.cpp
index 9a1f1bf1b..f137feebc 100644
--- a/libs/MVS/SceneDensify.cpp
+++ b/libs/MVS/SceneDensify.cpp
@@ -2080,9 +2080,9 @@ bool Scene::ComputeDepthMaps(DenseDepthMapData& data)
 
 	#ifdef _USE_CUDA
 	// initialize CUDA
-	if (CUDA::desiredDeviceID >= -1 && data.nFusionMode >= 0) {
-		data.depthMaps.pmCUDA = new PatchMatchCUDA(CUDA::desiredDeviceID);
-		if (CUDA::devices.IsEmpty())
+	if (SEACAVE::CUDA::desiredDeviceID >= -1 && data.nFusionMode >= 0) {
+		data.depthMaps.pmCUDA = new MVS::CUDA::PatchMatch(SEACAVE::CUDA::desiredDeviceID);
+		if (SEACAVE::CUDA::devices.IsEmpty())
 			data.depthMaps.pmCUDA.Release();
 		else
 			data.depthMaps.pmCUDA->Init(false);
diff --git a/libs/MVS/SceneDensify.h b/libs/MVS/SceneDensify.h
index 7934ed8e9..97fc9cbbf 100644
--- a/libs/MVS/SceneDensify.h
+++ b/libs/MVS/SceneDensify.h
@@ -45,7 +45,9 @@ namespace MVS {
 // Forward declarations
 class MVS_API Scene;
 #ifdef _USE_CUDA
-class PatchMatchCUDA;
+namespace CUDA {
+class PatchMatch;
+} // namespace CUDA
 #endif // _USE_CUDA
 
 // structure used to compute all depth-maps
@@ -90,7 +92,7 @@ class MVS_API DepthMapsData
 
 	#ifdef _USE_CUDA
 	// used internally to estimate the depth-maps using CUDA
-	CAutoPtr pmCUDA;
+	CAutoPtr pmCUDA;
 	#endif // _USE_CUDA
 };
 /*----------------------------------------------------------------*/
diff --git a/libs/MVS/SceneRefineCUDA.cpp b/libs/MVS/SceneRefineCUDA.cpp
index 5fee76e0f..a8c671e4e 100644
--- a/libs/MVS/SceneRefineCUDA.cpp
+++ b/libs/MVS/SceneRefineCUDA.cpp
@@ -1951,10 +1951,10 @@ class MeshRefineCUDA {
 	struct View {
 		Image32F imageHost; // store temporarily the image pixels
 		Image8U::Size size;
-		CUDA::ArrayRT16F image;
-		CUDA::MemDevice depthMap;
-		CUDA::MemDevice faceMap;
-		CUDA::MemDevice baryMap;
+		SEACAVE::CUDA::ArrayRT16F image;
+		SEACAVE::CUDA::MemDevice depthMap;
+		SEACAVE::CUDA::MemDevice faceMap;
+		SEACAVE::CUDA::MemDevice baryMap;
 		inline View() {}
 		inline View(View&) {}
 	};
@@ -2000,8 +2000,8 @@ class MeshRefineCUDA {
 	void ImageMeshWarp(
 		const Camera& cameraA, const Camera& cameraB, const Image8U::Size& size,
 		uint32_t idxImageA, uint32_t idxImageB);
-	void ComputeLocalVariance(const CUDA::ArrayRT16F& image, const Image8U::Size& size,
-		CUDA::MemDevice& imageMean, CUDA::MemDevice& imageVar);
+	void ComputeLocalVariance(const SEACAVE::CUDA::ArrayRT16F& image, const Image8U::Size& size,
+		SEACAVE::CUDA::MemDevice& imageMean, SEACAVE::CUDA::MemDevice& imageVar);
 	void ComputeLocalZNCC(const Image8U::Size& size);
 	void ComputePhotometricGradient(const Camera& cameraA, const Camera& cameraB, const Image8U::Size& size,
 		uint32_t idxImageA, uint32_t idxImageB, uint32_t numVertices, float RegularizationScale);
@@ -2023,45 +2023,45 @@ class MeshRefineCUDA {
 	ViewsArr views; // views' data
 	PairIdxArr pairs; // image pairs used to refine the mesh
 
-	CUDA::ModuleRTPtr module;
-	CUDA::KernelRT kernelProjectMesh;
-	CUDA::KernelRT kernelCrossCheckProjection;
-	CUDA::KernelRT kernelImageMeshWarp;
-	CUDA::KernelRT kernelComputeImageMean;
-	CUDA::KernelRT kernelComputeImageVar;
-	CUDA::KernelRT kernelComputeImageCov;
-	CUDA::KernelRT kernelComputeImageZNCC;
-	CUDA::KernelRT kernelComputeImageDZNCC;
-	CUDA::KernelRT kernelComputePhotometricGradient;
-	CUDA::KernelRT kernelUpdatePhotoGradNorm;
-	CUDA::KernelRT kernelComputeSmoothnessGradient;
-	CUDA::KernelRT kernelCombineGradients;
-	CUDA::KernelRT kernelCombineAllGradients;
-
-	CUDA::MemDevice vertices;
-	CUDA::MemDevice vertexVertices;
-	CUDA::MemDevice faces;
-	CUDA::MemDevice faceNormals;
-	CUDA::TextureRT16F texImageRef;
-	CUDA::SurfaceRT16F surfImageRef;
-	CUDA::SurfaceRT16F surfImageProjRef;
-	CUDA::MemDevice mask;
-	CUDA::MemDevice imageMeanA;
-	CUDA::MemDevice imageVarA;
-	CUDA::ArrayRT16F imageAB;
-	CUDA::MemDevice imageMeanAB;
-	CUDA::MemDevice imageVarAB;
-	CUDA::MemDevice imageCov;
-	CUDA::MemDevice imageZNCC;
-	CUDA::MemDevice imageDZNCC;
-	CUDA::MemDevice photoGrad;
-	CUDA::MemDevice photoGradNorm;
-	CUDA::MemDevice photoGradPixels;
-	CUDA::MemDevice vertexVerticesCont;
-	CUDA::MemDevice vertexVerticesSizes;
-	CUDA::MemDevice vertexVerticesPointers;
-	CUDA::MemDevice smoothGrad1;
-	CUDA::MemDevice smoothGrad2;
+	SEACAVE::CUDA::ModuleRTPtr module;
+	SEACAVE::CUDA::KernelRT kernelProjectMesh;
+	SEACAVE::CUDA::KernelRT kernelCrossCheckProjection;
+	SEACAVE::CUDA::KernelRT kernelImageMeshWarp;
+	SEACAVE::CUDA::KernelRT kernelComputeImageMean;
+	SEACAVE::CUDA::KernelRT kernelComputeImageVar;
+	SEACAVE::CUDA::KernelRT kernelComputeImageCov;
+	SEACAVE::CUDA::KernelRT kernelComputeImageZNCC;
+	SEACAVE::CUDA::KernelRT kernelComputeImageDZNCC;
+	SEACAVE::CUDA::KernelRT kernelComputePhotometricGradient;
+	SEACAVE::CUDA::KernelRT kernelUpdatePhotoGradNorm;
+	SEACAVE::CUDA::KernelRT kernelComputeSmoothnessGradient;
+	SEACAVE::CUDA::KernelRT kernelCombineGradients;
+	SEACAVE::CUDA::KernelRT kernelCombineAllGradients;
+
+	SEACAVE::CUDA::MemDevice vertices;
+	SEACAVE::CUDA::MemDevice vertexVertices;
+	SEACAVE::CUDA::MemDevice faces;
+	SEACAVE::CUDA::MemDevice faceNormals;
+	SEACAVE::CUDA::TextureRT16F texImageRef;
+	SEACAVE::CUDA::SurfaceRT16F surfImageRef;
+	SEACAVE::CUDA::SurfaceRT16F surfImageProjRef;
+	SEACAVE::CUDA::MemDevice mask;
+	SEACAVE::CUDA::MemDevice imageMeanA;
+	SEACAVE::CUDA::MemDevice imageVarA;
+	SEACAVE::CUDA::ArrayRT16F imageAB;
+	SEACAVE::CUDA::MemDevice imageMeanAB;
+	SEACAVE::CUDA::MemDevice imageVarAB;
+	SEACAVE::CUDA::MemDevice imageCov;
+	SEACAVE::CUDA::MemDevice imageZNCC;
+	SEACAVE::CUDA::MemDevice imageDZNCC;
+	SEACAVE::CUDA::MemDevice photoGrad;
+	SEACAVE::CUDA::MemDevice photoGradNorm;
+	SEACAVE::CUDA::MemDevice photoGradPixels;
+	SEACAVE::CUDA::MemDevice vertexVerticesCont;
+	SEACAVE::CUDA::MemDevice vertexVerticesSizes;
+	SEACAVE::CUDA::MemDevice vertexVerticesPointers;
+	SEACAVE::CUDA::MemDevice smoothGrad1;
+	SEACAVE::CUDA::MemDevice smoothGrad2;
 
 	enum { HalfSize = 2 }; // half window size used to compute ZNCC
 };
@@ -2076,7 +2076,7 @@ MeshRefineCUDA::MeshRefineCUDA(Scene& _scene, unsigned _nAlternatePair, float _w
 	scene(_scene), images(_scene.images)
 {
-	if (!InitKernels(CUDA::desiredDeviceID))
+	if (!InitKernels(SEACAVE::CUDA::desiredDeviceID))
 		return;
 	// keep only best neighbor views for each image
 	std::unordered_set mapPairs;
@@ -2110,13 +2110,13 @@ bool MeshRefineCUDA::InitKernels(int device)
 	STATIC_ASSERT(sizeof(CameraCUDA) == 176);
 
 	// initialize CUDA device if needed
-	if (CUDA::devices.IsEmpty() && CUDA::initDevice(device) != CUDA_SUCCESS)
+	if (SEACAVE::CUDA::devices.IsEmpty() && SEACAVE::CUDA::initDevice(device) != CUDA_SUCCESS)
 		return false;
 
 	// initialize CUDA kernels
 	if (module != NULL && module->IsValid())
 		return true;
-	module = new CUDA::ModuleRT(g_szMeshRefineModule);
+	module = new SEACAVE::CUDA::ModuleRT(g_szMeshRefineModule);
 	if (!module->IsValid()) {
 		module.Release();
 		return false;
 	}
@@ -2615,8 +2615,8 @@ void MeshRefineCUDA::ImageMeshWarp(
 }
 
 // compute local variance for each image pixel
-void MeshRefineCUDA::ComputeLocalVariance(const CUDA::ArrayRT16F& image, const Image8U::Size& size,
-	CUDA::MemDevice& imageMean, CUDA::MemDevice& imageVar)
+void MeshRefineCUDA::ComputeLocalVariance(const SEACAVE::CUDA::ArrayRT16F& image, const Image8U::Size& size,
+	SEACAVE::CUDA::MemDevice& imageMean, SEACAVE::CUDA::MemDevice& imageVar)
 {
 	surfImageRef.Bind(image);
 	reportCudaError(kernelComputeImageMean(size,