diff --git a/libs/Common/Config.h b/libs/Common/Config.h index e22f946a7..1165bfdc2 100644 --- a/libs/Common/Config.h +++ b/libs/Common/Config.h @@ -163,8 +163,7 @@ #endif - -//optimization flags +// optimization flags #if defined(_MSC_VER) # define ALIGN(n) __declspec(align(n)) # define NOINITVTABLE __declspec(novtable) //disable generating code to initialize the vfptr in the constructor(s) and destructor of the class @@ -229,7 +228,7 @@ #else #define ASSERT(exp, ...) {if (!(exp)) __debugbreak();} #endif // _INC_CRTDBG -#define TRACE(...) {TCHAR buffer[2048]; _sntprintf(buffer, 2048, __VA_ARGS__); OutputDebugString(buffer);} +#define TRACE(...) {TCHAR buffer[2048]; _sntprintf(buffer, 2048, __VA_ARGS__); OutputDebugString(buffer);} #else // _MSC_VER #include #define ASSERT(exp, ...) assert(exp) diff --git a/libs/Common/List.h b/libs/Common/List.h index 8725a2f5a..7bbd5c078 100644 --- a/libs/Common/List.h +++ b/libs/Common/List.h @@ -12,6 +12,7 @@ // I N C L U D E S ///////////////////////////////////////////////// #include +#include // D E F I N E S /////////////////////////////////////////////////// @@ -1154,6 +1155,17 @@ class cList } } + // remove duplicated values; the values are expected to be sorted, set bSort if not + template + inline void RemoveDuplicates() + { + if (bSort) + Sort(); + const IDX index(std::unique(Begin(), End()) - Begin()); + if (index < _size) + RemoveLast(_size-index); + } + inline void RemoveAtMove(IDX index) { ASSERT(index < _size); diff --git a/libs/MVS/CMakeLists.txt b/libs/MVS/CMakeLists.txt index 853386508..c2e3bae01 100644 --- a/libs/MVS/CMakeLists.txt +++ b/libs/MVS/CMakeLists.txt @@ -31,12 +31,20 @@ if(_USE_CUDA) FILE(GLOB LIBRARY_FILES_CUDA "*.cu") LIST(APPEND LIBRARY_FILES_C ${LIBRARY_FILES_CUDA}) endif() +FILE(GLOB CUDA_LIBRARY_FILES_C "CUDA/*.cpp") +FILE(GLOB CUDA_LIBRARY_FILES_H "CUDA/*.h" "CUDA/*.inl") +if(_USE_CUDA) + FILE(GLOB CUDA_LIBRARY_FILES_CUDA "CUDA/*.cu") + LIST(APPEND CUDA_LIBRARY_FILES_C ${CUDA_LIBRARY_FILES_CUDA}) +endif() +SOURCE_GROUP("CUDA" FILES ${CUDA_LIBRARY_FILES_C} ${CUDA_LIBRARY_FILES_H}) GET_FILENAME_COMPONENT(PATH_PythonWrapper_cpp ${CMAKE_CURRENT_SOURCE_DIR}/PythonWrapper.cpp ABSOLUTE) LIST(REMOVE_ITEM LIBRARY_FILES_C "${PATH_PythonWrapper_cpp}") cxx_library_with_type(MVS "Libs" "" "${cxx_default}" ${LIBRARY_FILES_C} ${LIBRARY_FILES_H} + ${CUDA_LIBRARY_FILES_C} ${CUDA_LIBRARY_FILES_H} ) # Manually set Common.h as the precompiled header diff --git a/libs/MVS/CameraCUDA.h b/libs/MVS/CUDA/Camera.h similarity index 88% rename from libs/MVS/CameraCUDA.h rename to libs/MVS/CUDA/Camera.h index c4260ebf4..b2ecde9eb 100644 --- a/libs/MVS/CameraCUDA.h +++ b/libs/MVS/CUDA/Camera.h @@ -1,5 +1,5 @@ /* -* CameraCUDA.h +* Camera.h * * Copyright (c) 2014-2024 SEACAVE * @@ -35,25 +35,10 @@ // I N C L U D E S ///////////////////////////////////////////////// -#define _USE_MATH_DEFINES -#include -#include -#include #include #include -// Eigen -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int -#include - -// CUDA toolkit -#include -#include -#include -#include -#include - -#include "../Common/UtilCUDADevice.h" +#include "Maths.h" // D E F I N E S /////////////////////////////////////////////////// @@ -65,12 +50,6 @@ namespace MVS { namespace CUDA { -typedef Eigen::Matrix Point2i; -typedef Eigen::Matrix Point2; -typedef Eigen::Matrix Point3; -typedef Eigen::Matrix Point4; -typedef Eigen::Matrix Matrix3; - // Linear camera model struct LinearCameraModel { Point2 f; // focal length diff --git a/libs/MVS/CUDA/Maths.h b/libs/MVS/CUDA/Maths.h new file mode 100644 index 000000000..71f3632ea --- /dev/null +++ b/libs/MVS/CUDA/Maths.h @@ -0,0 +1,312 @@ +/* +* Maths.h +* +* Copyright (c) 2014-2024 SEACAVE +* +* Author(s): +* +* cDc +* +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +* +* +* Additional Terms: +* +* You are required to preserve legal notices and author attributions in +* that material or in the Appropriate Legal Notices displayed by works +* containing it. +*/ + +#pragma once + +#ifndef _MVS_MATHSCUDA_H_ +#define _MVS_MATHSCUDA_H_ + + +// I N C L U D E S ///////////////////////////////////////////////// + +#define _USE_MATH_DEFINES +#include +#include +#include + +// CUDA toolkit +#include +#include // next to cuda_runtime.h +#include +#include +#include +#include + +// Eigen +#ifdef __CUDACC__ +#pragma push_macro("EIGEN_DEFAULT_DENSE_INDEX_TYPE") +#undef EIGEN_DEFAULT_DENSE_INDEX_TYPE +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int +#endif +#include +#include +#ifdef __CUDACC__ +#pragma pop_macro("EIGEN_DEFAULT_DENSE_INDEX_TYPE") +#endif + +#include "../../Common/UtilCUDADevice.h" + + +// D E F I N E S /////////////////////////////////////////////////// + + +// S T R U C T S /////////////////////////////////////////////////// + +#ifndef __CUDACC__ +// host implementations of CUDA functions +constexpr int max(int a, int b) { + return a > b ? a : b; +} +constexpr int min(int a, int b) { + return a < b ? a : b; +} + +inline float rsqrtf(float x) { + return 1.f / sqrtf(x); +} +#endif + +// CUDA helper math functions +inline __host__ __device__ float2 make_float2(float s) { + return make_float2(s, s); +} +inline __host__ __device__ float2 make_float2(int2 a) { + return make_float2(float(a.x), float(a.y)); +} +inline __host__ __device__ float3 make_float3(int3 a) { + return make_float3(float(a.x), float(a.y), float(a.z)); +} +inline __device__ __host__ int clamp(int f, int a, int b) { + return max(a, min(f, b)); +} +inline __host__ __device__ float2 operator-(float2& a) { + return make_float2(-a.x, -a.y); +} +inline __host__ __device__ int2 operator-(int2& a) { + return make_int2(-a.x, -a.y); +} +inline __host__ __device__ float3 operator-(float3& a) { + return make_float3(-a.x, -a.y, -a.z); +} +inline __host__ __device__ int3 operator-(int3& a) { + return make_int3(-a.x, -a.y, -a.z); +} +inline __host__ __device__ int3 operator-(int3 a, int b) { + return make_int3(a.x - b, a.y - b, a.z - b); +} +inline __host__ __device__ float2 operator-(float2 a, float2 b) { + return make_float2(a.x - b.x, a.y - b.y); +} +inline __host__ __device__ float3 operator-(float3 a, float b) { + return make_float3(a.x - b, a.y - b, a.z - b); +} +inline __host__ __device__ float3 operator-(float3 a, float3 b) { + return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); +} +inline __host__ __device__ float2 operator+(float2 a, float b) { + return make_float2(a.x + b, a.y + b); +} +inline __host__ __device__ float3 operator+(float3 a, float b) { + return make_float3(a.x + b, a.y + b, a.z + b); +} +inline __host__ __device__ int2 operator+(int2 a, int2 b) { + return make_int2(a.x + b.x, a.y + b.y); +} +inline __host__ __device__ int3 operator+(int3 a, int3 b) { + return make_int3(a.x + b.x, a.y + b.y, a.z + b.z); +} +inline __host__ __device__ float3 operator+(float3 a, float3 b) { + return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); +} +inline __host__ __device__ float2 operator*(float2 a, float b) { + return make_float2(a.x * b, a.y * b); +} +inline __host__ __device__ float3 operator*(float3 a, float b) { + return make_float3(a.x * b, a.y * b, a.z * b); +} +inline __host__ __device__ float2 operator/(float2 a, float b) { + return make_float2(a.x / b, a.y / b); +} +inline __host__ __device__ float3 operator/(float3 a, float b) { + return make_float3(a.x / b, a.y / b, a.z / b); +} +inline __host__ __device__ float dot(float2 a, float2 b) { + return a.x * b.x + a.y * b.y; +} +inline __host__ __device__ float dot(float3 a, float3 b) { + return a.x * b.x + a.y * b.y + a.z * b.z; +} +inline __host__ __device__ float length(float2 v) { + return sqrtf(dot(v, v)); +} +inline __host__ __device__ float length(float3 v) { + return sqrtf(dot(v, v)); +} +inline __host__ __device__ float2 normalize(float2 v) { + return v * rsqrtf(dot(v, v)); +} +inline __host__ __device__ float3 normalize(float3 v) { + return v * rsqrtf(dot(v, v)); +} +inline __host__ __device__ float3 cross(float3 a, float3 b) { + return make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x); +} +/*----------------------------------------------------------------*/ + + +namespace MVS { + +namespace CUDA { + +// common point and matrix types +typedef Eigen::Matrix Point2i; +typedef Eigen::Matrix Point3i; +typedef Eigen::Matrix Point2; +typedef Eigen::Matrix Point3; +typedef Eigen::Matrix Point4; +typedef Eigen::Matrix Matrix3; + +// convert between float2 and Point2 +__host__ __device__ inline float2 Convert(const MVS::CUDA::Point2 m) { + return make_float2(m[0], m[1]); +} +__host__ __device__ inline MVS::CUDA::Point2 Convert(const float2 m) { + return MVS::CUDA::Point2(m.x, m.y); +} +// convert between float3 and Point3 +__host__ __device__ inline float3 Convert(const MVS::CUDA::Point3 m) { + return make_float3(m[0], m[1], m[2]); +} +__host__ __device__ inline MVS::CUDA::Point3 Convert(const float3 m) { + return MVS::CUDA::Point3(m.x, m.y, m.z); +} +// convert between int2 and Point2i +__host__ __device__ inline int2 Convert(const MVS::CUDA::Point2i m) { + return make_int2(m[0], m[1]); +} +__host__ __device__ inline MVS::CUDA::Point2i Convert(const int2 m) { + return MVS::CUDA::Point2i(m.x, m.y); +} +// convert between int3 and Point3i +__host__ __device__ inline int3 Convert(const MVS::CUDA::Point3i m) { + return make_int3(m[0], m[1], m[2]); +} +__host__ __device__ inline MVS::CUDA::Point3i Convert(const int3 m) { + return MVS::CUDA::Point3i(m.x, m.y, m.z); +} +/*----------------------------------------------------------------*/ + + +#ifdef __CUDACC__ +// round and clamp to uint8 +__device__ inline uint8_t RoundAndClampToUint8(const float x) { + return clamp(__float2int_rn(x), 0, 255); +} + +// round and floor to int +__device__ inline Point2i RoundToInt2(const Point2& v) { + return Point2i(__float2int_rn(v.x()), __float2int_rn(v.y())); +} +__device__ inline Point2i FloorToInt2(const Point2& v) { + return Point2i(__float2int_rd(v.x()), __float2int_rd(v.y())); +} + +__device__ inline Point3i RoundToInt3(const Point3& v) { + return Point3i(__float2int_rn(v.x()), __float2int_rn(v.y()), __float2int_rn(v.z())); +} +__device__ inline Point3i FloorToInt3(const Point3& v) { + return Point3i(__float2int_rd(v.x()), __float2int_rd(v.y()), __float2int_rd(v.z())); +} +#endif + + +template +__device__ constexpr T Square(const T x) { + return x * x; +} + +template +__device__ constexpr void Swap(T& a, T& b) { + const T c(a); + a = b; + b = c; +} + +// mod is always positive, whereas % is the remainder, which can be positive or negative +template +__device__ T inline Mod(T a, T b) { + const T result = a % b; + if (result < 0) + return result + b; + return result; +} + +// linear interpolation +inline __device__ __host__ float lerp(float a, float b, float t) { + return a + (b-a) * t; +} +inline __device__ __host__ Point3 lerp(Point3 a, Point3 b, float t) { + return a + (b-a) * t; +} + +#ifdef __CUDACC__ +// thread index +inline __device__ int GetThreadIndex() { + return blockIdx.x * blockDim.x + threadIdx.x; +} + +inline __device__ Point2i GetThreadIndex2() { + return Point2i(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y); +} + +inline __device__ Point3i GetThreadIndex3() { + return Point3i(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y, blockIdx.z * blockDim.z + threadIdx.z); +} +#endif + +// convert 2D to 1D coordinates and back +__device__ inline int Point2Idx(const Point2i& p, int width) { + return p.y() * width + p.x(); +} +__device__ inline int Point2Idx(int stride, const Point2i& pixel, int numChannels = 1) { + return pixel.y() * stride + pixel.x() * numChannels; +} +__device__ inline int Point2Idx(int width, int channels, const Point2i& pixel) { + return pixel.y() * (width * channels) + pixel.x() * channels; +} +__device__ inline Point2i Idx2Point(int idx, int width) { + return Point2i(idx % width, idx / width); +} + +// check if a pixel is inside the image +__device__ inline bool IsInImage(const int x, const int y, const int width, const int height) { + return x >= 0 && y >= 0 && x < width && y < height; +} +__device__ inline bool IsInImage(const Point2i& pixel, const Point2i& size) { + return pixel.x() >= 0 && pixel.y() >= 0 && pixel.x() < size.x() && pixel.y() < size.y(); +} +/*----------------------------------------------------------------*/ + +} // namespace CUDA + +} // namespace MVS + +#endif // _MVS_MATHSCUDA_H_ diff --git a/libs/MVS/Camera.cpp b/libs/MVS/Camera.cpp index 00bbdb791..ebc2e43de 100644 --- a/libs/MVS/Camera.cpp +++ b/libs/MVS/Camera.cpp @@ -77,6 +77,10 @@ Camera Camera::GetScaled(REAL s) const { return Camera(GetScaledK(s), R, C); } +Camera Camera::GetScaled(const cv::Size& size, const cv::Size& newSize) const +{ + return Camera(GetScaledK(size, newSize), R, C); +} /*----------------------------------------------------------------*/ diff --git a/libs/MVS/Camera.h b/libs/MVS/Camera.h index 8e4c186fe..f7bad6738 100644 --- a/libs/MVS/Camera.h +++ b/libs/MVS/Camera.h @@ -272,7 +272,8 @@ class MVS_API Camera : public CameraIntern Camera& operator= (const CameraIntern& camera); - Camera GetScaled(REAL s) const; // return a scaled camera + Camera GetScaled(REAL s) const; // return a camera scaled by the given factor + Camera GetScaled(const cv::Size& size, const cv::Size& newSize) const; // return a camera scaled to the given resolution void ComposeP_RC(); // compose P from R and C only void ComposeP(); // compose P from K, R and C diff --git a/libs/MVS/PatchMatchCUDA.cpp b/libs/MVS/PatchMatchCUDA.cpp index 5677f64b5..12e25512d 100644 --- a/libs/MVS/PatchMatchCUDA.cpp +++ b/libs/MVS/PatchMatchCUDA.cpp @@ -1,5 +1,5 @@ /* -* PatchMatch.cpp +* PatchMatchCUDA.cpp * * Copyright (c) 2014-2021 SEACAVE * @@ -261,7 +261,7 @@ void PatchMatch::EstimateDepthMap(DepthData& depthData) Eigen::Map(view.camera.K.val).cast(), Eigen::Map(view.camera.R.val).cast(), Eigen::Map(view.camera.C.ptr()).cast(), - image.rows, image.cols); + image.cols, image.rows); // store camera and image if (i == 0 && (prevNumImages < numImages || images[0].size() != image.size())) { // allocate/reallocate PatchMatch CUDA memory diff --git a/libs/MVS/PatchMatchCUDA.cu b/libs/MVS/PatchMatchCUDA.cu index 4531f8456..f4192ed9a 100644 --- a/libs/MVS/PatchMatchCUDA.cu +++ b/libs/MVS/PatchMatchCUDA.cu @@ -51,11 +51,6 @@ namespace CUDA { #define ImagePixels cudaTextureObject_t #define RandState curandState -// square the given value -__device__ constexpr float Square(float v) { - return v * v; -} - // set/check a bit __device__ constexpr void SetBit(unsigned& input, unsigned i) { input |= (1u << i); @@ -64,21 +59,6 @@ __device__ constexpr int IsBitSet(unsigned input, unsigned i) { return (input >> i) & 1u; } -// swap the given values -__device__ constexpr void Swap(float& v0, float& v1) { - const float tmp = v0; - v0 = v1; - v1 = tmp; -} - -// convert 2d to 1d coordinates and back -__device__ inline int Point2Idx(const Point2i& p, int width) { - return p.y() * width + p.x(); -} -__device__ inline Point2i Idx2Point(int idx, int width) { - return Point2i(idx % width, idx / width); -} - // sort the given values array using bubble sort algorithm __device__ inline void Sort(const float* values, float* sortedValues, int n) { for (int i = 0; i < n; ++i) @@ -565,20 +545,20 @@ __device__ void InitializePixelScore(const ImagePixels *images, const ImagePixel } __global__ void InitializeScore(const cudaTextureObject_t* textureImages, const cudaTextureObject_t* textureDepths, const CUDA::Camera* cameras, Point4* planes, const float* lowDepths, float* costs, curandState* randStates, unsigned* selectedViews, const PatchMatch::Params params) { - const Point2i p = Point2i(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y); + const Point2i p = GetThreadIndex2(); InitializePixelScore((const ImagePixels*)textureImages, (const ImagePixels*)textureDepths, cameras, planes, lowDepths, costs, (RandState*)randStates, selectedViews, p, params); } // traverse image in a back/red checkerboard pattern __global__ void BlackPixelProcess(const cudaTextureObject_t* textureImages, const cudaTextureObject_t* textureDepths, const CUDA::Camera* cameras, Point4* planes, const float* lowDepths, float* costs, curandState* randStates, unsigned* selectedViews, const PatchMatch::Params params, const int iter) { - Point2i p = Point2i(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y); + Point2i p = GetThreadIndex2(); p.y() = p.y() * 2 + (threadIdx.x % 2 == 0 ? 0 : 1); ProcessPixel((const ImagePixels*)textureImages, (const ImagePixels*)textureDepths, cameras, planes, lowDepths, costs, (RandState*)randStates, selectedViews, p, params, iter); } __global__ void RedPixelProcess(const cudaTextureObject_t* textureImages, const cudaTextureObject_t* textureDepths, const CUDA::Camera* cameras, Point4* planes, const float* lowDepths, float* costs, curandState* randStates, unsigned* selectedViews, const PatchMatch::Params params, const int iter) { - Point2i p = Point2i(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y); + Point2i p = GetThreadIndex2(); p.y() = p.y() * 2 + (threadIdx.x % 2 == 0 ? 1 : 0); ProcessPixel((const ImagePixels*)textureImages, (const ImagePixels*)textureDepths, cameras, planes, lowDepths, costs, (RandState*)randStates, selectedViews, p, params, iter); } @@ -586,7 +566,7 @@ __global__ void RedPixelProcess(const cudaTextureObject_t* textureImages, const // filter depth/normals __global__ void FilterPlanes(Point4* planes, float* costs, unsigned* selectedViews, int width, int height, const PatchMatch::Params params) { - const Point2i p = Point2i(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y); + const Point2i p = GetThreadIndex2(); if (p.x() >= width || p.y() >= height) return; const int idx = Point2Idx(p, width); diff --git a/libs/MVS/PatchMatchCUDA.h b/libs/MVS/PatchMatchCUDA.h index 22680c90d..8497e6ebd 100644 --- a/libs/MVS/PatchMatchCUDA.h +++ b/libs/MVS/PatchMatchCUDA.h @@ -38,10 +38,7 @@ // I N C L U D E S ///////////////////////////////////////////////// #include "SceneDensify.h" -#pragma push_macro("EIGEN_DEFAULT_DENSE_INDEX_TYPE") -#undef EIGEN_DEFAULT_DENSE_INDEX_TYPE #include "PatchMatchCUDA.inl" -#pragma pop_macro("EIGEN_DEFAULT_DENSE_INDEX_TYPE") // D E F I N E S /////////////////////////////////////////////////// diff --git a/libs/MVS/PatchMatchCUDA.inl b/libs/MVS/PatchMatchCUDA.inl index c1db82ab9..a769ba6b7 100644 --- a/libs/MVS/PatchMatchCUDA.inl +++ b/libs/MVS/PatchMatchCUDA.inl @@ -35,7 +35,7 @@ // I N C L U D E S ///////////////////////////////////////////////// -#include "CameraCUDA.h" +#include "CUDA/Camera.h" // OpenCV #include