From eed838624e7815fe34c1aa13e769e75cb5988745 Mon Sep 17 00:00:00 2001 From: nilspin Date: Fri, 29 Mar 2019 17:35:05 +0530 Subject: [PATCH] Remove thrust everywhere. Only use it inside .cu files. Still no build. --- CameraTracking.cpp | 32 +++++++++++---------- CameraTracking.h | 19 ++++++------ CameraTrackingUtils.cu | 21 +++++++------- Solver.cpp | 65 ++++++++++++++++++++++++++++-------------- Solver.cu | 17 ++++++----- Solver.h | 26 ++++++++++------- 6 files changed, 106 insertions(+), 74 deletions(-) diff --git a/CameraTracking.cpp b/CameraTracking.cpp index 59add19..28d310e 100644 --- a/CameraTracking.cpp +++ b/CameraTracking.cpp @@ -7,15 +7,12 @@ #include #include #include -#include +//#include #include "CameraTracking.h" #include "termcolor.hpp" #include "DebugHelper.hpp" -extern "C" float computeCorrespondences(const float4* d_input, const float4* d_target, - const float4* d_targetNormals, thrust::device_vector& corres, - thrust::device_vector& corresNormals,thrust::device_vector& residual, - const float4x4 deltaTransform, const int width, const int height); +extern "C" float computeCorrespondences(const float4* d_input, const float4* d_target, const float4* d_targetNormals, float4* corres, float4* corresNormals, float* residual, const float4x4 deltaTransform, const int width, const int height); extern "C" bool SetCameraIntrinsic(const float* intrinsic, const float* invIntrinsic); //Takes device pointers, calculates correct position and normals @@ -44,9 +41,10 @@ void CameraTracking::Align(float4* d_input, float4* d_inputNormals, float4* d_ta float4x4 deltaT = float4x4(deltaTransform.data()); //Clear previous data - thrust::fill(d_correspondences.begin(), d_correspondences.end(), make_float4(0)); - thrust::fill(d_correspondenceNormals.begin(), d_correspondenceNormals.end(), make_float4(0)); - thrust::fill(d_residuals.begin(), d_residuals.end(), (float)0.0f); + //TODO All move all this to .cu file + //thrust::fill(d_correspondences.begin(), d_correspondences.end(), make_float4(0)); + //thrust::fill(d_correspondenceNormals.begin(), d_correspondenceNormals.end(), make_float4(0)); + //thrust::fill(d_residuals.begin(), d_residuals.end(), (float)0.0f); //We now have all data we need. find correspondence. @@ -114,11 +112,14 @@ Eigen::Matrix4f CameraTracking::rigidAlignment(const float4* d_input, const floa CameraTracking::CameraTracking(int w, int h):width(w),height(h) { - //const int ARRAY_SIZE = width*height*sizeof(CoordPair); - //checkCudaErrors(cudaMalloc((void**)&d_correspondence, ARRAY_SIZE)); - //checkCudaErrors(cudaMemset(d_correspondence, 0, ARRAY_SIZE)); - //checkCudaErrors(cudaMalloc((void**)&d_correspondenceNormals, ARRAY_SIZE)); - //checkCudaErrors(cudaMemset(d_correspondenceNormals, 0, ARRAY_SIZE)); + const int F4_ARRAY_SIZE = width*height*sizeof(float4); + const int ARRAY_SIZE = width*height*sizeof(float); + checkCudaErrors(cudaMalloc((void**)&d_correspondences, F4_ARRAY_SIZE)); + checkCudaErrors(cudaMemset(d_correspondences, 0, F4_ARRAY_SIZE)); + checkCudaErrors(cudaMalloc((void**)&d_correspondenceNormals, F4_ARRAY_SIZE)); + checkCudaErrors(cudaMemset(d_correspondenceNormals, 0, F4_ARRAY_SIZE)); + checkCudaErrors(cudaMalloc((void**)&d_residuals, ARRAY_SIZE)); + checkCudaErrors(cudaMemset(d_residuals, 0, ARRAY_SIZE)); float arr[16] = { 1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1 }; deltaTransform = Matrix4x4f(arr); const float intrinsics[] = {525.0, 0, 319.5, 0, 525.0, 239.5, 0, 0, 1}; //TODO: read from file @@ -132,8 +133,9 @@ CameraTracking::CameraTracking(int w, int h):width(w),height(h) CameraTracking::~CameraTracking() { - //checkCudaErrors(cudaFree(d_correspondence)); - //checkCudaErrors(cudaFree(d_correspondenceNormals)); + checkCudaErrors(cudaFree(d_correspondences)); + checkCudaErrors(cudaFree(d_correspondenceNormals)); + checkCudaErrors(cudaFree(d_residuals)); } diff --git a/CameraTracking.h b/CameraTracking.h index fbc0146..2c3a220 100644 --- a/CameraTracking.h +++ b/CameraTracking.h @@ -10,8 +10,8 @@ #endif #include -#include -#include +//#include +//#include //This is a simple vector library. Use this with CUDA instead of GLM. #include "cuda_helper/cuda_SimpleMatrixUtil.h" @@ -28,8 +28,8 @@ // int dummy = -2; //padding //}; -using thrust::device_vector; -using thrust::device_ptr; +//using thrust::device_vector; +//using thrust::device_ptr; class CameraTracking { @@ -38,12 +38,13 @@ class CameraTracking { //LinearSystem linearSystem; Solver solver; int maxIters = 20; - //float4* d_correspondenceNormals; - //float4* d_correspondence; + float4* d_correspondenceNormals; + float4* d_correspondences; + float* d_residuals; //thrust::device_vector d_coordPair; - device_vector d_correspondences; - device_vector d_correspondenceNormals; - device_vector d_residuals; + //device_vector d_correspondences; + //device_vector d_correspondenceNormals; + //device_vector d_residuals; Matrix4x4f deltaTransform; float globalCorrespondenceError = 0.0f; Matrix4x4f delinearizeTransformation(const Vector6f & sol); diff --git a/CameraTrackingUtils.cu b/CameraTrackingUtils.cu index be2926d..1407a73 100644 --- a/CameraTrackingUtils.cu +++ b/CameraTrackingUtils.cu @@ -9,8 +9,8 @@ #include #include "cuda_helper/helper_cuda.h" #include "cuda_helper/helper_math.h" -#include -#include +//#include +//#include #include #include "common.h" @@ -34,8 +34,8 @@ dim3 blocks = dim3(20, 15, 1); dim3 threads = dim3(32, 32, 1); -using thrust::device_vector; -using thrust::device_ptr; +//using thrust::device_vector; +//using thrust::device_ptr; __device__ __constant__ float3x3 K; //Camera intrinsic matrix __device__ __constant__ float3x3 K_inv; @@ -175,19 +175,20 @@ void FindCorrespondences(const float4* input, const float4* target, } extern "C" float computeCorrespondences(const float4* d_input, const float4* d_target, - const float4* d_targetNormals, device_vector& corres, - device_vector& corresNormals, device_vector& residuals, + const float4* d_targetNormals, float4* corres, + float4* corresNormals, float* residuals, const float4x4 deltaTransform, const int width, const int height) { //First clear the previous correspondence calculation checkCudaErrors(cudaMemcpyToSymbol(globalError, &idealError, sizeof(float))); - float4* d_correspondences = thrust::raw_pointer_cast(&corres[0]); - float4* d_corresNormals = thrust::raw_pointer_cast(&corresNormals[0]); - float* d_residuals = thrust::raw_pointer_cast(&residuals[0]); + //TODO All move all this to .cu file + thrust::fill(corres, corres + (width*height), float4{0}); + thrust::fill(corresNormals, corresNormals+ (width*height), float4{0}); + thrust::fill(residuals, residuals+ (width*height), (float)0.0f); FindCorrespondences <<>>(d_input, d_target, d_targetNormals, - d_correspondences, d_corresNormals, d_residuals, deltaTransform, distThres, normalThres, width, height); + corres, corresNormals, residuals, deltaTransform, distThres, normalThres, width, height); float globalErrorReadback = 0.0; checkCudaErrors(cudaMemcpyFromSymbol(&globalErrorReadback, globalError, sizeof(float))); diff --git a/Solver.cpp b/Solver.cpp index 01185b0..322cd32 100755 --- a/Solver.cpp +++ b/Solver.cpp @@ -1,14 +1,14 @@ #include "Solver.h" #include "termcolor.hpp" #include -#include -#include +//#include +//#include float alpha = 1.0; float beta = 0.0f; extern "C" -void CalculateJacobiansAndResiduals(const float4* d_input, const device_vector& corres, const device_vector& d_corresNormals, device_vector& d_Jac); +void CalculateJacobiansAndResiduals(const float4* d_input, const float4* corres, const float4* d_corresNormals, const float* d_Jac, const float* d_res); //inline //float calculate_B(const vec3& n, const vec3& d, const vec3& s) { // glm::vec3 p = vec3(d - s); @@ -46,16 +46,16 @@ void Solver::PrintSystem() { // JacMat.row(index) << n.x, n.y, n.z, T.x, T.y, T.z ; //} -void Solver::BuildLinearSystem(const float4* d_input, const device_vector& d_correspondences, const device_vector& d_correspondenceNormals, const device_vector& d_residuals, int width, int height) { +void Solver::BuildLinearSystem(const float4* d_input, const float4* d_correspondences, const float4* d_correspondenceNormals, const float* d_residuals, int width, int height) { - d_residual_ptr = thrust::raw_pointer_cast(&d_residuals[0]); + //d_residual = thrust::raw_pointer_cast(&d_residuals[0]); numCorrPairs = width*height; //corrImageCoords.size(); //d_Jac.resize(numCorrPairs*6); //Do we need to resize at runtime? //d_residual.resize(numCorrPairs); - thrust::fill(d_Jac.begin(), d_Jac.end(), 0); + //thrust::fill(d_Jac.begin(), d_Jac.end(), 0); //thrust::fill(d_residual.begin(), d_residual.end(), 0); - thrust::fill(d_JTJ.begin(), d_JTJ.end(), 0); - thrust::fill(d_JTr.begin(), d_JTr.end(), 0); + //thrust::fill(d_JTJ.begin(), d_JTJ.end(), 0); + //thrust::fill(d_JTr.begin(), d_JTr.end(), 0); //Jac = MatrixXf(numCorrPairs,6); //residual = VectorXf(numCorrPairs); //PrintMatrixDims(Jac, std::string("Jac")); @@ -68,22 +68,26 @@ void Solver::BuildLinearSystem(const float4* d_input, const device_vector #include "Solver.h" #include "cuda_helper/helper_math.h" +#include #define numCols 640 #define numRows 480 @@ -9,8 +10,8 @@ dim3 blocks = dim3(20, 15, 1); dim3 threads = dim3(32, 32, 1); -using FloatVec = thrust::device_vector; -using Float4Vec = thrust::device_vector; +//using FloatVec = thrust::device_vector; +//using Float4Vec = thrust::device_vector; //using CorrPairVec = thrust::device_vector; @@ -51,16 +52,18 @@ void CalculateJacAndResKernel(const float4* d_src, const float4* d_dest, const f //__device__ inline //void CalculateJTJ -extern "C" void CalculateJacobiansAndResidual(const float4* d_src, Float4Vec targ, Float4Vec targNormals, FloatVec Jac) { +extern "C" void CalculateJacobiansAndResidual(const float4* d_src, const float4* d_targ, const float4* d_targNormals, + float* d_Jac) { //First calculate Jacobian and Residual matrices - float4* d_targ = thrust::raw_pointer_cast(&targ[0]); - float4* d_targNormals = thrust::raw_pointer_cast(&targNormals[0]); - float* d_jacobianMatrix = thrust::raw_pointer_cast(&Jac[0]); + //float4* d_targ = thrust::raw_pointer_cast(&targ[0]); + //float4* d_targNormals = thrust::raw_pointer_cast(&targNormals[0]); + //float* d_jacobianMatrix = thrust::raw_pointer_cast(&Jac[0]); //float* d_resVector = thrust::raw_pointer_cast(&residual[0]); //float* d_jtj = thrust::raw_pointer_cast(&JTJ[0]); //float* d_jtr = thrust::raw_pointer_cast(&JTr[0]); - CalculateJacAndResKernel<<>>(d_src, d_targ, d_targNormals, d_jacobianMatrix); + thrust::fill(d_Jac, d_Jac+(numCols*numRows*6), 0); //TODO - is this redundant? + CalculateJacAndResKernel<<>>(d_src, d_targ, d_targNormals, d_Jac); //Then calculate Matrix-vector JTr and Matrix-matrix JTJ products } diff --git a/Solver.h b/Solver.h index e0f39d3..4642054 100755 --- a/Solver.h +++ b/Solver.h @@ -7,17 +7,17 @@ #include "cuda_helper/helper_cuda.h" #include #include -#include -#include +//#include +//#include using namespace Eigen; -using thrust::device_vector; +//using thrust::device_vector; class Solver { public: uint numIters = 10; - void BuildLinearSystem(const float4* , const device_vector& , const device_vector& , const device_vector& , int , int ); + void BuildLinearSystem(const float4* , const float4* , const float4* , const float* , int , int ); void PrintSystem(); @@ -29,6 +29,11 @@ class Solver { Matrix4x4f getTransform() {return SE3Exp(estimate);}; double getError() {return TotalError;}; private: + int JAC_SIZE; + int RES_SIZE; + int JTJ_SIZE; + int JTr_SIZE; + const int num_vars_in_jac = 6; Vector6f update, estimate; //Will hold solution bool solution_exists = false; //Matrix4x4f deltaT; //intermediate estimated transform @@ -47,14 +52,13 @@ class Solver { cudaError_t cudaStat; cublasStatus_t stat; cublasHandle_t handle; - thrust::device_vector d_Jac; //Computed on device + //thrust::device_vector d_Jac; //Computed on device //thrust::device_vector d_residual; //Computed on device - thrust::device_vector d_JTr; //then multiplied on device - thrust::device_vector d_JTJ; //finally this is computed - float* d_Jac_ptr = nullptr; - float* d_residual_ptr = nullptr; - float* d_JTr_ptr = nullptr; - float* d_JTJ_ptr = nullptr; + //thrust::device_vector d_JTr; //then multiplied on device + //thrust::device_vector d_JTJ; //finally this is computed + float* d_Jac = nullptr; + float* d_JTr = nullptr; + float* d_JTJ = nullptr; };