diff --git a/CMakeLists.txt b/CMakeLists.txt index 9ddff224..e0455c4b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.16) project(rmagine LANGUAGES CXX C - VERSION 2.2.5)# TODO update this version when merging into main-branch + VERSION 2.2.6)# TODO update this version when merging into main-branch option(BUILD_TOOLS "Build tools" ON) option(BUILD_TESTS "Build tests" ON) @@ -114,6 +114,9 @@ endif(OPENMP_FOUND) ######################################## ## Optional Deps +# for ouster config loading +find_package(jsoncpp) + ###################################### ## CUDA: For Optix ## ###################################### @@ -178,6 +181,11 @@ set(RMAGINE_STATIC_LIBRARIES) ### RMAGINE CORE LIB add_subdirectory(src/rmagine_core) +### RMAGINE OUSTER LIB +if(jsoncpp_FOUND) + add_subdirectory(src/rmagine_ouster) +endif(jsoncpp_FOUND) + ### RMAGINE EMBREE LIB if(embree_FOUND) add_subdirectory(src/rmagine_embree) @@ -193,7 +201,7 @@ endif(CUDA_FOUND) message(STATUS "${BoldCyan}Components build:${ColourReset}") foreach(LIBRARY ${RMAGINE_LIBRARIES}) - message(STATUS "- ${BoldGreen}${LIBRARY}${ColourReset}") + message(STATUS "- ${BoldGreen}${LIBRARY}${ColourReset}") endforeach() #### TESTS diff --git a/README.md b/README.md index 9c89873f..0eb836bc 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,8 @@ Rmagine Issues   •   Examples +   •   + Viewer
@@ -116,7 +118,7 @@ More detailed examples explaining each step and how to customize it to your need Please reference the following papers when using the Rmagine library in your scientific work. -```latex +```bib @inproceedings{mock2023rmagine, title={{Rmagine: 3D Range Sensor Simulation in Polygonal Maps via Ray Tracing for Embedded Hardware on Mobile Robots}}, author={Mock, Alexander and Wiemann, Thomas and Hertzberg, Joachim}, @@ -127,6 +129,7 @@ Please reference the following papers when using the Rmagine library in your sci ``` ## Rmagine-accelerated Applications +- [rmagine_viewer](https://github.com/amock/rmagine_viewer) - [rmagine_gazebo_plugins](https://github.com/uos/rmagine_gazebo_plugins) - [RMCL](https://github.com/uos/rmcl) - [radarays_ros](https://github.com/uos/radarays_ros) diff --git a/apps/rmagine_benchmark/Main.cpp b/apps/rmagine_benchmark/Main.cpp index 3cfc533b..91e8359f 100644 --- a/apps/rmagine_benchmark/Main.cpp +++ b/apps/rmagine_benchmark/Main.cpp @@ -116,7 +116,7 @@ int main(int argc, char** argv) // Define what to simulate double velos_per_second_mean = 0.0; - std::cout << "- range of last ray: " << cpu_sim->simulateRanges(Tbm)[Tbm.size() * model->phi.size * model->theta.size - 1] << std::endl; + // std::cout << "- range of last ray: " << cpu_sim->simulateRanges(Tbm)[Tbm.size() * model->phi.size * model->theta.size - 1] << std::endl; std::cout << "-- Starting Benchmark --" << std::endl; // predefine result buffer diff --git a/dat/ouster_config/os0-128x1024x20-meta.json b/dat/ouster_config/os0-128x1024x20-meta.json new file mode 100644 index 00000000..60026a7e --- /dev/null +++ b/dat/ouster_config/os0-128x1024x20-meta.json @@ -0,0 +1,461 @@ +{ + "beam_altitude_angles": + [ + 45.31, + 44.31, + 43.58, + 43.16, + 42.31, + 41.32, + 40.61, + 40.19, + 39.32, + 38.35, + 37.66, + 37.22, + 36.36, + 35.42, + 34.73, + 34.28, + 33.4, + 32.48, + 31.81, + 31.34, + 30.47, + 29.58, + 28.9, + 28.44, + 27.55, + 26.68, + 26.01, + 25.53, + 24.64, + 23.8, + 23.15, + 22.64, + 21.76, + 20.93, + 20.28, + 19.77, + 18.88, + 18.09, + 17.44, + 16.91, + 16.01, + 15.25, + 14.61, + 14.06, + 13.16, + 12.42, + 11.78, + 11.25, + 10.32, + 9.61, + 8.97, + 8.4, + 7.49, + 6.81, + 6.18, + 5.58, + 4.66, + 4.01, + 3.38, + 2.76, + 1.84, + 1.2, + 0.57, + -0.06, + -0.98, + -1.6, + -2.22, + -2.89, + -3.8, + -4.4, + -5.03, + -5.7, + -6.62, + -7.2, + -7.83, + -8.52, + -9.43, + -10, + -10.65, + -11.37, + -12.27, + -12.83, + -13.47, + -14.21, + -15.12, + -15.65, + -16.3, + -17.06, + -17.97, + -18.47, + -19.12, + -19.91, + -20.83, + -21.32, + -21.98, + -22.78, + -23.71, + -24.18, + -24.83, + -25.67, + -26.59, + -27.05, + -27.7, + -28.56, + -29.48, + -29.92, + -30.6, + -31.48, + -32.4, + -32.84, + -33.52, + -34.4, + -35.32, + -35.76, + -36.44, + -37.36, + -38.28, + -38.71, + -39.4, + -40.36, + -41.27, + -41.69, + -42.38, + -43.36, + -44.29, + -44.69, + -45.4, + -46.42 + ], + "beam_azimuth_angles": + [ + 11.58, + 4.26, + -2.91, + -9.95, + 11.11, + 4.08, + -2.85, + -9.65, + 10.69, + 3.9, + -2.8, + -9.39, + 10.34, + 3.75, + -2.75, + -9.18, + 10.03, + 3.62, + -2.72, + -8.99, + 9.77, + 3.5, + -2.71, + -8.84, + 9.53, + 3.4, + -2.68, + -8.72, + 9.32, + 3.3, + -2.68, + -8.6, + 9.14, + 3.21, + -2.69, + -8.52, + 8.99, + 3.13, + -2.69, + -8.45, + 8.84, + 3.06, + -2.69, + -8.41, + 8.73, + 2.99, + -2.7, + -8.37, + 8.63, + 2.94, + -2.73, + -8.36, + 8.54, + 2.9, + -2.75, + -8.35, + 8.48, + 2.85, + -2.77, + -8.37, + 8.43, + 2.83, + -2.81, + -8.4, + 8.4, + 2.78, + -2.84, + -8.43, + 8.38, + 2.76, + -2.87, + -8.51, + 8.37, + 2.72, + -2.91, + -8.57, + 8.38, + 2.72, + -2.96, + -8.65, + 8.4, + 2.7, + -3.03, + -8.76, + 8.45, + 2.7, + -3.08, + -8.87, + 8.49, + 2.69, + -3.15, + -9.01, + 8.57, + 2.69, + -3.22, + -9.18, + 8.67, + 2.7, + -3.31, + -9.36, + 8.79, + 2.71, + -3.41, + -9.58, + 8.92, + 2.73, + -3.52, + -9.81, + 9.09, + 2.77, + -3.63, + -10.1, + 9.27, + 2.79, + -3.78, + -10.42, + 9.5, + 2.84, + -3.93, + -10.8, + 9.78, + 2.9, + -4.11, + -11.23, + 10.11, + 2.95, + -4.34, + -11.75 + ], + "build_date": "2022-09-21T17:47:45Z", + "build_rev": "v2.4.0", + "client_version": "ouster_client 0.7.1", + "data_format": + { + "column_window": + [ + 0, + 1023 + ], + "columns_per_frame": 1024, + "columns_per_packet": 16, + "pixel_shift_by_row": + [ + 33, + 12, + -8, + -28, + 32, + 12, + -8, + -27, + 30, + 11, + -8, + -27, + 29, + 11, + -8, + -26, + 29, + 10, + -8, + -26, + 28, + 10, + -8, + -25, + 27, + 10, + -8, + -25, + 27, + 9, + -8, + -24, + 26, + 9, + -8, + -24, + 26, + 9, + -8, + -24, + 25, + 9, + -8, + -24, + 25, + 9, + -8, + -24, + 25, + 8, + -8, + -24, + 24, + 8, + -8, + -24, + 24, + 8, + -8, + -24, + 24, + 8, + -8, + -24, + 24, + 8, + -8, + -24, + 24, + 8, + -8, + -24, + 24, + 8, + -8, + -24, + 24, + 8, + -8, + -25, + 24, + 8, + -9, + -25, + 24, + 8, + -9, + -25, + 24, + 8, + -9, + -26, + 24, + 8, + -9, + -26, + 25, + 8, + -9, + -27, + 25, + 8, + -10, + -27, + 25, + 8, + -10, + -28, + 26, + 8, + -10, + -29, + 26, + 8, + -11, + -30, + 27, + 8, + -11, + -31, + 28, + 8, + -12, + -32, + 29, + 8, + -12, + -33 + ], + "pixels_per_column": 128, + "udp_profile_imu": "LEGACY", + "udp_profile_lidar": "LEGACY" + }, + "hostname": "", + "image_rev": "ousteros-image-prod-aries-v2.4.0+20220921174636", + "imu_to_sensor_transform": + [ + 1, + 0, + 0, + 6.253, + 0, + 1, + 0, + -11.775, + 0, + 0, + 1, + 7.645, + 0, + 0, + 0, + 1 + ], + "initialization_id": 7109749, + "json_calibration_version": 4, + "lidar_mode": "1024x20", + "lidar_origin_to_beam_origin_mm": 27.67, + "lidar_to_sensor_transform": + [ + -1, + 0, + 0, + 0, + 0, + -1, + 0, + 0, + 0, + 0, + 1, + 36.18, + 0, + 0, + 0, + 1 + ], + "prod_line": "OS-0-128", + "prod_pn": "840-103574-06", + "prod_sn": "122219001184", + "status": "RUNNING", + "udp_port_imu": 55235, + "udp_port_lidar": 52031 +} diff --git a/src/rmagine_core/CMakeLists.txt b/src/rmagine_core/CMakeLists.txt index 9bcfe03d..9ab8b04b 100644 --- a/src/rmagine_core/CMakeLists.txt +++ b/src/rmagine_core/CMakeLists.txt @@ -12,6 +12,7 @@ set(RMAGINE_CORE_SRCS src/types/Memory.cpp src/types/conversions.cpp src/types/sensors.cpp + src/types/mesh_types.cpp # Util src/util/synthetic.cpp src/util/assimp/helper.cpp diff --git a/src/rmagine_core/include/rmagine/math/definitions.h b/src/rmagine_core/include/rmagine/math/definitions.h deleted file mode 100644 index 6b585dc1..00000000 --- a/src/rmagine_core/include/rmagine/math/definitions.h +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef RMAGINE_MATH_DEFINITIONS_OLD_H -#define RMAGINE_MATH_DEFINITIONS_OLD_H - -#include -#include -#include - -namespace rmagine -{ - -#define __UINT_MAX__ (__INT_MAX__ * 2U + 1U) - -#define DEG_TO_RAD 0.017453292519943295 -#define DEG_TO_RAD_F 0.017453292519943295f -#define RAD_TO_DEG 57.29577951308232 -#define RAD_TO_DEG_F 57.29577951308232f - -// Forward declarations -struct Vector2; -struct Vector3; -struct EulerAngles; -struct Quaternion; -struct Transform; -struct Matrix3x3; -struct Matrix4x4; -struct AABB; - -} // namespace rmagine - -#endif // RMAGINE_MATH_DEFINITIONS_H \ No newline at end of file diff --git a/src/rmagine_core/include/rmagine/math/linalg.h b/src/rmagine_core/include/rmagine/math/linalg.h index 5a6b60b8..58bd0a35 100644 --- a/src/rmagine_core/include/rmagine/math/linalg.h +++ b/src/rmagine_core/include/rmagine/math/linalg.h @@ -41,6 +41,7 @@ #define RMAGINE_MATH_LINALG_H #include "types.h" +#include namespace rmagine { @@ -53,8 +54,10 @@ namespace rmagine * @param scale scale vector * @return Matrix4x4 composed 4x4 transformation matrix */ +RMAGINE_FUNCTION Matrix4x4 compose(const Transform& T, const Vector3& scale); +RMAGINE_FUNCTION Matrix4x4 compose(const Transform& T, const Matrix3x3& S); /** @@ -65,6 +68,7 @@ Matrix4x4 compose(const Transform& T, const Matrix3x3& S); * @param T transform object consisting of translational and rotational parts * @param S 3x3 scale matrix */ +RMAGINE_FUNCTION void decompose(const Matrix4x4& M, Transform& T, Matrix3x3& S); /** @@ -75,6 +79,7 @@ void decompose(const Matrix4x4& M, Transform& T, Matrix3x3& S); * @param T transform object consisting of translational and rotational parts * @param scale scale vector */ +RMAGINE_FUNCTION void decompose(const Matrix4x4& M, Transform& T, Vector3& scale); /** @@ -86,6 +91,7 @@ void decompose(const Matrix4x4& M, Transform& T, Vector3& scale); * - if fac=1.0 it is exactly B * - if fac=2.0 it it goes on a (B-A) length from B (extrapolation) */ +RMAGINE_FUNCTION Quaternion polate(const Quaternion& A, const Quaternion& B, float fac); /** @@ -97,8 +103,69 @@ Quaternion polate(const Quaternion& A, const Quaternion& B, float fac); * - if fac=1.0 it is exactly B * - if fac=2.0 it it goes on a (B-A) length from B (extrapolation) */ +RMAGINE_FUNCTION Transform polate(const Transform& A, const Transform& B, float fac); + +// Numerical Recipes +// M = MatrixT::rows() +// N = MatrixT::cols() +// +// Warning: Numerical Recipes has different SVD matrix shapes +// than Wikipedia +template +struct svd_dims { + using U = MatrixT; // same as input + using w = Matrix_; + using W = Matrix_; + using V = Matrix_; +}; + +/** + * @brief own SVD implementation. + * Why use it? + * - ~2x faster than Eigen + * - SOON: Works insided of CUDA kernels + * + */ +template +void svd( + const Matrix_& A, + Matrix_& U, + Matrix_& W, // matrix + Matrix_& V +); + +template +void svd( + const Matrix_& A, + Matrix_& U, + Matrix_& w, // vector version (Cols should be something with max) + Matrix_& V +); + +/** + * @brief SVD that can be used for both CPU and GPU (Cuda kernels) + * + */ +RMAGINE_FUNCTION +void svd( + const Matrix3x3& A, + Matrix3x3& U, + Matrix3x3& W, + Matrix3x3& V +); + +RMAGINE_FUNCTION +void svd( + const Matrix3x3& A, + Matrix3x3& U, + Vector3& w, + Matrix3x3& V +); + } // namespace rmagine +#include "linalg.tcc" + #endif // RMAGINE_MATH_MATH_LINALG_H \ No newline at end of file diff --git a/src/rmagine_core/include/rmagine/math/linalg.tcc b/src/rmagine_core/include/rmagine/math/linalg.tcc new file mode 100644 index 00000000..ef7a3762 --- /dev/null +++ b/src/rmagine_core/include/rmagine/math/linalg.tcc @@ -0,0 +1,582 @@ +#include +#include + +namespace rmagine +{ + +template +void svd( + const Matrix_& a, + Matrix_& u, + Matrix_& w, + Matrix_& v) +{ + constexpr unsigned int m = Rows; + constexpr unsigned int n = Cols; + + constexpr unsigned int max_iterations = 30; + + // extra memory required + bool flag; + int i, its, j, jj, k, l, nm; + float anorm, c, f, g, h, s, scale, x, y, z; + DataT rv1[n]; + + g = scale = anorm = 0.0; + const float eps = std::numeric_limits::epsilon(); + u = a; + + for(i=0; i < n; i++) + { + l = i + 2; + rv1[i] = scale * g; + g = s = scale = 0.0; + if(i < m) + { + for(k=i; k=0; i--) + { + if(i < n-1) + { + if(g != 0.0) + { + for(j=l; j=0; i--) + { + l = i+1; + g = w(i, i); + for(j=l;j=0; k--) + { + for(its=0; its=0; l--) + { + nm=l-1; + if (l == 0 || abs(rv1[l]) <= eps*anorm) { + flag=false; + break; + } + if (abs(w(nm, nm)) <= eps*anorm) + { + break; + } + } + if(flag) + { + c=0.0; + s=1.0; + for(i=l; i +void svd( + const Matrix_& a, + Matrix_& u, + Matrix_& w, // vector version + Matrix_& v) +{ + constexpr unsigned int m = Rows; + constexpr unsigned int n = Cols; + constexpr unsigned int max_iterations = 30; + + // additional memory required + bool flag; + int i, its, j, jj, k, l, nm; + float anorm, c, f, g, h, s, scale, x, y, z; + DataT rv1[n]; + + g = scale = anorm = 0.0; + float eps = std::numeric_limits::epsilon(); + u = a; + + for(i=0; i < n; i++) + { + l = i+2; + rv1[i] = scale*g; + g = s = scale = 0.0; + if(i < m) + { + for(k=i; k=0; i--) + { + if(i < n-1) + { + if(g != 0.0) + { + for(j=l; j=0; i--) + { + l = i+1; + g = w(i, 0); + for(j=l;j=0; k--) + { + for(its=0; its=0; l--) + { + nm=l-1; + if (l == 0 || abs(rv1[l]) <= eps*anorm) { + flag=false; + break; + } + if (abs(w(nm, 0)) <= eps*anorm) + { + break; + } + } + if(flag) + { + c=0.0; + s=1.0; + for(i=l; i +RMAGINE_INLINE_FUNCTION +T SQR(const T a) {return a*a;} + +template +RMAGINE_INLINE_FUNCTION +const T &MAX(const T &a, const T &b) + {return b > a ? (b) : (a);} + +RMAGINE_INLINE_FUNCTION +float MAX(const double &a, const float &b) + {return b > a ? (b) : float(a);} + +RMAGINE_INLINE_FUNCTION +float MAX(const float &a, const double &b) + {return b > a ? float(b) : (a);} + +template +RMAGINE_INLINE_FUNCTION +const T &MIN(const T &a, const T &b) + {return b < a ? (b) : (a);} + +RMAGINE_INLINE_FUNCTION +float MIN(const double &a, const float &b) + {return b < a ? (b) : float(a);} + +RMAGINE_INLINE_FUNCTION +float MIN(const float &a, const double &b) + {return b < a ? float(b) : (a);} + +template +RMAGINE_INLINE_FUNCTION +T SIGN(const T &a, const T &b) + {return b >= 0 ? (a >= 0 ? a : -a) : (a >= 0 ? -a : a);} + +RMAGINE_INLINE_FUNCTION +float SIGN(const float &a, const double &b) + {return b >= 0 ? (a >= 0 ? a : -a) : (a >= 0 ? -a : a);} + +RMAGINE_INLINE_FUNCTION +float SIGN(const double &a, const float &b) + {return (float)(b >= 0 ? (a >= 0 ? a : -a) : (a >= 0 ? -a : a));} + +template +RMAGINE_INLINE_FUNCTION +void SWAP(T &a, T &b) + {T dum=a; a=b; b=dum;} + +template +RMAGINE_INLINE_FUNCTION +T PYTHAG(const T a, const T b) +{ + T absa = abs(a); + T absb = abs(b); + return (absa > absb ? absa * sqrt(1.0+SQR(absb/absa)) : + (absb == 0.0 ? 0.0 : absb * sqrt(1.0+SQR(absa/absb)))); +} + template Vector3_ min(const Vector3_& a, const Vector3_& b); @@ -56,7 +117,6 @@ Vector3_ min(const Vector3_& a, const Vector3_& b); template Vector3_ max(const Vector3_& a, const Vector3_& b); - ///////////// // #multNxN //////// @@ -371,6 +431,29 @@ Memory cov( const MemoryView& v2 ); +/** + * @brief decompose A = UWV* using singular value decomposition + */ +void svd( + const MemoryView& As, + MemoryView& Us, + MemoryView& Ws, + MemoryView& Vs +); + +/** + * @brief decompose A = UWV* using singular value decomposition + * + * w is a vector which is the diagonal of matrix W + */ +void svd( + const MemoryView& As, + MemoryView& Us, + MemoryView& ws, + MemoryView& Vs +); + + } // namespace rmagine #include "math.tcc" diff --git a/src/rmagine_core/include/rmagine/math/types/Matrix.hpp b/src/rmagine_core/include/rmagine/math/types/Matrix.hpp index 46cdc561..22e824b5 100644 --- a/src/rmagine_core/include/rmagine/math/types/Matrix.hpp +++ b/src/rmagine_core/include/rmagine/math/types/Matrix.hpp @@ -333,6 +333,17 @@ struct Matrix_ { RMAGINE_INLINE_FUNCTION operator EulerAngles_() const; + /** + * @brief Transformation Matrix -> Transform + * WARNING: The matrix has to be isometric, i.e. composed only of + * rotational and translational components. If it has e.g. scalar + * components use the "decompose" function instead + * + * @return Transform_ + */ + RMAGINE_INLINE_FUNCTION + operator Transform_() const; + /** * @brief Data Type Cast to ConvT * diff --git a/src/rmagine_core/include/rmagine/math/types/Matrix.tcc b/src/rmagine_core/include/rmagine/math/types/Matrix.tcc index 5973ce91..5fe5271e 100644 --- a/src/rmagine_core/include/rmagine/math/types/Matrix.tcc +++ b/src/rmagine_core/include/rmagine/math/types/Matrix.tcc @@ -1256,7 +1256,7 @@ Matrix_::operator EulerAngles_() const // pitch (y-axis) if (fabs(sB) >= 1.0) { - e.pitch = copysignf(M_PI / 2, sB); // use 90 degrees if out of range + e.pitch = copysignf(M_PI_2, sB); // use 90 degrees if out of range } else { e.pitch = asinf(sB); } @@ -1267,6 +1267,18 @@ Matrix_::operator EulerAngles_() const return e; } +template +RMAGINE_INLINE_FUNCTION +Matrix_::operator Transform_() const +{ + static_assert(Rows == 4 && Cols == 4); + + Transform_ T; + T.set(*this); + return T; +} + + template template diff --git a/src/rmagine_core/include/rmagine/math/types/Quaternion.tcc b/src/rmagine_core/include/rmagine/math/types/Quaternion.tcc index a3742652..b4dac7da 100644 --- a/src/rmagine_core/include/rmagine/math/types/Quaternion.tcc +++ b/src/rmagine_core/include/rmagine/math/types/Quaternion.tcc @@ -182,8 +182,6 @@ template RMAGINE_INLINE_FUNCTION Quaternion_::operator EulerAngles_() const { - constexpr DataT PI_HALF = M_PI / 2.0; - // TODO: check // https://en.wikipedia.org/wiki/Conversion_between_quaternions_and_Euler_angles // checked once @@ -215,7 +213,7 @@ Quaternion_::operator EulerAngles_() const // pitch (y-axis) if (fabs(sinp) >= 1.0f) { - e.pitch = copysign(PI_HALF, sinp); // use 90 degrees if out of range + e.pitch = copysign(M_PI_2, sinp); // use 90 degrees if out of range } else { e.pitch = asin(sinp); } diff --git a/src/rmagine_core/include/rmagine/math/types/Transform.hpp b/src/rmagine_core/include/rmagine/math/types/Transform.hpp index 2ad90aa9..baef4389 100644 --- a/src/rmagine_core/include/rmagine/math/types/Transform.hpp +++ b/src/rmagine_core/include/rmagine/math/types/Transform.hpp @@ -38,6 +38,12 @@ struct Transform_ RMAGINE_INLINE_FUNCTION void setIdentity(); + /** + * @brief Setting the transform from an 4x4 transformation matrix + * WARNING matrix must be isometric, i.e. must only contain + * rotational and translational parts (not scale). Otherwise, + * use "decompose" function + */ RMAGINE_INLINE_FUNCTION void set(const Matrix_& M); @@ -76,13 +82,6 @@ struct Transform_ RMAGINE_INLINE_FUNCTION Transform_ pow(const DataT& exp) const; - // OPERATORS - RMAGINE_INLINE_FUNCTION - void operator=(const Matrix_& M) - { - set(M); - } - RMAGINE_INLINE_FUNCTION Transform_ operator~() const { @@ -108,6 +107,20 @@ struct Transform_ return mult(v); } + ///////////////////// + // CASTING + + /** + * @brief Transform -> Matrix4x4 + * + * @return Matrix_ + */ + RMAGINE_INLINE_FUNCTION + operator Matrix_() const; + + /** + * @brief Internal data type cast + */ template Transform_ cast() const { diff --git a/src/rmagine_core/include/rmagine/math/types/Transform.tcc b/src/rmagine_core/include/rmagine/math/types/Transform.tcc index 02fd878a..8e8d6b5a 100644 --- a/src/rmagine_core/include/rmagine/math/types/Transform.tcc +++ b/src/rmagine_core/include/rmagine/math/types/Transform.tcc @@ -78,4 +78,13 @@ Transform_ Transform_::pow(const DataT& exp) const return res; } +template +RMAGINE_INLINE_FUNCTION +Transform_::operator Matrix_() const +{ + Matrix_ M; + M.set(*this); + return M; +} + } // namespace rmagine \ No newline at end of file diff --git a/src/rmagine_core/include/rmagine/math/types/Vector3.hpp b/src/rmagine_core/include/rmagine/math/types/Vector3.hpp index e4a0b669..6a249511 100644 --- a/src/rmagine_core/include/rmagine/math/types/Vector3.hpp +++ b/src/rmagine_core/include/rmagine/math/types/Vector3.hpp @@ -245,6 +245,31 @@ struct Vector3_ { return multEwise(b); } + + // Use with care! + RMAGINE_INLINE_FUNCTION + DataT& operator[](const size_t& idx) + { + return *(&x + idx); + } + + RMAGINE_INLINE_FUNCTION + const DataT& operator[](const size_t& idx) const + { + return *(&x + idx); + } + + RMAGINE_INLINE_FUNCTION + DataT& operator()(const size_t& idx) + { + return *(&x + idx); + } + + RMAGINE_INLINE_FUNCTION + const DataT& operator()(const size_t& idx) const + { + return *(&x + idx); + } }; } // namespace rmagine diff --git a/src/rmagine_core/include/rmagine/math/types/definitions.h b/src/rmagine_core/include/rmagine/math/types/definitions.h index 39de2857..39913160 100644 --- a/src/rmagine_core/include/rmagine/math/types/definitions.h +++ b/src/rmagine_core/include/rmagine/math/types/definitions.h @@ -17,7 +17,6 @@ namespace rmagine // Forward declarations - template struct Vector2_; @@ -41,6 +40,8 @@ struct AABB_; using Vector2f = Vector2_; +using Vector2u = Vector2_; +using Vector2i = Vector2_; using Vector3f = Vector3_; using Matrix2x2f = Matrix_; using Matrix3x3f = Matrix_; @@ -79,17 +80,12 @@ using AABB = AABB_; using Vector = Vector3; using Point = Vector; +// @amock TODO: how to define a pixel? unsigned or signed? +// - projection operations can result in negative pixels +// using Pixel = Vector2u; +// using Pixel = Vector2i; -// struct Vector2; -// struct Vector3; -// struct EulerAngles; -// struct Quaternion; -// struct Transform; -// struct Matrix3x3; -// struct Matrix4x4; -// struct AABB; - } // namespace rmagine #endif // RMAGINE_MATH_DEFINITIONS_H \ No newline at end of file diff --git a/src/rmagine_core/include/rmagine/simulation/SimulationResults.hpp b/src/rmagine_core/include/rmagine/simulation/SimulationResults.hpp index aa8fe2cd..9942a1e1 100644 --- a/src/rmagine_core/include/rmagine/simulation/SimulationResults.hpp +++ b/src/rmagine_core/include/rmagine/simulation/SimulationResults.hpp @@ -97,7 +97,6 @@ struct FaceIds { Memory face_ids; }; - /** * @brief GeomIds computed by the simulators * @@ -127,6 +126,13 @@ struct ObjectIds { }; +/** + * @brief Convenience object if we want to access all attributes at intersection + * + * WARNING: use with care; It causes slower runtime in contrast to a more specific + * choice of attributes + * + */ template using IntAttrAll = Bundle< Hits, @@ -156,7 +162,7 @@ template static void resize_memory_bundle(BundleT& res, unsigned int W, unsigned int H, - unsigned int N ) + unsigned int N = 1 ) { if constexpr(BundleT::template has >()) { @@ -194,24 +200,6 @@ static void resize_memory_bundle(BundleT& res, } } -// template -// static void resize_memory_bundle(BundleT& res, -// unsigned int W, -// unsigned int H, -// unsigned int N ) -// { -// resize_memory_bundle_(res, W, H, N); -// } - -template -[[deprecated("Use resize_memory_bundle() instead.")]] -void resizeMemoryBundle(BundleT& res, - unsigned int W, - unsigned int H, - unsigned int N ) -{ - resize_memory_bundle(res, W, H, N); -} } // namespace rmagine diff --git a/src/rmagine_core/include/rmagine/types/Bundle.hpp b/src/rmagine_core/include/rmagine/types/Bundle.hpp index cd015811..24952635 100644 --- a/src/rmagine_core/include/rmagine/types/Bundle.hpp +++ b/src/rmagine_core/include/rmagine/types/Bundle.hpp @@ -93,8 +93,6 @@ struct Bundle : public Tp... static constexpr bool value = has_type::type::value; }; - - template struct Index; diff --git a/src/rmagine_core/include/rmagine/types/Memory.hpp b/src/rmagine_core/include/rmagine/types/Memory.hpp index 3b0279c5..ed7b1702 100644 --- a/src/rmagine_core/include/rmagine/types/Memory.hpp +++ b/src/rmagine_core/include/rmagine/types/Memory.hpp @@ -39,6 +39,7 @@ #include #include #include +#include #include @@ -46,6 +47,15 @@ namespace rmagine { struct RAM; +class MemoryResizeError : public std::runtime_error { +public: + MemoryResizeError() + :std::runtime_error("rmagine: cannot resize memory view!") + { + + } +}; + template class MemoryView { public: @@ -126,6 +136,11 @@ class MemoryView { return m_size; } + // Shall we introduce this? + // virtual void resize(size_t N) { + // throw MemoryResizeError(); + // } + MemoryView slice(size_t idx_start, size_t idx_end) { return MemoryView(m_mem + idx_start, idx_end - idx_start); @@ -173,6 +188,7 @@ class Memory : public MemoryView { ~Memory(); + // virtual void resize(size_t N); void resize(size_t N); // Copy for assignment of same MemT diff --git a/src/rmagine_core/include/rmagine/types/mesh_types.h b/src/rmagine_core/include/rmagine/types/mesh_types.h index 210c7fc1..4e094a4b 100644 --- a/src/rmagine_core/include/rmagine/types/mesh_types.h +++ b/src/rmagine_core/include/rmagine/types/mesh_types.h @@ -43,6 +43,8 @@ #define RMAGINE_TYPES_MESH_TYPES_H #include +#include +#include namespace rmagine { @@ -53,8 +55,24 @@ struct Face { unsigned int v0; unsigned int v1; unsigned int v2; + + // Other access functions + // use with care! No out of range checks + RMAGINE_INLINE_FUNCTION + const unsigned int& operator[](const size_t& idx) const; + + RMAGINE_INLINE_FUNCTION + unsigned int& operator[](const size_t& idx); + + RMAGINE_INLINE_FUNCTION + constexpr size_t size() const + { + return 3; + } }; } // namespace rmagine +#include "mesh_types.tcc" + #endif // RMAGINE_TYPES_MESH_TYPES_H \ No newline at end of file diff --git a/src/rmagine_core/include/rmagine/types/mesh_types.tcc b/src/rmagine_core/include/rmagine/types/mesh_types.tcc new file mode 100644 index 00000000..fa9d07c8 --- /dev/null +++ b/src/rmagine_core/include/rmagine/types/mesh_types.tcc @@ -0,0 +1,18 @@ +#include "mesh_types.h" + +namespace rmagine +{ + +RMAGINE_INLINE_FUNCTION +const unsigned int& Face::operator[](const size_t& idx) const +{ + return *((&v0)+idx); +} + +RMAGINE_INLINE_FUNCTION +unsigned int& Face::operator[](const size_t& idx) +{ + return *((&v0)+idx); +} + +} // namespace rmagine diff --git a/src/rmagine_core/include/rmagine/types/sensor_models.h b/src/rmagine_core/include/rmagine/types/sensor_models.h index 4c69ee3c..d54a15bd 100644 --- a/src/rmagine_core/include/rmagine/types/sensor_models.h +++ b/src/rmagine_core/include/rmagine/types/sensor_models.h @@ -148,6 +148,12 @@ struct SphericalModel return getWidth() * getHeight(); } + RMAGINE_INLINE_FUNCTION + uint32_t getSize() const + { + return getWidth() * getHeight(); + } + RMAGINE_INLINE_FUNCTION float getPhi(uint32_t phi_id) const { @@ -179,6 +185,32 @@ struct SphericalModel { return phi_id * theta.size + theta_id; } + + RMAGINE_INLINE_FUNCTION + Vector2u getPixelCoord(uint32_t buffer_id) const + { + return {buffer_id % theta.size, buffer_id / theta.size}; + } + + // slice horizontal line. vertical is not currently not possible because of memory layout + template + MemoryView getRow(const MemoryView& mem, uint32_t vid) const + { + return mem.slice(vid * getWidth(), (vid+1) * getWidth()); + } + + // for RAM we can access single elements of a buffer + template + DataT& getPixelValue(MemoryView& mem, uint32_t vid, uint32_t hid) const + { + return mem[getBufferId(vid, hid)]; + } + + template + DataT getPixelValue(const MemoryView& mem, uint32_t vid, uint32_t hid) const + { + return mem[getBufferId(vid, hid)]; + } }; using LiDARModel = SphericalModel; @@ -215,6 +247,12 @@ struct PinholeModel { return getWidth() * getHeight(); } + RMAGINE_INLINE_FUNCTION + uint32_t getSize() const + { + return getWidth() * getHeight(); + } + RMAGINE_INLINE_FUNCTION Vector getDirectionOptical(uint32_t vid, uint32_t hid) const { @@ -250,6 +288,31 @@ struct PinholeModel { return vid * width + hid; } + RMAGINE_INLINE_FUNCTION + Vector2u getPixelCoord(uint32_t buffer_id) const + { + return {buffer_id % width, buffer_id / width}; + } + + // slice horizontal line. vertical is not currently not possible because of memory layout + template + MemoryView getRow(const MemoryView& mem, uint32_t vid) const + { + return mem.slice(vid * getWidth(), (vid+1) * getWidth()); + } + + // for RAM we can access single elements of a buffer + template + DataT& getPixelValue(MemoryView& mem, uint32_t vid, uint32_t hid) const + { + return mem[getBufferId(vid, hid)]; + } + + template + DataT getPixelValue(const MemoryView& mem, uint32_t vid, uint32_t hid) const + { + return mem[getBufferId(vid, hid)]; + } }; // Distortion? Fisheye / radial-tangential ? @@ -257,20 +320,20 @@ using CameraModel = PinholeModel; using DepthCameraModel = PinholeModel; // TODO: distortion -struct RadialTangentialDistortion { - // TODO -}; +// struct RadialTangentialDistortion { +// // TODO +// }; -struct FisheyeDistortion { +// struct FisheyeDistortion { -}; +// }; -struct CylindricModel { - static constexpr char name[] = "Cylinder"; - // TODO +// struct CylindricModel { +// static constexpr char name[] = "Cylinder"; +// // TODO -}; +// }; template struct O1DnModel_ { @@ -298,15 +361,15 @@ struct O1DnModel_ { } RMAGINE_INLINE_FUNCTION - uint32_t size() const + uint32_t getSize() const { return getWidth() * getHeight(); } RMAGINE_INLINE_FUNCTION - uint32_t getBufferId(uint32_t vid, uint32_t hid) const + uint32_t size() const { - return vid * getWidth() + hid; + return getWidth() * getHeight(); } RMAGINE_INLINE_FUNCTION @@ -320,6 +383,38 @@ struct O1DnModel_ { { return dirs[getBufferId(vid, hid)]; } + + RMAGINE_INLINE_FUNCTION + uint32_t getBufferId(uint32_t vid, uint32_t hid) const + { + return vid * getWidth() + hid; + } + + RMAGINE_INLINE_FUNCTION + Vector2u getPixelCoord(uint32_t buffer_id) const + { + return {buffer_id % width, buffer_id / width}; + } + + // slice horizontal line. vertical is not currently not possible because of memory layout + template + MemoryView getRow(const MemoryView& mem, uint32_t vid) const + { + return mem.slice(vid * getWidth(), (vid+1) * getWidth()); + } + + // for RAM we can access single elements of a buffer + template + DataT& getPixelValue(MemoryView& mem, uint32_t vid, uint32_t hid) const + { + return mem[getBufferId(vid, hid)]; + } + + template + DataT getPixelValue(const MemoryView& mem, uint32_t vid, uint32_t hid) const + { + return mem[getBufferId(vid, hid)]; + } }; using O1DnModel = O1DnModel_; @@ -351,16 +446,15 @@ struct OnDnModel_ { } RMAGINE_INLINE_FUNCTION - uint32_t size() const + uint32_t getSize() const { return getWidth() * getHeight(); } - RMAGINE_INLINE_FUNCTION - uint32_t getBufferId(uint32_t vid, uint32_t hid) const + uint32_t size() const { - return vid * getWidth() + hid; + return getWidth() * getHeight(); } RMAGINE_INLINE_FUNCTION @@ -375,11 +469,51 @@ struct OnDnModel_ { return dirs[getBufferId(vid, hid)]; } + RMAGINE_INLINE_FUNCTION + uint32_t getBufferId(uint32_t vid, uint32_t hid) const + { + return vid * getWidth() + hid; + } + + RMAGINE_INLINE_FUNCTION + Vector2u getPixelCoord(uint32_t buffer_id) const + { + return {buffer_id % width, buffer_id / width}; + } + + // slice horizontal line. vertical is not currently not possible because of memory layout + template + MemoryView getRow(MemoryView& mem, uint32_t vid) const + { + return mem.slice(vid * getWidth(), (vid+1) * getWidth()); + } + + // for CPU we can access single elements of a buffer + template + DataT& getPixelValue(MemoryView& mem, uint32_t vid, uint32_t hid) const + { + return mem[getBufferId(vid, hid)]; + } + + template + DataT getPixelValue(const MemoryView& mem, uint32_t vid, uint32_t hid) const + { + return mem[getBufferId(vid, hid)]; + } }; using OnDnModel = OnDnModel_; +template +MemoryView slice( + const MemoryView& mem, + const ModelT& model, + const uint32_t pose_id) +{ + return mem.slice(model.getSize() * pose_id, model.getSize() * (pose_id + 1)); +} + } // namespace rmagine #endif // RMAGINE_TYPES_SENSOR_MODELS_H \ No newline at end of file diff --git a/src/rmagine_core/include/rmagine/types/shared_functions.h b/src/rmagine_core/include/rmagine/types/shared_functions.h index a8e11785..c4bdb96d 100644 --- a/src/rmagine_core/include/rmagine/types/shared_functions.h +++ b/src/rmagine_core/include/rmagine/types/shared_functions.h @@ -44,9 +44,17 @@ #ifdef __CUDA_ARCH__ #define RMAGINE_FUNCTION __host__ __device__ #define RMAGINE_INLINE_FUNCTION __inline__ __host__ __device__ +#define RMAGINE_HOST_FUNCTION __host__ +#define RMAGINE_INLINE_HOST_FUNCTION __inline__ __host__ +#define RMAGINE_DEVICE_FUNCTION __device__ +#define RMAGINE_INLINE_DEVICE_FUNCTION __inline__ __device__ #else #define RMAGINE_FUNCTION #define RMAGINE_INLINE_FUNCTION inline +#define RMAGINE_HOST_FUNCTION +#define RMAGINE_INLINE_HOST_FUNCTION inline +#define RMAGINE_DEVICE_FUNCTION +#define RMAGINE_INLINE_DEVICE_FUNCTION inline #endif #endif // RMAGINE_TYPES_SHARED_FUNCTIONS_H \ No newline at end of file diff --git a/src/rmagine_core/src/math/linalg.cpp b/src/rmagine_core/src/math/linalg.cpp index 739b24fb..148d1996 100644 --- a/src/rmagine_core/src/math/linalg.cpp +++ b/src/rmagine_core/src/math/linalg.cpp @@ -1,10 +1,15 @@ #include "rmagine/math/linalg.h" +#include "rmagine/types/Memory.hpp" +#include #include +#include "rmagine/math/math.h" + namespace rmagine { +RMAGINE_HOST_FUNCTION Matrix4x4 compose(const Transform& T, const Vector3& scale) { Matrix4x4 M; @@ -19,6 +24,7 @@ Matrix4x4 compose(const Transform& T, const Vector3& scale) return M * S; } +RMAGINE_HOST_FUNCTION Matrix4x4 compose(const Transform& T, const Matrix3x3& S) { Matrix4x4 M; @@ -38,6 +44,7 @@ Matrix4x4 compose(const Transform& T, const Matrix3x3& S) return M * S_; } +RMAGINE_HOST_FUNCTION void decompose(const Matrix4x4& M, Transform& T, Matrix3x3& S) { Eigen::Matrix4f Meig; @@ -73,6 +80,7 @@ void decompose(const Matrix4x4& M, Transform& T, Matrix3x3& S) T.R.set(R); } +RMAGINE_HOST_FUNCTION void decompose(const Matrix4x4& M, Transform& T, Vector3& scale) { Matrix3x3 S; @@ -85,15 +93,1088 @@ void decompose(const Matrix4x4& M, Transform& T, Vector3& scale) scale.z = S(2,2); } +RMAGINE_HOST_FUNCTION Quaternion polate(const Quaternion& A, const Quaternion& B, float fac) { return A * A.to(B).pow(fac); } +RMAGINE_HOST_FUNCTION Transform polate(const Transform& A, const Transform& B, float fac) { return A * A.to(B).pow(fac); } +RMAGINE_HOST_FUNCTION +void svd( + const Matrix3x3& a, + Matrix3x3& u, + Matrix3x3& w, + Matrix3x3& v) +{ + // TODO: test + constexpr unsigned int max_iterations = 20; + + // additional memory required + bool flag; + int its, j, jj; + float anorm, c, f, g, h, s, scale, x, y, z; + + Vector3 rv1 = Vector3::Zeros(); + + g = s = scale = anorm = 0.0; + float eps = __FLT_EPSILON__; + u = a; + + // FIRST PART + + // i = 0; + // l = 2; + scale = fabs(u(0,0)) + fabs(u(1,0)) + fabs(u(2,0)); + if(scale > 0.0) + { + u(0, 0) /= scale; + u(1, 0) /= scale; + u(2, 0) /= scale; + + s = u(0,0) * u(0,0) + u(1,0) * u(1,0) + u(2,0) * u(2,0); + f = u(0,0); + g = -SIGN(sqrt(s), f); + h = f * g - s; + + u(0, 0) = f - g; + + f = (u(0, 0) * u(0, 1) + u(1, 0) * u(1, 1) + u(2, 0) * u(2, 1)) / h; + u(0, 1) += f * u(0, 0); + u(1, 1) += f * u(1, 0); + u(2, 1) += f * u(2, 0); + + f = (u(0, 0) * u(0, 2) + u(1, 0) * u(1, 2) + u(2, 0) * u(2, 2)) / h; + u(0, 2) += f * u(0, 0); + u(1, 2) += f * u(1, 0); + u(2, 2) += f * u(2, 0); + + u(0, 0) *= scale; + u(1, 0) *= scale; + u(2, 0) *= scale; + } + + w(0, 0) = scale * g; + g = s = scale = 0.0; + + scale = abs(u(0,0)) + abs(u(0,1)) + abs(u(0,2)); + + if(scale > 0.0) + { + u(0, 1) /= scale; + u(0, 2) /= scale; + s = u(0,1) * u(0,1) + u(0,2) * u(0,2); + + f = u(0, 1); + g = -SIGN(sqrt(s),f); + h = f * g-s; + u(0, 1) = f - g; + + rv1.y = u(0, 1) / h; + rv1.z = u(0, 2) / h; + + s = u(1,1) * u(0,1) + u(1,2) * u(0,2); + u(1, 1) += s * rv1.y; + u(1, 2) += s * rv1.z; + + s = u(2,1) * u(0,1) + u(2,2) * u(0,2); + u(2, 1) += s * rv1.y; + u(2, 2) += s * rv1.z; + + u(0, 1) *= scale; + u(0, 2) *= scale; + } + + anorm = fabs(w(0, 0)); + // anorm = MAX(anorm, (fabs(w(0, 0)) + fabs(rv1.x))); // rv1.x is always 0 here, anorm too. fabs(X) >= 0 + + // i = 1; + // l = 3; + rv1.y = scale * g; + g = 0.0; + scale = fabs(u(1, 1)) + fabs(u(2, 1)); + + if(scale > 0.0) + { + u(1,1) /= scale; + u(2,1) /= scale; + + s = u(1,1) * u(1,1) + u(2,1) * u(2,1); + f = u(1,1); + g = -SIGN(sqrt(s),f); + h = f * g - s; + u(1,1) = f-g; + + f = (u(1,1) * u(1,2) + u(2,1) * u(2,2)) / h; + u(1,2) += f * u(1,1); + u(2,2) += f * u(2,1); + + u(1,1) *= scale; + u(2,1) *= scale; + } + + w(1, 1) = scale * g; + g = s = scale = 0.0; + + scale = abs(u(1,2)); + if(scale > 0.0) + { + u(1,2) /= scale; + s = u(1,2) * u(1,2); + + f = u(1, 2); + g = -SIGN(sqrt(s), f); + h = f * g - s; + u(1,2) = f - g; + + rv1.z = u(1,2) / h; + s = u(2,2) * u(1,2); + + u(2,2) += s * rv1.z; + u(1,2) *= scale; + } + + anorm = MAX(anorm, (abs(w(1, 1)) + abs(rv1.y))); + + rv1.z = scale * g; + + scale = abs(u(2, 2)); + if(scale > 0.0) + { + u(2, 2) /= scale; + s = u(2, 2) * u(2, 2); + f = u(2, 2); + g = -SIGN(sqrt(s),f); + h = f * g - s; + + u(2, 2) = f - g; + u(2, 2) *= scale; + } + + w(2, 2) = scale * g; + g = s = scale = 0.0; + + anorm = MAX(anorm, (abs(w(2, 2))+abs(rv1.z))); + + // SECOND PART + v(2, 2) = 1.0; + g = rv1.z; + + // i = 1; + // l = 2; + if(fabs(g) > 0.0) + { + v(2,1) = (u(1,2) / u(1,2)) / g; + s = u(1,2) * v(2,2); + v(2,2) += s * v(2,1); + } + v(1,2) = 0.0; + v(2,1) = 0.0; + v(1,1) = 1.0; + + g = rv1.y; + + // l = 1; + // i = 0; + if(fabs(g) > 0.0) + { + v(1,0) = (u(0,1) / u(0,1)) / g; + v(2,0) = (u(0,2) / u(0,1)) / g; + + s = u(0,1) * v(1,1) + u(0,2) * v(2,1); + v(1,1) += s * v(1,0); + v(2,1) += s * v(2,0); + + s = u(0,1) * v(1,2) + u(0,2) * v(2,2); + v(1,2) += s * v(1,0); + v(2,2) += s * v(2,0); + } + v(0,1) = 0.0; + v(1,0) = 0.0; + v(0,2) = 0.0; + v(2,0) = 0.0; + v(0,0) = 1.0; + g = rv1.x; + + + // THIRD PART + + // i = 2; + // l = 3; + g = w(2, 2); + if(fabs(g) > 0.0) + { + u(2,2) /= g; + } else { + // TODO(amock): shouldnt this be a large number? + u(2,2) = 0.0; + } + u(2,2) += 1.0; + + // i = 1; + // l = 2; + + g = w(1, 1); + u(1,2) = 0.0; + + if(fabs(g) > 0.0) + { + g = 1.0/g; + s = u(2,1) * u(2,2); + f = (s/u(1,1)) * g; + + u(1,2) += f * u(1,1); + u(2,2) += f * u(2,1); + + u(1,1) *= g; + u(2,1) *= g; + } else { + u(1,1) = 0.0; + u(2,1) = 0.0; + } + u(1,1) += 1.0; + + // i = 0; + // l = 1; + g = w(0, 0); + u(0,1) = 0.0; + u(0,2) = 0.0; + + if(fabs(g) > 0.0) + { + f = (u(1,0) * u(1,1) + u(2,0) * u(2,1)) / (g * u(0,0)); + u(0,1) += f * u(0,0); + u(1,1) += f * u(1,0); + u(2,1) += f * u(2,0); + + f = (u(1,0) * u(1,2) + u(2,0) * u(2,2)) / (g * u(0,0)); + u(0,2) += f * u(0,0); + u(1,2) += f * u(1,0); + u(2,2) += f * u(2,0); + + u(0,0) /= g; + u(1,0) /= g; + u(2,0) /= g; + } else { + u(0,0) = 0.0; + u(1,0) = 0.0; + u(2,0) = 0.0; + } + u(0,0) += 1.0; + + int i, l; + + // PART 4: Opti + + // k = 2; + for(its=0; its eps*anorm) + // { + // l = 1; + // if(MIN(fabs(rv1.y),abs(w(0,0))) > eps*anorm) + // { + // l = 0; + // } + // } + + flag=true; + l=2; + if(abs(rv1.z) <= eps*anorm) + { + flag=false; + } + else if(abs(w(1,1)) > eps*anorm) + { + l=1; + if(abs(rv1.y) <= eps*anorm) + { + flag=false; + } + else if(abs(w(0,0)) > eps*anorm) + { + l=0; + flag = false; + } + } + + if(flag) + { + c=0.0; + s=1.0; + for(i=l; i<3; i++) + { + f = s*rv1[i]; + rv1[i] = c*rv1[i]; + if(abs(f) <= eps*anorm) + { + break; + } + g = w(i, i); + h = PYTHAG(f,g); + w(i, i) = h; + h = 1.0/h; + c = g*h; + s = -f*h; + for(j=0; j<3; j++) + { + y = u(j,l-1); + z = u(j,i); + u(j,l-1) = y*c+z*s; + u(j,i) = z*c-y*s; + } + } + } + z = w(2,2); + if(l == 2) + { + if(z < 0.0) + { + w(2, 2) = -z; + for (j=0; j<3; j++) + { + v(j,2) = -v(j,2); + } + } + break; + } + if(its == max_iterations - 1) + { + // std::cout << "no convergence in " << max_iterations << " svdcmp iterations" << std::endl; + // throw std::runtime_error("no convergence in max svdcmp iterations"); + } + x = w(l,l); + y = w(1,1); + g = rv1.y; + h = rv1.z; + f = ((y-z)*(y+z)+(g-h)*(g+h))/(2.f*h*y); + g = PYTHAG(f, 1.f); + f = ((x-z)*(x+z)+h*((y/(f+SIGN(g,f)))-h))/x; + c = s = 1.f; + for (j=l; j<2; j++) + { + i = j+1; + g = rv1[i]; + y = w(i, i); + h = s*g; + g = c*g; + z = PYTHAG(f,h); + rv1[j] = z; + c = f/z; + s = h/z; + f = x*c+g*s; + g = g*c-x*s; + h = y*s; + y *= c; + for(jj=0;jj<3;jj++) + { + x = v(jj,j); + z = v(jj,i); + v(jj,j) = x*c+z*s; + v(jj,i) = z*c-x*s; + } + z = PYTHAG(f,h); + w(j,j) = z; + if(z>0.0) + { + z = 1.f/z; + c = f*z; + s = h*z; + } + f = c*g+s*y; + x = c*y-s*g; + for (jj=0;jj<3;jj++) + { + y = u(jj,j); + z = u(jj,i); + u(jj,j) = y*c+z*s; + u(jj,i) = z*c-y*s; + } + } + rv1[l] = 0.f; + rv1.z = f; + w(2,2) = x; + } + + + for(its=0; its eps*anorm) + { + l=0; + flag = false; + } + + if(flag) + { + c=0.0; + s=1.0; + for(i=l; i<2; i++) + { + f = s*rv1[i]; + rv1[i] = c*rv1[i]; + if(abs(f) <= eps*anorm) + { + break; + } + g = w(i,i); + h = PYTHAG(f,g); + w(i, i) = h; + h = 1.0/h; + c = g*h; + s = -f*h; + for(j=0; j<3; j++) + { + y = u(j,l-1); + z = u(j,i); + u(j,l-1) = y*c+z*s; + u(j,i) = z*c-y*s; + } + } + } + z = w(1,1); + if(l == 1) + { + if(z < 0.0) + { + w(1,1) = -z; + for (j=0; j<3; j++) + { + v(j,1) = -v(j,1); + } + } + break; + } + if(its == max_iterations - 1) + { + // std::cout << "no convergence in " << max_iterations << " svdcmp iterations" << std::endl; + // throw std::runtime_error("no convergence in max svdcmp iterations"); + } + + x = w(l, l); + y = w(0, 0); + g = rv1.x; + h = rv1.y; + f = ((y-z)*(y+z)+(g-h)*(g+h))/(2.f*h*y); + g = PYTHAG(f, 1.f); + f = ((x-z)*(x+z)+h*((y/(f+SIGN(g,f)))-h))/x; + c = s = 1.f; + + if(l == 0) + { + g = rv1.y; + y = w(1,1); + h = s*g; + g = c*g; + z = PYTHAG(f,h); + rv1.x = z; + c = f/z; + s = h/z; + f = x*c+g*s; + g = g*c-x*s; + h = y*s; + y *= c; + + + x = v(0,0); + z = v(0,1); + v(0,0) = x*c+z*s; + v(0,1) = z*c-x*s; + + x = v(1,0); + z = v(1,1); + v(1,0) = x*c+z*s; + v(1,1) = z*c-x*s; + + x = v(2,0); + z = v(2,1); + v(2,0) = x*c+z*s; + v(2,1) = z*c-x*s; + + + z = PYTHAG(f,h); + w(0,0) = z; + if(z>0.f) + { + z = 1.f/z; + c = f*z; + s = h*z; + } + f = c*g+s*y; + x = c*y-s*g; + for(jj=0; jj<3; jj++) + { + y = u(jj,0); + z = u(jj,1); + u(jj,0) = y*c+z*s; + u(jj,1) = z*c-y*s; + } + } + + rv1[l] = 0.f; + rv1.y = f; + w(1,1) = x; + } + + + z = w(0,0); + if (z < 0.0) + { + w(0,0) = -z; + v(0,0) = -v(0,0); + v(1,0) = -v(1,0); + v(2,0) = -v(2,0); + } +} + +RMAGINE_HOST_FUNCTION +void svd( + const Matrix3x3& a, + Matrix3x3& u, + Vector3& w, + Matrix3x3& v) +{ + // TODO: test + constexpr unsigned int m = 3; + constexpr unsigned int n = 3; + constexpr unsigned int max_iterations = 20; + + // additional memory required + bool flag; + int its, j, jj, k, nm; + float anorm, c, f, g, h, s, scale, x, y, z; + + Vector3 rv1 = Vector3::Zeros(); + + g = s = scale = anorm = 0.0; + float eps = std::numeric_limits::epsilon(); + u = a; + + // FIRST PART + + // i = 0; + // l = 2; + scale = fabs(u(0,0)) + fabs(u(1,0)) + fabs(u(2,0)); + if(scale > 0.0) + { + u(0, 0) /= scale; + u(1, 0) /= scale; + u(2, 0) /= scale; + + s = u(0,0) * u(0,0) + u(1,0) * u(1,0) + u(2,0) * u(2,0); + f = u(0,0); + g = -SIGN(sqrt(s), f); + h = f * g - s; + + u(0, 0) = f - g; + + f = (u(0, 0) * u(0, 1) + u(1, 0) * u(1, 1) + u(2, 0) * u(2, 1)) / h; + u(0, 1) += f * u(0, 0); + u(1, 1) += f * u(1, 0); + u(2, 1) += f * u(2, 0); + + f = (u(0, 0) * u(0, 2) + u(1, 0) * u(1, 2) + u(2, 0) * u(2, 2)) / h; + u(0, 2) += f * u(0, 0); + u(1, 2) += f * u(1, 0); + u(2, 2) += f * u(2, 0); + + u(0, 0) *= scale; + u(1, 0) *= scale; + u(2, 0) *= scale; + } + + w.x = scale * g; + g = s = scale = 0.0; + + scale = abs(u(0,0)) + abs(u(0,1)) + abs(u(0,2)); + + if(scale > 0.0) + { + u(0, 1) /= scale; + u(0, 2) /= scale; + s = u(0,1) * u(0,1) + u(0,2) * u(0,2); + + f = u(0, 1); + g = -SIGN(sqrt(s),f); + h = f * g-s; + u(0, 1) = f - g; + + rv1.y = u(0, 1) / h; + rv1.z = u(0, 2) / h; + + s = u(1,1) * u(0,1) + u(1,2) * u(0,2); + u(1, 1) += s * rv1.y; + u(1, 2) += s * rv1.z; + + s = u(2,1) * u(0,1) + u(2,2) * u(0,2); + u(2, 1) += s * rv1.y; + u(2, 2) += s * rv1.z; + + u(0, 1) *= scale; + u(0, 2) *= scale; + } + + anorm = fabs(w.x); + // anorm = MAX(anorm, (fabs(w(0, 0)) + fabs(rv1.x))); // rv1.x is always 0 here, anorm too. fabs(X) >= 0 + + // i = 1; + // l = 3; + rv1.y = scale * g; + g = 0.0; + scale = fabs(u(1, 1)) + fabs(u(2, 1)); + + if(scale > 0.0) + { + u(1,1) /= scale; + u(2,1) /= scale; + + s = u(1,1) * u(1,1) + u(2,1) * u(2,1); + f = u(1,1); + g = -SIGN(sqrt(s),f); + h = f * g - s; + u(1,1) = f-g; + + f = (u(1,1) * u(1,2) + u(2,1) * u(2,2)) / h; + u(1,2) += f * u(1,1); + u(2,2) += f * u(2,1); + + u(1,1) *= scale; + u(2,1) *= scale; + } + + w.y = scale * g; + g = s = scale = 0.0; + + scale = abs(u(1,2)); + if(scale > 0.0) + { + u(1,2) /= scale; + s = u(1,2) * u(1,2); + + f = u(1, 2); + g = -SIGN(sqrt(s), f); + h = f * g - s; + u(1,2) = f - g; + + rv1.z = u(1,2) / h; + s = u(2,2) * u(1,2); + + u(2,2) += s * rv1.z; + u(1,2) *= scale; + } + + anorm = MAX(anorm, (abs(w.y) + abs(rv1.y))); + + rv1.z = scale * g; + + scale = abs(u(2, 2)); + if(scale > 0.0) + { + u(2, 2) /= scale; + s = u(2, 2) * u(2, 2); + f = u(2, 2); + g = -SIGN(sqrt(s),f); + h = f * g - s; + + u(2, 2) = f - g; + u(2, 2) *= scale; + } + + w.z = scale * g; + g = s = scale = 0.0; + + anorm = MAX(anorm, (abs(w.z)+abs(rv1.z))); + + // SECOND PART + v(2, 2) = 1.0; + g = rv1.z; + + // i = 1; + // l = 2; + if(fabs(g) > 0.0) + { + v(2,1) = (u(1,2) / u(1,2)) / g; + s = u(1,2) * v(2,2); + v(2,2) += s * v(2,1); + } + v(1,2) = 0.0; + v(2,1) = 0.0; + v(1,1) = 1.0; + + g = rv1.y; + + // l = 1; + // i = 0; + if(fabs(g) > 0.0) + { + v(1,0) = (u(0,1) / u(0,1)) / g; + v(2,0) = (u(0,2) / u(0,1)) / g; + + s = u(0,1) * v(1,1) + u(0,2) * v(2,1); + v(1,1) += s * v(1,0); + v(2,1) += s * v(2,0); + + s = u(0,1) * v(1,2) + u(0,2) * v(2,2); + v(1,2) += s * v(1,0); + v(2,2) += s * v(2,0); + } + v(0,1) = 0.0; + v(1,0) = 0.0; + v(0,2) = 0.0; + v(2,0) = 0.0; + v(0,0) = 1.0; + g = rv1.x; + + + // THIRD PART + + // i = 2; + // l = 3; + g = w.z; + if(fabs(g) > 0.0) + { + u(2,2) /= g; + } else { + // TODO(amock): shouldnt this be a large number? + u(2,2) = 0.0; + } + u(2,2) += 1.0; + + // i = 1; + // l = 2; + + g = w.y; + u(1,2) = 0.0; + + if(fabs(g) > 0.0) + { + g = 1.0/g; + s = u(2,1) * u(2,2); + f = (s/u(1,1)) * g; + + u(1,2) += f * u(1,1); + u(2,2) += f * u(2,1); + + u(1,1) *= g; + u(2,1) *= g; + } else { + u(1,1) = 0.0; + u(2,1) = 0.0; + } + u(1,1) += 1.0; + + // i = 0; + // l = 1; + g = w.x; + u(0,1) = 0.0; + u(0,2) = 0.0; + + if(fabs(g) > 0.0) + { + f = (u(1,0) * u(1,1) + u(2,0) * u(2,1)) / (g * u(0,0)); + u(0,1) += f * u(0,0); + u(1,1) += f * u(1,0); + u(2,1) += f * u(2,0); + + f = (u(1,0) * u(1,2) + u(2,0) * u(2,2)) / (g * u(0,0)); + u(0,2) += f * u(0,0); + u(1,2) += f * u(1,0); + u(2,2) += f * u(2,0); + + u(0,0) /= g; + u(1,0) /= g; + u(2,0) /= g; + } else { + u(0,0) = 0.0; + u(1,0) = 0.0; + u(2,0) = 0.0; + } + u(0,0) += 1.0; + + int i, l; + + // PART 4: Opti + + // k = 2; + for(its=0; its eps*anorm) + // { + // l = 1; + // if(MIN(fabs(rv1.y),abs(w(0,0))) > eps*anorm) + // { + // l = 0; + // } + // } + + flag=true; + l=2; + if(abs(rv1.z) <= eps*anorm) + { + flag=false; + } + else if(abs(w.y) > eps*anorm) + { + l=1; + if(abs(rv1.y) <= eps*anorm) + { + flag=false; + } + else if(abs(w.x) > eps*anorm) + { + l=0; + flag = false; + } + } + + if(flag) + { + c=0.0; + s=1.0; + for(i=l; i<3; i++) + { + f = s*rv1[i]; + rv1[i] = c*rv1[i]; + if(abs(f) <= eps*anorm) + { + break; + } + g = w(i); + h = PYTHAG(f,g); + w(i) = h; + h = 1.0/h; + c = g*h; + s = -f*h; + for(j=0; j0.0) + { + z = 1.f/z; + c = f*z; + s = h*z; + } + f = c*g+s*y; + x = c*y-s*g; + for (jj=0;jj eps*anorm) + { + l=0; + flag = false; + } + + if(flag) + { + c=0.0; + s=1.0; + for(i=l; i<2; i++) + { + f = s*rv1[i]; + rv1[i] = c*rv1[i]; + if(abs(f) <= eps*anorm) + { + break; + } + g = w(i); + h = PYTHAG(f,g); + w(i) = h; + h = 1.0/h; + c = g*h; + s = -f*h; + for(j=0; j0.f) + { + z = 1.f/z; + c = f*z; + s = h*z; + } + f = c*g+s*y; + x = c*y-s*g; + for(jj=0; jj<3; jj++) + { + y = u(jj,0); + z = u(jj,1); + u(jj,0) = y*c+z*s; + u(jj,1) = z*c-y*s; + } + } + + rv1[l] = 0.f; + rv1.y = f; + w.y = x; + } + + + z = w.x; + if (z < 0.0) + { + w.x = -z; + v(0,0) = -v(0,0); + v(1,0) = -v(1,0); + v(2,0) = -v(2,0); + } +} + } // namespace rmagine \ No newline at end of file diff --git a/src/rmagine_core/src/math/math.cpp b/src/rmagine_core/src/math/math.cpp index 9323175d..42de8ae9 100644 --- a/src/rmagine_core/src/math/math.cpp +++ b/src/rmagine_core/src/math/math.cpp @@ -4,6 +4,8 @@ #include +#include "rmagine/math/linalg.h" + namespace rmagine { @@ -697,5 +699,39 @@ Memory cov( return C; } +/** + * @brief decompose A = UWV* using singular value decomposition + */ +void svd( + const MemoryView& As, + MemoryView& Us, + MemoryView& Ws, + MemoryView& Vs) +{ + #pragma omp parallel for + for(size_t i=0; i& As, + MemoryView& Us, + MemoryView& ws, + MemoryView& Vs) +{ + #pragma omp parallel for + for(size_t i=0; i only works for # CXX_STANDARD 17 ) diff --git a/src/rmagine_cuda/include/rmagine/math/math.cuh b/src/rmagine_cuda/include/rmagine/math/math.cuh index 7439487f..645085cb 100644 --- a/src/rmagine_cuda/include/rmagine/math/math.cuh +++ b/src/rmagine_cuda/include/rmagine/math/math.cuh @@ -543,6 +543,28 @@ Memory cov( const MemoryView& v2 ); +/** + * @brief decompose A = UWV* using singular value decomposition + */ +void svd( + const MemoryView& As, + MemoryView& Us, + MemoryView& Ws, + MemoryView& Vs +); + +/** + * @brief decompose A = UWV* using singular value decomposition + * + * w is a vector which is the diagonal of matrix W + */ +void svd( + const MemoryView& As, + MemoryView& Us, + MemoryView& ws, + MemoryView& Vs +); + } // namespace rmagine diff --git a/src/rmagine_cuda/include/rmagine/util/cuda/CudaDebug.hpp b/src/rmagine_cuda/include/rmagine/util/cuda/CudaDebug.hpp index 2bd29b81..faa48553 100644 --- a/src/rmagine_cuda/include/rmagine/util/cuda/CudaDebug.hpp +++ b/src/rmagine_cuda/include/rmagine/util/cuda/CudaDebug.hpp @@ -52,4 +52,16 @@ void cudaAssert( const char* func, int line); +#ifdef NDEBUG + #define RM_CUDA_DEBUG() +#else // NDEBUG + #define RM_CUDA_DEBUG() \ + cudaError_t err = cudaGetLastError(); \ + if (err != cudaSuccess) \ + { \ + printf("Error: %s\n", cudaGetErrorString(err)); \ + throw std::runtime_error(cudaGetErrorString(err)); \ + } +#endif // defined NDEBUG + #endif // RMAGINE_UTIL_CUDA_DEBUG_HPP \ No newline at end of file diff --git a/src/rmagine_cuda/src/math/linalg.cu b/src/rmagine_cuda/src/math/linalg.cu new file mode 100644 index 00000000..17c846da --- /dev/null +++ b/src/rmagine_cuda/src/math/linalg.cu @@ -0,0 +1,1085 @@ +#include +#include +#include + +namespace rmagine +{ + +RMAGINE_DEVICE_FUNCTION +void svd( + const Matrix3x3& a, + Matrix3x3& u, + Matrix3x3& w, + Matrix3x3& v) +{ + // printf("SVDD\n"); + + + // TODO: test + const unsigned int max_iterations = 20; + + // additional memory required + bool flag; + int its, j, jj; + float anorm, c, f, g, h, s, scale, x, y, z; + + Vector3 rv1; + rv1.x = 0.0; + rv1.y = 0.0; + rv1.z = 0.0; + + + + g = s = scale = anorm = 0.0; + const float eps = __FLT_EPSILON__; + u = a; + + // FIRST PART + + // i = 0; + // l = 2; + scale = fabs(u(0,0)) + fabs(u(1,0)) + fabs(u(2,0)); + if(scale > 0.0) + { + u(0, 0) /= scale; + u(1, 0) /= scale; + u(2, 0) /= scale; + + s = u(0,0) * u(0,0) + u(1,0) * u(1,0) + u(2,0) * u(2,0); + f = u(0,0); + g = -SIGN(sqrt(s), f); + h = f * g - s; + + u(0, 0) = f - g; + + f = (u(0, 0) * u(0, 1) + u(1, 0) * u(1, 1) + u(2, 0) * u(2, 1)) / h; + u(0, 1) += f * u(0, 0); + u(1, 1) += f * u(1, 0); + u(2, 1) += f * u(2, 0); + + f = (u(0, 0) * u(0, 2) + u(1, 0) * u(1, 2) + u(2, 0) * u(2, 2)) / h; + u(0, 2) += f * u(0, 0); + u(1, 2) += f * u(1, 0); + u(2, 2) += f * u(2, 0); + + u(0, 0) *= scale; + u(1, 0) *= scale; + u(2, 0) *= scale; + } + + w(0, 0) = scale * g; + g = s = scale = 0.0; + + scale = abs(u(0,0)) + abs(u(0,1)) + abs(u(0,2)); + + if(scale > 0.0) + { + u(0, 1) /= scale; + u(0, 2) /= scale; + s = u(0,1) * u(0,1) + u(0,2) * u(0,2); + + f = u(0, 1); + g = -SIGN(sqrt(s),f); + h = f * g-s; + u(0, 1) = f - g; + + rv1.y = u(0, 1) / h; + rv1.z = u(0, 2) / h; + + s = u(1,1) * u(0,1) + u(1,2) * u(0,2); + u(1, 1) += s * rv1.y; + u(1, 2) += s * rv1.z; + + s = u(2,1) * u(0,1) + u(2,2) * u(0,2); + u(2, 1) += s * rv1.y; + u(2, 2) += s * rv1.z; + + u(0, 1) *= scale; + u(0, 2) *= scale; + } + + anorm = fabs(w(0, 0)); + // anorm = MAX(anorm, (fabs(w(0, 0)) + fabs(rv1.x))); // rv1.x is always 0 here, anorm too. fabs(X) >= 0 + + // i = 1; + // l = 3; + rv1.y = scale * g; + g = 0.0; + scale = fabs(u(1, 1)) + fabs(u(2, 1)); + + if(scale > 0.0) + { + u(1,1) /= scale; + u(2,1) /= scale; + + s = u(1,1) * u(1,1) + u(2,1) * u(2,1); + f = u(1,1); + g = -SIGN(sqrt(s),f); + h = f * g - s; + u(1,1) = f-g; + + f = (u(1,1) * u(1,2) + u(2,1) * u(2,2)) / h; + u(1,2) += f * u(1,1); + u(2,2) += f * u(2,1); + + u(1,1) *= scale; + u(2,1) *= scale; + } + + w(1, 1) = scale * g; + g = s = scale = 0.0; + + scale = abs(u(1,2)); + if(scale > 0.0) + { + u(1,2) /= scale; + s = u(1,2) * u(1,2); + + f = u(1, 2); + g = -SIGN(sqrt(s), f); + h = f * g - s; + u(1,2) = f - g; + + rv1.z = u(1,2) / h; + s = u(2,2) * u(1,2); + + u(2,2) += s * rv1.z; + u(1,2) *= scale; + } + + anorm = MAX(anorm, (abs(w(1, 1)) + abs(rv1.y))); + + + + rv1.z = scale * g; + + scale = abs(u(2, 2)); + if(scale > 0.0) + { + u(2, 2) /= scale; + s = u(2, 2) * u(2, 2); + f = u(2, 2); + g = -SIGN(sqrt(s),f); + h = f * g - s; + + u(2, 2) = f - g; + u(2, 2) *= scale; + } + + w(2, 2) = scale * g; + g = s = scale = 0.0; + + anorm = MAX(anorm, (abs(w(2, 2))+abs(rv1.z))); + + // SECOND PART + v(2, 2) = 1.0; + g = rv1.z; + + // i = 1; + // l = 2; + if(fabs(g) > 0.0) + { + v(2,1) = (u(1,2) / u(1,2)) / g; + s = u(1,2) * v(2,2); + v(2,2) += s * v(2,1); + } + v(1,2) = 0.0; + v(2,1) = 0.0; + v(1,1) = 1.0; + + g = rv1.y; + + // l = 1; + // i = 0; + if(fabs(g) > 0.0) + { + v(1,0) = (u(0,1) / u(0,1)) / g; + v(2,0) = (u(0,2) / u(0,1)) / g; + + s = u(0,1) * v(1,1) + u(0,2) * v(2,1); + v(1,1) += s * v(1,0); + v(2,1) += s * v(2,0); + + s = u(0,1) * v(1,2) + u(0,2) * v(2,2); + v(1,2) += s * v(1,0); + v(2,2) += s * v(2,0); + } + v(0,1) = 0.0; + v(1,0) = 0.0; + v(0,2) = 0.0; + v(2,0) = 0.0; + v(0,0) = 1.0; + g = rv1.x; + + + // THIRD PART + + // i = 2; + // l = 3; + g = w(2, 2); + if(fabs(g) > 0.0) + { + u(2,2) /= g; + } else { + // TODO(amock): shouldnt this be a large number? + u(2,2) = 0.0; + } + u(2,2) += 1.0; + + // i = 1; + // l = 2; + + g = w(1, 1); + u(1,2) = 0.0; + + if(fabs(g) > 0.0) + { + g = 1.0/g; + s = u(2,1) * u(2,2); + f = (s/u(1,1)) * g; + + u(1,2) += f * u(1,1); + u(2,2) += f * u(2,1); + + u(1,1) *= g; + u(2,1) *= g; + } else { + u(1,1) = 0.0; + u(2,1) = 0.0; + } + u(1,1) += 1.0; + + // i = 0; + // l = 1; + g = w(0, 0); + u(0,1) = 0.0; + u(0,2) = 0.0; + + if(fabs(g) > 0.0) + { + f = (u(1,0) * u(1,1) + u(2,0) * u(2,1)) / (g * u(0,0)); + u(0,1) += f * u(0,0); + u(1,1) += f * u(1,0); + u(2,1) += f * u(2,0); + + f = (u(1,0) * u(1,2) + u(2,0) * u(2,2)) / (g * u(0,0)); + u(0,2) += f * u(0,0); + u(1,2) += f * u(1,0); + u(2,2) += f * u(2,0); + + u(0,0) /= g; + u(1,0) /= g; + u(2,0) /= g; + } else { + u(0,0) = 0.0; + u(1,0) = 0.0; + u(2,0) = 0.0; + } + u(0,0) += 1.0; + + int i, l; + + + + // PART 4: Opti + + // k = 2; + for(int its=0; its eps*anorm) + { + l=1; + if(fabs(rv1.y) <= eps*anorm) + { + flag=false; + } + else if(fabs(w(0,0)) > eps*anorm) + { + l=0; + flag = false; + } + } + + if(flag) + { + c=0.0; + s=1.0; + for(i=l; i<3; i++) + { + f = s*rv1[i]; + rv1[i] = c*rv1[i]; + if(fabs(f) <= eps*anorm) + { + break; + } + g = w(i, i); + h = PYTHAG(f,g); + w(i, i) = h; + h = 1.0/h; + c = g*h; + s = -f*h; + for(j=0; j<3; j++) + { + y = u(j,l-1); + z = u(j,i); + u(j,l-1) = y*c+z*s; + u(j,i) = z*c-y*s; + } + } + } + z = w(2,2); + if(l == 2) + { + if(z < 0.0) + { + w(2, 2) = -z; + for (j=0; j<3; j++) + { + v(j,2) = -v(j,2); + } + } + break; + } + if(its == max_iterations - 1) + { + // std::cout << "no convergence in " << max_iterations << " svdcmp iterations" << std::endl; + // throw std::runtime_error("no convergence in max svdcmp iterations"); + } + x = w(l,l); + y = w(1,1); + g = rv1.y; + h = rv1.z; + f = ((y-z)*(y+z)+(g-h)*(g+h))/(2.f*h*y); + g = PYTHAG(f, 1.f); + f = ((x-z)*(x+z)+h*((y/(f+SIGN(g,f)))-h))/x; + c = s = 1.f; + for (j=l; j<2; j++) + { + i = j+1; + g = rv1[i]; + y = w(i, i); + h = s*g; + g = c*g; + z = PYTHAG(f,h); + rv1[j] = z; + c = f/z; + s = h/z; + f = x*c+g*s; + g = g*c-x*s; + h = y*s; + y *= c; + for(jj=0;jj<3;jj++) + { + x = v(jj,j); + z = v(jj,i); + v(jj,j) = x*c+z*s; + v(jj,i) = z*c-x*s; + } + z = PYTHAG(f,h); + w(j,j) = z; + if(z>0.0) + { + z = 1.f/z; + c = f*z; + s = h*z; + } + f = c*g+s*y; + x = c*y-s*g; + for (jj=0;jj<3;jj++) + { + y = u(jj,j); + z = u(jj,i); + u(jj,j) = y*c+z*s; + u(jj,i) = z*c-y*s; + } + } + rv1[l] = 0.f; + rv1.z = f; + w(2,2) = x; + } + + + + for(its=0; its eps*anorm) + { + l=0; + flag = false; + } + + if(flag) + { + c=0.0; + s=1.0; + for(i=l; i<2; i++) + { + f = s*rv1[i]; + rv1[i] = c*rv1[i]; + if(abs(f) <= eps*anorm) + { + break; + } + g = w(i,i); + h = PYTHAG(f,g); + w(i, i) = h; + h = 1.0/h; + c = g*h; + s = -f*h; + for(j=0; j<3; j++) + { + y = u(j,l-1); + z = u(j,i); + u(j,l-1) = y*c+z*s; + u(j,i) = z*c-y*s; + } + } + } + z = w(1,1); + if(l == 1) + { + if(z < 0.0) + { + w(1,1) = -z; + for (j=0; j<3; j++) + { + v(j,1) = -v(j,1); + } + } + break; + } + if(its == max_iterations - 1) + { + // std::cout << "no convergence in " << max_iterations << " svdcmp iterations" << std::endl; + // throw std::runtime_error("no convergence in max svdcmp iterations"); + } + + x = w(l, l); + y = w(0, 0); + g = rv1.x; + h = rv1.y; + f = ((y-z)*(y+z)+(g-h)*(g+h))/(2.f*h*y); + g = PYTHAG(f, 1.f); + f = ((x-z)*(x+z)+h*((y/(f+SIGN(g,f)))-h))/x; + c = s = 1.f; + + if(l == 0) + { + g = rv1.y; + y = w(1,1); + h = s*g; + g = c*g; + z = PYTHAG(f,h); + rv1.x = z; + c = f/z; + s = h/z; + f = x*c+g*s; + g = g*c-x*s; + h = y*s; + y *= c; + + + x = v(0,0); + z = v(0,1); + v(0,0) = x*c+z*s; + v(0,1) = z*c-x*s; + + x = v(1,0); + z = v(1,1); + v(1,0) = x*c+z*s; + v(1,1) = z*c-x*s; + + x = v(2,0); + z = v(2,1); + v(2,0) = x*c+z*s; + v(2,1) = z*c-x*s; + + + z = PYTHAG(f,h); + w(0,0) = z; + if(z>0.f) + { + z = 1.f/z; + c = f*z; + s = h*z; + } + f = c*g+s*y; + x = c*y-s*g; + for(jj=0; jj<3; jj++) + { + y = u(jj,0); + z = u(jj,1); + u(jj,0) = y*c+z*s; + u(jj,1) = z*c-y*s; + } + } + + rv1[l] = 0.f; + rv1.y = f; + w(1,1) = x; + } + + + z = w(0,0); + if (z < 0.0) + { + w(0,0) = -z; + v(0,0) = -v(0,0); + v(1,0) = -v(1,0); + v(2,0) = -v(2,0); + } +} + + +RMAGINE_DEVICE_FUNCTION +void svd( + const Matrix3x3& a, + Matrix3x3& u, + Vector3& w, + Matrix3x3& v) +{ + // TODO: test + // constexpr unsigned int m = 3; + // constexpr unsigned int n = 3; + constexpr unsigned int max_iterations = 20; + + // additional memory required + bool flag; + int its, j, jj; + float anorm, c, f, g, h, s, scale, x, y, z; + + Vector3 rv1 = Vector3::Zeros(); + + g = s = scale = anorm = 0.0; + float eps = __FLT_EPSILON__; + u = a; + + // FIRST PART + + // i = 0; + // l = 2; + scale = fabs(u(0,0)) + fabs(u(1,0)) + fabs(u(2,0)); + if(scale > 0.0) + { + u(0, 0) /= scale; + u(1, 0) /= scale; + u(2, 0) /= scale; + + s = u(0,0) * u(0,0) + u(1,0) * u(1,0) + u(2,0) * u(2,0); + f = u(0,0); + g = -SIGN(sqrt(s), f); + h = f * g - s; + + u(0, 0) = f - g; + + f = (u(0, 0) * u(0, 1) + u(1, 0) * u(1, 1) + u(2, 0) * u(2, 1)) / h; + u(0, 1) += f * u(0, 0); + u(1, 1) += f * u(1, 0); + u(2, 1) += f * u(2, 0); + + f = (u(0, 0) * u(0, 2) + u(1, 0) * u(1, 2) + u(2, 0) * u(2, 2)) / h; + u(0, 2) += f * u(0, 0); + u(1, 2) += f * u(1, 0); + u(2, 2) += f * u(2, 0); + + u(0, 0) *= scale; + u(1, 0) *= scale; + u(2, 0) *= scale; + } + + w.x = scale * g; + g = s = scale = 0.0; + + scale = abs(u(0,0)) + abs(u(0,1)) + abs(u(0,2)); + + if(scale > 0.0) + { + u(0, 1) /= scale; + u(0, 2) /= scale; + s = u(0,1) * u(0,1) + u(0,2) * u(0,2); + + f = u(0, 1); + g = -SIGN(sqrt(s),f); + h = f * g-s; + u(0, 1) = f - g; + + rv1.y = u(0, 1) / h; + rv1.z = u(0, 2) / h; + + s = u(1,1) * u(0,1) + u(1,2) * u(0,2); + u(1, 1) += s * rv1.y; + u(1, 2) += s * rv1.z; + + s = u(2,1) * u(0,1) + u(2,2) * u(0,2); + u(2, 1) += s * rv1.y; + u(2, 2) += s * rv1.z; + + u(0, 1) *= scale; + u(0, 2) *= scale; + } + + anorm = fabs(w.x); + // anorm = MAX(anorm, (fabs(w(0, 0)) + fabs(rv1.x))); // rv1.x is always 0 here, anorm too. fabs(X) >= 0 + + // i = 1; + // l = 3; + rv1.y = scale * g; + g = 0.0; + scale = fabs(u(1, 1)) + fabs(u(2, 1)); + + if(scale > 0.0) + { + u(1,1) /= scale; + u(2,1) /= scale; + + s = u(1,1) * u(1,1) + u(2,1) * u(2,1); + f = u(1,1); + g = -SIGN(sqrt(s),f); + h = f * g - s; + u(1,1) = f-g; + + f = (u(1,1) * u(1,2) + u(2,1) * u(2,2)) / h; + u(1,2) += f * u(1,1); + u(2,2) += f * u(2,1); + + u(1,1) *= scale; + u(2,1) *= scale; + } + + w.y = scale * g; + g = s = scale = 0.0; + + scale = abs(u(1,2)); + if(scale > 0.0) + { + u(1,2) /= scale; + s = u(1,2) * u(1,2); + + f = u(1, 2); + g = -SIGN(sqrt(s), f); + h = f * g - s; + u(1,2) = f - g; + + rv1.z = u(1,2) / h; + s = u(2,2) * u(1,2); + + u(2,2) += s * rv1.z; + u(1,2) *= scale; + } + + anorm = MAX(anorm, (abs(w.y) + abs(rv1.y))); + + rv1.z = scale * g; + + scale = abs(u(2, 2)); + if(scale > 0.0) + { + u(2, 2) /= scale; + s = u(2, 2) * u(2, 2); + f = u(2, 2); + g = -SIGN(sqrt(s),f); + h = f * g - s; + + u(2, 2) = f - g; + u(2, 2) *= scale; + } + + w.z = scale * g; + g = s = scale = 0.0; + + anorm = MAX(anorm, (abs(w.z)+abs(rv1.z))); + + // SECOND PART + v(2, 2) = 1.0; + g = rv1.z; + + // i = 1; + // l = 2; + if(fabs(g) > 0.0) + { + v(2,1) = (u(1,2) / u(1,2)) / g; + s = u(1,2) * v(2,2); + v(2,2) += s * v(2,1); + } + v(1,2) = 0.0; + v(2,1) = 0.0; + v(1,1) = 1.0; + + g = rv1.y; + + // l = 1; + // i = 0; + if(fabs(g) > 0.0) + { + v(1,0) = (u(0,1) / u(0,1)) / g; + v(2,0) = (u(0,2) / u(0,1)) / g; + + s = u(0,1) * v(1,1) + u(0,2) * v(2,1); + v(1,1) += s * v(1,0); + v(2,1) += s * v(2,0); + + s = u(0,1) * v(1,2) + u(0,2) * v(2,2); + v(1,2) += s * v(1,0); + v(2,2) += s * v(2,0); + } + v(0,1) = 0.0; + v(1,0) = 0.0; + v(0,2) = 0.0; + v(2,0) = 0.0; + v(0,0) = 1.0; + g = rv1.x; + + + // THIRD PART + + // i = 2; + // l = 3; + g = w.z; + if(fabs(g) > 0.0) + { + u(2,2) /= g; + } else { + // TODO(amock): shouldnt this be a large number? + u(2,2) = 0.0; + } + u(2,2) += 1.0; + + // i = 1; + // l = 2; + + g = w.y; + u(1,2) = 0.0; + + if(fabs(g) > 0.0) + { + g = 1.0/g; + s = u(2,1) * u(2,2); + f = (s/u(1,1)) * g; + + u(1,2) += f * u(1,1); + u(2,2) += f * u(2,1); + + u(1,1) *= g; + u(2,1) *= g; + } else { + u(1,1) = 0.0; + u(2,1) = 0.0; + } + u(1,1) += 1.0; + + // i = 0; + // l = 1; + g = w.x; + u(0,1) = 0.0; + u(0,2) = 0.0; + + if(fabs(g) > 0.0) + { + f = (u(1,0) * u(1,1) + u(2,0) * u(2,1)) / (g * u(0,0)); + u(0,1) += f * u(0,0); + u(1,1) += f * u(1,0); + u(2,1) += f * u(2,0); + + f = (u(1,0) * u(1,2) + u(2,0) * u(2,2)) / (g * u(0,0)); + u(0,2) += f * u(0,0); + u(1,2) += f * u(1,0); + u(2,2) += f * u(2,0); + + u(0,0) /= g; + u(1,0) /= g; + u(2,0) /= g; + } else { + u(0,0) = 0.0; + u(1,0) = 0.0; + u(2,0) = 0.0; + } + u(0,0) += 1.0; + + int i, l; + + // PART 4: Opti + + // k = 2; + for(its=0; its eps*anorm) + // { + // l = 1; + // if(MIN(fabs(rv1.y),abs(w(0,0))) > eps*anorm) + // { + // l = 0; + // } + // } + + flag=true; + l=2; + if(abs(rv1.z) <= eps*anorm) + { + flag=false; + } + else if(abs(w.y) > eps*anorm) + { + l=1; + if(abs(rv1.y) <= eps*anorm) + { + flag=false; + } + else if(abs(w.x) > eps*anorm) + { + l=0; + flag = false; + } + } + + if(flag) + { + c=0.0; + s=1.0; + for(i=l; i<3; i++) + { + f = s*rv1[i]; + rv1[i] = c*rv1[i]; + if(abs(f) <= eps*anorm) + { + break; + } + g = w(i); + h = PYTHAG(f,g); + w(i) = h; + h = 1.0/h; + c = g*h; + s = -f*h; + for(j=0; j<3; j++) + { + y = u(j,l-1); + z = u(j,i); + u(j,l-1) = y*c+z*s; + u(j,i) = z*c-y*s; + } + } + } + z = w.z; + if(l == 2) + { + if(z < 0.0) + { + w.z = -z; + v(0,2) = -v(0,2); + v(1,2) = -v(1,2); + v(2,2) = -v(2,2); + } + break; + } + if(its == max_iterations - 1) + { + // std::cout << "no convergence in " << max_iterations << " svdcmp iterations" << std::endl; + // throw std::runtime_error("no convergence in max svdcmp iterations"); + } + x = w(l); + y = w.y; + g = rv1.y; + h = rv1.z; + f = ((y-z)*(y+z)+(g-h)*(g+h))/(2.f*h*y); + g = PYTHAG(f, 1.f); + f = ((x-z)*(x+z)+h*((y/(f+SIGN(g,f)))-h))/x; + c = s = 1.f; + for (j=l; j<2; j++) + { + i = j+1; + g = rv1[i]; + y = w(i); + h = s*g; + g = c*g; + z = PYTHAG(f,h); + rv1[j] = z; + c = f/z; + s = h/z; + f = x*c+g*s; + g = g*c-x*s; + h = y*s; + y *= c; + for(jj=0;jj<3;jj++) + { + x = v(jj,j); + z = v(jj,i); + v(jj,j) = x*c+z*s; + v(jj,i) = z*c-x*s; + } + z = PYTHAG(f,h); + w(j) = z; + if(z>0.0) + { + z = 1.f/z; + c = f*z; + s = h*z; + } + f = c*g+s*y; + x = c*y-s*g; + for (jj=0;jj<3;jj++) + { + y = u(jj,j); + z = u(jj,i); + u(jj,j) = y*c+z*s; + u(jj,i) = z*c-y*s; + } + } + rv1[l] = 0.f; + rv1.z = f; + w.z = x; + } + + + for(its=0; its eps*anorm) + { + l=0; + flag = false; + } + + if(flag) + { + c=0.0; + s=1.0; + for(i=l; i<2; i++) + { + f = s*rv1[i]; + rv1[i] = c*rv1[i]; + if(abs(f) <= eps*anorm) + { + break; + } + g = w(i); + h = PYTHAG(f,g); + w(i) = h; + h = 1.0/h; + c = g*h; + s = -f*h; + for(j=0; j<3; j++) + { + y = u(j,l-1); + z = u(j,i); + u(j,l-1) = y*c+z*s; + u(j,i) = z*c-y*s; + } + } + } + z = w.y; + if(l == 1) + { + if(z < 0.0) + { + w.y = -z; + for (j=0; j<3; j++) + { + v(j,1) = -v(j,1); + } + } + break; + } + if(its == max_iterations - 1) + { + // std::cout << "no convergence in " << max_iterations << " svdcmp iterations" << std::endl; + // throw std::runtime_error("no convergence in max svdcmp iterations"); + } + + x = w(l); + y = w.x; + g = rv1.x; + h = rv1.y; + f = ((y-z)*(y+z)+(g-h)*(g+h))/(2.f*h*y); + g = PYTHAG(f, 1.f); + f = ((x-z)*(x+z)+h*((y/(f+SIGN(g,f)))-h))/x; + c = s = 1.f; + + if(l == 0) + { + g = rv1.y; + y = w.y; + h = s*g; + g = c*g; + z = PYTHAG(f,h); + rv1.x = z; + c = f/z; + s = h/z; + f = x*c+g*s; + g = g*c-x*s; + h = y*s; + y *= c; + + + x = v(0,0); + z = v(0,1); + v(0,0) = x*c+z*s; + v(0,1) = z*c-x*s; + + x = v(1,0); + z = v(1,1); + v(1,0) = x*c+z*s; + v(1,1) = z*c-x*s; + + x = v(2,0); + z = v(2,1); + v(2,0) = x*c+z*s; + v(2,1) = z*c-x*s; + + + z = PYTHAG(f,h); + w.x = z; + if(z>0.f) + { + z = 1.f/z; + c = f*z; + s = h*z; + } + f = c*g+s*y; + x = c*y-s*g; + for(jj=0; jj<3; jj++) + { + y = u(jj,0); + z = u(jj,1); + u(jj,0) = y*c+z*s; + u(jj,1) = z*c-y*s; + } + } + + rv1[l] = 0.f; + rv1.y = f; + w.y = x; + } + + + z = w.x; + if (z < 0.0) + { + w.x = -z; + v(0,0) = -v(0,0); + v(1,0) = -v(1,0); + v(2,0) = -v(2,0); + } +} + +} // namespace rmagine diff --git a/src/rmagine_cuda/src/math/math.cu b/src/rmagine_cuda/src/math/math.cu index e06d3baa..ec02282f 100644 --- a/src/rmagine_cuda/src/math/math.cu +++ b/src/rmagine_cuda/src/math/math.cu @@ -1,10 +1,13 @@ #include "rmagine/math/math.cuh" #include "rmagine/math/math.h" #include "rmagine/math/types.h" +#include "rmagine/math/linalg.h" +#include "rmagine/util/cuda/CudaDebug.hpp" namespace rmagine { + //////// // Generic Kernel /// @@ -508,6 +511,7 @@ void multNxN( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (A.size() + blockSize - 1) / blockSize; multNxN_kernel<<>>(A.raw(), B.raw(), C.raw(), A.size()); + RM_CUDA_DEBUG(); } Memory multNxN( @@ -528,6 +532,7 @@ void multNxN( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (A.size() + blockSize - 1) / blockSize; multNxN_kernel<<>>(A.raw(), b.raw(), c.raw(), A.size()); + RM_CUDA_DEBUG(); } Memory multNxN( @@ -548,6 +553,7 @@ void multNxN( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (T1.size() + blockSize - 1) / blockSize; multNxN_kernel<<>>(T1.raw(), T2.raw(), Tr.raw(), T1.size()); + RM_CUDA_DEBUG(); } Memory multNxN( @@ -567,6 +573,7 @@ void multNxN( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (T.size() + blockSize - 1) / blockSize; multNxN_kernel<<>>(T.raw(), x.raw(), c.raw(), T.size()); + RM_CUDA_DEBUG(); } Memory multNxN( @@ -586,6 +593,7 @@ void multNxN( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (M1.size() + blockSize - 1) / blockSize; multNxN_kernel<<>>(M1.raw(), M2.raw(), Mr.raw(), M1.size()); + RM_CUDA_DEBUG(); } Memory multNxN( @@ -605,6 +613,7 @@ void multNxN( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (M1.size() + blockSize - 1) / blockSize; multNxN_conv_kernel<<>>(M1.raw(), M2.raw(), Qres.raw(), M1.size()); + RM_CUDA_DEBUG(); } void multNxN( @@ -615,6 +624,7 @@ void multNxN( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (M.size() + blockSize - 1) / blockSize; multNxN_kernel<<>>(M.raw(), x.raw(), c.raw(), M.size()); + RM_CUDA_DEBUG(); } Memory multNxN( @@ -637,6 +647,7 @@ void multNx1( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (A.size() + blockSize - 1) / blockSize; multNxN_kernel<<>>(A.raw(), b.raw(), C.raw(), A.size()); + RM_CUDA_DEBUG(); } Memory multNx1( @@ -657,6 +668,7 @@ void multNx1( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (A.size() + blockSize - 1) / blockSize; multNx1_kernel<<>>(A.raw(), b.raw(), C.raw(), A.size()); + RM_CUDA_DEBUG(); } Memory multNx1( @@ -676,6 +688,7 @@ void multNx1( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (T1.size() + blockSize - 1) / blockSize; multNx1_kernel<<>>(T1.raw(), t2.raw(), Tr.raw(), T1.size()); + RM_CUDA_DEBUG(); } Memory multNx1( @@ -695,6 +708,7 @@ void multNx1( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (T.size() + blockSize - 1) / blockSize; multNx1_kernel<<>>(T.raw(), x.raw(), c.raw(), T.size()); + RM_CUDA_DEBUG(); } Memory multNx1( @@ -714,6 +728,7 @@ void multNx1( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (M1.size() + blockSize - 1) / blockSize; multNx1_kernel<<>>(M1.raw(), m2.raw(), Mr.raw(), M1.size()); + RM_CUDA_DEBUG(); } Memory multNx1( @@ -733,6 +748,7 @@ void multNx1( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (M.size() + blockSize - 1) / blockSize; multNx1_kernel<<>>(M.raw(), x.raw(), C.raw(), M.size()); + RM_CUDA_DEBUG(); } Memory multNx1( @@ -752,6 +768,7 @@ void multNx1( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (M.size() + blockSize - 1) / blockSize; multNx1_kernel<<>>(M.raw(), x.raw(), C.raw(), M.size()); + RM_CUDA_DEBUG(); } Memory multNx1( @@ -774,6 +791,7 @@ void mult1xN( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (B.size() + blockSize - 1) / blockSize; mult1xN_kernel<<>>(a.raw(), B.raw(), C.raw(), B.size()); + RM_CUDA_DEBUG(); } Memory mult1xN( @@ -793,6 +811,7 @@ void mult1xN( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (B.size() + blockSize - 1) / blockSize; mult1xN_kernel<<>>(a.raw(), B.raw(), C.raw(), B.size()); + RM_CUDA_DEBUG(); } Memory mult1xN( @@ -812,6 +831,7 @@ void mult1xN( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (T2.size() + blockSize - 1) / blockSize; mult1xN_kernel<<>>(t1.raw(), T2.raw(), Tr.raw(), T2.size()); + RM_CUDA_DEBUG(); } Memory mult1xN( @@ -831,6 +851,7 @@ void mult1xN( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (X.size() + blockSize - 1) / blockSize; mult1xN_kernel<<>>(t.raw(), X.raw(), C.raw(), X.size()); + RM_CUDA_DEBUG(); } Memory mult1xN( @@ -850,6 +871,7 @@ void mult1xN( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (M2.size() + blockSize - 1) / blockSize; mult1xN_kernel<<>>(m1.raw(), M2.raw(), Mr.raw(), M2.size()); + RM_CUDA_DEBUG(); } Memory mult1xN( @@ -869,6 +891,7 @@ void mult1xN( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (X.size() + blockSize - 1) / blockSize; mult1xN_kernel<<>>(m.raw(), X.raw(), C.raw(), X.size()); + RM_CUDA_DEBUG(); } Memory mult1xN( @@ -888,6 +911,7 @@ void mult1xN( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (X.size() + blockSize - 1) / blockSize; mult1xN_kernel<<>>(m.raw(), X.raw(), C.raw(), X.size()); + RM_CUDA_DEBUG(); } Memory mult1xN( @@ -909,6 +933,7 @@ void addNxN( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (A.size() + blockSize - 1) / blockSize; addNxN_kernel<<>>(A.raw(), B.raw(), C.raw(), A.size()); + RM_CUDA_DEBUG(); } Memory addNxN( @@ -928,6 +953,7 @@ void addNxN( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (A.size() + blockSize - 1) / blockSize; addNxN_kernel<<>>(A.raw(), B.raw(), C.raw(), A.size()); + RM_CUDA_DEBUG(); } Memory addNxN( @@ -950,6 +976,7 @@ void subNxN( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (A.size() + blockSize - 1) / blockSize; subNxN_kernel<<>>(A.raw(), B.raw(), C.raw(), A.size()); + RM_CUDA_DEBUG(); } Memory subNxN( @@ -969,6 +996,7 @@ void subNx1( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (A.size() + blockSize - 1) / blockSize; subNx1_kernel<<>>(A.raw(), b.raw(), C.raw(), A.size()); + RM_CUDA_DEBUG(); } Memory subNx1( @@ -989,6 +1017,7 @@ void transpose( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (A.size() + blockSize - 1) / blockSize; transpose_kernel<<>>(A.raw(), B.raw(), A.size()); + RM_CUDA_DEBUG(); } Memory transpose( @@ -1006,6 +1035,7 @@ void transpose( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (A.size() + blockSize - 1) / blockSize; transpose_kernel<<>>(A.raw(), B.raw(), A.size()); + RM_CUDA_DEBUG(); } Memory transpose( @@ -1024,6 +1054,7 @@ void transposeInplace( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (A.size() + blockSize - 1) / blockSize; transposeInplace_kernel<<>>(A.raw(), A.size()); + RM_CUDA_DEBUG(); } ////// @@ -1035,6 +1066,7 @@ void invert( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (A.size() + blockSize - 1) / blockSize; invert_kernel<<>>(A.raw(), B.raw(), A.size()); + RM_CUDA_DEBUG(); } Memory invert( @@ -1052,6 +1084,7 @@ void invert( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (A.size() + blockSize - 1) / blockSize; invert_kernel<<>>(A.raw(), B.raw(), A.size()); + RM_CUDA_DEBUG(); } Memory invert( @@ -1069,6 +1102,7 @@ void invert( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (A.size() + blockSize - 1) / blockSize; invert_kernel<<>>(A.raw(), B.raw(), A.size()); + RM_CUDA_DEBUG(); } Memory invert( @@ -1089,6 +1123,7 @@ void divNxN( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (A.size() + blockSize - 1) / blockSize; divNxN_conv_kernel<<>>(A.raw(), B.raw(), C.raw(), A.size()); + RM_CUDA_DEBUG(); } Memory divNxN( @@ -1108,6 +1143,7 @@ void divNxN( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (A.size() + blockSize - 1) / blockSize; divNxN_conv_kernel<<>>(A.raw(), B.raw(), C.raw(), A.size()); + RM_CUDA_DEBUG(); } Memory divNxN( @@ -1129,6 +1165,7 @@ void divNxNIgnoreZeros( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (A.size() + blockSize - 1) / blockSize; divNxNIgnoreZeros_conv_kernel<<>>(A.raw(), B.raw(), C.raw(), A.size()); + RM_CUDA_DEBUG(); } Memory divNxNIgnoreZeros( @@ -1148,6 +1185,7 @@ void divNxNIgnoreZeros( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (A.size() + blockSize - 1) / blockSize; divNxNIgnoreZeros_conv_kernel<<>>(A.raw(), B.raw(), C.raw(), A.size()); + RM_CUDA_DEBUG(); } Memory divNxNIgnoreZeros( @@ -1167,6 +1205,7 @@ void divNxNIgnoreZeros( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (A.size() + blockSize - 1) / blockSize; divNxNIgnoreZeros_conv_kernel<<>>(A.raw(), B.raw(), C.raw(), A.size()); + RM_CUDA_DEBUG(); } Memory divNxNIgnoreZeros( @@ -1187,6 +1226,7 @@ void divNxNInplace( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (A.size() + blockSize - 1) / blockSize; divNxNInplace_kernel<<>>(A.raw(), B.raw(), A.size()); + RM_CUDA_DEBUG(); } void divNxNInplace( @@ -1207,6 +1247,7 @@ void divNx1Inplace( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (A.size() + blockSize - 1) / blockSize; divNx1Inplace_kernel<<>>(A.raw(), B, A.size()); + RM_CUDA_DEBUG(); } void divNx1Inplace( @@ -1228,6 +1269,7 @@ void convert( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (from.size() + blockSize - 1) / blockSize; convert_kernel<<>>(from.raw(), to.raw(), from.size()); + RM_CUDA_DEBUG(); } void convert( @@ -1245,6 +1287,7 @@ void copy(const MemoryView& from, constexpr unsigned int blockSize = 64; const unsigned int gridSize = (from.size() + blockSize - 1) / blockSize; convert_kernel<<>>(from.raw(), to.raw(), from.size()); + RM_CUDA_DEBUG(); } //////// @@ -1267,6 +1310,7 @@ void pack( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (R.size() + blockSize - 1) / blockSize; pack_kernel<<>>(R.raw(), t.raw(), T.raw(), R.size()); + RM_CUDA_DEBUG(); } //////// @@ -1279,6 +1323,7 @@ void multNxNTransposed( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (m1.size() + blockSize - 1) / blockSize; covParts_kernel<<>>(m1.raw(), m2.raw(), Cs.raw(), m1.size()); + RM_CUDA_DEBUG(); } Memory multNxNTransposed( @@ -1299,6 +1344,7 @@ void multNxNTransposed( constexpr unsigned int blockSize = 64; const unsigned int gridSize = (m1.size() + blockSize - 1) / blockSize; covParts_kernel<<>>(m1.raw(), m2.raw(), mask.raw(), Cs.raw(), m1.size()); + RM_CUDA_DEBUG(); } Memory multNxNTransposed( @@ -1318,6 +1364,7 @@ void normalizeInplace(MemoryView& q) constexpr unsigned int blockSize = 1024; const unsigned int gridSize = (q.size() + blockSize - 1) / blockSize; normalizeInplace_kernel<<>>(q.raw(), q.size()); + RM_CUDA_DEBUG(); } /////// @@ -1340,6 +1387,7 @@ void setIdentity(MemoryView& qs) constexpr unsigned int blockSize = 1024; const unsigned int gridSize = (qs.size() + blockSize - 1) / blockSize; setIdentity_kernel<<>>(qs.raw(), qs.size()); + RM_CUDA_DEBUG(); } void setIdentity(MemoryView& Ts) @@ -1347,6 +1395,7 @@ void setIdentity(MemoryView& Ts) constexpr unsigned int blockSize = 1024; const unsigned int gridSize = (Ts.size() + blockSize - 1) / blockSize; setIdentity_kernel<<>>(Ts.raw(), Ts.size()); + RM_CUDA_DEBUG(); } void setIdentity(MemoryView& Ms) @@ -1354,6 +1403,7 @@ void setIdentity(MemoryView& Ms) constexpr unsigned int blockSize = 1024; const unsigned int gridSize = (Ms.size() + blockSize - 1) / blockSize; setIdentity_kernel<<>>(Ms.raw(), Ms.size()); + RM_CUDA_DEBUG(); } void setIdentity(MemoryView& Ms) @@ -1361,6 +1411,7 @@ void setIdentity(MemoryView& Ms) constexpr unsigned int blockSize = 1024; const unsigned int gridSize = (Ms.size() + blockSize - 1) / blockSize; setIdentity_kernel<<>>(Ms.raw(), Ms.size()); + RM_CUDA_DEBUG(); } void setZeros(MemoryView& Ms) @@ -1381,6 +1432,7 @@ void sum( MemoryView& s) { sum_kernel<1024> <<<1, 1024>>>(data.raw(), s.raw(), data.size() ); + RM_CUDA_DEBUG(); } Memory sum( @@ -1417,6 +1469,7 @@ void cov( MemoryView& C) { cov_kernel<1024> <<<1, 1024>>>(v1.raw(), v2.raw(), C.raw(), v1.size() ); + RM_CUDA_DEBUG(); } Memory cov( @@ -1430,4 +1483,61 @@ Memory cov( } +__global__ void svd_kernel( + const Matrix3x3* As, + Matrix3x3* Us, + Matrix3x3* Ws, + Matrix3x3* Vs, + unsigned int N) +{ + const unsigned int id = blockIdx.x * blockDim.x + threadIdx.x; + // printf("hello\n"); + if(id < N) + { + svd(As[id], Us[id], Ws[id], Vs[id]); + } +} + +void svd( + const MemoryView& As, + MemoryView& Us, + MemoryView& Ws, + MemoryView& Vs +) +{ + // std::cout << "SVD CUDA!" << std::endl; + constexpr unsigned int blockSize = 512; + const unsigned int gridSize = (As.size() + blockSize - 1) / blockSize; + svd_kernel<<>>(As.raw(), Us.raw(), Ws.raw(), Vs.raw(), As.size()); + RM_CUDA_DEBUG(); +} + +__global__ void svd_kernel( + const Matrix3x3* As, + Matrix3x3* Us, + Vector3* ws, + Matrix3x3* Vs, + unsigned int N) +{ + const unsigned int id = blockIdx.x * blockDim.x + threadIdx.x; + // printf("hello\n"); + if(id < N) + { + svd(As[id], Us[id], ws[id], Vs[id]); + } +} + +void svd( + const MemoryView& As, + MemoryView& Us, + MemoryView& ws, + MemoryView& Vs +) +{ + constexpr unsigned int blockSize = 512; + const unsigned int gridSize = (As.size() + blockSize - 1) / blockSize; + svd_kernel<<>>(As.raw(), Us.raw(), ws.raw(), Vs.raw(), As.size()); + RM_CUDA_DEBUG(); +} + } // namespace rmagine \ No newline at end of file diff --git a/src/rmagine_embree/include/rmagine/map/embree/EmbreeMesh.hpp b/src/rmagine_embree/include/rmagine/map/embree/EmbreeMesh.hpp index ea0a580f..fe7925c4 100644 --- a/src/rmagine_embree/include/rmagine/map/embree/EmbreeMesh.hpp +++ b/src/rmagine_embree/include/rmagine/map/embree/EmbreeMesh.hpp @@ -92,8 +92,6 @@ class EmbreeMesh void initVertexNormals(); - bool closestPointFunc2(RTCPointQueryFunctionArguments* args); - // PUBLIC ATTRIBUTES MemoryView faces() const; MemoryView vertices() const; @@ -103,9 +101,6 @@ class EmbreeMesh MemoryView verticesTransformed() const; MemoryView faceNormalsTransformed() const; - - - void computeFaceNormals(); /** @@ -137,10 +132,6 @@ class EmbreeMesh Vertex* m_vertices_transformed; Memory m_face_normals_transformed; Memory m_vertex_normals_transformed; - - // boost::function m_closest_point_func; - // RTCPointQueryFunction* m_closest_point_func_raw; - }; using EmbreeMeshPtr = std::shared_ptr; diff --git a/src/rmagine_embree/include/rmagine/simulation/O1DnSimulatorEmbree.hpp b/src/rmagine_embree/include/rmagine/simulation/O1DnSimulatorEmbree.hpp index 714942ec..9892808e 100644 --- a/src/rmagine_embree/include/rmagine/simulation/O1DnSimulatorEmbree.hpp +++ b/src/rmagine_embree/include/rmagine/simulation/O1DnSimulatorEmbree.hpp @@ -112,34 +112,6 @@ class O1DnSimulatorEmbree { void setModel(const MemoryView, RAM>& model); void setModel(const O1DnModel_& model); - // Generic Version - template - void simulate(const MemoryView& Tbm, - BundleT& ret) const; - - template - BundleT simulate(const MemoryView& Tbm) const; - - - - // [[deprecated("Use simulate() instead.")]] - void simulateRanges( - const MemoryView& Tbm, - MemoryView& ranges) const; - - // [[deprecated("Use simulate() instead.")]] - Memory simulateRanges( - const MemoryView& Tbm) const; - - // [[deprecated("Use simulate() instead.")]] - void simulateHits( - const MemoryView& Tbm, - MemoryView& hits) const; - - // [[deprecated("Use simulate() instead.")]] - Memory simulateHits( - const MemoryView& Tbm) const; - inline Memory, RAM> model() const { return m_model; @@ -150,6 +122,41 @@ class O1DnSimulatorEmbree { return m_map; } + + /** + * @brief Simulate from one pose + * + * @tparam BundleT + * @param Tbm + * @param ret + */ + template + void simulate(const Transform& Tbm, BundleT& ret) const; + + template + BundleT simulate(const Transform& Tbm) const; + + /** + * @brief Simulate for multiple poses at once + * + * @tparam BundleT + * @param Tbm + * @param ret + */ + template + void simulate(const MemoryView& Tbm, + BundleT& ret) const; + + template + void simulate(const MemoryView& Tbm, + BundleT& ret) const; + + template + BundleT simulate(const MemoryView& Tbm) const; + + template + BundleT simulate(const MemoryView& Tbm) const; + protected: EmbreeMapPtr m_map; diff --git a/src/rmagine_embree/include/rmagine/simulation/O1DnSimulatorEmbree.tcc b/src/rmagine_embree/include/rmagine/simulation/O1DnSimulatorEmbree.tcc index 7e1598c3..63ccd223 100644 --- a/src/rmagine_embree/include/rmagine/simulation/O1DnSimulatorEmbree.tcc +++ b/src/rmagine_embree/include/rmagine/simulation/O1DnSimulatorEmbree.tcc @@ -12,15 +12,46 @@ namespace rmagine { +template +void O1DnSimulatorEmbree::simulate( + const Transform& Tbm, + BundleT& ret) const +{ + // TODO: change parallelization scheme for single simulations? + const MemoryView Tbm_mem(&Tbm, 1); + simulate(Tbm_mem, ret); +} + +template +BundleT O1DnSimulatorEmbree::simulate( + const Transform& Tbm) const +{ + BundleT res; + resize_memory_bundle(res, m_model->getWidth(), m_model->getHeight(), 1); + simulate(Tbm, res); + return res; +} template void O1DnSimulatorEmbree::simulate( const MemoryView& Tbm, BundleT& ret) const +{ + const MemoryView Tbm_const(Tbm.raw(), Tbm.size()); + simulate(Tbm_const, ret); +} + +template +void O1DnSimulatorEmbree::simulate( + const MemoryView& Tbm, + BundleT& ret) const { SimulationFlags flags = SimulationFlags::Zero(); set_simulation_flags_(ret, flags); + const float range_min = m_model->range.min; + const float range_max = m_model->range.max; + #pragma omp parallel for for(size_t pid = 0; pid < Tbm.size(); pid++) { @@ -52,8 +83,8 @@ void O1DnSimulatorEmbree::simulate( rayhit.ray.dir_x = ray_dir_m.x; rayhit.ray.dir_y = ray_dir_m.y; rayhit.ray.dir_z = ray_dir_m.z; - rayhit.ray.tnear = 0; - rayhit.ray.tfar = std::numeric_limits::infinity(); + rayhit.ray.tnear = 0; // if set to m_model->range.min we would scan through near occlusions + rayhit.ray.tfar = range_max; rayhit.ray.mask = -1; rayhit.ray.flags = 0; rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID; @@ -67,7 +98,12 @@ void O1DnSimulatorEmbree::simulate( { if(flags.hits) { - ret.Hits::hits[glob_id] = 1; + if(rayhit.ray.tfar >= range_min) + { + ret.Hits::hits[glob_id] = 1; + } else { + ret.Hits::hits[glob_id] = 0; + } } } @@ -83,7 +119,7 @@ void O1DnSimulatorEmbree::simulate( { if(flags.points) { - Vector pint = ray_dir_s * rayhit.ray.tfar; + Vector pint = ray_dir_s * rayhit.ray.tfar + ray_orig_s; ret.Points::points[glob_id] = pint; } } @@ -217,4 +253,14 @@ BundleT O1DnSimulatorEmbree::simulate( return res; } +template +BundleT O1DnSimulatorEmbree::simulate( + const MemoryView& Tbm) const +{ + BundleT res; + resize_memory_bundle(res, m_model->getWidth(), m_model->getHeight(), Tbm.size()); + simulate(Tbm, res); + return res; +} + } // namespace rmagine \ No newline at end of file diff --git a/src/rmagine_embree/include/rmagine/simulation/OnDnSimulatorEmbree.hpp b/src/rmagine_embree/include/rmagine/simulation/OnDnSimulatorEmbree.hpp index ccfe7ffb..af2015c6 100644 --- a/src/rmagine_embree/include/rmagine/simulation/OnDnSimulatorEmbree.hpp +++ b/src/rmagine_embree/include/rmagine/simulation/OnDnSimulatorEmbree.hpp @@ -112,33 +112,6 @@ class OnDnSimulatorEmbree { void setModel(const OnDnModel_& model); void setModel(const MemoryView, RAM>& model); - - // Generic Version - template - void simulate(const MemoryView& Tbm, - BundleT& ret) const; - - template - BundleT simulate(const MemoryView& Tbm) const; - - // [[deprecated("Use simulate() instead.")]] - void simulateRanges( - const MemoryView& Tbm, - MemoryView& ranges) const; - - // [[deprecated("Use simulate() instead.")]] - Memory simulateRanges( - const MemoryView& Tbm) const; - - // [[deprecated("Use simulate() instead.")]] - void simulateHits( - const MemoryView& Tbm, - MemoryView& hits) const; - - // [[deprecated("Use simulate() instead.")]] - Memory simulateHits( - const MemoryView& Tbm) const; - inline Memory, RAM> model() const { return m_model; @@ -149,6 +122,41 @@ class OnDnSimulatorEmbree { return m_map; } + /** + * @brief Simulate from one pose + * + * @tparam BundleT + * @param Tbm + * @param ret + */ + template + void simulate(const Transform& Tbm, BundleT& ret) const; + + template + BundleT simulate(const Transform& Tbm) const; + + /** + * @brief Simulate for multiple poses at once + * + * @tparam BundleT + * @param Tbm + * @param ret + */ + template + void simulate(const MemoryView& Tbm, + BundleT& ret) const; + + template + void simulate(const MemoryView& Tbm, + BundleT& ret) const; + + template + BundleT simulate(const MemoryView& Tbm) const; + + template + BundleT simulate(const MemoryView& Tbm) const; + + protected: EmbreeMapPtr m_map; diff --git a/src/rmagine_embree/include/rmagine/simulation/OnDnSimulatorEmbree.tcc b/src/rmagine_embree/include/rmagine/simulation/OnDnSimulatorEmbree.tcc index e003ff6b..9a875be0 100644 --- a/src/rmagine_embree/include/rmagine/simulation/OnDnSimulatorEmbree.tcc +++ b/src/rmagine_embree/include/rmagine/simulation/OnDnSimulatorEmbree.tcc @@ -13,13 +13,45 @@ namespace rmagine { +template +void OnDnSimulatorEmbree::simulate( + const Transform& Tbm, + BundleT& ret) const +{ + // TODO: change parallelization scheme for single simulations? + const MemoryView Tbm_mem(&Tbm, 1); + simulate(Tbm_mem, ret); +} + +template +BundleT OnDnSimulatorEmbree::simulate( + const Transform& Tbm) const +{ + BundleT res; + resize_memory_bundle(res, m_model->getWidth(), m_model->getHeight(), 1); + simulate(Tbm, res); + return res; +} + template void OnDnSimulatorEmbree::simulate( const MemoryView& Tbm, BundleT& ret) const +{ + const MemoryView Tbm_const(Tbm.raw(), Tbm.size()); + simulate(Tbm_const, ret); +} + +template +void OnDnSimulatorEmbree::simulate( + const MemoryView& Tbm, + BundleT& ret) const { SimulationFlags flags = SimulationFlags::Zero(); set_simulation_flags_(ret, flags); + + const float range_min = m_model->range.min; + const float range_max = m_model->range.max; #pragma omp parallel for for(size_t pid = 0; pid < Tbm.size(); pid++) @@ -31,7 +63,7 @@ void OnDnSimulatorEmbree::simulate( const Transform Tms_ = Tsm_.inv(); const unsigned int glob_shift = pid * m_model->size(); - + for(unsigned int vid = 0; vid < m_model->getHeight(); vid++) { for(unsigned int hid = 0; hid < m_model->getWidth(); hid++) @@ -53,7 +85,7 @@ void OnDnSimulatorEmbree::simulate( rayhit.ray.dir_y = ray_dir_m.y; rayhit.ray.dir_z = ray_dir_m.z; rayhit.ray.tnear = 0; - rayhit.ray.tfar = std::numeric_limits::infinity(); + rayhit.ray.tfar = range_max; rayhit.ray.mask = -1; rayhit.ray.flags = 0; rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID; @@ -67,7 +99,12 @@ void OnDnSimulatorEmbree::simulate( { if(flags.hits) { - ret.Hits::hits[glob_id] = 1; + if(rayhit.ray.tfar >= range_min) + { + ret.Hits::hits[glob_id] = 1; + } else { + ret.Hits::hits[glob_id] = 0; + } } } @@ -83,7 +120,7 @@ void OnDnSimulatorEmbree::simulate( { if(flags.points) { - Vector pint = ray_dir_s * rayhit.ray.tfar; + Vector pint = ray_dir_s * rayhit.ray.tfar + ray_orig_s; ret.Points::points[glob_id] = pint; } } @@ -217,4 +254,14 @@ BundleT OnDnSimulatorEmbree::simulate( return res; } +template +BundleT OnDnSimulatorEmbree::simulate( + const MemoryView& Tbm) const +{ + BundleT res; + resize_memory_bundle(res, m_model->getWidth(), m_model->getHeight(), Tbm.size()); + simulate(Tbm, res); + return res; +} + } // namespace rmagine \ No newline at end of file diff --git a/src/rmagine_embree/include/rmagine/simulation/PinholeSimulatorEmbree.hpp b/src/rmagine_embree/include/rmagine/simulation/PinholeSimulatorEmbree.hpp index d940fb5e..04e7aab8 100644 --- a/src/rmagine_embree/include/rmagine/simulation/PinholeSimulatorEmbree.hpp +++ b/src/rmagine_embree/include/rmagine/simulation/PinholeSimulatorEmbree.hpp @@ -116,32 +116,6 @@ class PinholeSimulatorEmbree { void setModel(const MemoryView& model); void setModel(const PinholeModel& model); - // Generic Version - template - void simulate(const MemoryView& Tbm, - BundleT& ret) const; - - template - BundleT simulate(const MemoryView& Tbm) const; - - // [[deprecated("Use simulate() instead.")]] - void simulateRanges( - const MemoryView& Tbm, - MemoryView& ranges) const; - - // [[deprecated("Use simulate() instead.")]] - Memory simulateRanges( - const MemoryView& Tbm) const; - - // [[deprecated("Use simulate() instead.")]] - void simulateHits( - const MemoryView& Tbm, - MemoryView& hits) const; - - // [[deprecated("Use simulate() instead.")]] - Memory simulateHits( - const MemoryView& Tbm) const; - inline Memory model() const { return m_model; @@ -152,6 +126,40 @@ class PinholeSimulatorEmbree { return m_map; } + /** + * @brief Simulate from one pose + * + * @tparam BundleT + * @param Tbm + * @param ret + */ + template + void simulate(const Transform& Tbm, BundleT& ret) const; + + template + BundleT simulate(const Transform& Tbm) const; + + /** + * @brief Simulate for multiple poses at once + * + * @tparam BundleT + * @param Tbm + * @param ret + */ + template + void simulate(const MemoryView& Tbm, + BundleT& ret) const; + + template + void simulate(const MemoryView& Tbm, + BundleT& ret) const; + + template + BundleT simulate(const MemoryView& Tbm) const; + + template + BundleT simulate(const MemoryView& Tbm) const; + protected: EmbreeMapPtr m_map; diff --git a/src/rmagine_embree/include/rmagine/simulation/PinholeSimulatorEmbree.tcc b/src/rmagine_embree/include/rmagine/simulation/PinholeSimulatorEmbree.tcc index 956953c9..0e563b2b 100644 --- a/src/rmagine_embree/include/rmagine/simulation/PinholeSimulatorEmbree.tcc +++ b/src/rmagine_embree/include/rmagine/simulation/PinholeSimulatorEmbree.tcc @@ -13,14 +13,46 @@ namespace rmagine { +template +void PinholeSimulatorEmbree::simulate( + const Transform& Tbm, + BundleT& ret) const +{ + // TODO: change parallelization scheme for single simulations? + const MemoryView Tbm_mem(&Tbm, 1); + simulate(Tbm_mem, ret); +} + +template +BundleT PinholeSimulatorEmbree::simulate( + const Transform& Tbm) const +{ + BundleT res; + resize_memory_bundle(res, m_model->getWidth(), m_model->getHeight(), 1); + simulate(Tbm, res); + return res; +} + template void PinholeSimulatorEmbree::simulate( const MemoryView& Tbm, BundleT& ret) const +{ + const MemoryView Tbm_const(Tbm.raw(), Tbm.size()); + simulate(Tbm_const, ret); +} + +template +void PinholeSimulatorEmbree::simulate( + const MemoryView& Tbm, + BundleT& ret) const { SimulationFlags flags = SimulationFlags::Zero(); set_simulation_flags_(ret, flags); + const float range_min = m_model->range.min; + const float range_max = m_model->range.max; + #pragma omp parallel for for(size_t pid = 0; pid < Tbm.size(); pid++) { @@ -49,8 +81,8 @@ void PinholeSimulatorEmbree::simulate( rayhit.ray.dir_x = ray_dir_m.x; rayhit.ray.dir_y = ray_dir_m.y; rayhit.ray.dir_z = ray_dir_m.z; - rayhit.ray.tnear = 0; - rayhit.ray.tfar = std::numeric_limits::infinity(); + rayhit.ray.tnear = 0; // if set to m_model->range.min we would scan through near occlusions + rayhit.ray.tfar = range_max; rayhit.ray.mask = -1; rayhit.ray.flags = 0; rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID; @@ -64,7 +96,12 @@ void PinholeSimulatorEmbree::simulate( { if(flags.hits) { - ret.Hits::hits[glob_id] = 1; + if(rayhit.ray.tfar >= range_min) + { + ret.Hits::hits[glob_id] = 1; + } else { + ret.Hits::hits[glob_id] = 0; + } } } @@ -94,9 +131,10 @@ void PinholeSimulatorEmbree::simulate( rayhit.hit.Ng_y, rayhit.hit.Ng_z }; - + // nint in map frame nint.normalizeInplace(); nint = Tms_.R * nint; + // nint in sensor frame // flip? if(ray_dir_s.dot(nint) > 0.0) diff --git a/src/rmagine_embree/include/rmagine/simulation/SphereSimulatorEmbree.hpp b/src/rmagine_embree/include/rmagine/simulation/SphereSimulatorEmbree.hpp index b60736e3..29025617 100644 --- a/src/rmagine_embree/include/rmagine/simulation/SphereSimulatorEmbree.hpp +++ b/src/rmagine_embree/include/rmagine/simulation/SphereSimulatorEmbree.hpp @@ -116,33 +116,6 @@ class SphereSimulatorEmbree { void setModel(const MemoryView& model); void setModel(const SphericalModel& model); - // Generic Version - template - void simulate(const MemoryView& Tbm, - BundleT& ret) const; - - template - BundleT simulate(const MemoryView& Tbm) const; - - // [[deprecated("Use simulate() instead.")]] - void simulateRanges( - const MemoryView& Tbm, - MemoryView& ranges) const; - - // [[deprecated("Use simulate() instead.")]] - Memory simulateRanges( - const MemoryView& Tbm) const; - - // [[deprecated("Use simulate() instead.")]] - void simulateHits( - const MemoryView& Tbm, - MemoryView& hits) const; - - // [[deprecated("Use simulate() instead.")]] - Memory simulateHits( - const MemoryView& Tbm) const; - - inline Memory model() const { return m_model; @@ -153,6 +126,40 @@ class SphereSimulatorEmbree { return m_map; } + /** + * @brief Simulate from one pose + * + * @tparam BundleT + * @param Tbm + * @param ret + */ + template + void simulate(const Transform& Tbm, BundleT& ret) const; + + template + BundleT simulate(const Transform& Tbm) const; + + /** + * @brief Simulate for multiple poses at once + * + * @tparam BundleT + * @param Tbm + * @param ret + */ + template + void simulate(const MemoryView& Tbm, + BundleT& ret) const; + + template + void simulate(const MemoryView& Tbm, + BundleT& ret) const; + + template + BundleT simulate(const MemoryView& Tbm) const; + + template + BundleT simulate(const MemoryView& Tbm) const; + protected: EmbreeMapPtr m_map; diff --git a/src/rmagine_embree/include/rmagine/simulation/SphereSimulatorEmbree.tcc b/src/rmagine_embree/include/rmagine/simulation/SphereSimulatorEmbree.tcc index ac800af2..3511838d 100644 --- a/src/rmagine_embree/include/rmagine/simulation/SphereSimulatorEmbree.tcc +++ b/src/rmagine_embree/include/rmagine/simulation/SphereSimulatorEmbree.tcc @@ -13,14 +13,47 @@ namespace rmagine { +template +void SphereSimulatorEmbree::simulate( + const Transform& Tbm, + BundleT& ret) const +{ + // TODO: change parallelization scheme for single simulations? + const MemoryView Tbm_mem(&Tbm, 1); + simulate(Tbm_mem, ret); +} + +template +BundleT SphereSimulatorEmbree::simulate( + const Transform& Tbm) const +{ + BundleT res; + resize_memory_bundle(res, m_model->getWidth(), m_model->getHeight(), 1); + simulate(Tbm, res); + return res; +} + template void SphereSimulatorEmbree::simulate( const MemoryView& Tbm, BundleT& ret) const +{ + const MemoryView Tbm_const(Tbm.raw(), Tbm.size()); + simulate(Tbm_const, ret); +} + +template +void SphereSimulatorEmbree::simulate( + const MemoryView& Tbm, + BundleT& ret) const { SimulationFlags flags = SimulationFlags::Zero(); set_simulation_flags_(ret, flags); + const float range_min = m_model->range.min; + const float range_max = m_model->range.max; + + #pragma omp parallel for for(size_t pid = 0; pid < Tbm.size(); pid++) { @@ -49,8 +82,8 @@ void SphereSimulatorEmbree::simulate( rayhit.ray.dir_x = ray_dir_m.x; rayhit.ray.dir_y = ray_dir_m.y; rayhit.ray.dir_z = ray_dir_m.z; - rayhit.ray.tnear = 0; - rayhit.ray.tfar = std::numeric_limits::infinity(); + rayhit.ray.tnear = 0.0; // if set to m_model->range.min we would scan through near occlusions + rayhit.ray.tfar = range_max; rayhit.ray.mask = -1; rayhit.ray.flags = 0; rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID; @@ -64,7 +97,12 @@ void SphereSimulatorEmbree::simulate( { if(flags.hits) { - ret.Hits::hits[glob_id] = 1; + if(rayhit.ray.tfar >= range_min) + { + ret.Hits::hits[glob_id] = 1; + } else { + ret.Hits::hits[glob_id] = 0; + } } } @@ -214,4 +252,14 @@ BundleT SphereSimulatorEmbree::simulate( return res; } +template +BundleT SphereSimulatorEmbree::simulate( + const MemoryView& Tbm) const +{ + BundleT res; + resize_memory_bundle(res, m_model->getWidth(), m_model->getHeight(), Tbm.size()); + simulate(Tbm, res); + return res; +} + } // namespace rmagine \ No newline at end of file diff --git a/src/rmagine_embree/src/map/embree/EmbreeMesh.cpp b/src/rmagine_embree/src/map/embree/EmbreeMesh.cpp index cc290672..8a87011e 100644 --- a/src/rmagine_embree/src/map/embree/EmbreeMesh.cpp +++ b/src/rmagine_embree/src/map/embree/EmbreeMesh.cpp @@ -322,6 +322,4 @@ void EmbreeMesh::apply() } } -// pt2ConstMember = &EmbreeMesh::closestPointFunc2; - } // namespace rmagine \ No newline at end of file diff --git a/src/rmagine_embree/src/simulation/O1DnSimulatorEmbree.cpp b/src/rmagine_embree/src/simulation/O1DnSimulatorEmbree.cpp index c99741ca..92d5793f 100644 --- a/src/rmagine_embree/src/simulation/O1DnSimulatorEmbree.cpp +++ b/src/rmagine_embree/src/simulation/O1DnSimulatorEmbree.cpp @@ -19,7 +19,7 @@ O1DnSimulatorEmbree::O1DnSimulatorEmbree(EmbreeMapPtr map) O1DnSimulatorEmbree::~O1DnSimulatorEmbree() { - // std::cout << "O1DnSimulatorEmbree - Destructor " << std::endl; + } void O1DnSimulatorEmbree::setMap(EmbreeMapPtr map) @@ -50,134 +50,6 @@ void O1DnSimulatorEmbree::setModel( m_model->range = model->range; m_model->orig = model->orig; m_model->dirs = model->dirs; - - // this is shallow copy becauce pointer is copied - // m_model = model; - // copy(model[0].dirs, m_model[0].dirs); - // std::cout << "Set Model" << std::endl; - // std::cout << model[0].dirs.raw() << " -> " << m_model[0].dirs.raw() << std::endl; -} - -void O1DnSimulatorEmbree::simulateRanges( - const MemoryView& Tbm, - MemoryView& ranges) const -{ - #pragma omp parallel for - for(size_t pid = 0; pid < Tbm.size(); pid++) - { - const Transform Tbm_ = Tbm[pid]; - const Transform Tsm_ = Tbm_ * m_Tsb[0]; - - const unsigned int glob_shift = pid * m_model->size(); - - for(unsigned int vid = 0; vid < m_model->getHeight(); vid++) - { - for(unsigned int hid = 0; hid < m_model->getWidth(); hid++) - { - const unsigned int loc_id = m_model->getBufferId(vid, hid); - const unsigned int glob_id = glob_shift + loc_id; - - const Vector ray_dir_s = m_model->getDirection(vid, hid); - const Vector ray_dir_m = Tsm_.R * ray_dir_s; - - const Vector ray_orig_s = m_model->getOrigin(vid, hid); - const Vector ray_orig_m = Tsm_ * ray_orig_s; - - // std::cout << ray_dir_s.x << " " << ray_dir_s.y << " " << ray_dir_s.z << std::endl; - - RTCRayHit rayhit; - rayhit.ray.org_x = ray_orig_m.x; - rayhit.ray.org_y = ray_orig_m.y; - rayhit.ray.org_z = ray_orig_m.z; - rayhit.ray.dir_x = ray_dir_m.x; - rayhit.ray.dir_y = ray_dir_m.y; - rayhit.ray.dir_z = ray_dir_m.z; - rayhit.ray.tnear = 0; - rayhit.ray.tfar = std::numeric_limits::infinity(); - rayhit.ray.mask = -1; - rayhit.ray.flags = 0; - rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID; - rayhit.hit.instID[0] = RTC_INVALID_GEOMETRY_ID; - - rtcIntersect1(m_map->scene->handle(), &rayhit); - - if(rayhit.hit.geomID != RTC_INVALID_GEOMETRY_ID) - { - ranges[glob_id] = rayhit.ray.tfar; - } else { - ranges[glob_id] = m_model->range.max + 1.0; - } - } - } - } -} - -Memory O1DnSimulatorEmbree::simulateRanges( - const MemoryView& Tbm) const -{ - Memory res(m_model->size() * Tbm.size()); - simulateRanges(Tbm, res); - return res; -} - -void O1DnSimulatorEmbree::simulateHits( - const MemoryView& Tbm, - MemoryView& hits) const -{ - #pragma omp parallel for - for(size_t pid = 0; pid < Tbm.size(); pid++) - { - const Transform Tbm_ = Tbm[pid]; - const Transform Tsm_ = Tbm_ * m_Tsb[0]; - - const unsigned int glob_shift = pid * m_model->size(); - - for(unsigned int vid = 0; vid < m_model->getHeight(); vid++) - { - for(unsigned int hid = 0; hid < m_model->getWidth(); hid++) - { - const unsigned int loc_id = m_model->getBufferId(vid, hid); - const unsigned int glob_id = glob_shift + loc_id; - - const Vector ray_dir_s = m_model->getDirection(vid, hid); - const Vector ray_dir_m = Tsm_.R * ray_dir_s; - - const Vector ray_orig_s = m_model->getOrigin(vid, hid); - const Vector ray_orig_m = Tsm_ * ray_orig_s; - - RTCRayHit rayhit; - rayhit.ray.org_x = ray_orig_m.x; - rayhit.ray.org_y = ray_orig_m.y; - rayhit.ray.org_z = ray_orig_m.z; - rayhit.ray.dir_x = ray_dir_m.x; - rayhit.ray.dir_y = ray_dir_m.y; - rayhit.ray.dir_z = ray_dir_m.z; - rayhit.ray.tnear = 0; - rayhit.ray.tfar = std::numeric_limits::infinity(); - rayhit.ray.mask = -1; - rayhit.ray.flags = 0; - rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID; - rayhit.hit.instID[0] = RTC_INVALID_GEOMETRY_ID; - - rtcIntersect1(m_map->scene->handle(), &rayhit); - - if(rayhit.hit.geomID != RTC_INVALID_GEOMETRY_ID) - { - hits[glob_id] = 1; - } else { - hits[glob_id] = 0; - } - } - } - } -} - -Memory O1DnSimulatorEmbree::simulateHits( - const MemoryView& Tbm) const -{ - Memory res(m_model->size() * Tbm.size()); - simulateHits(Tbm, res); - return res; } } // namespace rmagine \ No newline at end of file diff --git a/src/rmagine_embree/src/simulation/OnDnSimulatorEmbree.cpp b/src/rmagine_embree/src/simulation/OnDnSimulatorEmbree.cpp index 5e4ee7be..702831d6 100644 --- a/src/rmagine_embree/src/simulation/OnDnSimulatorEmbree.cpp +++ b/src/rmagine_embree/src/simulation/OnDnSimulatorEmbree.cpp @@ -54,134 +54,6 @@ void OnDnSimulatorEmbree::setModel( m_model->range = model->range; m_model->origs = model->origs; m_model->dirs = model->dirs; - - // this is shallow copy becauce pointer is copied - // m_model = model; - // copy(model[0].dirs, m_model[0].dirs); - // std::cout << "Set Model" << std::endl; - // std::cout << model[0].dirs.raw() << " -> " << m_model[0].dirs.raw() << std::endl; -} - -void OnDnSimulatorEmbree::simulateRanges( - const MemoryView& Tbm, - MemoryView& ranges) const -{ - #pragma omp parallel for - for(size_t pid = 0; pid < Tbm.size(); pid++) - { - const Transform Tbm_ = Tbm[pid]; - const Transform Tsm_ = Tbm_ * m_Tsb[0]; - - const unsigned int glob_shift = pid * m_model->size(); - - for(unsigned int vid = 0; vid < m_model->getHeight(); vid++) - { - for(unsigned int hid = 0; hid < m_model->getWidth(); hid++) - { - const unsigned int loc_id = m_model->getBufferId(vid, hid); - const unsigned int glob_id = glob_shift + loc_id; - - const Vector ray_dir_s = m_model->getDirection(vid, hid); - const Vector ray_dir_m = Tsm_.R * ray_dir_s; - - const Vector ray_orig_s = m_model->getOrigin(vid, hid); - const Vector ray_orig_m = Tsm_ * ray_orig_s; - - // std::cout << ray_dir_s.x << " " << ray_dir_s.y << " " << ray_dir_s.z << std::endl; - - RTCRayHit rayhit; - rayhit.ray.org_x = ray_orig_m.x; - rayhit.ray.org_y = ray_orig_m.y; - rayhit.ray.org_z = ray_orig_m.z; - rayhit.ray.dir_x = ray_dir_m.x; - rayhit.ray.dir_y = ray_dir_m.y; - rayhit.ray.dir_z = ray_dir_m.z; - rayhit.ray.tnear = 0; - rayhit.ray.tfar = std::numeric_limits::infinity(); - rayhit.ray.mask = -1; - rayhit.ray.flags = 0; - rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID; - rayhit.hit.instID[0] = RTC_INVALID_GEOMETRY_ID; - - rtcIntersect1(m_map->scene->handle(), &rayhit); - - if(rayhit.hit.geomID != RTC_INVALID_GEOMETRY_ID) - { - ranges[glob_id] = rayhit.ray.tfar; - } else { - ranges[glob_id] = m_model->range.max + 1.0; - } - } - } - } -} - -Memory OnDnSimulatorEmbree::simulateRanges( - const MemoryView& Tbm) const -{ - Memory res(m_model->size() * Tbm.size()); - simulateRanges(Tbm, res); - return res; -} - -void OnDnSimulatorEmbree::simulateHits( - const MemoryView& Tbm, - MemoryView& hits) const -{ - #pragma omp parallel for - for(size_t pid = 0; pid < Tbm.size(); pid++) - { - const Transform Tbm_ = Tbm[pid]; - const Transform Tsm_ = Tbm_ * m_Tsb[0]; - - const unsigned int glob_shift = pid * m_model->size(); - - for(unsigned int vid = 0; vid < m_model->getHeight(); vid++) - { - for(unsigned int hid = 0; hid < m_model->getWidth(); hid++) - { - const unsigned int loc_id = m_model->getBufferId(vid, hid); - const unsigned int glob_id = glob_shift + loc_id; - - const Vector ray_dir_s = m_model->getDirection(vid, hid); - const Vector ray_dir_m = Tsm_.R * ray_dir_s; - - const Vector ray_orig_s = m_model->getOrigin(vid, hid); - const Vector ray_orig_m = Tsm_ * ray_orig_s; - - RTCRayHit rayhit; - rayhit.ray.org_x = ray_orig_m.x; - rayhit.ray.org_y = ray_orig_m.y; - rayhit.ray.org_z = ray_orig_m.z; - rayhit.ray.dir_x = ray_dir_m.x; - rayhit.ray.dir_y = ray_dir_m.y; - rayhit.ray.dir_z = ray_dir_m.z; - rayhit.ray.tnear = 0; - rayhit.ray.tfar = std::numeric_limits::infinity(); - rayhit.ray.mask = -1; - rayhit.ray.flags = 0; - rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID; - rayhit.hit.instID[0] = RTC_INVALID_GEOMETRY_ID; - - rtcIntersect1(m_map->scene->handle(), &rayhit); - - if(rayhit.hit.geomID != RTC_INVALID_GEOMETRY_ID) - { - hits[glob_id] = 1; - } else { - hits[glob_id] = 0; - } - } - } - } -} - -Memory OnDnSimulatorEmbree::simulateHits( - const MemoryView& Tbm) const -{ - Memory res(m_model->size() * Tbm.size()); - simulateHits(Tbm, res); - return res; } } // namespace rmagine \ No newline at end of file diff --git a/src/rmagine_embree/src/simulation/PinholeSimulatorEmbree.cpp b/src/rmagine_embree/src/simulation/PinholeSimulatorEmbree.cpp index 48db4f47..c93b8677 100644 --- a/src/rmagine_embree/src/simulation/PinholeSimulatorEmbree.cpp +++ b/src/rmagine_embree/src/simulation/PinholeSimulatorEmbree.cpp @@ -49,118 +49,4 @@ void PinholeSimulatorEmbree::setModel(const PinholeModel& model) m_model[0] = model; } -void PinholeSimulatorEmbree::simulateRanges( - const MemoryView& Tbm, - MemoryView& ranges) const -{ - #pragma omp parallel for - for(size_t pid = 0; pid < Tbm.size(); pid++) - { - const Transform Tbm_ = Tbm[pid]; - const Transform Tsm_ = Tbm_ * m_Tsb[0]; - - const unsigned int glob_shift = pid * m_model->size(); - - for(unsigned int vid = 0; vid < m_model->getHeight(); vid++) - { - for(unsigned int hid = 0; hid < m_model->getWidth(); hid++) - { - const unsigned int loc_id = m_model->getBufferId(vid, hid); - const unsigned int glob_id = glob_shift + loc_id; - - const Vector ray_dir_s = m_model->getDirection(vid, hid); - const Vector ray_dir_m = Tsm_.R * ray_dir_s; - - RTCRayHit rayhit; - rayhit.ray.org_x = Tsm_.t.x; - rayhit.ray.org_y = Tsm_.t.y; - rayhit.ray.org_z = Tsm_.t.z; - rayhit.ray.dir_x = ray_dir_m.x; - rayhit.ray.dir_y = ray_dir_m.y; - rayhit.ray.dir_z = ray_dir_m.z; - rayhit.ray.tnear = 0; - rayhit.ray.tfar = std::numeric_limits::infinity(); - rayhit.ray.mask = -1; - rayhit.ray.flags = 0; - rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID; - rayhit.hit.instID[0] = RTC_INVALID_GEOMETRY_ID; - - rtcIntersect1(m_map->scene->handle(), &rayhit); - - if(rayhit.hit.geomID != RTC_INVALID_GEOMETRY_ID) - { - ranges[glob_id] = rayhit.ray.tfar; - } else { - ranges[glob_id] = m_model->range.max + 1.0; - } - } - } - } -} - -Memory PinholeSimulatorEmbree::simulateRanges( - const MemoryView& Tbm) const -{ - Memory res(m_model->size() * Tbm.size()); - simulateRanges(Tbm, res); - return res; -} - -void PinholeSimulatorEmbree::simulateHits( - const MemoryView& Tbm, - MemoryView& hits) const -{ - #pragma omp parallel for - for(size_t pid = 0; pid < Tbm.size(); pid++) - { - const Transform Tbm_ = Tbm[pid]; - const Transform Tsm_ = Tbm_ * m_Tsb[0]; - - const unsigned int glob_shift = pid * m_model->size(); - - for(unsigned int vid = 0; vid < m_model->getHeight(); vid++) - { - for(unsigned int hid = 0; hid < m_model->getWidth(); hid++) - { - const unsigned int loc_id = m_model->getBufferId(vid, hid); - const unsigned int glob_id = glob_shift + loc_id; - - const Vector ray_dir_s = m_model->getDirection(vid, hid); - const Vector ray_dir_m = Tsm_.R * ray_dir_s; - - RTCRayHit rayhit; - rayhit.ray.org_x = Tsm_.t.x; - rayhit.ray.org_y = Tsm_.t.y; - rayhit.ray.org_z = Tsm_.t.z; - rayhit.ray.dir_x = ray_dir_m.x; - rayhit.ray.dir_y = ray_dir_m.y; - rayhit.ray.dir_z = ray_dir_m.z; - rayhit.ray.tnear = 0; - rayhit.ray.tfar = std::numeric_limits::infinity(); - rayhit.ray.mask = -1; - rayhit.ray.flags = 0; - rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID; - rayhit.hit.instID[0] = RTC_INVALID_GEOMETRY_ID; - - rtcIntersect1(m_map->scene->handle(), &rayhit); - - if(rayhit.hit.geomID != RTC_INVALID_GEOMETRY_ID) - { - hits[glob_id] = 1; - } else { - hits[glob_id] = 0; - } - } - } - } -} - -Memory PinholeSimulatorEmbree::simulateHits( - const MemoryView& Tbm) const -{ - Memory res(m_model->size() * Tbm.size()); - simulateHits(Tbm, res); - return res; -} - } // namespace rmagine \ No newline at end of file diff --git a/src/rmagine_embree/src/simulation/SphereSimulatorEmbree.cpp b/src/rmagine_embree/src/simulation/SphereSimulatorEmbree.cpp index eabbf705..a14bcf2e 100644 --- a/src/rmagine_embree/src/simulation/SphereSimulatorEmbree.cpp +++ b/src/rmagine_embree/src/simulation/SphereSimulatorEmbree.cpp @@ -56,120 +56,4 @@ void SphereSimulatorEmbree::setModel( m_model[0] = model; } -void SphereSimulatorEmbree::simulateRanges( - const MemoryView& Tbm, - MemoryView& ranges) const -{ - auto handle = m_map->scene->handle(); - - #pragma omp parallel for - for(size_t pid = 0; pid < Tbm.size(); pid++) - { - const Transform Tbm_ = Tbm[pid]; - const Transform Tsm_ = Tbm_ * m_Tsb[0]; - - const unsigned int glob_shift = pid * m_model->size(); - - for(unsigned int vid = 0; vid < m_model->getHeight(); vid++) - { - for(unsigned int hid = 0; hid < m_model->getWidth(); hid++) - { - const unsigned int loc_id = m_model->getBufferId(vid, hid); - const unsigned int glob_id = glob_shift + loc_id; - - const Vector ray_dir_s = m_model->getDirection(vid, hid); - const Vector ray_dir_m = Tsm_.R * ray_dir_s; - - RTCRayHit rayhit; - rayhit.ray.org_x = Tsm_.t.x; - rayhit.ray.org_y = Tsm_.t.y; - rayhit.ray.org_z = Tsm_.t.z; - rayhit.ray.dir_x = ray_dir_m.x; - rayhit.ray.dir_y = ray_dir_m.y; - rayhit.ray.dir_z = ray_dir_m.z; - rayhit.ray.tnear = 0; - rayhit.ray.tfar = std::numeric_limits::infinity(); - rayhit.ray.mask = -1; - rayhit.ray.flags = 0; - rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID; - rayhit.hit.instID[0] = RTC_INVALID_GEOMETRY_ID; - - rtcIntersect1(m_map->scene->handle(), &rayhit); - - if(rayhit.hit.geomID != RTC_INVALID_GEOMETRY_ID) - { - ranges[glob_id] = rayhit.ray.tfar; - } else { - ranges[glob_id] = m_model->range.max + 1.0; - } - } - } - } -} - -Memory SphereSimulatorEmbree::simulateRanges( - const MemoryView& Tbm) const -{ - Memory res(m_model->phi.size * m_model->theta.size * Tbm.size()); - simulateRanges(Tbm, res); - return res; -} - -void SphereSimulatorEmbree::simulateHits( - const MemoryView& Tbm, - MemoryView& hits) const -{ - #pragma omp parallel for - for(size_t pid = 0; pid < Tbm.size(); pid++) - { - const Transform Tbm_ = Tbm[pid]; - const Transform Tsm_ = Tbm_ * m_Tsb[0]; - - const unsigned int glob_shift = pid * m_model->size(); - - for(unsigned int vid = 0; vid < m_model->getHeight(); vid++) - { - for(unsigned int hid = 0; hid < m_model->getWidth(); hid++) - { - const unsigned int loc_id = m_model->getBufferId(vid, hid); - const unsigned int glob_id = glob_shift + loc_id; - - const Vector ray_dir_s = m_model->getDirection(vid, hid); - const Vector ray_dir_m = Tsm_.R * ray_dir_s; - - RTCRayHit rayhit; - rayhit.ray.org_x = Tsm_.t.x; - rayhit.ray.org_y = Tsm_.t.y; - rayhit.ray.org_z = Tsm_.t.z; - rayhit.ray.dir_x = ray_dir_m.x; - rayhit.ray.dir_y = ray_dir_m.y; - rayhit.ray.dir_z = ray_dir_m.z; - rayhit.ray.tnear = 0; - rayhit.ray.tfar = std::numeric_limits::infinity(); - rayhit.ray.mask = -1; - rayhit.ray.flags = 0; - rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID; - rayhit.hit.instID[0] = RTC_INVALID_GEOMETRY_ID; - - rtcIntersect1(m_map->scene->handle(), &rayhit); - - if(rayhit.hit.geomID != RTC_INVALID_GEOMETRY_ID) - { - hits[glob_id] = 1; - } else { - hits[glob_id] = 0; - } - } - } - } -} - -Memory SphereSimulatorEmbree::simulateHits( - const MemoryView& Tbm) const -{ - Memory res(m_model->phi.size * m_model->theta.size * Tbm.size()); - simulateHits(Tbm, res); - return res; -} - } // namespace rmagine \ No newline at end of file diff --git a/src/rmagine_ouster/CMakeLists.txt b/src/rmagine_ouster/CMakeLists.txt new file mode 100644 index 00000000..d90c2cab --- /dev/null +++ b/src/rmagine_ouster/CMakeLists.txt @@ -0,0 +1,94 @@ + +message(STATUS "Building Core. Library: rmagine") + +set(RMAGINE_CORE_SRCS + # Types + src/types/ouster_sensors.cpp +) + +add_library(rmagine-ouster SHARED + ${RMAGINE_CORE_SRCS} +) + +target_include_directories(rmagine-ouster + PUBLIC + $ + $ + ${ASSIMP_INCLUDE_DIRS} + ${Boost_INCLUDE_DIRS} +) + +target_link_libraries(rmagine-ouster + rmagine-core + ${ASSIMP_LIBRARIES} + Eigen3::Eigen + ${Boost_LIBRARIES} + ${OpenMP_CXX_LIBRARIES} + jsoncpp +) + +set_target_properties(rmagine-ouster + PROPERTIES + EXPORT_NAME ouster + SOVERSION ${rmagine_VERSION_MAJOR} + VERSION ${rmagine_VERSION} + COMPONENT ouster + CXX_STANDARD 17 +) + +# TODO: do this: +add_library(rmagine ALIAS rmagine-ouster) +add_library(rmagine::ouster ALIAS rmagine-ouster) + + +list(APPEND RMAGINE_LIBRARIES rmagine-ouster) +set(RMAGINE_LIBRARIES ${RMAGINE_LIBRARIES} PARENT_SCOPE) + + +# CMAKE FIND SCRIPT +include(CMakePackageConfigHelpers) + +########### +## CORE +############ +install(TARGETS rmagine-ouster + EXPORT rmagine-ouster-targets + COMPONENT ouster) + +install(EXPORT rmagine-ouster-targets + FILE rmagine-ouster-targets.cmake + COMPONENT ouster + NAMESPACE rmagine:: + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/rmagine-${rmagine_VERSION} +) + +write_basic_package_version_file( + ${CMAKE_CURRENT_BINARY_DIR}/rmagine-ouster-config-version.cmake + VERSION ${rmagine_VERSION} + COMPATIBILITY SameMajorVersion +) + +configure_package_config_file(cmake/rmagine-ouster-config.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/rmagine-ouster-config.cmake + INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/rmagine-${rmagine_VERSION} +) + +install( + FILES + ${CMAKE_CURRENT_BINARY_DIR}/rmagine-ouster-config.cmake + ${CMAKE_CURRENT_BINARY_DIR}/rmagine-ouster-config-version.cmake + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/rmagine-${rmagine_VERSION} + COMPONENT ouster +) + +install( + DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/rmagine + COMPONENT ouster + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rmagine-${rmagine_VERSION} +) + +set(CPACK_DEBIAN_CORE_PACKAGE_DEPENDS "libeigen3-dev, libassimp-dev" PARENT_SCOPE) + +list(APPEND CPACK_COMPONENTS_ALL ouster) +set(CPACK_COMPONENTS_ALL ${CPACK_COMPONENTS_ALL} PARENT_SCOPE) + diff --git a/src/rmagine_ouster/cmake/rmagine-ouster-config.cmake.in b/src/rmagine_ouster/cmake/rmagine-ouster-config.cmake.in new file mode 100644 index 00000000..a2d03406 --- /dev/null +++ b/src/rmagine_ouster/cmake/rmagine-ouster-config.cmake.in @@ -0,0 +1,12 @@ +@PACKAGE_INIT@ + +include(${CMAKE_CURRENT_LIST_DIR}/rmagine-ouster-config-version.cmake) +include(${CMAKE_CURRENT_LIST_DIR}/rmagine-ouster-targets.cmake) + +include(CMakeFindDependencyMacro) + +find_dependency(jsoncpp) + +check_required_components(ouster) + +set(rmagine_ouster_FOUND 1) \ No newline at end of file diff --git a/src/rmagine_ouster/include/rmagine/types/ouster_sensors.h b/src/rmagine_ouster/include/rmagine/types/ouster_sensors.h new file mode 100644 index 00000000..9ec65e93 --- /dev/null +++ b/src/rmagine_ouster/include/rmagine/types/ouster_sensors.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2024, University Osnabrück + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University Osnabrück nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL University Osnabrück BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * + * @brief Predefined Sensor Models + * + * @date 10.09.2024 + * @author Alexander Mock + * + * @copyright Copyright (c) 2024, University Osnabrück. All rights reserved. + * This project is released under the 3-Clause BSD License. + * + */ +#ifndef RMAGINE_TYPES_OUSTER_SENSORS_H +#define RMAGINE_TYPES_OUSTER_SENSORS_H + +#include + +namespace rmagine +{ + +O1DnModel o1dn_from_ouster_meta_file(std::string filename); + +} // namespace rmagine + +#endif // RMAGINE_TYPES_OUSTER_SENSORS_H diff --git a/src/rmagine_ouster/src/types/ouster_sensors.cpp b/src/rmagine_ouster/src/types/ouster_sensors.cpp new file mode 100644 index 00000000..b1af6561 --- /dev/null +++ b/src/rmagine_ouster/src/types/ouster_sensors.cpp @@ -0,0 +1,82 @@ +#include "rmagine/types/ouster_sensors.h" + +#include +#include + +namespace rmagine { + +O1DnModel o1dn_from_ouster_meta_file(std::string filename) +{ + O1DnModel model; + + std::ifstream ouster_file(filename, std::ifstream::binary); + + Json::Value ouster_config; + ouster_file >> ouster_config; + + std::string lidar_mode = ouster_config["lidar_mode"].asString(); + + size_t x_id = lidar_mode.find("x"); + unsigned int lidar_width = std::stoi(lidar_mode.substr(0, x_id)); + unsigned int lidar_frequency = std::stoi(lidar_mode.substr(x_id+1)); + + std::vector beam_altitude_angles; + for(auto val : ouster_config["beam_altitude_angles"]) + { + beam_altitude_angles.push_back(val.asFloat()); + } + + unsigned int lidar_height = beam_altitude_angles.size(); + + std::vector beam_azimuth_angles; + for(auto val : ouster_config["beam_azimuth_angles"]) + { + beam_azimuth_angles.push_back(val.asFloat()); + } + + const float lidar_origin_to_beam_origin_mm = ouster_config["lidar_origin_to_beam_origin_mm"].asFloat(); + + // std::cout << "Loading Ouster Lidar: " << lidar_width << "x" << lidar_height << ". " << lidar_frequency << "hz" << std::endl; + + model.width = lidar_width; + model.height = lidar_height; + model.dirs.resize(lidar_width * lidar_height); + + const float hor_start_angle = 0.0; + const float hor_angle_inc = (2.0 * M_PI) / static_cast(model.getWidth()); + + const float ver_start_angle = 0.0; + + + + model.orig = {0.0, 0.0, 0.0}; + + model.range.min = 0.1; + model.range.max = 80.0; + + for(unsigned int vid = 0; vid < model.getHeight(); vid++) + { + // vertical angle + const float phi = beam_altitude_angles[vid] * DEG_TO_RAD_F; + + for(unsigned int hid = 0; hid < model.getWidth(); hid++) + { + // horizontal angle + const float theta_offset = beam_azimuth_angles[vid] * DEG_TO_RAD_F; + const float theta = hor_start_angle + hor_angle_inc * static_cast(hid) + theta_offset; + + // compute direction: polar to cartesian + Vector dir{ + cosf(phi) * cosf(theta), + cosf(phi) * sinf(theta), + sinf(phi)}; + + const unsigned int buf_id = model.getBufferId(vid, hid); + model.dirs[buf_id] = dir; + } + } + + return model; +} + +} // namespace rmagine \ No newline at end of file diff --git a/tests/core/CMakeLists.txt b/tests/core/CMakeLists.txt index 2809af9e..2977723f 100644 --- a/tests/core/CMakeLists.txt +++ b/tests/core/CMakeLists.txt @@ -35,3 +35,11 @@ target_link_libraries(rmagine_tests_core_quaternion ) add_test(NAME core_quaternion COMMAND rmagine_tests_core_quaternion) + +# 4. SVD +add_executable(rmagine_tests_core_math_svd math_svd.cpp) +target_link_libraries(rmagine_tests_core_math_svd + rmagine::core +) + +add_test(NAME core_math_svd COMMAND rmagine_tests_core_math_svd) diff --git a/tests/core/math_svd.cpp b/tests/core/math_svd.cpp new file mode 100644 index 00000000..974457cd --- /dev/null +++ b/tests/core/math_svd.cpp @@ -0,0 +1,412 @@ +#include + + +#include + +#include +#include + +#include +#include + +#include + +#include + +#include +#include + +namespace rm = rmagine; + + +float compute_error(Eigen::Matrix3f gt, Eigen::Matrix3f m) +{ + float ret = 0.0; + for(size_t i=0; i<3; i++) + { + for(size_t j=0; j<3; j++) + { + ret += abs(gt(i, j) - m(i, j)); + } + } + return ret; +} + +float compute_error(rm::Matrix3x3 gt, rm::Matrix3x3 m) +{ + float ret = 0.0; + for(size_t i=0; i<3; i++) + { + for(size_t j=0; j<3; j++) + { + ret += abs(gt(i, j) - m(i, j)); + } + } + return ret; +} + +Eigen::Matrix3f& eigen_view(rm::Matrix3x3& M) +{ + return *reinterpret_cast( &M ); +} + +template +void testSVD(const Eigen::Matrix& A) +{ + Eigen::JacobiSVD > svd(A, Eigen::ComputeFullU | Eigen::ComputeFullV); +} + +template +void testSVD(const rm::Matrix_& A) +{ + using AMatT = rm::Matrix_; + using UMatT = typename rm::svd_dims::U; + using WMatT = typename rm::svd_dims::W; + using VMatT = typename rm::svd_dims::V; + + UMatT U = UMatT::Zeros(); + WMatT W = WMatT::Zeros(); + VMatT V = VMatT::Zeros(); + + rm::svd(A, U, W, V); +} + +void svdTestWithPrints() +{ + rm::Matrix3x3 Arm; + + Eigen::Matrix3f Aeig = Eigen::Matrix3f::Random(3, 3); + for(size_t i=0; i<3; i++) + { + for(size_t j=0; j<3; j++) + { + Arm(i, j) = Aeig(i, j); + } + } + + + std::cout << "A: " << std::endl << Aeig << std::endl; + + std::cout << std::endl; + std::cout << "Eigen: " << std::endl; + + + Eigen::JacobiSVD svdeig(Aeig, Eigen::ComputeFullU | Eigen::ComputeFullV); + // std::cout << "U: " << std::endl << svdeig.matrixU() << std::endl; + // std::cout << "V: " << std::endl << svdeig.matrixV() << std::endl; + + std::cout << "U * V.T" << std::endl; + std::cout << svdeig.matrixU() * svdeig.matrixV().transpose() << std::endl; + + // rm::SVD2 svdrm; + // svdrm.decompose(Arm); + + std::cout << std::endl; + std::cout << "Rmagine: " << std::endl; + // std::cout << "U: " << svdrm.u << std::endl; + // std::cout << "V: " << svdrm.v << std::endl; + // std::cout << "w: " << svdrm.w << std::endl; + + std::cout << "U * V.T" << std::endl; + // std::cout << svdrm.u * svdrm.v.transpose() << std::endl; + + // std::cout << svdrm.w << std::endl; + + using AMat = rm::Matrix_; + using UMat = typename rm::svd_dims::U; + using WMat = typename rm::svd_dims::W; + using VMat = typename rm::svd_dims::V; + + UMat Urm = UMat::Zeros(); + WMat Wrm = WMat::Zeros(); + VMat Vrm = VMat::Zeros(); + + rm::svd(Arm, Urm, Wrm, Vrm); + + std::cout << Urm * Vrm.T() << std::endl; +} + +template +void runtimeTest() +{ + rm::Matrix_ Arm; + + Eigen::Matrix Aeig = Eigen::Matrix::Random(N, M); + for(size_t i=0; i +void accuracyTest() +{ + std::cout << "Accuracy Test" << std::endl; + rm::Matrix_ Arm; + + Eigen::Matrix Aeig = Eigen::Matrix::Random(N, M); + for(size_t i=0; i > svdeig(Aeig, Eigen::ComputeFullU | Eigen::ComputeFullV); + + auto s = svdeig.singularValues(); + + Eigen::Matrix Seig = Eigen::Matrix::Zero(); + for(size_t i=0; i; + using UMat = typename rm::svd_dims::U; + using WMat = typename rm::svd_dims::W; + using VMat = typename rm::svd_dims::V; + + UMat Urm = UMat::Zeros(); + WMat Wrm = WMat::Zeros(); + VMat Vrm = VMat::Zeros(); + + rm::svd(Arm, Urm, Wrm, Vrm); + + auto uvt_rm = Urm * Wrm * Vrm.T(); + + // std::cout << "Rmagine: " << std::endl; + // std::cout << uvt_rm << std::endl; + + float error_rm = 0.0; + for(size_t i=0; i covs_eigen(num_objects); + rm::Memory covs_rm(num_objects); + + for(size_t obj_id=0; obj_id SVD -> UWT* -> U * W * T* -> C + + std::cout << "Start computing SVD of " << num_objects << " 3x3 matrices" << std::endl; + + std::vector res_eigen(num_objects); + + rm::StopWatch sw; + double el_eigen, el_rmagine, el_rmagine2; + + sw(); + #pragma omp parallel for + for(size_t obj_id=0; obj_id svdeig(covs_eigen[obj_id], + Eigen::ComputeFullU | Eigen::ComputeFullV); + auto s = svdeig.singularValues(); + Eigen::Matrix3f Seig = Eigen::Matrix3f::Zero(); + for(size_t i=0; i res_rm(num_objects); + + sw(); + #pragma omp parallel for + for(size_t obj_id=0; obj_id covs_eigen(num_objects); + rm::Memory covs_rm(num_objects); + rm::Memory Us(num_objects); + rm::Memory Ws(num_objects); + rm::Memory Vs(num_objects); + + for(size_t obj_id=0; obj_id res_rm(num_objects); + for(size_t obj_id=0; obj_id(); + accuracyTest<20, 30>(); + + + parallelTest(); + parallelTest2(); + + + return 0; +} + diff --git a/tests/cuda/CMakeLists.txt b/tests/cuda/CMakeLists.txt index f1bb3c4f..2998f89f 100644 --- a/tests/cuda/CMakeLists.txt +++ b/tests/cuda/CMakeLists.txt @@ -26,3 +26,11 @@ target_link_libraries(rmagine_tests_cuda_memory_slicing add_test(NAME cuda_memory_slicing COMMAND rmagine_tests_cuda_memory_slicing) + +# 4. SVD +add_executable(rmagine_tests_cuda_math_svd math_svd.cpp) +target_link_libraries(rmagine_tests_cuda_math_svd + rmagine::cuda +) + +add_test(NAME cuda_math_svd COMMAND rmagine_tests_cuda_math_svd) diff --git a/tests/cuda/math_svd.cpp b/tests/cuda/math_svd.cpp new file mode 100644 index 00000000..7cc7289b --- /dev/null +++ b/tests/cuda/math_svd.cpp @@ -0,0 +1,118 @@ +#include +#include +#include +#include + +#include + +#include + +namespace rm = rmagine; + +float compute_error(Eigen::Matrix3f gt, Eigen::Matrix3f m) +{ + float ret = 0.0; + for(size_t i=0; i<3; i++) + { + for(size_t j=0; j<3; j++) + { + ret += abs(gt(i, j) - m(i, j)); + } + } + return ret; +} + +float compute_error(rm::Matrix3x3 gt, rm::Matrix3x3 m) +{ + float ret = 0.0; + for(size_t i=0; i<3; i++) + { + for(size_t j=0; j<3; j++) + { + ret += abs(gt(i, j) - m(i, j)); + } + } + return ret; +} + +void parallelTest() +{ + size_t num_objects = 1000000; + std::cout << "parallelTest. Computing SVD of " << num_objects << " 3x3 matrices" << std::endl; + // correct num_objects objects in parallel + + + std::vector covs_eigen(num_objects); + rm::Memory covs_rm(num_objects); + rm::Memory Us(num_objects); + rm::Memory Ws(num_objects); + rm::Memory Vs(num_objects); + + for(size_t obj_id=0; obj_id covs_rm_ = covs_rm; + rm::Memory Us_ = Us; + rm::Memory Ws_ = Ws; + rm::Memory Vs_ = Vs; + + sw(); + svd(covs_rm_, Us_, Ws_, Vs_); + cudaDeviceSynchronize(); + el_rmagine = sw(); + + // download + Us = Us_; + Ws = Ws_; + Vs = Vs_; + + rm::Memory res_rm(num_objects); + for(size_t obj_id=0; obj_id