diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9ddff224..e0455c4b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.16)
project(rmagine
LANGUAGES CXX C
- VERSION 2.2.5)# TODO update this version when merging into main-branch
+ VERSION 2.2.6)# TODO update this version when merging into main-branch
option(BUILD_TOOLS "Build tools" ON)
option(BUILD_TESTS "Build tests" ON)
@@ -114,6 +114,9 @@ endif(OPENMP_FOUND)
########################################
## Optional Deps
+# for ouster config loading
+find_package(jsoncpp)
+
######################################
## CUDA: For Optix ##
######################################
@@ -178,6 +181,11 @@ set(RMAGINE_STATIC_LIBRARIES)
### RMAGINE CORE LIB
add_subdirectory(src/rmagine_core)
+### RMAGINE OUSTER LIB (optional: only added when find_package(jsoncpp) succeeded)
+if(jsoncpp_FOUND)
+ add_subdirectory(src/rmagine_ouster)
+endif(jsoncpp_FOUND)
+
### RMAGINE EMBREE LIB
if(embree_FOUND)
add_subdirectory(src/rmagine_embree)
@@ -193,7 +201,7 @@ endif(CUDA_FOUND)
message(STATUS "${BoldCyan}Components build:${ColourReset}")
foreach(LIBRARY ${RMAGINE_LIBRARIES})
- message(STATUS "- ${BoldGreen}${LIBRARY}${ColourReset}")
+ message(STATUS "- ${BoldGreen}${LIBRARY}${ColourReset}")
endforeach()
#### TESTS
diff --git a/README.md b/README.md
index 9c89873f..0eb836bc 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,8 @@ Rmagine
Issues
•
Examples
+ •
+ Viewer
@@ -116,7 +118,7 @@ More detailed examples explaining each step and how to customize it to your need
Please reference the following papers when using the Rmagine library in your scientific work.
-```latex
+```bib
@inproceedings{mock2023rmagine,
title={{Rmagine: 3D Range Sensor Simulation in Polygonal Maps via Ray Tracing for Embedded Hardware on Mobile Robots}},
author={Mock, Alexander and Wiemann, Thomas and Hertzberg, Joachim},
@@ -127,6 +129,7 @@ Please reference the following papers when using the Rmagine library in your sci
```
## Rmagine-accelerated Applications
+- [rmagine_viewer](https://github.com/amock/rmagine_viewer)
- [rmagine_gazebo_plugins](https://github.com/uos/rmagine_gazebo_plugins)
- [RMCL](https://github.com/uos/rmcl)
- [radarays_ros](https://github.com/uos/radarays_ros)
diff --git a/apps/rmagine_benchmark/Main.cpp b/apps/rmagine_benchmark/Main.cpp
index 3cfc533b..91e8359f 100644
--- a/apps/rmagine_benchmark/Main.cpp
+++ b/apps/rmagine_benchmark/Main.cpp
@@ -116,7 +116,7 @@ int main(int argc, char** argv)
// Define what to simulate
double velos_per_second_mean = 0.0;
- std::cout << "- range of last ray: " << cpu_sim->simulateRanges(Tbm)[Tbm.size() * model->phi.size * model->theta.size - 1] << std::endl;
+ // std::cout << "- range of last ray: " << cpu_sim->simulateRanges(Tbm)[Tbm.size() * model->phi.size * model->theta.size - 1] << std::endl;
std::cout << "-- Starting Benchmark --" << std::endl;
// predefine result buffer
diff --git a/dat/ouster_config/os0-128x1024x20-meta.json b/dat/ouster_config/os0-128x1024x20-meta.json
new file mode 100644
index 00000000..60026a7e
--- /dev/null
+++ b/dat/ouster_config/os0-128x1024x20-meta.json
@@ -0,0 +1,461 @@
+{
+ "beam_altitude_angles":
+ [
+ 45.31,
+ 44.31,
+ 43.58,
+ 43.16,
+ 42.31,
+ 41.32,
+ 40.61,
+ 40.19,
+ 39.32,
+ 38.35,
+ 37.66,
+ 37.22,
+ 36.36,
+ 35.42,
+ 34.73,
+ 34.28,
+ 33.4,
+ 32.48,
+ 31.81,
+ 31.34,
+ 30.47,
+ 29.58,
+ 28.9,
+ 28.44,
+ 27.55,
+ 26.68,
+ 26.01,
+ 25.53,
+ 24.64,
+ 23.8,
+ 23.15,
+ 22.64,
+ 21.76,
+ 20.93,
+ 20.28,
+ 19.77,
+ 18.88,
+ 18.09,
+ 17.44,
+ 16.91,
+ 16.01,
+ 15.25,
+ 14.61,
+ 14.06,
+ 13.16,
+ 12.42,
+ 11.78,
+ 11.25,
+ 10.32,
+ 9.61,
+ 8.97,
+ 8.4,
+ 7.49,
+ 6.81,
+ 6.18,
+ 5.58,
+ 4.66,
+ 4.01,
+ 3.38,
+ 2.76,
+ 1.84,
+ 1.2,
+ 0.57,
+ -0.06,
+ -0.98,
+ -1.6,
+ -2.22,
+ -2.89,
+ -3.8,
+ -4.4,
+ -5.03,
+ -5.7,
+ -6.62,
+ -7.2,
+ -7.83,
+ -8.52,
+ -9.43,
+ -10,
+ -10.65,
+ -11.37,
+ -12.27,
+ -12.83,
+ -13.47,
+ -14.21,
+ -15.12,
+ -15.65,
+ -16.3,
+ -17.06,
+ -17.97,
+ -18.47,
+ -19.12,
+ -19.91,
+ -20.83,
+ -21.32,
+ -21.98,
+ -22.78,
+ -23.71,
+ -24.18,
+ -24.83,
+ -25.67,
+ -26.59,
+ -27.05,
+ -27.7,
+ -28.56,
+ -29.48,
+ -29.92,
+ -30.6,
+ -31.48,
+ -32.4,
+ -32.84,
+ -33.52,
+ -34.4,
+ -35.32,
+ -35.76,
+ -36.44,
+ -37.36,
+ -38.28,
+ -38.71,
+ -39.4,
+ -40.36,
+ -41.27,
+ -41.69,
+ -42.38,
+ -43.36,
+ -44.29,
+ -44.69,
+ -45.4,
+ -46.42
+ ],
+ "beam_azimuth_angles":
+ [
+ 11.58,
+ 4.26,
+ -2.91,
+ -9.95,
+ 11.11,
+ 4.08,
+ -2.85,
+ -9.65,
+ 10.69,
+ 3.9,
+ -2.8,
+ -9.39,
+ 10.34,
+ 3.75,
+ -2.75,
+ -9.18,
+ 10.03,
+ 3.62,
+ -2.72,
+ -8.99,
+ 9.77,
+ 3.5,
+ -2.71,
+ -8.84,
+ 9.53,
+ 3.4,
+ -2.68,
+ -8.72,
+ 9.32,
+ 3.3,
+ -2.68,
+ -8.6,
+ 9.14,
+ 3.21,
+ -2.69,
+ -8.52,
+ 8.99,
+ 3.13,
+ -2.69,
+ -8.45,
+ 8.84,
+ 3.06,
+ -2.69,
+ -8.41,
+ 8.73,
+ 2.99,
+ -2.7,
+ -8.37,
+ 8.63,
+ 2.94,
+ -2.73,
+ -8.36,
+ 8.54,
+ 2.9,
+ -2.75,
+ -8.35,
+ 8.48,
+ 2.85,
+ -2.77,
+ -8.37,
+ 8.43,
+ 2.83,
+ -2.81,
+ -8.4,
+ 8.4,
+ 2.78,
+ -2.84,
+ -8.43,
+ 8.38,
+ 2.76,
+ -2.87,
+ -8.51,
+ 8.37,
+ 2.72,
+ -2.91,
+ -8.57,
+ 8.38,
+ 2.72,
+ -2.96,
+ -8.65,
+ 8.4,
+ 2.7,
+ -3.03,
+ -8.76,
+ 8.45,
+ 2.7,
+ -3.08,
+ -8.87,
+ 8.49,
+ 2.69,
+ -3.15,
+ -9.01,
+ 8.57,
+ 2.69,
+ -3.22,
+ -9.18,
+ 8.67,
+ 2.7,
+ -3.31,
+ -9.36,
+ 8.79,
+ 2.71,
+ -3.41,
+ -9.58,
+ 8.92,
+ 2.73,
+ -3.52,
+ -9.81,
+ 9.09,
+ 2.77,
+ -3.63,
+ -10.1,
+ 9.27,
+ 2.79,
+ -3.78,
+ -10.42,
+ 9.5,
+ 2.84,
+ -3.93,
+ -10.8,
+ 9.78,
+ 2.9,
+ -4.11,
+ -11.23,
+ 10.11,
+ 2.95,
+ -4.34,
+ -11.75
+ ],
+ "build_date": "2022-09-21T17:47:45Z",
+ "build_rev": "v2.4.0",
+ "client_version": "ouster_client 0.7.1",
+ "data_format":
+ {
+ "column_window":
+ [
+ 0,
+ 1023
+ ],
+ "columns_per_frame": 1024,
+ "columns_per_packet": 16,
+ "pixel_shift_by_row":
+ [
+ 33,
+ 12,
+ -8,
+ -28,
+ 32,
+ 12,
+ -8,
+ -27,
+ 30,
+ 11,
+ -8,
+ -27,
+ 29,
+ 11,
+ -8,
+ -26,
+ 29,
+ 10,
+ -8,
+ -26,
+ 28,
+ 10,
+ -8,
+ -25,
+ 27,
+ 10,
+ -8,
+ -25,
+ 27,
+ 9,
+ -8,
+ -24,
+ 26,
+ 9,
+ -8,
+ -24,
+ 26,
+ 9,
+ -8,
+ -24,
+ 25,
+ 9,
+ -8,
+ -24,
+ 25,
+ 9,
+ -8,
+ -24,
+ 25,
+ 8,
+ -8,
+ -24,
+ 24,
+ 8,
+ -8,
+ -24,
+ 24,
+ 8,
+ -8,
+ -24,
+ 24,
+ 8,
+ -8,
+ -24,
+ 24,
+ 8,
+ -8,
+ -24,
+ 24,
+ 8,
+ -8,
+ -24,
+ 24,
+ 8,
+ -8,
+ -24,
+ 24,
+ 8,
+ -8,
+ -25,
+ 24,
+ 8,
+ -9,
+ -25,
+ 24,
+ 8,
+ -9,
+ -25,
+ 24,
+ 8,
+ -9,
+ -26,
+ 24,
+ 8,
+ -9,
+ -26,
+ 25,
+ 8,
+ -9,
+ -27,
+ 25,
+ 8,
+ -10,
+ -27,
+ 25,
+ 8,
+ -10,
+ -28,
+ 26,
+ 8,
+ -10,
+ -29,
+ 26,
+ 8,
+ -11,
+ -30,
+ 27,
+ 8,
+ -11,
+ -31,
+ 28,
+ 8,
+ -12,
+ -32,
+ 29,
+ 8,
+ -12,
+ -33
+ ],
+ "pixels_per_column": 128,
+ "udp_profile_imu": "LEGACY",
+ "udp_profile_lidar": "LEGACY"
+ },
+ "hostname": "",
+ "image_rev": "ousteros-image-prod-aries-v2.4.0+20220921174636",
+ "imu_to_sensor_transform":
+ [
+ 1,
+ 0,
+ 0,
+ 6.253,
+ 0,
+ 1,
+ 0,
+ -11.775,
+ 0,
+ 0,
+ 1,
+ 7.645,
+ 0,
+ 0,
+ 0,
+ 1
+ ],
+ "initialization_id": 7109749,
+ "json_calibration_version": 4,
+ "lidar_mode": "1024x20",
+ "lidar_origin_to_beam_origin_mm": 27.67,
+ "lidar_to_sensor_transform":
+ [
+ -1,
+ 0,
+ 0,
+ 0,
+ 0,
+ -1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1,
+ 36.18,
+ 0,
+ 0,
+ 0,
+ 1
+ ],
+ "prod_line": "OS-0-128",
+ "prod_pn": "840-103574-06",
+ "prod_sn": "122219001184",
+ "status": "RUNNING",
+ "udp_port_imu": 55235,
+ "udp_port_lidar": 52031
+}
diff --git a/src/rmagine_core/CMakeLists.txt b/src/rmagine_core/CMakeLists.txt
index 9bcfe03d..9ab8b04b 100644
--- a/src/rmagine_core/CMakeLists.txt
+++ b/src/rmagine_core/CMakeLists.txt
@@ -12,6 +12,7 @@ set(RMAGINE_CORE_SRCS
src/types/Memory.cpp
src/types/conversions.cpp
src/types/sensors.cpp
+ src/types/mesh_types.cpp
# Util
src/util/synthetic.cpp
src/util/assimp/helper.cpp
diff --git a/src/rmagine_core/include/rmagine/math/definitions.h b/src/rmagine_core/include/rmagine/math/definitions.h
deleted file mode 100644
index 6b585dc1..00000000
--- a/src/rmagine_core/include/rmagine/math/definitions.h
+++ /dev/null
@@ -1,30 +0,0 @@
-#ifndef RMAGINE_MATH_DEFINITIONS_OLD_H
-#define RMAGINE_MATH_DEFINITIONS_OLD_H
-
-#include
-#include
-#include
-
-namespace rmagine
-{
-
-#define __UINT_MAX__ (__INT_MAX__ * 2U + 1U)
-
-#define DEG_TO_RAD 0.017453292519943295
-#define DEG_TO_RAD_F 0.017453292519943295f
-#define RAD_TO_DEG 57.29577951308232
-#define RAD_TO_DEG_F 57.29577951308232f
-
-// Forward declarations
-struct Vector2;
-struct Vector3;
-struct EulerAngles;
-struct Quaternion;
-struct Transform;
-struct Matrix3x3;
-struct Matrix4x4;
-struct AABB;
-
-} // namespace rmagine
-
-#endif // RMAGINE_MATH_DEFINITIONS_H
\ No newline at end of file
diff --git a/src/rmagine_core/include/rmagine/math/linalg.h b/src/rmagine_core/include/rmagine/math/linalg.h
index 5a6b60b8..58bd0a35 100644
--- a/src/rmagine_core/include/rmagine/math/linalg.h
+++ b/src/rmagine_core/include/rmagine/math/linalg.h
@@ -41,6 +41,7 @@
#define RMAGINE_MATH_LINALG_H
#include "types.h"
+#include
namespace rmagine
{
@@ -53,8 +54,10 @@ namespace rmagine
* @param scale scale vector
* @return Matrix4x4 composed 4x4 transformation matrix
*/
+RMAGINE_FUNCTION
Matrix4x4 compose(const Transform& T, const Vector3& scale);
+RMAGINE_FUNCTION
Matrix4x4 compose(const Transform& T, const Matrix3x3& S);
/**
@@ -65,6 +68,7 @@ Matrix4x4 compose(const Transform& T, const Matrix3x3& S);
* @param T transform object consisting of translational and rotational parts
* @param S 3x3 scale matrix
*/
+RMAGINE_FUNCTION
void decompose(const Matrix4x4& M, Transform& T, Matrix3x3& S);
/**
@@ -75,6 +79,7 @@ void decompose(const Matrix4x4& M, Transform& T, Matrix3x3& S);
* @param T transform object consisting of translational and rotational parts
* @param scale scale vector
*/
+RMAGINE_FUNCTION
void decompose(const Matrix4x4& M, Transform& T, Vector3& scale);
/**
@@ -86,6 +91,7 @@ void decompose(const Matrix4x4& M, Transform& T, Vector3& scale);
* - if fac=1.0 it is exactly B
* - if fac=2.0 it it goes on a (B-A) length from B (extrapolation)
*/
+RMAGINE_FUNCTION
Quaternion polate(const Quaternion& A, const Quaternion& B, float fac);
/**
@@ -97,8 +103,69 @@ Quaternion polate(const Quaternion& A, const Quaternion& B, float fac);
* - if fac=1.0 it is exactly B
* - if fac=2.0 it it goes on a (B-A) length from B (extrapolation)
*/
+RMAGINE_FUNCTION
Transform polate(const Transform& A, const Transform& B, float fac);
+
+// Numerical Recipes
+// M = MatrixT::rows()
+// N = MatrixT::cols()
+//
+// Warning: Numerical Recipes has different SVD matrix shapes
+// than Wikipedia
+template
+struct svd_dims {
+ using U = MatrixT; // same as input
+ using w = Matrix_;
+ using W = Matrix_;
+ using V = Matrix_;
+};
+
+/**
+ * @brief own SVD implementation.
+ * Why use it?
+ * - ~2x faster than Eigen
+ * - SOON: Works inside of CUDA kernels
+ *
+ */
+template
+void svd(
+ const Matrix_& A,
+ Matrix_& U,
+ Matrix_& W, // matrix
+ Matrix_& V
+);
+
+template
+void svd(
+ const Matrix_& A,
+ Matrix_& U,
+ Matrix_& w, // vector version (Cols should be something with max)
+ Matrix_& V
+);
+
+/**
+ * @brief SVD that can be used for both CPU and GPU (Cuda kernels)
+ *
+ */
+RMAGINE_FUNCTION
+void svd(
+ const Matrix3x3& A,
+ Matrix3x3& U,
+ Matrix3x3& W,
+ Matrix3x3& V
+);
+
+RMAGINE_FUNCTION
+void svd(
+ const Matrix3x3& A,
+ Matrix3x3& U,
+ Vector3& w,
+ Matrix3x3& V
+);
+
} // namespace rmagine
+#include "linalg.tcc"
+
#endif // RMAGINE_MATH_MATH_LINALG_H
\ No newline at end of file
diff --git a/src/rmagine_core/include/rmagine/math/linalg.tcc b/src/rmagine_core/include/rmagine/math/linalg.tcc
new file mode 100644
index 00000000..ef7a3762
--- /dev/null
+++ b/src/rmagine_core/include/rmagine/math/linalg.tcc
@@ -0,0 +1,582 @@
+#include
+#include
+
+namespace rmagine
+{
+
+template
+void svd(
+ const Matrix_& a,
+ Matrix_& u,
+ Matrix_& w,
+ Matrix_& v)
+{
+ constexpr unsigned int m = Rows;
+ constexpr unsigned int n = Cols;
+
+ constexpr unsigned int max_iterations = 30;
+
+ // extra memory required
+ bool flag;
+ int i, its, j, jj, k, l, nm;
+ float anorm, c, f, g, h, s, scale, x, y, z;
+ DataT rv1[n];
+
+ g = scale = anorm = 0.0;
+ const float eps = std::numeric_limits::epsilon();
+ u = a;
+
+ for(i=0; i < n; i++)
+ {
+ l = i + 2;
+ rv1[i] = scale * g;
+ g = s = scale = 0.0;
+ if(i < m)
+ {
+ for(k=i; k=0; i--)
+ {
+ if(i < n-1)
+ {
+ if(g != 0.0)
+ {
+ for(j=l; j=0; i--)
+ {
+ l = i+1;
+ g = w(i, i);
+ for(j=l;j=0; k--)
+ {
+ for(its=0; its=0; l--)
+ {
+ nm=l-1;
+ if (l == 0 || abs(rv1[l]) <= eps*anorm) {
+ flag=false;
+ break;
+ }
+ if (abs(w(nm, nm)) <= eps*anorm)
+ {
+ break;
+ }
+ }
+ if(flag)
+ {
+ c=0.0;
+ s=1.0;
+ for(i=l; i
+void svd(
+ const Matrix_& a,
+ Matrix_& u,
+ Matrix_& w, // vector version
+ Matrix_& v)
+{
+ constexpr unsigned int m = Rows;
+ constexpr unsigned int n = Cols;
+ constexpr unsigned int max_iterations = 30;
+
+ // additional memory required
+ bool flag;
+ int i, its, j, jj, k, l, nm;
+ float anorm, c, f, g, h, s, scale, x, y, z;
+ DataT rv1[n];
+
+ g = scale = anorm = 0.0;
+ float eps = std::numeric_limits::epsilon();
+ u = a;
+
+ for(i=0; i < n; i++)
+ {
+ l = i+2;
+ rv1[i] = scale*g;
+ g = s = scale = 0.0;
+ if(i < m)
+ {
+ for(k=i; k=0; i--)
+ {
+ if(i < n-1)
+ {
+ if(g != 0.0)
+ {
+ for(j=l; j=0; i--)
+ {
+ l = i+1;
+ g = w(i, 0);
+ for(j=l;j=0; k--)
+ {
+ for(its=0; its=0; l--)
+ {
+ nm=l-1;
+ if (l == 0 || abs(rv1[l]) <= eps*anorm) {
+ flag=false;
+ break;
+ }
+ if (abs(w(nm, 0)) <= eps*anorm)
+ {
+ break;
+ }
+ }
+ if(flag)
+ {
+ c=0.0;
+ s=1.0;
+ for(i=l; i
+RMAGINE_INLINE_FUNCTION
+T SQR(const T a) {return a*a;}
+
+template
+RMAGINE_INLINE_FUNCTION
+const T &MAX(const T &a, const T &b)
+ {return b > a ? (b) : (a);}
+
+RMAGINE_INLINE_FUNCTION
+float MAX(const double &a, const float &b)
+ {return b > a ? (b) : float(a);}
+
+RMAGINE_INLINE_FUNCTION
+float MAX(const float &a, const double &b)
+ {return b > a ? float(b) : (a);}
+
+template
+RMAGINE_INLINE_FUNCTION
+const T &MIN(const T &a, const T &b)
+ {return b < a ? (b) : (a);}
+
+RMAGINE_INLINE_FUNCTION
+float MIN(const double &a, const float &b)
+ {return b < a ? (b) : float(a);}
+
+RMAGINE_INLINE_FUNCTION
+float MIN(const float &a, const double &b)
+ {return b < a ? float(b) : (a);}
+
+template
+RMAGINE_INLINE_FUNCTION
+T SIGN(const T &a, const T &b)
+ {return b >= 0 ? (a >= 0 ? a : -a) : (a >= 0 ? -a : a);}
+
+RMAGINE_INLINE_FUNCTION
+float SIGN(const float &a, const double &b)
+ {return b >= 0 ? (a >= 0 ? a : -a) : (a >= 0 ? -a : a);}
+
+RMAGINE_INLINE_FUNCTION
+float SIGN(const double &a, const float &b)
+ {return (float)(b >= 0 ? (a >= 0 ? a : -a) : (a >= 0 ? -a : a));}
+
+template
+RMAGINE_INLINE_FUNCTION
+void SWAP(T &a, T &b)
+ {T dum=a; a=b; b=dum;}
+
+template
+RMAGINE_INLINE_FUNCTION
+T PYTHAG(const T a, const T b)
+{
+ T absa = abs(a);
+ T absb = abs(b);
+ return (absa > absb ? absa * sqrt(1.0+SQR(absb/absa)) :
+ (absb == 0.0 ? 0.0 : absb * sqrt(1.0+SQR(absa/absb))));
+}
+
template
Vector3_ min(const Vector3_& a, const Vector3_& b);
@@ -56,7 +117,6 @@ Vector3_ min(const Vector3_& a, const Vector3_& b);
template
Vector3_ max(const Vector3_& a, const Vector3_& b);
-
/////////////
// #multNxN
////////
@@ -371,6 +431,29 @@ Memory cov(
const MemoryView& v2
);
+/**
+ * @brief decompose A = UWV* using singular value decomposition
+ */
+void svd(
+ const MemoryView& As,
+ MemoryView& Us,
+ MemoryView& Ws,
+ MemoryView& Vs
+);
+
+/**
+ * @brief decompose A = UWV* using singular value decomposition
+ *
+ * w is a vector which is the diagonal of matrix W
+ */
+void svd(
+ const MemoryView& As,
+ MemoryView& Us,
+ MemoryView& ws,
+ MemoryView& Vs
+);
+
+
} // namespace rmagine
#include "math.tcc"
diff --git a/src/rmagine_core/include/rmagine/math/types/Matrix.hpp b/src/rmagine_core/include/rmagine/math/types/Matrix.hpp
index 46cdc561..22e824b5 100644
--- a/src/rmagine_core/include/rmagine/math/types/Matrix.hpp
+++ b/src/rmagine_core/include/rmagine/math/types/Matrix.hpp
@@ -333,6 +333,17 @@ struct Matrix_ {
RMAGINE_INLINE_FUNCTION
operator EulerAngles_() const;
+ /**
+ * @brief Transformation Matrix -> Transform
+ * WARNING: The matrix has to be isometric, i.e. composed only of
+ * rotational and translational components. If it also contains e.g. scale
+ * components, use the "decompose" function instead
+ *
+ * @return Transform_
+ */
+ RMAGINE_INLINE_FUNCTION
+ operator Transform_() const;
+
/**
* @brief Data Type Cast to ConvT
*
diff --git a/src/rmagine_core/include/rmagine/math/types/Matrix.tcc b/src/rmagine_core/include/rmagine/math/types/Matrix.tcc
index 5973ce91..5fe5271e 100644
--- a/src/rmagine_core/include/rmagine/math/types/Matrix.tcc
+++ b/src/rmagine_core/include/rmagine/math/types/Matrix.tcc
@@ -1256,7 +1256,7 @@ Matrix_::operator EulerAngles_() const
// pitch (y-axis)
if (fabs(sB) >= 1.0)
{
- e.pitch = copysignf(M_PI / 2, sB); // use 90 degrees if out of range
+ e.pitch = copysignf(M_PI_2, sB); // use 90 degrees if out of range
} else {
e.pitch = asinf(sB);
}
@@ -1267,6 +1267,18 @@ Matrix_::operator EulerAngles_() const
return e;
}
+template
+RMAGINE_INLINE_FUNCTION
+Matrix_::operator Transform_() const
+{
+ static_assert(Rows == 4 && Cols == 4);
+
+ Transform_ T;
+ T.set(*this);
+ return T;
+}
+
+
template
template
diff --git a/src/rmagine_core/include/rmagine/math/types/Quaternion.tcc b/src/rmagine_core/include/rmagine/math/types/Quaternion.tcc
index a3742652..b4dac7da 100644
--- a/src/rmagine_core/include/rmagine/math/types/Quaternion.tcc
+++ b/src/rmagine_core/include/rmagine/math/types/Quaternion.tcc
@@ -182,8 +182,6 @@ template
RMAGINE_INLINE_FUNCTION
Quaternion_::operator EulerAngles_() const
{
- constexpr DataT PI_HALF = M_PI / 2.0;
-
// TODO: check
// https://en.wikipedia.org/wiki/Conversion_between_quaternions_and_Euler_angles
// checked once
@@ -215,7 +213,7 @@ Quaternion_::operator EulerAngles_() const
// pitch (y-axis)
if (fabs(sinp) >= 1.0f)
{
- e.pitch = copysign(PI_HALF, sinp); // use 90 degrees if out of range
+ e.pitch = copysign(M_PI_2, sinp); // use 90 degrees if out of range
} else {
e.pitch = asin(sinp);
}
diff --git a/src/rmagine_core/include/rmagine/math/types/Transform.hpp b/src/rmagine_core/include/rmagine/math/types/Transform.hpp
index 2ad90aa9..baef4389 100644
--- a/src/rmagine_core/include/rmagine/math/types/Transform.hpp
+++ b/src/rmagine_core/include/rmagine/math/types/Transform.hpp
@@ -38,6 +38,12 @@ struct Transform_
RMAGINE_INLINE_FUNCTION
void setIdentity();
+ /**
+ * @brief Setting the transform from a 4x4 transformation matrix
+ * WARNING: the matrix must be isometric, i.e. must only contain
+ * rotational and translational parts (no scale). Otherwise,
+ * use the "decompose" function
+ */
RMAGINE_INLINE_FUNCTION
void set(const Matrix_& M);
@@ -76,13 +82,6 @@ struct Transform_
RMAGINE_INLINE_FUNCTION
Transform_ pow(const DataT& exp) const;
- // OPERATORS
- RMAGINE_INLINE_FUNCTION
- void operator=(const Matrix_& M)
- {
- set(M);
- }
-
RMAGINE_INLINE_FUNCTION
Transform_ operator~() const
{
@@ -108,6 +107,20 @@ struct Transform_
return mult(v);
}
+ /////////////////////
+ // CASTING
+
+ /**
+ * @brief Transform -> Matrix4x4
+ *
+ * @return Matrix_
+ */
+ RMAGINE_INLINE_FUNCTION
+ operator Matrix_() const;
+
+ /**
+ * @brief Internal data type cast
+ */
template
Transform_ cast() const
{
diff --git a/src/rmagine_core/include/rmagine/math/types/Transform.tcc b/src/rmagine_core/include/rmagine/math/types/Transform.tcc
index 02fd878a..8e8d6b5a 100644
--- a/src/rmagine_core/include/rmagine/math/types/Transform.tcc
+++ b/src/rmagine_core/include/rmagine/math/types/Transform.tcc
@@ -78,4 +78,13 @@ Transform_ Transform_::pow(const DataT& exp) const
return res;
}
+template
+RMAGINE_INLINE_FUNCTION
+Transform_::operator Matrix_() const
+{
+ Matrix_ M;
+ M.set(*this);
+ return M;
+}
+
} // namespace rmagine
\ No newline at end of file
diff --git a/src/rmagine_core/include/rmagine/math/types/Vector3.hpp b/src/rmagine_core/include/rmagine/math/types/Vector3.hpp
index e4a0b669..6a249511 100644
--- a/src/rmagine_core/include/rmagine/math/types/Vector3.hpp
+++ b/src/rmagine_core/include/rmagine/math/types/Vector3.hpp
@@ -245,6 +245,31 @@ struct Vector3_
{
return multEwise(b);
}
+
+ // Use with care! There is no range check: idx is applied as a raw
+ // element offset from the address of x.
+ RMAGINE_INLINE_FUNCTION
+ DataT& operator[](const size_t& idx)
+ {
+ return *(&x + idx);
+ }
+
+ RMAGINE_INLINE_FUNCTION
+ const DataT& operator[](const size_t& idx) const
+ {
+ return *(&x + idx);
+ }
+
+ RMAGINE_INLINE_FUNCTION
+ DataT& operator()(const size_t& idx)
+ {
+ return *(&x + idx);
+ }
+
+ RMAGINE_INLINE_FUNCTION
+ const DataT& operator()(const size_t& idx) const
+ {
+ return *(&x + idx);
+ }
};
} // namespace rmagine
diff --git a/src/rmagine_core/include/rmagine/math/types/definitions.h b/src/rmagine_core/include/rmagine/math/types/definitions.h
index 39de2857..39913160 100644
--- a/src/rmagine_core/include/rmagine/math/types/definitions.h
+++ b/src/rmagine_core/include/rmagine/math/types/definitions.h
@@ -17,7 +17,6 @@ namespace rmagine
// Forward declarations
-
template
struct Vector2_;
@@ -41,6 +40,8 @@ struct AABB_;
using Vector2f = Vector2_;
+using Vector2u = Vector2_;
+using Vector2i = Vector2_;
using Vector3f = Vector3_;
using Matrix2x2f = Matrix_;
using Matrix3x3f = Matrix_;
@@ -79,17 +80,12 @@ using AABB = AABB_;
using Vector = Vector3;
using Point = Vector;
+// @amock TODO: how to define a pixel? unsigned or signed?
+// - projection operations can result in negative pixels
+// using Pixel = Vector2u;
+// using Pixel = Vector2i;
-// struct Vector2;
-// struct Vector3;
-// struct EulerAngles;
-// struct Quaternion;
-// struct Transform;
-// struct Matrix3x3;
-// struct Matrix4x4;
-// struct AABB;
-
} // namespace rmagine
#endif // RMAGINE_MATH_DEFINITIONS_H
\ No newline at end of file
diff --git a/src/rmagine_core/include/rmagine/simulation/SimulationResults.hpp b/src/rmagine_core/include/rmagine/simulation/SimulationResults.hpp
index aa8fe2cd..9942a1e1 100644
--- a/src/rmagine_core/include/rmagine/simulation/SimulationResults.hpp
+++ b/src/rmagine_core/include/rmagine/simulation/SimulationResults.hpp
@@ -97,7 +97,6 @@ struct FaceIds {
Memory face_ids;
};
-
/**
* @brief GeomIds computed by the simulators
*
@@ -127,6 +126,13 @@ struct ObjectIds {
};
+/**
+ * @brief Convenience object if we want to access all attributes at intersection
+ *
+ * WARNING: use with care; It causes slower runtime in contrast to a more specific
+ * choice of attributes
+ *
+ */
template
using IntAttrAll = Bundle<
Hits,
@@ -156,7 +162,7 @@ template
static void resize_memory_bundle(BundleT& res,
unsigned int W,
unsigned int H,
- unsigned int N )
+ unsigned int N = 1 )
{
if constexpr(BundleT::template has >())
{
@@ -194,24 +200,6 @@ static void resize_memory_bundle(BundleT& res,
}
}
-// template
-// static void resize_memory_bundle(BundleT& res,
-// unsigned int W,
-// unsigned int H,
-// unsigned int N )
-// {
-// resize_memory_bundle_(res, W, H, N);
-// }
-
-template
-[[deprecated("Use resize_memory_bundle() instead.")]]
-void resizeMemoryBundle(BundleT& res,
- unsigned int W,
- unsigned int H,
- unsigned int N )
-{
- resize_memory_bundle(res, W, H, N);
-}
} // namespace rmagine
diff --git a/src/rmagine_core/include/rmagine/types/Bundle.hpp b/src/rmagine_core/include/rmagine/types/Bundle.hpp
index cd015811..24952635 100644
--- a/src/rmagine_core/include/rmagine/types/Bundle.hpp
+++ b/src/rmagine_core/include/rmagine/types/Bundle.hpp
@@ -93,8 +93,6 @@ struct Bundle : public Tp...
static constexpr bool value = has_type::type::value;
};
-
-
template
struct Index;
diff --git a/src/rmagine_core/include/rmagine/types/Memory.hpp b/src/rmagine_core/include/rmagine/types/Memory.hpp
index 3b0279c5..ed7b1702 100644
--- a/src/rmagine_core/include/rmagine/types/Memory.hpp
+++ b/src/rmagine_core/include/rmagine/types/Memory.hpp
@@ -39,6 +39,7 @@
#include
#include
#include
+#include
#include
@@ -46,6 +47,15 @@ namespace rmagine {
struct RAM;
+/**
+ * @brief Exception derived from std::runtime_error that always carries
+ * the fixed message "rmagine: cannot resize memory view!".
+ *
+ * NOTE(review): presumably meant to be thrown when resize is requested on a
+ * non-owning view; confirm where it is actually thrown.
+ */
+class MemoryResizeError : public std::runtime_error {
+public:
+ MemoryResizeError()
+ :std::runtime_error("rmagine: cannot resize memory view!")
+ {
+
+ }
+};
+
template
class MemoryView {
public:
@@ -126,6 +136,11 @@ class MemoryView {
return m_size;
}
+ // Shall we introduce this?
+ // virtual void resize(size_t N) {
+ // throw MemoryResizeError();
+ // }
+
MemoryView slice(size_t idx_start, size_t idx_end)
{
return MemoryView(m_mem + idx_start, idx_end - idx_start);
@@ -173,6 +188,7 @@ class Memory : public MemoryView {
~Memory();
+ // virtual void resize(size_t N);
void resize(size_t N);
// Copy for assignment of same MemT
diff --git a/src/rmagine_core/include/rmagine/types/mesh_types.h b/src/rmagine_core/include/rmagine/types/mesh_types.h
index 210c7fc1..4e094a4b 100644
--- a/src/rmagine_core/include/rmagine/types/mesh_types.h
+++ b/src/rmagine_core/include/rmagine/types/mesh_types.h
@@ -43,6 +43,8 @@
#define RMAGINE_TYPES_MESH_TYPES_H
#include
+#include
+#include
namespace rmagine
{
@@ -53,8 +55,24 @@ struct Face {
unsigned int v0;
unsigned int v1;
unsigned int v2;
+
+ // Other access functions
+ // use with care! No out of range checks
+ RMAGINE_INLINE_FUNCTION
+ const unsigned int& operator[](const size_t& idx) const;
+
+ RMAGINE_INLINE_FUNCTION
+ unsigned int& operator[](const size_t& idx);
+
+ RMAGINE_INLINE_FUNCTION
+ constexpr size_t size() const
+ {
+ return 3;
+ }
};
} // namespace rmagine
+#include "mesh_types.tcc"
+
#endif // RMAGINE_TYPES_MESH_TYPES_H
\ No newline at end of file
diff --git a/src/rmagine_core/include/rmagine/types/mesh_types.tcc b/src/rmagine_core/include/rmagine/types/mesh_types.tcc
new file mode 100644
index 00000000..fa9d07c8
--- /dev/null
+++ b/src/rmagine_core/include/rmagine/types/mesh_types.tcc
@@ -0,0 +1,18 @@
+#include "mesh_types.h"
+
+namespace rmagine
+{
+
+// Read-only unchecked element access: idx is applied as a raw offset
+// from the address of v0. No out-of-range check is performed.
+RMAGINE_INLINE_FUNCTION
+const unsigned int& Face::operator[](const size_t& idx) const
+{
+ return *((&v0)+idx);
+}
+
+// Writable unchecked element access: idx is applied as a raw offset
+// from the address of v0. No out-of-range check is performed.
+RMAGINE_INLINE_FUNCTION
+unsigned int& Face::operator[](const size_t& idx)
+{
+ return *((&v0)+idx);
+}
+
+} // namespace rmagine
diff --git a/src/rmagine_core/include/rmagine/types/sensor_models.h b/src/rmagine_core/include/rmagine/types/sensor_models.h
index 4c69ee3c..d54a15bd 100644
--- a/src/rmagine_core/include/rmagine/types/sensor_models.h
+++ b/src/rmagine_core/include/rmagine/types/sensor_models.h
@@ -148,6 +148,12 @@ struct SphericalModel
return getWidth() * getHeight();
}
+ RMAGINE_INLINE_FUNCTION
+ uint32_t getSize() const
+ {
+ return getWidth() * getHeight();
+ }
+
RMAGINE_INLINE_FUNCTION
float getPhi(uint32_t phi_id) const
{
@@ -179,6 +185,32 @@ struct SphericalModel
{
return phi_id * theta.size + theta_id;
}
+
+ // Inverse of getBufferId (phi_id * theta.size + theta_id):
+ // the first component is the theta index (buffer_id % theta.size),
+ // the second component is the phi index (buffer_id / theta.size).
+ RMAGINE_INLINE_FUNCTION
+ Vector2u getPixelCoord(uint32_t buffer_id) const
+ {
+ return {buffer_id % theta.size, buffer_id / theta.size};
+ }
+
+ // Slice one horizontal line: returns the elements [vid * getWidth(), (vid+1) * getWidth()) of mem.
+ // Vertical slicing is currently not possible because of the memory layout.
+ template
+ MemoryView getRow(const MemoryView& mem, uint32_t vid) const
+ {
+ return mem.slice(vid * getWidth(), (vid+1) * getWidth());
+ }
+
+ // for RAM we can access single elements of a buffer
+ template
+ DataT& getPixelValue(MemoryView& mem, uint32_t vid, uint32_t hid) const
+ {
+ return mem[getBufferId(vid, hid)];
+ }
+
+ template
+ DataT getPixelValue(const MemoryView& mem, uint32_t vid, uint32_t hid) const
+ {
+ return mem[getBufferId(vid, hid)];
+ }
};
using LiDARModel = SphericalModel;
@@ -215,6 +247,12 @@ struct PinholeModel {
return getWidth() * getHeight();
}
+ RMAGINE_INLINE_FUNCTION
+ uint32_t getSize() const
+ {
+ return getWidth() * getHeight();
+ }
+
RMAGINE_INLINE_FUNCTION
Vector getDirectionOptical(uint32_t vid, uint32_t hid) const
{
@@ -250,6 +288,31 @@ struct PinholeModel {
return vid * width + hid;
}
+ // Inverse of getBufferId (vid * width + hid):
+ // the first component is hid (buffer_id % width),
+ // the second component is vid (buffer_id / width).
+ RMAGINE_INLINE_FUNCTION
+ Vector2u getPixelCoord(uint32_t buffer_id) const
+ {
+ return {buffer_id % width, buffer_id / width};
+ }
+
+ // Slice one horizontal line: returns the elements [vid * getWidth(), (vid+1) * getWidth()) of mem.
+ // Vertical slicing is currently not possible because of the memory layout.
+ template
+ MemoryView getRow(const MemoryView& mem, uint32_t vid) const
+ {
+ return mem.slice(vid * getWidth(), (vid+1) * getWidth());
+ }
+
+ // for RAM we can access single elements of a buffer
+ template
+ DataT& getPixelValue(MemoryView& mem, uint32_t vid, uint32_t hid) const
+ {
+ return mem[getBufferId(vid, hid)];
+ }
+
+ template
+ DataT getPixelValue(const MemoryView& mem, uint32_t vid, uint32_t hid) const
+ {
+ return mem[getBufferId(vid, hid)];
+ }
};
// Distortion? Fisheye / radial-tangential ?
@@ -257,20 +320,20 @@ using CameraModel = PinholeModel;
using DepthCameraModel = PinholeModel;
// TODO: distortion
-struct RadialTangentialDistortion {
- // TODO
-};
+// struct RadialTangentialDistortion {
+// // TODO
+// };
-struct FisheyeDistortion {
+// struct FisheyeDistortion {
-};
+// };
-struct CylindricModel {
- static constexpr char name[] = "Cylinder";
- // TODO
+// struct CylindricModel {
+// static constexpr char name[] = "Cylinder";
+// // TODO
-};
+// };
template
struct O1DnModel_ {
@@ -298,15 +361,15 @@ struct O1DnModel_ {
}
RMAGINE_INLINE_FUNCTION
- uint32_t size() const
+ uint32_t getSize() const
{
return getWidth() * getHeight();
}
RMAGINE_INLINE_FUNCTION
- uint32_t getBufferId(uint32_t vid, uint32_t hid) const
+ uint32_t size() const
{
- return vid * getWidth() + hid;
+ return getWidth() * getHeight();
}
RMAGINE_INLINE_FUNCTION
@@ -320,6 +383,38 @@ struct O1DnModel_ {
{
return dirs[getBufferId(vid, hid)];
}
+
+ RMAGINE_INLINE_FUNCTION
+ uint32_t getBufferId(uint32_t vid, uint32_t hid) const
+ {
+ return vid * getWidth() + hid;
+ }
+
+ // Splits a buffer index into {buffer_id % width, buffer_id / width},
+ // i.e. the inverse of getBufferId (vid * getWidth() + hid): first hid, then vid.
+ // NOTE(review): reads the width member directly while getBufferId uses
+ // getWidth() - presumably the same value; confirm.
+ RMAGINE_INLINE_FUNCTION
+ Vector2u getPixelCoord(uint32_t buffer_id) const
+ {
+ return {buffer_id % width, buffer_id / width};
+ }
+
+ // Slice one horizontal line: returns the elements [vid * getWidth(), (vid+1) * getWidth()) of mem.
+ // Vertical slicing is currently not possible because of the memory layout.
+ template
+ MemoryView getRow(const MemoryView& mem, uint32_t vid) const
+ {
+ return mem.slice(vid * getWidth(), (vid+1) * getWidth());
+ }
+
+ // for RAM we can access single elements of a buffer
+ template
+ DataT& getPixelValue(MemoryView& mem, uint32_t vid, uint32_t hid) const
+ {
+ return mem[getBufferId(vid, hid)];
+ }
+
+ template
+ DataT getPixelValue(const MemoryView& mem, uint32_t vid, uint32_t hid) const
+ {
+ return mem[getBufferId(vid, hid)];
+ }
};
using O1DnModel = O1DnModel_;
@@ -351,16 +446,15 @@ struct OnDnModel_ {
}
RMAGINE_INLINE_FUNCTION
- uint32_t size() const
+ uint32_t getSize() const
{
return getWidth() * getHeight();
}
-
RMAGINE_INLINE_FUNCTION
- uint32_t getBufferId(uint32_t vid, uint32_t hid) const
+ uint32_t size() const
{
- return vid * getWidth() + hid;
+ return getWidth() * getHeight();
}
RMAGINE_INLINE_FUNCTION
@@ -375,11 +469,51 @@ struct OnDnModel_ {
return dirs[getBufferId(vid, hid)];
}
+ RMAGINE_INLINE_FUNCTION
+ uint32_t getBufferId(uint32_t vid, uint32_t hid) const
+ {
+ return vid * getWidth() + hid;
+ }
+
+ // Splits a buffer index into {buffer_id % width, buffer_id / width},
+ // i.e. the inverse of getBufferId (vid * getWidth() + hid): first hid, then vid.
+ // NOTE(review): reads the width member directly while getBufferId uses
+ // getWidth() - presumably the same value; confirm.
+ RMAGINE_INLINE_FUNCTION
+ Vector2u getPixelCoord(uint32_t buffer_id) const
+ {
+ return {buffer_id % width, buffer_id / width};
+ }
+
+ // Slice one horizontal line: returns the elements [vid * getWidth(), (vid+1) * getWidth()) of mem.
+ // Vertical slicing is currently not possible because of the memory layout.
+ // The buffer is taken by const reference, exactly like getRow of the other
+ // sensor models, so that const views can be sliced here as well.
+ template
+ MemoryView getRow(const MemoryView& mem, uint32_t vid) const
+ {
+ return mem.slice(vid * getWidth(), (vid+1) * getWidth());
+ }
+
+ // for CPU we can access single elements of a buffer
+ template
+ DataT& getPixelValue(MemoryView& mem, uint32_t vid, uint32_t hid) const
+ {
+ return mem[getBufferId(vid, hid)];
+ }
+
+ template
+ DataT getPixelValue(const MemoryView& mem, uint32_t vid, uint32_t hid) const
+ {
+ return mem[getBufferId(vid, hid)];
+ }
};
using OnDnModel = OnDnModel_;
+/**
+ * @brief Returns the sub-view of mem covering the elements
+ * [model.getSize() * pose_id, model.getSize() * (pose_id + 1)).
+ *
+ * NOTE(review): assumes mem stores model.getSize() elements per pose,
+ * one pose after the other - confirm against the callers.
+ *
+ * @param mem buffer to take the sub-view from
+ * @param model sensor model; only its getSize() is used
+ * @param pose_id index of the block of getSize() elements to return
+ */
+template
+MemoryView slice(
+ const MemoryView& mem,
+ const ModelT& model,
+ const uint32_t pose_id)
+{
+ return mem.slice(model.getSize() * pose_id, model.getSize() * (pose_id + 1));
+}
+
} // namespace rmagine
#endif // RMAGINE_TYPES_SENSOR_MODELS_H
\ No newline at end of file
diff --git a/src/rmagine_core/include/rmagine/types/shared_functions.h b/src/rmagine_core/include/rmagine/types/shared_functions.h
index a8e11785..c4bdb96d 100644
--- a/src/rmagine_core/include/rmagine/types/shared_functions.h
+++ b/src/rmagine_core/include/rmagine/types/shared_functions.h
@@ -44,9 +44,17 @@
+// Function qualifier macros shared by host and CUDA code.
+// With __CUDA_ARCH__ defined they expand to CUDA execution-space qualifiers
+// (__host__ and/or __device__, the INLINE variants additionally with __inline__).
#ifdef __CUDA_ARCH__
#define RMAGINE_FUNCTION __host__ __device__
#define RMAGINE_INLINE_FUNCTION __inline__ __host__ __device__
+#define RMAGINE_HOST_FUNCTION __host__
+#define RMAGINE_INLINE_HOST_FUNCTION __inline__ __host__
+#define RMAGINE_DEVICE_FUNCTION __device__
+#define RMAGINE_INLINE_DEVICE_FUNCTION __inline__ __device__
+// Without __CUDA_ARCH__ the plain variants expand to nothing and the INLINE variants to `inline`.
+// NOTE(review): RMAGINE_DEVICE_FUNCTION carries no __device__ in this branch - confirm this
+// is intended for every translation unit that uses it.
#else
#define RMAGINE_FUNCTION
#define RMAGINE_INLINE_FUNCTION inline
+#define RMAGINE_HOST_FUNCTION
+#define RMAGINE_INLINE_HOST_FUNCTION inline
+#define RMAGINE_DEVICE_FUNCTION
+#define RMAGINE_INLINE_DEVICE_FUNCTION inline
#endif
#endif // RMAGINE_TYPES_SHARED_FUNCTIONS_H
\ No newline at end of file
diff --git a/src/rmagine_core/src/math/linalg.cpp b/src/rmagine_core/src/math/linalg.cpp
index 739b24fb..148d1996 100644
--- a/src/rmagine_core/src/math/linalg.cpp
+++ b/src/rmagine_core/src/math/linalg.cpp
@@ -1,10 +1,15 @@
#include "rmagine/math/linalg.h"
+#include "rmagine/types/Memory.hpp"
+#include
#include
+#include "rmagine/math/math.h"
+
namespace rmagine
{
+RMAGINE_HOST_FUNCTION
Matrix4x4 compose(const Transform& T, const Vector3& scale)
{
Matrix4x4 M;
@@ -19,6 +24,7 @@ Matrix4x4 compose(const Transform& T, const Vector3& scale)
return M * S;
}
+RMAGINE_HOST_FUNCTION
Matrix4x4 compose(const Transform& T, const Matrix3x3& S)
{
Matrix4x4 M;
@@ -38,6 +44,7 @@ Matrix4x4 compose(const Transform& T, const Matrix3x3& S)
return M * S_;
}
+RMAGINE_HOST_FUNCTION
void decompose(const Matrix4x4& M, Transform& T, Matrix3x3& S)
{
Eigen::Matrix4f Meig;
@@ -73,6 +80,7 @@ void decompose(const Matrix4x4& M, Transform& T, Matrix3x3& S)
T.R.set(R);
}
+RMAGINE_HOST_FUNCTION
void decompose(const Matrix4x4& M, Transform& T, Vector3& scale)
{
Matrix3x3 S;
@@ -85,15 +93,1088 @@ void decompose(const Matrix4x4& M, Transform& T, Vector3& scale)
scale.z = S(2,2);
}
+// Interpolate between two quaternions: composes A with the result of A.to(B)
+// raised to the power `fac`.
+// NOTE(review): presumably fac = 0 reproduces A and fac = 1 reproduces B - confirm against Quaternion::pow.
+RMAGINE_HOST_FUNCTION
Quaternion polate(const Quaternion& A, const Quaternion& B, float fac)
{
return A * A.to(B).pow(fac);
}
+// Interpolate between two transforms: composes A with the result of A.to(B)
+// raised to the power `fac`.
+// NOTE(review): presumably fac = 0 reproduces A and fac = 1 reproduces B - confirm against Transform::pow.
+RMAGINE_HOST_FUNCTION
Transform polate(const Transform& A, const Transform& B, float fac)
{
return A * A.to(B).pow(fac);
}
+RMAGINE_HOST_FUNCTION
+void svd(
+ const Matrix3x3& a,
+ Matrix3x3& u,
+ Matrix3x3& w,
+ Matrix3x3& v)
+{
+ // TODO: test
+ constexpr unsigned int max_iterations = 20;
+
+ // additional memory required
+ bool flag;
+ int its, j, jj;
+ float anorm, c, f, g, h, s, scale, x, y, z;
+
+ Vector3 rv1 = Vector3::Zeros();
+
+ g = s = scale = anorm = 0.0;
+ float eps = __FLT_EPSILON__;
+ u = a;
+
+ // FIRST PART
+
+ // i = 0;
+ // l = 2;
+ scale = fabs(u(0,0)) + fabs(u(1,0)) + fabs(u(2,0));
+ if(scale > 0.0)
+ {
+ u(0, 0) /= scale;
+ u(1, 0) /= scale;
+ u(2, 0) /= scale;
+
+ s = u(0,0) * u(0,0) + u(1,0) * u(1,0) + u(2,0) * u(2,0);
+ f = u(0,0);
+ g = -SIGN(sqrt(s), f);
+ h = f * g - s;
+
+ u(0, 0) = f - g;
+
+ f = (u(0, 0) * u(0, 1) + u(1, 0) * u(1, 1) + u(2, 0) * u(2, 1)) / h;
+ u(0, 1) += f * u(0, 0);
+ u(1, 1) += f * u(1, 0);
+ u(2, 1) += f * u(2, 0);
+
+ f = (u(0, 0) * u(0, 2) + u(1, 0) * u(1, 2) + u(2, 0) * u(2, 2)) / h;
+ u(0, 2) += f * u(0, 0);
+ u(1, 2) += f * u(1, 0);
+ u(2, 2) += f * u(2, 0);
+
+ u(0, 0) *= scale;
+ u(1, 0) *= scale;
+ u(2, 0) *= scale;
+ }
+
+ w(0, 0) = scale * g;
+ g = s = scale = 0.0;
+
+ scale = abs(u(0,0)) + abs(u(0,1)) + abs(u(0,2));
+
+ if(scale > 0.0)
+ {
+ u(0, 1) /= scale;
+ u(0, 2) /= scale;
+ s = u(0,1) * u(0,1) + u(0,2) * u(0,2);
+
+ f = u(0, 1);
+ g = -SIGN(sqrt(s),f);
+ h = f * g-s;
+ u(0, 1) = f - g;
+
+ rv1.y = u(0, 1) / h;
+ rv1.z = u(0, 2) / h;
+
+ s = u(1,1) * u(0,1) + u(1,2) * u(0,2);
+ u(1, 1) += s * rv1.y;
+ u(1, 2) += s * rv1.z;
+
+ s = u(2,1) * u(0,1) + u(2,2) * u(0,2);
+ u(2, 1) += s * rv1.y;
+ u(2, 2) += s * rv1.z;
+
+ u(0, 1) *= scale;
+ u(0, 2) *= scale;
+ }
+
+ anorm = fabs(w(0, 0));
+ // anorm = MAX(anorm, (fabs(w(0, 0)) + fabs(rv1.x))); // rv1.x is always 0 here, anorm too. fabs(X) >= 0
+
+ // i = 1;
+ // l = 3;
+ rv1.y = scale * g;
+ g = 0.0;
+ scale = fabs(u(1, 1)) + fabs(u(2, 1));
+
+ if(scale > 0.0)
+ {
+ u(1,1) /= scale;
+ u(2,1) /= scale;
+
+ s = u(1,1) * u(1,1) + u(2,1) * u(2,1);
+ f = u(1,1);
+ g = -SIGN(sqrt(s),f);
+ h = f * g - s;
+ u(1,1) = f-g;
+
+ f = (u(1,1) * u(1,2) + u(2,1) * u(2,2)) / h;
+ u(1,2) += f * u(1,1);
+ u(2,2) += f * u(2,1);
+
+ u(1,1) *= scale;
+ u(2,1) *= scale;
+ }
+
+ w(1, 1) = scale * g;
+ g = s = scale = 0.0;
+
+ scale = abs(u(1,2));
+ if(scale > 0.0)
+ {
+ u(1,2) /= scale;
+ s = u(1,2) * u(1,2);
+
+ f = u(1, 2);
+ g = -SIGN(sqrt(s), f);
+ h = f * g - s;
+ u(1,2) = f - g;
+
+ rv1.z = u(1,2) / h;
+ s = u(2,2) * u(1,2);
+
+ u(2,2) += s * rv1.z;
+ u(1,2) *= scale;
+ }
+
+ anorm = MAX(anorm, (abs(w(1, 1)) + abs(rv1.y)));
+
+ rv1.z = scale * g;
+
+ scale = abs(u(2, 2));
+ if(scale > 0.0)
+ {
+ u(2, 2) /= scale;
+ s = u(2, 2) * u(2, 2);
+ f = u(2, 2);
+ g = -SIGN(sqrt(s),f);
+ h = f * g - s;
+
+ u(2, 2) = f - g;
+ u(2, 2) *= scale;
+ }
+
+ w(2, 2) = scale * g;
+ g = s = scale = 0.0;
+
+ anorm = MAX(anorm, (abs(w(2, 2))+abs(rv1.z)));
+
+ // SECOND PART
+ v(2, 2) = 1.0;
+ g = rv1.z;
+
+ // i = 1;
+ // l = 2;
+ if(fabs(g) > 0.0)
+ {
+ v(2,1) = (u(1,2) / u(1,2)) / g;
+ s = u(1,2) * v(2,2);
+ v(2,2) += s * v(2,1);
+ }
+ v(1,2) = 0.0;
+ v(2,1) = 0.0;
+ v(1,1) = 1.0;
+
+ g = rv1.y;
+
+ // l = 1;
+ // i = 0;
+ if(fabs(g) > 0.0)
+ {
+ v(1,0) = (u(0,1) / u(0,1)) / g;
+ v(2,0) = (u(0,2) / u(0,1)) / g;
+
+ s = u(0,1) * v(1,1) + u(0,2) * v(2,1);
+ v(1,1) += s * v(1,0);
+ v(2,1) += s * v(2,0);
+
+ s = u(0,1) * v(1,2) + u(0,2) * v(2,2);
+ v(1,2) += s * v(1,0);
+ v(2,2) += s * v(2,0);
+ }
+ v(0,1) = 0.0;
+ v(1,0) = 0.0;
+ v(0,2) = 0.0;
+ v(2,0) = 0.0;
+ v(0,0) = 1.0;
+ g = rv1.x;
+
+
+ // THIRD PART
+
+ // i = 2;
+ // l = 3;
+ g = w(2, 2);
+ if(fabs(g) > 0.0)
+ {
+ u(2,2) /= g;
+ } else {
+ // TODO(amock): shouldnt this be a large number?
+ u(2,2) = 0.0;
+ }
+ u(2,2) += 1.0;
+
+ // i = 1;
+ // l = 2;
+
+ g = w(1, 1);
+ u(1,2) = 0.0;
+
+ if(fabs(g) > 0.0)
+ {
+ g = 1.0/g;
+ s = u(2,1) * u(2,2);
+ f = (s/u(1,1)) * g;
+
+ u(1,2) += f * u(1,1);
+ u(2,2) += f * u(2,1);
+
+ u(1,1) *= g;
+ u(2,1) *= g;
+ } else {
+ u(1,1) = 0.0;
+ u(2,1) = 0.0;
+ }
+ u(1,1) += 1.0;
+
+ // i = 0;
+ // l = 1;
+ g = w(0, 0);
+ u(0,1) = 0.0;
+ u(0,2) = 0.0;
+
+ if(fabs(g) > 0.0)
+ {
+ f = (u(1,0) * u(1,1) + u(2,0) * u(2,1)) / (g * u(0,0));
+ u(0,1) += f * u(0,0);
+ u(1,1) += f * u(1,0);
+ u(2,1) += f * u(2,0);
+
+ f = (u(1,0) * u(1,2) + u(2,0) * u(2,2)) / (g * u(0,0));
+ u(0,2) += f * u(0,0);
+ u(1,2) += f * u(1,0);
+ u(2,2) += f * u(2,0);
+
+ u(0,0) /= g;
+ u(1,0) /= g;
+ u(2,0) /= g;
+ } else {
+ u(0,0) = 0.0;
+ u(1,0) = 0.0;
+ u(2,0) = 0.0;
+ }
+ u(0,0) += 1.0;
+
+ int i, l;
+
+ // PART 4: Opti
+
+ // k = 2;
+ for(its=0; its eps*anorm)
+ // {
+ // l = 1;
+ // if(MIN(fabs(rv1.y),abs(w(0,0))) > eps*anorm)
+ // {
+ // l = 0;
+ // }
+ // }
+
+ flag=true;
+ l=2;
+ if(abs(rv1.z) <= eps*anorm)
+ {
+ flag=false;
+ }
+ else if(abs(w(1,1)) > eps*anorm)
+ {
+ l=1;
+ if(abs(rv1.y) <= eps*anorm)
+ {
+ flag=false;
+ }
+ else if(abs(w(0,0)) > eps*anorm)
+ {
+ l=0;
+ flag = false;
+ }
+ }
+
+ if(flag)
+ {
+ c=0.0;
+ s=1.0;
+ for(i=l; i<3; i++)
+ {
+ f = s*rv1[i];
+ rv1[i] = c*rv1[i];
+ if(abs(f) <= eps*anorm)
+ {
+ break;
+ }
+ g = w(i, i);
+ h = PYTHAG(f,g);
+ w(i, i) = h;
+ h = 1.0/h;
+ c = g*h;
+ s = -f*h;
+ for(j=0; j<3; j++)
+ {
+ y = u(j,l-1);
+ z = u(j,i);
+ u(j,l-1) = y*c+z*s;
+ u(j,i) = z*c-y*s;
+ }
+ }
+ }
+ z = w(2,2);
+ if(l == 2)
+ {
+ if(z < 0.0)
+ {
+ w(2, 2) = -z;
+ for (j=0; j<3; j++)
+ {
+ v(j,2) = -v(j,2);
+ }
+ }
+ break;
+ }
+ if(its == max_iterations - 1)
+ {
+ // std::cout << "no convergence in " << max_iterations << " svdcmp iterations" << std::endl;
+ // throw std::runtime_error("no convergence in max svdcmp iterations");
+ }
+ x = w(l,l);
+ y = w(1,1);
+ g = rv1.y;
+ h = rv1.z;
+ f = ((y-z)*(y+z)+(g-h)*(g+h))/(2.f*h*y);
+ g = PYTHAG(f, 1.f);
+ f = ((x-z)*(x+z)+h*((y/(f+SIGN(g,f)))-h))/x;
+ c = s = 1.f;
+ for (j=l; j<2; j++)
+ {
+ i = j+1;
+ g = rv1[i];
+ y = w(i, i);
+ h = s*g;
+ g = c*g;
+ z = PYTHAG(f,h);
+ rv1[j] = z;
+ c = f/z;
+ s = h/z;
+ f = x*c+g*s;
+ g = g*c-x*s;
+ h = y*s;
+ y *= c;
+ for(jj=0;jj<3;jj++)
+ {
+ x = v(jj,j);
+ z = v(jj,i);
+ v(jj,j) = x*c+z*s;
+ v(jj,i) = z*c-x*s;
+ }
+ z = PYTHAG(f,h);
+ w(j,j) = z;
+ if(z>0.0)
+ {
+ z = 1.f/z;
+ c = f*z;
+ s = h*z;
+ }
+ f = c*g+s*y;
+ x = c*y-s*g;
+ for (jj=0;jj<3;jj++)
+ {
+ y = u(jj,j);
+ z = u(jj,i);
+ u(jj,j) = y*c+z*s;
+ u(jj,i) = z*c-y*s;
+ }
+ }
+ rv1[l] = 0.f;
+ rv1.z = f;
+ w(2,2) = x;
+ }
+
+
+ for(its=0; its eps*anorm)
+ {
+ l=0;
+ flag = false;
+ }
+
+ if(flag)
+ {
+ c=0.0;
+ s=1.0;
+ for(i=l; i<2; i++)
+ {
+ f = s*rv1[i];
+ rv1[i] = c*rv1[i];
+ if(abs(f) <= eps*anorm)
+ {
+ break;
+ }
+ g = w(i,i);
+ h = PYTHAG(f,g);
+ w(i, i) = h;
+ h = 1.0/h;
+ c = g*h;
+ s = -f*h;
+ for(j=0; j<3; j++)
+ {
+ y = u(j,l-1);
+ z = u(j,i);
+ u(j,l-1) = y*c+z*s;
+ u(j,i) = z*c-y*s;
+ }
+ }
+ }
+ z = w(1,1);
+ if(l == 1)
+ {
+ if(z < 0.0)
+ {
+ w(1,1) = -z;
+ for (j=0; j<3; j++)
+ {
+ v(j,1) = -v(j,1);
+ }
+ }
+ break;
+ }
+ if(its == max_iterations - 1)
+ {
+ // std::cout << "no convergence in " << max_iterations << " svdcmp iterations" << std::endl;
+ // throw std::runtime_error("no convergence in max svdcmp iterations");
+ }
+
+ x = w(l, l);
+ y = w(0, 0);
+ g = rv1.x;
+ h = rv1.y;
+ f = ((y-z)*(y+z)+(g-h)*(g+h))/(2.f*h*y);
+ g = PYTHAG(f, 1.f);
+ f = ((x-z)*(x+z)+h*((y/(f+SIGN(g,f)))-h))/x;
+ c = s = 1.f;
+
+ if(l == 0)
+ {
+ g = rv1.y;
+ y = w(1,1);
+ h = s*g;
+ g = c*g;
+ z = PYTHAG(f,h);
+ rv1.x = z;
+ c = f/z;
+ s = h/z;
+ f = x*c+g*s;
+ g = g*c-x*s;
+ h = y*s;
+ y *= c;
+
+
+ x = v(0,0);
+ z = v(0,1);
+ v(0,0) = x*c+z*s;
+ v(0,1) = z*c-x*s;
+
+ x = v(1,0);
+ z = v(1,1);
+ v(1,0) = x*c+z*s;
+ v(1,1) = z*c-x*s;
+
+ x = v(2,0);
+ z = v(2,1);
+ v(2,0) = x*c+z*s;
+ v(2,1) = z*c-x*s;
+
+
+ z = PYTHAG(f,h);
+ w(0,0) = z;
+ if(z>0.f)
+ {
+ z = 1.f/z;
+ c = f*z;
+ s = h*z;
+ }
+ f = c*g+s*y;
+ x = c*y-s*g;
+ for(jj=0; jj<3; jj++)
+ {
+ y = u(jj,0);
+ z = u(jj,1);
+ u(jj,0) = y*c+z*s;
+ u(jj,1) = z*c-y*s;
+ }
+ }
+
+ rv1[l] = 0.f;
+ rv1.y = f;
+ w(1,1) = x;
+ }
+
+
+ z = w(0,0);
+ if (z < 0.0)
+ {
+ w(0,0) = -z;
+ v(0,0) = -v(0,0);
+ v(1,0) = -v(1,0);
+ v(2,0) = -v(2,0);
+ }
+}
+
+RMAGINE_HOST_FUNCTION
+void svd(
+ const Matrix3x3& a,
+ Matrix3x3& u,
+ Vector3& w,
+ Matrix3x3& v)
+{
+ // TODO: test
+ constexpr unsigned int m = 3;
+ constexpr unsigned int n = 3;
+ constexpr unsigned int max_iterations = 20;
+
+ // additional memory required
+ bool flag;
+ int its, j, jj, k, nm;
+ float anorm, c, f, g, h, s, scale, x, y, z;
+
+ Vector3 rv1 = Vector3::Zeros();
+
+ g = s = scale = anorm = 0.0;
+ float eps = std::numeric_limits::epsilon();
+ u = a;
+
+ // FIRST PART
+
+ // i = 0;
+ // l = 2;
+ scale = fabs(u(0,0)) + fabs(u(1,0)) + fabs(u(2,0));
+ if(scale > 0.0)
+ {
+ u(0, 0) /= scale;
+ u(1, 0) /= scale;
+ u(2, 0) /= scale;
+
+ s = u(0,0) * u(0,0) + u(1,0) * u(1,0) + u(2,0) * u(2,0);
+ f = u(0,0);
+ g = -SIGN(sqrt(s), f);
+ h = f * g - s;
+
+ u(0, 0) = f - g;
+
+ f = (u(0, 0) * u(0, 1) + u(1, 0) * u(1, 1) + u(2, 0) * u(2, 1)) / h;
+ u(0, 1) += f * u(0, 0);
+ u(1, 1) += f * u(1, 0);
+ u(2, 1) += f * u(2, 0);
+
+ f = (u(0, 0) * u(0, 2) + u(1, 0) * u(1, 2) + u(2, 0) * u(2, 2)) / h;
+ u(0, 2) += f * u(0, 0);
+ u(1, 2) += f * u(1, 0);
+ u(2, 2) += f * u(2, 0);
+
+ u(0, 0) *= scale;
+ u(1, 0) *= scale;
+ u(2, 0) *= scale;
+ }
+
+ w.x = scale * g;
+ g = s = scale = 0.0;
+
+ scale = abs(u(0,0)) + abs(u(0,1)) + abs(u(0,2));
+
+ if(scale > 0.0)
+ {
+ u(0, 1) /= scale;
+ u(0, 2) /= scale;
+ s = u(0,1) * u(0,1) + u(0,2) * u(0,2);
+
+ f = u(0, 1);
+ g = -SIGN(sqrt(s),f);
+ h = f * g-s;
+ u(0, 1) = f - g;
+
+ rv1.y = u(0, 1) / h;
+ rv1.z = u(0, 2) / h;
+
+ s = u(1,1) * u(0,1) + u(1,2) * u(0,2);
+ u(1, 1) += s * rv1.y;
+ u(1, 2) += s * rv1.z;
+
+ s = u(2,1) * u(0,1) + u(2,2) * u(0,2);
+ u(2, 1) += s * rv1.y;
+ u(2, 2) += s * rv1.z;
+
+ u(0, 1) *= scale;
+ u(0, 2) *= scale;
+ }
+
+ anorm = fabs(w.x);
+ // anorm = MAX(anorm, (fabs(w(0, 0)) + fabs(rv1.x))); // rv1.x is always 0 here, anorm too. fabs(X) >= 0
+
+ // i = 1;
+ // l = 3;
+ rv1.y = scale * g;
+ g = 0.0;
+ scale = fabs(u(1, 1)) + fabs(u(2, 1));
+
+ if(scale > 0.0)
+ {
+ u(1,1) /= scale;
+ u(2,1) /= scale;
+
+ s = u(1,1) * u(1,1) + u(2,1) * u(2,1);
+ f = u(1,1);
+ g = -SIGN(sqrt(s),f);
+ h = f * g - s;
+ u(1,1) = f-g;
+
+ f = (u(1,1) * u(1,2) + u(2,1) * u(2,2)) / h;
+ u(1,2) += f * u(1,1);
+ u(2,2) += f * u(2,1);
+
+ u(1,1) *= scale;
+ u(2,1) *= scale;
+ }
+
+ w.y = scale * g;
+ g = s = scale = 0.0;
+
+ scale = abs(u(1,2));
+ if(scale > 0.0)
+ {
+ u(1,2) /= scale;
+ s = u(1,2) * u(1,2);
+
+ f = u(1, 2);
+ g = -SIGN(sqrt(s), f);
+ h = f * g - s;
+ u(1,2) = f - g;
+
+ rv1.z = u(1,2) / h;
+ s = u(2,2) * u(1,2);
+
+ u(2,2) += s * rv1.z;
+ u(1,2) *= scale;
+ }
+
+ anorm = MAX(anorm, (abs(w.y) + abs(rv1.y)));
+
+ rv1.z = scale * g;
+
+ scale = abs(u(2, 2));
+ if(scale > 0.0)
+ {
+ u(2, 2) /= scale;
+ s = u(2, 2) * u(2, 2);
+ f = u(2, 2);
+ g = -SIGN(sqrt(s),f);
+ h = f * g - s;
+
+ u(2, 2) = f - g;
+ u(2, 2) *= scale;
+ }
+
+ w.z = scale * g;
+ g = s = scale = 0.0;
+
+ anorm = MAX(anorm, (abs(w.z)+abs(rv1.z)));
+
+ // SECOND PART
+ v(2, 2) = 1.0;
+ g = rv1.z;
+
+ // i = 1;
+ // l = 2;
+ if(fabs(g) > 0.0)
+ {
+ v(2,1) = (u(1,2) / u(1,2)) / g;
+ s = u(1,2) * v(2,2);
+ v(2,2) += s * v(2,1);
+ }
+ v(1,2) = 0.0;
+ v(2,1) = 0.0;
+ v(1,1) = 1.0;
+
+ g = rv1.y;
+
+ // l = 1;
+ // i = 0;
+ if(fabs(g) > 0.0)
+ {
+ v(1,0) = (u(0,1) / u(0,1)) / g;
+ v(2,0) = (u(0,2) / u(0,1)) / g;
+
+ s = u(0,1) * v(1,1) + u(0,2) * v(2,1);
+ v(1,1) += s * v(1,0);
+ v(2,1) += s * v(2,0);
+
+ s = u(0,1) * v(1,2) + u(0,2) * v(2,2);
+ v(1,2) += s * v(1,0);
+ v(2,2) += s * v(2,0);
+ }
+ v(0,1) = 0.0;
+ v(1,0) = 0.0;
+ v(0,2) = 0.0;
+ v(2,0) = 0.0;
+ v(0,0) = 1.0;
+ g = rv1.x;
+
+
+ // THIRD PART
+
+ // i = 2;
+ // l = 3;
+ g = w.z;
+ if(fabs(g) > 0.0)
+ {
+ u(2,2) /= g;
+ } else {
+ // TODO(amock): shouldnt this be a large number?
+ u(2,2) = 0.0;
+ }
+ u(2,2) += 1.0;
+
+ // i = 1;
+ // l = 2;
+
+ g = w.y;
+ u(1,2) = 0.0;
+
+ if(fabs(g) > 0.0)
+ {
+ g = 1.0/g;
+ s = u(2,1) * u(2,2);
+ f = (s/u(1,1)) * g;
+
+ u(1,2) += f * u(1,1);
+ u(2,2) += f * u(2,1);
+
+ u(1,1) *= g;
+ u(2,1) *= g;
+ } else {
+ u(1,1) = 0.0;
+ u(2,1) = 0.0;
+ }
+ u(1,1) += 1.0;
+
+ // i = 0;
+ // l = 1;
+ g = w.x;
+ u(0,1) = 0.0;
+ u(0,2) = 0.0;
+
+ if(fabs(g) > 0.0)
+ {
+ f = (u(1,0) * u(1,1) + u(2,0) * u(2,1)) / (g * u(0,0));
+ u(0,1) += f * u(0,0);
+ u(1,1) += f * u(1,0);
+ u(2,1) += f * u(2,0);
+
+ f = (u(1,0) * u(1,2) + u(2,0) * u(2,2)) / (g * u(0,0));
+ u(0,2) += f * u(0,0);
+ u(1,2) += f * u(1,0);
+ u(2,2) += f * u(2,0);
+
+ u(0,0) /= g;
+ u(1,0) /= g;
+ u(2,0) /= g;
+ } else {
+ u(0,0) = 0.0;
+ u(1,0) = 0.0;
+ u(2,0) = 0.0;
+ }
+ u(0,0) += 1.0;
+
+ int i, l;
+
+ // PART 4: Opti
+
+ // k = 2;
+ for(its=0; its eps*anorm)
+ // {
+ // l = 1;
+ // if(MIN(fabs(rv1.y),abs(w(0,0))) > eps*anorm)
+ // {
+ // l = 0;
+ // }
+ // }
+
+ flag=true;
+ l=2;
+ if(abs(rv1.z) <= eps*anorm)
+ {
+ flag=false;
+ }
+ else if(abs(w.y) > eps*anorm)
+ {
+ l=1;
+ if(abs(rv1.y) <= eps*anorm)
+ {
+ flag=false;
+ }
+ else if(abs(w.x) > eps*anorm)
+ {
+ l=0;
+ flag = false;
+ }
+ }
+
+ if(flag)
+ {
+ c=0.0;
+ s=1.0;
+ for(i=l; i<3; i++)
+ {
+ f = s*rv1[i];
+ rv1[i] = c*rv1[i];
+ if(abs(f) <= eps*anorm)
+ {
+ break;
+ }
+ g = w(i);
+ h = PYTHAG(f,g);
+ w(i) = h;
+ h = 1.0/h;
+ c = g*h;
+ s = -f*h;
+ for(j=0; j0.0)
+ {
+ z = 1.f/z;
+ c = f*z;
+ s = h*z;
+ }
+ f = c*g+s*y;
+ x = c*y-s*g;
+ for (jj=0;jj eps*anorm)
+ {
+ l=0;
+ flag = false;
+ }
+
+ if(flag)
+ {
+ c=0.0;
+ s=1.0;
+ for(i=l; i<2; i++)
+ {
+ f = s*rv1[i];
+ rv1[i] = c*rv1[i];
+ if(abs(f) <= eps*anorm)
+ {
+ break;
+ }
+ g = w(i);
+ h = PYTHAG(f,g);
+ w(i) = h;
+ h = 1.0/h;
+ c = g*h;
+ s = -f*h;
+ for(j=0; j0.f)
+ {
+ z = 1.f/z;
+ c = f*z;
+ s = h*z;
+ }
+ f = c*g+s*y;
+ x = c*y-s*g;
+ for(jj=0; jj<3; jj++)
+ {
+ y = u(jj,0);
+ z = u(jj,1);
+ u(jj,0) = y*c+z*s;
+ u(jj,1) = z*c-y*s;
+ }
+ }
+
+ rv1[l] = 0.f;
+ rv1.y = f;
+ w.y = x;
+ }
+
+
+ z = w.x;
+ if (z < 0.0)
+ {
+ w.x = -z;
+ v(0,0) = -v(0,0);
+ v(1,0) = -v(1,0);
+ v(2,0) = -v(2,0);
+ }
+}
+
} // namespace rmagine
\ No newline at end of file
diff --git a/src/rmagine_core/src/math/math.cpp b/src/rmagine_core/src/math/math.cpp
index 9323175d..42de8ae9 100644
--- a/src/rmagine_core/src/math/math.cpp
+++ b/src/rmagine_core/src/math/math.cpp
@@ -4,6 +4,8 @@
#include
+#include "rmagine/math/linalg.h"
+
namespace rmagine {
@@ -697,5 +699,39 @@ Memory cov(
return C;
}
+/**
+ * @brief decompose A = UWV* using singular value decomposition
+ */
+void svd(
+ const MemoryView& As,
+ MemoryView& Us,
+ MemoryView& Ws,
+ MemoryView& Vs)
+{
+ #pragma omp parallel for
+ for(size_t i=0; i& As,
+ MemoryView& Us,
+ MemoryView& ws,
+ MemoryView& Vs)
+{
+ #pragma omp parallel for
+ for(size_t i=0; i only works for
# CXX_STANDARD 17
)
diff --git a/src/rmagine_cuda/include/rmagine/math/math.cuh b/src/rmagine_cuda/include/rmagine/math/math.cuh
index 7439487f..645085cb 100644
--- a/src/rmagine_cuda/include/rmagine/math/math.cuh
+++ b/src/rmagine_cuda/include/rmagine/math/math.cuh
@@ -543,6 +543,28 @@ Memory cov(
const MemoryView& v2
);
+/**
+ * @brief Decompose A = UWV* using singular value decomposition.
+ *
+ * @param As input view (read-only)
+ * @param Us output view receiving U
+ * @param Ws output view receiving W
+ * @param Vs output view receiving V
+ *
+ * NOTE(review): presumably element i of Us/Ws/Vs receives the factors of element i
+ * of As, so all four views need the same size - confirm against the implementation.
+ */
+void svd(
+ const MemoryView& As,
+ MemoryView& Us,
+ MemoryView& Ws,
+ MemoryView& Vs
+);
+
+/**
+ * @brief Decompose A = UWV* using singular value decomposition.
+ *
+ * w is a vector which is the diagonal of matrix W.
+ *
+ * @param As input view (read-only)
+ * @param Us output view receiving U
+ * @param ws output view receiving the diagonal of W
+ * @param Vs output view receiving V
+ *
+ * NOTE(review): presumably element i of Us/ws/Vs receives the factors of element i
+ * of As, so all four views need the same size - confirm against the implementation.
+ */
+void svd(
+ const MemoryView& As,
+ MemoryView& Us,
+ MemoryView& ws,
+ MemoryView& Vs
+);
+
} // namespace rmagine
diff --git a/src/rmagine_cuda/include/rmagine/util/cuda/CudaDebug.hpp b/src/rmagine_cuda/include/rmagine/util/cuda/CudaDebug.hpp
index 2bd29b81..faa48553 100644
--- a/src/rmagine_cuda/include/rmagine/util/cuda/CudaDebug.hpp
+++ b/src/rmagine_cuda/include/rmagine/util/cuda/CudaDebug.hpp
@@ -52,4 +52,16 @@ void cudaAssert(
const char* func,
int line);
+#ifdef NDEBUG
+ #define RM_CUDA_DEBUG()
+#else // NDEBUG
+ #define RM_CUDA_DEBUG() \
+ cudaError_t err = cudaGetLastError(); \
+ if (err != cudaSuccess) \
+ { \
+ printf("Error: %s\n", cudaGetErrorString(err)); \
+ throw std::runtime_error(cudaGetErrorString(err)); \
+ }
+#endif // defined NDEBUG
+
#endif // RMAGINE_UTIL_CUDA_DEBUG_HPP
\ No newline at end of file
diff --git a/src/rmagine_cuda/src/math/linalg.cu b/src/rmagine_cuda/src/math/linalg.cu
new file mode 100644
index 00000000..17c846da
--- /dev/null
+++ b/src/rmagine_cuda/src/math/linalg.cu
@@ -0,0 +1,1085 @@
+#include
+#include
+#include
+
+namespace rmagine
+{
+
+RMAGINE_DEVICE_FUNCTION
+void svd(
+ const Matrix3x3& a,
+ Matrix3x3& u,
+ Matrix3x3& w,
+ Matrix3x3& v)
+{
+ // printf("SVDD\n");
+
+
+ // TODO: test
+ const unsigned int max_iterations = 20;
+
+ // additional memory required
+ bool flag;
+ int its, j, jj;
+ float anorm, c, f, g, h, s, scale, x, y, z;
+
+ Vector3 rv1;
+ rv1.x = 0.0;
+ rv1.y = 0.0;
+ rv1.z = 0.0;
+
+
+
+ g = s = scale = anorm = 0.0;
+ const float eps = __FLT_EPSILON__;
+ u = a;
+
+ // FIRST PART
+
+ // i = 0;
+ // l = 2;
+ scale = fabs(u(0,0)) + fabs(u(1,0)) + fabs(u(2,0));
+ if(scale > 0.0)
+ {
+ u(0, 0) /= scale;
+ u(1, 0) /= scale;
+ u(2, 0) /= scale;
+
+ s = u(0,0) * u(0,0) + u(1,0) * u(1,0) + u(2,0) * u(2,0);
+ f = u(0,0);
+ g = -SIGN(sqrt(s), f);
+ h = f * g - s;
+
+ u(0, 0) = f - g;
+
+ f = (u(0, 0) * u(0, 1) + u(1, 0) * u(1, 1) + u(2, 0) * u(2, 1)) / h;
+ u(0, 1) += f * u(0, 0);
+ u(1, 1) += f * u(1, 0);
+ u(2, 1) += f * u(2, 0);
+
+ f = (u(0, 0) * u(0, 2) + u(1, 0) * u(1, 2) + u(2, 0) * u(2, 2)) / h;
+ u(0, 2) += f * u(0, 0);
+ u(1, 2) += f * u(1, 0);
+ u(2, 2) += f * u(2, 0);
+
+ u(0, 0) *= scale;
+ u(1, 0) *= scale;
+ u(2, 0) *= scale;
+ }
+
+ w(0, 0) = scale * g;
+ g = s = scale = 0.0;
+
+ scale = abs(u(0,0)) + abs(u(0,1)) + abs(u(0,2));
+
+ if(scale > 0.0)
+ {
+ u(0, 1) /= scale;
+ u(0, 2) /= scale;
+ s = u(0,1) * u(0,1) + u(0,2) * u(0,2);
+
+ f = u(0, 1);
+ g = -SIGN(sqrt(s),f);
+ h = f * g-s;
+ u(0, 1) = f - g;
+
+ rv1.y = u(0, 1) / h;
+ rv1.z = u(0, 2) / h;
+
+ s = u(1,1) * u(0,1) + u(1,2) * u(0,2);
+ u(1, 1) += s * rv1.y;
+ u(1, 2) += s * rv1.z;
+
+ s = u(2,1) * u(0,1) + u(2,2) * u(0,2);
+ u(2, 1) += s * rv1.y;
+ u(2, 2) += s * rv1.z;
+
+ u(0, 1) *= scale;
+ u(0, 2) *= scale;
+ }
+
+ anorm = fabs(w(0, 0));
+ // anorm = MAX(anorm, (fabs(w(0, 0)) + fabs(rv1.x))); // rv1.x is always 0 here, anorm too. fabs(X) >= 0
+
+ // i = 1;
+ // l = 3;
+ rv1.y = scale * g;
+ g = 0.0;
+ scale = fabs(u(1, 1)) + fabs(u(2, 1));
+
+ if(scale > 0.0)
+ {
+ u(1,1) /= scale;
+ u(2,1) /= scale;
+
+ s = u(1,1) * u(1,1) + u(2,1) * u(2,1);
+ f = u(1,1);
+ g = -SIGN(sqrt(s),f);
+ h = f * g - s;
+ u(1,1) = f-g;
+
+ f = (u(1,1) * u(1,2) + u(2,1) * u(2,2)) / h;
+ u(1,2) += f * u(1,1);
+ u(2,2) += f * u(2,1);
+
+ u(1,1) *= scale;
+ u(2,1) *= scale;
+ }
+
+ w(1, 1) = scale * g;
+ g = s = scale = 0.0;
+
+ scale = abs(u(1,2));
+ if(scale > 0.0)
+ {
+ u(1,2) /= scale;
+ s = u(1,2) * u(1,2);
+
+ f = u(1, 2);
+ g = -SIGN(sqrt(s), f);
+ h = f * g - s;
+ u(1,2) = f - g;
+
+ rv1.z = u(1,2) / h;
+ s = u(2,2) * u(1,2);
+
+ u(2,2) += s * rv1.z;
+ u(1,2) *= scale;
+ }
+
+ anorm = MAX(anorm, (abs(w(1, 1)) + abs(rv1.y)));
+
+
+
+ rv1.z = scale * g;
+
+ scale = abs(u(2, 2));
+ if(scale > 0.0)
+ {
+ u(2, 2) /= scale;
+ s = u(2, 2) * u(2, 2);
+ f = u(2, 2);
+ g = -SIGN(sqrt(s),f);
+ h = f * g - s;
+
+ u(2, 2) = f - g;
+ u(2, 2) *= scale;
+ }
+
+ w(2, 2) = scale * g;
+ g = s = scale = 0.0;
+
+ anorm = MAX(anorm, (abs(w(2, 2))+abs(rv1.z)));
+
+ // SECOND PART
+ v(2, 2) = 1.0;
+ g = rv1.z;
+
+ // i = 1;
+ // l = 2;
+ if(fabs(g) > 0.0)
+ {
+ v(2,1) = (u(1,2) / u(1,2)) / g;
+ s = u(1,2) * v(2,2);
+ v(2,2) += s * v(2,1);
+ }
+ v(1,2) = 0.0;
+ v(2,1) = 0.0;
+ v(1,1) = 1.0;
+
+ g = rv1.y;
+
+ // l = 1;
+ // i = 0;
+ if(fabs(g) > 0.0)
+ {
+ v(1,0) = (u(0,1) / u(0,1)) / g;
+ v(2,0) = (u(0,2) / u(0,1)) / g;
+
+ s = u(0,1) * v(1,1) + u(0,2) * v(2,1);
+ v(1,1) += s * v(1,0);
+ v(2,1) += s * v(2,0);
+
+ s = u(0,1) * v(1,2) + u(0,2) * v(2,2);
+ v(1,2) += s * v(1,0);
+ v(2,2) += s * v(2,0);
+ }
+ v(0,1) = 0.0;
+ v(1,0) = 0.0;
+ v(0,2) = 0.0;
+ v(2,0) = 0.0;
+ v(0,0) = 1.0;
+ g = rv1.x;
+
+
+ // THIRD PART
+
+ // i = 2;
+ // l = 3;
+ g = w(2, 2);
+ if(fabs(g) > 0.0)
+ {
+ u(2,2) /= g;
+ } else {
+ // TODO(amock): shouldnt this be a large number?
+ u(2,2) = 0.0;
+ }
+ u(2,2) += 1.0;
+
+ // i = 1;
+ // l = 2;
+
+ g = w(1, 1);
+ u(1,2) = 0.0;
+
+ if(fabs(g) > 0.0)
+ {
+ g = 1.0/g;
+ s = u(2,1) * u(2,2);
+ f = (s/u(1,1)) * g;
+
+ u(1,2) += f * u(1,1);
+ u(2,2) += f * u(2,1);
+
+ u(1,1) *= g;
+ u(2,1) *= g;
+ } else {
+ u(1,1) = 0.0;
+ u(2,1) = 0.0;
+ }
+ u(1,1) += 1.0;
+
+ // i = 0;
+ // l = 1;
+ g = w(0, 0);
+ u(0,1) = 0.0;
+ u(0,2) = 0.0;
+
+ if(fabs(g) > 0.0)
+ {
+ f = (u(1,0) * u(1,1) + u(2,0) * u(2,1)) / (g * u(0,0));
+ u(0,1) += f * u(0,0);
+ u(1,1) += f * u(1,0);
+ u(2,1) += f * u(2,0);
+
+ f = (u(1,0) * u(1,2) + u(2,0) * u(2,2)) / (g * u(0,0));
+ u(0,2) += f * u(0,0);
+ u(1,2) += f * u(1,0);
+ u(2,2) += f * u(2,0);
+
+ u(0,0) /= g;
+ u(1,0) /= g;
+ u(2,0) /= g;
+ } else {
+ u(0,0) = 0.0;
+ u(1,0) = 0.0;
+ u(2,0) = 0.0;
+ }
+ u(0,0) += 1.0;
+
+ int i, l;
+
+
+
+ // PART 4: Opti
+
+ // k = 2;
+ for(int its=0; its eps*anorm)
+ {
+ l=1;
+ if(fabs(rv1.y) <= eps*anorm)
+ {
+ flag=false;
+ }
+ else if(fabs(w(0,0)) > eps*anorm)
+ {
+ l=0;
+ flag = false;
+ }
+ }
+
+ if(flag)
+ {
+ c=0.0;
+ s=1.0;
+ for(i=l; i<3; i++)
+ {
+ f = s*rv1[i];
+ rv1[i] = c*rv1[i];
+ if(fabs(f) <= eps*anorm)
+ {
+ break;
+ }
+ g = w(i, i);
+ h = PYTHAG(f,g);
+ w(i, i) = h;
+ h = 1.0/h;
+ c = g*h;
+ s = -f*h;
+ for(j=0; j<3; j++)
+ {
+ y = u(j,l-1);
+ z = u(j,i);
+ u(j,l-1) = y*c+z*s;
+ u(j,i) = z*c-y*s;
+ }
+ }
+ }
+ z = w(2,2);
+ if(l == 2)
+ {
+ if(z < 0.0)
+ {
+ w(2, 2) = -z;
+ for (j=0; j<3; j++)
+ {
+ v(j,2) = -v(j,2);
+ }
+ }
+ break;
+ }
+ if(its == max_iterations - 1)
+ {
+ // std::cout << "no convergence in " << max_iterations << " svdcmp iterations" << std::endl;
+ // throw std::runtime_error("no convergence in max svdcmp iterations");
+ }
+ x = w(l,l);
+ y = w(1,1);
+ g = rv1.y;
+ h = rv1.z;
+ f = ((y-z)*(y+z)+(g-h)*(g+h))/(2.f*h*y);
+ g = PYTHAG(f, 1.f);
+ f = ((x-z)*(x+z)+h*((y/(f+SIGN(g,f)))-h))/x;
+ c = s = 1.f;
+ for (j=l; j<2; j++)
+ {
+ i = j+1;
+ g = rv1[i];
+ y = w(i, i);
+ h = s*g;
+ g = c*g;
+ z = PYTHAG(f,h);
+ rv1[j] = z;
+ c = f/z;
+ s = h/z;
+ f = x*c+g*s;
+ g = g*c-x*s;
+ h = y*s;
+ y *= c;
+ for(jj=0;jj<3;jj++)
+ {
+ x = v(jj,j);
+ z = v(jj,i);
+ v(jj,j) = x*c+z*s;
+ v(jj,i) = z*c-x*s;
+ }
+ z = PYTHAG(f,h);
+ w(j,j) = z;
+ if(z>0.0)
+ {
+ z = 1.f/z;
+ c = f*z;
+ s = h*z;
+ }
+ f = c*g+s*y;
+ x = c*y-s*g;
+ for (jj=0;jj<3;jj++)
+ {
+ y = u(jj,j);
+ z = u(jj,i);
+ u(jj,j) = y*c+z*s;
+ u(jj,i) = z*c-y*s;
+ }
+ }
+ rv1[l] = 0.f;
+ rv1.z = f;
+ w(2,2) = x;
+ }
+
+
+
+ for(its=0; its eps*anorm)
+ {
+ l=0;
+ flag = false;
+ }
+
+ if(flag)
+ {
+ c=0.0;
+ s=1.0;
+ for(i=l; i<2; i++)
+ {
+ f = s*rv1[i];
+ rv1[i] = c*rv1[i];
+ if(abs(f) <= eps*anorm)
+ {
+ break;
+ }
+ g = w(i,i);
+ h = PYTHAG(f,g);
+ w(i, i) = h;
+ h = 1.0/h;
+ c = g*h;
+ s = -f*h;
+ for(j=0; j<3; j++)
+ {
+ y = u(j,l-1);
+ z = u(j,i);
+ u(j,l-1) = y*c+z*s;
+ u(j,i) = z*c-y*s;
+ }
+ }
+ }
+ z = w(1,1);
+ if(l == 1)
+ {
+ if(z < 0.0)
+ {
+ w(1,1) = -z;
+ for (j=0; j<3; j++)
+ {
+ v(j,1) = -v(j,1);
+ }
+ }
+ break;
+ }
+ if(its == max_iterations - 1)
+ {
+ // std::cout << "no convergence in " << max_iterations << " svdcmp iterations" << std::endl;
+ // throw std::runtime_error("no convergence in max svdcmp iterations");
+ }
+
+ x = w(l, l);
+ y = w(0, 0);
+ g = rv1.x;
+ h = rv1.y;
+ f = ((y-z)*(y+z)+(g-h)*(g+h))/(2.f*h*y);
+ g = PYTHAG(f, 1.f);
+ f = ((x-z)*(x+z)+h*((y/(f+SIGN(g,f)))-h))/x;
+ c = s = 1.f;
+
+ if(l == 0)
+ {
+ g = rv1.y;
+ y = w(1,1);
+ h = s*g;
+ g = c*g;
+ z = PYTHAG(f,h);
+ rv1.x = z;
+ c = f/z;
+ s = h/z;
+ f = x*c+g*s;
+ g = g*c-x*s;
+ h = y*s;
+ y *= c;
+
+
+ x = v(0,0);
+ z = v(0,1);
+ v(0,0) = x*c+z*s;
+ v(0,1) = z*c-x*s;
+
+ x = v(1,0);
+ z = v(1,1);
+ v(1,0) = x*c+z*s;
+ v(1,1) = z*c-x*s;
+
+ x = v(2,0);
+ z = v(2,1);
+ v(2,0) = x*c+z*s;
+ v(2,1) = z*c-x*s;
+
+
+ z = PYTHAG(f,h);
+ w(0,0) = z;
+ if(z>0.f)
+ {
+ z = 1.f/z;
+ c = f*z;
+ s = h*z;
+ }
+ f = c*g+s*y;
+ x = c*y-s*g;
+ for(jj=0; jj<3; jj++)
+ {
+ y = u(jj,0);
+ z = u(jj,1);
+ u(jj,0) = y*c+z*s;
+ u(jj,1) = z*c-y*s;
+ }
+ }
+
+ rv1[l] = 0.f;
+ rv1.y = f;
+ w(1,1) = x;
+ }
+
+
+ z = w(0,0);
+ if (z < 0.0)
+ {
+ w(0,0) = -z;
+ v(0,0) = -v(0,0);
+ v(1,0) = -v(1,0);
+ v(2,0) = -v(2,0);
+ }
+}
+
+
+RMAGINE_DEVICE_FUNCTION
+void svd(
+ const Matrix3x3& a,
+ Matrix3x3& u,
+ Vector3& w,
+ Matrix3x3& v)
+{
+ // TODO: test
+ // constexpr unsigned int m = 3;
+ // constexpr unsigned int n = 3;
+ constexpr unsigned int max_iterations = 20;
+
+ // additional memory required
+ bool flag;
+ int its, j, jj;
+ float anorm, c, f, g, h, s, scale, x, y, z;
+
+ Vector3 rv1 = Vector3::Zeros();
+
+ g = s = scale = anorm = 0.0;
+ float eps = __FLT_EPSILON__;
+ u = a;
+
+ // FIRST PART
+
+ // i = 0;
+ // l = 2;
+ scale = fabs(u(0,0)) + fabs(u(1,0)) + fabs(u(2,0));
+ if(scale > 0.0)
+ {
+ u(0, 0) /= scale;
+ u(1, 0) /= scale;
+ u(2, 0) /= scale;
+
+ s = u(0,0) * u(0,0) + u(1,0) * u(1,0) + u(2,0) * u(2,0);
+ f = u(0,0);
+ g = -SIGN(sqrt(s), f);
+ h = f * g - s;
+
+ u(0, 0) = f - g;
+
+ f = (u(0, 0) * u(0, 1) + u(1, 0) * u(1, 1) + u(2, 0) * u(2, 1)) / h;
+ u(0, 1) += f * u(0, 0);
+ u(1, 1) += f * u(1, 0);
+ u(2, 1) += f * u(2, 0);
+
+ f = (u(0, 0) * u(0, 2) + u(1, 0) * u(1, 2) + u(2, 0) * u(2, 2)) / h;
+ u(0, 2) += f * u(0, 0);
+ u(1, 2) += f * u(1, 0);
+ u(2, 2) += f * u(2, 0);
+
+ u(0, 0) *= scale;
+ u(1, 0) *= scale;
+ u(2, 0) *= scale;
+ }
+
+ w.x = scale * g;
+ g = s = scale = 0.0;
+
+ scale = abs(u(0,0)) + abs(u(0,1)) + abs(u(0,2));
+
+ if(scale > 0.0)
+ {
+ u(0, 1) /= scale;
+ u(0, 2) /= scale;
+ s = u(0,1) * u(0,1) + u(0,2) * u(0,2);
+
+ f = u(0, 1);
+ g = -SIGN(sqrt(s),f);
+ h = f * g-s;
+ u(0, 1) = f - g;
+
+ rv1.y = u(0, 1) / h;
+ rv1.z = u(0, 2) / h;
+
+ s = u(1,1) * u(0,1) + u(1,2) * u(0,2);
+ u(1, 1) += s * rv1.y;
+ u(1, 2) += s * rv1.z;
+
+ s = u(2,1) * u(0,1) + u(2,2) * u(0,2);
+ u(2, 1) += s * rv1.y;
+ u(2, 2) += s * rv1.z;
+
+ u(0, 1) *= scale;
+ u(0, 2) *= scale;
+ }
+
+ anorm = fabs(w.x);
+ // anorm = MAX(anorm, (fabs(w(0, 0)) + fabs(rv1.x))); // rv1.x is always 0 here, anorm too. fabs(X) >= 0
+
+ // i = 1;
+ // l = 3;
+ rv1.y = scale * g;
+ g = 0.0;
+ scale = fabs(u(1, 1)) + fabs(u(2, 1));
+
+ if(scale > 0.0)
+ {
+ u(1,1) /= scale;
+ u(2,1) /= scale;
+
+ s = u(1,1) * u(1,1) + u(2,1) * u(2,1);
+ f = u(1,1);
+ g = -SIGN(sqrt(s),f);
+ h = f * g - s;
+ u(1,1) = f-g;
+
+ f = (u(1,1) * u(1,2) + u(2,1) * u(2,2)) / h;
+ u(1,2) += f * u(1,1);
+ u(2,2) += f * u(2,1);
+
+ u(1,1) *= scale;
+ u(2,1) *= scale;
+ }
+
+ w.y = scale * g;
+ g = s = scale = 0.0;
+
+ scale = abs(u(1,2));
+ if(scale > 0.0)
+ {
+ u(1,2) /= scale;
+ s = u(1,2) * u(1,2);
+
+ f = u(1, 2);
+ g = -SIGN(sqrt(s), f);
+ h = f * g - s;
+ u(1,2) = f - g;
+
+ rv1.z = u(1,2) / h;
+ s = u(2,2) * u(1,2);
+
+ u(2,2) += s * rv1.z;
+ u(1,2) *= scale;
+ }
+
+ anorm = MAX(anorm, (abs(w.y) + abs(rv1.y)));
+
+ rv1.z = scale * g;
+
+ scale = abs(u(2, 2));
+ if(scale > 0.0)
+ {
+ u(2, 2) /= scale;
+ s = u(2, 2) * u(2, 2);
+ f = u(2, 2);
+ g = -SIGN(sqrt(s),f);
+ h = f * g - s;
+
+ u(2, 2) = f - g;
+ u(2, 2) *= scale;
+ }
+
+ w.z = scale * g;
+ g = s = scale = 0.0;
+
+ anorm = MAX(anorm, (abs(w.z)+abs(rv1.z)));
+
+ // SECOND PART
+ v(2, 2) = 1.0;
+ g = rv1.z;
+
+ // i = 1;
+ // l = 2;
+ if(fabs(g) > 0.0)
+ {
+ v(2,1) = (u(1,2) / u(1,2)) / g;
+ s = u(1,2) * v(2,2);
+ v(2,2) += s * v(2,1);
+ }
+ v(1,2) = 0.0;
+ v(2,1) = 0.0;
+ v(1,1) = 1.0;
+
+ g = rv1.y;
+
+ // l = 1;
+ // i = 0;
+ if(fabs(g) > 0.0)
+ {
+ v(1,0) = (u(0,1) / u(0,1)) / g;
+ v(2,0) = (u(0,2) / u(0,1)) / g;
+
+ s = u(0,1) * v(1,1) + u(0,2) * v(2,1);
+ v(1,1) += s * v(1,0);
+ v(2,1) += s * v(2,0);
+
+ s = u(0,1) * v(1,2) + u(0,2) * v(2,2);
+ v(1,2) += s * v(1,0);
+ v(2,2) += s * v(2,0);
+ }
+ v(0,1) = 0.0;
+ v(1,0) = 0.0;
+ v(0,2) = 0.0;
+ v(2,0) = 0.0;
+ v(0,0) = 1.0;
+ g = rv1.x;
+
+
+ // THIRD PART
+
+ // i = 2;
+ // l = 3;
+ g = w.z;
+ if(fabs(g) > 0.0)
+ {
+ u(2,2) /= g;
+ } else {
+ // TODO(amock): shouldnt this be a large number?
+ u(2,2) = 0.0;
+ }
+ u(2,2) += 1.0;
+
+ // i = 1;
+ // l = 2;
+
+ g = w.y;
+ u(1,2) = 0.0;
+
+ if(fabs(g) > 0.0)
+ {
+ g = 1.0/g;
+ s = u(2,1) * u(2,2);
+ f = (s/u(1,1)) * g;
+
+ u(1,2) += f * u(1,1);
+ u(2,2) += f * u(2,1);
+
+ u(1,1) *= g;
+ u(2,1) *= g;
+ } else {
+ u(1,1) = 0.0;
+ u(2,1) = 0.0;
+ }
+ u(1,1) += 1.0;
+
+ // i = 0;
+ // l = 1;
+ g = w.x;
+ u(0,1) = 0.0;
+ u(0,2) = 0.0;
+
+ if(fabs(g) > 0.0)
+ {
+ f = (u(1,0) * u(1,1) + u(2,0) * u(2,1)) / (g * u(0,0));
+ u(0,1) += f * u(0,0);
+ u(1,1) += f * u(1,0);
+ u(2,1) += f * u(2,0);
+
+ f = (u(1,0) * u(1,2) + u(2,0) * u(2,2)) / (g * u(0,0));
+ u(0,2) += f * u(0,0);
+ u(1,2) += f * u(1,0);
+ u(2,2) += f * u(2,0);
+
+ u(0,0) /= g;
+ u(1,0) /= g;
+ u(2,0) /= g;
+ } else {
+ u(0,0) = 0.0;
+ u(1,0) = 0.0;
+ u(2,0) = 0.0;
+ }
+ u(0,0) += 1.0;
+
+ int i, l;
+
+ // PART 4: Opti
+
+ // k = 2;
+ for(its=0; its eps*anorm)
+ // {
+ // l = 1;
+ // if(MIN(fabs(rv1.y),abs(w(0,0))) > eps*anorm)
+ // {
+ // l = 0;
+ // }
+ // }
+
+ flag=true;
+ l=2;
+ if(abs(rv1.z) <= eps*anorm)
+ {
+ flag=false;
+ }
+ else if(abs(w.y) > eps*anorm)
+ {
+ l=1;
+ if(abs(rv1.y) <= eps*anorm)
+ {
+ flag=false;
+ }
+ else if(abs(w.x) > eps*anorm)
+ {
+ l=0;
+ flag = false;
+ }
+ }
+
+ if(flag)
+ {
+ c=0.0;
+ s=1.0;
+ for(i=l; i<3; i++)
+ {
+ f = s*rv1[i];
+ rv1[i] = c*rv1[i];
+ if(abs(f) <= eps*anorm)
+ {
+ break;
+ }
+ g = w(i);
+ h = PYTHAG(f,g);
+ w(i) = h;
+ h = 1.0/h;
+ c = g*h;
+ s = -f*h;
+ for(j=0; j<3; j++)
+ {
+ y = u(j,l-1);
+ z = u(j,i);
+ u(j,l-1) = y*c+z*s;
+ u(j,i) = z*c-y*s;
+ }
+ }
+ }
+ z = w.z;
+ if(l == 2)
+ {
+ if(z < 0.0)
+ {
+ w.z = -z;
+ v(0,2) = -v(0,2);
+ v(1,2) = -v(1,2);
+ v(2,2) = -v(2,2);
+ }
+ break;
+ }
+ if(its == max_iterations - 1)
+ {
+ // std::cout << "no convergence in " << max_iterations << " svdcmp iterations" << std::endl;
+ // throw std::runtime_error("no convergence in max svdcmp iterations");
+ }
+ x = w(l);
+ y = w.y;
+ g = rv1.y;
+ h = rv1.z;
+ f = ((y-z)*(y+z)+(g-h)*(g+h))/(2.f*h*y);
+ g = PYTHAG(f, 1.f);
+ f = ((x-z)*(x+z)+h*((y/(f+SIGN(g,f)))-h))/x;
+ c = s = 1.f;
+ for (j=l; j<2; j++)
+ {
+ i = j+1;
+ g = rv1[i];
+ y = w(i);
+ h = s*g;
+ g = c*g;
+ z = PYTHAG(f,h);
+ rv1[j] = z;
+ c = f/z;
+ s = h/z;
+ f = x*c+g*s;
+ g = g*c-x*s;
+ h = y*s;
+ y *= c;
+ for(jj=0;jj<3;jj++)
+ {
+ x = v(jj,j);
+ z = v(jj,i);
+ v(jj,j) = x*c+z*s;
+ v(jj,i) = z*c-x*s;
+ }
+ z = PYTHAG(f,h);
+ w(j) = z;
+ if(z>0.0)
+ {
+ z = 1.f/z;
+ c = f*z;
+ s = h*z;
+ }
+ f = c*g+s*y;
+ x = c*y-s*g;
+ for (jj=0;jj<3;jj++)
+ {
+ y = u(jj,j);
+ z = u(jj,i);
+ u(jj,j) = y*c+z*s;
+ u(jj,i) = z*c-y*s;
+ }
+ }
+ rv1[l] = 0.f;
+ rv1.z = f;
+ w.z = x;
+ }
+
+
+ for(its=0; its eps*anorm)
+ {
+ l=0;
+ flag = false;
+ }
+
+ if(flag)
+ {
+ c=0.0;
+ s=1.0;
+ for(i=l; i<2; i++)
+ {
+ f = s*rv1[i];
+ rv1[i] = c*rv1[i];
+ if(abs(f) <= eps*anorm)
+ {
+ break;
+ }
+ g = w(i);
+ h = PYTHAG(f,g);
+ w(i) = h;
+ h = 1.0/h;
+ c = g*h;
+ s = -f*h;
+ for(j=0; j<3; j++)
+ {
+ y = u(j,l-1);
+ z = u(j,i);
+ u(j,l-1) = y*c+z*s;
+ u(j,i) = z*c-y*s;
+ }
+ }
+ }
+ z = w.y;
+ if(l == 1)
+ {
+ if(z < 0.0)
+ {
+ w.y = -z;
+ for (j=0; j<3; j++)
+ {
+ v(j,1) = -v(j,1);
+ }
+ }
+ break;
+ }
+ if(its == max_iterations - 1)
+ {
+ // std::cout << "no convergence in " << max_iterations << " svdcmp iterations" << std::endl;
+ // throw std::runtime_error("no convergence in max svdcmp iterations");
+ }
+
+ x = w(l);
+ y = w.x;
+ g = rv1.x;
+ h = rv1.y;
+ f = ((y-z)*(y+z)+(g-h)*(g+h))/(2.f*h*y);
+ g = PYTHAG(f, 1.f);
+ f = ((x-z)*(x+z)+h*((y/(f+SIGN(g,f)))-h))/x;
+ c = s = 1.f;
+
+ if(l == 0)
+ {
+ g = rv1.y;
+ y = w.y;
+ h = s*g;
+ g = c*g;
+ z = PYTHAG(f,h);
+ rv1.x = z;
+ c = f/z;
+ s = h/z;
+ f = x*c+g*s;
+ g = g*c-x*s;
+ h = y*s;
+ y *= c;
+
+
+ x = v(0,0);
+ z = v(0,1);
+ v(0,0) = x*c+z*s;
+ v(0,1) = z*c-x*s;
+
+ x = v(1,0);
+ z = v(1,1);
+ v(1,0) = x*c+z*s;
+ v(1,1) = z*c-x*s;
+
+ x = v(2,0);
+ z = v(2,1);
+ v(2,0) = x*c+z*s;
+ v(2,1) = z*c-x*s;
+
+
+ z = PYTHAG(f,h);
+ w.x = z;
+ if(z>0.f)
+ {
+ z = 1.f/z;
+ c = f*z;
+ s = h*z;
+ }
+ f = c*g+s*y;
+ x = c*y-s*g;
+ for(jj=0; jj<3; jj++)
+ {
+ y = u(jj,0);
+ z = u(jj,1);
+ u(jj,0) = y*c+z*s;
+ u(jj,1) = z*c-y*s;
+ }
+ }
+
+ rv1[l] = 0.f;
+ rv1.y = f;
+ w.y = x;
+ }
+
+
+ z = w.x;
+ if (z < 0.0)
+ {
+ w.x = -z;
+ v(0,0) = -v(0,0);
+ v(1,0) = -v(1,0);
+ v(2,0) = -v(2,0);
+ }
+}
+
+} // namespace rmagine
diff --git a/src/rmagine_cuda/src/math/math.cu b/src/rmagine_cuda/src/math/math.cu
index e06d3baa..ec02282f 100644
--- a/src/rmagine_cuda/src/math/math.cu
+++ b/src/rmagine_cuda/src/math/math.cu
@@ -1,10 +1,13 @@
#include "rmagine/math/math.cuh"
#include "rmagine/math/math.h"
#include "rmagine/math/types.h"
+#include "rmagine/math/linalg.h"
+#include "rmagine/util/cuda/CudaDebug.hpp"
namespace rmagine
{
+
////////
// Generic Kernel
///
@@ -508,6 +511,7 @@ void multNxN(
constexpr unsigned int blockSize = 64;
const unsigned int gridSize = (A.size() + blockSize - 1) / blockSize;
multNxN_kernel<<>>(A.raw(), B.raw(), C.raw(), A.size());
+ RM_CUDA_DEBUG();
}
Memory multNxN(
@@ -528,6 +532,7 @@ void multNxN(
constexpr unsigned int blockSize = 64;
const unsigned int gridSize = (A.size() + blockSize - 1) / blockSize;
multNxN_kernel<<>>(A.raw(), b.raw(), c.raw(), A.size());
+ RM_CUDA_DEBUG();
}
Memory multNxN(
@@ -548,6 +553,7 @@ void multNxN(
constexpr unsigned int blockSize = 64;
const unsigned int gridSize = (T1.size() + blockSize - 1) / blockSize;
multNxN_kernel<<>>(T1.raw(), T2.raw(), Tr.raw(), T1.size());
+ RM_CUDA_DEBUG();
}
Memory multNxN(
@@ -567,6 +573,7 @@ void multNxN(
constexpr unsigned int blockSize = 64;
const unsigned int gridSize = (T.size() + blockSize - 1) / blockSize;
multNxN_kernel<<>>(T.raw(), x.raw(), c.raw(), T.size());
+ RM_CUDA_DEBUG();
}
Memory multNxN(
@@ -586,6 +593,7 @@ void multNxN(
constexpr unsigned int blockSize = 64;
const unsigned int gridSize = (M1.size() + blockSize - 1) / blockSize;
multNxN_kernel<<>>(M1.raw(), M2.raw(), Mr.raw(), M1.size());
+ RM_CUDA_DEBUG();
}
Memory multNxN(
@@ -605,6 +613,7 @@ void multNxN(
constexpr unsigned int blockSize = 64;
const unsigned int gridSize = (M1.size() + blockSize - 1) / blockSize;
multNxN_conv_kernel<<>>(M1.raw(), M2.raw(), Qres.raw(), M1.size());
+ RM_CUDA_DEBUG();
}
void multNxN(
@@ -615,6 +624,7 @@ void multNxN(
constexpr unsigned int blockSize = 64;
const unsigned int gridSize = (M.size() + blockSize - 1) / blockSize;
multNxN_kernel<<>>(M.raw(), x.raw(), c.raw(), M.size());
+ RM_CUDA_DEBUG();
}
Memory multNxN(
@@ -637,6 +647,7 @@ void multNx1(
constexpr unsigned int blockSize = 64;
const unsigned int gridSize = (A.size() + blockSize - 1) / blockSize;
multNxN_kernel<<>>(A.raw(), b.raw(), C.raw(), A.size());
+ RM_CUDA_DEBUG();
}
Memory multNx1(
@@ -657,6 +668,7 @@ void multNx1(
constexpr unsigned int blockSize = 64;
const unsigned int gridSize = (A.size() + blockSize - 1) / blockSize;
multNx1_kernel<<>>(A.raw(), b.raw(), C.raw(), A.size());
+ RM_CUDA_DEBUG();
}
Memory multNx1(
@@ -676,6 +688,7 @@ void multNx1(
constexpr unsigned int blockSize = 64;
const unsigned int gridSize = (T1.size() + blockSize - 1) / blockSize;
multNx1_kernel<<>>(T1.raw(), t2.raw(), Tr.raw(), T1.size());
+ RM_CUDA_DEBUG();
}
Memory multNx1(
@@ -695,6 +708,7 @@ void multNx1(
constexpr unsigned int blockSize = 64;
const unsigned int gridSize = (T.size() + blockSize - 1) / blockSize;
multNx1_kernel<<>>(T.raw(), x.raw(), c.raw(), T.size());
+ RM_CUDA_DEBUG();
}
Memory multNx1(
@@ -714,6 +728,7 @@ void multNx1(
constexpr unsigned int blockSize = 64;
const unsigned int gridSize = (M1.size() + blockSize - 1) / blockSize;
multNx1_kernel<<>>(M1.raw(), m2.raw(), Mr.raw(), M1.size());
+ RM_CUDA_DEBUG();
}
Memory multNx1(
@@ -733,6 +748,7 @@ void multNx1(
constexpr unsigned int blockSize = 64;
const unsigned int gridSize = (M.size() + blockSize - 1) / blockSize;
multNx1_kernel<<>>(M.raw(), x.raw(), C.raw(), M.size());
+ RM_CUDA_DEBUG();
}
Memory multNx1(
@@ -752,6 +768,7 @@ void multNx1(
constexpr unsigned int blockSize = 64;
const unsigned int gridSize = (M.size() + blockSize - 1) / blockSize;
multNx1_kernel<<>>(M.raw(), x.raw(), C.raw(), M.size());
+ RM_CUDA_DEBUG();
}
Memory multNx1(
@@ -774,6 +791,7 @@ void mult1xN(
constexpr unsigned int blockSize = 64;
const unsigned int gridSize = (B.size() + blockSize - 1) / blockSize;
mult1xN_kernel<<>>(a.raw(), B.raw(), C.raw(), B.size());
+ RM_CUDA_DEBUG();
}
Memory mult1xN(
@@ -793,6 +811,7 @@ void mult1xN(
constexpr unsigned int blockSize = 64;
const unsigned int gridSize = (B.size() + blockSize - 1) / blockSize;
mult1xN_kernel<<>>(a.raw(), B.raw(), C.raw(), B.size());
+ RM_CUDA_DEBUG();
}
Memory mult1xN(
@@ -812,6 +831,7 @@ void mult1xN(
constexpr unsigned int blockSize = 64;
const unsigned int gridSize = (T2.size() + blockSize - 1) / blockSize;
mult1xN_kernel<<>>(t1.raw(), T2.raw(), Tr.raw(), T2.size());
+ RM_CUDA_DEBUG();
}
Memory mult1xN(
@@ -831,6 +851,7 @@ void mult1xN(
constexpr unsigned int blockSize = 64;
const unsigned int gridSize = (X.size() + blockSize - 1) / blockSize;
mult1xN_kernel<<>>(t.raw(), X.raw(), C.raw(), X.size());
+ RM_CUDA_DEBUG();
}
Memory mult1xN(
@@ -850,6 +871,7 @@ void mult1xN(
constexpr unsigned int blockSize = 64;
const unsigned int gridSize = (M2.size() + blockSize - 1) / blockSize;
mult1xN_kernel<<>>(m1.raw(), M2.raw(), Mr.raw(), M2.size());
+ RM_CUDA_DEBUG();
}
Memory mult1xN(
@@ -869,6 +891,7 @@ void mult1xN(
constexpr unsigned int blockSize = 64;
const unsigned int gridSize = (X.size() + blockSize - 1) / blockSize;
mult1xN_kernel<<>>(m.raw(), X.raw(), C.raw(), X.size());
+ RM_CUDA_DEBUG();
}
Memory mult1xN(
@@ -888,6 +911,7 @@ void mult1xN(
constexpr unsigned int blockSize = 64;
const unsigned int gridSize = (X.size() + blockSize - 1) / blockSize;
mult1xN_kernel<<>>(m.raw(), X.raw(), C.raw(), X.size());
+ RM_CUDA_DEBUG();
}
Memory mult1xN(
@@ -909,6 +933,7 @@ void addNxN(
constexpr unsigned int blockSize = 64;
const unsigned int gridSize = (A.size() + blockSize - 1) / blockSize;
addNxN_kernel<<>>(A.raw(), B.raw(), C.raw(), A.size());
+ RM_CUDA_DEBUG();
}
Memory addNxN(
@@ -928,6 +953,7 @@ void addNxN(
constexpr unsigned int blockSize = 64;
const unsigned int gridSize = (A.size() + blockSize - 1) / blockSize;
addNxN_kernel<<>>(A.raw(), B.raw(), C.raw(), A.size());
+ RM_CUDA_DEBUG();
}
Memory addNxN(
@@ -950,6 +976,7 @@ void subNxN(
constexpr unsigned int blockSize = 64;
const unsigned int gridSize = (A.size() + blockSize - 1) / blockSize;
subNxN_kernel<<>>(A.raw(), B.raw(), C.raw(), A.size());
+ RM_CUDA_DEBUG();
}
Memory subNxN(
@@ -969,6 +996,7 @@ void subNx1(
constexpr unsigned int blockSize = 64;
const unsigned int gridSize = (A.size() + blockSize - 1) / blockSize;
subNx1_kernel<<>>(A.raw(), b.raw(), C.raw(), A.size());
+ RM_CUDA_DEBUG();
}
Memory subNx1(
@@ -989,6 +1017,7 @@ void transpose(
constexpr unsigned int blockSize = 64;
const unsigned int gridSize = (A.size() + blockSize - 1) / blockSize;
transpose_kernel<<>>(A.raw(), B.raw(), A.size());
+ RM_CUDA_DEBUG();
}
Memory transpose(
@@ -1006,6 +1035,7 @@ void transpose(
constexpr unsigned int blockSize = 64;
const unsigned int gridSize = (A.size() + blockSize - 1) / blockSize;
transpose_kernel<<>>(A.raw(), B.raw(), A.size());
+ RM_CUDA_DEBUG();
}
Memory transpose(
@@ -1024,6 +1054,7 @@ void transposeInplace(
constexpr unsigned int blockSize = 64;
const unsigned int gridSize = (A.size() + blockSize - 1) / blockSize;
transposeInplace_kernel<<