kram - simd - small matrix/quat fixes

alecazam · Oct 11, 2024 · 62c5d56 · 62c5d56
1 parent 80a612f
commit 62c5d56
Show file tree

Hide file tree

Showing 2 changed files with 58 additions and 49 deletions.
diff --git a/libkram/vectormath/float234.cpp b/libkram/vectormath/float234.cpp
@@ -851,7 +851,7 @@ float4x4 inverse_tru(const float4x4& mtx)
     return inverse;
 }
 
-float4x4 float4x4_tr(float3 t, quatf r) {
+float4x4 float4x4m_tr(float3 t, quatf r) {
     float4x4 m(float4x4::identity());
     m[3].xyz = t;
 
@@ -860,7 +860,7 @@ float4x4 float4x4_tr(float3 t, quatf r) {
 }
 
 // TODO: there are faster ways to apply post rot, post scale
-float4x4 float4x4_trs(float3 t, quatf r, float3 scale) {
+float4x4 float4x4m_trs(float3 t, quatf r, float3 scale) {
     float4x4 m(float4x4::identity());
     m[3].xyz = t;
     m = m * float4x4m(r);
@@ -870,8 +870,8 @@ float4x4 float4x4_trs(float3 t, quatf r, float3 scale) {
 }
 
 // leaving this in here, since it can be further optimized
-float4x4 float4x4_tru(float3 t, quatf r, float scale) {
-    return float4x4_trs(t, r, float3m(scale));
+float4x4 float4x4m_tru(float3 t, quatf r, float scale) {
+    return float4x4m_trs(t, r, float3m(scale));
 }
 
 float4x4 inverse_trs(const float4x4& mtx)
@@ -974,32 +974,22 @@ float4x4 perspective_rhcs(float4 tangents, float nearZ, float farZ)
     float xoff = (r + l) / dx;
     float yoff = (t + b) / dy;
 
+    // zs drops out since zs = inf / -inf = 1, 1-1 = 0
+    // z' = near / -z
+
     float m22;
     float m23;
 
-
-    //if (isReverseZ) {
-        // zs drops out since zs = inf / -inf = 1, 1-1 = 0
-        // z' = near / -z
-
-        if (farZ == FLT_MAX) {
-            m22 = 0;
-            m23 = nearZ;
-        }
-        else {
-            // TODO: handle farZ when not inf, check these
-            m22 = -nearZ / farZ;
-            m23 = nearZ;
-        }
+    if (farZ == FLT_MAX) {
+        m22 = 0;
+        m23 = nearZ;
+    }
+    else {
+        // TODO: handle farZ when not inf, check these
+        m22 = -nearZ / farZ;
+        m23 = nearZ;
+    }
 
-//    }
-//    else {
-//        float zs = farZ / (nearZ - farZ);
-//        
-//        m22 = zs;
-//        m23 = zs * nearZ;
-//    }
-
     float4x4 m(
         (float4){ xs,       0,   0,  0 },
         (float4){  0,      ys,   0,  0 },
@@ -1010,14 +1000,22 @@ float4x4 perspective_rhcs(float4 tangents, float nearZ, float farZ)
     return m;
 }
 
-float4x4 orthographic_rhcs(float width, float height, float nearZ, float farZ)
+float4x4 orthographic_rhcs(float4 rect, float nearZ, float farZ)
 {
-    // float aspectRatio = width / height;
-    float xs = 2.0f / width;
-    float ys = 2.0f / height;
+    // l,t,r,b
+    float l = rect.x;
+    float t = rect.y;
+    float r = rect.z;
+    float b = rect.w;
+
+    float dx = (r-l);
+    float dy = (t-b);
+
+    float m00 = 2.0f / dx;
+    float m11 = 2.0f / dy;
 
-    float xoff = 0.0f;  // -0.5f * width;
-    float yoff = 0.0f;  // -0.5f * height;
+    float m03 = (r+l) / dx;
+    float m13 = (t+b) / dy;
 
     float dz = -(farZ - nearZ);
     float zs = 1.0f / dz;
@@ -1026,16 +1024,14 @@ float4x4 orthographic_rhcs(float width, float height, float nearZ, float farZ)
     float m23 = zs * nearZ;
 
     // revZ, can't use infiniteZ with ortho view
-    //if (isReverseZ) {
-        m22 = -m22;
-        m23 = 1.0f - m23;
-    //}
-
+    m22 = -m22;
+    m23 = 1.0f - m23;
+
     float4x4 m(
-        (float4){xs, 0, 0, 0},
-        (float4){0, ys, 0, 0},
-        (float4){0, 0, m22, 0},
-        (float4){xoff, yoff, m23, 1}
+        (float4){m00,   0,   0, 0},
+        (float4){  0, m11,   0, 0},
+        (float4){  0,   0, m22, 0},
+        (float4){m03, m13, m23, 1}
     );
     return m;
 }

diff --git a/libkram/vectormath/float234.h b/libkram/vectormath/float234.h
@@ -808,13 +808,25 @@ struct quatf {
     static const quatf& zero();
     static const quatf& identity();
 
+    // image = axis * sin(theta/2), real = cos(theta/2)
+    const float3& imag() const { return as_float3(v); }
+    float real() const { return v.w; }
+
     float4 v;
 };
 
+// this is conjugate, so only axis is inverted
+SIMD_CALL quatf operator-(quatf q) {
+    float4 qv = q.v;
+    qv.xyz = -qv.xyz;
+    return quatf(qv);
+}
+
 SIMD_CALL float3 operator*(quatf q, float3 v) {
+    // see https://fgiesen.wordpress.com/2019/02/09/rotating-a-single-vector-using-a-quaternion/
     float4 qv = q.v;
-    float3 t = qv.w * cross(qv.xyz, v);
-    return v + 2.0f * t + cross(q.v.xyz, t);
+    float3 t = 2.0f * cross(qv.xyz, v);
+    return v + qv.w * t + cross(qv.xyz, t);
 }
 
 float4x4 float4x4m(quatf q);
@@ -827,7 +839,7 @@ float4x4 float4x4m(quatf q);
 
 SIMD_CALL quatf lerp(quatf q0, quatf q1, float t) {
     if (dot(q0.v, q1.v) < 0.0f)
-        q1.v.xyz = -q1.v.xyz;
+        q1 = -q1; // conjugate
 
     float4 v = lerp(q0.v, q1.v, t);
     return quatf(v);
@@ -872,19 +884,20 @@ SIMD_CALL float3 decompose_scale(const float4x4& m) {
                    length(m[2]));
 }
 SIMD_CALL float decompose_scale_max(const float4x4& m) {
-    return reduce_max(decomposeScale(m));
+    return reduce_max(decompose_scale(m));
 }
 
 
 float3x3 float3x3m(quatf qq);
 
-float4x4 float4x4_tr(float3 t, quatf r);
-float4x4 float4x4_trs(float3 t, quatf r, float3 scale);
-float4x4 float4x4_tru(float3 t, quatf r, float scale);
+// m in here?
+float4x4 float4x4m_tr(float3 t, quatf r);
+float4x4 float4x4m_tru(float3 t, quatf r, float scale);
+float4x4 float4x4m_trs(float3 t, quatf r, float3 scale);
 
 float4x4 perspective_rhcs(float fovyRadians, float aspectXtoY, float nearZ, float farZ = FLT_MAX);
 float4x4 perspective_rhcs(float4 tangents, float nearZ, float farZ = FLT_MAX);
-float4x4 orthographic_rhcs(float width, float height, float nearZ, float farZ);
+float4x4 orthographic_rhcs(float4 rect, float nearZ, float farZ);
 
 SIMD_CALL float4x4 rotation(float3 axis, float radians) {
     quatf q(axis, radians);