Skip to content

Commit

Permalink
kram - simd - more affine and culling, add modulemap
Browse files Browse the repository at this point in the history
Note that clang modules aren't building.
  • Loading branch information
alecazam committed Oct 11, 2024
1 parent 62c5d56 commit fc4d013
Show file tree
Hide file tree
Showing 7 changed files with 190 additions and 56 deletions.
10 changes: 10 additions & 0 deletions build2/vectormath.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
701AF1912CAD27CB00BD0886 /* libvectormath-ios.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = "libvectormath-ios.a"; sourceTree = BUILT_PRODUCTS_DIR; };
70570FE42CB378E7005692BB /* bounds234.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = bounds234.h; sourceTree = "<group>"; };
70570FE72CB37997005692BB /* bounds234.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = bounds234.cpp; sourceTree = "<group>"; };
70570FEF2CB8C5C6005692BB /* module.modulemap */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.module-map"; path = module.modulemap; sourceTree = "<group>"; };
70B686F42CAD1026007ACA58 /* libvectormath.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libvectormath.a; sourceTree = BUILT_PRODUCTS_DIR; };
70B686FB2CAD1072007ACA58 /* double234.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = double234.h; sourceTree = "<group>"; };
70B686FC2CAD1072007ACA58 /* double234.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = double234.cpp; sourceTree = "<group>"; };
Expand Down Expand Up @@ -101,6 +102,7 @@
70B6870A2CAD1072007ACA58 /* vectormath */ = {
isa = PBXGroup;
children = (
70570FEF2CB8C5C6005692BB /* module.modulemap */,
70B687042CAD1072007ACA58 /* README.md */,
70B686FB2CAD1072007ACA58 /* double234.h */,
70B686FC2CAD1072007ACA58 /* double234.cpp */,
Expand Down Expand Up @@ -325,6 +327,8 @@
CLANG_X86_VECTOR_INSTRUCTIONS = avx2;
COPY_PHASE_STRIP = NO;
DEBUG_INFORMATION_FORMAT = dwarf;
DEFINES_MODULE = YES;
ENABLE_MODULE_VERIFIER = YES;
ENABLE_STRICT_OBJC_MSGSEND = YES;
ENABLE_TESTABILITY = YES;
ENABLE_USER_SCRIPT_SANDBOXING = YES;
Expand All @@ -345,6 +349,8 @@
IPHONEOS_DEPLOYMENT_TARGET = 15.0;
LOCALIZATION_PREFERS_STRING_CATALOGS = YES;
MACOSX_DEPLOYMENT_TARGET = 13.0;
MODULEMAP_FILE = ../libkram/vectormath/;
MODULE_VERIFIER_SUPPORTED_LANGUAGES = "objective-c c++";
MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
MTL_FAST_MATH = YES;
ONLY_ACTIVE_ARCH = YES;
Expand Down Expand Up @@ -393,6 +399,8 @@
CLANG_X86_VECTOR_INSTRUCTIONS = avx2;
COPY_PHASE_STRIP = NO;
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
DEFINES_MODULE = YES;
ENABLE_MODULE_VERIFIER = YES;
ENABLE_NS_ASSERTIONS = NO;
ENABLE_STRICT_OBJC_MSGSEND = YES;
ENABLE_USER_SCRIPT_SANDBOXING = YES;
Expand All @@ -407,6 +415,8 @@
IPHONEOS_DEPLOYMENT_TARGET = 15.0;
LOCALIZATION_PREFERS_STRING_CATALOGS = YES;
MACOSX_DEPLOYMENT_TARGET = 13.0;
MODULEMAP_FILE = ../libkram/vectormath/;
MODULE_VERIFIER_SUPPORTED_LANGUAGES = "objective-c c++";
MTL_ENABLE_DEBUG_INFO = NO;
MTL_FAST_MATH = YES;
OTHER_CFLAGS = (
Expand Down
13 changes: 12 additions & 1 deletion libkram/kram/KramConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -199,14 +199,25 @@
#if KRAM_MAC || KRAM_IOS
// can use old or new
#define USE_SIMDLIB 1

// maybe this doesn't work with C++ pch,
#define USE_SIMDLIBMODULE 0
#else
// have to use new
// have to use new on all other platforms
#define USE_SIMDLIB 1
#define USE_SIMDLIBMODULE 0
#endif

#if USE_SIMDLIB

// new vector math
#if USE_SIMDLIBMODULE
// import this as a clang module now
// NOTE(review): this branch is compiled out while USE_SIMDLIBMODULE is 0.
// As written, `import vectormath` has no terminating ';' and is neither the
// Objective-C form (`@import vectormath;`) nor a complete C++20 import
// declaration, so it will presumably fail to parse once the switch is set
// to 1 - confirm the intended import syntax before enabling.
import vectormath
#else
// Textual include of the vector math header (the path currently taken).
#include "vectormath234.h"
#endif

#else
// old vector math, using simd/simd.h
#include "float4a.h"
Expand Down
147 changes: 113 additions & 34 deletions libkram/vectormath/bounds234.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,14 @@

namespace SIMD_NAMESPACE {

culler::culler(const float4x4& projView) {
// Default constructor: starts with zero active planes.
// update() is what builds the frustum data from a projView matrix.
culler::culler()
{
    _planeCount = 0;
}

void culler::update(const float4x4& projView) {
// build a worldspace frustum
// https://fgiesen.wordpress.com/2010/10/17/view-frustum-culling/
// but don't test farZ plane if infFarZ

float4x4 m = transpose(projView);
const float4& x = m[0];
const float4& y = m[1];
Expand Down Expand Up @@ -42,6 +45,36 @@ culler::culler(const float4x4& projView) {
for (int i = 0; i < _planeCount; ++i) {
_selectionMasks[i] = _planes[i] < 0;
}

// Nathan Reed - If you represent the frustum corners in homogeneous coordinates,
// with w=0 for points at infinity, this just falls out of the usual
// point vs plane test, where you dot the homogeneous point against the plane equation.

// generate 8 corners of frustum from the inverse
float4x4 projViewInv = inverse(projView); // TODO: can pass down
float nearClip = 1;

// inset so division can occur
float farClip = isInfFarPlane ? 1e-6f : 0;

static float4 clipCorners[8] = {
{-1,-1,nearClip,1},
{-1, 1,nearClip,1},
{ 1,-1,nearClip,1},
{ 1, 1,nearClip,1},

{-1,-1,farClip,1},
{-1, 1,farClip,1},
{ 1,-1,farClip,1},
{ 1, 1,farClip,1},
};

// These are homogeneous coords, so w may be 0
for (int i = 0; i < 8; ++i) {
float4 cornerHomog = projViewInv * clipCorners[i];
_corners[i] = cornerHomog / cornerHomog.w;
_corners[i].w = 1;
}
}

bool culler::cullBox(float3 min, float3 max) const {
Expand Down Expand Up @@ -92,23 +125,58 @@ void culler::cullBoxes(const float3* boxes, int count, uint8_t* results) const {
float3 min = boxes[2*i];
float3 max = boxes[2*i+1];

results[i] = cullBox(min, max);
if (cullBox(min, max))
results[i] |= 1;
}
}

// Runs cullSphere() over an array of spheres.
//   sphere  - `count` spheres, each handed to cullSphere() as-is
//   count   - number of entries in both `sphere` and `results`
//   results - one byte per sphere; bit 0 is OR-ed in when cullSphere() returns true
// Bytes are only OR-ed, never cleared or overwritten, so the caller must zero
// `results` before the call; any other bits already set are left untouched.
void culler::cullSpheres(const float4* sphere, int count, uint8_t* results) const {
    for (int i = 0; i < count; ++i) {
        if (cullSphere(sphere[i])) {
            results[i] |= 1;
        }
    }
}

// Returns true when all 8 frustum corners lie within `box` on every axis
// (min and max faces inclusive).
// Caveat carried over from the original: this becomes a false negative for
// a non-infinite far plane (skips box while inside).
bool culler::isFrustumInBox(bbox box) const {
    const float3* pts = frustumCorners();

    // A lane that passes a vector compare is -1 (high bit set), so summing
    // the masks over 8 corners yields -8 on an axis where every corner passes.
    int3 insideTally = 0;
    for (int k = 0; k < 8; ++k) {
        const float3 p = pts[k];
        insideTally += (p >= box.min) & (p <= box.max);
    }

    const int3 allEight = (int3)-8;
    return all(insideTally == allEight);
}

// Returns true when, on at least one axis, all 8 frustum corners are below
// box.min or all 8 are above box.max.
// Caveat carried over from the original: this becomes a false positive
// (draws box even though outside).
bool culler::isFrustumOutsideBox(bbox box) const {
    const float3* corners = frustumCorners();

    // A lane that passes a vector compare is -1 (high bit set), so each axis
    // tallies to -8 when all 8 corners are on the far side of that face.
    int3 countMin = 0;
    int3 countMax = 0;
    for (int i = 0; i < 8; ++i) {
        float3 c = corners[i];
        countMin += c < box.min;
        countMax += c > box.max;
    }

    return any((countMin == (int3)-8) | (countMax == (int3)-8));
}

bsphere culler::transformSphereTRS(bsphere sphere, const float4x4& modelTfm) {
// May be better to convert to box with non-uniform scale
// sphere gets huge otherwise. Cache these too.

#if 1
// not sure which code is smaller, still have to add t
float size = reduce_max(decomposeScale(modelTfm));
float size = decompose_scale_max(modelTfm);
float radius = sphere.radius() * size;
float4 sphereCenter = float4m(sphere.center(), 1);
sphereCenter = modelTfm * sphereCenter;
Expand All @@ -120,7 +188,7 @@ bsphere culler::transformSphereTRU(bsphere sphere, const float4x4& modelTfm) {
const float3x3& m = as_float3x3(modelTfm);
float3 t = m[3];
float size = reduce_max(decomposeScale(modelTfm));
float size = decompose_scale_max(modelTfm);
float radius = sphere.radius() * size;
float3 sphereCenter = m * sphere.center();
sphereCenter += t;
Expand All @@ -130,60 +198,71 @@ bsphere culler::transformSphereTRU(bsphere sphere, const float4x4& modelTfm) {
#endif
}

bbox culler::transformBoxTRS(bbox box, const float4x4& modelTfm) {
// Worth doing on cpu and caching. So can still process an array
// but should transform only ones that didn't change transform or bound.
// Note: if doing infFar, may want float4 in homogeneous space w = 0
// then the points are accurate.

#if 0
// This is for a full general 4x4, but want a simpler affine version
// Writes the 8 corners of `box` into pt[0..7] as float3.
// The order is not yet the 000..111 bit order (see TODO).
void culler::boxCorners(bbox box, float3 pt[8]) const {
    // TODO: fix these so order is 000 to 111 in bits

    const float3 lo = box.min;
    const float3 hi = box.max;

    // the two opposite extremes
    pt[0] = lo;
    pt[1] = hi;

    // xy taken from one extreme, z from the other
    pt[2] = float3m(lo.xy, hi.z);
    pt[3] = float3m(hi.xy, lo.z);

    // start from an extreme and swap a single component
    pt[4] = lo;
    pt[4].y = hi.y;

    pt[5] = hi;
    pt[5].x = lo.x;

    pt[6] = hi;
    pt[6].y = lo.y;

    pt[7] = lo;
    pt[7].x = hi.x;
}

// Writes the 8 corners of `box` into the caller's pt[0..7] as float4 with w = 1.
// Must write through the `pt` parameter: a local array of the same name here
// would redeclare it and the caller's array would never be filled.
void culler::boxCorners(bbox box, float4 pt[8]) const {
    float4 min1 = float4m(box.min, 1);
    float4 max1 = float4m(box.max, 1);

    // the two opposite extremes
    pt[0] = min1;
    pt[1] = max1;

    // xy from one extreme, zw from the other (w is 1 in both)
    pt[2] = float4m(min1.xy, max1.zw);
    pt[3] = float4m(max1.xy, min1.zw);

    // start from an extreme and swap a single component
    pt[4] = min1; pt[4].y = max1.y; // (min.x, max.y, min.z, 1)
    pt[5] = max1; pt[5].x = min1.x; // (min.x, max.y, max.z, 1)

    pt[6] = max1; pt[6].y = min1.y; // (max.x, min.y, max.z, 1)
    pt[7] = min1; pt[7].x = max1.x; // (max.x, min.y, min.z, 1)
}


bbox culler::transformBoxTRS(bbox box, const float4x4& modelTfm) {
// Worth doing on cpu and caching. So can still process an array
// but should transform only ones that didn't change transform or bound.

#if 0
// This is for a full general 4x4, but want a simpler affine version
// convert the box to 8 pts first
float4 pt[8];
boxCorners(box, pt)

box.setInvalid();
for (int i = 0; i < 8; ++i) {
float3 v = (modelTfm * pt[i]).xyz;
box.unionWith(v);
}

#elif 0

float3 min1 = box.min;
float3 max1 = box.max;

// really just a 3x3 and translation
const float3x3& m = as_float3x3(modelTfm);
float3 t = m[3];

// convert the box to 8 pts first
float3 pt[8];
boxCorners(box, ptr);

pt[0] = min1;
pt[1] = max1;

pt[2] = float3m(min1.xy, max1.z);
pt[3] = float3m(max1.xy, min1.z);

pt[4] = min1; pt[4].y = max1.y;
pt[5] = max1; pt[5].x = min1.x;

pt[6] = max1; pt[6].y = min1.y;
pt[7] = min1; pt[7].x = max1.x;

box.setInvalid();
for (int i = 0; i < 8; ++i) {
float3 v = m * pt[i];
Expand Down
39 changes: 24 additions & 15 deletions libkram/vectormath/bounds234.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

namespace SIMD_NAMESPACE {

// TODO: may want a 2d box/rect as well
// TODO: may want a rect, circle, capsule as well.

struct bbox {
bbox() {} // nothing
Expand Down Expand Up @@ -76,9 +76,12 @@ struct bsphere {
// Fast cpu culler per frustum. Easy port to gpu which can do occlusion.
// This only tests 5 or 6 planes.
struct culler {
culler(const float4x4& projView);
culler();

void update(const float4x4& projView);

// TODO: should pass bitmask instead of uint8_t array
// caller must zero the results array, and visible state sets only low bit

void cullBoxes(const float3* boxes, int count, uint8_t* results) const;
void cullSpheres(const float4* sphere, int count, uint8_t* results) const;
Expand All @@ -100,27 +103,33 @@ struct culler {
return cullSphere(sphere.centerRadius);
}

// TODO: move this to vectormath affine ops
static float decomposeSize(const float4x4& m) {
return length(m[0]);
// should probably move these
static bsphere transformSphereTRS(bsphere sphere, const float4x4& modelTfm);
static bbox transformBoxTRS(bbox box, const float4x4& modelTfm);

void boxCorners(bbox box, float3 pt[8]) const;
void boxCorners(bbox box, float4 pt[8]) const;

bool isFrustumInBox(bbox box) const;
bool isFrustumOutsideBox(bbox box) const;

// Returns a pointer to the 8 frustum corners stored in _corners, as float4.
const float4* frustumCorners4() const {
return _corners;
}
static float3 decomposeScale(const float4x4& m) {
// TODO: this length is unsigned, so need to fix that for inversion
return float3m(length(m[0]),
length(m[1]),
length(m[2]));
// Returns the stored frustum corners (_corners) viewed through as_float3().
// NOTE(review): presumably relies on float3 and float4 having the same
// stride so that element i of the result lines up with _corners[i] - confirm
// against the as_float3 definition.
const float3* frustumCorners() const {
return as_float3(_corners);
}

bsphere transformSphereTRU(bsphere sphere, const float4x4& modelTfm);
bbox transformBoxTRS(bbox box, const float4x4& modelTfm);


private:
float4 _planes[6];
// This won't work if SIMD_INT is not defined.
#if SIMD_INT
int4 _selectionMasks[6];
#endif
uint32_t _planeCount = 0;
uint32_t _planeCount;

// 8 corners of frustum
float4 _corners[8];
};

} // namespace SIMD_NAMESPACE
Expand Down
Loading

0 comments on commit fc4d013

Please sign in to comment.