Skip to content

Commit

Permalink
Merge pull request #50 from jbikker/dev
Browse files Browse the repository at this point in the history
Merging dev to main
  • Loading branch information
jbikker authored Dec 7, 2024
2 parents 447f46c + d5c7e28 commit 120e74c
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 43 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# dev
This is the **development branch** for tinybvh. Please send your PRs here.

# tinybvh
Single-header BVH construction and traversal library written as "Sane C++" (or "C with classes"). The library has no dependencies.

Expand Down
102 changes: 59 additions & 43 deletions tiny_bvh.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,12 @@ THE SOFTWARE.
// Oct 29: version 0.0.1 : Establishing interface.
//

// How to use:
//
// Use this in *one* .c or .cpp
// #define TINYBVH_IMPLEMENTATION
// #include "tiny_bvh.h"
//

//
// tinybvh can use custom vector types by defining TINYBVH_USE_CUSTOM_VECTOR_TYPES once before inclusion.
// To define custom vector types create a tinybvh namespace with the appropriate using directives, e.g.:
// namespace tinybvh
Expand All @@ -68,9 +67,7 @@ THE SOFTWARE.
//
// #define TINYBVH_USE_CUSTOM_VECTOR_TYPES
// #include <tiny_bvh.h>
//

// How to use:
// See tiny_bvh_test.cpp for basic usage. In short:
// instantiate a BVH: tinybvh::BVH bvh;
// build it: bvh.Build( (tinybvh::bvhvec4*)triangleData, TRIANGLE_COUNT );
Expand Down Expand Up @@ -720,14 +717,17 @@ class BVH
}
void Compact( const BVHLayout layout /* must be WALD_32BYTE or VERBOSE */ );
void BuildQuick( const bvhvec4* vertices, const uint32_t primCount );
void BuildQuick( const bvhvec4slice& vertices );
void Build( const bvhvec4* vertices, const uint32_t primCount );
void BuildHQ( const bvhvec4slice& vertices );
void Build( const bvhvec4slice& vertices );
void BuildHQ( const bvhvec4* vertices, const uint32_t primCount );
void BuildHQ( const bvhvec4slice& vertices );
#ifdef BVH_USEAVX
void BuildAVX( const bvhvec4* vertices, const uint32_t primCount );
void BuildAVX( const bvhvec4slice& vertices );
#elif defined BVH_USENEON
void BuildNEON( const bvhvec4* vertices, const uint32_t primCount );
void BuildNEON( const bvhvec4slice& vertices );
#endif
void BuildTLAS( const bvhaabb* aabbs, const uint32_t aabbCount );
void BuildTLAS( const BLASInstance* bvhs, const uint32_t instCount );
Expand Down Expand Up @@ -979,8 +979,15 @@ void BVH::BuildTLAS( const BLASInstance* bvhs, const uint32_t instCount )
// your application (e.g., when you need to trace few rays).
void BVH::BuildQuick( const bvhvec4* vertices, const uint32_t primCount )
{
FATAL_ERROR_IF( primCount == 0, "BVH::BuildQuick( .. ), primCount == 0." );
// build the BVH with a continuous array of bvhvec4 vertices:
// in this case, the stride for the slice is 16 bytes.
BuildQuick( bvhvec4slice{ vertices, primCount * 3, 16U } );
}
void BVH::BuildQuick( const bvhvec4slice& vertices )
{
FATAL_ERROR_IF( vertices.count == 0, "BVH::BuildQuick( .. ), primCount == 0." );
// allocate on first build
const uint32_t primCount = vertices.count / 3;
const uint32_t spaceNeeded = primCount * 2; // upper limit
if (allocatedBVHNodes < spaceNeeded)
{
Expand All @@ -994,7 +1001,7 @@ void BVH::BuildQuick( const bvhvec4* vertices, const uint32_t primCount )
fragment = (Fragment*)AlignedAlloc( primCount * sizeof( Fragment ) );
}
else FATAL_ERROR_IF( !rebuildable, "BVH::BuildQuick( .. ), bvh not rebuildable." );
verts = bvhvec4slice{ vertices, primCount * 3 }; // note: we're not copying this data; don't delete.
verts = vertices; // note: we're not copying this data; don't delete.
idxCount = triCount = primCount;
// reset node pool
uint32_t newNodePtr = 2;
Expand Down Expand Up @@ -1063,7 +1070,13 @@ void BVH::BuildQuick( const bvhvec4* vertices, const uint32_t primCount )
// tracing on the CPU. This code uses no SIMD instructions.
// Faster code, using SSE/AVX, is available for x64 CPUs.
// For GPU rendering: The resulting BVH should be converted to a more optimal
// format after construction.
// format after construction, e.g. BVH::AILA_LAINE.
void BVH::Build( const bvhvec4* vertices, const uint32_t primCount )
{
// build the BVH with a continuous array of bvhvec4 vertices:
// in this case, the stride for the slice is 16 bytes.
Build( bvhvec4slice{ vertices, primCount * 3, 16U } );
}
void BVH::Build( const bvhvec4slice& vertices )
{
FATAL_ERROR_IF( vertices.count == 0, "BVH::Build( .. ), primCount == 0." );
Expand Down Expand Up @@ -1096,8 +1109,10 @@ void BVH::Build( const bvhvec4slice& vertices )
// building a BVH over triangles specified as three 16-byte vertices each.
for (uint32_t i = 0; i < triCount; i++)
{
fragment[i].bmin = tinybvh_min( tinybvh_min( verts[i * 3], verts[i * 3 + 1] ), verts[i * 3 + 2] );
fragment[i].bmax = tinybvh_max( tinybvh_max( verts[i * 3], verts[i * 3 + 1] ), verts[i * 3 + 2] );
const bvhvec4 v0 = verts[i * 3], v1 = verts[i * 3 + 1], v2 = verts[i * 3 + 2];
const bvhvec4 fmin = tinybvh_min( v0, tinybvh_min( v1, v2 ) );
const bvhvec4 fmax = tinybvh_max( v0, tinybvh_max( v1, v2 ) );
fragment[i].bmin = fmin, fragment[i].bmax = fmax;
root.aabbMin = tinybvh_min( root.aabbMin, fragment[i].bmin );
root.aabbMax = tinybvh_max( root.aabbMax, fragment[i].bmax ), triIdx[i] = i;
}
Expand Down Expand Up @@ -1203,11 +1218,6 @@ void BVH::Build( const bvhvec4slice& vertices )
usedBVHNodes = newNodePtr;
}

void BVH::Build( const bvhvec4* vertices, const uint32_t primCount )
{
Build( bvhvec4slice{ vertices, primCount * 3 } );
}

// SBVH builder.
// Besides the regular object splits used in the reference builder, the SBVH
// algorithm also considers spatial splits, where primitives may be cut in
Expand All @@ -1216,6 +1226,10 @@ void BVH::Build( const bvhvec4* vertices, const uint32_t primCount )
// For typical geometry, SBVH yields a tree that can be traversed 25% faster.
// This comes at greatly increased construction cost, making the SBVH
// primarily useful for static geometry.
void BVH::BuildHQ( const bvhvec4* vertices, const uint32_t primCount )
{
BuildHQ( bvhvec4slice{ vertices, primCount * 3, 16U } );
}
void BVH::BuildHQ( const bvhvec4slice& vertices )
{
FATAL_ERROR_IF( vertices.count == 0, "BVH::BuildHQ( .. ), primCount == 0." );
Expand Down Expand Up @@ -1452,11 +1466,6 @@ void BVH::BuildHQ( const bvhvec4slice& vertices )
usedBVHNodes = newNodePtr;
}

void BVH::BuildHQ( const bvhvec4* vertices, const uint32_t primCount )
{
BuildHQ( bvhvec4slice{ vertices, primCount * 3 } );
}

// Convert: Change the BVH layout from one format into another.
void BVH::Convert( const BVHLayout from, const BVHLayout to, const bool /* deleteOriginal */ )
{
Expand Down Expand Up @@ -2380,35 +2389,26 @@ void BVH::MergeLeafs()
// cost of leaving things as they are
BVHNodeVerbose& left = verbose[node.left];
BVHNodeVerbose& right = verbose[node.right];
float Ckeepsplit =
C_TRAV + C_INT *
(SA( left.aabbMin, left.aabbMax ) * leftCount +
SA( right.aabbMin, right.aabbMax ) * rightCount);
float Ckeepsplit = C_TRAV + C_INT * (SA( left.aabbMin, left.aabbMax ) *
leftCount + SA( right.aabbMin, right.aabbMax ) * rightCount);
if (Cunsplit <= Ckeepsplit)
{
// collapse the subtree
uint32_t start = newIdxPtr;
MergeSubtree( nodeIdx, newIdx, newIdxPtr );
node.firstTri = start;
node.triCount = mergedCount;
node.firstTri = start, node.triCount = mergedCount;
node.left = node.right = 0;
// pop new task
if (stackPtr == 0) break;
nodeIdx = stack[--stackPtr];
}
else
{
// recurse
nodeIdx = node.left;
stack[stackPtr++] = node.right;
}
else /* recurse */ nodeIdx = node.left, stack[stackPtr++] = node.right;
}
}
// cleanup
AlignedFree( subtreeTriCount );
AlignedFree( triIdx );
triIdx = newIdx;
may_have_holes = true; // all over the place, in fact
triIdx = newIdx, may_have_holes = true; // all over the place, in fact
}

// Optimizing a BVH: BVH must be in 'verbose' format.
Expand Down Expand Up @@ -3098,10 +3098,17 @@ inline float halfArea( const __m256& a /* a contains aabb itself, with min.xyz n
#endif
void BVH::BuildAVX( const bvhvec4* vertices, const uint32_t primCount )
{
FATAL_ERROR_IF( primCount == 0, "BVH::BuildAVX( .. ), primCount == 0." );
// build the BVH with a continuous array of bvhvec4 vertices:
// in this case, the stride for the slice is 16 bytes.
BuildAVX( bvhvec4slice{ vertices, primCount * 3, 16U } );
}
void BVH::BuildAVX( const bvhvec4slice& vertices )
{
FATAL_ERROR_IF( vertices.count == 0, "BVH::BuildAVX( .. ), primCount == 0." );
FATAL_ERROR_IF( vertices.stride & 15, "BVH::BuildAVX( .. ), stride must be multiple of 16." );
FATAL_ERROR_IF( vertices.count == 0, "BVH::BuildAVX( .. ), primCount == 0." );
int32_t test = BVHBINS;
if (test != 8) assert( false ); // AVX builders require BVHBINS == 8.
assert( ((long long)vertices & 63) == 0 ); // buffer must be cacheline-aligned
// aligned data
ALIGNED( 64 ) __m256 binbox[3 * BVHBINS]; // 768 bytes
ALIGNED( 64 ) __m256 binboxOrig[3 * BVHBINS]; // 768 bytes
Expand All @@ -3118,6 +3125,7 @@ void BVH::BuildAVX( const bvhvec4* vertices, const uint32_t primCount )
static const __m256 signFlip8 = _mm256_setr_ps( -0.0f, -0.0f, -0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f );
for (uint32_t i = 0; i < 3 * BVHBINS; i++) binboxOrig[i] = max8; // binbox initialization template
// reset node pool
const uint32_t primCount = vertices.count / 3;
const uint32_t spaceNeeded = primCount * 2;
if (allocatedBVHNodes < spaceNeeded)
{
Expand All @@ -3131,13 +3139,13 @@ void BVH::BuildAVX( const bvhvec4* vertices, const uint32_t primCount )
fragment = (Fragment*)AlignedAlloc( primCount * sizeof( Fragment ) );
}
else FATAL_ERROR_IF( !rebuildable, "BVH::BuildAVX( .. ), bvh not rebuildable." );
verts = bvhvec4slice{ vertices, primCount * 3 }; // note: we're not copying this data; don't delete.
verts = vertices; // note: we're not copying this data; don't delete.
triCount = idxCount = primCount;
uint32_t newNodePtr = 2;
struct FragSSE { __m128 bmin4, bmax4; };
FragSSE* frag4 = (FragSSE*)fragment;
__m256* frag8 = (__m256*)fragment;
const __m128* verts4 = (__m128*)vertices;
const __m128* verts4 = (__m128*)verts.data; // that's why it must be 16-byte aligned.
// assign all triangles to the root node
BVHNode& root = bvhNode[0];
root.leftFirst = 0, root.triCount = triCount;
Expand Down Expand Up @@ -3681,7 +3689,7 @@ int32_t BVH::Intersect_CWBVH( Ray& ray ) const
const bvhvec4 n2 = blasNodes[child_node_index * 5 + 2], n3 = blasNodes[child_node_index * 5 + 3];
const bvhvec4 n4 = blasNodes[child_node_index * 5 + 4], p = n0;
bvhint3 e;
e.x = (int32_t)*((int8_t*)&n0.w + 0), e.y = (int32_t)*((int8_t*)&n0.w + 1), e.z = (int32_t)*((int8_t*)&n0.w + 2);
e.x = (int32_t) * ((int8_t*)&n0.w + 0), e.y = (int32_t) * ((int8_t*)&n0.w + 1), e.z = (int32_t) * ((int8_t*)&n0.w + 2);
ngroup.x = as_uint( n1.x ), tgroup.x = as_uint( n1.y ), tgroup.y = 0;
uint32_t hitmask = 0;
const uint32_t vx = (e.x + 127) << 23u; const float adjusted_idirx = *(float*)&vx * ray.rD.x;
Expand Down Expand Up @@ -4186,10 +4194,17 @@ static inline int32x4_t vrnd32xq_f32( float32x4_t a ) {

void BVH::BuildNEON( const bvhvec4* vertices, const uint32_t primCount )
{
FATAL_ERROR_IF( primCount == 0, "BVH::BuildNEON( .. ), primCount == 0." );
// build the BVH with a continuous array of bvhvec4 vertices:
// in this case, the stride for the slice is 16 bytes.
BuildNEON( bvhvec4slice{ vertices, primCount * 3, 16U } );
}
void BVH::BuildNEON( const bvhvec4slice& vertices )
{
FATAL_ERROR_IF( vertices.count == 0, "BVH::BuildNEON( .. ), primCount == 0." );
FATAL_ERROR_IF( vertices.stride & 15, "BVH::BuildNEON( .. ), stride must be multiple of 16." );
FATAL_ERROR_IF( vertices.count == 0, "BVH::BuildNEON( .. ), primCount == 0." );
int32_t test = BVHBINS;
if (test != 8) assert( false ); // AVX builders require BVHBINS == 8.
assert( ((long long)vertices & 63) == 0 ); // buffer must be cacheline-aligned
// aligned data
ALIGNED( 64 ) float32x4x2_t binbox[3 * BVHBINS]; // 768 bytes
ALIGNED( 64 ) float32x4x2_t binboxOrig[3 * BVHBINS]; // 768 bytes
Expand All @@ -4205,6 +4220,7 @@ void BVH::BuildNEON( const bvhvec4* vertices, const uint32_t primCount )
static const float32x4_t binmul3 = vdupq_n_f32( BVHBINS * 0.49999f );
for (uint32_t i = 0; i < 3 * BVHBINS; i++) binboxOrig[i] = max8; // binbox initialization template
// reset node pool
const uint32_t primCount = vertices.count / 3;
const uint32_t spaceNeeded = primCount * 2;
if (allocatedBVHNodes < spaceNeeded)
{
Expand All @@ -4218,13 +4234,13 @@ void BVH::BuildNEON( const bvhvec4* vertices, const uint32_t primCount )
fragment = (Fragment*)AlignedAlloc( primCount * sizeof( Fragment ) );
}
else FATAL_ERROR_IF( !rebuildable, "BVH::BuildNEON( .. ), bvh not rebuildable." );
verts = bvhvec4slice{ vertices, primCount * 3 }; // note: we're not copying this data; don't delete.
verts = vertices; // note: we're not copying this data; don't delete.
triCount = idxCount = primCount;
uint32_t newNodePtr = 2;
struct FragSSE { float32x4_t bmin4, bmax4; };
FragSSE* frag4 = (FragSSE*)fragment;
float32x4x2_t* frag8 = (float32x4x2_t*)fragment;
const float32x4_t* verts4 = (float32x4_t*)vertices;
const float32x4_t* verts4 = (float32x4_t*)vertices.data;
// assign all triangles to the root node
BVHNode& root = bvhNode[0];
root.leftFirst = 0, root.triCount = triCount;
Expand Down
5 changes: 5 additions & 0 deletions vcproj/tiny_bvh_minimal.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,11 @@
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<Optimization>Full</Optimization>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
Expand Down
4 changes: 4 additions & 0 deletions vcproj/tiny_bvh_pt.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,10 @@
<LanguageStandard>stdcpp20</LanguageStandard>
<AdditionalIncludeDirectories>..\external\embree\include</AdditionalIncludeDirectories>
<OpenMPSupport>true</OpenMPSupport>
<Optimization>Full</Optimization>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
Expand Down
3 changes: 3 additions & 0 deletions vcproj/tiny_bvh_renderer.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,9 @@
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<Optimization>Full</Optimization>
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
Expand Down
2 changes: 2 additions & 0 deletions vcproj/tiny_bvh_speedtest.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@
<AdditionalIncludeDirectories>../external/OpenCL/inc/;../external/embree/include</AdditionalIncludeDirectories>
<LanguageStandard>stdcpp17</LanguageStandard>
<Optimization>Full</Optimization>
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
Expand Down

0 comments on commit 120e74c

Please sign in to comment.