Skip to content

Commit

Permalink
Add SBVH builder.
Browse files Browse the repository at this point in the history
  • Loading branch information
jbikker committed Nov 11, 2024
1 parent fb68e6a commit c813785
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 49 deletions.
60 changes: 20 additions & 40 deletions tiny_bvh.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

// Nov 11, '24: version 0.5.0 : SBVH builder.
// Nov 10, '24: version 0.4.2 : BVH4/8, gpu-friendly BVH4.
// Nov 09, '24: version 0.4.0 : Layouts, BVH optimizer.
// Nov 08, '24: version 0.3.0
Expand Down Expand Up @@ -241,16 +242,6 @@ static bvhvec3 normalize( const bvhvec3& a )
return a * rl;
}

// Random numbers
unsigned int bvh_rnd()
{
static unsigned int seed = 0x12345678;
seed ^= seed << 13;
seed ^= seed >> 17;
seed ^= seed << 5;
return seed;
}

// SIMD typedef, helps keeping the interface generic
#ifdef BVH_USEAVX
typedef __m128 SIMDVEC4;
Expand Down Expand Up @@ -635,7 +626,6 @@ void BVH::Build( const bvhvec4* vertices, const unsigned int primCount )
// For typical geometry, SBVH yields a tree that can be traversed 25% faster.
// This comes at greatly increased construction cost, making the SBVH
// primarily useful for static geometry.
// TODO - UNDER CONSTRUCTION - NOT PRODUCING CORRECT TREES JUST YET
void BVH::BuildHQ( const bvhvec4* vertices, const unsigned int primCount )
{
// allocate on first build
Expand Down Expand Up @@ -670,12 +660,12 @@ void BVH::BuildHQ( const bvhvec4* vertices, const unsigned int primCount )
fragment[i].bmin = tinybvh_min( tinybvh_min( verts[i * 3], verts[i * 3 + 1] ), verts[i * 3 + 2] );
fragment[i].bmax = tinybvh_max( tinybvh_max( verts[i * 3], verts[i * 3 + 1] ), verts[i * 3 + 2] );
root.aabbMin = tinybvh_min( root.aabbMin, fragment[i].bmin );
root.aabbMax = tinybvh_max( root.aabbMax, fragment[i].bmax ), triIdx[i] = i;
root.aabbMax = tinybvh_max( root.aabbMax, fragment[i].bmax ), triIdx[i] = i, fragment[i].primIdx = i;
}
const float rootArea = (root.aabbMax - root.aabbMin).halfArea();
// subdivide recursively
struct Task { unsigned int node, sliceStart, sliceEnd, dummy; };
ALIGNED(64) Task task[256];
ALIGNED( 64 ) Task task[256];
unsigned int taskCount = 0, nodeIdx = 0, sliceStart = 0, sliceEnd = triCount + slack;
const bvhvec3 minDim = (root.aabbMax - root.aabbMin) * 1e-7f /* don't touch, carefully picked */;
bvhvec3 bestLMin = 0, bestLMax = 0, bestRMin = 0, bestRMax = 0;
Expand Down Expand Up @@ -735,7 +725,6 @@ void BVH::BuildHQ( const bvhvec4* vertices, const unsigned int primCount )
}
// consider a spatial split
bool spatial = false;
#if 1
unsigned int NL[BVHBINS - 1], NR[BVHBINS - 1], budget = sliceEnd - sliceStart;
bvhvec3 spatialUnion = bestLMax - bestRMin;
float spatialOverlap = (spatialUnion.halfArea()) / rootArea;
Expand All @@ -753,13 +742,13 @@ void BVH::BuildHQ( const bvhvec4* vertices, const unsigned int primCount )
for (unsigned int i = 0; i < node.triCount; i++)
{
const unsigned int fragIdx = triIdxA[node.leftFirst + i];
const int bin1 = tinybvh_clamp( (int)((fragment[fragIdx].bmin[a] * -1.0f - nodeMin) * rPlaneDist), 0, BVHBINS - 1 );
const int bin1 = tinybvh_clamp( (int)((fragment[fragIdx].bmin[a] - nodeMin) * rPlaneDist), 0, BVHBINS - 1 );
const int bin2 = tinybvh_clamp( (int)((fragment[fragIdx].bmax[a] - nodeMin) * rPlaneDist), 0, BVHBINS - 1 );
countIn[bin1]++, countOut[bin2]++;
if (bin2 == bin1)
{
// fragment fits in a single bin
binMin[bin1] = tinybvh_min( binMin[bin1], fragment[fragIdx].bmin * -1.0f );
binMin[bin1] = tinybvh_min( binMin[bin1], fragment[fragIdx].bmin );
binMax[bin1] = tinybvh_max( binMax[bin1], fragment[fragIdx].bmax );
}
else for (int j = bin1; j <= bin2; j++)
Expand All @@ -769,7 +758,6 @@ void BVH::BuildHQ( const bvhvec4* vertices, const unsigned int primCount )
bmin[a] = nodeMin + planeDist * j;
bmax[a] = j == 6 ? node.aabbMax[a] : (bmin[a] + planeDist);
Fragment orig = fragment[fragIdx];
orig.bmin *= -1.0f;
Fragment tmpFrag;
if (!ClipFrag( orig, tmpFrag, bmin, bmax, minDim )) continue;
binMin[j] = tinybvh_min( binMin[j], tmpFrag.bmin );
Expand Down Expand Up @@ -797,7 +785,6 @@ void BVH::BuildHQ( const bvhvec4* vertices, const unsigned int primCount )
}
}
}
#endif
// terminate recursion
if (splitCost >= node.CalculateNodeCost()) break;
// double-buffered partition
Expand All @@ -809,36 +796,27 @@ void BVH::BuildHQ( const bvhvec4* vertices, const unsigned int primCount )
for (unsigned int i = 0; i < node.triCount; i++)
{
const unsigned int fragIdx = triIdxA[src++];
const unsigned int bin1 = (unsigned int)((fragment[fragIdx].bmin[bestAxis] * -1.0f - nodeMin) * rPlaneDist);
const unsigned int bin1 = (unsigned int)((fragment[fragIdx].bmin[bestAxis] - nodeMin) * rPlaneDist);
const unsigned int bin2 = (unsigned int)((fragment[fragIdx].bmax[bestAxis] - nodeMin) * rPlaneDist);
if (bin2 <= bestPos) triIdxB[A++] = fragIdx; else if (bin1 > bestPos) triIdxB[--B] = fragIdx; else
{
// split straddler
Fragment tmpFrag = fragment[fragIdx];
Fragment newFrag;
tmpFrag.bmin *= -1.0f;
if (ClipFrag( tmpFrag, newFrag, tinybvh_max( bestRMin, node.aabbMin ), tinybvh_min( bestRMax, node.aabbMax ), minDim ))
{
newFrag.bmin *= -1.0f;
fragment[nextFrag] = newFrag;
triIdxB[--B] = nextFrag++;
}
fragment[nextFrag] = newFrag, triIdxB[--B] = nextFrag++;
if (ClipFrag( tmpFrag, fragment[fragIdx], tinybvh_max( bestLMin, node.aabbMin ), tinybvh_min( bestLMax, node.aabbMax ), minDim ))
{
fragment[fragIdx].bmin *= -1.0f;
triIdxB[A++] = fragIdx;
}
}
}
}
else
{
// in-place partitioning - TODO check me.
unsigned int src = node.leftFirst;
// object partitioning
const float rpd = rpd3.cell[bestAxis], nmin = nmin3.cell[bestAxis];
for (unsigned int i = 0; i < node.triCount; i++)
{
const unsigned int fr = triIdx[src];
const unsigned int fr = triIdx[src + i];
int bi = (int)(((fragment[fr].bmin[bestAxis] + fragment[fr].bmax[bestAxis]) * 0.5f - nmin) * rpd);
bi = tinybvh_clamp( bi, 0, BVHBINS - 1 );
if (bi <= (int)bestPos) triIdxB[A++] = fr; else triIdxB[--B] = fr;
Expand All @@ -863,15 +841,13 @@ void BVH::BuildHQ( const bvhvec4* vertices, const unsigned int primCount )
}
// fetch subdivision task from stack
if (taskCount == 0) break; else
{
nodeIdx = task[--taskCount].node;
sliceStart = task[taskCount].sliceStart;
nodeIdx = task[--taskCount].node,
sliceStart = task[taskCount].sliceStart,
sliceEnd = task[taskCount].sliceEnd;
}
}
// clean up
for (unsigned int i = 0; i < triCount + slack; i++) triIdx[i] = fragment[triIdx[i]].primIdx;
// Compact();
// Compact(); - TODO
refittable = false; // can't refit an SBVH
usedBVHNodes = newNodePtr;
}
Expand Down Expand Up @@ -1366,7 +1342,9 @@ void BVH::Optimize()
unsigned int Nid, valid = 0;
do
{
valid = 1, Nid = 2 + bvh_rnd() % (usedVerboseNodes - 2);
static unsigned int seed = 0x12345678;
seed ^= seed << 13, seed ^= seed >> 17, seed ^= seed << 5; // xor32
valid = 1, Nid = 2 + seed % (usedVerboseNodes - 2);
if (verbose[Nid].parent == 0 || verbose[Nid].isLeaf()) valid = 0;
if (valid) if (verbose[verbose[Nid].parent].parent == 0) valid = 0;
} while (valid == 0);
Expand Down Expand Up @@ -1824,10 +1802,12 @@ float BVH::IntersectAABB( const Ray& ray, const bvhvec3& aabbMin, const bvhvec3&
// The code relies on the availability of AVX instructions. AVX2 is not needed.
#ifdef _MSC_VER
#define LANE(a,b) a.m128_f32[b]
#define LANE8(a,b) a.m256_f32[b]
// Not using clang/g++ method under MSCC; compiler may benefit from .m128_i32.
#define ILANE(a,b) a.m128i_i32[b]
#else
#define LANE(a,b) a[b]
#define LANE8(a,b) a[b]
// Below method reduces to a single instruction.
#define ILANE(a,b) _mm_cvtsi128_si32(_mm_castps_si128( _mm_shuffle_ps(_mm_castsi128_ps( a ), _mm_castsi128_ps( a ), b)))
#endif
Expand Down Expand Up @@ -1896,16 +1876,16 @@ void BVH::BuildAVX( const bvhvec4* vertices, const unsigned int primCount )
struct FragSSE { __m128 bmin4, bmax4; };
FragSSE* frag4 = (FragSSE*)fragment;
__m256* frag8 = (__m256*)fragment;
const __m128* tris4 = (__m128*)verts;
const __m128* verts4 = (__m128*)verts;
// assign all triangles to the root node
BVHNode& root = bvhNode[0];
root.leftFirst = 0, root.triCount = triCount;
// initialize fragments and update root bounds
__m128 rootMin = max4, rootMax = max4;
for (unsigned int i = 0; i < triCount; i++)
{
const __m128 v1 = _mm_xor_ps( signFlip4, _mm_min_ps( _mm_min_ps( tris4[i * 3], tris4[i * 3 + 1] ), tris4[i * 3 + 2] ) );
const __m128 v2 = _mm_max_ps( _mm_max_ps( tris4[i * 3], tris4[i * 3 + 1] ), tris4[i * 3 + 2] );
const __m128 v1 = _mm_xor_ps( signFlip4, _mm_min_ps( _mm_min_ps( verts4[i * 3], verts4[i * 3 + 1] ), verts4[i * 3 + 2] ) );
const __m128 v2 = _mm_max_ps( _mm_max_ps( verts4[i * 3], verts4[i * 3 + 1] ), verts4[i * 3 + 2] );
frag4[i].bmin4 = v1, frag4[i].bmax4 = v2, rootMin = _mm_max_ps( rootMin, v1 ), rootMax = _mm_max_ps( rootMax, v2 ), triIdx[i] = i;
}
rootMin = _mm_xor_ps( rootMin, signFlip4 );
Expand Down
2 changes: 1 addition & 1 deletion tiny_bvh_fenster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ void Init()
#if defined(BVH_USEAVX)
bvh.BuildAVX( triangles, verts / 3 );
#else
bvh.Build( triangles, verts / 3 );
// bvh.Build( triangles, verts / 3 );
#endif

#endif
Expand Down
4 changes: 2 additions & 2 deletions tiny_bvh_renderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,9 @@ int main()
bvhvec3 p1 = C - right + up, p2 = C + right + up, p3 = C - right - up;
char line[122];
float sum;
for( int s, x, y = 0; y < 200; y += 4 )
for (int s, x, y = 0; y < 200; y += 4)
{
for( x = 0; x < 480; x += 4 )
for (x = 0; x < 480; x += 4)
{
for (sum = 0, s = 0; s < 16; s++) // 16 samples per 'pixel'
{
Expand Down
23 changes: 17 additions & 6 deletions tiny_bvh_speedtest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ int main()
#ifdef _MSC_VER
printf( "(MSVC %i build)\n", _MSC_VER );
#elif defined __clang__
printf( "(clang %i.%i build)\n", __clang_major__ , __clang_minor__ );
printf( "(clang %i.%i build)\n", __clang_major__, __clang_minor__ );
#elif defined __GNUC__
printf( "(gcc %i.%i build)\n", __GNUC__, __GNUC_MINOR__ );
#else
Expand All @@ -133,8 +133,8 @@ int main()
// determine what CPU is running the tests.
#ifdef _WIN32
char model[256]{};
for(unsigned i = 0; i < 3; ++i) __cpuidex( (int*)(model + i * 16), i + 0x80000002 , 0 );
printf( "running on %s\n", model );
for (unsigned i = 0; i < 3; ++i) __cpuidex( (int*)(model + i * 16), i + 0x80000002, 0 );
printf( "running on %s\n", model );
#endif
printf( "----------------------------------------------------------------\n" );

Expand All @@ -151,8 +151,7 @@ int main()
// measure single-core bvh construction time - reference builder
printf( "- reference builder: " );
t.reset();
for (int pass = 0; pass < 3; pass++)
bvh.Build( (bvhvec4*)triangles, verts / 3 );
for (int pass = 0; pass < 3; pass++) bvh.Build( triangles, verts / 3 );
float buildTime = t.elapsed() / 3.0f;
printf( "%7.2fms for %7i triangles ", buildTime * 1000.0f, verts / 3 );
printf( "- %6i nodes, SAH=%.2f\n", bvh.usedBVHNodes, bvh.SAHCost() );
Expand All @@ -164,13 +163,25 @@ int main()
// measure single-core bvh construction time - AVX builder
printf( "- fast AVX builder: " );
t.reset();
for (int pass = 0; pass < 3; pass++) bvh.BuildAVX( (bvhvec4*)triangles, verts / 3 );
for (int pass = 0; pass < 3; pass++) bvh.BuildAVX( triangles, verts / 3 );
float buildTimeAVX = t.elapsed() / 3.0f;
printf( "%7.2fms for %7i triangles ", buildTimeAVX * 1000.0f, verts / 3 );
printf( "- %6i nodes, SAH=%.2f\n", bvh.usedBVHNodes, bvh.SAHCost() );
#endif
#endif

#ifdef BUILD_AVX
#ifdef BVH_USEAVX
// measure single-core bvh construction time - AVX builder
printf( "- HQ (SBVH) builder: " );
t.reset();
for (int pass = 0; pass < 3; pass++) bvh.BuildHQ( triangles, verts / 3 );
float buildTimeHQ = t.elapsed() / 3.0f;
printf( "%7.2fms for %7i triangles ", buildTimeHQ * 1000.0f, verts / 3 );
printf( "- %6i nodes, SAH=%.2f\n", bvh.usedBVHNodes, bvh.SAHCost() );
#endif
#endif

#if defined EMBREE_BUILD || defined EMBREE_TRAVERSE

// convert data to correct format for Embree and build a BVH
Expand Down

0 comments on commit c813785

Please sign in to comment.