Skip to content

Commit

Permalink
Small bug fix in optimizer.
Browse files Browse the repository at this point in the history
  • Loading branch information
jbikker committed Nov 15, 2024
1 parent 0ea1131 commit e0384cb
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 16 deletions.
9 changes: 7 additions & 2 deletions tiny_bvh.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ THE SOFTWARE.
// library version
#define TINY_BVH_VERSION_MAJOR 0
#define TINY_BVH_VERSION_MINOR 8
#define TINY_BVH_VERSION_SUB 2
#define TINY_BVH_VERSION_SUB 3

// ============================================================================
//
Expand Down Expand Up @@ -356,6 +356,10 @@ class BVH
BASIC_BVH8, // Input for CWBVH. Obtained by converting WALD_32BYTE.
CWBVH // Fastest GPU rendering. Obtained by converting BASIC_BVH8.
};
// Build behavior selector. A value of this type is stored in the BVH's
// 'buildFlag' member, where it serves as a hint to the builder.
enum BuildFlags {
// No special behavior requested; the numeric value is 0.
NONE = 0, // Default building behavior (binned, SAH-driven).
// Requests maximal subdivision; the numeric value is 1.
FULLSPLIT = 1 // Split as far as possible, even when SAH doesn't agree.
};
struct BVHNode
{
// 'Traditional' 32-byte BVH node layout, as proposed by Ingo Wald.
Expand Down Expand Up @@ -537,6 +541,7 @@ class BVH
bool refittable = true; // Refits are safe only if the tree has no spatial splits.
bool frag_min_flipped = false; // AVX builders flip aabb min.
bool may_have_holes = false; // Threaded builds and MergeLeafs produce BVHs with unused nodes.
BuildFlags buildFlag = NONE; // Hint to the builder.
// keep track of allocated buffer size to avoid
// repeated allocation during layout conversion.
unsigned allocatedBVHNodes = 0;
Expand Down Expand Up @@ -1872,7 +1877,7 @@ unsigned BVH::FindBestNewPosition( const unsigned Lid )
void BVH::ReinsertNodeVerbose( const unsigned Lid, const unsigned Nid, const unsigned origin )
{
unsigned Xbest = FindBestNewPosition( Lid );
if (verbose[Xbest].parent == 0) Xbest = origin;
if (Xbest == 0 || verbose[Xbest].parent == 0) Xbest = origin;
const unsigned X1 = verbose[Xbest].parent;
BVHNodeVerbose& N = verbose[Nid];
N.left = Xbest, N.right = Lid;
Expand Down
42 changes: 28 additions & 14 deletions tiny_bvh_speedtest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
#define BUILD_REFERENCE
#define BUILD_AVX
#define BUILD_NEON
#define BUILD_SBVH
// #define BUILD_SBVH
#define TRAVERSE_2WAY_ST
#define TRAVERSE_ALT2WAY_ST
#define TRAVERSE_SOA2WAY_ST
Expand Down Expand Up @@ -381,19 +381,33 @@ int main()

// trace all rays three times to estimate average performance
// - single core version, alternative bvh layout
printf( "Optimizing BVH... " );
t.reset();
bvh.Optimize( 1000000 );
bvh.Convert( BVH::WALD_32BYTE, BVH::ALT_SOA );
printf( "done (%.2fs). New: %i nodes, SAH=%.2f\n", t.elapsed(), bvh.NodeCount( BVH::WALD_32BYTE ), bvh.SAHCost() );
for (int i = 0; i < N; i += 2) bvh.Intersect( rays[i], BVH::ALT_SOA ); // re-warm
printf( "- CPU, coherent, 2-way optimized, ST: " );
t.reset();
for (int pass = 0; pass < 3; pass++)
for (int i = 0; i < N; i++) bvh.Intersect( rays[i], BVH::ALT_SOA );
float traceTimeOpt = t.elapsed() / 3.0f;
mrays = (float)N / traceTimeOpt;
printf( "%8.1fms for %6.2fM rays => %6.2fMRay/s\n", traceTimeOpt * 1000, (float)N * 1e-6f, mrays * 1e-6f );
// Optimize the BVH and benchmark single-threaded traversal of the result.
// 'refittable' is true only when the tree has no spatial splits, so a tree
// that is NOT refittable is an SBVH, which the optimizer cannot process yet.
// Only in that case is the optimization benchmark skipped.
printf( "Optimizing BVH, regular... " );
if (!bvh.refittable)
{
	printf( "Currently can't optimize SBVH.\n" );
}
else
{
	// Pass 1: optimize the tree as it was built with the default settings.
	t.reset();
	bvh.Optimize( 1000000 );
	printf( "done (%.2fs). New: %i nodes, SAH=%.2f\n", t.elapsed(), bvh.NodeCount( BVH::WALD_32BYTE ), bvh.SAHCost() );
	// Pass 2: rebuild with the FULLSPLIT hint, optimize that tree, then
	// merge leafs again. The timing below includes the rebuild.
	printf( "Optimizing BVH, fullsplit... " );
	t.reset();
	bvh.buildFlag = BVH::FULLSPLIT; // NOTE(review): the flag is not restored to NONE afterwards - confirm later builds expect this.
	bvh.Build( triangles, verts / 3 );
	bvh.Optimize( 1000000 );
	bvh.MergeLeafs();
	printf( "done (%.2fs). New: %i nodes, SAH=%.2f\n", t.elapsed(), bvh.NodeCount( BVH::WALD_32BYTE ), bvh.SAHCost() );
	// Traversal below uses the ALT_SOA layout, so convert the optimized tree first.
	bvh.Convert( BVH::WALD_32BYTE, BVH::ALT_SOA );
	for (int i = 0; i < N; i += 2) bvh.Intersect( rays[i], BVH::ALT_SOA ); // re-warm
	printf( "- CPU, coherent, 2-way optimized, ST: " );
	t.reset();
	// Trace all rays three times and report the average time per pass.
	for (int pass = 0; pass < 3; pass++)
		for (int i = 0; i < N; i++) bvh.Intersect( rays[i], BVH::ALT_SOA );
	float traceTimeOpt = t.elapsed() / 3.0f;
	mrays = (float)N / traceTimeOpt;
	printf( "%8.1fms for %6.2fM rays => %6.2fMRay/s\n", traceTimeOpt * 1000, (float)N * 1e-6f, mrays * 1e-6f );
}

#endif

Expand Down

0 comments on commit e0384cb

Please sign in to comment.