From e0384cbcb7a69c9f301acf0e9f5852dbe7a164f3 Mon Sep 17 00:00:00 2001 From: Jacco Bikker Date: Fri, 15 Nov 2024 17:08:57 +0100 Subject: [PATCH] Small bug fix in optimizer. --- tiny_bvh.h | 9 +++++++-- tiny_bvh_speedtest.cpp | 42 ++++++++++++++++++++++++++++-------------- 2 files changed, 35 insertions(+), 16 deletions(-) diff --git a/tiny_bvh.h b/tiny_bvh.h index 3002104..7926eae 100644 --- a/tiny_bvh.h +++ b/tiny_bvh.h @@ -91,7 +91,7 @@ THE SOFTWARE. // library version #define TINY_BVH_VERSION_MAJOR 0 #define TINY_BVH_VERSION_MINOR 8 -#define TINY_BVH_VERSION_SUB 2 +#define TINY_BVH_VERSION_SUB 3 // ============================================================================ // @@ -356,6 +356,10 @@ class BVH BASIC_BVH8, // Input for CWBVH. Obtained by converting WALD_32BYTE. CWBVH // Fastest GPU rendering. Obtained by converting BASIC_BVH8. }; + enum BuildFlags { + NONE = 0, // Default building behavior (binned, SAH-driven). + FULLSPLIT = 1 // Split as far as possible, even when SAH doesn't agree. + }; struct BVHNode { // 'Traditional' 32-byte BVH node layout, as proposed by Ingo Wald. @@ -537,6 +541,7 @@ class BVH bool refittable = true; // Refits are safe only if the tree has no spatial splits. bool frag_min_flipped = false; // AVX builders flip aabb min. bool may_have_holes = false; // Threaded builds and MergeLeafs produce BVHs with unused nodes. + BuildFlags buildFlag = NONE; // Hint to the builder. // keep track of allocated buffer size to avoid // repeated allocation during layout conversion. unsigned allocatedBVHNodes = 0; @@ -1872,7 +1877,7 @@ unsigned BVH::FindBestNewPosition( const unsigned Lid ) void BVH::ReinsertNodeVerbose( const unsigned Lid, const unsigned Nid, const unsigned origin ) { unsigned Xbest = FindBestNewPosition( Lid ); - if (verbose[Xbest].parent == 0) Xbest = origin; + if (Xbest == 0 || verbose[Xbest].parent == 0) Xbest = origin; const unsigned X1 = verbose[Xbest].parent; BVHNodeVerbose& N = verbose[Nid]; N.left = Xbest, N.right = Lid; diff --git a/tiny_bvh_speedtest.cpp b/tiny_bvh_speedtest.cpp index 15838a4..e94235d 100644 --- a/tiny_bvh_speedtest.cpp +++ b/tiny_bvh_speedtest.cpp @@ -25,7 +25,7 @@ #define BUILD_REFERENCE #define BUILD_AVX #define BUILD_NEON -#define BUILD_SBVH +// #define BUILD_SBVH #define TRAVERSE_2WAY_ST #define TRAVERSE_ALT2WAY_ST #define TRAVERSE_SOA2WAY_ST @@ -381,19 +381,33 @@ int main() // trace all rays three times to estimate average performance // - single core version, alternative bvh layout - printf( "Optimizing BVH... " ); - t.reset(); - bvh.Optimize( 1000000 ); - bvh.Convert( BVH::WALD_32BYTE, BVH::ALT_SOA ); - printf( "done (%.2fs). New: %i nodes, SAH=%.2f\n", t.elapsed(), bvh.NodeCount( BVH::WALD_32BYTE ), bvh.SAHCost() ); - for (int i = 0; i < N; i += 2) bvh.Intersect( rays[i], BVH::ALT_SOA ); // re-warm - printf( "- CPU, coherent, 2-way optimized, ST: " ); - t.reset(); - for (int pass = 0; pass < 3; pass++) - for (int i = 0; i < N; i++) bvh.Intersect( rays[i], BVH::ALT_SOA ); - float traceTimeOpt = t.elapsed() / 3.0f; - mrays = (float)N / traceTimeOpt; - printf( "%8.1fms for %6.2fM rays => %6.2fMRay/s\n", traceTimeOpt * 1000, (float)N * 1e-6f, mrays * 1e-6f ); + printf( "Optimizing BVH, regular... " ); + if (bvh.refittable) + { + printf( "Currently can't optimize SBVH.\n" ); + } + else + { + t.reset(); + bvh.Optimize( 1000000 ); + printf( "done (%.2fs). New: %i nodes, SAH=%.2f\n", t.elapsed(), bvh.NodeCount( BVH::WALD_32BYTE ), bvh.SAHCost() ); + printf( "Optimizing BVH, fullsplit... " ); + t.reset(); + bvh.buildFlag = BVH::FULLSPLIT; + bvh.Build( triangles, verts / 3 ); + bvh.Optimize( 1000000 ); + bvh.MergeLeafs(); + printf( "done (%.2fs). New: %i nodes, SAH=%.2f\n", t.elapsed(), bvh.NodeCount( BVH::WALD_32BYTE ), bvh.SAHCost() ); + bvh.Convert( BVH::WALD_32BYTE, BVH::ALT_SOA ); + for (int i = 0; i < N; i += 2) bvh.Intersect( rays[i], BVH::ALT_SOA ); // re-warm + printf( "- CPU, coherent, 2-way optimized, ST: " ); + t.reset(); + for (int pass = 0; pass < 3; pass++) + for (int i = 0; i < N; i++) bvh.Intersect( rays[i], BVH::ALT_SOA ); + float traceTimeOpt = t.elapsed() / 3.0f; + mrays = (float)N / traceTimeOpt; + printf( "%8.1fms for %6.2fM rays => %6.2fMRay/s\n", traceTimeOpt * 1000, (float)N * 1e-6f, mrays * 1e-6f ); + } #endif