Skip to content

Commit

Permalink
Further improvements to the AoS traversal scheme.
Browse files: browse the repository at this point in the history
  • Loading branch information
jbikker committed Nov 7, 2024
1 parent bd7b54e commit 0baa4ea
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 10 deletions.
33 changes: 26 additions & 7 deletions tiny_bvh.h
Original file line number Diff line number Diff line change
Expand Up @@ -915,7 +915,25 @@ int BVH::Intersect_AltSoA( Ray& ray ) const
steps++;
if (node->isLeaf())
{
for (unsigned int i = 0; i < node->triCount; i++) IntersectTri( ray, triIdx[node->firstTri + i] );
for (unsigned int i = 0; i < node->triCount; i++)
{
const unsigned int tidx = triIdx[node->firstTri + i], vertIdx = tidx * 3;
const bvhvec3 edge1 = verts[vertIdx + 1] - verts[vertIdx];
const bvhvec3 edge2 = verts[vertIdx + 2] - verts[vertIdx];
const bvhvec3 h = cross( ray.D, edge2 );
const float a = dot( edge1, h );
if (fabs( a ) < 0.0000001f) continue; // ray parallel to triangle
const float f = 1 / a;
const bvhvec3 s = ray.O - bvhvec3( verts[vertIdx] );
const float u = f * dot( s, h );
if (u < 0 || u > 1) continue;
const bvhvec3 q = cross( s, edge1 );
const float v = f * dot( ray.D, q );
if (v < 0 || u + v > 1) continue;
const float t = f * dot( edge2, q );
if (t < 0 || t > ray.hit.t) continue;
ray.hit.t = t, ray.hit.u = u, ray.hit.v = v, ray.hit.prim = tidx;
}
if (stackPtr == 0) break; else node = stack[--stackPtr];
continue;
}
Expand All @@ -938,12 +956,13 @@ int BVH::Intersect_AltSoA( Ray& ray ) const
x4 = _mm_shuffle_ps( t0, t2, _MM_SHUFFLE( 1, 0, 1, 0 ) );
y4 = _mm_shuffle_ps( t0, t2, _MM_SHUFFLE( 3, 2, 3, 2 ) );
z4 = _mm_shuffle_ps( t1, t3, _MM_SHUFFLE( 1, 0, 1, 0 ) );
const __m128 min4 = _mm_max_ps( _mm_max_ps( x4, y4 ), z4 );
const __m128 max4 = _mm_min_ps( _mm_min_ps( x4, y4 ), z4 );
const float tmina = LANE( min4, 0 ), tmaxa = LANE( max4, 1 );
const float tminb = LANE( min4, 2 ), tmaxb = LANE( max4, 3 );
float dist1 = (tmaxa >= tmina && tmina < ray.hit.t && tmaxa >= 0) ? tmina : 1e30f;
float dist2 = (tmaxb >= tminb && tminb < ray.hit.t && tmaxb >= 0) ? tminb : 1e30f;
const __m128 min4 = _mm_max_ps( _mm_max_ps( _mm_max_ps( x4, y4 ), z4 ), _mm_setzero_ps() );
const __m128 max4 = _mm_min_ps( _mm_min_ps( _mm_min_ps( x4, y4 ), z4 ), _mm_set1_ps( ray.hit.t ) );
// TODO: use a shuffle here to do the comparison / select with SSE, then extract dist1 and dist2.
const float tmina_0 = LANE( min4, 0 ), tmaxa_1 = LANE( max4, 1 );
const float tminb_2 = LANE( min4, 2 ), tmaxb_3 = LANE( max4, 3 );
float dist1 = tmaxa_1 >= tmina_0 ? tmina_0 : 1e30f;
float dist2 = tmaxb_3 >= tminb_2 ? tminb_2 : 1e30f;
unsigned int lidx = node->left, ridx = node->right;
if (dist1 > dist2)
{
Expand Down
6 changes: 3 additions & 3 deletions tiny_bvh_speedtest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,16 @@
#define SCRHEIGHT 600

// tests to perform
#define BUILD_REFERENCE
// #define BUILD_REFERENCE
#define BUILD_AVX
#define NANORT_BUILD
// #define NANORT_BUILD // disabled by default to avoid warnings.
#define TRAVERSE_2WAY_ST
#define TRAVERSE_ALT2WAY_ST
#define TRAVERSE_SOA2WAY_ST
#define TRAVERSE_2WAY_MT
#define TRAVERSE_2WAY_MT_PACKET
#define TRAVERSE_2WAY_MT_DIVERGENT
#define NANORT_TRAVERSE
// #define NANORT_TRAVERSE
// #define EMBREE_BUILD // win64-only for now.
// #define EMBREE_TRAVERSE // win64-only for now.

Expand Down

0 comments on commit 0baa4ea

Please sign in to comment.