Skip to content

Commit

Permalink
Save/load for cwbvh, proper path state in wavefront.
Browse files Browse the repository at this point in the history
  • Loading branch information
jbikker committed Dec 22, 2024
1 parent 8feb5c2 commit 4e51514
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 67 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ Apart from the default BVH layout (simply named ````BVH````), several other layo

A BVH in the ````BVH```` format may be _refitted_, in case the triangles moved, using ````BVH::Refit````. Refitting is substantially faster than rebuilding and works well if the animation is subtle. Refitting does not work if polygon counts change.

New in version 1.1.3: 'Self-contained' formats may be serialized and de-serialized via ````::Save```` and ````::Load````. Currently this is supported for ````BVH8_CWBVH````, which stores vertex data in a custom format and thus does not rely on the input vertices for traversal.

# How To Use
The library ````tiny_bvh.h```` is designed to be easy to use. Please have a look at tiny_bvh_minimal.cpp for an example. A Visual Studio 'solution' (.sln/.vcxproj) is included, as well as a CMake file. That being said: each example consists of only a single source file, which can be compiled with clang or g++, e.g.:

Expand All @@ -71,7 +73,7 @@ The **performance measurement tool** can be compiled with:

````g++ -std=c++20 -mavx -Ofast tiny_bvh_speedtest.cpp -o tiny_bvh_speedtest````

# Version 1.1.2
# Version 1.1.3

Version 1.1.0 introduced a <ins>change to the API</ins>. The single BVH class with multiple layouts has been replaced with a BVH class per layout. You can simply instantiate the desired layout; conversion (and data ownership) is then handled properly by the library. Examples:

Expand Down
100 changes: 50 additions & 50 deletions tiny_bvh.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ THE SOFTWARE.
// library version
#define TINY_BVH_VERSION_MAJOR 1
#define TINY_BVH_VERSION_MINOR 1
#define TINY_BVH_VERSION_SUB 2
#define TINY_BVH_VERSION_SUB 3

// ============================================================================
//
Expand Down Expand Up @@ -907,7 +907,7 @@ class BVH8_CWBVH : public BVHBase
uint32_t allocatedBlocks = 0; // node data is stored in blocks of 16 byte.
uint32_t usedBlocks = 0; // actually used blocks.
BVH8 bvh8; // BVH8_CWBVH is created from BVH8 and uses its data.
bool ownBVH8 = true; // False when ConvertFrom receives an external bvh8.
bool ownBVH8 = true; // false when ConvertFrom receives an external bvh8.
};

// BLASInstance: A TLAS is built over BLAS instances, where a single BLAS can be
Expand Down Expand Up @@ -2176,18 +2176,18 @@ void BVH_Verbose::MergeLeafs()
// BVH_GPU implementation
// ----------------------------------------------------------------------------

BVH_GPU::~BVH_GPU()
BVH_GPU::~BVH_GPU()
{
if (!ownBVH) bvh = BVH(); // clear out pointers we don't own.
AlignedFree( bvhNode );
AlignedFree( bvhNode );
}

void BVH_GPU::Build( const bvhvec4* vertices, const uint32_t primCount )
{
Build( bvhvec4slice( vertices, primCount * 3, sizeof( bvhvec4 ) ) );
void BVH_GPU::Build( const bvhvec4* vertices, const uint32_t primCount )
{
Build( bvhvec4slice( vertices, primCount * 3, sizeof( bvhvec4 ) ) );
}
void BVH_GPU::Build( const bvhvec4slice& vertices )
{
void BVH_GPU::Build( const bvhvec4slice& vertices )
{
bvh.BuildDefault( vertices );
ConvertFrom( bvh );
}
Expand All @@ -2196,7 +2196,7 @@ void BVH_GPU::ConvertFrom( const BVH& original )
{
// get a copy of the original bvh
if (&original != &bvh) ownBVH = false; // bvh isn't ours; don't delete in destructor.
bvh = original;
bvh = original;
// allocate space
const uint32_t spaceNeeded = original.usedNodes;
if (allocatedNodes < spaceNeeded)
Expand Down Expand Up @@ -2285,18 +2285,18 @@ int32_t BVH_GPU::Intersect( Ray& ray ) const
// BVH_SoA implementation
// ----------------------------------------------------------------------------

BVH_SoA::~BVH_SoA()
BVH_SoA::~BVH_SoA()
{
if (!ownBVH) bvh = BVH(); // clear out pointers we don't own.
AlignedFree( bvhNode );
AlignedFree( bvhNode );
}

void BVH_SoA::Build( const bvhvec4* vertices, const uint32_t primCount )
{
Build( bvhvec4slice( vertices, primCount * 3, sizeof( bvhvec4 ) ) );
void BVH_SoA::Build( const bvhvec4* vertices, const uint32_t primCount )
{
Build( bvhvec4slice( vertices, primCount * 3, sizeof( bvhvec4 ) ) );
}
void BVH_SoA::Build( const bvhvec4slice& vertices )
{
void BVH_SoA::Build( const bvhvec4slice& vertices )
{
bvh.context = context; // properly propagate context to fix issue #66.
bvh.BuildDefault( vertices );
ConvertFrom( bvh );
Expand All @@ -2306,7 +2306,7 @@ void BVH_SoA::ConvertFrom( const BVH& original )
{
// get a copy of the original bvh
if (&original != &bvh) ownBVH = false; // bvh isn't ours; don't delete in destructor.
bvh = original;
bvh = original;
// allocate space
const uint32_t spaceNeeded = bvh.usedNodes;
if (allocatedNodes < spaceNeeded)
Expand Down Expand Up @@ -2355,15 +2355,15 @@ void BVH_SoA::ConvertFrom( const BVH& original )
// BVH4 implementation
// ----------------------------------------------------------------------------

BVH4::~BVH4()
BVH4::~BVH4()
{
if (!ownBVH) bvh = BVH(); // clear out pointers we don't own.
AlignedFree( bvh4Node );
AlignedFree( bvh4Node );
}

void BVH4::Build( const bvhvec4* vertices, const uint32_t primCount )
{
Build( bvhvec4slice( vertices, primCount * 3, sizeof( bvhvec4 ) ) );
Build( bvhvec4slice( vertices, primCount * 3, sizeof( bvhvec4 ) ) );
}
void BVH4::Build( const bvhvec4slice& vertices )
{
Expand All @@ -2376,7 +2376,7 @@ void BVH4::ConvertFrom( const BVH& original )
{
// get a copy of the original bvh
if (&original != &bvh) ownBVH = false; // bvh isn't ours; don't delete in destructor.
bvh = original;
bvh = original;
// allocate space
const uint32_t spaceNeeded = original.usedNodes;
if (allocatedNodes < spaceNeeded)
Expand Down Expand Up @@ -2458,19 +2458,19 @@ int32_t BVH4::Intersect( Ray& ray ) const
// BVH4_CPU implementation
// ----------------------------------------------------------------------------

BVH4_CPU::~BVH4_CPU()
BVH4_CPU::~BVH4_CPU()
{
if (!ownBVH4) bvh4 = BVH4(); // clear out pointers we don't own.
AlignedFree( bvh4Node );
AlignedFree( bvh4Node );
AlignedFree( bvh4Tris );
}

void BVH4_CPU::Build( const bvhvec4* vertices, const uint32_t primCount )
void BVH4_CPU::Build( const bvhvec4* vertices, const uint32_t primCount )
{
Build( bvhvec4slice( vertices, primCount * 3, sizeof( bvhvec4 ) ) );
Build( bvhvec4slice( vertices, primCount * 3, sizeof( bvhvec4 ) ) );
}
void BVH4_CPU::Build( const bvhvec4slice& vertices )
{
void BVH4_CPU::Build( const bvhvec4slice& vertices )
{
bvh4.context = context; // properly propagate context to fix issue #66.
bvh4.Build( vertices );
ConvertFrom( bvh4 );
Expand All @@ -2480,7 +2480,7 @@ void BVH4_CPU::ConvertFrom( const BVH4& original )
{
// get a copy of the original bvh4
if (&original != &bvh4) ownBVH4 = false; // bvh isn't ours; don't delete in destructor.
bvh4 = original;
bvh4 = original;
// Convert a 4-wide BVH to a format suitable for CPU traversal.
// See "Faster Incoherent Ray Traversal Using 8-Wide AVX Instructions",
// Attila T. Áfra, 2013.
Expand Down Expand Up @@ -2564,18 +2564,18 @@ void BVH4_CPU::ConvertFrom( const BVH4& original )
// BVH4_GPU implementation
// ----------------------------------------------------------------------------

BVH4_GPU::~BVH4_GPU()
BVH4_GPU::~BVH4_GPU()
{
if (!ownBVH4) bvh4 = BVH4(); // clear out pointers we don't own.
AlignedFree( bvh4Data );
AlignedFree( bvh4Data );
}

void BVH4_GPU::Build( const bvhvec4* vertices, const uint32_t primCount )
{
Build( bvhvec4slice( vertices, primCount * 3, sizeof( bvhvec4 ) ) );
void BVH4_GPU::Build( const bvhvec4* vertices, const uint32_t primCount )
{
Build( bvhvec4slice( vertices, primCount * 3, sizeof( bvhvec4 ) ) );
}
void BVH4_GPU::Build( const bvhvec4slice& vertices )
{
void BVH4_GPU::Build( const bvhvec4slice& vertices )
{
bvh4.context = context; // properly propagate context to fix issue #66.
bvh4.Build( vertices );
ConvertFrom( bvh4 );
Expand Down Expand Up @@ -2816,18 +2816,18 @@ int32_t BVH4_GPU::Intersect( Ray& ray ) const
// BVH8 implementation
// ----------------------------------------------------------------------------

BVH8::~BVH8()
BVH8::~BVH8()
{
if (!ownBVH) bvh = BVH(); // clear out pointers we don't own.
AlignedFree( bvh8Node );
}

void BVH8::Build( const bvhvec4* vertices, const uint32_t primCount )
{
Build( bvhvec4slice( vertices, primCount * 3, sizeof( bvhvec4 ) ) );
void BVH8::Build( const bvhvec4* vertices, const uint32_t primCount )
{
Build( bvhvec4slice( vertices, primCount * 3, sizeof( bvhvec4 ) ) );
}
void BVH8::Build( const bvhvec4slice& vertices )
{
void BVH8::Build( const bvhvec4slice& vertices )
{
bvh.context = context; // properly propagate context to fix issue #66.
bvh.BuildDefault( vertices );
ConvertFrom( bvh );
Expand All @@ -2837,7 +2837,7 @@ void BVH8::ConvertFrom( const BVH& original )
{
// get a copy of the original
if (&original != &bvh) ownBVH = false; // bvh isn't ours; don't delete in destructor.
bvh = original;
bvh = original;
// allocate space
// Note: The safe upper bound here is usedNodes when converting an existing
// BVH2, but we need triCount * 2 to be safe in later conversions, e.g. to
Expand Down Expand Up @@ -2963,7 +2963,7 @@ int32_t BVH8::Intersect( Ray& ray ) const
// BVH8_CWBVH implementation
// ----------------------------------------------------------------------------

BVH8_CWBVH::~BVH8_CWBVH()
BVH8_CWBVH::~BVH8_CWBVH()
{
if (!ownBVH8) bvh8 = BVH8(); // clear out pointers we don't own.
AlignedFree( bvh8Data );
Expand Down Expand Up @@ -2994,12 +2994,12 @@ bool BVH8_CWBVH::Load( const char* fileName )
return true;
}

void BVH8_CWBVH::Build( const bvhvec4* vertices, const uint32_t primCount )
{
Build( bvhvec4slice( vertices, primCount * 3, sizeof( bvhvec4 ) ) );
void BVH8_CWBVH::Build( const bvhvec4* vertices, const uint32_t primCount )
{
Build( bvhvec4slice( vertices, primCount * 3, sizeof( bvhvec4 ) ) );
}
void BVH8_CWBVH::Build( const bvhvec4slice& vertices )
{
void BVH8_CWBVH::Build( const bvhvec4slice& vertices )
{
bvh8.context = context; // properly propagate context to fix issue #66.
bvh8.Build( vertices );
ConvertFrom( bvh8 );
Expand All @@ -3009,7 +3009,7 @@ void BVH8_CWBVH::ConvertFrom( BVH8& original )
{
// get a copy of the original bvh8
if (&original != &bvh8) ownBVH8 = false; // bvh isn't ours; don't delete in destructor.
bvh8 = original;
bvh8 = original;
// Convert a BVH8 to the format specified in: "Efficient Incoherent Ray
// Traversal on GPUs Through Compressed Wide BVHs", Ylitie et al. 2017.
// Adapted from code by "AlanWBFT".
Expand Down
4 changes: 2 additions & 2 deletions tiny_bvh_gpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ static int triCount = 0, frameIdx = 0, spp = 0;
static Kernel* init, * clear, * generate, * extend, * shade;
static Kernel* updateCounters1, * updateCounters2, * traceShadows, * finalize;
static Buffer* pixels, * accumulator, * raysIn, * raysOut, * connections, * triData;
static Buffer* cwbvhNodes = 0, * cwbvhTris = 0, *noise = 0;
static Buffer* cwbvhNodes = 0, * cwbvhTris = 0, * noise = 0;
static size_t computeUnits;
static uint32_t* blueNoise;

Expand Down Expand Up @@ -60,7 +60,7 @@ void AddQuad( const bvhvec3 pos, const float w, const float d, int c )
// Blue noise from file
void LoadBlueNoise()
{
std::fstream s{ "blue_noise_128x128x8_2d.raw", s.binary | s.in };
std::fstream s{ "blue_noise_128x128x8_2d.raw", s.binary | s.in };
s.read( (char*)blueNoise, 128 * 128 * 4 );
}

Expand Down
30 changes: 16 additions & 14 deletions wavefront.cl
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ struct RenderData
__global volatile int extendTasks, shadeTasks, connectTasks;
__global struct RenderData rd;


// Xor32 RNG
uint WangHash( uint s ) { s = (s ^ 61) ^ (s >> 16), s *= 9, s = s ^ (s >> 4), s *= 0x27d4eb2d; return s ^ (s >> 15); }
uint RandomUInt( uint* seed ) { *seed ^= *seed << 13, * seed ^= *seed >> 17, * seed ^= *seed << 5; return *seed; }
Expand Down Expand Up @@ -63,10 +62,12 @@ float3 CosWeightedDiffReflection( const float3 N, uint* seed )
}

// PathState: path throughput, current extension ray, pixel index
#define PATH_LAST_SPECULAR 1
#define PATH_VIA_DIFFUSE 2
struct PathState
{
float4 T; // xyz = rgb, postponed pdf in w
float4 O; // pixel index and path depth in O.w
float4 T; // xyz = rgb, postponed MIS pdf in w
float4 O; // O.w: 24-bit pixel index, 4-bit path depth, 4-bit path flags
float4 D; // t in D.w
float4 hit;
};
Expand Down Expand Up @@ -117,8 +118,8 @@ void kernel Generate( global struct PathState* raysOut, uint frameSeed )
const float u = ((float)x + RandomFloat( &seed )) / (float)get_global_size( 0 );
const float v = ((float)y + RandomFloat( &seed )) / (float)get_global_size( 1 );
const float4 P = rd.p0 + u * (rd.p1 - rd.p0) + v * (rd.p2 - rd.p0);
raysOut[id].T = (float4)(1, 1, 1, -1 /* pdf, or -1 for specular vertex */);
raysOut[id].O = (float4)(rd.eye.xyz, as_float( id << 4 /* low bits: depth */ ));
raysOut[id].T = (float4)(1, 1, 1, 1 );
raysOut[id].O = (float4)(rd.eye.xyz, as_float( (id << 8) + PATH_LAST_SPECULAR ));
raysOut[id].D = (float4)(fast_normalize( P.xyz - rd.eye.xyz ), 1e30f);
raysOut[id].hit = (float4)(1e30f, 0, 0, as_float( 0 ));
}
Expand Down Expand Up @@ -161,13 +162,14 @@ void kernel Shade( global float4* accumulator,
const int pathId = atomic_dec( &shadeTasks ) - 1;
if (pathId < 0) break;
// fetch path data
float4 T4 = raysIn[pathId].T; // xyz = rgb, postponed pdf in w
float4 O4 = raysIn[pathId].O; // pixel index in O.w
float4 D4 = raysIn[pathId].D; // t in D.w
float4 T4 = raysIn[pathId].T; // xyz = rgb, postponed pdf in w
float4 O4 = raysIn[pathId].O; // pixel index in O.w
float4 D4 = raysIn[pathId].D; // t in D.w
float4 hit = raysIn[pathId].hit; // dist, u, v, prim
// prepare for shading
uint depth = as_uint( O4.w ) & 15;
uint pixelIdx = as_uint( O4.w ) >> 4;
uint pathState = as_uint( O4.w );
uint pixelIdx = pathState >> 8;
uint depth = (pathState >> 4) & 15;
uint seed = WangHash( as_uint( O4.w ) + rd.frameIdx * 17117 );
float3 T = T4.xyz;
float t = hit.x;
Expand All @@ -186,7 +188,7 @@ void kernel Shade( global float4* accumulator,
float3 lightColor = (float3)(20);
if (mat == 1 /* light source */)
{
if (T4.w == -1) accumulator[pixelIdx] += (float4)(T * lightColor, 1);
if (pathState & PATH_LAST_SPECULAR) accumulator[pixelIdx] += (float4)(T * lightColor, 1);
continue;
}
float3 vert0 = v0.xyz, vert1 = verts[vertIdx + 1].xyz, vert2 = verts[vertIdx + 2].xyz;
Expand All @@ -201,7 +203,7 @@ void kernel Shade( global float4* accumulator,
uint newRayIdx = atomic_inc( &extendTasks );
float3 R = Reflect( D, N );
raysOut[newRayIdx].T = (float4)(T * diff, -1 /* mark vertex as specular */);
raysOut[newRayIdx].O = (float4)(I + R * EPSILON, as_float( (pixelIdx << 4) + depth + 1 ));
raysOut[newRayIdx].O = (float4)(I + R * EPSILON, as_float( (pixelIdx << 8) + ((depth + 1) << 4) + PATH_LAST_SPECULAR ));
raysOut[newRayIdx].D = (float4)(R, 1e30f);
continue;
}
Expand All @@ -221,14 +223,14 @@ void kernel Shade( global float4* accumulator,
shadowOut[newShadowIdx].D = (float4)(L, dist - 2 * EPSILON);
}
// indirect illumination: diffuse bounce
if (depth < 3)
if (depth < 3 && (pathState & PATH_VIA_DIFFUSE) == 0 )
{
uint newRayIdx = atomic_inc( &extendTasks );
float3 R = CosWeightedDiffReflection( N, &seed );
float PDF = dot( N, R ) * INVPI;
T *= dot( N, R ) * BRDF * native_recip( PDF );
raysOut[newRayIdx].T = (float4)(T, 1);
raysOut[newRayIdx].O = (float4)(I + R * EPSILON, as_float( (pixelIdx << 4) + depth + 1 ));
raysOut[newRayIdx].O = (float4)(I + R * EPSILON, as_float( (pixelIdx << 8) + ((depth + 1) << 4) + PATH_VIA_DIFFUSE ));
raysOut[newRayIdx].D = (float4)(R, 1e30f);
}
}
Expand Down

0 comments on commit 4e51514

Please sign in to comment.