Skip to content

Commit

Permalink
Adding gpu renderer, in progress.
Browse files Browse the repository at this point in the history
  • Loading branch information
jbikker committed Dec 17, 2024
1 parent a3010b4 commit 4bdf197
Show file tree
Hide file tree
Showing 10 changed files with 513 additions and 15 deletions.
125 changes: 125 additions & 0 deletions tiny_bvh_gpu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
// This example shows how to build a basic GPU path tracer using
// tinybvh. TinyOCL is used to render on the GPU using OpenCL.

#define FENSTER_APP_IMPLEMENTATION
#define SCRWIDTH 1280
#define SCRHEIGHT 800
#include "external/fenster.h" // https://github.com/zserge/fenster

// This application uses tinybvh - And this file will include the implementation.
#define TINYBVH_IMPLEMENTATION
#include "tiny_bvh.h"
using namespace tinybvh;

// This application uses tinyocl - And this file will include the implementation.
#define TINY_OCL_IMPLEMENTATION
#include "tiny_ocl.h"

// Other includes
#include <fstream>

// Application variables

static BVH4_GPU bvh;
static bvhvec4* tris = 0;
static int triCount = 0, frameIdx = 0, spp = 0;
static Kernel* reset, * generate, * extend, * shade, * traceShadows, * finalize;
static Buffer* pixels, * accumulator, * raysIn, * raysOut, * connections;

// View pyramid for a pinhole camera
struct RenderData
{
bvhvec4 eye = bvhvec4( 0, 30, 0, 0 );
bvhvec4 view = bvhvec4( -1, 0, 0, 0 );
bvhvec4 C, p0, p1, p2;
uint32_t frameIdx, dummy1, dummy2, dummy3;
} rd;

// Scene management - Append a file, with optional position, scale and color override, tinyfied
void AddMesh( const char* file, float scale = 1, bvhvec3 pos = {}, int c = 0, int N = 0 )
{
std::fstream s{ file, s.binary | s.in }; s.read( (char*)&N, 4 );
bvhvec4* data = (bvhvec4*)tinybvh::malloc64( (N + triCount) * 48 );
if (tris) memcpy( data, tris, triCount * 48 ), tinybvh::free64( tris );
tris = data, s.read( (char*)tris + triCount * 48, N * 48 ), triCount += N;
for (int* b = (int*)tris + (triCount - N) * 12, i = 0; i < N * 3; i++)
*(bvhvec3*)b = *(bvhvec3*)b * scale + pos, b[3] = c ? c : b[3], b += 4;
}

// Application init
void Init()
{
// create OpenCL kernels
reset = new Kernel( "wavefront.cl", "ResetCounters" );
generate = new Kernel( "wavefront.cl", "Generate" );
extend = new Kernel( "wavefront.cl", "Extend" );
shade = new Kernel( "wavefront.cl", "Shade" );
traceShadows = new Kernel( "wavefront.cl", "Connect" );
finalize = new Kernel( "wavefront.cl", "Finalize" );
// create OpenCL buffers
int N = SCRWIDTH * SCRHEIGHT;
pixels = new Buffer( N * sizeof( uint32_t ) );
accumulator = new Buffer( N * sizeof( bvhvec4 ) );
raysIn = new Buffer( N * sizeof( bvhvec4 ) * 4 );
raysOut = new Buffer( N * sizeof( bvhvec4 ) * 4 );
connections = new Buffer( N * sizeof( bvhvec4 ) * 3 );
// set kernel arguments
reset->SetArguments( N );
// load raw vertex data
AddMesh( "./testdata/cryteksponza.bin", 1, bvhvec3( 0 ), 0xffffff );
AddMesh( "./testdata/lucy.bin", 1.1f, bvhvec3( -2, 4.1f, -3 ), 0xaaaaff );
// build bvh (here: 'compressed wide bvh', for efficient GPU rendering)
bvh.Build( tris, triCount );
// load camera position / direction from file
std::fstream t = std::fstream{ "camera_gpu.bin", t.binary | t.in };
if (!t.is_open()) return;
t.read( (char*)&rd, sizeof( rd ) );
}

// Keyboard handling
bool UpdateCamera( float delta_time_s, fenster& f )
{
bvhvec3 right = normalize( cross( bvhvec3( 0, 1, 0 ), rd.view ) ), up = 0.8f * cross( rd.view, right );
// get camera controls.
bool moved = false;
if (f.keys['A']) rd.eye += right * -1.0f * delta_time_s * 10, moved = true;
if (f.keys['D']) rd.eye += right * delta_time_s * 10, moved = true;
if (f.keys['W']) rd.eye += rd.view * delta_time_s * 10, moved = true;
if (f.keys['S']) rd.eye += rd.view * -1.0f * delta_time_s * 10, moved = true;
if (f.keys['R']) rd.eye += up * delta_time_s * 20, moved = true;
if (f.keys['F']) rd.eye += up * -1.0f * delta_time_s * 20, moved = true;
if (f.keys[20]) rd.view = normalize( rd.view + right * -1.0f * delta_time_s ), moved = true;
if (f.keys[19]) rd.view = normalize( rd.view + right * delta_time_s ), moved = true;
if (f.keys[17]) rd.view = normalize( rd.view + up * -1.0f * delta_time_s ), moved = true;
if (f.keys[18]) rd.view = normalize( rd.view + up * delta_time_s ), moved = true;
// recalculate right, up
right = normalize( cross( bvhvec3( 0, 1, 0 ), rd.view ) ), up = 0.8f * cross( rd.view, right );
bvhvec3 C = rd.eye + 1.2f * rd.view;
rd.p0 = C - right + up, rd.p1 = C + right + up, rd.p2 = C - right - up;
return moved;
}

// Application Tick
void Tick( float delta_time_s, fenster& f, uint32_t* buf )
{
// handle user input and update camera
if (UpdateCamera( delta_time_s, f ) || frameIdx++ == 0)
{
// memset( accumulator, 0, SCRWIDTH * SCRHEIGHT * sizeof( bvhvec3 ) );
spp = 1;
}
// render on the GPU
// ..
// print frame time / rate in window title
char title[50];
sprintf( title, "tiny_bvh %.2f s %.2f Hz", delta_time_s, 1.0f / delta_time_s );
fenster_update_title( &f, title );
}

// Application Shutdown
void Shutdown()
{
// save camera position / direction to file
std::fstream s = std::fstream{ "camera.bin", s.binary | s.out };
s.write( (char*)&rd, sizeof( rd ) );
}
6 changes: 5 additions & 1 deletion tiny_bvh_speedtest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -687,7 +687,11 @@ int main()
#if defined TRAVERSE_OPTIMIZED_ST || defined TRAVERSE_4WAY_OPTIMIZED

printf( "Optimized BVH performance - Optimizing... " );
if (!bvh_verbose) bvh_verbose = new BVH_Verbose( *bvh );
if (!bvh_verbose)
{
bvh_verbose = new BVH_Verbose();
bvh_verbose->ConvertFrom( *bvh );
}
t.reset();
bvh_verbose->Optimize( 1000000 ); // optimize the raw SBVH
bvh->ConvertFrom( *bvh_verbose );
Expand Down
10 changes: 10 additions & 0 deletions tiny_bvh_test.sln
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "tiny_bvh_pt", "vcproj\tiny_
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "tiny_bvh_anim", "vcproj\tiny_bvh_anim.vcxproj", "{2DC14EA1-D6E9-45F1-A6CC-6E0A0BAD8A9B}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "tiny_bvh_gpu", "vcproj\tiny_bvh_gpu.vcxproj", "{4B0A219D-68D0-41EA-A0C4-8EB9E6171218}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Expand Down Expand Up @@ -71,6 +73,14 @@ Global
{2DC14EA1-D6E9-45F1-A6CC-6E0A0BAD8A9B}.Release|x64.Build.0 = Release|x64
{2DC14EA1-D6E9-45F1-A6CC-6E0A0BAD8A9B}.Release|x86.ActiveCfg = Release|Win32
{2DC14EA1-D6E9-45F1-A6CC-6E0A0BAD8A9B}.Release|x86.Build.0 = Release|Win32
{4B0A219D-68D0-41EA-A0C4-8EB9E6171218}.Debug|x64.ActiveCfg = Debug|x64
{4B0A219D-68D0-41EA-A0C4-8EB9E6171218}.Debug|x64.Build.0 = Debug|x64
{4B0A219D-68D0-41EA-A0C4-8EB9E6171218}.Debug|x86.ActiveCfg = Debug|Win32
{4B0A219D-68D0-41EA-A0C4-8EB9E6171218}.Debug|x86.Build.0 = Debug|Win32
{4B0A219D-68D0-41EA-A0C4-8EB9E6171218}.Release|x64.ActiveCfg = Release|x64
{4B0A219D-68D0-41EA-A0C4-8EB9E6171218}.Release|x64.Build.0 = Release|x64
{4B0A219D-68D0-41EA-A0C4-8EB9E6171218}.Release|x86.ActiveCfg = Release|Win32
{4B0A219D-68D0-41EA-A0C4-8EB9E6171218}.Release|x86.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down
93 changes: 85 additions & 8 deletions traverse.cl
Original file line number Diff line number Diff line change
Expand Up @@ -93,16 +93,11 @@ inline float fmax_fmax( const float a, const float b, const float c )
//
// ============================================================================

void kernel traverse_ailalaine( global struct BVHNodeAlt* altNode, global unsigned* idx, global float4* verts, global struct Ray* rayData )
float4 traverse_ailalaine( const float3 O, const float3 D, const float3 rD, const float tmax )
{
// fetch ray
const unsigned threadId = get_global_id( 0 );
const float3 O = rayData[threadId].O.xyz;
const float3 D = rayData[threadId].D.xyz;
const float3 rD = rayData[threadId].rD.xyz;
float4 hit;
hit.x = 1e30f; // ignoring value set in ray to spare one memory transaction.
// traverse BVH
const float4 hit;
hit.x = tmax;
unsigned node = 0, stack[64], stackPtr = 0;
while (1)
{
Expand Down Expand Up @@ -169,6 +164,88 @@ void kernel traverse_ailalaine( global struct BVHNodeAlt* altNode, global unsign
rayData[threadId].hit = hit;
}

bool isoccluded_ailalaine( const float3 O, const float3 D, const float3 rD, const float tmax )
{
// traverse BVH
const float4 hit;
hit.x = tmax;
unsigned node = 0, stack[64], stackPtr = 0;
while (1)
{
// fetch the node
const float4 lmin = altNode[node].lmin, lmax = altNode[node].lmax;
const float4 rmin = altNode[node].rmin, rmax = altNode[node].rmax;
const unsigned triCount = as_uint( rmin.w );
if (triCount > 0)
{
// process leaf node
const unsigned firstTri = as_uint( rmax.w );
for (unsigned i = 0; i < triCount; i++)
{
const unsigned triIdx = idx[firstTri + i];
const float4* tri = verts + 3 * triIdx;
// triangle intersection - Möller-Trumbore
const float4 edge1 = tri[1] - tri[0], edge2 = tri[2] - tri[0];
const float3 h = cross( D, edge2.xyz );
const float a = dot( edge1.xyz, h );
if (fabs( a ) < 0.0000001f) continue;
const float f = 1 / a;
const float3 s = O - tri[0].xyz;
const float u = f * dot( s, h );
if (u < 0 || u > 1) continue;
const float3 q = cross( s, edge1.xyz );
const float v = f * dot( D, q );
if (v < 0 || u + v > 1) continue;
const float d = f * dot( edge2.xyz, q );
if (d > 0.0f && d < hit.x) return true;
}
if (stackPtr == 0) break;
node = stack[--stackPtr];
continue;
}
unsigned left = as_uint( lmin.w ), right = as_uint( lmax.w );
// child AABB intersection tests
const float3 t1a = (lmin.xyz - O) * rD, t2a = (lmax.xyz - O) * rD;
const float3 t1b = (rmin.xyz - O) * rD, t2b = (rmax.xyz - O) * rD;
const float3 minta = fmin( t1a, t2a ), maxta = fmax( t1a, t2a );
const float3 mintb = fmin( t1b, t2b ), maxtb = fmax( t1b, t2b );
const float tmina = fmax( fmax( fmax( minta.x, minta.y ), minta.z ), 0 );
const float tminb = fmax( fmax( fmax( mintb.x, mintb.y ), mintb.z ), 0 );
const float tmaxa = fmin( fmin( fmin( maxta.x, maxta.y ), maxta.z ), hit.x );
const float tmaxb = fmin( fmin( fmin( maxtb.x, maxtb.y ), maxtb.z ), hit.x );
float dist1 = tmina > tmaxa ? 1e30f : tmina;
float dist2 = tminb > tmaxb ? 1e30f : tminb;
// traverse nearest child first
if (dist1 > dist2)
{
float h = dist1; dist1 = dist2; dist2 = h;
unsigned t = left; left = right; right = t;
}
if (dist1 == 1e30f)
{
if (stackPtr == 0) break; else node = stack[--stackPtr];
}
else
{
node = left;
if (dist2 != 1e30f) stack[stackPtr++] = right;
}
}
// no hit found
return false;
}

void kernel batch_ailalaine( global struct BVHNodeAlt* altNode, global unsigned* idx, global float4* verts, global struct Ray* rayData )
{
// fetch ray
const unsigned threadId = get_global_id( 0 );
const float3 O = rayData[threadId].O.xyz;
const float3 D = rayData[threadId].D.xyz;
const float3 rD = rayData[threadId].rD.xyz;
float4 hit = traverse_ailalaine( O, D, rD, 1e30f );
rayData[threadId].hit = hit;
}

// ============================================================================
//
// T R A V E R S E _ G P U 4 W A Y
Expand Down
6 changes: 4 additions & 2 deletions vcproj/tiny_bvh_anim.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,8 @@
<PreprocessorDefinitions>_DEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<LanguageStandard>stdcpp20</LanguageStandard>
<AdditionalIncludeDirectories>..\external\embree\include</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>
</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
Expand All @@ -137,7 +138,8 @@
<PreprocessorDefinitions>NDEBUG;_WINDOWS;%(PreprocessorDefinitions); _CRT_SECURE_NO_WARNINGS</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<LanguageStandard>stdcpp20</LanguageStandard>
<AdditionalIncludeDirectories>..\external\embree\include</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>
</AdditionalIncludeDirectories>
<Optimization>Full</Optimization>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
Expand Down
6 changes: 4 additions & 2 deletions vcproj/tiny_bvh_fenster.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,8 @@
<PreprocessorDefinitions>_DEBUG;_WINDOWS;%(PreprocessorDefinitions);_CRT_SECURE_NO_WARNINGS</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<LanguageStandard>stdcpp20</LanguageStandard>
<AdditionalIncludeDirectories>..\external\embree\include</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>
</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
Expand All @@ -137,7 +138,8 @@
<PreprocessorDefinitions>NDEBUG;_WINDOWS;%(PreprocessorDefinitions);_CRT_SECURE_NO_WARNINGS</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<LanguageStandard>stdcpp20</LanguageStandard>
<AdditionalIncludeDirectories>..\external\embree\include</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>
</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
Expand Down
Loading

0 comments on commit 4bdf197

Please sign in to comment.