From f0a726f95d619a1aecc45980d11c96355ee34830 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=BCnther?= Date: Fri, 10 Mar 2023 14:36:04 +0100 Subject: [PATCH 01/42] Bump version --- README.md | 6 +++--- cmake/ospray_version.cmake | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 23b64fc1b..fde558633 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ OSPRay ====== -This is release v2.11.0 of Intel® OSPRay. For changes and new features -see the [changelog](CHANGELOG.md). Visit http://www.ospray.org for more -information. +This is release v2.11.1 (devel) of Intel® OSPRay. For changes and new +features see the [changelog](CHANGELOG.md). Visit http://www.ospray.org +for more information. OSPRay Overview =============== diff --git a/cmake/ospray_version.cmake b/cmake/ospray_version.cmake index 1a1d1cda4..65a9ec3b4 100644 --- a/cmake/ospray_version.cmake +++ b/cmake/ospray_version.cmake @@ -3,7 +3,7 @@ set(OSPRAY_VERSION_MAJOR 2) set(OSPRAY_VERSION_MINOR 11) -set(OSPRAY_VERSION_PATCH 0) +set(OSPRAY_VERSION_PATCH 1) set(OSPRAY_SOVERSION 2) set(OSPRAY_VERSION_GITHASH 0) set(OSPRAY_VERSION_NOTE "") From 2aa442a4954b14c8b2226a1971d49d50a7e8ed1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=BCnther?= Date: Thu, 9 Mar 2023 18:49:18 +0100 Subject: [PATCH 02/42] Fix crash in PT when no lights but lightSamples > 0 --- apps/common/ospray_testing/builders/Empty.cpp | 17 +++++++++++++---- apps/ospExamples/GLFWOSPRayWindow.cpp | 6 ++++++ apps/ospTestSuite/test_fixture.cpp | 7 +++++++ apps/ospTestSuite/test_fixture.h | 6 ++++++ apps/ospTestSuite/test_geometry.cpp | 14 +++++++++++++- modules/cpu/render/pathtracer/PathTracer.ispc | 3 ++- ..._TestScenesLightSamples_testScenes_0.png.md5 | 1 + ..._TestScenesLightSamples_testScenes_1.png.md5 | 1 + ...ng_TestScenesGeometry_test_scenes_27.png.md5 | 1 + ...ng_TestScenesGeometry_test_scenes_28.png.md5 | 1 + ...ng_TestScenesGeometry_test_scenes_29.png.md5 | 1 + 
..._TestScenesLightSamples_testScenes_0.png.md5 | 1 + ..._TestScenesLightSamples_testScenes_1.png.md5 | 1 + ...ng_TestScenesGeometry_test_scenes_27.png.md5 | 1 + ...ng_TestScenesGeometry_test_scenes_28.png.md5 | 1 + ...ng_TestScenesGeometry_test_scenes_29.png.md5 | 1 + 16 files changed, 57 insertions(+), 6 deletions(-) create mode 100644 test_image_data/baseline/AVX2/FromOsprayTestingLightSamples_TestScenesLightSamples_testScenes_0.png.md5 create mode 100644 test_image_data/baseline/AVX2/FromOsprayTestingLightSamples_TestScenesLightSamples_testScenes_1.png.md5 create mode 100644 test_image_data/baseline/AVX2/FromOsprayTesting_TestScenesGeometry_test_scenes_27.png.md5 create mode 100644 test_image_data/baseline/AVX2/FromOsprayTesting_TestScenesGeometry_test_scenes_28.png.md5 create mode 100644 test_image_data/baseline/AVX2/FromOsprayTesting_TestScenesGeometry_test_scenes_29.png.md5 create mode 100644 test_image_data/baseline/AVX512SKX/FromOsprayTestingLightSamples_TestScenesLightSamples_testScenes_0.png.md5 create mode 100644 test_image_data/baseline/AVX512SKX/FromOsprayTestingLightSamples_TestScenesLightSamples_testScenes_1.png.md5 create mode 100644 test_image_data/baseline/AVX512SKX/FromOsprayTesting_TestScenesGeometry_test_scenes_27.png.md5 create mode 100644 test_image_data/baseline/AVX512SKX/FromOsprayTesting_TestScenesGeometry_test_scenes_28.png.md5 create mode 100644 test_image_data/baseline/AVX512SKX/FromOsprayTesting_TestScenesGeometry_test_scenes_29.png.md5 diff --git a/apps/common/ospray_testing/builders/Empty.cpp b/apps/common/ospray_testing/builders/Empty.cpp index 8b928cdf5..fe4bfb8e5 100644 --- a/apps/common/ospray_testing/builders/Empty.cpp +++ b/apps/common/ospray_testing/builders/Empty.cpp @@ -11,14 +11,17 @@ namespace testing { struct Empty : public detail::Builder { - Empty() = default; + Empty(bool plane = false) + { + addPlane = plane; + } ~Empty() override = default; void commit() override; cpp::Group buildGroup() const override; - cpp::World 
buildWorld(const std::vector &) const override; + cpp::World buildWorld() const override; }; // Inlined definitions //////////////////////////////////////////////////// @@ -26,7 +29,6 @@ struct Empty : public detail::Builder void Empty::commit() { Builder::commit(); - addPlane = false; } cpp::Group Empty::buildGroup() const @@ -36,14 +38,21 @@ cpp::Group Empty::buildGroup() const return group; } -cpp::World Empty::buildWorld(const std::vector &) const +cpp::World Empty::buildWorld() const { cpp::World world; + if (addPlane) { + std::vector inst; + inst.push_back(makeGroundPlane(box3f(zero, one))); + world.setParam("instance", cpp::CopiedData(inst)); + } + return world; } OSP_REGISTER_TESTING_BUILDER(Empty, empty); +OSP_REGISTER_TESTING_BUILDER(Empty(true), nolight); } // namespace testing } // namespace ospray diff --git a/apps/ospExamples/GLFWOSPRayWindow.cpp b/apps/ospExamples/GLFWOSPRayWindow.cpp index 3c0a101b4..806b49a7c 100644 --- a/apps/ospExamples/GLFWOSPRayWindow.cpp +++ b/apps/ospExamples/GLFWOSPRayWindow.cpp @@ -730,6 +730,12 @@ void GLFWOSPRayWindow::buildUI() } if (rendererType == OSPRayRendererType::PATHTRACER) { + static int lightSamples = -1; + if (ImGui::SliderInt("lightSamples", &lightSamples, -1, 32)) { + renderer->setParam("lightSamples", lightSamples); + addObjectToCommit(renderer->handle()); + } + static int maxDepth = 20; if (ImGui::SliderInt("maxPathLength", &maxDepth, 1, 64)) { renderer->setParam("maxPathLength", maxDepth); diff --git a/apps/ospTestSuite/test_fixture.cpp b/apps/ospTestSuite/test_fixture.cpp index 2e370c0f1..9d4521ac5 100644 --- a/apps/ospTestSuite/test_fixture.cpp +++ b/apps/ospTestSuite/test_fixture.cpp @@ -220,4 +220,11 @@ void FromOsprayTestingVariance::SetUp() renderer.setParam("varianceThreshold", 20.f); } +void FromOsprayTestingLightSamples::SetUp() +{ + FromOsprayTesting::SetUp(); + + renderer.setParam("lightSamples", 8); +} + } // namespace OSPRayTestScenes diff --git a/apps/ospTestSuite/test_fixture.h 
b/apps/ospTestSuite/test_fixture.h index db7067c43..00f2c0b68 100644 --- a/apps/ospTestSuite/test_fixture.h +++ b/apps/ospTestSuite/test_fixture.h @@ -101,4 +101,10 @@ class FromOsprayTestingVariance : public FromOsprayTesting void SetUp() override; }; +class FromOsprayTestingLightSamples : public FromOsprayTesting +{ + public: + void SetUp() override; +}; + } // namespace OSPRayTestScenes diff --git a/apps/ospTestSuite/test_geometry.cpp b/apps/ospTestSuite/test_geometry.cpp index f5d4290a2..e31bd7c97 100644 --- a/apps/ospTestSuite/test_geometry.cpp +++ b/apps/ospTestSuite/test_geometry.cpp @@ -179,7 +179,8 @@ INSTANTIATE_TEST_SUITE_P(TestScenesGeometry, "subdivision_cube", "planes", "unstructured_volume_isosurface", - "instancing"), + "instancing", + "nolight"), ::testing::Values("scivis", "pathtracer", "ao"), ::testing::Values(16))); @@ -257,4 +258,15 @@ INSTANTIATE_TEST_SUITE_P(TestScenesVariance, FromOsprayTestingVariance, ::testing::Values(std::make_tuple("cornell_box", "pathtracer", 4))); +TEST_P(FromOsprayTestingLightSamples, testScenes) +{ + PerformRenderTest(); +} + +INSTANTIATE_TEST_SUITE_P(TestScenesLightSamples, + FromOsprayTestingLightSamples, + ::testing::Combine(::testing::Values("cornell_box", "nolight"), + ::testing::Values("pathtracer"), + ::testing::Values(1))); + } // namespace OSPRayTestScenes diff --git a/modules/cpu/render/pathtracer/PathTracer.ispc b/modules/cpu/render/pathtracer/PathTracer.ispc index c130c58bd..ce921d48c 100644 --- a/modules/cpu/render/pathtracer/PathTracer.ispc +++ b/modules/cpu/render/pathtracer/PathTracer.ispc @@ -55,7 +55,8 @@ static ScreenSample PathTraceIntegrator_Li(const PathTracer *uniform self, const uniform int numLights = pathtracerData.lights ? min(MAX_LIGHTS, pathtracerData.numLights) : 0; const uniform int numLightSamples = - self->numLightSamples >= 0 ? self->numLightSamples : numLights; + self->numLightSamples >= 0 && numLights > 0 ? 
self->numLightSamples + : numLights; PathContext pathContext; pathContext.context = self; pathContext.world = world; diff --git a/test_image_data/baseline/AVX2/FromOsprayTestingLightSamples_TestScenesLightSamples_testScenes_0.png.md5 b/test_image_data/baseline/AVX2/FromOsprayTestingLightSamples_TestScenesLightSamples_testScenes_0.png.md5 new file mode 100644 index 000000000..480b060e4 --- /dev/null +++ b/test_image_data/baseline/AVX2/FromOsprayTestingLightSamples_TestScenesLightSamples_testScenes_0.png.md5 @@ -0,0 +1 @@ +0b2d35e353b50e1ea34f0d0d3857fd82 diff --git a/test_image_data/baseline/AVX2/FromOsprayTestingLightSamples_TestScenesLightSamples_testScenes_1.png.md5 b/test_image_data/baseline/AVX2/FromOsprayTestingLightSamples_TestScenesLightSamples_testScenes_1.png.md5 new file mode 100644 index 000000000..4215dd7ac --- /dev/null +++ b/test_image_data/baseline/AVX2/FromOsprayTestingLightSamples_TestScenesLightSamples_testScenes_1.png.md5 @@ -0,0 +1 @@ +bc066e888df6097b89150cd631700ca9 diff --git a/test_image_data/baseline/AVX2/FromOsprayTesting_TestScenesGeometry_test_scenes_27.png.md5 b/test_image_data/baseline/AVX2/FromOsprayTesting_TestScenesGeometry_test_scenes_27.png.md5 new file mode 100644 index 000000000..f9627e8dd --- /dev/null +++ b/test_image_data/baseline/AVX2/FromOsprayTesting_TestScenesGeometry_test_scenes_27.png.md5 @@ -0,0 +1 @@ +5104654864fd04fa6ba385c8467f3810 diff --git a/test_image_data/baseline/AVX2/FromOsprayTesting_TestScenesGeometry_test_scenes_28.png.md5 b/test_image_data/baseline/AVX2/FromOsprayTesting_TestScenesGeometry_test_scenes_28.png.md5 new file mode 100644 index 000000000..f9627e8dd --- /dev/null +++ b/test_image_data/baseline/AVX2/FromOsprayTesting_TestScenesGeometry_test_scenes_28.png.md5 @@ -0,0 +1 @@ +5104654864fd04fa6ba385c8467f3810 diff --git a/test_image_data/baseline/AVX2/FromOsprayTesting_TestScenesGeometry_test_scenes_29.png.md5 b/test_image_data/baseline/AVX2/FromOsprayTesting_TestScenesGeometry_test_scenes_29.png.md5 
new file mode 100644 index 000000000..f9627e8dd --- /dev/null +++ b/test_image_data/baseline/AVX2/FromOsprayTesting_TestScenesGeometry_test_scenes_29.png.md5 @@ -0,0 +1 @@ +5104654864fd04fa6ba385c8467f3810 diff --git a/test_image_data/baseline/AVX512SKX/FromOsprayTestingLightSamples_TestScenesLightSamples_testScenes_0.png.md5 b/test_image_data/baseline/AVX512SKX/FromOsprayTestingLightSamples_TestScenesLightSamples_testScenes_0.png.md5 new file mode 100644 index 000000000..480b060e4 --- /dev/null +++ b/test_image_data/baseline/AVX512SKX/FromOsprayTestingLightSamples_TestScenesLightSamples_testScenes_0.png.md5 @@ -0,0 +1 @@ +0b2d35e353b50e1ea34f0d0d3857fd82 diff --git a/test_image_data/baseline/AVX512SKX/FromOsprayTestingLightSamples_TestScenesLightSamples_testScenes_1.png.md5 b/test_image_data/baseline/AVX512SKX/FromOsprayTestingLightSamples_TestScenesLightSamples_testScenes_1.png.md5 new file mode 100644 index 000000000..4215dd7ac --- /dev/null +++ b/test_image_data/baseline/AVX512SKX/FromOsprayTestingLightSamples_TestScenesLightSamples_testScenes_1.png.md5 @@ -0,0 +1 @@ +bc066e888df6097b89150cd631700ca9 diff --git a/test_image_data/baseline/AVX512SKX/FromOsprayTesting_TestScenesGeometry_test_scenes_27.png.md5 b/test_image_data/baseline/AVX512SKX/FromOsprayTesting_TestScenesGeometry_test_scenes_27.png.md5 new file mode 100644 index 000000000..f9627e8dd --- /dev/null +++ b/test_image_data/baseline/AVX512SKX/FromOsprayTesting_TestScenesGeometry_test_scenes_27.png.md5 @@ -0,0 +1 @@ +5104654864fd04fa6ba385c8467f3810 diff --git a/test_image_data/baseline/AVX512SKX/FromOsprayTesting_TestScenesGeometry_test_scenes_28.png.md5 b/test_image_data/baseline/AVX512SKX/FromOsprayTesting_TestScenesGeometry_test_scenes_28.png.md5 new file mode 100644 index 000000000..f9627e8dd --- /dev/null +++ b/test_image_data/baseline/AVX512SKX/FromOsprayTesting_TestScenesGeometry_test_scenes_28.png.md5 @@ -0,0 +1 @@ +5104654864fd04fa6ba385c8467f3810 diff --git 
a/test_image_data/baseline/AVX512SKX/FromOsprayTesting_TestScenesGeometry_test_scenes_29.png.md5 b/test_image_data/baseline/AVX512SKX/FromOsprayTesting_TestScenesGeometry_test_scenes_29.png.md5 new file mode 100644 index 000000000..f9627e8dd --- /dev/null +++ b/test_image_data/baseline/AVX512SKX/FromOsprayTesting_TestScenesGeometry_test_scenes_29.png.md5 @@ -0,0 +1 @@ +5104654864fd04fa6ba385c8467f3810 From d551358d4e42d3a564eb9476dfb03091f4030fd8 Mon Sep 17 00:00:00 2001 From: Miroslaw Pawlowski Date: Fri, 3 Feb 2023 17:57:05 +0100 Subject: [PATCH 03/42] Kernel size optimization with specialization constants --- .github/workflows/ci.linux.gpu.yml | 4 +- cmake/compiler/dpcpp.cmake | 6 +- modules/cpu/CMakeLists.txt | 122 ++----- modules/cpu/camera/Camera.cpp | 5 +- modules/cpu/camera/Camera.h | 11 +- modules/cpu/camera/CameraDispatch.ih | 8 +- modules/cpu/camera/CameraDispatch.ispc | 24 +- modules/cpu/camera/OrthographicCamera.cpp | 2 +- modules/cpu/camera/OrthographicCamera.ispc | 5 +- modules/cpu/camera/PanoramicCamera.cpp | 2 +- modules/cpu/camera/PanoramicCamera.ispc | 5 +- modules/cpu/camera/PerspectiveCamera.cpp | 2 +- modules/cpu/common/FeatureFlags.ih | 23 ++ modules/cpu/common/FeatureFlagsEnum.h | 136 ++++++++ modules/cpu/common/FilterIntersect.ih | 11 +- modules/cpu/common/Group.cpp | 45 ++- modules/cpu/common/Group.h | 10 + modules/cpu/common/Instance.ih | 6 +- modules/cpu/common/Ray.ih | 20 -- modules/cpu/common/RayQueryContext.ih | 39 +++ modules/cpu/common/VolumeIntervals.ih | 5 +- modules/cpu/common/World.cpp | 11 + modules/cpu/common/World.h | 11 + modules/cpu/common/World.ih | 95 +++--- modules/cpu/common/World.ispc | 7 +- modules/cpu/fb/FrameBuffer.cpp | 6 +- modules/cpu/fb/FrameBuffer.h | 13 +- modules/cpu/fb/FrameBufferDispatch.ih | 18 +- modules/cpu/fb/FrameBufferDispatch.ispc | 46 ++- modules/cpu/fb/LocalFB.cpp | 8 +- modules/cpu/fb/LocalFB.ih | 1 + modules/cpu/fb/LocalFB.ispc | 1 + modules/cpu/fb/SparseFB.cpp | 16 +- 
modules/cpu/geometry/Boxes.cpp | 3 +- modules/cpu/geometry/Curves.cpp | 24 +- modules/cpu/geometry/GeometricModel.cpp | 4 + modules/cpu/geometry/GeometricModel.h | 21 ++ modules/cpu/geometry/GeometricModel.ih | 6 +- modules/cpu/geometry/Geometry.cpp | 4 +- modules/cpu/geometry/Geometry.h | 12 +- modules/cpu/geometry/GeometryDispatch.ih | 10 +- modules/cpu/geometry/GeometryDispatch.ispc | 93 +++--- modules/cpu/geometry/Isosurfaces.cpp | 3 +- modules/cpu/geometry/Mesh.cpp | 3 +- modules/cpu/geometry/Planes.cpp | 3 +- modules/cpu/geometry/Spheres.cpp | 2 +- modules/cpu/geometry/Subdivision.cpp | 2 +- modules/cpu/ispc_symbols.txt | 14 +- modules/cpu/lights/AmbientLight.h | 2 +- modules/cpu/lights/CylinderLight.h | 2 +- modules/cpu/lights/DirectionalLight.h | 4 +- modules/cpu/lights/HDRILight.h | 2 +- modules/cpu/lights/Light.cpp | 3 +- modules/cpu/lights/Light.h | 12 +- modules/cpu/lights/LightDispatch.ih | 15 +- modules/cpu/lights/LightDispatch.ispc | 70 ++-- modules/cpu/lights/PointLight.h | 2 +- modules/cpu/lights/QuadLight.h | 2 +- modules/cpu/lights/SpotLight.h | 2 +- modules/cpu/lights/SunSkyLight.cpp | 3 +- modules/cpu/pf/PixelFilterDispatch.ih | 2 +- modules/cpu/pf/PixelFilterDispatch.ispc | 2 +- modules/cpu/render/Material.cpp | 6 +- modules/cpu/render/Material.h | 12 +- modules/cpu/render/Material.ih | 26 +- modules/cpu/render/MaterialDispatch.ih | 13 +- modules/cpu/render/MaterialDispatch.ispc | 80 +++-- modules/cpu/render/Renderer.cpp | 6 + modules/cpu/render/Renderer.h | 4 + modules/cpu/render/Renderer.ih | 11 +- modules/cpu/render/Renderer.ispc | 20 +- modules/cpu/render/RendererRenderTaskFn.inl | 26 +- modules/cpu/render/ao/AORenderer.cpp | 34 +- modules/cpu/render/ao/AORenderer.ih | 3 +- modules/cpu/render/ao/AORenderer.ispc | 47 +-- modules/cpu/render/ao/surfaces.ih | 9 +- modules/cpu/render/ao/surfaces.ispc | 18 +- modules/cpu/render/bsdfs/BSDF.ih | 37 ++- modules/cpu/render/bsdfs/BSDF.ispc | 310 +++++++++++------- 
modules/cpu/render/bsdfs/Conductor.ih | 9 +- modules/cpu/render/bsdfs/Dielectric.ih | 9 +- modules/cpu/render/bsdfs/DielectricLayer.ih | 18 +- modules/cpu/render/bsdfs/Lambert.ih | 13 +- .../cpu/render/bsdfs/LambertTransmission.ih | 12 +- .../cpu/render/bsdfs/MicrofacetConductor.ih | 10 +- .../cpu/render/bsdfs/MicrofacetDielectric.ih | 18 +- .../render/bsdfs/MicrofacetDielectricLayer.ih | 21 +- .../cpu/render/bsdfs/MicrofacetSheenLayer.ih | 22 +- modules/cpu/render/bsdfs/Minneart.ih | 11 +- modules/cpu/render/bsdfs/MultiBSDF.ih | 18 +- modules/cpu/render/bsdfs/OrenNayar.ih | 11 +- modules/cpu/render/bsdfs/Reflection.ih | 13 +- modules/cpu/render/bsdfs/RobustDielectric.ih | 9 +- .../cpu/render/bsdfs/RobustThinDielectric.ih | 9 +- modules/cpu/render/bsdfs/Scale.ih | 13 +- modules/cpu/render/bsdfs/Specular.ih | 9 +- modules/cpu/render/bsdfs/ThinDielectric.ih | 9 +- .../render/bsdfs/ThinMicrofacetDielectric.ih | 8 +- modules/cpu/render/bsdfs/Transmission.ih | 14 +- modules/cpu/render/bsdfs/Velvety.ih | 11 +- modules/cpu/render/debug/DebugRenderer.cpp | 35 +- modules/cpu/render/debug/DebugRenderer.ih | 3 +- modules/cpu/render/debug/DebugRenderer.ispc | 165 +++++----- modules/cpu/render/materials/Alloy.cpp | 2 +- modules/cpu/render/materials/CarPaint.cpp | 2 +- modules/cpu/render/materials/CarPaint.ispc | 1 + modules/cpu/render/materials/Glass.cpp | 2 +- modules/cpu/render/materials/Glass.ispc | 1 + modules/cpu/render/materials/Luminous.cpp | 2 +- modules/cpu/render/materials/Metal.cpp | 2 +- .../cpu/render/materials/MetallicPaint.cpp | 3 +- modules/cpu/render/materials/Mix.cpp | 2 +- modules/cpu/render/materials/OBJ.cpp | 2 +- modules/cpu/render/materials/OBJ.ih | 7 +- modules/cpu/render/materials/OBJ.ispc | 20 +- modules/cpu/render/materials/Plastic.cpp | 2 +- modules/cpu/render/materials/Principled.cpp | 2 +- modules/cpu/render/materials/Principled.ispc | 1 + modules/cpu/render/materials/ThinGlass.cpp | 2 +- modules/cpu/render/materials/ThinGlass.ispc | 1 + 
modules/cpu/render/materials/Velvet.cpp | 2 +- .../render/pathtracer/NextEventEstimation.ih | 4 +- .../pathtracer/NextEventEstimation.ispc | 74 +++-- modules/cpu/render/pathtracer/PathSampler.ih | 4 +- .../cpu/render/pathtracer/PathSampler.ispc | 68 ++-- modules/cpu/render/pathtracer/PathTracer.cpp | 40 ++- modules/cpu/render/pathtracer/PathTracer.h | 1 - modules/cpu/render/pathtracer/PathTracer.ispc | 34 +- .../cpu/render/pathtracer/ShadowCatcher.ih | 5 +- .../cpu/render/pathtracer/ShadowCatcher.ispc | 10 +- .../render/pathtracer/TransparentShadow.ih | 4 +- .../render/pathtracer/TransparentShadow.ispc | 14 +- modules/cpu/render/pathtracer/VirtualLight.ih | 3 +- .../cpu/render/pathtracer/VirtualLight.ispc | 23 +- modules/cpu/render/scivis/SciVis.cpp | 37 ++- modules/cpu/render/scivis/SciVis.ih | 10 +- modules/cpu/render/scivis/SciVis.ispc | 116 ++++--- modules/cpu/render/scivis/lightAlpha.ispc | 54 +-- modules/cpu/render/scivis/surfaces.ih | 22 +- modules/cpu/render/scivis/surfaces.ispc | 30 +- modules/cpu/render/scivis/volumes.ih | 3 +- modules/cpu/render/scivis/volumes.ispc | 24 +- modules/cpu/render/util.ih | 3 +- modules/cpu/render/util.ispc | 5 +- modules/cpu/texture/Texture2D.ih | 4 +- modules/cpu/texture/Texture2D.ispc | 4 +- modules/cpu/texture/TextureDispatch.ih | 4 +- modules/cpu/texture/TextureDispatch.ispc | 4 +- modules/cpu/texture/TextureParam.ih | 3 +- modules/cpu/volume/Volume.cpp | 4 +- modules/cpu/volume/Volume.h | 10 + modules/cpu/volume/Volume.ispc | 4 +- modules/cpu/volume/VolumetricModel.h | 20 ++ modules/mpi/ospray/CMakeLists.txt | 22 +- .../mpi/ospray/fb/DistributedFrameBuffer.cpp | 2 +- .../render/distributed/DistributedRaycast.cpp | 40 ++- .../distributed/DistributedRaycast.ispc | 61 ++-- .../distributed/DistributedRenderer.cpp | 38 ++- .../distributed/DistributedRenderer.ispc | 34 +- .../DistributedRendererRenderTaskFn.inl | 26 +- modules/multiDevice/CMakeLists.txt | 48 +-- .../ospray/geometry/BilinearPatches.cpp | 2 +- 162 files changed, 
1914 insertions(+), 1243 deletions(-) create mode 100644 modules/cpu/common/FeatureFlags.ih create mode 100644 modules/cpu/common/FeatureFlagsEnum.h create mode 100644 modules/cpu/common/RayQueryContext.ih diff --git a/.github/workflows/ci.linux.gpu.yml b/.github/workflows/ci.linux.gpu.yml index 7a2fdc6c1..a3a072077 100644 --- a/.github/workflows/ci.linux.gpu.yml +++ b/.github/workflows/ci.linux.gpu.yml @@ -29,7 +29,7 @@ jobs: export SYCL_BUNDLE_ROOT=$DPCPP_ROOT export CC=clang export CXX=clang++ - scripts/build/linux.sh -G Ninja -DBUILD_GLFW=OFF -DBUILD_OSPRAY_MODULE_MULTIDEVICE=OFF -DBUILD_GPU_SUPPORT=ON -DOPENVKL_URL=https://${RENDERKIT_GITHUB_TOKEN}@github.com/intel-innersource/libraries.graphics.renderkit.openvkl.git -DOPENVKL_BRANCH=gpu-devel + scripts/build/linux.sh -G Ninja -DBUILD_GLFW=OFF -DBUILD_OSPRAY_MODULE_MULTIDEVICE=OFF -DBUILD_GPU_SUPPORT=ON -DOPENVKL_URL=https://${RENDERKIT_GITHUB_TOKEN}@github.com/intel-innersource/libraries.graphics.renderkit.openvkl.git -DOPENVKL_BRANCH=kraszkow/embree-sycl-build-gpu -DRKCOMMON_URL=https://${RENDERKIT_GITHUB_TOKEN}@github.com/intel-innersource/libraries.graphics.renderkit.rkcommon.git -DRKCOMMON_VERSION=devel artifact-out: build-ubuntu2204-DG2-JIT artifact-path: build/install/ospray build/install/embree build/CMakeCache.txt build/*/build/CMakeCache.txt @@ -49,7 +49,7 @@ jobs: module load mpi export CC=icx export CXX=icpx - scripts/build/linux.sh -G Ninja -DBUILD_GLFW=OFF -DBUILD_OSPRAY_MODULE_MPI=ON -DBUILD_OSPRAY_MODULE_MULTIDEVICE=OFF -DBUILD_GPU_SUPPORT=ON -DOPENVKL_URL=https://${RENDERKIT_GITHUB_TOKEN}@github.com/intel-innersource/libraries.graphics.renderkit.openvkl.git -DOPENVKL_BRANCH=gpu-devel + scripts/build/linux.sh -G Ninja -DBUILD_GLFW=OFF -DBUILD_OSPRAY_MODULE_MPI=ON -DBUILD_OSPRAY_MODULE_MULTIDEVICE=OFF -DBUILD_GPU_SUPPORT=ON -DOPENVKL_URL=https://${RENDERKIT_GITHUB_TOKEN}@github.com/intel-innersource/libraries.graphics.renderkit.openvkl.git -DOPENVKL_BRANCH=kraszkow/embree-sycl-build-gpu 
-DRKCOMMON_URL=https://${RENDERKIT_GITHUB_TOKEN}@github.com/intel-innersource/libraries.graphics.renderkit.rkcommon.git -DRKCOMMON_VERSION=devel artifact-out: build-ubuntu2204-DG2-JIT-mpi artifact-path: build/install/ospray build/install/embree build/CMakeCache.txt build/*/build/CMakeCache.txt diff --git a/cmake/compiler/dpcpp.cmake b/cmake/compiler/dpcpp.cmake index ef7c2f343..0500f8294 100644 --- a/cmake/compiler/dpcpp.cmake +++ b/cmake/compiler/dpcpp.cmake @@ -37,16 +37,14 @@ list(APPEND OSPRAY_CXX_FLAGS_SYCL -g0) #list(APPEND OSPRAY_CXX_FLAGS_SYCL -UDEBUG -DNDEBUG) # IGC options from Embree -# Enable __noinline -list(APPEND OSPRAY_IGC_OPTIONS "EnableOCLNoInlineAttr=0") # This works around some IGC bug in spill compression # TODO: Still true? -list(APPEND OSPRAY_IGC_OPTIONS "VISAOptions=-scratchAllocForStackInKB 128 -nospillcompression") +list(APPEND OSPRAY_IGC_OPTIONS "VISAOptions=-scratchAllocForStackInKB 128") # Allow printf inside indirectly callable function, right now I have this in all for testing # TODO: Should only enable for debug builds, and this needs to be done using a generator expression # if we want to support it in VS -list(APPEND OSPRAY_IGC_OPTIONS "ForceInlineStackCallWithImplArg=0" "EnableGlobalStateBuffer=1") +#list(APPEND OSPRAY_IGC_OPTIONS "ForceInlineStackCallWithImplArg=0" "EnableGlobalStateBuffer=1") option(OSPRAY_IGC_ENABLE_ZE_BINARY "Enable ZEBinary (for GTPin)" OFF) if (OSPRAY_IGC_ENABLE_ZE_BINARY) diff --git a/modules/cpu/CMakeLists.txt b/modules/cpu/CMakeLists.txt index 700a29219..d18cd843f 100644 --- a/modules/cpu/CMakeLists.txt +++ b/modules/cpu/CMakeLists.txt @@ -416,9 +416,10 @@ if (OSPRAY_MODULE_GPU) PROPERTIES LANGUAGE CXX) - add_library(ospray_module_gpu_kernels STATIC + add_library(ospray_module_gpu_kernels OBJECT ${OSPRAY_ISPC_SOURCES} - ${OSPRAY_VOLUMES_ISPC_SOURCES}) + ${OSPRAY_VOLUMES_ISPC_SOURCES} + ) set_target_properties(ospray_module_gpu_kernels PROPERTIES CXX_STANDARD 17 @@ -433,19 +434,19 @@ if (OSPRAY_MODULE_GPU) 
) target_include_directories(ospray_module_gpu_kernels - PUBLIC - $ - $ - $ - $ - $ - $ - # NOTE(jda) - the following includes are missing despite PUBLIC linking - $ - $ - ####################################################################### - $ - $ + PUBLIC + $ + $ + $ + $ + $ + $ + # NOTE(jda) - the following includes are missing despite PUBLIC linking + $ + $ + ####################################################################### + $ + $ ) target_compile_definitions(ospray_module_gpu_kernels @@ -460,31 +461,30 @@ if (OSPRAY_MODULE_GPU) -x c++ ) - target_compile_options(ospray_module_gpu_kernels PRIVATE + target_compile_options(ospray_module_gpu_kernels PUBLIC -fsycl ${OSPRAY_CXX_FLAGS_SYCL} -fsycl-targets=${OSPRAY_SYCL_TARGET} ) if (OSPRAY_SYCL_AOT_DEVICES STREQUAL "none") - target_link_options(ospray_module_gpu_kernels PRIVATE + target_link_options(ospray_module_gpu_kernels PUBLIC -fsycl -fsycl-targets=${OSPRAY_SYCL_TARGET} -Xsycl-target-backend=${OSPRAY_SYCL_TARGET} "${OSPRAY_OCL_OPTIONS_STR} -options \"${OSPRAY_OCL_OTHER_OPTIONS_STR} -igc_opts='${OSPRAY_IGC_OPTIONS_STR}'\"" + -fsycl-device-code-split=per_kernel ) else() - target_link_options(ospray_module_gpu_kernels PRIVATE + target_link_options(ospray_module_gpu_kernels PUBLIC -fsycl -fsycl-targets=${OSPRAY_SYCL_TARGET} -Xsycl-target-backend=${OSPRAY_SYCL_TARGET} "-device ${OSPRAY_SYCL_AOT_DEVICES} -revision_id ${OSPRAY_SYCL_AOT_DEVICE_REVISION} ${OSPRAY_OCL_OPTIONS_STR} -options \"${OSPRAY_OCL_OTHER_OPTIONS_STR} -igc_opts='${OSPRAY_IGC_OPTIONS_STR}'\"" ) endif() - target_compile_definitions(ospray_module_gpu_kernels PUBLIC OSPRAY_TARGET_SYCL - __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ OSPRAY_BEGIN_ISPC_NAMESPACE=namespace\ ispc{ OSPRAY_END_ISPC_NAMESPACE=} uniform= @@ -522,7 +522,8 @@ if (OSPRAY_MODULE_GPU) ) endif() - ospray_install_library(ospray_module_gpu_kernels lib) + # Install and export without DESTINATION to turn it into INTERFACE library + install(TARGETS ospray_module_gpu_kernels EXPORT 
ospray_Exports) add_library(ospray_module_gpu SHARED ${OSPRAY_CPP_SOURCES} @@ -534,39 +535,8 @@ if (OSPRAY_MODULE_GPU) CXX_STANDARD_REQUIRED ON) target_link_libraries(ospray_module_gpu - PUBLIC - ospray_module_gpu_kernels - ) - - target_link_libraries(ospray_module_gpu - PUBLIC - ospray - rkcommon::rkcommon - $ - $ - ) - - target_include_directories(ospray_module_gpu - PUBLIC - $ - $ - $ - $ - $ - $ - # NOTE(jda) - the following includes are missing despite PUBLIC linking - $ - $ - ####################################################################### - $ - $ - ) - - target_compile_definitions(ospray_module_gpu PUBLIC - TILE_SIZE=${OSPRAY_TILE_SIZE} - MAX_TILE_SIZE=${OSPRAY_MAX_STACK_TILE_SIZE} - RKCOMMON_NO_SIMD + ospray_module_gpu_kernels ) target_compile_options(ospray_module_gpu @@ -574,52 +544,6 @@ if (OSPRAY_MODULE_GPU) -x c++ ) - target_compile_options(ospray_module_gpu PRIVATE - -fsycl - ${OSPRAY_CXX_FLAGS_SYCL} - -fsycl-targets=${OSPRAY_SYCL_TARGET} - ) - - if (OSPRAY_SYCL_AOT_DEVICES STREQUAL "none") - target_link_options(ospray_module_gpu PRIVATE - -fsycl - -fsycl-targets=${OSPRAY_SYCL_TARGET} - -Xsycl-target-backend=${OSPRAY_SYCL_TARGET} "${OSPRAY_OCL_OPTIONS_STR} -options \"${OSPRAY_OCL_OTHER_OPTIONS_STR} -igc_opts='${OSPRAY_IGC_OPTIONS_STR}'\"" - ) - else() - target_link_options(ospray_module_gpu PRIVATE - -fsycl - -fsycl-targets=${OSPRAY_SYCL_TARGET} - -Xsycl-target-backend=${OSPRAY_SYCL_TARGET} "-device ${OSPRAY_SYCL_AOT_DEVICES} -revision_id ${OSPRAY_SYCL_AOT_DEVICE_REVISION} ${OSPRAY_OCL_OPTIONS_STR} -options \"${OSPRAY_OCL_OTHER_OPTIONS_STR} -igc_opts='${OSPRAY_IGC_OPTIONS_STR}'\"" - ) - endif() - - target_compile_definitions(ospray_module_gpu - PUBLIC - OSPRAY_TARGET_SYCL - __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ - OSPRAY_BEGIN_ISPC_NAMESPACE=namespace\ ispc{ - OSPRAY_END_ISPC_NAMESPACE=} - ) - - if (OSPRAY_ENABLE_VOLUMES) - target_link_libraries(ospray_module_gpu - PUBLIC - $ - $ - ) - - target_include_directories(ospray_module_gpu - PUBLIC 
- $ - ) - - target_compile_definitions(ospray_module_gpu - PUBLIC - OSPRAY_ENABLE_VOLUMES - ) - endif() - ospray_install_library(ospray_module_gpu lib) endif() diff --git a/modules/cpu/camera/Camera.cpp b/modules/cpu/camera/Camera.cpp index 25474fd3b..ccc841477 100644 --- a/modules/cpu/camera/Camera.cpp +++ b/modules/cpu/camera/Camera.cpp @@ -6,8 +6,9 @@ namespace ospray { -Camera::Camera(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) +Camera::Camera(api::ISPCDevice &device, const FeatureFlagsOther featureFlags) + : AddStructShared(device.getIspcrtDevice(), device), + featureFlags(featureFlags) { managedObjectType = OSP_CAMERA; } diff --git a/modules/cpu/camera/Camera.h b/modules/cpu/camera/Camera.h index 7a489365f..cdcaa64e4 100644 --- a/modules/cpu/camera/Camera.h +++ b/modules/cpu/camera/Camera.h @@ -4,6 +4,7 @@ #pragma once #include "ISPCDeviceObject.h" +#include "common/FeatureFlagsEnum.h" #include "common/MotionTransform.h" #include "common/ObjectFactory.h" #include "common/StructShared.h" @@ -19,7 +20,7 @@ struct OSPRAY_SDK_INTERFACE Camera : public AddStructShared, public ObjectFactory { - Camera(api::ISPCDevice &device); + Camera(api::ISPCDevice &device, const FeatureFlagsOther featureFlags); ~Camera() override; std::string toString() const override; @@ -32,6 +33,8 @@ struct OSPRAY_SDK_INTERFACE Camera // Assume no motion blur nor depth of field (true for SciVis) virtual box3f projectBox(const box3f &b) const; + FeatureFlagsOther getFeatureFlagsOther() const; + // Data members // // if motionBlur in local camera space; otherwise in world-space: @@ -51,8 +54,14 @@ struct OSPRAY_SDK_INTERFACE Camera private: RTCGeometry embreeGeometry{nullptr}; MotionTransform motionTransform; + FeatureFlagsOther featureFlags{FFO_NONE}; }; OSPTYPEFOR_SPECIALIZATION(Camera *, OSP_CAMERA); +inline FeatureFlagsOther Camera::getFeatureFlagsOther() const +{ + return featureFlags; +} + } // namespace ospray diff --git 
a/modules/cpu/camera/CameraDispatch.ih b/modules/cpu/camera/CameraDispatch.ih index 417defacb..30b463100 100644 --- a/modules/cpu/camera/CameraDispatch.ih +++ b/modules/cpu/camera/CameraDispatch.ih @@ -3,13 +3,13 @@ #pragma once -#include "common/OSPCommon.ih" +#include "common/FeatureFlags.ih" OSPRAY_BEGIN_ISPC_NAMESPACE -SYCL_EXTERNAL __noinline void Camera_dispatch_initRay( - const Camera *uniform self, +SYCL_EXTERNAL void Camera_dispatch_initRay(const Camera *uniform self, varying Ray &ray, - const varying CameraSample &sample); + const varying CameraSample &sample, + const uniform FeatureFlagsOther ffo); OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/camera/CameraDispatch.ispc b/modules/cpu/camera/CameraDispatch.ispc index 4dd28c5b0..a0c7f84e7 100644 --- a/modules/cpu/camera/CameraDispatch.ispc +++ b/modules/cpu/camera/CameraDispatch.ispc @@ -9,27 +9,23 @@ OSPRAY_BEGIN_ISPC_NAMESPACE -SYCL_EXTERNAL __noinline void Camera_dispatch_initRay( - const Camera *uniform self, +SYCL_EXTERNAL void Camera_dispatch_initRay(const Camera *uniform self, varying Ray &ray, - const varying CameraSample &sample) + const varying CameraSample &sample, + const uniform FeatureFlagsOther ffo) { - switch (self->type) { - case CAMERA_TYPE_PERSPECTIVE: + if ((self->type == CAMERA_TYPE_PERSPECTIVE) + && (ffo & FFO_CAMERA_PERSPECTIVE)) { PerspectiveCamera_initRay(self, ray, sample); - break; - case CAMERA_TYPE_ORTHOGRAPHIC: + } else if ((self->type == CAMERA_TYPE_ORTHOGRAPHIC) + && (ffo & FFO_CAMERA_ORTHOGRAPHIC)) { OrthographicCamera_initRay(self, ray, sample); - break; - case CAMERA_TYPE_PANORAMIC: + } else if ((self->type == CAMERA_TYPE_PANORAMIC) + && (ffo & FFO_CAMERA_PANORAMIC)) { PanoramicCamera_initRay(self, ray, sample); - break; - default: + } else { #ifndef OSPRAY_TARGET_SYCL self->initRay(self, ray, sample); - break; -#else - break; #endif } } diff --git a/modules/cpu/camera/OrthographicCamera.cpp b/modules/cpu/camera/OrthographicCamera.cpp index aa5ee31bc..76ae09a7d 
100644 --- a/modules/cpu/camera/OrthographicCamera.cpp +++ b/modules/cpu/camera/OrthographicCamera.cpp @@ -10,7 +10,7 @@ namespace ospray { OrthographicCamera::OrthographicCamera(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared(device.getIspcrtDevice(), device, FFO_CAMERA_ORTHOGRAPHIC) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.initRay = reinterpret_cast( diff --git a/modules/cpu/camera/OrthographicCamera.ispc b/modules/cpu/camera/OrthographicCamera.ispc index 6ccee447b..eb876da72 100644 --- a/modules/cpu/camera/OrthographicCamera.ispc +++ b/modules/cpu/camera/OrthographicCamera.ispc @@ -20,6 +20,7 @@ SYCL_EXTERNAL void OrthographicCamera_initRay(const Camera *uniform _self, vec3f dir; vec3f org; +#ifndef OSPRAY_TARGET_SYCL if (self->super.motionBlur) { const affine3f xfm = getInterpolatedTransform(self->super.geom, time); // we cannot just transform the final org & dir, because interpolated @@ -33,7 +34,9 @@ SYCL_EXTERNAL void OrthographicCamera_initRay(const Camera *uniform _self, const float x = (screen.x - 0.5f) * self->du_size.x; const float y = (screen.y - 0.5f) * self->du_size.y; org = org + x * du + y * dv; - } else { + } else +#endif + { dir = self->dir; org = self->org + screen.x * self->du_size + screen.y * self->dv_up; } diff --git a/modules/cpu/camera/PanoramicCamera.cpp b/modules/cpu/camera/PanoramicCamera.cpp index 3af077ff6..9b52c5e93 100644 --- a/modules/cpu/camera/PanoramicCamera.cpp +++ b/modules/cpu/camera/PanoramicCamera.cpp @@ -14,7 +14,7 @@ void *PanoramicCamera_initRay_addr(); namespace ospray { PanoramicCamera::PanoramicCamera(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared(device.getIspcrtDevice(), device, FFO_CAMERA_PANORAMIC) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.initRay = reinterpret_cast( diff --git a/modules/cpu/camera/PanoramicCamera.ispc b/modules/cpu/camera/PanoramicCamera.ispc index 0345f01b2..078a61cd9 100644 --- 
a/modules/cpu/camera/PanoramicCamera.ispc +++ b/modules/cpu/camera/PanoramicCamera.ispc @@ -54,6 +54,7 @@ SYCL_EXTERNAL void PanoramicCamera_initRay(const Camera *uniform _self, // transform to camera- and then to world-space vec3f dir; vec3f org; +#ifndef OSPRAY_TARGET_SYCL if (self->super.motionBlur) { const affine3f xfm = getInterpolatedTransform(self->super.geom, time); @@ -66,7 +67,9 @@ SYCL_EXTERNAL void PanoramicCamera_initRay(const Camera *uniform _self, dir = frameMB * make_vec3f(-localDir.y, -localDir.z, localDir.x); org = xfmPoint(xfm, self->org) + offset * cross(dir, frameMB.vy); - } else { + } else +#endif + { dir = self->frame * make_vec3f(-localDir.y, -localDir.z, localDir.x); org = self->org + offset * cross(dir, self->frame.vy); } diff --git a/modules/cpu/camera/PerspectiveCamera.cpp b/modules/cpu/camera/PerspectiveCamera.cpp index b21417f72..aaf04107f 100644 --- a/modules/cpu/camera/PerspectiveCamera.cpp +++ b/modules/cpu/camera/PerspectiveCamera.cpp @@ -10,7 +10,7 @@ namespace ospray { PerspectiveCamera::PerspectiveCamera(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared(device.getIspcrtDevice(), device, FFO_CAMERA_PERSPECTIVE) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.initRay = reinterpret_cast( diff --git a/modules/cpu/common/FeatureFlags.ih b/modules/cpu/common/FeatureFlags.ih new file mode 100644 index 000000000..240328d9f --- /dev/null +++ b/modules/cpu/common/FeatureFlags.ih @@ -0,0 +1,23 @@ +// Copyright 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "FeatureFlagsEnum.h" + +OSPRAY_BEGIN_ISPC_NAMESPACE + +#ifdef OSPRAY_TARGET_SYCL +using namespace ospray; +#endif + +inline uniform FeatureFlags ffAll() +{ + uniform FeatureFlags ff; + ff.geometry = FFG_ALL; + ff.volume = FFV_ALL; + ff.other = FFO_ALL; + return ff; +} + +OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/common/FeatureFlagsEnum.h b/modules/cpu/common/FeatureFlagsEnum.h new file mode 
100644 index 000000000..94c6af990 --- /dev/null +++ b/modules/cpu/common/FeatureFlagsEnum.h @@ -0,0 +1,136 @@ +// Copyright 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#ifdef __cplusplus +namespace ospray { +#endif // __cplusplus + +enum FeatureFlagsGeometry +{ + FFG_NONE = 0, + + FFG_TRIANGLE = 1 << 1, + FFG_QUAD = 1 << 2, + FFG_GRID = 1 << 3, + + FFG_SUBDIVISION = 1 << 4, + + FFG_CONE_LINEAR_CURVE = 1 << 5, + FFG_ROUND_LINEAR_CURVE = 1 << 6, + FFG_FLAT_LINEAR_CURVE = 1 << 7, + + FFG_ROUND_BEZIER_CURVE = 1 << 8, + FFG_FLAT_BEZIER_CURVE = 1 << 9, + FFG_NORMAL_ORIENTED_BEZIER_CURVE = 1 << 10, + + FFG_ROUND_BSPLINE_CURVE = 1 << 11, + FFG_FLAT_BSPLINE_CURVE = 1 << 12, + FFG_NORMAL_ORIENTED_BSPLINE_CURVE = 1 << 13, + + FFG_ROUND_HERMITE_CURVE = 1 << 14, + FFG_FLAT_HERMITE_CURVE = 1 << 15, + FFG_NORMAL_ORIENTED_HERMITE_CURVE = 1 << 16, + + FFG_ROUND_CATMULL_ROM_CURVE = 1 << 17, + FFG_FLAT_CATMULL_ROM_CURVE = 1 << 18, + FFG_NORMAL_ORIENTED_CATMULL_ROM_CURVE = 1 << 19, + + FFG_SPHERE = 1 << 20, + FFG_DISC_POINT = 1 << 21, + FFG_ORIENTED_DISC_POINT = 1 << 22, + + FFG_CURVES = 1 << 5 | 1 << 6 | 1 << 7 | 1 << 8 | 1 << 9 | 1 << 10 | 1 << 11 + | 1 << 12 | 1 << 13 | 1 << 14 | 1 << 15 | 1 << 16 | 1 << 17 | 1 << 18 + | 1 << 19, + + FFG_USER_GEOMETRY = + 1 << 26, // RTC_FEATURE_FLAG_USER_GEOMETRY_CALLBACK_IN_ARGUMENTS + + FFG_BOX = 1 << 29, + FFG_PLANE = 1 << 30, + FFG_ISOSURFACE = 1 << 31, + + FFG_OSPRAY_MASK = 1 << 29 | 1 << 30 | 1 << 31, + + FFG_ALL = 0xffffffff +}; + +enum FeatureFlagsVolume +{ + FFV_NONE = 0, + + FFV_VOLUME = 1 << 0, + + FFV_ALL = 0xffffffff +}; + +enum FeatureFlagsOther +{ + FFO_NONE = 0, + + FFO_FB_LOCAL = 1 << 0, + FFO_FB_SPARSE = 1 << 1, + + FFO_CAMERA_PERSPECTIVE = 1 << 2, + FFO_CAMERA_ORTHOGRAPHIC = 1 << 3, + FFO_CAMERA_PANORAMIC = 1 << 4, + + FFO_LIGHT_AMBIENT = 1 << 5, + FFO_LIGHT_CYLINDER = 1 << 6, + FFO_LIGHT_DIRECTIONAL = 1 << 7, + FFO_LIGHT_HDRI = 1 << 8, + FFO_LIGHT_POINT = 1 << 9, + FFO_LIGHT_QUAD = 1 << 10, 
+ FFO_LIGHT_SPOT = 1 << 11, + FFO_LIGHT_GEOMETRY = 1 << 12, + + FFO_MATERIAL_ALLOY = 1 << 13, + FFO_MATERIAL_CARPAINT = 1 << 14, + FFO_MATERIAL_GLASS = 1 << 15, + FFO_MATERIAL_LUMINOUS = 1 << 16, + FFO_MATERIAL_METAL = 1 << 17, + FFO_MATERIAL_METALLICPAINT = 1 << 18, + FFO_MATERIAL_MIX = 1 << 19, + FFO_MATERIAL_OBJ = 1 << 20, + FFO_MATERIAL_PLASTIC = 1 << 21, + FFO_MATERIAL_PRINCIPLED = 1 << 22, + FFO_MATERIAL_THINGLASS = 1 << 23, + FFO_MATERIAL_VELVET = 1 << 24, + + FFO_TEXTURE_IN_MATERIAL = 1 << 25, + FFO_TEXTURE_IN_RENDERER = 1 << 26, + + FFO_ALL = 0xffffffff +}; + +struct FeatureFlags +{ + FeatureFlagsGeometry geometry; + FeatureFlagsVolume volume; + FeatureFlagsOther other; +#ifdef __cplusplus + void setNone() + { + geometry = FFG_NONE; + volume = FFV_NONE; + other = FFO_NONE; + } +}; + +template +inline T operator|(T a, T b) +{ + return (T)((unsigned int)(a) | (unsigned int)(b)); +} + +template +inline T &operator|=(T &a, T b) +{ + return (T &)((unsigned int &)(a) |= (unsigned int)(b)); +} +} // namespace ospray +#else +}; +#endif // __cplusplus diff --git a/modules/cpu/common/FilterIntersect.ih b/modules/cpu/common/FilterIntersect.ih index de1cc8e95..7ab656b1d 100644 --- a/modules/cpu/common/FilterIntersect.ih +++ b/modules/cpu/common/FilterIntersect.ih @@ -5,6 +5,7 @@ #include "Intersect.ih" #include "Ray.ih" +#include "RayQueryContext.ih" OSPRAY_BEGIN_ISPC_NAMESPACE @@ -63,12 +64,12 @@ inline bool filterIntersectionSingle( // call filter intersection callback if needed #ifndef OSPRAY_TARGET_SYCL - EmbreeRayQueryContextDefault *uniform ctx = - (EmbreeRayQueryContextDefault * uniform) args->context; - uniform bool noFilter = ctx->type != ERQCT_CLIPPING; - bool accept = noFilter || filterCall(args, rtcHitTemp); + RayQueryContextDefault *uniform ctx = + (RayQueryContextDefault * uniform) args->context; + const uniform bool noFilter = ctx->type != RQCT_CLIPPING; + const bool accept = noFilter || filterCall(args, rtcHitTemp); #else - bool accept = true; + const 
bool accept = true; #endif if (alwaysReject) { // the hit is always rejected diff --git a/modules/cpu/common/Group.cpp b/modules/cpu/common/Group.cpp index bff519ca6..3b258ac97 100644 --- a/modules/cpu/common/Group.cpp +++ b/modules/cpu/common/Group.cpp @@ -16,12 +16,17 @@ namespace ospray { template inline void createEmbreeScene(RTCScene &scene, + FeatureFlags &featureFlags, const DataT &objects, const int embreeFlags, const RTCBuildQuality buildQuality) { - for (auto &&obj : objects) + for (auto &&obj : objects) { rtcAttachGeometry(scene, obj->embreeGeometryHandle()); + featureFlags.geometry |= obj->getFeatureFlagsGeometry(); + featureFlags.volume |= obj->getFeatureFlagsVolume(); + featureFlags.other |= obj->getFeatureFlagsOther(); + } rtcSetSceneFlags(scene, static_cast(embreeFlags)); rtcSetSceneBuildQuality(scene, buildQuality); @@ -121,10 +126,14 @@ void Group::commit() throw std::runtime_error("invalid Embree device"); } + featureFlags.setNone(); if (numGeometries > 0) { sceneGeometries = rtcNewScene(embreeDevice); - createEmbreeScene( - sceneGeometries, *geometricModels, sceneFlags, buildQuality); + createEmbreeScene(sceneGeometries, + featureFlags, + *geometricModels, + sceneFlags, + buildQuality); geometricModelsArray = make_buffer_shared_unique( getISPCDevice().getIspcrtDevice(), @@ -137,8 +146,11 @@ void Group::commit() #ifdef OSPRAY_ENABLE_VOLUMES if (numVolumes > 0) { sceneVolumes = rtcNewScene(embreeDevice); - createEmbreeScene( - sceneVolumes, *volumetricModels, sceneFlags, buildQuality); + createEmbreeScene(sceneVolumes, + featureFlags, + *volumetricModels, + sceneFlags, + buildQuality); volumetricModelsArray = make_buffer_shared_unique( getISPCDevice().getIspcrtDevice(), @@ -152,6 +164,7 @@ void Group::commit() if (numClippers > 0) { sceneClippers = rtcNewScene(embreeDevice); createEmbreeScene(sceneClippers, + featureFlags, *clipModels, sceneFlags | RTC_SCENE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS | RTC_SCENE_FLAG_ROBUST, @@ -175,16 +188,22 @@ void 
Group::commit() #endif getSh()->numClipModels = numClippers; - // Create empty scene for lights-only group, - // it is needed to have rtcGeometry created in Instance object - // which in turn is needed for motion blur matrices interpolation - if ((numLights > 0) && (numGeometries == 0) + if (numLights > 0) { + // Gather light types + for (auto &&light : *lights) + featureFlags.other |= light->getFeatureFlagsOther(); + + // Create empty scene for lights-only group, + // it is needed to have rtcGeometry created in Instance object + // which in turn is needed for motion blur matrices interpolation + if ((numGeometries == 0) #ifdef OSPRAY_ENABLE_VOLUMES - && (numVolumes == 0) + && (numVolumes == 0) #endif - && (numClippers == 0)) { - sceneGeometries = rtcNewScene(embreeDevice); - rtcCommitScene(sceneGeometries); + && (numClippers == 0)) { + sceneGeometries = rtcNewScene(embreeDevice); + rtcCommitScene(sceneGeometries); + } } } diff --git a/modules/cpu/common/Group.h b/modules/cpu/common/Group.h index b18a58d4d..cf84e2df6 100644 --- a/modules/cpu/common/Group.h +++ b/modules/cpu/common/Group.h @@ -5,6 +5,7 @@ // ospray stuff #include "Data.h" +#include "FeatureFlagsEnum.h" #include "ISPCDeviceObject.h" #include "StructShared.h" // stl @@ -48,7 +49,11 @@ struct OSPRAY_SDK_INTERFACE Group #endif RTCScene sceneClippers{nullptr}; + const FeatureFlags &getFeatureFlags() const; + private: + FeatureFlags featureFlags; + std::unique_ptr> geometricModelsArray; #ifdef OSPRAY_ENABLE_VOLUMES std::unique_ptr> volumetricModelsArray; @@ -58,4 +63,9 @@ struct OSPRAY_SDK_INTERFACE Group OSPTYPEFOR_SPECIALIZATION(Group *, OSP_GROUP); +inline const FeatureFlags &Group::getFeatureFlags() const +{ + return featureFlags; +} + } // namespace ospray diff --git a/modules/cpu/common/Instance.ih b/modules/cpu/common/Instance.ih index f439a708b..c60f1a0e2 100644 --- a/modules/cpu/common/Instance.ih +++ b/modules/cpu/common/Instance.ih @@ -40,7 +40,8 @@ inline void Instance_postIntersect(const 
Instance *uniform self, varying DifferentialGeometry &dg, const varying Ray &ray, uniform int64 flags, - const uniform bool clip = false) + const uniform bool clip, + const uniform FeatureFlags &ff) { GeometricModel **uniform models = clip ? self->group->clipModels : self->group->geometricModels; @@ -51,7 +52,8 @@ inline void Instance_postIntersect(const Instance *uniform self, #else foreach_unique (geomID in ray.geomID) { #endif - GeometricModel_postIntersect(*(models + geomID), renderer, dg, ray, flags); + GeometricModel_postIntersect( + *(models + geomID), renderer, dg, ray, flags, ff); } dg.instID = diff --git a/modules/cpu/common/Ray.ih b/modules/cpu/common/Ray.ih index 252513e1e..0e3a9a285 100644 --- a/modules/cpu/common/Ray.ih +++ b/modules/cpu/common/Ray.ih @@ -38,26 +38,6 @@ struct Ray uint32 instID; //!< instance ID }; -enum EmbreeRayQueryContextType -{ - ERQCT_DEFAULT, - ERQCT_CLIPPING, - ERQCT_UNKNOWN -}; - -struct EmbreeRayQueryContextDefault -{ - RTCRayQueryContext ectx; - EmbreeRayQueryContextType type; -}; - -inline void InitRayQueryContextDefault( - EmbreeRayQueryContextDefault *uniform context) -{ - rtcInitRayQueryContext(&context->ectx); - context->type = ERQCT_DEFAULT; -} - // Hit query functions //////////////////////////////////////////////////////// inline bool noHit(const Ray &ray) diff --git a/modules/cpu/common/RayQueryContext.ih b/modules/cpu/common/RayQueryContext.ih new file mode 100644 index 000000000..c51ec516b --- /dev/null +++ b/modules/cpu/common/RayQueryContext.ih @@ -0,0 +1,39 @@ +// Copyright 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +// ospray +#include "FeatureFlagsEnum.h" +// embree +#include "Embree.h" + +OSPRAY_BEGIN_ISPC_NAMESPACE + +#ifdef OSPRAY_TARGET_SYCL +using namespace ospray; +#endif + +enum RayQueryContextType +{ + RQCT_DEFAULT, + RQCT_CLIPPING, + RQCT_UNKNOWN +}; + +struct RayQueryContextDefault +{ + RTCRayQueryContext ectx; + RayQueryContextType type; + FeatureFlagsGeometry 
ffg; +}; + +inline void initRayQueryContextDefault(RayQueryContextDefault *uniform context, + const uniform FeatureFlagsGeometry ffg) +{ + rtcInitRayQueryContext(&context->ectx); + context->type = RQCT_DEFAULT; + context->ffg = ffg; +} + +OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/common/VolumeIntervals.ih b/modules/cpu/common/VolumeIntervals.ih index 2789996e5..8cddff295 100644 --- a/modules/cpu/common/VolumeIntervals.ih +++ b/modules/cpu/common/VolumeIntervals.ih @@ -59,14 +59,13 @@ inline void freeVolumeIntervals(VolumeIntervals &intervals) } #endif -struct EmbreeRayQueryContextVolume +struct RayQueryContextVolume { RTCRayQueryContext ectx; varying VolumeIntervals *intervals; }; -inline void InitRayQueryContextVolume( - EmbreeRayQueryContextVolume *uniform context, +inline void InitRayQueryContextVolume(RayQueryContextVolume *uniform context, varying VolumeIntervals *uniform intervals) { rtcInitRayQueryContext(&context->ectx); diff --git a/modules/cpu/common/World.cpp b/modules/cpu/common/World.cpp index 351043e90..daec0e826 100644 --- a/modules/cpu/common/World.cpp +++ b/modules/cpu/common/World.cpp @@ -128,6 +128,7 @@ void World::commit() // Populate shared buffer with instance pointers, // create Embree instances + featureFlags.setNone(); unsigned int id = 0; for (auto &&inst : *instances) { getSh()->instances[id] = inst->getSh(); @@ -147,10 +148,20 @@ void World::commit() esClip, inst->group->sceneClippers, inst, embreeDevice, id); } #endif + // Gather feature flags from all groups + const FeatureFlags &gff = inst->group->getFeatureFlags(); + featureFlags.geometry |= gff.geometry; + featureFlags.volume |= gff.volume; + featureFlags.other |= gff.other; id++; } } + // Gather light types + if (lights) + for (auto &&light : *lights) + featureFlags.other |= light->getFeatureFlagsOther(); + if (esGeom) { rtcSetSceneFlags(esGeom, static_cast(sceneFlags)); rtcSetSceneBuildQuality(esGeom, buildQuality); diff --git a/modules/cpu/common/World.h 
b/modules/cpu/common/World.h index bf71eeb3b..59f3eaa40 100644 --- a/modules/cpu/common/World.h +++ b/modules/cpu/common/World.h @@ -5,6 +5,7 @@ // ospray stuff #include "Data.h" +#include "FeatureFlagsEnum.h" #include "ISPCDeviceObject.h" // stl #include @@ -29,6 +30,8 @@ struct OSPRAY_SDK_INTERFACE World box3f getBounds() const override; + const FeatureFlags &getFeatureFlags() const; + // Data members // Ref> instances; @@ -38,8 +41,16 @@ struct OSPRAY_SDK_INTERFACE World std::unique_ptr scivisData; std::unique_ptr pathtracerData; + + private: + FeatureFlags featureFlags; }; OSPTYPEFOR_SPECIALIZATION(World *, OSP_WORLD); +inline const FeatureFlags &World::getFeatureFlags() const +{ + return featureFlags; +} + } // namespace ospray diff --git a/modules/cpu/common/World.ih b/modules/cpu/common/World.ih index 3876393c3..d86c68ab9 100644 --- a/modules/cpu/common/World.ih +++ b/modules/cpu/common/World.ih @@ -7,8 +7,10 @@ #include "common/Clipping.ih" #include "common/DGEnum.h" #include "common/DifferentialGeometry.ih" +#include "common/FeatureFlags.ih" #include "common/Instance.ih" #include "common/Ray.ih" +#include "common/RayQueryContext.ih" #include "common/VolumeIntervals.ih" #include "geometry/GeometryDispatch.ih" #include "volume/Volume.ih" @@ -17,38 +19,29 @@ OSPRAY_BEGIN_ISPC_NAMESPACE -#ifdef OSPRAY_TARGET_SYCL -// Define minimal subset of features used in Embree, -// less features means smaller kernel on GPU -#define FEATURE_MASK \ - RTC_FEATURE_FLAG_TRIANGLE | RTC_FEATURE_FLAG_QUAD \ - | RTC_FEATURE_FLAG_SUBDIVISION | RTC_FEATURE_FLAG_CURVES \ - | RTC_FEATURE_FLAG_SPHERE_POINT | RTC_FEATURE_FLAG_INSTANCE \ - | RTC_FEATURE_FLAG_USER_GEOMETRY_CALLBACK_IN_ARGUMENTS -#else -#define FEATURE_MASK RTC_FEATURE_FLAG_ALL -#endif - #ifdef OSPRAY_TARGET_SYCL using namespace ospray; #endif struct Renderer; -inline void traceGeometryRay(const World *uniform world, varying Ray &ray) +inline void traceGeometryRay(const World *uniform world, + varying Ray &ray, + const 
uniform FeatureFlags &ff) { // Skip if no geometries scene if (!world->embreeSceneHandleGeometries) return; - uniform EmbreeRayQueryContextDefault context; - InitRayQueryContextDefault(&context); + uniform RayQueryContextDefault context; + initRayQueryContextDefault(&context, ff.geometry); uniform RTCIntersectArguments intersectArgs; rtcInitIntersectArguments(&intersectArgs); intersectArgs.context = &context.ectx; intersectArgs.intersect = (RTCIntersectFunctionN)Geometry_dispatch_intersect; - intersectArgs.feature_mask = (uniform RTCFeatureFlags)(FEATURE_MASK); + intersectArgs.feature_mask = + (uniform RTCFeatureFlags)(ff.geometry & ~FFG_OSPRAY_MASK); rtcIntersectV(world->embreeSceneHandleGeometries, (varying RTCRayHit * uniform) & ray, @@ -74,14 +67,15 @@ inline void traceVolumeRay( allocVolumeIntervals(intervals); #endif - uniform EmbreeRayQueryContextVolume context; + uniform RayQueryContextVolume context; InitRayQueryContextVolume(&context, &intervals); uniform RTCIntersectArguments intersectArgs; rtcInitIntersectArguments(&intersectArgs); intersectArgs.context = &context.ectx; intersectArgs.intersect = (RTCIntersectFunctionN)Volume_intersect_kernel; - intersectArgs.feature_mask = (uniform RTCFeatureFlags)(FEATURE_MASK); + intersectArgs.feature_mask = + RTC_FEATURE_FLAG_USER_GEOMETRY_CALLBACK_IN_ARGUMENTS; rtcIntersectV(world->embreeSceneHandleVolumes, (varying RTCRayHit * uniform) & ray, @@ -107,14 +101,15 @@ inline void traceVolumeRay( if (!world->embreeSceneHandleVolumes) return; - uniform EmbreeRayQueryContextVolume context; + uniform RayQueryContextVolume context; InitRayQueryContextVolume(&context, &intervals); uniform RTCIntersectArguments intersectArgs; rtcInitIntersectArguments(&intersectArgs); intersectArgs.context = &context.ectx; intersectArgs.intersect = (RTCIntersectFunctionN)Volume_intersect_kernel; - intersectArgs.feature_mask = (uniform RTCFeatureFlags)(FEATURE_MASK); + intersectArgs.feature_mask = + 
RTC_FEATURE_FLAG_USER_GEOMETRY_CALLBACK_IN_ARGUMENTS; rtcIntersectV(world->embreeSceneHandleVolumes, (varying RTCRayHit * uniform) & ray, @@ -132,9 +127,9 @@ inline void traceVolumeRay( #endif // Intersection context structure used for clipping geometries -struct EmbreeRayQueryContextClipping +struct RayQueryContextClipping { - uniform EmbreeRayQueryContextDefault super; + uniform RayQueryContextDefault super; const World *uniform world; varying int32 corrClippingDepth; varying uint32 hitsCount; @@ -162,9 +157,10 @@ inline void traceClippingRay(const World *uniform world, } // Create and initialize intersection context - EmbreeRayQueryContextClipping context; + RayQueryContextClipping context; rtcInitRayQueryContext(&context.super.ectx); - context.super.type = ERQCT_CLIPPING; + context.super.type = RQCT_CLIPPING; + context.super.ffg = FFG_ALL; context.world = world; context.corrClippingDepth = 0; context.hitsCount = 0; @@ -242,8 +238,10 @@ inline void traceClippingRay(const World *uniform world, #endif } -inline void traceGeometryRayIntervals( - const World *uniform world, Ray &ray, RayIntervals &rayIntervals) +inline void traceGeometryRayIntervals(const World *uniform world, + Ray &ray, + RayIntervals &rayIntervals, + const uniform FeatureFlags &ff) { // Save the ray const float t0 = ray.t0; @@ -267,7 +265,7 @@ inline void traceGeometryRayIntervals( ray.t = min(ray.t, t); // Shoot the ray - traceGeometryRay(world, ray); + traceGeometryRay(world, ray, ff); // Exit loop if geometry hit if (hadHit(ray)) { @@ -281,15 +279,17 @@ inline void traceGeometryRayIntervals( ray.t = t; } -inline void traceRay(const World *uniform world, varying Ray &ray) +inline void traceRay(const World *uniform world, + varying Ray &ray, + const uniform FeatureFlags &ff) { #ifdef OSPRAY_TARGET_SYCL // Clipping disabled for now - traceGeometryRay(world, ray); + traceGeometryRay(world, ray, ff); #else // Fast path if no clipping geometry if (!world->embreeSceneHandleClippers) { - 
traceGeometryRay(world, ray); + traceGeometryRay(world, ray, ff); return; } @@ -298,24 +298,27 @@ inline void traceRay(const World *uniform world, varying Ray &ray) traceClippingRay(world, ray, rayIntervals); // Trace ray intervals - traceGeometryRayIntervals(world, ray, rayIntervals); + traceGeometryRayIntervals(world, ray, rayIntervals, ff); #endif } -inline bool isOccludedNoClipping(const World *uniform world, varying Ray &ray) +inline bool isOccludedNoClipping(const World *uniform world, + varying Ray &ray, + const uniform FeatureFlags &ff) { // Skip if no geometries scene if (!world->embreeSceneHandleGeometries) return false; - uniform EmbreeRayQueryContextDefault context; - InitRayQueryContextDefault(&context); + uniform RayQueryContextDefault context; + initRayQueryContextDefault(&context, ff.geometry); uniform RTCOccludedArguments occludedArgs; rtcInitOccludedArguments(&occludedArgs); occludedArgs.context = &context.ectx; occludedArgs.occluded = (RTCOccludedFunctionN)Geometry_dispatch_occluded; - occludedArgs.feature_mask = (uniform RTCFeatureFlags)(FEATURE_MASK); + occludedArgs.feature_mask = + (uniform RTCFeatureFlags)(ff.geometry & ~FFG_OSPRAY_MASK); rtcOccludedV(world->embreeSceneHandleGeometries, (varying RTCRay * uniform) & ray, @@ -326,7 +329,8 @@ inline bool isOccludedNoClipping(const World *uniform world, varying Ray &ray) inline bool areIntervalsOccluded(const World *uniform world, varying Ray &ray, - varying RayIntervals &rayIntervals) + varying RayIntervals &rayIntervals, + const uniform FeatureFlags &ff) { // Iterate through ray intervals for (uint32 i = 0; i < rayIntervals.count; i++) { @@ -335,7 +339,7 @@ inline bool areIntervalsOccluded(const World *uniform world, ray.t = rayIntervals.intervals[i].upper; // Check for occluders - if (isOccludedNoClipping(world, ray)) + if (isOccludedNoClipping(world, ray, ff)) return true; } @@ -343,18 +347,19 @@ inline bool areIntervalsOccluded(const World *uniform world, return false; } -inline bool 
isOccluded(const World *uniform world, varying Ray &ray) +inline bool isOccluded(const World *uniform world, + varying Ray &ray, + const uniform FeatureFlags &ff) { #ifdef OSPRAY_TARGET_SYCL // Clipping disabled for now - return isOccludedNoClipping(world, ray); + return isOccludedNoClipping(world, ray, ff); #else // Fast path if no clipping geometry if (!world->embreeSceneHandleClippers) { - return isOccludedNoClipping(world, ray); + return isOccludedNoClipping(world, ray, ff); } - // TODO: use new Embree4 callback setup for clipping when it's enabled // Allocate array for ray intervals varying RayIntervals rayIntervals; rayIntervals.count = 0; @@ -363,7 +368,7 @@ inline bool isOccluded(const World *uniform world, varying Ray &ray) traceClippingRay(world, ray, rayIntervals); // Is there any occluder within given ray intervals - return areIntervalsOccluded(world, ray, rayIntervals); + return areIntervalsOccluded(world, ray, rayIntervals, ff); #endif } @@ -378,7 +383,8 @@ inline void postIntersect(const World *uniform world, const Renderer *uniform renderer, varying DifferentialGeometry &dg, const varying Ray &ray, - uniform int64 flags) + uniform int64 flags, + const uniform FeatureFlags &ff) { dg.primID = ray.primID; dg.st = make_vec2f(ray.u, ray.v); @@ -406,7 +412,7 @@ inline void postIntersect(const World *uniform world, #endif if (instID != RTC_INVALID_GEOMETRY_ID) { Instance *uniform instance = *(world->instances + instID); - Instance_postIntersect(instance, renderer, dg, ray, flags); + Instance_postIntersect(instance, renderer, dg, ray, flags, false, ff); } else { dg.Ns = dg.Ng = ray.Ng; } @@ -447,4 +453,5 @@ inline void postIntersect(const World *uniform world, #undef DG_NG_NORMALIZE #undef DG_NS_NORMALIZE } + OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/common/World.ispc b/modules/cpu/common/World.ispc index 4165d2c5c..1ff246f73 100644 --- a/modules/cpu/common/World.ispc +++ b/modules/cpu/common/World.ispc @@ -15,8 +15,8 @@ unmasked void 
clippingIntersectionFilterV( return; // Get pointer to the intersection context - EmbreeRayQueryContextClipping *uniform context = - (EmbreeRayQueryContextClipping * uniform) args->context; + RayQueryContextClipping *uniform context = + (RayQueryContextClipping * uniform) args->context; // We skip this intersection to collect all remaining intersections valid[programIndex] = 0; @@ -52,7 +52,8 @@ unmasked void clippingIntersectionFilterV( #endif // Call postIntersect to get shading normal Instance *uniform instance = *(context->world->instances + instID); - Instance_postIntersect(instance, NULL, dg, *ray, DG_NG | DG_NS, true); + Instance_postIntersect( + instance, NULL, dg, *ray, DG_NG | DG_NS, true, ffAll()); // Use geometry normal for clipping // but use shading normal to check if invertion is needed diff --git a/modules/cpu/fb/FrameBuffer.cpp b/modules/cpu/fb/FrameBuffer.cpp index e0c5cebe0..ae7456e0d 100644 --- a/modules/cpu/fb/FrameBuffer.cpp +++ b/modules/cpu/fb/FrameBuffer.cpp @@ -29,7 +29,8 @@ namespace ospray { FrameBuffer::FrameBuffer(api::ISPCDevice &device, const vec2i &_size, ColorBufferFormat _colorBufferFormat, - const uint32 channels) + const uint32 channels, + const FeatureFlagsOther ffo) : AddStructShared(device.getIspcrtDevice(), device), size(_size), hasDepthBuffer(channels & OSP_FB_DEPTH), @@ -40,7 +41,8 @@ FrameBuffer::FrameBuffer(api::ISPCDevice &device, hasAlbedoBuffer(channels & OSP_FB_ALBEDO), hasPrimitiveIDBuffer(channels & OSP_FB_ID_PRIMITIVE), hasObjectIDBuffer(channels & OSP_FB_ID_OBJECT), - hasInstanceIDBuffer(channels & OSP_FB_ID_INSTANCE) + hasInstanceIDBuffer(channels & OSP_FB_ID_INSTANCE), + featureFlags(ffo) { managedObjectType = OSP_FRAMEBUFFER; if (_size.x <= 0 || _size.y <= 0) { diff --git a/modules/cpu/fb/FrameBuffer.h b/modules/cpu/fb/FrameBuffer.h index 7dc3a58b8..d9a31985d 100644 --- a/modules/cpu/fb/FrameBuffer.h +++ b/modules/cpu/fb/FrameBuffer.h @@ -7,6 +7,7 @@ // ospray #include "ISPCDeviceObject.h" #include 
"common/Data.h" +#include "common/FeatureFlagsEnum.h" #include "fb/ImageOp.h" #include "ospray/ospray.h" #include "rkcommon/utility/ArrayView.h" @@ -26,7 +27,8 @@ struct OSPRAY_SDK_INTERFACE FrameBuffer FrameBuffer(api::ISPCDevice &device, const vec2i &size, ColorBufferFormat colorBufferFormat, - const uint32 channels); + const uint32 channels, + const FeatureFlagsOther ffo); virtual ~FrameBuffer() override = default; @@ -91,6 +93,8 @@ struct OSPRAY_SDK_INTERFACE FrameBuffer int32 getFrameID() const; + FeatureFlagsOther getFeatureFlagsOther() const; + protected: // Finalize the pixel op and frame op state for rendering on commit void prepareImageOps(); @@ -128,8 +132,15 @@ struct OSPRAY_SDK_INTERFACE FrameBuffer std::vector> imageOps; std::vector pixelOpShs; size_t firstFrameOperation = -1; + + FeatureFlagsOther featureFlags{FFO_NONE}; }; OSPTYPEFOR_SPECIALIZATION(FrameBuffer *, OSP_FRAMEBUFFER); +inline FeatureFlagsOther FrameBuffer::getFeatureFlagsOther() const +{ + return featureFlags; +} + } // namespace ospray diff --git a/modules/cpu/fb/FrameBufferDispatch.ih b/modules/cpu/fb/FrameBufferDispatch.ih index 440773654..6c33e67e8 100644 --- a/modules/cpu/fb/FrameBufferDispatch.ih +++ b/modules/cpu/fb/FrameBufferDispatch.ih @@ -3,6 +3,7 @@ #pragma once +#include "common/FeatureFlags.ih" #include "common/OSPCommon.ih" OSPRAY_BEGIN_ISPC_NAMESPACE @@ -11,16 +12,19 @@ struct FrameBuffer; struct RenderTaskDesc; struct ScreenSample; -SYCL_EXTERNAL __noinline uniform RenderTaskDesc -FrameBuffer_dispatch_getRenderTaskDesc( - FrameBuffer *uniform fb, const uniform uint32 taskID); +SYCL_EXTERNAL uniform RenderTaskDesc FrameBuffer_dispatch_getRenderTaskDesc( + FrameBuffer *uniform fb, + const uniform uint32 taskID, + const uniform FeatureFlagsOther ffo); -SYCL_EXTERNAL __noinline void FrameBuffer_dispatch_accumulateSample( +SYCL_EXTERNAL void FrameBuffer_dispatch_accumulateSample( FrameBuffer *uniform fb, const varying ScreenSample &screenSample, - uniform RenderTaskDesc 
&taskDesc); + uniform RenderTaskDesc &taskDesc, + const uniform FeatureFlagsOther ffo); -SYCL_EXTERNAL __noinline void FrameBuffer_dispatch_completeTask( - FrameBuffer *uniform fb, const uniform RenderTaskDesc &taskDesc); +SYCL_EXTERNAL void FrameBuffer_dispatch_completeTask(FrameBuffer *uniform fb, + const uniform RenderTaskDesc &taskDesc, + const uniform FeatureFlagsOther ffo); OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/fb/FrameBufferDispatch.ispc b/modules/cpu/fb/FrameBufferDispatch.ispc index fdd49e078..3edb14b21 100644 --- a/modules/cpu/fb/FrameBufferDispatch.ispc +++ b/modules/cpu/fb/FrameBufferDispatch.ispc @@ -11,20 +11,18 @@ OSPRAY_BEGIN_ISPC_NAMESPACE -SYCL_EXTERNAL __noinline uniform RenderTaskDesc -FrameBuffer_dispatch_getRenderTaskDesc( - FrameBuffer *uniform fb, const uniform uint32 taskID) +SYCL_EXTERNAL uniform RenderTaskDesc FrameBuffer_dispatch_getRenderTaskDesc( + FrameBuffer *uniform fb, + const uniform uint32 taskID, + const uniform FeatureFlagsOther ffo) { - switch (fb->type) { - case FRAMEBUFFER_TYPE_LOCAL: + if ((fb->type == FRAMEBUFFER_TYPE_LOCAL) && (ffo & FFO_FB_LOCAL)) { return LocalFB_getRenderTaskDesc(fb, taskID); - case FRAMEBUFFER_TYPE_SPARSE: + } else if ((fb->type == FRAMEBUFFER_TYPE_SPARSE) && (ffo & FFO_FB_SPARSE)) { return SparseFB_getRenderTaskDesc(fb, taskID); - default: + } else { #ifndef OSPRAY_TARGET_SYCL return fb->getRenderTaskDesc(fb, taskID); -#else - break; #endif } // TODO: Should be an error here @@ -34,41 +32,35 @@ FrameBuffer_dispatch_getRenderTaskDesc( return rt; } -SYCL_EXTERNAL __noinline void FrameBuffer_dispatch_accumulateSample( +SYCL_EXTERNAL void FrameBuffer_dispatch_accumulateSample( FrameBuffer *uniform fb, const varying ScreenSample &screenSample, - uniform RenderTaskDesc &taskDesc) + uniform RenderTaskDesc &taskDesc, + const uniform FeatureFlagsOther ffo) { - switch (fb->type) { - case FRAMEBUFFER_TYPE_LOCAL: + if ((fb->type == FRAMEBUFFER_TYPE_LOCAL) && (ffo & FFO_FB_LOCAL)) { 
LocalFB_accumulateSample(fb, screenSample, taskDesc); - break; - case FRAMEBUFFER_TYPE_SPARSE: + } else if ((fb->type == FRAMEBUFFER_TYPE_SPARSE) && (ffo & FFO_FB_SPARSE)) { SparseFB_accumulateSample(fb, screenSample, taskDesc); - break; - default: + } else { #ifndef OSPRAY_TARGET_SYCL fb->accumulateSample(fb, screenSample, taskDesc); #endif - break; } } -SYCL_EXTERNAL __noinline void FrameBuffer_dispatch_completeTask( - FrameBuffer *uniform fb, const uniform RenderTaskDesc &taskDesc) +SYCL_EXTERNAL void FrameBuffer_dispatch_completeTask(FrameBuffer *uniform fb, + const uniform RenderTaskDesc &taskDesc, + const uniform FeatureFlagsOther ffo) { - switch (fb->type) { - case FRAMEBUFFER_TYPE_LOCAL: + if ((fb->type == FRAMEBUFFER_TYPE_LOCAL) && (ffo & FFO_FB_LOCAL)) { LocalFB_completeTask(fb, taskDesc); - break; - case FRAMEBUFFER_TYPE_SPARSE: + } else if ((fb->type == FRAMEBUFFER_TYPE_SPARSE) && (ffo & FFO_FB_SPARSE)) { SparseFB_completeTask(fb, taskDesc); - break; - default: + } else { #ifndef OSPRAY_TARGET_SYCL fb->completeTask(fb, taskDesc); #endif - break; } } diff --git a/modules/cpu/fb/LocalFB.cpp b/modules/cpu/fb/LocalFB.cpp index ededd79b6..9c7a1616b 100644 --- a/modules/cpu/fb/LocalFB.cpp +++ b/modules/cpu/fb/LocalFB.cpp @@ -39,8 +39,12 @@ LocalFrameBuffer::LocalFrameBuffer(api::ISPCDevice &device, const vec2i &_size, ColorBufferFormat _colorBufferFormat, const uint32 channels) - : AddStructShared( - device.getIspcrtDevice(), device, _size, _colorBufferFormat, channels), + : AddStructShared(device.getIspcrtDevice(), + device, + _size, + _colorBufferFormat, + channels, + FFO_FB_LOCAL), numRenderTasks(divRoundUp(size, getRenderTaskSize())), taskErrorRegion(device.getIspcrtDevice(), hasVarianceBuffer ? 
getNumRenderTasks() : vec2i(0)) diff --git a/modules/cpu/fb/LocalFB.ih b/modules/cpu/fb/LocalFB.ih index f7f5fefa3..e7b80c773 100644 --- a/modules/cpu/fb/LocalFB.ih +++ b/modules/cpu/fb/LocalFB.ih @@ -1,6 +1,7 @@ // Copyright 2009-2022 Intel Corporation // SPDX-License-Identifier: Apache-2.0 +#include "common/FeatureFlags.ih" #include "fb/FrameBuffer.ih" #include "fb/Tile.ih" // c++ shared diff --git a/modules/cpu/fb/LocalFB.ispc b/modules/cpu/fb/LocalFB.ispc index 71e864aff..eda2f24e0 100644 --- a/modules/cpu/fb/LocalFB.ispc +++ b/modules/cpu/fb/LocalFB.ispc @@ -4,6 +4,7 @@ #include "FrameBuffer.ih" #include "RenderTaskDesc.ih" #include "Tile.ih" +#include "common/FeatureFlags.ih" #include "render/ScreenSample.ih" // c++ shared #include "LocalFBShared.h" diff --git a/modules/cpu/fb/SparseFB.cpp b/modules/cpu/fb/SparseFB.cpp index d321f2d9d..e98b674c7 100644 --- a/modules/cpu/fb/SparseFB.cpp +++ b/modules/cpu/fb/SparseFB.cpp @@ -21,8 +21,12 @@ SparseFrameBuffer::SparseFrameBuffer(api::ISPCDevice &device, const uint32 channels, const std::vector &_tileIDs, const bool overrideUseTaskAccumIDs) - : AddStructShared( - device.getIspcrtDevice(), device, _size, _colorBufferFormat, channels), + : AddStructShared(device.getIspcrtDevice(), + device, + _size, + _colorBufferFormat, + channels, + FFO_FB_SPARSE), useTaskAccumIDs((channels & OSP_FB_ACCUM) || overrideUseTaskAccumIDs), totalTiles(divRoundUp(size, vec2i(TILE_SIZE))) { @@ -42,8 +46,12 @@ SparseFrameBuffer::SparseFrameBuffer(api::ISPCDevice &device, ColorBufferFormat _colorBufferFormat, const uint32 channels, const bool overrideUseTaskAccumIDs) - : AddStructShared( - device.getIspcrtDevice(), device, _size, _colorBufferFormat, channels), + : AddStructShared(device.getIspcrtDevice(), + device, + _size, + _colorBufferFormat, + channels, + FFO_FB_SPARSE), useTaskAccumIDs((channels & OSP_FB_ACCUM) || overrideUseTaskAccumIDs), totalTiles(divRoundUp(size, vec2i(TILE_SIZE))) { diff --git a/modules/cpu/geometry/Boxes.cpp 
b/modules/cpu/geometry/Boxes.cpp index 663323498..a119d04f8 100644 --- a/modules/cpu/geometry/Boxes.cpp +++ b/modules/cpu/geometry/Boxes.cpp @@ -16,7 +16,8 @@ void Boxes_bounds(const RTCBoundsFunctionArguments *uniform args); namespace ospray { Boxes::Boxes(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared( + device.getIspcrtDevice(), device, FFG_BOX | FFG_USER_GEOMETRY) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.postIntersect = diff --git a/modules/cpu/geometry/Curves.cpp b/modules/cpu/geometry/Curves.cpp index 0a4ed0408..a046d1383 100644 --- a/modules/cpu/geometry/Curves.cpp +++ b/modules/cpu/geometry/Curves.cpp @@ -45,10 +45,31 @@ static std::map, RTCGeometryType> RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_CATMULL_ROM_CURVE}, {{OSP_DISJOINT, OSP_CATMULL_ROM}, (RTCGeometryType)-1}}; +static std::map curveFeatureFlags = { + {RTC_GEOMETRY_TYPE_CONE_LINEAR_CURVE, FFG_CONE_LINEAR_CURVE}, + {RTC_GEOMETRY_TYPE_ROUND_LINEAR_CURVE, FFG_ROUND_LINEAR_CURVE}, + {RTC_GEOMETRY_TYPE_FLAT_LINEAR_CURVE, FFG_FLAT_LINEAR_CURVE}, + {RTC_GEOMETRY_TYPE_ROUND_BEZIER_CURVE, FFG_ROUND_BEZIER_CURVE}, + {RTC_GEOMETRY_TYPE_FLAT_BEZIER_CURVE, FFG_FLAT_BEZIER_CURVE}, + {RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_BEZIER_CURVE, + FFG_NORMAL_ORIENTED_BEZIER_CURVE}, + {RTC_GEOMETRY_TYPE_ROUND_BSPLINE_CURVE, FFG_ROUND_BSPLINE_CURVE}, + {RTC_GEOMETRY_TYPE_FLAT_BSPLINE_CURVE, FFG_FLAT_BSPLINE_CURVE}, + {RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_BSPLINE_CURVE, + FFG_NORMAL_ORIENTED_BSPLINE_CURVE}, + {RTC_GEOMETRY_TYPE_ROUND_HERMITE_CURVE, FFG_ROUND_HERMITE_CURVE}, + {RTC_GEOMETRY_TYPE_FLAT_HERMITE_CURVE, FFG_FLAT_HERMITE_CURVE}, + {RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_HERMITE_CURVE, + FFG_NORMAL_ORIENTED_HERMITE_CURVE}, + {RTC_GEOMETRY_TYPE_ROUND_CATMULL_ROM_CURVE, FFG_ROUND_CATMULL_ROM_CURVE}, + {RTC_GEOMETRY_TYPE_FLAT_CATMULL_ROM_CURVE, FFG_FLAT_CATMULL_ROM_CURVE}, + {RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_CATMULL_ROM_CURVE, + FFG_NORMAL_ORIENTED_CATMULL_ROM_CURVE}}; + // Curves 
definitions /////////////////////////////////////////////////////// Curves::Curves(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared(device.getIspcrtDevice(), device, FFG_NONE) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.postIntersect = @@ -150,6 +171,7 @@ void Curves::commit() } postCreationInfo(vertexData->size()); + featureFlags = curveFeatureFlags[embreeCurveType]; } size_t Curves::numPrimitives() const diff --git a/modules/cpu/geometry/GeometricModel.cpp b/modules/cpu/geometry/GeometricModel.cpp index e9901befb..abf3e372d 100644 --- a/modules/cpu/geometry/GeometricModel.cpp +++ b/modules/cpu/geometry/GeometricModel.cpp @@ -32,7 +32,11 @@ void GeometricModel::commit() getSh()->material = nullptr; getSh()->materialID = nullptr; getSh()->numMaterials = 0; + featureFlags = FFO_NONE; if (materialData) { + for (auto &&mat : materialData->as()) + featureFlags |= mat->getFeatureFlagsOther(); + materialArray = make_buffer_shared_unique( getISPCDevice().getIspcrtDevice(), createArrayOfSh(materialData->as())); diff --git a/modules/cpu/geometry/GeometricModel.h b/modules/cpu/geometry/GeometricModel.h index b3fe58769..377609aa1 100644 --- a/modules/cpu/geometry/GeometricModel.h +++ b/modules/cpu/geometry/GeometricModel.h @@ -34,6 +34,10 @@ struct OSPRAY_SDK_INTERFACE GeometricModel bool hasEmissiveMaterials( Ref> rendererMaterials) const; + FeatureFlagsGeometry getFeatureFlagsGeometry() const; + FeatureFlagsVolume getFeatureFlagsVolume() const; + FeatureFlagsOther getFeatureFlagsOther() const; + private: Ref geom; const Ref geomAPI; @@ -42,6 +46,8 @@ struct OSPRAY_SDK_INTERFACE GeometricModel Ref> indexData; std::unique_ptr> materialArray; std::unique_ptr> materialIDArray; + + FeatureFlagsOther featureFlags{FFO_NONE}; }; OSPTYPEFOR_SPECIALIZATION(GeometricModel *, OSP_GEOMETRIC_MODEL); @@ -63,4 +69,19 @@ inline bool GeometricModel::invertedNormals() const return getSh()->invertedNormals; } +inline FeatureFlagsGeometry 
GeometricModel::getFeatureFlagsGeometry() const +{ + return geom->getFeatureFlagsGeometry(); +} + +inline FeatureFlagsVolume GeometricModel::getFeatureFlagsVolume() const +{ + return FFV_NONE; +} + +inline FeatureFlagsOther GeometricModel::getFeatureFlagsOther() const +{ + return featureFlags; +} + } // namespace ospray diff --git a/modules/cpu/geometry/GeometricModel.ih b/modules/cpu/geometry/GeometricModel.ih index 7a5bed600..7606a2ce0 100644 --- a/modules/cpu/geometry/GeometricModel.ih +++ b/modules/cpu/geometry/GeometricModel.ih @@ -6,6 +6,7 @@ #include "Geometry.ih" #include "GeometryDispatch.ih" #include "common/Data.ih" +#include "common/FeatureFlagsEnum.h" #include "render/Renderer.ih" #include "rkcommon/math/AffineSpace.ih" // c++ shared @@ -47,11 +48,12 @@ inline void GeometricModel_postIntersect(const GeometricModel *uniform self, const Renderer *uniform renderer, varying DifferentialGeometry &dg, const varying Ray &ray, - uniform int64 flags) + uniform int64 flags, + const uniform FeatureFlags &ff) { Geometry *uniform geom = self->geom; - Geometry_dispatch_postIntersect(geom, dg, ray, flags); + Geometry_dispatch_postIntersect(geom, dg, ray, flags, ff.geometry); dg.areaPDF = self->areaPDF; dg.objID = diff --git a/modules/cpu/geometry/Geometry.cpp b/modules/cpu/geometry/Geometry.cpp index 1a86d9d0b..693453455 100644 --- a/modules/cpu/geometry/Geometry.cpp +++ b/modules/cpu/geometry/Geometry.cpp @@ -12,8 +12,8 @@ namespace ospray { // Geometry definitions /////////////////////////////////////////////////////// -Geometry::Geometry(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) +Geometry::Geometry(api::ISPCDevice &device, const FeatureFlagsGeometry ffg) + : AddStructShared(device.getIspcrtDevice(), device), featureFlags(ffg) { managedObjectType = OSP_GEOMETRY; } diff --git a/modules/cpu/geometry/Geometry.h b/modules/cpu/geometry/Geometry.h index aee1affcc..914eb0d1e 100644 --- a/modules/cpu/geometry/Geometry.h +++ 
b/modules/cpu/geometry/Geometry.h @@ -4,6 +4,7 @@ #pragma once #include "common/Data.h" +#include "common/FeatureFlagsEnum.h" #include "common/ObjectFactory.h" // embree #include "common/Embree.h" @@ -16,7 +17,7 @@ struct OSPRAY_SDK_INTERFACE Geometry : public AddStructShared, public ObjectFactory { - Geometry(api::ISPCDevice &device); + Geometry(api::ISPCDevice &device, const FeatureFlagsGeometry ffg); virtual ~Geometry() override; virtual std::string toString() const override; @@ -29,9 +30,13 @@ struct OSPRAY_SDK_INTERFACE Geometry bool supportAreaLighting() const; + FeatureFlagsGeometry getFeatureFlagsGeometry() const; + protected: RTCGeometry embreeGeometry{nullptr}; + FeatureFlagsGeometry featureFlags; + void createEmbreeGeometry(RTCGeometryType type); // NOTE: We now pass intersection functions through Embree RTCIntersectionArgs // context parameter so that they can be inlined in SYCL @@ -50,6 +55,11 @@ inline bool Geometry::supportAreaLighting() const return (getSh()->sampleArea != nullptr) && (getSh()->getAreas != nullptr); } +inline FeatureFlagsGeometry Geometry::getFeatureFlagsGeometry() const +{ + return featureFlags; +} + // convenience wrappers to set Embree buffer ////////////////////////////////// template diff --git a/modules/cpu/geometry/GeometryDispatch.ih b/modules/cpu/geometry/GeometryDispatch.ih index 42b10fbc8..0e56b62be 100644 --- a/modules/cpu/geometry/GeometryDispatch.ih +++ b/modules/cpu/geometry/GeometryDispatch.ih @@ -3,7 +3,7 @@ #pragma once -#include "common/OSPCommon.ih" +#include "common/FeatureFlags.ih" OSPRAY_BEGIN_ISPC_NAMESPACE @@ -18,14 +18,14 @@ OSPRAY_BEGIN_ISPC_NAMESPACE // (DG_NORMALIZE/DG_FACEFORWARD) after Geometry_postIntersectFct is called. // Thus the material pointer only needs to be set if different to // geometry->material, or the color when different to vec4f(1.0f). 
-SYCL_EXTERNAL __noinline void Geometry_dispatch_postIntersect( - const Geometry *uniform self, +SYCL_EXTERNAL void Geometry_dispatch_postIntersect(const Geometry *uniform self, varying DifferentialGeometry &dg, const varying Ray &ray, - uniform int64 flags); + uniform int64 flags, + const uniform FeatureFlagsGeometry ffg); // sample the given primitive uniformly wrt. area -SYCL_EXTERNAL __noinline SampleAreaRes Geometry_dispatch_sampleArea( +SYCL_EXTERNAL SampleAreaRes Geometry_dispatch_sampleArea( const Geometry *const uniform, const int32 primID, const uniform affine3f &xfm, // instance transformation (obj2world) diff --git a/modules/cpu/geometry/GeometryDispatch.ispc b/modules/cpu/geometry/GeometryDispatch.ispc index 432de1ab5..0e3f18060 100644 --- a/modules/cpu/geometry/GeometryDispatch.ispc +++ b/modules/cpu/geometry/GeometryDispatch.ispc @@ -3,6 +3,7 @@ #include "common/DifferentialGeometry.ih" #include "common/Ray.ih" +#include "common/RayQueryContext.ih" #include "geometry/Boxes.ih" #include "geometry/Curves.ih" #include "geometry/GeometryDispatch.ih" @@ -18,51 +19,44 @@ OSPRAY_BEGIN_ISPC_NAMESPACE -SYCL_EXTERNAL __noinline void Geometry_dispatch_postIntersect( - const Geometry *uniform self, +SYCL_EXTERNAL void Geometry_dispatch_postIntersect(const Geometry *uniform self, varying DifferentialGeometry &dg, const varying Ray &ray, - uniform int64 flags) + uniform int64 flags, + const uniform FeatureFlagsGeometry ffg) { - switch (self->type) { - case GEOMETRY_TYPE_QUAD_MESH: + if ((self->type == GEOMETRY_TYPE_QUAD_MESH) && (ffg & FFG_QUAD)) { QuadMesh_postIntersect(self, dg, ray, flags); - break; - case GEOMETRY_TYPE_TRIANGLE_MESH: + } else if ((self->type == GEOMETRY_TYPE_TRIANGLE_MESH) + && (ffg & FFG_TRIANGLE)) { TriangleMesh_postIntersect(self, dg, ray, flags); - break; - case GEOMETRY_TYPE_BOXES: + } else if ((self->type == GEOMETRY_TYPE_BOXES) && (ffg & FFG_BOX)) { Boxes_postIntersect(self, dg, ray, flags); - break; - case GEOMETRY_TYPE_SPHERES: + } 
else if ((self->type == GEOMETRY_TYPE_SPHERES) && (ffg & FFG_SPHERE)) { Spheres_postIntersect(self, dg, ray, flags); - break; - case GEOMETRY_TYPE_PLANES: + } else if ((self->type == GEOMETRY_TYPE_PLANES) && (ffg & FFG_PLANE)) { Planes_postIntersect(self, dg, ray, flags); - break; - case GEOMETRY_TYPE_CURVES: + } else if ((self->type == GEOMETRY_TYPE_CURVES) && (ffg & FFG_CURVES)) { Curves_postIntersect(self, dg, ray, flags); - break; #ifdef OSPRAY_ENABLE_VOLUMES - case GEOMETRY_TYPE_ISOSURFACES: + } else if ((self->type == GEOMETRY_TYPE_ISOSURFACES) + && (ffg & FFG_ISOSURFACE)) { Isosurfaces_postIntersect(self, dg, ray, flags); - break; #endif #ifndef OSPRAY_TARGET_SYCL - // Subdivision surfaces not supported on the GPU right now - case GEOMETRY_TYPE_SUBDIVISION: + } else if ((self->type == GEOMETRY_TYPE_SUBDIVISION) + && (ffg & FFG_SUBDIVISION)) { Subdivision_postIntersect(self, dg, ray, flags); - break; -#endif - default: -#ifndef OSPRAY_TARGET_SYCL + } else { self->postIntersect(self, dg, ray, flags); -#endif - break; } +#else + } else { + } +#endif } -SYCL_EXTERNAL __noinline SampleAreaRes Geometry_dispatch_sampleArea( +SYCL_EXTERNAL SampleAreaRes Geometry_dispatch_sampleArea( const Geometry *const uniform self, const int32 primID, const uniform affine3f &xfm, @@ -94,62 +88,49 @@ SYCL_EXTERNAL __noinline SampleAreaRes Geometry_dispatch_sampleArea( RTC_SYCL_INDIRECTLY_CALLABLE unmasked void Geometry_dispatch_intersect( RTCIntersectFunctionNArguments *uniform args) { + RayQueryContextDefault *uniform ctx = + (RayQueryContextDefault * uniform) args->context; + const uniform FeatureFlagsGeometry ffg = ctx->ffg; Geometry *uniform geom = (Geometry * uniform) args->geometryUserPtr; - switch (geom->type) { - case GEOMETRY_TYPE_BOXES: + if ((geom->type == GEOMETRY_TYPE_BOXES) && (ffg & FFG_BOX)) { Boxes_intersect_kernel(args, false); - break; - case GEOMETRY_TYPE_PLANES: + } else if ((geom->type == GEOMETRY_TYPE_PLANES) && (ffg & FFG_PLANE)) { 
Planes_intersect_kernel(args, false); - break; -#if 0 - case GEOMETRY_TYPE_SPHERES: - Spheres_intersect_kernel(args, false); - break; -#endif #ifdef OSPRAY_ENABLE_VOLUMES - case GEOMETRY_TYPE_ISOSURFACES: + } else if ((geom->type == GEOMETRY_TYPE_ISOSURFACES) + && (ffg & FFG_ISOSURFACE)) { Isosurfaces_intersect_kernel(args, false); - break; #endif - default: + } else { #ifndef OSPRAY_TARGET_SYCL geom->intersect((RTCIntersectFunctionNArguments * uniform) args, false); #endif - break; } } RTC_SYCL_INDIRECTLY_CALLABLE unmasked void Geometry_dispatch_occluded( RTCOccludedFunctionNArguments *uniform args) { + RayQueryContextDefault *uniform ctx = + (RayQueryContextDefault * uniform) args->context; + const uniform FeatureFlagsGeometry ffg = ctx->ffg; Geometry *uniform geom = (Geometry * uniform) args->geometryUserPtr; - switch (geom->type) { - case GEOMETRY_TYPE_BOXES: + if ((geom->type == GEOMETRY_TYPE_BOXES) && (ffg & FFG_BOX)) { Boxes_intersect_kernel( (RTCIntersectFunctionNArguments * uniform) args, true); - break; - case GEOMETRY_TYPE_PLANES: + } else if ((geom->type == GEOMETRY_TYPE_PLANES) && (ffg & FFG_PLANE)) { Planes_intersect_kernel( (RTCIntersectFunctionNArguments * uniform) args, true); - break; -#if 0 - case GEOMETRY_TYPE_SPHERES: - Spheres_intersect_kernel( - (RTCIntersectFunctionNArguments * uniform) args, true); - break; -#endif #ifdef OSPRAY_ENABLE_VOLUMES - case GEOMETRY_TYPE_ISOSURFACES: + } else if ((geom->type == GEOMETRY_TYPE_ISOSURFACES) + && (ffg & FFG_ISOSURFACE)) { Isosurfaces_intersect_kernel( (RTCIntersectFunctionNArguments * uniform) args, true); - break; #endif - default: + } else { #ifndef OSPRAY_TARGET_SYCL geom->intersect((RTCIntersectFunctionNArguments * uniform) args, true); #endif - break; } } diff --git a/modules/cpu/geometry/Isosurfaces.cpp b/modules/cpu/geometry/Isosurfaces.cpp index 324cf2dd5..e0a01f032 100644 --- a/modules/cpu/geometry/Isosurfaces.cpp +++ b/modules/cpu/geometry/Isosurfaces.cpp @@ -23,7 +23,8 @@ void 
Isosurfaces_bounds(const RTCBoundsFunctionArguments *uniform args); namespace ospray { Isosurfaces::Isosurfaces(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared( + device.getIspcrtDevice(), device, FFG_ISOSURFACE | FFG_USER_GEOMETRY) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.postIntersect = ispc::Isosurfaces_postIntersect_addr(); diff --git a/modules/cpu/geometry/Mesh.cpp b/modules/cpu/geometry/Mesh.cpp index bea9a1aa6..bfa419c45 100644 --- a/modules/cpu/geometry/Mesh.cpp +++ b/modules/cpu/geometry/Mesh.cpp @@ -21,7 +21,7 @@ void *Mesh_getAreas_addr(); namespace ospray { Mesh::Mesh(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared(device.getIspcrtDevice(), device, FFG_NONE) { getSh()->super.getAreas = reinterpret_cast(ispc::Mesh_getAreas_addr()); @@ -170,6 +170,7 @@ void Mesh::commit() getSh()->flagMask &= ispc::int64(~DG_TEXCOORD); postCreationInfo(vertexData->size()); + featureFlags = isTri ? 
FFG_TRIANGLE : FFG_QUAD; } size_t Mesh::numPrimitives() const diff --git a/modules/cpu/geometry/Planes.cpp b/modules/cpu/geometry/Planes.cpp index 510542424..95d3c273d 100644 --- a/modules/cpu/geometry/Planes.cpp +++ b/modules/cpu/geometry/Planes.cpp @@ -16,7 +16,8 @@ void Planes_bounds(const RTCBoundsFunctionArguments *args); namespace ospray { Planes::Planes(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared( + device.getIspcrtDevice(), device, FFG_PLANE | FFG_USER_GEOMETRY) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.postIntersect = diff --git a/modules/cpu/geometry/Spheres.cpp b/modules/cpu/geometry/Spheres.cpp index 92f1a8a83..ca6fee7a5 100644 --- a/modules/cpu/geometry/Spheres.cpp +++ b/modules/cpu/geometry/Spheres.cpp @@ -18,7 +18,7 @@ void *Spheres_getAreas_addr(); namespace ospray { Spheres::Spheres(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared(device.getIspcrtDevice(), device, FFG_SPHERE) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.postIntersect = diff --git a/modules/cpu/geometry/Subdivision.cpp b/modules/cpu/geometry/Subdivision.cpp index b70d75b1e..226f5032d 100644 --- a/modules/cpu/geometry/Subdivision.cpp +++ b/modules/cpu/geometry/Subdivision.cpp @@ -16,7 +16,7 @@ void *Subdivision_postIntersect_addr(); namespace ospray { Subdivision::Subdivision(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared(device.getIspcrtDevice(), device, FFG_SUBDIVISION) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.postIntersect = diff --git a/modules/cpu/ispc_symbols.txt b/modules/cpu/ispc_symbols.txt index 347511be3..65bf818a4 100644 --- a/modules/cpu/ispc_symbols.txt +++ b/modules/cpu/ispc_symbols.txt @@ -2,20 +2,20 @@ Distribution2D_pdf___un_3C_s_5B__c_unDistribution2D_5D__3E_REFs_5B__c_vyvec2f_5D Distribution2D_sample___un_3C_s_5B__c_unDistribution2D_5D__3E_REFs_5B__c_vyvec2f_5D__, 
FrameBuffer_runPixelOps___un_3C_s_5B_unFrameBuffer_5D__3E_REFs_5B__c_vyvec4f_5D_CvyfREFs_5B__c_vyvec3f_5D_REFs_5B__c_vyvec3f_5D__, Light_eval___un_3C_s_5B__c_unLight_5D__3E_REFs_5B__c_vyDifferentialGeometry_5D_REFs_5B__c_vyvec3f_5D_CvyfCvyfCvyf_, -Renderer_getBackground___un_3C_s_5B__c_unRenderer_5D__3E_REFs_5B__c_vyvec2f_5D__, -Renderer_getMaxDepth___un_3C_s_5B__c_unRenderer_5D__3E_REFs_5B__c_vyvec2f_5D__, +Renderer_getBackground___un_3C_s_5B__c_unRenderer_5D__3E_REFs_5B__c_vyvec2f_5D_Cunenum_5B_FeatureFlagsOther_5D__, +Renderer_getMaxDepth___un_3C_s_5B__c_unRenderer_5D__3E_REFs_5B__c_vyvec2f_5D_Cunenum_5B_FeatureFlagsOther_5D__, clippingIntersectionFilterV___UM_un_3C_s_5B__c_unRTCFilterFunctionNArguments_5D__3E__, delete_uniform___un_3C_unv_3E__, delete_uniform_, get_zorder____, Geometry_dispatch_intersect___UM_un_3C_s_5B_unRTCIntersectFunctionNArguments_5D__3E__, Geometry_dispatch_occluded___UM_un_3C_s_5B_unRTCOccludedFunctionNArguments_5D__3E__, -Geometry_dispatch_postIntersect___un_3C_s_5B__c_unGeometry_5D__3E_REFs_5B_vyDifferentialGeometry_5D_REFs_5B__c_vyRay_5D_unI_, +Geometry_dispatch_postIntersect___un_3C_s_5B__c_unGeometry_5D__3E_REFs_5B_vyDifferentialGeometry_5D_REFs_5B__c_vyRay_5D_unICunenum_5B_FeatureFlagsGeometry_5D__, Texture_dispatch_get___un_3C_s_5B__c_unTexture_5D__3E_REFs_5B__c_vyDifferentialGeometry_5D__, Volume_intersect_kernel___UM_un_3C_s_5B_unRTCIntersectFunctionNArguments_5D__3E__, -FrameBuffer_dispatch_getRenderTaskDesc___un_3C_s_5B_unFrameBuffer_5D__3E_Cunu_, -FrameBuffer_dispatch_accumulateSample___un_3C_s_5B_unFrameBuffer_5D__3E_REFs_5B__c_vyScreenSample_5D_REFs_5B_unRenderTaskDesc_5D__, -FrameBuffer_dispatch_completeTask___un_3C_s_5B_unFrameBuffer_5D__3E_REFs_5B__c_unRenderTaskDesc_5D__, -Camera_dispatch_initRay___un_3C_s_5B__c_unCamera_5D__3E_REFs_5B_vyRay_5D_REFs_5B__c_vyCameraSample_5D__, +FrameBuffer_dispatch_getRenderTaskDesc___un_3C_s_5B_unFrameBuffer_5D__3E_CunuCunenum_5B_FeatureFlagsOther_5D__, 
+FrameBuffer_dispatch_accumulateSample___un_3C_s_5B_unFrameBuffer_5D__3E_REFs_5B__c_vyScreenSample_5D_REFs_5B_unRenderTaskDesc_5D_Cunenum_5B_FeatureFlagsOther_5D__, +FrameBuffer_dispatch_completeTask___un_3C_s_5B_unFrameBuffer_5D__3E_REFs_5B__c_unRenderTaskDesc_5D_Cunenum_5B_FeatureFlagsOther_5D__, +Camera_dispatch_initRay___un_3C_s_5B__c_unCamera_5D__3E_REFs_5B_vyRay_5D_REFs_5B__c_vyCameraSample_5D_Cunenum_5B_FeatureFlagsOther_5D__, LinearTransferFunction_get___un_3C_s_5B__c_unTransferFunction_5D__3E_vyf_, TransferFunction_dispatch_get___un_3C_s_5B__c_unTransferFunction_5D__3E_vyf_, diff --git a/modules/cpu/lights/AmbientLight.h b/modules/cpu/lights/AmbientLight.h index 0fe8cd20d..b63021df2 100644 --- a/modules/cpu/lights/AmbientLight.h +++ b/modules/cpu/lights/AmbientLight.h @@ -10,7 +10,7 @@ namespace ospray { //! an AmbientLight is a constant light that is present everywhere struct OSPRAY_SDK_INTERFACE AmbientLight : public Light { - AmbientLight(api::ISPCDevice &device) : Light(device) {} + AmbientLight(api::ISPCDevice &device) : Light(device, FFO_LIGHT_AMBIENT) {} virtual ~AmbientLight() override = default; virtual ISPCRTMemoryView createSh( uint32_t, const ispc::Instance *instance = nullptr) const override; diff --git a/modules/cpu/lights/CylinderLight.h b/modules/cpu/lights/CylinderLight.h index 6a540bb8b..64e43b7c6 100644 --- a/modules/cpu/lights/CylinderLight.h +++ b/modules/cpu/lights/CylinderLight.h @@ -11,7 +11,7 @@ namespace ospray { * cylindrical area into outward space */ struct OSPRAY_SDK_INTERFACE CylinderLight : public Light { - CylinderLight(api::ISPCDevice &device) : Light(device) {} + CylinderLight(api::ISPCDevice &device) : Light(device, FFO_LIGHT_CYLINDER) {} virtual ~CylinderLight() override = default; virtual ISPCRTMemoryView createSh( uint32_t, const ispc::Instance *instance = nullptr) const override; diff --git a/modules/cpu/lights/DirectionalLight.h b/modules/cpu/lights/DirectionalLight.h index 95def233f..3048eacba 100644 --- 
a/modules/cpu/lights/DirectionalLight.h +++ b/modules/cpu/lights/DirectionalLight.h @@ -11,7 +11,9 @@ namespace ospray { * thus projects parallel rays of light across the entire scene */ struct OSPRAY_SDK_INTERFACE DirectionalLight : public Light { - DirectionalLight(api::ISPCDevice &device) : Light(device) {} + DirectionalLight(api::ISPCDevice &device) + : Light(device, FFO_LIGHT_DIRECTIONAL) + {} virtual ~DirectionalLight() override = default; virtual ISPCRTMemoryView createSh( uint32_t, const ispc::Instance *instance = nullptr) const override; diff --git a/modules/cpu/lights/HDRILight.h b/modules/cpu/lights/HDRILight.h index 604f24d6b..7b2dcd701 100644 --- a/modules/cpu/lights/HDRILight.h +++ b/modules/cpu/lights/HDRILight.h @@ -13,7 +13,7 @@ namespace ospray { * cone of directions bounded by halfAngle */ struct OSPRAY_SDK_INTERFACE HDRILight : public Light { - HDRILight(api::ISPCDevice &device) : Light(device) {} + HDRILight(api::ISPCDevice &device) : Light(device, FFO_LIGHT_HDRI) {} virtual ISPCRTMemoryView createSh( uint32_t, const ispc::Instance *instance = nullptr) const override; virtual std::string toString() const override; diff --git a/modules/cpu/lights/Light.cpp b/modules/cpu/lights/Light.cpp index 99b373920..5564fb0ca 100644 --- a/modules/cpu/lights/Light.cpp +++ b/modules/cpu/lights/Light.cpp @@ -11,7 +11,8 @@ namespace ospray { // Light definitions ////////////////////////////////////////////////////////// -Light::Light(api::ISPCDevice &device) : ISPCDeviceObject(device) +Light::Light(api::ISPCDevice &device, const FeatureFlagsOther ffo) + : ISPCDeviceObject(device), featureFlags(ffo) { managedObjectType = OSP_LIGHT; } diff --git a/modules/cpu/lights/Light.h b/modules/cpu/lights/Light.h index 3dbbd357f..7c71af557 100644 --- a/modules/cpu/lights/Light.h +++ b/modules/cpu/lights/Light.h @@ -4,6 +4,7 @@ #pragma once #include "ISPCDeviceObject.h" +#include "common/FeatureFlagsEnum.h" #include "common/ObjectFactory.h" #include "ispcrt.h" @@ -19,7 +20,7 
@@ struct OSPRAY_SDK_INTERFACE Light : public ISPCDeviceObject, public ObjectFactory { - Light(api::ISPCDevice &device); + Light(api::ISPCDevice &device, const FeatureFlagsOther ffo); virtual ~Light() override = default; virtual uint32_t getShCount() const; @@ -32,7 +33,11 @@ struct OSPRAY_SDK_INTERFACE Light vec3f coloredIntensity{1.0f, 1.0f, 1.0f}; OSPIntensityQuantity intensityQuantity = OSP_INTENSITY_QUANTITY_UNKNOWN; + FeatureFlagsOther getFeatureFlagsOther() const; + protected: + FeatureFlagsOther featureFlags; + void queryIntensityQuantityType(const OSPIntensityQuantity &defaultIQ); }; @@ -45,4 +50,9 @@ inline uint32_t Light::getShCount() const return 1; } +inline FeatureFlagsOther Light::getFeatureFlagsOther() const +{ + return featureFlags; +} + } // namespace ospray diff --git a/modules/cpu/lights/LightDispatch.ih b/modules/cpu/lights/LightDispatch.ih index a8b8be415..80e8e7319 100644 --- a/modules/cpu/lights/LightDispatch.ih +++ b/modules/cpu/lights/LightDispatch.ih @@ -3,7 +3,8 @@ #pragma once -#include "common/OSPCommon.ih" +#include "common/FeatureFlags.ih" +#include "rkcommon/math/vec.ih" // c++ shared #include "LightShared.h" @@ -17,20 +18,20 @@ struct DifferentialGeometry; // by convention, giving (0, 0) as "random" numbers should sample the "center" // of the light source (used by the raytracing renderers such as the SciVis // renderer) -SYCL_EXTERNAL __noinline Light_SampleRes Light_dispatch_sample( - const Light *uniform self, +SYCL_EXTERNAL Light_SampleRes Light_dispatch_sample(const Light *uniform self, const DifferentialGeometry &dg, // point (&normal) to generate the sample const vec2f &s, // random numbers to generate the sample - const float time); // generate the sample at time (motion blur) + const float time, // generate the sample at time (motion blur) + const uniform FeatureFlagsOther ffo); //! compute the radiance and pdf caused by the light source (pointed to by the //! 
given direction up until maxDist) -SYCL_EXTERNAL __noinline Light_EvalRes Light_dispatch_eval( - const Light *uniform self, +SYCL_EXTERNAL Light_EvalRes Light_dispatch_eval(const Light *uniform self, const DifferentialGeometry &dg, // point to evaluate illumination for const vec3f &dir, // direction towards the light source, normalized const float minDist, // minimum distance to look for light contribution const float maxDist, // maximum distance to look for light contribution - const float time); // evaluate at time (motion blur) + const float time, // evaluate at time (motion blur) + const uniform FeatureFlagsOther ffo); OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/lights/LightDispatch.ispc b/modules/cpu/lights/LightDispatch.ispc index 53cf68e10..cba709815 100644 --- a/modules/cpu/lights/LightDispatch.ispc +++ b/modules/cpu/lights/LightDispatch.ispc @@ -5,7 +5,6 @@ #include "lights/LightDispatch.ih" #include "lights/LightShared.h" -#include "lights/QuadLight.ih" #include "lights/AmbientLight.ih" #include "lights/CylinderLight.ih" #include "lights/DirectionalLight.ih" @@ -17,33 +16,35 @@ OSPRAY_BEGIN_ISPC_NAMESPACE -SYCL_EXTERNAL __noinline Light_SampleRes Light_dispatch_sample( - const Light *uniform self, +SYCL_EXTERNAL Light_SampleRes Light_dispatch_sample(const Light *uniform self, const DifferentialGeometry &dg, const vec2f &s, - const float time) + const float time, + const uniform FeatureFlagsOther ffo) { #ifndef OSPRAY_TARGET_SYCL return self->sample(self, dg, s, time); #else - switch (self->type) { - case LIGHT_TYPE_QUAD: - return QuadLight_sample(self, dg, s, time); - case LIGHT_TYPE_CYLINDER: - return CylinderLight_sample(self, dg, s, time); - case LIGHT_TYPE_DIRECTIONAL: + if ((self->type == LIGHT_TYPE_AMBIENT) && (ffo & FFO_LIGHT_AMBIENT)) { + return AmbientLight_sample(self, dg, s, time); + } else if ((self->type == LIGHT_TYPE_DIRECTIONAL) + && (ffo & FFO_LIGHT_DIRECTIONAL)) { return DirectionalLight_sample(self, dg, s, time); - case 
LIGHT_TYPE_HDRI: - return HDRILight_sample(self, dg, s, time); - case LIGHT_TYPE_POINT: + } else if ((self->type == LIGHT_TYPE_POINT) && (ffo & FFO_LIGHT_POINT)) { return PointLight_sample(self, dg, s, time); - case LIGHT_TYPE_SPOT: + } else if ((self->type == LIGHT_TYPE_SPOT) && (ffo & FFO_LIGHT_SPOT)) { return SpotLight_sample(self, dg, s, time); - case LIGHT_TYPE_AMBIENT: - return AmbientLight_sample(self, dg, s, time); - case LIGHT_TYPE_GEOMETRY: + } else if ((self->type == LIGHT_TYPE_HDRI) && (ffo & FFO_LIGHT_HDRI)) { + return HDRILight_sample(self, dg, s, time); + } else if ((self->type == LIGHT_TYPE_QUAD) && (ffo & FFO_LIGHT_QUAD)) { + return QuadLight_sample(self, dg, s, time); + } else if ((self->type == LIGHT_TYPE_CYLINDER) + && (ffo & FFO_LIGHT_CYLINDER)) { + return CylinderLight_sample(self, dg, s, time); + } else if ((self->type == LIGHT_TYPE_GEOMETRY) + && (ffo & FFO_LIGHT_GEOMETRY)) { return GeometryLight_sample(self, dg, s, time); - default: + } else { Light_SampleRes res; res.weight = make_vec3f(0.f); res.dir = make_vec3f(0.f); @@ -54,33 +55,34 @@ SYCL_EXTERNAL __noinline Light_SampleRes Light_dispatch_sample( #endif } -SYCL_EXTERNAL __noinline Light_EvalRes Light_dispatch_eval( - const Light *uniform self, +SYCL_EXTERNAL Light_EvalRes Light_dispatch_eval(const Light *uniform self, const DifferentialGeometry &dg, const vec3f &dir, const float minDist, const float maxDist, - const float time) + const float time, + const uniform FeatureFlagsOther ffo) { #ifndef OSPRAY_TARGET_SYCL return self->eval(self, dg, dir, minDist, maxDist, time); #else - switch (self->type) { - case LIGHT_TYPE_QUAD: - return QuadLight_eval(self, dg, dir, minDist, maxDist, time); - case LIGHT_TYPE_CYLINDER: - return CylinderLight_eval(self, dg, dir, minDist, maxDist, time); - case LIGHT_TYPE_DIRECTIONAL: + if ((self->type == LIGHT_TYPE_AMBIENT) && (ffo & FFO_LIGHT_AMBIENT)) { + return AmbientLight_eval(self, dg, dir, minDist, maxDist, time); + } else if ((self->type == 
LIGHT_TYPE_DIRECTIONAL) + && (ffo & FFO_LIGHT_DIRECTIONAL)) { return DirectionalLight_eval(self, dg, dir, minDist, maxDist, time); - case LIGHT_TYPE_HDRI: - return HDRILight_eval(self, dg, dir, minDist, maxDist, time); - case LIGHT_TYPE_POINT: + } else if ((self->type == LIGHT_TYPE_POINT) && (ffo & FFO_LIGHT_POINT)) { return PointLight_eval(self, dg, dir, minDist, maxDist, time); - case LIGHT_TYPE_SPOT: + } else if ((self->type == LIGHT_TYPE_SPOT) && (ffo & FFO_LIGHT_SPOT)) { return SpotLight_eval(self, dg, dir, minDist, maxDist, time); - case LIGHT_TYPE_AMBIENT: - return AmbientLight_eval(self, dg, dir, minDist, maxDist, time); - default: + } else if ((self->type == LIGHT_TYPE_HDRI) && (ffo & FFO_LIGHT_HDRI)) { + return HDRILight_eval(self, dg, dir, minDist, maxDist, time); + } else if ((self->type == LIGHT_TYPE_QUAD) && (ffo & FFO_LIGHT_QUAD)) { + return QuadLight_eval(self, dg, dir, minDist, maxDist, time); + } else if ((self->type == LIGHT_TYPE_CYLINDER) + && (ffo & FFO_LIGHT_CYLINDER)) { + return CylinderLight_eval(self, dg, dir, minDist, maxDist, time); + } else { Light_EvalRes res; res.radiance = make_vec3f(0.f); res.pdf = 0.f; diff --git a/modules/cpu/lights/PointLight.h b/modules/cpu/lights/PointLight.h index 69836ff51..c2905342a 100644 --- a/modules/cpu/lights/PointLight.h +++ b/modules/cpu/lights/PointLight.h @@ -12,7 +12,7 @@ namespace ospray { //! 
directions struct OSPRAY_SDK_INTERFACE PointLight : public Light { - PointLight(api::ISPCDevice &device) : Light(device) {} + PointLight(api::ISPCDevice &device) : Light(device, FFO_LIGHT_POINT) {} virtual ~PointLight() override = default; virtual ISPCRTMemoryView createSh( uint32_t, const ispc::Instance *instance = nullptr) const override; diff --git a/modules/cpu/lights/QuadLight.h b/modules/cpu/lights/QuadLight.h index 015295912..d12e14ad7 100644 --- a/modules/cpu/lights/QuadLight.h +++ b/modules/cpu/lights/QuadLight.h @@ -12,7 +12,7 @@ namespace ospray { * area into the positive half space */ struct OSPRAY_SDK_INTERFACE QuadLight : public Light { - QuadLight(api::ISPCDevice &device) : Light(device) {} + QuadLight(api::ISPCDevice &device) : Light(device, FFO_LIGHT_QUAD) {} virtual ~QuadLight() override = default; virtual ISPCRTMemoryView createSh( uint32_t, const ispc::Instance *instance = nullptr) const override; diff --git a/modules/cpu/lights/SpotLight.h b/modules/cpu/lights/SpotLight.h index 16f60c439..67ceb0bce 100644 --- a/modules/cpu/lights/SpotLight.h +++ b/modules/cpu/lights/SpotLight.h @@ -12,7 +12,7 @@ namespace ospray { * cone of directions bounded by halfAngle */ struct OSPRAY_SDK_INTERFACE SpotLight : public Light { - SpotLight(api::ISPCDevice &device) : Light(device) {} + SpotLight(api::ISPCDevice &device) : Light(device, FFO_LIGHT_SPOT) {} virtual ~SpotLight() override = default; virtual ISPCRTMemoryView createSh( uint32_t, const ispc::Instance *instance = nullptr) const override; diff --git a/modules/cpu/lights/SunSkyLight.cpp b/modules/cpu/lights/SunSkyLight.cpp index 1e181f15a..3495b2b46 100644 --- a/modules/cpu/lights/SunSkyLight.cpp +++ b/modules/cpu/lights/SunSkyLight.cpp @@ -18,7 +18,8 @@ void HDRILight_initDistribution(const void *map, void *distribution); namespace ospray { -SunSkyLight::SunSkyLight(api::ISPCDevice &device) : Light(device) +SunSkyLight::SunSkyLight(api::ISPCDevice &device) + : Light(device, FFO_LIGHT_HDRI | 
FFO_LIGHT_DIRECTIONAL) { static const int skyResolution = 512; this->skySize = vec2i(skyResolution, skyResolution / 2); diff --git a/modules/cpu/pf/PixelFilterDispatch.ih b/modules/cpu/pf/PixelFilterDispatch.ih index d06ead273..30075c484 100644 --- a/modules/cpu/pf/PixelFilterDispatch.ih +++ b/modules/cpu/pf/PixelFilterDispatch.ih @@ -21,7 +21,7 @@ struct PixelFilter; Returns a 2D position in the domain [-w/2 .. w/2] distributed proportionally to the filter kernel. */ -SYCL_EXTERNAL __noinline vec2f PixelFilter_dispatch_sample( +SYCL_EXTERNAL vec2f PixelFilter_dispatch_sample( const PixelFilter *uniform self, const vec2f &x); OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/pf/PixelFilterDispatch.ispc b/modules/cpu/pf/PixelFilterDispatch.ispc index 56e15c0e8..fa9202900 100644 --- a/modules/cpu/pf/PixelFilterDispatch.ispc +++ b/modules/cpu/pf/PixelFilterDispatch.ispc @@ -7,7 +7,7 @@ OSPRAY_BEGIN_ISPC_NAMESPACE -SYCL_EXTERNAL __noinline vec2f PixelFilter_dispatch_sample( +SYCL_EXTERNAL vec2f PixelFilter_dispatch_sample( const PixelFilter *uniform self, const vec2f &x) { switch (self->type) { diff --git a/modules/cpu/render/Material.cpp b/modules/cpu/render/Material.cpp index fc22da0e8..2b677a6ee 100644 --- a/modules/cpu/render/Material.cpp +++ b/modules/cpu/render/Material.cpp @@ -16,8 +16,8 @@ namespace ospray { Ref Material::microfacetAlbedoTables = nullptr; -Material::Material(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) +Material::Material(api::ISPCDevice &device, const FeatureFlagsOther ffo) + : AddStructShared(device.getIspcrtDevice(), device), featureFlags(ffo) { managedObjectType = OSP_MATERIAL; #ifndef OSPRAY_TARGET_SYCL @@ -66,6 +66,8 @@ ispc::TextureParam Material::getTextureParam(const char *texture_name) { // Get texture pointer Texture *ptr = (Texture *)getParamObject(texture_name); + if (ptr) + featureFlags |= FFO_TEXTURE_IN_MATERIAL; // Get 2D transformation if exists int transformFlags = ispc::TRANSFORM_FLAG_NONE; 
diff --git a/modules/cpu/render/Material.h b/modules/cpu/render/Material.h index 17ba32f0d..84499f6db 100644 --- a/modules/cpu/render/Material.h +++ b/modules/cpu/render/Material.h @@ -4,6 +4,7 @@ #pragma once #include "ISPCDeviceObject.h" +#include "common/FeatureFlagsEnum.h" #include "common/ObjectFactory.h" #include "common/StructShared.h" // ispc shared @@ -29,11 +30,13 @@ struct OSPRAY_SDK_INTERFACE Material : public AddStructShared, public ObjectFactory { - Material(api::ISPCDevice &device); + Material(api::ISPCDevice &device, const FeatureFlagsOther ffo); virtual ~Material() override; virtual std::string toString() const override; virtual void commit() override; + FeatureFlagsOther getFeatureFlagsOther() const; + // helper function to get all texture related parameters ispc::TextureParam getTextureParam(const char *texture_name); @@ -64,6 +67,8 @@ struct OSPRAY_SDK_INTERFACE Material utility::Optional getTextureTransform2f(const char *_texname); utility::Optional getTextureTransform3f(const char *_texname); + FeatureFlagsOther featureFlags; + // Microfacet albedo tables data, shared by all materials. 
New materials // increment the use count, and decrement it on destruction to ensure // the data will be released before the device @@ -74,6 +79,11 @@ OSPTYPEFOR_SPECIALIZATION(Material *, OSP_MATERIAL); // Inlined definitions ///////////////////////////////////////////////////////// +inline FeatureFlagsOther Material::getFeatureFlagsOther() const +{ + return featureFlags; +} + inline bool Material::isEmissive() const { return reduce_max(getSh()->emission) > 0.f; diff --git a/modules/cpu/render/Material.ih b/modules/cpu/render/Material.ih index 82e242681..7ca2be742 100644 --- a/modules/cpu/render/Material.ih +++ b/modules/cpu/render/Material.ih @@ -4,16 +4,22 @@ #pragma once #include "common/DifferentialGeometry.ih" -#include "common/Ray.ih" -#include "render/MaterialShared.h" -#include "render/bsdfs/BSDF.ih" -#include "render/bsdfs/ShadingContext.ih" +#include "common/FeatureFlags.ih" #include "texture/TextureParam.ih" - +// c++ shared #include "MaterialShared.h" OSPRAY_BEGIN_ISPC_NAMESPACE +#define define_texture_get_ff(fn, map, dg, val, ff) \ + ((ffo & FFO_TEXTURE_IN_MATERIAL) ? fn(map, dg, val) : val) +#define get1f_ff(map, dg, val, ff) \ + define_texture_get_ff(get1f, map, dg, val, ff) +#define get3f_ff(map, dg, val, ff) \ + define_texture_get_ff(get3f, map, dg, val, ff) +#define get4f_ff(map, dg, val, ff) \ + define_texture_get_ff(get4f, map, dg, val, ff) + inline linear3f makeShadingFrame(const DifferentialGeometry &dg) { vec3f N = dg.Ns; @@ -71,6 +77,16 @@ inline linear3f makeShadingFrame(const DifferentialGeometry &dg, } } +inline linear3f makeShadingFrame_ff(const DifferentialGeometry &dg, + const uniform TextureParam &normalMap, + const uniform linear2f &normalRot, + const uniform FeatureFlagsOther ffo) +{ + return (ffo & FFO_TEXTURE_IN_MATERIAL) + ? 
makeShadingFrame(dg, normalMap, normalRot) + : makeShadingFrame(dg); +} + SYCL_EXTERNAL vec3f Material_getTransparency( const uniform Material *uniform self, const DifferentialGeometry &dg, diff --git a/modules/cpu/render/MaterialDispatch.ih b/modules/cpu/render/MaterialDispatch.ih index b3224a960..7b2b35586 100644 --- a/modules/cpu/render/MaterialDispatch.ih +++ b/modules/cpu/render/MaterialDispatch.ih @@ -3,10 +3,14 @@ #pragma once +#include "common/FeatureFlags.ih" #include "rkcommon/math/vec.ih" OSPRAY_BEGIN_ISPC_NAMESPACE +// Note: Can't remove these `__noinline` because it causes artifacts +// in 'ThinGlass' material on ci-comp_igc-19476 driver + SYCL_EXTERNAL __noinline const varying BSDF *varying Material_dispatch_getBSDF( const uniform Material *uniform self, uniform ShadingContext *uniform ctx, @@ -15,7 +19,8 @@ SYCL_EXTERNAL __noinline const varying BSDF *varying Material_dispatch_getBSDF( // The ray arriving at the point to shade. const Ray &ray, // The medium this ray travels inside. - const Medium &currentMedium); + const Medium &currentMedium, + const uniform FeatureFlagsOther ffo); SYCL_EXTERNAL __noinline vec3f Material_dispatch_getTransparency( const uniform Material *uniform self, @@ -24,11 +29,13 @@ SYCL_EXTERNAL __noinline vec3f Material_dispatch_getTransparency( // The ray arriving at the point to shade. const Ray &ray, // The medium this ray travels inside.
- const Medium &currentMedium); + const Medium &currentMedium, + const uniform FeatureFlagsOther ffo); SYCL_EXTERNAL __noinline void Material_dispatch_selectNextMedium( const uniform Material *uniform self, const DifferentialGeometry &dg, - Medium &currentMedium); + Medium &currentMedium, + const uniform FeatureFlagsOther ffo); OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/render/MaterialDispatch.ispc b/modules/cpu/render/MaterialDispatch.ispc index a8d99d330..062d94972 100644 --- a/modules/cpu/render/MaterialDispatch.ispc +++ b/modules/cpu/render/MaterialDispatch.ispc @@ -24,38 +24,45 @@ SYCL_EXTERNAL __noinline const varying BSDF *varying Material_dispatch_getBSDF( uniform ShadingContext *uniform ctx, const DifferentialGeometry &dg, const Ray &ray, - const Medium &currentMedium) + const Medium &currentMedium, + const uniform FeatureFlagsOther ffo) { - switch (self->type) { - case MATERIAL_TYPE_ALLOY: + if ((self->type == MATERIAL_TYPE_ALLOY) && (ffo & FFO_MATERIAL_ALLOY)) { return Alloy_getBSDF(self, ctx, dg, ray, currentMedium); - case MATERIAL_TYPE_CARPAINT: + } else if ((self->type == MATERIAL_TYPE_CARPAINT) + && (ffo & FFO_MATERIAL_CARPAINT)) { return CarPaint_getBSDF(self, ctx, dg, ray, currentMedium); - case MATERIAL_TYPE_GLASS: + } else if ((self->type == MATERIAL_TYPE_GLASS) + && (ffo & FFO_MATERIAL_GLASS)) { return Glass_getBSDF(self, ctx, dg, ray, currentMedium); - case MATERIAL_TYPE_LUMINOUS: + } else if ((self->type == MATERIAL_TYPE_LUMINOUS) + && (ffo & FFO_MATERIAL_LUMINOUS)) { return Luminous_getBSDF(self, ctx, dg, ray, currentMedium); - case MATERIAL_TYPE_METAL: + } else if ((self->type == MATERIAL_TYPE_METAL) + && (ffo & FFO_MATERIAL_METAL)) { return Metal_getBSDF(self, ctx, dg, ray, currentMedium); - case MATERIAL_TYPE_METALLICPAINT: + } else if ((self->type == MATERIAL_TYPE_METALLICPAINT) + && (ffo & FFO_MATERIAL_METALLICPAINT)) { return MetallicPaint_getBSDF(self, ctx, dg, ray, currentMedium); - case MATERIAL_TYPE_MIX: + } else if ((self->type == MATERIAL_TYPE_MIX) && (ffo
& FFO_MATERIAL_MIX)) { return Mix_getBSDF(self, ctx, dg, ray, currentMedium); - case MATERIAL_TYPE_OBJ: - return OBJ_getBSDF(self, ctx, dg, ray, currentMedium); - case MATERIAL_TYPE_PLASTIC: + } else if ((self->type == MATERIAL_TYPE_OBJ) && (ffo & FFO_MATERIAL_OBJ)) { + return OBJ_getBSDF(self, ctx, dg, ray, currentMedium, ffo); + } else if ((self->type == MATERIAL_TYPE_PLASTIC) + && (ffo & FFO_MATERIAL_PLASTIC)) { return Plastic_getBSDF(self, ctx, dg, ray, currentMedium); - case MATERIAL_TYPE_PRINCIPLED: + } else if ((self->type == MATERIAL_TYPE_PRINCIPLED) + && (ffo & FFO_MATERIAL_PRINCIPLED)) { return Principled_getBSDF(self, ctx, dg, ray, currentMedium); - case MATERIAL_TYPE_THINGLASS: + } else if ((self->type == MATERIAL_TYPE_THINGLASS) + && (ffo & FFO_MATERIAL_THINGLASS)) { return ThinGlass_getBSDF(self, ctx, dg, ray, currentMedium); - case MATERIAL_TYPE_VELVET: + } else if ((self->type == MATERIAL_TYPE_VELVET) + && (ffo & FFO_MATERIAL_VELVET)) { return Velvet_getBSDF(self, ctx, dg, ray, currentMedium); - default: + } else { #ifndef OSPRAY_TARGET_SYCL return self->getBSDF(self, ctx, dg, ray, currentMedium); -#else - return NULL; #endif } return NULL; @@ -65,22 +72,25 @@ SYCL_EXTERNAL __noinline vec3f Material_dispatch_getTransparency( const uniform Material *uniform self, const DifferentialGeometry &dg, const Ray &ray, - const Medium &currentMedium) + const Medium &currentMedium, + const uniform FeatureFlagsOther ffo) { - switch (self->type) { - case MATERIAL_TYPE_GLASS: + if ((self->type == MATERIAL_TYPE_GLASS) && (ffo & FFO_MATERIAL_GLASS)) { return Glass_getTransparency(self, dg, ray, currentMedium); - case MATERIAL_TYPE_LUMINOUS: + } else if ((self->type == MATERIAL_TYPE_LUMINOUS) + && (ffo & FFO_MATERIAL_LUMINOUS)) { return Luminous_getTransparency(self, dg, ray, currentMedium); - case MATERIAL_TYPE_MIX: + } else if ((self->type == MATERIAL_TYPE_MIX) && (ffo & FFO_MATERIAL_MIX)) { return Mix_getTransparency(self, dg, ray, currentMedium); - case MATERIAL_TYPE_OBJ: -
return OBJ_getTransparency(self, dg, ray, currentMedium); - case MATERIAL_TYPE_PRINCIPLED: + } else if ((self->type == MATERIAL_TYPE_OBJ) && (ffo & FFO_MATERIAL_OBJ)) { + return OBJ_getTransparency(self, dg, ray, currentMedium, ffo); + } else if ((self->type == MATERIAL_TYPE_PRINCIPLED) + && (ffo & FFO_MATERIAL_PRINCIPLED)) { return Principled_getTransparency(self, dg, ray, currentMedium); - case MATERIAL_TYPE_THINGLASS: + } else if ((self->type == MATERIAL_TYPE_THINGLASS) + && (ffo & FFO_MATERIAL_THINGLASS)) { return ThinGlass_getTransparency(self, dg, ray, currentMedium); - default: + } else { #ifndef OSPRAY_TARGET_SYCL return self->getTransparency(self, dg, ray, currentMedium); #else @@ -92,22 +102,20 @@ SYCL_EXTERNAL __noinline vec3f Material_dispatch_getTransparency( SYCL_EXTERNAL __noinline void Material_dispatch_selectNextMedium( const uniform Material *uniform self, const DifferentialGeometry &dg, - Medium &currentMedium) + Medium &currentMedium, + const uniform FeatureFlagsOther ffo) { - switch (self->type) { - case MATERIAL_TYPE_GLASS: + if ((self->type == MATERIAL_TYPE_GLASS) && (ffo & FFO_MATERIAL_GLASS)) { Glass_selectNextMedium(self, dg, currentMedium); - break; - case MATERIAL_TYPE_PRINCIPLED: + } else if ((self->type == MATERIAL_TYPE_PRINCIPLED) + && (ffo & FFO_MATERIAL_PRINCIPLED)) { Principled_selectNextMedium(self, dg, currentMedium); - break; - default: + } else { #ifndef OSPRAY_TARGET_SYCL self->selectNextMedium(self, dg, currentMedium); #else Material_selectNextMedium(self, dg, currentMedium); #endif - break; } } diff --git a/modules/cpu/render/Renderer.cpp b/modules/cpu/render/Renderer.cpp index 847806d8d..175765c65 100644 --- a/modules/cpu/render/Renderer.cpp +++ b/modules/cpu/render/Renderer.cpp @@ -46,6 +46,9 @@ void Renderer::commit() maxDepthTexture = (Texture2D *)getParamObject("map_maxDepth"); backplate = (Texture2D *)getParamObject("map_backplate"); + featureFlags = FFO_NONE; + if (maxDepthTexture || backplate) + featureFlags |=
FFO_TEXTURE_IN_RENDERER; if (maxDepthTexture) { if (maxDepthTexture->format != OSP_TEXTURE_R32F @@ -66,6 +69,9 @@ void Renderer::commit() getSh()->material = nullptr; materialData = getParamDataT("material"); if (materialData) { + for (auto &&mat : *materialData) + featureFlags |= mat->getFeatureFlagsOther(); + materialArray = make_buffer_shared_unique( getISPCDevice().getIspcrtDevice(), createArrayOfSh(*materialData)); diff --git a/modules/cpu/render/Renderer.h b/modules/cpu/render/Renderer.h index 457015a4d..641a56367 100644 --- a/modules/cpu/render/Renderer.h +++ b/modules/cpu/render/Renderer.h @@ -4,6 +4,7 @@ #pragma once #include "ISPCDeviceObject.h" +#include "common/FeatureFlagsEnum.h" #include "math/MathConstants.h" #include "pf/PixelFilter.h" #include "rkcommon/utility/ArrayView.h" @@ -95,6 +96,9 @@ struct OSPRAY_SDK_INTERFACE Renderer Ref> materialData; std::unique_ptr> materialArray; + protected: + FeatureFlagsOther featureFlags{FFO_NONE}; + private: void setupPixelFilter(); }; diff --git a/modules/cpu/render/Renderer.ih b/modules/cpu/render/Renderer.ih index e547fb76b..b81e6cba0 100644 --- a/modules/cpu/render/Renderer.ih +++ b/modules/cpu/render/Renderer.ih @@ -3,14 +3,17 @@ #pragma once +#include "common/FeatureFlags.ih" // c++ shared #include "RendererShared.h" OSPRAY_BEGIN_ISPC_NAMESPACE -SYCL_EXTERNAL vec4f Renderer_getBackground( - const Renderer *uniform self, const vec2f &screenPos); -SYCL_EXTERNAL float Renderer_getMaxDepth( - const Renderer *uniform self, const vec2f &screenPos); +SYCL_EXTERNAL vec4f Renderer_getBackground(const Renderer *uniform self, + const vec2f &screenPos, + const uniform FeatureFlagsOther ffo); +SYCL_EXTERNAL float Renderer_getMaxDepth(const Renderer *uniform self, + const vec2f &screenPos, + const uniform FeatureFlagsOther ffo); OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/render/Renderer.ispc b/modules/cpu/render/Renderer.ispc index 9b0890790..24340d55b 100644 --- a/modules/cpu/render/Renderer.ispc +++ 
b/modules/cpu/render/Renderer.ispc @@ -15,10 +15,11 @@ OSPRAY_BEGIN_ISPC_NAMESPACE -SYCL_EXTERNAL vec4f Renderer_getBackground( - const Renderer *uniform self, const vec2f &screenPos) +SYCL_EXTERNAL vec4f Renderer_getBackground(const Renderer *uniform self, + const vec2f &screenPos, + const uniform FeatureFlagsOther ffo) { - if (!self->backplate) + if (!self->backplate || !(ffo & FFO_TEXTURE_IN_RENDERER)) return self->bgColor; // TODO: Now for GPU making a whole DifferentialGeometry object @@ -29,10 +30,11 @@ SYCL_EXTERNAL vec4f Renderer_getBackground( return get4f(self->backplate, lookup); } -SYCL_EXTERNAL float Renderer_getMaxDepth( - const Renderer *uniform self, const vec2f &screenPos) +SYCL_EXTERNAL float Renderer_getMaxDepth(const Renderer *uniform self, + const vec2f &screenPos, + const uniform FeatureFlagsOther ffo) { - if (!self->maxDepthTexture) + if (!self->maxDepthTexture || !(ffo & FFO_TEXTURE_IN_RENDERER)) return inf; DifferentialGeometry lookup; @@ -67,10 +69,10 @@ export void Renderer_pick(const void *uniform _self, cameraSample.time = 0.5f; Ray ray; - camera->initRay(camera, ray, cameraSample); - ray.t = min(ray.t, Renderer_getMaxDepth(self, cameraSample.screen)); + Camera_dispatch_initRay(camera, ray, cameraSample, FFO_ALL); + ray.t = min(ray.t, Renderer_getMaxDepth(self, cameraSample.screen, FFO_ALL)); - traceRay(world, ray); + traceRay(world, ray, ffAll()); vec3f p = ray.org + ray.dir * ray.t; diff --git a/modules/cpu/render/RendererRenderTaskFn.inl b/modules/cpu/render/RendererRenderTaskFn.inl index 1b7216ae9..5a0ac6781 100644 --- a/modules/cpu/render/RendererRenderTaskFn.inl +++ b/modules/cpu/render/RendererRenderTaskFn.inl @@ -14,12 +14,11 @@ task Camera *uniform camera, World *uniform world, void *uniform perFrameData, - const uint32 *uniform taskIDs + const uint32 *uniform taskIDs, #ifdef OSPRAY_TARGET_SYCL - , - const int taskIndex0 + const int taskIndex0, #endif - ) + const uniform FeatureFlags &ff) { const uniform int32 spp = self->spp; 
@@ -30,7 +29,7 @@ task CameraSample cameraSample; uniform RenderTaskDesc taskDesc = - FrameBuffer_dispatch_getRenderTaskDesc(fb, taskIDs[taskIndex0]); + FrameBuffer_dispatch_getRenderTaskDesc(fb, taskIDs[taskIndex0], ff.other); const uniform int startSampleID = max(taskDesc.accumID, 0) * spp; if (fb->cancelRender || isEmpty(taskDesc.region)) { @@ -50,7 +49,8 @@ task // set ray t value for early ray termination (from maximum depth texture) vec2f center = make_vec2f(screenSample.sampleID.x, screenSample.sampleID.y) + 0.5f; - const float tMax = Renderer_getMaxDepth(self, center * fb->rcpSize); + const float tMax = + Renderer_getMaxDepth(self, center * fb->rcpSize, ff.other); screenSample.z = tMax; vec3f col = make_vec3f(0.f); @@ -87,15 +87,16 @@ task cameraSample.lens.y = 0.0f; cameraSample.time = 0.5f; - Camera_dispatch_initRay(camera, screenSample.ray, cameraSample); + Camera_dispatch_initRay( + camera, screenSample.ray, cameraSample, ff.other); screenSample.ray.t = min(screenSample.ray.t, tMax); screenSample.z = inf; screenSample.primID = RTC_INVALID_GEOMETRY_ID; screenSample.geomID = RTC_INVALID_GEOMETRY_ID; screenSample.instID = RTC_INVALID_GEOMETRY_ID; - screenSample.albedo = - make_vec3f(Renderer_getBackground(self, screenSample.pos)); + screenSample.albedo = make_vec3f( + Renderer_getBackground(self, screenSample.pos, ff.other)); screenSample.normal = make_vec3f(0.f); #ifdef OSPRAY_TARGET_SYCL @@ -108,7 +109,7 @@ task #endif #endif - renderSampleFn(self, fb, world, perFrameData, screenSample); + renderSampleFn(self, fb, world, perFrameData, screenSample, ff); col = col + screenSample.rgb; alpha += screenSample.alpha; @@ -124,7 +125,8 @@ task screenSample.normal = normal * rspp; screenSample.albedo = albedo * rspp; - FrameBuffer_dispatch_accumulateSample(fb, screenSample, taskDesc); + FrameBuffer_dispatch_accumulateSample( + fb, screenSample, taskDesc, ff.other); } - FrameBuffer_dispatch_completeTask(fb, taskDesc); + FrameBuffer_dispatch_completeTask(fb, 
taskDesc, ff.other); } diff --git a/modules/cpu/render/ao/AORenderer.cpp b/modules/cpu/render/ao/AORenderer.cpp index 71131e741..1b9e498f9 100644 --- a/modules/cpu/render/ao/AORenderer.cpp +++ b/modules/cpu/render/ao/AORenderer.cpp @@ -3,12 +3,14 @@ #include "AORenderer.h" #include "camera/Camera.h" +#include "common/FeatureFlagsEnum.h" #include "common/World.h" #include "fb/FrameBuffer.h" #ifndef OSPRAY_TARGET_SYCL #include "render/ao/AORenderer_ispc.h" #else #include "AORenderer.ih" +constexpr sycl::specialization_id specFeatureFlags; #endif namespace ospray { @@ -54,18 +56,28 @@ void AORenderer::renderTasks(FrameBuffer *fb, #ifdef OSPRAY_TARGET_SYCL const uint32_t *taskIDsPtr = taskIDs.data(); auto event = syclQueue.submit([&](sycl::handler &cgh) { + FeatureFlags ff = world->getFeatureFlags(); + ff.other |= featureFlags; + ff.other |= fb->getFeatureFlagsOther(); + ff.other |= camera->getFeatureFlagsOther(); + cgh.set_specialization_constant(ff); + const sycl::nd_range<1> dispatchRange = computeDispatchRange(numTasks, 16); - cgh.parallel_for(dispatchRange, [=](sycl::nd_item<1> taskIndex) { - if (taskIndex.get_global_id(0) < numTasks) { - ispc::AORenderer_renderTask(&rendererSh->super, - fbSh, - cameraSh, - worldSh, - perFrameData, - taskIDsPtr, - taskIndex.get_global_id(0)); - } - }); + cgh.parallel_for(dispatchRange, + [=](sycl::nd_item<1> taskIndex, sycl::kernel_handler kh) { + if (taskIndex.get_global_id(0) < numTasks) { + const FeatureFlags ff = + kh.get_specialization_constant(); + ispc::AORenderer_renderTask(&rendererSh->super, + fbSh, + cameraSh, + worldSh, + perFrameData, + taskIDsPtr, + taskIndex.get_global_id(0), + ff); + } + }); }); event.wait_and_throw(); // For prints we have to flush the entire queue, because other stuff is queued diff --git a/modules/cpu/render/ao/AORenderer.ih b/modules/cpu/render/ao/AORenderer.ih index 53576b240..211ec9f8a 100644 --- a/modules/cpu/render/ao/AORenderer.ih +++ b/modules/cpu/render/ao/AORenderer.ih @@ -10,7 +10,8 @@ 
SYCL_EXTERNAL void AORenderer_renderTask(Renderer *uniform self, World *uniform world, void *uniform perFrameData, const uint32 *uniform taskIDs, - const int taskIndex0); + const int taskIndex0, + const uniform ospray::FeatureFlags &ff); #endif OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/render/ao/AORenderer.ispc b/modules/cpu/render/ao/AORenderer.ispc index 723680a9f..2bb721b17 100644 --- a/modules/cpu/render/ao/AORenderer.ispc +++ b/modules/cpu/render/ao/AORenderer.ispc @@ -22,7 +22,8 @@ static void AORenderer_renderSample(Renderer *uniform _self, FrameBuffer *uniform fb, World *uniform world, void *uniform perFrameData, - varying ScreenSample &sample) + varying ScreenSample &sample, + const uniform FeatureFlags &ff) { uniform AORenderer *uniform self = (uniform AORenderer * uniform) _self; @@ -72,24 +73,26 @@ static void AORenderer_renderSample(Renderer *uniform _self, // if hit ray.t will be updated // Note: work around for bug when using traceGeometryRayIntervals on GPU #ifdef OSPRAY_TARGET_SYCL - traceGeometryRay(world, ray); + traceGeometryRay(world, ray, ff); #else - traceGeometryRayIntervals(world, ray, rayIntervals); + traceGeometryRayIntervals(world, ray, rayIntervals, ff); #endif #ifdef OSPRAY_ENABLE_VOLUMES - // Determine volume intervals by tracing ray in the volume scene - Ray volumeRay = ray; - traceVolumeRay(world, volumeRay, volumeIntervals); - - // Sample volumes across volume intervals (in front of geometry hit) - if (volumeIntervals.numVolumeIntervals > 0) { - vec4f volumeColor = integrateVolumeIntervals(volumeIntervals, - rayIntervals, - volumeRay, - ldSampler, - self->volumeSamplingRate); - outputColor = outputColor + (1.f - outputColor.w) * volumeColor; + if (ff.volume & FFV_VOLUME) { + // Determine volume intervals by tracing ray in the volume scene + Ray volumeRay = ray; + traceVolumeRay(world, volumeRay, volumeIntervals); + + // Sample volumes across volume intervals (in front of geometry hit) + if 
(volumeIntervals.numVolumeIntervals > 0) { + vec4f volumeColor = integrateVolumeIntervals(volumeIntervals, + rayIntervals, + volumeRay, + ldSampler, + self->volumeSamplingRate); + outputColor = outputColor + (1.f - outputColor.w) * volumeColor; + } } #endif @@ -99,11 +102,12 @@ static void AORenderer_renderSample(Renderer *uniform _self, if (rayHadHit) { // Prepare differential geometry structure DifferentialGeometry dg; - computeDG(world, self, ray, dg); + computeDG(world, self, ray, dg, ff); // Shade geometry SSI surfaceShading; - surfaceShading = AORenderer_computeShading(self, fb, world, dg, sample); + surfaceShading = + AORenderer_computeShading(self, fb, world, dg, sample, ff); // Use shaded color for blending blendedColor = surfaceShading.shadedColor; @@ -122,7 +126,7 @@ static void AORenderer_renderSample(Renderer *uniform _self, // start from the last geometry hit all over to initial Tfar setRay(ray, ray.t + dg.epsilon, originalRayTFar); } else { - blendedColor = Renderer_getBackground(&self->super, sample.pos); + blendedColor = Renderer_getBackground(&self->super, sample.pos, ff.other); // Initialize other per sample data with first hit values if (firstHit) { sample.z = ray.t; @@ -157,10 +161,11 @@ SYCL_EXTERNAL void AORenderer_renderTask(Renderer *uniform self, World *uniform world, void *uniform perFrameData, const uint32 *uniform taskIDs, - const int taskIndex0) + const int taskIndex0, + const uniform FeatureFlags &ff) { Renderer_default_renderTask( - self, fb, camera, world, perFrameData, taskIDs, taskIndex0); + self, fb, camera, world, perFrameData, taskIDs, taskIndex0, ff); } #else export void AORenderer_renderTasks(void *uniform _self, @@ -177,7 +182,7 @@ export void AORenderer_renderTasks(void *uniform _self, World *uniform world = (World * uniform) _world; const uint32 *uniform taskIDs = (const uint32 *uniform)_taskIDs; launch[numTasks] Renderer_default_renderTask( - self, fb, camera, world, perFrameData, taskIDs); + self, fb, camera, world, 
perFrameData, taskIDs, ffAll()); sync; } #endif diff --git a/modules/cpu/render/ao/surfaces.ih b/modules/cpu/render/ao/surfaces.ih index f36fe7c3e..457db939a 100644 --- a/modules/cpu/render/ao/surfaces.ih +++ b/modules/cpu/render/ao/surfaces.ih @@ -24,19 +24,22 @@ typedef SurfaceShadingInfo SSI; inline void computeDG(const World *uniform world, const AORenderer *uniform renderer, const Ray &ray, - DifferentialGeometry &dg) + DifferentialGeometry &dg, + const uniform FeatureFlags &ff) { postIntersect(world, &renderer->super, dg, ray, - DG_NG | DG_NS | DG_NORMALIZE | DG_FACEFORWARD | DG_COLOR | DG_TEXCOORD); + DG_NG | DG_NS | DG_NORMALIZE | DG_FACEFORWARD | DG_COLOR | DG_TEXCOORD, + ff); } SYCL_EXTERNAL SSI AORenderer_computeShading(const AORenderer *uniform self, const FrameBuffer *uniform fb, const World *uniform world, const DifferentialGeometry &dg, - ScreenSample &sample); + ScreenSample &sample, + const uniform FeatureFlags &ff); OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/render/ao/surfaces.ispc b/modules/cpu/render/ao/surfaces.ispc index c0be8f8f6..ffc9c4fa1 100644 --- a/modules/cpu/render/ao/surfaces.ispc +++ b/modules/cpu/render/ao/surfaces.ispc @@ -14,12 +14,16 @@ OSPRAY_BEGIN_ISPC_NAMESPACE -inline vec4f getSurfaceColor(const DifferentialGeometry &dg) +inline vec4f getSurfaceColor( + const DifferentialGeometry &dg, const uniform FeatureFlagsOther ffo) { const OBJ *mat = (const OBJ *)dg.material; vec3f surfaceColor = make_vec3f(1.f); float opacity = 1.f; + + const uniform bool fft = ffo & FFO_TEXTURE_IN_MATERIAL; + #ifdef OSPRAY_TARGET_SYCL { const OBJ *m = mat; @@ -28,12 +32,12 @@ inline vec4f getSurfaceColor(const DifferentialGeometry &dg) #endif if (m != NULL && m->super.type == MATERIAL_TYPE_OBJ) { surfaceColor = m->Kd; - if (valid(m->KdMap)) { + if (valid(m->KdMap) && fft) { vec4f Kd_from_map = get4f(m->KdMap, dg); surfaceColor = surfaceColor * make_vec3f(Kd_from_map); opacity *= Kd_from_map.w; } - opacity *= m->d * get1f(m->dMap, dg, 1.f); + 
opacity *= m->d * (fft ? get1f(m->dMap, dg, 1.f) : 1.f); } } @@ -44,11 +48,12 @@ SYCL_EXTERNAL SSI AORenderer_computeShading(const AORenderer *uniform self, const FrameBuffer *uniform fb, const World *uniform world, const DifferentialGeometry &dg, - ScreenSample &sample) + ScreenSample &sample, + const uniform FeatureFlags &ff) { SSI retval; - const vec4f surfaceColor = getSurfaceColor(dg); + const vec4f surfaceColor = getSurfaceColor(dg, ff.other); const vec3f color = make_vec3f(surfaceColor); const float opacity = surfaceColor.w; @@ -61,7 +66,8 @@ SYCL_EXTERNAL SSI AORenderer_computeShading(const AORenderer *uniform self, dg, self->aoSamples, self->aoRadius, - sample.sampleID); + sample.sampleID, + ff); intensity *= (1.0f - self->aoIntensity) + (ao * self->aoIntensity); } diff --git a/modules/cpu/render/bsdfs/BSDF.ih b/modules/cpu/render/bsdfs/BSDF.ih index 72a7899a5..287a71e07 100644 --- a/modules/cpu/render/bsdfs/BSDF.ih +++ b/modules/cpu/render/bsdfs/BSDF.ih @@ -3,6 +3,8 @@ #pragma once +#include "common/FeatureFlags.ih" +#include "common/OSPCommon.ih" #include "render/pathtracer/Scattering.ih" #include "rkcommon/math/vec.ih" @@ -80,13 +82,16 @@ inline BSDF_SampleRes make_BSDF_SampleRes_zero() return res; } -typedef BSDF_EvalRes (*BSDF_EvalFunc)( - const varying BSDF *uniform self, const vec3f &wo, const vec3f &wi); +typedef BSDF_EvalRes (*BSDF_EvalFunc)(const varying BSDF *uniform self, + const vec3f &wo, + const vec3f &wi, + const uniform FeatureFlagsOther ffo); typedef BSDF_SampleRes (*BSDF_SampleFunc)(const varying BSDF *uniform self, const vec3f &wo, const vec2f &s, - float ss); + float ss, + const uniform FeatureFlagsOther ffo); struct BSDF { @@ -126,34 +131,32 @@ inline linear3f getFrame(const varying BSDF *uniform bsdf) return *bsdf->frame; } -SYCL_EXTERNAL BSDF_EvalRes BSDF_dispatch_eval( - const varying BSDF *uniform self, const vec3f &wo, const vec3f &wi); - -SYCL_EXTERNAL BSDF_SampleRes BSDF_dispatch_sample( - const varying BSDF *uniform self, 
+SYCL_EXTERNAL BSDF_EvalRes BSDF_dispatch_eval(const varying BSDF *uniform self, const vec3f &wo, - const vec2f &s, - float ss); + const vec3f &wi, + const uniform FeatureFlagsOther ffo); -SYCL_EXTERNAL BSDF_EvalRes BSDF_dispatch_eval_nomulti( - const varying BSDF *uniform self, const vec3f &wo, const vec3f &wi); - -SYCL_EXTERNAL BSDF_SampleRes BSDF_dispatch_sample_nomulti( +SYCL_EXTERNAL BSDF_SampleRes BSDF_dispatch_sample( const varying BSDF *uniform self, const vec3f &wo, const vec2f &s, - float ss); + float ss, + const uniform FeatureFlagsOther ffo); // We need to break the possible recursive call structure of a multi bsdf // containing multi-BSDFs, since SYCL doesn't allow recursive calls // TODO: Is there a flag to allow recursion in SYCL? SYCL_EXTERNAL BSDF_EvalRes BSDF_dispatch_eval_base( - const varying BSDF *uniform self, const vec3f &wo, const vec3f &wi); + const varying BSDF *uniform self, + const vec3f &wo, + const vec3f &wi, + const uniform FeatureFlagsOther ffo); SYCL_EXTERNAL BSDF_SampleRes BSDF_dispatch_sample_base( const varying BSDF *uniform self, const vec3f &wo, const vec2f &s, - float ss); + float ss, + const uniform FeatureFlagsOther ffo); OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/render/bsdfs/BSDF.ispc b/modules/cpu/render/bsdfs/BSDF.ispc index 0588b9a4f..cf28b7bb7 100644 --- a/modules/cpu/render/bsdfs/BSDF.ispc +++ b/modules/cpu/render/bsdfs/BSDF.ispc @@ -25,153 +25,227 @@ #include "render/bsdfs/Transmission.ih" #include "render/bsdfs/Velvety.ih" +#include "common/FeatureFlags.ih" + OSPRAY_BEGIN_ISPC_NAMESPACE -SYCL_EXTERNAL BSDF_EvalRes BSDF_dispatch_eval( - const varying BSDF *uniform self, const vec3f &wo, const vec3f &wi) +// Material feature flags to BSDF mapping for SYCL specialization constants +#define MTB_CONDUCTOR \ + (FFO_MATERIAL_ALLOY | FFO_MATERIAL_CARPAINT | FFO_MATERIAL_METAL \ + | FFO_MATERIAL_PRINCIPLED) +#define MTB_DIELECTRIC FFO_MATERIAL_PRINCIPLED +#define MTB_DIELECTRIC_LAYER \ + (FFO_MATERIAL_CARPAINT | 
FFO_MATERIAL_METALLICPAINT | FFO_MATERIAL_PLASTIC \ + | FFO_MATERIAL_PRINCIPLED) +#define MTB_LAMBERT \ + (FFO_MATERIAL_CARPAINT | FFO_MATERIAL_METALLICPAINT | FFO_MATERIAL_OBJ \ + | FFO_MATERIAL_PLASTIC | FFO_MATERIAL_PRINCIPLED) +#define MTB_LAMBERT_TRANSMISSION FFO_MATERIAL_PRINCIPLED +#define MTB_MICROFACET_CONDUCTOR \ + (FFO_MATERIAL_ALLOY | FFO_MATERIAL_CARPAINT | FFO_MATERIAL_METAL \ + | FFO_MATERIAL_METALLICPAINT | FFO_MATERIAL_PRINCIPLED) +#define MTB_MICROFACET_DIELECTRIC FFO_MATERIAL_PRINCIPLED +#define MTB_MICROFACET_DIELECTRIC_LAYER \ + (FFO_MATERIAL_CARPAINT | FFO_MATERIAL_PLASTIC | FFO_MATERIAL_PRINCIPLED) +#define MTB_MICROFACET_SHEEN_LAYER FFO_MATERIAL_PRINCIPLED +#define MTB_MINNEART FFO_MATERIAL_VELVET +#define MTB_MULTI_BSDF \ + (FFO_MATERIAL_METALLICPAINT | FFO_MATERIAL_MIX | FFO_MATERIAL_OBJ \ + | FFO_MATERIAL_PRINCIPLED | FFO_MATERIAL_VELVET) +#define MTB_OREN_NAYAR (FFO_MATERIAL_CARPAINT | FFO_MATERIAL_PRINCIPLED) +#define MTB_SPECULAR FFO_MATERIAL_OBJ +#define MTB_SCALE 0 +#define MTB_REFLECTION 0 +#define MTB_ROBUST_DIELECTRIC FFO_MATERIAL_GLASS +#define MTB_ROBUST_THIN_DIELECTRIC FFO_MATERIAL_THINGLASS +#define MTB_THIN_DIELECTRIC FFO_MATERIAL_PRINCIPLED +#define MTB_THIN_MICROFACET_DIELECTRIC FFO_MATERIAL_PRINCIPLED +#define MTB_TRANSMISSION \ + (FFO_MATERIAL_LUMINOUS | FFO_MATERIAL_OBJ | FFO_MATERIAL_PRINCIPLED) +#define MTB_VELVETY FFO_MATERIAL_VELVET + +static BSDF_EvalRes BSDF_dispatch_eval_nomulti(const varying BSDF *uniform self, + const vec3f &wo, + const vec3f &wi, + const uniform FeatureFlagsOther ffo) { - switch (self->bsdfType) { - case BSDF_TYPE_MULTI_BSDF: - return MultiBSDF_eval(self, wo, wi); - default: - return BSDF_dispatch_eval_nomulti(self, wo, wi); + if ((self->bsdfType == BSDF_TYPE_DIELECTRIC_LAYER) + && (ffo & MTB_DIELECTRIC_LAYER)) { + return DielectricLayer_eval(self, wo, wi, ffo); + } else if ((self->bsdfType == BSDF_TYPE_MICROFACET_DIELECTRIC_LAYER) + && (ffo & MTB_MICROFACET_DIELECTRIC_LAYER)) { + return 
MicrofacetDielectricLayer_eval(self, wo, wi, ffo); + } else if ((self->bsdfType == BSDF_TYPE_SCALE) && (ffo & MTB_SCALE)) { + return Scale_eval(self, wo, wi, ffo); + } else if ((self->bsdfType == BSDF_TYPE_MICROFACET_SHEEN_LAYER) + && (ffo & MTB_MICROFACET_SHEEN_LAYER)) { + return MicrofacetSheenLayer_eval(self, wo, wi, ffo); + } else { + return BSDF_dispatch_eval_base(self, wo, wi, ffo); } } -SYCL_EXTERNAL BSDF_SampleRes BSDF_dispatch_sample( - const varying BSDF *uniform self, const vec3f &wo, const vec2f &s, float ss) +static BSDF_SampleRes BSDF_dispatch_sample_nomulti( + const varying BSDF *uniform self, + const vec3f &wo, + const vec2f &s, + float ss, + const uniform FeatureFlagsOther ffo) { - switch (self->bsdfType) { - case BSDF_TYPE_MULTI_BSDF: - return MultiBSDF_sample(self, wo, s, ss); - default: - return BSDF_dispatch_sample_nomulti(self, wo, s, ss); + if ((self->bsdfType == BSDF_TYPE_DIELECTRIC_LAYER) + && (ffo & MTB_DIELECTRIC_LAYER)) { + return DielectricLayer_sample(self, wo, s, ss, ffo); + } else if ((self->bsdfType == BSDF_TYPE_MICROFACET_DIELECTRIC_LAYER) + && (ffo & MTB_MICROFACET_DIELECTRIC_LAYER)) { + return MicrofacetDielectricLayer_sample(self, wo, s, ss, ffo); + } else if ((self->bsdfType == BSDF_TYPE_SCALE) && (ffo & MTB_SCALE)) { + return Scale_sample(self, wo, s, ss, ffo); + } else if ((self->bsdfType == BSDF_TYPE_MICROFACET_SHEEN_LAYER) + && (ffo & MTB_MICROFACET_SHEEN_LAYER)) { + return MicrofacetSheenLayer_sample(self, wo, s, ss, ffo); + } else { + return BSDF_dispatch_sample_base(self, wo, s, ss, ffo); } } -SYCL_EXTERNAL BSDF_EvalRes BSDF_dispatch_eval_nomulti( - const varying BSDF *uniform self, const vec3f &wo, const vec3f &wi) +SYCL_EXTERNAL BSDF_EvalRes BSDF_dispatch_eval(const varying BSDF *uniform self, + const vec3f &wo, + const vec3f &wi, + const uniform FeatureFlagsOther ffo) { - switch (self->bsdfType) { - case BSDF_TYPE_DIELECTRIC_LAYER: - return DielectricLayer_eval(self, wo, wi); - case 
BSDF_TYPE_MICROFACET_DIELECTRIC_LAYER: - return MicrofacetDielectricLayer_eval(self, wo, wi); - case BSDF_TYPE_SCALE: - return Scale_eval(self, wo, wi); - case BSDF_TYPE_MICROFACET_SHEEN_LAYER: - return MicrofacetSheenLayer_eval(self, wo, wi); - default: - return BSDF_dispatch_eval_base(self, wo, wi); + if ((self->bsdfType == BSDF_TYPE_MULTI_BSDF) && (ffo & MTB_MULTI_BSDF)) { + return MultiBSDF_eval(self, wo, wi, ffo); + } else { + return BSDF_dispatch_eval_nomulti(self, wo, wi, ffo); } } -SYCL_EXTERNAL BSDF_SampleRes BSDF_dispatch_sample_nomulti( - const varying BSDF *uniform self, const vec3f &wo, const vec2f &s, float ss) +SYCL_EXTERNAL BSDF_SampleRes BSDF_dispatch_sample( + const varying BSDF *uniform self, + const vec3f &wo, + const vec2f &s, + float ss, + const uniform FeatureFlagsOther ffo) { - switch (self->bsdfType) { - case BSDF_TYPE_DIELECTRIC_LAYER: - return DielectricLayer_sample(self, wo, s, ss); - case BSDF_TYPE_MICROFACET_DIELECTRIC_LAYER: - return MicrofacetDielectricLayer_sample(self, wo, s, ss); - case BSDF_TYPE_SCALE: - return Scale_sample(self, wo, s, ss); - case BSDF_TYPE_MICROFACET_SHEEN_LAYER: - return MicrofacetSheenLayer_sample(self, wo, s, ss); - default: - return BSDF_dispatch_sample_base(self, wo, s, ss); + if ((self->bsdfType == BSDF_TYPE_MULTI_BSDF) && (ffo & MTB_MULTI_BSDF)) { + return MultiBSDF_sample(self, wo, s, ss, ffo); + } else { + return BSDF_dispatch_sample_nomulti(self, wo, s, ss, ffo); } } SYCL_EXTERNAL BSDF_EvalRes BSDF_dispatch_eval_base( - const varying BSDF *uniform self, const vec3f &wo, const vec3f &wi) + const varying BSDF *uniform self, + const vec3f &wo, + const vec3f &wi, + const uniform FeatureFlagsOther ffo) { - switch (self->bsdfType) { - case BSDF_TYPE_REALLY_UNKNOWN: - break; - case BSDF_TYPE_CONDUCTOR: - return Conductor_eval(self, wo, wi); - case BSDF_TYPE_DIELECTRIC: - return Dielectric_eval(self, wo, wi); - case BSDF_TYPE_LAMBERT: - return Lambert_eval(self, wo, wi); - case BSDF_TYPE_LAMBERT_TRANSMISSION: 
- return LambertTransmission_eval(self, wo, wi); - case BSDF_TYPE_MICROFACET_CONDUCTOR: - return MicrofacetConductor_eval(self, wo, wi); - case BSDF_TYPE_MICROFACET_DIELECTRIC: - return MicrofacetDielectric_eval(self, wo, wi); - case BSDF_TYPE_MINNEART: - return Minneart_eval(self, wo, wi); - case BSDF_TYPE_OREN_NAYAR: - return OrenNayar_eval(self, wo, wi); - case BSDF_TYPE_SPECULAR: - return Specular_eval(self, wo, wi); - case BSDF_TYPE_REFLECTION: - return Reflection_eval(self, wo, wi); - case BSDF_TYPE_ROBUST_DIELECTRIC: - return RobustDielectric_eval(self, wo, wi); - case BSDF_TYPE_ROBUST_THIN_DIELECTRIC: - return RobustThinDielectric_eval(self, wo, wi); - case BSDF_TYPE_THIN_DIELECTRIC: - return ThinDielectric_eval(self, wo, wi); - case BSDF_TYPE_THIN_MICROFACET_DIELECTRIC: - return ThinMicrofacetDielectric_eval(self, wo, wi); - case BSDF_TYPE_TRANSMISSION: - return Transmission_eval(self, wo, wi); - case BSDF_TYPE_VELVETY: - return Velvety_eval(self, wo, wi); - default: + if ((self->bsdfType == BSDF_TYPE_CONDUCTOR) && (ffo & MTB_CONDUCTOR)) { + return Conductor_eval(self, wo, wi, ffo); + } else if ((self->bsdfType == BSDF_TYPE_DIELECTRIC) + && (ffo & MTB_DIELECTRIC)) { + return Dielectric_eval(self, wo, wi, ffo); + } else if ((self->bsdfType == BSDF_TYPE_LAMBERT) && (ffo & MTB_LAMBERT)) { + return Lambert_eval(self, wo, wi, ffo); + } else if ((self->bsdfType == BSDF_TYPE_LAMBERT_TRANSMISSION) + && (ffo & MTB_LAMBERT_TRANSMISSION)) { + return LambertTransmission_eval(self, wo, wi, ffo); + } else if ((self->bsdfType == BSDF_TYPE_MICROFACET_CONDUCTOR) + && (ffo & MTB_MICROFACET_CONDUCTOR)) { + return MicrofacetConductor_eval(self, wo, wi, ffo); + } else if ((self->bsdfType == BSDF_TYPE_MICROFACET_DIELECTRIC) + && (ffo & MTB_MICROFACET_DIELECTRIC)) { + return MicrofacetDielectric_eval(self, wo, wi, ffo); + } else if ((self->bsdfType == BSDF_TYPE_MINNEART) && (ffo & MTB_MINNEART)) { + return Minneart_eval(self, wo, wi, ffo); + } else if ((self->bsdfType == 
BSDF_TYPE_SPECULAR) && (ffo & MTB_SPECULAR)) { + return Specular_eval(self, wo, wi, ffo); + } else if ((self->bsdfType == BSDF_TYPE_OREN_NAYAR) + && (ffo & MTB_OREN_NAYAR)) { + return OrenNayar_eval(self, wo, wi, ffo); + } else if ((self->bsdfType == BSDF_TYPE_REFLECTION) + && (ffo & MTB_REFLECTION)) { + return Reflection_eval(self, wo, wi, ffo); + } else if ((self->bsdfType == BSDF_TYPE_ROBUST_DIELECTRIC) + && (ffo & MTB_ROBUST_DIELECTRIC)) { + return RobustDielectric_eval(self, wo, wi, ffo); + } else if ((self->bsdfType == BSDF_TYPE_ROBUST_THIN_DIELECTRIC) + && (ffo & MTB_ROBUST_THIN_DIELECTRIC)) { + return RobustThinDielectric_eval(self, wo, wi, ffo); + } else if ((self->bsdfType == BSDF_TYPE_THIN_DIELECTRIC) + && (ffo & MTB_THIN_DIELECTRIC)) { + return ThinDielectric_eval(self, wo, wi, ffo); + } else if ((self->bsdfType == BSDF_TYPE_THIN_MICROFACET_DIELECTRIC) + && (ffo & MTB_THIN_MICROFACET_DIELECTRIC)) { + return ThinMicrofacetDielectric_eval(self, wo, wi, ffo); + } else if ((self->bsdfType == BSDF_TYPE_TRANSMISSION) + && (ffo & MTB_TRANSMISSION)) { + return Transmission_eval(self, wo, wi, ffo); + } else if ((self->bsdfType == BSDF_TYPE_VELVETY) && (ffo & MTB_VELVETY)) { + return Velvety_eval(self, wo, wi, ffo); + } else if (self->bsdfType == BSDF_TYPE_REALLY_UNKNOWN) { + } else { #ifndef OSPRAY_TARGET_SYCL - return self->eval(self, wo, wi); -#else - break; + return self->eval(self, wo, wi, ffo); #endif } return make_BSDF_EvalRes_zero(); } SYCL_EXTERNAL BSDF_SampleRes BSDF_dispatch_sample_base( - const varying BSDF *uniform self, const vec3f &wo, const vec2f &s, float ss) + const varying BSDF *uniform self, + const vec3f &wo, + const vec2f &s, + float ss, + const uniform FeatureFlagsOther ffo) { - switch (self->bsdfType) { - case BSDF_TYPE_CONDUCTOR: - return Conductor_sample(self, wo, s, ss); - case BSDF_TYPE_DIELECTRIC: - return Dielectric_sample(self, wo, s, ss); - case BSDF_TYPE_LAMBERT: - return Lambert_sample(self, wo, s, ss); - case 
BSDF_TYPE_LAMBERT_TRANSMISSION: - return LambertTransmission_sample(self, wo, s, ss); - case BSDF_TYPE_MICROFACET_CONDUCTOR: - return MicrofacetConductor_sample(self, wo, s, ss); - case BSDF_TYPE_MICROFACET_DIELECTRIC: - return MicrofacetDielectric_sample(self, wo, s, ss); - case BSDF_TYPE_MINNEART: - return Minneart_sample(self, wo, s, ss); - case BSDF_TYPE_OREN_NAYAR: - return OrenNayar_sample(self, wo, s, ss); - case BSDF_TYPE_SPECULAR: - return Specular_sample(self, wo, s, ss); - case BSDF_TYPE_REFLECTION: - return Reflection_sample(self, wo, s, ss); - case BSDF_TYPE_ROBUST_DIELECTRIC: - return RobustDielectric_sample(self, wo, s, ss); - case BSDF_TYPE_ROBUST_THIN_DIELECTRIC: - return RobustThinDielectric_sample(self, wo, s, ss); - case BSDF_TYPE_THIN_DIELECTRIC: - return ThinDielectric_sample(self, wo, s, ss); - case BSDF_TYPE_THIN_MICROFACET_DIELECTRIC: - return ThinMicrofacetDielectric_sample(self, wo, s, ss); - case BSDF_TYPE_TRANSMISSION: - return Transmission_sample(self, wo, s, ss); - case BSDF_TYPE_VELVETY: - return Velvety_sample(self, wo, s, ss); - default: + if ((self->bsdfType == BSDF_TYPE_CONDUCTOR) && (ffo & MTB_CONDUCTOR)) { + return Conductor_sample(self, wo, s, ss, ffo); + } else if ((self->bsdfType == BSDF_TYPE_DIELECTRIC) + && (ffo & MTB_DIELECTRIC)) { + return Dielectric_sample(self, wo, s, ss, ffo); + } else if ((self->bsdfType == BSDF_TYPE_LAMBERT) && (ffo & MTB_LAMBERT)) { + return Lambert_sample(self, wo, s, ss, ffo); + } else if ((self->bsdfType == BSDF_TYPE_LAMBERT_TRANSMISSION) + && (ffo & MTB_LAMBERT_TRANSMISSION)) { + return LambertTransmission_sample(self, wo, s, ss, ffo); + } else if ((self->bsdfType == BSDF_TYPE_MICROFACET_CONDUCTOR) + && (ffo & MTB_MICROFACET_CONDUCTOR)) { + return MicrofacetConductor_sample(self, wo, s, ss, ffo); + } else if ((self->bsdfType == BSDF_TYPE_MICROFACET_DIELECTRIC) + && (ffo & MTB_MICROFACET_DIELECTRIC)) { + return MicrofacetDielectric_sample(self, wo, s, ss, ffo); + } else if ((self->bsdfType == 
BSDF_TYPE_MINNEART) && (ffo & MTB_MINNEART)) { + return Minneart_sample(self, wo, s, ss, ffo); + } else if ((self->bsdfType == BSDF_TYPE_OREN_NAYAR) + && (ffo & MTB_OREN_NAYAR)) { + return OrenNayar_sample(self, wo, s, ss, ffo); + } else if ((self->bsdfType == BSDF_TYPE_SPECULAR) && (ffo & MTB_SPECULAR)) { + return Specular_sample(self, wo, s, ss, ffo); + } else if ((self->bsdfType == BSDF_TYPE_REFLECTION) + && (ffo & MTB_REFLECTION)) { + return Reflection_sample(self, wo, s, ss, ffo); + } else if ((self->bsdfType == BSDF_TYPE_ROBUST_DIELECTRIC) + && (ffo & MTB_ROBUST_DIELECTRIC)) { + return RobustDielectric_sample(self, wo, s, ss, ffo); + } else if ((self->bsdfType == BSDF_TYPE_ROBUST_THIN_DIELECTRIC) + && (ffo & MTB_ROBUST_THIN_DIELECTRIC)) { + return RobustThinDielectric_sample(self, wo, s, ss, ffo); + } else if ((self->bsdfType == BSDF_TYPE_THIN_DIELECTRIC) + && (ffo & MTB_THIN_DIELECTRIC)) { + return ThinDielectric_sample(self, wo, s, ss, ffo); + } else if ((self->bsdfType == BSDF_TYPE_THIN_MICROFACET_DIELECTRIC) + && (ffo & MTB_THIN_MICROFACET_DIELECTRIC)) { + return ThinMicrofacetDielectric_sample(self, wo, s, ss, ffo); + } else if ((self->bsdfType == BSDF_TYPE_TRANSMISSION) + && (ffo & MTB_TRANSMISSION)) { + return Transmission_sample(self, wo, s, ss, ffo); + } else if ((self->bsdfType == BSDF_TYPE_VELVETY) && (ffo & MTB_VELVETY)) { + return Velvety_sample(self, wo, s, ss, ffo); + } else if (self->bsdfType == BSDF_TYPE_REALLY_UNKNOWN) { + } else { #ifndef OSPRAY_TARGET_SYCL - return self->sample(self, wo, s, ss); -#else - break; + return self->sample(self, wo, s, ss, ffo); #endif } return make_BSDF_SampleRes_zero(); diff --git a/modules/cpu/render/bsdfs/Conductor.ih b/modules/cpu/render/bsdfs/Conductor.ih index 8775bc14b..59c4a7340 100644 --- a/modules/cpu/render/bsdfs/Conductor.ih +++ b/modules/cpu/render/bsdfs/Conductor.ih @@ -15,8 +15,10 @@ struct Conductor Fresnel *uniform fresnel; }; -inline BSDF_EvalRes Conductor_eval( - const varying BSDF *uniform 
super, const vec3f &wo, const vec3f &wi) +inline BSDF_EvalRes Conductor_eval(const varying BSDF *uniform super, + const vec3f &wo, + const vec3f &wi, + const uniform FeatureFlagsOther ffo) { return make_BSDF_EvalRes_zero(); } @@ -24,7 +26,8 @@ inline BSDF_EvalRes Conductor_eval( inline BSDF_SampleRes Conductor_sample(const varying BSDF *uniform super, const vec3f &wo, const vec2f &s, - float ss) + float ss, + const uniform FeatureFlagsOther ffo) { const varying Conductor *uniform self = (const varying Conductor *uniform)super; diff --git a/modules/cpu/render/bsdfs/Dielectric.ih b/modules/cpu/render/bsdfs/Dielectric.ih index 8e2c1c15b..12b56368a 100644 --- a/modules/cpu/render/bsdfs/Dielectric.ih +++ b/modules/cpu/render/bsdfs/Dielectric.ih @@ -16,8 +16,10 @@ struct Dielectric float eta; // etaO / etaI }; -inline BSDF_EvalRes Dielectric_eval( - const varying BSDF *uniform super, const vec3f &wo, const vec3f &wi) +inline BSDF_EvalRes Dielectric_eval(const varying BSDF *uniform super, + const vec3f &wo, + const vec3f &wi, + const uniform FeatureFlagsOther ffo) { return make_BSDF_EvalRes_zero(); } @@ -25,7 +27,8 @@ inline BSDF_EvalRes Dielectric_eval( inline BSDF_SampleRes Dielectric_sample(const varying BSDF *uniform super, const vec3f &wo, const vec2f &s, - float ss) + float ss, + const uniform FeatureFlagsOther ffo) { const varying Dielectric *uniform self = (const varying Dielectric *uniform)super; diff --git a/modules/cpu/render/bsdfs/DielectricLayer.ih b/modules/cpu/render/bsdfs/DielectricLayer.ih index f254f25b5..c0364f5f7 100644 --- a/modules/cpu/render/bsdfs/DielectricLayer.ih +++ b/modules/cpu/render/bsdfs/DielectricLayer.ih @@ -29,8 +29,10 @@ struct DielectricLayer float weight; }; -inline BSDF_EvalRes DielectricLayer_eval( - const varying BSDF *uniform super, const vec3f &wo, const vec3f &wi) +inline BSDF_EvalRes DielectricLayer_eval(const varying BSDF *uniform super, + const vec3f &wo, + const vec3f &wi, + const uniform FeatureFlagsOther ffo) { const 
varying DielectricLayer *uniform self = (const varying DielectricLayer *uniform)super; @@ -58,9 +60,10 @@ inline BSDF_EvalRes DielectricLayer_eval( // recursion (that we know will terminate) from the SYCL compiler we have // to call MultiBSDF directly here if we have a layer over a MultiBSDF if (f->bsdfType == BSDF_TYPE_MULTI_BSDF) { - substrate = MultiBSDF_eval(f, wo, wi); // f->eval(f, wo, wi); + substrate = MultiBSDF_eval(f, wo, wi, ffo); // f->eval(f, wo, wi); } else { - substrate = BSDF_dispatch_eval_base(f, wo, wi); // f->eval(f, wo, wi); + substrate = + BSDF_dispatch_eval_base(f, wo, wi, ffo); // f->eval(f, wo, wi); } } @@ -97,7 +100,8 @@ inline BSDF_EvalRes DielectricLayer_eval( inline BSDF_SampleRes DielectricLayer_sample(const varying BSDF *uniform super, const vec3f &wo, const vec2f &s, - float ss) + float ss, + const uniform FeatureFlagsOther ffo) { const varying DielectricLayer *uniform self = (const varying DielectricLayer *uniform)super; @@ -132,10 +136,10 @@ inline BSDF_SampleRes DielectricLayer_sample(const varying BSDF *uniform super, // recursion (that we know will terminate) from the SYCL compiler we have // to call MultiBSDF directly here if we have a layer over a MultiBSDF if (f->bsdfType == BSDF_TYPE_MULTI_BSDF) { - res = MultiBSDF_sample(f, wo, s, ss1); // f->sample(f, wo, s, ss1); + res = MultiBSDF_sample(f, wo, s, ss1, ffo); // f->sample(f, wo, s, ss1); } else { res = BSDF_dispatch_sample_base( - f, wo, s, ss1); // f->sample(f, wo, s, ss1); + f, wo, s, ss1, ffo); // f->sample(f, wo, s, ss1); } } if (reduce_max(res.weight) <= 0.f) diff --git a/modules/cpu/render/bsdfs/Lambert.ih b/modules/cpu/render/bsdfs/Lambert.ih index 28996dcda..993596eee 100644 --- a/modules/cpu/render/bsdfs/Lambert.ih +++ b/modules/cpu/render/bsdfs/Lambert.ih @@ -8,8 +8,10 @@ OSPRAY_BEGIN_ISPC_NAMESPACE -inline BSDF_EvalRes Lambert_eval( - const varying BSDF *uniform self, const vec3f &wo, const vec3f &wi) +inline BSDF_EvalRes Lambert_eval(const varying BSDF *uniform 
self, + const vec3f &wo, + const vec3f &wi, + const uniform FeatureFlagsOther ffo) { BSDF_EvalRes res; float cosThetaI = max(dot(wi, getN(self)), 0.f); @@ -18,8 +20,11 @@ inline BSDF_EvalRes Lambert_eval( return res; } -inline BSDF_SampleRes Lambert_sample( - const varying BSDF *uniform self, const vec3f &wo, const vec2f &s, float ss) +inline BSDF_SampleRes Lambert_sample(const varying BSDF *uniform self, + const vec3f &wo, + const vec2f &s, + float ss, + const uniform FeatureFlagsOther ffo) { const vec3f localDir = cosineSampleHemisphere(s); BSDF_SampleRes res; diff --git a/modules/cpu/render/bsdfs/LambertTransmission.ih b/modules/cpu/render/bsdfs/LambertTransmission.ih index 49fb01550..8fb50bc84 100644 --- a/modules/cpu/render/bsdfs/LambertTransmission.ih +++ b/modules/cpu/render/bsdfs/LambertTransmission.ih @@ -7,8 +7,10 @@ OSPRAY_BEGIN_ISPC_NAMESPACE -inline BSDF_EvalRes LambertTransmission_eval( - const varying BSDF *uniform self, const vec3f &wo, const vec3f &wi) +inline BSDF_EvalRes LambertTransmission_eval(const varying BSDF *uniform self, + const vec3f &wo, + const vec3f &wi, + const uniform FeatureFlagsOther ffo) { BSDF_EvalRes res; float cosThetaI = max(-dot(wi, getN(self)), 0.f); @@ -18,7 +20,11 @@ inline BSDF_EvalRes LambertTransmission_eval( } inline BSDF_SampleRes LambertTransmission_sample( - const varying BSDF *uniform self, const vec3f &wo, const vec2f &s, float ss) + const varying BSDF *uniform self, + const vec3f &wo, + const vec2f &s, + float ss, + const uniform FeatureFlagsOther ffo) { const vec3f localDir = cosineSampleHemisphere(s); BSDF_SampleRes res; diff --git a/modules/cpu/render/bsdfs/MicrofacetConductor.ih b/modules/cpu/render/bsdfs/MicrofacetConductor.ih index 21cdaf330..dd2a81c51 100644 --- a/modules/cpu/render/bsdfs/MicrofacetConductor.ih +++ b/modules/cpu/render/bsdfs/MicrofacetConductor.ih @@ -28,8 +28,10 @@ struct MicrofacetConductor vec3f fmsScale; }; -inline BSDF_EvalRes MicrofacetConductor_eval( - const varying BSDF *uniform 
super, const vec3f &wo, const vec3f &wi) +inline BSDF_EvalRes MicrofacetConductor_eval(const varying BSDF *uniform super, + const vec3f &wo, + const vec3f &wi, + const uniform FeatureFlagsOther ffo) { const varying MicrofacetConductor *uniform self = (const varying MicrofacetConductor *uniform)super; @@ -74,7 +76,8 @@ inline BSDF_SampleRes MicrofacetConductor_sample( const varying BSDF *uniform super, const vec3f &wo, const vec2f &s, - float ss) + float ss, + const uniform FeatureFlagsOther ffo) { const varying MicrofacetConductor *uniform self = (const varying MicrofacetConductor *uniform)super; @@ -170,4 +173,3 @@ inline varying BSDF *uniform MicrofacetConductor_create( } OSPRAY_END_ISPC_NAMESPACE - diff --git a/modules/cpu/render/bsdfs/MicrofacetDielectric.ih b/modules/cpu/render/bsdfs/MicrofacetDielectric.ih index 3765bfea4..2170820cc 100644 --- a/modules/cpu/render/bsdfs/MicrofacetDielectric.ih +++ b/modules/cpu/render/bsdfs/MicrofacetDielectric.ih @@ -33,7 +33,10 @@ struct MicrofacetDielectric // Single-scattering lobe inline BSDF_EvalRes MicrofacetDielectric_evalSingle( - const varying BSDF *uniform super, const vec3f &wo, const vec3f &wi) + const varying BSDF *uniform super, + const vec3f &wo, + const vec3f &wi, + const uniform FeatureFlagsOther ffo) { const varying MicrofacetDielectric *uniform self = (const varying MicrofacetDielectric *uniform)super; @@ -95,8 +98,10 @@ inline BSDF_EvalRes MicrofacetDielectric_evalSingle( return res; } -inline BSDF_EvalRes MicrofacetDielectric_eval( - const varying BSDF *uniform super, const vec3f &wo, const vec3f &wi) +inline BSDF_EvalRes MicrofacetDielectric_eval(const varying BSDF *uniform super, + const vec3f &wo, + const vec3f &wi, + const uniform FeatureFlagsOther ffo) { const varying MicrofacetDielectric *uniform self = (const varying MicrofacetDielectric *uniform)super; @@ -134,7 +139,7 @@ inline BSDF_EvalRes MicrofacetDielectric_eval( } // Evaluate the single-scattering lobe - BSDF_EvalRes single = 
MicrofacetDielectric_evalSingle(super, wo, wi); + BSDF_EvalRes single = MicrofacetDielectric_evalSingle(super, wo, wi, ffo); // Compute the final result float singlePickProb = Eo; @@ -147,7 +152,8 @@ inline BSDF_SampleRes MicrofacetDielectric_sample( const varying BSDF *uniform super, const vec3f &wo, const vec2f &s, - float ss) + float ss, + const uniform FeatureFlagsOther ffo) { const varying MicrofacetDielectric *uniform self = (const varying MicrofacetDielectric *uniform)super; @@ -236,7 +242,7 @@ inline BSDF_SampleRes MicrofacetDielectric_sample( } // Evaluate the single-scattering lobe - BSDF_EvalRes single = MicrofacetDielectric_evalSingle(super, wo, res.wi); + BSDF_EvalRes single = MicrofacetDielectric_evalSingle(super, wo, res.wi, ffo); // Compute the final result res.pdf = singlePickProb * single.pdf + (1.f - singlePickProb) * fms.pdf; diff --git a/modules/cpu/render/bsdfs/MicrofacetDielectricLayer.ih b/modules/cpu/render/bsdfs/MicrofacetDielectricLayer.ih index c6865c051..319571370 100644 --- a/modules/cpu/render/bsdfs/MicrofacetDielectricLayer.ih +++ b/modules/cpu/render/bsdfs/MicrofacetDielectricLayer.ih @@ -39,7 +39,10 @@ struct MicrofacetDielectricLayer }; inline BSDF_EvalRes MicrofacetDielectricLayer_eval( - const varying BSDF *uniform super, const vec3f &wo, const vec3f &wi) + const varying BSDF *uniform super, + const vec3f &wo, + const vec3f &wi, + const uniform FeatureFlagsOther ffo) { const varying MicrofacetDielectricLayer *uniform self = (const varying MicrofacetDielectricLayer *uniform)super; @@ -62,9 +65,10 @@ inline BSDF_EvalRes MicrofacetDielectricLayer_eval( // recursion (that we know will terminate) from the SYCL compiler we have // to call MultiBSDF directly here if we have a layer over a MultiBSDF if (f->bsdfType == BSDF_TYPE_MULTI_BSDF) { - substrate = MultiBSDF_eval(f, wo, wi); // f->eval(f, wo, wi); + substrate = MultiBSDF_eval(f, wo, wi, ffo); // f->eval(f, wo, wi); } else { - substrate = BSDF_dispatch_eval_base(f, wo, wi); // 
f->eval(f, wo, wi); + substrate = + BSDF_dispatch_eval_base(f, wo, wi, ffo); // f->eval(f, wo, wi); } } @@ -155,7 +159,8 @@ inline BSDF_SampleRes MicrofacetDielectricLayer_sample( const varying BSDF *uniform super, const vec3f &wo, const vec2f &s, - float ss) + float ss, + const uniform FeatureFlagsOther ffo) { const varying MicrofacetDielectricLayer *uniform self = (const varying MicrofacetDielectricLayer *uniform)super; @@ -205,10 +210,10 @@ inline BSDF_SampleRes MicrofacetDielectricLayer_sample( // recursion (that we know will terminate) from the SYCL compiler we have // to call MultiBSDF directly here if we have a layer over a MultiBSDF if (f->bsdfType == BSDF_TYPE_MULTI_BSDF) { - substrate = MultiBSDF_eval(f, wo, res.wi); // f->eval(f, wo, wi); + substrate = MultiBSDF_eval(f, wo, res.wi, ffo); // f->eval(f, wo, wi); } else { substrate = - BSDF_dispatch_eval_base(f, wo, res.wi); // f->eval(f, wo, wi); + BSDF_dispatch_eval_base(f, wo, res.wi, ffo); // f->eval(f, wo, wi); } } } else { @@ -226,10 +231,10 @@ inline BSDF_SampleRes MicrofacetDielectricLayer_sample( // recursion (that we know will terminate) from the SYCL compiler we have // to call MultiBSDF directly here if we have a layer over a MultiBSDF if (f->bsdfType == BSDF_TYPE_MULTI_BSDF) { - res = MultiBSDF_sample(f, wo, s, ss1); // f->sample(f, wo, s, ss1); + res = MultiBSDF_sample(f, wo, s, ss1, ffo); // f->sample(f, wo, s, ss1); } else { res = BSDF_dispatch_sample_base( - f, wo, s, ss1); // f->sample(f, wo, s, ss1); + f, wo, s, ss1, ffo); // f->sample(f, wo, s, ss1); } } if (reduce_max(res.weight) <= 0.f) diff --git a/modules/cpu/render/bsdfs/MicrofacetSheenLayer.ih b/modules/cpu/render/bsdfs/MicrofacetSheenLayer.ih index 1787456eb..5b2989b10 100644 --- a/modules/cpu/render/bsdfs/MicrofacetSheenLayer.ih +++ b/modules/cpu/render/bsdfs/MicrofacetSheenLayer.ih @@ -25,8 +25,10 @@ struct MicrofacetSheenLayer float weight; }; -inline BSDF_EvalRes MicrofacetSheenLayer_eval( - const varying BSDF *uniform super, 
const vec3f &wo, const vec3f &wi) +inline BSDF_EvalRes MicrofacetSheenLayer_eval(const varying BSDF *uniform super, + const vec3f &wo, + const vec3f &wi, + const uniform FeatureFlagsOther ffo) { const varying MicrofacetSheenLayer *uniform self = (const varying MicrofacetSheenLayer *uniform)super; @@ -48,9 +50,10 @@ inline BSDF_EvalRes MicrofacetSheenLayer_eval( // recursion (that we know will terminate) from the SYCL compiler we have // to call MultiBSDF directly here if we have a layer over a MultiBSDF if (f->bsdfType == BSDF_TYPE_MULTI_BSDF) { - substrate = MultiBSDF_eval(f, wo, wi); // f->eval(f, wo, wi); + substrate = MultiBSDF_eval(f, wo, wi, ffo); // f->eval(f, wo, wi); } else { - substrate = BSDF_dispatch_eval_base(f, wo, wi); // f->eval(f, wo, wi); + substrate = + BSDF_dispatch_eval_base(f, wo, wi, ffo); // f->eval(f, wo, wi); } } @@ -99,7 +102,8 @@ inline BSDF_SampleRes MicrofacetSheenLayer_sample( const varying BSDF *uniform super, const vec3f &wo, const vec2f &s, - float ss) + float ss, + const uniform FeatureFlagsOther ffo) { const varying MicrofacetSheenLayer *uniform self = (const varying MicrofacetSheenLayer *uniform)super; @@ -136,10 +140,10 @@ inline BSDF_SampleRes MicrofacetSheenLayer_sample( // recursion (that we know will terminate) from the SYCL compiler we have // to call MultiBSDF directly here if we have a layer over a MultiBSDF if (f->bsdfType == BSDF_TYPE_MULTI_BSDF) { - substrate = MultiBSDF_eval(f, wo, res.wi); // f->eval(f, wo, wi); + substrate = MultiBSDF_eval(f, wo, res.wi, ffo); // f->eval(f, wo, wi); } else { substrate = - BSDF_dispatch_eval_base(f, wo, res.wi); // f->eval(f, wo, wi); + BSDF_dispatch_eval_base(f, wo, res.wi, ffo); // f->eval(f, wo, wi); } } } else { @@ -156,10 +160,10 @@ inline BSDF_SampleRes MicrofacetSheenLayer_sample( // recursion (that we know will terminate) from the SYCL compiler we have // to call MultiBSDF directly here if we have a layer over a MultiBSDF if (f->bsdfType == BSDF_TYPE_MULTI_BSDF) { - res = 
MultiBSDF_sample(f, wo, s, ss1); // f->sample(f, wo, s, ss1); + res = MultiBSDF_sample(f, wo, s, ss1, ffo); // f->sample(f, wo, s, ss1); } else { res = BSDF_dispatch_sample_base( - f, wo, s, ss1); // f->sample(f, wo, s, ss1); + f, wo, s, ss1, ffo); // f->sample(f, wo, s, ss1); } } if (reduce_max(res.weight) <= 0.f) diff --git a/modules/cpu/render/bsdfs/Minneart.ih b/modules/cpu/render/bsdfs/Minneart.ih index fd673bcf2..294708034 100644 --- a/modules/cpu/render/bsdfs/Minneart.ih +++ b/modules/cpu/render/bsdfs/Minneart.ih @@ -16,8 +16,10 @@ struct Minneart float b; }; -inline BSDF_EvalRes Minneart_eval( - const varying BSDF *uniform super, const vec3f &wo, const vec3f &wi) +inline BSDF_EvalRes Minneart_eval(const varying BSDF *uniform super, + const vec3f &wo, + const vec3f &wi, + const uniform FeatureFlagsOther ffo) { const varying Minneart *uniform self = (const varying Minneart *uniform)super; BSDF_EvalRes res; @@ -33,7 +35,8 @@ inline BSDF_EvalRes Minneart_eval( inline BSDF_SampleRes Minneart_sample(const varying BSDF *uniform super, const vec3f &wo, const vec2f &s, - float ss) + float ss, + const uniform FeatureFlagsOther ffo) { const varying Minneart *uniform self = (const varying Minneart *uniform)super; BSDF_SampleRes res; @@ -41,7 +44,7 @@ inline BSDF_SampleRes Minneart_sample(const varying BSDF *uniform super, const vec3f localDir = cosineSampleHemisphere(s); res.wi = getFrame(super) * localDir; res.type = BSDF_DIFFUSE_REFLECTION; - BSDF_EvalRes eval = Minneart_eval(super, wo, res.wi); + BSDF_EvalRes eval = Minneart_eval(super, wo, res.wi, ffo); res.pdf = eval.pdf; res.weight = eval.value * rcp(eval.pdf); return res; diff --git a/modules/cpu/render/bsdfs/MultiBSDF.ih b/modules/cpu/render/bsdfs/MultiBSDF.ih index 1b86f9066..2fc2ebd11 100644 --- a/modules/cpu/render/bsdfs/MultiBSDF.ih +++ b/modules/cpu/render/bsdfs/MultiBSDF.ih @@ -4,6 +4,7 @@ #pragma once #include "BSDF.ih" +#include "ShadingContext.ih" OSPRAY_BEGIN_ISPC_NAMESPACE @@ -62,8 +63,10 @@ inline 
void MultiBSDF_add(varying BSDF *uniform super, #endif /*! Evaluates all BSDF components. */ -inline BSDF_EvalRes MultiBSDF_eval( - const varying BSDF *uniform super, const vec3f &wo, const vec3f &wi) +inline BSDF_EvalRes MultiBSDF_eval(const varying BSDF *uniform super, + const vec3f &wo, + const vec3f &wi, + const uniform FeatureFlagsOther ffo) { const varying MultiBSDF *uniform self = (const varying MultiBSDF *uniform)super; @@ -73,7 +76,7 @@ inline BSDF_EvalRes MultiBSDF_eval( for (uniform int i = 0; i < self->numBsdfs; ++i) { if (self->importances[i] > 0.0f) { const varying BSDF *uniform curBsdf = self->bsdfs[i]; - BSDF_EvalRes cur = BSDF_dispatch_eval_base(curBsdf, wo, wi); + BSDF_EvalRes cur = BSDF_dispatch_eval_base(curBsdf, wo, wi, ffo); cur.value = cur.value * self->weights[i]; res.value = res.value + cur.value; res.pdf += cur.pdf * self->importances[i]; @@ -88,7 +91,8 @@ inline BSDF_EvalRes MultiBSDF_eval( inline BSDF_SampleRes MultiBSDF_sample(const varying BSDF *uniform super, const vec3f &wo, const vec2f &s, - float ss) + float ss, + const uniform FeatureFlagsOther ffo) { const varying MultiBSDF *uniform self = (const varying MultiBSDF *uniform)super; @@ -100,7 +104,7 @@ inline BSDF_SampleRes MultiBSDF_sample(const varying BSDF *uniform super, if (self->numBsdfs == 1) { const varying BSDF *uniform bsdf = self->bsdfs[0]; - res = BSDF_dispatch_sample_base(bsdf, wo, s, ss); + res = BSDF_dispatch_sample_base(bsdf, wo, s, ss, ffo); res.weight = res.weight * self->weights[0]; return res; } else { @@ -127,7 +131,7 @@ inline BSDF_SampleRes MultiBSDF_sample(const varying BSDF *uniform super, foreach_unique (i in choice) { #endif const varying BSDF *uniform bsdf = self->bsdfs[i]; - res = BSDF_dispatch_sample_base(bsdf, wo, s, ss); + res = BSDF_dispatch_sample_base(bsdf, wo, s, ss, ffo); res.weight = res.weight * self->weights[i]; } @@ -151,7 +155,7 @@ inline BSDF_SampleRes MultiBSDF_sample(const varying BSDF *uniform super, for (uniform int i = 0; i < 
self->numBsdfs; ++i) { if ((i != choice) & (self->importances[i] > 0.0f)) { const varying BSDF *uniform curBsdf = self->bsdfs[i]; - BSDF_EvalRes cur = BSDF_dispatch_eval_base(curBsdf, wo, res.wi); + BSDF_EvalRes cur = BSDF_dispatch_eval(curBsdf, wo, res.wi, ffo); cur.value = cur.value * self->weights[i]; value = value + cur.value; res.pdf += cur.pdf * self->importances[i]; diff --git a/modules/cpu/render/bsdfs/OrenNayar.ih b/modules/cpu/render/bsdfs/OrenNayar.ih index 4ba64a8ec..b132a731a 100644 --- a/modules/cpu/render/bsdfs/OrenNayar.ih +++ b/modules/cpu/render/bsdfs/OrenNayar.ih @@ -17,8 +17,10 @@ struct OrenNayar float roughness; }; -inline BSDF_EvalRes OrenNayar_eval( - const varying BSDF *uniform super, const vec3f &wo, const vec3f &wi) +inline BSDF_EvalRes OrenNayar_eval(const varying BSDF *uniform super, + const vec3f &wo, + const vec3f &wi, + const uniform FeatureFlagsOther ffo) { const varying OrenNayar *uniform self = (const varying OrenNayar *uniform)super; @@ -39,14 +41,15 @@ inline BSDF_EvalRes OrenNayar_eval( inline BSDF_SampleRes OrenNayar_sample(const varying BSDF *uniform super, const vec3f &wo, const vec2f &s, - float ss) + float ss, + const uniform FeatureFlagsOther ffo) { const varying OrenNayar *uniform self = (const varying OrenNayar *uniform)super; const vec3f localDir = cosineSampleHemisphere(s); BSDF_SampleRes res; res.wi = getFrame(super) * localDir; - BSDF_EvalRes eval = OrenNayar_eval(super, wo, res.wi); + BSDF_EvalRes eval = OrenNayar_eval(super, wo, res.wi, ffo); res.pdf = eval.pdf; res.type = BSDF_DIFFUSE_REFLECTION; res.weight = eval.value * rcp(eval.pdf); diff --git a/modules/cpu/render/bsdfs/Reflection.ih b/modules/cpu/render/bsdfs/Reflection.ih index 4c7f89241..6fbec1ec3 100644 --- a/modules/cpu/render/bsdfs/Reflection.ih +++ b/modules/cpu/render/bsdfs/Reflection.ih @@ -7,14 +7,19 @@ OSPRAY_BEGIN_ISPC_NAMESPACE -inline BSDF_EvalRes Reflection_eval( - const varying BSDF *uniform self, const vec3f &wo, const vec3f &wi) +inline 
BSDF_EvalRes Reflection_eval(const varying BSDF *uniform self, + const vec3f &wo, + const vec3f &wi, + const uniform FeatureFlagsOther ffo) { return make_BSDF_EvalRes_zero(); } -inline BSDF_SampleRes Reflection_sample( - const varying BSDF *uniform self, const vec3f &wo, const vec2f &s, float ss) +inline BSDF_SampleRes Reflection_sample(const varying BSDF *uniform self, + const vec3f &wo, + const vec2f &s, + float ss, + const uniform FeatureFlagsOther ffo) { BSDF_SampleRes res; diff --git a/modules/cpu/render/bsdfs/RobustDielectric.ih b/modules/cpu/render/bsdfs/RobustDielectric.ih index 778fbe561..c39aac45f 100644 --- a/modules/cpu/render/bsdfs/RobustDielectric.ih +++ b/modules/cpu/render/bsdfs/RobustDielectric.ih @@ -24,8 +24,10 @@ struct RobustDielectric vec3f Ns; }; -inline BSDF_EvalRes RobustDielectric_eval( - const varying BSDF *uniform super, const vec3f &wo, const vec3f &wi) +inline BSDF_EvalRes RobustDielectric_eval(const varying BSDF *uniform super, + const vec3f &wo, + const vec3f &wi, + const uniform FeatureFlagsOther ffo) { return make_BSDF_EvalRes_zero(); } @@ -97,7 +99,8 @@ inline vec3f ReflectionDirection(const vec3f &incomingDir, const vec3f &normal) inline BSDF_SampleRes RobustDielectric_sample(const varying BSDF *uniform super, const vec3f &wo, const vec2f & /*randomV*/, - float randomF) + float randomF, + const uniform FeatureFlagsOther ffo) { const varying RobustDielectric *uniform self = (const varying RobustDielectric *uniform)super; diff --git a/modules/cpu/render/bsdfs/RobustThinDielectric.ih b/modules/cpu/render/bsdfs/RobustThinDielectric.ih index fc73c28ae..b7522cb91 100644 --- a/modules/cpu/render/bsdfs/RobustThinDielectric.ih +++ b/modules/cpu/render/bsdfs/RobustThinDielectric.ih @@ -16,8 +16,10 @@ struct RobustThinDielectric vec3f attenuation; }; -inline BSDF_EvalRes RobustThinDielectric_eval( - const varying BSDF *uniform super, const vec3f &wo, const vec3f &wi) +inline BSDF_EvalRes RobustThinDielectric_eval(const varying BSDF *uniform 
super, + const vec3f &wo, + const vec3f &wi, + const uniform FeatureFlagsOther ffo) { return make_BSDF_EvalRes_zero(); } @@ -26,7 +28,8 @@ inline BSDF_SampleRes RobustThinDielectric_sample( const varying BSDF *uniform super, const vec3f &wo, const vec2f &s, - float ss) + float ss, + const uniform FeatureFlagsOther ffo) { const varying RobustThinDielectric *uniform self = (const varying RobustThinDielectric *uniform)super; diff --git a/modules/cpu/render/bsdfs/Scale.ih b/modules/cpu/render/bsdfs/Scale.ih index aea1c1d4a..dd9585026 100644 --- a/modules/cpu/render/bsdfs/Scale.ih +++ b/modules/cpu/render/bsdfs/Scale.ih @@ -15,12 +15,14 @@ struct Scale float factor; }; -inline BSDF_EvalRes Scale_eval( - const varying BSDF *uniform super, const vec3f &wo, const vec3f &wi) +inline BSDF_EvalRes Scale_eval(const varying BSDF *uniform super, + const vec3f &wo, + const vec3f &wi, + const uniform FeatureFlagsOther ffo) { const varying Scale *uniform self = (const varying Scale *uniform)super; - BSDF_EvalRes res = BSDF_dispatch_eval_base(self->base, wo, wi); + BSDF_EvalRes res = BSDF_dispatch_eval_base(self->base, wo, wi, ffo); res.value = res.value * self->factor; return res; @@ -29,11 +31,12 @@ inline BSDF_EvalRes Scale_eval( inline BSDF_SampleRes Scale_sample(const varying BSDF *uniform super, const vec3f &wo, const vec2f &s, - float ss) + float ss, + const uniform FeatureFlagsOther ffo) { const varying Scale *uniform self = (const varying Scale *uniform)super; - BSDF_SampleRes res = BSDF_dispatch_sample_base(self->base, wo, s, ss); + BSDF_SampleRes res = BSDF_dispatch_sample_base(self->base, wo, s, ss, ffo); res.weight = res.weight * self->factor; return res; diff --git a/modules/cpu/render/bsdfs/Specular.ih b/modules/cpu/render/bsdfs/Specular.ih index 99462c5ca..d8397eeec 100644 --- a/modules/cpu/render/bsdfs/Specular.ih +++ b/modules/cpu/render/bsdfs/Specular.ih @@ -18,8 +18,10 @@ struct Specular float ns; // exponent }; -inline BSDF_EvalRes Specular_eval( - const varying 
BSDF *uniform super, const vec3f &wo, const vec3f &wi) +inline BSDF_EvalRes Specular_eval(const varying BSDF *uniform super, + const vec3f &wo, + const vec3f &wi, + const uniform FeatureFlagsOther ffo) { const varying Specular *uniform self = (const varying Specular *uniform)super; BSDF_EvalRes res; @@ -42,7 +44,8 @@ inline BSDF_EvalRes Specular_eval( inline BSDF_SampleRes Specular_sample(const varying BSDF *uniform super, const vec3f &wo, const vec2f &s, - float ss) + float ss, + const uniform FeatureFlagsOther ffo) { const varying Specular *uniform self = (const varying Specular *uniform)super; BSDF_SampleRes res; diff --git a/modules/cpu/render/bsdfs/ThinDielectric.ih b/modules/cpu/render/bsdfs/ThinDielectric.ih index e96dba240..721d14791 100644 --- a/modules/cpu/render/bsdfs/ThinDielectric.ih +++ b/modules/cpu/render/bsdfs/ThinDielectric.ih @@ -17,8 +17,10 @@ struct ThinDielectric vec3f attenuation; }; -inline BSDF_EvalRes ThinDielectric_eval( - const varying BSDF *uniform super, const vec3f &wo, const vec3f &wi) +inline BSDF_EvalRes ThinDielectric_eval(const varying BSDF *uniform super, + const vec3f &wo, + const vec3f &wi, + const uniform FeatureFlagsOther ffo) { return make_BSDF_EvalRes_zero(); } @@ -26,7 +28,8 @@ inline BSDF_EvalRes ThinDielectric_eval( inline BSDF_SampleRes ThinDielectric_sample(const varying BSDF *uniform super, const vec3f &wo, const vec2f &s, - float ss) + float ss, + const uniform FeatureFlagsOther ffo) { const varying ThinDielectric *uniform self = (const varying ThinDielectric *uniform)super; diff --git a/modules/cpu/render/bsdfs/ThinMicrofacetDielectric.ih b/modules/cpu/render/bsdfs/ThinMicrofacetDielectric.ih index efc69f3ff..6ddd3e3ba 100644 --- a/modules/cpu/render/bsdfs/ThinMicrofacetDielectric.ih +++ b/modules/cpu/render/bsdfs/ThinMicrofacetDielectric.ih @@ -31,7 +31,10 @@ struct ThinMicrofacetDielectric }; inline BSDF_EvalRes ThinMicrofacetDielectric_eval( - const varying BSDF *uniform super, const vec3f &wo, const vec3f &wi) 
+ const varying BSDF *uniform super, + const vec3f &wo, + const vec3f &wi, + const uniform FeatureFlagsOther ffo) { const varying ThinMicrofacetDielectric *uniform self = (const varying ThinMicrofacetDielectric *uniform)super; @@ -88,7 +91,8 @@ inline BSDF_SampleRes ThinMicrofacetDielectric_sample( const varying BSDF *uniform super, const vec3f &wo, const vec2f &s, - float ss) + float ss, + const uniform FeatureFlagsOther ffo) { const varying ThinMicrofacetDielectric *uniform self = (const varying ThinMicrofacetDielectric *uniform)super; diff --git a/modules/cpu/render/bsdfs/Transmission.ih b/modules/cpu/render/bsdfs/Transmission.ih index b1dac37f8..00e2d837b 100644 --- a/modules/cpu/render/bsdfs/Transmission.ih +++ b/modules/cpu/render/bsdfs/Transmission.ih @@ -4,17 +4,23 @@ #pragma once #include "BSDF.ih" +#include "ShadingContext.ih" OSPRAY_BEGIN_ISPC_NAMESPACE -inline BSDF_EvalRes Transmission_eval( - const varying BSDF *uniform self, const vec3f &wo, const vec3f &wi) +inline BSDF_EvalRes Transmission_eval(const varying BSDF *uniform self, + const vec3f &wo, + const vec3f &wi, + const uniform FeatureFlagsOther ffo) { return make_BSDF_EvalRes_zero(); } -inline BSDF_SampleRes Transmission_sample( - const varying BSDF *uniform self, const vec3f &wo, const vec2f &s, float ss) +inline BSDF_SampleRes Transmission_sample(const varying BSDF *uniform self, + const vec3f &wo, + const vec2f &s, + float ss, + const uniform FeatureFlagsOther ffo) { BSDF_SampleRes res; diff --git a/modules/cpu/render/bsdfs/Velvety.ih b/modules/cpu/render/bsdfs/Velvety.ih index db595ff99..f28da3c3a 100644 --- a/modules/cpu/render/bsdfs/Velvety.ih +++ b/modules/cpu/render/bsdfs/Velvety.ih @@ -16,8 +16,10 @@ struct Velvety float f; }; -inline BSDF_EvalRes Velvety_eval( - const varying BSDF *uniform super, const vec3f &wo, const vec3f &wi) +inline BSDF_EvalRes Velvety_eval(const varying BSDF *uniform super, + const vec3f &wo, + const vec3f &wi, + const uniform FeatureFlagsOther ffo) { const 
varying Velvety *uniform self = (const varying Velvety *uniform)super; BSDF_EvalRes res; @@ -35,7 +37,8 @@ inline BSDF_EvalRes Velvety_eval( inline BSDF_SampleRes Velvety_sample(const varying BSDF *uniform super, const vec3f &wo, const vec2f &s, - float ss) + float ss, + const uniform FeatureFlagsOther ffo) { const varying Velvety *uniform self = (const varying Velvety *uniform)super; BSDF_SampleRes res; @@ -43,7 +46,7 @@ inline BSDF_SampleRes Velvety_sample(const varying BSDF *uniform super, const vec3f localDir = cosineSampleHemisphere(s); res.wi = getFrame(super) * localDir; res.type = BSDF_DIFFUSE_REFLECTION; - BSDF_EvalRes eval = Velvety_eval(super, wo, res.wi); + BSDF_EvalRes eval = Velvety_eval(super, wo, res.wi, ffo); res.pdf = eval.pdf; res.weight = eval.value * rcp(eval.pdf); return res; diff --git a/modules/cpu/render/debug/DebugRenderer.cpp b/modules/cpu/render/debug/DebugRenderer.cpp index 403eb0330..57ed0e8f0 100644 --- a/modules/cpu/render/debug/DebugRenderer.cpp +++ b/modules/cpu/render/debug/DebugRenderer.cpp @@ -4,6 +4,7 @@ // ospray #include "DebugRenderer.h" #include "camera/Camera.h" +#include "common/FeatureFlagsEnum.h" #include "common/World.h" #include "fb/FrameBuffer.h" #ifndef OSPRAY_TARGET_SYCL @@ -11,6 +12,8 @@ #include "render/debug/DebugRenderer_ispc.h" #else #include "DebugRenderer.ih" + +constexpr sycl::specialization_id specFeatureFlags; #endif namespace ospray { @@ -90,18 +93,28 @@ void DebugRenderer::renderTasks(FrameBuffer *fb, #ifdef OSPRAY_TARGET_SYCL const uint32_t *taskIDsPtr = taskIDs.data(); auto event = syclQueue.submit([&](sycl::handler &cgh) { + FeatureFlags ff = world->getFeatureFlags(); + ff.other |= featureFlags; + ff.other |= fb->getFeatureFlagsOther(); + ff.other |= camera->getFeatureFlagsOther(); + cgh.set_specialization_constant(ff); + const sycl::nd_range<1> dispatchRange = computeDispatchRange(numTasks, 16); - cgh.parallel_for(dispatchRange, [=](sycl::nd_item<1> taskIndex) { - if (taskIndex.get_global_id(0) < 
numTasks) { - ispc::DebugRenderer_renderTask(&rendererSh->super, - fbSh, - cameraSh, - worldSh, - perFrameData, - taskIDsPtr, - taskIndex.get_global_id(0)); - } - }); + cgh.parallel_for(dispatchRange, + [=](sycl::nd_item<1> taskIndex, sycl::kernel_handler kh) { + if (taskIndex.get_global_id(0) < numTasks) { + const FeatureFlags ff = + kh.get_specialization_constant(); + ispc::DebugRenderer_renderTask(&rendererSh->super, + fbSh, + cameraSh, + worldSh, + perFrameData, + taskIDsPtr, + taskIndex.get_global_id(0), + ff); + } + }); }); event.wait_and_throw(); // For prints we have to flush the entire queue, because other stuff is queued diff --git a/modules/cpu/render/debug/DebugRenderer.ih b/modules/cpu/render/debug/DebugRenderer.ih index eaa87e693..8105ef8b9 100644 --- a/modules/cpu/render/debug/DebugRenderer.ih +++ b/modules/cpu/render/debug/DebugRenderer.ih @@ -10,7 +10,8 @@ SYCL_EXTERNAL void DebugRenderer_renderTask(Renderer *uniform self, World *uniform world, void *uniform perFrameData, const uint32 *uniform taskIDs, - const int taskIndex0); + const int taskIndex0, + const uniform ospray::FeatureFlags &ff); #endif OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/render/debug/DebugRenderer.ispc b/modules/cpu/render/debug/DebugRenderer.ispc index 20ee56f3b..feda4e3b1 100644 --- a/modules/cpu/render/debug/DebugRenderer.ispc +++ b/modules/cpu/render/debug/DebugRenderer.ispc @@ -23,23 +23,22 @@ OSPRAY_BEGIN_ISPC_NAMESPACE -// TODO: All `SYCL_EXTERNAL __noinline` are here just to prevent IGC ocloc -// from crashing. We should use `inline` or `static` instead. 
- // common utility function, traces ray and handles default and background -SYCL_EXTERNAL __noinline bool hitBackground( - Renderer *uniform self, World *uniform world, varying ScreenSample &sample) +inline bool hitBackground(Renderer *uniform self, + World *uniform world, + varying ScreenSample &sample, + const uniform FeatureFlags &ff) { - traceRay(world, sample.ray); + traceRay(world, sample.ray, ff); sample.z = sample.ray.t; sample.alpha = 1.f; - sample.rgb = make_vec3f(Renderer_getBackground(self, sample.pos)); + sample.rgb = make_vec3f(Renderer_getBackground(self, sample.pos, ff.other)); return noHit(sample.ray); } -SYCL_EXTERNAL float eyeLight(varying ScreenSample &sample) +inline float eyeLight(varying ScreenSample &sample) { return 0.2f + 0.8f * abs(dot(normalize(sample.ray.Ng), sample.ray.dir)); } @@ -47,7 +46,7 @@ SYCL_EXTERNAL float eyeLight(varying ScreenSample &sample) /* a simple test-frame renderer that doesn't even trace a ray, just returns a well-defined test frame (mostly useful for debugging whether frame buffers are properly set up etcpp */ -SYCL_EXTERNAL __noinline void DebugRenderer_testFrame(Renderer *uniform, +static void DebugRenderer_testFrame(Renderer *uniform, FrameBuffer *uniform fb, World *uniform world, void *uniform perFrameData, @@ -64,7 +63,7 @@ SYCL_EXTERNAL __noinline void DebugRenderer_testFrame(Renderer *uniform, /* a simple test-frame renderer that doesn't even trace a ray, just returns the absolute of the ray direction */ -SYCL_EXTERNAL __noinline void DebugRenderer_rayDir(Renderer *uniform, +static void DebugRenderer_rayDir(Renderer *uniform, FrameBuffer *uniform fb, World *uniform world, void *uniform perFrameData, @@ -75,69 +74,74 @@ SYCL_EXTERNAL __noinline void DebugRenderer_rayDir(Renderer *uniform, sample.z = 1.f; } -SYCL_EXTERNAL __noinline void DebugRenderer_eyeLight(Renderer *uniform self, +static void DebugRenderer_eyeLight(Renderer *uniform self, FrameBuffer *uniform fb, World *uniform world, void *uniform 
perFrameData, - varying ScreenSample &sample) + varying ScreenSample &sample, + const uniform FeatureFlags &ff) { - if (hitBackground(self, world, sample)) { + if (hitBackground(self, world, sample, ff)) { return; } sample.rgb = make_vec3f(eyeLight(sample)); } -SYCL_EXTERNAL __noinline void DebugRenderer_Ng(Renderer *uniform self, +static void DebugRenderer_Ng(Renderer *uniform self, FrameBuffer *uniform fb, World *uniform world, void *uniform perFrameData, - varying ScreenSample &sample) + varying ScreenSample &sample, + const uniform FeatureFlags &ff) { - if (hitBackground(self, world, sample)) + if (hitBackground(self, world, sample, ff)) return; DifferentialGeometry dg; - postIntersect(world, self, dg, sample.ray, DG_NORMALIZE | DG_NG); + postIntersect(world, self, dg, sample.ray, DG_NORMALIZE | DG_NG, ff); sample.rgb = absf(dg.Ng); } -SYCL_EXTERNAL __noinline void DebugRenderer_Ns(Renderer *uniform self, +static void DebugRenderer_Ns(Renderer *uniform self, FrameBuffer *uniform fb, World *uniform world, void *uniform perFrameData, - varying ScreenSample &sample) + varying ScreenSample &sample, + const uniform FeatureFlags &ff) { - if (hitBackground(self, world, sample)) + if (hitBackground(self, world, sample, ff)) return; DifferentialGeometry dg; - postIntersect(world, self, dg, sample.ray, DG_NORMALIZE | DG_NS); + postIntersect(world, self, dg, sample.ray, DG_NORMALIZE | DG_NS, ff); sample.rgb = absf(dg.Ns); } -SYCL_EXTERNAL __noinline void DebugRenderer_texCoord(Renderer *uniform self, +static void DebugRenderer_texCoord(Renderer *uniform self, FrameBuffer *uniform fb, World *uniform world, void *uniform perFrameData, - varying ScreenSample &sample) + varying ScreenSample &sample, + const uniform FeatureFlags &ff) { - if (hitBackground(self, world, sample)) + if (hitBackground(self, world, sample, ff)) return; DifferentialGeometry dg; - postIntersect(world, self, dg, sample.ray, DG_TEXCOORD); + postIntersect(world, self, dg, sample.ray, DG_TEXCOORD, ff); 
sample.rgb = abs(make_vec3f(dg.st.x, dg.st.y, 0.0f)); } -SYCL_EXTERNAL __noinline void DebugRenderer_dPds(Renderer *uniform self, +static void DebugRenderer_dPds(Renderer *uniform self, FrameBuffer *uniform fb, World *uniform world, void *uniform perFrameData, - varying ScreenSample &sample) + varying ScreenSample &sample, + const uniform FeatureFlags &ff) { - if (hitBackground(self, world, sample)) + if (hitBackground(self, world, sample, ff)) return; DifferentialGeometry dg; - postIntersect(world, self, dg, sample.ray, DG_TANGENTS); + postIntersect(world, self, dg, sample.ray, DG_TANGENTS, ff); sample.rgb = normalize(dg.dPds); if (sample.rgb.x < 0.f) sample.rgb.x = sample.rgb.x * -0.3f; @@ -147,17 +151,18 @@ SYCL_EXTERNAL __noinline void DebugRenderer_dPds(Renderer *uniform self, sample.rgb.z = sample.rgb.z * -0.3f; } -SYCL_EXTERNAL __noinline void DebugRenderer_dPdt(Renderer *uniform self, +static void DebugRenderer_dPdt(Renderer *uniform self, FrameBuffer *uniform fb, World *uniform world, void *uniform perFrameData, - varying ScreenSample &sample) + varying ScreenSample &sample, + const uniform FeatureFlags &ff) { - if (hitBackground(self, world, sample)) + if (hitBackground(self, world, sample, ff)) return; DifferentialGeometry dg; - postIntersect(world, self, dg, sample.ray, DG_TANGENTS); + postIntersect(world, self, dg, sample.ray, DG_TANGENTS, ff); sample.rgb = normalize(dg.dPdt); if (sample.rgb.x < 0.f) sample.rgb.x = sample.rgb.x * -0.3f; @@ -167,65 +172,69 @@ SYCL_EXTERNAL __noinline void DebugRenderer_dPdt(Renderer *uniform self, sample.rgb.z = sample.rgb.z * -0.3f; } -SYCL_EXTERNAL __noinline void DebugRenderer_vertexColor(Renderer *uniform self, +static void DebugRenderer_vertexColor(Renderer *uniform self, FrameBuffer *uniform fb, World *uniform world, void *uniform perFrameData, - varying ScreenSample &sample) + varying ScreenSample &sample, + const uniform FeatureFlags &ff) { - if (hitBackground(self, world, sample)) + if (hitBackground(self, 
world, sample, ff)) return; DifferentialGeometry dg; - postIntersect(world, self, dg, sample.ray, DG_COLOR | DG_NS); + postIntersect(world, self, dg, sample.ray, DG_COLOR | DG_NS, ff); sample.rgb = make_vec3f(dg.color) * abs(dot(normalize(sample.ray.dir), normalize(dg.Ns))); } -SYCL_EXTERNAL __noinline void DebugRenderer_primID(Renderer *uniform self, +static void DebugRenderer_primID(Renderer *uniform self, FrameBuffer *uniform fb, World *uniform world, void *uniform perFrameData, - varying ScreenSample &sample) + varying ScreenSample &sample, + const uniform FeatureFlags &ff) { - if (hitBackground(self, world, sample)) + if (hitBackground(self, world, sample, ff)) return; sample.rgb = eyeLight(sample) * makeRandomColor(sample.ray.primID); } -SYCL_EXTERNAL __noinline void DebugRenderer_instID(Renderer *uniform self, +static void DebugRenderer_instID(Renderer *uniform self, FrameBuffer *uniform fb, World *uniform world, void *uniform perFrameData, - varying ScreenSample &sample) + varying ScreenSample &sample, + const uniform FeatureFlags &ff) { - if (hitBackground(self, world, sample)) + if (hitBackground(self, world, sample, ff)) return; sample.rgb = eyeLight(sample) * makeRandomColor(sample.ray.instID); } -SYCL_EXTERNAL __noinline void DebugRenderer_geomID(Renderer *uniform self, +static void DebugRenderer_geomID(Renderer *uniform self, FrameBuffer *uniform fb, World *uniform world, void *uniform perFrameData, - varying ScreenSample &sample) + varying ScreenSample &sample, + const uniform FeatureFlags &ff) { - if (hitBackground(self, world, sample)) + if (hitBackground(self, world, sample, ff)) return; sample.rgb = eyeLight(sample) * makeRandomColor(sample.ray.geomID); } -SYCL_EXTERNAL __noinline void DebugRenderer_backfacing_Ng( - Renderer *uniform self, +static void DebugRenderer_backfacing_Ng(Renderer *uniform self, FrameBuffer *uniform fb, World *uniform world, void *uniform perFrameData, - varying ScreenSample &sample) + varying ScreenSample &sample, + 
const uniform FeatureFlags &ff) { - if (hitBackground(self, world, sample)) + if (hitBackground(self, world, sample, ff)) return; sample.rgb = make_vec3f(eyeLight(sample)); @@ -233,31 +242,35 @@ SYCL_EXTERNAL __noinline void DebugRenderer_backfacing_Ng( sample.rgb.y = 0.f; } -SYCL_EXTERNAL __noinline void DebugRenderer_backfacing_Ns( - Renderer *uniform self, +static void DebugRenderer_backfacing_Ns(Renderer *uniform self, FrameBuffer *uniform fb, World *uniform world, void *uniform perFrameData, - varying ScreenSample &sample) + varying ScreenSample &sample, + const uniform FeatureFlags &ff) { - if (hitBackground(self, world, sample)) + if (hitBackground(self, world, sample, ff)) return; DifferentialGeometry dg; - postIntersect(world, self, dg, sample.ray, DG_NORMALIZE | DG_NS); + postIntersect(world, self, dg, sample.ray, DG_NORMALIZE | DG_NS, ff); const float c = dot(dg.Ns, sample.ray.dir); sample.rgb = make_vec3f(.2f + .8f * abs(c)); if (c > 0.f) sample.rgb.y = 0.f; } -SYCL_EXTERNAL __noinline void DebugRenderer_volume(Renderer *uniform self, +static void DebugRenderer_volume(Renderer *uniform self, FrameBuffer *uniform fb, World *uniform world, void *uniform perFrameData, - varying ScreenSample &sample) + varying ScreenSample &sample, + const uniform FeatureFlags &ff) { #ifdef OSPRAY_ENABLE_VOLUMES + if (!(ff.volume & FFV_VOLUME)) + return; + LDSampler ldSamplerObj; varying LDSampler *uniform ldSampler = &ldSamplerObj; LDSampler_init(ldSampler, @@ -268,7 +281,7 @@ SYCL_EXTERNAL __noinline void DebugRenderer_volume(Renderer *uniform self, vec3f &color = sample.rgb; float &alpha = sample.alpha; - vec4f bgColor = Renderer_getBackground(self, sample.pos); + vec4f bgColor = Renderer_getBackground(self, sample.pos, FFO_ALL); color = make_vec3f(bgColor); alpha = bgColor.w; @@ -368,7 +381,8 @@ static void DebugRenderer_renderSample(Renderer *uniform _self, FrameBuffer *uniform fb, World *uniform world, void *uniform perFrameData, - varying ScreenSample &sample) + 
varying ScreenSample &sample, + const uniform FeatureFlags &ff) { DebugRenderer *uniform self = (DebugRenderer * uniform) _self; switch (self->type) { @@ -379,43 +393,43 @@ static void DebugRenderer_renderSample(Renderer *uniform _self, DebugRenderer_rayDir(_self, fb, world, perFrameData, sample); break; case EYE_LIGHT: - DebugRenderer_eyeLight(_self, fb, world, perFrameData, sample); + DebugRenderer_eyeLight(_self, fb, world, perFrameData, sample, ff); break; case NG: - DebugRenderer_Ng(_self, fb, world, perFrameData, sample); + DebugRenderer_Ng(_self, fb, world, perFrameData, sample, ff); break; case NS: - DebugRenderer_Ns(_self, fb, world, perFrameData, sample); + DebugRenderer_Ns(_self, fb, world, perFrameData, sample, ff); break; case COLOR: - DebugRenderer_vertexColor(_self, fb, world, perFrameData, sample); + DebugRenderer_vertexColor(_self, fb, world, perFrameData, sample, ff); break; case TEX_COORD: - DebugRenderer_texCoord(_self, fb, world, perFrameData, sample); + DebugRenderer_texCoord(_self, fb, world, perFrameData, sample, ff); break; case DPDS: - DebugRenderer_dPds(_self, fb, world, perFrameData, sample); + DebugRenderer_dPds(_self, fb, world, perFrameData, sample, ff); break; case DPDT: - DebugRenderer_dPdt(_self, fb, world, perFrameData, sample); + DebugRenderer_dPdt(_self, fb, world, perFrameData, sample, ff); break; case PRIM_ID: - DebugRenderer_primID(_self, fb, world, perFrameData, sample); + DebugRenderer_primID(_self, fb, world, perFrameData, sample, ff); break; case GEOM_ID: - DebugRenderer_geomID(_self, fb, world, perFrameData, sample); + DebugRenderer_geomID(_self, fb, world, perFrameData, sample, ff); break; case INST_ID: - DebugRenderer_instID(_self, fb, world, perFrameData, sample); + DebugRenderer_instID(_self, fb, world, perFrameData, sample, ff); break; case BACKFACING_NG: - DebugRenderer_backfacing_Ng(_self, fb, world, perFrameData, sample); + DebugRenderer_backfacing_Ng(_self, fb, world, perFrameData, sample, ff); break; case 
BACKFACING_NS: - DebugRenderer_backfacing_Ns(_self, fb, world, perFrameData, sample); + DebugRenderer_backfacing_Ns(_self, fb, world, perFrameData, sample, ff); break; case VOLUME: - DebugRenderer_volume(_self, fb, world, perFrameData, sample); + DebugRenderer_volume(_self, fb, world, perFrameData, sample, ff); break; default: DebugRenderer_testFrame(_self, fb, world, perFrameData, sample); @@ -434,10 +448,11 @@ SYCL_EXTERNAL void DebugRenderer_renderTask(Renderer *uniform self, World *uniform world, void *uniform perFrameData, const uint32 *uniform taskIDs, - const int taskIndex0) + const int taskIndex0, + const uniform FeatureFlags &ff) { Renderer_default_renderTask( - self, fb, camera, world, perFrameData, taskIDs, taskIndex0); + self, fb, camera, world, perFrameData, taskIDs, taskIndex0, ff); } #else export void DebugRenderer_renderTasks(void *uniform _self, @@ -454,7 +469,7 @@ export void DebugRenderer_renderTasks(void *uniform _self, World *uniform world = (World * uniform) _world; const uint32 *uniform taskIDs = (const uint32 *uniform)_taskIDs; launch[numTasks] Renderer_default_renderTask( - self, fb, camera, world, perFrameData, taskIDs); + self, fb, camera, world, perFrameData, taskIDs, ffAll()); sync; } #endif diff --git a/modules/cpu/render/materials/Alloy.cpp b/modules/cpu/render/materials/Alloy.cpp index de86ec6d1..db205c30a 100644 --- a/modules/cpu/render/materials/Alloy.cpp +++ b/modules/cpu/render/materials/Alloy.cpp @@ -11,7 +11,7 @@ namespace ospray { namespace pathtracer { Alloy::Alloy(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared(device.getIspcrtDevice(), device, FFO_MATERIAL_ALLOY) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.getBSDF = diff --git a/modules/cpu/render/materials/CarPaint.cpp b/modules/cpu/render/materials/CarPaint.cpp index d3a73565f..be6a02933 100644 --- a/modules/cpu/render/materials/CarPaint.cpp +++ b/modules/cpu/render/materials/CarPaint.cpp @@ -11,7 +11,7 @@ namespace 
ospray { namespace pathtracer { CarPaint::CarPaint(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared(device.getIspcrtDevice(), device, FFO_MATERIAL_CARPAINT) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.getBSDF = reinterpret_cast( diff --git a/modules/cpu/render/materials/CarPaint.ispc b/modules/cpu/render/materials/CarPaint.ispc index 3fe5c1fd5..11f978e6a 100644 --- a/modules/cpu/render/materials/CarPaint.ispc +++ b/modules/cpu/render/materials/CarPaint.ispc @@ -1,6 +1,7 @@ // Copyright 2009 Intel Corporation // SPDX-License-Identifier: Apache-2.0 +#include "common/Ray.ih" #include "render/Material.ih" #include "render/bsdfs/Conductor.ih" #include "render/bsdfs/DielectricLayer.ih" diff --git a/modules/cpu/render/materials/Glass.cpp b/modules/cpu/render/materials/Glass.cpp index 56d1d5bbb..14637e621 100644 --- a/modules/cpu/render/materials/Glass.cpp +++ b/modules/cpu/render/materials/Glass.cpp @@ -11,7 +11,7 @@ namespace ospray { namespace pathtracer { Glass::Glass(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared(device.getIspcrtDevice(), device, FFO_MATERIAL_GLASS) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.getBSDF = diff --git a/modules/cpu/render/materials/Glass.ispc b/modules/cpu/render/materials/Glass.ispc index 9e3f2df0f..7d54ec0cd 100644 --- a/modules/cpu/render/materials/Glass.ispc +++ b/modules/cpu/render/materials/Glass.ispc @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 #include "Medium.ih" +#include "common/Ray.ih" #include "render/Material.ih" #include "render/bsdfs/RobustDielectric.ih" diff --git a/modules/cpu/render/materials/Luminous.cpp b/modules/cpu/render/materials/Luminous.cpp index 29d68434e..60cb0f9d9 100644 --- a/modules/cpu/render/materials/Luminous.cpp +++ b/modules/cpu/render/materials/Luminous.cpp @@ -11,7 +11,7 @@ namespace ospray { namespace pathtracer { Luminous::Luminous(api::ISPCDevice &device) - : 
AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared(device.getIspcrtDevice(), device, FFO_MATERIAL_LUMINOUS) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.getBSDF = reinterpret_cast( diff --git a/modules/cpu/render/materials/Metal.cpp b/modules/cpu/render/materials/Metal.cpp index 466a458aa..057587d0e 100644 --- a/modules/cpu/render/materials/Metal.cpp +++ b/modules/cpu/render/materials/Metal.cpp @@ -12,7 +12,7 @@ namespace ospray { namespace pathtracer { Metal::Metal(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared(device.getIspcrtDevice(), device, FFO_MATERIAL_METAL) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.getBSDF = diff --git a/modules/cpu/render/materials/MetallicPaint.cpp b/modules/cpu/render/materials/MetallicPaint.cpp index f4a19ab67..ea69a86a9 100644 --- a/modules/cpu/render/materials/MetallicPaint.cpp +++ b/modules/cpu/render/materials/MetallicPaint.cpp @@ -11,7 +11,8 @@ namespace ospray { namespace pathtracer { MetallicPaint::MetallicPaint(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared( + device.getIspcrtDevice(), device, FFO_MATERIAL_METALLICPAINT) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.getBSDF = reinterpret_cast( diff --git a/modules/cpu/render/materials/Mix.cpp b/modules/cpu/render/materials/Mix.cpp index 6b2c3953d..28fd4e84f 100644 --- a/modules/cpu/render/materials/Mix.cpp +++ b/modules/cpu/render/materials/Mix.cpp @@ -11,7 +11,7 @@ namespace ospray { namespace pathtracer { MixMaterial::MixMaterial(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared(device.getIspcrtDevice(), device, FFO_MATERIAL_MIX) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.getBSDF = diff --git a/modules/cpu/render/materials/OBJ.cpp b/modules/cpu/render/materials/OBJ.cpp index de5846144..1c8ea910a 100644 --- a/modules/cpu/render/materials/OBJ.cpp +++ b/modules/cpu/render/materials/OBJ.cpp @@ -11,7 +11,7 @@ 
namespace ospray { namespace pathtracer { OBJMaterial::OBJMaterial(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared(device.getIspcrtDevice(), device, FFO_MATERIAL_OBJ) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.getBSDF = diff --git a/modules/cpu/render/materials/OBJ.ih b/modules/cpu/render/materials/OBJ.ih index 0b1c629d6..566897961 100644 --- a/modules/cpu/render/materials/OBJ.ih +++ b/modules/cpu/render/materials/OBJ.ih @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 #pragma once +#include "common/FeatureFlags.ih" #include "rkcommon/math/vec.ih" OSPRAY_BEGIN_ISPC_NAMESPACE @@ -18,11 +19,13 @@ SYCL_EXTERNAL const varying BSDF *varying OBJ_getBSDF( uniform ShadingContext *uniform ctx, const DifferentialGeometry &dg, const Ray &ray, - const Medium ¤tMedium); + const Medium ¤tMedium, + const uniform FeatureFlagsOther ffo); SYCL_EXTERNAL vec3f OBJ_getTransparency(const uniform Material *uniform super, const DifferentialGeometry &dg, const Ray &ray, - const Medium ¤tMedium); + const Medium ¤tMedium, + const uniform FeatureFlagsOther ffo); OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/render/materials/OBJ.ispc b/modules/cpu/render/materials/OBJ.ispc index 38f8fd09d..5099507ef 100644 --- a/modules/cpu/render/materials/OBJ.ispc +++ b/modules/cpu/render/materials/OBJ.ispc @@ -21,7 +21,8 @@ SYCL_EXTERNAL const varying BSDF *varying OBJ_getBSDF( uniform ShadingContext *uniform ctx, const DifferentialGeometry &dg, const Ray &ray, - const Medium ¤tMedium) + const Medium ¤tMedium, + const uniform FeatureFlagsOther ffo) { uniform const OBJ *uniform self = (uniform const OBJ *uniform)super; varying BSDF *uniform bsdf = MultiBSDF_create(ctx); @@ -29,14 +30,14 @@ SYCL_EXTERNAL const varying BSDF *varying OBJ_getBSDF( /*! 
normal map */ varying linear3f *uniform shadingFrame = LinearSpace3f_create( - ctx, makeShadingFrame(dg, self->bumpMap, self->bumpRot)); + ctx, makeShadingFrame_ff(dg, self->bumpMap, self->bumpRot, ffo)); /*! cut-out opacity */ - float d = self->d * get1f(self->dMap, dg, 1.f) * dg.color.w; + float d = self->d * get1f_ff(self->dMap, dg, 1.f, ffo) * dg.color.w; /*! diffuse component */ vec3f Kd = self->Kd; - if (valid(self->KdMap)) { + if (valid(self->KdMap) && (ffo & FFO_TEXTURE_IN_MATERIAL)) { vec4f Kd_from_map = get4f(self->KdMap, dg); Kd = Kd * make_vec3f(Kd_from_map); d *= Kd_from_map.w; @@ -55,8 +56,8 @@ SYCL_EXTERNAL const varying BSDF *varying OBJ_getBSDF( bsdf, Transmission_create(ctx, shadingFrame, T), 1.f, luminance(T)); /*! specular component */ - float Ns = self->Ns * get1f(self->NsMap, dg, 1.0f); - vec3f Ks = d * self->Ks * get3f(self->KsMap, dg, make_vec3f(1.f)); + float Ns = self->Ns * get1f_ff(self->NsMap, dg, 1.0f, ffo); + vec3f Ks = d * self->Ks * get3f_ff(self->KsMap, dg, make_vec3f(1.f), ffo); if (reduce_max(Ks) > 0.0f) { MultiBSDF_add( bsdf, Specular_create(ctx, shadingFrame, Ks, Ns), 1.f, luminance(Ks)); @@ -67,13 +68,14 @@ SYCL_EXTERNAL const varying BSDF *varying OBJ_getBSDF( SYCL_EXTERNAL vec3f OBJ_getTransparency(const uniform Material *uniform super, const DifferentialGeometry &dg, const Ray &ray, - const Medium ¤tMedium) + const Medium ¤tMedium, + const uniform FeatureFlagsOther ffo) { uniform const OBJ *uniform self = (uniform const OBJ *uniform)super; /*! 
cut-out opacity */ - float d = self->d * get1f(self->dMap, dg, 1.f) * dg.color.w; - if (hasAlpha(self->KdMap)) { + float d = self->d * get1f_ff(self->dMap, dg, 1.f, ffo) * dg.color.w; + if (hasAlpha(self->KdMap) && (ffo & FFO_TEXTURE_IN_MATERIAL)) { vec4f Kd_from_map = get4f(self->KdMap, dg); d *= Kd_from_map.w; } diff --git a/modules/cpu/render/materials/Plastic.cpp b/modules/cpu/render/materials/Plastic.cpp index 135670749..883e68a06 100644 --- a/modules/cpu/render/materials/Plastic.cpp +++ b/modules/cpu/render/materials/Plastic.cpp @@ -11,7 +11,7 @@ namespace ospray { namespace pathtracer { Plastic::Plastic(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared(device.getIspcrtDevice(), device, FFO_MATERIAL_PLASTIC) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.getBSDF = reinterpret_cast( diff --git a/modules/cpu/render/materials/Principled.cpp b/modules/cpu/render/materials/Principled.cpp index 45eae7c43..b38d3d643 100644 --- a/modules/cpu/render/materials/Principled.cpp +++ b/modules/cpu/render/materials/Principled.cpp @@ -11,7 +11,7 @@ namespace ospray { namespace pathtracer { Principled::Principled(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared(device.getIspcrtDevice(), device, FFO_MATERIAL_PRINCIPLED) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.getBSDF = reinterpret_cast( diff --git a/modules/cpu/render/materials/Principled.ispc b/modules/cpu/render/materials/Principled.ispc index 54e304759..7d5a98e7b 100644 --- a/modules/cpu/render/materials/Principled.ispc +++ b/modules/cpu/render/materials/Principled.ispc @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 #include "Medium.ih" +#include "common/Ray.ih" #include "render/Material.ih" #include "render/bsdfs/Conductor.ih" #include "render/bsdfs/Dielectric.ih" diff --git a/modules/cpu/render/materials/ThinGlass.cpp b/modules/cpu/render/materials/ThinGlass.cpp index 24f4ea825..4a98f4b47 100644 --- 
a/modules/cpu/render/materials/ThinGlass.cpp +++ b/modules/cpu/render/materials/ThinGlass.cpp @@ -11,7 +11,7 @@ namespace ospray { namespace pathtracer { ThinGlass::ThinGlass(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared(device.getIspcrtDevice(), device, FFO_MATERIAL_THINGLASS) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.getBSDF = reinterpret_cast( diff --git a/modules/cpu/render/materials/ThinGlass.ispc b/modules/cpu/render/materials/ThinGlass.ispc index 5d8dea69e..1bad2f0c9 100644 --- a/modules/cpu/render/materials/ThinGlass.ispc +++ b/modules/cpu/render/materials/ThinGlass.ispc @@ -1,6 +1,7 @@ // Copyright 2009 Intel Corporation // SPDX-License-Identifier: Apache-2.0 +#include "common/Ray.ih" #include "render/Material.ih" #include "render/bsdfs/RobustThinDielectric.ih" #include "texture/TextureParam.ih" diff --git a/modules/cpu/render/materials/Velvet.cpp b/modules/cpu/render/materials/Velvet.cpp index bd38a0c39..032ea83f1 100644 --- a/modules/cpu/render/materials/Velvet.cpp +++ b/modules/cpu/render/materials/Velvet.cpp @@ -11,7 +11,7 @@ namespace ospray { namespace pathtracer { Velvet::Velvet(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared(device.getIspcrtDevice(), device, FFO_MATERIAL_VELVET) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.getBSDF = diff --git a/modules/cpu/render/pathtracer/NextEventEstimation.ih b/modules/cpu/render/pathtracer/NextEventEstimation.ih index 58ea6b821..1d1157fc2 100644 --- a/modules/cpu/render/pathtracer/NextEventEstimation.ih +++ b/modules/cpu/render/pathtracer/NextEventEstimation.ih @@ -3,6 +3,7 @@ #pragma once +#include "common/FeatureFlags.ih" #include "rkcommon/math/vec.ih" OSPRAY_BEGIN_ISPC_NAMESPACE @@ -13,6 +14,7 @@ struct PathVertex; SYCL_EXTERNAL vec3f nextEventEstimation(const PathContext &pathContext, const PathState &pathState, - PathVertex &pathVertex); + PathVertex &pathVertex, + const uniform FeatureFlags &ff); 
OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/render/pathtracer/NextEventEstimation.ispc b/modules/cpu/render/pathtracer/NextEventEstimation.ispc index 8eccfc564..2345eb9ba 100644 --- a/modules/cpu/render/pathtracer/NextEventEstimation.ispc +++ b/modules/cpu/render/pathtracer/NextEventEstimation.ispc @@ -23,7 +23,8 @@ OSPRAY_BEGIN_ISPC_NAMESPACE SYCL_EXTERNAL vec3f nextEventEstimation(const PathContext &pathContext, const PathState &pathState, - PathVertex &pathVertex) + PathVertex &pathVertex, + const uniform FeatureFlags &ff) { // direct lighting including shadows and MIS vec3f L = make_vec3f(0.f); @@ -50,7 +51,8 @@ SYCL_EXTERNAL vec3f nextEventEstimation(const PathContext &pathContext, #else foreach_unique (l in light) { #endif - ls = Light_dispatch_sample(l, pathVertex.dg, s2, pathState.time); + ls = + Light_dispatch_sample(l, pathVertex.dg, s2, pathState.time, ff.other); } // adjust the contibution with the probabiltiy of selecting the light source ls.weight = ls.weight / lightSelectionProb; @@ -70,18 +72,20 @@ SYCL_EXTERNAL vec3f nextEventEstimation(const PathContext &pathContext, foreach_unique (f in pathVertex.bsdf) { #endif if (f != NULL) - fe = BSDF_dispatch_eval(f, pathVertex.wo, ls.dir); + fe = BSDF_dispatch_eval(f, pathVertex.wo, ls.dir, ff.other); } } else { #ifdef OSPRAY_ENABLE_VOLUMES + if (ff.volume & FFV_VOLUME) { #ifdef OSPRAY_TARGET_SYCL - { - const VolumetricModel *v = pathVertex.volume; + { + const VolumetricModel *v = pathVertex.volume; #else - foreach_unique (v in pathVertex.volume) { + foreach_unique (v in pathVertex.volume) { #endif - if (v != NULL) - fe = HenyeyGreenstein_eval(v->anisotropy, pathVertex.wo, ls.dir); + if (v != NULL) + fe = HenyeyGreenstein_eval(v->anisotropy, pathVertex.wo, ls.dir); + } } #endif } @@ -120,33 +124,37 @@ SYCL_EXTERNAL vec3f nextEventEstimation(const PathContext &pathContext, unshadedLightContrib, shadowRay, rayIntervals, - pathState.currentMedium); + pathState.currentMedium, + ff); if 
(reduce_max(lightContrib) > 0) { -#ifndef OSPRAY_ENABLE_VOLUMES - L = L + lightContrib * misHeuristic(pathState, ls.pdf, fe.pdf); -#else - const float T = volumeTransmittance(pathState, - pathContext.world, - shadowRay, - rayIntervals, - pathContext.ldSampler, - pathContext.randomSampler); - if (reduce_max(T) > 0) { - // we have to use an independent transmittance estimate for MIS to get - // a correct result - const float T_mis = pathState.disableFWD || pathState.disableNEE - ? 1.f - : volumeTransmittance(pathState, - pathContext.world, - shadowRay, - rayIntervals, - pathContext.ldSampler, - pathContext.randomSampler); - L = L - + T * lightContrib - * misHeuristic(pathState, ls.pdf, fe.pdf * T_mis); - } +#ifdef OSPRAY_ENABLE_VOLUMES + if (ff.volume & FFV_VOLUME) { + const float T = volumeTransmittance(pathState, + pathContext.world, + shadowRay, + rayIntervals, + pathContext.ldSampler, + pathContext.randomSampler); + if (reduce_max(T) > 0) { + // we have to use an independent transmittance estimate for MIS to get + // a correct result + const float T_mis = pathState.disableFWD || pathState.disableNEE + ? 
1.f + : volumeTransmittance(pathState, + pathContext.world, + shadowRay, + rayIntervals, + pathContext.ldSampler, + pathContext.randomSampler); + L = L + + T * lightContrib + * misHeuristic(pathState, ls.pdf, fe.pdf * T_mis); + } + } else #endif + { + L = L + lightContrib * misHeuristic(pathState, ls.pdf, fe.pdf); + } } } return L; diff --git a/modules/cpu/render/pathtracer/PathSampler.ih b/modules/cpu/render/pathtracer/PathSampler.ih index 73f13393b..c81f28990 100644 --- a/modules/cpu/render/pathtracer/PathSampler.ih +++ b/modules/cpu/render/pathtracer/PathSampler.ih @@ -3,6 +3,7 @@ #pragma once +#include "common/FeatureFlags.ih" #include "common/OSPCommon.ih" OSPRAY_BEGIN_ISPC_NAMESPACE @@ -15,6 +16,7 @@ struct ScreenSample; SYCL_EXTERNAL void samplePath(const PathContext &pathContext, PathState &pathState, Ray &ray, - ScreenSample &sample); + ScreenSample &sample, + const uniform FeatureFlags &ff); OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/render/pathtracer/PathSampler.ispc b/modules/cpu/render/pathtracer/PathSampler.ispc index 7fa252193..8fddd1507 100644 --- a/modules/cpu/render/pathtracer/PathSampler.ispc +++ b/modules/cpu/render/pathtracer/PathSampler.ispc @@ -16,6 +16,7 @@ #include "render/Material.ih" #include "render/MaterialDispatch.ih" #include "render/bsdfs/MicrofacetAlbedoTables.ih" +#include "render/bsdfs/ShadingContext.ih" #include "render/materials/Medium.ih" #ifdef OSPRAY_ENABLE_VOLUMES #include "render/pathtracer/volumes/VolumeSampler.ih" @@ -30,7 +31,8 @@ OSPRAY_BEGIN_ISPC_NAMESPACE inline void postIntersect(const PathContext &pathContext, const PathState &pathState, PathVertex &pathVertex, - Ray &ray) + Ray &ray, + const uniform FeatureFlags &ff) { const PathTracer *uniform pt = pathContext.context; if (pathVertex.type == SURFACE) { @@ -39,10 +41,11 @@ inline void postIntersect(const PathContext &pathContext, pathVertex.dg, ray, DG_NS | DG_NG | DG_FACEFORWARD | DG_NORMALIZE | DG_TEXCOORD | DG_COLOR - | DG_TANGENTS | DG_MOTIONBLUR); + | 
DG_TANGENTS | DG_MOTIONBLUR, + ff); } #ifdef OSPRAY_ENABLE_VOLUMES - if (pathVertex.type == VOLUME) { + if ((pathVertex.type == VOLUME) && (ff.volume & FFV_VOLUME)) { pathVertex.dg.P = ray.org + ray.t * ray.dir; pathVertex.dg.renderer = &pt->super; @@ -75,7 +78,8 @@ inline void postIntersect(const PathContext &pathContext, inline Scattering_SampleRes sampleDirection(const PathContext &pathContext, const PathState &pathState, - PathVertex &pathVertex) + PathVertex &pathVertex, + const uniform FeatureFlags &ff) { const vec2f ss = LDSampler_getFloat2(pathContext.ldSampler, pathState.sampleDim); @@ -91,14 +95,14 @@ inline Scattering_SampleRes sampleDirection(const PathContext &pathContext, foreach_unique (f in pathVertex.bsdf) { #endif if (f != NULL) { - fs = BSDF_dispatch_sample(f, pathVertex.wo, ss, s); + fs = BSDF_dispatch_sample(f, pathVertex.wo, ss, s, ff.other); pathVertex.wi = fs.wi; pathVertex.pdf_w = fs.pdf; } } } #ifdef OSPRAY_ENABLE_VOLUMES - if (pathVertex.type == VOLUME) { + if ((pathVertex.type == VOLUME) && (ff.volume & FFV_VOLUME)) { #ifdef OSPRAY_TARGET_SYCL { const VolumetricModel *v = pathVertex.volume; @@ -119,7 +123,8 @@ inline Scattering_SampleRes sampleDirection(const PathContext &pathContext, SYCL_EXTERNAL void samplePath(const PathContext &pathContext, PathState &pathState, Ray &ray, - ScreenSample &sample) + ScreenSample &sample, + const uniform FeatureFlags &ff) { PathVertex lastVertex; lastVertex.type = CAMERA; @@ -152,7 +157,7 @@ SYCL_EXTERNAL void samplePath(const PathContext &pathContext, traceClippingRay(pathContext.world, ray, rayIntervals); // Trace ray intervals in geometry - traceGeometryRayIntervals(pathContext.world, ray, rayIntervals); + traceGeometryRayIntervals(pathContext.world, ray, rayIntervals, ff); PathVertex pathVertex; pathVertex.bsdf = NULL; @@ -166,26 +171,28 @@ SYCL_EXTERNAL void samplePath(const PathContext &pathContext, pathVertex.type = SURFACE; } - if (shadowCatcher(pathContext, pathState, pathVertex, ray, sample)) 
{ + if (shadowCatcher(pathContext, pathState, pathVertex, ray, sample, ff)) { pathVertex.type = ENVIRONMENT; } pathVertex.wo = neg(ray.dir); #ifdef OSPRAY_ENABLE_VOLUMES - float extinctionCoefficient; - float freePath = volumeSampleFreePath(pathState, - pathContext.world, - ray, - rayIntervals, - pathContext.ldSampler, - pathContext.randomSampler, - &pathVertex.volume, - extinctionCoefficient, - pathVertex.albedo); - if (freePath < inf) { - pathVertex.type = VOLUME; - pathState.throughput = pathState.throughput * pathVertex.albedo; + if (ff.volume & FFV_VOLUME) { + float extinctionCoefficient; + float freePath = volumeSampleFreePath(pathState, + pathContext.world, + ray, + rayIntervals, + pathContext.ldSampler, + pathContext.randomSampler, + &pathVertex.volume, + extinctionCoefficient, + pathVertex.albedo); + if (freePath < inf) { + pathVertex.type = VOLUME; + pathState.throughput = pathState.throughput * pathVertex.albedo; + } } #endif @@ -199,7 +206,7 @@ SYCL_EXTERNAL void samplePath(const PathContext &pathContext, ? 
pathState.specularTransmissionPath : pathState.straightPath)) { vec4f bg = Renderer_getBackground( - &pathContext.context->super, *pathContext.pixel); + &pathContext.context->super, *pathContext.pixel, ff.other); pathState.contribution = pathState.contribution + pathState.throughput * make_vec3f(bg); sample.alpha = 1.0f - luminance(pathState.throughput); @@ -210,7 +217,7 @@ SYCL_EXTERNAL void samplePath(const PathContext &pathContext, && pathVertex.type != VOLUME) { pathState.contribution = pathState.contribution + evaluateVirtualLights( - pathContext, pathState, lastVertex, pathVertex, ray); + pathContext, pathState, lastVertex, pathVertex, ray, ff); } if (pathVertex.type == ENVIRONMENT) { break; @@ -227,9 +234,10 @@ SYCL_EXTERNAL void samplePath(const PathContext &pathContext, break; } - postIntersect(pathContext, pathState, pathVertex, ray); + postIntersect(pathContext, pathState, pathVertex, ray, ff); - if (!pathState.disableFWD && pathVertex.type != VOLUME) { + if (!pathState.disableFWD && (pathVertex.type != VOLUME) + && (ff.other & FFO_LIGHT_GEOMETRY)) { pathState.contribution = pathState.contribution + evaluateGeometryLights( pathContext, pathState, lastVertex, pathVertex, ray); @@ -260,7 +268,7 @@ SYCL_EXTERNAL void samplePath(const PathContext &pathContext, #endif if (m != NULL) { pathVertex.bsdf = Material_dispatch_getBSDF( - m, &ctx, pathVertex.dg, ray, pathState.currentMedium); + m, &ctx, pathVertex.dg, ray, pathState.currentMedium, ff.other); if (pathVertex.bsdf != NULL) { pathVertex.albedo = pathVertex.bsdf->albedo; } @@ -275,11 +283,11 @@ SYCL_EXTERNAL void samplePath(const PathContext &pathContext, // next event estimation if (!pathState.disableNEE && isSmooth(pathVertex)) { pathState.contribution = pathState.contribution - + nextEventEstimation(pathContext, pathState, pathVertex); + + nextEventEstimation(pathContext, pathState, pathVertex, ff); } Scattering_SampleRes fs = - sampleDirection(pathContext, pathState, pathVertex); + 
sampleDirection(pathContext, pathState, pathVertex, ff); if (pathState.auxFree && (fs.type & SCATTERING_SMOOTH)) { updateAuxilliaryData(pathState, pathVertex, sample); @@ -327,7 +335,7 @@ SYCL_EXTERNAL void samplePath(const PathContext &pathContext, #endif if (m != NULL) { Material_dispatch_selectNextMedium( - m, pathVertex.dg, pathState.currentMedium); + m, pathVertex.dg, pathState.currentMedium, ff.other); } } } diff --git a/modules/cpu/render/pathtracer/PathTracer.cpp b/modules/cpu/render/pathtracer/PathTracer.cpp index 2552af184..916668f1b 100644 --- a/modules/cpu/render/pathtracer/PathTracer.cpp +++ b/modules/cpu/render/pathtracer/PathTracer.cpp @@ -4,6 +4,7 @@ #include "PathTracer.h" #include "PathTracerData.h" #include "camera/Camera.h" +#include "common/FeatureFlagsEnum.h" #include "common/World.h" #include "fb/FrameBuffer.h" #include "geometry/GeometricModel.h" @@ -19,8 +20,10 @@ SYCL_EXTERNAL void PathTracer_renderTask(Renderer *uniform _self, World *uniform world, void *uniform perFrameData, const uint32 *uniform taskIDs, - const int taskIndex0); + const int taskIndex0, + const uniform ospray::FeatureFlags &ff); } +constexpr sycl::specialization_id specFeatureFlags; #else // ispc exports #include "math/Distribution1D_ispc.h" @@ -75,6 +78,9 @@ void *PathTracer::beginFrame(FrameBuffer *, World *world) std::unique_ptr pathtracerData = rkcommon::make_unique( *world, importanceSampleGeometryLights, *this); + if (pathtracerData->getSh()->numGeoLights) + featureFlags |= FFO_LIGHT_GEOMETRY; + world->getSh()->pathtracerData = pathtracerData->getSh(); world->pathtracerData = std::move(pathtracerData); scannedGeometryLights = importanceSampleGeometryLights; @@ -101,18 +107,28 @@ void PathTracer::renderTasks(FrameBuffer *fb, #ifdef OSPRAY_TARGET_SYCL const uint32_t *taskIDsPtr = taskIDs.data(); auto event = syclQueue.submit([&](sycl::handler &cgh) { + FeatureFlags ff = world->getFeatureFlags(); + ff.other |= featureFlags; + ff.other |= fb->getFeatureFlagsOther(); + 
ff.other |= camera->getFeatureFlagsOther(); + cgh.set_specialization_constant(ff); + const sycl::nd_range<1> dispatchRange = computeDispatchRange(numTasks, 16); - cgh.parallel_for(dispatchRange, [=](sycl::nd_item<1> taskIndex) { - if (taskIndex.get_global_id(0) < numTasks) { - ispc::PathTracer_renderTask(&rendererSh->super, - fbSh, - cameraSh, - worldSh, - perFrameData, - taskIDsPtr, - taskIndex.get_global_id(0)); - } - }); + cgh.parallel_for(dispatchRange, + [=](sycl::nd_item<1> taskIndex, sycl::kernel_handler kh) { + if (taskIndex.get_global_id(0) < numTasks) { + const FeatureFlags ff = + kh.get_specialization_constant(); + ispc::PathTracer_renderTask(&rendererSh->super, + fbSh, + cameraSh, + worldSh, + perFrameData, + taskIDsPtr, + taskIndex.get_global_id(0), + ff); + } + }); }); event.wait_and_throw(); // For prints we have to flush the entire queue, because other stuff is queued diff --git a/modules/cpu/render/pathtracer/PathTracer.h b/modules/cpu/render/pathtracer/PathTracer.h index fbe885cda..ebfb7eddb 100644 --- a/modules/cpu/render/pathtracer/PathTracer.h +++ b/modules/cpu/render/pathtracer/PathTracer.h @@ -32,7 +32,6 @@ struct PathTracer : public AddStructShared ) const override; private: - void generateGeometryLights(const World &, std::vector &); bool importanceSampleGeometryLights{ true}; // if geometry lights are importance // sampled using NEE (requires scanning diff --git a/modules/cpu/render/pathtracer/PathTracer.ispc b/modules/cpu/render/pathtracer/PathTracer.ispc index ce921d48c..e3ed54a17 100644 --- a/modules/cpu/render/pathtracer/PathTracer.ispc +++ b/modules/cpu/render/pathtracer/PathTracer.ispc @@ -39,7 +39,8 @@ static ScreenSample PathTraceIntegrator_Li(const PathTracer *uniform self, const vec2f &pixel, // normalized, i.e. 
in [0..1] Ray &ray, varying LDSampler *uniform ldSampler, - varying RandomSampler *uniform randomSampler) + varying RandomSampler *uniform randomSampler, + const uniform FeatureFlags &ff) { ScreenSample sample; sample.rgb = make_vec3f(0.f); @@ -90,7 +91,7 @@ static ScreenSample PathTraceIntegrator_Li(const PathTracer *uniform self, pathState.debug = false; - samplePath(pathContext, pathState, ray, sample); + samplePath(pathContext, pathState, ray, sample, ff); return sample; } @@ -101,7 +102,8 @@ static ScreenSample PathTracer_renderPixel(PathTracer *uniform self, World *uniform world, const uint32 ix, const uint32 iy, - const uint32 accumID) + const uint32 accumID, + const uniform FeatureFlags &ff) { ScreenSample screenSample; screenSample.rgb = make_vec3f(0.f); @@ -149,8 +151,9 @@ static ScreenSample PathTracer_renderPixel(PathTracer *uniform self, cameraSample.lens = LDSampler_getFloat2(ldSampler, 2); cameraSample.time = LDSampler_getFloat(ldSampler, 4); - Camera_dispatch_initRay(camera, screenSample.ray, cameraSample); - const float tMax = Renderer_getMaxDepth(&self->super, cameraSample.screen); + Camera_dispatch_initRay(camera, screenSample.ray, cameraSample, ff.other); + const float tMax = + Renderer_getMaxDepth(&self->super, cameraSample.screen, ff.other); screenSample.ray.t = min(screenSample.ray.t, tMax); @@ -162,7 +165,8 @@ static ScreenSample PathTracer_renderPixel(PathTracer *uniform self, cameraSample.screen, screenSample.ray, ldSampler, - randomSampler); + randomSampler, + ff); screenSample.rgb = screenSample.rgb + min(sample.rgb, make_vec3f(self->maxRadiance)); @@ -195,16 +199,15 @@ task Camera *uniform camera, World *uniform world, void *uniform perFrameData, - const uint32 *uniform taskIDs + const uint32 *uniform taskIDs, #ifdef OSPRAY_TARGET_SYCL - , - const int taskIndex0 + const int taskIndex0, #endif - ) + const uniform FeatureFlags &ff) { PathTracer *uniform self = (PathTracer * uniform) _self; uniform RenderTaskDesc taskDesc = - 
FrameBuffer_dispatch_getRenderTaskDesc(fb, taskIDs[taskIndex0]); + FrameBuffer_dispatch_getRenderTaskDesc(fb, taskIDs[taskIndex0], ff.other); if (fb->cancelRender || isEmpty(taskDesc.region)) { return; @@ -218,11 +221,12 @@ task x = taskDesc.region.lower.x... taskDesc.region.upper.x) { #endif ScreenSample screenSample = PathTracer_renderPixel( - self, fb, camera, world, x, y, taskDesc.accumID); + self, fb, camera, world, x, y, taskDesc.accumID, ff); - FrameBuffer_dispatch_accumulateSample(fb, screenSample, taskDesc); + FrameBuffer_dispatch_accumulateSample( + fb, screenSample, taskDesc, ff.other); } - FrameBuffer_dispatch_completeTask(fb, taskDesc); + FrameBuffer_dispatch_completeTask(fb, taskDesc, ff.other); } // Exports (called from C++) ////////////////////////////////////////////////// @@ -243,7 +247,7 @@ export void PathTracer_renderTasks(void *uniform _self, const uint32 *uniform taskIDs = (const uint32 *uniform)_taskIDs; launch[numTasks] PathTracer_renderTask( - self, fb, camera, world, perFrameData, taskIDs); + self, fb, camera, world, perFrameData, taskIDs, ffAll()); } #endif diff --git a/modules/cpu/render/pathtracer/ShadowCatcher.ih b/modules/cpu/render/pathtracer/ShadowCatcher.ih index e27b515c6..c886fcde8 100644 --- a/modules/cpu/render/pathtracer/ShadowCatcher.ih +++ b/modules/cpu/render/pathtracer/ShadowCatcher.ih @@ -3,6 +3,8 @@ #pragma once +#include "common/FeatureFlags.ih" + OSPRAY_BEGIN_ISPC_NAMESPACE struct PathContext; @@ -15,6 +17,7 @@ SYCL_EXTERNAL bool shadowCatcher(const PathContext &pathContext, PathState &pathState, PathVertex &pathVertex, const Ray &ray, - ScreenSample &sample); + ScreenSample &sample, + const uniform FeatureFlags &ff); OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/render/pathtracer/ShadowCatcher.ispc b/modules/cpu/render/pathtracer/ShadowCatcher.ispc index cf69c7f24..abf57282b 100644 --- a/modules/cpu/render/pathtracer/ShadowCatcher.ispc +++ b/modules/cpu/render/pathtracer/ShadowCatcher.ispc @@ -21,7 +21,8 @@ 
SYCL_EXTERNAL bool shadowCatcher(const PathContext &pathContext, PathState &pathState, PathVertex &pathVertex, const Ray &ray, - ScreenSample &sample) + ScreenSample &sample, + const uniform FeatureFlags &ff) { if (!(pathContext.context->backgroundRefraction @@ -55,8 +56,8 @@ SYCL_EXTERNAL bool shadowCatcher(const PathContext &pathContext, const Light *uniform light = pathtracerData.lights[i]; const vec2f s = LDSampler_getFloat2( pathContext.ldSampler, pathState.sampleDim + 4 + i * 2); - Light_SampleRes ls = - Light_dispatch_sample(light, pathVertex.dg, s, pathState.time); + Light_SampleRes ls = Light_dispatch_sample( + light, pathVertex.dg, s, pathState.time, ff.other); // skip when zero contribution from light if (reduce_max(ls.weight) <= 0.0f | ls.pdf <= PDF_CULLING) @@ -87,7 +88,8 @@ SYCL_EXTERNAL bool shadowCatcher(const PathContext &pathContext, unshadedLightContrib, shadowRay, rayIntervals, - pathState.currentMedium); + pathState.currentMedium, + ff); } // order of args important to filter NaNs (in case unshaded.X is zero) const vec3f ratio = min( diff --git a/modules/cpu/render/pathtracer/TransparentShadow.ih b/modules/cpu/render/pathtracer/TransparentShadow.ih index 638db7ecd..342a7418b 100644 --- a/modules/cpu/render/pathtracer/TransparentShadow.ih +++ b/modules/cpu/render/pathtracer/TransparentShadow.ih @@ -3,6 +3,7 @@ #pragma once +#include "common/FeatureFlags.ih" #include "render/materials/Medium.ih" #include "rkcommon/math/vec.ih" @@ -18,5 +19,6 @@ SYCL_EXTERNAL vec3f transparentShadow(const uniform PathTracer *uniform self, vec3f lightContrib, Ray &shadowRay, RayIntervals &rayIntervals, - Medium medium); + Medium medium, + const uniform FeatureFlags &ff); OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/render/pathtracer/TransparentShadow.ispc b/modules/cpu/render/pathtracer/TransparentShadow.ispc index 6156571a2..ad22455de 100644 --- a/modules/cpu/render/pathtracer/TransparentShadow.ispc +++ b/modules/cpu/render/pathtracer/TransparentShadow.ispc 
@@ -20,13 +20,14 @@ SYCL_EXTERNAL vec3f transparentShadow(const uniform PathTracer *uniform self, vec3f lightContrib, Ray &shadowRay, RayIntervals &rayIntervals, - Medium medium) + Medium medium, + const uniform FeatureFlags &ff) { uniform int maxDepth = self->super.maxDepth; const float tOriginal = shadowRay.t; while (1) { - traceGeometryRayIntervals(world, shadowRay, rayIntervals); + traceGeometryRayIntervals(world, shadowRay, rayIntervals, ff); if (noHit(shadowRay)) return lightContrib; @@ -37,7 +38,8 @@ SYCL_EXTERNAL vec3f transparentShadow(const uniform PathTracer *uniform self, dg, shadowRay, DG_NS | DG_NG | DG_FACEFORWARD | DG_NORMALIZE | DG_TEXCOORD | DG_COLOR - | DG_MOTIONBLUR); + | DG_MOTIONBLUR, + ff); uniform Material *material = (uniform Material *)dg.material; vec3f transparency; @@ -50,8 +52,8 @@ SYCL_EXTERNAL vec3f transparentShadow(const uniform PathTracer *uniform self, foreach_unique (m in material) { #endif if (m != NULL) { - transparency = - Material_dispatch_getTransparency(m, dg, shadowRay, medium); + transparency = Material_dispatch_getTransparency( + m, dg, shadowRay, medium, ff.other); } } @@ -77,7 +79,7 @@ SYCL_EXTERNAL vec3f transparentShadow(const uniform PathTracer *uniform self, foreach_unique (m in material) { #endif if (m != NULL) { - Material_dispatch_selectNextMedium(m, dg, medium); + Material_dispatch_selectNextMedium(m, dg, medium, ff.other); } } diff --git a/modules/cpu/render/pathtracer/VirtualLight.ih b/modules/cpu/render/pathtracer/VirtualLight.ih index 1696b9de3..420c20931 100644 --- a/modules/cpu/render/pathtracer/VirtualLight.ih +++ b/modules/cpu/render/pathtracer/VirtualLight.ih @@ -16,6 +16,7 @@ SYCL_EXTERNAL vec3f evaluateVirtualLights(const PathContext &pathContext, const PathState &pathState, const PathVertex &lastVertex, const PathVertex &pathVertex, - Ray &ray); + Ray &ray, + const uniform FeatureFlags &ff); OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/render/pathtracer/VirtualLight.ispc 
b/modules/cpu/render/pathtracer/VirtualLight.ispc index 3a7aaba19..21845aecc 100644 --- a/modules/cpu/render/pathtracer/VirtualLight.ispc +++ b/modules/cpu/render/pathtracer/VirtualLight.ispc @@ -42,7 +42,8 @@ SYCL_EXTERNAL vec3f evaluateVirtualLights(const PathContext &pathContext, const PathState &pathState, const PathVertex &lastVertex, const PathVertex &pathVertex, - Ray &ray) + Ray &ray, + const uniform FeatureFlags &ff) { // add light from *virtual* lights by intersecting them vec3f L = make_vec3f(0.f); @@ -71,7 +72,8 @@ SYCL_EXTERNAL vec3f evaluateVirtualLights(const PathContext &pathContext, ray.dir, intervalLightDist.lower, intervalLightDist.upper, - pathState.time); + pathState.time, + ff.other); if (reduce_max(le.radiance) > 0.0f) { Ray shadowRay; setRay(shadowRay, @@ -85,15 +87,16 @@ SYCL_EXTERNAL vec3f evaluateVirtualLights(const PathContext &pathContext, RayIntervals rayIntervals; traceClippingRay(pathContext.world, shadowRay, rayIntervals); + float T = 1.f; #ifdef OSPRAY_ENABLE_VOLUMES - const float T = volumeTransmittance(pathState, - pathContext.world, - shadowRay, - rayIntervals, - pathContext.ldSampler, - pathContext.randomSampler); -#else - const float T = 1.f; + if (ff.volume & FFV_VOLUME) { + T = volumeTransmittance(pathState, + pathContext.world, + shadowRay, + rayIntervals, + pathContext.ldSampler, + pathContext.randomSampler); + } #endif L = L + pathState.throughput * le.radiance diff --git a/modules/cpu/render/scivis/SciVis.cpp b/modules/cpu/render/scivis/SciVis.cpp index 7317c5c6c..af7c9139c 100644 --- a/modules/cpu/render/scivis/SciVis.cpp +++ b/modules/cpu/render/scivis/SciVis.cpp @@ -5,6 +5,7 @@ #include "SciVis.h" #include "SciVisData.h" #include "camera/Camera.h" +#include "common/FeatureFlagsEnum.h" #include "common/World.h" #include "fb/FrameBuffer.h" #ifndef OSPRAY_TARGET_SYCL @@ -18,8 +19,10 @@ SYCL_EXTERNAL void SciVis_renderTask(Renderer *uniform self, World *uniform world, void *uniform perFrameData, const uint32 *uniform 
taskIDs, - const int taskIndex0); + const int taskIndex0, + const uniform ospray::FeatureFlags &ff); } +constexpr sycl::specialization_id specFeatureFlags; #endif namespace ospray { @@ -81,18 +84,28 @@ void SciVis::renderTasks(FrameBuffer *fb, #ifdef OSPRAY_TARGET_SYCL const uint32_t *taskIDsPtr = taskIDs.data(); auto event = syclQueue.submit([&](sycl::handler &cgh) { + FeatureFlags ff = world->getFeatureFlags(); + ff.other |= featureFlags; + ff.other |= fb->getFeatureFlagsOther(); + ff.other |= camera->getFeatureFlagsOther(); + cgh.set_specialization_constant(ff); + const sycl::nd_range<1> dispatchRange = computeDispatchRange(numTasks, 16); - cgh.parallel_for(dispatchRange, [=](sycl::nd_item<1> taskIndex) { - if (taskIndex.get_global_id(0) < numTasks) { - ispc::SciVis_renderTask(&rendererSh->super, - fbSh, - cameraSh, - worldSh, - perFrameData, - taskIDsPtr, - taskIndex.get_global_id(0)); - } - }); + cgh.parallel_for(dispatchRange, + [=](sycl::nd_item<1> taskIndex, sycl::kernel_handler kh) { + if (taskIndex.get_global_id(0) < numTasks) { + const FeatureFlags ff = + kh.get_specialization_constant(); + ispc::SciVis_renderTask(&rendererSh->super, + fbSh, + cameraSh, + worldSh, + perFrameData, + taskIDsPtr, + taskIndex.get_global_id(0), + ff); + } + }); }); event.wait_and_throw(); // For prints we have to flush the entire queue, because other stuff is queued diff --git a/modules/cpu/render/scivis/SciVis.ih b/modules/cpu/render/scivis/SciVis.ih index 6e79c6a5b..16fe08848 100644 --- a/modules/cpu/render/scivis/SciVis.ih +++ b/modules/cpu/render/scivis/SciVis.ih @@ -3,6 +3,7 @@ #pragma once +#include "common/FeatureFlags.ih" #include "render/ScreenSample.ih" OSPRAY_BEGIN_ISPC_NAMESPACE @@ -24,9 +25,6 @@ struct SciVisRenderContext varying LDSampler *uniform ldSampler; }; -// Note: Making lightAlpha and SciVis_computeAO __noinline helps performance -// on DG2 but the code then hangs if tracing secondary rays (shadows or AO), -// likely same issue as the XDEPS-4374 and 
XDEPS-4422 SYCL_EXTERNAL vec3f lightAlpha(const uniform SciVis *uniform self, Ray &ray, const FrameBuffer *uniform fb, @@ -35,7 +33,8 @@ SYCL_EXTERNAL vec3f lightAlpha(const uniform SciVis *uniform self, varying LDSampler *uniform ldSampler, vec3f weight, float rayOffset, - uniform float quality); + uniform float quality, + const uniform FeatureFlags &ff); SYCL_EXTERNAL vec3f SciVis_computeAO(const uniform SciVis *uniform self, const FrameBuffer *uniform fb, @@ -45,6 +44,7 @@ SYCL_EXTERNAL vec3f SciVis_computeAO(const uniform SciVis *uniform self, const varying DifferentialGeometry &dg, const uniform int sampleCnt, const uniform float aoRadius, - const varying vec3i &sampleID); + const varying vec3i &sampleID, + const uniform FeatureFlags &ff); OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/render/scivis/SciVis.ispc b/modules/cpu/render/scivis/SciVis.ispc index f939b8c76..f51105a6b 100644 --- a/modules/cpu/render/scivis/SciVis.ispc +++ b/modules/cpu/render/scivis/SciVis.ispc @@ -23,7 +23,8 @@ static void SciVis_renderSample(Renderer *uniform _self, FrameBuffer *uniform fb, World *uniform world, void *uniform perFrameData, - varying ScreenSample &sample) + varying ScreenSample &sample, + const uniform FeatureFlags &ff) { uniform SciVis *uniform self = (uniform SciVis * uniform) _self; @@ -68,59 +69,62 @@ static void SciVis_renderSample(Renderer *uniform _self, while (true) { // Then trace normal geometry using calculated ray intervals, // if hit ray.t will be updated - traceGeometryRayIntervals(world, ray, rayIntervals); + traceGeometryRayIntervals(world, ray, rayIntervals, ff); #ifdef OSPRAY_ENABLE_VOLUMES - // Determine volume intervals by tracing ray in the volume scene Ray volumeRay = ray; - traceVolumeRay(world, volumeRay, volumeIntervals); - - // Sample volumes across volume intervals (in front of geometry hit) - if (volumeIntervals.numVolumeIntervals > 0) { - SciVisRenderContext rc; - rc.renderer = self; - rc.fb = fb; - rc.world = world; - rc.sample = 
sample; - rc.ldSampler = ldSampler; - volumeRay.geomID = RTC_INVALID_GEOMETRY_ID; - vec4f volumeColor = integrateVolumeIntervalsGradient(rc, - volumeIntervals, - rayIntervals, - volumeRay, - ldSampler, - self->volumeSamplingRate, - true); - - // Blend volume - outColor = outColor + outTransmission * make_vec3f(volumeColor); - outTransmission = outTransmission * volumeColor.w; - - if (hadHit(volumeRay) && sample.geomID == RTC_INVALID_GEOMETRY_ID) { - sample.primID = 0; + if (ff.volume & FFV_VOLUME) { + // Determine volume intervals by tracing ray in the volume scene + traceVolumeRay(world, volumeRay, volumeIntervals); + + // Sample volumes across volume intervals (in front of geometry hit) + if (volumeIntervals.numVolumeIntervals > 0) { + SciVisRenderContext rc; + rc.renderer = self; + rc.fb = fb; + rc.world = world; + rc.sample = sample; + rc.ldSampler = ldSampler; + volumeRay.geomID = RTC_INVALID_GEOMETRY_ID; + vec4f volumeColor = integrateVolumeIntervalsGradient(rc, + volumeIntervals, + rayIntervals, + volumeRay, + ldSampler, + self->volumeSamplingRate, + true, + ff); + + // Blend volume + outColor = outColor + outTransmission * make_vec3f(volumeColor); + outTransmission = outTransmission * volumeColor.w; + + if (hadHit(volumeRay) && sample.geomID == RTC_INVALID_GEOMETRY_ID) { + sample.primID = 0; #ifdef OSPRAY_TARGET_SYCL - { - const uint32 instID = volumeRay.instID; + { + const uint32 instID = volumeRay.instID; #else - foreach_unique (instID in volumeRay.instID) { + foreach_unique (instID in volumeRay.instID) { #endif - if (instID != RTC_INVALID_GEOMETRY_ID) { - const Instance *uniform instance = *(world->instances + instID); - sample.instID = (instance->userID == RTC_INVALID_GEOMETRY_ID) - ? 
instID - : instance->userID; - const VolumetricModel *volms = - *(instance->group->volumetricModels + volumeRay.geomID); + if (instID != RTC_INVALID_GEOMETRY_ID) { + const Instance *uniform instance = *(world->instances + instID); + sample.instID = (instance->userID == RTC_INVALID_GEOMETRY_ID) + ? instID + : instance->userID; + const VolumetricModel *volms = + *(instance->group->volumetricModels + volumeRay.geomID); #ifdef OSPRAY_TARGET_SYCL - { - const VolumetricModel *uniform volm = volms; + { + const VolumetricModel *uniform volm = volms; #else - foreach_unique (volm in volms) { + foreach_unique (volm in volms) { #endif - sample.geomID = (volm->userID == RTC_INVALID_GEOMETRY_ID) - ? volumeRay.geomID - : volm->userID; + sample.geomID = (volm->userID == RTC_INVALID_GEOMETRY_ID) + ? volumeRay.geomID + : volm->userID; + } } } } @@ -133,7 +137,8 @@ static void SciVis_renderSample(Renderer *uniform _self, DifferentialGeometry dg; if (self->visibleLights) { dg.P = ray.org; - outColor = outColor + outTransmission * evaluateLights(world, dg, ray); + outColor = + outColor + outTransmission * evaluateLights(world, dg, ray, ff); } // If any geometry has been hit @@ -145,12 +150,13 @@ static void SciVis_renderSample(Renderer *uniform _self, dg, ray, DG_NG | DG_NS | DG_NORMALIZE | DG_FACEFORWARD | DG_COLOR - | DG_TEXCOORD); + | DG_TEXCOORD, + ff); // Shade geometry SSI surfaceShading; surfaceShading = SciVis_computeShading( - self, fb, world, dg, sample, ldSampler, ray.dir); + self, fb, world, dg, sample, ldSampler, ray.dir, ff); // Initialize other per sample data with first hit values // In addition to considering the first hit, all previous, fully @@ -192,7 +198,8 @@ static void SciVis_renderSample(Renderer *uniform _self, setRay(ray, ray.t + dg.epsilon, originalRayTFar); } else { // Blend background with output final color - vec4f backgroundColor = Renderer_getBackground(&self->super, sample.pos); + vec4f backgroundColor = + Renderer_getBackground(&self->super, 
sample.pos, ff.other); outColor = outColor + outTransmission * make_vec3f(backgroundColor); outTransmission = outTransmission * (1.f - backgroundColor.w); @@ -229,7 +236,8 @@ SYCL_EXTERNAL vec3f SciVis_computeAO(const uniform SciVis *uniform self, const varying DifferentialGeometry &dg, const uniform int sampleCnt, const uniform float aoRadius, - const varying vec3i &sampleID) + const varying vec3i &sampleID, + const uniform FeatureFlags &ff) { const uniform int accumID = reduce_max(sampleID.z) * sampleCnt; @@ -270,7 +278,8 @@ SYCL_EXTERNAL vec3f SciVis_computeAO(const uniform SciVis *uniform self, ldSampler, make_vec3f(1.f), dg.epsilon, - 0.1f)); + 0.1f, + ff)); } // the cosTheta of cosineSampleHemispherePDF and dot(shadingNormal, ao_dir) @@ -289,10 +298,11 @@ SYCL_EXTERNAL void SciVis_renderTask(Renderer *uniform self, World *uniform world, void *uniform perFrameData, const uint32 *uniform taskIDs, - const int taskIndex0) + const int taskIndex0, + const uniform FeatureFlags &ff) { Renderer_default_renderTask( - self, fb, camera, world, perFrameData, taskIDs, taskIndex0); + self, fb, camera, world, perFrameData, taskIDs, taskIndex0, ff); } #else export void SciVis_renderTasks(void *uniform _self, @@ -309,7 +319,7 @@ export void SciVis_renderTasks(void *uniform _self, World *uniform world = (World * uniform) _world; const uint32 *uniform taskIDs = (const uint32 *uniform)_taskIDs; launch[numTasks] Renderer_default_renderTask( - self, fb, camera, world, perFrameData, taskIDs); + self, fb, camera, world, perFrameData, taskIDs, ffAll()); sync; } #endif diff --git a/modules/cpu/render/scivis/lightAlpha.ispc b/modules/cpu/render/scivis/lightAlpha.ispc index 1d5be9315..70df5a5a7 100644 --- a/modules/cpu/render/scivis/lightAlpha.ispc +++ b/modules/cpu/render/scivis/lightAlpha.ispc @@ -23,7 +23,8 @@ SYCL_EXTERNAL vec3f lightAlpha(const uniform SciVis *uniform self, varying LDSampler *uniform ldSampler, vec3f weight, float rayOffset, - uniform float quality) + uniform float 
quality, + const uniform FeatureFlags &ff) { vec3f alpha = make_vec3f(1.f); const float org_t_max = ray.t; @@ -45,42 +46,43 @@ SYCL_EXTERNAL vec3f lightAlpha(const uniform SciVis *uniform self, // First trace the ray across clipping scene to calculate ray intervals, // this step should keep ray structure unchanged RayIntervals rayIntervals; -#ifndef OSPRAY_TARGET_SYCL traceClippingRay(world, ray, rayIntervals); -#endif while (true) { // Then trace normal geometry using calculated ray intervals, // if hit ray.t will be updated // WA for https://jira.devtools.intel.com/browse/XDEPS-4875 #ifndef OSPRAY_TARGET_SYCL - traceGeometryRayIntervals(world, ray, rayIntervals); + traceGeometryRayIntervals(world, ray, rayIntervals, ff); #else - traceGeometryRay(world, ray); + traceGeometryRay(world, ray, ff); #endif #ifdef OSPRAY_ENABLE_VOLUMES - // Determine volume intervals by tracing ray in the volume scene - Ray volumeRay = ray; - traceVolumeRay(world, volumeRay, volumeIntervals); + if (ff.volume & FFV_VOLUME) { + // Determine volume intervals by tracing ray in the volume scene + Ray volumeRay = ray; + traceVolumeRay(world, volumeRay, volumeIntervals); - // Sample volumes across volume intervals (in front of geometry hit) - if (volumeIntervals.numVolumeIntervals > 0) { - SciVisRenderContext rc; - rc.renderer = self; - rc.fb = fb; - rc.world = world; - rc.sample = sample; - rc.ldSampler = ldSampler; - vec4f volumeColor = integrateVolumeIntervalsGradient(rc, - volumeIntervals, - rayIntervals, - volumeRay, - ldSampler, - self->volumeSamplingRate * quality, - false); + // Sample volumes across volume intervals (in front of geometry hit) + if (volumeIntervals.numVolumeIntervals > 0) { + SciVisRenderContext rc; + rc.renderer = self; + rc.fb = fb; + rc.world = world; + rc.sample = sample; + rc.ldSampler = ldSampler; + vec4f volumeColor = integrateVolumeIntervalsGradient(rc, + volumeIntervals, + rayIntervals, + volumeRay, + ldSampler, + self->volumeSamplingRate * quality, + false, + 
ff); - alpha = alpha * make_vec3f(volumeColor.w); + alpha = alpha * make_vec3f(volumeColor.w); + } } #endif @@ -89,9 +91,9 @@ SYCL_EXTERNAL vec3f lightAlpha(const uniform SciVis *uniform self, if (rayHadHit) { // Prepare differential geometry structure DifferentialGeometry dg; - postIntersect(world, &self->super, dg, ray, DG_COLOR | DG_TEXCOORD); + postIntersect(world, &self->super, dg, ray, DG_COLOR | DG_TEXCOORD, ff); - const SciVisBSDF bsdf = evalMaterial(dg); + const SciVisBSDF bsdf = evalMaterial(dg, ff.other); alpha = alpha * bsdf.transmission; // Prepare ray for next loop iteration, diff --git a/modules/cpu/render/scivis/surfaces.ih b/modules/cpu/render/scivis/surfaces.ih index 6b8866a66..26adebf43 100644 --- a/modules/cpu/render/scivis/surfaces.ih +++ b/modules/cpu/render/scivis/surfaces.ih @@ -4,8 +4,8 @@ #pragma once #include "SciVis.ih" +#include "render/Material.ih" #include "render/materials/OBJ.ih" -#include "texture/TextureParam.ih" // c++ shared #include "render/materials/OBJShared.h" @@ -27,10 +27,13 @@ SYCL_EXTERNAL SSI SciVis_computeShading(const SciVis *uniform self, const DifferentialGeometry &dg, ScreenSample &sample, varying LDSampler *uniform ldSampler, - const varying vec3f &inDir); + const varying vec3f &inDir, + const uniform FeatureFlags &ff); -SYCL_EXTERNAL vec3f evaluateLights( - const World *uniform world, const DifferentialGeometry &dg, const Ray &ray); +SYCL_EXTERNAL vec3f evaluateLights(const World *uniform world, + const DifferentialGeometry &dg, + const Ray &ray, + const uniform FeatureFlags &ff); struct SciVisBSDF { @@ -42,7 +45,8 @@ struct SciVisBSDF float opacity; }; -inline SciVisBSDF evalMaterial(const DifferentialGeometry &dg) +inline SciVisBSDF evalMaterial( + const DifferentialGeometry &dg, const uniform FeatureFlagsOther ffo) { const OBJ *mat = (const OBJ *)dg.material; @@ -61,16 +65,16 @@ inline SciVisBSDF evalMaterial(const DifferentialGeometry &dg) foreach_unique (m in mat) { #endif if (m != NULL && m->super.type == 
MATERIAL_TYPE_OBJ) { - float d = m->d * get1f(m->dMap, dg, 1.f) * dg.color.w; + float d = m->d * get1f_ff(m->dMap, dg, 1.f, ffo) * dg.color.w; res.diffuse = res.diffuse * m->Kd; - if (valid(m->KdMap)) { + if (valid(m->KdMap) && (ffo & FFO_TEXTURE_IN_MATERIAL)) { vec4f Kd_from_map = get4f(m->KdMap, dg); res.diffuse = res.diffuse * make_vec3f(Kd_from_map); d *= Kd_from_map.w; } res.diffuse = res.diffuse * d; - res.specular = d * m->Ks * get3f(m->KsMap, dg, make_vec3f(1.f)); - res.shininess = m->Ns * get1f(m->NsMap, dg, 1.0f); + res.specular = d * m->Ks * get3f_ff(m->KsMap, dg, make_vec3f(1.f), ffo); + res.shininess = m->Ns * get1f_ff(m->NsMap, dg, 1.f, ffo); res.transmission = m->Tf * d + make_vec3f(1.f - d); res.opacity = d; } diff --git a/modules/cpu/render/scivis/surfaces.ispc b/modules/cpu/render/scivis/surfaces.ispc index 0b7223cd6..6953d14d6 100644 --- a/modules/cpu/render/scivis/surfaces.ispc +++ b/modules/cpu/render/scivis/surfaces.ispc @@ -1,11 +1,11 @@ // Copyright 2009 Intel Corporation // SPDX-License-Identifier: Apache-2.0 +#include "lights/LightDispatch.ih" #include "math/random.ih" #include "math/sampling.ih" #include "render/util.ih" #include "surfaces.ih" -#include "lights/LightDispatch.ih" // c++ shared #include "SciVisDataShared.h" #include "SciVisShared.h" @@ -20,7 +20,8 @@ vec3f directIllumination(const uniform SciVis *uniform self, ScreenSample &sample, varying LDSampler *uniform ldSampler, const varying SciVisBSDF &bsdf, - const varying vec3f &inDir) + const varying vec3f &inDir, + const uniform FeatureFlags &ff) { vec3f color = make_vec3f(0.f); @@ -37,7 +38,7 @@ vec3f directIllumination(const uniform SciVis *uniform self, const vec2f s = make_vec2f(0.0f); // sample center of area lights const Light_SampleRes light = - Light_dispatch_sample(l, dg, s, sample.ray.time); + Light_dispatch_sample(l, dg, s, sample.ray.time, ff.other); if (reduce_max(light.weight) > 0.f) { // any potential contribution? 
const float cosNL = dot(light.dir, dg.Ns); @@ -67,7 +68,8 @@ vec3f directIllumination(const uniform SciVis *uniform self, ldSampler, light_contrib, dg.epsilon, - 0.25f); + 0.25f, + ff); color = color + light_alpha * light_contrib; } @@ -86,15 +88,16 @@ SYCL_EXTERNAL SSI SciVis_computeShading(const SciVis *uniform self, const DifferentialGeometry &dg, ScreenSample &sample, varying LDSampler *uniform ldSampler, - const varying vec3f &inDir) + const varying vec3f &inDir, + const uniform FeatureFlags &ff) { SSI retval; - const SciVisBSDF bsdf = evalMaterial(dg); + const SciVisBSDF bsdf = evalMaterial(dg, ff.other); retval.albedo = bsdf.albedo; - vec3f color = - directIllumination(self, fb, world, dg, sample, ldSampler, bsdf, inDir); + vec3f color = directIllumination( + self, fb, world, dg, sample, ldSampler, bsdf, inDir, ff); vec3f ao = make_vec3f(1.f); const uniform SciVisData &scivisData = @@ -109,7 +112,8 @@ SYCL_EXTERNAL SSI SciVis_computeShading(const SciVis *uniform self, dg, self->aoSamples, self->aoRadius, - sample.sampleID); + sample.sampleID, + ff); color = color + bsdf.diffuse * ao * scivisData.aoColorPi; @@ -120,8 +124,10 @@ SYCL_EXTERNAL SSI SciVis_computeShading(const SciVis *uniform self, return retval; } -SYCL_EXTERNAL vec3f evaluateLights( - const World *uniform world, const DifferentialGeometry &dg, const Ray &ray) +SYCL_EXTERNAL vec3f evaluateLights(const World *uniform world, + const DifferentialGeometry &dg, + const Ray &ray, + const uniform FeatureFlags &ff) { // Iterate through all lights vec3f color = make_vec3f(0.f); @@ -135,7 +141,7 @@ SYCL_EXTERNAL vec3f evaluateLights( // Evaluate light contribution const Light_EvalRes le = - Light_dispatch_eval(l, dg, ray.dir, ray.t0, ray.t, ray.time); + Light_dispatch_eval(l, dg, ray.dir, ray.t0, ray.t, ray.time, ff.other); color = color + le.radiance; } return color; diff --git a/modules/cpu/render/scivis/volumes.ih b/modules/cpu/render/scivis/volumes.ih index 222616ef9..8c3563bd9 100644 --- 
a/modules/cpu/render/scivis/volumes.ih +++ b/modules/cpu/render/scivis/volumes.ih @@ -18,7 +18,8 @@ SYCL_EXTERNAL vec4f integrateVolumeIntervalsGradient(SciVisRenderContext &rc, Ray &ray, varying LDSampler *uniform ldSampler, const uniform float samplingRate, - const uniform bool shade); + const uniform bool shade, + const uniform FeatureFlags &ff); OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/render/scivis/volumes.ispc b/modules/cpu/render/scivis/volumes.ispc index 0fdd7874c..b22d472f3 100644 --- a/modules/cpu/render/scivis/volumes.ispc +++ b/modules/cpu/render/scivis/volumes.ispc @@ -35,7 +35,8 @@ static void sampleVolume(SciVisRenderContext &rc, Ray &ray, const VolumeInterval &vi, const uniform float samplingRate, - const uniform bool shade) + const uniform bool shade, + const uniform FeatureFlags &ff) { // We have to iterate till we get a valid sample value float dt; @@ -117,8 +118,14 @@ static void sampleVolume(SciVisRenderContext &rc, dg.Ns = dg.Ng = normalize(xfmVector(transposed(vi.instance->rcp_xfm.l), ns)); dg.P = ray.org + vc.distance * ray.dir; - SSI shading = SciVis_computeShading( - rc.renderer, rc.fb, rc.world, dg, rc.sample, rc.ldSampler, ray.dir); + SSI shading = SciVis_computeShading(rc.renderer, + rc.fb, + rc.world, + dg, + rc.sample, + rc.ldSampler, + ray.dir, + ff); vec4f shadedColor = make_vec4f( shading.shadedColor, 1.f - luminance(shading.transmission)); vc.sample = lerp(gsc, vc.sample, shadedColor); @@ -135,7 +142,8 @@ static float sampleAllVolumes(SciVisRenderContext &rc, Ray &ray, const uniform float samplingRate, vec4f &sampledColor, - const uniform bool shade) + const uniform bool shade, + const uniform FeatureFlags &ff) { // Look for the closest sample across all volumes float minDist = inf; @@ -155,7 +163,7 @@ static float sampleAllVolumes(SciVisRenderContext &rc, #else foreach_unique (m in vi.volumetricModel) { #endif - sampleVolume(rc, vc, m, ray, vi, samplingRate, shade); + sampleVolume(rc, vc, m, ray, vi, samplingRate, 
shade, ff); } vc.ready = 1; } @@ -190,7 +198,8 @@ SYCL_EXTERNAL vec4f integrateVolumeIntervalsGradient(SciVisRenderContext &rc, Ray &ray, varying LDSampler *uniform ldSampler, const uniform float samplingRate, - const uniform bool shade) + const uniform bool shade, + const uniform FeatureFlags &ff) { #ifdef OSPRAY_TARGET_SYCL // Only a single volume context is supported on the GPU, no dynamic allocation @@ -306,7 +315,8 @@ SYCL_EXTERNAL vec4f integrateVolumeIntervalsGradient(SciVisRenderContext &rc, ray, samplingRate, sampledColor, - shade); + shade, + ff); // Exit loop if nothing sampled if (dist == inf) diff --git a/modules/cpu/render/util.ih b/modules/cpu/render/util.ih index 4df470995..3ab1fe677 100644 --- a/modules/cpu/render/util.ih +++ b/modules/cpu/render/util.ih @@ -14,7 +14,8 @@ SYCL_EXTERNAL float computeAO(const Renderer *uniform, const varying DifferentialGeometry &, const uniform int sampleCnt, const uniform float aoRadius, - const varying vec3i &sampleID); + const varying vec3i &sampleID, + const uniform FeatureFlags &ff); // struct that stores a precomputed z-order for tiles of TILE_SIZE x TILE_SIZE // pixels diff --git a/modules/cpu/render/util.ispc b/modules/cpu/render/util.ispc index aaee03f0c..2bf494e3b 100644 --- a/modules/cpu/render/util.ispc +++ b/modules/cpu/render/util.ispc @@ -28,7 +28,8 @@ SYCL_EXTERNAL float computeAO(const Renderer *uniform renderer, const varying DifferentialGeometry &dg, const uniform int sampleCnt, const uniform float aoRadius, - const varying vec3i &sampleID) + const varying vec3i &sampleID, + const uniform FeatureFlags &ff) { const uniform int accumID = reduce_max(sampleID.z) * sampleCnt; @@ -58,7 +59,7 @@ SYCL_EXTERNAL float computeAO(const Renderer *uniform renderer, Ray ao_ray; setRay(ao_ray, dg.P, ao_dir, dg.epsilon, aoRadius); - if (isOccluded(world, ao_ray)) + if (isOccluded(world, ao_ray, ff)) hits++; } diff --git a/modules/cpu/texture/Texture2D.ih b/modules/cpu/texture/Texture2D.ih index 
c89de8bdb..c045f273b 100644 --- a/modules/cpu/texture/Texture2D.ih +++ b/modules/cpu/texture/Texture2D.ih @@ -9,10 +9,10 @@ OSPRAY_BEGIN_ISPC_NAMESPACE -SYCL_EXTERNAL __noinline vec4f Texture2D_get( +SYCL_EXTERNAL vec4f Texture2D_get( const uniform Texture *uniform self, const DifferentialGeometry &dg); -SYCL_EXTERNAL __noinline vec3f Texture2D_getNormal( +SYCL_EXTERNAL vec3f Texture2D_getNormal( const uniform Texture *uniform self, const DifferentialGeometry &dg); // XXX won't work with MIPmapping: clean implementation with clamping on integer diff --git a/modules/cpu/texture/Texture2D.ispc b/modules/cpu/texture/Texture2D.ispc index 4e591a8d9..0262dbea7 100644 --- a/modules/cpu/texture/Texture2D.ispc +++ b/modules/cpu/texture/Texture2D.ispc @@ -284,7 +284,7 @@ export void *uniform Texture2D_get_addr( return NULL; }; -SYCL_EXTERNAL __noinline vec4f Texture2D_get( +SYCL_EXTERNAL vec4f Texture2D_get( const uniform Texture *uniform _self, const DifferentialGeometry &dg) { const uniform Texture2D *uniform self = @@ -342,7 +342,7 @@ export void *uniform Texture2D_getN_addr( return (void *uniform) & Texture2D_Normal_neutral; }; -SYCL_EXTERNAL __noinline vec3f Texture2D_getNormal( +SYCL_EXTERNAL vec3f Texture2D_getNormal( const uniform Texture *uniform _self, const DifferentialGeometry &dg) { const uniform Texture2D *uniform self = diff --git a/modules/cpu/texture/TextureDispatch.ih b/modules/cpu/texture/TextureDispatch.ih index 05b715c5e..2b5b34041 100644 --- a/modules/cpu/texture/TextureDispatch.ih +++ b/modules/cpu/texture/TextureDispatch.ih @@ -10,10 +10,10 @@ OSPRAY_BEGIN_ISPC_NAMESPACE struct Texture; struct DifferentialGeometry; -SYCL_EXTERNAL __noinline vec4f Texture_dispatch_get( +SYCL_EXTERNAL vec4f Texture_dispatch_get( const uniform Texture *uniform self, const DifferentialGeometry &dg); -SYCL_EXTERNAL __noinline vec3f Texture_dispatch_getNormal( +SYCL_EXTERNAL vec3f Texture_dispatch_getNormal( const uniform Texture *const uniform self, const 
DifferentialGeometry &dg); OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/texture/TextureDispatch.ispc b/modules/cpu/texture/TextureDispatch.ispc index 8c35c1270..b9833eea6 100644 --- a/modules/cpu/texture/TextureDispatch.ispc +++ b/modules/cpu/texture/TextureDispatch.ispc @@ -8,7 +8,7 @@ OSPRAY_BEGIN_ISPC_NAMESPACE -SYCL_EXTERNAL __noinline vec4f Texture_dispatch_get( +SYCL_EXTERNAL vec4f Texture_dispatch_get( const uniform Texture *uniform self, const DifferentialGeometry &dg) { switch (self->type) { @@ -28,7 +28,7 @@ SYCL_EXTERNAL __noinline vec4f Texture_dispatch_get( return make_vec4f(0.f); } -SYCL_EXTERNAL __noinline vec3f Texture_dispatch_getNormal( +SYCL_EXTERNAL vec3f Texture_dispatch_getNormal( const uniform Texture *const uniform self, const DifferentialGeometry &dg) { switch (self->type) { diff --git a/modules/cpu/texture/TextureParam.ih b/modules/cpu/texture/TextureParam.ih index a48875d67..b740ab3df 100644 --- a/modules/cpu/texture/TextureParam.ih +++ b/modules/cpu/texture/TextureParam.ih @@ -4,8 +4,9 @@ #pragma once #include "Texture.ih" -#include "TextureParamShared.h" #include "rkcommon/math/AffineSpace.ih" +// c++ shared +#include "TextureParamShared.h" OSPRAY_BEGIN_ISPC_NAMESPACE diff --git a/modules/cpu/volume/Volume.cpp b/modules/cpu/volume/Volume.cpp index acf5bfade..ecfcbad25 100644 --- a/modules/cpu/volume/Volume.cpp +++ b/modules/cpu/volume/Volume.cpp @@ -27,7 +27,9 @@ namespace ospray { // Volume definitions //////////////////////////////////////////////////////// Volume::Volume(api::ISPCDevice &device, const std::string &type) - : AddStructShared(device.getIspcrtDevice(), device), vklType(type) + : AddStructShared(device.getIspcrtDevice(), device), + vklType(type), + featureFlags(FFV_VOLUME) { // check VKL has default config for VDB if (type == "vdb" diff --git a/modules/cpu/volume/Volume.h b/modules/cpu/volume/Volume.h index d909cd135..8fa8afdd8 100644 --- a/modules/cpu/volume/Volume.h +++ b/modules/cpu/volume/Volume.h @@ -6,6 +6,7 
@@ #include "ISPCDeviceObject.h" #include "common/StructShared.h" +#include "common/FeatureFlagsEnum.h" // embree #include "common/Embree.h" // openvkl @@ -28,6 +29,8 @@ struct OSPRAY_SDK_INTERFACE Volume std::string toString() const override; void commit() override; + FeatureFlagsVolume getFeatureFlagsVolume() const; + private: void checkDataStride(const Data *) const; void handleParams(); @@ -45,9 +48,16 @@ struct OSPRAY_SDK_INTERFACE Volume box3f bounds{empty}; std::string vklType; + + FeatureFlagsVolume featureFlags; }; OSPTYPEFOR_SPECIALIZATION(Volume *, OSP_VOLUME); +inline FeatureFlagsVolume Volume::getFeatureFlagsVolume() const +{ + return featureFlags; +} + } // namespace ospray #endif diff --git a/modules/cpu/volume/Volume.ispc b/modules/cpu/volume/Volume.ispc index cd4202f45..59bb2479e 100644 --- a/modules/cpu/volume/Volume.ispc +++ b/modules/cpu/volume/Volume.ispc @@ -41,8 +41,8 @@ RTC_SYCL_INDIRECTLY_CALLABLE unmasked void Volume_intersect_kernel( ray->geomID = args->geomID; ray->primID = 0; - EmbreeRayQueryContextVolume *uniform ctx = - (EmbreeRayQueryContextVolume * uniform) args->context; + RayQueryContextVolume *uniform ctx = + (RayQueryContextVolume * uniform) args->context; varying VolumeIntervals *uniform vIntervals = (varying VolumeIntervals * uniform) ctx->intervals; diff --git a/modules/cpu/volume/VolumetricModel.h b/modules/cpu/volume/VolumetricModel.h index 1c174bbc1..38b7f3db3 100644 --- a/modules/cpu/volume/VolumetricModel.h +++ b/modules/cpu/volume/VolumetricModel.h @@ -5,6 +5,7 @@ #pragma once #include "Volume.h" +#include "common/FeatureFlagsEnum.h" #include "openvkl/openvkl.h" // comment break to prevent clang-format from reordering openvkl includes #if OPENVKL_VERSION_MAJOR > 1 @@ -30,6 +31,10 @@ struct OSPRAY_SDK_INTERFACE VolumetricModel Ref getVolume() const; + FeatureFlagsGeometry getFeatureFlagsGeometry() const; + FeatureFlagsVolume getFeatureFlagsVolume() const; + FeatureFlagsOther getFeatureFlagsOther() const; + private: box3f 
volumeBounds; Ref volume; @@ -39,5 +44,20 @@ struct OSPRAY_SDK_INTERFACE VolumetricModel OSPTYPEFOR_SPECIALIZATION(VolumetricModel *, OSP_VOLUMETRIC_MODEL); +inline FeatureFlagsGeometry VolumetricModel::getFeatureFlagsGeometry() const +{ + return FFG_NONE; +} + +inline FeatureFlagsVolume VolumetricModel::getFeatureFlagsVolume() const +{ + return volume->getFeatureFlagsVolume(); +} + +inline FeatureFlagsOther VolumetricModel::getFeatureFlagsOther() const +{ + return FFO_NONE; +} + } // namespace ospray #endif diff --git a/modules/mpi/ospray/CMakeLists.txt b/modules/mpi/ospray/CMakeLists.txt index 04b0d3c92..f110e94bf 100644 --- a/modules/mpi/ospray/CMakeLists.txt +++ b/modules/mpi/ospray/CMakeLists.txt @@ -175,6 +175,7 @@ if (OSPRAY_MODULE_GPU) Snappy::snappy rkcommon::rkcommon ospray_mpi_common + ospray_module_gpu_kernels ospray_module_gpu ) @@ -193,12 +194,6 @@ if (OSPRAY_MODULE_GPU) -x c++ ) - target_compile_options(ospray_module_mpi_distributed_gpu PRIVATE - -fsycl - ${OSPRAY_CXX_FLAGS_SYCL} - -fsycl-targets=${OSPRAY_SYCL_TARGET} - ) - # Note: GPU hybrid code defines are brought in for us by linking the CPU module target_compile_definitions(ospray_module_mpi_distributed_gpu PRIVATE OSP_MPI_COMPRESSION_THRESHOLD=${OSPRAY_MPI_COMPRESSION_THRESHOLD} @@ -208,21 +203,6 @@ if (OSPRAY_MODULE_GPU) OBJECTFACTORY_IMPORT ) - if (OSPRAY_SYCL_AOT_DEVICES STREQUAL "none") - target_link_options(ospray_module_mpi_distributed_gpu PRIVATE - -fsycl - -fsycl-targets=${OSPRAY_SYCL_TARGET} - -Xsycl-target-backend=${OSPRAY_SYCL_TARGET} "${OSPRAY_OCL_OPTIONS_STR} -options \"${OSPRAY_OCL_OTHER_OPTIONS_STR} -igc_opts='${OSPRAY_IGC_OPTIONS_STR}'\"" - ) - else() - target_link_options(ospray_module_mpi_distributed_gpu PRIVATE - -fsycl - -fsycl-targets=${OSPRAY_SYCL_TARGET} - -Xsycl-target-backend=${OSPRAY_SYCL_TARGET} "-device ${OSPRAY_SYCL_AOT_DEVICES} -revision_id ${OSPRAY_SYCL_AOT_DEVICE_REVISION} ${OSPRAY_OCL_OPTIONS_STR} -options \"${OSPRAY_OCL_OTHER_OPTIONS_STR} 
-igc_opts='${OSPRAY_IGC_OPTIONS_STR}'\"" - ) - endif() - - ospray_install_library(ospray_module_mpi_distributed_gpu lib) endif() diff --git a/modules/mpi/ospray/fb/DistributedFrameBuffer.cpp b/modules/mpi/ospray/fb/DistributedFrameBuffer.cpp index 8fd6f7148..946ebd451 100644 --- a/modules/mpi/ospray/fb/DistributedFrameBuffer.cpp +++ b/modules/mpi/ospray/fb/DistributedFrameBuffer.cpp @@ -63,7 +63,7 @@ DFB::DistributedFrameBuffer(api::ISPCDevice &device, // be set from the object handle but pulled from some other ID pool // specific to those objects using the messaging layer : MessageHandler(myId), - FrameBuffer(device, numPixels, colorBufferFormat, channels), + FrameBuffer(device, numPixels, colorBufferFormat, channels, FFO_NONE), mpiGroup(mpicommon::worker.dup()), totalTiles(divRoundUp(size, vec2i(TILE_SIZE))), numRenderTasks((totalTiles * TILE_SIZE) / getRenderTaskSize()), diff --git a/modules/mpi/ospray/render/distributed/DistributedRaycast.cpp b/modules/mpi/ospray/render/distributed/DistributedRaycast.cpp index af6aa13f2..5b054159c 100644 --- a/modules/mpi/ospray/render/distributed/DistributedRaycast.cpp +++ b/modules/mpi/ospray/render/distributed/DistributedRaycast.cpp @@ -30,8 +30,10 @@ SYCL_EXTERNAL void DistributedRaycast_renderRegionToTileTask(void *_self, const void *_region, void *perFrameData, const void *_taskIDs, - const int taskIndex0); + const int taskIndex0, + const uniform FeatureFlags &ff); } +constexpr sycl::specialization_id specFeatureFlags; #endif namespace ospray { @@ -118,20 +120,30 @@ void DistributedRaycastRenderer::renderRegionTasks(SparseFrameBuffer *fb, const size_t numTasks = taskIDs.size(); auto event = syclQueue.submit([&](sycl::handler &cgh) { + FeatureFlags ff = world->getFeatureFlags(); + ff.other |= featureFlags; + ff.other |= fb->getFeatureFlagsOther(); + ff.other |= camera->getFeatureFlagsOther(); + cgh.set_specialization_constant(ff); + const sycl::nd_range<1> dispatchRange = computeDispatchRange(numTasks, 16); - 
cgh.parallel_for(dispatchRange, [=](sycl::nd_item<1> taskIndex) { - const box3f regionCopy = region; - if (taskIndex.get_global_id(0) < numTasks) { - ispc::DistributedRaycast_renderRegionToTileTask(&rendererSh->super, - fbSh, - cameraSh, - worldSh, - (ispc::box3f *)®ionCopy, - perFrameData, - taskIDsPtr, - taskIndex.get_global_id(0)); - } - }); + cgh.parallel_for(dispatchRange, + [=](sycl::nd_item<1> taskIndex, sycl::kernel_handler kh) { + const box3f regionCopy = region; + if (taskIndex.get_global_id(0) < numTasks) { + const FeatureFlags ff = + kh.get_specialization_constant(); + ispc::DistributedRaycast_renderRegionToTileTask(&rendererSh->super, + fbSh, + cameraSh, + worldSh, + (ispc::box3f *)®ionCopy, + perFrameData, + taskIDsPtr, + taskIndex.get_global_id(0), + ff); + } + }); }); event.wait_and_throw(); // For prints we have to flush the entire queue, because other stuff is queued diff --git a/modules/mpi/ospray/render/distributed/DistributedRaycast.ispc b/modules/mpi/ospray/render/distributed/DistributedRaycast.ispc index e20b70846..a7a353e31 100644 --- a/modules/mpi/ospray/render/distributed/DistributedRaycast.ispc +++ b/modules/mpi/ospray/render/distributed/DistributedRaycast.ispc @@ -139,7 +139,8 @@ inline float computeAO(const DistributedRaycastRenderer *uniform self, const FrameBuffer *uniform fb, const World *uniform world, const varying vec3i &sampleID, - const varying DifferentialGeometry &dg) + const varying DifferentialGeometry &dg, + const uniform FeatureFlags &ff) { const uniform int &sampleCnt = self->aoSamples; const uniform int accumID = reduce_max(sampleID.z) * sampleCnt; @@ -168,7 +169,7 @@ inline float computeAO(const DistributedRaycastRenderer *uniform self, Ray ao_ray; setRay(ao_ray, dg.P, ao_dir, dg.epsilon, self->aoRadius); - if (isOccluded(world, ao_ray)) + if (isOccluded(world, ao_ray, ff)) hits++; } @@ -182,7 +183,8 @@ vec4f DRR_shadeSurface(const DistributedRaycastRenderer *uniform self, const DistributedWorld *uniform world, const 
vec3i &sampleID, const Ray &ray, - const DifferentialGeometry &dg) + const DifferentialGeometry &dg, + const uniform FeatureFlags &ff) { // TODO: DRR should have its own support for OBJ material and lighting model @@ -209,7 +211,7 @@ vec4f DRR_shadeSurface(const DistributedRaycastRenderer *uniform self, const float eyeLightIntensity = absf(dot(dg.Ns, ray.dir)); vec3f color = surfaceColor * eyeLightIntensity; if (self->aoSamples > 0) { - float ao = computeAO(self, fb, &world->super, sampleID, dg); + float ao = computeAO(self, fb, &world->super, sampleID, dg, ff); color = color * ao; } return make_vec4f(color, opacity); @@ -261,13 +263,15 @@ vec4f DRR_shadeSurface(const DistributedRaycastRenderer *uniform self, #endif } +// TODO: Better separate geometry and volume code in this function SYCL_EXTERNAL void DRR_renderRegionSample(Renderer *uniform _self, SparseFB *uniform fb, DistributedWorld *uniform world, const box3f *uniform region, const vec2f ®ionInterval, void *uniform perFrameData, - varying ScreenSample &sample) + varying ScreenSample &sample, + const uniform FeatureFlags &ff) { DistributedRaycastRenderer *uniform self = (DistributedRaycastRenderer * uniform) _self; @@ -275,19 +279,22 @@ SYCL_EXTERNAL void DRR_renderRegionSample(Renderer *uniform _self, Ray &geomRay = sample.ray; Ray volumeRay = sample.ray; - traceRay(&world->super, geomRay); + traceRay(&world->super, geomRay, ff); #ifdef OSPRAY_ENABLE_VOLUMES VolumeInterval volumeInterval; - traceVolumeRay(&world->super, volumeRay, volumeInterval); - volumeInterval.interval.lower = - max(volumeInterval.interval.lower, regionInterval.x); - volumeInterval.interval.upper = - min(volumeInterval.interval.upper, regionInterval.y); + if (ff.volume & FFV_VOLUME) { + traceVolumeRay(&world->super, volumeRay, volumeInterval); + volumeInterval.interval.lower = + max(volumeInterval.interval.lower, regionInterval.x); + volumeInterval.interval.upper = + min(volumeInterval.interval.upper, regionInterval.y); - sample.z = 
min(geomRay.t, volumeInterval.interval.lower); -#else - sample.z = geomRay.t; + sample.z = min(geomRay.t, volumeInterval.interval.lower); + } else #endif + { + sample.z = geomRay.t; + } // TODO: Doesn't seem like much jittering happens with the volume integration vec4f outputColor = make_vec4f(0.f); @@ -296,12 +303,15 @@ SYCL_EXTERNAL void DRR_renderRegionSample(Renderer *uniform _self, && geomRay.t >= regionInterval.x && geomRay.t <= regionInterval.y; #ifdef OSPRAY_ENABLE_VOLUMES - const bool haveVolumeHit = hasInterval(volumeInterval); + const bool haveVolumeHit = + (ff.volume & FFV_VOLUME) ? hasInterval(volumeInterval) : false; const bool bothHit = haveGeometryHit && haveVolumeHit; const bool eitherHit = haveGeometryHit || haveVolumeHit; - const bool volumeFirst = volumeInterval.interval.lower < geomRay.t; + const bool volumeFirst = (ff.volume & FFV_VOLUME) + ? volumeInterval.interval.lower < geomRay.t + : false; #else const bool bothHit = false; const bool eitherHit = haveGeometryHit; @@ -320,14 +330,15 @@ SYCL_EXTERNAL void DRR_renderRegionSample(Renderer *uniform _self, dg, geomRay, DG_NG | DG_NS | DG_NORMALIZE | DG_FACEFORWARD | DG_COLOR - | DG_TEXCOORD); + | DG_TEXCOORD, + ff); surfaceColor = DRR_shadeSurface( - self, &fb->super, world, sample.sampleID, geomRay, dg); + self, &fb->super, world, sample.sampleID, geomRay, dg, ff); } #ifdef OSPRAY_ENABLE_VOLUMES // Always just integrate the volume when it comes in front of the geometry - if (haveVolumeHit && volumeFirst) { + if (haveVolumeHit && volumeFirst && (ff.volume & FFV_VOLUME)) { volumeInterval.interval.upper = min(geomRay.t, volumeInterval.interval.upper); @@ -389,9 +400,10 @@ SYCL_EXTERNAL void DRR_renderRegionSample(Renderer *uniform _self, // Step the volume ray forwards as well volumeRay = geomRay; - traceRay(&world->super, geomRay); + traceRay(&world->super, geomRay, ff); #ifdef OSPRAY_ENABLE_VOLUMES - traceVolumeRay(&world->super, volumeRay, volumeInterval); + if (ff.volume & FFV_VOLUME) + 
traceVolumeRay(&world->super, volumeRay, volumeInterval); #endif } sample.rgb = make_vec3f(outputColor); @@ -413,7 +425,8 @@ SYCL_EXTERNAL void DistributedRaycast_renderRegionToTileTask( const void *uniform _region, void *uniform perFrameData, const void *uniform _taskIDs, - const int taskIndex0) + const int taskIndex0, + const uniform FeatureFlags &ff) { Renderer *uniform self = (Renderer * uniform) _self; SparseFB *uniform fb = (SparseFB * uniform) _fb; @@ -422,7 +435,7 @@ SYCL_EXTERNAL void DistributedRaycast_renderRegionToTileTask( const box3f *uniform region = (const box3f *uniform)_region; const uint32 *uniform taskIDs = (const uint32 *uniform)_taskIDs; DR_default_renderRegionToTile( - self, fb, camera, world, region, perFrameData, taskIDs, taskIndex0); + self, fb, camera, world, region, perFrameData, taskIDs, taskIndex0, ff); } #else export void DistributedRaycast_renderRegionToTileTask(void *uniform _self, @@ -441,7 +454,7 @@ export void DistributedRaycast_renderRegionToTileTask(void *uniform _self, const box3f *uniform region = (const box3f *uniform)_region; const uint32 *uniform taskIDs = (const uint32 *uniform)_taskIDs; launch[numTasks] DR_default_renderRegionToTile( - self, fb, camera, world, region, perFrameData, taskIDs); + self, fb, camera, world, region, perFrameData, taskIDs, ffAll()); sync; } #endif diff --git a/modules/mpi/ospray/render/distributed/DistributedRenderer.cpp b/modules/mpi/ospray/render/distributed/DistributedRenderer.cpp index e707eb721..5cd299d4e 100644 --- a/modules/mpi/ospray/render/distributed/DistributedRenderer.cpp +++ b/modules/mpi/ospray/render/distributed/DistributedRenderer.cpp @@ -17,8 +17,10 @@ SYCL_EXTERNAL void DR_default_computeRegionVisibility(Renderer *uniform self, uint8 *uniform regionVisible, void *uniform perFrameData, const uint32 *uniform taskIDs, - const int taskIndex0); + const int taskIndex0, + const uniform ospray::FeatureFlags &ff); } +constexpr sycl::specialization_id specFeatureFlags; #endif namespace 
ospray { @@ -64,19 +66,29 @@ void DistributedRenderer::computeRegionVisibility(SparseFrameBuffer *fb, const size_t numTasks = taskIDs.size(); auto event = syclQueue.submit([&](sycl::handler &cgh) { + FeatureFlags ff = world->getFeatureFlags(); + ff.other |= featureFlags; + ff.other |= fb->getFeatureFlagsOther(); + ff.other |= camera->getFeatureFlagsOther(); + cgh.set_specialization_constant(ff); + const sycl::nd_range<1> dispatchRange = computeDispatchRange(numTasks, 16); - cgh.parallel_for(dispatchRange, [=](sycl::nd_item<1> taskIndex) { - if (taskIndex.get_global_id(0) < numTasks) { - ispc::DR_default_computeRegionVisibility(rendererSh, - fbSh, - cameraSh, - worldSh, - regionVisible, - perFrameData, - taskIDsPtr, - taskIndex.get_global_id(0)); - } - }); + cgh.parallel_for(dispatchRange, + [=](sycl::nd_item<1> taskIndex, sycl::kernel_handler kh) { + if (taskIndex.get_global_id(0) < numTasks) { + const FeatureFlags ff = + kh.get_specialization_constant(); + ispc::DR_default_computeRegionVisibility(rendererSh, + fbSh, + cameraSh, + worldSh, + regionVisible, + perFrameData, + taskIDsPtr, + taskIndex.get_global_id(0), + ff); + } + }); }); event.wait_and_throw(); // For prints we have to flush the entire queue, because other stuff is queued diff --git a/modules/mpi/ospray/render/distributed/DistributedRenderer.ispc b/modules/mpi/ospray/render/distributed/DistributedRenderer.ispc index 7a26796b6..9290da0ea 100644 --- a/modules/mpi/ospray/render/distributed/DistributedRenderer.ispc +++ b/modules/mpi/ospray/render/distributed/DistributedRenderer.ispc @@ -12,6 +12,7 @@ #include "common/FilterIntersect.ih" #include "common/Intersect.ih" #include "common/Ray.ih" +#include "common/RayQueryContext.ih" #include "common/World.ih" #include "fb/RenderTaskDesc.ih" #include "fb/SparseFB.ih" @@ -22,15 +23,15 @@ #include "rkcommon/utility/random.ih" #include "texture/Texture2D.ih" // c++ shared -#include "render/RendererShared.h" #include "common/DistributedWorldShared.h" #include 
"fb/FrameBufferDispatch.ih" #include "fb/FrameBufferShared.h" #include "fb/SparseFBShared.h" +#include "render/RendererShared.h" OSPRAY_BEGIN_ISPC_NAMESPACE -struct EmbreeRayQueryContextRegion +struct RayQueryContextRegion { RTCRayQueryContext ectx; uint8 *regionVisible; @@ -60,8 +61,8 @@ RTC_SYCL_INDIRECTLY_CALLABLE unmasked void Regions_intersect_kernel( const Intersections isect = intersectBox(ray->org, ray->dir, box); if ((isect.entry.hit && isect.entry.t > ray->t0 && isect.entry.t <= ray->t) || (isect.exit.hit && isect.exit.t > ray->t0 && isect.exit.t <= ray->t)) { - EmbreeRayQueryContextRegion *uniform ctx = - (EmbreeRayQueryContextRegion * uniform) args->context; + RayQueryContextRegion *uniform ctx = + (RayQueryContextRegion * uniform) args->context; uint8 *uniform regionVisible = (uint8 * uniform) ctx->regionVisible; regionVisible[primID] = 0xff; @@ -78,12 +79,11 @@ task DistributedWorld *uniform world, uint8 *uniform regionVisible, void *uniform perFrameData, - const uint32 *uniform taskIDs + const uint32 *uniform taskIDs, #ifdef OSPRAY_TARGET_SYCL - , - const int taskIndex0 + const int taskIndex0, #endif - ) + const uniform FeatureFlags &ff) { ScreenSample screenSample; screenSample.z = inf; @@ -91,8 +91,8 @@ task CameraSample cameraSample; - uniform RenderTaskDesc taskDesc = - FrameBuffer_dispatch_getRenderTaskDesc(&fb->super, taskIDs[taskIndex0]); + uniform RenderTaskDesc taskDesc = FrameBuffer_dispatch_getRenderTaskDesc( + &fb->super, taskIDs[taskIndex0], ff.other); if (fb->super.cancelRender || isEmpty(taskDesc.region)) { return; @@ -154,11 +154,12 @@ task cameraSample.lens.y = 0.0f; cameraSample.time = 0.5f; - Camera_dispatch_initRay(camera, screenSample.ray, cameraSample); + Camera_dispatch_initRay( + camera, screenSample.ray, cameraSample, ff.other); screenSample.ray.t = min(screenSample.ray.t, tMax); if (world->regionScene) { - uniform EmbreeRayQueryContextRegion context; + uniform RayQueryContextRegion context; 
rtcInitRayQueryContext(&context.ectx); context.regionVisible = tileRegionVisible; @@ -199,7 +200,8 @@ export void DistributedRenderer_computeRegionVisibility(void *uniform _self, (DistributedWorld * uniform) world, regionVisible, perFrameData, - taskIDs); + taskIDs, + ffAll()); } export void DistributedRenderer_pick(const void *uniform _self, @@ -229,8 +231,8 @@ export void DistributedRenderer_pick(const void *uniform _self, cameraSample.time = 0.5f; Ray ray; - Camera_dispatch_initRay(camera, ray, cameraSample); - ray.t = min(ray.t, Renderer_getMaxDepth(self, cameraSample.screen)); + Camera_dispatch_initRay(camera, ray, cameraSample, FFO_ALL); + ray.t = min(ray.t, Renderer_getMaxDepth(self, cameraSample.screen, FFO_ALL)); // Clip the ray to each region this rank owns and trace the clipped ray to // find the picked object @@ -247,7 +249,7 @@ export void DistributedRenderer_pick(const void *uniform _self, && isect.entry.t <= regionRay.t) { regionRay.t0 = isect.entry.t; regionRay.t = min(regionRay.t, isect.exit.t); - traceRay(&world->super, regionRay); + traceRay(&world->super, regionRay, ffAll()); if (hadHit(regionRay)) { closestHit = regionRay.t; diff --git a/modules/mpi/ospray/render/distributed/DistributedRendererRenderTaskFn.inl b/modules/mpi/ospray/render/distributed/DistributedRendererRenderTaskFn.inl index 84bf2896e..0c5c0c5dc 100644 --- a/modules/mpi/ospray/render/distributed/DistributedRendererRenderTaskFn.inl +++ b/modules/mpi/ospray/render/distributed/DistributedRendererRenderTaskFn.inl @@ -11,12 +11,11 @@ task DistributedWorld *uniform world, const box3f *uniform region, void *uniform perFrameData, - const uint32 *uniform taskIDs + const uint32 *uniform taskIDs, #ifdef OSPRAY_TARGET_SYCL - , - const int taskIndex0 + const int taskIndex0, #endif - ) + const uniform FeatureFlags &ff) { const uniform int32 spp = self->spp; @@ -26,8 +25,8 @@ task CameraSample cameraSample; - uniform RenderTaskDesc taskDesc = - 
FrameBuffer_dispatch_getRenderTaskDesc(&fb->super, taskIDs[taskIndex0]); + uniform RenderTaskDesc taskDesc = FrameBuffer_dispatch_getRenderTaskDesc( + &fb->super, taskIDs[taskIndex0], ff.other); const uniform int startSampleID = max(taskDesc.accumID, 0) * spp; @@ -58,7 +57,8 @@ task // set ray t value for early ray termination (from maximum depth texture) vec2f center = make_vec2f(screenSample.sampleID.x, screenSample.sampleID.y) + 0.5f; - const float tMax = Renderer_getMaxDepth(self, center * fb->super.rcpSize); + const float tMax = + Renderer_getMaxDepth(self, center * fb->super.rcpSize, ff.other); vec3f col = make_vec3f(0.f); float alpha = 0.f; vec3f normal = make_vec3f(0.f); @@ -81,7 +81,8 @@ task cameraSample.lens.y = 0.0f; cameraSample.time = 0.5f; - Camera_dispatch_initRay(camera, screenSample.ray, cameraSample); + Camera_dispatch_initRay( + camera, screenSample.ray, cameraSample, ff.other); screenSample.ray.t = min(screenSample.ray.t, tMax); // TODO: We could store and use the region t intervals from when @@ -101,7 +102,8 @@ task region, make_vec2f(regionEnter, regionExit), perFrameData, - screenSample); + screenSample, + ff); col = col + screenSample.rgb; alpha += screenSample.alpha; @@ -115,8 +117,8 @@ task screenSample.normal = normal * rspp; screenSample.albedo = albedo * rspp; - FrameBuffer_dispatch_accumulateSample(&fb->super, screenSample, taskDesc); + FrameBuffer_dispatch_accumulateSample( + &fb->super, screenSample, taskDesc, ff.other); } - FrameBuffer_dispatch_completeTask(&fb->super, taskDesc); + FrameBuffer_dispatch_completeTask(&fb->super, taskDesc, ff.other); } - diff --git a/modules/multiDevice/CMakeLists.txt b/modules/multiDevice/CMakeLists.txt index e6f73d67c..ca2af9707 100644 --- a/modules/multiDevice/CMakeLists.txt +++ b/modules/multiDevice/CMakeLists.txt @@ -74,25 +74,22 @@ if (OSPRAY_MODULE_GPU) message(FATAL_ERROR "CMAKE_CXX_COMPILER should be set to '${DPCPP_COMPILER}' for SYCL") endif() -ospray_configure_dpcpp_target() + 
ospray_configure_dpcpp_target() + add_library(ospray_module_multidevice_gpu SHARED + ${OSPRAY_MULTIDEVICE_CPP_SOURCES}) -add_library(ospray_module_multidevice_gpu SHARED - ${OSPRAY_MULTIDEVICE_CPP_SOURCES}) - -set_target_properties(ospray_module_multidevice_gpu PROPERTIES + set_target_properties(ospray_module_multidevice_gpu PROPERTIES CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON) - -target_link_libraries(ospray_module_multidevice_gpu + target_link_libraries(ospray_module_multidevice_gpu PRIVATE rkcommon::rkcommon ospray_module_gpu - ospray_module_gpu_kernels -) + ) -target_include_directories(ospray_module_multidevice_gpu + target_include_directories(ospray_module_multidevice_gpu PUBLIC $ $ @@ -100,38 +97,17 @@ target_include_directories(ospray_module_multidevice_gpu ${CMAKE_CURRENT_BINARY_DIR} # For ISPC module generated ispc headers ${PROJECT_BINARY_DIR}/ospray/ -) + ) - target_compile_options(ospray_module_multidevice_gpu + target_compile_options(ospray_module_multidevice_gpu PRIVATE -x c++ ) - target_compile_options(ospray_module_multidevice_gpu PRIVATE - -fsycl - ${OSPRAY_CXX_FLAGS_SYCL} - -fsycl-targets=${OSPRAY_SYCL_TARGET} + target_compile_definitions(ospray_module_multidevice_gpu PRIVATE + OBJECTFACTORY_IMPORT ) -target_compile_definitions(ospray_module_multidevice_gpu PRIVATE - OBJECTFACTORY_IMPORT -) - - if (OSPRAY_SYCL_AOT_DEVICES STREQUAL "none") - target_link_options(ospray_module_multidevice_gpu PRIVATE - -fsycl - -fsycl-targets=${OSPRAY_SYCL_TARGET} - -Xsycl-target-backend=${OSPRAY_SYCL_TARGET} "${OSPRAY_OCL_OPTIONS_STR} -options \"${OSPRAY_OCL_OTHER_OPTIONS_STR} -igc_opts='${OSPRAY_IGC_OPTIONS_STR}'\"" - ) - else() - target_link_options(ospray_module_multidevice_gpu PRIVATE - -fsycl - -fsycl-targets=${OSPRAY_SYCL_TARGET} - -Xsycl-target-backend=${OSPRAY_SYCL_TARGET} "-device ${OSPRAY_SYCL_AOT_DEVICES} -revision_id ${OSPRAY_SYCL_AOT_DEVICE_REVISION} ${OSPRAY_OCL_OPTIONS_STR} -options \"${OSPRAY_OCL_OTHER_OPTIONS_STR} 
-igc_opts='${OSPRAY_IGC_OPTIONS_STR}'\"" - ) - endif() - - -ospray_install_library(ospray_module_multidevice_gpu lib) + ospray_install_library(ospray_module_multidevice_gpu lib) endif() diff --git a/modules/pluggableGeometryExample/ospray/geometry/BilinearPatches.cpp b/modules/pluggableGeometryExample/ospray/geometry/BilinearPatches.cpp index 4f1c119fb..4f7c65d72 100644 --- a/modules/pluggableGeometryExample/ospray/geometry/BilinearPatches.cpp +++ b/modules/pluggableGeometryExample/ospray/geometry/BilinearPatches.cpp @@ -19,7 +19,7 @@ namespace ospray { namespace blp { BilinearPatches::BilinearPatches(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared(device.getIspcrtDevice(), device, FFG_NONE) { getSh()->super.postIntersect = ispc::BilinearPatches_postIntersect_addr(); getSh()->super.intersect = ispc::BilinearPatches_intersect_addr(); From 1c4ce88bcd208705e2083c5a993ef316205ba8e4 Mon Sep 17 00:00:00 2001 From: Miroslaw Pawlowski Date: Fri, 17 Mar 2023 14:29:05 +0100 Subject: [PATCH 04/42] Recursion removal from SciVis volume raymarching --- modules/cpu/render/scivis/SciVis.ispc | 1 - modules/cpu/render/scivis/lightAlpha.ispc | 16 ++++------------ modules/cpu/render/scivis/volumes.ih | 1 - modules/cpu/render/scivis/volumes.ispc | 11 +++-------- 4 files changed, 7 insertions(+), 22 deletions(-) diff --git a/modules/cpu/render/scivis/SciVis.ispc b/modules/cpu/render/scivis/SciVis.ispc index f51105a6b..76134b268 100644 --- a/modules/cpu/render/scivis/SciVis.ispc +++ b/modules/cpu/render/scivis/SciVis.ispc @@ -92,7 +92,6 @@ static void SciVis_renderSample(Renderer *uniform _self, volumeRay, ldSampler, self->volumeSamplingRate, - true, ff); // Blend volume diff --git a/modules/cpu/render/scivis/lightAlpha.ispc b/modules/cpu/render/scivis/lightAlpha.ispc index 70df5a5a7..8b13c475f 100644 --- a/modules/cpu/render/scivis/lightAlpha.ispc +++ b/modules/cpu/render/scivis/lightAlpha.ispc @@ -7,7 +7,7 @@ // SciVis renderer 
#include "surfaces.ih" #ifdef OSPRAY_ENABLE_VOLUMES -#include "volumes.ih" +#include "render/ao/volumes.ih" #endif // c++ shared #include "SciVisShared.h" @@ -66,22 +66,14 @@ SYCL_EXTERNAL vec3f lightAlpha(const uniform SciVis *uniform self, // Sample volumes across volume intervals (in front of geometry hit) if (volumeIntervals.numVolumeIntervals > 0) { - SciVisRenderContext rc; - rc.renderer = self; - rc.fb = fb; - rc.world = world; - rc.sample = sample; - rc.ldSampler = ldSampler; - vec4f volumeColor = integrateVolumeIntervalsGradient(rc, + vec4f volumeColor = integrateVolumeIntervals( volumeIntervals, rayIntervals, volumeRay, ldSampler, - self->volumeSamplingRate * quality, - false, - ff); + self->volumeSamplingRate * quality); - alpha = alpha * make_vec3f(volumeColor.w); + alpha = alpha * make_vec3f(1.f - volumeColor.w); } } #endif diff --git a/modules/cpu/render/scivis/volumes.ih b/modules/cpu/render/scivis/volumes.ih index 8c3563bd9..3d4ae24e9 100644 --- a/modules/cpu/render/scivis/volumes.ih +++ b/modules/cpu/render/scivis/volumes.ih @@ -18,7 +18,6 @@ SYCL_EXTERNAL vec4f integrateVolumeIntervalsGradient(SciVisRenderContext &rc, Ray &ray, varying LDSampler *uniform ldSampler, const uniform float samplingRate, - const uniform bool shade, const uniform FeatureFlags &ff); OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/render/scivis/volumes.ispc b/modules/cpu/render/scivis/volumes.ispc index b22d472f3..226b8c7f0 100644 --- a/modules/cpu/render/scivis/volumes.ispc +++ b/modules/cpu/render/scivis/volumes.ispc @@ -35,13 +35,11 @@ static void sampleVolume(SciVisRenderContext &rc, Ray &ray, const VolumeInterval &vi, const uniform float samplingRate, - const uniform bool shade, const uniform FeatureFlags &ff) { // We have to iterate till we get a valid sample value float dt; float sampleVal = nan; - const uniform float gsc = shade ? 
m->gradientShadingScale : 0.f; vec3f p; // in volume local coords while (isnan(sampleVal)) { // Iterate till sampling position is within interval @@ -100,7 +98,7 @@ static void sampleVolume(SciVisRenderContext &rc, vc.sample = TransferFunction_dispatch_get(tf, sampleVal); // compute gradient shading lighting - if (gsc > 0.0f) { + if (m->gradientShadingScale > 0.0f) { vec3f ns = Volume_getGradient(m->volume, p); if (dot(ns, ns) > 1e-6f) { // assume that opacity directly correlates to volume scalar field, i.e. @@ -128,7 +126,7 @@ static void sampleVolume(SciVisRenderContext &rc, ff); vec4f shadedColor = make_vec4f( shading.shadedColor, 1.f - luminance(shading.transmission)); - vc.sample = lerp(gsc, vc.sample, shadedColor); + vc.sample = lerp(m->gradientShadingScale, vc.sample, shadedColor); } } @@ -142,7 +140,6 @@ static float sampleAllVolumes(SciVisRenderContext &rc, Ray &ray, const uniform float samplingRate, vec4f &sampledColor, - const uniform bool shade, const uniform FeatureFlags &ff) { // Look for the closest sample across all volumes @@ -163,7 +160,7 @@ static float sampleAllVolumes(SciVisRenderContext &rc, #else foreach_unique (m in vi.volumetricModel) { #endif - sampleVolume(rc, vc, m, ray, vi, samplingRate, shade, ff); + sampleVolume(rc, vc, m, ray, vi, samplingRate, ff); } vc.ready = 1; } @@ -198,7 +195,6 @@ SYCL_EXTERNAL vec4f integrateVolumeIntervalsGradient(SciVisRenderContext &rc, Ray &ray, varying LDSampler *uniform ldSampler, const uniform float samplingRate, - const uniform bool shade, const uniform FeatureFlags &ff) { #ifdef OSPRAY_TARGET_SYCL @@ -315,7 +311,6 @@ SYCL_EXTERNAL vec4f integrateVolumeIntervalsGradient(SciVisRenderContext &rc, ray, samplingRate, sampledColor, - shade, ff); // Exit loop if nothing sampled From 994576a3a09c018cfc3566cb9a34f12f5de90189 Mon Sep 17 00:00:00 2001 From: Miroslaw Pawlowski Date: Tue, 21 Mar 2023 17:16:19 +0100 Subject: [PATCH 05/42] Get back to ci-comp_igc-19476 driver, removed some volume testing on GPU --- 
.github/workflows/ci.linux.gpu.yml | 8 ++++---- scripts/tests/run_gpu_tests.sh | 4 ++++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.linux.gpu.yml b/.github/workflows/ci.linux.gpu.yml index a3a072077..0258b681d 100644 --- a/.github/workflows/ci.linux.gpu.yml +++ b/.github/workflows/ci.linux.gpu.yml @@ -20,7 +20,7 @@ jobs: secrets: inherit with: force-delete: true # guarantees .gitattributes are respected in working dir - gfx-driver-version: neo-builds/ci/master/ci-neo-master-025731/artifacts/linux/ubuntu/22.04 + gfx-driver-version: gfx-driver-builds/ci/comp_igc/gfx-driver-ci-comp_igc-19476/artifacts/Linux/Ubuntu/22.04/Release dpcpp-version: sycl-nightly/20221214 install-gfx-driver: true submodules: true @@ -38,7 +38,7 @@ jobs: secrets: inherit with: force-delete: true # guarantees .gitattributes are respected in working dir - gfx-driver-version: neo-builds/ci/master/ci-neo-master-025731/artifacts/linux/ubuntu/22.04 + gfx-driver-version: gfx-driver-builds/ci/comp_igc/gfx-driver-ci-comp_igc-19476/artifacts/Linux/Ubuntu/22.04/Release dpcpp-version: intel/2023.0 install-gfx-driver: true submodules: true @@ -60,7 +60,7 @@ jobs: uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker_gpu.yml@main secrets: inherit with: - gfx-driver-version: neo-builds/ci/master/ci-neo-master-025731/artifacts/linux/ubuntu/22.04 + gfx-driver-version: gfx-driver-builds/ci/comp_igc/gfx-driver-ci-comp_igc-19476/artifacts/Linux/Ubuntu/22.04/Release dpcpp-version: sycl-nightly/20221214 install-gfx-driver: true submodules: true @@ -84,7 +84,7 @@ jobs: uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker_gpu.yml@tgt/support_oneapi_icx secrets: inherit with: - gfx-driver-version: neo-builds/ci/master/ci-neo-master-025731/artifacts/linux/ubuntu/22.04 + gfx-driver-version: gfx-driver-builds/ci/comp_igc/gfx-driver-ci-comp_igc-19476/artifacts/Linux/Ubuntu/22.04/Release dpcpp-version: intel/2023.0 
install-gfx-driver: true submodules: true diff --git a/scripts/tests/run_gpu_tests.sh b/scripts/tests/run_gpu_tests.sh index eccee742f..87866e6d2 100755 --- a/scripts/tests/run_gpu_tests.sh +++ b/scripts/tests/run_gpu_tests.sh @@ -61,6 +61,10 @@ test_filters+=":TestScenesGeometry/FromOsprayTesting.test_scenes/24" test_filters+=":TestScenesGeometry/FromOsprayTesting.test_scenes/25" test_filters+=":TestScenesGeometry/FromOsprayTesting.test_scenes/26" test_filters+=":TestScenesClipping/FromOsprayTesting.*" +test_filters+=":TestScenesVolumes/FromOsprayTesting.test_scenes/1" +test_filters+=":TestScenesVolumes/FromOsprayTesting.test_scenes/3" +test_filters+=":TestScenesVolumes/FromOsprayTesting.test_scenes/4" +test_filters+=":TestScenesVolumes/FromOsprayTesting.test_scenes/5" test_filters+=":TestScenesVolumes/FromOsprayTesting.test_scenes/6" test_filters+=":TestScenesVolumes/FromOsprayTesting.test_scenes/7" test_filters+=":TestScenesVolumes/FromOsprayTesting.test_scenes/8" From ad263f41d1f77ca704283b17164ec3a3a6e83734 Mon Sep 17 00:00:00 2001 From: Miroslaw Pawlowski Date: Fri, 24 Mar 2023 12:13:37 +0100 Subject: [PATCH 06/42] CI: MPI on GPU job disabled --- .github/workflows/ci.linux.gpu.yml | 47 +++++++++++++++--------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/.github/workflows/ci.linux.gpu.yml b/.github/workflows/ci.linux.gpu.yml index 0258b681d..acfa90b19 100644 --- a/.github/workflows/ci.linux.gpu.yml +++ b/.github/workflows/ci.linux.gpu.yml @@ -79,26 +79,27 @@ jobs: export PATH="$GITHUB_WORKSPACE/build/install/ospray/bin:$PATH" scripts/tests/run_gpu_tests.sh "$GITHUB_WORKSPACE" - test-ubuntu2204-DG2-JIT-mpi: - needs: [build-ubuntu2204-DG2-JIT-mpi] - uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker_gpu.yml@tgt/support_oneapi_icx - secrets: inherit - with: - gfx-driver-version: gfx-driver-builds/ci/comp_igc/gfx-driver-ci-comp_igc-19476/artifacts/Linux/Ubuntu/22.04/Release - dpcpp-version: intel/2023.0 - 
install-gfx-driver: true - submodules: true - image: ubuntu:22.04 - options: --device=/dev/dri:/dev/dri - runs-on: '[ "Linux", "docker", "dg2" ]' - artifact-in: build-ubuntu2204-DG2-JIT-mpi - artifact-out: test-ubuntu2204-DG2-JIT-mpi - artifact-path: build_regression_tests/tests*.xml build_regression_tests/failed* - artifact-on-failure: true - cmd: | - sudo apt remove openmpi-* libopenmpi3 -y - module load level-zero/1.9.4 - module load mpi - export LD_LIBRARY_PATH="$GITHUB_WORKSPACE/build/install/ospray/lib:$GITHUB_WORKSPACE/build/install/embree/lib:$LD_LIBRARY_PATH" - export PATH="$GITHUB_WORKSPACE/build/install/ospray/bin:$PATH" - scripts/tests/run_gpu_tests.sh "$GITHUB_WORKSPACE" TEST_MPI +# test-ubuntu2204-DG2-JIT-mpi: +# needs: [build-ubuntu2204-DG2-JIT-mpi] +# uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker_gpu.yml@tgt/support_oneapi_icx +# secrets: inherit +# with: +# allow-failure: true +# gfx-driver-version: gfx-driver-builds/ci/comp_igc/gfx-driver-ci-comp_igc-19476/artifacts/Linux/Ubuntu/22.04/Release +# dpcpp-version: intel/2023.0 +# install-gfx-driver: true +# submodules: true +# image: ubuntu:22.04 +# options: --device=/dev/dri:/dev/dri +# runs-on: '[ "Linux", "docker", "dg2" ]' +# artifact-in: build-ubuntu2204-DG2-JIT-mpi +# artifact-out: test-ubuntu2204-DG2-JIT-mpi +# artifact-path: build_regression_tests/tests*.xml build_regression_tests/failed* +# artifact-on-failure: true +# cmd: | +# sudo apt remove openmpi-* libopenmpi3 -y +# module load level-zero/1.9.4 +# module load mpi +# export LD_LIBRARY_PATH="$GITHUB_WORKSPACE/build/install/ospray/lib:$GITHUB_WORKSPACE/build/install/embree/lib:$LD_LIBRARY_PATH" +# export PATH="$GITHUB_WORKSPACE/build/install/ospray/bin:$PATH" +# scripts/tests/run_gpu_tests.sh "$GITHUB_WORKSPACE" TEST_MPI From b767a6596565ddd59204a8c39895ba02bf73bfc9 Mon Sep 17 00:00:00 2001 From: "David E. 
DeMarle" Date: Wed, 22 Mar 2023 14:33:22 -0400 Subject: [PATCH 07/42] guard against empty tiles as can happen with many mpi ranks --- modules/cpu/fb/SparseFB.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/modules/cpu/fb/SparseFB.cpp b/modules/cpu/fb/SparseFB.cpp index e98b674c7..e826aca9b 100644 --- a/modules/cpu/fb/SparseFB.cpp +++ b/modules/cpu/fb/SparseFB.cpp @@ -96,6 +96,9 @@ uint32_t SparseFrameBuffer::getTotalRenderTasks() const utility::ArrayView SparseFrameBuffer::getRenderTaskIDs( float errorThreshold) { + if (!renderTaskIDs) + return utility::ArrayView(nullptr, 0); + if (errorThreshold > 0.0f && hasVarianceBuffer) { auto last = std::copy_if(renderTaskIDs->begin(), renderTaskIDs->end(), @@ -148,8 +151,10 @@ void SparseFrameBuffer::beginFrame() FrameBuffer::beginFrame(); // TODO We could launch a kernel here - for (auto &tile : *tiles) { - tile.accumID = getFrameID(); + if (tiles) { + for (auto &tile : *tiles) { + tile.accumID = getFrameID(); + } } std::for_each(imageOps.begin(), From c19c5cef38448b795a7229937b2d490b12119c87 Mon Sep 17 00:00:00 2001 From: Will Usher Date: Thu, 23 Mar 2023 14:14:51 -0700 Subject: [PATCH 08/42] Handle empty tileIDs in SparseFrameBuffer::setTiles --- modules/cpu/fb/SparseFB.cpp | 133 +++++++++++++++++++++++------------- 1 file changed, 86 insertions(+), 47 deletions(-) diff --git a/modules/cpu/fb/SparseFB.cpp b/modules/cpu/fb/SparseFB.cpp index e826aca9b..4ecc09a70 100644 --- a/modules/cpu/fb/SparseFB.cpp +++ b/modules/cpu/fb/SparseFB.cpp @@ -36,9 +36,7 @@ SparseFrameBuffer::SparseFrameBuffer(api::ISPCDevice &device, "0"); } - if (!_tileIDs.empty()) { - setTiles(_tileIDs); - } + setTiles(_tileIDs); } SparseFrameBuffer::SparseFrameBuffer(api::ISPCDevice &device, @@ -118,6 +116,12 @@ std::string SparseFrameBuffer::toString() const float SparseFrameBuffer::taskError(const uint32_t taskID) const { + // If this SparseFB doesn't have any tiles return 0. 
This should not typically + // be called in this case anyways + if (!tiles) { + return 0.f; + } + if (!taskErrorBuffer) { throw std::runtime_error( "SparseFrameBuffer::taskError: trying to get task error on FB without variance/error buffers"); @@ -128,6 +132,11 @@ float SparseFrameBuffer::taskError(const uint32_t taskID) const void SparseFrameBuffer::setTaskError(const uint32_t taskID, const float error) { + // If this SparseFB doesn't have any tiles then do nothing. This should not + // typically be called in this case anyways + if (!tiles) { + return; + } if (!taskErrorBuffer) { throw std::runtime_error( "SparseFrameBuffer::setTaskError: trying to set task error on FB without variance/error buffers"); @@ -140,7 +149,10 @@ void SparseFrameBuffer::setTaskAccumID(const uint32_t taskID, const int accumID) // Note: USM migration? if (taskAccumID) { (*taskAccumID)[taskID] = accumID; - } else { + } else if (tiles) { + // If we have tiles but not an accum buffer it's a hard error to call this + // function. 
If we don't have tiles we just exit silently, this function + // shouldn't be called when this SparseFB is empty anyways throw std::runtime_error( "SparseFrameBuffer::setTaskAccumID: called on SparseFB without accumIDs"); } @@ -220,76 +232,103 @@ uint32_t SparseFrameBuffer::getTileIndexForTask(uint32_t taskID) const void SparseFrameBuffer::setTiles(const std::vector &_tileIDs) { // (Re-)configure the sparse framebuffer based on the tileIDs we're passed - tileIDs = make_buffer_shared_unique( - getISPCDevice().getIspcrtDevice(), _tileIDs.size()); - std::memcpy( - tileIDs->data(), _tileIDs.data(), sizeof(uint32_t) * _tileIDs.size()); - numRenderTasks = - vec2i(tileIDs->size() * TILE_SIZE, TILE_SIZE) / getRenderTaskSize(); - - if (hasVarianceBuffer) { + + if (!_tileIDs.empty()) { + tileIDs = make_buffer_shared_unique( + getISPCDevice().getIspcrtDevice(), _tileIDs.size()); + std::memcpy( + tileIDs->data(), _tileIDs.data(), sizeof(uint32_t) * _tileIDs.size()); + numRenderTasks = + vec2i(tileIDs->size() * TILE_SIZE, TILE_SIZE) / getRenderTaskSize(); + } else { + tileIDs = nullptr; + numRenderTasks = vec2i(0); + } + + if (hasVarianceBuffer && !_tileIDs.empty()) { taskErrorBuffer = make_buffer_shared_unique( getISPCDevice().getIspcrtDevice(), numRenderTasks.long_product()); std::fill(taskErrorBuffer->begin(), taskErrorBuffer->end(), inf); + } else { + taskErrorBuffer = nullptr; } - tiles = make_buffer_shared_unique( - getISPCDevice().getIspcrtDevice(), tileIDs->size()); - const vec2f rcpSize = rcp(vec2f(size)); - for (size_t i = 0; i < tileIDs->size(); ++i) { - vec2i tilePos; - const uint32_t tid = (*tileIDs)[i]; - tilePos.x = tid % totalTiles.x; - tilePos.y = tid / totalTiles.x; - - Tile &t = (*tiles)[i]; - t.fbSize = size; - t.rcp_fbSize = rcpSize; - t.region.lower = tilePos * TILE_SIZE; - t.region.upper = min(t.region.lower + TILE_SIZE, size); - t.accumID = 0; + if (!_tileIDs.empty()) { + tiles = make_buffer_shared_unique( + getISPCDevice().getIspcrtDevice(), 
tileIDs->size()); + const vec2f rcpSize = rcp(vec2f(size)); + for (size_t i = 0; i < tileIDs->size(); ++i) { + vec2i tilePos; + const uint32_t tid = (*tileIDs)[i]; + tilePos.x = tid % totalTiles.x; + tilePos.y = tid / totalTiles.x; + + Tile &t = (*tiles)[i]; + t.fbSize = size; + t.rcp_fbSize = rcpSize; + t.region.lower = tilePos * TILE_SIZE; + t.region.upper = min(t.region.lower + TILE_SIZE, size); + t.accumID = 0; + } + } else { + tiles = nullptr; } - const size_t numPixels = tileIDs->size() * TILE_SIZE * TILE_SIZE; - if (hasVarianceBuffer) { + const size_t numPixels = tiles ? tileIDs->size() * TILE_SIZE * TILE_SIZE : 0; + if (hasVarianceBuffer && !_tileIDs.empty()) { varianceBuffer = make_buffer_shared_unique( getISPCDevice().getIspcrtDevice(), numPixels); + } else { + varianceBuffer = nullptr; } - if (hasAccumBuffer) { + if (hasAccumBuffer && !_tileIDs.empty()) { accumulationBuffer = make_buffer_shared_unique( getISPCDevice().getIspcrtDevice(), numPixels); + } else { + accumulationBuffer = nullptr; } - if (hasAccumBuffer || useTaskAccumIDs) { + if ((hasAccumBuffer || useTaskAccumIDs) && !_tileIDs.empty()) { taskAccumID = make_buffer_shared_unique( getISPCDevice().getIspcrtDevice(), getTotalRenderTasks()); std::memset(taskAccumID->begin(), 0, taskAccumID->size() * sizeof(int)); + } else { + taskAccumID = nullptr; } // TODO: Should find a better way for allowing sparse task id sets // here we have this array b/c the tasks will be filtered down based on // variance termination - renderTaskIDs = make_buffer_shared_unique( - getISPCDevice().getIspcrtDevice(), getTotalRenderTasks()); - std::iota(renderTaskIDs->begin(), renderTaskIDs->end(), 0); - if (hasVarianceBuffer) + if (!_tileIDs.empty()) { + renderTaskIDs = make_buffer_shared_unique( + getISPCDevice().getIspcrtDevice(), getTotalRenderTasks()); + std::iota(renderTaskIDs->begin(), renderTaskIDs->end(), 0); + } else { + renderTaskIDs = nullptr; + } + if (hasVarianceBuffer && !_tileIDs.empty()) { activeTaskIDs = 
make_buffer_shared_unique( getISPCDevice().getIspcrtDevice(), getTotalRenderTasks()); + } else { + activeTaskIDs = nullptr; + } const uint32_t nTasksPerTile = getNumTasksPerTile(); // Sort each tile's tasks in Z order - rkcommon::tasking::parallel_for(tiles->size(), [&](const size_t i) { - std::sort(renderTaskIDs->begin() + i * nTasksPerTile, - renderTaskIDs->begin() + (i + 1) * nTasksPerTile, - [&](const uint32_t &a, const uint32_t &b) { - const vec2i p_a = getTaskPosInTile(a); - const vec2i p_b = getTaskPosInTile(b); - return interleaveZOrder(p_a.x, p_a.y) - < interleaveZOrder(p_b.x, p_b.y); - }); - }); + if (tiles) { + rkcommon::tasking::parallel_for(tiles->size(), [&](const size_t i) { + std::sort(renderTaskIDs->begin() + i * nTasksPerTile, + renderTaskIDs->begin() + (i + 1) * nTasksPerTile, + [&](const uint32_t &a, const uint32_t &b) { + const vec2i p_a = getTaskPosInTile(a); + const vec2i p_b = getTaskPosInTile(b); + return interleaveZOrder(p_a.x, p_a.y) + < interleaveZOrder(p_b.x, p_b.y); + }); + }); + } clear(); @@ -308,8 +347,8 @@ void SparseFrameBuffer::setTiles(const std::vector &_tileIDs) getSh()->numRenderTasks = numRenderTasks; getSh()->totalTiles = totalTiles; - getSh()->tiles = tiles->data(); - getSh()->numTiles = tiles->size(); + getSh()->tiles = tiles ? tiles->data() : nullptr; + getSh()->numTiles = tiles ? tiles->size() : 0; getSh()->taskAccumID = taskAccumID ? 
taskAccumID->data() : nullptr; getSh()->accumulationBuffer = From 69a19c9680cb654a70aaa549b2cffae947fd70f5 Mon Sep 17 00:00:00 2001 From: Miroslaw Pawlowski Date: Fri, 24 Mar 2023 18:41:28 +0100 Subject: [PATCH 09/42] Fixed issues found by Klocwork --- modules/cpu/common/FeatureFlagsEnum.h | 32 +++++++++++++++++++++------ modules/cpu/common/Group.cpp | 2 +- modules/cpu/common/World.cpp | 2 +- 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/modules/cpu/common/FeatureFlagsEnum.h b/modules/cpu/common/FeatureFlagsEnum.h index 94c6af990..d872506bc 100644 --- a/modules/cpu/common/FeatureFlagsEnum.h +++ b/modules/cpu/common/FeatureFlagsEnum.h @@ -4,10 +4,16 @@ #pragma once #ifdef __cplusplus +#include +#include namespace ospray { #endif // __cplusplus +// NOTE: This enum must be binary compatible with Embree RTCFeatureFlags enum FeatureFlagsGeometry +#ifdef __cplusplus + : uint32_t +#endif { FFG_NONE = 0, @@ -48,16 +54,20 @@ enum FeatureFlagsGeometry FFG_USER_GEOMETRY = 1 << 26, // RTC_FEATURE_FLAG_USER_GEOMETRY_CALLBACK_IN_ARGUMENTS + // OSPRay specific flags FFG_BOX = 1 << 29, FFG_PLANE = 1 << 30, - FFG_ISOSURFACE = 1 << 31, + FFG_ISOSURFACE = 1u << 31, - FFG_OSPRAY_MASK = 1 << 29 | 1 << 30 | 1 << 31, + FFG_OSPRAY_MASK = 1 << 29 | 1 << 30 | 1u << 31, FFG_ALL = 0xffffffff }; enum FeatureFlagsVolume +#ifdef __cplusplus + : uint32_t +#endif { FFV_NONE = 0, @@ -67,6 +77,9 @@ enum FeatureFlagsVolume }; enum FeatureFlagsOther +#ifdef __cplusplus + : uint32_t +#endif { FFO_NONE = 0, @@ -111,7 +124,10 @@ struct FeatureFlags FeatureFlagsVolume volume; FeatureFlagsOther other; #ifdef __cplusplus - void setNone() + constexpr FeatureFlags() + : geometry(FFG_NONE), volume(FFV_NONE), other(FFO_NONE) + {} + void reset() { geometry = FFG_NONE; volume = FFV_NONE; @@ -119,16 +135,18 @@ struct FeatureFlags } }; -template +template ::value>::type> inline T operator|(T a, T b) { - return (T)((unsigned int)(a) | (unsigned int)(b)); + return static_cast(static_cast(a) | 
static_cast(b)); } -template +template ::value>::type> inline T &operator|=(T &a, T b) { - return (T &)((unsigned int &)(a) |= (unsigned int)(b)); + return a = a | b; } } // namespace ospray #else diff --git a/modules/cpu/common/Group.cpp b/modules/cpu/common/Group.cpp index 3b258ac97..684e4f8a5 100644 --- a/modules/cpu/common/Group.cpp +++ b/modules/cpu/common/Group.cpp @@ -126,7 +126,7 @@ void Group::commit() throw std::runtime_error("invalid Embree device"); } - featureFlags.setNone(); + featureFlags.reset(); if (numGeometries > 0) { sceneGeometries = rtcNewScene(embreeDevice); createEmbreeScene(sceneGeometries, diff --git a/modules/cpu/common/World.cpp b/modules/cpu/common/World.cpp index daec0e826..6fafa9f4c 100644 --- a/modules/cpu/common/World.cpp +++ b/modules/cpu/common/World.cpp @@ -128,7 +128,7 @@ void World::commit() // Populate shared buffer with instance pointers, // create Embree instances - featureFlags.setNone(); + featureFlags.reset(); unsigned int id = 0; for (auto &&inst : *instances) { getSh()->instances[id] = inst->getSh(); From e75140e73bcbbe0f29682442f6d3802faf60f77f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=BCnther?= Date: Tue, 28 Mar 2023 10:09:53 +0200 Subject: [PATCH 10/42] Fix interpolation for cylinder cap SA --- modules/cpu/lights/CylinderCapSolidAngleTable.ih | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/cpu/lights/CylinderCapSolidAngleTable.ih b/modules/cpu/lights/CylinderCapSolidAngleTable.ih index 3bcf77832..ba7807e15 100644 --- a/modules/cpu/lights/CylinderCapSolidAngleTable.ih +++ b/modules/cpu/lights/CylinderCapSolidAngleTable.ih @@ -1372,14 +1372,14 @@ float sampleCylSolidAngleTable(float u, float v, float w) const float p10 = lerp(u, CylCapSolidAngleTable[121 * U0 + 11 * V0 + W1], CylCapSolidAngleTable[121 * U1 + 11 * V0 + W1]); + const float p0 = (1.f - w) * p00 + w * p10; const float p01 = lerp(u, CylCapSolidAngleTable[121 * U0 + 11 * V1 + W0], CylCapSolidAngleTable[121 * U1 + 
11 * V1 + W0]); const float p11 = lerp(u, CylCapSolidAngleTable[121 * U0 + 11 * V1 + W1], CylCapSolidAngleTable[121 * U1 + 11 * V1 + W1]); - const float p0 = (1.f - w) * p00 + w * p10; - const float p1 = (1.f - w) * p10 + w * p11; + const float p1 = (1.f - w) * p01 + w * p11; return (1.f - v) * p0 + v * p1; } From 1481fc4eba7c734c0bd27daa11ed404116f3609d Mon Sep 17 00:00:00 2001 From: Miroslaw Pawlowski Date: Wed, 29 Mar 2023 13:57:52 +0200 Subject: [PATCH 11/42] Cleanup compiler warnings and unused code removal --- cmake/compiler/clang.cmake | 3 + cmake/compiler/dpcpp.cmake | 1 - modules/cpu/CMakeLists.txt | 7 +- modules/cpu/common/Clipping.ih | 4 +- modules/cpu/common/Instance.ih | 1 + modules/cpu/common/Managed.cpp | 3 - modules/cpu/common/MotionTransform.ih | 4 + modules/cpu/common/OSPCommon.ih | 7 -- modules/cpu/common/OSPCommon.ispc | 13 --- modules/cpu/common/World.ih | 3 +- modules/cpu/fb/FrameBuffer.ispc | 6 ++ modules/cpu/fb/LocalFB.ispc | 22 ++--- modules/cpu/fb/frame_ops/SSAO.h | 2 +- modules/cpu/fb/frame_ops/SSAO.ispc | 10 ++- modules/cpu/geometry/Boxes.ispc | 4 +- modules/cpu/geometry/Isosurfaces.ispc | 4 +- modules/cpu/geometry/Planes.ispc | 4 +- modules/cpu/geometry/Spheres.ispc | 4 +- modules/cpu/ispc_symbols.txt | 2 - modules/cpu/lights/CylinderLight.ispc | 7 +- modules/cpu/lights/DirectionalLight.ispc | 12 ++- modules/cpu/lights/HDRILight.ispc | 10 +-- modules/cpu/lights/PointLight.ispc | 4 +- modules/cpu/lights/QuadLight.ispc | 4 +- modules/cpu/lights/SpotLight.ispc | 4 +- modules/cpu/pf/PixelFilter.ih | 2 +- modules/cpu/render/Material.ispc | 13 ++- modules/cpu/render/Renderer.cpp | 8 +- modules/cpu/render/Renderer.ispc | 3 +- modules/cpu/render/RendererRenderTaskFn.inl | 3 +- modules/cpu/render/RendererShared.h | 2 +- modules/cpu/render/ao/AORenderer.cpp | 12 +-- modules/cpu/render/ao/AORenderer.ih | 1 - modules/cpu/render/ao/AORenderer.ispc | 11 +-- modules/cpu/render/ao/surfaces.ih | 1 - modules/cpu/render/ao/surfaces.ispc | 1 - 
modules/cpu/render/ao/volumes.ispc | 14 ++-- modules/cpu/render/bsdfs/Conductor.ih | 14 ++-- modules/cpu/render/bsdfs/Dielectric.ih | 12 +-- modules/cpu/render/bsdfs/Fresnel.ih | 2 +- modules/cpu/render/bsdfs/Lambert.ih | 10 +-- .../cpu/render/bsdfs/LambertTransmission.ih | 10 +-- .../cpu/render/bsdfs/MicrofacetConductor.ih | 7 +- .../cpu/render/bsdfs/MicrofacetDielectric.ih | 2 +- .../render/bsdfs/MicrofacetDielectricLayer.ih | 4 +- modules/cpu/render/bsdfs/Minneart.ih | 5 +- modules/cpu/render/bsdfs/OrenNayar.ih | 6 +- modules/cpu/render/bsdfs/Reflection.ih | 14 ++-- modules/cpu/render/bsdfs/RobustDielectric.ih | 10 +-- .../cpu/render/bsdfs/RobustThinDielectric.ih | 12 +-- modules/cpu/render/bsdfs/Specular.ih | 6 +- modules/cpu/render/bsdfs/ThinDielectric.ih | 12 +-- .../render/bsdfs/ThinMicrofacetDielectric.ih | 6 +- modules/cpu/render/bsdfs/Transmission.ih | 14 ++-- modules/cpu/render/bsdfs/Velvety.ih | 5 +- modules/cpu/render/debug/DebugRenderer.cpp | 12 +-- modules/cpu/render/debug/DebugRenderer.ih | 1 - modules/cpu/render/debug/DebugRenderer.ispc | 80 +++++-------------- modules/cpu/render/materials/Alloy.ispc | 4 +- modules/cpu/render/materials/CarPaint.ispc | 2 +- modules/cpu/render/materials/Glass.ispc | 4 +- modules/cpu/render/materials/Luminous.ispc | 2 +- modules/cpu/render/materials/Metal.ispc | 4 +- .../cpu/render/materials/MetallicPaint.ispc | 4 +- modules/cpu/render/materials/OBJ.ispc | 8 +- modules/cpu/render/materials/Plastic.ispc | 4 +- modules/cpu/render/materials/Principled.ispc | 2 +- modules/cpu/render/materials/ThinGlass.ispc | 6 +- modules/cpu/render/materials/Velvet.ispc | 4 +- .../pathtracer/NextEventEstimation.ispc | 10 +-- .../cpu/render/pathtracer/PathSampler.ispc | 9 +-- modules/cpu/render/pathtracer/PathStructs.ih | 4 +- modules/cpu/render/pathtracer/PathTracer.cpp | 13 +-- modules/cpu/render/pathtracer/PathTracer.ispc | 21 ++--- .../cpu/render/pathtracer/PathTracerShared.h | 2 +- .../cpu/render/pathtracer/ShadowCatcher.ispc | 4 +- 
.../render/pathtracer/TransparentShadow.ispc | 2 +- .../cpu/render/pathtracer/VirtualLight.ispc | 18 ++--- .../pathtracer/volumes/HenyeyGreenstein.ih | 2 +- .../pathtracer/volumes/VolumeSampler.ih | 8 +- .../pathtracer/volumes/VolumeSampler.ispc | 19 ++--- modules/cpu/render/scivis/SciVis.cpp | 13 +-- modules/cpu/render/scivis/SciVis.ih | 5 -- modules/cpu/render/scivis/SciVis.ispc | 25 ++---- modules/cpu/render/scivis/lightAlpha.ispc | 6 +- modules/cpu/render/scivis/surfaces.ih | 1 - modules/cpu/render/scivis/surfaces.ispc | 24 ++---- modules/cpu/render/scivis/volumes.ispc | 21 ++--- modules/cpu/render/shaders/Flakes.ih | 3 +- modules/cpu/texture/Texture2D.cpp | 1 - modules/cpu/texture/Texture2D.ispc | 2 +- modules/cpu/texture/TextureVolume.ispc | 2 +- modules/cpu/volume/VolumetricModel.ih | 2 +- 93 files changed, 281 insertions(+), 440 deletions(-) delete mode 100644 modules/cpu/common/OSPCommon.ispc diff --git a/cmake/compiler/clang.cmake b/cmake/compiler/clang.cmake index ef378b6e3..246f98610 100644 --- a/cmake/compiler/clang.cmake +++ b/cmake/compiler/clang.cmake @@ -15,6 +15,8 @@ if(OSPRAY_STRICT_BUILD) set(OSPRAY_CXX_FLAGS "-Wno-header-hygiene ${OSPRAY_CXX_FLAGS}") set(OSPRAY_CXX_FLAGS "-Wno-covered-switch-default ${OSPRAY_CXX_FLAGS}") set(OSPRAY_CXX_FLAGS "-Wno-date-time ${OSPRAY_CXX_FLAGS}") + set(OSPRAY_CXX_FLAGS "-Wno-unsafe-buffer-usage ${OSPRAY_CXX_FLAGS}") #we use pointer arithmetics on buffers + set(OSPRAY_CXX_FLAGS "-Wno-reserved-identifier ${OSPRAY_CXX_FLAGS}") #used in autogenerated ISPC headers # Should try to fix and remove... 
set(OSPRAY_CXX_FLAGS "-Wno-unknown-warning-option ${OSPRAY_CXX_FLAGS}") #don't warn if pragmas are unknown @@ -46,6 +48,7 @@ if(OSPRAY_STRICT_BUILD) set(OSPRAY_CXX_FLAGS "-Wno-extra-semi-stmt ${OSPRAY_CXX_FLAGS}") set(OSPRAY_CXX_FLAGS "-Wno-shadow-field ${OSPRAY_CXX_FLAGS}") set(OSPRAY_CXX_FLAGS "-Wno-alloca ${OSPRAY_CXX_FLAGS}") + set(OSPRAY_CXX_FLAGS "-Wno-cast-function-type-strict ${OSPRAY_CXX_FLAGS}") #ISPC exported functions casts # Options selected for Clang 5.0+ set(OSPRAY_CXX_FLAGS "-Weverything ${OSPRAY_CXX_FLAGS}") diff --git a/cmake/compiler/dpcpp.cmake b/cmake/compiler/dpcpp.cmake index 0500f8294..3b894aae0 100644 --- a/cmake/compiler/dpcpp.cmake +++ b/cmake/compiler/dpcpp.cmake @@ -26,7 +26,6 @@ list(APPEND OSPRAY_CXX_FLAGS_SYCL -fsycl -fsycl-unnamed-lambda -Xclang=-fsycl-allow-func-ptr - -Wdouble-promotion ) # FIXME: debug information generation takes forever in SYCL diff --git a/modules/cpu/CMakeLists.txt b/modules/cpu/CMakeLists.txt index d18cd843f..fc04a8e0b 100644 --- a/modules/cpu/CMakeLists.txt +++ b/modules/cpu/CMakeLists.txt @@ -133,7 +133,6 @@ set(OSPRAY_CPP_SOURCES set(OSPRAY_ISPC_SOURCES ISPCDevice.ispc - common/OSPCommon.ispc common/World.ispc fb/FrameBuffer.ispc @@ -261,7 +260,6 @@ if (WIN32) string(REPLACE "-i32x16" "" isa ${isa}) # strip avx512skx-i32x16 if (NUM_TARGETS EQUAL 1) set(isa "") # for single target no suffix - string(REPLACE "delete_uniform_," "delete_uniform," OSPRAY_ISPC_SYMBOLS_IN ${OSPRAY_ISPC_SYMBOLS_IN}) endif() # add isa suffix string(REPLACE "," "${isa}" OSPRAY_ISPC_SYMBOLS ${OSPRAY_ISPC_SYMBOLS_IN}) @@ -461,6 +459,11 @@ if (OSPRAY_MODULE_GPU) -x c++ ) + # catch implicit float to double promotions in kernels only + target_compile_options(ospray_module_gpu_kernels PRIVATE + -Wdouble-promotion + ) + target_compile_options(ospray_module_gpu_kernels PUBLIC -fsycl ${OSPRAY_CXX_FLAGS_SYCL} diff --git a/modules/cpu/common/Clipping.ih b/modules/cpu/common/Clipping.ih index 28e17b6be..e71c4b09b 100644 --- 
a/modules/cpu/common/Clipping.ih +++ b/modules/cpu/common/Clipping.ih @@ -40,9 +40,9 @@ inline bool floatUlpCompare(float a, float b, uniform unsigned int ulpMaxDiff) // Integer absolute value, equivalent to: // if (ulpDiff < 0) ulpDiff = -ulpDiff; int mask = (ulpDiff >> 31); - ulpDiff = (ulpDiff ^ mask) - mask; + unsigned int ulpDiffUnsigned = (ulpDiff ^ mask) - mask; - if (ulpDiff <= ulpMaxDiff) + if (ulpDiffUnsigned <= ulpMaxDiff) return true; return false; diff --git a/modules/cpu/common/Instance.ih b/modules/cpu/common/Instance.ih index c60f1a0e2..5d1497a38 100644 --- a/modules/cpu/common/Instance.ih +++ b/modules/cpu/common/Instance.ih @@ -23,6 +23,7 @@ inline uniform AffineSpace3f Instance_getTransform( // TODO: rtcGetGeometryTransform is not callable on the device yet return getInterpolatedTransform(self->geom, time); #else + (void)time; return self->xfm; #endif } diff --git a/modules/cpu/common/Managed.cpp b/modules/cpu/common/Managed.cpp index 4097bb475..f217e5ca8 100644 --- a/modules/cpu/common/Managed.cpp +++ b/modules/cpu/common/Managed.cpp @@ -3,9 +3,6 @@ #include "Managed.h" #include "Data.h" -#ifndef OSPRAY_TARGET_SYCL -#include "common/OSPCommon_ispc.h" -#endif namespace ospray { diff --git a/modules/cpu/common/MotionTransform.ih b/modules/cpu/common/MotionTransform.ih index 97e3896ab..fc9a8b0a9 100644 --- a/modules/cpu/common/MotionTransform.ih +++ b/modules/cpu/common/MotionTransform.ih @@ -15,6 +15,10 @@ inline uniform AffineSpace3f getInterpolatedTransform( #ifndef OSPRAY_TARGET_SYCL // TODO: rtcGetGeometryTransform is not callable on the device yet rtcGetGeometryTransform(geom, time, RTC_FORMAT_FLOAT3X4_COLUMN_MAJOR, &xfm); +#else + // Silence unused parameter warning + (void)geom; + (void)time; #endif return xfm; } diff --git a/modules/cpu/common/OSPCommon.ih b/modules/cpu/common/OSPCommon.ih index 1f4e67319..f2053abe1 100644 --- a/modules/cpu/common/OSPCommon.ih +++ b/modules/cpu/common/OSPCommon.ih @@ -98,13 +98,6 @@ void PRINT3(const 
rkcommon::math::vec3f &x) #define PRINT_STR(s) print(s); #endif -OSPRAY_BEGIN_ISPC_NAMESPACE - -/*! a C++-callable 'delete' of ISPC-side allocated memory of uniform objects */ -export void delete_uniform(void *uniform uptr); - -OSPRAY_END_ISPC_NAMESPACE - #ifndef OSPRAY_TARGET_SYCL /*! 64-bit malloc. allows for alloc'ing memory larger than 64 bits */ extern "C" void *uniform malloc64(uniform uint64 size); diff --git a/modules/cpu/common/OSPCommon.ispc b/modules/cpu/common/OSPCommon.ispc deleted file mode 100644 index 7bdaa1ff1..000000000 --- a/modules/cpu/common/OSPCommon.ispc +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2009 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "OSPCommon.ih" - -OSPRAY_BEGIN_ISPC_NAMESPACE - -export void delete_uniform(void *uniform uptr) -{ - delete uptr; -} - -OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/common/World.ih b/modules/cpu/common/World.ih index d86c68ab9..f769ff7f4 100644 --- a/modules/cpu/common/World.ih +++ b/modules/cpu/common/World.ih @@ -116,7 +116,7 @@ inline void traceVolumeRay( &intersectArgs); if (intervals.numVolumeIntervals > 0) { - for (uniform int i = 0; i < intervals.numVolumeIntervals; ++i) { + for (uniform uint32 i = 0; i < intervals.numVolumeIntervals; ++i) { Instance *instance = *(world->instances + intervals.intervals[i].instID); intervals.intervals[i].instance = instance; intervals.intervals[i].volumetricModel = @@ -145,6 +145,7 @@ inline void traceClippingRay(const World *uniform world, { // Clipping disabled on GPU for now #ifdef OSPRAY_TARGET_SYCL + (void)world; rayIntervals.intervals[0] = make_box1f(ray.t0, ray.t); rayIntervals.count = 1; return; diff --git a/modules/cpu/fb/FrameBuffer.ispc b/modules/cpu/fb/FrameBuffer.ispc index 2404d5b48..f1e7dc9f0 100644 --- a/modules/cpu/fb/FrameBuffer.ispc +++ b/modules/cpu/fb/FrameBuffer.ispc @@ -18,6 +18,12 @@ SYCL_EXTERNAL vec4f FrameBuffer_runPixelOps(FrameBuffer *uniform self, outColor = self->pixelOps[i]->processPixel( 
self->pixelOps[i], outColor, depth, normal, albedo); } +#else + // Silence unused parameter warning + (void)self; + (void)depth; + (void)normal; + (void)albedo; #endif return outColor; } diff --git a/modules/cpu/fb/LocalFB.ispc b/modules/cpu/fb/LocalFB.ispc index eda2f24e0..9d437d98b 100644 --- a/modules/cpu/fb/LocalFB.ispc +++ b/modules/cpu/fb/LocalFB.ispc @@ -28,12 +28,12 @@ OSPRAY_BEGIN_ISPC_NAMESPACE color += (uniform uint64)tile->region.lower.y * fb->super.size.x; \ VaryingTile *uniform varyTile = (VaryingTile * uniform) tile; \ for (uniform uint32 iiy = tile->region.lower.y; \ - iiy < tile->region.upper.y; \ + iiy < (uniform uint32)tile->region.upper.y; \ iiy++) { \ uniform uint32 chunkID = \ (iiy - tile->region.lower.y) * (TILE_SIZE / programCount); \ for (uint32 iix = tile->region.lower.x + programIndex; \ - iix < tile->region.upper.x; \ + iix < (uniform uint32)tile->region.upper.x; \ iix += programCount, chunkID++) { \ type cvtCol; \ unmasked \ @@ -301,13 +301,14 @@ export void LocalFrameBuffer_writeDepthTile( uniform float *uniform depth = (uniform float *uniform)fb->depthBuffer; depth += (uniform uint64)tile->region.lower.y * fb->super.size.x; - for (uniform uint32 iiy = tile->region.lower.y; iiy < tile->region.upper.y; + for (uniform uint32 iiy = tile->region.lower.y; + iiy < (uniform uint32)tile->region.upper.y; iiy++) { uniform uint32 chunkID = (iiy - tile->region.lower.y) * (TILE_SIZE / programCount); for (uint32 iix = tile->region.lower.x + programIndex; - iix < tile->region.upper.x; + iix < (uniform uint32)tile->region.upper.x; iix += programCount, chunkID++) { depth[iix] = varyTile->z[chunkID]; } @@ -323,20 +324,20 @@ export void LocalFrameBuffer_writeIDTile(void *uniform _fb, { uniform LocalFB *uniform fb = (uniform LocalFB * uniform) _fb; const Tile *uniform tile = (const Tile *uniform)_tile; - VaryingTile *uniform varyTile = (VaryingTile * uniform) tile; uniform uint32 *uniform buffer = dst; //(uniform uint32 *uniform)fb->primitiveIDBuffer; 
buffer += (uniform uint64)tile->region.lower.y * fb->super.size.x; const void *uniform upid = src; //(const void* uniform)tile->pid; const varying int32 *uniform pid = (const varying int32 *uniform)upid; - for (uniform uint32 iiy = tile->region.lower.y; iiy < tile->region.upper.y; + for (uniform uint32 iiy = tile->region.lower.y; + iiy < (uniform uint32)tile->region.upper.y; iiy++) { uniform uint32 chunkID = (iiy - tile->region.lower.y) * (TILE_SIZE / programCount); for (uint32 iix = tile->region.lower.x + programIndex; - iix < tile->region.upper.x; + iix < (uniform uint32)tile->region.upper.x; iix += programCount, chunkID++) { if (tile->accumID == 0) { varying uint32 val = -1; @@ -366,17 +367,16 @@ export void LocalFrameBuffer_writeAuxTile(void *uniform _fb, const varying float *uniform ay = (const varying float *uniform)_ay; const varying float *uniform az = (const varying float *uniform)_az; - const uniform float accumID = tile->accumID; - const uniform float accScale = rcpf(tile->accumID + 1); aux += (uniform uint64)tile->region.lower.y * fb->super.size.x; - for (uniform uint32 iiy = tile->region.lower.y; iiy < tile->region.upper.y; + for (uniform uint32 iiy = tile->region.lower.y; + iiy < (uniform uint32)tile->region.upper.y; iiy++) { uniform uint32 chunkID = (iiy - tile->region.lower.y) * (TILE_SIZE / programCount); for (uint32 iix = tile->region.lower.x + programIndex; - iix < tile->region.upper.x; + iix < (uint32)tile->region.upper.x; iix += programCount, chunkID++) { aux[iix] = make_vec3f(ax[chunkID], ay[chunkID], az[chunkID]); } diff --git a/modules/cpu/fb/frame_ops/SSAO.h b/modules/cpu/fb/frame_ops/SSAO.h index 13d1cb6f0..f30d39404 100644 --- a/modules/cpu/fb/frame_ops/SSAO.h +++ b/modules/cpu/fb/frame_ops/SSAO.h @@ -24,7 +24,7 @@ void LiveSSAOFrameOp_getOcclusion(const void *_self, float *occlusionBuffer, const float radius, const float checkRadius, - int kernelSize, + unsigned int kernelSize, int programID); void LiveSSAOFrameOp_applyOcclusion( void 
*_self, void *_fb, void *_color, float *occlusionBuffer); diff --git a/modules/cpu/fb/frame_ops/SSAO.ispc b/modules/cpu/fb/frame_ops/SSAO.ispc index f3295656b..929c6e6ff 100644 --- a/modules/cpu/fb/frame_ops/SSAO.ispc +++ b/modules/cpu/fb/frame_ops/SSAO.ispc @@ -62,7 +62,7 @@ export void LiveSSAOFrameOp_getOcclusion(const void *uniform _self, float *uniform occlusionBuffer, const uniform float radius, const uniform float checkRadius, - int uniform kernelSize, + unsigned int uniform kernelSize, int uniform programID) { const LiveSSAOFrameOp *uniform self = (const LiveSSAOFrameOp *uniform)_self; @@ -128,15 +128,17 @@ export void LiveSSAOFrameOp_getOcclusion(const void *uniform _self, } } -export void LiveSSAOFrameOp_applyOcclusion(void *uniform _self, +export void LiveSSAOFrameOp_applyOcclusion(void *uniform, void *uniform _fb, void *uniform _color, float *uniform occlusionBuffer) { FrameBufferView *uniform fb = (FrameBufferView * uniform) _fb; - for (uint32 pixelID = 0; pixelID < fb->fbDims.x * fb->fbDims.y; pixelID++) { + for (uniform uint32 pixelID = 0; + pixelID < (uniform uint32)(fb->fbDims.x * fb->fbDims.y); + pixelID++) { if (occlusionBuffer[pixelID] >= 0) { - for (uint32 c = 0; c < 3; ++c) { + for (uniform uint32 c = 0; c < 3; ++c) { if ((fb->colorBufferFormat == OSP_FB_RGBA8) || (fb->colorBufferFormat == OSP_FB_SRGBA)) { uint8 *uniform color = (uint8 * uniform) _color; diff --git a/modules/cpu/geometry/Boxes.ispc b/modules/cpu/geometry/Boxes.ispc index 6cd7a5c76..d5e9dd05a 100644 --- a/modules/cpu/geometry/Boxes.ispc +++ b/modules/cpu/geometry/Boxes.ispc @@ -62,10 +62,10 @@ export void Boxes_occluded( Boxes_intersect_kernel((RTCIntersectFunctionNArguments * uniform) args, true); } -SYCL_EXTERNAL void Boxes_postIntersect(const Geometry *uniform geometry, +SYCL_EXTERNAL void Boxes_postIntersect(const Geometry *uniform, varying DifferentialGeometry &dg, const varying Ray &ray, - uniform int64 flags) + uniform int64) { dg.Ng = dg.Ns = ray.Ng; } diff --git 
a/modules/cpu/geometry/Isosurfaces.ispc b/modules/cpu/geometry/Isosurfaces.ispc index 49e0a1777..beb456671 100644 --- a/modules/cpu/geometry/Isosurfaces.ispc +++ b/modules/cpu/geometry/Isosurfaces.ispc @@ -7,9 +7,9 @@ #include "common/Ray.ih" #include "common/World.ih" #include "geometry/Geometry.ih" -#include "volume/Volume.ih" #include "rkcommon/math/box.ih" #include "rkcommon/math/vec.ih" +#include "volume/Volume.ih" #include "volume/VolumetricModel.ih" #include "volume/transferFunction/TransferFunctionDispatch.ih" // c++ shared @@ -40,7 +40,6 @@ export void Isosurfaces_bounds(const RTCBoundsFunctionArguments *uniform args) { uniform Isosurfaces *uniform self = (uniform Isosurfaces * uniform) args->geometryUserPtr; - uniform int primID = args->primID; box3fa *uniform out = (box3fa * uniform) args->bounds_o; Volume *uniform volume = Isosurfaces_getVolume(self); @@ -81,7 +80,6 @@ SYCL_EXTERNAL void unmasked Isosurfaces_intersect_kernel( args->valid[programIndex] = 0; Isosurfaces *uniform self = (Isosurfaces * uniform) args->geometryUserPtr; - uniform unsigned int primID = args->primID; // this assumes that the args->rayhit is actually a pointer to a varying ray! 
varying Ray *uniform ray = (varying Ray * uniform) args->rayhit; diff --git a/modules/cpu/geometry/Planes.ispc b/modules/cpu/geometry/Planes.ispc index 918869c64..b150b1760 100644 --- a/modules/cpu/geometry/Planes.ispc +++ b/modules/cpu/geometry/Planes.ispc @@ -121,10 +121,10 @@ export void Planes_occluded( (RTCIntersectFunctionNArguments * uniform) args, true); } -SYCL_EXTERNAL void Planes_postIntersect(const Geometry *uniform geometry, +SYCL_EXTERNAL void Planes_postIntersect(const Geometry *uniform, varying DifferentialGeometry &dg, const varying Ray &ray, - uniform int64 flags) + uniform int64) { dg.Ng = dg.Ns = ray.Ng; } diff --git a/modules/cpu/geometry/Spheres.ispc b/modules/cpu/geometry/Spheres.ispc index 75dcf8191..80249b140 100644 --- a/modules/cpu/geometry/Spheres.ispc +++ b/modules/cpu/geometry/Spheres.ispc @@ -64,7 +64,7 @@ SYCL_EXTERNAL SampleAreaRes Spheres_sampleArea( } void Spheres_getAreas(const Geometry *const uniform _self, - const int32 *const uniform primIDs, + const int32 *const uniform, const uniform int32 numPrims, const uniform affine3f &xfm, float *const uniform areas) @@ -74,9 +74,9 @@ void Spheres_getAreas(const Geometry *const uniform _self, // detect (non-uniform) scaling; get length of transformed unit-vectors const uniform vec3f scaling3 = make_vec3f(length(xfm.l.vx), length(xfm.l.vy), length(xfm.l.vz)); +#if 0 const uniform float min_scaling = reduce_min(scaling3); const uniform float max_scaling = reduce_max(scaling3); -#if 0 if ((max_scaling - min_scaling) > 1e-4f * min_scaling) { postStatusMsg(ISPC_MSG_SPHERES, OSP_LOG_WARNING); } diff --git a/modules/cpu/ispc_symbols.txt b/modules/cpu/ispc_symbols.txt index 65bf818a4..a6a36da09 100644 --- a/modules/cpu/ispc_symbols.txt +++ b/modules/cpu/ispc_symbols.txt @@ -5,8 +5,6 @@ Light_eval___un_3C_s_5B__c_unLight_5D__3E_REFs_5B__c_vyDifferentialGeometry_5D_R Renderer_getBackground___un_3C_s_5B__c_unRenderer_5D__3E_REFs_5B__c_vyvec2f_5D_Cunenum_5B_FeatureFlagsOther_5D__, 
Renderer_getMaxDepth___un_3C_s_5B__c_unRenderer_5D__3E_REFs_5B__c_vyvec2f_5D_Cunenum_5B_FeatureFlagsOther_5D__, clippingIntersectionFilterV___UM_un_3C_s_5B__c_unRTCFilterFunctionNArguments_5D__3E__, -delete_uniform___un_3C_unv_3E__, -delete_uniform_, get_zorder____, Geometry_dispatch_intersect___UM_un_3C_s_5B_unRTCIntersectFunctionNArguments_5D__3E__, Geometry_dispatch_occluded___UM_un_3C_s_5B_unRTCOccludedFunctionNArguments_5D__3E__, diff --git a/modules/cpu/lights/CylinderLight.ispc b/modules/cpu/lights/CylinderLight.ispc index c51c01381..6323a3979 100644 --- a/modules/cpu/lights/CylinderLight.ispc +++ b/modules/cpu/lights/CylinderLight.ispc @@ -168,13 +168,11 @@ inline Light_SampleRes Sample(const CylinderLight *uniform self, return res; } - - SYCL_EXTERNAL Light_SampleRes CylinderLight_sample( const uniform Light *uniform super, const DifferentialGeometry &dg, const vec2f &s, - const float time) + const float) { const CylinderLight *uniform self = (CylinderLight * uniform) super; assert(self); @@ -235,14 +233,13 @@ inline Light_EvalRes Eval(const CylinderLight *uniform self, return res; } - SYCL_EXTERNAL Light_EvalRes CylinderLight_eval( const uniform Light *uniform super, const DifferentialGeometry &dg, const vec3f &dir, const float minDist, const float maxDist, - const float time) + const float) { CylinderLight *uniform self = (CylinderLight * uniform) super; assert(self); diff --git a/modules/cpu/lights/DirectionalLight.ispc b/modules/cpu/lights/DirectionalLight.ispc index 09434652e..a6bf19432 100644 --- a/modules/cpu/lights/DirectionalLight.ispc +++ b/modules/cpu/lights/DirectionalLight.ispc @@ -29,12 +29,11 @@ inline Light_SampleRes Sample(const DirectionalLight *uniform self, return res; } - SYCL_EXTERNAL Light_SampleRes DirectionalLight_sample( const Light *uniform super, - const DifferentialGeometry &dg, + const DifferentialGeometry &, const vec2f &s, - const float time) + const float) { const DirectionalLight *uniform self = (DirectionalLight * 
uniform) super; assert(self); @@ -80,13 +79,12 @@ inline Light_EvalRes Eval(const DirectionalLight *uniform self, return res; } - SYCL_EXTERNAL Light_EvalRes DirectionalLight_eval(const Light *uniform super, - const DifferentialGeometry &dg, + const DifferentialGeometry &, const vec3f &dir, - const float minDist, + const float, const float maxDist, - const float time) + const float) { const DirectionalLight *uniform self = (DirectionalLight * uniform) super; assert(self); diff --git a/modules/cpu/lights/HDRILight.ispc b/modules/cpu/lights/HDRILight.ispc index 639c309da..ac2151cb3 100644 --- a/modules/cpu/lights/HDRILight.ispc +++ b/modules/cpu/lights/HDRILight.ispc @@ -60,9 +60,9 @@ inline Light_SampleRes Sample(const HDRILight *uniform self, } SYCL_EXTERNAL Light_SampleRes HDRILight_sample(const Light *uniform super, - const DifferentialGeometry &dg, + const DifferentialGeometry &, const vec2f &s, - const float time) + const float) { const HDRILight *uniform self = (HDRILight * uniform) super; assert(self); @@ -125,11 +125,11 @@ inline Light_EvalRes Eval(const HDRILight *uniform self, } SYCL_EXTERNAL Light_EvalRes HDRILight_eval(const Light *uniform super, - const DifferentialGeometry &dg, + const DifferentialGeometry &, const vec3f &dir, - const float minDist, + const float, const float maxDist, - const float time) + const float) { const HDRILight *uniform self = (HDRILight * uniform) super; assert(self); diff --git a/modules/cpu/lights/PointLight.ispc b/modules/cpu/lights/PointLight.ispc index 82de68cca..ef0363614 100644 --- a/modules/cpu/lights/PointLight.ispc +++ b/modules/cpu/lights/PointLight.ispc @@ -92,7 +92,7 @@ inline Light_SampleRes Sample(const PointLight *uniform self, SYCL_EXTERNAL Light_SampleRes PointLight_sample(const Light *uniform super, const DifferentialGeometry &dg, const vec2f &s, - const float time) + const float) { const PointLight *uniform self = (PointLight * uniform) super; assert(self); @@ -161,7 +161,7 @@ SYCL_EXTERNAL Light_EvalRes 
PointLight_eval(const Light *uniform super, const vec3f &dir, const float minDist, const float maxDist, - const float time) + const float) { const PointLight *uniform self = (PointLight * uniform) super; assert(self); diff --git a/modules/cpu/lights/QuadLight.ispc b/modules/cpu/lights/QuadLight.ispc index 064521d82..256a31635 100644 --- a/modules/cpu/lights/QuadLight.ispc +++ b/modules/cpu/lights/QuadLight.ispc @@ -135,7 +135,7 @@ inline Light_SampleRes Sample(const QuadLight *uniform self, SYCL_EXTERNAL Light_SampleRes QuadLight_sample(const Light *uniform super, const DifferentialGeometry &dg, const vec2f &sp, - const float time) + const float) { const QuadLight *uniform self = (QuadLight * uniform) super; assert(self); @@ -219,7 +219,7 @@ SYCL_EXTERNAL Light_EvalRes QuadLight_eval(const Light *uniform super, const vec3f &dir, const float minDist, const float maxDist, - const float time) + const float) { const QuadLight *uniform self = (QuadLight * uniform) super; assert(self); diff --git a/modules/cpu/lights/SpotLight.ispc b/modules/cpu/lights/SpotLight.ispc index 9cceda28e..38b771853 100644 --- a/modules/cpu/lights/SpotLight.ispc +++ b/modules/cpu/lights/SpotLight.ispc @@ -97,7 +97,7 @@ inline Light_SampleRes Sample(const SpotLight *uniform self, SYCL_EXTERNAL Light_SampleRes SpotLight_sample(const Light *uniform super, const DifferentialGeometry &dg, const vec2f &s, - const float time) + const float) { const SpotLight *uniform self = (SpotLight * uniform) super; assert(self); @@ -175,7 +175,7 @@ SYCL_EXTERNAL Light_EvalRes SpotLight_eval(const Light *uniform super, const vec3f &dir, const float minDist, const float maxDist, - const float time) + const float) { const SpotLight *uniform self = (SpotLight * uniform) super; assert(self); diff --git a/modules/cpu/pf/PixelFilter.ih b/modules/cpu/pf/PixelFilter.ih index 1f9699220..891298b9a 100644 --- a/modules/cpu/pf/PixelFilter.ih +++ b/modules/cpu/pf/PixelFilter.ih @@ -17,7 +17,7 @@ inline void 
PixelFilter_Constructor(PixelFilter *uniform self, self->sample = sample; } -inline vec2f Point_sample(const PixelFilter *uniform self, const vec2f &s) +inline vec2f Point_sample(const PixelFilter *uniform, const vec2f &) { return make_vec2f(0.0f); } diff --git a/modules/cpu/render/Material.ispc b/modules/cpu/render/Material.ispc index d49b3839b..31815cf51 100644 --- a/modules/cpu/render/Material.ispc +++ b/modules/cpu/render/Material.ispc @@ -5,19 +5,16 @@ OSPRAY_BEGIN_ISPC_NAMESPACE -SYCL_EXTERNAL vec3f Material_getTransparency( - const uniform Material *uniform self, - const DifferentialGeometry &dg, - const Ray &ray, - const Medium ¤tMedium) +SYCL_EXTERNAL vec3f Material_getTransparency(const uniform Material *uniform, + const DifferentialGeometry &, + const Ray &, + const Medium &) { return make_vec3f(0.0f); } SYCL_EXTERNAL void Material_selectNextMedium( - const uniform Material *uniform self, - const DifferentialGeometry &dg, - Medium ¤tMedium) + const uniform Material *uniform, const DifferentialGeometry &, Medium &) { /* do nothing by default */ } diff --git a/modules/cpu/render/Renderer.cpp b/modules/cpu/render/Renderer.cpp index 175765c65..1c1567c6f 100644 --- a/modules/cpu/render/Renderer.cpp +++ b/modules/cpu/render/Renderer.cpp @@ -40,7 +40,7 @@ std::string Renderer::toString() const void Renderer::commit() { spp = std::max(1, getParam("pixelSamples", 1)); - const int32 maxDepth = std::max(0, getParam("maxPathLength", 20)); + const uint32_t maxDepth = std::max(0, getParam("maxPathLength", 20)); const float minContribution = getParam("minContribution", 0.001f); errorThreshold = getParam("varianceThreshold", 0.f); @@ -146,6 +146,12 @@ OSPPickResult Renderer::pick( res.model = (OSPGeometricModel)model; res.primID = static_cast(primID); } +#else + // Silence unused parameter warning + (void)fb; + (void)camera; + (void)world; + (void)screenPos; #endif return res; diff --git a/modules/cpu/render/Renderer.ispc b/modules/cpu/render/Renderer.ispc index 
24340d55b..5a821dccf 100644 --- a/modules/cpu/render/Renderer.ispc +++ b/modules/cpu/render/Renderer.ispc @@ -45,7 +45,7 @@ SYCL_EXTERNAL float Renderer_getMaxDepth(const Renderer *uniform self, // Exports (called from C++) ////////////////////////////////////////////////// export void Renderer_pick(const void *uniform _self, - const void *uniform _fb, + const void *uniform, const void *uniform _camera, const void *uniform _world, const uniform vec2f &screenPos, @@ -56,7 +56,6 @@ export void Renderer_pick(const void *uniform _self, uniform int32 &hit) { const Renderer *uniform self = (const Renderer *uniform)_self; - const FrameBuffer *uniform fb = (const FrameBuffer *uniform)_fb; const Camera *uniform camera = (const Camera *uniform)_camera; const World *uniform world = (const World *uniform)_world; diff --git a/modules/cpu/render/RendererRenderTaskFn.inl b/modules/cpu/render/RendererRenderTaskFn.inl index 5a0ac6781..379974b30 100644 --- a/modules/cpu/render/RendererRenderTaskFn.inl +++ b/modules/cpu/render/RendererRenderTaskFn.inl @@ -13,7 +13,6 @@ task FrameBuffer *uniform fb, Camera *uniform camera, World *uniform world, - void *uniform perFrameData, const uint32 *uniform taskIDs, #ifdef OSPRAY_TARGET_SYCL const int taskIndex0, @@ -109,7 +108,7 @@ task #endif #endif - renderSampleFn(self, fb, world, perFrameData, screenSample, ff); + renderSampleFn(self, fb, world, screenSample, ff); col = col + screenSample.rgb; alpha += screenSample.alpha; diff --git a/modules/cpu/render/RendererShared.h b/modules/cpu/render/RendererShared.h index 5a68a1397..91df2e716 100644 --- a/modules/cpu/render/RendererShared.h +++ b/modules/cpu/render/RendererShared.h @@ -19,7 +19,7 @@ struct Renderer Texture2D *backplate; Texture2D *maxDepthTexture; // optional maximum depth texture used for early // ray termination - int maxDepth; + uint32 maxDepth; float minContribution; int32 numMaterials; diff --git a/modules/cpu/render/ao/AORenderer.cpp b/modules/cpu/render/ao/AORenderer.cpp index 
1b9e498f9..30587eefc 100644 --- a/modules/cpu/render/ao/AORenderer.cpp +++ b/modules/cpu/render/ao/AORenderer.cpp @@ -39,7 +39,7 @@ void AORenderer::commit() void AORenderer::renderTasks(FrameBuffer *fb, Camera *camera, World *world, - void *perFrameData, + void *, const utility::ArrayView &taskIDs #ifdef OSPRAY_TARGET_SYCL , @@ -72,7 +72,6 @@ void AORenderer::renderTasks(FrameBuffer *fb, fbSh, cameraSh, worldSh, - perFrameData, taskIDsPtr, taskIndex.get_global_id(0), ff); @@ -83,13 +82,8 @@ void AORenderer::renderTasks(FrameBuffer *fb, // For prints we have to flush the entire queue, because other stuff is queued syclQueue.wait_and_throw(); #else - ispc::AORenderer_renderTasks(&rendererSh->super, - fbSh, - cameraSh, - worldSh, - perFrameData, - taskIDs.data(), - numTasks); + ispc::AORenderer_renderTasks( + &rendererSh->super, fbSh, cameraSh, worldSh, taskIDs.data(), numTasks); #endif } diff --git a/modules/cpu/render/ao/AORenderer.ih b/modules/cpu/render/ao/AORenderer.ih index 211ec9f8a..4ad347505 100644 --- a/modules/cpu/render/ao/AORenderer.ih +++ b/modules/cpu/render/ao/AORenderer.ih @@ -8,7 +8,6 @@ SYCL_EXTERNAL void AORenderer_renderTask(Renderer *uniform self, FrameBuffer *uniform fb, Camera *uniform camera, World *uniform world, - void *uniform perFrameData, const uint32 *uniform taskIDs, const int taskIndex0, const uniform ospray::FeatureFlags &ff); diff --git a/modules/cpu/render/ao/AORenderer.ispc b/modules/cpu/render/ao/AORenderer.ispc index 2bb721b17..5fc6619e5 100644 --- a/modules/cpu/render/ao/AORenderer.ispc +++ b/modules/cpu/render/ao/AORenderer.ispc @@ -21,7 +21,6 @@ OSPRAY_BEGIN_ISPC_NAMESPACE static void AORenderer_renderSample(Renderer *uniform _self, FrameBuffer *uniform fb, World *uniform world, - void *uniform perFrameData, varying ScreenSample &sample, const uniform FeatureFlags &ff) { @@ -106,8 +105,7 @@ static void AORenderer_renderSample(Renderer *uniform _self, // Shade geometry SSI surfaceShading; - surfaceShading = - 
AORenderer_computeShading(self, fb, world, dg, sample, ff); + surfaceShading = AORenderer_computeShading(self, world, dg, sample, ff); // Use shaded color for blending blendedColor = surfaceShading.shadedColor; @@ -159,20 +157,17 @@ SYCL_EXTERNAL void AORenderer_renderTask(Renderer *uniform self, FrameBuffer *uniform fb, Camera *uniform camera, World *uniform world, - void *uniform perFrameData, const uint32 *uniform taskIDs, const int taskIndex0, const uniform FeatureFlags &ff) { - Renderer_default_renderTask( - self, fb, camera, world, perFrameData, taskIDs, taskIndex0, ff); + Renderer_default_renderTask(self, fb, camera, world, taskIDs, taskIndex0, ff); } #else export void AORenderer_renderTasks(void *uniform _self, void *uniform _fb, void *uniform _camera, void *uniform _world, - void *uniform perFrameData, void *uniform _taskIDs, uniform uint32 numTasks) { @@ -182,7 +177,7 @@ export void AORenderer_renderTasks(void *uniform _self, World *uniform world = (World * uniform) _world; const uint32 *uniform taskIDs = (const uint32 *uniform)_taskIDs; launch[numTasks] Renderer_default_renderTask( - self, fb, camera, world, perFrameData, taskIDs, ffAll()); + self, fb, camera, world, taskIDs, ffAll()); sync; } #endif diff --git a/modules/cpu/render/ao/surfaces.ih b/modules/cpu/render/ao/surfaces.ih index 457db939a..617b87e40 100644 --- a/modules/cpu/render/ao/surfaces.ih +++ b/modules/cpu/render/ao/surfaces.ih @@ -36,7 +36,6 @@ inline void computeDG(const World *uniform world, } SYCL_EXTERNAL SSI AORenderer_computeShading(const AORenderer *uniform self, - const FrameBuffer *uniform fb, const World *uniform world, const DifferentialGeometry &dg, ScreenSample &sample, diff --git a/modules/cpu/render/ao/surfaces.ispc b/modules/cpu/render/ao/surfaces.ispc index ffc9c4fa1..a6f5ebe56 100644 --- a/modules/cpu/render/ao/surfaces.ispc +++ b/modules/cpu/render/ao/surfaces.ispc @@ -45,7 +45,6 @@ inline vec4f getSurfaceColor( } SYCL_EXTERNAL SSI AORenderer_computeShading(const 
AORenderer *uniform self, - const FrameBuffer *uniform fb, const World *uniform world, const DifferentialGeometry &dg, ScreenSample &sample, diff --git a/modules/cpu/render/ao/volumes.ispc b/modules/cpu/render/ao/volumes.ispc index 4bd984430..960960f72 100644 --- a/modules/cpu/render/ao/volumes.ispc +++ b/modules/cpu/render/ao/volumes.ispc @@ -4,8 +4,8 @@ #include "math/random.ih" #include "math/sampling.ih" -#include "volume/Volume.ih" #include "render/util.ih" +#include "volume/Volume.ih" #include "volume/VolumetricModel.ih" #include "volume/transferFunction/TransferFunctionDispatch.ih" #include "volumes.ih" @@ -32,7 +32,7 @@ static void sampleVolume(VolumeContext &vc, const uniform float samplingRate) { // We have to iterate till we get a valid sample value - float dt; + float dt = 0.f; float sampleVal = nan; while (isnan(sampleVal)) { // Iterate till sampling position is within interval @@ -101,7 +101,7 @@ static float sampleAllVolumes(const VolumeIntervals &volumeIntervals, // Look for the closest sample across all volumes float minDist = inf; int usedSampleId = -1; - for (uniform int i = 0; i < reduce_max(volumeIntervals.numVolumeIntervals); + for (uniform uint32 i = 0; i < reduce_max(volumeIntervals.numVolumeIntervals); i++) { if (i >= volumeIntervals.numVolumeIntervals) break; @@ -162,7 +162,7 @@ SYCL_EXTERNAL vec4f integrateVolumeIntervals( // Iterate through all volumes and initialize its contexts with data that // do not change across ray intervals - for (uniform int i = 0; i < reduce_max(volumeIntervals.numVolumeIntervals); + for (uniform uint32 i = 0; i < reduce_max(volumeIntervals.numVolumeIntervals); i++) { if (i >= volumeIntervals.numVolumeIntervals) break; @@ -184,13 +184,15 @@ SYCL_EXTERNAL vec4f integrateVolumeIntervals( float alpha = 0.f; // Iterate through all ray intervals - for (uniform int i = 0; i < reduce_max(rayIntervals.count) && (alpha < .99f); + for (uniform uint32 i = 0; + i < reduce_max(rayIntervals.count) && (alpha < .99f); i++) { 
if (i >= rayIntervals.count) break; // Iterate through all volumes - for (uniform int j = 0; j < reduce_max(volumeIntervals.numVolumeIntervals); + for (uniform uint32 j = 0; + j < reduce_max(volumeIntervals.numVolumeIntervals); j++) { if (j >= volumeIntervals.numVolumeIntervals) break; diff --git a/modules/cpu/render/bsdfs/Conductor.ih b/modules/cpu/render/bsdfs/Conductor.ih index 59c4a7340..c01dcac0e 100644 --- a/modules/cpu/render/bsdfs/Conductor.ih +++ b/modules/cpu/render/bsdfs/Conductor.ih @@ -15,19 +15,19 @@ struct Conductor Fresnel *uniform fresnel; }; -inline BSDF_EvalRes Conductor_eval(const varying BSDF *uniform super, - const vec3f &wo, - const vec3f &wi, - const uniform FeatureFlagsOther ffo) +inline BSDF_EvalRes Conductor_eval(const varying BSDF *uniform, + const vec3f &, + const vec3f &, + const uniform FeatureFlagsOther) { return make_BSDF_EvalRes_zero(); } inline BSDF_SampleRes Conductor_sample(const varying BSDF *uniform super, const vec3f &wo, - const vec2f &s, - float ss, - const uniform FeatureFlagsOther ffo) + const vec2f &, + float, + const uniform FeatureFlagsOther) { const varying Conductor *uniform self = (const varying Conductor *uniform)super; diff --git a/modules/cpu/render/bsdfs/Dielectric.ih b/modules/cpu/render/bsdfs/Dielectric.ih index 12b56368a..173add8bc 100644 --- a/modules/cpu/render/bsdfs/Dielectric.ih +++ b/modules/cpu/render/bsdfs/Dielectric.ih @@ -16,19 +16,19 @@ struct Dielectric float eta; // etaO / etaI }; -inline BSDF_EvalRes Dielectric_eval(const varying BSDF *uniform super, - const vec3f &wo, - const vec3f &wi, - const uniform FeatureFlagsOther ffo) +inline BSDF_EvalRes Dielectric_eval(const varying BSDF *uniform, + const vec3f &, + const vec3f &, + const uniform FeatureFlagsOther) { return make_BSDF_EvalRes_zero(); } inline BSDF_SampleRes Dielectric_sample(const varying BSDF *uniform super, const vec3f &wo, - const vec2f &s, + const vec2f &, float ss, - const uniform FeatureFlagsOther ffo) + const uniform 
FeatureFlagsOther) { const varying Dielectric *uniform self = (const varying Dielectric *uniform)super; diff --git a/modules/cpu/render/bsdfs/Fresnel.ih b/modules/cpu/render/bsdfs/Fresnel.ih index d48c91047..a66fd996d 100644 --- a/modules/cpu/render/bsdfs/Fresnel.ih +++ b/modules/cpu/render/bsdfs/Fresnel.ih @@ -126,7 +126,7 @@ struct Fresnel #endif }; -inline vec3f Fresnel_evalAvg(const Fresnel *uniform self) +inline vec3f Fresnel_evalAvg(const Fresnel *uniform) { return make_vec3f(0.f); } diff --git a/modules/cpu/render/bsdfs/Lambert.ih b/modules/cpu/render/bsdfs/Lambert.ih index 993596eee..57147a8e2 100644 --- a/modules/cpu/render/bsdfs/Lambert.ih +++ b/modules/cpu/render/bsdfs/Lambert.ih @@ -9,9 +9,9 @@ OSPRAY_BEGIN_ISPC_NAMESPACE inline BSDF_EvalRes Lambert_eval(const varying BSDF *uniform self, - const vec3f &wo, + const vec3f &, const vec3f &wi, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsOther) { BSDF_EvalRes res; float cosThetaI = max(dot(wi, getN(self)), 0.f); @@ -21,10 +21,10 @@ inline BSDF_EvalRes Lambert_eval(const varying BSDF *uniform self, } inline BSDF_SampleRes Lambert_sample(const varying BSDF *uniform self, - const vec3f &wo, + const vec3f &, const vec2f &s, - float ss, - const uniform FeatureFlagsOther ffo) + float, + const uniform FeatureFlagsOther) { const vec3f localDir = cosineSampleHemisphere(s); BSDF_SampleRes res; diff --git a/modules/cpu/render/bsdfs/LambertTransmission.ih b/modules/cpu/render/bsdfs/LambertTransmission.ih index 8fb50bc84..503f82a72 100644 --- a/modules/cpu/render/bsdfs/LambertTransmission.ih +++ b/modules/cpu/render/bsdfs/LambertTransmission.ih @@ -8,9 +8,9 @@ OSPRAY_BEGIN_ISPC_NAMESPACE inline BSDF_EvalRes LambertTransmission_eval(const varying BSDF *uniform self, - const vec3f &wo, + const vec3f &, const vec3f &wi, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsOther) { BSDF_EvalRes res; float cosThetaI = max(-dot(wi, getN(self)), 0.f); @@ -21,10 +21,10 @@ inline BSDF_EvalRes 
LambertTransmission_eval(const varying BSDF *uniform self, inline BSDF_SampleRes LambertTransmission_sample( const varying BSDF *uniform self, - const vec3f &wo, + const vec3f &, const vec2f &s, - float ss, - const uniform FeatureFlagsOther ffo) + float, + const uniform FeatureFlagsOther) { const vec3f localDir = cosineSampleHemisphere(s); BSDF_SampleRes res; diff --git a/modules/cpu/render/bsdfs/MicrofacetConductor.ih b/modules/cpu/render/bsdfs/MicrofacetConductor.ih index dd2a81c51..a0f31342d 100644 --- a/modules/cpu/render/bsdfs/MicrofacetConductor.ih +++ b/modules/cpu/render/bsdfs/MicrofacetConductor.ih @@ -31,7 +31,7 @@ struct MicrofacetConductor inline BSDF_EvalRes MicrofacetConductor_eval(const varying BSDF *uniform super, const vec3f &wo, const vec3f &wi, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsOther) { const varying MicrofacetConductor *uniform self = (const varying MicrofacetConductor *uniform)super; @@ -44,7 +44,6 @@ inline BSDF_EvalRes MicrofacetConductor_eval(const varying BSDF *uniform super, // Compute the microfacet normal vec3f wh = normalize(wi + wo); - float cosThetaH = dot(wh, getN(super)); float cosThetaOH = dot(wo, wh); float cosThetaIH = dot(wi, wh); @@ -76,8 +75,8 @@ inline BSDF_SampleRes MicrofacetConductor_sample( const varying BSDF *uniform super, const vec3f &wo, const vec2f &s, - float ss, - const uniform FeatureFlagsOther ffo) + float, + const uniform FeatureFlagsOther) { const varying MicrofacetConductor *uniform self = (const varying MicrofacetConductor *uniform)super; diff --git a/modules/cpu/render/bsdfs/MicrofacetDielectric.ih b/modules/cpu/render/bsdfs/MicrofacetDielectric.ih index 2170820cc..28d514b1c 100644 --- a/modules/cpu/render/bsdfs/MicrofacetDielectric.ih +++ b/modules/cpu/render/bsdfs/MicrofacetDielectric.ih @@ -36,7 +36,7 @@ inline BSDF_EvalRes MicrofacetDielectric_evalSingle( const varying BSDF *uniform super, const vec3f &wo, const vec3f &wi, - const uniform FeatureFlagsOther ffo) + const 
uniform FeatureFlagsOther) { const varying MicrofacetDielectric *uniform self = (const varying MicrofacetDielectric *uniform)super; diff --git a/modules/cpu/render/bsdfs/MicrofacetDielectricLayer.ih b/modules/cpu/render/bsdfs/MicrofacetDielectricLayer.ih index 319571370..acc62d982 100644 --- a/modules/cpu/render/bsdfs/MicrofacetDielectricLayer.ih +++ b/modules/cpu/render/bsdfs/MicrofacetDielectricLayer.ih @@ -122,7 +122,6 @@ inline BSDF_EvalRes MicrofacetDielectricLayer_eval( float F = fresnelDielectric(cosThetaOH, self->eta) * self->weight; // Evaluate the coating reflection - float cosThetaH = dot(wh, getN(super)); float cosThetaIH = dot(wi, wh); linear3f toLocal = transposed(getFrame(super)); @@ -299,7 +298,6 @@ inline BSDF_SampleRes MicrofacetDielectricLayer_sample( float F = fresnelDielectric(cosThetaOH, self->eta) * self->weight; // Evaluate the coating reflection - float cosThetaH = dot(wh, getN(super)); float cosThetaIH = dot(res.wi, wh); vec3f wi0 = toLocal * res.wi; vec3f wh0 = toLocal * wh; @@ -409,7 +407,7 @@ inline vec3f MicrofacetDielectricLayer_getTransparency( vec3f transmittance, float thickness, float roughness, - float anisotropy, + float, float weight) { if (eta > 1.f) diff --git a/modules/cpu/render/bsdfs/Minneart.ih b/modules/cpu/render/bsdfs/Minneart.ih index 294708034..4c9d1894c 100644 --- a/modules/cpu/render/bsdfs/Minneart.ih +++ b/modules/cpu/render/bsdfs/Minneart.ih @@ -19,7 +19,7 @@ struct Minneart inline BSDF_EvalRes Minneart_eval(const varying BSDF *uniform super, const vec3f &wo, const vec3f &wi, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsOther) { const varying Minneart *uniform self = (const varying Minneart *uniform)super; BSDF_EvalRes res; @@ -35,10 +35,9 @@ inline BSDF_EvalRes Minneart_eval(const varying BSDF *uniform super, inline BSDF_SampleRes Minneart_sample(const varying BSDF *uniform super, const vec3f &wo, const vec2f &s, - float ss, + float, const uniform FeatureFlagsOther ffo) { - const varying 
Minneart *uniform self = (const varying Minneart *uniform)super; BSDF_SampleRes res; const vec3f localDir = cosineSampleHemisphere(s); diff --git a/modules/cpu/render/bsdfs/OrenNayar.ih b/modules/cpu/render/bsdfs/OrenNayar.ih index b132a731a..73a73adf4 100644 --- a/modules/cpu/render/bsdfs/OrenNayar.ih +++ b/modules/cpu/render/bsdfs/OrenNayar.ih @@ -20,7 +20,7 @@ struct OrenNayar inline BSDF_EvalRes OrenNayar_eval(const varying BSDF *uniform super, const vec3f &wo, const vec3f &wi, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsOther) { const varying OrenNayar *uniform self = (const varying OrenNayar *uniform)super; @@ -41,11 +41,9 @@ inline BSDF_EvalRes OrenNayar_eval(const varying BSDF *uniform super, inline BSDF_SampleRes OrenNayar_sample(const varying BSDF *uniform super, const vec3f &wo, const vec2f &s, - float ss, + float, const uniform FeatureFlagsOther ffo) { - const varying OrenNayar *uniform self = - (const varying OrenNayar *uniform)super; const vec3f localDir = cosineSampleHemisphere(s); BSDF_SampleRes res; res.wi = getFrame(super) * localDir; diff --git a/modules/cpu/render/bsdfs/Reflection.ih b/modules/cpu/render/bsdfs/Reflection.ih index 6fbec1ec3..8eabf4903 100644 --- a/modules/cpu/render/bsdfs/Reflection.ih +++ b/modules/cpu/render/bsdfs/Reflection.ih @@ -7,19 +7,19 @@ OSPRAY_BEGIN_ISPC_NAMESPACE -inline BSDF_EvalRes Reflection_eval(const varying BSDF *uniform self, - const vec3f &wo, - const vec3f &wi, - const uniform FeatureFlagsOther ffo) +inline BSDF_EvalRes Reflection_eval(const varying BSDF *uniform, + const vec3f &, + const vec3f &, + const uniform FeatureFlagsOther) { return make_BSDF_EvalRes_zero(); } inline BSDF_SampleRes Reflection_sample(const varying BSDF *uniform self, const vec3f &wo, - const vec2f &s, - float ss, - const uniform FeatureFlagsOther ffo) + const vec2f &, + float, + const uniform FeatureFlagsOther) { BSDF_SampleRes res; diff --git a/modules/cpu/render/bsdfs/RobustDielectric.ih 
b/modules/cpu/render/bsdfs/RobustDielectric.ih index c39aac45f..a64f8bf01 100644 --- a/modules/cpu/render/bsdfs/RobustDielectric.ih +++ b/modules/cpu/render/bsdfs/RobustDielectric.ih @@ -24,10 +24,10 @@ struct RobustDielectric vec3f Ns; }; -inline BSDF_EvalRes RobustDielectric_eval(const varying BSDF *uniform super, - const vec3f &wo, - const vec3f &wi, - const uniform FeatureFlagsOther ffo) +inline BSDF_EvalRes RobustDielectric_eval(const varying BSDF *uniform, + const vec3f &, + const vec3f &, + const uniform FeatureFlagsOther) { return make_BSDF_EvalRes_zero(); } @@ -100,7 +100,7 @@ inline BSDF_SampleRes RobustDielectric_sample(const varying BSDF *uniform super, const vec3f &wo, const vec2f & /*randomV*/, float randomF, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsOther) { const varying RobustDielectric *uniform self = (const varying RobustDielectric *uniform)super; diff --git a/modules/cpu/render/bsdfs/RobustThinDielectric.ih b/modules/cpu/render/bsdfs/RobustThinDielectric.ih index b7522cb91..ef1f72c33 100644 --- a/modules/cpu/render/bsdfs/RobustThinDielectric.ih +++ b/modules/cpu/render/bsdfs/RobustThinDielectric.ih @@ -16,10 +16,10 @@ struct RobustThinDielectric vec3f attenuation; }; -inline BSDF_EvalRes RobustThinDielectric_eval(const varying BSDF *uniform super, - const vec3f &wo, - const vec3f &wi, - const uniform FeatureFlagsOther ffo) +inline BSDF_EvalRes RobustThinDielectric_eval(const varying BSDF *uniform, + const vec3f &, + const vec3f &, + const uniform FeatureFlagsOther) { return make_BSDF_EvalRes_zero(); } @@ -27,9 +27,9 @@ inline BSDF_EvalRes RobustThinDielectric_eval(const varying BSDF *uniform super, inline BSDF_SampleRes RobustThinDielectric_sample( const varying BSDF *uniform super, const vec3f &wo, - const vec2f &s, + const vec2f &, float ss, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsOther) { const varying RobustThinDielectric *uniform self = (const varying RobustThinDielectric *uniform)super; 
diff --git a/modules/cpu/render/bsdfs/Specular.ih b/modules/cpu/render/bsdfs/Specular.ih index d8397eeec..34c414ab8 100644 --- a/modules/cpu/render/bsdfs/Specular.ih +++ b/modules/cpu/render/bsdfs/Specular.ih @@ -21,7 +21,7 @@ struct Specular inline BSDF_EvalRes Specular_eval(const varying BSDF *uniform super, const vec3f &wo, const vec3f &wi, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsOther) { const varying Specular *uniform self = (const varying Specular *uniform)super; BSDF_EvalRes res; @@ -44,8 +44,8 @@ inline BSDF_EvalRes Specular_eval(const varying BSDF *uniform super, inline BSDF_SampleRes Specular_sample(const varying BSDF *uniform super, const vec3f &wo, const vec2f &s, - float ss, - const uniform FeatureFlagsOther ffo) + float, + const uniform FeatureFlagsOther) { const varying Specular *uniform self = (const varying Specular *uniform)super; BSDF_SampleRes res; diff --git a/modules/cpu/render/bsdfs/ThinDielectric.ih b/modules/cpu/render/bsdfs/ThinDielectric.ih index 721d14791..52f900845 100644 --- a/modules/cpu/render/bsdfs/ThinDielectric.ih +++ b/modules/cpu/render/bsdfs/ThinDielectric.ih @@ -17,19 +17,19 @@ struct ThinDielectric vec3f attenuation; }; -inline BSDF_EvalRes ThinDielectric_eval(const varying BSDF *uniform super, - const vec3f &wo, - const vec3f &wi, - const uniform FeatureFlagsOther ffo) +inline BSDF_EvalRes ThinDielectric_eval(const varying BSDF *uniform, + const vec3f &, + const vec3f &, + const uniform FeatureFlagsOther) { return make_BSDF_EvalRes_zero(); } inline BSDF_SampleRes ThinDielectric_sample(const varying BSDF *uniform super, const vec3f &wo, - const vec2f &s, + const vec2f &, float ss, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsOther) { const varying ThinDielectric *uniform self = (const varying ThinDielectric *uniform)super; diff --git a/modules/cpu/render/bsdfs/ThinMicrofacetDielectric.ih b/modules/cpu/render/bsdfs/ThinMicrofacetDielectric.ih index 6ddd3e3ba..1f33673d6 100644 
--- a/modules/cpu/render/bsdfs/ThinMicrofacetDielectric.ih +++ b/modules/cpu/render/bsdfs/ThinMicrofacetDielectric.ih @@ -34,11 +34,10 @@ inline BSDF_EvalRes ThinMicrofacetDielectric_eval( const varying BSDF *uniform super, const vec3f &wo, const vec3f &wi, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsOther) { const varying ThinMicrofacetDielectric *uniform self = (const varying ThinMicrofacetDielectric *uniform)super; - BSDF_EvalRes res; float cosThetaO = dot(wo, getN(super)); if (cosThetaO <= 0.f) @@ -49,7 +48,6 @@ inline BSDF_EvalRes ThinMicrofacetDielectric_eval( if (isReflection) { // Compute the microfacet normal vec3f wh = normalize(wo + wi); - float cosThetaH = dot(wh, getN(super)); float cosThetaOH = dot(wo, wh); float cosThetaIH = dot(wi, wh); @@ -92,7 +90,7 @@ inline BSDF_SampleRes ThinMicrofacetDielectric_sample( const vec3f &wo, const vec2f &s, float ss, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsOther) { const varying ThinMicrofacetDielectric *uniform self = (const varying ThinMicrofacetDielectric *uniform)super; diff --git a/modules/cpu/render/bsdfs/Transmission.ih b/modules/cpu/render/bsdfs/Transmission.ih index 00e2d837b..8efc18bed 100644 --- a/modules/cpu/render/bsdfs/Transmission.ih +++ b/modules/cpu/render/bsdfs/Transmission.ih @@ -8,19 +8,19 @@ OSPRAY_BEGIN_ISPC_NAMESPACE -inline BSDF_EvalRes Transmission_eval(const varying BSDF *uniform self, - const vec3f &wo, - const vec3f &wi, - const uniform FeatureFlagsOther ffo) +inline BSDF_EvalRes Transmission_eval(const varying BSDF *uniform, + const vec3f &, + const vec3f &, + const uniform FeatureFlagsOther) { return make_BSDF_EvalRes_zero(); } inline BSDF_SampleRes Transmission_sample(const varying BSDF *uniform self, const vec3f &wo, - const vec2f &s, - float ss, - const uniform FeatureFlagsOther ffo) + const vec2f &, + float, + const uniform FeatureFlagsOther) { BSDF_SampleRes res; diff --git a/modules/cpu/render/bsdfs/Velvety.ih 
b/modules/cpu/render/bsdfs/Velvety.ih index f28da3c3a..cf0e5b086 100644 --- a/modules/cpu/render/bsdfs/Velvety.ih +++ b/modules/cpu/render/bsdfs/Velvety.ih @@ -19,7 +19,7 @@ struct Velvety inline BSDF_EvalRes Velvety_eval(const varying BSDF *uniform super, const vec3f &wo, const vec3f &wi, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsOther) { const varying Velvety *uniform self = (const varying Velvety *uniform)super; BSDF_EvalRes res; @@ -37,10 +37,9 @@ inline BSDF_EvalRes Velvety_eval(const varying BSDF *uniform super, inline BSDF_SampleRes Velvety_sample(const varying BSDF *uniform super, const vec3f &wo, const vec2f &s, - float ss, + float, const uniform FeatureFlagsOther ffo) { - const varying Velvety *uniform self = (const varying Velvety *uniform)super; BSDF_SampleRes res; const vec3f localDir = cosineSampleHemisphere(s); diff --git a/modules/cpu/render/debug/DebugRenderer.cpp b/modules/cpu/render/debug/DebugRenderer.cpp index 57ed0e8f0..e7ac554a7 100644 --- a/modules/cpu/render/debug/DebugRenderer.cpp +++ b/modules/cpu/render/debug/DebugRenderer.cpp @@ -76,7 +76,7 @@ void DebugRenderer::commit() void DebugRenderer::renderTasks(FrameBuffer *fb, Camera *camera, World *world, - void *perFrameData, + void *, const utility::ArrayView<uint32_t> &taskIDs #ifdef OSPRAY_TARGET_SYCL , @@ -109,7 +109,6 @@ void DebugRenderer::renderTasks(FrameBuffer *fb, fbSh, cameraSh, worldSh, - perFrameData, taskIDsPtr, taskIndex.get_global_id(0), ff); @@ -120,13 +119,8 @@ void DebugRenderer::renderTasks(FrameBuffer *fb, // For prints we have to flush the entire queue, because other stuff is queued syclQueue.wait_and_throw(); #else - ispc::DebugRenderer_renderTasks(&rendererSh->super, - fbSh, - cameraSh, - worldSh, - perFrameData, - taskIDs.data(), - numTasks); + ispc::DebugRenderer_renderTasks( + &rendererSh->super, fbSh, cameraSh, worldSh, taskIDs.data(), numTasks); #endif } diff --git a/modules/cpu/render/debug/DebugRenderer.ih
b/modules/cpu/render/debug/DebugRenderer.ih index 8105ef8b9..d277adbc2 100644 --- a/modules/cpu/render/debug/DebugRenderer.ih +++ b/modules/cpu/render/debug/DebugRenderer.ih @@ -8,7 +8,6 @@ SYCL_EXTERNAL void DebugRenderer_renderTask(Renderer *uniform self, FrameBuffer *uniform fb, Camera *uniform camera, World *uniform world, - void *uniform perFrameData, const uint32 *uniform taskIDs, const int taskIndex0, const uniform ospray::FeatureFlags &ff); diff --git a/modules/cpu/render/debug/DebugRenderer.ispc b/modules/cpu/render/debug/DebugRenderer.ispc index feda4e3b1..98d43ac3f 100644 --- a/modules/cpu/render/debug/DebugRenderer.ispc +++ b/modules/cpu/render/debug/DebugRenderer.ispc @@ -46,11 +46,8 @@ inline float eyeLight(varying ScreenSample &sample) /* a simple test-frame renderer that doesn't even trace a ray, just returns a well-defined test frame (mostly useful for debugging whether frame buffers are properly set up etcpp */ -static void DebugRenderer_testFrame(Renderer *uniform, - FrameBuffer *uniform fb, - World *uniform world, - void *uniform perFrameData, - varying ScreenSample &sample) +static void DebugRenderer_testFrame( + Renderer *uniform, World *uniform, varying ScreenSample &sample) { sample.rgb.x = ((sample.sampleID.x) % 256) / 255.f; sample.rgb.y = ((sample.sampleID.y) % 256) / 255.f; @@ -63,11 +60,8 @@ static void DebugRenderer_testFrame(Renderer *uniform, /* a simple test-frame renderer that doesn't even trace a ray, just returns the absolute of the ray direction */ -static void DebugRenderer_rayDir(Renderer *uniform, - FrameBuffer *uniform fb, - World *uniform world, - void *uniform perFrameData, - varying ScreenSample &sample) +static void DebugRenderer_rayDir( + Renderer *uniform, World *uniform, varying ScreenSample &sample) { sample.rgb = absf(sample.ray.dir); sample.alpha = 1.f; @@ -75,9 +69,7 @@ static void DebugRenderer_rayDir(Renderer *uniform, } static void DebugRenderer_eyeLight(Renderer *uniform self, - FrameBuffer *uniform fb, World 
*uniform world, - void *uniform perFrameData, varying ScreenSample &sample, const uniform FeatureFlags &ff) { @@ -88,9 +80,7 @@ static void DebugRenderer_eyeLight(Renderer *uniform self, } static void DebugRenderer_Ng(Renderer *uniform self, - FrameBuffer *uniform fb, World *uniform world, - void *uniform perFrameData, varying ScreenSample &sample, const uniform FeatureFlags &ff) { @@ -103,9 +93,7 @@ static void DebugRenderer_Ng(Renderer *uniform self, } static void DebugRenderer_Ns(Renderer *uniform self, - FrameBuffer *uniform fb, World *uniform world, - void *uniform perFrameData, varying ScreenSample &sample, const uniform FeatureFlags &ff) { @@ -118,9 +106,7 @@ static void DebugRenderer_Ns(Renderer *uniform self, } static void DebugRenderer_texCoord(Renderer *uniform self, - FrameBuffer *uniform fb, World *uniform world, - void *uniform perFrameData, varying ScreenSample &sample, const uniform FeatureFlags &ff) { @@ -132,9 +118,7 @@ static void DebugRenderer_texCoord(Renderer *uniform self, } static void DebugRenderer_dPds(Renderer *uniform self, - FrameBuffer *uniform fb, World *uniform world, - void *uniform perFrameData, varying ScreenSample &sample, const uniform FeatureFlags &ff) { @@ -152,9 +136,7 @@ static void DebugRenderer_dPds(Renderer *uniform self, } static void DebugRenderer_dPdt(Renderer *uniform self, - FrameBuffer *uniform fb, World *uniform world, - void *uniform perFrameData, varying ScreenSample &sample, const uniform FeatureFlags &ff) { @@ -173,9 +155,7 @@ static void DebugRenderer_dPdt(Renderer *uniform self, } static void DebugRenderer_vertexColor(Renderer *uniform self, - FrameBuffer *uniform fb, World *uniform world, - void *uniform perFrameData, varying ScreenSample &sample, const uniform FeatureFlags &ff) { @@ -189,9 +169,7 @@ static void DebugRenderer_vertexColor(Renderer *uniform self, } static void DebugRenderer_primID(Renderer *uniform self, - FrameBuffer *uniform fb, World *uniform world, - void *uniform perFrameData, varying 
ScreenSample &sample, const uniform FeatureFlags &ff) { @@ -202,9 +180,7 @@ static void DebugRenderer_primID(Renderer *uniform self, } static void DebugRenderer_instID(Renderer *uniform self, - FrameBuffer *uniform fb, World *uniform world, - void *uniform perFrameData, varying ScreenSample &sample, const uniform FeatureFlags &ff) { @@ -215,9 +191,7 @@ static void DebugRenderer_instID(Renderer *uniform self, } static void DebugRenderer_geomID(Renderer *uniform self, - FrameBuffer *uniform fb, World *uniform world, - void *uniform perFrameData, varying ScreenSample &sample, const uniform FeatureFlags &ff) { @@ -228,9 +202,7 @@ static void DebugRenderer_geomID(Renderer *uniform self, } static void DebugRenderer_backfacing_Ng(Renderer *uniform self, - FrameBuffer *uniform fb, World *uniform world, - void *uniform perFrameData, varying ScreenSample &sample, const uniform FeatureFlags &ff) { @@ -243,9 +215,7 @@ static void DebugRenderer_backfacing_Ng(Renderer *uniform self, } static void DebugRenderer_backfacing_Ns(Renderer *uniform self, - FrameBuffer *uniform fb, World *uniform world, - void *uniform perFrameData, varying ScreenSample &sample, const uniform FeatureFlags &ff) { @@ -263,7 +233,6 @@ static void DebugRenderer_backfacing_Ns(Renderer *uniform self, static void DebugRenderer_volume(Renderer *uniform self, FrameBuffer *uniform fb, World *uniform world, - void *uniform perFrameData, varying ScreenSample &sample, const uniform FeatureFlags &ff) { @@ -308,7 +277,6 @@ static void DebugRenderer_volume(Renderer *uniform self, foreach_unique (m in model) { #endif Volume *uniform volume = m->volume; - TransferFunction *uniform tf = m->transferFunction; float time = 0.5f; VKLIntervalIterator intervalIterator = vklInitIntervalIteratorV( @@ -380,59 +348,58 @@ static void DebugRenderer_volume(Renderer *uniform self, static void DebugRenderer_renderSample(Renderer *uniform _self, FrameBuffer *uniform fb, World *uniform world, - void *uniform perFrameData, varying 
ScreenSample &sample, const uniform FeatureFlags &ff) { DebugRenderer *uniform self = (DebugRenderer * uniform) _self; switch (self->type) { case TEST_FRAME: - DebugRenderer_testFrame(_self, fb, world, perFrameData, sample); + DebugRenderer_testFrame(_self, world, sample); break; case RAY_DIR: - DebugRenderer_rayDir(_self, fb, world, perFrameData, sample); + DebugRenderer_rayDir(_self, world, sample); break; case EYE_LIGHT: - DebugRenderer_eyeLight(_self, fb, world, perFrameData, sample, ff); + DebugRenderer_eyeLight(_self, world, sample, ff); break; case NG: - DebugRenderer_Ng(_self, fb, world, perFrameData, sample, ff); + DebugRenderer_Ng(_self, world, sample, ff); break; case NS: - DebugRenderer_Ns(_self, fb, world, perFrameData, sample, ff); + DebugRenderer_Ns(_self, world, sample, ff); break; case COLOR: - DebugRenderer_vertexColor(_self, fb, world, perFrameData, sample, ff); + DebugRenderer_vertexColor(_self, world, sample, ff); break; case TEX_COORD: - DebugRenderer_texCoord(_self, fb, world, perFrameData, sample, ff); + DebugRenderer_texCoord(_self, world, sample, ff); break; case DPDS: - DebugRenderer_dPds(_self, fb, world, perFrameData, sample, ff); + DebugRenderer_dPds(_self, world, sample, ff); break; case DPDT: - DebugRenderer_dPdt(_self, fb, world, perFrameData, sample, ff); + DebugRenderer_dPdt(_self, world, sample, ff); break; case PRIM_ID: - DebugRenderer_primID(_self, fb, world, perFrameData, sample, ff); + DebugRenderer_primID(_self, world, sample, ff); break; case GEOM_ID: - DebugRenderer_geomID(_self, fb, world, perFrameData, sample, ff); + DebugRenderer_geomID(_self, world, sample, ff); break; case INST_ID: - DebugRenderer_instID(_self, fb, world, perFrameData, sample, ff); + DebugRenderer_instID(_self, world, sample, ff); break; case BACKFACING_NG: - DebugRenderer_backfacing_Ng(_self, fb, world, perFrameData, sample, ff); + DebugRenderer_backfacing_Ng(_self, world, sample, ff); break; case BACKFACING_NS: - DebugRenderer_backfacing_Ns(_self, 
fb, world, perFrameData, sample, ff); + DebugRenderer_backfacing_Ns(_self, world, sample, ff); break; case VOLUME: - DebugRenderer_volume(_self, fb, world, perFrameData, sample, ff); + DebugRenderer_volume(_self, fb, world, sample, ff); break; default: - DebugRenderer_testFrame(_self, fb, world, perFrameData, sample); + DebugRenderer_testFrame(_self, world, sample); break; }; } @@ -446,20 +413,17 @@ SYCL_EXTERNAL void DebugRenderer_renderTask(Renderer *uniform self, FrameBuffer *uniform fb, Camera *uniform camera, World *uniform world, - void *uniform perFrameData, const uint32 *uniform taskIDs, const int taskIndex0, const uniform FeatureFlags &ff) { - Renderer_default_renderTask( - self, fb, camera, world, perFrameData, taskIDs, taskIndex0, ff); + Renderer_default_renderTask(self, fb, camera, world, taskIDs, taskIndex0, ff); } #else export void DebugRenderer_renderTasks(void *uniform _self, void *uniform _fb, void *uniform _camera, void *uniform _world, - void *uniform perFrameData, void *uniform _taskIDs, uniform uint32 numTasks) { @@ -469,7 +433,7 @@ export void DebugRenderer_renderTasks(void *uniform _self, World *uniform world = (World * uniform) _world; const uint32 *uniform taskIDs = (const uint32 *uniform)_taskIDs; launch[numTasks] Renderer_default_renderTask( - self, fb, camera, world, perFrameData, taskIDs, ffAll()); + self, fb, camera, world, taskIDs, ffAll()); sync; } #endif diff --git a/modules/cpu/render/materials/Alloy.ispc b/modules/cpu/render/materials/Alloy.ispc index 73997cb3b..e6cea8817 100644 --- a/modules/cpu/render/materials/Alloy.ispc +++ b/modules/cpu/render/materials/Alloy.ispc @@ -17,8 +17,8 @@ SYCL_EXTERNAL const varying BSDF *varying Alloy_getBSDF( const uniform Material *uniform super, uniform ShadingContext *uniform ctx, const DifferentialGeometry &dg, - const Ray &ray, - const Medium &currentMedium) + const Ray &, + const Medium &) { const uniform Alloy *uniform self = (const uniform Alloy *uniform)super; varying linear3f *uniform
shadingframe = diff --git a/modules/cpu/render/materials/CarPaint.ispc b/modules/cpu/render/materials/CarPaint.ispc index 11f978e6a..2cfd13ef7 100644 --- a/modules/cpu/render/materials/CarPaint.ispc +++ b/modules/cpu/render/materials/CarPaint.ispc @@ -25,7 +25,7 @@ SYCL_EXTERNAL const varying BSDF *varying CarPaint_getBSDF( uniform ShadingContext *uniform ctx, const DifferentialGeometry &dg, const Ray &ray, - const Medium &currentMedium) + const Medium &) { const uniform CarPaint *uniform self = (const uniform CarPaint *uniform)super; varying BSDF *varying bsdf = NULL; diff --git a/modules/cpu/render/materials/Glass.ispc b/modules/cpu/render/materials/Glass.ispc index 7d54ec0cd..6e3b2521f 100644 --- a/modules/cpu/render/materials/Glass.ispc +++ b/modules/cpu/render/materials/Glass.ispc @@ -17,7 +17,7 @@ SYCL_EXTERNAL const varying BSDF *varying Glass_getBSDF( const uniform Material *uniform super, uniform ShadingContext *uniform ctx, const DifferentialGeometry &dg, - const Ray &ray, + const Ray &, const Medium &currentMedium) { uniform const Glass *uniform self = (uniform const Glass *uniform)super; @@ -47,7 +47,7 @@ SYCL_EXTERNAL vec3f Glass_getTransparency( } SYCL_EXTERNAL void Glass_selectNextMedium(const uniform Material *uniform super, - const DifferentialGeometry &dg, + const DifferentialGeometry &, Medium &currentMedium) { const uniform Glass *uniform self = (const uniform Glass *uniform)super; diff --git a/modules/cpu/render/materials/Luminous.ispc b/modules/cpu/render/materials/Luminous.ispc index c13ca7523..1b414d3bc 100644 --- a/modules/cpu/render/materials/Luminous.ispc +++ b/modules/cpu/render/materials/Luminous.ispc @@ -14,7 +14,7 @@ OSPRAY_BEGIN_ISPC_NAMESPACE SYCL_EXTERNAL const varying BSDF *varying Luminous_getBSDF( const uniform Material *uniform _self, uniform ShadingContext *uniform ctx, - const DifferentialGeometry &dg, + const DifferentialGeometry &, const Ray &, const Medium &) { diff --git a/modules/cpu/render/materials/Metal.ispc
b/modules/cpu/render/materials/Metal.ispc index 8e321c900..3f75ac023 100644 --- a/modules/cpu/render/materials/Metal.ispc +++ b/modules/cpu/render/materials/Metal.ispc @@ -17,8 +17,8 @@ SYCL_EXTERNAL const varying BSDF *varying Metal_getBSDF( const uniform Material *uniform super, uniform ShadingContext *uniform ctx, const DifferentialGeometry &dg, - const Ray &ray, - const Medium &currentMedium) + const Ray &, + const Medium &) { const uniform Metal *uniform self = (const uniform Metal *uniform)super; varying linear3f *uniform shadingframe = diff --git a/modules/cpu/render/materials/MetallicPaint.ispc b/modules/cpu/render/materials/MetallicPaint.ispc index d7ba1c71f..490496487 100644 --- a/modules/cpu/render/materials/MetallicPaint.ispc +++ b/modules/cpu/render/materials/MetallicPaint.ispc @@ -19,8 +19,8 @@ SYCL_EXTERNAL const varying BSDF *varying MetallicPaint_getBSDF( const uniform Material *uniform super, uniform ShadingContext *uniform ctx, const DifferentialGeometry &dg, - const Ray &ray, - const Medium &currentMedium) + const Ray &, + const Medium &) { const uniform MetallicPaint *uniform self = (const uniform MetallicPaint *uniform)super; diff --git a/modules/cpu/render/materials/OBJ.ispc b/modules/cpu/render/materials/OBJ.ispc index 5099507ef..d21c9db81 100644 --- a/modules/cpu/render/materials/OBJ.ispc +++ b/modules/cpu/render/materials/OBJ.ispc @@ -20,8 +20,8 @@ SYCL_EXTERNAL const varying BSDF *varying OBJ_getBSDF( const uniform Material *uniform super, uniform ShadingContext *uniform ctx, const DifferentialGeometry &dg, - const Ray &ray, - const Medium &currentMedium, + const Ray &, + const Medium &, const uniform FeatureFlagsOther ffo) { uniform const OBJ *uniform self = (uniform const OBJ *uniform)super; @@ -67,8 +67,8 @@ SYCL_EXTERNAL const varying BSDF *varying OBJ_getBSDF( SYCL_EXTERNAL vec3f OBJ_getTransparency(const uniform Material *uniform super, const DifferentialGeometry &dg, - const Ray &ray, - const Medium &currentMedium, + const Ray &, + const Medium &, const
uniform FeatureFlagsOther ffo) { uniform const OBJ *uniform self = (uniform const OBJ *uniform)super; diff --git a/modules/cpu/render/materials/Plastic.ispc b/modules/cpu/render/materials/Plastic.ispc index 2a855d74a..603d99811 100644 --- a/modules/cpu/render/materials/Plastic.ispc +++ b/modules/cpu/render/materials/Plastic.ispc @@ -17,8 +17,8 @@ SYCL_EXTERNAL const varying BSDF *varying Plastic_getBSDF( const uniform Material *uniform super, uniform ShadingContext *uniform ctx, const DifferentialGeometry &dg, - const Ray &ray, - const Medium &currentMedium) + const Ray &, + const Medium &) { const uniform Plastic *uniform self = (const uniform Plastic *uniform)super; diff --git a/modules/cpu/render/materials/Principled.ispc b/modules/cpu/render/materials/Principled.ispc index 7d5a98e7b..c7ccc00e4 100644 --- a/modules/cpu/render/materials/Principled.ispc +++ b/modules/cpu/render/materials/Principled.ispc @@ -31,7 +31,7 @@ SYCL_EXTERNAL const varying BSDF *varying Principled_getBSDF( const uniform Material *uniform super, uniform ShadingContext *uniform ctx, const DifferentialGeometry &dg, - const Ray &ray, + const Ray &, const Medium &currentMedium) { const uniform Principled *uniform self = diff --git a/modules/cpu/render/materials/ThinGlass.ispc b/modules/cpu/render/materials/ThinGlass.ispc index 1bad2f0c9..b9a164b51 100644 --- a/modules/cpu/render/materials/ThinGlass.ispc +++ b/modules/cpu/render/materials/ThinGlass.ispc @@ -30,8 +30,8 @@ SYCL_EXTERNAL const varying BSDF *varying ThinGlass_getBSDF( const Material *uniform super, ShadingContext *uniform ctx, const DifferentialGeometry &dg, - const Ray &ray, - const Medium &currentMedium) + const Ray &, + const Medium &) { const ThinGlass *uniform self = (const ThinGlass *uniform)super; @@ -44,7 +44,7 @@ SYCL_EXTERNAL const varying BSDF *varying ThinGlass_getBSDF( SYCL_EXTERNAL vec3f ThinGlass_getTransparency(const Material *uniform material, const DifferentialGeometry &dg, const Ray &ray, - const Medium &currentMedium) + const Medium &)
{ const ThinGlass *uniform self = (const ThinGlass *uniform)material; diff --git a/modules/cpu/render/materials/Velvet.ispc b/modules/cpu/render/materials/Velvet.ispc index 26e384165..449145a57 100644 --- a/modules/cpu/render/materials/Velvet.ispc +++ b/modules/cpu/render/materials/Velvet.ispc @@ -16,8 +16,8 @@ SYCL_EXTERNAL const varying BSDF *varying Velvet_getBSDF( const uniform Material *uniform super, uniform ShadingContext *uniform ctx, const DifferentialGeometry &dg, - const Ray &ray, - const Medium &currentMedium) + const Ray &, + const Medium &) { const uniform Velvet *uniform self = (const uniform Velvet *uniform)super; diff --git a/modules/cpu/render/pathtracer/NextEventEstimation.ispc b/modules/cpu/render/pathtracer/NextEventEstimation.ispc index 2345eb9ba..25e070b38 100644 --- a/modules/cpu/render/pathtracer/NextEventEstimation.ispc +++ b/modules/cpu/render/pathtracer/NextEventEstimation.ispc @@ -32,7 +32,7 @@ SYCL_EXTERNAL vec3f nextEventEstimation(const PathContext &pathContext, const World *uniform world = pathContext.world; const uniform PathTracerData &pathtracerData = *((const uniform PathTracerData *)world->pathtracerData); - for (uniform int i = 0; i < pathContext.numLightSamples; i++) { + for (uniform uint32 i = 0; i < pathContext.numLightSamples; i++) { // select a random light source from the list const float s1 = LDSampler_getFloat( pathContext.ldSampler, pathState.sampleDim + 4 + i * 3); @@ -129,22 +129,18 @@ SYCL_EXTERNAL vec3f nextEventEstimation(const PathContext &pathContext, if (reduce_max(lightContrib) > 0) { #ifdef OSPRAY_ENABLE_VOLUMES if (ff.volume & FFV_VOLUME) { - const float T = volumeTransmittance(pathState, - pathContext.world, + const float T = volumeTransmittance(pathContext.world, shadowRay, rayIntervals, - pathContext.ldSampler, pathContext.randomSampler); if (reduce_max(T) > 0) { // we have to use an independent transmittance estimate for MIS to get // a correct result const float T_mis = pathState.disableFWD ||
pathState.disableNEE ? 1.f - : volumeTransmittance(pathState, - pathContext.world, + : volumeTransmittance(pathContext.world, shadowRay, rayIntervals, - pathContext.ldSampler, pathContext.randomSampler); L = L + T * lightContrib diff --git a/modules/cpu/render/pathtracer/PathSampler.ispc b/modules/cpu/render/pathtracer/PathSampler.ispc index 8fddd1507..656849248 100644 --- a/modules/cpu/render/pathtracer/PathSampler.ispc +++ b/modules/cpu/render/pathtracer/PathSampler.ispc @@ -29,7 +29,7 @@ OSPRAY_BEGIN_ISPC_NAMESPACE inline void postIntersect(const PathContext &pathContext, - const PathState &pathState, + const PathState &, PathVertex &pathVertex, Ray &ray, const uniform FeatureFlags &ff) @@ -52,7 +52,7 @@ inline void postIntersect(const PathContext &pathContext, pathVertex.dg.instID = ray.instID; #ifdef OSPRAY_TARGET_SYCL { - const int instID = ray.instID; + const uint32 instID = ray.instID; #else foreach_unique (instID in ray.instID) { #endif @@ -87,7 +87,6 @@ inline Scattering_SampleRes sampleDirection(const PathContext &pathContext, LDSampler_getFloat(pathContext.ldSampler, pathState.sampleDim + 2); Scattering_SampleRes fs; if (pathVertex.type == SURFACE) { - vec3f Ns = pathVertex.dg.Ns; #ifdef OSPRAY_TARGET_SYCL { const BSDF *f = pathVertex.bsdf; @@ -180,11 +179,9 @@ SYCL_EXTERNAL void samplePath(const PathContext &pathContext, #ifdef OSPRAY_ENABLE_VOLUMES if (ff.volume & FFV_VOLUME) { float extinctionCoefficient; - float freePath = volumeSampleFreePath(pathState, - pathContext.world, + float freePath = volumeSampleFreePath(pathContext.world, ray, rayIntervals, - pathContext.ldSampler, pathContext.randomSampler, &pathVertex.volume, extinctionCoefficient, diff --git a/modules/cpu/render/pathtracer/PathStructs.ih b/modules/cpu/render/pathtracer/PathStructs.ih index 401d0ecf6..42cb0f12a 100644 --- a/modules/cpu/render/pathtracer/PathStructs.ih +++ b/modules/cpu/render/pathtracer/PathStructs.ih @@ -54,8 +54,8 @@ struct PathContext const varying vec2f *uniform 
pixel; varying LDSampler *uniform ldSampler; varying RandomSampler *uniform randomSampler; - uniform int numLights; - uniform int numLightSamples; + uniform uint32 numLights; + uniform uint32 numLightSamples; uniform float *uniform lightsCDF; uniform uint32 numBounceSampleDims; // BSDF sample (3D), roulette (1D), light // samples (numLightSamples*(1D+2D)) diff --git a/modules/cpu/render/pathtracer/PathTracer.cpp b/modules/cpu/render/pathtracer/PathTracer.cpp index 916668f1b..b4f578422 100644 --- a/modules/cpu/render/pathtracer/PathTracer.cpp +++ b/modules/cpu/render/pathtracer/PathTracer.cpp @@ -18,7 +18,6 @@ SYCL_EXTERNAL void PathTracer_renderTask(Renderer *uniform _self, FrameBuffer *uniform fb, Camera *uniform camera, World *uniform world, - void *uniform perFrameData, const uint32 *uniform taskIDs, const int taskIndex0, const uniform ospray::FeatureFlags &ff); @@ -90,7 +89,7 @@ void *PathTracer::beginFrame(FrameBuffer *, World *world) void PathTracer::renderTasks(FrameBuffer *fb, Camera *camera, World *world, - void *perFrameData, + void *, const utility::ArrayView<uint32_t> &taskIDs #ifdef OSPRAY_TARGET_SYCL , @@ -123,7 +122,6 @@ void PathTracer::renderTasks(FrameBuffer *fb, fbSh, cameraSh, worldSh, - perFrameData, taskIDsPtr, taskIndex.get_global_id(0), ff); @@ -134,13 +132,8 @@ void PathTracer::renderTasks(FrameBuffer *fb, // For prints we have to flush the entire queue, because other stuff is queued syclQueue.wait_and_throw(); #else - ispc::PathTracer_renderTasks(&rendererSh->super, - fbSh, - cameraSh, - worldSh, - perFrameData, - taskIDs.data(), - numTasks); + ispc::PathTracer_renderTasks( + &rendererSh->super, fbSh, cameraSh, worldSh, taskIDs.data(), numTasks); #endif } diff --git a/modules/cpu/render/pathtracer/PathTracer.ispc b/modules/cpu/render/pathtracer/PathTracer.ispc index e3ed54a17..19ac7356a 100644 --- a/modules/cpu/render/pathtracer/PathTracer.ispc +++ b/modules/cpu/render/pathtracer/PathTracer.ispc @@ -33,9 +33,6 @@ OSPRAY_BEGIN_ISPC_NAMESPACE static
ScreenSample PathTraceIntegrator_Li(const PathTracer *uniform self, const World *uniform world, - FrameBuffer *uniform fb, - const uint32 ix, - const uint32 iy, const vec2f &pixel, // normalized, i.e. in [0..1] Ray &ray, varying LDSampler *uniform ldSampler, @@ -53,9 +50,9 @@ static ScreenSample PathTraceIntegrator_Li(const PathTracer *uniform self, uniform PathTracerData &pathtracerData = *((uniform PathTracerData *)world->pathtracerData); - const uniform int numLights = + const uniform uint32 numLights = pathtracerData.lights ? min(MAX_LIGHTS, pathtracerData.numLights) : 0; - const uniform int numLightSamples = + const uniform uint32 numLightSamples = self->numLightSamples >= 0 && numLights > 0 ? self->numLightSamples : numLights; PathContext pathContext; @@ -159,9 +156,6 @@ static ScreenSample PathTracer_renderPixel(PathTracer *uniform self, ScreenSample sample = PathTraceIntegrator_Li(self, world, - fb, - ix, - iy, cameraSample.screen, screenSample.ray, ldSampler, @@ -198,7 +192,6 @@ task FrameBuffer *uniform fb, Camera *uniform camera, World *uniform world, - void *uniform perFrameData, const uint32 *uniform taskIDs, #ifdef OSPRAY_TARGET_SYCL const int taskIndex0, @@ -214,8 +207,11 @@ task } #ifdef OSPRAY_TARGET_SYCL - for (uint32 y = taskDesc.region.lower.y; y < taskDesc.region.upper.y; ++y) - for (uint32 x = taskDesc.region.lower.x; x < taskDesc.region.upper.x; ++x) { + for (uint32 y = taskDesc.region.lower.y; y < (uint32)taskDesc.region.upper.y; + ++y) + for (uint32 x = taskDesc.region.lower.x; + x < (uint32)taskDesc.region.upper.x; + ++x) { #else foreach_tiled (y = taskDesc.region.lower.y... taskDesc.region.upper.y, x = taskDesc.region.lower.x... 
taskDesc.region.upper.x) { @@ -236,7 +232,6 @@ export void PathTracer_renderTasks(void *uniform _self, void *uniform _fb, void *uniform _camera, void *uniform _world, - void *uniform perFrameData, void *uniform _taskIDs, uniform uint32 numTasks) { @@ -247,7 +242,7 @@ export void PathTracer_renderTasks(void *uniform _self, const uint32 *uniform taskIDs = (const uint32 *uniform)_taskIDs; launch[numTasks] PathTracer_renderTask( - self, fb, camera, world, perFrameData, taskIDs, ffAll()); + self, fb, camera, world, taskIDs, ffAll()); } #endif diff --git a/modules/cpu/render/pathtracer/PathTracerShared.h b/modules/cpu/render/pathtracer/PathTracerShared.h index 3273de1dc..994a94f16 100644 --- a/modules/cpu/render/pathtracer/PathTracerShared.h +++ b/modules/cpu/render/pathtracer/PathTracerShared.h @@ -13,7 +13,7 @@ struct PathTracer { Renderer super; - int32 rouletteDepth; // path depth from which on RR is used + uint32 rouletteDepth; // path depth from which on RR is used float maxRadiance; // coefficients of plane equation defining geometry to catch shadows for // compositing; disabled if normal is zero-length diff --git a/modules/cpu/render/pathtracer/ShadowCatcher.ispc b/modules/cpu/render/pathtracer/ShadowCatcher.ispc index abf57282b..ddd178824 100644 --- a/modules/cpu/render/pathtracer/ShadowCatcher.ispc +++ b/modules/cpu/render/pathtracer/ShadowCatcher.ispc @@ -21,7 +21,7 @@ SYCL_EXTERNAL bool shadowCatcher(const PathContext &pathContext, PathState &pathState, PathVertex &pathVertex, const Ray &ray, - ScreenSample &sample, + ScreenSample &, const uniform FeatureFlags &ff) { @@ -52,7 +52,7 @@ SYCL_EXTERNAL bool shadowCatcher(const PathContext &pathContext, const World *uniform world = pathContext.world; const uniform PathTracerData &pathtracerData = *((const uniform PathTracerData *)world->pathtracerData); - for (uniform int i = 0; i < pathContext.numLights; i++) { + for (uniform uint32 i = 0; i < pathContext.numLights; i++) { const Light *uniform light = 
pathtracerData.lights[i]; const vec2f s = LDSampler_getFloat2( pathContext.ldSampler, pathState.sampleDim + 4 + i * 2); diff --git a/modules/cpu/render/pathtracer/TransparentShadow.ispc b/modules/cpu/render/pathtracer/TransparentShadow.ispc index ad22455de..746cc6d3b 100644 --- a/modules/cpu/render/pathtracer/TransparentShadow.ispc +++ b/modules/cpu/render/pathtracer/TransparentShadow.ispc @@ -23,7 +23,7 @@ SYCL_EXTERNAL vec3f transparentShadow(const uniform PathTracer *uniform self, Medium medium, const uniform FeatureFlags &ff) { - uniform int maxDepth = self->super.maxDepth; + uniform uint32 maxDepth = self->super.maxDepth; const float tOriginal = shadowRay.t; while (1) { diff --git a/modules/cpu/render/pathtracer/VirtualLight.ispc b/modules/cpu/render/pathtracer/VirtualLight.ispc index 21845aecc..0b1cd9aa8 100644 --- a/modules/cpu/render/pathtracer/VirtualLight.ispc +++ b/modules/cpu/render/pathtracer/VirtualLight.ispc @@ -15,11 +15,8 @@ OSPRAY_BEGIN_ISPC_NAMESPACE -inline box1f getMinMaxDistForVirtualLights(const PathContext &pathContext, - const PathState &pathState, - const PathVertex &lastVertex, - const PathVertex &pathVertex, - const Ray &ray) +inline box1f getMinMaxDistForVirtualLights( + const PathVertex &lastVertex, const PathVertex &pathVertex, const Ray &ray) { box1f interval; @@ -54,12 +51,13 @@ SYCL_EXTERNAL vec3f evaluateVirtualLights(const PathContext &pathContext, if (pathContext.numLights <= pathtracerData.numGeoLights) return L; // return if no virtual lights at all - box1f intervalLightDist = getMinMaxDistForVirtualLights( - pathContext, pathState, lastVertex, pathVertex, ray); + box1f intervalLightDist = + getMinMaxDistForVirtualLights(lastVertex, pathVertex, ray); const uniform float selectionPDF = rcp((uniform float)pathContext.numLights - pathtracerData.numGeoLights); - for (uniform int i = pathtracerData.numGeoLights; i < pathContext.numLights; + for (uniform uint32 i = pathtracerData.numGeoLights; + i < pathContext.numLights; i++) { 
const Light *uniform light = pathtracerData.lights[i]; if (!pathState.straightPath || light->isVisible) { @@ -90,11 +88,9 @@ SYCL_EXTERNAL vec3f evaluateVirtualLights(const PathContext &pathContext, float T = 1.f; #ifdef OSPRAY_ENABLE_VOLUMES if (ff.volume & FFV_VOLUME) { - T = volumeTransmittance(pathState, - pathContext.world, + T = volumeTransmittance(pathContext.world, shadowRay, rayIntervals, - pathContext.ldSampler, pathContext.randomSampler); } #endif diff --git a/modules/cpu/render/pathtracer/volumes/HenyeyGreenstein.ih b/modules/cpu/render/pathtracer/volumes/HenyeyGreenstein.ih index c0163f947..705d27744 100644 --- a/modules/cpu/render/pathtracer/volumes/HenyeyGreenstein.ih +++ b/modules/cpu/render/pathtracer/volumes/HenyeyGreenstein.ih @@ -39,7 +39,7 @@ inline Scattering_EvalRes HenyeyGreenstein_eval( } inline Scattering_SampleRes HenyeyGreenstein_sample( - float anisotropy, const vec3f wo, const vec2f s, float /*ss*/) + float anisotropy, const vec3f wo, const vec2f s, float) { Scattering_SampleRes res; diff --git a/modules/cpu/render/pathtracer/volumes/VolumeSampler.ih b/modules/cpu/render/pathtracer/volumes/VolumeSampler.ih index e393fcd94..7e9d15266 100644 --- a/modules/cpu/render/pathtracer/volumes/VolumeSampler.ih +++ b/modules/cpu/render/pathtracer/volumes/VolumeSampler.ih @@ -16,21 +16,17 @@ struct RandomSampler; struct Ray; struct RayIntervals; -SYCL_EXTERNAL float volumeSampleFreePath(const PathState &pathState, - const World *uniform world, +SYCL_EXTERNAL float volumeSampleFreePath(const World *uniform world, Ray &ray, RayIntervals &rayIntervals, - varying LDSampler *uniform ldSampler, varying RandomSampler *uniform randomSampler, const VolumetricModel *varying *uniform sampledInstance, float &sampledExtinctionCoefficient, vec3f &sampledAlbedo); -SYCL_EXTERNAL float volumeTransmittance(const PathState &pathState, - const World *uniform world, +SYCL_EXTERNAL float volumeTransmittance(const World *uniform world, Ray &ray, RayIntervals 
&rayIntervals, - varying LDSampler *uniform ldSampler, varying RandomSampler *uniform randomSampler); inline bool isSmoothVolumeVertex(const PathVertex &pathVertex) diff --git a/modules/cpu/render/pathtracer/volumes/VolumeSampler.ispc b/modules/cpu/render/pathtracer/volumes/VolumeSampler.ispc index 72bd0f7cf..b68a6e11b 100644 --- a/modules/cpu/render/pathtracer/volumes/VolumeSampler.ispc +++ b/modules/cpu/render/pathtracer/volumes/VolumeSampler.ispc @@ -50,8 +50,6 @@ float delta_tracking(const VolumetricModel *uniform vModel, float t = 0.f; VKLInterval interval; while (vklIterateIntervalV(intervalIterator, &interval)) { - box1f subInterval = - make_box1f(interval.tRange.lower, interval.tRange.upper); t = interval.tRange.lower; const float maxOpacity = @@ -94,11 +92,9 @@ float delta_tracking(const VolumetricModel *uniform vModel, return inf; } -SYCL_EXTERNAL float volumeSampleFreePath(const PathState &pathState, - const World *uniform world, +SYCL_EXTERNAL float volumeSampleFreePath(const World *uniform world, Ray &ray, RayIntervals &rayIntervals, - varying LDSampler *uniform ldSampler, varying RandomSampler *uniform randomSampler, const VolumetricModel *varying *uniform sampledInstance, float &sampledExtinctionCoefficient, @@ -126,7 +122,7 @@ SYCL_EXTERNAL float volumeSampleFreePath(const PathState &pathState, { *sampledInstance = NULL; } - for (uniform int i = 0; i < reduce_max(volumeIntervals.numVolumeIntervals); + for (uniform uint32 i = 0; i < reduce_max(volumeIntervals.numVolumeIntervals); i++) { if (i >= volumeIntervals.numVolumeIntervals) break; @@ -159,7 +155,7 @@ SYCL_EXTERNAL float volumeSampleFreePath(const PathState &pathState, #else foreach_unique (inst in instance) { #endif - for (int j = 0; j < rayIntervals.count; j++) { + for (uint32 j = 0; j < rayIntervals.count; j++) { range1f rInterval = rayIntervals.intervals[j]; // Skip ray intervals outside of the volume range @@ -204,11 +200,9 @@ SYCL_EXTERNAL float volumeSampleFreePath(const PathState 
&pathState, return min_dist; } -SYCL_EXTERNAL float volumeTransmittance(const PathState &pathState, - const World *uniform world, +SYCL_EXTERNAL float volumeTransmittance(const World *uniform world, Ray &ray, RayIntervals &rayIntervals, - varying LDSampler *uniform ldSampler, varying RandomSampler *uniform randomSampler) { #ifdef OSPRAY_TARGET_SYCL @@ -229,7 +223,8 @@ SYCL_EXTERNAL float volumeTransmittance(const PathState &pathState, // Iterate through volume intervals float transmittance = 1.f; - for (int i = 0; i < volumeIntervals.numVolumeIntervals && transmittance > 0.f; + for (uint32 i = 0; + i < volumeIntervals.numVolumeIntervals && transmittance > 0.f; i++) { range1f vInterval = volumeIntervals.intervals[i].interval; vInterval.lower = max(ray.t0, vInterval.lower); @@ -259,7 +254,7 @@ SYCL_EXTERNAL float volumeTransmittance(const PathState &pathState, #else foreach_unique (inst in instance) { #endif - for (int j = 0; j < rayIntervals.count && transmittance > 0.f; j++) { + for (uint32 j = 0; j < rayIntervals.count && transmittance > 0.f; j++) { range1f rInterval = rayIntervals.intervals[j]; // Skip ray intervals outside of the volume range diff --git a/modules/cpu/render/scivis/SciVis.cpp b/modules/cpu/render/scivis/SciVis.cpp index af7c9139c..133a4fb2c 100644 --- a/modules/cpu/render/scivis/SciVis.cpp +++ b/modules/cpu/render/scivis/SciVis.cpp @@ -17,7 +17,6 @@ SYCL_EXTERNAL void SciVis_renderTask(Renderer *uniform self, FrameBuffer *uniform fb, Camera *uniform camera, World *uniform world, - void *uniform perFrameData, const uint32 *uniform taskIDs, const int taskIndex0, const uniform ospray::FeatureFlags &ff); @@ -67,7 +66,7 @@ void *SciVis::beginFrame(FrameBuffer *, World *world) void SciVis::renderTasks(FrameBuffer *fb, Camera *camera, World *world, - void *perFrameData, + void *, const utility::ArrayView &taskIDs #ifdef OSPRAY_TARGET_SYCL , @@ -100,7 +99,6 @@ void SciVis::renderTasks(FrameBuffer *fb, fbSh, cameraSh, worldSh, - perFrameData, taskIDsPtr, 
taskIndex.get_global_id(0), ff); @@ -111,13 +109,8 @@ void SciVis::renderTasks(FrameBuffer *fb, // For prints we have to flush the entire queue, because other stuff is queued syclQueue.wait_and_throw(); #else - ispc::SciVis_renderTasks(&rendererSh->super, - fbSh, - cameraSh, - worldSh, - perFrameData, - taskIDs.data(), - numTasks); + ispc::SciVis_renderTasks( + &rendererSh->super, fbSh, cameraSh, worldSh, taskIDs.data(), numTasks); #endif } diff --git a/modules/cpu/render/scivis/SciVis.ih b/modules/cpu/render/scivis/SciVis.ih index 16fe08848..3ed0371e9 100644 --- a/modules/cpu/render/scivis/SciVis.ih +++ b/modules/cpu/render/scivis/SciVis.ih @@ -27,19 +27,14 @@ struct SciVisRenderContext SYCL_EXTERNAL vec3f lightAlpha(const uniform SciVis *uniform self, Ray &ray, - const FrameBuffer *uniform fb, const World *uniform world, - ScreenSample &sample, varying LDSampler *uniform ldSampler, vec3f weight, - float rayOffset, uniform float quality, const uniform FeatureFlags &ff); SYCL_EXTERNAL vec3f SciVis_computeAO(const uniform SciVis *uniform self, - const FrameBuffer *uniform fb, const World *uniform world, - ScreenSample &sample, varying LDSampler *uniform ldSampler, const varying DifferentialGeometry &dg, const uniform int sampleCnt, diff --git a/modules/cpu/render/scivis/SciVis.ispc b/modules/cpu/render/scivis/SciVis.ispc index 76134b268..fee05db76 100644 --- a/modules/cpu/render/scivis/SciVis.ispc +++ b/modules/cpu/render/scivis/SciVis.ispc @@ -22,7 +22,6 @@ OSPRAY_BEGIN_ISPC_NAMESPACE static void SciVis_renderSample(Renderer *uniform _self, FrameBuffer *uniform fb, World *uniform world, - void *uniform perFrameData, varying ScreenSample &sample, const uniform FeatureFlags &ff) { @@ -65,7 +64,6 @@ static void SciVis_renderSample(Renderer *uniform _self, // Iterate over all translucent geometry till we are fully opaque vec3f outColor = make_vec3f(0.f); vec3f outTransmission = make_vec3f(1.f); - int minVolIndex = 0; // interval index of closest volume while (true) { 
// Then trace normal geometry using calculated ray intervals, // if hit ray.t will be updated @@ -155,7 +153,7 @@ static void SciVis_renderSample(Renderer *uniform _self, // Shade geometry SSI surfaceShading; surfaceShading = SciVis_computeShading( - self, fb, world, dg, sample, ldSampler, ray.dir, ff); + self, world, dg, sample, ldSampler, ray.dir, ff); // Initialize other per sample data with first hit values // In addition to considering the first hit, all previous, fully @@ -228,9 +226,7 @@ static void SciVis_renderSample(Renderer *uniform _self, } SYCL_EXTERNAL vec3f SciVis_computeAO(const uniform SciVis *uniform self, - const FrameBuffer *uniform fb, const World *uniform world, - ScreenSample &sample, varying LDSampler *uniform ldSampler, const varying DifferentialGeometry &dg, const uniform int sampleCnt, @@ -269,16 +265,8 @@ SYCL_EXTERNAL vec3f SciVis_computeAO(const uniform SciVis *uniform self, hits = hits + (1.f - - lightAlpha(self, - ao_ray, - fb, - world, - sample, - ldSampler, - make_vec3f(1.f), - dg.epsilon, - 0.1f, - ff)); + - lightAlpha( + self, ao_ray, world, ldSampler, make_vec3f(1.f), 0.1f, ff)); } // the cosTheta of cosineSampleHemispherePDF and dot(shadingNormal, ao_dir) @@ -295,20 +283,17 @@ SYCL_EXTERNAL void SciVis_renderTask(Renderer *uniform self, FrameBuffer *uniform fb, Camera *uniform camera, World *uniform world, - void *uniform perFrameData, const uint32 *uniform taskIDs, const int taskIndex0, const uniform FeatureFlags &ff) { - Renderer_default_renderTask( - self, fb, camera, world, perFrameData, taskIDs, taskIndex0, ff); + Renderer_default_renderTask(self, fb, camera, world, taskIDs, taskIndex0, ff); } #else export void SciVis_renderTasks(void *uniform _self, void *uniform _fb, void *uniform _camera, void *uniform _world, - void *uniform perFrameData, void *uniform _taskIDs, uniform uint32 numTasks) { @@ -318,7 +303,7 @@ export void SciVis_renderTasks(void *uniform _self, World *uniform world = (World * uniform) _world; const 
uint32 *uniform taskIDs = (const uint32 *uniform)_taskIDs; launch[numTasks] Renderer_default_renderTask( - self, fb, camera, world, perFrameData, taskIDs, ffAll()); + self, fb, camera, world, taskIDs, ffAll()); sync; } #endif diff --git a/modules/cpu/render/scivis/lightAlpha.ispc b/modules/cpu/render/scivis/lightAlpha.ispc index 8b13c475f..abd1b2a10 100644 --- a/modules/cpu/render/scivis/lightAlpha.ispc +++ b/modules/cpu/render/scivis/lightAlpha.ispc @@ -17,12 +17,9 @@ OSPRAY_BEGIN_ISPC_NAMESPACE // Lighting functions // SYCL_EXTERNAL vec3f lightAlpha(const uniform SciVis *uniform self, Ray &ray, - const FrameBuffer *uniform fb, const World *uniform world, - ScreenSample &sample, varying LDSampler *uniform ldSampler, vec3f weight, - float rayOffset, uniform float quality, const uniform FeatureFlags &ff) { @@ -66,8 +63,7 @@ SYCL_EXTERNAL vec3f lightAlpha(const uniform SciVis *uniform self, // Sample volumes across volume intervals (in front of geometry hit) if (volumeIntervals.numVolumeIntervals > 0) { - vec4f volumeColor = integrateVolumeIntervals( - volumeIntervals, + vec4f volumeColor = integrateVolumeIntervals(volumeIntervals, rayIntervals, volumeRay, ldSampler, diff --git a/modules/cpu/render/scivis/surfaces.ih b/modules/cpu/render/scivis/surfaces.ih index 26adebf43..6809a8a25 100644 --- a/modules/cpu/render/scivis/surfaces.ih +++ b/modules/cpu/render/scivis/surfaces.ih @@ -22,7 +22,6 @@ struct SurfaceShadingInfo typedef SurfaceShadingInfo SSI; SYCL_EXTERNAL SSI SciVis_computeShading(const SciVis *uniform self, - const FrameBuffer *uniform fb, const World *uniform world, const DifferentialGeometry &dg, ScreenSample &sample, diff --git a/modules/cpu/render/scivis/surfaces.ispc b/modules/cpu/render/scivis/surfaces.ispc index 6953d14d6..e5e863b79 100644 --- a/modules/cpu/render/scivis/surfaces.ispc +++ b/modules/cpu/render/scivis/surfaces.ispc @@ -14,7 +14,6 @@ OSPRAY_BEGIN_ISPC_NAMESPACE vec3f directIllumination(const uniform SciVis *uniform self, - const 
FrameBuffer *uniform fb, const World *uniform world, const varying DifferentialGeometry &dg, ScreenSample &sample, @@ -31,7 +30,7 @@ vec3f directIllumination(const uniform SciVis *uniform self, return color; // calculate shading for all lights - for (uniform int i = scivisData.numLightsVisibleOnly; + for (uniform uint32 i = scivisData.numLightsVisibleOnly; i < scivisData.numLights; i++) { const Light *uniform l = scivisData.lights[i]; @@ -60,16 +59,8 @@ vec3f directIllumination(const uniform SciVis *uniform self, Ray shadowRay; setRay(shadowRay, P, light.dir, 0.0f, light.dist); - vec3f light_alpha = lightAlpha(self, - shadowRay, - fb, - world, - sample, - ldSampler, - light_contrib, - dg.epsilon, - 0.25f, - ff); + vec3f light_alpha = lightAlpha( + self, shadowRay, world, ldSampler, light_contrib, 0.25f, ff); color = color + light_alpha * light_contrib; } @@ -83,7 +74,6 @@ vec3f directIllumination(const uniform SciVis *uniform self, } SYCL_EXTERNAL SSI SciVis_computeShading(const SciVis *uniform self, - const FrameBuffer *uniform fb, const World *uniform world, const DifferentialGeometry &dg, ScreenSample &sample, @@ -96,8 +86,8 @@ SYCL_EXTERNAL SSI SciVis_computeShading(const SciVis *uniform self, const SciVisBSDF bsdf = evalMaterial(dg, ff.other); retval.albedo = bsdf.albedo; - vec3f color = directIllumination( - self, fb, world, dg, sample, ldSampler, bsdf, inDir, ff); + vec3f color = + directIllumination(self, world, dg, sample, ldSampler, bsdf, inDir, ff); vec3f ao = make_vec3f(1.f); const uniform SciVisData &scivisData = @@ -105,9 +95,7 @@ SYCL_EXTERNAL SSI SciVis_computeShading(const SciVis *uniform self, if (self->aoSamples > 0 && luminance(scivisData.aoColorPi) > self->super.minContribution) ao = SciVis_computeAO(self, - fb, world, - sample, ldSampler, dg, self->aoSamples, @@ -133,7 +121,7 @@ SYCL_EXTERNAL vec3f evaluateLights(const World *uniform world, vec3f color = make_vec3f(0.f); const uniform SciVisData &scivisData = *((const uniform SciVisData 
*)world->scivisData); - for (uniform int i = 0; i < scivisData.numLights; i++) { + for (uniform uint32 i = 0; i < scivisData.numLights; i++) { // Skip lights with disabled visibility via parameter const Light *uniform l = scivisData.lights[i]; if (!l->isVisible) diff --git a/modules/cpu/render/scivis/volumes.ispc b/modules/cpu/render/scivis/volumes.ispc index 226b8c7f0..abe79a5b2 100644 --- a/modules/cpu/render/scivis/volumes.ispc +++ b/modules/cpu/render/scivis/volumes.ispc @@ -38,7 +38,7 @@ static void sampleVolume(SciVisRenderContext &rc, const uniform FeatureFlags &ff) { // We have to iterate till we get a valid sample value - float dt; + float dt = 0.f; float sampleVal = nan; vec3f p; // in volume local coords while (isnan(sampleVal)) { @@ -116,14 +116,8 @@ static void sampleVolume(SciVisRenderContext &rc, dg.Ns = dg.Ng = normalize(xfmVector(transposed(vi.instance->rcp_xfm.l), ns)); dg.P = ray.org + vc.distance * ray.dir; - SSI shading = SciVis_computeShading(rc.renderer, - rc.fb, - rc.world, - dg, - rc.sample, - rc.ldSampler, - ray.dir, - ff); + SSI shading = SciVis_computeShading( + rc.renderer, rc.world, dg, rc.sample, rc.ldSampler, ray.dir, ff); vec4f shadedColor = make_vec4f( shading.shadedColor, 1.f - luminance(shading.transmission)); vc.sample = lerp(m->gradientShadingScale, vc.sample, shadedColor); @@ -145,7 +139,7 @@ static float sampleAllVolumes(SciVisRenderContext &rc, // Look for the closest sample across all volumes float minDist = inf; int usedSampleId = -1; - for (uniform int i = 0; i < reduce_max(volumeIntervals.numVolumeIntervals); + for (uniform uint32 i = 0; i < reduce_max(volumeIntervals.numVolumeIntervals); i++) { if (i >= volumeIntervals.numVolumeIntervals) break; @@ -214,7 +208,7 @@ SYCL_EXTERNAL vec4f integrateVolumeIntervalsGradient(SciVisRenderContext &rc, // Iterate through all volumes and initialize its contexts with data that // do not change across ray intervals - for (uniform int i = 0; i < 
reduce_max(volumeIntervals.numVolumeIntervals); + for (uniform uint32 i = 0; i < reduce_max(volumeIntervals.numVolumeIntervals); i++) { if (i >= volumeIntervals.numVolumeIntervals) break; @@ -236,14 +230,15 @@ SYCL_EXTERNAL vec4f integrateVolumeIntervalsGradient(SciVisRenderContext &rc, float transmission = 1.f; // Iterate through all ray intervals - for (uniform int i = 0; + for (uniform uint32 i = 0; i < reduce_max(rayIntervals.count) && (transmission > 0.f); i++) { if (i >= rayIntervals.count) break; // Iterate through all volumes - for (uniform int j = 0; j < reduce_max(volumeIntervals.numVolumeIntervals); + for (uniform uint32 j = 0; + j < reduce_max(volumeIntervals.numVolumeIntervals); j++) { if (j >= volumeIntervals.numVolumeIntervals) break; diff --git a/modules/cpu/render/shaders/Flakes.ih b/modules/cpu/render/shaders/Flakes.ih index 30724933d..c3f553fff 100644 --- a/modules/cpu/render/shaders/Flakes.ih +++ b/modules/cpu/render/shaders/Flakes.ih @@ -3,8 +3,8 @@ #pragma once -#include "render/bsdfs/BeckmannDistribution.ih" #include "Noise.ih" +#include "render/bsdfs/BeckmannDistribution.ih" OSPRAY_BEGIN_ISPC_NAMESPACE @@ -33,7 +33,6 @@ inline vec3f Flakes_eval(const Flakes &self, vec3f P, int &mask) // Pictures", pp. 
255-261] const vec3f thisCell = floor(P) + 0.5f; float f1 = 1000.f; - vec3f cellPos; unsigned int cellRnd; for (int i = -1; i <= 1; i++) { diff --git a/modules/cpu/texture/Texture2D.cpp b/modules/cpu/texture/Texture2D.cpp index 89147709d..63b114ad8 100644 --- a/modules/cpu/texture/Texture2D.cpp +++ b/modules/cpu/texture/Texture2D.cpp @@ -3,7 +3,6 @@ #include "Texture2D.h" #ifndef OSPRAY_TARGET_SYCL -#include "common/OSPCommon_ispc.h" #include "texture/Texture2D_ispc.h" #endif diff --git a/modules/cpu/texture/Texture2D.ispc b/modules/cpu/texture/Texture2D.ispc index 0262dbea7..661e8c994 100644 --- a/modules/cpu/texture/Texture2D.ispc +++ b/modules/cpu/texture/Texture2D.ispc @@ -300,7 +300,7 @@ SYCL_EXTERNAL vec4f Texture2D_get( } static vec3f Texture2D_Normal_neutral( - const Texture2D *uniform self, const DifferentialGeometry &) + const Texture2D *uniform, const DifferentialGeometry &) { return make_vec3f(0.f, 0.f, 1.f); } diff --git a/modules/cpu/texture/TextureVolume.ispc b/modules/cpu/texture/TextureVolume.ispc index 4b1a3fa86..30491cb66 100644 --- a/modules/cpu/texture/TextureVolume.ispc +++ b/modules/cpu/texture/TextureVolume.ispc @@ -26,7 +26,7 @@ SYCL_EXTERNAL vec4f TextureVolume_get( } SYCL_EXTERNAL vec3f TextureVolume_getNormal( - const Texture *uniform self, const varying DifferentialGeometry &dg) + const Texture *uniform, const varying DifferentialGeometry &) { // TODO return make_vec3f(0.f, 0.0f, 1.0f); diff --git a/modules/cpu/volume/VolumetricModel.ih b/modules/cpu/volume/VolumetricModel.ih index a830899ed..661e6858b 100644 --- a/modules/cpu/volume/VolumetricModel.ih +++ b/modules/cpu/volume/VolumetricModel.ih @@ -16,7 +16,7 @@ OSPRAY_BEGIN_ISPC_NAMESPACE inline void VolumetricModel_postIntersect(const VolumetricModel *uniform self, varying DifferentialGeometry &dg, const varying Ray &ray, - uniform int64 flags) + uniform int64) { dg.Ng = make_vec3f(0.f); dg.Ns = make_vec3f(0.f); From 811e261853793e5ef621a4e1d69adb4cb34c0d0e Mon Sep 17 00:00:00 2001 
From: =?UTF-8?q?Johannes=20G=C3=BCnther?= Date: Fri, 17 Mar 2023 10:41:19 +0100 Subject: [PATCH 12/42] Roulette to use random sampler --- .../cpu/render/pathtracer/NextEventEstimation.ispc | 4 ++-- modules/cpu/render/pathtracer/PathSampler.ispc | 3 +-- modules/cpu/render/pathtracer/PathTracer.ispc | 5 ++--- scripts/tests/update_test_baseline.sh | 11 ++++++++--- ...amples_TestScenesLightSamples_testScenes_0.png.md5 | 2 +- ...ayTesting_TestScenesClipping_test_scenes_7.png.md5 | 2 +- ...yTesting_TestScenesGeometry_test_scenes_19.png.md5 | 2 +- ...ayTesting_TestScenesGeometry_test_scenes_4.png.md5 | 2 +- .../SpherePrecision_Intersection_sphere_1.png.md5 | 2 +- .../SpherePrecision_Intersection_sphere_11.png.md5 | 2 +- .../SpherePrecision_Intersection_sphere_13.png.md5 | 2 +- .../SpherePrecision_Intersection_sphere_15.png.md5 | 2 +- .../SpherePrecision_Intersection_sphere_17.png.md5 | 2 +- .../SpherePrecision_Intersection_sphere_19.png.md5 | 2 +- .../SpherePrecision_Intersection_sphere_21.png.md5 | 2 +- .../SpherePrecision_Intersection_sphere_23.png.md5 | 2 +- .../SpherePrecision_Intersection_sphere_3.png.md5 | 2 +- .../SpherePrecision_Intersection_sphere_5.png.md5 | 2 +- .../SpherePrecision_Intersection_sphere_7.png.md5 | 2 +- .../SpherePrecision_Intersection_sphere_9.png.md5 | 2 +- ...tructuredVolume_TestScenesVolumes_simple_3.png.md5 | 2 +- ...amples_TestScenesLightSamples_testScenes_0.png.md5 | 2 +- ...ayTesting_TestScenesClipping_test_scenes_7.png.md5 | 2 +- ...yTesting_TestScenesGeometry_test_scenes_19.png.md5 | 2 +- ...ayTesting_TestScenesGeometry_test_scenes_4.png.md5 | 2 +- .../SpherePrecision_Intersection_sphere_1.png.md5 | 2 +- .../SpherePrecision_Intersection_sphere_11.png.md5 | 2 +- .../SpherePrecision_Intersection_sphere_13.png.md5 | 2 +- .../SpherePrecision_Intersection_sphere_15.png.md5 | 2 +- .../SpherePrecision_Intersection_sphere_17.png.md5 | 2 +- .../SpherePrecision_Intersection_sphere_19.png.md5 | 2 +- 
.../SpherePrecision_Intersection_sphere_21.png.md5 | 2 +- .../SpherePrecision_Intersection_sphere_23.png.md5 | 2 +- .../SpherePrecision_Intersection_sphere_3.png.md5 | 2 +- .../SpherePrecision_Intersection_sphere_5.png.md5 | 2 +- .../SpherePrecision_Intersection_sphere_7.png.md5 | 2 +- .../SpherePrecision_Intersection_sphere_9.png.md5 | 2 +- ...tructuredVolume_TestScenesVolumes_simple_3.png.md5 | 2 +- 38 files changed, 47 insertions(+), 44 deletions(-) diff --git a/modules/cpu/render/pathtracer/NextEventEstimation.ispc b/modules/cpu/render/pathtracer/NextEventEstimation.ispc index 25e070b38..956595211 100644 --- a/modules/cpu/render/pathtracer/NextEventEstimation.ispc +++ b/modules/cpu/render/pathtracer/NextEventEstimation.ispc @@ -35,7 +35,7 @@ SYCL_EXTERNAL vec3f nextEventEstimation(const PathContext &pathContext, for (uniform uint32 i = 0; i < pathContext.numLightSamples; i++) { // select a random light source from the list const float s1 = LDSampler_getFloat( - pathContext.ldSampler, pathState.sampleDim + 4 + i * 3); + pathContext.ldSampler, pathState.sampleDim + 3 + i * 3); const Sample1D lSelectSample = Distribution1D_sample( pathContext.numLights, pathContext.lightsCDF, 0, s1); const float lightSelectionProb = @@ -43,7 +43,7 @@ SYCL_EXTERNAL vec3f nextEventEstimation(const PathContext &pathContext, const Light *light = pathtracerData.lights[lSelectSample.idx]; // sample the contribution from the random light source const vec2f s2 = LDSampler_getFloat2( - pathContext.ldSampler, pathState.sampleDim + 4 + i * 3 + 1); + pathContext.ldSampler, pathState.sampleDim + 3 + i * 3 + 1); Light_SampleRes ls; #ifdef OSPRAY_TARGET_SYCL { diff --git a/modules/cpu/render/pathtracer/PathSampler.ispc b/modules/cpu/render/pathtracer/PathSampler.ispc index 656849248..96c4cc489 100644 --- a/modules/cpu/render/pathtracer/PathSampler.ispc +++ b/modules/cpu/render/pathtracer/PathSampler.ispc @@ -300,8 +300,7 @@ SYCL_EXTERNAL void samplePath(const PathContext &pathContext, #if 
ROULETTE == 1 // Russian roulette if (pathState.depth >= pathContext.context->rouletteDepth) { - const float rr = - LDSampler_getFloat(pathContext.ldSampler, pathState.sampleDim + 3); + const float rr = RandomSampler_getFloat(pathContext.randomSampler); const float contProb = min(luminance(pathState.throughput), MAX_ROULETTE_CONT_PROB); if (rr > contProb) { diff --git a/modules/cpu/render/pathtracer/PathTracer.ispc b/modules/cpu/render/pathtracer/PathTracer.ispc index 19ac7356a..ff50f53e9 100644 --- a/modules/cpu/render/pathtracer/PathTracer.ispc +++ b/modules/cpu/render/pathtracer/PathTracer.ispc @@ -65,9 +65,8 @@ static ScreenSample PathTraceIntegrator_Li(const PathTracer *uniform self, pathContext.numLightSamples = numLightSamples; pathContext.lightsCDF = pathtracerData.lightsCDF; - pathContext.numBounceSampleDims = - 4 + numLightSamples * 3; // BSDF sample (3D), roulette (1D), light samples - // * (3D, light selection and sampling the light) + // BSDF sample (3D), light samples * (3D, light selection and sampling) + pathContext.numBounceSampleDims = 3 + numLightSamples * 3; PathState pathState; pathState.disableNEE = false; diff --git a/scripts/tests/update_test_baseline.sh b/scripts/tests/update_test_baseline.sh index 0ac34d740..a4b31b4b1 100755 --- a/scripts/tests/update_test_baseline.sh +++ b/scripts/tests/update_test_baseline.sh @@ -8,18 +8,23 @@ # argument 1: name of test # argument 2: directory of test result images # argument 3: path of ospray source -# argument 4: ISA (optional) +# argument 4: path to ospray-test-data +# argument 5: ISA (optional) # isas="AVX2 AVX512SKX" md5=`md5sum $2/$1.png | awk '{print $1 }'` echo $md5 -if [ -n "$4" ]; then +if [ -n "$5" ]; then # if ISA specified place reference image only in the ISA specific directories - isas=$4 + isas=$5 fi for isa in $isas do echo $md5 > $3/test_image_data/baseline/$isa/$1.png.md5 done + +cp $2/$1.png $4/MD5/$md5 +cd $4 +git add MD5/$md5 diff --git 
a/test_image_data/baseline/AVX2/FromOsprayTestingLightSamples_TestScenesLightSamples_testScenes_0.png.md5 b/test_image_data/baseline/AVX2/FromOsprayTestingLightSamples_TestScenesLightSamples_testScenes_0.png.md5 index 480b060e4..951c4391e 100644 --- a/test_image_data/baseline/AVX2/FromOsprayTestingLightSamples_TestScenesLightSamples_testScenes_0.png.md5 +++ b/test_image_data/baseline/AVX2/FromOsprayTestingLightSamples_TestScenesLightSamples_testScenes_0.png.md5 @@ -1 +1 @@ -0b2d35e353b50e1ea34f0d0d3857fd82 +45adc210402a9215cf0cf77cb85f3039 diff --git a/test_image_data/baseline/AVX2/FromOsprayTesting_TestScenesClipping_test_scenes_7.png.md5 b/test_image_data/baseline/AVX2/FromOsprayTesting_TestScenesClipping_test_scenes_7.png.md5 index ff9262963..6bec8ac57 100644 --- a/test_image_data/baseline/AVX2/FromOsprayTesting_TestScenesClipping_test_scenes_7.png.md5 +++ b/test_image_data/baseline/AVX2/FromOsprayTesting_TestScenesClipping_test_scenes_7.png.md5 @@ -1 +1 @@ -6a65608105a362dddc11315ec3d145b8 +1dd830d508709c52000290579fd7b3e7 diff --git a/test_image_data/baseline/AVX2/FromOsprayTesting_TestScenesGeometry_test_scenes_19.png.md5 b/test_image_data/baseline/AVX2/FromOsprayTesting_TestScenesGeometry_test_scenes_19.png.md5 index 08f3db08a..d16e5d086 100644 --- a/test_image_data/baseline/AVX2/FromOsprayTesting_TestScenesGeometry_test_scenes_19.png.md5 +++ b/test_image_data/baseline/AVX2/FromOsprayTesting_TestScenesGeometry_test_scenes_19.png.md5 @@ -1 +1 @@ -3cb1ff0b2609d037f8f54d86bb1f0a52 +93f2910c461f8d4a65578de41d888414 diff --git a/test_image_data/baseline/AVX2/FromOsprayTesting_TestScenesGeometry_test_scenes_4.png.md5 b/test_image_data/baseline/AVX2/FromOsprayTesting_TestScenesGeometry_test_scenes_4.png.md5 index 9a8d5ef31..64c8f689c 100644 --- a/test_image_data/baseline/AVX2/FromOsprayTesting_TestScenesGeometry_test_scenes_4.png.md5 +++ b/test_image_data/baseline/AVX2/FromOsprayTesting_TestScenesGeometry_test_scenes_4.png.md5 @@ -1 +1 @@ 
-4ddc20dbabed07c6d36429e3bdcd304b +449624037e1ef5de3c21708ab9f5adde diff --git a/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_1.png.md5 b/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_1.png.md5 index 19740e317..0661244a6 100644 --- a/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_1.png.md5 +++ b/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_1.png.md5 @@ -1 +1 @@ -59ae69278779970220956357b4d48928 +6ef1519f3e152fa90fab6f331563a8ca diff --git a/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_11.png.md5 b/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_11.png.md5 index 9c348fa58..aaa4e98e4 100644 --- a/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_11.png.md5 +++ b/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_11.png.md5 @@ -1 +1 @@ -c1fb30725a952d69ad7cb602e09d26c9 +2b81f2fc2ac6de5c73f65561e17c057c diff --git a/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_13.png.md5 b/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_13.png.md5 index 66a4933c8..52473fc4a 100644 --- a/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_13.png.md5 +++ b/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_13.png.md5 @@ -1 +1 @@ -92029585d2bae69943a3db59e2f7548f +4594ca2ff268ef351a3a59d7e0ea1886 diff --git a/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_15.png.md5 b/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_15.png.md5 index ecc457354..821ad293a 100644 --- a/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_15.png.md5 +++ b/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_15.png.md5 @@ -1 +1 @@ -e7e3152b6645c3f647311abf94baf9ed +bc09ff74ff3f256e1f7dabbb9d8c181f diff --git a/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_17.png.md5 
b/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_17.png.md5 index 1d37af0a3..da1d763cf 100644 --- a/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_17.png.md5 +++ b/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_17.png.md5 @@ -1 +1 @@ -fcb6d59b6ccc4e210eb14c3c17aab07d +ba802dd2427997b1fa27db1c1fb25b00 diff --git a/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_19.png.md5 b/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_19.png.md5 index 4f0f9fa97..ec5886255 100644 --- a/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_19.png.md5 +++ b/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_19.png.md5 @@ -1 +1 @@ -538102c806266f3587dfdc8d9bb9d3c6 +ce97d3ecd5fdcaf71ef7dc08d13980a7 diff --git a/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_21.png.md5 b/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_21.png.md5 index d95161e98..a69742092 100644 --- a/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_21.png.md5 +++ b/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_21.png.md5 @@ -1 +1 @@ -234d3594355b6ff3195063f78b6dcd87 +9494d5f0c1af83c8274371e13fdb1430 diff --git a/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_23.png.md5 b/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_23.png.md5 index 794888ed9..e7036da21 100644 --- a/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_23.png.md5 +++ b/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_23.png.md5 @@ -1 +1 @@ -14e9d7461d461780d9b77b5952d9eb25 +6a9cd39cb0d33b09cd07134a8d21730c diff --git a/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_3.png.md5 b/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_3.png.md5 index 03f865a5c..bac1fa7f7 100644 --- a/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_3.png.md5 +++ 
b/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_3.png.md5 @@ -1 +1 @@ -0270b2ceaa3f10a85034611eb44c4b52 +98acdf8a7d6e42921685f8ca979ffca9 diff --git a/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_5.png.md5 b/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_5.png.md5 index 6378edc00..216e7a3f0 100644 --- a/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_5.png.md5 +++ b/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_5.png.md5 @@ -1 +1 @@ -82775633f1e0b7c855efc84bda8e3bd5 +c265eea80094b81827992c46dfca9b40 diff --git a/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_7.png.md5 b/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_7.png.md5 index 2a4832ca0..4e5082a49 100644 --- a/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_7.png.md5 +++ b/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_7.png.md5 @@ -1 +1 @@ -077608adf78b5ade5ea7b106dc277490 +805160e724a3a845e232ad47f43c7f7b diff --git a/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_9.png.md5 b/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_9.png.md5 index f90cfce0f..f591446a9 100644 --- a/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_9.png.md5 +++ b/test_image_data/baseline/AVX2/SpherePrecision_Intersection_sphere_9.png.md5 @@ -1 +1 @@ -ae5d7de975a0ec9d06712f92e686e61d +c2891de2526c66bc9dc2a0839698e873 diff --git a/test_image_data/baseline/AVX2/UnstructuredVolume_TestScenesVolumes_simple_3.png.md5 b/test_image_data/baseline/AVX2/UnstructuredVolume_TestScenesVolumes_simple_3.png.md5 index adcd95245..74c45bf05 100644 --- a/test_image_data/baseline/AVX2/UnstructuredVolume_TestScenesVolumes_simple_3.png.md5 +++ b/test_image_data/baseline/AVX2/UnstructuredVolume_TestScenesVolumes_simple_3.png.md5 @@ -1 +1 @@ -45a089f2090d82d165b36739a62fd034 +45f9c3bb677494082f97375c96bfef35 diff --git 
a/test_image_data/baseline/AVX512SKX/FromOsprayTestingLightSamples_TestScenesLightSamples_testScenes_0.png.md5 b/test_image_data/baseline/AVX512SKX/FromOsprayTestingLightSamples_TestScenesLightSamples_testScenes_0.png.md5 index 480b060e4..951c4391e 100644 --- a/test_image_data/baseline/AVX512SKX/FromOsprayTestingLightSamples_TestScenesLightSamples_testScenes_0.png.md5 +++ b/test_image_data/baseline/AVX512SKX/FromOsprayTestingLightSamples_TestScenesLightSamples_testScenes_0.png.md5 @@ -1 +1 @@ -0b2d35e353b50e1ea34f0d0d3857fd82 +45adc210402a9215cf0cf77cb85f3039 diff --git a/test_image_data/baseline/AVX512SKX/FromOsprayTesting_TestScenesClipping_test_scenes_7.png.md5 b/test_image_data/baseline/AVX512SKX/FromOsprayTesting_TestScenesClipping_test_scenes_7.png.md5 index ff9262963..6bec8ac57 100644 --- a/test_image_data/baseline/AVX512SKX/FromOsprayTesting_TestScenesClipping_test_scenes_7.png.md5 +++ b/test_image_data/baseline/AVX512SKX/FromOsprayTesting_TestScenesClipping_test_scenes_7.png.md5 @@ -1 +1 @@ -6a65608105a362dddc11315ec3d145b8 +1dd830d508709c52000290579fd7b3e7 diff --git a/test_image_data/baseline/AVX512SKX/FromOsprayTesting_TestScenesGeometry_test_scenes_19.png.md5 b/test_image_data/baseline/AVX512SKX/FromOsprayTesting_TestScenesGeometry_test_scenes_19.png.md5 index 08f3db08a..d16e5d086 100644 --- a/test_image_data/baseline/AVX512SKX/FromOsprayTesting_TestScenesGeometry_test_scenes_19.png.md5 +++ b/test_image_data/baseline/AVX512SKX/FromOsprayTesting_TestScenesGeometry_test_scenes_19.png.md5 @@ -1 +1 @@ -3cb1ff0b2609d037f8f54d86bb1f0a52 +93f2910c461f8d4a65578de41d888414 diff --git a/test_image_data/baseline/AVX512SKX/FromOsprayTesting_TestScenesGeometry_test_scenes_4.png.md5 b/test_image_data/baseline/AVX512SKX/FromOsprayTesting_TestScenesGeometry_test_scenes_4.png.md5 index 9a8d5ef31..64c8f689c 100644 --- a/test_image_data/baseline/AVX512SKX/FromOsprayTesting_TestScenesGeometry_test_scenes_4.png.md5 +++ 
b/test_image_data/baseline/AVX512SKX/FromOsprayTesting_TestScenesGeometry_test_scenes_4.png.md5 @@ -1 +1 @@ -4ddc20dbabed07c6d36429e3bdcd304b +449624037e1ef5de3c21708ab9f5adde diff --git a/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_1.png.md5 b/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_1.png.md5 index 19740e317..0661244a6 100644 --- a/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_1.png.md5 +++ b/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_1.png.md5 @@ -1 +1 @@ -59ae69278779970220956357b4d48928 +6ef1519f3e152fa90fab6f331563a8ca diff --git a/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_11.png.md5 b/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_11.png.md5 index 644b994cc..8d8e54732 100644 --- a/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_11.png.md5 +++ b/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_11.png.md5 @@ -1 +1 @@ -1f4595a75880219c8b27ba29e4d236d8 +09c211f89d2220d98b94742fccb37a14 diff --git a/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_13.png.md5 b/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_13.png.md5 index 66a4933c8..52473fc4a 100644 --- a/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_13.png.md5 +++ b/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_13.png.md5 @@ -1 +1 @@ -92029585d2bae69943a3db59e2f7548f +4594ca2ff268ef351a3a59d7e0ea1886 diff --git a/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_15.png.md5 b/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_15.png.md5 index ecc457354..821ad293a 100644 --- a/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_15.png.md5 +++ b/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_15.png.md5 @@ -1 +1 @@ 
-e7e3152b6645c3f647311abf94baf9ed +bc09ff74ff3f256e1f7dabbb9d8c181f diff --git a/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_17.png.md5 b/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_17.png.md5 index 925b033f4..da1d763cf 100644 --- a/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_17.png.md5 +++ b/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_17.png.md5 @@ -1 +1 @@ -95e01209e2d3cc2574a6e15fceffeaf2 +ba802dd2427997b1fa27db1c1fb25b00 diff --git a/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_19.png.md5 b/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_19.png.md5 index 578b102b0..ec5886255 100644 --- a/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_19.png.md5 +++ b/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_19.png.md5 @@ -1 +1 @@ -7e54fe555a829599a3848a01ae69e07f +ce97d3ecd5fdcaf71ef7dc08d13980a7 diff --git a/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_21.png.md5 b/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_21.png.md5 index b3c1bab6e..166c71ee2 100644 --- a/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_21.png.md5 +++ b/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_21.png.md5 @@ -1 +1 @@ -c56940102804b5e8dd248872fd7bfa5e +df52f755ff1538b0762908a0d4c31fbe diff --git a/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_23.png.md5 b/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_23.png.md5 index 60a95e0df..3d75db0fc 100644 --- a/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_23.png.md5 +++ b/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_23.png.md5 @@ -1 +1 @@ -36eb895065f583a2c833842e2b40803a +f698ef0dad8597604c7d3181a4b35b27 diff --git 
a/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_3.png.md5 b/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_3.png.md5 index 03f865a5c..bac1fa7f7 100644 --- a/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_3.png.md5 +++ b/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_3.png.md5 @@ -1 +1 @@ -0270b2ceaa3f10a85034611eb44c4b52 +98acdf8a7d6e42921685f8ca979ffca9 diff --git a/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_5.png.md5 b/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_5.png.md5 index bb81a8d10..216e7a3f0 100644 --- a/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_5.png.md5 +++ b/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_5.png.md5 @@ -1 +1 @@ -0dac99cc1302cff93e1de8ec9a50d29a +c265eea80094b81827992c46dfca9b40 diff --git a/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_7.png.md5 b/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_7.png.md5 index da0db28bc..4e5082a49 100644 --- a/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_7.png.md5 +++ b/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_7.png.md5 @@ -1 +1 @@ -d8ad6c7f13b84a0ed838c9c7a4cb6019 +805160e724a3a845e232ad47f43c7f7b diff --git a/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_9.png.md5 b/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_9.png.md5 index 7216a73db..d296dd734 100644 --- a/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_9.png.md5 +++ b/test_image_data/baseline/AVX512SKX/SpherePrecision_Intersection_sphere_9.png.md5 @@ -1 +1 @@ -4fdf7a328dc4c24b3fea6b2a5ab63e91 +f1cd4a263206986e534d5151958b90af diff --git a/test_image_data/baseline/AVX512SKX/UnstructuredVolume_TestScenesVolumes_simple_3.png.md5 
b/test_image_data/baseline/AVX512SKX/UnstructuredVolume_TestScenesVolumes_simple_3.png.md5 index adcd95245..74c45bf05 100644 --- a/test_image_data/baseline/AVX512SKX/UnstructuredVolume_TestScenesVolumes_simple_3.png.md5 +++ b/test_image_data/baseline/AVX512SKX/UnstructuredVolume_TestScenesVolumes_simple_3.png.md5 @@ -1 +1 @@ -45a089f2090d82d165b36739a62fd034 +45f9c3bb677494082f97375c96bfef35 From 2a4b20a801af5969f2c9f60b1e44966dbc7cecf9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=BCnther?= Date: Fri, 17 Mar 2023 14:58:12 +0100 Subject: [PATCH 13/42] Fix transparent shadow --- modules/cpu/render/pathtracer/PathTracer.ispc | 2 +- modules/cpu/render/pathtracer/TransparentShadow.ispc | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/modules/cpu/render/pathtracer/PathTracer.ispc b/modules/cpu/render/pathtracer/PathTracer.ispc index ff50f53e9..b4a8b2cd7 100644 --- a/modules/cpu/render/pathtracer/PathTracer.ispc +++ b/modules/cpu/render/pathtracer/PathTracer.ispc @@ -69,7 +69,7 @@ static ScreenSample PathTraceIntegrator_Li(const PathTracer *uniform self, pathContext.numBounceSampleDims = 3 + numLightSamples * 3; PathState pathState; - pathState.disableNEE = false; + pathState.disableNEE = numLightSamples == 0; pathState.disableFWD = false; pathState.straightPath = true; // path from camera did not change direction, // for alpha and backplate diff --git a/modules/cpu/render/pathtracer/TransparentShadow.ispc b/modules/cpu/render/pathtracer/TransparentShadow.ispc index 746cc6d3b..f52241a07 100644 --- a/modules/cpu/render/pathtracer/TransparentShadow.ispc +++ b/modules/cpu/render/pathtracer/TransparentShadow.ispc @@ -64,10 +64,8 @@ SYCL_EXTERNAL vec3f transparentShadow(const uniform PathTracer *uniform self, lightContrib = lightContrib * expf(medium.attenuation * (shadowRay.t - shadowRay.t0)); - if (reduce_max(lightContrib) <= self->super.minContribution) - return lightContrib; - - if (--maxDepth <= 0) + if (reduce_max(lightContrib) <= 
self->super.minContribution + || --maxDepth <= 0) return make_vec3f(0.f); // Tracking medium if we hit a medium interface. From 58e0dec8f0cc1a3def9421a4055d3704c82b882c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=BCnther?= Date: Mon, 20 Mar 2023 13:25:40 +0100 Subject: [PATCH 14/42] Parameter to limit diffuse/glossy bounces --- CHANGELOG.md | 6 ++++++ doc/api.md | 3 +++ modules/cpu/render/pathtracer/PathSampler.ispc | 5 ++++- modules/cpu/render/pathtracer/PathStructs.ih | 11 ++++++----- modules/cpu/render/pathtracer/PathTracer.cpp | 1 + modules/cpu/render/pathtracer/PathTracer.ispc | 1 + modules/cpu/render/pathtracer/PathTracerShared.h | 2 ++ 7 files changed, 23 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b7efabace..be61db139 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,12 @@ Version History --------------- +### Changes in v2.12.0: + +- New parameter `maxScatteringEvents` for the `pathtracer` which + limits the number of non-specular (i.e., diffuse and glossy) bounces + + ### Changes in v2.11.0: - Support single ISPC target on Windows diff --git a/doc/api.md b/doc/api.md index 9d94893ea..fd3c0af99 100644 --- a/doc/api.md +++ b/doc/api.md @@ -2087,6 +2087,9 @@ supports the following special parameters: int roulettePathLength 5 ray recursion depth at which to start Russian roulette termination + int maxScatteringEvents 20 maximum number of non-specular + (i.e., diffuse and glossy) bounces + float maxContribution ∞ samples are clamped to this value before they are accumulated into the framebuffer diff --git a/modules/cpu/render/pathtracer/PathSampler.ispc b/modules/cpu/render/pathtracer/PathSampler.ispc index 96c4cc489..61e42a54e 100644 --- a/modules/cpu/render/pathtracer/PathSampler.ispc +++ b/modules/cpu/render/pathtracer/PathSampler.ispc @@ -249,7 +249,8 @@ SYCL_EXTERNAL void samplePath(const PathContext &pathContext, // terminate after evaluation of lights and before next shading to always // have both 
samples for MIS - if (pathState.depth >= pathContext.context->super.maxDepth) { + if (pathState.depth >= pathContext.context->super.maxDepth + || pathState.scatteringEvents >= pathContext.context->maxScatteringEvents) { break; } @@ -353,6 +354,8 @@ SYCL_EXTERNAL void samplePath(const PathContext &pathContext, setRay(ray, ray_org, fs.wi, pathState.time); pathState.depth++; pathState.sampleDim += pathContext.numBounceSampleDims; + if (fs.type & SCATTERING_SMOOTH) + pathState.scatteringEvents++; } while (reduce_max(pathState.throughput) > pathContext.context->super.minContribution); diff --git a/modules/cpu/render/pathtracer/PathStructs.ih b/modules/cpu/render/pathtracer/PathStructs.ih index 42cb0f12a..72dc86711 100644 --- a/modules/cpu/render/pathtracer/PathStructs.ih +++ b/modules/cpu/render/pathtracer/PathStructs.ih @@ -63,21 +63,22 @@ struct PathContext struct PathState { - bool debug; - bool disableNEE; // turn off NEE contribution for debugging purposes - bool disableFWD; // turn off FWD contribution for debugging purposes bool straightPath; // path from camera did not change direction, for alpha and // backplate bool specularTransmissionPath; // path from camera only has specular // transmissions, for alpha and backplate bool auxFree; // normal & albedo buffer were not yet written to - uniform uint32 depth; - uniform uint32 sampleDim; // skip: pixel (2D), lens (2D), time (1D) vec3f throughput; vec3f contribution; float time; Medium currentMedium; float shadowCatcherDist; + uniform uint32 sampleDim; // skip: pixel (2D), lens (2D), time (1D) + uint32 scatteringEvents; // counting diffuse and glossy bounces + uniform uint32 depth; + uniform bool disableNEE; // turn off NEE contribution for debugging purposes + uniform bool disableFWD; // turn off FWD contribution for debugging purposes + uniform bool debug; }; OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/render/pathtracer/PathTracer.cpp b/modules/cpu/render/pathtracer/PathTracer.cpp index 
b4f578422..010ce4345 100644 --- a/modules/cpu/render/pathtracer/PathTracer.cpp +++ b/modules/cpu/render/pathtracer/PathTracer.cpp @@ -46,6 +46,7 @@ void PathTracer::commit() Renderer::commit(); getSh()->rouletteDepth = getParam("roulettePathLength", 5); + getSh()->maxScatteringEvents = getParam("maxScatteringEvents", 20); getSh()->maxRadiance = getParam("maxContribution", inf); getSh()->numLightSamples = getParam("lightSamples", -1); diff --git a/modules/cpu/render/pathtracer/PathTracer.ispc b/modules/cpu/render/pathtracer/PathTracer.ispc index b4a8b2cd7..e8efb0c83 100644 --- a/modules/cpu/render/pathtracer/PathTracer.ispc +++ b/modules/cpu/render/pathtracer/PathTracer.ispc @@ -79,6 +79,7 @@ static ScreenSample PathTraceIntegrator_Li(const PathTracer *uniform self, pathState.auxFree = true; // normal & albedo buffer were not yet written to pathState.depth = 0; pathState.sampleDim = 5; // skip: pixel (2D), lens (2D), time (1D) + pathState.scatteringEvents = 0; pathState.throughput = make_vec3f(1.f); pathState.contribution = make_vec3f(0.f); pathState.time = ray.time; diff --git a/modules/cpu/render/pathtracer/PathTracerShared.h b/modules/cpu/render/pathtracer/PathTracerShared.h index 994a94f16..22b3e4aa0 100644 --- a/modules/cpu/render/pathtracer/PathTracerShared.h +++ b/modules/cpu/render/pathtracer/PathTracerShared.h @@ -14,6 +14,7 @@ struct PathTracer Renderer super; uint32 rouletteDepth; // path depth from which on RR is used + uint32 maxScatteringEvents; float maxRadiance; // coefficients of plane equation defining geometry to catch shadows for // compositing; disabled if normal is zero-length @@ -25,6 +26,7 @@ struct PathTracer #ifdef __cplusplus PathTracer() : rouletteDepth(5), + maxScatteringEvents(20), maxRadiance(inf), shadowCatcherPlane(0.f), shadowCatcher(false), From bb3e0d9cdf3f56053cb612ceb67c21c4524b3b0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=BCnther?= Date: Tue, 28 Mar 2023 10:10:47 +0200 Subject: [PATCH 15/42] Fix title of 
ospExamples --- apps/ospExamples/GLFWOSPRayWindow.cpp | 2 +- doc/api.md | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/apps/ospExamples/GLFWOSPRayWindow.cpp b/apps/ospExamples/GLFWOSPRayWindow.cpp index 806b49a7c..97052ea7d 100644 --- a/apps/ospExamples/GLFWOSPRayWindow.cpp +++ b/apps/ospExamples/GLFWOSPRayWindow.cpp @@ -143,7 +143,7 @@ GLFWOSPRayWindow::GLFWOSPRayWindow(const vec2i &windowSize, bool denoiser) glfwWindowHint(GLFW_SRGB_CAPABLE, GLFW_TRUE); // create GLFW window glfwWindow = glfwCreateWindow( - windowSize.x, windowSize.y, "OSPRay Tutorial", nullptr, nullptr); + windowSize.x, windowSize.y, "OSPRay Examples", nullptr, nullptr); if (!glfwWindow) { glfwTerminate(); diff --git a/doc/api.md b/doc/api.md index fd3c0af99..639053b25 100644 --- a/doc/api.md +++ b/doc/api.md @@ -2046,8 +2046,7 @@ ambient lights cause ambient illumination (without occlusion). This renderer supports only a subset of the features of the [SciVis renderer] to gain performance. As the name suggest its main shading -method is ambient occlusion (AO), [lights] are *not* considered at all -and , +method is ambient occlusion (AO), [lights] are *not* considered at all. Volume rendering is supported. The Ambient Occlusion renderer is created by passing the type string "`ao`" to `ospNewRenderer`. 
In addition to the [general From 7a924c817247cbc3c5c0d41fcac40d331122e68d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=BCnther?= Date: Wed, 12 Apr 2023 10:05:17 +0200 Subject: [PATCH 16/42] CI: use common env for driver version --- .github/workflows/ci.linux.gpu.yml | 104 +++++++++++++++----- .github/workflows/dpcpp-sycl-nightly.env | 1 + .github/workflows/dpcpp-sycl-public.env | 1 + .github/workflows/gfx-ubuntu22-internal.env | 1 + .github/workflows/gfx-ubuntu22.env | 1 + 5 files changed, 85 insertions(+), 23 deletions(-) create mode 100644 .github/workflows/dpcpp-sycl-nightly.env create mode 100644 .github/workflows/dpcpp-sycl-public.env create mode 100644 .github/workflows/gfx-ubuntu22-internal.env create mode 100644 .github/workflows/gfx-ubuntu22.env diff --git a/.github/workflows/ci.linux.gpu.yml b/.github/workflows/ci.linux.gpu.yml index acfa90b19..12745ac89 100644 --- a/.github/workflows/ci.linux.gpu.yml +++ b/.github/workflows/ci.linux.gpu.yml @@ -15,60 +15,118 @@ jobs: ### BUILD JOBS ### - build-ubuntu2204-DG2-JIT: + build-ubuntu2204: uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker_gpu.yml@main secrets: inherit with: force-delete: true # guarantees .gitattributes are respected in working dir - gfx-driver-version: gfx-driver-builds/ci/comp_igc/gfx-driver-ci-comp_igc-19476/artifacts/Linux/Ubuntu/22.04/Release - dpcpp-version: sycl-nightly/20221214 - install-gfx-driver: true - submodules: true image: ubuntu:22.04 + env-from-files: .github/workflows/dpcpp-sycl-nightly.env .github/workflows/gfx-ubuntu22.env + level-zero-version: public/1.9.9 + install-gfx-driver: false + submodules: true cmd: | export SYCL_BUNDLE_ROOT=$DPCPP_ROOT export CC=clang export CXX=clang++ - scripts/build/linux.sh -G Ninja -DBUILD_GLFW=OFF -DBUILD_OSPRAY_MODULE_MULTIDEVICE=OFF -DBUILD_GPU_SUPPORT=ON -DOPENVKL_URL=https://${RENDERKIT_GITHUB_TOKEN}@github.com/intel-innersource/libraries.graphics.renderkit.openvkl.git 
-DOPENVKL_BRANCH=kraszkow/embree-sycl-build-gpu -DRKCOMMON_URL=https://${RENDERKIT_GITHUB_TOKEN}@github.com/intel-innersource/libraries.graphics.renderkit.rkcommon.git -DRKCOMMON_VERSION=devel - artifact-out: build-ubuntu2204-DG2-JIT + scripts/build/linux.sh -G Ninja -DBUILD_GLFW=OFF -DBUILD_OSPRAY_MODULE_MULTIDEVICE=OFF -DBUILD_GPU_SUPPORT=ON -DOPENVKL_URL=https://${RENDERKIT_GITHUB_TOKEN}@github.com/intel-innersource/libraries.graphics.renderkit.openvkl.git -DOPENVKL_BRANCH=bcca9b98 -DRKCOMMON_VERSION=devel + artifact-out: build-ubuntu2204 artifact-path: build/install/ospray build/install/embree build/CMakeCache.txt build/*/build/CMakeCache.txt - build-ubuntu2204-DG2-JIT-mpi: - uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker_gpu.yml@tgt/support_oneapi_icx + build-ubuntu2204-devel: + uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker_gpu.yml@main secrets: inherit with: force-delete: true # guarantees .gitattributes are respected in working dir - gfx-driver-version: gfx-driver-builds/ci/comp_igc/gfx-driver-ci-comp_igc-19476/artifacts/Linux/Ubuntu/22.04/Release - dpcpp-version: intel/2023.0 - install-gfx-driver: true + image: ubuntu:22.04 + env-from-files: .github/workflows/dpcpp-sycl-nightly.env .github/workflows/gfx-ubuntu22-internal.env + level-zero-version: public/1.9.9 + install-gfx-driver: false submodules: true + cmd: | + export SYCL_BUNDLE_ROOT=$DPCPP_ROOT + export CC=clang + export CXX=clang++ + scripts/build/linux.sh -G Ninja -DBUILD_GLFW=OFF -DBUILD_OSPRAY_MODULE_MULTIDEVICE=OFF -DBUILD_GPU_SUPPORT=ON -DOPENVKL_URL=https://${RENDERKIT_GITHUB_TOKEN}@github.com/intel-innersource/libraries.graphics.renderkit.openvkl.git -DOPENVKL_BRANCH=gpu-devel -DRKCOMMON_VERSION=devel + artifact-out: build-ubuntu2204-devel + artifact-path: build/install/ospray build/install/embree build/CMakeCache.txt build/*/build/CMakeCache.txt + + build-ubuntu2204-mpi: + uses: 
intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker_gpu.yml@main + secrets: inherit + with: + force-delete: true # guarantees .gitattributes are respected in working dir image: ubuntu:22.04 + env-from-files: .github/workflows/dpcpp-sycl-public.env .github/workflows/gfx-ubuntu22.env + level-zero-version: public/1.9.9 + install-gfx-driver: false + submodules: true cmd: | sudo apt remove openmpi-* libopenmpi3 -y - module load level-zero/1.9.4 module load mpi export CC=icx export CXX=icpx - scripts/build/linux.sh -G Ninja -DBUILD_GLFW=OFF -DBUILD_OSPRAY_MODULE_MPI=ON -DBUILD_OSPRAY_MODULE_MULTIDEVICE=OFF -DBUILD_GPU_SUPPORT=ON -DOPENVKL_URL=https://${RENDERKIT_GITHUB_TOKEN}@github.com/intel-innersource/libraries.graphics.renderkit.openvkl.git -DOPENVKL_BRANCH=kraszkow/embree-sycl-build-gpu -DRKCOMMON_URL=https://${RENDERKIT_GITHUB_TOKEN}@github.com/intel-innersource/libraries.graphics.renderkit.rkcommon.git -DRKCOMMON_VERSION=devel - artifact-out: build-ubuntu2204-DG2-JIT-mpi + scripts/build/linux.sh -G Ninja -DBUILD_GLFW=OFF -DBUILD_OSPRAY_MODULE_MPI=ON -DBUILD_OSPRAY_MODULE_MULTIDEVICE=OFF -DBUILD_GPU_SUPPORT=ON -DOPENVKL_URL=https://${RENDERKIT_GITHUB_TOKEN}@github.com/intel-innersource/libraries.graphics.renderkit.openvkl.git -DOPENVKL_BRANCH=gpu-devel -DRKCOMMON_VERSION=devel + artifact-out: build-ubuntu2204-mpi artifact-path: build/install/ospray build/install/embree build/CMakeCache.txt build/*/build/CMakeCache.txt + build-suse: + uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker_gpu.yml@main + secrets: inherit + with: + force-delete: true # guarantees .gitattributes are respected in working dir + image: opensuse/leap:15.3 + env-from-files: .github/workflows/dpcpp-sycl-nightly.env .github/workflows/gfx-ubuntu22.env + level-zero-version: public/1.9.9 + install-gfx-driver: false + submodules: true + cmd: | + export SYCL_BUNDLE_ROOT=$DPCPP_ROOT + export CC=clang + export CXX=clang++ + 
scripts/build/linux.sh -DBUILD_GLFW=ON -DBUILD_OSPRAY_MODULE_MULTIDEVICE=OFF -DBUILD_GPU_SUPPORT=ON -DOPENVKL_URL=https://${RENDERKIT_GITHUB_TOKEN}@github.com/intel-innersource/libraries.graphics.renderkit.openvkl.git -DOPENVKL_BRANCH=gpu-devel -DRKCOMMON_VERSION=devel + artifact-out: build-suse + artifact-path: build/install/ospray build/install/embree build/CMakeCache.txt build/*/build/CMakeCache.txt + + ### TEST JOBS ### - test-ubuntu2204-DG2-JIT: - needs: [build-ubuntu2204-DG2-JIT] + test-ubuntu2204-dg2: + needs: [build-ubuntu2204] uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker_gpu.yml@main secrets: inherit with: - gfx-driver-version: gfx-driver-builds/ci/comp_igc/gfx-driver-ci-comp_igc-19476/artifacts/Linux/Ubuntu/22.04/Release - dpcpp-version: sycl-nightly/20221214 + image: ubuntu:22.04 + env-from-files: .github/workflows/dpcpp-sycl-nightly.env .github/workflows/gfx-ubuntu22.env install-gfx-driver: true submodules: true + options: --device=/dev/dri:/dev/dri + runs-on: '[ "Linux", "docker", "dg2" ]' + artifact-in: build-ubuntu2204 + artifact-out: test-ubuntu2204-dg2 + artifact-path: build_regression_tests/tests*.xml build_regression_tests/failed* + artifact-on-failure: true + cmd: | + export SYCL_BUNDLE_ROOT=$DPCPP_ROOT + export CC=clang + export CXX=clang++ + export LD_LIBRARY_PATH="$GITHUB_WORKSPACE/build/install/ospray/lib:$GITHUB_WORKSPACE/build/install/embree/lib:$LD_LIBRARY_PATH" + export PATH="$GITHUB_WORKSPACE/build/install/ospray/bin:$PATH" + scripts/tests/run_gpu_tests.sh "$GITHUB_WORKSPACE" + + test-ubuntu2204-devel-dg2: + needs: [build-ubuntu2204-devel] + uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker_gpu.yml@main + secrets: inherit + with: image: ubuntu:22.04 + env-from-files: .github/workflows/dpcpp-sycl-nightly.env .github/workflows/gfx-ubuntu22-internal.env + install-gfx-driver: true + submodules: true options: --device=/dev/dri:/dev/dri runs-on: '[ "Linux", "docker", 
"dg2" ]' - artifact-in: build-ubuntu2204-DG2-JIT - artifact-out: test-ubuntu2204-DG2-JIT + artifact-in: build-ubuntu2204-devel + artifact-out: test-ubuntu2204-devel-dg2 artifact-path: build_regression_tests/tests*.xml build_regression_tests/failed* artifact-on-failure: true cmd: | @@ -81,7 +139,7 @@ jobs: # test-ubuntu2204-DG2-JIT-mpi: # needs: [build-ubuntu2204-DG2-JIT-mpi] -# uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker_gpu.yml@tgt/support_oneapi_icx +# uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker_gpu.yml@main # secrets: inherit # with: # allow-failure: true @@ -98,7 +156,7 @@ jobs: # artifact-on-failure: true # cmd: | # sudo apt remove openmpi-* libopenmpi3 -y -# module load level-zero/1.9.4 +# module load level-zero/1.9.9 # module load mpi # export LD_LIBRARY_PATH="$GITHUB_WORKSPACE/build/install/ospray/lib:$GITHUB_WORKSPACE/build/install/embree/lib:$LD_LIBRARY_PATH" # export PATH="$GITHUB_WORKSPACE/build/install/ospray/bin:$PATH" diff --git a/.github/workflows/dpcpp-sycl-nightly.env b/.github/workflows/dpcpp-sycl-nightly.env new file mode 100644 index 000000000..d3a62e102 --- /dev/null +++ b/.github/workflows/dpcpp-sycl-nightly.env @@ -0,0 +1 @@ +DPCPP_VERSION=sycl-nightly/20230304 diff --git a/.github/workflows/dpcpp-sycl-public.env b/.github/workflows/dpcpp-sycl-public.env new file mode 100644 index 000000000..d6181b175 --- /dev/null +++ b/.github/workflows/dpcpp-sycl-public.env @@ -0,0 +1 @@ +DPCPP_VERSION=intel/2023.0 diff --git a/.github/workflows/gfx-ubuntu22-internal.env b/.github/workflows/gfx-ubuntu22-internal.env new file mode 100644 index 000000000..6eaa34133 --- /dev/null +++ b/.github/workflows/gfx-ubuntu22-internal.env @@ -0,0 +1 @@ +GFX_DRIVER_VERSION=neo-builds/ci/master/ci-neo-master-025812/artifacts/linux/ubuntu/22.04 diff --git a/.github/workflows/gfx-ubuntu22.env b/.github/workflows/gfx-ubuntu22.env new file mode 100644 index 000000000..fb7a3e1d0 --- /dev/null 
+++ b/.github/workflows/gfx-ubuntu22.env @@ -0,0 +1 @@ +GFX_DRIVER_VERSION=gfx-driver-builds/ci/comp_igc/gfx-driver-ci-comp_igc-19476/artifacts/Linux/Ubuntu/22.04/Release From 039a4865106dd2bb7bd5655ed72dff7eb27496fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=BCnther?= Date: Tue, 28 Mar 2023 11:22:30 +0200 Subject: [PATCH 17/42] Ensure proper exit for GPU CI --- scripts/tests/run_gpu_tests.sh | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/scripts/tests/run_gpu_tests.sh b/scripts/tests/run_gpu_tests.sh index 87866e6d2..3c10110cb 100755 --- a/scripts/tests/run_gpu_tests.sh +++ b/scripts/tests/run_gpu_tests.sh @@ -100,20 +100,31 @@ test_filters+=":Color/Interpolation.Interpolation/6" test_filters+=":Color/Interpolation.Interpolation/7" test_filters+=":Texcoord/Interpolation.Interpolation/2" test_filters+=":Texcoord/Interpolation.Interpolation/3" +test_filters+=":TestScenesGeometry/FromOsprayTesting.test_scenes/10" +test_filters+=":TestScenesGeometry/Curves.test_scenes/1" +test_filters+=":TestScenesGeometry/Curves.test_scenes/4" +test_filters+=":TestScenesGeometry/Curves.test_scenes/7" +test_filters+=":TestScenesGeometry/Curves.test_scenes/10" +test_filters+=":TestScenesGeometry/Curves.test_scenes/13" +test_filters+=":TestScenesGeometry/Curves.test_scenes/16" +test_filters+=":TestScenesVariance/FromOsprayTestingVariance.testScenes/0" +test_filters+=":Appearance/PTBackgroundRefraction.backgroundRefraction/0" +test_filters+=":Appearance/PTBackgroundRefraction.backgroundRefraction/1" export ONEAPI_DEVICE_SELECTOR=level_zero:* mkdir failed-gpu -ospTestSuite --gtest_output=xml:tests.xml --baseline-dir=regression_test_baseline/ --failed-dir=failed-gpu --osp:load-modules=gpu --osp:device=gpu --gtest_filter="-$test_filters" +ospTestSuite --gtest_output=xml:tests.xml --baseline-dir=regression_test_baseline/ --failed-dir=failed-gpu --osp:load-modules=gpu --osp:device=gpu --gtest_filter="-$test_filters" || exit 2 if [ $TEST_MPI 
]; then mkdir failed-mpi-gpu # Need to export, not just set for MPI to pick it up export OSPRAY_MPI_DISTRIBUTED_GPU=1 - mpiexec $MPI_ROOT_CONFIG ospTestSuite --gtest_output=xml:tests-mpi-offload.xml --baseline-dir=regression_test_baseline/ --failed-dir=failed-mpi-gpu --osp:load-modules=mpi_offload --osp:device=mpiOffload --gtest_filter="-$test_filters" : $MPI_WORKER_CONFIG ospray_mpi_worker + mpiexec $MPI_ROOT_CONFIG ospTestSuite --gtest_output=xml:tests-mpi-offload.xml --baseline-dir=regression_test_baseline/ --failed-dir=failed-mpi-gpu --osp:load-modules=mpi_offload --osp:device=mpiOffload --gtest_filter="-$test_filters" : $MPI_WORKER_CONFIG ospray_mpi_worker || exit 2 mkdir failed-mpi-gpu-data-parallel - mpiexec $MPI_ROOT_CONFIG ospMPIDistribTestSuite --gtest_output=xml:tests-mpi-distrib.xml --baseline-dir=regression_test_baseline/ --failed-dir=failed-mpi-gpu-data-parallel --gtest_filter="MPIDistribTestScenesGeometry*:MPIDistribTestScenesVolumes*test_scenes/0" + mpiexec $MPI_ROOT_CONFIG ospMPIDistribTestSuite --gtest_output=xml:tests-mpi-distrib.xml --baseline-dir=regression_test_baseline/ --failed-dir=failed-mpi-gpu-data-parallel --gtest_filter="MPIDistribTestScenesGeometry*:MPIDistribTestScenesVolumes*test_scenes/0" || exit 2 fi +exit $? 
From ea97897e54a00eb01810ce8dc7b4f0f9a61be62c Mon Sep 17 00:00:00 2001 From: Will Usher Date: Fri, 20 Jan 2023 16:55:25 -0800 Subject: [PATCH 18/42] Refactor struct shared/BufferShared to alloc on the context This is needed to enable memory pooling in ISPCRT --- modules/cpu/ISPCDevice.h | 5 ++ modules/cpu/camera/Camera.cpp | 2 +- modules/cpu/camera/OrthographicCamera.cpp | 3 +- modules/cpu/camera/PanoramicCamera.cpp | 2 +- modules/cpu/camera/PerspectiveCamera.cpp | 2 +- modules/cpu/common/BufferShared.h | 58 +++++++++++++------ modules/cpu/common/Data.cpp | 11 ++-- modules/cpu/common/Group.cpp | 8 +-- modules/cpu/common/Instance.cpp | 2 +- modules/cpu/common/StructShared.h | 15 ++--- modules/cpu/common/World.cpp | 4 +- modules/cpu/fb/FrameBuffer.cpp | 2 +- modules/cpu/fb/ImageOp.cpp | 2 +- modules/cpu/fb/LocalFB.cpp | 28 ++++----- modules/cpu/fb/SparseFB.cpp | 21 ++++--- modules/cpu/fb/TaskError.cpp | 4 +- modules/cpu/fb/TaskError.h | 2 +- modules/cpu/fb/pixel_ops/ToneMapper.cpp | 2 +- modules/cpu/geometry/Boxes.cpp | 2 +- modules/cpu/geometry/Curves.cpp | 2 +- modules/cpu/geometry/GeometricModel.cpp | 6 +- modules/cpu/geometry/Geometry.cpp | 2 +- modules/cpu/geometry/Isosurfaces.cpp | 2 +- modules/cpu/geometry/Mesh.cpp | 2 +- modules/cpu/geometry/Planes.cpp | 2 +- modules/cpu/geometry/Spheres.cpp | 2 +- modules/cpu/geometry/Subdivision.cpp | 2 +- modules/cpu/lights/AmbientLight.cpp | 2 +- modules/cpu/lights/CylinderLight.cpp | 2 +- modules/cpu/lights/DirectionalLight.cpp | 2 +- modules/cpu/lights/HDRILight.cpp | 2 +- modules/cpu/lights/PointLight.cpp | 2 +- modules/cpu/lights/QuadLight.cpp | 2 +- modules/cpu/lights/SpotLight.cpp | 2 +- modules/cpu/lights/SunSkyLight.cpp | 6 +- modules/cpu/math/Distribution2D.cpp | 6 +- modules/cpu/math/MathConstants.cpp | 10 ++-- modules/cpu/pf/PixelFilter.cpp | 4 +- modules/cpu/render/Material.cpp | 2 +- modules/cpu/render/Renderer.cpp | 4 +- modules/cpu/render/ao/AORenderer.cpp | 2 +- .../render/bsdfs/MicrofacetAlbedoTables.cpp 
| 20 +++---- modules/cpu/render/debug/DebugRenderer.cpp | 2 +- modules/cpu/render/materials/Alloy.cpp | 2 +- modules/cpu/render/materials/CarPaint.cpp | 2 +- modules/cpu/render/materials/Glass.cpp | 2 +- modules/cpu/render/materials/Luminous.cpp | 2 +- modules/cpu/render/materials/Metal.cpp | 2 +- .../cpu/render/materials/MetallicPaint.cpp | 2 +- modules/cpu/render/materials/Mix.cpp | 2 +- modules/cpu/render/materials/OBJ.cpp | 2 +- modules/cpu/render/materials/Plastic.cpp | 2 +- modules/cpu/render/materials/Principled.cpp | 3 +- modules/cpu/render/materials/ThinGlass.cpp | 2 +- modules/cpu/render/materials/Velvet.cpp | 2 +- modules/cpu/render/pathtracer/PathTracer.cpp | 2 +- .../cpu/render/pathtracer/PathTracerData.cpp | 16 ++--- modules/cpu/render/scivis/SciVis.cpp | 2 +- modules/cpu/render/scivis/SciVisData.cpp | 4 +- modules/cpu/texture/Texture.cpp | 2 +- modules/cpu/texture/Texture2D.h | 2 +- modules/cpu/texture/TextureVolume.h | 2 +- modules/cpu/volume/Volume.cpp | 2 +- modules/cpu/volume/VolumetricModel.cpp | 2 +- .../LinearTransferFunction.cpp | 2 +- .../transferFunction/TransferFunction.cpp | 2 +- .../mpi/ospray/common/DistributedWorld.cpp | 2 +- .../mpi/ospray/fb/DistributedFrameBuffer.cpp | 2 +- .../ospray/render/DistributedLoadBalancer.cpp | 5 +- .../render/distributed/DistributedRaycast.cpp | 2 +- .../distributed/DistributedRenderer.cpp | 2 +- .../ospray/geometry/BilinearPatches.cpp | 2 +- 72 files changed, 189 insertions(+), 154 deletions(-) diff --git a/modules/cpu/ISPCDevice.h b/modules/cpu/ISPCDevice.h index e4cbe36e6..3ea19d955 100644 --- a/modules/cpu/ISPCDevice.h +++ b/modules/cpu/ISPCDevice.h @@ -159,6 +159,11 @@ struct OSPRAY_SDK_INTERFACE ISPCDevice : public Device return ispcrtDevice; } + ispcrt::Context &getIspcrtContext() + { + return ispcrtContext; + } + #ifdef OSPRAY_TARGET_SYCL sycl::queue *getSyclQueue() { diff --git a/modules/cpu/camera/Camera.cpp b/modules/cpu/camera/Camera.cpp index ccc841477..33398f179 100644 --- 
a/modules/cpu/camera/Camera.cpp +++ b/modules/cpu/camera/Camera.cpp @@ -7,7 +7,7 @@ namespace ospray { Camera::Camera(api::ISPCDevice &device, const FeatureFlagsOther featureFlags) - : AddStructShared(device.getIspcrtDevice(), device), + : AddStructShared(device.getIspcrtContext(), device), featureFlags(featureFlags) { managedObjectType = OSP_CAMERA; diff --git a/modules/cpu/camera/OrthographicCamera.cpp b/modules/cpu/camera/OrthographicCamera.cpp index 76ae09a7d..0da5b436e 100644 --- a/modules/cpu/camera/OrthographicCamera.cpp +++ b/modules/cpu/camera/OrthographicCamera.cpp @@ -10,7 +10,8 @@ namespace ospray { OrthographicCamera::OrthographicCamera(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device, FFO_CAMERA_ORTHOGRAPHIC) + : AddStructShared( + device.getIspcrtContext(), device, FFO_CAMERA_ORTHOGRAPHIC) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.initRay = reinterpret_cast( diff --git a/modules/cpu/camera/PanoramicCamera.cpp b/modules/cpu/camera/PanoramicCamera.cpp index 9b52c5e93..efc4f9d83 100644 --- a/modules/cpu/camera/PanoramicCamera.cpp +++ b/modules/cpu/camera/PanoramicCamera.cpp @@ -14,7 +14,7 @@ void *PanoramicCamera_initRay_addr(); namespace ospray { PanoramicCamera::PanoramicCamera(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device, FFO_CAMERA_PANORAMIC) + : AddStructShared(device.getIspcrtContext(), device, FFO_CAMERA_PANORAMIC) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.initRay = reinterpret_cast( diff --git a/modules/cpu/camera/PerspectiveCamera.cpp b/modules/cpu/camera/PerspectiveCamera.cpp index aaf04107f..ae110d35d 100644 --- a/modules/cpu/camera/PerspectiveCamera.cpp +++ b/modules/cpu/camera/PerspectiveCamera.cpp @@ -10,7 +10,7 @@ namespace ospray { PerspectiveCamera::PerspectiveCamera(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device, FFO_CAMERA_PERSPECTIVE) + : AddStructShared(device.getIspcrtContext(), device, FFO_CAMERA_PERSPECTIVE) { #ifndef 
OSPRAY_TARGET_SYCL getSh()->super.initRay = reinterpret_cast( diff --git a/modules/cpu/common/BufferShared.h b/modules/cpu/common/BufferShared.h index a5f9b1443..f2f4b5032 100644 --- a/modules/cpu/common/BufferShared.h +++ b/modules/cpu/common/BufferShared.h @@ -11,11 +11,15 @@ namespace ospray { // C version //////////////////////////////////////////// -inline ISPCRTMemoryView BufferSharedCreate(ISPCRTDevice device, size_t size) +inline ISPCRTMemoryView BufferSharedCreate(ISPCRTContext context, + size_t size, + ISPCRTSharedMemoryAllocationHint allocHint = + ISPCRT_SM_HOST_DEVICE_READ_WRITE) { ISPCRTNewMemoryViewFlags flags; flags.allocType = ISPCRT_ALLOC_TYPE_SHARED; - return ispcrtNewMemoryView(device, nullptr, size, &flags); + flags.smHint = allocHint; + return ispcrtNewMemoryViewForContext(context, nullptr, size, &flags); } inline void BufferSharedDelete(ISPCRTMemoryView view) @@ -29,10 +33,22 @@ template struct BufferShared : public ispcrt::Array { using ispcrt::Array::sharedPtr; - BufferShared(ispcrt::Device &device); - BufferShared(ispcrt::Device &device, size_t size); - BufferShared(ispcrt::Device &device, const std::vector &v); - BufferShared(ispcrt::Device &device, const T *data, size_t size); + BufferShared(ispcrt::Context &context, + ispcrt::SharedMemoryUsageHint allocHint = + ispcrt::SharedMemoryUsageHint::HostDeviceReadWrite); + BufferShared(ispcrt::Context &context, + size_t size, + ispcrt::SharedMemoryUsageHint allocHint = + ispcrt::SharedMemoryUsageHint::HostDeviceReadWrite); + BufferShared(ispcrt::Context &context, + const std::vector &v, + ispcrt::SharedMemoryUsageHint allocHint = + ispcrt::SharedMemoryUsageHint::HostDeviceReadWrite); + BufferShared(ispcrt::Context &context, + const T *data, + size_t size, + ispcrt::SharedMemoryUsageHint allocHint = + ispcrt::SharedMemoryUsageHint::HostDeviceReadWrite); // TODO: We should move these up into the ISPCRT wrapper T *data(); @@ -50,26 +66,34 @@ struct BufferShared : public ispcrt::Array }; template 
-BufferShared::BufferShared(ispcrt::Device &device) - : ispcrt::Array(device) +BufferShared::BufferShared( + ispcrt::Context &context, ispcrt::SharedMemoryUsageHint allocHint) + : ispcrt::Array(context, allocHint) {} template -BufferShared::BufferShared(ispcrt::Device &device, size_t size) - : ispcrt::Array(device, size) -{} +BufferShared::BufferShared(ispcrt::Context &context, + size_t size, + ispcrt::SharedMemoryUsageHint allocHint) + : ispcrt::Array(context, size, allocHint) +{ +} template -BufferShared::BufferShared(ispcrt::Device &device, const std::vector &v) - : ispcrt::Array(device, v.size()) +BufferShared::BufferShared(ispcrt::Context &context, + const std::vector &v, + ispcrt::SharedMemoryUsageHint allocHint) + : ispcrt::Array(context, v.size(), allocHint) { std::memcpy(sharedPtr(), v.data(), sizeof(T) * v.size()); } template -BufferShared::BufferShared( - ispcrt::Device &device, const T *data, size_t size) - : ispcrt::Array(device, size) +BufferShared::BufferShared(ispcrt::Context &context, + const T *data, + size_t size, + ispcrt::SharedMemoryUsageHint allocHint) + : ispcrt::Array(context, size, allocHint) { std::memcpy(sharedPtr(), data, sizeof(T) * size); } @@ -129,7 +153,7 @@ T *BufferShared::sharedPtr() const template inline std::unique_ptr> make_buffer_shared_unique( - Args &&... args) + Args &&...args) { return std::unique_ptr>( new BufferShared(std::forward(args)...)); diff --git a/modules/cpu/common/Data.cpp b/modules/cpu/common/Data.cpp index 75ac42b8a..18a09ef5f 100644 --- a/modules/cpu/common/Data.cpp +++ b/modules/cpu/common/Data.cpp @@ -43,8 +43,9 @@ Data::Data(api::ISPCDevice &device, OSPDataType type, const vec3ul &numItems) byteStride(0) { // TODO: is this pad out by 16 still needed? 
- view = make_buffer_shared_unique( - device.getIspcrtDevice(), size() * sizeOf(type) + 16); + view = make_buffer_shared_unique(device.getIspcrtContext(), + size() * sizeOf(type) + 16, + ispcrt::SharedMemoryUsageHint::HostWriteDeviceRead); addr = view->data(); init(); @@ -83,6 +84,7 @@ void Data::init() // Check if the shared data the app gave is actually in USM, if not we still // need to make a copy of it internally so it's accessible on the GPU if (shared) { + ispcrt::Context &ispcrtContext = getISPCDevice().getIspcrtContext(); ispcrt::Device &ispcrtDevice = getISPCDevice().getIspcrtDevice(); auto memType = ispcrtDevice.getMemoryAllocType(addr); @@ -90,8 +92,9 @@ void Data::init() const size_t sizeBytes = byteStride.z * numItems.z; shared = false; // TODO: is the padding still needed? - view = make_buffer_shared_unique( - getISPCDevice().getIspcrtDevice(), sizeBytes + 16); + view = make_buffer_shared_unique(ispcrtContext, + sizeBytes + 16, + ispcrt::SharedMemoryUsageHint::HostWriteDeviceRead); addr = view->data(); std::memcpy(addr, appSharedPtr, sizeBytes); } diff --git a/modules/cpu/common/Group.cpp b/modules/cpu/common/Group.cpp index 684e4f8a5..ff5234173 100644 --- a/modules/cpu/common/Group.cpp +++ b/modules/cpu/common/Group.cpp @@ -43,7 +43,7 @@ static void freeAndNullifyEmbreeScene(RTCScene &scene) // Group definitions //////////////////////////////////////////////////////// Group::Group(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared(device.getIspcrtContext(), device) { managedObjectType = OSP_GROUP; } @@ -136,7 +136,7 @@ void Group::commit() buildQuality); geometricModelsArray = make_buffer_shared_unique( - getISPCDevice().getIspcrtDevice(), + getISPCDevice().getIspcrtContext(), createArrayOfSh(*geometricModels)); getSh()->geometricModels = geometricModelsArray->sharedPtr(); @@ -153,7 +153,7 @@ void Group::commit() buildQuality); volumetricModelsArray = make_buffer_shared_unique( - 
getISPCDevice().getIspcrtDevice(), + getISPCDevice().getIspcrtContext(), createArrayOfSh(*volumetricModels)); getSh()->volumetricModels = volumetricModelsArray->sharedPtr(); @@ -171,7 +171,7 @@ void Group::commit() buildQuality); clipModelsArray = make_buffer_shared_unique( - getISPCDevice().getIspcrtDevice(), + getISPCDevice().getIspcrtContext(), createArrayOfSh(*clipModels)); getSh()->clipModels = clipModelsArray->sharedPtr(); diff --git a/modules/cpu/common/Instance.cpp b/modules/cpu/common/Instance.cpp index 77bf987b4..f88d5eec9 100644 --- a/modules/cpu/common/Instance.cpp +++ b/modules/cpu/common/Instance.cpp @@ -7,7 +7,7 @@ namespace ospray { Instance::Instance(api::ISPCDevice &device, Group *_group) - : AddStructShared(device.getIspcrtDevice(), device), groupAPI(_group) + : AddStructShared(device.getIspcrtContext(), device), groupAPI(_group) { managedObjectType = OSP_INSTANCE; } diff --git a/modules/cpu/common/StructShared.h b/modules/cpu/common/StructShared.h index 8e098d061..5e0476444 100644 --- a/modules/cpu/common/StructShared.h +++ b/modules/cpu/common/StructShared.h @@ -42,9 +42,10 @@ namespace ospray { */ template -inline ISPCRTMemoryView StructSharedCreate(ISPCRTDevice device) +inline ISPCRTMemoryView StructSharedCreate(ISPCRTContext context) { - ISPCRTMemoryView view = BufferSharedCreate(device, sizeof(T)); + ISPCRTMemoryView view = + BufferSharedCreate(context, sizeof(T), ISPCRT_SM_HOST_WRITE_DEVICE_READ); new (ispcrtSharedPtr(view)) T; return view; } @@ -66,7 +67,7 @@ struct StructSharedView template struct StructSharedGet { - StructSharedGet(ISPCRTDevice, ISPCRTMemoryView *); + StructSharedGet(ISPCRTContext, ISPCRTMemoryView *); T *getSh() const; }; @@ -121,9 +122,9 @@ struct AddStructShared "StructShared_t needs to have 'super' member of type Base::StructShared_t"); template - AddStructShared(ispcrt::Device &device, Args &&... 
args) + AddStructShared(ispcrt::Context &context, Args &&...args) : StructSharedGet>( - device.handle(), &_view), + context.handle(), &_view), Base(std::forward(args)...) {} }; @@ -137,7 +138,7 @@ inline StructSharedView::~StructSharedView() template StructSharedGet::StructSharedGet( - ISPCRTDevice device, ISPCRTMemoryView *view) + ISPCRTContext device, ISPCRTMemoryView *view) { if (!*view) *view = StructSharedCreate(device); @@ -164,7 +165,7 @@ struct ShouldPass3 : public AddStructShared {}; //struct ShouldFail1 : public AddStructShared {}; //struct ShouldFail2 : public AddStructShared {}; //struct ShouldFail3 : public AddStructShared {}; -// clang-format on +// clang-format on } // namespace test } // namespace ospray diff --git a/modules/cpu/common/World.cpp b/modules/cpu/common/World.cpp index 6fafa9f4c..313473153 100644 --- a/modules/cpu/common/World.cpp +++ b/modules/cpu/common/World.cpp @@ -57,7 +57,7 @@ World::~World() } World::World(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared(device.getIspcrtContext(), device) { managedObjectType = OSP_WORLD; } @@ -122,7 +122,7 @@ void World::commit() // Create shared buffers for instance pointers instanceArray = make_buffer_shared_unique( - getISPCDevice().getIspcrtDevice(), + getISPCDevice().getIspcrtContext(), sizeof(ispc::Instance *) * numInstances); getSh()->instances = instanceArray->sharedPtr(); diff --git a/modules/cpu/fb/FrameBuffer.cpp b/modules/cpu/fb/FrameBuffer.cpp index ae7456e0d..f6dc8b3f6 100644 --- a/modules/cpu/fb/FrameBuffer.cpp +++ b/modules/cpu/fb/FrameBuffer.cpp @@ -31,7 +31,7 @@ FrameBuffer::FrameBuffer(api::ISPCDevice &device, ColorBufferFormat _colorBufferFormat, const uint32 channels, const FeatureFlagsOther ffo) - : AddStructShared(device.getIspcrtDevice(), device), + : AddStructShared(device.getIspcrtContext(), device), size(_size), hasDepthBuffer(channels & OSP_FB_DEPTH), hasAccumBuffer(channels & OSP_FB_ACCUM), diff --git 
a/modules/cpu/fb/ImageOp.cpp b/modules/cpu/fb/ImageOp.cpp index 35271f04e..071b79a51 100644 --- a/modules/cpu/fb/ImageOp.cpp +++ b/modules/cpu/fb/ImageOp.cpp @@ -22,7 +22,7 @@ std::string ImageOp::toString() const LivePixelOp::LivePixelOp(FrameBufferView &_fbView) : AddStructShared( - _fbView.originalFB->getISPCDevice().getIspcrtDevice(), _fbView) + _fbView.originalFB->getISPCDevice().getIspcrtContext(), _fbView) {} LiveFrameOp::LiveFrameOp(FrameBufferView &_fbView) : LiveImageOp(_fbView) {} diff --git a/modules/cpu/fb/LocalFB.cpp b/modules/cpu/fb/LocalFB.cpp index 9c7a1616b..e1ef0032a 100644 --- a/modules/cpu/fb/LocalFB.cpp +++ b/modules/cpu/fb/LocalFB.cpp @@ -39,14 +39,14 @@ LocalFrameBuffer::LocalFrameBuffer(api::ISPCDevice &device, const vec2i &_size, ColorBufferFormat _colorBufferFormat, const uint32 channels) - : AddStructShared(device.getIspcrtDevice(), + : AddStructShared(device.getIspcrtContext(), device, _size, _colorBufferFormat, channels, FFO_FB_LOCAL), numRenderTasks(divRoundUp(size, getRenderTaskSize())), - taskErrorRegion(device.getIspcrtDevice(), + taskErrorRegion(device.getIspcrtContext(), hasVarianceBuffer ? 
getNumRenderTasks() : vec2i(0)) { const size_t pixelBytes = sizeOf(_colorBufferFormat); @@ -54,55 +54,55 @@ LocalFrameBuffer::LocalFrameBuffer(api::ISPCDevice &device, if (getColorBufferFormat() != OSP_FB_NONE) { colorBuffer = make_buffer_shared_unique( - device.getIspcrtDevice(), pixelBytes * numPixels); + device.getIspcrtContext(), pixelBytes * numPixels); } if (hasDepthBuffer) depthBuffer = - make_buffer_shared_unique(device.getIspcrtDevice(), numPixels); + make_buffer_shared_unique(device.getIspcrtContext(), numPixels); if (hasAccumBuffer) { accumBuffer = - make_buffer_shared_unique(device.getIspcrtDevice(), numPixels); + make_buffer_shared_unique(device.getIspcrtContext(), numPixels); taskAccumID = make_buffer_shared_unique( - device.getIspcrtDevice(), getTotalRenderTasks()); + device.getIspcrtContext(), getTotalRenderTasks()); std::memset(taskAccumID->data(), 0, taskAccumID->size() * sizeof(int32_t)); } if (hasVarianceBuffer) varianceBuffer = - make_buffer_shared_unique(device.getIspcrtDevice(), numPixels); + make_buffer_shared_unique(device.getIspcrtContext(), numPixels); if (hasNormalBuffer) normalBuffer = - make_buffer_shared_unique(device.getIspcrtDevice(), numPixels); + make_buffer_shared_unique(device.getIspcrtContext(), numPixels); if (hasAlbedoBuffer) albedoBuffer = - make_buffer_shared_unique(device.getIspcrtDevice(), numPixels); + make_buffer_shared_unique(device.getIspcrtContext(), numPixels); if (hasPrimitiveIDBuffer) primitiveIDBuffer = make_buffer_shared_unique( - device.getIspcrtDevice(), numPixels); + device.getIspcrtContext(), numPixels); if (hasObjectIDBuffer) objectIDBuffer = make_buffer_shared_unique( - device.getIspcrtDevice(), numPixels); + device.getIspcrtContext(), numPixels); if (hasInstanceIDBuffer) instanceIDBuffer = make_buffer_shared_unique( - device.getIspcrtDevice(), numPixels); + device.getIspcrtContext(), numPixels); // TODO: Better way to pass the task IDs that doesn't require just storing // them all? 
Maybe as blocks/tiles similar to when we just had tiles? Will // make task ID lookup more expensive for sparse case though renderTaskIDs = make_buffer_shared_unique( - device.getIspcrtDevice(), getTotalRenderTasks()); + device.getIspcrtContext(), getTotalRenderTasks()); std::iota(renderTaskIDs->begin(), renderTaskIDs->end(), 0); if (hasVarianceBuffer) activeTaskIDs = make_buffer_shared_unique( - device.getIspcrtDevice(), getTotalRenderTasks()); + device.getIspcrtContext(), getTotalRenderTasks()); // TODO: Could use TBB parallel sort here if it's exposed through the rkcommon // tasking system diff --git a/modules/cpu/fb/SparseFB.cpp b/modules/cpu/fb/SparseFB.cpp index 4ecc09a70..f6d67a589 100644 --- a/modules/cpu/fb/SparseFB.cpp +++ b/modules/cpu/fb/SparseFB.cpp @@ -21,7 +21,7 @@ SparseFrameBuffer::SparseFrameBuffer(api::ISPCDevice &device, const uint32 channels, const std::vector &_tileIDs, const bool overrideUseTaskAccumIDs) - : AddStructShared(device.getIspcrtDevice(), + : AddStructShared(device.getIspcrtContext(), device, _size, _colorBufferFormat, @@ -44,7 +44,7 @@ SparseFrameBuffer::SparseFrameBuffer(api::ISPCDevice &device, ColorBufferFormat _colorBufferFormat, const uint32 channels, const bool overrideUseTaskAccumIDs) - : AddStructShared(device.getIspcrtDevice(), + : AddStructShared(device.getIspcrtContext(), device, _size, _colorBufferFormat, @@ -232,10 +232,9 @@ uint32_t SparseFrameBuffer::getTileIndexForTask(uint32_t taskID) const void SparseFrameBuffer::setTiles(const std::vector &_tileIDs) { // (Re-)configure the sparse framebuffer based on the tileIDs we're passed - if (!_tileIDs.empty()) { tileIDs = make_buffer_shared_unique( - getISPCDevice().getIspcrtDevice(), _tileIDs.size()); + getISPCDevice().getIspcrtContext(), _tileIDs.size()); std::memcpy( tileIDs->data(), _tileIDs.data(), sizeof(uint32_t) * _tileIDs.size()); numRenderTasks = @@ -247,7 +246,7 @@ void SparseFrameBuffer::setTiles(const std::vector &_tileIDs) if (hasVarianceBuffer && 
!_tileIDs.empty()) { taskErrorBuffer = make_buffer_shared_unique( - getISPCDevice().getIspcrtDevice(), numRenderTasks.long_product()); + getISPCDevice().getIspcrtContext(), numRenderTasks.long_product()); std::fill(taskErrorBuffer->begin(), taskErrorBuffer->end(), inf); } else { taskErrorBuffer = nullptr; @@ -255,7 +254,7 @@ void SparseFrameBuffer::setTiles(const std::vector &_tileIDs) if (!_tileIDs.empty()) { tiles = make_buffer_shared_unique( - getISPCDevice().getIspcrtDevice(), tileIDs->size()); + getISPCDevice().getIspcrtContext(), tileIDs->size()); const vec2f rcpSize = rcp(vec2f(size)); for (size_t i = 0; i < tileIDs->size(); ++i) { vec2i tilePos; @@ -277,21 +276,21 @@ void SparseFrameBuffer::setTiles(const std::vector &_tileIDs) const size_t numPixels = tiles ? tileIDs->size() * TILE_SIZE * TILE_SIZE : 0; if (hasVarianceBuffer && !_tileIDs.empty()) { varianceBuffer = make_buffer_shared_unique( - getISPCDevice().getIspcrtDevice(), numPixels); + getISPCDevice().getIspcrtContext(), numPixels); } else { varianceBuffer = nullptr; } if (hasAccumBuffer && !_tileIDs.empty()) { accumulationBuffer = make_buffer_shared_unique( - getISPCDevice().getIspcrtDevice(), numPixels); + getISPCDevice().getIspcrtContext(), numPixels); } else { accumulationBuffer = nullptr; } if ((hasAccumBuffer || useTaskAccumIDs) && !_tileIDs.empty()) { taskAccumID = make_buffer_shared_unique( - getISPCDevice().getIspcrtDevice(), getTotalRenderTasks()); + getISPCDevice().getIspcrtContext(), getTotalRenderTasks()); std::memset(taskAccumID->begin(), 0, taskAccumID->size() * sizeof(int)); } else { taskAccumID = nullptr; @@ -302,14 +301,14 @@ void SparseFrameBuffer::setTiles(const std::vector &_tileIDs) // variance termination if (!_tileIDs.empty()) { renderTaskIDs = make_buffer_shared_unique( - getISPCDevice().getIspcrtDevice(), getTotalRenderTasks()); + getISPCDevice().getIspcrtContext(), getTotalRenderTasks()); std::iota(renderTaskIDs->begin(), renderTaskIDs->end(), 0); } else { renderTaskIDs = 
nullptr; } if (hasVarianceBuffer && !_tileIDs.empty()) { activeTaskIDs = make_buffer_shared_unique( - getISPCDevice().getIspcrtDevice(), getTotalRenderTasks()); + getISPCDevice().getIspcrtContext(), getTotalRenderTasks()); } else { activeTaskIDs = nullptr; } diff --git a/modules/cpu/fb/TaskError.cpp b/modules/cpu/fb/TaskError.cpp index cf9e3c72f..578b8f2d9 100644 --- a/modules/cpu/fb/TaskError.cpp +++ b/modules/cpu/fb/TaskError.cpp @@ -6,12 +6,12 @@ namespace ospray { -TaskError::TaskError(ispcrt::Device &device, const vec2i &_numTasks) +TaskError::TaskError(ispcrt::Context &context, const vec2i &_numTasks) : numTasks(_numTasks) { if (numTasks.long_product() > 0) { taskErrorBuffer = - make_buffer_shared_unique(device, numTasks.long_product()); + make_buffer_shared_unique(context, numTasks.long_product()); // maximum number of regions: all regions are of size 3 are split in // half errorRegion.reserve(divRoundUp(taskErrorBuffer->size() * 2, size_t(3))); diff --git a/modules/cpu/fb/TaskError.h b/modules/cpu/fb/TaskError.h index 206815a04..30f9625b8 100644 --- a/modules/cpu/fb/TaskError.h +++ b/modules/cpu/fb/TaskError.h @@ -16,7 +16,7 @@ namespace ospray { class OSPRAY_SDK_INTERFACE TaskError { public: - TaskError(ispcrt::Device &ispcrtDevice, const vec2i &numTasks); + TaskError(ispcrt::Context &ispcrtContext, const vec2i &numTasks); // The default constructor will make an empty task error region TaskError() = default; diff --git a/modules/cpu/fb/pixel_ops/ToneMapper.cpp b/modules/cpu/fb/pixel_ops/ToneMapper.cpp index 2d0d353a5..8ec43b367 100644 --- a/modules/cpu/fb/pixel_ops/ToneMapper.cpp +++ b/modules/cpu/fb/pixel_ops/ToneMapper.cpp @@ -73,7 +73,7 @@ LiveToneMapper::LiveToneMapper(FrameBufferView &_fbView, float d, bool acesColor) : AddStructShared( - _fbView.originalFB->getISPCDevice().getIspcrtDevice(), _fbView) + _fbView.originalFB->getISPCDevice().getIspcrtContext(), _fbView) { getSh()->super.processPixel = reinterpret_cast( diff --git 
a/modules/cpu/geometry/Boxes.cpp b/modules/cpu/geometry/Boxes.cpp index a119d04f8..fae789e68 100644 --- a/modules/cpu/geometry/Boxes.cpp +++ b/modules/cpu/geometry/Boxes.cpp @@ -17,7 +17,7 @@ namespace ospray { Boxes::Boxes(api::ISPCDevice &device) : AddStructShared( - device.getIspcrtDevice(), device, FFG_BOX | FFG_USER_GEOMETRY) + device.getIspcrtContext(), device, FFG_BOX | FFG_USER_GEOMETRY) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.postIntersect = diff --git a/modules/cpu/geometry/Curves.cpp b/modules/cpu/geometry/Curves.cpp index a046d1383..5ba7f6a04 100644 --- a/modules/cpu/geometry/Curves.cpp +++ b/modules/cpu/geometry/Curves.cpp @@ -69,7 +69,7 @@ static std::map curveFeatureFlags = { // Curves definitions /////////////////////////////////////////////////////// Curves::Curves(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device, FFG_NONE) + : AddStructShared(device.getIspcrtContext(), device, FFG_NONE) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.postIntersect = diff --git a/modules/cpu/geometry/GeometricModel.cpp b/modules/cpu/geometry/GeometricModel.cpp index abf3e372d..169d21734 100644 --- a/modules/cpu/geometry/GeometricModel.cpp +++ b/modules/cpu/geometry/GeometricModel.cpp @@ -8,7 +8,7 @@ namespace ospray { GeometricModel::GeometricModel(api::ISPCDevice &device, Geometry *_geometry) - : AddStructShared(device.getIspcrtDevice(), device), geomAPI(_geometry) + : AddStructShared(device.getIspcrtContext(), device), geomAPI(_geometry) { managedObjectType = OSP_GEOMETRIC_MODEL; } @@ -38,7 +38,7 @@ void GeometricModel::commit() featureFlags |= mat->getFeatureFlagsOther(); materialArray = make_buffer_shared_unique( - getISPCDevice().getIspcrtDevice(), + getISPCDevice().getIspcrtContext(), createArrayOfSh(materialData->as())); getSh()->material = materialArray->sharedPtr(); getSh()->numMaterials = materialArray->size(); @@ -46,7 +46,7 @@ void GeometricModel::commit() materialData = getParamDataT("material", false, true); if 
(materialData) { materialIDArray = - make_buffer_shared_unique(getISPCDevice().getIspcrtDevice(), + make_buffer_shared_unique(getISPCDevice().getIspcrtContext(), materialData->as().data(), materialData->size()); getSh()->materialID = materialIDArray->sharedPtr(); diff --git a/modules/cpu/geometry/Geometry.cpp b/modules/cpu/geometry/Geometry.cpp index 693453455..0c91d7faa 100644 --- a/modules/cpu/geometry/Geometry.cpp +++ b/modules/cpu/geometry/Geometry.cpp @@ -13,7 +13,7 @@ namespace ospray { // Geometry definitions /////////////////////////////////////////////////////// Geometry::Geometry(api::ISPCDevice &device, const FeatureFlagsGeometry ffg) - : AddStructShared(device.getIspcrtDevice(), device), featureFlags(ffg) + : AddStructShared(device.getIspcrtContext(), device), featureFlags(ffg) { managedObjectType = OSP_GEOMETRY; } diff --git a/modules/cpu/geometry/Isosurfaces.cpp b/modules/cpu/geometry/Isosurfaces.cpp index e0a01f032..247a7562f 100644 --- a/modules/cpu/geometry/Isosurfaces.cpp +++ b/modules/cpu/geometry/Isosurfaces.cpp @@ -24,7 +24,7 @@ namespace ospray { Isosurfaces::Isosurfaces(api::ISPCDevice &device) : AddStructShared( - device.getIspcrtDevice(), device, FFG_ISOSURFACE | FFG_USER_GEOMETRY) + device.getIspcrtContext(), device, FFG_ISOSURFACE | FFG_USER_GEOMETRY) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.postIntersect = ispc::Isosurfaces_postIntersect_addr(); diff --git a/modules/cpu/geometry/Mesh.cpp b/modules/cpu/geometry/Mesh.cpp index bfa419c45..912631566 100644 --- a/modules/cpu/geometry/Mesh.cpp +++ b/modules/cpu/geometry/Mesh.cpp @@ -21,7 +21,7 @@ void *Mesh_getAreas_addr(); namespace ospray { Mesh::Mesh(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device, FFG_NONE) + : AddStructShared(device.getIspcrtContext(), device, FFG_NONE) { getSh()->super.getAreas = reinterpret_cast(ispc::Mesh_getAreas_addr()); diff --git a/modules/cpu/geometry/Planes.cpp b/modules/cpu/geometry/Planes.cpp index 95d3c273d..fbf68347b 100644 
--- a/modules/cpu/geometry/Planes.cpp +++ b/modules/cpu/geometry/Planes.cpp @@ -17,7 +17,7 @@ namespace ospray { Planes::Planes(api::ISPCDevice &device) : AddStructShared( - device.getIspcrtDevice(), device, FFG_PLANE | FFG_USER_GEOMETRY) + device.getIspcrtContext(), device, FFG_PLANE | FFG_USER_GEOMETRY) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.postIntersect = diff --git a/modules/cpu/geometry/Spheres.cpp b/modules/cpu/geometry/Spheres.cpp index ca6fee7a5..e2531d097 100644 --- a/modules/cpu/geometry/Spheres.cpp +++ b/modules/cpu/geometry/Spheres.cpp @@ -18,7 +18,7 @@ void *Spheres_getAreas_addr(); namespace ospray { Spheres::Spheres(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device, FFG_SPHERE) + : AddStructShared(device.getIspcrtContext(), device, FFG_SPHERE) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.postIntersect = diff --git a/modules/cpu/geometry/Subdivision.cpp b/modules/cpu/geometry/Subdivision.cpp index 226f5032d..1eaab6932 100644 --- a/modules/cpu/geometry/Subdivision.cpp +++ b/modules/cpu/geometry/Subdivision.cpp @@ -16,7 +16,7 @@ void *Subdivision_postIntersect_addr(); namespace ospray { Subdivision::Subdivision(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device, FFG_SUBDIVISION) + : AddStructShared(device.getIspcrtContext(), device, FFG_SUBDIVISION) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.postIntersect = diff --git a/modules/cpu/lights/AmbientLight.cpp b/modules/cpu/lights/AmbientLight.cpp index 21253d652..02bb98d6e 100644 --- a/modules/cpu/lights/AmbientLight.cpp +++ b/modules/cpu/lights/AmbientLight.cpp @@ -20,7 +20,7 @@ ISPCRTMemoryView AmbientLight::createSh( uint32_t, const ispc::Instance *instance) const { ISPCRTMemoryView view = StructSharedCreate( - getISPCDevice().getIspcrtDevice().handle()); + getISPCDevice().getIspcrtContext().handle()); ispc::AmbientLight *sh = (ispc::AmbientLight *)ispcrtSharedPtr(view); #ifndef OSPRAY_TARGET_SYCL sh->super.sample = reinterpret_cast( diff --git 
a/modules/cpu/lights/CylinderLight.cpp b/modules/cpu/lights/CylinderLight.cpp index c70ca815b..6df0fcaa6 100644 --- a/modules/cpu/lights/CylinderLight.cpp +++ b/modules/cpu/lights/CylinderLight.cpp @@ -24,7 +24,7 @@ ISPCRTMemoryView CylinderLight::createSh( uint32_t, const ispc::Instance *instance) const { ISPCRTMemoryView view = StructSharedCreate( - getISPCDevice().getIspcrtDevice().handle()); + getISPCDevice().getIspcrtContext().handle()); ispc::CylinderLight *sh = (ispc::CylinderLight *)ispcrtSharedPtr(view); #ifndef OSPRAY_TARGET_SYCL diff --git a/modules/cpu/lights/DirectionalLight.cpp b/modules/cpu/lights/DirectionalLight.cpp index c7c34606b..2ab074edf 100644 --- a/modules/cpu/lights/DirectionalLight.cpp +++ b/modules/cpu/lights/DirectionalLight.cpp @@ -64,7 +64,7 @@ ISPCRTMemoryView DirectionalLight::createSh( uint32_t, const ispc::Instance *instance) const { ISPCRTMemoryView view = StructSharedCreate( - getISPCDevice().getIspcrtDevice().handle()); + getISPCDevice().getIspcrtContext().handle()); ispc::DirectionalLight *sh = (ispc::DirectionalLight *)ispcrtSharedPtr(view); sh->set(visible, instance, direction, irradiance, cosAngle); return view; diff --git a/modules/cpu/lights/HDRILight.cpp b/modules/cpu/lights/HDRILight.cpp index 17e91aa1c..1702804f6 100644 --- a/modules/cpu/lights/HDRILight.cpp +++ b/modules/cpu/lights/HDRILight.cpp @@ -72,7 +72,7 @@ ISPCRTMemoryView HDRILight::createSh( uint32_t, const ispc::Instance *instance) const { ISPCRTMemoryView view = StructSharedCreate( - getISPCDevice().getIspcrtDevice().handle()); + getISPCDevice().getIspcrtContext().handle()); ispc::HDRILight *sh = (ispc::HDRILight *)ispcrtSharedPtr(view); sh->set(visible, instance, diff --git a/modules/cpu/lights/PointLight.cpp b/modules/cpu/lights/PointLight.cpp index e03de1823..7ba0873bd 100644 --- a/modules/cpu/lights/PointLight.cpp +++ b/modules/cpu/lights/PointLight.cpp @@ -23,7 +23,7 @@ ISPCRTMemoryView PointLight::createSh( uint32_t, const ispc::Instance *instance) 
const { ISPCRTMemoryView view = StructSharedCreate( - getISPCDevice().getIspcrtDevice().handle()); + getISPCDevice().getIspcrtContext().handle()); ispc::PointLight *sh = (ispc::PointLight *)ispcrtSharedPtr(view); sh->super.isVisible = visible; diff --git a/modules/cpu/lights/QuadLight.cpp b/modules/cpu/lights/QuadLight.cpp index 890c95ccb..730c12cf7 100644 --- a/modules/cpu/lights/QuadLight.cpp +++ b/modules/cpu/lights/QuadLight.cpp @@ -19,7 +19,7 @@ ISPCRTMemoryView QuadLight::createSh( uint32_t, const ispc::Instance *instance) const { ISPCRTMemoryView view = StructSharedCreate( - getISPCDevice().getIspcrtDevice().handle()); + getISPCDevice().getIspcrtContext().handle()); ispc::QuadLight *sh = (ispc::QuadLight *)ispcrtSharedPtr(view); #ifndef OSPRAY_TARGET_SYCL sh->super.sample = diff --git a/modules/cpu/lights/SpotLight.cpp b/modules/cpu/lights/SpotLight.cpp index b26fe6820..88bf70841 100644 --- a/modules/cpu/lights/SpotLight.cpp +++ b/modules/cpu/lights/SpotLight.cpp @@ -24,7 +24,7 @@ ISPCRTMemoryView SpotLight::createSh( uint32_t, const ispc::Instance *instance) const { ISPCRTMemoryView view = StructSharedCreate( - getISPCDevice().getIspcrtDevice().handle()); + getISPCDevice().getIspcrtContext().handle()); ispc::SpotLight *sh = (ispc::SpotLight *)ispcrtSharedPtr(view); #ifndef OSPRAY_TARGET_SYCL sh->super.sample = diff --git a/modules/cpu/lights/SunSkyLight.cpp b/modules/cpu/lights/SunSkyLight.cpp index 3495b2b46..dde1b3815 100644 --- a/modules/cpu/lights/SunSkyLight.cpp +++ b/modules/cpu/lights/SunSkyLight.cpp @@ -24,7 +24,7 @@ SunSkyLight::SunSkyLight(api::ISPCDevice &device) static const int skyResolution = 512; this->skySize = vec2i(skyResolution, skyResolution / 2); this->skyImage = make_buffer_shared_unique( - getISPCDevice().getIspcrtDevice(), skySize.product()); + getISPCDevice().getIspcrtContext(), skySize.product()); static auto format = static_cast(OSP_TEXTURE_RGB32F); static auto filter = static_cast(OSP_TEXTURE_FILTER_BILINEAR); @@ -40,7 +40,7 @@ 
ISPCRTMemoryView SunSkyLight::createSh( switch (index) { case 0: { ISPCRTMemoryView view = StructSharedCreate( - getISPCDevice().getIspcrtDevice().handle()); + getISPCDevice().getIspcrtContext().handle()); ispc::HDRILight *sh = (ispc::HDRILight *)ispcrtSharedPtr(view); sh->set(visible, instance, @@ -52,7 +52,7 @@ ISPCRTMemoryView SunSkyLight::createSh( } case 1: { ISPCRTMemoryView view = StructSharedCreate( - getISPCDevice().getIspcrtDevice().handle()); + getISPCDevice().getIspcrtContext().handle()); ispc::DirectionalLight *sh = (ispc::DirectionalLight *)ispcrtSharedPtr(view); sh->set(visible, instance, direction, solarIrradiance, cosAngle); diff --git a/modules/cpu/math/Distribution2D.cpp b/modules/cpu/math/Distribution2D.cpp index 6ebb3259d..fdec4e688 100644 --- a/modules/cpu/math/Distribution2D.cpp +++ b/modules/cpu/math/Distribution2D.cpp @@ -6,14 +6,14 @@ namespace ospray { Distribution2D::Distribution2D(const vec2i &size, api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared(device.getIspcrtContext(), device) { getSh()->size = size; getSh()->rcpSize = vec2f(1.f / size.x, 1.f / size.y); cdf_x = make_buffer_shared_unique( - device.getIspcrtDevice(), size.x * size.y); - cdf_y = make_buffer_shared_unique(device.getIspcrtDevice(), size.y); + device.getIspcrtContext(), size.x * size.y); + cdf_y = make_buffer_shared_unique(device.getIspcrtContext(), size.y); getSh()->cdf_x = cdf_x->data(); getSh()->cdf_y = cdf_y->data(); diff --git a/modules/cpu/math/MathConstants.cpp b/modules/cpu/math/MathConstants.cpp index 2cf6a92f5..3dd59ee34 100644 --- a/modules/cpu/math/MathConstants.cpp +++ b/modules/cpu/math/MathConstants.cpp @@ -8,16 +8,16 @@ namespace ospray { MathConstants::MathConstants(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice()) + : AddStructShared(device.getIspcrtContext()) { haltonPerm3Buf = - make_buffer_shared_unique(device.getIspcrtDevice(), 243); + 
make_buffer_shared_unique(device.getIspcrtContext(), 243); haltonPerm5Buf = - make_buffer_shared_unique(device.getIspcrtDevice(), 125); + make_buffer_shared_unique(device.getIspcrtContext(), 125); haltonPerm7Buf = - make_buffer_shared_unique(device.getIspcrtDevice(), 343); + make_buffer_shared_unique(device.getIspcrtContext(), 343); sobolMatricesBuf = make_buffer_shared_unique( - device.getIspcrtDevice(), Sobol_numDimensions * Sobol_numBits); + device.getIspcrtContext(), Sobol_numDimensions * Sobol_numBits); std::memcpy(haltonPerm3Buf->begin(), halton_perm3, diff --git a/modules/cpu/pf/PixelFilter.cpp b/modules/cpu/pf/PixelFilter.cpp index 06e4abfbb..1ab18c395 100644 --- a/modules/cpu/pf/PixelFilter.cpp +++ b/modules/cpu/pf/PixelFilter.cpp @@ -17,13 +17,13 @@ void LUTPixelFilter_buildLUT(void *self); namespace ospray { PixelFilter::PixelFilter(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared(device.getIspcrtContext(), device) {} LUTPixelFilter::LUTPixelFilter(const float size, ispc::LUTPixelFilterType lutFilterType, api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared(device.getIspcrtContext(), device) { getSh()->super.width = size; getSh()->super.type = ispc::PIXEL_FILTER_TYPE_LUT; diff --git a/modules/cpu/render/Material.cpp b/modules/cpu/render/Material.cpp index 2b677a6ee..653817e07 100644 --- a/modules/cpu/render/Material.cpp +++ b/modules/cpu/render/Material.cpp @@ -17,7 +17,7 @@ namespace ospray { Ref Material::microfacetAlbedoTables = nullptr; Material::Material(api::ISPCDevice &device, const FeatureFlagsOther ffo) - : AddStructShared(device.getIspcrtDevice(), device), featureFlags(ffo) + : AddStructShared(device.getIspcrtContext(), device), featureFlags(ffo) { managedObjectType = OSP_MATERIAL; #ifndef OSPRAY_TARGET_SYCL diff --git a/modules/cpu/render/Renderer.cpp b/modules/cpu/render/Renderer.cpp index 1c1567c6f..f6bfba5c6 100644 --- 
a/modules/cpu/render/Renderer.cpp +++ b/modules/cpu/render/Renderer.cpp @@ -24,7 +24,7 @@ namespace ospray { // Renderer definitions /////////////////////////////////////////////////////// Renderer::Renderer(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared(device.getIspcrtContext(), device) { managedObjectType = OSP_RENDERER; pixelFilter = nullptr; @@ -73,7 +73,7 @@ void Renderer::commit() featureFlags |= mat->getFeatureFlagsOther(); materialArray = make_buffer_shared_unique( - getISPCDevice().getIspcrtDevice(), + getISPCDevice().getIspcrtContext(), createArrayOfSh(*materialData)); getSh()->numMaterials = materialArray->size(); getSh()->material = materialArray->sharedPtr(); diff --git a/modules/cpu/render/ao/AORenderer.cpp b/modules/cpu/render/ao/AORenderer.cpp index 30587eefc..39d2d8cb3 100644 --- a/modules/cpu/render/ao/AORenderer.cpp +++ b/modules/cpu/render/ao/AORenderer.cpp @@ -16,7 +16,7 @@ constexpr sycl::specialization_id specFeatureFlags; namespace ospray { AORenderer::AORenderer(api::ISPCDevice &device, int defaultNumSamples) - : AddStructShared(device.getIspcrtDevice(), device), + : AddStructShared(device.getIspcrtContext(), device), aoSamples(defaultNumSamples) {} diff --git a/modules/cpu/render/bsdfs/MicrofacetAlbedoTables.cpp b/modules/cpu/render/bsdfs/MicrofacetAlbedoTables.cpp index fc2dc422a..c7ffe6f7d 100644 --- a/modules/cpu/render/bsdfs/MicrofacetAlbedoTables.cpp +++ b/modules/cpu/render/bsdfs/MicrofacetAlbedoTables.cpp @@ -12,44 +12,44 @@ void precomputeMicrofacetAlbedoTables(void *_tables); namespace ospray { MicrofacetAlbedoTables::MicrofacetAlbedoTables(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice()) + : AddStructShared(device.getIspcrtContext()) { - albedo_dir = make_buffer_shared_unique(device.getIspcrtDevice(), + albedo_dir = make_buffer_shared_unique(device.getIspcrtContext(), MICROFACET_ALBEDO_TABLE_SIZE * MICROFACET_ALBEDO_TABLE_SIZE); albedo_avg = 
make_buffer_shared_unique( - device.getIspcrtDevice(), MICROFACET_ALBEDO_TABLE_SIZE); + device.getIspcrtContext(), MICROFACET_ALBEDO_TABLE_SIZE); dielectricAlbedo_dir = - make_buffer_shared_unique(device.getIspcrtDevice(), + make_buffer_shared_unique(device.getIspcrtContext(), MICROFACET_DIELECTRIC_ALBEDO_TABLE_SIZE * MICROFACET_DIELECTRIC_ALBEDO_TABLE_SIZE * MICROFACET_DIELECTRIC_ALBEDO_TABLE_SIZE); dielectricAlbedo_avg = - make_buffer_shared_unique(device.getIspcrtDevice(), + make_buffer_shared_unique(device.getIspcrtContext(), MICROFACET_DIELECTRIC_ALBEDO_TABLE_SIZE * MICROFACET_DIELECTRIC_ALBEDO_TABLE_SIZE); dielectricRcpEtaAlbedo_dir = - make_buffer_shared_unique(device.getIspcrtDevice(), + make_buffer_shared_unique(device.getIspcrtContext(), MICROFACET_DIELECTRIC_ALBEDO_TABLE_SIZE * MICROFACET_DIELECTRIC_ALBEDO_TABLE_SIZE * MICROFACET_DIELECTRIC_ALBEDO_TABLE_SIZE); dielectricRcpEtaAlbedo_avg = - make_buffer_shared_unique(device.getIspcrtDevice(), + make_buffer_shared_unique(device.getIspcrtContext(), MICROFACET_DIELECTRIC_ALBEDO_TABLE_SIZE * MICROFACET_DIELECTRIC_ALBEDO_TABLE_SIZE); dielectricReflectionAlbedo_dir = - make_buffer_shared_unique(device.getIspcrtDevice(), + make_buffer_shared_unique(device.getIspcrtContext(), MICROFACET_DIELECTRIC_ALBEDO_TABLE_SIZE * MICROFACET_DIELECTRIC_ALBEDO_TABLE_SIZE * MICROFACET_DIELECTRIC_ALBEDO_TABLE_SIZE); dielectricReflectionAlbedo_avg = - make_buffer_shared_unique(device.getIspcrtDevice(), + make_buffer_shared_unique(device.getIspcrtContext(), MICROFACET_DIELECTRIC_ALBEDO_TABLE_SIZE * MICROFACET_DIELECTRIC_ALBEDO_TABLE_SIZE); - sheenAlbedo_dir = make_buffer_shared_unique(device.getIspcrtDevice(), + sheenAlbedo_dir = make_buffer_shared_unique(device.getIspcrtContext(), MICROFACET_SHEEN_ALBEDO_TABLE_SIZE * MICROFACET_SHEEN_ALBEDO_TABLE_SIZE); getSh()->albedo_dir = albedo_dir->data(); diff --git a/modules/cpu/render/debug/DebugRenderer.cpp b/modules/cpu/render/debug/DebugRenderer.cpp index e7ac554a7..502210488 100644 --- 
a/modules/cpu/render/debug/DebugRenderer.cpp +++ b/modules/cpu/render/debug/DebugRenderer.cpp @@ -57,7 +57,7 @@ static ispc::DebugRendererType typeFromString(const std::string &name) // DebugRenderer definitions //////////////////////////////////////////////// DebugRenderer::DebugRenderer(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared(device.getIspcrtContext(), device) {} std::string DebugRenderer::toString() const diff --git a/modules/cpu/render/materials/Alloy.cpp b/modules/cpu/render/materials/Alloy.cpp index db205c30a..b1d98177a 100644 --- a/modules/cpu/render/materials/Alloy.cpp +++ b/modules/cpu/render/materials/Alloy.cpp @@ -11,7 +11,7 @@ namespace ospray { namespace pathtracer { Alloy::Alloy(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device, FFO_MATERIAL_ALLOY) + : AddStructShared(device.getIspcrtContext(), device, FFO_MATERIAL_ALLOY) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.getBSDF = diff --git a/modules/cpu/render/materials/CarPaint.cpp b/modules/cpu/render/materials/CarPaint.cpp index be6a02933..77a155831 100644 --- a/modules/cpu/render/materials/CarPaint.cpp +++ b/modules/cpu/render/materials/CarPaint.cpp @@ -11,7 +11,7 @@ namespace ospray { namespace pathtracer { CarPaint::CarPaint(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device, FFO_MATERIAL_CARPAINT) + : AddStructShared(device.getIspcrtContext(), device, FFO_MATERIAL_CARPAINT) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.getBSDF = reinterpret_cast( diff --git a/modules/cpu/render/materials/Glass.cpp b/modules/cpu/render/materials/Glass.cpp index 14637e621..d3f543ef5 100644 --- a/modules/cpu/render/materials/Glass.cpp +++ b/modules/cpu/render/materials/Glass.cpp @@ -11,7 +11,7 @@ namespace ospray { namespace pathtracer { Glass::Glass(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device, FFO_MATERIAL_GLASS) + : AddStructShared(device.getIspcrtContext(), device, 
FFO_MATERIAL_GLASS) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.getBSDF = diff --git a/modules/cpu/render/materials/Luminous.cpp b/modules/cpu/render/materials/Luminous.cpp index 60cb0f9d9..9349302f3 100644 --- a/modules/cpu/render/materials/Luminous.cpp +++ b/modules/cpu/render/materials/Luminous.cpp @@ -11,7 +11,7 @@ namespace ospray { namespace pathtracer { Luminous::Luminous(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device, FFO_MATERIAL_LUMINOUS) + : AddStructShared(device.getIspcrtContext(), device, FFO_MATERIAL_LUMINOUS) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.getBSDF = reinterpret_cast( diff --git a/modules/cpu/render/materials/Metal.cpp b/modules/cpu/render/materials/Metal.cpp index 057587d0e..cbc18b482 100644 --- a/modules/cpu/render/materials/Metal.cpp +++ b/modules/cpu/render/materials/Metal.cpp @@ -12,7 +12,7 @@ namespace ospray { namespace pathtracer { Metal::Metal(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device, FFO_MATERIAL_METAL) + : AddStructShared(device.getIspcrtContext(), device, FFO_MATERIAL_METAL) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.getBSDF = diff --git a/modules/cpu/render/materials/MetallicPaint.cpp b/modules/cpu/render/materials/MetallicPaint.cpp index ea69a86a9..c14ae0598 100644 --- a/modules/cpu/render/materials/MetallicPaint.cpp +++ b/modules/cpu/render/materials/MetallicPaint.cpp @@ -12,7 +12,7 @@ namespace pathtracer { MetallicPaint::MetallicPaint(api::ISPCDevice &device) : AddStructShared( - device.getIspcrtDevice(), device, FFO_MATERIAL_METALLICPAINT) + device.getIspcrtContext(), device, FFO_MATERIAL_METALLICPAINT) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.getBSDF = reinterpret_cast( diff --git a/modules/cpu/render/materials/Mix.cpp b/modules/cpu/render/materials/Mix.cpp index 28fd4e84f..c40d3c5d0 100644 --- a/modules/cpu/render/materials/Mix.cpp +++ b/modules/cpu/render/materials/Mix.cpp @@ -11,7 +11,7 @@ namespace ospray { namespace pathtracer { 
MixMaterial::MixMaterial(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device, FFO_MATERIAL_MIX) + : AddStructShared(device.getIspcrtContext(), device, FFO_MATERIAL_MIX) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.getBSDF = diff --git a/modules/cpu/render/materials/OBJ.cpp b/modules/cpu/render/materials/OBJ.cpp index 1c8ea910a..1ab0467b6 100644 --- a/modules/cpu/render/materials/OBJ.cpp +++ b/modules/cpu/render/materials/OBJ.cpp @@ -11,7 +11,7 @@ namespace ospray { namespace pathtracer { OBJMaterial::OBJMaterial(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device, FFO_MATERIAL_OBJ) + : AddStructShared(device.getIspcrtContext(), device, FFO_MATERIAL_OBJ) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.getBSDF = diff --git a/modules/cpu/render/materials/Plastic.cpp b/modules/cpu/render/materials/Plastic.cpp index 883e68a06..7b2e1f43a 100644 --- a/modules/cpu/render/materials/Plastic.cpp +++ b/modules/cpu/render/materials/Plastic.cpp @@ -11,7 +11,7 @@ namespace ospray { namespace pathtracer { Plastic::Plastic(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device, FFO_MATERIAL_PLASTIC) + : AddStructShared(device.getIspcrtContext(), device, FFO_MATERIAL_PLASTIC) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.getBSDF = reinterpret_cast( diff --git a/modules/cpu/render/materials/Principled.cpp b/modules/cpu/render/materials/Principled.cpp index b38d3d643..4c3859c81 100644 --- a/modules/cpu/render/materials/Principled.cpp +++ b/modules/cpu/render/materials/Principled.cpp @@ -11,7 +11,8 @@ namespace ospray { namespace pathtracer { Principled::Principled(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device, FFO_MATERIAL_PRINCIPLED) + : AddStructShared( + device.getIspcrtContext(), device, FFO_MATERIAL_PRINCIPLED) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.getBSDF = reinterpret_cast( diff --git a/modules/cpu/render/materials/ThinGlass.cpp b/modules/cpu/render/materials/ThinGlass.cpp 
index 4a98f4b47..6a131ec1e 100644 --- a/modules/cpu/render/materials/ThinGlass.cpp +++ b/modules/cpu/render/materials/ThinGlass.cpp @@ -11,7 +11,7 @@ namespace ospray { namespace pathtracer { ThinGlass::ThinGlass(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device, FFO_MATERIAL_THINGLASS) + : AddStructShared(device.getIspcrtContext(), device, FFO_MATERIAL_THINGLASS) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.getBSDF = reinterpret_cast( diff --git a/modules/cpu/render/materials/Velvet.cpp b/modules/cpu/render/materials/Velvet.cpp index 032ea83f1..cee6c67de 100644 --- a/modules/cpu/render/materials/Velvet.cpp +++ b/modules/cpu/render/materials/Velvet.cpp @@ -11,7 +11,7 @@ namespace ospray { namespace pathtracer { Velvet::Velvet(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device, FFO_MATERIAL_VELVET) + : AddStructShared(device.getIspcrtContext(), device, FFO_MATERIAL_VELVET) { #ifndef OSPRAY_TARGET_SYCL getSh()->super.getBSDF = diff --git a/modules/cpu/render/pathtracer/PathTracer.cpp b/modules/cpu/render/pathtracer/PathTracer.cpp index 010ce4345..828891b1c 100644 --- a/modules/cpu/render/pathtracer/PathTracer.cpp +++ b/modules/cpu/render/pathtracer/PathTracer.cpp @@ -33,7 +33,7 @@ constexpr sycl::specialization_id specFeatureFlags; namespace ospray { PathTracer::PathTracer(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared(device.getIspcrtContext(), device) {} std::string PathTracer::toString() const diff --git a/modules/cpu/render/pathtracer/PathTracerData.cpp b/modules/cpu/render/pathtracer/PathTracerData.cpp index a45b345b1..47f9db298 100644 --- a/modules/cpu/render/pathtracer/PathTracerData.cpp +++ b/modules/cpu/render/pathtracer/PathTracerData.cpp @@ -32,7 +32,7 @@ namespace ospray { PathTracerData::PathTracerData(const World &world, bool importanceSampleGeometryLights, const Renderer &renderer) - : AddStructShared(world.getISPCDevice().getIspcrtDevice()) + : 
AddStructShared(world.getISPCDevice().getIspcrtContext()) { size_t geometryLights{0}; @@ -76,9 +76,9 @@ PathTracerData::PathTracerData(const World &world, lights[i] = (ispc::Light *)ispcrtSharedPtr(lightViews[i]); // Then create shared buffer from the temporary std::vector - ispcrt::Device &device = world.getISPCDevice().getIspcrtDevice(); - lightArray = make_buffer_shared_unique(device, lights); - lightCDFArray = make_buffer_shared_unique(device, lightsCDF); + ispcrt::Context &context = world.getISPCDevice().getIspcrtContext(); + lightArray = make_buffer_shared_unique(context, lights); + lightCDFArray = make_buffer_shared_unique(context, lightsCDF); getSh()->lights = lightArray->sharedPtr(); getSh()->lightsCDF = lightCDFArray->sharedPtr(); getSh()->numLights = lights.size(); @@ -98,9 +98,9 @@ ISPCRTMemoryView PathTracerData::createGeometryLight(const Instance *instance, const std::vector &distribution, float pdf) { - ispcrt::Device &device = instance->getISPCDevice().getIspcrtDevice(); + ispcrt::Context &context = instance->getISPCDevice().getIspcrtContext(); ISPCRTMemoryView view = - StructSharedCreate(device.handle()); + StructSharedCreate(context.handle()); ispc::GeometryLight *sh = (ispc::GeometryLight *)ispcrtSharedPtr(view); sh->super.instance = instance->getSh(); @@ -114,9 +114,9 @@ ISPCRTMemoryView PathTracerData::createGeometryLight(const Instance *instance, sh->numPrimitives = primIDs.size(); sh->pdf = pdf; - geoLightPrimIDArray.emplace_back(device, primIDs); + geoLightPrimIDArray.emplace_back(context, primIDs); sh->primIDs = geoLightPrimIDArray.back().sharedPtr(); - geoLightDistrArray.emplace_back(device, distribution); + geoLightDistrArray.emplace_back(context, distribution); sh->distribution = geoLightDistrArray.back().sharedPtr(); return view; } diff --git a/modules/cpu/render/scivis/SciVis.cpp b/modules/cpu/render/scivis/SciVis.cpp index 133a4fb2c..144981882 100644 --- a/modules/cpu/render/scivis/SciVis.cpp +++ b/modules/cpu/render/scivis/SciVis.cpp 
@@ -27,7 +27,7 @@ constexpr sycl::specialization_id specFeatureFlags; namespace ospray { SciVis::SciVis(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared(device.getIspcrtContext(), device) {} std::string SciVis::toString() const diff --git a/modules/cpu/render/scivis/SciVisData.cpp b/modules/cpu/render/scivis/SciVisData.cpp index 01fad2959..29d45ffa2 100644 --- a/modules/cpu/render/scivis/SciVisData.cpp +++ b/modules/cpu/render/scivis/SciVisData.cpp @@ -72,7 +72,7 @@ vec3f addLightsToArray(std::vector &lightViews, } // namespace SciVisData::SciVisData(const World &world) - : AddStructShared(world.getISPCDevice().getIspcrtDevice()) + : AddStructShared(world.getISPCDevice().getIspcrtContext()) { vec3f aoColor = vec3f(0.f); uint32_t visibleOnly = 0; @@ -102,7 +102,7 @@ SciVisData::SciVisData(const World &world) // Then create shared buffer from the temporary std::vector lightArray = make_buffer_shared_unique( - world.getISPCDevice().getIspcrtDevice(), lights); + world.getISPCDevice().getIspcrtContext(), lights); getSh()->lights = lightArray->sharedPtr(); getSh()->numLights = lights.size(); getSh()->numLightsVisibleOnly = visibleOnly; diff --git a/modules/cpu/texture/Texture.cpp b/modules/cpu/texture/Texture.cpp index 34af9adb4..ea63352d8 100644 --- a/modules/cpu/texture/Texture.cpp +++ b/modules/cpu/texture/Texture.cpp @@ -8,7 +8,7 @@ namespace ospray { // Texture definitions //////////////////////////////////////////////////////// Texture::Texture(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared(device.getIspcrtContext(), device) { managedObjectType = OSP_TEXTURE; } diff --git a/modules/cpu/texture/Texture2D.h b/modules/cpu/texture/Texture2D.h index 82b7e9d55..04b5d8ee7 100644 --- a/modules/cpu/texture/Texture2D.h +++ b/modules/cpu/texture/Texture2D.h @@ -15,7 +15,7 @@ struct OSPRAY_SDK_INTERFACE Texture2D : public AddStructShared { Texture2D(api::ISPCDevice &device) - 
: AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared(device.getIspcrtContext(), device) {} virtual ~Texture2D() override = default; diff --git a/modules/cpu/texture/TextureVolume.h b/modules/cpu/texture/TextureVolume.h index 30f835ac7..3b9d5753e 100644 --- a/modules/cpu/texture/TextureVolume.h +++ b/modules/cpu/texture/TextureVolume.h @@ -18,7 +18,7 @@ struct OSPRAY_SDK_INTERFACE TextureVolume : public AddStructShared { TextureVolume(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared(device.getIspcrtContext(), device) {} virtual std::string toString() const override; diff --git a/modules/cpu/volume/Volume.cpp b/modules/cpu/volume/Volume.cpp index ecfcbad25..2e7dd805f 100644 --- a/modules/cpu/volume/Volume.cpp +++ b/modules/cpu/volume/Volume.cpp @@ -27,7 +27,7 @@ namespace ospray { // Volume definitions //////////////////////////////////////////////////////// Volume::Volume(api::ISPCDevice &device, const std::string &type) - : AddStructShared(device.getIspcrtDevice(), device), + : AddStructShared(device.getIspcrtContext(), device), vklType(type), featureFlags(FFV_VOLUME) { diff --git a/modules/cpu/volume/VolumetricModel.cpp b/modules/cpu/volume/VolumetricModel.cpp index 1f137b7fb..13ee67bdb 100644 --- a/modules/cpu/volume/VolumetricModel.cpp +++ b/modules/cpu/volume/VolumetricModel.cpp @@ -11,7 +11,7 @@ namespace ospray { VolumetricModel::VolumetricModel(api::ISPCDevice &device, Volume *_volume) - : AddStructShared(device.getIspcrtDevice(), device), + : AddStructShared(device.getIspcrtContext(), device), volumeAPI(_volume) #if OPENVKL_VERSION_MAJOR == 1 , diff --git a/modules/cpu/volume/transferFunction/LinearTransferFunction.cpp b/modules/cpu/volume/transferFunction/LinearTransferFunction.cpp index 93cc55bce..9062cc8a2 100644 --- a/modules/cpu/volume/transferFunction/LinearTransferFunction.cpp +++ b/modules/cpu/volume/transferFunction/LinearTransferFunction.cpp @@ -14,7 +14,7 @@ void 
*LinearTransferFunction_getMaxOpacity_addr(); namespace ospray { LinearTransferFunction::LinearTransferFunction(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared(device.getIspcrtContext(), device) { getSh()->super.valueRange = range1f(0.0f, 1.0f); } diff --git a/modules/cpu/volume/transferFunction/TransferFunction.cpp b/modules/cpu/volume/transferFunction/TransferFunction.cpp index 6a923c272..b8c7b4d24 100644 --- a/modules/cpu/volume/transferFunction/TransferFunction.cpp +++ b/modules/cpu/volume/transferFunction/TransferFunction.cpp @@ -9,7 +9,7 @@ namespace ospray { // TransferFunction definitions /////////////////////////////////////////////// TransferFunction::TransferFunction(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device) + : AddStructShared(device.getIspcrtContext(), device) { managedObjectType = OSP_TRANSFER_FUNCTION; } diff --git a/modules/mpi/ospray/common/DistributedWorld.cpp b/modules/mpi/ospray/common/DistributedWorld.cpp index a82b3ae4a..d36c160af 100644 --- a/modules/mpi/ospray/common/DistributedWorld.cpp +++ b/modules/mpi/ospray/common/DistributedWorld.cpp @@ -15,7 +15,7 @@ namespace mpi { using namespace rkcommon; DistributedWorld::DistributedWorld(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device), + : AddStructShared(device.getIspcrtContext(), device), mpiGroup(mpicommon::worker.dup()) { managedObjectType = OSP_WORLD; diff --git a/modules/mpi/ospray/fb/DistributedFrameBuffer.cpp b/modules/mpi/ospray/fb/DistributedFrameBuffer.cpp index 946ebd451..95fc85cc8 100644 --- a/modules/mpi/ospray/fb/DistributedFrameBuffer.cpp +++ b/modules/mpi/ospray/fb/DistributedFrameBuffer.cpp @@ -32,7 +32,7 @@ using DFB = DistributedFrameBuffer; DistributedTileError::DistributedTileError( api::ISPCDevice &device, const vec2i &numTiles, mpicommon::Group group) - : TaskError(device.getIspcrtDevice(), numTiles), group(group) + : TaskError(device.getIspcrtContext(), 
numTiles), group(group) {} void DistributedTileError::sync() diff --git a/modules/mpi/ospray/render/DistributedLoadBalancer.cpp b/modules/mpi/ospray/render/DistributedLoadBalancer.cpp index 5d625e6ba..486a6699c 100644 --- a/modules/mpi/ospray/render/DistributedLoadBalancer.cpp +++ b/modules/mpi/ospray/render/DistributedLoadBalancer.cpp @@ -165,7 +165,8 @@ void DistributedLoadBalancer::renderFrame( // We use uint8 instead of bool to avoid hitting UB with differing "true" // values used by ISPC and C++ BufferShared regionVisible( - sparseFb->getISPCDevice().getIspcrtDevice(), numRegions * tiles.size()); + sparseFb->getISPCDevice().getIspcrtContext(), + numRegions * tiles.size()); std::memset(regionVisible.sharedPtr(), 0, regionVisible.size()); // Compute visibility for the tasks we're rendering @@ -240,7 +241,7 @@ void DistributedLoadBalancer::renderFrame( }); activeTasks.erase(removeTasks, activeTasks.end()); BufferShared activeTasksShared( - sparseFb->getISPCDevice().getIspcrtDevice(), activeTasks); + sparseFb->getISPCDevice().getIspcrtContext(), activeTasks); renderer->renderRegionTasks(sparseFb, camera, diff --git a/modules/mpi/ospray/render/distributed/DistributedRaycast.cpp b/modules/mpi/ospray/render/distributed/DistributedRaycast.cpp index 5b054159c..90158e106 100644 --- a/modules/mpi/ospray/render/distributed/DistributedRaycast.cpp +++ b/modules/mpi/ospray/render/distributed/DistributedRaycast.cpp @@ -46,7 +46,7 @@ static bool DETAILED_LOGGING = false; // DistributedRaycastRenderer definitions ///////////////////////////////// DistributedRaycastRenderer::DistributedRaycastRenderer(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device), + : AddStructShared(device.getIspcrtContext(), device), mpiGroup(mpicommon::worker.dup()) { DETAILED_LOGGING = diff --git a/modules/mpi/ospray/render/distributed/DistributedRenderer.cpp b/modules/mpi/ospray/render/distributed/DistributedRenderer.cpp index 5cd299d4e..13fb3cf75 100644 --- 
a/modules/mpi/ospray/render/distributed/DistributedRenderer.cpp +++ b/modules/mpi/ospray/render/distributed/DistributedRenderer.cpp @@ -27,7 +27,7 @@ namespace ospray { namespace mpi { DistributedRenderer::DistributedRenderer(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device), + : AddStructShared(device.getIspcrtContext(), device), mpiGroup(mpicommon::worker.dup()) {} diff --git a/modules/pluggableGeometryExample/ospray/geometry/BilinearPatches.cpp b/modules/pluggableGeometryExample/ospray/geometry/BilinearPatches.cpp index 4f7c65d72..28eb0c404 100644 --- a/modules/pluggableGeometryExample/ospray/geometry/BilinearPatches.cpp +++ b/modules/pluggableGeometryExample/ospray/geometry/BilinearPatches.cpp @@ -19,7 +19,7 @@ namespace ospray { namespace blp { BilinearPatches::BilinearPatches(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtDevice(), device, FFG_NONE) + : AddStructShared(device.getIspcrtContext(), device, FFG_NONE) { getSh()->super.postIntersect = ispc::BilinearPatches_postIntersect_addr(); getSh()->super.intersect = ispc::BilinearPatches_intersect_addr(); From 3431d6131a4c7d9b0fb64e58d7275cace1894e88 Mon Sep 17 00:00:00 2001 From: Krzysztof Raszkowski Date: Wed, 22 Feb 2023 22:41:33 +0100 Subject: [PATCH 19/42] ci: add PVC GPU tests --- .github/workflows/ci.linux.gpu.yml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/.github/workflows/ci.linux.gpu.yml b/.github/workflows/ci.linux.gpu.yml index 12745ac89..07df3cd68 100644 --- a/.github/workflows/ci.linux.gpu.yml +++ b/.github/workflows/ci.linux.gpu.yml @@ -114,6 +114,29 @@ jobs: export PATH="$GITHUB_WORKSPACE/build/install/ospray/bin:$PATH" scripts/tests/run_gpu_tests.sh "$GITHUB_WORKSPACE" + test-ubuntu2204-pvc: + needs: [build-ubuntu2204] + uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker_gpu.yml@main + secrets: inherit + with: + image: ubuntu:22.04 + env-from-files: 
.github/workflows/dpcpp-sycl-nightly.env .github/workflows/gfx-ubuntu22.env + install-gfx-driver: true + submodules: true + options: --device=/dev/dri:/dev/dri + runs-on: '[ "Linux", "docker", "pvc" ]' + artifact-in: build-ubuntu2204 + artifact-out: test-ubuntu2204-pvc + artifact-path: build_regression_tests/tests*.xml build_regression_tests/failed* + artifact-on-failure: true + cmd: | + export SYCL_BUNDLE_ROOT=$DPCPP_ROOT + export CC=clang + export CXX=clang++ + export LD_LIBRARY_PATH="$GITHUB_WORKSPACE/build/install/ospray/lib:$GITHUB_WORKSPACE/build/install/embree/lib:$LD_LIBRARY_PATH" + export PATH="$GITHUB_WORKSPACE/build/install/ospray/bin:$PATH" + scripts/tests/run_gpu_tests.sh "$GITHUB_WORKSPACE" + test-ubuntu2204-devel-dg2: needs: [build-ubuntu2204-devel] uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker_gpu.yml@main From 4106d54864502992526515abbe186dc2cca692ab Mon Sep 17 00:00:00 2001 From: Miroslaw Pawlowski Date: Tue, 14 Mar 2023 17:38:02 +0100 Subject: [PATCH 20/42] Optimized host-device traffic in the local frame buffer --- apps/ospExamples/GLFWOSPRayWindow.cpp | 23 +++- apps/ospExamples/GLFWOSPRayWindow.h | 1 + modules/cpu/ISPCDevice.h | 5 + modules/cpu/common/Data.cpp | 2 +- .../{BufferShared.h => ISPCRTBuffers.h} | 105 +++++++++++++--- modules/cpu/common/StructShared.h | 2 +- modules/cpu/fb/FrameBuffer.cpp | 17 ++- modules/cpu/fb/FrameBuffer.h | 9 +- modules/cpu/fb/FrameBufferShared.h | 2 + modules/cpu/fb/LocalFB.cpp | 112 +++++++++++------- modules/cpu/fb/LocalFB.h | 32 +++-- modules/cpu/fb/SparseFB.cpp | 3 +- modules/cpu/fb/SparseFB.h | 2 +- modules/cpu/fb/TaskError.h | 2 +- modules/cpu/math/Distribution2D.cpp | 2 +- modules/cpu/render/LoadBalancer.cpp | 1 - .../render/bsdfs/MicrofacetAlbedoTables.cpp | 2 +- 17 files changed, 226 insertions(+), 96 deletions(-) rename modules/cpu/common/{BufferShared.h => ISPCRTBuffers.h} (54%) diff --git a/apps/ospExamples/GLFWOSPRayWindow.cpp 
b/apps/ospExamples/GLFWOSPRayWindow.cpp index 97052ea7d..237b20a99 100644 --- a/apps/ospExamples/GLFWOSPRayWindow.cpp +++ b/apps/ospExamples/GLFWOSPRayWindow.cpp @@ -355,21 +355,34 @@ void GLFWOSPRayWindow::motion(const vec2f &position) void GLFWOSPRayWindow::display() { + static auto displayStart = std::chrono::high_resolution_clock::now(); + if (showUi) buildUI(); if (displayCallback) displayCallback(this); - updateTitleBar(); - glEnable(GL_FRAMEBUFFER_SRGB); // Turn on sRGB conversion for OSPRay frame static bool firstFrame = true; if (firstFrame || currentFrame.isReady()) { - waitOnOSPRayFrame(); + // display frame rate in window title + auto displayEnd = std::chrono::high_resolution_clock::now(); + auto durationMilliseconds = + std::chrono::duration_cast( + displayEnd - displayStart); + + // update FPS every second + framesCounter++; + if (durationMilliseconds > std::chrono::seconds(1)) { + displayStart = displayEnd; + latestFPS = 1000.0f * float(framesCounter) / durationMilliseconds.count(); + framesCounter = 0; + updateTitleBar(); + } - latestFPS = 1.f / currentFrame.duration(); + waitOnOSPRayFrame(); auto fbChannel = OSP_FB_COLOR; if (showDepth) @@ -504,7 +517,7 @@ void GLFWOSPRayWindow::updateTitleBar() { std::stringstream windowTitle; windowTitle << "OSPRay: " << std::setprecision(3) << latestFPS << " fps"; - if (latestFPS < 2.f) { + if (latestFPS > 0.f && latestFPS < 2.f) { float progress = currentFrame.progress(); windowTitle << " | "; int barWidth = 20; diff --git a/apps/ospExamples/GLFWOSPRayWindow.h b/apps/ospExamples/GLFWOSPRayWindow.h index 5896bde71..0f1b26db7 100644 --- a/apps/ospExamples/GLFWOSPRayWindow.h +++ b/apps/ospExamples/GLFWOSPRayWindow.h @@ -125,5 +125,6 @@ class GLFWOSPRayWindow std::function uiCallback; // FPS measurement of last frame + uint32_t framesCounter{0}; float latestFPS{0.f}; }; diff --git a/modules/cpu/ISPCDevice.h b/modules/cpu/ISPCDevice.h index 3ea19d955..c4dcb9008 100644 --- a/modules/cpu/ISPCDevice.h +++ 
b/modules/cpu/ISPCDevice.h @@ -164,6 +164,11 @@ struct OSPRAY_SDK_INTERFACE ISPCDevice : public Device return ispcrtContext; } + ispcrt::TaskQueue &getIspcrtQueue() + { + return ispcrtQueue; + } + #ifdef OSPRAY_TARGET_SYCL sycl::queue *getSyclQueue() { diff --git a/modules/cpu/common/Data.cpp b/modules/cpu/common/Data.cpp index 18a09ef5f..14ddd56c9 100644 --- a/modules/cpu/common/Data.cpp +++ b/modules/cpu/common/Data.cpp @@ -3,7 +3,7 @@ // ospray #include "Data.h" -#include "common/BufferShared.h" +#include "common/ISPCRTBuffers.h" #include "ospray/ospray.h" #include "rkcommon/utility/multidim_index_sequence.h" diff --git a/modules/cpu/common/BufferShared.h b/modules/cpu/common/ISPCRTBuffers.h similarity index 54% rename from modules/cpu/common/BufferShared.h rename to modules/cpu/common/ISPCRTBuffers.h index f2f4b5032..9cd12fe27 100644 --- a/modules/cpu/common/BufferShared.h +++ b/modules/cpu/common/ISPCRTBuffers.h @@ -5,11 +5,83 @@ #include #include +#include #include "ispcrt.hpp" namespace ospray { -// C version //////////////////////////////////////////// +///////////////////////////////////////////////////////////////////// +// BufferDevice + +template +struct BufferDevice : public ispcrt::Array +{ + using ispcrt::Array::devicePtr; + BufferDevice(ispcrt::Device &device); + BufferDevice(ispcrt::Device &device, size_t size); +}; + +template +BufferDevice::BufferDevice(ispcrt::Device &device, size_t size) + : ispcrt::Array(device, nullptr, size) +{} + +template +inline std::unique_ptr> make_buffer_device_unique( + Args &&...args) +{ + return std::unique_ptr>( + new BufferDevice(std::forward(args)...)); +} + +///////////////////////////////////////////////////////////////////// +// BufferDeviceShadowed + +template +struct BufferDeviceShadowed : public std::vector, + public ispcrt::Array +{ + using ispcrt::Array::devicePtr; + using std::vector::size; + BufferDeviceShadowed(ispcrt::Device &device, size_t size); + BufferDeviceShadowed(ispcrt::Device &device, 
std::vector &v); + BufferDeviceShadowed(ispcrt::Device &device, T *data, size_t size); +}; + +template +BufferDeviceShadowed::BufferDeviceShadowed( + ispcrt::Device &device, size_t size) + : std::vector(size), + ispcrt::Array( + device, std::vector::data(), size) +{} + +template +BufferDeviceShadowed::BufferDeviceShadowed( + ispcrt::Device &device, std::vector &v) + : std::vector(v), + ispcrt::Array( + device, std::vector::data(), v.size()) +{} + +template +BufferDeviceShadowed::BufferDeviceShadowed( + ispcrt::Device &device, T *data, size_t size) + : std::vector(data, data + size), + ispcrt::Array( + device, std::vector::data(), size) +{} + +template +inline std::unique_ptr> +make_buffer_device_shadowed_unique(Args &&...args) +{ + return std::unique_ptr>( + new BufferDeviceShadowed(std::forward(args)...)); +} + +///////////////////////////////////////////////////////////////////// +// BufferShared C version inline ISPCRTMemoryView BufferSharedCreate(ISPCRTContext context, size_t size, @@ -27,12 +99,13 @@ inline void BufferSharedDelete(ISPCRTMemoryView view) ispcrtRelease(view); } -// C++ version //////////////////////////////////////////// +///////////////////////////////////////////////////////////////////// +// BufferShared C++ version template struct BufferShared : public ispcrt::Array { - using ispcrt::Array::sharedPtr; + using ispcrt::Array::size; BufferShared(ispcrt::Context &context, ispcrt::SharedMemoryUsageHint allocHint = ispcrt::SharedMemoryUsageHint::HostDeviceReadWrite); @@ -50,7 +123,6 @@ struct BufferShared : public ispcrt::Array ispcrt::SharedMemoryUsageHint allocHint = ispcrt::SharedMemoryUsageHint::HostDeviceReadWrite); - // TODO: We should move these up into the ISPCRT wrapper T *data(); T *begin(); @@ -76,8 +148,7 @@ BufferShared::BufferShared(ispcrt::Context &context, size_t size, ispcrt::SharedMemoryUsageHint allocHint) : ispcrt::Array(context, size, allocHint) -{ -} +{} template BufferShared::BufferShared(ispcrt::Context &context, @@ -101,54 
+172,52 @@ BufferShared::BufferShared(ispcrt::Context &context, template T *BufferShared::data() { - return begin(); + return sharedPtr(); } template T *BufferShared::begin() { - return sharedPtr(); + return data(); } template T *BufferShared::end() { - return begin() + ispcrt::Array::size(); + return begin() + size(); } template const T *BufferShared::cbegin() const { - return sharedPtr(); + return data(); } template const T *BufferShared::cend() const { - return cbegin() + ispcrt::Array::size(); + return cbegin() + size(); } template T &BufferShared::operator[](const size_t i) { - return *(sharedPtr() + i); + return *(data() + i); } template const T &BufferShared::operator[](const size_t i) const { - return *(sharedPtr() + i); + return *(data() + i); } -// The below method is WA for ISPCRT bug, when running on GPU sharedPtr() +// The below method is WA for Level Zero bug, when running on GPU sharedPtr() // crashes on 0-sized ispcrt::Array -// TODO: Fix it in ISPCRT template T *BufferShared::sharedPtr() const { - return ispcrt::Array::size() - ? ispcrt::Array::sharedPtr() - : nullptr; + return size() ? 
ispcrt::Array::sharedPtr() + : nullptr; } template diff --git a/modules/cpu/common/StructShared.h b/modules/cpu/common/StructShared.h index 5e0476444..a8e3b5c9b 100644 --- a/modules/cpu/common/StructShared.h +++ b/modules/cpu/common/StructShared.h @@ -11,7 +11,7 @@ #include "rkcommon/math/rkmath.h" #include "rkcommon/math/vec.h" -#include "BufferShared.h" +#include "ISPCRTBuffers.h" namespace ispc { diff --git a/modules/cpu/fb/FrameBuffer.cpp b/modules/cpu/fb/FrameBuffer.cpp index f6dc8b3f6..765738946 100644 --- a/modules/cpu/fb/FrameBuffer.cpp +++ b/modules/cpu/fb/FrameBuffer.cpp @@ -81,6 +81,11 @@ void FrameBuffer::commit() imageOpData = getParamDataT("imageOperation"); } +void FrameBuffer::clear() +{ + frameID = -1; // we increment at the start of the frame +} + vec2i FrameBuffer::getRenderTaskSize() const { return getSh()->renderTaskSize; @@ -104,10 +109,13 @@ float FrameBuffer::getVariance() const void FrameBuffer::beginFrame() { cancelRender = false; + frameID++; // TODO: Maybe better as a kernel to avoid USM thrash to host +#ifndef OSPRAY_TARGET_SYCL getSh()->cancelRender = 0; getSh()->numPixelsRendered = 0; - getSh()->frameID++; + getSh()->frameID = frameID; +#endif } std::string FrameBuffer::toString() const @@ -117,12 +125,14 @@ std::string FrameBuffer::toString() const void FrameBuffer::setCompletedEvent(OSPSyncEvent event) { +#ifndef OSPRAY_TARGET_SYCL // We won't be running ISPC-side rendering tasks when updating the // progress values here in C++ if (event == OSP_NONE_FINISHED) getSh()->numPixelsRendered = 0; if (event == OSP_FRAME_FINISHED) getSh()->numPixelsRendered = getNumPixels().long_product(); +#endif stagesCompleted = event; } @@ -241,11 +251,6 @@ uint32 FrameBuffer::getChannelFlags() const return channels; } -int32 FrameBuffer::getFrameID() const -{ - return getSh()->frameID; -} - bool FrameBuffer::hasPrimitiveIDBuf() const { return hasPrimitiveIDBuffer; diff --git a/modules/cpu/fb/FrameBuffer.h b/modules/cpu/fb/FrameBuffer.h index 
d9a31985d..a082dfa1a 100644 --- a/modules/cpu/fb/FrameBuffer.h +++ b/modules/cpu/fb/FrameBuffer.h @@ -39,7 +39,7 @@ struct OSPRAY_SDK_INTERFACE FrameBuffer virtual void unmap(const void *mappedMem) = 0; // clear (the specified channels of) this frame buffer - virtual void clear() = 0; + virtual void clear(); // Get number of pixels per render task, in x and y direction vec2i getRenderTaskSize() const; @@ -109,6 +109,8 @@ struct OSPRAY_SDK_INTERFACE FrameBuffer const vec2i size; + int32_t frameID{-1}; + // indicates whether the app requested this frame buffer to have // an (application-mappable) depth buffer bool hasDepthBuffer; @@ -138,6 +140,11 @@ struct OSPRAY_SDK_INTERFACE FrameBuffer OSPTYPEFOR_SPECIALIZATION(FrameBuffer *, OSP_FRAMEBUFFER); +inline int32_t FrameBuffer::getFrameID() const +{ + return frameID; +} + inline FeatureFlagsOther FrameBuffer::getFeatureFlagsOther() const { return featureFlags; diff --git a/modules/cpu/fb/FrameBufferShared.h b/modules/cpu/fb/FrameBufferShared.h index dbac127bd..08cc0d927 100644 --- a/modules/cpu/fb/FrameBufferShared.h +++ b/modules/cpu/fb/FrameBufferShared.h @@ -75,6 +75,7 @@ struct FrameBuffer // The default size of each each render task, in pixels vec2i renderTaskSize; + // Not used on GPU to avoid USM thrashing int32 frameID; // The channels stored in the framebuffer @@ -91,6 +92,7 @@ struct FrameBuffer uint32 cancelRender; // The number of pixels rendered this frame, for tracking rendering progress + // Not used on GPU to avoid USM thrashing uint32 numPixelsRendered; #ifdef __cplusplus diff --git a/modules/cpu/fb/LocalFB.cpp b/modules/cpu/fb/LocalFB.cpp index e1ef0032a..640377257 100644 --- a/modules/cpu/fb/LocalFB.cpp +++ b/modules/cpu/fb/LocalFB.cpp @@ -45,6 +45,7 @@ LocalFrameBuffer::LocalFrameBuffer(api::ISPCDevice &device, _colorBufferFormat, channels, FFO_FB_LOCAL), + device(device), numRenderTasks(divRoundUp(size, getRenderTaskSize())), taskErrorRegion(device.getIspcrtContext(), hasVarianceBuffer ? 
getNumRenderTasks() : vec2i(0)) @@ -53,46 +54,48 @@ LocalFrameBuffer::LocalFrameBuffer(api::ISPCDevice &device, const size_t numPixels = _size.long_product(); if (getColorBufferFormat() != OSP_FB_NONE) { - colorBuffer = make_buffer_shared_unique( - device.getIspcrtContext(), pixelBytes * numPixels); + colorBuffer = make_buffer_device_shadowed_unique( + device.getIspcrtDevice(), pixelBytes * numPixels); } if (hasDepthBuffer) - depthBuffer = - make_buffer_shared_unique(device.getIspcrtContext(), numPixels); + depthBuffer = make_buffer_device_shadowed_unique( + device.getIspcrtDevice(), numPixels); if (hasAccumBuffer) { accumBuffer = - make_buffer_shared_unique(device.getIspcrtContext(), numPixels); + make_buffer_device_unique(device.getIspcrtDevice(), numPixels); - taskAccumID = make_buffer_shared_unique( - device.getIspcrtContext(), getTotalRenderTasks()); + // TODO: Implement fill function in ISPCRT + taskAccumID = make_buffer_device_shadowed_unique( + device.getIspcrtDevice(), getTotalRenderTasks()); std::memset(taskAccumID->data(), 0, taskAccumID->size() * sizeof(int32_t)); + device.getIspcrtQueue().copyToDevice(*taskAccumID); } if (hasVarianceBuffer) varianceBuffer = - make_buffer_shared_unique(device.getIspcrtContext(), numPixels); + make_buffer_device_unique(device.getIspcrtDevice(), numPixels); if (hasNormalBuffer) - normalBuffer = - make_buffer_shared_unique(device.getIspcrtContext(), numPixels); + normalBuffer = make_buffer_device_shadowed_unique( + device.getIspcrtDevice(), numPixels); if (hasAlbedoBuffer) - albedoBuffer = - make_buffer_shared_unique(device.getIspcrtContext(), numPixels); + albedoBuffer = make_buffer_device_shadowed_unique( + device.getIspcrtDevice(), numPixels); if (hasPrimitiveIDBuffer) - primitiveIDBuffer = make_buffer_shared_unique( - device.getIspcrtContext(), numPixels); + primitiveIDBuffer = make_buffer_device_shadowed_unique( + device.getIspcrtDevice(), numPixels); if (hasObjectIDBuffer) - objectIDBuffer = 
make_buffer_shared_unique( - device.getIspcrtContext(), numPixels); + objectIDBuffer = make_buffer_device_shadowed_unique( + device.getIspcrtDevice(), numPixels); if (hasInstanceIDBuffer) - instanceIDBuffer = make_buffer_shared_unique( - device.getIspcrtContext(), numPixels); + instanceIDBuffer = make_buffer_device_shadowed_unique( + device.getIspcrtDevice(), numPixels); // TODO: Better way to pass the task IDs that doesn't require just storing // them all? Maybe as blocks/tiles similar to when we just had tiles? Will @@ -126,20 +129,22 @@ LocalFrameBuffer::LocalFrameBuffer(api::ISPCDevice &device, ispc::LocalFrameBuffer_completeTask_addr()); #endif - getSh()->colorBuffer = colorBuffer ? colorBuffer->data() : nullptr; - getSh()->depthBuffer = depthBuffer ? depthBuffer->data() : nullptr; - getSh()->accumBuffer = accumBuffer ? accumBuffer->data() : nullptr; - getSh()->varianceBuffer = varianceBuffer ? varianceBuffer->data() : nullptr; - getSh()->normalBuffer = normalBuffer ? normalBuffer->data() : nullptr; - getSh()->albedoBuffer = albedoBuffer ? albedoBuffer->data() : nullptr; - getSh()->taskAccumID = taskAccumID ? taskAccumID->data() : nullptr; + getSh()->colorBuffer = colorBuffer ? colorBuffer->devicePtr() : nullptr; + getSh()->depthBuffer = depthBuffer ? depthBuffer->devicePtr() : nullptr; + getSh()->accumBuffer = accumBuffer ? accumBuffer->devicePtr() : nullptr; + getSh()->varianceBuffer = + varianceBuffer ? varianceBuffer->devicePtr() : nullptr; + getSh()->normalBuffer = normalBuffer ? normalBuffer->devicePtr() : nullptr; + getSh()->albedoBuffer = albedoBuffer ? albedoBuffer->devicePtr() : nullptr; + getSh()->taskAccumID = taskAccumID ? taskAccumID->devicePtr() : nullptr; getSh()->taskRegionError = taskErrorRegion.errorBuffer(); getSh()->numRenderTasks = numRenderTasks; getSh()->primitiveIDBuffer = - primitiveIDBuffer ? primitiveIDBuffer->data() : nullptr; - getSh()->objectIDBuffer = objectIDBuffer ? objectIDBuffer->data() : nullptr; + primitiveIDBuffer ? 
primitiveIDBuffer->devicePtr() : nullptr; + getSh()->objectIDBuffer = + objectIDBuffer ? objectIDBuffer->devicePtr() : nullptr; getSh()->instanceIDBuffer = - instanceIDBuffer ? instanceIDBuffer->data() : nullptr; + instanceIDBuffer ? instanceIDBuffer->devicePtr() : nullptr; } void LocalFrameBuffer::commit() @@ -193,12 +198,16 @@ std::string LocalFrameBuffer::toString() const void LocalFrameBuffer::clear() { - getSh()->super.frameID = -1; // we increment at the start of the frame + FrameBuffer::clear(); + // it is only necessary to reset the accumID, // LocalFrameBuffer_accumulateTile takes care of clearing the // accumulating buffers if (taskAccumID) { + // TODO: Implement fill function in ISPCRT to do this through level-zero + // on the device std::fill(taskAccumID->begin(), taskAccumID->end(), 0); + device.getIspcrtQueue().copyToDevice(*taskAccumID); } // always also clear error buffer (if present) @@ -346,32 +355,45 @@ void LocalFrameBuffer::endFrame( const void *LocalFrameBuffer::mapBuffer(OSPFrameBufferChannel channel) { - // TODO: Mapping the USM back to the app like this will cause a lot of USM - // thrashing between host/device const void *buf = nullptr; + ispcrt::TaskQueue &tq = device.getIspcrtQueue(); switch (channel) { - case OSP_FB_COLOR: + case OSP_FB_COLOR: { + tq.copyToHost(*colorBuffer); + tq.sync(); buf = colorBuffer ? colorBuffer->data() : nullptr; - break; - case OSP_FB_DEPTH: + } break; + case OSP_FB_DEPTH: { + tq.copyToHost(*depthBuffer); + tq.sync(); buf = depthBuffer ? depthBuffer->data() : nullptr; - break; - case OSP_FB_NORMAL: + } break; + case OSP_FB_NORMAL: { + tq.copyToHost(*normalBuffer); + tq.sync(); buf = normalBuffer ? normalBuffer->data() : nullptr; - break; - case OSP_FB_ALBEDO: + } break; + case OSP_FB_ALBEDO: { + tq.copyToHost(*albedoBuffer); + tq.sync(); buf = albedoBuffer ? 
albedoBuffer->data() : nullptr; - break; - case OSP_FB_ID_PRIMITIVE: + } break; + case OSP_FB_ID_PRIMITIVE: { + tq.copyToHost(*primitiveIDBuffer); + tq.sync(); buf = primitiveIDBuffer ? primitiveIDBuffer->data() : nullptr; - break; - case OSP_FB_ID_OBJECT: + } break; + case OSP_FB_ID_OBJECT: { + tq.copyToHost(*objectIDBuffer); + tq.sync(); buf = objectIDBuffer ? objectIDBuffer->data() : nullptr; - break; - case OSP_FB_ID_INSTANCE: + } break; + case OSP_FB_ID_INSTANCE: { + tq.copyToHost(*instanceIDBuffer); + tq.sync(); buf = instanceIDBuffer ? instanceIDBuffer->data() : nullptr; - break; + } break; default: break; } diff --git a/modules/cpu/fb/LocalFB.h b/modules/cpu/fb/LocalFB.h index 7b8ff3122..0c3f3ed1a 100644 --- a/modules/cpu/fb/LocalFB.h +++ b/modules/cpu/fb/LocalFB.h @@ -4,7 +4,7 @@ #pragma once // ospray -#include "common/BufferShared.h" +#include "common/ISPCRTBuffers.h" #include "fb/FrameBuffer.h" #include "fb/TaskError.h" // rkcommon @@ -26,7 +26,13 @@ struct OSPRAY_SDK_INTERFACE LocalFrameBuffer ColorBufferFormat colorBufferFormat, const uint32 channels); - virtual ~LocalFrameBuffer() override = default; + virtual ~LocalFrameBuffer() override + { +#ifdef OSPRAY_TARGET_SYCL + device.getSyclQueue().wait_and_throw(); + device.getIspcrtQueue().sync(); +#endif + } virtual void commit() override; @@ -75,33 +81,35 @@ struct OSPRAY_SDK_INTERFACE LocalFrameBuffer // flag was passed on construction // format depends on FrameBuffer::colorBufferFormat - std::unique_ptr> colorBuffer; + std::unique_ptr> colorBuffer; // one float per pixel - std::unique_ptr> depthBuffer; + std::unique_ptr> depthBuffer; // one RGBA per pixel - std::unique_ptr> accumBuffer; + std::unique_ptr> accumBuffer; // one RGBA per pixel, accumulates every other sample, for variance estimation - std::unique_ptr> varianceBuffer; + std::unique_ptr> varianceBuffer; // accumulated world-space normal per pixel - std::unique_ptr> normalBuffer; + std::unique_ptr> normalBuffer; // accumulated, one RGB per 
pixel - std::unique_ptr> albedoBuffer; + std::unique_ptr> albedoBuffer; // primitive ID, object ID, and instance ID - std::unique_ptr> primitiveIDBuffer; - std::unique_ptr> objectIDBuffer; - std::unique_ptr> instanceIDBuffer; + std::unique_ptr> primitiveIDBuffer; + std::unique_ptr> objectIDBuffer; + std::unique_ptr> instanceIDBuffer; protected: vec2i getTaskStartPos(const uint32_t taskID) const; //// Data //// + api::ISPCDevice &device; + vec2i numRenderTasks; std::unique_ptr> renderTaskIDs; std::unique_ptr> activeTaskIDs; // holds accumID per render task, for adaptive accumulation - std::unique_ptr> taskAccumID; + std::unique_ptr> taskAccumID; // holds error per tile and adaptive regions TaskError taskErrorRegion; diff --git a/modules/cpu/fb/SparseFB.cpp b/modules/cpu/fb/SparseFB.cpp index f6d67a589..be6c96097 100644 --- a/modules/cpu/fb/SparseFB.cpp +++ b/modules/cpu/fb/SparseFB.cpp @@ -190,8 +190,7 @@ void SparseFrameBuffer::unmap(const void *) {} void SparseFrameBuffer::clear() { - // we increment at the start of the frame - getSh()->super.frameID = -1; + FrameBuffer::clear(); // We only need to reset the accumID, SparseFB_accumulateWriteSample will // handle overwriting the image when accumID == 0 diff --git a/modules/cpu/fb/SparseFB.h b/modules/cpu/fb/SparseFB.h index 3d90ca3e7..9b4a29eef 100644 --- a/modules/cpu/fb/SparseFB.h +++ b/modules/cpu/fb/SparseFB.h @@ -10,7 +10,7 @@ #include "rkcommon/utility/ArrayView.h" // ispc shared #include "TileShared.h" -#include "common/BufferShared.h" +#include "common/ISPCRTBuffers.h" #include "fb/SparseFBShared.h" namespace ospray { diff --git a/modules/cpu/fb/TaskError.h b/modules/cpu/fb/TaskError.h index 30f9625b8..c35b22458 100644 --- a/modules/cpu/fb/TaskError.h +++ b/modules/cpu/fb/TaskError.h @@ -4,7 +4,7 @@ #pragma once #include -#include "common/BufferShared.h" +#include "common/ISPCRTBuffers.h" #include "common/OSPCommon.h" #include "rkcommon/containers/AlignedVector.h" diff --git 
a/modules/cpu/math/Distribution2D.cpp b/modules/cpu/math/Distribution2D.cpp index fdec4e688..5681ce092 100644 --- a/modules/cpu/math/Distribution2D.cpp +++ b/modules/cpu/math/Distribution2D.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 #include "Distribution2D.h" -#include "common/BufferShared.h" +#include "common/ISPCRTBuffers.h" namespace ospray { Distribution2D::Distribution2D(const vec2i &size, api::ISPCDevice &device) diff --git a/modules/cpu/render/LoadBalancer.cpp b/modules/cpu/render/LoadBalancer.cpp index ce54fa878..827b53e13 100644 --- a/modules/cpu/render/LoadBalancer.cpp +++ b/modules/cpu/render/LoadBalancer.cpp @@ -4,7 +4,6 @@ #include "LoadBalancer.h" #include "Renderer.h" #include "api/Device.h" -#include "common/BufferShared.h" #include "common/Group.h" #include "common/Instance.h" #include "rkcommon/tasking/parallel_for.h" diff --git a/modules/cpu/render/bsdfs/MicrofacetAlbedoTables.cpp b/modules/cpu/render/bsdfs/MicrofacetAlbedoTables.cpp index c7ffe6f7d..070f338c0 100644 --- a/modules/cpu/render/bsdfs/MicrofacetAlbedoTables.cpp +++ b/modules/cpu/render/bsdfs/MicrofacetAlbedoTables.cpp @@ -1,5 +1,5 @@ #include "MicrofacetAlbedoTables.h" -#include "common/BufferShared.h" +#include "common/ISPCRTBuffers.h" #include "render/bsdfs/MicrofacetAlbedoTablesShared.h" #ifndef OSPRAY_TARGET_SYCL #include "render/bsdfs/MicrofacetAlbedoTables_ispc.h" From d95b5833f56b20f8796827abef7d8021d1dcf642 Mon Sep 17 00:00:00 2001 From: Miroslaw Pawlowski Date: Wed, 12 Apr 2023 19:35:57 +0200 Subject: [PATCH 21/42] Async GPU rendering using sycl::event --- apps/ospTestSuite/test_fixture.cpp | 8 ++- modules/cpu/ISPCDevice.cpp | 24 +++---- modules/cpu/ISPCDevice.h | 4 +- modules/cpu/render/LoadBalancer.cpp | 59 +++++++-------- modules/cpu/render/LoadBalancer.h | 27 ++++--- modules/cpu/render/RenderTaskSycl.h | 71 +++++++++++++++++++ modules/cpu/render/Renderer.cpp | 5 +- modules/cpu/render/Renderer.h | 20 +++--- modules/cpu/render/ao/AORenderer.cpp | 21 +++--- 
modules/cpu/render/ao/AORenderer.h | 10 +-- modules/cpu/render/debug/DebugRenderer.cpp | 21 +++--- modules/cpu/render/debug/DebugRenderer.h | 10 +-- modules/cpu/render/pathtracer/PathTracer.cpp | 21 +++--- modules/cpu/render/pathtracer/PathTracer.h | 10 +-- modules/cpu/render/scivis/SciVis.cpp | 21 +++--- modules/cpu/render/scivis/SciVis.h | 10 +-- modules/mpi/ospray/MPIDistributedDevice.cpp | 4 -- .../ospray/render/DistributedLoadBalancer.cpp | 45 +++--------- .../ospray/render/DistributedLoadBalancer.h | 13 +--- .../render/distributed/DistributedRaycast.cpp | 7 +- .../render/distributed/DistributedRaycast.h | 3 +- .../distributed/DistributedRenderer.cpp | 11 +-- .../render/distributed/DistributedRenderer.h | 30 ++++---- 23 files changed, 234 insertions(+), 221 deletions(-) create mode 100644 modules/cpu/render/RenderTaskSycl.h diff --git a/apps/ospTestSuite/test_fixture.cpp b/apps/ospTestSuite/test_fixture.cpp index 9d4521ac5..b467afd74 100644 --- a/apps/ospTestSuite/test_fixture.cpp +++ b/apps/ospTestSuite/test_fixture.cpp @@ -149,8 +149,12 @@ void Base::SetLights() void Base::RenderFrame() { - for (int frame = 0; frame < frames; ++frame) - framebuffer.renderFrame(renderer, camera, world); + for (int frame = 0; frame < frames; ++frame) { + cpp::Future future = framebuffer.renderFrame(renderer, camera, world); + // TODO: Need to wait after every frame or variance + // is incorrectly calculated + future.wait(); + } } FromOsprayTesting::FromOsprayTesting() diff --git a/modules/cpu/ISPCDevice.cpp b/modules/cpu/ISPCDevice.cpp index e03f6b34e..a400b6dee 100644 --- a/modules/cpu/ISPCDevice.cpp +++ b/modules/cpu/ISPCDevice.cpp @@ -21,7 +21,11 @@ #include "lights/Light.h" #include "render/LoadBalancer.h" #include "render/Material.h" +#ifdef OSPRAY_TARGET_SYCL +#include "render/RenderTaskSycl.h" +#else #include "render/RenderTask.h" +#endif #include "render/Renderer.h" #include "texture/Texture.h" #include "texture/Texture2D.h" @@ -286,8 +290,6 @@ void 
ISPCDevice::commit() syclDevice, reinterpret_cast(ispcrtQueue.nativeTaskQueueHandle()), true); - - loadBalancer->setQueue(&syclQueue); #endif } @@ -357,13 +359,8 @@ void ISPCDevice::commit() #ifndef OSPRAY_TARGET_SYCL // Output device info string - const char *isaNames[] = {"unknown", - "SSE2", - "SSE4", - "AVX", - "AVX2", - "AVX512SKX", - "NEON"}; + const char *isaNames[] = { + "unknown", "SSE2", "SSE4", "AVX", "AVX2", "AVX512SKX", "NEON"}; postStatusMsg(OSP_LOG_INFO) << "Using ISPC device with " << isaNames[ispc::ISPCDevice_isa()] << " instruction set"; @@ -619,6 +616,7 @@ OSPFuture ISPCDevice::renderFrame(OSPFrameBuffer _fb, Camera *camera = (Camera *)_camera; World *world = (World *)_world; +#ifndef OSPRAY_TARGET_SYCL fb->setCompletedEvent(OSP_NONE_FINISHED); fb->refInc(); @@ -626,7 +624,7 @@ OSPFuture ISPCDevice::renderFrame(OSPFrameBuffer _fb, camera->refInc(); world->refInc(); - auto *f = new RenderTask(fb, [=]() { + return (OSPFuture) new RenderTask(fb, [=]() { utility::CodeTimer timer; timer.start(); loadBalancer->renderFrame(fb, renderer, camera, world); @@ -639,8 +637,10 @@ OSPFuture ISPCDevice::renderFrame(OSPFrameBuffer _fb, return timer.seconds(); }); - - return (OSPFuture)f; +#else + return (OSPFuture) new RenderTask( + loadBalancer->renderFrame(fb, renderer, camera, world, false)); +#endif } int ISPCDevice::isReady(OSPFuture _task, OSPSyncEvent event) diff --git a/modules/cpu/ISPCDevice.h b/modules/cpu/ISPCDevice.h index c4dcb9008..afcaf46b3 100644 --- a/modules/cpu/ISPCDevice.h +++ b/modules/cpu/ISPCDevice.h @@ -170,9 +170,9 @@ struct OSPRAY_SDK_INTERFACE ISPCDevice : public Device } #ifdef OSPRAY_TARGET_SYCL - sycl::queue *getSyclQueue() + sycl::queue &getSyclQueue() { - return &syclQueue; + return syclQueue; } #endif diff --git a/modules/cpu/render/LoadBalancer.cpp b/modules/cpu/render/LoadBalancer.cpp index 827b53e13..0e286b41c 100644 --- a/modules/cpu/render/LoadBalancer.cpp +++ b/modules/cpu/render/LoadBalancer.cpp @@ -2,47 +2,50 @@ // 
SPDX-License-Identifier: Apache-2.0 #include "LoadBalancer.h" -#include "Renderer.h" -#include "api/Device.h" -#include "common/Group.h" -#include "common/Instance.h" -#include "rkcommon/tasking/parallel_for.h" - +#include "camera/Camera.h" +#include "common/World.h" +#include "fb/FrameBuffer.h" +// ispc shared #include "fb/LocalFBShared.h" +#ifndef OSPRAY_TARGET_SYCL +#include "render/RenderTask.h" +#include "rkcommon/utility/CodeTimer.h" +#else +#include "render/RenderTaskSycl.h" +#endif + namespace ospray { -void LocalTiledLoadBalancer::renderFrame( - FrameBuffer *fb, Renderer *renderer, Camera *camera, World *world) +Renderer::Event LocalTiledLoadBalancer::renderFrame(FrameBuffer *fb, + Renderer *renderer, + Camera *camera, + World *world, + bool wait) { fb->beginFrame(); void *perFrameData = renderer->beginFrame(fb, world); - renderer->renderTasks(fb, + Renderer::Event event = renderer->renderTasks(fb, camera, world, perFrameData, - fb->getRenderTaskIDs(renderer->errorThreshold) -#ifdef OSPRAY_TARGET_SYCL - , - *syclQueue -#endif - ); - - renderer->endFrame(fb, perFrameData); - - fb->setCompletedEvent(OSP_WORLD_RENDERED); - fb->endFrame(renderer->errorThreshold, camera); - fb->setCompletedEvent(OSP_FRAME_FINISHED); + fb->getRenderTaskIDs(renderer->errorThreshold), + wait); + + // No renderer->endFrame() and fb->endFrame() on GPU. + // Frame post-processing need to be done as a separate + // kernel submitted to the main compute queue. 
+ if (wait) { + renderer->endFrame(fb, perFrameData); + fb->setCompletedEvent(OSP_WORLD_RENDERED); + + fb->endFrame(renderer->errorThreshold, camera); + fb->setCompletedEvent(OSP_FRAME_FINISHED); + } + return event; } -#ifdef OSPRAY_TARGET_SYCL -void LocalTiledLoadBalancer::setQueue(sycl::queue *sq) -{ - syclQueue = sq; -} -#endif - std::string LocalTiledLoadBalancer::toString() const { return "ospray::LocalTiledLoadBalancer"; diff --git a/modules/cpu/render/LoadBalancer.h b/modules/cpu/render/LoadBalancer.h index 4c6b339e9..84bccac35 100644 --- a/modules/cpu/render/LoadBalancer.h +++ b/modules/cpu/render/LoadBalancer.h @@ -3,15 +3,15 @@ #pragma once -#include "camera/Camera.h" #include "common/OSPCommon.h" -#include "common/World.h" -#include "fb/FrameBuffer.h" #include "render/Renderer.h" -#include "rkcommon/utility/ArrayView.h" namespace ospray { +struct FrameBuffer; +struct Camera; +struct World; + struct OSPRAY_SDK_INTERFACE TiledLoadBalancer { virtual ~TiledLoadBalancer() = default; @@ -21,8 +21,11 @@ struct OSPRAY_SDK_INTERFACE TiledLoadBalancer /*! 
Render the entire framebuffer using the given renderer, camera and * world configuration using the load balancer to parallelize the work */ - virtual void renderFrame( - FrameBuffer *fb, Renderer *renderer, Camera *camera, World *world) = 0; + virtual Renderer::Event renderFrame(FrameBuffer *fb, + Renderer *renderer, + Camera *camera, + World *world, + bool wait = true) = 0; }; // Inlined definitions ////////////////////////////////////////////////////// @@ -34,19 +37,13 @@ struct OSPRAY_SDK_INTERFACE TiledLoadBalancer application ranks each doing local rendering on their own) */ struct OSPRAY_SDK_INTERFACE LocalTiledLoadBalancer : public TiledLoadBalancer { - void renderFrame(FrameBuffer *fb, + Renderer::Event renderFrame(FrameBuffer *fb, Renderer *renderer, Camera *camera, - World *world) override; + World *world, + bool wait = true) override; std::string toString() const override; - -#ifdef OSPRAY_TARGET_SYCL - void setQueue(sycl::queue *syclQueue); - - private: - sycl::queue *syclQueue = nullptr; -#endif }; } // namespace ospray diff --git a/modules/cpu/render/RenderTaskSycl.h b/modules/cpu/render/RenderTaskSycl.h new file mode 100644 index 000000000..7f6398042 --- /dev/null +++ b/modules/cpu/render/RenderTaskSycl.h @@ -0,0 +1,71 @@ +// Copyright 2009 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +// ospray +#include "../common/Future.h" + +namespace ospray { + +struct RenderTask : public Future +{ + RenderTask(sycl::event); + ~RenderTask() override = default; + + bool isFinished(OSPSyncEvent event = OSP_TASK_FINISHED) override; + void wait(OSPSyncEvent event = OSP_TASK_FINISHED) override; + void cancel() override; + float getProgress() override; + float getTaskDuration() override; + + private: + sycl::event syclEvent; +}; + +// Inlined definitions ////////////////////////////////////////////////////// + +inline RenderTask::RenderTask(sycl::event syclEvent) : syclEvent(syclEvent) {} + +inline bool 
RenderTask::isFinished(OSPSyncEvent event) +{ + (void)event; + syclEvent.wait_and_throw(); + return true; + + // The proper way of checking is commented out because it degrades + // performance by a factor of 3. Analysis shows that sharing GPU + // between SYCL kernel and OpenGL Draws/SwapBuffers is highly ineffective. + // Need to first finish SYCL kernel then render FB and GUI. + // return (syclEvent.get_info() + // == sycl::info::event_command_status::complete); +} + +inline void RenderTask::wait(OSPSyncEvent event) +{ + (void)event; + syclEvent.wait_and_throw(); +} + +inline void RenderTask::cancel() +{ + // No task canceling on GPU for now +} + +inline float RenderTask::getProgress() +{ + // No progress reported on GPU for now + return 0.f; +} + +inline float RenderTask::getTaskDuration() +{ + const auto t0 = + syclEvent + .get_profiling_info(); + const auto t1 = + syclEvent.get_profiling_info(); + return (t1 - t0) * 1E-9; +} + +} // namespace ospray diff --git a/modules/cpu/render/Renderer.cpp b/modules/cpu/render/Renderer.cpp index f6bfba5c6..58830c4ed 100644 --- a/modules/cpu/render/Renderer.cpp +++ b/modules/cpu/render/Renderer.cpp @@ -5,7 +5,10 @@ #include "Renderer.h" #include "LoadBalancer.h" #include "Material.h" +#include "camera/Camera.h" #include "common/Instance.h" +#include "common/World.h" +#include "fb/FrameBuffer.h" #include "geometry/GeometricModel.h" #include "ospray/OSPEnums.h" #include "pf/PixelFilter.h" @@ -24,7 +27,7 @@ namespace ospray { // Renderer definitions /////////////////////////////////////////////////////// Renderer::Renderer(api::ISPCDevice &device) - : AddStructShared(device.getIspcrtContext(), device) + : AddStructShared(device.getIspcrtContext(), device), device(device) { managedObjectType = OSP_RENDERER; pixelFilter = nullptr; diff --git a/modules/cpu/render/Renderer.h b/modules/cpu/render/Renderer.h index 641a56367..4866fbdad 100644 --- a/modules/cpu/render/Renderer.h +++ b/modules/cpu/render/Renderer.h @@ -29,6 +29,14 @@ 
struct OSPRAY_SDK_INTERFACE Renderer : public AddStructShared, public ObjectFactory { +#ifdef OSPRAY_TARGET_SYCL + using Event = sycl::event; +#else + struct Event + { + }; +#endif + Renderer(api::ISPCDevice &device); virtual ~Renderer() override = default; @@ -52,17 +60,12 @@ struct OSPRAY_SDK_INTERFACE Renderer virtual void endFrame(FrameBuffer *fb, void *perFrameData); // called by the load balancer to render one "sample" for each task - virtual void renderTasks(FrameBuffer *, + virtual Event renderTasks(FrameBuffer *, Camera *, World *, void *, - const utility::ArrayView & -#ifdef OSPRAY_TARGET_SYCL - , - sycl::queue & -#endif - ) const - {} + const utility::ArrayView &, + bool wait = true) const = 0; #ifdef OSPRAY_TARGET_SYCL /* Compute the rounded dispatch global size for the given work group size. @@ -98,6 +101,7 @@ struct OSPRAY_SDK_INTERFACE Renderer protected: FeatureFlagsOther featureFlags{FFO_NONE}; + api::ISPCDevice &device; private: void setupPixelFilter(); diff --git a/modules/cpu/render/ao/AORenderer.cpp b/modules/cpu/render/ao/AORenderer.cpp index 39d2d8cb3..e36f7d5f8 100644 --- a/modules/cpu/render/ao/AORenderer.cpp +++ b/modules/cpu/render/ao/AORenderer.cpp @@ -36,17 +36,14 @@ void AORenderer::commit() getSh()->volumeSamplingRate = getParam("volumeSamplingRate", 1.f); } -void AORenderer::renderTasks(FrameBuffer *fb, +Renderer::Event AORenderer::renderTasks(FrameBuffer *fb, Camera *camera, World *world, void *, - const utility::ArrayView &taskIDs -#ifdef OSPRAY_TARGET_SYCL - , - sycl::queue &syclQueue -#endif -) const + const utility::ArrayView &taskIDs, + bool wait) const { + Event event; auto *rendererSh = getSh(); auto *fbSh = fb->getSh(); auto *cameraSh = camera->getSh(); @@ -55,7 +52,7 @@ void AORenderer::renderTasks(FrameBuffer *fb, #ifdef OSPRAY_TARGET_SYCL const uint32_t *taskIDsPtr = taskIDs.data(); - auto event = syclQueue.submit([&](sycl::handler &cgh) { + event = device.getSyclQueue().submit([&](sycl::handler &cgh) { FeatureFlags ff = 
world->getFeatureFlags(); ff.other |= featureFlags; ff.other |= fb->getFeatureFlagsOther(); @@ -78,13 +75,15 @@ void AORenderer::renderTasks(FrameBuffer *fb, } }); }); - event.wait_and_throw(); - // For prints we have to flush the entire queue, because other stuff is queued - syclQueue.wait_and_throw(); + + if (wait) + event.wait_and_throw(); #else + (void)wait; ispc::AORenderer_renderTasks( &rendererSh->super, fbSh, cameraSh, worldSh, taskIDs.data(), numTasks); #endif + return event; } } // namespace ospray diff --git a/modules/cpu/render/ao/AORenderer.h b/modules/cpu/render/ao/AORenderer.h index c59be9a72..e35e9fb13 100644 --- a/modules/cpu/render/ao/AORenderer.h +++ b/modules/cpu/render/ao/AORenderer.h @@ -14,16 +14,12 @@ struct AORenderer : public AddStructShared std::string toString() const override; void commit() override; - virtual void renderTasks(FrameBuffer *fb, + virtual Event renderTasks(FrameBuffer *fb, Camera *camera, World *world, void *perFrameData, - const utility::ArrayView &taskIDs -#ifdef OSPRAY_TARGET_SYCL - , - sycl::queue &syclQueue -#endif - ) const override; + const utility::ArrayView &taskIDs, + bool wait) const override; private: int aoSamples{1}; diff --git a/modules/cpu/render/debug/DebugRenderer.cpp b/modules/cpu/render/debug/DebugRenderer.cpp index 502210488..e54c21259 100644 --- a/modules/cpu/render/debug/DebugRenderer.cpp +++ b/modules/cpu/render/debug/DebugRenderer.cpp @@ -73,17 +73,14 @@ void DebugRenderer::commit() getSh()->type = typeFromString(method); } -void DebugRenderer::renderTasks(FrameBuffer *fb, +Renderer::Event DebugRenderer::renderTasks(FrameBuffer *fb, Camera *camera, World *world, void *, - const utility::ArrayView &taskIDs -#ifdef OSPRAY_TARGET_SYCL - , - sycl::queue &syclQueue -#endif -) const + const utility::ArrayView &taskIDs, + bool wait) const { + Event event; auto *rendererSh = getSh(); auto *fbSh = fb->getSh(); auto *cameraSh = camera->getSh(); @@ -92,7 +89,7 @@ void DebugRenderer::renderTasks(FrameBuffer 
*fb, #ifdef OSPRAY_TARGET_SYCL const uint32_t *taskIDsPtr = taskIDs.data(); - auto event = syclQueue.submit([&](sycl::handler &cgh) { + event = device.getSyclQueue().submit([&](sycl::handler &cgh) { FeatureFlags ff = world->getFeatureFlags(); ff.other |= featureFlags; ff.other |= fb->getFeatureFlagsOther(); @@ -115,13 +112,15 @@ void DebugRenderer::renderTasks(FrameBuffer *fb, } }); }); - event.wait_and_throw(); - // For prints we have to flush the entire queue, because other stuff is queued - syclQueue.wait_and_throw(); + + if (wait) + event.wait_and_throw(); #else + (void)wait; ispc::DebugRenderer_renderTasks( &rendererSh->super, fbSh, cameraSh, worldSh, taskIDs.data(), numTasks); #endif + return event; } } // namespace ospray diff --git a/modules/cpu/render/debug/DebugRenderer.h b/modules/cpu/render/debug/DebugRenderer.h index b89d2df00..d0df542ef 100644 --- a/modules/cpu/render/debug/DebugRenderer.h +++ b/modules/cpu/render/debug/DebugRenderer.h @@ -20,16 +20,12 @@ struct DebugRenderer : public AddStructShared void commit() override; - virtual void renderTasks(FrameBuffer *fb, + virtual Event renderTasks(FrameBuffer *fb, Camera *camera, World *world, void *perFrameData, - const utility::ArrayView &taskIDs -#ifdef OSPRAY_TARGET_SYCL - , - sycl::queue &syclQueue -#endif - ) const override; + const utility::ArrayView &taskIDs, + bool wait) const override; }; } // namespace ospray diff --git a/modules/cpu/render/pathtracer/PathTracer.cpp b/modules/cpu/render/pathtracer/PathTracer.cpp index 828891b1c..fce00f58f 100644 --- a/modules/cpu/render/pathtracer/PathTracer.cpp +++ b/modules/cpu/render/pathtracer/PathTracer.cpp @@ -87,17 +87,14 @@ void *PathTracer::beginFrame(FrameBuffer *, World *world) return nullptr; } -void PathTracer::renderTasks(FrameBuffer *fb, +Renderer::Event PathTracer::renderTasks(FrameBuffer *fb, Camera *camera, World *world, void *, - const utility::ArrayView &taskIDs -#ifdef OSPRAY_TARGET_SYCL - , - sycl::queue &syclQueue -#endif -) const + 
const utility::ArrayView &taskIDs, + bool wait) const { + Event event; auto *rendererSh = getSh(); auto *fbSh = fb->getSh(); auto *cameraSh = camera->getSh(); @@ -106,7 +103,7 @@ void PathTracer::renderTasks(FrameBuffer *fb, #ifdef OSPRAY_TARGET_SYCL const uint32_t *taskIDsPtr = taskIDs.data(); - auto event = syclQueue.submit([&](sycl::handler &cgh) { + event = device.getSyclQueue().submit([&](sycl::handler &cgh) { FeatureFlags ff = world->getFeatureFlags(); ff.other |= featureFlags; ff.other |= fb->getFeatureFlagsOther(); @@ -129,13 +126,15 @@ void PathTracer::renderTasks(FrameBuffer *fb, } }); }); - event.wait_and_throw(); - // For prints we have to flush the entire queue, because other stuff is queued - syclQueue.wait_and_throw(); + + if (wait) + event.wait_and_throw(); #else + (void)wait; ispc::PathTracer_renderTasks( &rendererSh->super, fbSh, cameraSh, worldSh, taskIDs.data(), numTasks); #endif + return event; } } // namespace ospray diff --git a/modules/cpu/render/pathtracer/PathTracer.h b/modules/cpu/render/pathtracer/PathTracer.h index ebfb7eddb..a226abeb6 100644 --- a/modules/cpu/render/pathtracer/PathTracer.h +++ b/modules/cpu/render/pathtracer/PathTracer.h @@ -20,16 +20,12 @@ struct PathTracer : public AddStructShared virtual void commit() override; virtual void *beginFrame(FrameBuffer *, World *) override; - virtual void renderTasks(FrameBuffer *fb, + virtual Event renderTasks(FrameBuffer *fb, Camera *camera, World *world, void *perFrameData, - const utility::ArrayView &taskIDs -#ifdef OSPRAY_TARGET_SYCL - , - sycl::queue &syclQueue -#endif - ) const override; + const utility::ArrayView &taskIDs, + bool wait) const override; private: bool importanceSampleGeometryLights{ diff --git a/modules/cpu/render/scivis/SciVis.cpp b/modules/cpu/render/scivis/SciVis.cpp index 144981882..5c11189f9 100644 --- a/modules/cpu/render/scivis/SciVis.cpp +++ b/modules/cpu/render/scivis/SciVis.cpp @@ -63,17 +63,14 @@ void *SciVis::beginFrame(FrameBuffer *, World *world) 
return nullptr; } -void SciVis::renderTasks(FrameBuffer *fb, +Renderer::Event SciVis::renderTasks(FrameBuffer *fb, Camera *camera, World *world, void *, - const utility::ArrayView &taskIDs -#ifdef OSPRAY_TARGET_SYCL - , - sycl::queue &syclQueue -#endif -) const + const utility::ArrayView &taskIDs, + bool wait) const { + Event event; auto *rendererSh = getSh(); auto *fbSh = fb->getSh(); auto *cameraSh = camera->getSh(); @@ -82,7 +79,7 @@ void SciVis::renderTasks(FrameBuffer *fb, #ifdef OSPRAY_TARGET_SYCL const uint32_t *taskIDsPtr = taskIDs.data(); - auto event = syclQueue.submit([&](sycl::handler &cgh) { + event = device.getSyclQueue().submit([&](sycl::handler &cgh) { FeatureFlags ff = world->getFeatureFlags(); ff.other |= featureFlags; ff.other |= fb->getFeatureFlagsOther(); @@ -105,13 +102,15 @@ void SciVis::renderTasks(FrameBuffer *fb, } }); }); - event.wait_and_throw(); - // For prints we have to flush the entire queue, because other stuff is queued - syclQueue.wait_and_throw(); + + if (wait) + event.wait_and_throw(); #else + (void)wait; ispc::SciVis_renderTasks( &rendererSh->super, fbSh, cameraSh, worldSh, taskIDs.data(), numTasks); #endif + return event; } } // namespace ospray diff --git a/modules/cpu/render/scivis/SciVis.h b/modules/cpu/render/scivis/SciVis.h index 176f7d13f..74a8afbd9 100644 --- a/modules/cpu/render/scivis/SciVis.h +++ b/modules/cpu/render/scivis/SciVis.h @@ -15,16 +15,12 @@ struct SciVis : public AddStructShared void commit() override; void *beginFrame(FrameBuffer *, World *) override; - virtual void renderTasks(FrameBuffer *fb, + virtual Event renderTasks(FrameBuffer *fb, Camera *camera, World *world, void *perFrameData, - const utility::ArrayView &taskIDs -#ifdef OSPRAY_TARGET_SYCL - , - sycl::queue &syclQueue -#endif - ) const override; + const utility::ArrayView &taskIDs, + bool wait) const override; }; } // namespace ospray diff --git a/modules/mpi/ospray/MPIDistributedDevice.cpp b/modules/mpi/ospray/MPIDistributedDevice.cpp index 
2051fd01d..9cc1376ac 100644 --- a/modules/mpi/ospray/MPIDistributedDevice.cpp +++ b/modules/mpi/ospray/MPIDistributedDevice.cpp @@ -405,10 +405,6 @@ OSPFuture MPIDistributedDevice::renderFrame(OSPFrameBuffer _fb, auto loadBalancer = std::make_shared(allocateHandle()); -#ifdef OSPRAY_TARGET_SYCL - auto ispcDevice = std::dynamic_pointer_cast(internalDevice); - loadBalancer->setQueue(ispcDevice->getSyclQueue()); -#endif fb->setCompletedEvent(OSP_NONE_FINISHED); diff --git a/modules/mpi/ospray/render/DistributedLoadBalancer.cpp b/modules/mpi/ospray/render/DistributedLoadBalancer.cpp index 486a6699c..65f63f30a 100644 --- a/modules/mpi/ospray/render/DistributedLoadBalancer.cpp +++ b/modules/mpi/ospray/render/DistributedLoadBalancer.cpp @@ -32,8 +32,11 @@ DistributedLoadBalancer::~DistributedLoadBalancer() handle.free(); } -void DistributedLoadBalancer::renderFrame( - FrameBuffer *_fb, Renderer *_renderer, Camera *camera, World *_world) +Renderer::Event DistributedLoadBalancer::renderFrame(FrameBuffer *_fb, + Renderer *_renderer, + Camera *camera, + World *_world, + bool wait) { auto *dfb = dynamic_cast(_fb); @@ -47,7 +50,7 @@ void DistributedLoadBalancer::renderFrame( if (!renderer) { if (world->allRegions.size() == 1) { renderFrameReplicated(dfb, _renderer, camera, world); - return; + return Renderer::Event(); } else { throw std::runtime_error( "Distributed rendering requires a distributed renderer!"); @@ -177,12 +180,7 @@ void DistributedLoadBalancer::renderFrame( world, regionVisible.sharedPtr(), perFrameData, - renderTaskIDs -#ifdef OSPRAY_TARGET_SYCL - , - *syclQueue -#endif - ); + renderTaskIDs); // If we're rendering the background tiles send them over now if (layer == 0) { @@ -249,12 +247,7 @@ void DistributedLoadBalancer::renderFrame( world->allRegions[rid], perFrameData, utility::ArrayView( - activeTasksShared.data(), activeTasksShared.size()) -#ifdef OSPRAY_TARGET_SYCL - , - *syclQueue -#endif - ); + activeTasksShared.data(), activeTasksShared.size())); 
tasking::parallel_for(tiles.size(), [&](size_t i) { if (!regionVisible[numRegions * i + rid] @@ -277,6 +270,7 @@ void DistributedLoadBalancer::renderFrame( renderer->endFrame(dfb, perFrameData); dfb->endFrame(renderer->errorThreshold, camera); + return Renderer::Event(); } void DistributedLoadBalancer::renderFrameReplicated(DistributedFrameBuffer *dfb, @@ -347,13 +341,6 @@ std::string DistributedLoadBalancer::toString() const return "ospray::mpi::Distributed"; } -#ifdef OSPRAY_TARGET_SYCL -void DistributedLoadBalancer::setQueue(sycl::queue *sq) -{ - syclQueue = sq; -} -#endif - void DistributedLoadBalancer::renderFrameReplicatedDynamicLB( DistributedFrameBuffer *dfb, Renderer *renderer, @@ -455,12 +442,7 @@ void DistributedLoadBalancer::renderFrameReplicatedDynamicLB( camera, world, perFrameData, - sparseFb->getRenderTaskIDs(renderer->errorThreshold) -#ifdef OSPRAY_TARGET_SYCL - , - *syclQueue -#endif - ); + sparseFb->getRenderTaskIDs(renderer->errorThreshold)); // TODO: Now the tile setting happens as a bulk-sync operation after // rendering, because we still need to send them through the compositing @@ -521,12 +503,7 @@ void DistributedLoadBalancer::renderFrameReplicatedStaticLB( camera, world, perFrameData, - ownedTilesFb->getRenderTaskIDs(renderer->errorThreshold) -#ifdef OSPRAY_TARGET_SYCL - , - *syclQueue -#endif - ); + ownedTilesFb->getRenderTaskIDs(renderer->errorThreshold)); // TODO: Now the tile setting happens as a bulk-sync operation after // rendering, because we still need to send them through the compositing diff --git a/modules/mpi/ospray/render/DistributedLoadBalancer.h b/modules/mpi/ospray/render/DistributedLoadBalancer.h index 872db7522..b2d0147c2 100644 --- a/modules/mpi/ospray/render/DistributedLoadBalancer.h +++ b/modules/mpi/ospray/render/DistributedLoadBalancer.h @@ -30,10 +30,11 @@ struct DistributedLoadBalancer : public TiledLoadBalancer ~DistributedLoadBalancer() override; - void renderFrame(FrameBuffer *fb, + Renderer::Event 
renderFrame(FrameBuffer *fb, Renderer *renderer, Camera *camera, - World *world) override; + World *world, + bool wait = true) override; void renderFrameReplicated(DistributedFrameBuffer *dfb, Renderer *renderer, @@ -42,10 +43,6 @@ struct DistributedLoadBalancer : public TiledLoadBalancer std::string toString() const override; -#ifdef OSPRAY_TARGET_SYCL - void setQueue(sycl::queue *syclQueue); -#endif - private: void renderFrameReplicatedDynamicLB(DistributedFrameBuffer *dfb, Renderer *renderer, @@ -60,10 +57,6 @@ struct DistributedLoadBalancer : public TiledLoadBalancer void *perFrameData); ObjectHandle handle; - -#ifdef OSPRAY_TARGET_SYCL - sycl::queue *syclQueue = nullptr; -#endif }; } // namespace mpi diff --git a/modules/mpi/ospray/render/distributed/DistributedRaycast.cpp b/modules/mpi/ospray/render/distributed/DistributedRaycast.cpp index 90158e106..3a16a60b0 100644 --- a/modules/mpi/ospray/render/distributed/DistributedRaycast.cpp +++ b/modules/mpi/ospray/render/distributed/DistributedRaycast.cpp @@ -109,8 +109,7 @@ void DistributedRaycastRenderer::renderRegionTasks(SparseFrameBuffer *fb, DistributedWorld *world, const box3f ®ion, void *perFrameData, - const utility::ArrayView &taskIDs, - sycl::queue &syclQueue) const + const utility::ArrayView &taskIDs) const { auto *rendererSh = getSh(); auto *fbSh = fb->getSh(); @@ -119,7 +118,7 @@ void DistributedRaycastRenderer::renderRegionTasks(SparseFrameBuffer *fb, const uint32_t *taskIDsPtr = taskIDs.data(); const size_t numTasks = taskIDs.size(); - auto event = syclQueue.submit([&](sycl::handler &cgh) { + auto event = device.getSyclQueue().submit([&](sycl::handler &cgh) { FeatureFlags ff = world->getFeatureFlags(); ff.other |= featureFlags; ff.other |= fb->getFeatureFlagsOther(); @@ -146,8 +145,6 @@ void DistributedRaycastRenderer::renderRegionTasks(SparseFrameBuffer *fb, }); }); event.wait_and_throw(); - // For prints we have to flush the entire queue, because other stuff is queued - syclQueue.wait_and_throw(); 
} #endif diff --git a/modules/mpi/ospray/render/distributed/DistributedRaycast.h b/modules/mpi/ospray/render/distributed/DistributedRaycast.h index aee6a1fda..8292e7f8e 100644 --- a/modules/mpi/ospray/render/distributed/DistributedRaycast.h +++ b/modules/mpi/ospray/render/distributed/DistributedRaycast.h @@ -49,8 +49,7 @@ struct DistributedRaycastRenderer : public AddStructShared &taskIDs, - sycl::queue &syclQueue) const override; + const utility::ArrayView &taskIDs) const override; #endif private: diff --git a/modules/mpi/ospray/render/distributed/DistributedRenderer.cpp b/modules/mpi/ospray/render/distributed/DistributedRenderer.cpp index 13fb3cf75..31aa76f14 100644 --- a/modules/mpi/ospray/render/distributed/DistributedRenderer.cpp +++ b/modules/mpi/ospray/render/distributed/DistributedRenderer.cpp @@ -41,12 +41,7 @@ void DistributedRenderer::computeRegionVisibility(SparseFrameBuffer *fb, DistributedWorld *world, uint8_t *regionVisible, void *perFrameData, - const utility::ArrayView &taskIDs -#ifdef OSPRAY_TARGET_SYCL - , - sycl::queue &syclQueue -#endif -) const + const utility::ArrayView &taskIDs) const { #ifndef OSPRAY_TARGET_SYCL ispc::DistributedRenderer_computeRegionVisibility(getSh(), @@ -65,7 +60,7 @@ void DistributedRenderer::computeRegionVisibility(SparseFrameBuffer *fb, const uint32_t *taskIDsPtr = taskIDs.data(); const size_t numTasks = taskIDs.size(); - auto event = syclQueue.submit([&](sycl::handler &cgh) { + auto event = device.getSyclQueue().submit([&](sycl::handler &cgh) { FeatureFlags ff = world->getFeatureFlags(); ff.other |= featureFlags; ff.other |= fb->getFeatureFlagsOther(); @@ -91,8 +86,6 @@ void DistributedRenderer::computeRegionVisibility(SparseFrameBuffer *fb, }); }); event.wait_and_throw(); - // For prints we have to flush the entire queue, because other stuff is queued - syclQueue.wait_and_throw(); #endif } diff --git a/modules/mpi/ospray/render/distributed/DistributedRenderer.h 
b/modules/mpi/ospray/render/distributed/DistributedRenderer.h index f0c625670..2e7de93d5 100644 --- a/modules/mpi/ospray/render/distributed/DistributedRenderer.h +++ b/modules/mpi/ospray/render/distributed/DistributedRenderer.h @@ -32,12 +32,18 @@ struct DistributedRenderer : public AddStructShared DistributedWorld *world, uint8_t *regionVisible, void *perFrameData, - const utility::ArrayView &taskIDs -#ifdef OSPRAY_TARGET_SYCL - , - sycl::queue &syclQueue -#endif - ) const; + const utility::ArrayView &taskIDs) const; + + // Not used by distributed renderers + Event renderTasks(FrameBuffer *fb, + Camera *camera, + World *world, + void *perFrameData, + const utility::ArrayView &taskIDs, + bool wait) const override + { + return Event(); + } #ifndef OSPRAY_TARGET_SYCL virtual void renderRegionTasks(SparseFrameBuffer *fb, @@ -47,22 +53,12 @@ struct DistributedRenderer : public AddStructShared void *perFrameData, const utility::ArrayView &taskIDs) const = 0; #else - // Not used by distributed renderers - void renderTasks(FrameBuffer *fb, - Camera *camera, - World *world, - void *perFrameData, - const utility::ArrayView &taskIDs, - sycl::queue &syclQueue) const override - {} - virtual void renderRegionTasks(SparseFrameBuffer *fb, Camera *camera, DistributedWorld *world, const box3f ®ion, void *perFrameData, - const utility::ArrayView &taskIDs, - sycl::queue &syclQueue) const = 0; + const utility::ArrayView &taskIDs) const = 0; #endif virtual std::shared_ptr tileOperation() = 0; From 432e8c7bbf9b9c87ff3e4cb3d48538cffc085e03 Mon Sep 17 00:00:00 2001 From: Will Usher Date: Thu, 20 Apr 2023 10:56:02 -0700 Subject: [PATCH 22/42] Make syclQueue with profiling enabled --- modules/cpu/ISPCDevice.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/modules/cpu/ISPCDevice.cpp b/modules/cpu/ISPCDevice.cpp index a400b6dee..b0eb5ac43 100644 --- a/modules/cpu/ISPCDevice.cpp +++ b/modules/cpu/ISPCDevice.cpp @@ -286,10 +286,8 @@ void ISPCDevice::commit() 
reinterpret_cast(ispcrtDevice.nativeContextHandle()), true); - syclQueue = sycl::ext::oneapi::level_zero::make_queue(syclContext, - syclDevice, - reinterpret_cast(ispcrtQueue.nativeTaskQueueHandle()), - true); + syclQueue = sycl::queue( + syclContext, syclDevice, {sycl::property::queue::enable_profiling()}); #endif } From 13527b957f79c8b6611e3ba26465e6bbe9a78eed Mon Sep 17 00:00:00 2001 From: Will Usher Date: Thu, 27 Apr 2023 09:59:25 -0700 Subject: [PATCH 23/42] Fix frame ID reset when clearing DFB --- modules/mpi/ospray/fb/DistributedFrameBuffer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/mpi/ospray/fb/DistributedFrameBuffer.cpp b/modules/mpi/ospray/fb/DistributedFrameBuffer.cpp index 95fc85cc8..5a19291d5 100644 --- a/modules/mpi/ospray/fb/DistributedFrameBuffer.cpp +++ b/modules/mpi/ospray/fb/DistributedFrameBuffer.cpp @@ -912,7 +912,7 @@ void DFB::setTile(const ispc::Tile &tile) void DFB::clear() { - getSh()->frameID = -1; // we increment at the start of the frame + FrameBuffer::clear(); if (hasAccumBuffer) { tileErrorRegion.clear(); From 8668f1827073d239e4334a55a7bef8cee5fc524a Mon Sep 17 00:00:00 2001 From: Will Usher Date: Fri, 5 May 2023 16:16:01 -0700 Subject: [PATCH 24/42] Base default maxTilesPerRound on the # of tiles assigned to the rank We default to 0.33 (1/3) of tiles per-round and allow users to configure this with the OSPRAY_MPI_LB_TILES_PER_ROUND variable which takes values in [0-1] The percentage of tiles active for work stealing to occur can also be tuned with OSPRAY_MPI_LB_MIN_ACTIVE_TILES --- .../ospray/render/DistributedLoadBalancer.cpp | 27 +++++++++++++++---- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/modules/mpi/ospray/render/DistributedLoadBalancer.cpp b/modules/mpi/ospray/render/DistributedLoadBalancer.cpp index 65f63f30a..1d10e1e80 100644 --- a/modules/mpi/ospray/render/DistributedLoadBalancer.cpp +++ b/modules/mpi/ospray/render/DistributedLoadBalancer.cpp @@ -361,11 +361,28 @@ void
DistributedLoadBalancer::renderFrameReplicatedDynamicLB( // Do not pass all tiles at once, this way if other ranks want to steal // work, they can - const int maxTilesPerRound = 20; - const int numRounds = std::max(NTILES / maxTilesPerRound, 1); - const int tilesPerRound = NTILES / numRounds; - const int remainTiles = NTILES % numRounds; - const int minActiveTiles = (ALLTILES / workerSize()) * 0.25f; + // We target 1/3rd of tiles per-round by default to balance between executing + // work locally and allowing work to be stolen for load balancing. + // This can be overridden by the environment variable + // OSPRAY_MPI_LB_TILES_PER_ROUND + const float percentTilesPerRound = + utility::getEnvVar("OSPRAY_MPI_LB_TILES_PER_ROUND") + .value_or(0.33f); + const int maxTilesPerRound = std::ceil(NTILES * percentTilesPerRound); + const float minActiveTilesPercent = + utility::getEnvVar("OSPRAY_MPI_LB_MIN_ACTIVE_TILES") + .value_or(0.25f); + const int minActiveTiles = (ALLTILES / workerSize()) * minActiveTilesPercent; + + // Avoid division by 0 for the case that this rank doesn't have any tiles + int numRounds = 0; + int tilesPerRound = 0; + int remainTiles = 0; + if (NTILES > 0) { + numRounds = std::max(NTILES / maxTilesPerRound, 1); + tilesPerRound = NTILES / numRounds; + remainTiles = NTILES % numRounds; + } int terminatedTiles = 0; auto dynamicLB = make_unique(handle, ALLTILES); From 9a805990c0767dc5eb6835e9e3167f840faa5ca0 Mon Sep 17 00:00:00 2001 From: Moushumi Maria Date: Wed, 10 May 2023 23:03:05 -0700 Subject: [PATCH 25/42] Add MPI ON to build-arch-clang-extras build job --- .github/workflows/ci.linux.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.linux.yml b/.github/workflows/ci.linux.yml index bdac10127..a5372fd3a 100644 --- a/.github/workflows/ci.linux.yml +++ b/.github/workflows/ci.linux.yml @@ -35,7 +35,7 @@ jobs: cmd: | export CC=clang export CXX=clang++ - scripts/build/linux.sh -G Ninja -DBUILD_GLFW=OFF
-DBUILD_OSPRAY_CI_EXTRAS=ON -DBUILD_OIDN=ON + scripts/build/linux.sh -G Ninja -DBUILD_GLFW=OFF -DBUILD_OSPRAY_CI_EXTRAS=ON -DBUILD_OIDN=ON -DBUILD_OSPRAY_MODULE_MPI=ON -DBUILD_OSPRAY_MODULE_MULTIDEVICE=ON build-arch-gcc: uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker.yml@main From 14ae46b13fda67c9bf1d2ffe3acdb6e3d95bb680 Mon Sep 17 00:00:00 2001 From: Moushumi Maria Date: Wed, 10 May 2023 23:11:31 -0700 Subject: [PATCH 26/42] Fix unused parameter errors --- .../mpi/ospray/render/DistributedLoadBalancer.cpp | 2 +- .../ospray/render/distributed/DistributedRenderer.h | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/mpi/ospray/render/DistributedLoadBalancer.cpp b/modules/mpi/ospray/render/DistributedLoadBalancer.cpp index 1d10e1e80..43615f806 100644 --- a/modules/mpi/ospray/render/DistributedLoadBalancer.cpp +++ b/modules/mpi/ospray/render/DistributedLoadBalancer.cpp @@ -36,7 +36,7 @@ Renderer::Event DistributedLoadBalancer::renderFrame(FrameBuffer *_fb, Renderer *_renderer, Camera *camera, World *_world, - bool wait) + bool /*wait*/) { auto *dfb = dynamic_cast(_fb); diff --git a/modules/mpi/ospray/render/distributed/DistributedRenderer.h b/modules/mpi/ospray/render/distributed/DistributedRenderer.h index 2e7de93d5..1c11acda4 100644 --- a/modules/mpi/ospray/render/distributed/DistributedRenderer.h +++ b/modules/mpi/ospray/render/distributed/DistributedRenderer.h @@ -35,12 +35,12 @@ struct DistributedRenderer : public AddStructShared const utility::ArrayView &taskIDs) const; // Not used by distributed renderers - Event renderTasks(FrameBuffer *fb, - Camera *camera, - World *world, - void *perFrameData, - const utility::ArrayView &taskIDs, - bool wait) const override + Event renderTasks(FrameBuffer *, + Camera *, + World *, + void * /*perFrameData*/, + const utility::ArrayView & /*taskIDs*/, + bool /*wait*/) const override { return Event(); } From 4400168f6333f799438de7f5e8288512b5b955de Mon 
Sep 17 00:00:00 2001 From: Miroslaw Pawlowski Date: Tue, 9 May 2023 12:13:59 +0200 Subject: [PATCH 27/42] Wait on SYCL RenderTask destruction --- modules/cpu/render/RenderTaskSycl.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/modules/cpu/render/RenderTaskSycl.h b/modules/cpu/render/RenderTaskSycl.h index 7f6398042..c48e64909 100644 --- a/modules/cpu/render/RenderTaskSycl.h +++ b/modules/cpu/render/RenderTaskSycl.h @@ -11,7 +11,7 @@ namespace ospray { struct RenderTask : public Future { RenderTask(sycl::event); - ~RenderTask() override = default; + ~RenderTask() override; bool isFinished(OSPSyncEvent event = OSP_TASK_FINISHED) override; void wait(OSPSyncEvent event = OSP_TASK_FINISHED) override; @@ -27,6 +27,12 @@ struct RenderTask : public Future inline RenderTask::RenderTask(sycl::event syclEvent) : syclEvent(syclEvent) {} +inline RenderTask::~RenderTask() +{ + // Mimic non-sycl RenderTask behavior which waits on destruction + wait(); +} + inline bool RenderTask::isFinished(OSPSyncEvent event) { (void)event; From 8664fd62b100292f6c6b3058e25d54bc2eec1920 Mon Sep 17 00:00:00 2001 From: Miroslaw Pawlowski Date: Mon, 22 May 2023 17:38:56 +0200 Subject: [PATCH 28/42] HDR images comparison/write fixes in ospTestSuite --- apps/ospTestSuite/test_fixture.cpp | 2 +- apps/ospTestSuite/test_tools.cpp | 143 ++++++++++++++++------------- apps/ospTestSuite/test_tools.h | 30 ++++-- 3 files changed, 98 insertions(+), 77 deletions(-) diff --git a/apps/ospTestSuite/test_fixture.cpp b/apps/ospTestSuite/test_fixture.cpp index b467afd74..66681c65f 100644 --- a/apps/ospTestSuite/test_fixture.cpp +++ b/apps/ospTestSuite/test_fixture.cpp @@ -105,7 +105,7 @@ void Base::PerformRenderTest() RenderFrame(); - auto *framebuffer_data = (uint32_t *)framebuffer.map(OSP_FB_COLOR); + void *framebuffer_data = framebuffer.map(OSP_FB_COLOR); if (ospEnv->GetDumpImg()) { EXPECT_EQ(imageTool->saveTestImage(framebuffer_data), OsprayStatus::Ok); diff --git 
a/apps/ospTestSuite/test_tools.cpp b/apps/ospTestSuite/test_tools.cpp index 65084c389..5f12c4df0 100644 --- a/apps/ospTestSuite/test_tools.cpp +++ b/apps/ospTestSuite/test_tools.cpp @@ -32,23 +32,9 @@ OSPImageTools::OSPImageTools( OsprayStatus OSPImageTools::writePNG( std::string fileName, const uint32_t *pixel) { - unsigned int bufferLen = ImgType::RGBA * size.x * size.y; - std::vector writeImage( - bufferLen, std::numeric_limits::max()); - - for (int y = 0; y < size.y; ++y) { - pixelColorValue *lineAdrr = - &(writeImage[ImgType::RGBA * size.x * (size.y - 1 - y)]); - std::memcpy(lineAdrr, - &(pixel[size.x * y]), - ImgType::RGBA * sizeof(pixelColorValue) * size.x); - } - int retCode = stbi_write_png(fileName.c_str(), - size.x, - size.y, - ImgType::RGBA, - (const void *)writeImage.data(), - 0); + stbi_flip_vertically_on_write(true); + int retCode = stbi_write_png( + fileName.c_str(), size.x, size.y, ImgType::RGBA, (const void *)pixel, 0); if (!retCode) { std::cerr << "Failed to save image: " << fileName << std::endl; return OsprayStatus::Fail; @@ -58,17 +44,9 @@ OsprayStatus OSPImageTools::writePNG( OsprayStatus OSPImageTools::writeHDR(std::string fileName, const float *pixel) { - unsigned int bufferLen = ImgType::RGBA * size.x * size.y; - std::vector writeImage(bufferLen, std::numeric_limits::max()); - - for (int y = 0; y < size.y; ++y) { - float *lineAdrr = &(writeImage[ImgType::RGBA * size.x * (size.y - 1 - y)]); - std::memcpy(lineAdrr, - &(pixel[size.x * y]), - ImgType::RGBA * sizeof(pixelColorValue) * size.x); - } - int retCode = stbi_write_hdr( - fileName.c_str(), size.x, size.y, ImgType::RGBA, writeImage.data()); + stbi_flip_vertically_on_write(true); + int retCode = + stbi_write_hdr(fileName.c_str(), size.x, size.y, ImgType::RGBA, pixel); if (!retCode) { std::cerr << "Failed to save image: " << fileName << std::endl; return OsprayStatus::Fail; @@ -97,16 +75,27 @@ OsprayStatus OSPImageTools::writeImg(std::string fileName, const void *pixel) return writeErr; } 
-OsprayStatus OSPImageTools::saveTestImage(const void *pixel) +OsprayStatus OSPImageTools::verifyBaselineImage(const int sizeX, + const int sizeY, + const void *baselineImage, + const std::string &baselineName) { - return writeImg(ospEnv->GetBaselineDir() + "/" + imgName, pixel); + // Check if baseline image is suitable + if (!baselineImage) { + std::cerr << "Failed to load image: " << baselineName << std::endl; + return OsprayStatus::Fail; + } else if (sizeX != size.x || sizeY != size.y) { + std::cerr << "Wrong image loaded for: " << baselineName << std::endl; + return OsprayStatus::Fail; + } + return OsprayStatus::Ok; } -vec4f OSPImageTools::getAveragedPixel(const vec4i *image, +vec4f OSPImageTools::getAveragedPixel(const vec4f *image, vec2i pixelIndex, const rkcommon::index_sequence_2D &imageIndices) { - vec4i p(0); + vec4f p(0.f); unsigned int count = 0; rkcommon::index_sequence_2D indices(vec2i(5)); for (vec2i id : indices) { @@ -121,48 +110,33 @@ vec4f OSPImageTools::getAveragedPixel(const vec4i *image, } // compare the baseline image with the values form the framebuffer -OsprayStatus OSPImageTools::compareImgWithBaseline(const uint32_t *testImg) +template +OsprayStatus OSPImageTools::compareImgWithBaselineTmpl( + const T *testImage, const T *baselineImage, const std::string &baselineName) { - vec4uc *testImage = (vec4uc *)testImg; - std::string baselineName = - ospEnv->GetBaselineDir() + "/" + imgName + GetFileFormat(); - - int dataX, dataY, dataN; - stbi_set_flip_vertically_on_load(true); - vec4uc *baselineImage = (vec4uc *)stbi_load( - baselineName.c_str(), &dataX, &dataY, &dataN, ImgType::RGBA); - if (!baselineImage) { - std::cerr << "Failed to load image: " << baselineName << std::endl; - return OsprayStatus::Fail; - } else if (dataX != size.x || dataY != size.y) { - std::cerr << "Wrong image loaded for: " << baselineName << std::endl; - stbi_image_free(baselineImage); - return OsprayStatus::Fail; - } - bool notPerfect = false; - long long totalError = 
0; + double totalError = 0.; rkcommon::index_sequence_2D imageIndices(size); - std::vector diffAbsImage(imageIndices.total_indices()); + std::vector diffAbsImage(imageIndices.total_indices()); { - // Prepare temporary diff image with signed integers - std::vector diffImage(imageIndices.total_indices()); + // Prepare temporary diff image with floats + std::vector diffImage(imageIndices.total_indices()); for (vec2i i : imageIndices) { const unsigned int pixelIndex = imageIndices.flatten(i); - const vec4i baselineValue = baselineImage[pixelIndex]; - const vec4i renderedValue = testImage[pixelIndex]; + const vec4f baselineValue = baselineImage[pixelIndex]; + const vec4f renderedValue = testImage[pixelIndex]; diffImage[pixelIndex] = baselineValue - renderedValue; } for (vec2i i : imageIndices) { const unsigned int pixelIndex = imageIndices.flatten(i); - const vec4uc diffValue = abs(diffImage[pixelIndex]); - const vec4i diffAvgValue = + const T diffValue = abs(diffImage[pixelIndex]); + const vec4f diffAvgValue = abs(getAveragedPixel(diffImage.data(), i, imageIndices)); // Only count errors if above specified threshold, this removes blurred // noise - const int pixelError = reduce_add(diffAvgValue); + const float pixelError = reduce_add(diffAvgValue); if (pixelError > pixelThreshold) totalError += pixelError; @@ -187,17 +161,54 @@ OsprayStatus OSPImageTools::compareImgWithBaseline(const uint32_t *testImg) bool failed = meanError > errorRate; if (failed) { - writeImg(ospEnv->GetFailedDir() + "/" + imgName + "_baseline", - (const uint32_t *)baselineImage); - writeImg(ospEnv->GetFailedDir() + "/" + imgName + "_rendered", - (const uint32_t *)testImage); - writeImg(ospEnv->GetFailedDir() + "/" + imgName + "_diff", - (const uint32_t *)diffAbsImage.data()); + writeImg( + ospEnv->GetFailedDir() + "/" + imgName + "_baseline", baselineImage); + writeImg(ospEnv->GetFailedDir() + "/" + imgName + "_rendered", testImage); + writeImg( + ospEnv->GetFailedDir() + "/" + imgName + "_diff", 
diffAbsImage.data()); } - stbi_image_free(baselineImage); if (failed) return OsprayStatus::Fail; else return OsprayStatus::Ok; } + +OsprayStatus OSPImageTools::saveTestImage(const void *pixel) +{ + return writeImg(ospEnv->GetBaselineDir() + "/" + imgName, pixel); +} + +// compare the baseline image with the values form the framebuffer +OsprayStatus OSPImageTools::compareImgWithBaseline(const void *testImage) +{ + std::string baselineName = + ospEnv->GetBaselineDir() + "/" + imgName + GetFileFormat(); + stbi_set_flip_vertically_on_load(true); + + int dataX, dataY, dataN; + OsprayStatus compErr = OsprayStatus::Error; + if (GetFileFormat() == ".png") { + vec4uc *baselineImage = (vec4uc *)stbi_load( + baselineName.c_str(), &dataX, &dataY, &dataN, ImgType::RGBA); + compErr = verifyBaselineImage(dataX, dataY, baselineImage, baselineName); + if (compErr == OsprayStatus::Ok) + compErr = compareImgWithBaselineTmpl( + (vec4uc *)testImage, baselineImage, baselineName); + if (baselineImage) + stbi_image_free(baselineImage); + } else if (GetFileFormat() == ".hdr") { + vec4f *baselineImage = (vec4f *)stbi_loadf( + baselineName.c_str(), &dataX, &dataY, &dataN, ImgType::RGBA); + compErr = verifyBaselineImage(dataX, dataY, baselineImage, baselineName); + if (compErr == OsprayStatus::Ok) + compErr = compareImgWithBaselineTmpl( + (vec4f *)testImage, baselineImage, baselineName); + if (baselineImage) + stbi_image_free(baselineImage); + } else { + std::cerr << "Unsuporrted file format" << std::endl; + compErr = OsprayStatus::Error; + } + return compErr; +} diff --git a/apps/ospTestSuite/test_tools.h b/apps/ospTestSuite/test_tools.h index b103e75cf..f93a8a384 100644 --- a/apps/ospTestSuite/test_tools.h +++ b/apps/ospTestSuite/test_tools.h @@ -21,9 +21,9 @@ #include "rkcommon/utility/multidim_index_sequence.h" -using pixelColorValue = unsigned char; +using pixelColorValue = float; -const pixelColorValue pixelThreshold = 10; +const pixelColorValue pixelThreshold = 13.f; const float 
errorRate = 0.1; enum class OsprayStatus @@ -48,16 +48,29 @@ class OSPImageTools std::string fileFormat; std::string imgName; + std::string GetFileFormat() const + { + return fileFormat; + } // helper method to write the rendered image as PNG file OsprayStatus writePNG(std::string fileName, const uint32_t *pixel); // helper method to write the rendered image as HDR file OsprayStatus writeHDR(std::string fileName, const float *pixel); // helper method to write the image with given format OsprayStatus writeImg(std::string fileName, const void *pixel); - std::string GetFileFormat() const - { - return fileFormat; - } + // average pixels over some window + OsprayStatus verifyBaselineImage(const int sizeX, + const int sizeY, + const void *baselineImage, + const std::string &baselineName); + vec4f getAveragedPixel(const vec4f *image, + vec2i pixelIndex, + const rkcommon::index_sequence_2D &imageIndices); + // compare gold image with fb but with storage type abstraction + template + OsprayStatus compareImgWithBaselineTmpl(const T *testImage, + const T *baselineImage, + const std::string &baselineName); public: OSPImageTools(vec2i imgSize, @@ -68,8 +81,5 @@ class OSPImageTools // helper method to saved rendered file OsprayStatus saveTestImage(const void *pixel); // helper method to compare gold image with current framebuffer render - vec4f getAveragedPixel(const vec4i *image, - vec2i pixelIndex, - const rkcommon::index_sequence_2D &imageIndices); - OsprayStatus compareImgWithBaseline(const uint32_t *testImg); + OsprayStatus compareImgWithBaseline(const void *testImage); }; From 0102937937d9fba26482ecc0fef70e2e98338cb9 Mon Sep 17 00:00:00 2001 From: Miroslaw Pawlowski Date: Mon, 15 May 2023 15:29:10 +0200 Subject: [PATCH 29/42] OIDN2 support, frame buffer postprocessing in separate kernel on GPU --- .github/workflows/ci.linux.gpu.yml | 4 +- CHANGELOG.md | 2 + apps/ospTestSuite/CMakeLists.txt | 9 ++ apps/ospTestSuite/test_fixture.cpp | 16 ++-- 
apps/ospTestSuite/test_framebuffer.cpp | 5 +- apps/ospTestSuite/test_imageop.cpp | 96 +++++++++++++++++++ apps/ospTestSuite/test_interpolation.cpp | 6 +- cmake/ospray_options.cmake | 2 +- cmake/ospray_redistribute_deps.cmake | 35 +++++++ doc/prerequisites.md | 2 +- modules/cpu/CMakeLists.txt | 5 +- modules/cpu/ISPCDevice.cpp | 25 ++++- modules/cpu/ISPCDevice.h | 26 +++++ modules/cpu/common/Data.h | 16 ++++ modules/cpu/common/ISPCRTBuffers.h | 7 +- modules/cpu/fb/FrameBuffer.cpp | 60 ++++++------ modules/cpu/fb/FrameBuffer.h | 25 +++-- modules/cpu/fb/FrameBufferShared.h | 2 +- modules/cpu/fb/FrameBufferView.cpp | 26 ----- modules/cpu/fb/FrameOp.cpp | 15 +++ modules/cpu/fb/FrameOp.h | 39 ++++++++ modules/cpu/fb/ImageOp.h | 86 ----------------- modules/cpu/fb/LocalFB.cpp | 63 ++++++------ modules/cpu/fb/LocalFB.h | 4 +- modules/cpu/fb/PixelOp.cpp | 14 +++ modules/cpu/fb/PixelOp.h | 34 +++++++ .../fb/{ImageOpShared.h => PixelOpShared.h} | 0 modules/cpu/fb/SparseFB.cpp | 36 +++---- modules/cpu/fb/SparseFB.h | 9 +- modules/cpu/fb/frame_ops/Blur.cpp | 7 +- modules/cpu/fb/frame_ops/Blur.h | 18 +++- modules/cpu/fb/frame_ops/Debug.cpp | 45 ++++++++- modules/cpu/fb/frame_ops/Debug.h | 13 ++- modules/cpu/fb/frame_ops/Depth.cpp | 13 ++- modules/cpu/fb/frame_ops/Depth.h | 13 ++- modules/cpu/fb/frame_ops/SSAO.cpp | 21 ++-- modules/cpu/fb/frame_ops/SSAO.h | 21 ++-- modules/cpu/fb/frame_ops/SSAO.ispc | 4 +- modules/cpu/fb/pixel_ops/ToneMapper.cpp | 9 +- modules/cpu/fb/pixel_ops/ToneMapper.h | 11 ++- modules/cpu/fb/pixel_ops/ToneMapperShared.h | 2 +- modules/cpu/fb/registration.cpp | 6 ++ modules/cpu/render/LoadBalancer.cpp | 18 ++-- modules/cpu/render/LoadBalancer.h | 4 +- modules/cpu/render/RenderTaskSycl.h | 52 +++++++--- modules/cpu/render/Renderer.cpp | 12 --- modules/cpu/render/Renderer.h | 21 +--- modules/cpu/render/ao/AORenderer.cpp | 7 +- modules/cpu/render/ao/AORenderer.h | 2 +- modules/cpu/render/debug/DebugRenderer.cpp | 7 +- modules/cpu/render/debug/DebugRenderer.h | 2 
+- modules/cpu/render/pathtracer/PathTracer.cpp | 7 +- modules/cpu/render/pathtracer/PathTracer.h | 2 +- modules/cpu/render/scivis/SciVis.cpp | 7 +- modules/cpu/render/scivis/SciVis.h | 2 +- modules/denoiser/CMakeLists.txt | 2 +- modules/denoiser/DenoiseFrameOp.cpp | 64 +++++++------ modules/denoiser/DenoiseFrameOp.h | 9 +- modules/mpi/ospray/MPIDistributedDevice.cpp | 6 ++ modules/mpi/ospray/MPIDistributedDevice.h | 2 + modules/mpi/ospray/MPIOffloadDevice.cpp | 7 ++ modules/mpi/ospray/MPIOffloadDevice.h | 2 + .../mpi/ospray/fb/DistributedFrameBuffer.cpp | 75 ++++++++------- .../mpi/ospray/fb/DistributedFrameBuffer.h | 2 + .../ospray/render/DistributedLoadBalancer.cpp | 7 +- .../ospray/render/DistributedLoadBalancer.h | 2 +- .../render/distributed/DistributedRaycast.cpp | 3 +- .../distributed/DistributedRenderer.cpp | 3 +- .../render/distributed/DistributedRenderer.h | 4 +- modules/multiDevice/MultiDevice.cpp | 15 ++- modules/multiDevice/MultiDevice.h | 2 + ospray/CMakeLists.txt | 3 + ospray/api/Device.h | 8 +- {modules/cpu => ospray}/common/Managed.cpp | 1 - {modules/cpu => ospray}/common/Managed.h | 20 +--- ospray/common/ObjectFactory.h | 6 +- {modules/cpu => ospray}/fb/FrameBufferView.h | 18 +++- {modules/cpu => ospray}/fb/ImageOp.cpp | 13 +-- ospray/fb/ImageOp.h | 64 +++++++++++++ scripts/superbuild/CMakeLists.txt | 10 +- .../superbuild/dependencies/dep_oidn.cmake | 2 +- scripts/tests/run_tests.ps1 | 7 +- scripts/tests/run_tests.sh | 6 +- .../AVX2/DenoiserOp_DenoiserOp.hdr.md5 | 1 + .../AVX2/ImageOp_DebugOp_ImageOp_0.png.md5 | 1 + .../AVX512SKX/DenoiserOp_DenoiserOp.hdr.md5 | 1 + .../ImageOp_DebugOp_ImageOp_0.png.md5 | 1 + 87 files changed, 874 insertions(+), 488 deletions(-) create mode 100644 apps/ospTestSuite/test_imageop.cpp delete mode 100644 modules/cpu/fb/FrameBufferView.cpp create mode 100644 modules/cpu/fb/FrameOp.cpp create mode 100644 modules/cpu/fb/FrameOp.h delete mode 100644 modules/cpu/fb/ImageOp.h create mode 100644 modules/cpu/fb/PixelOp.cpp 
create mode 100644 modules/cpu/fb/PixelOp.h rename modules/cpu/fb/{ImageOpShared.h => PixelOpShared.h} (100%) rename {modules/cpu => ospray}/common/Managed.cpp (98%) rename {modules/cpu => ospray}/common/Managed.h (82%) rename {modules/cpu => ospray}/fb/FrameBufferView.h (79%) rename {modules/cpu => ospray}/fb/ImageOp.cpp (55%) create mode 100644 ospray/fb/ImageOp.h create mode 100644 test_image_data/baseline/AVX2/DenoiserOp_DenoiserOp.hdr.md5 create mode 100644 test_image_data/baseline/AVX2/ImageOp_DebugOp_ImageOp_0.png.md5 create mode 100644 test_image_data/baseline/AVX512SKX/DenoiserOp_DenoiserOp.hdr.md5 create mode 100644 test_image_data/baseline/AVX512SKX/ImageOp_DebugOp_ImageOp_0.png.md5 diff --git a/.github/workflows/ci.linux.gpu.yml b/.github/workflows/ci.linux.gpu.yml index 07df3cd68..1f52a741c 100644 --- a/.github/workflows/ci.linux.gpu.yml +++ b/.github/workflows/ci.linux.gpu.yml @@ -29,7 +29,7 @@ jobs: export SYCL_BUNDLE_ROOT=$DPCPP_ROOT export CC=clang export CXX=clang++ - scripts/build/linux.sh -G Ninja -DBUILD_GLFW=OFF -DBUILD_OSPRAY_MODULE_MULTIDEVICE=OFF -DBUILD_GPU_SUPPORT=ON -DOPENVKL_URL=https://${RENDERKIT_GITHUB_TOKEN}@github.com/intel-innersource/libraries.graphics.renderkit.openvkl.git -DOPENVKL_BRANCH=bcca9b98 -DRKCOMMON_VERSION=devel + scripts/build/linux.sh -G Ninja -DBUILD_GLFW=OFF -DBUILD_OSPRAY_MODULE_MULTIDEVICE=OFF -DBUILD_GPU_SUPPORT=ON -DBUILD_OIDN=ON -DOPENVKL_URL=https://${RENDERKIT_GITHUB_TOKEN}@github.com/intel-innersource/libraries.graphics.renderkit.openvkl.git -DOPENVKL_BRANCH=bcca9b98 -DRKCOMMON_VERSION=devel artifact-out: build-ubuntu2204 artifact-path: build/install/ospray build/install/embree build/CMakeCache.txt build/*/build/CMakeCache.txt @@ -47,7 +47,7 @@ jobs: export SYCL_BUNDLE_ROOT=$DPCPP_ROOT export CC=clang export CXX=clang++ - scripts/build/linux.sh -G Ninja -DBUILD_GLFW=OFF -DBUILD_OSPRAY_MODULE_MULTIDEVICE=OFF -DBUILD_GPU_SUPPORT=ON 
-DOPENVKL_URL=https://${RENDERKIT_GITHUB_TOKEN}@github.com/intel-innersource/libraries.graphics.renderkit.openvkl.git -DOPENVKL_BRANCH=gpu-devel -DRKCOMMON_VERSION=devel + scripts/build/linux.sh -G Ninja -DBUILD_GLFW=OFF -DBUILD_OSPRAY_MODULE_MULTIDEVICE=OFF -DBUILD_GPU_SUPPORT=ON -DBUILD_OIDN=ON -DOPENVKL_URL=https://${RENDERKIT_GITHUB_TOKEN}@github.com/intel-innersource/libraries.graphics.renderkit.openvkl.git -DOPENVKL_BRANCH=gpu-devel -DRKCOMMON_VERSION=devel artifact-out: build-ubuntu2204-devel artifact-path: build/install/ospray build/install/embree build/CMakeCache.txt build/*/build/CMakeCache.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index be61db139..94f92d973 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ Version History - New parameter `maxScatteringEvents` for the `pathtracer` which limits the number of non-specular (i.e., diffuse and glossy) bounces +- Support denoising on the GPU with OIDN 2.0, which is the new minimum + version ### Changes in v2.11.0: diff --git a/apps/ospTestSuite/CMakeLists.txt b/apps/ospTestSuite/CMakeLists.txt index cf55d0415..292ee49ec 100644 --- a/apps/ospTestSuite/CMakeLists.txt +++ b/apps/ospTestSuite/CMakeLists.txt @@ -50,6 +50,7 @@ add_executable(ospTestSuite test_motionblur.cpp test_framebuffer.cpp test_interpolation.cpp + test_imageop.cpp ospTestSuite.cpp ) @@ -62,6 +63,14 @@ PRIVATE $<$:${GLM_TARGET}> ) +if (OSPRAY_MODULE_DENOISER) + # Enable OIDN tests + target_compile_definitions(ospTestSuite + PUBLIC + -DOSPRAY_MODULE_DENOISER + ) +endif() + ospray_sign_target(ospTestSuite) install(TARGETS ospTestSuite diff --git a/apps/ospTestSuite/test_fixture.cpp b/apps/ospTestSuite/test_fixture.cpp index 66681c65f..81fa27513 100644 --- a/apps/ospTestSuite/test_fixture.cpp +++ b/apps/ospTestSuite/test_fixture.cpp @@ -15,12 +15,6 @@ Base::Base() ::testing::UnitTest::GetInstance()->current_test_case(); const ::testing::TestInfo *const testInfo = ::testing::UnitTest::GetInstance()->current_test_info(); - imgSize = 
ospEnv->GetImgSize(); - - framebuffer = cpp::FrameBuffer(imgSize.x, - imgSize.y, - frameBufferFormat, - OSP_FB_COLOR | OSP_FB_ACCUM | OSP_FB_DEPTH); { std::string testCaseName = testCase->name(); @@ -38,15 +32,21 @@ Base::Base() byte = '_'; } + imgSize = ospEnv->GetImgSize(); rendererType = "scivis"; frames = 1; samplesPerPixel = 16; - - imageTool.reset(new OSPImageTools(imgSize, GetTestName(), frameBufferFormat)); } void Base::SetUp() { + framebuffer = cpp::FrameBuffer(imgSize.x, + imgSize.y, + frameBufferFormat, + OSP_FB_COLOR | OSP_FB_ACCUM | OSP_FB_DEPTH); + + imageTool.reset(new OSPImageTools(imgSize, GetTestName(), frameBufferFormat)); + CreateEmptyScene(); } diff --git a/apps/ospTestSuite/test_framebuffer.cpp b/apps/ospTestSuite/test_framebuffer.cpp index 5307bdce2..0da3b1832 100644 --- a/apps/ospTestSuite/test_framebuffer.cpp +++ b/apps/ospTestSuite/test_framebuffer.cpp @@ -33,8 +33,6 @@ IDBuffer::IDBuffer() appIDs = std::get<1>(params); rendererType = std::get<2>(params); - framebuffer = - cpp::FrameBuffer(imgSize.x, imgSize.y, frameBufferFormat, idBuffer); samplesPerPixel = 2; } @@ -42,6 +40,9 @@ void IDBuffer::SetUp() { Base::SetUp(); + framebuffer = + cpp::FrameBuffer(imgSize.x, imgSize.y, frameBufferFormat, idBuffer); + instances.clear(); auto builder = ospray::testing::newBuilder( diff --git a/apps/ospTestSuite/test_imageop.cpp b/apps/ospTestSuite/test_imageop.cpp new file mode 100644 index 000000000..52df67e3c --- /dev/null +++ b/apps/ospTestSuite/test_imageop.cpp @@ -0,0 +1,96 @@ +// Copyright 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "ospray_testing.h" +#include "test_fixture.h" + +namespace OSPRayTestScenes { + +class ImageOpBase : public Base +{ + public: + ImageOpBase() + { + samplesPerPixel = 4; + } + void SetUp() override; + + protected: + std::string imageOp; +}; + +void ImageOpBase::SetUp() +{ + Base::SetUp(); + + instances.clear(); + + auto builder = ospray::testing::newBuilder("cornell_box"); + 
ospray::testing::setParam(builder, "rendererType", rendererType); + ospray::testing::commit(builder); + + world = ospray::testing::buildWorld(builder); + ospray::testing::release(builder); + + camera.setParam("position", vec3f(0.f, 0.f, -2.f)); + + cpp::ImageOperation imgOp(imageOp); + framebuffer.setParam("imageOperation", cpp::CopiedData(imgOp)); + framebuffer.commit(); +} + +#ifdef OSPRAY_MODULE_DENOISER +class DenoiserOp : public ImageOpBase, public ::testing::Test +{ + public: + DenoiserOp() + { + ospLoadModule("denoiser"); + + frameBufferFormat = OSP_FB_RGBA32F; + rendererType = "pathtracer"; + imageOp = "denoiser"; + } + void SetUp() override + { + ImageOpBase::SetUp(); + } +}; +#endif + +class ImageOp + : public ImageOpBase, + public ::testing::TestWithParam< + std::tuple> +{ + public: + ImageOp() + { + auto params = GetParam(); + imageOp = std::get<0>(params); + rendererType = std::get<1>(params); + } + void SetUp() override + { + ImageOpBase::SetUp(); + } +}; + +// Test Instantiations ////////////////////////////////////////////////////// + +#ifdef OSPRAY_MODULE_DENOISER +TEST_F(DenoiserOp, DenoiserOp) +{ + PerformRenderTest(); +} +#endif + +TEST_P(ImageOp, ImageOp) +{ + PerformRenderTest(); +} + +INSTANTIATE_TEST_SUITE_P( + DebugOp, ImageOp, ::testing::Values(std::make_tuple("debug", "scivis"))); + +} // namespace OSPRayTestScenes diff --git a/apps/ospTestSuite/test_interpolation.cpp b/apps/ospTestSuite/test_interpolation.cpp index d1d0b73fd..d6e46bc20 100644 --- a/apps/ospTestSuite/test_interpolation.cpp +++ b/apps/ospTestSuite/test_interpolation.cpp @@ -40,15 +40,15 @@ Interpolation::Interpolation() if (attribute == 2) idBuffer = OSP_FB_NORMAL; - - framebuffer = - cpp::FrameBuffer(imgSize.x, imgSize.y, frameBufferFormat, idBuffer); } void Interpolation::SetUp() { Base::SetUp(); + framebuffer = + cpp::FrameBuffer(imgSize.x, imgSize.y, frameBufferFormat, idBuffer); + instances.clear(); auto builder = ospray::testing::newBuilder("interpolation"); diff 
--git a/cmake/ospray_options.cmake b/cmake/ospray_options.cmake index 47ee2e69f..ede06701e 100644 --- a/cmake/ospray_options.cmake +++ b/cmake/ospray_options.cmake @@ -115,7 +115,7 @@ endif() # OpenImageDenoise if (OSPRAY_MODULE_DENOISER) - find_package(OpenImageDenoise 1.2.3 REQUIRED) + find_package(OpenImageDenoise 2.0.0 REQUIRED) endif() # ISPC diff --git a/cmake/ospray_redistribute_deps.cmake b/cmake/ospray_redistribute_deps.cmake index c7f8a0a56..bdbd218a5 100644 --- a/cmake/ospray_redistribute_deps.cmake +++ b/cmake/ospray_redistribute_deps.cmake @@ -60,6 +60,39 @@ macro(ospray_add_dependent_lib TARGET_NAME) endif() endmacro() +macro(ospray_add_dependent_lib_plugins TARGET_NAME PLUGINS_PATTERN) + if (TARGET ${TARGET_NAME}) + # retrieve library directory + get_target_property(CONFIGURATIONS ${TARGET_NAME} IMPORTED_CONFIGURATIONS) + list(GET CONFIGURATIONS 0 CONFIGURATION) # use first/default configuration + get_target_property(LIBRARY ${TARGET_NAME} IMPORTED_LOCATION_${CONFIGURATION}) + get_filename_component(LIBRARY_DIR ${LIBRARY} DIRECTORY) + + # search for plugins with given file pattern + if (WIN32) + file(GLOB LIBRARY_PLUGINS LIST_DIRECTORIES FALSE + "${LIBRARY_DIR}/${PLUGINS_PATTERN}.dll" + ) + elseif (APPLE) + file(GLOB LIBRARY_PLUGINS LIST_DIRECTORIES FALSE + "${LIBRARY_DIR}/lib${PLUGINS_PATTERN}*.dylib" + ) + else() + file(GLOB LIBRARY_PLUGINS LIST_DIRECTORIES FALSE + "${LIBRARY_DIR}/lib${PLUGINS_PATTERN}.so*" + ) + endif() + + # iterate over all found plugins and add them to DEPENDENT_LIBS list + foreach(LIBRARY_PLUGIN ${LIBRARY_PLUGINS}) + list(APPEND DEPENDENT_LIBS ${LIBRARY_PLUGIN}) + ospray_install_namelink(${LIBRARY_PLUGIN}) + endforeach() + else() + message(STATUS "Skipping target '${TARGET_NAME}' plugins") + endif() +endmacro() + ospray_add_dependent_lib(ispcrt::ispcrt) ospray_add_dependent_lib(rkcommon::rkcommon) if (RKCOMMON_TASKING_TBB) @@ -77,6 +110,8 @@ ospray_add_dependent_lib(openvkl::openvkl_module_cpu_device_8) 
ospray_add_dependent_lib(openvkl::openvkl_module_cpu_device_16) if (OSPRAY_MODULE_DENOISER) ospray_add_dependent_lib(OpenImageDenoise) + ospray_add_dependent_lib(OpenImageDenoise_core) + ospray_add_dependent_lib_plugins(OpenImageDenoise "OpenImageDenoise_device_*") endif() if (WIN32) diff --git a/doc/prerequisites.md b/doc/prerequisites.md index 9ca91f3f5..4fcac45a7 100644 --- a/doc/prerequisites.md +++ b/doc/prerequisites.md @@ -47,7 +47,7 @@ before you can build OSPRay you need the following prerequisites: the variable `openvkl_DIR`, or disable `OSPRAY_ENABLE_VOLUMES`. - OSPRay also provides an optional module implementing the `denoiser` image operation, which is enabled by `OSPRAY_MODULE_DENOISER`. This - module requires Intel [Open Image Denoise] in version 1.2.3 or + module requires Intel [Open Image Denoise] in version 2.0.0 or newer. You may need to hint the location of the library with the CMake variable `OpenImageDenoise_DIR`. - For the optional MPI modules (enabled by `OSPRAY_MODULE_MPI`), which diff --git a/modules/cpu/CMakeLists.txt b/modules/cpu/CMakeLists.txt index fc04a8e0b..05e9df627 100644 --- a/modules/cpu/CMakeLists.txt +++ b/modules/cpu/CMakeLists.txt @@ -41,14 +41,13 @@ set(OSPRAY_CPP_SOURCES common/Group.cpp common/Instance.cpp common/MotionTransform.cpp - common/Managed.cpp common/World.cpp fb/FrameBuffer.cpp - fb/FrameBufferView.cpp fb/LocalFB.cpp fb/SparseFB.cpp - fb/ImageOp.cpp + fb/FrameOp.cpp + fb/PixelOp.cpp fb/TaskError.cpp fb/registration.cpp diff --git a/modules/cpu/ISPCDevice.cpp b/modules/cpu/ISPCDevice.cpp index b0eb5ac43..21aee02b9 100644 --- a/modules/cpu/ISPCDevice.cpp +++ b/modules/cpu/ISPCDevice.cpp @@ -286,8 +286,10 @@ void ISPCDevice::commit() reinterpret_cast(ispcrtDevice.nativeContextHandle()), true); - syclQueue = sycl::queue( - syclContext, syclDevice, {sycl::property::queue::enable_profiling()}); + syclQueue = sycl::queue(syclContext, + syclDevice, + {sycl::property::queue::enable_profiling(), + 
sycl::property::queue::in_order()}); #endif } @@ -566,7 +568,7 @@ OSPFrameBuffer ISPCDevice::frameBufferCreate( OSPImageOperation ISPCDevice::newImageOp(const char *type) { - ospray::ImageOp *ret = ImageOp::createInstance(type); + ospray::ImageOp *ret = ImageOp::createImageOp(type, *this); return (OSPImageOperation)ret; } @@ -636,8 +638,9 @@ OSPFuture ISPCDevice::renderFrame(OSPFrameBuffer _fb, return timer.seconds(); }); #else - return (OSPFuture) new RenderTask( - loadBalancer->renderFrame(fb, renderer, camera, world, false)); + std::pair events = + loadBalancer->renderFrame(fb, renderer, camera, world, false); + return (OSPFuture) new RenderTask(events.first, events.second); #endif } @@ -684,5 +687,17 @@ OSPPickResult ISPCDevice::pick(OSPFrameBuffer _fb, return renderer->pick(fb, camera, world, screenPos); } +#ifdef OSPRAY_TARGET_SYCL +sycl::nd_range<1> ISPCDevice::computeDispatchRange( + const size_t globalSize, const size_t workgroupSize) const +{ + // roundedRange global size must be at least workgroupSize + const size_t roundedRange = + std::max(size_t(1), (globalSize + workgroupSize - 1) / workgroupSize) + * workgroupSize; + return sycl::nd_range<1>(roundedRange, workgroupSize); +} +#endif + } // namespace api } // namespace ospray diff --git a/modules/cpu/ISPCDevice.h b/modules/cpu/ISPCDevice.h index afcaf46b3..6f901ea24 100644 --- a/modules/cpu/ISPCDevice.h +++ b/modules/cpu/ISPCDevice.h @@ -31,6 +31,14 @@ namespace ospray { struct LocalTiledLoadBalancer; +#ifdef OSPRAY_TARGET_SYCL +using AsyncEvent = sycl::event; +#else +struct AsyncEvent +{ +}; +#endif + namespace api { struct OSPRAY_SDK_INTERFACE ISPCDevice : public Device @@ -169,11 +177,29 @@ struct OSPRAY_SDK_INTERFACE ISPCDevice : public Device return ispcrtQueue; } + void *getPostProcessingCommandQueuePtr() override + { +#ifdef OSPRAY_TARGET_SYCL + return &syclQueue; +#else + return nullptr; +#endif + } + #ifdef OSPRAY_TARGET_SYCL sycl::queue &getSyclQueue() { return syclQueue; } + + /* Compute 
the rounded dispatch global size for the given work group size. + * SYCL requires that globalSize % workgroupSize == 0, ths function will + * round up globalSize and return nd_range(roundedSize, workgroupSize). + * The kernel being launched must discard tasks that are out of bounds + * bounds due to this rounding + */ + sycl::nd_range<1> computeDispatchRange( + const size_t globalSize, const size_t workgroupSize) const; #endif private: diff --git a/modules/cpu/common/Data.h b/modules/cpu/common/Data.h index f13541c8f..75ada2c5e 100644 --- a/modules/cpu/common/Data.h +++ b/modules/cpu/common/Data.h @@ -398,4 +398,20 @@ std::vector createArrayOfSh(const DataT &data) return retval; } +template <> +inline Data *ManagedObject::getParam( + const char *name, Data *valIfNotFound) +{ + auto *obj = ParameterizedObject::getParam( + name, (ManagedObject *)valIfNotFound); + if (obj && obj->managedObjectType == OSP_DATA) + return (Data *)obj; + else { + // reset query status if object is not a Data* + if (obj) + findParam(name)->query = false; + return valIfNotFound; + } +} + } // namespace ospray diff --git a/modules/cpu/common/ISPCRTBuffers.h b/modules/cpu/common/ISPCRTBuffers.h index 9cd12fe27..5f3b10f08 100644 --- a/modules/cpu/common/ISPCRTBuffers.h +++ b/modules/cpu/common/ISPCRTBuffers.h @@ -17,10 +17,15 @@ template struct BufferDevice : public ispcrt::Array { using ispcrt::Array::devicePtr; - BufferDevice(ispcrt::Device &device); + BufferDevice(ispcrt::Device &device, T *hostPtr = nullptr); BufferDevice(ispcrt::Device &device, size_t size); }; +template +BufferDevice::BufferDevice(ispcrt::Device &device, T *hostPtr) + : ispcrt::Array(device, hostPtr) +{} + template BufferDevice::BufferDevice(ispcrt::Device &device, size_t size) : ispcrt::Array(device, nullptr, size) diff --git a/modules/cpu/fb/FrameBuffer.cpp b/modules/cpu/fb/FrameBuffer.cpp index 765738946..1b7767f5d 100644 --- a/modules/cpu/fb/FrameBuffer.cpp +++ b/modules/cpu/fb/FrameBuffer.cpp @@ -2,10 +2,11 @@ // 
SPDX-License-Identifier: Apache-2.0 #include "FrameBuffer.h" +#include "FrameOp.h" +#include "OSPConfig.h" #ifndef OSPRAY_TARGET_SYCL #include "ISPCDevice_ispc.h" #endif -#include "OSPConfig.h" namespace { // Internal utilities for thread local progress tracking @@ -78,6 +79,14 @@ FrameBuffer::FrameBuffer(api::ISPCDevice &device, void FrameBuffer::commit() { + // Erase all image operations arrays + frameOps.clear(); + pixelOps.clear(); + pixelOpShs.clear(); + getSh()->pixelOps = nullptr; + getSh()->numPixelOps = 0; + + // Read image operations array set by user imageOpData = getParamDataT("imageOperation"); } @@ -170,42 +179,27 @@ bool FrameBuffer::frameCancelled() const return cancelRender; } -void FrameBuffer::prepareImageOps() -{ - findFirstFrameOperation(); - setPixelOpShs(); -} - -void FrameBuffer::findFirstFrameOperation() +void FrameBuffer::prepareLiveOpsForFBV( + FrameBufferView &fbv, bool fillFrameOps, bool fillPixelOps) { - firstFrameOperation = -1; - if (imageOps.empty()) - return; - - firstFrameOperation = imageOps.size(); - for (size_t i = 0; i < imageOps.size(); ++i) { - const auto *obj = imageOps[i].get(); - const bool isFrameOp = dynamic_cast(obj) != nullptr; - - if (firstFrameOperation == imageOps.size() && isFrameOp) - firstFrameOperation = i; - else if (firstFrameOperation < imageOps.size() && !isFrameOp) { - postStatusMsg(OSP_LOG_WARNING) - << "Invalid pixel/frame op pipeline: all frame operations " - "must come after all pixel operations"; - } - } -} - -void FrameBuffer::setPixelOpShs() -{ - pixelOpShs.clear(); - for (auto &op : imageOps) { - LivePixelOp *pop = dynamic_cast(op.get()); + // Iterate through all image operations set on commit + for (auto &&obj : *imageOpData) { + // Populate pixel operations + PixelOp *pop = dynamic_cast(obj); if (pop) { - pixelOpShs.push_back(pop->getSh()); + if (fillPixelOps) { + pixelOps.push_back(pop->attach()); + pixelOpShs.push_back(pixelOps.back()->getSh()); + } + } else { + // Populate frame operations + 
FrameOpInterface *fopi = dynamic_cast(obj); + if (fillFrameOps && fopi) + frameOps.push_back(fopi->attach(fbv)); } } + + // Prepare shared parameters for kernel getSh()->pixelOps = pixelOpShs.empty() ? nullptr : pixelOpShs.data(); getSh()->numPixelOps = pixelOpShs.size(); } diff --git a/modules/cpu/fb/FrameBuffer.h b/modules/cpu/fb/FrameBuffer.h index a082dfa1a..2b3b701fe 100644 --- a/modules/cpu/fb/FrameBuffer.h +++ b/modules/cpu/fb/FrameBuffer.h @@ -6,9 +6,9 @@ #include // ospray #include "ISPCDeviceObject.h" +#include "PixelOp.h" #include "common/Data.h" #include "common/FeatureFlagsEnum.h" -#include "fb/ImageOp.h" #include "ospray/ospray.h" #include "rkcommon/utility/ArrayView.h" // ispc shared @@ -16,6 +16,9 @@ namespace ospray { +struct Camera; +struct FrameBufferView; + // abstract frame buffer class struct OSPRAY_SDK_INTERFACE FrameBuffer : public AddStructShared @@ -68,6 +71,9 @@ struct OSPRAY_SDK_INTERFACE FrameBuffer // end the frame and run any final post-processing frame ops virtual void endFrame(const float errorThreshold, const Camera *camera) = 0; + // Invoke post-processing by calling all FrameOps + virtual AsyncEvent postProcess(const Camera *camera, bool wait) = 0; + // common function to help printf-debugging, every derived class should // override this virtual std::string toString() const override; @@ -96,16 +102,9 @@ struct OSPRAY_SDK_INTERFACE FrameBuffer FeatureFlagsOther getFeatureFlagsOther() const; protected: - // Finalize the pixel op and frame op state for rendering on commit - void prepareImageOps(); - - /*! 
Find the index of the first frameoperation included in - * the imageop pipeline - */ - void findFirstFrameOperation(); - - // Find all the LivePixelOps and set their ISPC-side data on the FrameBuffer - void setPixelOpShs(); + // Fill vectors with instantiated live objects + void prepareLiveOpsForFBV( + FrameBufferView &fbv, bool fillFrameOps = true, bool fillPixelOps = true); const vec2i size; @@ -131,9 +130,9 @@ struct OSPRAY_SDK_INTERFACE FrameBuffer std::atomic stagesCompleted{OSP_FRAME_FINISHED}; Ref> imageOpData; - std::vector> imageOps; + std::vector> frameOps; + std::vector> pixelOps; std::vector pixelOpShs; - size_t firstFrameOperation = -1; FeatureFlagsOther featureFlags{FFO_NONE}; }; diff --git a/modules/cpu/fb/FrameBufferShared.h b/modules/cpu/fb/FrameBufferShared.h index 08cc0d927..17f6040aa 100644 --- a/modules/cpu/fb/FrameBufferShared.h +++ b/modules/cpu/fb/FrameBufferShared.h @@ -3,7 +3,7 @@ #pragma once -#include "fb/ImageOpShared.h" +#include "fb/PixelOpShared.h" #include "ospray/OSPEnums.h" #ifdef __cplusplus diff --git a/modules/cpu/fb/FrameBufferView.cpp b/modules/cpu/fb/FrameBufferView.cpp deleted file mode 100644 index 6f377e4a6..000000000 --- a/modules/cpu/fb/FrameBufferView.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2009 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "FrameBufferView.h" -#include "FrameBuffer.h" - -namespace ospray { - -FrameBufferView::FrameBufferView(FrameBuffer *fb, - OSPFrameBufferFormat colorFormat, - void *colorBuffer, - float *depthBuffer, - vec3f *normalBuffer, - vec3f *albedoBuffer) - : fbDims(fb->getNumPixels()), - viewDims(fbDims), - haloDims(0), - colorBufferFormat(colorFormat), - colorBuffer(colorBuffer), - depthBuffer(depthBuffer), - normalBuffer(normalBuffer), - albedoBuffer(albedoBuffer), - originalFB(fb) -{} - -} // namespace ospray diff --git a/modules/cpu/fb/FrameOp.cpp b/modules/cpu/fb/FrameOp.cpp new file mode 100644 index 000000000..fdf2c721b --- /dev/null +++ 
b/modules/cpu/fb/FrameOp.cpp @@ -0,0 +1,15 @@ +// Copyright 2009 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "FrameOp.h" + +namespace ospray { + +LiveFrameOp::LiveFrameOp(api::ISPCDevice &device, FrameBufferView &_fbView) + : device(device), fbViewSh(device.getIspcrtContext()) +{ + // Copy data pointed by _fbView pointer to shared memory + std::memcpy(fbViewSh.data(), &_fbView, sizeof(FrameBufferView)); +} + +} // namespace ospray diff --git a/modules/cpu/fb/FrameOp.h b/modules/cpu/fb/FrameOp.h new file mode 100644 index 000000000..58b0383c5 --- /dev/null +++ b/modules/cpu/fb/FrameOp.h @@ -0,0 +1,39 @@ +// Copyright 2009 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "ISPCDevice.h" +#include "common/ISPCRTBuffers.h" +#include "fb/FrameBufferView.h" +#include "fb/ImageOp.h" + +namespace ospray { + +struct OSPRAY_SDK_INTERFACE FrameOp : public FrameOpInterface +{ + FrameOp(api::ISPCDevice &device) : device(device) {} + ~FrameOp() override = default; + + protected: + api::ISPCDevice &device; +}; + +struct OSPRAY_SDK_INTERFACE LiveFrameOp : public LiveFrameOpInterface +{ + LiveFrameOp(api::ISPCDevice &device, FrameBufferView &fbView); + ~LiveFrameOp() override = default; + + protected: + inline const FrameBufferView &getFBView() + { + return *fbViewSh.data(); + } + + api::ISPCDevice &device; + + private: + BufferShared fbViewSh; +}; + +} // namespace ospray diff --git a/modules/cpu/fb/ImageOp.h b/modules/cpu/fb/ImageOp.h deleted file mode 100644 index e1f31969a..000000000 --- a/modules/cpu/fb/ImageOp.h +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2009 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "FrameBufferView.h" -#include "common/ObjectFactory.h" -#include "common/StructShared.h" -#include "rkcommon/math/box.h" -// ispc shared -#include "ImageOpShared.h" -#include "fb/TileShared.h" - -namespace ospray { - -struct Camera; - -// An instance of an image op that is 
actually attached to a framebuffer -struct OSPRAY_SDK_INTERFACE LiveImageOp -{ - FrameBufferView fbView; - - LiveImageOp(FrameBufferView &fbView); - - virtual ~LiveImageOp() = default; - - virtual void beginFrame() {} - - virtual void endFrame() {} -}; - -/*! \brief base abstraction for a "Image Op" to be performed for - every image that gets written into a frame buffer. - - A ImageOp is basically a "hook" that allows to inject arbitrary - code, such as postprocessing, filtering, blending, tone mapping, - sending tiles to a display wall, etc. -*/ -struct OSPRAY_SDK_INTERFACE ImageOp : public ManagedObject, - public ObjectFactory -{ - ImageOp(); - ~ImageOp() override = default; - - std::string toString() const override; - - /*! Attach an image op to an existing framebuffer. Use this - * to pass the params from the API to the instance of the image op - * which will actually be run on the framebuffer view or tiles of the - * framebuffer passed - */ - virtual std::unique_ptr attach(FrameBufferView &fbView) = 0; -}; - -OSPTYPEFOR_SPECIALIZATION(ImageOp *, OSP_IMAGE_OPERATION); - -struct OSPRAY_SDK_INTERFACE PixelOp : public ImageOp -{ - ~PixelOp() override = default; -}; - -struct OSPRAY_SDK_INTERFACE FrameOp : public ImageOp -{ - ~FrameOp() override = default; - virtual vec2i haloSize() - { - return vec2i(0); - } -}; - -struct OSPRAY_SDK_INTERFACE LivePixelOp - : public AddStructShared -{ - LivePixelOp(FrameBufferView &fbView); - ~LivePixelOp() override = default; -}; - -struct OSPRAY_SDK_INTERFACE LiveFrameOp : public LiveImageOp -{ - LiveFrameOp(FrameBufferView &fbView); - ~LiveFrameOp() override = default; - - virtual void process(const Camera *camera) = 0; -}; - -} // namespace ospray diff --git a/modules/cpu/fb/LocalFB.cpp b/modules/cpu/fb/LocalFB.cpp index 640377257..58b990762 100644 --- a/modules/cpu/fb/LocalFB.cpp +++ b/modules/cpu/fb/LocalFB.cpp @@ -2,13 +2,14 @@ // SPDX-License-Identifier: Apache-2.0 #include "LocalFB.h" -#include -#include -#include 
-#include -#include -#include "ImageOp.h" +#include "FrameOp.h" #include "SparseFB.h" +#include "fb/FrameBufferView.h" +#include "render/util.h" +#include "rkcommon/common.h" +#include "rkcommon/tasking/parallel_for.h" +#include "rkcommon/utility/ArrayView.h" + #ifndef OSPRAY_TARGET_SYCL #include "fb/LocalFB_ispc.h" #else @@ -29,9 +30,11 @@ void LocalFrameBuffer_writeIDTile(void *uniform _fb, const void *uniform src); } // namespace ispc #endif -#include "render/util.h" -#include "rkcommon/common.h" -#include "rkcommon/tasking/parallel_for.h" + +#include +#include +#include +#include namespace ospray { @@ -151,19 +154,17 @@ void LocalFrameBuffer::commit() { FrameBuffer::commit(); - imageOps.clear(); if (imageOpData) { FrameBufferView fbv(this, getColorBufferFormat(), - colorBuffer ? colorBuffer->data() : nullptr, - depthBuffer ? depthBuffer->data() : nullptr, - normalBuffer ? normalBuffer->data() : nullptr, - albedoBuffer ? albedoBuffer->data() : nullptr); + getNumPixels(), + colorBuffer ? colorBuffer->devicePtr() : nullptr, + depthBuffer ? depthBuffer->devicePtr() : nullptr, + normalBuffer ? normalBuffer->devicePtr() : nullptr, + albedoBuffer ? 
albedoBuffer->devicePtr() : nullptr); - for (auto &&obj : *imageOpData) - imageOps.push_back(obj->attach(fbv)); + prepareLiveOpsForFBV(fbv); } - prepareImageOps(); } vec2i LocalFrameBuffer::getNumRenderTasks() const @@ -327,32 +328,22 @@ float LocalFrameBuffer::taskError(const uint32_t taskID) const void LocalFrameBuffer::beginFrame() { FrameBuffer::beginFrame(); - - std::for_each(imageOps.begin(), - imageOps.end(), - [](std::unique_ptr &p) { p->beginFrame(); }); } void LocalFrameBuffer::endFrame( - const float errorThreshold, const Camera *camera) + const float errorThreshold, const Camera *) { - if (!imageOps.empty() && firstFrameOperation < imageOps.size()) { - std::for_each(imageOps.begin() + firstFrameOperation, - imageOps.end(), - [&](std::unique_ptr &iop) { - LiveFrameOp *fop = dynamic_cast(iop.get()); - if (fop) - fop->process(camera); - }); - } - - std::for_each(imageOps.begin(), - imageOps.end(), - [](std::unique_ptr &p) { p->endFrame(); }); - frameVariance = taskErrorRegion.refine(errorThreshold); } +AsyncEvent LocalFrameBuffer::postProcess(const Camera *camera, bool wait) +{ + AsyncEvent event; + for (auto &p : frameOps) + p->process((wait) ? 
nullptr : &event, camera); + return event; +} + const void *LocalFrameBuffer::mapBuffer(OSPFrameBufferChannel channel) { const void *buf = nullptr; diff --git a/modules/cpu/fb/LocalFB.h b/modules/cpu/fb/LocalFB.h index 0c3f3ed1a..06bd499ef 100644 --- a/modules/cpu/fb/LocalFB.h +++ b/modules/cpu/fb/LocalFB.h @@ -8,8 +8,8 @@ #include "fb/FrameBuffer.h" #include "fb/TaskError.h" // rkcommon -#include #include "rkcommon/containers/AlignedVector.h" +#include "rkcommon/utility/ArrayView.h" // ispc shared #include "LocalFBShared.h" #include "TileShared.h" @@ -55,6 +55,8 @@ struct OSPRAY_SDK_INTERFACE LocalFrameBuffer void endFrame(const float errorThreshold, const Camera *camera) override; + AsyncEvent postProcess(const Camera *camera, bool wait) override; + const void *mapBuffer(OSPFrameBufferChannel channel) override; void unmap(const void *mappedMem) override; diff --git a/modules/cpu/fb/PixelOp.cpp b/modules/cpu/fb/PixelOp.cpp new file mode 100644 index 000000000..35472c5ca --- /dev/null +++ b/modules/cpu/fb/PixelOp.cpp @@ -0,0 +1,14 @@ +// Copyright 2009 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "PixelOp.h" + +namespace ospray { + +// ImageOp definitions //////////////////////////////////////////////////////// + +LivePixelOp::LivePixelOp(api::ISPCDevice &device) + : AddStructShared(device.getIspcrtContext()) +{} + +} // namespace ospray diff --git a/modules/cpu/fb/PixelOp.h b/modules/cpu/fb/PixelOp.h new file mode 100644 index 000000000..080a395d9 --- /dev/null +++ b/modules/cpu/fb/PixelOp.h @@ -0,0 +1,34 @@ +// Copyright 2009 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "ISPCDevice.h" +#include "common/StructShared.h" +#include "fb/ImageOp.h" +// ispc shared +#include "PixelOpShared.h" + +namespace ospray { + +struct FrameBufferView; + +struct OSPRAY_SDK_INTERFACE LivePixelOp + : public AddStructShared +{ + LivePixelOp(api::ISPCDevice &device); + ~LivePixelOp() override = default; +}; + +struct 
OSPRAY_SDK_INTERFACE PixelOp : public ImageOp +{ + PixelOp(api::ISPCDevice &device) : device(device) {} + ~PixelOp() override = default; + + virtual std::unique_ptr attach() = 0; + + protected: + api::ISPCDevice &device; +}; + +} // namespace ospray diff --git a/modules/cpu/fb/ImageOpShared.h b/modules/cpu/fb/PixelOpShared.h similarity index 100% rename from modules/cpu/fb/ImageOpShared.h rename to modules/cpu/fb/PixelOpShared.h diff --git a/modules/cpu/fb/SparseFB.cpp b/modules/cpu/fb/SparseFB.cpp index be6c96097..92bea09ad 100644 --- a/modules/cpu/fb/SparseFB.cpp +++ b/modules/cpu/fb/SparseFB.cpp @@ -2,9 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 #include "SparseFB.h" -#include -#include -#include "ImageOp.h" +#include "PixelOp.h" +#include "fb/FrameBufferView.h" #include "render/util.h" #include "rkcommon/common.h" #include "rkcommon/tasking/parallel_for.h" @@ -13,6 +12,9 @@ #include "fb/SparseFB_ispc.h" #endif +#include +#include + namespace ospray { SparseFrameBuffer::SparseFrameBuffer(api::ISPCDevice &device, @@ -64,21 +66,20 @@ void SparseFrameBuffer::commit() { FrameBuffer::commit(); - imageOps.clear(); if (imageOpData) { - FrameBufferView fbv( - this, getColorBufferFormat(), nullptr, nullptr, nullptr, nullptr); + FrameBufferView fbv(this, + getColorBufferFormat(), + getNumPixels(), + nullptr, + nullptr, + nullptr, + nullptr); // Sparse framebuffer cannot execute frame operations because it doesn't // have the full framebuffer. 
This is handled by the parent object managing // the set of sparse framebuffer's, so here we just ignore them - for (auto &&obj : *imageOpData) { - if (dynamic_cast(obj)) { - imageOps.push_back(obj->attach(fbv)); - } - } + prepareLiveOpsForFBV(fbv, false, true); } - prepareImageOps(); } vec2i SparseFrameBuffer::getNumRenderTasks() const @@ -168,17 +169,6 @@ void SparseFrameBuffer::beginFrame() tile.accumID = getFrameID(); } } - - std::for_each(imageOps.begin(), - imageOps.end(), - [](std::unique_ptr &p) { p->beginFrame(); }); -} - -void SparseFrameBuffer::endFrame(const float, const Camera *) -{ - std::for_each(imageOps.begin(), - imageOps.end(), - [](std::unique_ptr &p) { p->endFrame(); }); } const void *SparseFrameBuffer::mapBuffer(OSPFrameBufferChannel) diff --git a/modules/cpu/fb/SparseFB.h b/modules/cpu/fb/SparseFB.h index 9b4a29eef..7bc800989 100644 --- a/modules/cpu/fb/SparseFB.h +++ b/modules/cpu/fb/SparseFB.h @@ -78,7 +78,14 @@ struct OSPRAY_SDK_INTERFACE SparseFrameBuffer void beginFrame() override; - void endFrame(const float errorThreshold, const Camera *camera) override; + void endFrame(const float, const Camera *) override {} + + AsyncEvent postProcess(const Camera *, bool) override + { + AsyncEvent e; + // Do not run post-processing on sparse frame buffer + return e; + } // Mapping sparse framebuffers is not supported, will return null const void *mapBuffer(OSPFrameBufferChannel channel) override; diff --git a/modules/cpu/fb/frame_ops/Blur.cpp b/modules/cpu/fb/frame_ops/Blur.cpp index 5007af616..e08afc2c5 100644 --- a/modules/cpu/fb/frame_ops/Blur.cpp +++ b/modules/cpu/fb/frame_ops/Blur.cpp @@ -5,7 +5,8 @@ namespace ospray { -std::unique_ptr BlurFrameOp::attach(FrameBufferView &fbView) +std::unique_ptr BlurFrameOp::attach( + FrameBufferView &fbView) { if (!fbView.colorBuffer) { throw std::runtime_error( @@ -15,10 +16,10 @@ std::unique_ptr BlurFrameOp::attach(FrameBufferView &fbView) if (fbView.colorBufferFormat == OSP_FB_RGBA8 || 
fbView.colorBufferFormat == OSP_FB_SRGBA) { - return rkcommon::make_unique>(fbView); + return rkcommon::make_unique>(device, fbView); } - return rkcommon::make_unique>(fbView); + return rkcommon::make_unique>(device, fbView); } std::string BlurFrameOp::toString() const diff --git a/modules/cpu/fb/frame_ops/Blur.h b/modules/cpu/fb/frame_ops/Blur.h index b2b6507a9..aeb4dd205 100644 --- a/modules/cpu/fb/frame_ops/Blur.h +++ b/modules/cpu/fb/frame_ops/Blur.h @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // ospray -#include "../ImageOp.h" +#include "fb/FrameOp.h" #include "rkcommon/tasking/parallel_for.h" // std #include @@ -12,7 +12,12 @@ namespace ospray { // The blur frame op is a test which applies a Gaussian blur to the frame struct OSPRAY_SDK_INTERFACE BlurFrameOp : public FrameOp { - std::unique_ptr attach(FrameBufferView &fbView) override; + BlurFrameOp(api::Device &device) + : FrameOp(static_cast(device)) + {} + + std::unique_ptr attach( + FrameBufferView &fbView) override; std::string toString() const override; }; @@ -20,17 +25,20 @@ struct OSPRAY_SDK_INTERFACE BlurFrameOp : public FrameOp template struct OSPRAY_SDK_INTERFACE LiveBlurFrameOp : public LiveFrameOp { - LiveBlurFrameOp(FrameBufferView &_fbView) : LiveFrameOp(_fbView) {} + LiveBlurFrameOp(api::ISPCDevice &device, FrameBufferView &_fbView) + : LiveFrameOp(device, _fbView) + {} - void process(const Camera *) override; + void process(void *, const Camera *) override; }; // Inlined definitions //////////////////////////////////////////////////////// template -inline void LiveBlurFrameOp::process(const Camera *) +inline void LiveBlurFrameOp::process(void *, const Camera *) { // TODO: For SRGBA we actually need to convert to linear before filtering + const FrameBufferView &fbView = getFBView(); T *color = static_cast(fbView.colorBuffer); const int blurRadius = 4; // variance = std-dev^2 diff --git a/modules/cpu/fb/frame_ops/Debug.cpp b/modules/cpu/fb/frame_ops/Debug.cpp index 
d312f1925..ac7bbe691 100644 --- a/modules/cpu/fb/frame_ops/Debug.cpp +++ b/modules/cpu/fb/frame_ops/Debug.cpp @@ -2,13 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 #include "Debug.h" +#include "fb/FrameBuffer.h" #include "rkcommon/tasking/parallel_for.h" +// ispc shared +#include "fb/LocalFBShared.h" // std #include namespace ospray { -std::unique_ptr DebugFrameOp::attach(FrameBufferView &fbView) +std::unique_ptr DebugFrameOp::attach( + FrameBufferView &fbView) { if (!fbView.colorBuffer) { throw std::runtime_error( @@ -16,7 +20,7 @@ std::unique_ptr DebugFrameOp::attach(FrameBufferView &fbView) "data"); } - return rkcommon::make_unique(fbView); + return rkcommon::make_unique(device, fbView); } std::string DebugFrameOp::toString() const @@ -24,13 +28,43 @@ std::string DebugFrameOp::toString() const return "ospray::DebugFrameOp"; } -LiveDebugFrameOp::LiveDebugFrameOp(FrameBufferView &_fbView) - : LiveFrameOp(_fbView) +LiveDebugFrameOp::LiveDebugFrameOp( + api::ISPCDevice &device, FrameBufferView &_fbView) + : LiveFrameOp(device, _fbView) {} -void LiveDebugFrameOp::process(const Camera *) +void LiveDebugFrameOp::process(void *waitEvent, const Camera *) { +#ifdef OSPRAY_TARGET_SYCL + const FrameBufferView &fbView = getFBView(); + const size_t numTasks = fbView.viewDims.x * fbView.viewDims.y; + const sycl::nd_range<1> dispatchRange = + device.computeDispatchRange(numTasks, 16); + sycl::event event = device.getSyclQueue().submit([&](sycl::handler &cgh) { + cgh.parallel_for(dispatchRange, [=](sycl::nd_item<1> taskIndex) { + uint32_t i = taskIndex.get_global_id(0); + if (i >= numTasks) + return; + if (fbView.colorBufferFormat == OSP_FB_RGBA8 + || fbView.colorBufferFormat == OSP_FB_SRGBA) { + uint8_t *pixel = static_cast(fbView.colorBuffer) + i * 4; + pixel[0] = 255; + } else { + float *pixel = static_cast(fbView.colorBuffer) + i * 4; + pixel[0] = 1.f; + } + }); + }); + + if (!waitEvent) + event.wait_and_throw(); + else + *(sycl::event *)waitEvent = event; +#else + 
(void)waitEvent; + // DebugFrameOp just colors the whole frame with red + const FrameBufferView &fbView = getFBView(); tasking::parallel_for(fbView.viewDims.x * fbView.viewDims.y, [&](int i) { if (fbView.colorBufferFormat == OSP_FB_RGBA8 || fbView.colorBufferFormat == OSP_FB_SRGBA) { @@ -41,6 +75,7 @@ void LiveDebugFrameOp::process(const Camera *) pixel[0] = 1.f; } }); +#endif } } // namespace ospray diff --git a/modules/cpu/fb/frame_ops/Debug.h b/modules/cpu/fb/frame_ops/Debug.h index edaef789c..2f4ffa699 100644 --- a/modules/cpu/fb/frame_ops/Debug.h +++ b/modules/cpu/fb/frame_ops/Debug.h @@ -2,21 +2,26 @@ // SPDX-License-Identifier: Apache-2.0 // ospray -#include "../ImageOp.h" +#include "fb/FrameOp.h" namespace ospray { struct OSPRAY_SDK_INTERFACE DebugFrameOp : public FrameOp { - std::unique_ptr attach(FrameBufferView &fbView) override; + DebugFrameOp(api::Device &device) + : FrameOp(static_cast(device)) + {} + + std::unique_ptr attach( + FrameBufferView &fbView) override; std::string toString() const override; }; struct OSPRAY_SDK_INTERFACE LiveDebugFrameOp : public LiveFrameOp { - LiveDebugFrameOp(FrameBufferView &fbView); - void process(const Camera *) override; + LiveDebugFrameOp(api::ISPCDevice &device, FrameBufferView &fbView); + void process(void *waitEvent, const Camera *) override; }; } // namespace ospray diff --git a/modules/cpu/fb/frame_ops/Depth.cpp b/modules/cpu/fb/frame_ops/Depth.cpp index 59b3625f0..d878592a6 100644 --- a/modules/cpu/fb/frame_ops/Depth.cpp +++ b/modules/cpu/fb/frame_ops/Depth.cpp @@ -8,7 +8,8 @@ namespace ospray { -std::unique_ptr DepthFrameOp::attach(FrameBufferView &fbView) +std::unique_ptr DepthFrameOp::attach( + FrameBufferView &fbView) { if (!fbView.colorBuffer) { throw std::runtime_error( @@ -22,7 +23,7 @@ std::unique_ptr DepthFrameOp::attach(FrameBufferView &fbView) "data"); } - return rkcommon::make_unique(fbView); + return rkcommon::make_unique(device, fbView); } std::string DepthFrameOp::toString() const @@ -30,15 
+31,17 @@ std::string DepthFrameOp::toString() const return "ospray::DepthFrameOp"; } -LiveDepthFrameOp::LiveDepthFrameOp(FrameBufferView &_fbView) - : LiveFrameOp(_fbView) +LiveDepthFrameOp::LiveDepthFrameOp( + api::ISPCDevice &device, FrameBufferView &_fbView) + : LiveFrameOp(device, _fbView) {} -void LiveDepthFrameOp::process(const Camera *) +void LiveDepthFrameOp::process(void *, const Camera *) { // First find the min/max depth range to normalize the image, // we don't use minmax_element here b/c we don't want inf to be // found as the max depth value + const FrameBufferView &fbView = getFBView(); const int numPixels = fbView.fbDims.x * fbView.fbDims.y; vec2f depthRange(std::numeric_limits::infinity(), -std::numeric_limits::infinity()); diff --git a/modules/cpu/fb/frame_ops/Depth.h b/modules/cpu/fb/frame_ops/Depth.h index 2699a218a..654e39342 100644 --- a/modules/cpu/fb/frame_ops/Depth.h +++ b/modules/cpu/fb/frame_ops/Depth.h @@ -1,23 +1,28 @@ // Copyright 2020 Intel Corporation // SPDX-License-Identifier: Apache-2.0 -#include "../ImageOp.h" +#include "fb/FrameOp.h" namespace ospray { //! 
Depth frameop replaces the color data with a normalized depth buffer img struct OSPRAY_SDK_INTERFACE DepthFrameOp : public FrameOp { - std::unique_ptr attach(FrameBufferView &fbView) override; + DepthFrameOp(api::Device &device) + : FrameOp(static_cast(device)) + {} + + std::unique_ptr attach( + FrameBufferView &fbView) override; std::string toString() const override; }; struct OSPRAY_SDK_INTERFACE LiveDepthFrameOp : public LiveFrameOp { - LiveDepthFrameOp(FrameBufferView &fbView); + LiveDepthFrameOp(api::ISPCDevice &device, FrameBufferView &fbView); - void process(const Camera *) override; + void process(void *, const Camera *) override; }; } // namespace ospray diff --git a/modules/cpu/fb/frame_ops/SSAO.cpp b/modules/cpu/fb/frame_ops/SSAO.cpp index 0e0bd78a9..1b2af5cc4 100644 --- a/modules/cpu/fb/frame_ops/SSAO.cpp +++ b/modules/cpu/fb/frame_ops/SSAO.cpp @@ -5,7 +5,8 @@ namespace ospray { -std::unique_ptr SSAOFrameOp::attach(FrameBufferView &fbView) +std::unique_ptr SSAOFrameOp::attach( + FrameBufferView &fbView) { if (!fbView.colorBuffer) { throw std::runtime_error( @@ -27,8 +28,14 @@ std::unique_ptr SSAOFrameOp::attach(FrameBufferView &fbView) void *ispcEquiv = ispc::LiveSSAOFrameOp_create(); - return rkcommon::make_unique( - fbView, ispcEquiv, ssaoStrength, radius, checkRadius, kernel, randomVecs); + return rkcommon::make_unique(device, + fbView, + ispcEquiv, + ssaoStrength, + radius, + checkRadius, + kernel, + randomVecs); } std::string SSAOFrameOp::toString() const @@ -62,14 +69,15 @@ void SSAOFrameOp::commit() } } -LiveSSAOFrameOp::LiveSSAOFrameOp(FrameBufferView &_fbView, +LiveSSAOFrameOp::LiveSSAOFrameOp(api::ISPCDevice &device, + FrameBufferView &_fbView, void *ispcEquiv, float ssaoStrength, float radius, float checkRadius, std::vector kernel, std::vector randomVecs) - : LiveFrameOp(_fbView), + : LiveFrameOp(device, _fbView), ispcEquiv(ispcEquiv), ssaoStrength(ssaoStrength), radius(radius), @@ -78,8 +86,9 @@ 
LiveSSAOFrameOp::LiveSSAOFrameOp(FrameBufferView &_fbView, randomVecs(randomVecs) {} -void LiveSSAOFrameOp::process(const Camera *camera) +void LiveSSAOFrameOp::process(void *, const Camera *camera) { + const FrameBufferView &fbView = getFBView(); if (fbView.colorBufferFormat == OSP_FB_RGBA8 || fbView.colorBufferFormat == OSP_FB_SRGBA) { // TODO: For SRGBA we actually need to convert to linear before filtering diff --git a/modules/cpu/fb/frame_ops/SSAO.h b/modules/cpu/fb/frame_ops/SSAO.h index f30d39404..708e4bc08 100644 --- a/modules/cpu/fb/frame_ops/SSAO.h +++ b/modules/cpu/fb/frame_ops/SSAO.h @@ -2,8 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // ospray -#include "../ImageOp.h" #include "camera/PerspectiveCamera.h" +#include "fb/FrameOp.h" #include "rkcommon/tasking/parallel_for.h" // std #include @@ -20,14 +20,14 @@ void LiveSSAOFrameOp_set(void *uniform _self, void *_kernel, void *_randomVecs); void LiveSSAOFrameOp_getOcclusion(const void *_self, - void *_fb, + const void *_fb, float *occlusionBuffer, const float radius, const float checkRadius, unsigned int kernelSize, int programID); void LiveSSAOFrameOp_applyOcclusion( - void *_self, void *_fb, void *_color, float *occlusionBuffer); + void *_self, const void *_fb, void *_color, float *occlusionBuffer); int8_t getProgramCount(); } // namespace ispc #else @@ -48,7 +48,11 @@ struct OSPRAY_SDK_INTERFACE SSAOFrameOp : public FrameOp std::vector kernel; std::vector randomVecs; - std::unique_ptr attach(FrameBufferView &fbView) override; + SSAOFrameOp(api::Device &device) + : FrameOp(static_cast(device)) + {} + std::unique_ptr attach( + FrameBufferView &fbView) override; void commit() override; std::string toString() const override; }; @@ -62,23 +66,24 @@ struct OSPRAY_SDK_INTERFACE LiveSSAOFrameOp : public LiveFrameOp std::vector randomVecs; template - void applySSAO(FrameBufferView &fb, T *color, const Camera *); + void applySSAO(const FrameBufferView &fb, T *color, const Camera *); - 
LiveSSAOFrameOp(FrameBufferView &fbView, + LiveSSAOFrameOp(api::ISPCDevice &device, + FrameBufferView &fbView, void *, float, float, float, std::vector, std::vector); - void process(const Camera *) override; + void process(void *, const Camera *) override; }; // Inlined definitions //////////////////////////////////////////////////////// template inline void LiveSSAOFrameOp::applySSAO( - FrameBufferView &fb, T *color, const Camera *cam) + const FrameBufferView &fb, T *color, const Camera *cam) { if (cam->toString().compare("ospray::PerspectiveCamera")) throw std::runtime_error( diff --git a/modules/cpu/fb/frame_ops/SSAO.ispc b/modules/cpu/fb/frame_ops/SSAO.ispc index 929c6e6ff..ef0c1c3a7 100644 --- a/modules/cpu/fb/frame_ops/SSAO.ispc +++ b/modules/cpu/fb/frame_ops/SSAO.ispc @@ -58,7 +58,7 @@ export void LiveSSAOFrameOp_set(void *uniform _self, } export void LiveSSAOFrameOp_getOcclusion(const void *uniform _self, - void *uniform _fb, + const void *uniform _fb, float *uniform occlusionBuffer, const uniform float radius, const uniform float checkRadius, @@ -129,7 +129,7 @@ export void LiveSSAOFrameOp_getOcclusion(const void *uniform _self, } export void LiveSSAOFrameOp_applyOcclusion(void *uniform, - void *uniform _fb, + const void *uniform _fb, void *uniform _color, float *uniform occlusionBuffer) { diff --git a/modules/cpu/fb/pixel_ops/ToneMapper.cpp b/modules/cpu/fb/pixel_ops/ToneMapper.cpp index 8ec43b367..e1a2d7f14 100644 --- a/modules/cpu/fb/pixel_ops/ToneMapper.cpp +++ b/modules/cpu/fb/pixel_ops/ToneMapper.cpp @@ -54,10 +54,10 @@ void ToneMapper::commit() 0.f); } -std::unique_ptr ToneMapper::attach(FrameBufferView &fbView) +std::unique_ptr ToneMapper::attach() { return rkcommon::make_unique( - fbView, exposure, a, b, c, d, acesColor); + device, exposure, a, b, c, d, acesColor); } std::string ToneMapper::toString() const @@ -65,15 +65,14 @@ std::string ToneMapper::toString() const return "ospray::ToneMapper"; } -LiveToneMapper::LiveToneMapper(FrameBufferView 
&_fbView, +LiveToneMapper::LiveToneMapper(api::ISPCDevice &device, float exposure, float a, float b, float c, float d, bool acesColor) - : AddStructShared( - _fbView.originalFB->getISPCDevice().getIspcrtContext(), _fbView) + : AddStructShared(device.getIspcrtContext(), device) { getSh()->super.processPixel = reinterpret_cast( diff --git a/modules/cpu/fb/pixel_ops/ToneMapper.h b/modules/cpu/fb/pixel_ops/ToneMapper.h index 7c7741838..e46edf553 100644 --- a/modules/cpu/fb/pixel_ops/ToneMapper.h +++ b/modules/cpu/fb/pixel_ops/ToneMapper.h @@ -1,7 +1,8 @@ // Copyright 2009 Intel Corporation // SPDX-License-Identifier: Apache-2.0 -#include "../ImageOp.h" +#include "fb/PixelOp.h" +// ispc shared #include "ToneMapperShared.h" using namespace rkcommon; @@ -11,9 +12,13 @@ namespace ospray { /*! \brief Generic tone mapping operator approximating ACES by default. */ struct OSPRAY_SDK_INTERFACE ToneMapper : public PixelOp { + ToneMapper(api::Device &device) + : PixelOp(static_cast(device)) + {} + void commit() override; - std::unique_ptr attach(FrameBufferView &fbView) override; + std::unique_ptr attach() override; std::string toString() const override; @@ -26,7 +31,7 @@ struct OSPRAY_SDK_INTERFACE ToneMapper : public PixelOp struct OSPRAY_SDK_INTERFACE LiveToneMapper : public AddStructShared { - LiveToneMapper(FrameBufferView &fbView, + LiveToneMapper(api::ISPCDevice &device, float exposure, float a, float b, diff --git a/modules/cpu/fb/pixel_ops/ToneMapperShared.h b/modules/cpu/fb/pixel_ops/ToneMapperShared.h index de138abdc..ef3849b51 100644 --- a/modules/cpu/fb/pixel_ops/ToneMapperShared.h +++ b/modules/cpu/fb/pixel_ops/ToneMapperShared.h @@ -3,7 +3,7 @@ #pragma once -#include "../ImageOpShared.h" +#include "fb/PixelOpShared.h" #ifdef __cplusplus namespace ispc { diff --git a/modules/cpu/fb/registration.cpp b/modules/cpu/fb/registration.cpp index 02dcf08f4..2dab456fc 100644 --- a/modules/cpu/fb/registration.cpp +++ b/modules/cpu/fb/registration.cpp @@ -1,6 +1,12 @@ // 
Copyright 2022 Intel Corporation // SPDX-License-Identifier: Apache-2.0 +// We don't want an instance of ObjectFactory static table in this library +// so we have to include it with import define so the table will be imported +// from 'ospray' library +#define OBJECTFACTORY_IMPORT +#include "common/ObjectFactory.h" + #include "frame_ops/Blur.h" #include "frame_ops/Debug.h" #include "frame_ops/Depth.h" diff --git a/modules/cpu/render/LoadBalancer.cpp b/modules/cpu/render/LoadBalancer.cpp index 0e286b41c..a20571a9b 100644 --- a/modules/cpu/render/LoadBalancer.cpp +++ b/modules/cpu/render/LoadBalancer.cpp @@ -17,7 +17,8 @@ namespace ospray { -Renderer::Event LocalTiledLoadBalancer::renderFrame(FrameBuffer *fb, +std::pair LocalTiledLoadBalancer::renderFrame( + FrameBuffer *fb, Renderer *renderer, Camera *camera, World *world, @@ -26,24 +27,29 @@ Renderer::Event LocalTiledLoadBalancer::renderFrame(FrameBuffer *fb, fb->beginFrame(); void *perFrameData = renderer->beginFrame(fb, world); - Renderer::Event event = renderer->renderTasks(fb, + AsyncEvent rendererEvent = renderer->renderTasks(fb, camera, world, perFrameData, fb->getRenderTaskIDs(renderer->errorThreshold), wait); - // No renderer->endFrame() and fb->endFrame() on GPU. - // Frame post-processing need to be done as a separate - // kernel submitted to the main compute queue. 
+ // Can't call renderer->endFrame() because we might still render if (wait) { renderer->endFrame(fb, perFrameData); fb->setCompletedEvent(OSP_WORLD_RENDERED); + } + + // But we can queue FB post-processing kernel + AsyncEvent fbEvent = fb->postProcess(camera, wait); + // Can't call fb->endFrame() because we might still post-process + if (wait) { fb->endFrame(renderer->errorThreshold, camera); fb->setCompletedEvent(OSP_FRAME_FINISHED); } - return event; + + return std::make_pair(rendererEvent, fbEvent); } std::string LocalTiledLoadBalancer::toString() const diff --git a/modules/cpu/render/LoadBalancer.h b/modules/cpu/render/LoadBalancer.h index 84bccac35..6c6edfacf 100644 --- a/modules/cpu/render/LoadBalancer.h +++ b/modules/cpu/render/LoadBalancer.h @@ -21,7 +21,7 @@ struct OSPRAY_SDK_INTERFACE TiledLoadBalancer /*! Render the entire framebuffer using the given renderer, camera and * world configuration using the load balancer to parallelize the work */ - virtual Renderer::Event renderFrame(FrameBuffer *fb, + virtual std::pair renderFrame(FrameBuffer *fb, Renderer *renderer, Camera *camera, World *world, @@ -37,7 +37,7 @@ struct OSPRAY_SDK_INTERFACE TiledLoadBalancer application ranks each doing local rendering on their own) */ struct OSPRAY_SDK_INTERFACE LocalTiledLoadBalancer : public TiledLoadBalancer { - Renderer::Event renderFrame(FrameBuffer *fb, + std::pair renderFrame(FrameBuffer *fb, Renderer *renderer, Camera *camera, World *world, diff --git a/modules/cpu/render/RenderTaskSycl.h b/modules/cpu/render/RenderTaskSycl.h index c48e64909..a0ff3479d 100644 --- a/modules/cpu/render/RenderTaskSycl.h +++ b/modules/cpu/render/RenderTaskSycl.h @@ -4,13 +4,13 @@ #pragma once // ospray -#include "../common/Future.h" +#include "common/Future.h" namespace ospray { struct RenderTask : public Future { - RenderTask(sycl::event); + RenderTask(sycl::event rendererEvent, sycl::event frameBufferEvent); ~RenderTask() override; bool isFinished(OSPSyncEvent event = 
OSP_TASK_FINISHED) override; @@ -20,12 +20,16 @@ struct RenderTask : public Future float getTaskDuration() override; private: - sycl::event syclEvent; + sycl::event rendererEvent; + sycl::event frameBufferEvent; }; // Inlined definitions ////////////////////////////////////////////////////// -inline RenderTask::RenderTask(sycl::event syclEvent) : syclEvent(syclEvent) {} +inline RenderTask::RenderTask( + sycl::event rendererEvent, sycl::event frameBufferEvent) + : rendererEvent(rendererEvent), frameBufferEvent(frameBufferEvent) +{} inline RenderTask::~RenderTask() { @@ -35,9 +39,17 @@ inline RenderTask::~RenderTask() inline bool RenderTask::isFinished(OSPSyncEvent event) { - (void)event; - syclEvent.wait_and_throw(); - return true; + switch (event) { + case OSP_TASK_FINISHED: + case OSP_FRAME_FINISHED: + frameBufferEvent.wait_and_throw(); + [[fallthrough]]; + case OSP_WORLD_RENDERED: + rendererEvent.wait_and_throw(); + [[fallthrough]]; + default: + return true; + } // The proper way of checking is commented out because it degrades // performance by a factor of 3. 
Analysis shows that sharing GPU @@ -49,8 +61,17 @@ inline bool RenderTask::isFinished(OSPSyncEvent event) inline void RenderTask::wait(OSPSyncEvent event) { - (void)event; - syclEvent.wait_and_throw(); + switch (event) { + case OSP_TASK_FINISHED: + case OSP_FRAME_FINISHED: + frameBufferEvent.wait_and_throw(); + [[fallthrough]]; + case OSP_WORLD_RENDERED: + rendererEvent.wait_and_throw(); + [[fallthrough]]; + default: + return; + } } inline void RenderTask::cancel() @@ -67,11 +88,18 @@ inline float RenderTask::getProgress() inline float RenderTask::getTaskDuration() { const auto t0 = - syclEvent + rendererEvent .get_profiling_info(); const auto t1 = - syclEvent.get_profiling_info(); - return (t1 - t0) * 1E-9; + rendererEvent + .get_profiling_info(); + const auto t2 = + frameBufferEvent + .get_profiling_info(); + const auto t3 = + frameBufferEvent + .get_profiling_info(); + return ((t1 - t0) + (t3 - t2)) * 1E-9; } } // namespace ospray diff --git a/modules/cpu/render/Renderer.cpp b/modules/cpu/render/Renderer.cpp index 58830c4ed..a9ae95d88 100644 --- a/modules/cpu/render/Renderer.cpp +++ b/modules/cpu/render/Renderer.cpp @@ -96,18 +96,6 @@ void Renderer::commit() ispc::precomputeZOrder(); } -#ifdef OSPRAY_TARGET_SYCL -sycl::nd_range<1> Renderer::computeDispatchRange( - const size_t globalSize, const size_t workgroupSize) const -{ - // roundedRange global size must be at least workgroupSize - const size_t roundedRange = - std::max(size_t(1), (globalSize + workgroupSize - 1) / workgroupSize) - * workgroupSize; - return sycl::nd_range<1>(roundedRange, workgroupSize); -} -#endif - OSPPickResult Renderer::pick( FrameBuffer *fb, Camera *camera, World *world, const vec2f &screenPos) { diff --git a/modules/cpu/render/Renderer.h b/modules/cpu/render/Renderer.h index 4866fbdad..3eb174c06 100644 --- a/modules/cpu/render/Renderer.h +++ b/modules/cpu/render/Renderer.h @@ -29,14 +29,6 @@ struct OSPRAY_SDK_INTERFACE Renderer : public AddStructShared, public ObjectFactory { -#ifdef 
OSPRAY_TARGET_SYCL - using Event = sycl::event; -#else - struct Event - { - }; -#endif - Renderer(api::ISPCDevice &device); virtual ~Renderer() override = default; @@ -60,24 +52,13 @@ struct OSPRAY_SDK_INTERFACE Renderer virtual void endFrame(FrameBuffer *fb, void *perFrameData); // called by the load balancer to render one "sample" for each task - virtual Event renderTasks(FrameBuffer *, + virtual AsyncEvent renderTasks(FrameBuffer *, Camera *, World *, void *, const utility::ArrayView &, bool wait = true) const = 0; -#ifdef OSPRAY_TARGET_SYCL - /* Compute the rounded dispatch global size for the given work group size. - * SYCL requires that globalSize % workgroupSize == 0, ths function will - * round up globalSize and return nd_range(roundedSize, workgroupSize). - * The kernel being launched must discard tasks that are out of bounds - * bounds due to this rounding - */ - sycl::nd_range<1> computeDispatchRange( - const size_t globalSize, const size_t workgroupSize) const; -#endif - virtual OSPPickResult pick( FrameBuffer *fb, Camera *camera, World *world, const vec2f &screenPos); diff --git a/modules/cpu/render/ao/AORenderer.cpp b/modules/cpu/render/ao/AORenderer.cpp index e36f7d5f8..2684121ca 100644 --- a/modules/cpu/render/ao/AORenderer.cpp +++ b/modules/cpu/render/ao/AORenderer.cpp @@ -36,14 +36,14 @@ void AORenderer::commit() getSh()->volumeSamplingRate = getParam("volumeSamplingRate", 1.f); } -Renderer::Event AORenderer::renderTasks(FrameBuffer *fb, +AsyncEvent AORenderer::renderTasks(FrameBuffer *fb, Camera *camera, World *world, void *, const utility::ArrayView &taskIDs, bool wait) const { - Event event; + AsyncEvent event; auto *rendererSh = getSh(); auto *fbSh = fb->getSh(); auto *cameraSh = camera->getSh(); @@ -59,7 +59,8 @@ Renderer::Event AORenderer::renderTasks(FrameBuffer *fb, ff.other |= camera->getFeatureFlagsOther(); cgh.set_specialization_constant(ff); - const sycl::nd_range<1> dispatchRange = computeDispatchRange(numTasks, 16); + const 
sycl::nd_range<1> dispatchRange = + device.computeDispatchRange(numTasks, 16); cgh.parallel_for(dispatchRange, [=](sycl::nd_item<1> taskIndex, sycl::kernel_handler kh) { if (taskIndex.get_global_id(0) < numTasks) { diff --git a/modules/cpu/render/ao/AORenderer.h b/modules/cpu/render/ao/AORenderer.h index e35e9fb13..483ba534c 100644 --- a/modules/cpu/render/ao/AORenderer.h +++ b/modules/cpu/render/ao/AORenderer.h @@ -14,7 +14,7 @@ struct AORenderer : public AddStructShared std::string toString() const override; void commit() override; - virtual Event renderTasks(FrameBuffer *fb, + virtual AsyncEvent renderTasks(FrameBuffer *fb, Camera *camera, World *world, void *perFrameData, diff --git a/modules/cpu/render/debug/DebugRenderer.cpp b/modules/cpu/render/debug/DebugRenderer.cpp index e54c21259..a0dd7cd19 100644 --- a/modules/cpu/render/debug/DebugRenderer.cpp +++ b/modules/cpu/render/debug/DebugRenderer.cpp @@ -73,14 +73,14 @@ void DebugRenderer::commit() getSh()->type = typeFromString(method); } -Renderer::Event DebugRenderer::renderTasks(FrameBuffer *fb, +AsyncEvent DebugRenderer::renderTasks(FrameBuffer *fb, Camera *camera, World *world, void *, const utility::ArrayView &taskIDs, bool wait) const { - Event event; + AsyncEvent event; auto *rendererSh = getSh(); auto *fbSh = fb->getSh(); auto *cameraSh = camera->getSh(); @@ -96,7 +96,8 @@ Renderer::Event DebugRenderer::renderTasks(FrameBuffer *fb, ff.other |= camera->getFeatureFlagsOther(); cgh.set_specialization_constant(ff); - const sycl::nd_range<1> dispatchRange = computeDispatchRange(numTasks, 16); + const sycl::nd_range<1> dispatchRange = + device.computeDispatchRange(numTasks, 16); cgh.parallel_for(dispatchRange, [=](sycl::nd_item<1> taskIndex, sycl::kernel_handler kh) { if (taskIndex.get_global_id(0) < numTasks) { diff --git a/modules/cpu/render/debug/DebugRenderer.h b/modules/cpu/render/debug/DebugRenderer.h index d0df542ef..401537e50 100644 --- a/modules/cpu/render/debug/DebugRenderer.h +++ 
b/modules/cpu/render/debug/DebugRenderer.h @@ -20,7 +20,7 @@ struct DebugRenderer : public AddStructShared void commit() override; - virtual Event renderTasks(FrameBuffer *fb, + virtual AsyncEvent renderTasks(FrameBuffer *fb, Camera *camera, World *world, void *perFrameData, diff --git a/modules/cpu/render/pathtracer/PathTracer.cpp b/modules/cpu/render/pathtracer/PathTracer.cpp index fce00f58f..d4afd41ac 100644 --- a/modules/cpu/render/pathtracer/PathTracer.cpp +++ b/modules/cpu/render/pathtracer/PathTracer.cpp @@ -87,14 +87,14 @@ void *PathTracer::beginFrame(FrameBuffer *, World *world) return nullptr; } -Renderer::Event PathTracer::renderTasks(FrameBuffer *fb, +AsyncEvent PathTracer::renderTasks(FrameBuffer *fb, Camera *camera, World *world, void *, const utility::ArrayView &taskIDs, bool wait) const { - Event event; + AsyncEvent event; auto *rendererSh = getSh(); auto *fbSh = fb->getSh(); auto *cameraSh = camera->getSh(); @@ -110,7 +110,8 @@ Renderer::Event PathTracer::renderTasks(FrameBuffer *fb, ff.other |= camera->getFeatureFlagsOther(); cgh.set_specialization_constant(ff); - const sycl::nd_range<1> dispatchRange = computeDispatchRange(numTasks, 16); + const sycl::nd_range<1> dispatchRange = + device.computeDispatchRange(numTasks, 16); cgh.parallel_for(dispatchRange, [=](sycl::nd_item<1> taskIndex, sycl::kernel_handler kh) { if (taskIndex.get_global_id(0) < numTasks) { diff --git a/modules/cpu/render/pathtracer/PathTracer.h b/modules/cpu/render/pathtracer/PathTracer.h index a226abeb6..6c2ec7575 100644 --- a/modules/cpu/render/pathtracer/PathTracer.h +++ b/modules/cpu/render/pathtracer/PathTracer.h @@ -20,7 +20,7 @@ struct PathTracer : public AddStructShared virtual void commit() override; virtual void *beginFrame(FrameBuffer *, World *) override; - virtual Event renderTasks(FrameBuffer *fb, + virtual AsyncEvent renderTasks(FrameBuffer *fb, Camera *camera, World *world, void *perFrameData, diff --git a/modules/cpu/render/scivis/SciVis.cpp 
b/modules/cpu/render/scivis/SciVis.cpp index 5c11189f9..09b03b622 100644 --- a/modules/cpu/render/scivis/SciVis.cpp +++ b/modules/cpu/render/scivis/SciVis.cpp @@ -63,14 +63,14 @@ void *SciVis::beginFrame(FrameBuffer *, World *world) return nullptr; } -Renderer::Event SciVis::renderTasks(FrameBuffer *fb, +AsyncEvent SciVis::renderTasks(FrameBuffer *fb, Camera *camera, World *world, void *, const utility::ArrayView &taskIDs, bool wait) const { - Event event; + AsyncEvent event; auto *rendererSh = getSh(); auto *fbSh = fb->getSh(); auto *cameraSh = camera->getSh(); @@ -86,7 +86,8 @@ Renderer::Event SciVis::renderTasks(FrameBuffer *fb, ff.other |= camera->getFeatureFlagsOther(); cgh.set_specialization_constant(ff); - const sycl::nd_range<1> dispatchRange = computeDispatchRange(numTasks, 16); + const sycl::nd_range<1> dispatchRange = + device.computeDispatchRange(numTasks, 16); cgh.parallel_for(dispatchRange, [=](sycl::nd_item<1> taskIndex, sycl::kernel_handler kh) { if (taskIndex.get_global_id(0) < numTasks) { diff --git a/modules/cpu/render/scivis/SciVis.h b/modules/cpu/render/scivis/SciVis.h index 74a8afbd9..649ed9682 100644 --- a/modules/cpu/render/scivis/SciVis.h +++ b/modules/cpu/render/scivis/SciVis.h @@ -15,7 +15,7 @@ struct SciVis : public AddStructShared void commit() override; void *beginFrame(FrameBuffer *, World *) override; - virtual Event renderTasks(FrameBuffer *fb, + virtual AsyncEvent renderTasks(FrameBuffer *fb, Camera *camera, World *world, void *perFrameData, diff --git a/modules/denoiser/CMakeLists.txt b/modules/denoiser/CMakeLists.txt index 4d3fb4ed3..82b86cc87 100644 --- a/modules/denoiser/CMakeLists.txt +++ b/modules/denoiser/CMakeLists.txt @@ -22,7 +22,7 @@ generate_export_header(ospray_module_denoiser) target_link_libraries(ospray_module_denoiser PRIVATE OpenImageDenoise - ospray_module_cpu + ospray ) target_include_directories(ospray_module_denoiser diff --git a/modules/denoiser/DenoiseFrameOp.cpp b/modules/denoiser/DenoiseFrameOp.cpp index 
d701755d2..43776f33d 100644 --- a/modules/denoiser/DenoiseFrameOp.cpp +++ b/modules/denoiser/DenoiseFrameOp.cpp @@ -2,24 +2,20 @@ // SPDX-License-Identifier: Apache-2.0 #include "DenoiseFrameOp.h" -#include "fb/FrameBuffer.h" +#include "api/Device.h" +#include "fb/FrameBufferView.h" namespace ospray { -static bool osprayDenoiseMonitorCallback(void *userPtr, double) +struct OSPRAY_MODULE_DENOISER_EXPORT LiveDenoiseFrameOp + : public LiveFrameOpInterface { - auto *fb = (FrameBuffer *)userPtr; - return !fb->frameCancelled(); -} - -struct OSPRAY_MODULE_DENOISER_EXPORT LiveDenoiseFrameOp : public LiveFrameOp -{ - LiveDenoiseFrameOp(FrameBufferView &_fbView, OIDNDevice device) - : LiveFrameOp(_fbView), - device(device), - filter(oidnNewFilter(device, "RT")) + LiveDenoiseFrameOp(FrameBufferView &fbView, OIDNDevice oidnDevice) + : oidnDevice(oidnDevice), + filter(oidnNewFilter(oidnDevice, "RT")), + fbView(fbView) { - oidnRetainDevice(device); + oidnRetainDevice(oidnDevice); float *fbColor = static_cast(fbView.colorBuffer); oidnSetSharedFilterImage(filter, @@ -64,11 +60,7 @@ struct OSPRAY_MODULE_DENOISER_EXPORT LiveDenoiseFrameOp : public LiveFrameOp sizeof(float) * 4, 0); - oidnSetFilter1b(filter, "hdr", false); - - oidnSetFilterProgressMonitorFunction(filter, - (OIDNProgressMonitorFunction)osprayDenoiseMonitorCallback, - _fbView.originalFB); + oidnSetFilterBool(filter, "hdr", false); oidnCommitFilter(filter); } @@ -76,16 +68,18 @@ struct OSPRAY_MODULE_DENOISER_EXPORT LiveDenoiseFrameOp : public LiveFrameOp ~LiveDenoiseFrameOp() override { oidnReleaseFilter(filter); - oidnReleaseDevice(device); + oidnReleaseDevice(oidnDevice); } - void process(const Camera *) override + void process(void *waitEvent, const Camera *) override { - if (fbView.originalFB->getSh()->numPixelsRendered) + if (waitEvent) + oidnExecuteSYCLFilterAsync(filter, nullptr, 0, (sycl::event *)waitEvent); + else oidnExecuteFilter(filter); const char *errorMessage = nullptr; - auto error = 
oidnGetDeviceError(device, &errorMessage); + auto error = oidnGetDeviceError(oidnDevice, &errorMessage); if (error != OIDN_ERROR_NONE && error != OIDN_ERROR_CANCELLED) { throw std::runtime_error( @@ -93,30 +87,40 @@ struct OSPRAY_MODULE_DENOISER_EXPORT LiveDenoiseFrameOp : public LiveFrameOp } } - OIDNDevice device; + OIDNDevice oidnDevice; OIDNFilter filter; + + FrameBufferView fbView; }; -DenoiseFrameOp::DenoiseFrameOp() - : device(oidnNewDevice(OIDN_DEVICE_TYPE_DEFAULT)) +DenoiseFrameOp::DenoiseFrameOp(api::Device &device) { - oidnSetDevice1b(device, "setAffinity", false); - oidnCommitDevice(device); + // Get appropriate SYCL command queue for post-processing from device + sycl::queue *syclQueuePtr = + (sycl::queue *)device.getPostProcessingCommandQueuePtr(); + if (syclQueuePtr) + oidnDevice = oidnNewSYCLDevice(syclQueuePtr, 1); + else + oidnDevice = oidnNewDevice(OIDN_DEVICE_TYPE_CPU); + + oidnSetDeviceBool(oidnDevice, "setAffinity", false); + oidnCommitDevice(oidnDevice); } DenoiseFrameOp::~DenoiseFrameOp() { - oidnReleaseDevice(device); + oidnReleaseDevice(oidnDevice); } -std::unique_ptr DenoiseFrameOp::attach(FrameBufferView &fbView) +std::unique_ptr DenoiseFrameOp::attach( + FrameBufferView &fbView) { if (fbView.colorBufferFormat != OSP_FB_RGBA32F) throw std::runtime_error( "DenoiseFrameOp must be used with an RGBA32F " "color format framebuffer!"); - return rkcommon::make_unique(fbView, device); + return rkcommon::make_unique(fbView, oidnDevice); } std::string DenoiseFrameOp::toString() const diff --git a/modules/denoiser/DenoiseFrameOp.h b/modules/denoiser/DenoiseFrameOp.h index 69d0d26b1..d2629c2a9 100644 --- a/modules/denoiser/DenoiseFrameOp.h +++ b/modules/denoiser/DenoiseFrameOp.h @@ -11,18 +11,19 @@ namespace ospray { -struct OSPRAY_MODULE_DENOISER_EXPORT DenoiseFrameOp : public FrameOp +struct OSPRAY_MODULE_DENOISER_EXPORT DenoiseFrameOp : public FrameOpInterface { - DenoiseFrameOp(); + DenoiseFrameOp(api::Device &device); ~DenoiseFrameOp() 
override; - std::unique_ptr attach(FrameBufferView &fbView) override; + std::unique_ptr attach( + FrameBufferView &fbView) override; std::string toString() const override; private: - OIDNDevice device; + OIDNDevice oidnDevice; }; } // namespace ospray diff --git a/modules/mpi/ospray/MPIDistributedDevice.cpp b/modules/mpi/ospray/MPIDistributedDevice.cpp index 9cc1376ac..ae0b3eb3a 100644 --- a/modules/mpi/ospray/MPIDistributedDevice.cpp +++ b/modules/mpi/ospray/MPIDistributedDevice.cpp @@ -474,6 +474,12 @@ float MPIDistributedDevice::getVariance(OSPFrameBuffer _fb) return internalDevice->getVariance((OSPFrameBuffer)fb); } +void *MPIDistributedDevice::getPostProcessingCommandQueuePtr() +{ + // Run post-processing on internal device only + return internalDevice->getPostProcessingCommandQueuePtr(); +} + void MPIDistributedDevice::setObjectParam( OSPObject object, const char *name, OSPDataType type, const void *mem) { diff --git a/modules/mpi/ospray/MPIDistributedDevice.h b/modules/mpi/ospray/MPIDistributedDevice.h index 638ce3082..5d3b54f2c 100644 --- a/modules/mpi/ospray/MPIDistributedDevice.h +++ b/modules/mpi/ospray/MPIDistributedDevice.h @@ -119,6 +119,8 @@ struct MPIDistributedDevice : public api::Device float getVariance(OSPFrameBuffer) override; + void *getPostProcessingCommandQueuePtr() override; + /*! 
load module */ int loadModule(const char *name) override; diff --git a/modules/mpi/ospray/MPIOffloadDevice.cpp b/modules/mpi/ospray/MPIOffloadDevice.cpp index 0d643f652..461d81870 100644 --- a/modules/mpi/ospray/MPIOffloadDevice.cpp +++ b/modules/mpi/ospray/MPIOffloadDevice.cpp @@ -1035,6 +1035,13 @@ float MPIOffloadDevice::getTaskDuration(OSPFuture _task) return result; } +void *MPIOffloadDevice::getPostProcessingCommandQueuePtr() +{ + // We don't support running external GPU post-processing through + // offload device + return nullptr; +} + OSPPickResult MPIOffloadDevice::pick(OSPFrameBuffer fb, OSPRenderer renderer, OSPCamera camera, diff --git a/modules/mpi/ospray/MPIOffloadDevice.h b/modules/mpi/ospray/MPIOffloadDevice.h index b66c0133d..70465016e 100644 --- a/modules/mpi/ospray/MPIOffloadDevice.h +++ b/modules/mpi/ospray/MPIOffloadDevice.h @@ -115,6 +115,8 @@ struct MPIOffloadDevice : public api::Device float getTaskDuration(OSPFuture) override; + void *getPostProcessingCommandQueuePtr() override; + OSPPickResult pick( OSPFrameBuffer, OSPRenderer, OSPCamera, OSPWorld, const vec2f &) override; diff --git a/modules/mpi/ospray/fb/DistributedFrameBuffer.cpp b/modules/mpi/ospray/fb/DistributedFrameBuffer.cpp index 5a19291d5..35c2824b0 100644 --- a/modules/mpi/ospray/fb/DistributedFrameBuffer.cpp +++ b/modules/mpi/ospray/fb/DistributedFrameBuffer.cpp @@ -9,6 +9,7 @@ #include "ISPCDevice.h" #include "TileOperation.h" #include "common/Profiling.h" +#include "fb/FrameBufferView.h" #ifndef OSPRAY_TARGET_SYCL #include "fb/DistributedFrameBuffer_ispc.h" #endif @@ -90,22 +91,38 @@ void DFB::commit() { FrameBuffer::commit(); - imageOps.clear(); if (imageOpData) { - FrameBufferView fbv(localFBonMaster ? localFBonMaster.get() - : static_cast(this), - getSh()->colorBufferFormat, - localFBonMaster ? localFBonMaster->colorBuffer->data() : nullptr, - localFBonMaster ? localFBonMaster->depthBuffer->data() : nullptr, - localFBonMaster ? 
localFBonMaster->normalBuffer->data() : nullptr, - localFBonMaster ? localFBonMaster->albedoBuffer->data() : nullptr); - - std::for_each(imageOpData->begin(), imageOpData->end(), [&](ImageOp *i) { - if (!dynamic_cast(i) || localFBonMaster) - imageOps.push_back(i->attach(fbv)); - }); + FrameBuffer *fb = static_cast(this); + void *colorBuffer = nullptr; + float *depthBuffer = nullptr; + vec3f *normalBuffer = nullptr; + vec3f *albedoBuffer = nullptr; + if (localFBonMaster) { + fb = localFBonMaster.get(); + colorBuffer = localFBonMaster->colorBuffer + ? localFBonMaster->colorBuffer->devicePtr() + : nullptr; + depthBuffer = localFBonMaster->depthBuffer + ? localFBonMaster->depthBuffer->devicePtr() + : nullptr; + normalBuffer = localFBonMaster->normalBuffer + ? localFBonMaster->normalBuffer->devicePtr() + : nullptr; + albedoBuffer = localFBonMaster->albedoBuffer + ? localFBonMaster->albedoBuffer->devicePtr() + : nullptr; + } + + FrameBufferView fbv(fb, + getColorBufferFormat(), + getNumPixels(), + colorBuffer, + depthBuffer, + normalBuffer, + albedoBuffer); + + prepareLiveOpsForFBV(fbv, localFBonMaster != nullptr, true); } - prepareImageOps(); } mpicommon::Group DFB::getMPIGroup() @@ -144,10 +161,6 @@ void DFB::startNewFrame(const float errorThreshold) FrameBuffer::beginFrame(); - std::for_each(imageOps.begin(), - imageOps.end(), - [](std::unique_ptr &p) { p->beginFrame(); }); - lastProgressReport = std::chrono::steady_clock::now(); renderingProgressTiles = 0; @@ -962,21 +975,9 @@ float DFB::tileError(const uint32_t tileID) void DFB::endFrame(const float errorThreshold, const Camera *camera) { - if (localFBonMaster && !imageOps.empty() - && firstFrameOperation < imageOps.size()) { - std::for_each(imageOps.begin() + firstFrameOperation, - imageOps.end(), - [&](std::unique_ptr &iop) { - LiveFrameOp *fop = dynamic_cast(iop.get()); - if (fop) - fop->process(camera); - }); - } - if (!imageOps.empty()) { - std::for_each(imageOps.begin(), - imageOps.end(), - [](std::unique_ptr 
&p) { p->endFrame(); }); - } + if (localFBonMaster) + for (auto &p : frameOps) + p->process(nullptr, camera); // only refine on master if (mpicommon::IamTheMaster()) { @@ -989,4 +990,12 @@ void DFB::endFrame(const float errorThreshold, const Camera *camera) setCompletedEvent(OSP_FRAME_FINISHED); } +AsyncEvent DFB::postProcess(const Camera *, bool) +{ + AsyncEvent event; + // TODO: Modify DistributedLoadBalancer and move here post-processing loop + // from endFrame() + return event; +} + } // namespace ospray diff --git a/modules/mpi/ospray/fb/DistributedFrameBuffer.h b/modules/mpi/ospray/fb/DistributedFrameBuffer.h index 6a21577a4..a2a9b5af1 100644 --- a/modules/mpi/ospray/fb/DistributedFrameBuffer.h +++ b/modules/mpi/ospray/fb/DistributedFrameBuffer.h @@ -101,6 +101,8 @@ struct DistributedFrameBuffer : public mpi::messaging::MessageHandler, void endFrame(const float errorThreshold, const Camera *camera) override; + AsyncEvent postProcess(const Camera *camera, bool wait) override; + void setTileOperation( std::shared_ptr tileOp, const Renderer *renderer); diff --git a/modules/mpi/ospray/render/DistributedLoadBalancer.cpp b/modules/mpi/ospray/render/DistributedLoadBalancer.cpp index 43615f806..310d74553 100644 --- a/modules/mpi/ospray/render/DistributedLoadBalancer.cpp +++ b/modules/mpi/ospray/render/DistributedLoadBalancer.cpp @@ -32,7 +32,8 @@ DistributedLoadBalancer::~DistributedLoadBalancer() handle.free(); } -Renderer::Event DistributedLoadBalancer::renderFrame(FrameBuffer *_fb, +std::pair DistributedLoadBalancer::renderFrame( + FrameBuffer *_fb, Renderer *_renderer, Camera *camera, World *_world, @@ -50,7 +51,7 @@ Renderer::Event DistributedLoadBalancer::renderFrame(FrameBuffer *_fb, if (!renderer) { if (world->allRegions.size() == 1) { renderFrameReplicated(dfb, _renderer, camera, world); - return Renderer::Event(); + return std::make_pair(AsyncEvent(), AsyncEvent()); } else { throw std::runtime_error( "Distributed rendering requires a distributed 
renderer!"); @@ -270,7 +271,7 @@ Renderer::Event DistributedLoadBalancer::renderFrame(FrameBuffer *_fb, renderer->endFrame(dfb, perFrameData); dfb->endFrame(renderer->errorThreshold, camera); - return Renderer::Event(); + return std::make_pair(AsyncEvent(), AsyncEvent()); } void DistributedLoadBalancer::renderFrameReplicated(DistributedFrameBuffer *dfb, diff --git a/modules/mpi/ospray/render/DistributedLoadBalancer.h b/modules/mpi/ospray/render/DistributedLoadBalancer.h index b2d0147c2..512bdbb07 100644 --- a/modules/mpi/ospray/render/DistributedLoadBalancer.h +++ b/modules/mpi/ospray/render/DistributedLoadBalancer.h @@ -30,7 +30,7 @@ struct DistributedLoadBalancer : public TiledLoadBalancer ~DistributedLoadBalancer() override; - Renderer::Event renderFrame(FrameBuffer *fb, + std::pair renderFrame(FrameBuffer *fb, Renderer *renderer, Camera *camera, World *world, diff --git a/modules/mpi/ospray/render/distributed/DistributedRaycast.cpp b/modules/mpi/ospray/render/distributed/DistributedRaycast.cpp index 3a16a60b0..eb39f43f0 100644 --- a/modules/mpi/ospray/render/distributed/DistributedRaycast.cpp +++ b/modules/mpi/ospray/render/distributed/DistributedRaycast.cpp @@ -125,7 +125,8 @@ void DistributedRaycastRenderer::renderRegionTasks(SparseFrameBuffer *fb, ff.other |= camera->getFeatureFlagsOther(); cgh.set_specialization_constant(ff); - const sycl::nd_range<1> dispatchRange = computeDispatchRange(numTasks, 16); + const sycl::nd_range<1> dispatchRange = + device.computeDispatchRange(numTasks, 16); cgh.parallel_for(dispatchRange, [=](sycl::nd_item<1> taskIndex, sycl::kernel_handler kh) { const box3f regionCopy = region; diff --git a/modules/mpi/ospray/render/distributed/DistributedRenderer.cpp b/modules/mpi/ospray/render/distributed/DistributedRenderer.cpp index 31aa76f14..2a79108cb 100644 --- a/modules/mpi/ospray/render/distributed/DistributedRenderer.cpp +++ b/modules/mpi/ospray/render/distributed/DistributedRenderer.cpp @@ -67,7 +67,8 @@ void 
DistributedRenderer::computeRegionVisibility(SparseFrameBuffer *fb, ff.other |= camera->getFeatureFlagsOther(); cgh.set_specialization_constant(ff); - const sycl::nd_range<1> dispatchRange = computeDispatchRange(numTasks, 16); + const sycl::nd_range<1> dispatchRange = + device.computeDispatchRange(numTasks, 16); cgh.parallel_for(dispatchRange, [=](sycl::nd_item<1> taskIndex, sycl::kernel_handler kh) { if (taskIndex.get_global_id(0) < numTasks) { diff --git a/modules/mpi/ospray/render/distributed/DistributedRenderer.h b/modules/mpi/ospray/render/distributed/DistributedRenderer.h index 1c11acda4..75c3ffb9c 100644 --- a/modules/mpi/ospray/render/distributed/DistributedRenderer.h +++ b/modules/mpi/ospray/render/distributed/DistributedRenderer.h @@ -35,14 +35,14 @@ struct DistributedRenderer : public AddStructShared const utility::ArrayView &taskIDs) const; // Not used by distributed renderers - Event renderTasks(FrameBuffer *, + AsyncEvent renderTasks(FrameBuffer *, Camera *, World *, void * /*perFrameData*/, const utility::ArrayView & /*taskIDs*/, bool /*wait*/) const override { - return Event(); + return AsyncEvent(); } #ifndef OSPRAY_TARGET_SYCL diff --git a/modules/multiDevice/MultiDevice.cpp b/modules/multiDevice/MultiDevice.cpp index 4f831d9c3..fe2e31c0f 100644 --- a/modules/multiDevice/MultiDevice.cpp +++ b/modules/multiDevice/MultiDevice.cpp @@ -51,8 +51,8 @@ void MultiDevice::commit() int numSubdevices = OSPRAY_NUM_SUBDEVICES.value_or(getParam("numSubdevices", 1)); std::vector deviceIndex(numSubdevices, 0); - for (int i = 0; i < numSubdevices; i++) - deviceIndex[i] = i%numPhyDevices; + for (int i = 0; i < numSubdevices; i++) + deviceIndex[i] = i % numPhyDevices; postStatusMsg(OSP_LOG_DEBUG) << "# of subdevices =" << numSubdevices; @@ -411,7 +411,7 @@ OSPImageOperation MultiDevice::newImageOp(const char *type) // Same note for image ops as for framebuffers in terms of how they are // treated as shared. 
Eventually we would have per hardware device ones though // for cpu/gpus - auto *op = ImageOp::createInstance(type); + auto *op = ImageOp::createInstance(type, hostDevice); MultiDeviceObject *o = new MultiDeviceObject(); for (size_t i = 0; i < subdevices.size(); ++i) { o->objects.push_back((OSPImageOperation)op); @@ -527,6 +527,15 @@ float MultiDevice::getTaskDuration(OSPFuture _task) return task->getTaskDuration(); } +void *MultiDevice::getPostProcessingCommandQueuePtr() +{ + // TODO: Return appropriate command queue for post-processing here. + // Either one device will be statically selected for post-processing or + // dynamically load balancer will assign device based on current load + // distribution + return nullptr; +} + OSPPickResult MultiDevice::pick(OSPFrameBuffer _fb, OSPRenderer _renderer, OSPCamera _camera, diff --git a/modules/multiDevice/MultiDevice.h b/modules/multiDevice/MultiDevice.h index c453eb644..5472a08e3 100644 --- a/modules/multiDevice/MultiDevice.h +++ b/modules/multiDevice/MultiDevice.h @@ -119,6 +119,8 @@ struct MultiDevice : public Device float getProgress(OSPFuture) override; float getTaskDuration(OSPFuture) override; + void *getPostProcessingCommandQueuePtr() override; + OSPPickResult pick( OSPFrameBuffer, OSPRenderer, OSPCamera, OSPWorld, const vec2f &) override; diff --git a/ospray/CMakeLists.txt b/ospray/CMakeLists.txt index baf1c60b5..7adadc8b6 100644 --- a/ospray/CMakeLists.txt +++ b/ospray/CMakeLists.txt @@ -17,6 +17,9 @@ set(OSPRAY_SOURCES api/ospray_cpp_defs.cpp common/OSPCommon.cpp + common/Managed.cpp + + fb/ImageOp.cpp include/ospray/ospray.h include/ospray/ospray_util.h diff --git a/ospray/api/Device.h b/ospray/api/Device.h index e7d0bac10..34abc7280 100644 --- a/ospray/api/Device.h +++ b/ospray/api/Device.h @@ -7,8 +7,8 @@ #include "rkcommon/utility/Optional.h" #include "rkcommon/utility/ParameterizedObject.h" // ospray -#include "../common/OSPCommon.h" -#include "../common/ObjectFactory.h" +#include "common/OSPCommon.h" 
+#include "common/ObjectFactory.h" #include "ospray/version.h" // std #include @@ -131,6 +131,10 @@ struct OSPRAY_CORE_INTERFACE Device : public memory::RefCountedObject, virtual float getProgress(OSPFuture) = 0; virtual float getTaskDuration(OSPFuture) = 0; + // Return pointer to command queue that is going to be used by external + // post-processing kernels (e.g. OIDN) + virtual void *getPostProcessingCommandQueuePtr() = 0; + virtual OSPPickResult pick( OSPFrameBuffer, OSPRenderer, OSPCamera, OSPWorld, const vec2f &) { diff --git a/modules/cpu/common/Managed.cpp b/ospray/common/Managed.cpp similarity index 98% rename from modules/cpu/common/Managed.cpp rename to ospray/common/Managed.cpp index f217e5ca8..7a849f674 100644 --- a/modules/cpu/common/Managed.cpp +++ b/ospray/common/Managed.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 #include "Managed.h" -#include "Data.h" namespace ospray { diff --git a/modules/cpu/common/Managed.h b/ospray/common/Managed.h similarity index 82% rename from modules/cpu/common/Managed.h rename to ospray/common/Managed.h index 47863f9be..b2df79f8f 100644 --- a/modules/cpu/common/Managed.h +++ b/ospray/common/Managed.h @@ -18,8 +18,8 @@ struct Data; template struct DataT; -struct OSPRAY_SDK_INTERFACE ManagedObject : public memory::RefCount, - public utility::ParameterizedObject +struct OSPRAY_CORE_INTERFACE ManagedObject : public memory::RefCount, + public utility::ParameterizedObject { using OSP_PTR = ManagedObject *; @@ -95,22 +95,6 @@ inline utility::Optional ManagedObject::getOptParam(const char *name) return retval; } -template <> -inline Data *ManagedObject::getParam( - const char *name, Data *valIfNotFound) -{ - auto *obj = ParameterizedObject::getParam( - name, (ManagedObject *)valIfNotFound); - if (obj && obj->managedObjectType == OSP_DATA) - return (Data *)obj; - else { - // reset query status if object is not a Data* - if (obj) - findParam(name)->query = false; - return valIfNotFound; - } -} - } // namespace
ospray // Specializations for ISPCDevice ///////////////////////////////////////////// diff --git a/ospray/common/ObjectFactory.h b/ospray/common/ObjectFactory.h index f2012e6e1..be428e611 100644 --- a/ospray/common/ObjectFactory.h +++ b/ospray/common/ObjectFactory.h @@ -9,9 +9,13 @@ #include #include -#if defined(OBJECTFACTORY_IMPORT) && defined(_WIN32) +#ifdef _WIN32 +#ifdef OBJECTFACTORY_IMPORT #define OF_DECLSPEC __declspec(dllimport) #else +#define OF_DECLSPEC __declspec(dllexport) +#endif +#else #define OF_DECLSPEC #endif diff --git a/modules/cpu/fb/FrameBufferView.h b/ospray/fb/FrameBufferView.h similarity index 79% rename from modules/cpu/fb/FrameBufferView.h rename to ospray/fb/FrameBufferView.h index 3439cb15b..876e6870c 100644 --- a/modules/cpu/fb/FrameBufferView.h +++ b/ospray/fb/FrameBufferView.h @@ -3,7 +3,7 @@ #pragma once -#include "common/Managed.h" +#include "common/OSPCommon.h" namespace ospray { @@ -11,7 +11,7 @@ struct FrameBuffer; /*! A view into a portion of the framebuffer to run the frame operation on */ -struct OSPRAY_SDK_INTERFACE FrameBufferView +struct FrameBufferView { // TODO Replace w/ arrayview once LocalFB is updated // The total dimensions of the global framebuffer @@ -44,10 +44,22 @@ struct OSPRAY_SDK_INTERFACE FrameBufferView //! 
Convenience method to make a view of the entire framebuffer FrameBufferView(FrameBuffer *fb, OSPFrameBufferFormat colorFormat, + const vec2i &dims, void *colorBuffer, float *depthBuffer, vec3f *normalBuffer, - vec3f *albedoBuffer); + vec3f *albedoBuffer) + : fbDims(dims), + viewDims(dims), + haloDims(0), + colorBufferFormat(colorFormat), + colorBuffer(colorBuffer), + depthBuffer(depthBuffer), + normalBuffer(normalBuffer), + albedoBuffer(albedoBuffer), + originalFB(fb) + {} + FrameBufferView() = default; }; diff --git a/modules/cpu/fb/ImageOp.cpp b/ospray/fb/ImageOp.cpp similarity index 55% rename from modules/cpu/fb/ImageOp.cpp rename to ospray/fb/ImageOp.cpp index 071b79a51..8f402e5a6 100644 --- a/modules/cpu/fb/ImageOp.cpp +++ b/ospray/fb/ImageOp.cpp @@ -2,13 +2,15 @@ // SPDX-License-Identifier: Apache-2.0 #include "ImageOp.h" -#include "fb/FrameBuffer.h" namespace ospray { // ImageOp definitions //////////////////////////////////////////////////////// -LiveImageOp::LiveImageOp(FrameBufferView &_fbView) : fbView(_fbView) {} +ImageOp *ImageOp::createImageOp(const char *type, api::Device &device) +{ + return createInstance(type, device); +} ImageOp::ImageOp() { @@ -20,13 +22,6 @@ std::string ImageOp::toString() const return "ospray::ImageOp(base class)"; } -LivePixelOp::LivePixelOp(FrameBufferView &_fbView) - : AddStructShared( - _fbView.originalFB->getISPCDevice().getIspcrtContext(), _fbView) -{} - -LiveFrameOp::LiveFrameOp(FrameBufferView &_fbView) : LiveImageOp(_fbView) {} - OSPTYPEFOR_DEFINITION(ImageOp *); } // namespace ospray diff --git a/ospray/fb/ImageOp.h b/ospray/fb/ImageOp.h new file mode 100644 index 000000000..eb0daf99c --- /dev/null +++ b/ospray/fb/ImageOp.h @@ -0,0 +1,64 @@ +// Copyright 2009 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "common/Managed.h" +#include "common/ObjectFactory.h" + +#include + +namespace ospray { + +namespace api { +struct Device; +} + +struct FrameBufferView; + +// An instance of 
an image op that is actually attached to a framebuffer +struct OSPRAY_CORE_INTERFACE LiveImageOp +{ + virtual ~LiveImageOp() = default; +}; + +// Base abstraction for an "Image Op" to be performed for +// every image that gets written into a frame buffer. +// An ImageOp is basically a "hook" that allows injecting arbitrary +// code, such as postprocessing, filtering, blending, tone mapping, +// sending tiles to a display wall, etc. +struct OSPRAY_CORE_INTERFACE ImageOp + : public ManagedObject, + public ObjectFactory +{ + static ImageOp *createImageOp(const char *type, api::Device &device); + + ImageOp(); + ~ImageOp() override = default; + + std::string toString() const override; +}; + +OSPTYPEFOR_SPECIALIZATION(ImageOp *, OSP_IMAGE_OPERATION); + +struct Camera; +struct OSPRAY_CORE_INTERFACE LiveFrameOpInterface : public LiveImageOp +{ + ~LiveFrameOpInterface() override = default; + + virtual void process(void *waitEvent, const Camera *camera) = 0; +}; + +struct OSPRAY_CORE_INTERFACE FrameOpInterface : public ImageOp +{ + ~FrameOpInterface() override = default; + + // Attach an image op to an existing framebuffer. Use this + // to pass the params from the API to the instance of the image op + // which will actually be run on the framebuffer view or tiles of the + // framebuffer passed + virtual std::unique_ptr attach( + FrameBufferView &fbView) = 0; +}; + +} // namespace ospray diff --git a/scripts/superbuild/CMakeLists.txt b/scripts/superbuild/CMakeLists.txt index 8edf7ef65..7b1c388dc 100644 --- a/scripts/superbuild/CMakeLists.txt +++ b/scripts/superbuild/CMakeLists.txt @@ -140,17 +140,17 @@ endif() option(BUILD_OIDN "Build OpenImageDenoise as a part of the superbuild."
OFF) if (BUILD_OIDN) - set(OIDN_VERSION "1.4.3" CACHE STRING "Which version of OpenImageDenoise to build?") + set(OIDN_VERSION "2.0.0" CACHE STRING "Which version of OpenImageDenoise to build?") mark_as_advanced(CLEAR OIDN_VERSION) option(BUILD_OIDN_FROM_SOURCE "Build OpenImageDenoise or use pre-built version?" OFF) mark_as_advanced(CLEAR BUILD_OIDN_FROM_SOURCE) - if (OIDN_VERSION STREQUAL "1.4.3") + if (OIDN_VERSION STREQUAL "2.0.0") if (APPLE) - set(OIDN_HASH "3dffa9a9a0f2cb9a0d89cf42ee7e3acb0db5552f0128d7868af71e9b80d72ecf") + set(OIDN_HASH "44055036c5ee3cfc26057d4ad3b6e7f0cbd1feb4230386a30eca115004308c9a") elseif (WIN32) - set(OIDN_HASH "5c0f3d8a4211dfd064f7baeb4403e33cba36035ad8b6c8a8c0cabb7c705db4f4") + set(OIDN_HASH "df0c6bf8185f9736dd10eb07e4bf1c68c6ef28c0cddd42a9555f9e8435a24e40") else() - set(OIDN_HASH "f6ec3b996c69386bbc69f61bc0e6fd17765530e6aa788cfc7a5d3e739548cd58") + set(OIDN_HASH "8de608405ab4e687a154560245a4520cf44a5a917f7fa2ec2b53e885638e2865") endif() endif() else() diff --git a/scripts/superbuild/dependencies/dep_oidn.cmake b/scripts/superbuild/dependencies/dep_oidn.cmake index 6f6e8c7ce..75df364a9 100644 --- a/scripts/superbuild/dependencies/dep_oidn.cmake +++ b/scripts/superbuild/dependencies/dep_oidn.cmake @@ -56,7 +56,7 @@ else() if (APPLE) set(OIDN_OSSUFFIX "x86_64.macos.tar.gz") elseif (WIN32) - set(OIDN_OSSUFFIX "x64.vc14.windows.zip") + set(OIDN_OSSUFFIX "x64.windows.zip") else() set(OIDN_OSSUFFIX "x86_64.linux.tar.gz") endif() diff --git a/scripts/tests/run_tests.ps1 b/scripts/tests/run_tests.ps1 index 28b6f716f..df0f2e6dd 100755 --- a/scripts/tests/run_tests.ps1 +++ b/scripts/tests/run_tests.ps1 @@ -28,7 +28,8 @@ cmake --build . 
--config Release --target ospray_test_data if ( $testMultiDevice ) { md failed-multidevice $Env:OSPRAY_NUM_SUBDEVICES = 2 - ospTestSuite.exe --osp:load-modules=multidevice_cpu --osp:device=multidevice --gtest_output=xml:tests-multidevice.xml --baseline-dir=regression_test_baseline\ --failed-dir=failed-multidevice + $test_filters = "DebugOp/ImageOp.ImageOp/0" # post-processing not enabled on multidevice + ospTestSuite.exe --osp:load-modules=multidevice_cpu --osp:device=multidevice --gtest_output=xml:tests-multidevice.xml --baseline-dir=regression_test_baseline\ --failed-dir=failed-multidevice --gtest_filter="-$test_filters" $exitCode = $LastExitCode if ( $exitCode) { exit $exitCode @@ -37,7 +38,9 @@ if ( $testMultiDevice ) { if ( $testMPI ) { md failed-mpi - mpiexec.exe -n 2 ospTestSuite.exe --osp:load-modules=mpi_offload --osp:device=mpiOffload --gtest_filter="-TestScenesVariance/*" --gtest_output=xml:tests-mpi.xml --baseline-dir=regression_test_baseline\ --failed-dir=failed-mpi + $test_filters = "DebugOp/ImageOp.ImageOp/0" # post-processing not enabled on mpi + $test_filters += ":TestScenesVariance/*" + mpiexec.exe -n 2 ospTestSuite.exe --osp:load-modules=mpi_offload --osp:device=mpiOffload --gtest_output=xml:tests-mpi.xml --baseline-dir=regression_test_baseline\ --failed-dir=failed-mpi --gtest_filter="-$test_filters" $exitCode = $LastExitCode if ( $exitCode) { exit $exitCode diff --git a/scripts/tests/run_tests.sh b/scripts/tests/run_tests.sh index 02a5d15d1..67e9f25ef 100755 --- a/scripts/tests/run_tests.sh +++ b/scripts/tests/run_tests.sh @@ -51,12 +51,14 @@ ospTestSuite --gtest_output=xml:tests.xml --baseline-dir=regression_test_baselin if [ $TEST_MULTIDEVICE ]; then mkdir failed-multidevice - OSPRAY_NUM_SUBDEVICES=2 ospTestSuite --gtest_output=xml:tests.xml --baseline-dir=regression_test_baseline/ --failed-dir=failed-multidevice --osp:load-modules=multidevice_cpu --osp:device=multidevice || exit 2 + test_filters="DebugOp/ImageOp.ImageOp/0" # post-processing 
not enabled on multidevice + OSPRAY_NUM_SUBDEVICES=2 ospTestSuite --gtest_output=xml:tests.xml --baseline-dir=regression_test_baseline/ --failed-dir=failed-multidevice --gtest_filter="-$test_filters" --osp:load-modules=multidevice_cpu --osp:device=multidevice || exit 2 fi if [ $TEST_MPI ]; then mkdir failed-mpi - mpiexec $MPI_ROOT_CONFIG ospTestSuite --gtest_output=xml:tests-mpi-offload.xml --baseline-dir=regression_test_baseline/ --failed-dir=failed-mpi --gtest_filter="-TestScenesVariance/*" --osp:load-modules=mpi_offload --osp:device=mpiOffload : $MPI_WORKER_CONFIG ospray_mpi_worker || exit 2 + test_filters="TestScenesVariance/*" + mpiexec $MPI_ROOT_CONFIG ospTestSuite --gtest_output=xml:tests-mpi-offload.xml --baseline-dir=regression_test_baseline/ --failed-dir=failed-mpi --gtest_filter="-$test_filters" --osp:load-modules=mpi_offload --osp:device=mpiOffload : $MPI_WORKER_CONFIG ospray_mpi_worker || exit 2 mkdir failed-mpi-data-parallel mpiexec $MPI_ROOT_CONFIG ospMPIDistribTestSuite --gtest_output=xml:tests-mpi-distrib.xml --baseline-dir=regression_test_baseline/ --failed-dir=failed-mpi-data-parallel || exit 2 diff --git a/test_image_data/baseline/AVX2/DenoiserOp_DenoiserOp.hdr.md5 b/test_image_data/baseline/AVX2/DenoiserOp_DenoiserOp.hdr.md5 new file mode 100644 index 000000000..f22a849d9 --- /dev/null +++ b/test_image_data/baseline/AVX2/DenoiserOp_DenoiserOp.hdr.md5 @@ -0,0 +1 @@ +34af3898958f056c55d3bfa09c641584 diff --git a/test_image_data/baseline/AVX2/ImageOp_DebugOp_ImageOp_0.png.md5 b/test_image_data/baseline/AVX2/ImageOp_DebugOp_ImageOp_0.png.md5 new file mode 100644 index 000000000..0a718b8fc --- /dev/null +++ b/test_image_data/baseline/AVX2/ImageOp_DebugOp_ImageOp_0.png.md5 @@ -0,0 +1 @@ +c7c62db8d871c82c66cebd74435d416d diff --git a/test_image_data/baseline/AVX512SKX/DenoiserOp_DenoiserOp.hdr.md5 b/test_image_data/baseline/AVX512SKX/DenoiserOp_DenoiserOp.hdr.md5 new file mode 100644 index 000000000..f22a849d9 --- /dev/null +++ 
b/test_image_data/baseline/AVX512SKX/DenoiserOp_DenoiserOp.hdr.md5 @@ -0,0 +1 @@ +34af3898958f056c55d3bfa09c641584 diff --git a/test_image_data/baseline/AVX512SKX/ImageOp_DebugOp_ImageOp_0.png.md5 b/test_image_data/baseline/AVX512SKX/ImageOp_DebugOp_ImageOp_0.png.md5 new file mode 100644 index 000000000..0a718b8fc --- /dev/null +++ b/test_image_data/baseline/AVX512SKX/ImageOp_DebugOp_ImageOp_0.png.md5 @@ -0,0 +1 @@ +c7c62db8d871c82c66cebd74435d416d From 0d8337ef27c0010f9b2c588f3b18118d8e1ce74a Mon Sep 17 00:00:00 2001 From: Miroslaw Pawlowski Date: Fri, 2 Jun 2023 12:44:35 +0200 Subject: [PATCH 30/42] New versions of Embree, ISPC & ISPCRT, closes #558 --- CHANGELOG.md | 1 + cmake/ospray_options.cmake | 2 +- cmake/ospray_redistribute_deps.cmake | 1 + doc/prerequisites.md | 2 +- scripts/superbuild/CMakeLists.txt | 22 +++++++++---------- .../superbuild/dependencies/dep_ispc.cmake | 2 +- 6 files changed, 16 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 94f92d973..d300a872c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ Version History limits the number of non-specular (i.e., diffuse and glossy) bounces - Support denoising on the GPU with OIDN 2.0, which is the new minimum version +- The new minimum version for ISPC is v1.20.0 ### Changes in v2.11.0: diff --git a/cmake/ospray_options.cmake b/cmake/ospray_options.cmake index ede06701e..58f9e7536 100644 --- a/cmake/ospray_options.cmake +++ b/cmake/ospray_options.cmake @@ -11,7 +11,7 @@ include(CMakeDependentOption) set(OSPRAY_CMAKECONFIG_DIR "${CMAKE_INSTALL_LIBDIR}/cmake/ospray-${OSPRAY_VERSION}") -set(ISPC_VERSION_REQUIRED 1.19.0) +set(ISPC_VERSION_REQUIRED 1.20.0) set(RKCOMMON_VERSION_REQUIRED 1.11.0) set(EMBREE_VERSION_REQUIRED 4.0.0) set(OPENVKL_GPU_VERSION_REQUIRED 2.0.0) diff --git a/cmake/ospray_redistribute_deps.cmake b/cmake/ospray_redistribute_deps.cmake index bdbd218a5..0f24a1fdc 100644 --- a/cmake/ospray_redistribute_deps.cmake +++ 
b/cmake/ospray_redistribute_deps.cmake @@ -94,6 +94,7 @@ macro(ospray_add_dependent_lib_plugins TARGET_NAME PLUGINS_PATTERN) endmacro() ospray_add_dependent_lib(ispcrt::ispcrt) +ospray_add_dependent_lib_plugins(ispcrt::ispcrt "ispcrt_device_*") ospray_add_dependent_lib(rkcommon::rkcommon) if (RKCOMMON_TASKING_TBB) ospray_add_dependent_lib(TBB::tbb) diff --git a/doc/prerequisites.md b/doc/prerequisites.md index 4fcac45a7..627e4d60f 100644 --- a/doc/prerequisites.md +++ b/doc/prerequisites.md @@ -21,7 +21,7 @@ before you can build OSPRay you need the following prerequisites: (icc)](https://software.intel.com/en-us/c-compilers)), and standard Linux development tools. - Additionally you require a copy of the [Intel® Implicit SPMD Program - Compiler (ISPC)](http://ispc.github.io), version 1.19.0 or later. + Compiler (ISPC)](http://ispc.github.io), version 1.20.0 or later. Please obtain a release of ISPC from the [ISPC downloads page](https://ispc.github.io/downloads.html). If ISPC is not found by CMake its location can be hinted with the diff --git a/scripts/superbuild/CMakeLists.txt b/scripts/superbuild/CMakeLists.txt index 7b1c388dc..22551ef85 100644 --- a/scripts/superbuild/CMakeLists.txt +++ b/scripts/superbuild/CMakeLists.txt @@ -73,15 +73,15 @@ option(ALWAYS_REBUILD "Force every project to always be rebuilt?" OFF) option(DOWNLOAD_ISPC "Download ISPC or use the one found in the system environment?" 
ON) if (DOWNLOAD_ISPC) - set(ISPC_VERSION "1.19.0" CACHE STRING "Which version of ISPC to download?") + set(ISPC_VERSION "1.20.0" CACHE STRING "Which version of ISPC to download?") mark_as_advanced(CLEAR ISPC_VERSION) - if (ISPC_VERSION STREQUAL "1.19.0") + if (ISPC_VERSION STREQUAL "1.20.0") if (APPLE) - set(ISPC_HASH "f61993807ba00d5deeb46f65be7e38d7c95bcfb6e6e114993afa53b44320556c") + set(ISPC_HASH "a675ac08e6587d6ad7eb563df3db3027a3c18482f404d5b9592bf2ef4a4fab9a") elseif (WIN32) - set(ISPC_HASH "3f2953f9328290adfd0143707dce13e342f6df1099b3be5148a25e41347f958c") + set(ISPC_HASH "e212ebfb4e8afb57adc103a2579c52673a3ca49610fbc2a5eae643d3d378548d") else() - set(ISPC_HASH "f99a0afd4c8b5e8aceb46af8e90a7ba0813bf4c4111044ced27d498591304f9c") + set(ISPC_HASH "e6412b88aa312fcd10c46f92df0149ccc4d99e53552c4ce127aa6c634fe9b308") endif() endif() else() @@ -118,17 +118,17 @@ else() endif() option(BUILD_EMBREE_FROM_SOURCE "Build Embree or use pre-built version?" ON) -set(EMBREE_VERSION "4.0.0" CACHE STRING "Which version of Embree to build?") -if (EMBREE_VERSION STREQUAL "4.0.0") +set(EMBREE_VERSION "4.1.0" CACHE STRING "Which version of Embree to build?") +if (EMBREE_VERSION STREQUAL "4.1.0") if (BUILD_EMBREE_FROM_SOURCE) - set(EMBREE_HASH "e00d1f6f19ff12d7067420a081afc994744d4862f95c527bfb0436f5f0908794") + set(EMBREE_HASH "0d98995712b8d10ba6dd861b037e5cd74285f6277e12b9b68c81b5aa093722b8") else() if (APPLE) - set(EMBREE_HASH "837c49702d688623b48ba40ee10fb39f9dd39be6b73ccb4d910c32c5d5415836") + set(EMBREE_HASH "2d32b650e0e88b067d2d856473dc70411b2f5b5d27ae95b70feafb1e39364078") elseif (WIN32) - set(EMBREE_HASH "4f375d71d70b4fcf1dd18ca88f26a1255ff0d827bf3db62a88f204cc9089dc1f") + set(EMBREE_HASH "8972ad00497b06f6ed83750edf7ff57760b82e5c7262b4c40ce77599f516d197") else() - set(EMBREE_HASH "524842e2f141dca0db584c33a0821176373e7058f3ec2201bfb19d9e9a1b80b9") + set(EMBREE_HASH "8e5dd14c91054708fc589dd679e0fd7de37ebcf8e208e8bc254abc91f4c66c0b") endif() endif() endif() diff --git 
a/scripts/superbuild/dependencies/dep_ispc.cmake b/scripts/superbuild/dependencies/dep_ispc.cmake index 49c404afe..18ecb72d7 100644 --- a/scripts/superbuild/dependencies/dep_ispc.cmake +++ b/scripts/superbuild/dependencies/dep_ispc.cmake @@ -9,7 +9,7 @@ if (INSTALL_IN_SEPARATE_DIRECTORIES) endif() if (APPLE) - set(ISPC_OSSUFFIX "macOS.tar.gz") + set(ISPC_OSSUFFIX "macOS.universal.tar.gz") elseif(WIN32) set(ISPC_OSSUFFIX "windows.zip") else() From 1e76cb84b8b149e74e44cbd2301f949025235636 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=BCnther?= Date: Fri, 2 Jun 2023 14:02:50 +0200 Subject: [PATCH 31/42] Bump version --- README.md | 9 +++++---- cmake/ospray_version.cmake | 4 ++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index fde558633..95d2d70e2 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ OSPRay ====== -This is release v2.11.1 (devel) of Intel® OSPRay. For changes and new +This is release v2.12.0 (devel) of Intel® OSPRay. For changes and new features see the [changelog](CHANGELOG.md). Visit http://www.ospray.org for more information. @@ -75,7 +75,7 @@ before you can build OSPRay you need the following prerequisites: Linux development tools. - Additionally you require a copy of the [Intel® Implicit SPMD Program - Compiler (ISPC)](http://ispc.github.io), version 1.19.0 or later. + Compiler (ISPC)](http://ispc.github.io), version 1.20.0 or later. Please obtain a release of ISPC from the [ISPC downloads page](https://ispc.github.io/downloads.html). If ISPC is not found by CMake its location can be hinted with the variable `ispcrt_DIR`. @@ -104,7 +104,7 @@ before you can build OSPRay you need the following prerequisites: - OSPRay also provides an optional module implementing the `denoiser` image operation, which is enabled by `OSPRAY_MODULE_DENOISER`. 
This module requires Intel [Open Image - Denoise](https://openimagedenoise.github.io/) in version 1.2.3 or + Denoise](https://openimagedenoise.github.io/) in version 2.0.0 or newer. You may need to hint the location of the library with the CMake variable `OpenImageDenoise_DIR`. @@ -2081,7 +2081,7 @@ occlusion). This renderer supports only a subset of the features of the [SciVis renderer](#scivis-renderer) to gain performance. As the name suggest its main shading method is ambient occlusion (AO), [lights](#lights) are -*not* considered at all and , Volume rendering is supported. The Ambient +*not* considered at all. Volume rendering is supported. The Ambient Occlusion renderer is created by passing the type string “`ao`” to `ospNewRenderer`. In addition to the [general parameters](#renderer) understood by all renderers the following parameters are supported as @@ -2108,6 +2108,7 @@ supports the following special parameters: |:------|:---------------------|--------:|:------------------------------------------------------------------------------------------| | int | lightSamples | all | number of random light samples per path vertex, per default all light sources are sampled | | int | roulettePathLength | 5 | ray recursion depth at which to start Russian roulette termination | +| int | maxScatteringEvents | 20 | maximum number of non-specular (i.e., diffuse and glossy) bounces | | float | maxContribution | ∞ | samples are clamped to this value before they are accumulated into the framebuffer | | bool | backgroundRefraction | false | allow for alpha blending even if background is seen through refractive objects like glass | diff --git a/cmake/ospray_version.cmake b/cmake/ospray_version.cmake index 65a9ec3b4..2910e02c9 100644 --- a/cmake/ospray_version.cmake +++ b/cmake/ospray_version.cmake @@ -2,8 +2,8 @@ ## SPDX-License-Identifier: Apache-2.0 set(OSPRAY_VERSION_MAJOR 2) -set(OSPRAY_VERSION_MINOR 11) -set(OSPRAY_VERSION_PATCH 1) +set(OSPRAY_VERSION_MINOR 12) 
+set(OSPRAY_VERSION_PATCH 0) set(OSPRAY_SOVERSION 2) set(OSPRAY_VERSION_GITHASH 0) set(OSPRAY_VERSION_NOTE "") From 9def6a9e969226dbeb958a8c620fdf93bdc8172a Mon Sep 17 00:00:00 2001 From: Roba Binyahib Date: Tue, 16 May 2023 15:56:18 -0600 Subject: [PATCH 32/42] Refactor LocalFB writeTiles to use sycl event for GPU --- modules/cpu/fb/LocalFB.cpp | 93 +++++++++++++++++++++++++++++++------ modules/cpu/fb/LocalFB.ispc | 9 ++-- 2 files changed, 83 insertions(+), 19 deletions(-) diff --git a/modules/cpu/fb/LocalFB.cpp b/modules/cpu/fb/LocalFB.cpp index 58b990762..f88972893 100644 --- a/modules/cpu/fb/LocalFB.cpp +++ b/modules/cpu/fb/LocalFB.cpp @@ -14,16 +14,26 @@ #include "fb/LocalFB_ispc.h" #else namespace ispc { -void LocalFrameBuffer_writeTile_RGBA8(void *_fb, const void *_tile); -void LocalFrameBuffer_writeTile_SRGBA(void *_fb, const void *_tile); -void LocalFrameBuffer_writeTile_RGBA32F(void *_fb, const void *_tile); + +SYCL_EXTERNAL void LocalFrameBuffer_writeTile_RGBA8( + void *_fb, const void *_tile); +SYCL_EXTERNAL void LocalFrameBuffer_writeTile_SRGBA( + void *_fb, const void *_tile); +SYCL_EXTERNAL void LocalFrameBuffer_writeTile_RGBA32F( + void *_fb, const void *_tile); + +SYCL_EXTERNAL void LocalFrameBuffer_writeDepthTile(void *_fb, const void *uniform _tile); + +SYCL_EXTERNAL void LocalFrameBuffer_writeAuxTile(void *_fb, const void *_tile, void *aux, const void *_ax, const void *_ay, const void *_az); + +SYCL_EXTERNAL void LocalFrameBuffer_writeIDTile(void *uniform _fb, const void *uniform _tile, uniform uint32 *uniform dst, @@ -216,12 +226,12 @@ void LocalFrameBuffer::clear() taskErrorRegion.clear(); } } - void LocalFrameBuffer::writeTiles(const utility::ArrayView &tiles) { // TODO: The parallel dispatch part of this should be moved into ISPC as an // ISPC launch that calls the individual (currently) exported functions that // we call below in this loop +#ifndef OSPRAY_TARGET_SYCL tasking::parallel_for(tiles.size(), [&](const size_t i) { const Tile *tile 
= &tiles[i]; if (hasDepthBuffer) { @@ -231,11 +241,7 @@ void LocalFrameBuffer::writeTiles(const utility::ArrayView &tiles) if (hasAlbedoBuffer) { ispc::LocalFrameBuffer_writeAuxTile(getSh(), tile, -#ifndef OSPRAY_TARGET_SYCL (ispc::vec3f *)albedoBuffer->data(), -#else - *albedoBuffer->data(), -#endif tile->ar, tile->ag, tile->ab); @@ -259,31 +265,88 @@ void LocalFrameBuffer::writeTiles(const utility::ArrayView &tiles) if (hasNormalBuffer) { ispc::LocalFrameBuffer_writeAuxTile(getSh(), tile, -#ifndef OSPRAY_TARGET_SYCL (ispc::vec3f *)normalBuffer->data(), -#else - *normalBuffer->data(), -#endif tile->nx, tile->ny, tile->nz); } if (colorBuffer) { switch (getColorBufferFormat()) { - case OSP_FB_RGBA8: + case OSP_FB_RGBA8: { ispc::LocalFrameBuffer_writeTile_RGBA8(getSh(), tile); break; - case OSP_FB_SRGBA: + } + case OSP_FB_SRGBA: { ispc::LocalFrameBuffer_writeTile_SRGBA(getSh(), tile); break; - case OSP_FB_RGBA32F: + } + case OSP_FB_RGBA32F: { ispc::LocalFrameBuffer_writeTile_RGBA32F(getSh(), tile); break; + } default: NOT_IMPLEMENTED; } } }); + +#else + auto *fbSh = getSh(); + const size_t numTasks = tiles.size(); + const Tile *tilesPtr = tiles.data(); + const int colorFormat = getColorBufferFormat(); + vec3f *albedoBufferPtr = fbSh->super.channels & OSP_FB_ALBEDO ? albedoBuffer->data() : nullptr; + vec3f *normalBufferPtr = fbSh->super.channels & OSP_FB_NORMAL ? 
normalBuffer->data() : nullptr; + + device.getSyclQueue() + .submit([&](sycl::handler &cgh) { + const sycl::nd_range<1> dispatchRange = + device.computeDispatchRange(numTasks, 16); + cgh.parallel_for(dispatchRange, [=](sycl::nd_item<1> taskIndex) { + if (taskIndex.get_global_id(0) < numTasks) { + const Tile *tile = &tilesPtr[taskIndex.get_global_id(0)]; + if (fbSh->super.channels & OSP_FB_DEPTH) { + ispc::LocalFrameBuffer_writeDepthTile(fbSh, tile); + } + if (fbSh->super.channels & OSP_FB_ALBEDO) { + ispc::LocalFrameBuffer_writeAuxTile( + fbSh, tile, albedoBufferPtr, tile->ar, tile->ag, tile->ab); + } + if (fbSh->super.channels & OSP_FB_ID_PRIMITIVE) { + ispc::LocalFrameBuffer_writeIDTile( + fbSh, tile, fbSh->primitiveIDBuffer, tile->pid); + } + if (fbSh->super.channels & OSP_FB_ID_OBJECT) { + ispc::LocalFrameBuffer_writeIDTile( + fbSh, tile, fbSh->objectIDBuffer, tile->gid); + } + if (fbSh->super.channels & OSP_FB_ID_INSTANCE) { + ispc::LocalFrameBuffer_writeIDTile( + fbSh, tile, fbSh->instanceIDBuffer, tile->iid); + } + if (fbSh->super.channels & OSP_FB_NORMAL) { + ispc::LocalFrameBuffer_writeAuxTile( + fbSh, tile, normalBufferPtr, tile->nx, tile->ny, tile->nz); + } + switch (colorFormat) { + case OSP_FB_RGBA8: + ispc::LocalFrameBuffer_writeTile_RGBA8(fbSh, tile); + break; + case OSP_FB_SRGBA: + ispc::LocalFrameBuffer_writeTile_SRGBA(fbSh, tile); + break; + case OSP_FB_RGBA32F: + ispc::LocalFrameBuffer_writeTile_RGBA32F(fbSh, tile); + break; + default: + break; + } + } + }); + }) + .wait_and_throw(); + +#endif } void LocalFrameBuffer::writeTiles(const SparseFrameBuffer *sparseFb) diff --git a/modules/cpu/fb/LocalFB.ispc b/modules/cpu/fb/LocalFB.ispc index 9d437d98b..b210d72d6 100644 --- a/modules/cpu/fb/LocalFB.ispc +++ b/modules/cpu/fb/LocalFB.ispc @@ -15,7 +15,7 @@ OSPRAY_BEGIN_ISPC_NAMESPACE // this buffer _must_ exist when this fct is called, and it // _must_ have format 'name' #define template_writeTile(name, type, cvt) \ - export void 
LocalFrameBuffer_writeTile_##name( \ +export SYCL_EXTERNAL void LocalFrameBuffer_writeTile_##name( \ void *uniform _fb, const void *uniform _tile) \ { \ Tile *uniform tile = (Tile * uniform) _tile; \ @@ -291,7 +291,7 @@ SYCL_EXTERNAL void LocalFB_completeTask( } // Write the input tile into the row-major framebuffer's depth buffer -export void LocalFrameBuffer_writeDepthTile( +export SYCL_EXTERNAL void LocalFrameBuffer_writeDepthTile( void *uniform _fb, const void *uniform _tile) { uniform LocalFB *uniform fb = (uniform LocalFB * uniform) _fb; @@ -317,7 +317,7 @@ export void LocalFrameBuffer_writeDepthTile( } // "accumulate" first frame only and write into buffer -export void LocalFrameBuffer_writeIDTile(void *uniform _fb, +export SYCL_EXTERNAL void LocalFrameBuffer_writeIDTile(void *uniform _fb, const void *uniform _tile, uniform uint32 *uniform dst, const void *uniform src) @@ -353,7 +353,7 @@ export void LocalFrameBuffer_writeIDTile(void *uniform _fb, } // Write a Tile's auxiliary buffer into the framebuffer's row major storage -export void LocalFrameBuffer_writeAuxTile(void *uniform _fb, +export SYCL_EXTERNAL void LocalFrameBuffer_writeAuxTile(void *uniform _fb, const void *uniform _tile, void *uniform _aux, const void *uniform _ax, @@ -398,4 +398,5 @@ export void *uniform LocalFrameBuffer_completeTask_addr() { return (void *uniform)LocalFB_completeTask; } + OSPRAY_END_ISPC_NAMESPACE From 9e9a20249f18583230102035ffbe97c1b1340939 Mon Sep 17 00:00:00 2001 From: Will Usher Date: Tue, 2 May 2023 16:58:51 -0700 Subject: [PATCH 33/42] Use OpenVKL Specialization Constants - Add VKL spec. consts to AO, SciVis, PathTracer - Change to pass FeatureFlagsHandler through to avoid indir call issues - Move to u64 vkl feature flags - Always pass FeatureFlagsHandler to OSPRay internal functions - Update MPI module to spec. 
const and GPU localfb - need openvkl gpu-devel in ci for all builds - Unify getFeatureFlags across code, always return full FeatureFlags - mark inline, embree 4.1.0 resolved calla issue - Use ff.geometry to remove unused geometry code for volume-only scenes - Update exported symbols - Add flag to remove volume codepaths for isosurface-only scenes - Also add debug renderer type specialization constant to improve JIT time - Enable more tests, sphere precision does match avx512 --- cmake/compiler/dpcpp.cmake | 2 +- modules/cpu/camera/Camera.h | 8 +- modules/cpu/camera/CameraDispatch.ih | 2 +- modules/cpu/camera/CameraDispatch.ispc | 5 +- modules/cpu/common/FeatureFlags.ih | 37 +++- modules/cpu/common/FeatureFlagsEnum.h | 90 +++++++-- modules/cpu/common/Group.cpp | 6 +- modules/cpu/common/Instance.ih | 5 +- modules/cpu/common/RayQueryContext.ih | 8 +- modules/cpu/common/World.cpp | 14 +- modules/cpu/common/World.ih | 50 ++--- modules/cpu/common/World.ispc | 2 +- modules/cpu/fb/FrameBuffer.h | 8 +- modules/cpu/fb/FrameBufferDispatch.ih | 7 +- modules/cpu/fb/FrameBufferDispatch.ispc | 10 +- modules/cpu/fb/LocalFB.ih | 1 - modules/cpu/fb/LocalFB.ispc | 1 - modules/cpu/geometry/Curves.cpp | 2 +- modules/cpu/geometry/GeometricModel.cpp | 12 +- modules/cpu/geometry/GeometricModel.h | 22 +-- modules/cpu/geometry/GeometricModel.ih | 4 +- modules/cpu/geometry/Geometry.cpp | 3 +- modules/cpu/geometry/Geometry.h | 10 +- modules/cpu/geometry/GeometryDispatch.ih | 7 +- modules/cpu/geometry/GeometryDispatch.ispc | 54 ++++-- modules/cpu/geometry/Isosurfaces.h | 14 ++ modules/cpu/geometry/Isosurfaces.ih | 10 +- modules/cpu/geometry/Isosurfaces.ispc | 42 +++- modules/cpu/geometry/Mesh.cpp | 2 +- modules/cpu/ispc_symbols.txt | 14 +- modules/cpu/lights/AmbientLight.ih | 3 +- modules/cpu/lights/AmbientLight.ispc | 3 +- modules/cpu/lights/CylinderLight.ih | 6 +- modules/cpu/lights/CylinderLight.ispc | 6 +- modules/cpu/lights/DirectionalLight.ih | 6 +- 
modules/cpu/lights/DirectionalLight.ispc | 6 +- modules/cpu/lights/HDRILight.ih | 6 +- modules/cpu/lights/HDRILight.ispc | 6 +- modules/cpu/lights/Light.h | 8 +- modules/cpu/lights/LightDispatch.ih | 4 +- modules/cpu/lights/LightDispatch.ispc | 27 +-- modules/cpu/lights/LightShared.h | 7 +- modules/cpu/lights/PointLight.ih | 6 +- modules/cpu/lights/PointLight.ispc | 6 +- modules/cpu/lights/QuadLight.ih | 7 +- modules/cpu/lights/QuadLight.ispc | 6 +- modules/cpu/lights/SpotLight.ih | 6 +- modules/cpu/lights/SpotLight.ispc | 6 +- modules/cpu/render/Material.h | 8 +- modules/cpu/render/Material.ih | 9 +- modules/cpu/render/MaterialDispatch.ih | 7 +- modules/cpu/render/MaterialDispatch.ispc | 14 +- modules/cpu/render/RenderTaskSycl.h | 5 + modules/cpu/render/Renderer.cpp | 6 +- modules/cpu/render/Renderer.h | 2 +- modules/cpu/render/Renderer.ih | 4 +- modules/cpu/render/Renderer.ispc | 30 ++- modules/cpu/render/RendererRenderTaskFn.inl | 23 +-- modules/cpu/render/ao/AORenderer.cpp | 14 +- modules/cpu/render/ao/AORenderer.ih | 4 +- modules/cpu/render/ao/AORenderer.ispc | 43 ++-- modules/cpu/render/ao/surfaces.ih | 7 +- modules/cpu/render/ao/surfaces.ispc | 12 +- modules/cpu/render/ao/volumes.ih | 4 +- modules/cpu/render/ao/volumes.ispc | 68 +++++-- modules/cpu/render/bsdfs/BSDF.ih | 12 +- modules/cpu/render/bsdfs/BSDF.ispc | 115 +++++------ modules/cpu/render/bsdfs/Conductor.ih | 4 +- modules/cpu/render/bsdfs/Dielectric.ih | 4 +- modules/cpu/render/bsdfs/DielectricLayer.ih | 12 +- modules/cpu/render/bsdfs/Lambert.ih | 4 +- .../cpu/render/bsdfs/LambertTransmission.ih | 4 +- .../cpu/render/bsdfs/MicrofacetConductor.ih | 4 +- .../cpu/render/bsdfs/MicrofacetDielectric.ih | 10 +- .../render/bsdfs/MicrofacetDielectricLayer.ih | 16 +- .../cpu/render/bsdfs/MicrofacetSheenLayer.ih | 16 +- modules/cpu/render/bsdfs/Minneart.ih | 6 +- modules/cpu/render/bsdfs/MultiBSDF.ih | 12 +- modules/cpu/render/bsdfs/OrenNayar.ih | 6 +- modules/cpu/render/bsdfs/Reflection.ih | 4 +- 
modules/cpu/render/bsdfs/RobustDielectric.ih | 4 +- .../cpu/render/bsdfs/RobustThinDielectric.ih | 4 +- modules/cpu/render/bsdfs/Scale.ih | 8 +- modules/cpu/render/bsdfs/Specular.ih | 4 +- modules/cpu/render/bsdfs/ThinDielectric.ih | 4 +- .../render/bsdfs/ThinMicrofacetDielectric.ih | 4 +- modules/cpu/render/bsdfs/Transmission.ih | 4 +- modules/cpu/render/bsdfs/Velvety.ih | 6 +- modules/cpu/render/debug/DebugRenderer.cpp | 18 +- modules/cpu/render/debug/DebugRenderer.ih | 4 +- modules/cpu/render/debug/DebugRenderer.ispc | 183 +++++++++++------- .../cpu/render/debug/DebugRendererShared.h | 6 + modules/cpu/render/materials/OBJ.ih | 4 +- modules/cpu/render/materials/OBJ.ispc | 17 +- .../cpu/render/pathtracer/GeometryLight.ih | 4 +- .../cpu/render/pathtracer/GeometryLight.ispc | 8 +- .../render/pathtracer/NextEventEstimation.ih | 2 +- .../pathtracer/NextEventEstimation.ispc | 25 +-- modules/cpu/render/pathtracer/PathSampler.ih | 2 +- .../cpu/render/pathtracer/PathSampler.ispc | 60 +++--- modules/cpu/render/pathtracer/PathTracer.cpp | 19 +- modules/cpu/render/pathtracer/PathTracer.ispc | 29 ++- .../cpu/render/pathtracer/ShadowCatcher.ih | 2 +- .../cpu/render/pathtracer/ShadowCatcher.ispc | 10 +- .../render/pathtracer/TransparentShadow.ih | 3 +- .../render/pathtracer/TransparentShadow.ispc | 18 +- modules/cpu/render/pathtracer/VirtualLight.ih | 2 +- .../cpu/render/pathtracer/VirtualLight.ispc | 13 +- .../pathtracer/volumes/VolumeSampler.ih | 7 +- .../pathtracer/volumes/VolumeSampler.ispc | 75 +++++-- modules/cpu/render/scivis/SciVis.cpp | 17 +- modules/cpu/render/scivis/SciVis.ih | 4 +- modules/cpu/render/scivis/SciVis.ispc | 41 ++-- modules/cpu/render/scivis/lightAlpha.ispc | 21 +- modules/cpu/render/scivis/surfaces.ih | 15 +- modules/cpu/render/scivis/surfaces.ispc | 19 +- modules/cpu/render/scivis/volumes.ih | 3 +- modules/cpu/render/scivis/volumes.ispc | 62 ++++-- modules/cpu/render/util.ih | 3 +- modules/cpu/render/util.ispc | 5 +- modules/cpu/volume/Volume.cpp | 6 
+- modules/cpu/volume/Volume.h | 12 +- modules/cpu/volume/Volume.ih | 16 +- modules/cpu/volume/VolumetricModel.h | 18 +- modules/mpi/ospray/CMakeLists.txt | 1 - .../mpi/ospray/fb/DistributedFrameBuffer.cpp | 52 ++++- .../ospray/render/DistributedLoadBalancer.cpp | 22 ++- .../render/distributed/DistributedRaycast.cpp | 17 +- .../render/distributed/DistributedRaycast.ih | 22 --- .../distributed/DistributedRaycast.ispc | 120 ++++++++---- .../distributed/DistributedRenderer.cpp | 22 ++- .../distributed/DistributedRenderer.ispc | 21 +- .../DistributedRendererRenderTaskFn.inl | 15 +- scripts/tests/run_gpu_tests.sh | 57 ++---- 134 files changed, 1343 insertions(+), 863 deletions(-) delete mode 100644 modules/mpi/ospray/render/distributed/DistributedRaycast.ih diff --git a/cmake/compiler/dpcpp.cmake b/cmake/compiler/dpcpp.cmake index 3b894aae0..c1cac0882 100644 --- a/cmake/compiler/dpcpp.cmake +++ b/cmake/compiler/dpcpp.cmake @@ -87,7 +87,7 @@ elseif (CMAKE_BUILD_TYPE MATCHES "Debug") endif() # Large GRF mode -option(OSPRAY_SYCL_LARGEGRF "Enable SYCL Large GRF Support" OFF) +option(OSPRAY_SYCL_LARGEGRF "Enable SYCL Large GRF Support" ON) if (OSPRAY_SYCL_LARGEGRF) list(APPEND OSPRAY_OCL_OPTIONS "-internal_options -cl-intel-256-GRF-per-thread") endif() diff --git a/modules/cpu/camera/Camera.h b/modules/cpu/camera/Camera.h index cdcaa64e4..91ceea08e 100644 --- a/modules/cpu/camera/Camera.h +++ b/modules/cpu/camera/Camera.h @@ -33,7 +33,7 @@ struct OSPRAY_SDK_INTERFACE Camera // Assume no motion blur nor depth of field (true for SciVis) virtual box3f projectBox(const box3f &b) const; - FeatureFlagsOther getFeatureFlagsOther() const; + FeatureFlags getFeatureFlags() const; // Data members // @@ -59,9 +59,11 @@ struct OSPRAY_SDK_INTERFACE Camera OSPTYPEFOR_SPECIALIZATION(Camera *, OSP_CAMERA); -inline FeatureFlagsOther Camera::getFeatureFlagsOther() const +inline FeatureFlags Camera::getFeatureFlags() const { - return featureFlags; + FeatureFlags ff; + ff.other = featureFlags; + 
return ff; } } // namespace ospray diff --git a/modules/cpu/camera/CameraDispatch.ih b/modules/cpu/camera/CameraDispatch.ih index 30b463100..c91bc12d4 100644 --- a/modules/cpu/camera/CameraDispatch.ih +++ b/modules/cpu/camera/CameraDispatch.ih @@ -10,6 +10,6 @@ OSPRAY_BEGIN_ISPC_NAMESPACE SYCL_EXTERNAL void Camera_dispatch_initRay(const Camera *uniform self, varying Ray &ray, const varying CameraSample &sample, - const uniform FeatureFlagsOther ffo); + const uniform FeatureFlagsHandler &ffh); OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/camera/CameraDispatch.ispc b/modules/cpu/camera/CameraDispatch.ispc index a0c7f84e7..73857e5da 100644 --- a/modules/cpu/camera/CameraDispatch.ispc +++ b/modules/cpu/camera/CameraDispatch.ispc @@ -6,14 +6,17 @@ #include "OrthographicCamera.ih" #include "PanoramicCamera.ih" #include "PerspectiveCamera.ih" +#include "common/FeatureFlags.ih" +#include "common/FeatureFlagsEnum.h" OSPRAY_BEGIN_ISPC_NAMESPACE SYCL_EXTERNAL void Camera_dispatch_initRay(const Camera *uniform self, varying Ray &ray, const varying CameraSample &sample, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsHandler &ffh) { + const uniform FeatureFlagsOther ffo = getFeatureFlagsOther(ffh); if ((self->type == CAMERA_TYPE_PERSPECTIVE) && (ffo & FFO_CAMERA_PERSPECTIVE)) { PerspectiveCamera_initRay(self, ray, sample); diff --git a/modules/cpu/common/FeatureFlags.ih b/modules/cpu/common/FeatureFlags.ih index 240328d9f..57f75de84 100644 --- a/modules/cpu/common/FeatureFlags.ih +++ b/modules/cpu/common/FeatureFlags.ih @@ -3,21 +3,56 @@ #pragma once +#ifdef OSPRAY_TARGET_SYCL +#include +#endif + #include "FeatureFlagsEnum.h" OSPRAY_BEGIN_ISPC_NAMESPACE #ifdef OSPRAY_TARGET_SYCL using namespace ospray; + +inline constexpr sycl::specialization_id specFeatureFlags; +#endif + +struct FeatureFlagsHandler +{ +#ifdef OSPRAY_TARGET_SYCL + sycl::kernel_handler &kernel_handler; + + FeatureFlagsHandler(sycl::kernel_handler &kh) : kernel_handler(kh) {} #endif +}; 
inline uniform FeatureFlags ffAll() { uniform FeatureFlags ff; ff.geometry = FFG_ALL; - ff.volume = FFV_ALL; +#ifdef OSPRAY_ENABLE_VOLUMES + ff.volume = VKL_FEATURE_FLAGS_DEFAULT; +#endif ff.other = FFO_ALL; return ff; } +inline uniform FeatureFlags getFeatureFlags( + const uniform FeatureFlagsHandler &ffh) +{ +#ifdef OSPRAY_TARGET_SYCL + return ffh.kernel_handler.get_specialization_constant(); +#else + return ffAll(); +#endif +} + +// For working around https://github.com/ispc/ispc/issues/2533 +inline uniform FeatureFlagsOther getFeatureFlagsOther( + const uniform FeatureFlagsHandler &ffh) +{ + const uniform FeatureFlags ff = getFeatureFlags(ffh); + return ff.other; +} + OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/common/FeatureFlagsEnum.h b/modules/cpu/common/FeatureFlagsEnum.h index d872506bc..8b1a14a1b 100644 --- a/modules/cpu/common/FeatureFlagsEnum.h +++ b/modules/cpu/common/FeatureFlagsEnum.h @@ -3,6 +3,28 @@ #pragma once +#ifdef OSPRAY_ENABLE_VOLUMES +#ifdef ISPC +#include +#else +#include +#endif + +#if OPENVKL_VERSION_MAJOR == 1 +#ifdef ISPC +typedef uint64 VKLFeatureFlags; +#else +#include +typedef uint64_t VKLFeatureFlags; +#endif + +#define VKL_FEATURE_FLAGS_NONE 0 +#define VKL_FEATURE_FLAGS_DEFAULT -1 + +#endif + +#endif + #ifdef __cplusplus #include #include @@ -64,18 +86,6 @@ enum FeatureFlagsGeometry FFG_ALL = 0xffffffff }; -enum FeatureFlagsVolume -#ifdef __cplusplus - : uint32_t -#endif -{ - FFV_NONE = 0, - - FFV_VOLUME = 1 << 0, - - FFV_ALL = 0xffffffff -}; - enum FeatureFlagsOther #ifdef __cplusplus : uint32_t @@ -115,26 +125,45 @@ enum FeatureFlagsOther FFO_TEXTURE_IN_MATERIAL = 1 << 25, FFO_TEXTURE_IN_RENDERER = 1 << 26, + // We track if there's a volume object in the scene separately from the volume + // feature flags to distinguish between needing the volume rendering/sampling + // code paths or just needing the isosurface traversal code path. 
+ FFO_VOLUME_IN_SCENE = 1 << 27, + FFO_ALL = 0xffffffff }; struct FeatureFlags { FeatureFlagsGeometry geometry; - FeatureFlagsVolume volume; + FeatureFlagsOther other; + +#ifdef OSPRAY_ENABLE_VOLUMES + VKLFeatureFlags volume; +#endif + #ifdef __cplusplus constexpr FeatureFlags() - : geometry(FFG_NONE), volume(FFV_NONE), other(FFO_NONE) + : geometry(FFG_NONE), + other(FFO_NONE) +#ifdef OSPRAY_ENABLE_VOLUMES + , + volume(VKL_FEATURE_FLAGS_NONE) +#endif {} void reset() { geometry = FFG_NONE; - volume = FFV_NONE; other = FFO_NONE; +#ifdef OSPRAY_ENABLE_VOLUMES + volume = VKL_FEATURE_FLAGS_NONE; +#endif } +#endif }; +#ifdef __cplusplus template ::value>::type> inline T operator|(T a, T b) @@ -148,7 +177,30 @@ inline T &operator|=(T &a, T b) { return a = a | b; } -} // namespace ospray -#else -}; -#endif // __cplusplus + +inline FeatureFlags operator|(const FeatureFlags &a, const FeatureFlags &b) +{ + FeatureFlags ff; + ff.geometry = a.geometry | b.geometry; +#ifdef OSPRAY_ENABLE_VOLUMES + ff.volume = a.volume | b.volume; +#endif + ff.other = a.other | b.other; + return ff; +} + +inline FeatureFlags &operator|=(FeatureFlags &a, const FeatureFlags &b) +{ + a.geometry |= b.geometry; +#ifdef OSPRAY_ENABLE_VOLUMES + a.volume |= b.volume; +#endif + a.other |= b.other; + return a; +} +#endif + +#ifdef __cplusplus +} +// namespace ospray +#endif diff --git a/modules/cpu/common/Group.cpp b/modules/cpu/common/Group.cpp index ff5234173..5a1304466 100644 --- a/modules/cpu/common/Group.cpp +++ b/modules/cpu/common/Group.cpp @@ -23,9 +23,7 @@ inline void createEmbreeScene(RTCScene &scene, { for (auto &&obj : objects) { rtcAttachGeometry(scene, obj->embreeGeometryHandle()); - featureFlags.geometry |= obj->getFeatureFlagsGeometry(); - featureFlags.volume |= obj->getFeatureFlagsVolume(); - featureFlags.other |= obj->getFeatureFlagsOther(); + featureFlags |= obj->getFeatureFlags(); } rtcSetSceneFlags(scene, static_cast(embreeFlags)); @@ -191,7 +189,7 @@ void Group::commit() if (numLights 
> 0) { // Gather light types for (auto &&light : *lights) - featureFlags.other |= light->getFeatureFlagsOther(); + featureFlags |= light->getFeatureFlags(); // Create empty scene for lights-only group, // it is needed to have rtcGeometry created in Instance object diff --git a/modules/cpu/common/Instance.ih b/modules/cpu/common/Instance.ih index 5d1497a38..e62a60060 100644 --- a/modules/cpu/common/Instance.ih +++ b/modules/cpu/common/Instance.ih @@ -11,6 +11,7 @@ // c++ shared #include "GroupShared.h" #include "InstanceShared.h" +#include "common/FeatureFlagsEnum.h" OSPRAY_BEGIN_ISPC_NAMESPACE @@ -42,7 +43,7 @@ inline void Instance_postIntersect(const Instance *uniform self, const varying Ray &ray, uniform int64 flags, const uniform bool clip, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { GeometricModel **uniform models = clip ? self->group->clipModels : self->group->geometricModels; @@ -54,7 +55,7 @@ inline void Instance_postIntersect(const Instance *uniform self, foreach_unique (geomID in ray.geomID) { #endif GeometricModel_postIntersect( - *(models + geomID), renderer, dg, ray, flags, ff); + *(models + geomID), renderer, dg, ray, flags, ffh); } dg.instID = diff --git a/modules/cpu/common/RayQueryContext.ih b/modules/cpu/common/RayQueryContext.ih index c51ec516b..1cfd14b99 100644 --- a/modules/cpu/common/RayQueryContext.ih +++ b/modules/cpu/common/RayQueryContext.ih @@ -4,7 +4,7 @@ #pragma once // ospray -#include "FeatureFlagsEnum.h" +#include "FeatureFlags.ih" // embree #include "Embree.h" @@ -25,15 +25,15 @@ struct RayQueryContextDefault { RTCRayQueryContext ectx; RayQueryContextType type; - FeatureFlagsGeometry ffg; + const FeatureFlagsHandler *uniform ffh; }; inline void initRayQueryContextDefault(RayQueryContextDefault *uniform context, - const uniform FeatureFlagsGeometry ffg) + const uniform FeatureFlagsHandler &ffh) { rtcInitRayQueryContext(&context->ectx); context->type = RQCT_DEFAULT; - context->ffg = ffg; + context->ffh 
= &ffh; } OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/common/World.cpp b/modules/cpu/common/World.cpp index 313473153..327bff6ae 100644 --- a/modules/cpu/common/World.cpp +++ b/modules/cpu/common/World.cpp @@ -4,6 +4,7 @@ // ospray #include "World.h" #include "Instance.h" +#include "common/FeatureFlagsEnum.h" #include "lights/Light.h" #include "render/pathtracer/PathTracerData.h" #include "render/scivis/SciVisData.h" @@ -138,6 +139,7 @@ void World::commit() } #ifdef OSPRAY_ENABLE_VOLUMES if (inst->group->sceneVolumes) { + featureFlags.other |= FFO_VOLUME_IN_SCENE; addGeometryInstance( esVol, inst->group->sceneVolumes, inst, embreeDevice, id); } @@ -150,17 +152,17 @@ void World::commit() #endif // Gather feature flags from all groups const FeatureFlags &gff = inst->group->getFeatureFlags(); - featureFlags.geometry |= gff.geometry; - featureFlags.volume |= gff.volume; - featureFlags.other |= gff.other; + featureFlags |= gff; id++; } } // Gather light types - if (lights) - for (auto &&light : *lights) - featureFlags.other |= light->getFeatureFlagsOther(); + if (lights) { + for (auto &&light : *lights) { + featureFlags |= light->getFeatureFlags(); + } + } if (esGeom) { rtcSetSceneFlags(esGeom, static_cast(sceneFlags)); diff --git a/modules/cpu/common/World.ih b/modules/cpu/common/World.ih index f769ff7f4..83b5c6e22 100644 --- a/modules/cpu/common/World.ih +++ b/modules/cpu/common/World.ih @@ -8,6 +8,7 @@ #include "common/DGEnum.h" #include "common/DifferentialGeometry.ih" #include "common/FeatureFlags.ih" +#include "common/FeatureFlagsEnum.h" #include "common/Instance.ih" #include "common/Ray.ih" #include "common/RayQueryContext.ih" @@ -27,14 +28,16 @@ struct Renderer; inline void traceGeometryRay(const World *uniform world, varying Ray &ray, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { // Skip if no geometries scene if (!world->embreeSceneHandleGeometries) return; + const uniform FeatureFlags ff = getFeatureFlags(ffh); + uniform 
RayQueryContextDefault context; - initRayQueryContextDefault(&context, ff.geometry); + initRayQueryContextDefault(&context, ffh); uniform RTCIntersectArguments intersectArgs; rtcInitIntersectArguments(&intersectArgs); @@ -141,11 +144,13 @@ unmasked void clippingIntersectionFilterV( inline void traceClippingRay(const World *uniform world, varying Ray &ray, - varying RayIntervals &rayIntervals) + varying RayIntervals &rayIntervals, + const uniform FeatureFlagsHandler &ffh) { // Clipping disabled on GPU for now #ifdef OSPRAY_TARGET_SYCL (void)world; + (void)ffh; rayIntervals.intervals[0] = make_box1f(ray.t0, ray.t); rayIntervals.count = 1; return; @@ -161,7 +166,7 @@ inline void traceClippingRay(const World *uniform world, RayQueryContextClipping context; rtcInitRayQueryContext(&context.super.ectx); context.super.type = RQCT_CLIPPING; - context.super.ffg = FFG_ALL; + context.super.ffh = &ffh; context.world = world; context.corrClippingDepth = 0; context.hitsCount = 0; @@ -242,7 +247,7 @@ inline void traceClippingRay(const World *uniform world, inline void traceGeometryRayIntervals(const World *uniform world, Ray &ray, RayIntervals &rayIntervals, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { // Save the ray const float t0 = ray.t0; @@ -266,7 +271,7 @@ inline void traceGeometryRayIntervals(const World *uniform world, ray.t = min(ray.t, t); // Shoot the ray - traceGeometryRay(world, ray, ff); + traceGeometryRay(world, ray, ffh); // Exit loop if geometry hit if (hadHit(ray)) { @@ -282,37 +287,38 @@ inline void traceGeometryRayIntervals(const World *uniform world, inline void traceRay(const World *uniform world, varying Ray &ray, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { #ifdef OSPRAY_TARGET_SYCL // Clipping disabled for now - traceGeometryRay(world, ray, ff); + traceGeometryRay(world, ray, ffh); #else // Fast path if no clipping geometry if (!world->embreeSceneHandleClippers) { - traceGeometryRay(world, 
ray, ff); + traceGeometryRay(world, ray, ffh); return; } // Trace ray in clipping geometries scene, fill array with ray intervals varying RayIntervals rayIntervals; - traceClippingRay(world, ray, rayIntervals); + traceClippingRay(world, ray, rayIntervals, ffh); // Trace ray intervals - traceGeometryRayIntervals(world, ray, rayIntervals, ff); + traceGeometryRayIntervals(world, ray, rayIntervals, ffh); #endif } inline bool isOccludedNoClipping(const World *uniform world, varying Ray &ray, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { // Skip if no geometries scene if (!world->embreeSceneHandleGeometries) return false; + const uniform FeatureFlags ff = getFeatureFlags(ffh); uniform RayQueryContextDefault context; - initRayQueryContextDefault(&context, ff.geometry); + initRayQueryContextDefault(&context, ffh); uniform RTCOccludedArguments occludedArgs; rtcInitOccludedArguments(&occludedArgs); @@ -331,7 +337,7 @@ inline bool isOccludedNoClipping(const World *uniform world, inline bool areIntervalsOccluded(const World *uniform world, varying Ray &ray, varying RayIntervals &rayIntervals, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { // Iterate through ray intervals for (uint32 i = 0; i < rayIntervals.count; i++) { @@ -340,7 +346,7 @@ inline bool areIntervalsOccluded(const World *uniform world, ray.t = rayIntervals.intervals[i].upper; // Check for occluders - if (isOccludedNoClipping(world, ray, ff)) + if (isOccludedNoClipping(world, ray, ffh)) return true; } @@ -350,15 +356,15 @@ inline bool areIntervalsOccluded(const World *uniform world, inline bool isOccluded(const World *uniform world, varying Ray &ray, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { #ifdef OSPRAY_TARGET_SYCL // Clipping disabled for now - return isOccludedNoClipping(world, ray, ff); + return isOccludedNoClipping(world, ray, ffh); #else // Fast path if no clipping geometry if 
(!world->embreeSceneHandleClippers) { - return isOccludedNoClipping(world, ray, ff); + return isOccludedNoClipping(world, ray, ffh); } // Allocate array for ray intervals @@ -366,10 +372,10 @@ inline bool isOccluded(const World *uniform world, rayIntervals.count = 0; // Trace ray in clipping geometries scene, fill array with ray intervals - traceClippingRay(world, ray, rayIntervals); + traceClippingRay(world, ray, rayIntervals, ffh); // Is there any occluder within given ray intervals - return areIntervalsOccluded(world, ray, rayIntervals, ff); + return areIntervalsOccluded(world, ray, rayIntervals, ffh); #endif } @@ -385,7 +391,7 @@ inline void postIntersect(const World *uniform world, varying DifferentialGeometry &dg, const varying Ray &ray, uniform int64 flags, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { dg.primID = ray.primID; dg.st = make_vec2f(ray.u, ray.v); @@ -413,7 +419,7 @@ inline void postIntersect(const World *uniform world, #endif if (instID != RTC_INVALID_GEOMETRY_ID) { Instance *uniform instance = *(world->instances + instID); - Instance_postIntersect(instance, renderer, dg, ray, flags, false, ff); + Instance_postIntersect(instance, renderer, dg, ray, flags, false, ffh); } else { dg.Ns = dg.Ng = ray.Ng; } diff --git a/modules/cpu/common/World.ispc b/modules/cpu/common/World.ispc index 1ff246f73..a4f098a06 100644 --- a/modules/cpu/common/World.ispc +++ b/modules/cpu/common/World.ispc @@ -53,7 +53,7 @@ unmasked void clippingIntersectionFilterV( // Call postIntersect to get shading normal Instance *uniform instance = *(context->world->instances + instID); Instance_postIntersect( - instance, NULL, dg, *ray, DG_NG | DG_NS, true, ffAll()); + instance, NULL, dg, *ray, DG_NG | DG_NS, true, *context->super.ffh); // Use geometry normal for clipping // but use shading normal to check if invertion is needed diff --git a/modules/cpu/fb/FrameBuffer.h b/modules/cpu/fb/FrameBuffer.h index 2b3b701fe..0f4281e3d 100644 --- 
a/modules/cpu/fb/FrameBuffer.h +++ b/modules/cpu/fb/FrameBuffer.h @@ -99,7 +99,7 @@ struct OSPRAY_SDK_INTERFACE FrameBuffer int32 getFrameID() const; - FeatureFlagsOther getFeatureFlagsOther() const; + FeatureFlags getFeatureFlags() const; protected: // Fill vectors with instantiated live objects @@ -144,9 +144,11 @@ inline int32_t FrameBuffer::getFrameID() const return frameID; } -inline FeatureFlagsOther FrameBuffer::getFeatureFlagsOther() const +inline FeatureFlags FrameBuffer::getFeatureFlags() const { - return featureFlags; + FeatureFlags ff; + ff.other = featureFlags; + return ff; } } // namespace ospray diff --git a/modules/cpu/fb/FrameBufferDispatch.ih b/modules/cpu/fb/FrameBufferDispatch.ih index 6c33e67e8..90a5a3d04 100644 --- a/modules/cpu/fb/FrameBufferDispatch.ih +++ b/modules/cpu/fb/FrameBufferDispatch.ih @@ -4,6 +4,7 @@ #pragma once #include "common/FeatureFlags.ih" +#include "common/FeatureFlagsEnum.h" #include "common/OSPCommon.ih" OSPRAY_BEGIN_ISPC_NAMESPACE @@ -15,16 +16,16 @@ struct ScreenSample; SYCL_EXTERNAL uniform RenderTaskDesc FrameBuffer_dispatch_getRenderTaskDesc( FrameBuffer *uniform fb, const uniform uint32 taskID, - const uniform FeatureFlagsOther ffo); + const uniform FeatureFlagsHandler &ffh); SYCL_EXTERNAL void FrameBuffer_dispatch_accumulateSample( FrameBuffer *uniform fb, const varying ScreenSample &screenSample, uniform RenderTaskDesc &taskDesc, - const uniform FeatureFlagsOther ffo); + const uniform FeatureFlagsHandler &ffh); SYCL_EXTERNAL void FrameBuffer_dispatch_completeTask(FrameBuffer *uniform fb, const uniform RenderTaskDesc &taskDesc, - const uniform FeatureFlagsOther ffo); + const uniform FeatureFlagsHandler &ffh); OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/fb/FrameBufferDispatch.ispc b/modules/cpu/fb/FrameBufferDispatch.ispc index 3edb14b21..47504f8be 100644 --- a/modules/cpu/fb/FrameBufferDispatch.ispc +++ b/modules/cpu/fb/FrameBufferDispatch.ispc @@ -1,6 +1,7 @@ // Copyright 2022 Intel Corporation // 
SPDX-License-Identifier: Apache-2.0 +#include "common/FeatureFlagsEnum.h" #include "fb/FrameBufferDispatch.ih" #include "fb/LocalFB.ih" #include "fb/RenderTaskDesc.ih" @@ -14,8 +15,9 @@ OSPRAY_BEGIN_ISPC_NAMESPACE SYCL_EXTERNAL uniform RenderTaskDesc FrameBuffer_dispatch_getRenderTaskDesc( FrameBuffer *uniform fb, const uniform uint32 taskID, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsHandler &ffh) { + const uniform FeatureFlagsOther ffo = getFeatureFlagsOther(ffh); if ((fb->type == FRAMEBUFFER_TYPE_LOCAL) && (ffo & FFO_FB_LOCAL)) { return LocalFB_getRenderTaskDesc(fb, taskID); } else if ((fb->type == FRAMEBUFFER_TYPE_SPARSE) && (ffo & FFO_FB_SPARSE)) { @@ -36,8 +38,9 @@ SYCL_EXTERNAL void FrameBuffer_dispatch_accumulateSample( FrameBuffer *uniform fb, const varying ScreenSample &screenSample, uniform RenderTaskDesc &taskDesc, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsHandler &ffh) { + const uniform FeatureFlagsOther ffo = getFeatureFlagsOther(ffh); if ((fb->type == FRAMEBUFFER_TYPE_LOCAL) && (ffo & FFO_FB_LOCAL)) { LocalFB_accumulateSample(fb, screenSample, taskDesc); } else if ((fb->type == FRAMEBUFFER_TYPE_SPARSE) && (ffo & FFO_FB_SPARSE)) { @@ -51,8 +54,9 @@ SYCL_EXTERNAL void FrameBuffer_dispatch_accumulateSample( SYCL_EXTERNAL void FrameBuffer_dispatch_completeTask(FrameBuffer *uniform fb, const uniform RenderTaskDesc &taskDesc, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsHandler &ffh) { + const uniform FeatureFlagsOther ffo = getFeatureFlagsOther(ffh); if ((fb->type == FRAMEBUFFER_TYPE_LOCAL) && (ffo & FFO_FB_LOCAL)) { LocalFB_completeTask(fb, taskDesc); } else if ((fb->type == FRAMEBUFFER_TYPE_SPARSE) && (ffo & FFO_FB_SPARSE)) { diff --git a/modules/cpu/fb/LocalFB.ih b/modules/cpu/fb/LocalFB.ih index e7b80c773..f7f5fefa3 100644 --- a/modules/cpu/fb/LocalFB.ih +++ b/modules/cpu/fb/LocalFB.ih @@ -1,7 +1,6 @@ // Copyright 2009-2022 Intel Corporation // SPDX-License-Identifier: Apache-2.0 
-#include "common/FeatureFlags.ih" #include "fb/FrameBuffer.ih" #include "fb/Tile.ih" // c++ shared diff --git a/modules/cpu/fb/LocalFB.ispc b/modules/cpu/fb/LocalFB.ispc index b210d72d6..e59d2c512 100644 --- a/modules/cpu/fb/LocalFB.ispc +++ b/modules/cpu/fb/LocalFB.ispc @@ -4,7 +4,6 @@ #include "FrameBuffer.ih" #include "RenderTaskDesc.ih" #include "Tile.ih" -#include "common/FeatureFlags.ih" #include "render/ScreenSample.ih" // c++ shared #include "LocalFBShared.h" diff --git a/modules/cpu/geometry/Curves.cpp b/modules/cpu/geometry/Curves.cpp index 5ba7f6a04..686ed600a 100644 --- a/modules/cpu/geometry/Curves.cpp +++ b/modules/cpu/geometry/Curves.cpp @@ -171,7 +171,7 @@ void Curves::commit() } postCreationInfo(vertexData->size()); - featureFlags = curveFeatureFlags[embreeCurveType]; + featureFlagsGeometry = curveFeatureFlags[embreeCurveType]; } size_t Curves::numPrimitives() const diff --git a/modules/cpu/geometry/GeometricModel.cpp b/modules/cpu/geometry/GeometricModel.cpp index 169d21734..2494b71a0 100644 --- a/modules/cpu/geometry/GeometricModel.cpp +++ b/modules/cpu/geometry/GeometricModel.cpp @@ -32,10 +32,10 @@ void GeometricModel::commit() getSh()->material = nullptr; getSh()->materialID = nullptr; getSh()->numMaterials = 0; - featureFlags = FFO_NONE; + featureFlagsOther = FFO_NONE; if (materialData) { for (auto &&mat : materialData->as()) - featureFlags |= mat->getFeatureFlagsOther(); + featureFlagsOther |= mat->getFeatureFlags().other; materialArray = make_buffer_shared_unique( getISPCDevice().getIspcrtContext(), @@ -45,10 +45,10 @@ void GeometricModel::commit() } else { materialData = getParamDataT("material", false, true); if (materialData) { - materialIDArray = - make_buffer_shared_unique(getISPCDevice().getIspcrtContext(), - materialData->as().data(), - materialData->size()); + materialIDArray = make_buffer_shared_unique( + getISPCDevice().getIspcrtContext(), + materialData->as().data(), + materialData->size()); getSh()->materialID = 
materialIDArray->sharedPtr(); getSh()->numMaterials = materialIDArray->size(); } diff --git a/modules/cpu/geometry/GeometricModel.h b/modules/cpu/geometry/GeometricModel.h index 377609aa1..853c82abf 100644 --- a/modules/cpu/geometry/GeometricModel.h +++ b/modules/cpu/geometry/GeometricModel.h @@ -34,9 +34,7 @@ struct OSPRAY_SDK_INTERFACE GeometricModel bool hasEmissiveMaterials( Ref> rendererMaterials) const; - FeatureFlagsGeometry getFeatureFlagsGeometry() const; - FeatureFlagsVolume getFeatureFlagsVolume() const; - FeatureFlagsOther getFeatureFlagsOther() const; + FeatureFlags getFeatureFlags() const; private: Ref geom; @@ -47,7 +45,7 @@ struct OSPRAY_SDK_INTERFACE GeometricModel std::unique_ptr> materialArray; std::unique_ptr> materialIDArray; - FeatureFlagsOther featureFlags{FFO_NONE}; + FeatureFlagsOther featureFlagsOther{FFO_NONE}; }; OSPTYPEFOR_SPECIALIZATION(GeometricModel *, OSP_GEOMETRIC_MODEL); @@ -69,19 +67,11 @@ inline bool GeometricModel::invertedNormals() const return getSh()->invertedNormals; } -inline FeatureFlagsGeometry GeometricModel::getFeatureFlagsGeometry() const +inline FeatureFlags GeometricModel::getFeatureFlags() const { - return geom->getFeatureFlagsGeometry(); -} - -inline FeatureFlagsVolume GeometricModel::getFeatureFlagsVolume() const -{ - return FFV_NONE; -} - -inline FeatureFlagsOther GeometricModel::getFeatureFlagsOther() const -{ - return featureFlags; + FeatureFlags ff = geom->getFeatureFlags(); + ff.other |= featureFlagsOther; + return ff; } } // namespace ospray diff --git a/modules/cpu/geometry/GeometricModel.ih b/modules/cpu/geometry/GeometricModel.ih index 7606a2ce0..385a4c3ad 100644 --- a/modules/cpu/geometry/GeometricModel.ih +++ b/modules/cpu/geometry/GeometricModel.ih @@ -49,11 +49,11 @@ inline void GeometricModel_postIntersect(const GeometricModel *uniform self, varying DifferentialGeometry &dg, const varying Ray &ray, uniform int64 flags, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { 
Geometry *uniform geom = self->geom; - Geometry_dispatch_postIntersect(geom, dg, ray, flags, ff.geometry); + Geometry_dispatch_postIntersect(geom, dg, ray, flags, ffh); dg.areaPDF = self->areaPDF; dg.objID = diff --git a/modules/cpu/geometry/Geometry.cpp b/modules/cpu/geometry/Geometry.cpp index 0c91d7faa..d9f5b34ea 100644 --- a/modules/cpu/geometry/Geometry.cpp +++ b/modules/cpu/geometry/Geometry.cpp @@ -13,7 +13,8 @@ namespace ospray { // Geometry definitions /////////////////////////////////////////////////////// Geometry::Geometry(api::ISPCDevice &device, const FeatureFlagsGeometry ffg) - : AddStructShared(device.getIspcrtContext(), device), featureFlags(ffg) + : AddStructShared(device.getIspcrtContext(), device), + featureFlagsGeometry(ffg) { managedObjectType = OSP_GEOMETRY; } diff --git a/modules/cpu/geometry/Geometry.h b/modules/cpu/geometry/Geometry.h index 914eb0d1e..9b773c14c 100644 --- a/modules/cpu/geometry/Geometry.h +++ b/modules/cpu/geometry/Geometry.h @@ -30,12 +30,12 @@ struct OSPRAY_SDK_INTERFACE Geometry bool supportAreaLighting() const; - FeatureFlagsGeometry getFeatureFlagsGeometry() const; + virtual FeatureFlags getFeatureFlags() const; protected: RTCGeometry embreeGeometry{nullptr}; - FeatureFlagsGeometry featureFlags; + FeatureFlagsGeometry featureFlagsGeometry; void createEmbreeGeometry(RTCGeometryType type); // NOTE: We now pass intersection functions through Embree RTCIntersectionArgs @@ -55,9 +55,11 @@ inline bool Geometry::supportAreaLighting() const return (getSh()->sampleArea != nullptr) && (getSh()->getAreas != nullptr); } -inline FeatureFlagsGeometry Geometry::getFeatureFlagsGeometry() const +inline FeatureFlags Geometry::getFeatureFlags() const { - return featureFlags; + FeatureFlags ff; + ff.geometry = featureFlagsGeometry; + return ff; } // convenience wrappers to set Embree buffer ////////////////////////////////// diff --git a/modules/cpu/geometry/GeometryDispatch.ih b/modules/cpu/geometry/GeometryDispatch.ih index 
0e56b62be..3c51cc9a8 100644 --- a/modules/cpu/geometry/GeometryDispatch.ih +++ b/modules/cpu/geometry/GeometryDispatch.ih @@ -4,6 +4,7 @@ #pragma once #include "common/FeatureFlags.ih" +#include "common/FeatureFlagsEnum.h" OSPRAY_BEGIN_ISPC_NAMESPACE @@ -22,7 +23,7 @@ SYCL_EXTERNAL void Geometry_dispatch_postIntersect(const Geometry *uniform self, varying DifferentialGeometry &dg, const varying Ray &ray, uniform int64 flags, - const uniform FeatureFlagsGeometry ffg); + const uniform FeatureFlagsHandler &ffh); // sample the given primitive uniformly wrt. area SYCL_EXTERNAL SampleAreaRes Geometry_dispatch_sampleArea( @@ -31,8 +32,8 @@ SYCL_EXTERNAL SampleAreaRes Geometry_dispatch_sampleArea( const uniform affine3f &xfm, // instance transformation (obj2world) const uniform affine3f &rcp_xfm, // inverse transformation (world2obj) const vec2f &s, // random numbers to generate the sample - const float time // for deformation motion blur -); + const float time, // for deformation motion blur + const uniform FeatureFlagsHandler &ffh); RTC_SYCL_INDIRECTLY_CALLABLE unmasked void Geometry_dispatch_intersect( RTCIntersectFunctionNArguments *uniform args); diff --git a/modules/cpu/geometry/GeometryDispatch.ispc b/modules/cpu/geometry/GeometryDispatch.ispc index 0e3f18060..0bb5daaa7 100644 --- a/modules/cpu/geometry/GeometryDispatch.ispc +++ b/modules/cpu/geometry/GeometryDispatch.ispc @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 #include "common/DifferentialGeometry.ih" +#include "common/FeatureFlagsEnum.h" #include "common/Ray.ih" #include "common/RayQueryContext.ih" #include "geometry/Boxes.ih" @@ -23,8 +24,12 @@ SYCL_EXTERNAL void Geometry_dispatch_postIntersect(const Geometry *uniform self, varying DifferentialGeometry &dg, const varying Ray &ray, uniform int64 flags, - const uniform FeatureFlagsGeometry ffg) + const uniform FeatureFlagsHandler &ffh) { + const uniform FeatureFlags ff = getFeatureFlags(ffh); + // See https://github.com/ispc/ispc/issues/2533 + const 
uniform FeatureFlagsGeometry ffg = ff.geometry; + if ((self->type == GEOMETRY_TYPE_QUAD_MESH) && (ffg & FFG_QUAD)) { QuadMesh_postIntersect(self, dg, ray, flags); } else if ((self->type == GEOMETRY_TYPE_TRIANGLE_MESH) @@ -41,7 +46,7 @@ SYCL_EXTERNAL void Geometry_dispatch_postIntersect(const Geometry *uniform self, #ifdef OSPRAY_ENABLE_VOLUMES } else if ((self->type == GEOMETRY_TYPE_ISOSURFACES) && (ffg & FFG_ISOSURFACE)) { - Isosurfaces_postIntersect(self, dg, ray, flags); + Isosurfaces_postIntersect(self, dg, ray, flags, ffh); #endif #ifndef OSPRAY_TARGET_SYCL } else if ((self->type == GEOMETRY_TYPE_SUBDIVISION) @@ -62,27 +67,31 @@ SYCL_EXTERNAL SampleAreaRes Geometry_dispatch_sampleArea( const uniform affine3f &xfm, const uniform affine3f &rcp_xfm, const vec2f &s, - const float time) + const float time, + const uniform FeatureFlagsHandler &ffh) { - switch (self->type) { - case GEOMETRY_TYPE_QUAD_MESH: - case GEOMETRY_TYPE_TRIANGLE_MESH: + const uniform FeatureFlags ff = getFeatureFlags(ffh); + // See https://github.com/ispc/ispc/issues/2533 + const uniform FeatureFlagsGeometry ffg = ff.geometry; + + if (((self->type == GEOMETRY_TYPE_QUAD_MESH) && (ffg & FFG_QUAD)) + || ((self->type == GEOMETRY_TYPE_TRIANGLE_MESH) + && (ffg & FFG_TRIANGLE))) { return Mesh_sampleArea(self, primID, xfm, rcp_xfm, s, time); - case GEOMETRY_TYPE_SPHERES: + } else if ((self->type == GEOMETRY_TYPE_SPHERES) && (ffg & FFG_SPHERE)) { return Spheres_sampleArea(self, primID, xfm, rcp_xfm, s, time); - default: + } else { #ifndef OSPRAY_TARGET_SYCL return self->sampleArea(self, primID, xfm, rcp_xfm, s, time); #endif - break; - } #ifdef OSPRAY_TARGET_SYCL - SampleAreaRes res; - res.pos = make_vec3f(0.f); - res.normal = make_vec3f(0.f); - return res; + SampleAreaRes res; + res.pos = make_vec3f(0.f); + res.normal = make_vec3f(0.f); + return res; #endif + } } RTC_SYCL_INDIRECTLY_CALLABLE unmasked void Geometry_dispatch_intersect( @@ -90,7 +99,12 @@ RTC_SYCL_INDIRECTLY_CALLABLE unmasked void 
Geometry_dispatch_intersect( { RayQueryContextDefault *uniform ctx = (RayQueryContextDefault * uniform) args->context; - const uniform FeatureFlagsGeometry ffg = ctx->ffg; + + const uniform FeatureFlagsHandler &ffh = *ctx->ffh; + const uniform FeatureFlags ff = getFeatureFlags(ffh); + // See https://github.com/ispc/ispc/issues/2533 + const uniform FeatureFlagsGeometry ffg = ff.geometry; + Geometry *uniform geom = (Geometry * uniform) args->geometryUserPtr; if ((geom->type == GEOMETRY_TYPE_BOXES) && (ffg & FFG_BOX)) { Boxes_intersect_kernel(args, false); @@ -99,7 +113,7 @@ RTC_SYCL_INDIRECTLY_CALLABLE unmasked void Geometry_dispatch_intersect( #ifdef OSPRAY_ENABLE_VOLUMES } else if ((geom->type == GEOMETRY_TYPE_ISOSURFACES) && (ffg & FFG_ISOSURFACE)) { - Isosurfaces_intersect_kernel(args, false); + Isosurfaces_intersect_kernel(args, false, ffh); #endif } else { #ifndef OSPRAY_TARGET_SYCL @@ -113,7 +127,11 @@ RTC_SYCL_INDIRECTLY_CALLABLE unmasked void Geometry_dispatch_occluded( { RayQueryContextDefault *uniform ctx = (RayQueryContextDefault * uniform) args->context; - const uniform FeatureFlagsGeometry ffg = ctx->ffg; + const uniform FeatureFlagsHandler &ffh = *ctx->ffh; + const uniform FeatureFlags ff = getFeatureFlags(ffh); + // See https://github.com/ispc/ispc/issues/2533 + const uniform FeatureFlagsGeometry ffg = ff.geometry; + Geometry *uniform geom = (Geometry * uniform) args->geometryUserPtr; if ((geom->type == GEOMETRY_TYPE_BOXES) && (ffg & FFG_BOX)) { Boxes_intersect_kernel( @@ -125,7 +143,7 @@ RTC_SYCL_INDIRECTLY_CALLABLE unmasked void Geometry_dispatch_occluded( } else if ((geom->type == GEOMETRY_TYPE_ISOSURFACES) && (ffg & FFG_ISOSURFACE)) { Isosurfaces_intersect_kernel( - (RTCIntersectFunctionNArguments * uniform) args, true); + (RTCIntersectFunctionNArguments * uniform) args, true, ffh); #endif } else { #ifndef OSPRAY_TARGET_SYCL diff --git a/modules/cpu/geometry/Isosurfaces.h b/modules/cpu/geometry/Isosurfaces.h index c36f2f2e4..57688daf2 100644 --- 
a/modules/cpu/geometry/Isosurfaces.h +++ b/modules/cpu/geometry/Isosurfaces.h @@ -1,5 +1,6 @@ // Copyright 2009 Intel Corporation // SPDX-License-Identifier: Apache-2.0 +#include "common/FeatureFlagsEnum.h" #ifdef OSPRAY_ENABLE_VOLUMES #pragma once @@ -23,6 +24,8 @@ struct OSPRAY_SDK_INTERFACE Isosurfaces virtual size_t numPrimitives() const override; + FeatureFlags getFeatureFlags() const override; + protected: // Data members // @@ -34,6 +37,17 @@ struct OSPRAY_SDK_INTERFACE Isosurfaces VKLHitIteratorContext vklHitContext = VKLHitIteratorContext(); }; +inline FeatureFlags Isosurfaces::getFeatureFlags() const +{ + FeatureFlags ff = Geometry::getFeatureFlags(); + if (model) { + ff |= model->getFeatureFlags(); + } else { + ff |= volume->getFeatureFlags(); + } + return ff; +} + } // namespace ospray #endif diff --git a/modules/cpu/geometry/Isosurfaces.ih b/modules/cpu/geometry/Isosurfaces.ih index cd412a898..694b42111 100644 --- a/modules/cpu/geometry/Isosurfaces.ih +++ b/modules/cpu/geometry/Isosurfaces.ih @@ -2,22 +2,26 @@ // SPDX-License-Identifier: Apache-2.0 #ifdef OSPRAY_ENABLE_VOLUMES +#include "common/FeatureFlags.ih" #include "geometry/Geometry.ih" OSPRAY_BEGIN_ISPC_NAMESPACE +#ifdef OSPRAY_TARGET_SYCL void Isosurfaces_bounds(const RTCBoundsFunctionArguments *uniform args); +#endif SYCL_EXTERNAL void Isosurfaces_postIntersect(const Geometry *uniform geometry, varying DifferentialGeometry &dg, const varying Ray &ray, - uniform int64 flags); + uniform int64 flags, + const uniform FeatureFlagsHandler &ffh); SYCL_EXTERNAL void unmasked Isosurfaces_intersect_kernel( const RTCIntersectFunctionNArguments *uniform args, - const uniform bool isOcclusionTest); + const uniform bool isOcclusionTest, + const uniform FeatureFlagsHandler &ffh); OSPRAY_END_ISPC_NAMESPACE #endif - diff --git a/modules/cpu/geometry/Isosurfaces.ispc b/modules/cpu/geometry/Isosurfaces.ispc index beb456671..f54377a33 100644 --- a/modules/cpu/geometry/Isosurfaces.ispc +++ 
b/modules/cpu/geometry/Isosurfaces.ispc @@ -1,5 +1,6 @@ // Copyright 2009 Intel Corporation // SPDX-License-Identifier: Apache-2.0 +#include "common/FeatureFlagsEnum.h" #ifdef OSPRAY_ENABLE_VOLUMES // ospray @@ -49,13 +50,15 @@ export void Isosurfaces_bounds(const RTCBoundsFunctionArguments *uniform args) SYCL_EXTERNAL void Isosurfaces_postIntersect(const Geometry *uniform geometry, varying DifferentialGeometry &dg, const varying Ray &ray, - uniform int64 flags) + uniform int64 flags, + const uniform FeatureFlagsHandler &ffh) { Isosurfaces *uniform self = (Isosurfaces * uniform) geometry; Volume *uniform volume = Isosurfaces_getVolume(self); if (flags & (DG_NS | DG_NG)) - dg.Ng = dg.Ns = Volume_getGradient(volume, ray.Ng /* actually local hit */); + dg.Ng = dg.Ns = + Volume_getGradient(volume, ray.Ng /* actually local hit */, ffh); // convert ray-space epsilon (in ray.u) to object-/world-space using max(dir) // instead of costly length; the error is at most sqrt(3)~1.7, quite @@ -70,13 +73,16 @@ SYCL_EXTERNAL void Isosurfaces_postIntersect(const Geometry *uniform geometry, SYCL_EXTERNAL void unmasked Isosurfaces_intersect_kernel( const RTCIntersectFunctionNArguments *uniform args, - const uniform bool isOcclusionTest) + const uniform bool isOcclusionTest, + const uniform FeatureFlagsHandler &ffh) { // make sure to set the mask if (!args->valid[programIndex]) { return; } + const uniform FeatureFlags ff = getFeatureFlags(ffh); + args->valid[programIndex] = 0; Isosurfaces *uniform self = (Isosurfaces * uniform) args->geometryUserPtr; @@ -92,12 +98,17 @@ SYCL_EXTERNAL void unmasked Isosurfaces_intersect_kernel( // do not use alloca: it prevents inlining and thus optimization // wrt. 
"template arg" isOcclusionTest, alloca also not supported on GPU uniform uint8 hitIteratorBuffer[VKL_MAX_HIT_ITERATOR_SIZE]; - iterator = vklInitHitIteratorV( + #if OPENVKL_VERSION_MAJOR == 1 - self->vklHitContext, + // We know if we have OpenVKL version 1 we're not targetting SYCL + iterator = vklInitHitIteratorV(self->vklHitContext, + (varying vkl_vec3f *)&ray->org, + (varying vkl_vec3f *)&ray->dir, + &tRange, + &time, + hitIteratorBuffer); #else - &self->vklHitContext, -#endif + iterator = vklInitHitIteratorV(&self->vklHitContext, (varying vkl_vec3f *)&ray->org, (varying vkl_vec3f *)&ray->dir, &tRange, @@ -106,9 +117,22 @@ SYCL_EXTERNAL void unmasked Isosurfaces_intersect_kernel( #else &time, #endif - hitIteratorBuffer); + hitIteratorBuffer +#ifdef OSPRAY_TARGET_SYCL + , + ff.volume +#endif + ); +#endif + VKLHit hit; - bool gotHit = vklIterateHitV(iterator, &hit); + bool gotHit = vklIterateHitV(iterator, + &hit +#ifdef OSPRAY_TARGET_SYCL + , + ff.volume +#endif + ); if (!gotHit) return; diff --git a/modules/cpu/geometry/Mesh.cpp b/modules/cpu/geometry/Mesh.cpp index 912631566..3edca819d 100644 --- a/modules/cpu/geometry/Mesh.cpp +++ b/modules/cpu/geometry/Mesh.cpp @@ -170,7 +170,7 @@ void Mesh::commit() getSh()->flagMask &= ispc::int64(~DG_TEXCOORD); postCreationInfo(vertexData->size()); - featureFlags = isTri ? FFG_TRIANGLE : FFG_QUAD; + featureFlagsGeometry = isTri ? 
FFG_TRIANGLE : FFG_QUAD; } size_t Mesh::numPrimitives() const diff --git a/modules/cpu/ispc_symbols.txt b/modules/cpu/ispc_symbols.txt index a6a36da09..c7ab4dab6 100644 --- a/modules/cpu/ispc_symbols.txt +++ b/modules/cpu/ispc_symbols.txt @@ -2,18 +2,18 @@ Distribution2D_pdf___un_3C_s_5B__c_unDistribution2D_5D__3E_REFs_5B__c_vyvec2f_5D Distribution2D_sample___un_3C_s_5B__c_unDistribution2D_5D__3E_REFs_5B__c_vyvec2f_5D__, FrameBuffer_runPixelOps___un_3C_s_5B_unFrameBuffer_5D__3E_REFs_5B__c_vyvec4f_5D_CvyfREFs_5B__c_vyvec3f_5D_REFs_5B__c_vyvec3f_5D__, Light_eval___un_3C_s_5B__c_unLight_5D__3E_REFs_5B__c_vyDifferentialGeometry_5D_REFs_5B__c_vyvec3f_5D_CvyfCvyfCvyf_, -Renderer_getBackground___un_3C_s_5B__c_unRenderer_5D__3E_REFs_5B__c_vyvec2f_5D_Cunenum_5B_FeatureFlagsOther_5D__, -Renderer_getMaxDepth___un_3C_s_5B__c_unRenderer_5D__3E_REFs_5B__c_vyvec2f_5D_Cunenum_5B_FeatureFlagsOther_5D__, +Renderer_getBackground___un_3C_s_5B__c_unRenderer_5D__3E_REFs_5B__c_vyvec2f_5D_REFs_5B__c_unFeatureFlagsHandler_5D__, +Renderer_getMaxDepth___un_3C_s_5B__c_unRenderer_5D__3E_REFs_5B__c_vyvec2f_5D_REFs_5B__c_unFeatureFlagsHandler_5D__, clippingIntersectionFilterV___UM_un_3C_s_5B__c_unRTCFilterFunctionNArguments_5D__3E__, get_zorder____, Geometry_dispatch_intersect___UM_un_3C_s_5B_unRTCIntersectFunctionNArguments_5D__3E__, Geometry_dispatch_occluded___UM_un_3C_s_5B_unRTCOccludedFunctionNArguments_5D__3E__, -Geometry_dispatch_postIntersect___un_3C_s_5B__c_unGeometry_5D__3E_REFs_5B_vyDifferentialGeometry_5D_REFs_5B__c_vyRay_5D_unICunenum_5B_FeatureFlagsGeometry_5D__, +Geometry_dispatch_postIntersect___un_3C_s_5B__c_unGeometry_5D__3E_REFs_5B_vyDifferentialGeometry_5D_REFs_5B__c_vyRay_5D_unIREFs_5B__c_unFeatureFlagsHandler_5D__, Texture_dispatch_get___un_3C_s_5B__c_unTexture_5D__3E_REFs_5B__c_vyDifferentialGeometry_5D__, Volume_intersect_kernel___UM_un_3C_s_5B_unRTCIntersectFunctionNArguments_5D__3E__, 
-FrameBuffer_dispatch_getRenderTaskDesc___un_3C_s_5B_unFrameBuffer_5D__3E_CunuCunenum_5B_FeatureFlagsOther_5D__, -FrameBuffer_dispatch_accumulateSample___un_3C_s_5B_unFrameBuffer_5D__3E_REFs_5B__c_vyScreenSample_5D_REFs_5B_unRenderTaskDesc_5D_Cunenum_5B_FeatureFlagsOther_5D__, -FrameBuffer_dispatch_completeTask___un_3C_s_5B_unFrameBuffer_5D__3E_REFs_5B__c_unRenderTaskDesc_5D_Cunenum_5B_FeatureFlagsOther_5D__, -Camera_dispatch_initRay___un_3C_s_5B__c_unCamera_5D__3E_REFs_5B_vyRay_5D_REFs_5B__c_vyCameraSample_5D_Cunenum_5B_FeatureFlagsOther_5D__, +FrameBuffer_dispatch_getRenderTaskDesc___un_3C_s_5B_unFrameBuffer_5D__3E_CunuREFs_5B__c_unFeatureFlagsHandler_5D__, +FrameBuffer_dispatch_accumulateSample___un_3C_s_5B_unFrameBuffer_5D__3E_REFs_5B__c_vyScreenSample_5D_REFs_5B_unRenderTaskDesc_5D_REFs_5B__c_unFeatureFlagsHandler_5D__, +FrameBuffer_dispatch_completeTask___un_3C_s_5B_unFrameBuffer_5D__3E_REFs_5B__c_unRenderTaskDesc_5D_REFs_5B__c_unFeatureFlagsHandler_5D__, +Camera_dispatch_initRay___un_3C_s_5B__c_unCamera_5D__3E_REFs_5B_vyRay_5D_REFs_5B__c_vyCameraSample_5D_REFs_5B__c_unFeatureFlagsHandler_5D__, LinearTransferFunction_get___un_3C_s_5B__c_unTransferFunction_5D__3E_vyf_, TransferFunction_dispatch_get___un_3C_s_5B__c_unTransferFunction_5D__3E_vyf_, diff --git a/modules/cpu/lights/AmbientLight.ih b/modules/cpu/lights/AmbientLight.ih index 644c2604a..f0536b1be 100644 --- a/modules/cpu/lights/AmbientLight.ih +++ b/modules/cpu/lights/AmbientLight.ih @@ -8,7 +8,8 @@ OSPRAY_BEGIN_ISPC_NAMESPACE SYCL_EXTERNAL Light_SampleRes AmbientLight_sample(const Light *uniform super, const DifferentialGeometry &dg, const vec2f &sp, - const float); + const float, + const uniform FeatureFlagsHandler &); SYCL_EXTERNAL Light_EvalRes AmbientLight_eval(const Light *uniform super, const DifferentialGeometry &dg, diff --git a/modules/cpu/lights/AmbientLight.ispc b/modules/cpu/lights/AmbientLight.ispc index 6ce29947a..dca984a2b 100644 --- a/modules/cpu/lights/AmbientLight.ispc +++ 
b/modules/cpu/lights/AmbientLight.ispc @@ -17,7 +17,8 @@ OSPRAY_BEGIN_ISPC_NAMESPACE SYCL_EXTERNAL Light_SampleRes AmbientLight_sample(const Light *uniform super, const DifferentialGeometry &dg, const vec2f &s, - const float) + const float, + const uniform FeatureFlagsHandler &) { uniform AmbientLight *uniform self = (uniform AmbientLight * uniform) super; Light_SampleRes res; diff --git a/modules/cpu/lights/CylinderLight.ih b/modules/cpu/lights/CylinderLight.ih index 0eb81a7fc..fee4d622a 100644 --- a/modules/cpu/lights/CylinderLight.ih +++ b/modules/cpu/lights/CylinderLight.ih @@ -8,13 +8,15 @@ OSPRAY_BEGIN_ISPC_NAMESPACE SYCL_EXTERNAL Light_SampleRes CylinderLight_sample(const Light *uniform super, const DifferentialGeometry &dg, const vec2f &sp, - const float); + const float, + const uniform FeatureFlagsHandler &); SYCL_EXTERNAL Light_SampleRes CylinderLight_sample_instanced( const Light *uniform super, const DifferentialGeometry &dg, const vec2f &sp, - const float time); + const float time, + const uniform FeatureFlagsHandler &); SYCL_EXTERNAL Light_EvalRes CylinderLight_eval(const Light *uniform super, const DifferentialGeometry &dg, diff --git a/modules/cpu/lights/CylinderLight.ispc b/modules/cpu/lights/CylinderLight.ispc index 6323a3979..48693914f 100644 --- a/modules/cpu/lights/CylinderLight.ispc +++ b/modules/cpu/lights/CylinderLight.ispc @@ -172,7 +172,8 @@ SYCL_EXTERNAL Light_SampleRes CylinderLight_sample( const uniform Light *uniform super, const DifferentialGeometry &dg, const vec2f &s, - const float) + const float, + const uniform FeatureFlagsHandler &) { const CylinderLight *uniform self = (CylinderLight * uniform) super; assert(self); @@ -184,7 +185,8 @@ SYCL_EXTERNAL Light_SampleRes CylinderLight_sample_instanced( const Light *uniform super, const DifferentialGeometry &dg, const vec2f &s, - const float time) + const float time, + const uniform FeatureFlagsHandler &) { const CylinderLight *uniform self = (CylinderLight * uniform) super; 
assert(self); diff --git a/modules/cpu/lights/DirectionalLight.ih b/modules/cpu/lights/DirectionalLight.ih index 6fac55b81..09b6b12e9 100644 --- a/modules/cpu/lights/DirectionalLight.ih +++ b/modules/cpu/lights/DirectionalLight.ih @@ -9,13 +9,15 @@ SYCL_EXTERNAL Light_SampleRes DirectionalLight_sample( const Light *uniform super, const DifferentialGeometry &dg, const vec2f &sp, - const float); + const float, + const uniform FeatureFlagsHandler &); SYCL_EXTERNAL Light_SampleRes DirectionalLight_sample_instanced( const Light *uniform super, const DifferentialGeometry &dg, const vec2f &sp, - const float time); + const float time, + const uniform FeatureFlagsHandler &); SYCL_EXTERNAL Light_EvalRes DirectionalLight_eval(const Light *uniform super, const DifferentialGeometry &dg, diff --git a/modules/cpu/lights/DirectionalLight.ispc b/modules/cpu/lights/DirectionalLight.ispc index a6bf19432..89a487946 100644 --- a/modules/cpu/lights/DirectionalLight.ispc +++ b/modules/cpu/lights/DirectionalLight.ispc @@ -33,7 +33,8 @@ SYCL_EXTERNAL Light_SampleRes DirectionalLight_sample( const Light *uniform super, const DifferentialGeometry &, const vec2f &s, - const float) + const float, + const uniform FeatureFlagsHandler &) { const DirectionalLight *uniform self = (DirectionalLight * uniform) super; assert(self); @@ -45,7 +46,8 @@ SYCL_EXTERNAL Light_SampleRes DirectionalLight_sample_instanced( const Light *uniform super, const DifferentialGeometry &dg, const vec2f &s, - const float time) + const float time, + const uniform FeatureFlagsHandler &) { const DirectionalLight *uniform self = (DirectionalLight * uniform) super; assert(self); diff --git a/modules/cpu/lights/HDRILight.ih b/modules/cpu/lights/HDRILight.ih index 08ab7272a..d1242a1a4 100644 --- a/modules/cpu/lights/HDRILight.ih +++ b/modules/cpu/lights/HDRILight.ih @@ -8,13 +8,15 @@ OSPRAY_BEGIN_ISPC_NAMESPACE SYCL_EXTERNAL Light_SampleRes HDRILight_sample(const Light *uniform super, const DifferentialGeometry &dg, const vec2f 
&sp, - const float); + const float, + const uniform FeatureFlagsHandler &); SYCL_EXTERNAL Light_SampleRes HDRILight_sample_instanced( const Light *uniform super, const DifferentialGeometry &dg, const vec2f &sp, - const float time); + const float time, + const uniform FeatureFlagsHandler &); SYCL_EXTERNAL Light_EvalRes HDRILight_eval(const Light *uniform super, const DifferentialGeometry &dg, diff --git a/modules/cpu/lights/HDRILight.ispc b/modules/cpu/lights/HDRILight.ispc index ac2151cb3..40e96a3fd 100644 --- a/modules/cpu/lights/HDRILight.ispc +++ b/modules/cpu/lights/HDRILight.ispc @@ -62,7 +62,8 @@ inline Light_SampleRes Sample(const HDRILight *uniform self, SYCL_EXTERNAL Light_SampleRes HDRILight_sample(const Light *uniform super, const DifferentialGeometry &, const vec2f &s, - const float) + const float, + const uniform FeatureFlagsHandler &) { const HDRILight *uniform self = (HDRILight * uniform) super; assert(self); @@ -74,7 +75,8 @@ SYCL_EXTERNAL Light_SampleRes HDRILight_sample_instanced( const Light *uniform super, const DifferentialGeometry &, const vec2f &s, - const float time) + const float time, + const uniform FeatureFlagsHandler &) { const HDRILight *uniform self = (HDRILight * uniform) super; assert(self); diff --git a/modules/cpu/lights/Light.h b/modules/cpu/lights/Light.h index 7c71af557..f637c2182 100644 --- a/modules/cpu/lights/Light.h +++ b/modules/cpu/lights/Light.h @@ -33,7 +33,7 @@ struct OSPRAY_SDK_INTERFACE Light vec3f coloredIntensity{1.0f, 1.0f, 1.0f}; OSPIntensityQuantity intensityQuantity = OSP_INTENSITY_QUANTITY_UNKNOWN; - FeatureFlagsOther getFeatureFlagsOther() const; + FeatureFlags getFeatureFlags() const; protected: FeatureFlagsOther featureFlags; @@ -50,9 +50,11 @@ inline uint32_t Light::getShCount() const return 1; } -inline FeatureFlagsOther Light::getFeatureFlagsOther() const +inline FeatureFlags Light::getFeatureFlags() const { - return featureFlags; + FeatureFlags ff; + ff.other = featureFlags; + return ff; } } // 
namespace ospray diff --git a/modules/cpu/lights/LightDispatch.ih b/modules/cpu/lights/LightDispatch.ih index 80e8e7319..7f1cc409a 100644 --- a/modules/cpu/lights/LightDispatch.ih +++ b/modules/cpu/lights/LightDispatch.ih @@ -22,7 +22,7 @@ SYCL_EXTERNAL Light_SampleRes Light_dispatch_sample(const Light *uniform self, const DifferentialGeometry &dg, // point (&normal) to generate the sample const vec2f &s, // random numbers to generate the sample const float time, // generate the sample at time (motion blur) - const uniform FeatureFlagsOther ffo); + const uniform FeatureFlagsHandler &ffh); //! compute the radiance and pdf caused by the light source (pointed to by the //! given direction up until maxDist) @@ -32,6 +32,6 @@ SYCL_EXTERNAL Light_EvalRes Light_dispatch_eval(const Light *uniform self, const float minDist, // minimum distance to look for light contribution const float maxDist, // maximum distance to look for light contribution const float time, // evaluate at time (motion blur) - const uniform FeatureFlagsOther ffo); + const uniform FeatureFlagsHandler &ffh); OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/lights/LightDispatch.ispc b/modules/cpu/lights/LightDispatch.ispc index cba709815..416f9c3ae 100644 --- a/modules/cpu/lights/LightDispatch.ispc +++ b/modules/cpu/lights/LightDispatch.ispc @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 #include "common/DifferentialGeometry.ih" +#include "common/FeatureFlagsEnum.h" #include "lights/LightDispatch.ih" #include "lights/LightShared.h" @@ -20,30 +21,32 @@ SYCL_EXTERNAL Light_SampleRes Light_dispatch_sample(const Light *uniform self, const DifferentialGeometry &dg, const vec2f &s, const float time, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsHandler &ffh) { + const uniform FeatureFlagsOther ffo = getFeatureFlagsOther(ffh); + #ifndef OSPRAY_TARGET_SYCL - return self->sample(self, dg, s, time); + return self->sample(self, dg, s, time, ffh); #else if ((self->type == 
LIGHT_TYPE_AMBIENT) && (ffo & FFO_LIGHT_AMBIENT)) { - return AmbientLight_sample(self, dg, s, time); + return AmbientLight_sample(self, dg, s, time, ffh); } else if ((self->type == LIGHT_TYPE_DIRECTIONAL) && (ffo & FFO_LIGHT_DIRECTIONAL)) { - return DirectionalLight_sample(self, dg, s, time); + return DirectionalLight_sample(self, dg, s, time, ffh); } else if ((self->type == LIGHT_TYPE_POINT) && (ffo & FFO_LIGHT_POINT)) { - return PointLight_sample(self, dg, s, time); + return PointLight_sample(self, dg, s, time, ffh); } else if ((self->type == LIGHT_TYPE_SPOT) && (ffo & FFO_LIGHT_SPOT)) { - return SpotLight_sample(self, dg, s, time); + return SpotLight_sample(self, dg, s, time, ffh); } else if ((self->type == LIGHT_TYPE_HDRI) && (ffo & FFO_LIGHT_HDRI)) { - return HDRILight_sample(self, dg, s, time); + return HDRILight_sample(self, dg, s, time, ffh); } else if ((self->type == LIGHT_TYPE_QUAD) && (ffo & FFO_LIGHT_QUAD)) { - return QuadLight_sample(self, dg, s, time); + return QuadLight_sample(self, dg, s, time, ffh); } else if ((self->type == LIGHT_TYPE_CYLINDER) && (ffo & FFO_LIGHT_CYLINDER)) { - return CylinderLight_sample(self, dg, s, time); + return CylinderLight_sample(self, dg, s, time, ffh); } else if ((self->type == LIGHT_TYPE_GEOMETRY) && (ffo & FFO_LIGHT_GEOMETRY)) { - return GeometryLight_sample(self, dg, s, time); + return GeometryLight_sample(self, dg, s, time, ffh); } else { Light_SampleRes res; res.weight = make_vec3f(0.f); @@ -61,8 +64,10 @@ SYCL_EXTERNAL Light_EvalRes Light_dispatch_eval(const Light *uniform self, const float minDist, const float maxDist, const float time, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsHandler &ffh) { + const uniform FeatureFlagsOther ffo = getFeatureFlagsOther(ffh); + #ifndef OSPRAY_TARGET_SYCL return self->eval(self, dg, dir, minDist, maxDist, time); #else diff --git a/modules/cpu/lights/LightShared.h b/modules/cpu/lights/LightShared.h index d4879f216..b3ac44c93 100644 --- 
a/modules/cpu/lights/LightShared.h +++ b/modules/cpu/lights/LightShared.h @@ -3,6 +3,10 @@ #pragma once +#if defined(ISPC) || defined(OSPRAY_TARGET_SYCL) +#include "common/FeatureFlags.ih" +#endif + #ifdef __cplusplus namespace ispc { #endif // __cplusplus @@ -31,7 +35,8 @@ struct Light_SampleRes typedef Light_SampleRes (*Light_SampleFunc)(const Light *uniform self, const DifferentialGeometry &dg, // point (&normal) to generate the sample const vec2f &s, // random numbers to generate the sample - const float time); // generate the sample at time (motion blur) + const float time, // generate the sample at time (motion blur) + const uniform FeatureFlagsHandler &ffh); struct Light_EvalRes { diff --git a/modules/cpu/lights/PointLight.ih b/modules/cpu/lights/PointLight.ih index 075af2c72..c3b908783 100644 --- a/modules/cpu/lights/PointLight.ih +++ b/modules/cpu/lights/PointLight.ih @@ -8,13 +8,15 @@ OSPRAY_BEGIN_ISPC_NAMESPACE SYCL_EXTERNAL Light_SampleRes PointLight_sample(const Light *uniform super, const DifferentialGeometry &dg, const vec2f &sp, - const float); + const float, + const uniform FeatureFlagsHandler &); SYCL_EXTERNAL Light_SampleRes PointLight_sample_instanced( const Light *uniform super, const DifferentialGeometry &dg, const vec2f &sp, - const float time); + const float time, + const uniform FeatureFlagsHandler &); SYCL_EXTERNAL Light_EvalRes PointLight_eval(const Light *uniform super, const DifferentialGeometry &dg, diff --git a/modules/cpu/lights/PointLight.ispc b/modules/cpu/lights/PointLight.ispc index ef0363614..40104d9bb 100644 --- a/modules/cpu/lights/PointLight.ispc +++ b/modules/cpu/lights/PointLight.ispc @@ -92,7 +92,8 @@ inline Light_SampleRes Sample(const PointLight *uniform self, SYCL_EXTERNAL Light_SampleRes PointLight_sample(const Light *uniform super, const DifferentialGeometry &dg, const vec2f &s, - const float) + const float, + const uniform FeatureFlagsHandler &) { const PointLight *uniform self = (PointLight * uniform) super; 
assert(self); @@ -104,7 +105,8 @@ SYCL_EXTERNAL Light_SampleRes PointLight_sample_instanced( const Light *uniform super, const DifferentialGeometry &dg, const vec2f &s, - const float time) + const float time, + const uniform FeatureFlagsHandler &) { const PointLight *uniform self = (PointLight * uniform) super; assert(self); diff --git a/modules/cpu/lights/QuadLight.ih b/modules/cpu/lights/QuadLight.ih index e7161bd87..23d90d54d 100644 --- a/modules/cpu/lights/QuadLight.ih +++ b/modules/cpu/lights/QuadLight.ih @@ -8,13 +8,15 @@ OSPRAY_BEGIN_ISPC_NAMESPACE SYCL_EXTERNAL Light_SampleRes QuadLight_sample(const Light *uniform super, const DifferentialGeometry &dg, const vec2f &sp, - const float); + const float, + const uniform FeatureFlagsHandler &); SYCL_EXTERNAL Light_SampleRes QuadLight_sample_instanced( const Light *uniform super, const DifferentialGeometry &dg, const vec2f &sp, - const float time); + const float time, + const uniform FeatureFlagsHandler &); SYCL_EXTERNAL Light_EvalRes QuadLight_eval(const Light *uniform super, const DifferentialGeometry &dg, @@ -31,4 +33,3 @@ SYCL_EXTERNAL Light_EvalRes QuadLight_eval_instanced(const Light *uniform super, const float time); OSPRAY_END_ISPC_NAMESPACE - diff --git a/modules/cpu/lights/QuadLight.ispc b/modules/cpu/lights/QuadLight.ispc index 256a31635..4e50d5a02 100644 --- a/modules/cpu/lights/QuadLight.ispc +++ b/modules/cpu/lights/QuadLight.ispc @@ -135,7 +135,8 @@ inline Light_SampleRes Sample(const QuadLight *uniform self, SYCL_EXTERNAL Light_SampleRes QuadLight_sample(const Light *uniform super, const DifferentialGeometry &dg, const vec2f &sp, - const float) + const float, + const uniform FeatureFlagsHandler &) { const QuadLight *uniform self = (QuadLight * uniform) super; assert(self); @@ -147,7 +148,8 @@ SYCL_EXTERNAL Light_SampleRes QuadLight_sample_instanced( const Light *uniform super, const DifferentialGeometry &dg, const vec2f &sp, - const float time) + const float time, + const uniform FeatureFlagsHandler
&) { const QuadLight *uniform self = (QuadLight * uniform) super; assert(self); diff --git a/modules/cpu/lights/SpotLight.ih b/modules/cpu/lights/SpotLight.ih index 75e2da049..b846381b1 100644 --- a/modules/cpu/lights/SpotLight.ih +++ b/modules/cpu/lights/SpotLight.ih @@ -8,13 +8,15 @@ OSPRAY_BEGIN_ISPC_NAMESPACE SYCL_EXTERNAL Light_SampleRes SpotLight_sample(const Light *uniform super, const DifferentialGeometry &dg, const vec2f &sp, - const float); + const float, + const uniform FeatureFlagsHandler &); SYCL_EXTERNAL Light_SampleRes SpotLight_sample_instanced( const Light *uniform super, const DifferentialGeometry &dg, const vec2f &sp, - const float time); + const float time, + const uniform FeatureFlagsHandler &); SYCL_EXTERNAL Light_EvalRes SpotLight_eval(const Light *uniform super, const DifferentialGeometry &dg, diff --git a/modules/cpu/lights/SpotLight.ispc b/modules/cpu/lights/SpotLight.ispc index 38b771853..ea9aca697 100644 --- a/modules/cpu/lights/SpotLight.ispc +++ b/modules/cpu/lights/SpotLight.ispc @@ -97,7 +97,8 @@ inline Light_SampleRes Sample(const SpotLight *uniform self, SYCL_EXTERNAL Light_SampleRes SpotLight_sample(const Light *uniform super, const DifferentialGeometry &dg, const vec2f &s, - const float) + const float, + const uniform FeatureFlagsHandler &) { const SpotLight *uniform self = (SpotLight * uniform) super; assert(self); @@ -109,7 +110,8 @@ SYCL_EXTERNAL Light_SampleRes SpotLight_sample_instanced( const Light *uniform super, const DifferentialGeometry &dg, const vec2f &s, - const float time) + const float time, + const uniform FeatureFlagsHandler &) { const SpotLight *uniform self = (SpotLight * uniform) super; assert(self); diff --git a/modules/cpu/render/Material.h b/modules/cpu/render/Material.h index 84499f6db..a7a651fc9 100644 --- a/modules/cpu/render/Material.h +++ b/modules/cpu/render/Material.h @@ -35,7 +35,7 @@ struct OSPRAY_SDK_INTERFACE Material virtual std::string toString() const override; virtual void commit() override; 
- FeatureFlagsOther getFeatureFlagsOther() const; + FeatureFlags getFeatureFlags() const; // helper function to get all texture related parameters ispc::TextureParam getTextureParam(const char *texture_name); @@ -79,9 +79,11 @@ OSPTYPEFOR_SPECIALIZATION(Material *, OSP_MATERIAL); // Inlined definitions ///////////////////////////////////////////////////////// -inline FeatureFlagsOther Material::getFeatureFlagsOther() const +inline FeatureFlags Material::getFeatureFlags() const { - return featureFlags; + FeatureFlags ff; + ff.other = featureFlags; + return ff; } inline bool Material::isEmissive() const diff --git a/modules/cpu/render/Material.ih b/modules/cpu/render/Material.ih index 7ca2be742..81710a621 100644 --- a/modules/cpu/render/Material.ih +++ b/modules/cpu/render/Material.ih @@ -11,8 +11,9 @@ OSPRAY_BEGIN_ISPC_NAMESPACE -#define define_texture_get_ff(fn, map, dg, val, ff) \ - ((ffo & FFO_TEXTURE_IN_MATERIAL) ? fn(map, dg, val) : val) +#define define_texture_get_ff(fn, map, dg, val, ffh) \ + ((getFeatureFlagsOther(ffh) & FFO_TEXTURE_IN_MATERIAL) ? fn(map, dg, val) \ + : val) #define get1f_ff(map, dg, val, ff) \ define_texture_get_ff(get1f, map, dg, val, ff) #define get3f_ff(map, dg, val, ff) \ @@ -80,9 +81,9 @@ inline linear3f makeShadingFrame(const DifferentialGeometry &dg, inline linear3f makeShadingFrame_ff(const DifferentialGeometry &dg, const uniform TextureParam &normalMap, const uniform linear2f &normalRot, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsHandler &ffh) { - return (ffo & FFO_TEXTURE_IN_MATERIAL) + return (getFeatureFlagsOther(ffh) & FFO_TEXTURE_IN_MATERIAL) ? 
makeShadingFrame(dg, normalMap, normalRot) : makeShadingFrame(dg); } diff --git a/modules/cpu/render/MaterialDispatch.ih b/modules/cpu/render/MaterialDispatch.ih index 7b2b35586..37213f1fd 100644 --- a/modules/cpu/render/MaterialDispatch.ih +++ b/modules/cpu/render/MaterialDispatch.ih @@ -4,6 +4,7 @@ #pragma once #include "common/FeatureFlags.ih" +#include "common/FeatureFlagsEnum.h" #include "rkcommon/math/vec.ih" OSPRAY_BEGIN_ISPC_NAMESPACE @@ -20,7 +21,7 @@ SYCL_EXTERNAL __noinline const varying BSDF *varying Material_dispatch_getBSDF( const Ray &ray, // The medium this ray travels inside. const Medium &currentMedium, - const uniform FeatureFlagsOther ffo); + const uniform FeatureFlagsHandler &ffh); SYCL_EXTERNAL __noinline vec3f Material_dispatch_getTransparency( const uniform Material *uniform self, @@ -30,12 +31,12 @@ SYCL_EXTERNAL __noinline vec3f Material_dispatch_getTransparency( const Ray &ray, // The medium this ray travels inside. const Medium &currentMedium, - const uniform FeatureFlagsOther ffo); + const uniform FeatureFlagsHandler &ffh); SYCL_EXTERNAL __noinline void Material_dispatch_selectNextMedium( const uniform Material *uniform self, const DifferentialGeometry &dg, Medium &currentMedium, - const uniform FeatureFlagsOther ffo); + const uniform FeatureFlagsHandler &ffh); OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/render/MaterialDispatch.ispc b/modules/cpu/render/MaterialDispatch.ispc index 062d94972..25d2839b5 100644 --- a/modules/cpu/render/MaterialDispatch.ispc +++ b/modules/cpu/render/MaterialDispatch.ispc @@ -4,6 +4,7 @@ #include "Material.ih" #include "MaterialDispatch.ih" +#include "common/FeatureFlagsEnum.h" #include "render/materials/Alloy.ih" #include "render/materials/CarPaint.ih" #include "render/materials/Glass.ih" @@ -25,8 +26,9 @@ SYCL_EXTERNAL __noinline const varying BSDF *varying Material_dispatch_getBSDF( const DifferentialGeometry &dg, const Ray &ray, const Medium &currentMedium, - const uniform FeatureFlagsOther ffo) + const uniform
FeatureFlagsHandler &ffh) { + const uniform FeatureFlagsOther ffo = getFeatureFlagsOther(ffh); if ((self->type == MATERIAL_TYPE_ALLOY) && (ffo & FFO_MATERIAL_ALLOY)) { return Alloy_getBSDF(self, ctx, dg, ray, currentMedium); } else if ((self->type == MATERIAL_TYPE_CARPAINT) @@ -47,7 +49,7 @@ SYCL_EXTERNAL __noinline const varying BSDF *varying Material_dispatch_getBSDF( } else if ((self->type == MATERIAL_TYPE_MIX) && (ffo & FFO_MATERIAL_MIX)) { return Mix_getBSDF(self, ctx, dg, ray, currentMedium); } else if ((self->type == MATERIAL_TYPE_OBJ) && (ffo & FFO_MATERIAL_OBJ)) { - return OBJ_getBSDF(self, ctx, dg, ray, currentMedium, ffo); + return OBJ_getBSDF(self, ctx, dg, ray, currentMedium, ffh); } else if ((self->type == MATERIAL_TYPE_PLASTIC) && (ffo & FFO_MATERIAL_PLASTIC)) { return Plastic_getBSDF(self, ctx, dg, ray, currentMedium); @@ -73,8 +75,9 @@ SYCL_EXTERNAL __noinline vec3f Material_dispatch_getTransparency( const DifferentialGeometry &dg, const Ray &ray, const Medium &currentMedium, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsHandler &ffh) { + const uniform FeatureFlagsOther ffo = getFeatureFlagsOther(ffh); if ((self->type == MATERIAL_TYPE_GLASS) && (ffo & FFO_MATERIAL_GLASS)) { return Glass_getTransparency(self, dg, ray, currentMedium); } else if ((self->type == MATERIAL_TYPE_LUMINOUS) @@ -83,7 +86,7 @@ SYCL_EXTERNAL __noinline vec3f Material_dispatch_getTransparency( } else if ((self->type == MATERIAL_TYPE_MIX) && (ffo & FFO_MATERIAL_MIX)) { return Mix_getTransparency(self, dg, ray, currentMedium); } else if ((self->type == MATERIAL_TYPE_OBJ) && (ffo & FFO_MATERIAL_OBJ)) { - return OBJ_getTransparency(self, dg, ray, currentMedium, ffo); + return OBJ_getTransparency(self, dg, ray, currentMedium, ffh); } else if ((self->type == MATERIAL_TYPE_PRINCIPLED) && (ffo & FFO_MATERIAL_PRINCIPLED)) { return Principled_getTransparency(self, dg, ray, currentMedium); @@ -103,8 +106,9 @@ SYCL_EXTERNAL __noinline void Material_dispatch_selectNextMedium(
const uniform Material *uniform self, const DifferentialGeometry &dg, Medium &currentMedium, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsHandler &ffh) { + const uniform FeatureFlagsOther ffo = getFeatureFlagsOther(ffh); if ((self->type == MATERIAL_TYPE_GLASS) && (ffo & FFO_MATERIAL_GLASS)) { Glass_selectNextMedium(self, dg, currentMedium); } else if ((self->type == MATERIAL_TYPE_PRINCIPLED) diff --git a/modules/cpu/render/RenderTaskSycl.h b/modules/cpu/render/RenderTaskSycl.h index a0ff3479d..4a2326c71 100644 --- a/modules/cpu/render/RenderTaskSycl.h +++ b/modules/cpu/render/RenderTaskSycl.h @@ -93,6 +93,9 @@ inline float RenderTask::getTaskDuration() const auto t1 = rendererEvent .get_profiling_info(); + // TODO: We need a way to tell if the fbEvent was actually submitted, + // otherwise getting the time gives an error + /* const auto t2 = frameBufferEvent .get_profiling_info(); @@ -100,6 +103,8 @@ inline float RenderTask::getTaskDuration() frameBufferEvent .get_profiling_info(); return ((t1 - t0) + (t3 - t2)) * 1E-9; + */ + return (t1 - t0) * 1.0e-9; } } // namespace ospray diff --git a/modules/cpu/render/Renderer.cpp b/modules/cpu/render/Renderer.cpp index a9ae95d88..e42e92e19 100644 --- a/modules/cpu/render/Renderer.cpp +++ b/modules/cpu/render/Renderer.cpp @@ -49,9 +49,9 @@ void Renderer::commit() maxDepthTexture = (Texture2D *)getParamObject("map_maxDepth"); backplate = (Texture2D *)getParamObject("map_backplate"); - featureFlags = FFO_NONE; + featureFlags.reset(); if (maxDepthTexture || backplate) - featureFlags |= FFO_TEXTURE_IN_RENDERER; + featureFlags.other |= FFO_TEXTURE_IN_RENDERER; if (maxDepthTexture) { if (maxDepthTexture->format != OSP_TEXTURE_R32F @@ -73,7 +73,7 @@ void Renderer::commit() materialData = getParamDataT("material"); if (materialData) { for (auto &&mat : *materialData) - featureFlags |= mat->getFeatureFlagsOther(); + featureFlags |= mat->getFeatureFlags(); materialArray = make_buffer_shared_unique(
getISPCDevice().getIspcrtContext(), diff --git a/modules/cpu/render/Renderer.h b/modules/cpu/render/Renderer.h index 3eb174c06..233da58a9 100644 --- a/modules/cpu/render/Renderer.h +++ b/modules/cpu/render/Renderer.h @@ -81,7 +81,7 @@ struct OSPRAY_SDK_INTERFACE Renderer std::unique_ptr> materialArray; protected: - FeatureFlagsOther featureFlags{FFO_NONE}; + FeatureFlags featureFlags; api::ISPCDevice &device; private: diff --git a/modules/cpu/render/Renderer.ih b/modules/cpu/render/Renderer.ih index b81e6cba0..49b81aadf 100644 --- a/modules/cpu/render/Renderer.ih +++ b/modules/cpu/render/Renderer.ih @@ -11,9 +11,9 @@ OSPRAY_BEGIN_ISPC_NAMESPACE SYCL_EXTERNAL vec4f Renderer_getBackground(const Renderer *uniform self, const vec2f &screenPos, - const uniform FeatureFlagsOther ffo); + const uniform FeatureFlagsHandler &ffh); SYCL_EXTERNAL float Renderer_getMaxDepth(const Renderer *uniform self, const vec2f &screenPos, - const uniform FeatureFlagsOther ffo); + const uniform FeatureFlagsHandler &ffh); OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/render/Renderer.ispc b/modules/cpu/render/Renderer.ispc index 5a821dccf..de7f6137f 100644 --- a/modules/cpu/render/Renderer.ispc +++ b/modules/cpu/render/Renderer.ispc @@ -4,6 +4,7 @@ // TODO: Seems like ISPC bug: if Renderer.ih included before // Texture2D.ih I get an error that the Texture2D type is declared but not // defined +#include "common/FeatureFlagsEnum.h" #include "texture/Texture2D.ih" #include "Renderer.ih" @@ -17,9 +18,10 @@ OSPRAY_BEGIN_ISPC_NAMESPACE SYCL_EXTERNAL vec4f Renderer_getBackground(const Renderer *uniform self, const vec2f &screenPos, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsHandler &ffh) { - if (!self->backplate || !(ffo & FFO_TEXTURE_IN_RENDERER)) + const uniform FeatureFlags ff = getFeatureFlags(ffh); + if (!self->backplate || !(ff.other & FFO_TEXTURE_IN_RENDERER)) return self->bgColor; // TODO: Now for GPU making a whole DifferentialGeometry object @@ -32,9 +34,10 @@ 
SYCL_EXTERNAL vec4f Renderer_getBackground(const Renderer *uniform self, SYCL_EXTERNAL float Renderer_getMaxDepth(const Renderer *uniform self, const vec2f &screenPos, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsHandler &ffh) { - if (!self->maxDepthTexture || !(ffo & FFO_TEXTURE_IN_RENDERER)) + const uniform FeatureFlags ff = getFeatureFlags(ffh); + if (!self->maxDepthTexture || !(ff.other & FFO_TEXTURE_IN_RENDERER)) return inf; DifferentialGeometry lookup; @@ -55,6 +58,7 @@ export void Renderer_pick(const void *uniform _self, uniform int32 &primID, uniform int32 &hit) { +#ifndef OSPRAY_TARGET_SYCL const Renderer *uniform self = (const Renderer *uniform)_self; const Camera *uniform camera = (const Camera *uniform)_camera; const World *uniform world = (const World *uniform)_world; @@ -67,11 +71,12 @@ export void Renderer_pick(const void *uniform _self, cameraSample.lens.y = 0.0f; cameraSample.time = 0.5f; + uniform FeatureFlagsHandler ffh; Ray ray; - Camera_dispatch_initRay(camera, ray, cameraSample, FFO_ALL); - ray.t = min(ray.t, Renderer_getMaxDepth(self, cameraSample.screen, FFO_ALL)); + Camera_dispatch_initRay(camera, ray, cameraSample, ffh); + ray.t = min(ray.t, Renderer_getMaxDepth(self, cameraSample.screen, ffh)); - traceRay(world, ray, ffAll()); + traceRay(world, ray, ffh); vec3f p = ray.org + ray.dir * ray.t; @@ -82,6 +87,17 @@ export void Renderer_pick(const void *uniform _self, instID = extract(ray.instID, 0); geomID = extract(ray.geomID, 0); primID = extract(ray.primID, 0); +#else + (void)_self; + (void)_camera; + (void)_world; + (void)screenPos; + (void)pos; + (void)instID; + (void)geomID; + (void)primID; + (void)hit; +#endif } OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/render/RendererRenderTaskFn.inl b/modules/cpu/render/RendererRenderTaskFn.inl index 379974b30..9a03bbf07 100644 --- a/modules/cpu/render/RendererRenderTaskFn.inl +++ b/modules/cpu/render/RendererRenderTaskFn.inl @@ -17,7 +17,7 @@ task #ifdef 
OSPRAY_TARGET_SYCL const int taskIndex0, #endif - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { const uniform int32 spp = self->spp; @@ -28,7 +28,7 @@ task CameraSample cameraSample; uniform RenderTaskDesc taskDesc = - FrameBuffer_dispatch_getRenderTaskDesc(fb, taskIDs[taskIndex0], ff.other); + FrameBuffer_dispatch_getRenderTaskDesc(fb, taskIDs[taskIndex0], ffh); const uniform int startSampleID = max(taskDesc.accumID, 0) * spp; if (fb->cancelRender || isEmpty(taskDesc.region)) { @@ -48,8 +48,7 @@ task // set ray t value for early ray termination (from maximum depth texture) vec2f center = make_vec2f(screenSample.sampleID.x, screenSample.sampleID.y) + 0.5f; - const float tMax = - Renderer_getMaxDepth(self, center * fb->rcpSize, ff.other); + const float tMax = Renderer_getMaxDepth(self, center * fb->rcpSize, ffh); screenSample.z = tMax; vec3f col = make_vec3f(0.f); @@ -86,16 +85,15 @@ task cameraSample.lens.y = 0.0f; cameraSample.time = 0.5f; - Camera_dispatch_initRay( - camera, screenSample.ray, cameraSample, ff.other); + Camera_dispatch_initRay(camera, screenSample.ray, cameraSample, ffh); screenSample.ray.t = min(screenSample.ray.t, tMax); screenSample.z = inf; screenSample.primID = RTC_INVALID_GEOMETRY_ID; screenSample.geomID = RTC_INVALID_GEOMETRY_ID; screenSample.instID = RTC_INVALID_GEOMETRY_ID; - screenSample.albedo = make_vec3f( - Renderer_getBackground(self, screenSample.pos, ff.other)); + screenSample.albedo = + make_vec3f(Renderer_getBackground(self, screenSample.pos, ffh)); screenSample.normal = make_vec3f(0.f); #ifdef OSPRAY_TARGET_SYCL @@ -103,12 +101,12 @@ task // Dummy top level print so that prints at lower levels of the kernel // will work See JIRA https://jira.devtools.intel.com/browse/XDEPS-4729 if (taskIndex0 == 0) { - sycl::ext::oneapi::experimental::printf(""); + sycl::ext::oneapi::experimental::printf("0\n"); } #endif #endif - renderSampleFn(self, fb, world, screenSample, ff); + renderSampleFn(self, fb, world, 
screenSample, ffh); col = col + screenSample.rgb; alpha += screenSample.alpha; @@ -124,8 +122,7 @@ task screenSample.normal = normal * rspp; screenSample.albedo = albedo * rspp; - FrameBuffer_dispatch_accumulateSample( - fb, screenSample, taskDesc, ff.other); + FrameBuffer_dispatch_accumulateSample(fb, screenSample, taskDesc, ffh); } - FrameBuffer_dispatch_completeTask(fb, taskDesc, ff.other); + FrameBuffer_dispatch_completeTask(fb, taskDesc, ffh); } diff --git a/modules/cpu/render/ao/AORenderer.cpp b/modules/cpu/render/ao/AORenderer.cpp index 2684121ca..a35902204 100644 --- a/modules/cpu/render/ao/AORenderer.cpp +++ b/modules/cpu/render/ao/AORenderer.cpp @@ -10,7 +10,6 @@ #include "render/ao/AORenderer_ispc.h" #else #include "AORenderer.ih" -constexpr sycl::specialization_id specFeatureFlags; #endif namespace ospray { @@ -54,25 +53,24 @@ AsyncEvent AORenderer::renderTasks(FrameBuffer *fb, const uint32_t *taskIDsPtr = taskIDs.data(); event = device.getSyclQueue().submit([&](sycl::handler &cgh) { FeatureFlags ff = world->getFeatureFlags(); - ff.other |= featureFlags; - ff.other |= fb->getFeatureFlagsOther(); - ff.other |= camera->getFeatureFlagsOther(); - cgh.set_specialization_constant(ff); + ff |= featureFlags; + ff |= fb->getFeatureFlags(); + ff |= camera->getFeatureFlags(); + cgh.set_specialization_constant(ff); const sycl::nd_range<1> dispatchRange = device.computeDispatchRange(numTasks, 16); cgh.parallel_for(dispatchRange, [=](sycl::nd_item<1> taskIndex, sycl::kernel_handler kh) { if (taskIndex.get_global_id(0) < numTasks) { - const FeatureFlags ff = - kh.get_specialization_constant(); + ispc::FeatureFlagsHandler ffh(kh); ispc::AORenderer_renderTask(&rendererSh->super, fbSh, cameraSh, worldSh, taskIDsPtr, taskIndex.get_global_id(0), - ff); + ffh); } }); }); diff --git a/modules/cpu/render/ao/AORenderer.ih b/modules/cpu/render/ao/AORenderer.ih index 4ad347505..91f7499f7 100644 --- a/modules/cpu/render/ao/AORenderer.ih +++ b/modules/cpu/render/ao/AORenderer.ih 
@@ -1,6 +1,8 @@ // Copyright 2022 Intel Corporation // SPDX-License-Identifier: Apache-2.0 +#include "common/FeatureFlags.ih" + OSPRAY_BEGIN_ISPC_NAMESPACE #ifdef OSPRAY_TARGET_SYCL @@ -10,7 +12,7 @@ SYCL_EXTERNAL void AORenderer_renderTask(Renderer *uniform self, World *uniform world, const uint32 *uniform taskIDs, const int taskIndex0, - const uniform ospray::FeatureFlags &ff); + const uniform FeatureFlagsHandler &ffh); #endif OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/render/ao/AORenderer.ispc b/modules/cpu/render/ao/AORenderer.ispc index 5fc6619e5..db4c10bb8 100644 --- a/modules/cpu/render/ao/AORenderer.ispc +++ b/modules/cpu/render/ao/AORenderer.ispc @@ -3,6 +3,7 @@ #include "camera/Camera.ih" #include "camera/CameraDispatch.ih" +#include "common/FeatureFlagsEnum.h" #include "fb/FrameBuffer.ih" #include "fb/FrameBufferDispatch.ih" #include "fb/RenderTaskDesc.ih" @@ -22,10 +23,12 @@ static void AORenderer_renderSample(Renderer *uniform _self, FrameBuffer *uniform fb, World *uniform world, varying ScreenSample &sample, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { uniform AORenderer *uniform self = (uniform AORenderer * uniform) _self; + const uniform FeatureFlags ff = getFeatureFlags(ffh); + LDSampler ldSamplerObj; varying LDSampler *uniform ldSampler = &ldSamplerObj; LDSampler_init(ldSampler, @@ -57,28 +60,19 @@ static void AORenderer_renderSample(Renderer *uniform _self, // First trace the ray across clipping scene to calculate ray intervals, // this step should keep ray structure unchanged RayIntervals rayIntervals; -#ifndef OSPRAY_TARGET_SYCL - traceClippingRay(world, ray, rayIntervals); -#else - rayIntervals.count = 1; - rayIntervals.intervals[0].lower = ray.t0; - rayIntervals.intervals[0].upper = ray.t; -#endif + traceClippingRay(world, ray, rayIntervals, ffh); // Iterate over all translucent geometry till we are fully opaque vec4f outputColor = make_vec4f(0.f); while (outputColor.w < 0.99f) { // Then trace normal 
geometry using calculated ray intervals, // if hit ray.t will be updated - // Note: work around for bug when using traceGeometryRayIntervals on GPU -#ifdef OSPRAY_TARGET_SYCL - traceGeometryRay(world, ray, ff); -#else - traceGeometryRayIntervals(world, ray, rayIntervals, ff); -#endif + if (ff.geometry) { + traceGeometryRayIntervals(world, ray, rayIntervals, ffh); + } #ifdef OSPRAY_ENABLE_VOLUMES - if (ff.volume & FFV_VOLUME) { + if (ff.other & FFO_VOLUME_IN_SCENE) { // Determine volume intervals by tracing ray in the volume scene Ray volumeRay = ray; traceVolumeRay(world, volumeRay, volumeIntervals); @@ -89,7 +83,8 @@ static void AORenderer_renderSample(Renderer *uniform _self, rayIntervals, volumeRay, ldSampler, - self->volumeSamplingRate); + self->volumeSamplingRate, + ffh); outputColor = outputColor + (1.f - outputColor.w) * volumeColor; } } @@ -98,14 +93,14 @@ static void AORenderer_renderSample(Renderer *uniform _self, // If any geometry has been hit vec4f blendedColor; const bool rayHadHit = hadHit(ray); - if (rayHadHit) { + if (rayHadHit && ff.geometry) { // Prepare differential geometry structure DifferentialGeometry dg; - computeDG(world, self, ray, dg, ff); + computeDG(world, self, ray, dg, ffh); // Shade geometry SSI surfaceShading; - surfaceShading = AORenderer_computeShading(self, world, dg, sample, ff); + surfaceShading = AORenderer_computeShading(self, world, dg, sample, ffh); // Use shaded color for blending blendedColor = surfaceShading.shadedColor; @@ -124,7 +119,7 @@ static void AORenderer_renderSample(Renderer *uniform _self, // start from the last geometry hit all over to initial Tfar setRay(ray, ray.t + dg.epsilon, originalRayTFar); } else { - blendedColor = Renderer_getBackground(&self->super, sample.pos, ff.other); + blendedColor = Renderer_getBackground(&self->super, sample.pos, ffh); // Initialize other per sample data with first hit values if (firstHit) { sample.z = ray.t; @@ -159,9 +154,10 @@ SYCL_EXTERNAL void 
AORenderer_renderTask(Renderer *uniform self, World *uniform world, const uint32 *uniform taskIDs, const int taskIndex0, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { - Renderer_default_renderTask(self, fb, camera, world, taskIDs, taskIndex0, ff); + Renderer_default_renderTask( + self, fb, camera, world, taskIDs, taskIndex0, ffh); } #else export void AORenderer_renderTasks(void *uniform _self, @@ -176,8 +172,9 @@ export void AORenderer_renderTasks(void *uniform _self, Camera *uniform camera = (Camera * uniform) _camera; World *uniform world = (World * uniform) _world; const uint32 *uniform taskIDs = (const uint32 *uniform)_taskIDs; + uniform FeatureFlagsHandler ffh; launch[numTasks] Renderer_default_renderTask( - self, fb, camera, world, taskIDs, ffAll()); + self, fb, camera, world, taskIDs, ffh); sync; } #endif diff --git a/modules/cpu/render/ao/surfaces.ih b/modules/cpu/render/ao/surfaces.ih index 617b87e40..5af691588 100644 --- a/modules/cpu/render/ao/surfaces.ih +++ b/modules/cpu/render/ao/surfaces.ih @@ -3,6 +3,7 @@ #pragma once +#include "common/FeatureFlagsEnum.h" #include "common/World.ih" // c++ shared #include "AORendererShared.h" @@ -25,20 +26,20 @@ inline void computeDG(const World *uniform world, const AORenderer *uniform renderer, const Ray &ray, DifferentialGeometry &dg, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { postIntersect(world, &renderer->super, dg, ray, DG_NG | DG_NS | DG_NORMALIZE | DG_FACEFORWARD | DG_COLOR | DG_TEXCOORD, - ff); + ffh); } SYCL_EXTERNAL SSI AORenderer_computeShading(const AORenderer *uniform self, const World *uniform world, const DifferentialGeometry &dg, ScreenSample &sample, - const uniform FeatureFlags &ff); + const uniform FeatureFlagsHandler &ffh); OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/render/ao/surfaces.ispc b/modules/cpu/render/ao/surfaces.ispc index a6f5ebe56..cb28aed3f 100644 --- a/modules/cpu/render/ao/surfaces.ispc +++ 
b/modules/cpu/render/ao/surfaces.ispc @@ -1,6 +1,7 @@ // Copyright 2009 Intel Corporation // SPDX-License-Identifier: Apache-2.0 +#include "common/FeatureFlagsEnum.h" #include "math/random.ih" #include "math/sampling.ih" #include "render/ScreenSample.ih" @@ -15,14 +16,15 @@ OSPRAY_BEGIN_ISPC_NAMESPACE inline vec4f getSurfaceColor( - const DifferentialGeometry &dg, const uniform FeatureFlagsOther ffo) + const DifferentialGeometry &dg, const uniform FeatureFlagsHandler &ffh) { const OBJ *mat = (const OBJ *)dg.material; vec3f surfaceColor = make_vec3f(1.f); float opacity = 1.f; - const uniform bool fft = ffo & FFO_TEXTURE_IN_MATERIAL; + const uniform FeatureFlags ff = getFeatureFlags(ffh); + const uniform bool fft = ff.other & FFO_TEXTURE_IN_MATERIAL; #ifdef OSPRAY_TARGET_SYCL { @@ -48,11 +50,11 @@ SYCL_EXTERNAL SSI AORenderer_computeShading(const AORenderer *uniform self, const World *uniform world, const DifferentialGeometry &dg, ScreenSample &sample, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { SSI retval; - const vec4f surfaceColor = getSurfaceColor(dg, ff.other); + const vec4f surfaceColor = getSurfaceColor(dg, ffh); const vec3f color = make_vec3f(surfaceColor); const float opacity = surfaceColor.w; @@ -66,7 +68,7 @@ SYCL_EXTERNAL SSI AORenderer_computeShading(const AORenderer *uniform self, self->aoSamples, self->aoRadius, sample.sampleID, - ff); + ffh); intensity *= (1.0f - self->aoIntensity) + (ao * self->aoIntensity); } diff --git a/modules/cpu/render/ao/volumes.ih b/modules/cpu/render/ao/volumes.ih index 9d3b60b98..4893c0b8c 100644 --- a/modules/cpu/render/ao/volumes.ih +++ b/modules/cpu/render/ao/volumes.ih @@ -5,6 +5,7 @@ #pragma once #include "common/Clipping.ih" +#include "common/FeatureFlagsEnum.h" #include "common/Ray.ih" #include "common/VolumeIntervals.ih" @@ -16,7 +17,8 @@ SYCL_EXTERNAL vec4f integrateVolumeIntervals(const VolumeIntervals &intervals, const RayIntervals &rayIntervals, Ray &ray, varying LDSampler 
*uniform ldSampler, - const uniform float samplingRate); + const uniform float samplingRate, + const uniform FeatureFlagsHandler &ffh); OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/render/ao/volumes.ispc b/modules/cpu/render/ao/volumes.ispc index 960960f72..653e46adc 100644 --- a/modules/cpu/render/ao/volumes.ispc +++ b/modules/cpu/render/ao/volumes.ispc @@ -29,8 +29,11 @@ struct VolumeContext static void sampleVolume(VolumeContext &vc, const VolumetricModel *uniform m, - const uniform float samplingRate) + const uniform float samplingRate, + const uniform FeatureFlagsHandler &ffh) { + const uniform FeatureFlags ff = getFeatureFlags(ffh); + // We have to iterate till we get a valid sample value float dt = 0.f; float sampleVal = nan; @@ -40,7 +43,13 @@ static void sampleVolume(VolumeContext &vc, while (vc.iuDistance > vc.iuLength) { // Get next VKL interval const float prevUpper = vc.interval.tRange.upper; - if (vklIterateIntervalV(vc.intervalIterator, &vc.interval)) { + if (vklIterateIntervalV(vc.intervalIterator, + &vc.interval +#ifdef OSPRAY_TARGET_SYCL + , + ff.volume +#endif + )) { // Intervals may not be contiguous, accumulate empty space emptySpace += max(vc.interval.tRange.lower - prevUpper, 0.f); @@ -72,14 +81,21 @@ static void sampleVolume(VolumeContext &vc, const vec3f p = vc.org + newDistance * vc.dir; // Sample volume value in given point - sampleVal = vklComputeSampleV( #if OPENVKL_VERSION_MAJOR == 1 - m->volume->vklSampler, + // We know if we have OpenVKL version 1 we're not targetting SYCL + sampleVal = vklComputeSampleV( + m->volume->vklSampler, (const varying vkl_vec3f *uniform) & p); #else - &m->volume->vklSampler, + sampleVal = vklComputeSampleV(&m->volume->vklSampler, + (const varying vkl_vec3f *uniform) & p +#ifdef OSPRAY_TARGET_SYCL + , + 0, + 0.5f, + ff.volume +#endif + ); #endif - (const varying vkl_vec3f *uniform) & p); - // Go to the next sub-interval vc.iuDistance += 1.f; dt = newDistance - vc.distance - emptySpace; @@ -96,7 +112,8 
@@ static void sampleVolume(VolumeContext &vc, static float sampleAllVolumes(const VolumeIntervals &volumeIntervals, varying VolumeContext *uniform volumeContexts, const uniform float samplingRate, - vec4f &sampledColor) + vec4f &sampledColor, + const uniform FeatureFlagsHandler &ffh) { // Look for the closest sample across all volumes float minDist = inf; @@ -116,7 +133,7 @@ static float sampleAllVolumes(const VolumeIntervals &volumeIntervals, #else foreach_unique (m in vi.volumetricModel) { #endif - sampleVolume(vc, m, samplingRate); + sampleVolume(vc, m, samplingRate, ffh); } vc.ready = 1; } @@ -143,8 +160,11 @@ SYCL_EXTERNAL vec4f integrateVolumeIntervals( const RayIntervals &rayIntervals, Ray &ray, varying LDSampler *uniform ldSampler, - const uniform float samplingRate) + const uniform float samplingRate, + const uniform FeatureFlagsHandler &ffh) { + const uniform FeatureFlags ff = getFeatureFlags(ffh); + #ifdef OSPRAY_TARGET_SYCL // Only a single volume context is supported on the GPU, no dynamic allocation VolumeContext volumeContext; @@ -231,13 +251,18 @@ SYCL_EXTERNAL vec4f integrateVolumeIntervals( #else foreach_unique (m in model) { #endif + // Create volume interval iterator - vc.intervalIterator = vklInitIntervalIteratorV( #if OPENVKL_VERSION_MAJOR == 1 - m->vklIntervalContext, + // We know if we have OpenVKL version 1 we're not targetting SYCL + vc.intervalIterator = vklInitIntervalIteratorV(m->vklIntervalContext, + (varying vkl_vec3f *)&vc.org, + (varying vkl_vec3f *)&vc.dir, + (varying vkl_range1f *)&rInterval, + &time, + vc.intervalIteratorBuffer); #else - &m->vklIntervalContext, -#endif + vc.intervalIterator = vklInitIntervalIteratorV(&m->vklIntervalContext, (varying vkl_vec3f *)&vc.org, (varying vkl_vec3f *)&vc.dir, (varying vkl_range1f *)&rInterval, @@ -246,7 +271,13 @@ SYCL_EXTERNAL vec4f integrateVolumeIntervals( #else &time, #endif - vc.intervalIteratorBuffer); + vc.intervalIteratorBuffer +#ifdef OSPRAY_TARGET_SYCL + , + ff.volume +#endif + 
); +#endif } } @@ -254,8 +285,11 @@ SYCL_EXTERNAL vec4f integrateVolumeIntervals( while (alpha < .99f) { // Sample across all volumes vec4f sampledColorOpacity; - float dist = sampleAllVolumes( - volumeIntervals, volumeContexts, samplingRate, sampledColorOpacity); + float dist = sampleAllVolumes(volumeIntervals, + volumeContexts, + samplingRate, + sampledColorOpacity, + ffh); // Exit loop if nothing sampled if (dist == inf) diff --git a/modules/cpu/render/bsdfs/BSDF.ih b/modules/cpu/render/bsdfs/BSDF.ih index 287a71e07..fb6cc1b01 100644 --- a/modules/cpu/render/bsdfs/BSDF.ih +++ b/modules/cpu/render/bsdfs/BSDF.ih @@ -85,13 +85,13 @@ inline BSDF_SampleRes make_BSDF_SampleRes_zero() typedef BSDF_EvalRes (*BSDF_EvalFunc)(const varying BSDF *uniform self, const vec3f &wo, const vec3f &wi, - const uniform FeatureFlagsOther ffo); + const uniform FeatureFlagsHandler &ffh); typedef BSDF_SampleRes (*BSDF_SampleFunc)(const varying BSDF *uniform self, const vec3f &wo, const vec2f &s, float ss, - const uniform FeatureFlagsOther ffo); + const uniform FeatureFlagsHandler &ffh); struct BSDF { @@ -134,14 +134,14 @@ inline linear3f getFrame(const varying BSDF *uniform bsdf) SYCL_EXTERNAL BSDF_EvalRes BSDF_dispatch_eval(const varying BSDF *uniform self, const vec3f &wo, const vec3f &wi, - const uniform FeatureFlagsOther ffo); + const uniform FeatureFlagsHandler &ffh); SYCL_EXTERNAL BSDF_SampleRes BSDF_dispatch_sample( const varying BSDF *uniform self, const vec3f &wo, const vec2f &s, float ss, - const uniform FeatureFlagsOther ffo); + const uniform FeatureFlagsHandler &ffh); // We need to break the possible recursive call structure of a multi bsdf // containing multi-BSDFs, since SYCL doesn't allow recursive calls @@ -150,13 +150,13 @@ SYCL_EXTERNAL BSDF_EvalRes BSDF_dispatch_eval_base( const varying BSDF *uniform self, const vec3f &wo, const vec3f &wi, - const uniform FeatureFlagsOther ffo); + const uniform FeatureFlagsHandler &ffh); SYCL_EXTERNAL BSDF_SampleRes 
BSDF_dispatch_sample_base( const varying BSDF *uniform self, const vec3f &wo, const vec2f &s, float ss, - const uniform FeatureFlagsOther ffo); + const uniform FeatureFlagsHandler &ffh); OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/render/bsdfs/BSDF.ispc b/modules/cpu/render/bsdfs/BSDF.ispc index cf28b7bb7..c881fff4c 100644 --- a/modules/cpu/render/bsdfs/BSDF.ispc +++ b/modules/cpu/render/bsdfs/BSDF.ispc @@ -26,6 +26,7 @@ #include "render/bsdfs/Velvety.ih" #include "common/FeatureFlags.ih" +#include "common/FeatureFlagsEnum.h" OSPRAY_BEGIN_ISPC_NAMESPACE @@ -67,21 +68,22 @@ OSPRAY_BEGIN_ISPC_NAMESPACE static BSDF_EvalRes BSDF_dispatch_eval_nomulti(const varying BSDF *uniform self, const vec3f &wo, const vec3f &wi, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsHandler &ffh) { + const uniform FeatureFlagsOther ffo = getFeatureFlagsOther(ffh); if ((self->bsdfType == BSDF_TYPE_DIELECTRIC_LAYER) && (ffo & MTB_DIELECTRIC_LAYER)) { - return DielectricLayer_eval(self, wo, wi, ffo); + return DielectricLayer_eval(self, wo, wi, ffh); } else if ((self->bsdfType == BSDF_TYPE_MICROFACET_DIELECTRIC_LAYER) && (ffo & MTB_MICROFACET_DIELECTRIC_LAYER)) { - return MicrofacetDielectricLayer_eval(self, wo, wi, ffo); + return MicrofacetDielectricLayer_eval(self, wo, wi, ffh); } else if ((self->bsdfType == BSDF_TYPE_SCALE) && (ffo & MTB_SCALE)) { - return Scale_eval(self, wo, wi, ffo); + return Scale_eval(self, wo, wi, ffh); } else if ((self->bsdfType == BSDF_TYPE_MICROFACET_SHEEN_LAYER) && (ffo & MTB_MICROFACET_SHEEN_LAYER)) { - return MicrofacetSheenLayer_eval(self, wo, wi, ffo); + return MicrofacetSheenLayer_eval(self, wo, wi, ffh); } else { - return BSDF_dispatch_eval_base(self, wo, wi, ffo); + return BSDF_dispatch_eval_base(self, wo, wi, ffh); } } @@ -90,33 +92,35 @@ static BSDF_SampleRes BSDF_dispatch_sample_nomulti( const vec3f &wo, const vec2f &s, float ss, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsHandler &ffh) { + const uniform 
FeatureFlagsOther ffo = getFeatureFlagsOther(ffh); if ((self->bsdfType == BSDF_TYPE_DIELECTRIC_LAYER) && (ffo & MTB_DIELECTRIC_LAYER)) { - return DielectricLayer_sample(self, wo, s, ss, ffo); + return DielectricLayer_sample(self, wo, s, ss, ffh); } else if ((self->bsdfType == BSDF_TYPE_MICROFACET_DIELECTRIC_LAYER) && (ffo & MTB_MICROFACET_DIELECTRIC_LAYER)) { - return MicrofacetDielectricLayer_sample(self, wo, s, ss, ffo); + return MicrofacetDielectricLayer_sample(self, wo, s, ss, ffh); } else if ((self->bsdfType == BSDF_TYPE_SCALE) && (ffo & MTB_SCALE)) { - return Scale_sample(self, wo, s, ss, ffo); + return Scale_sample(self, wo, s, ss, ffh); } else if ((self->bsdfType == BSDF_TYPE_MICROFACET_SHEEN_LAYER) && (ffo & MTB_MICROFACET_SHEEN_LAYER)) { - return MicrofacetSheenLayer_sample(self, wo, s, ss, ffo); + return MicrofacetSheenLayer_sample(self, wo, s, ss, ffh); } else { - return BSDF_dispatch_sample_base(self, wo, s, ss, ffo); + return BSDF_dispatch_sample_base(self, wo, s, ss, ffh); } } SYCL_EXTERNAL BSDF_EvalRes BSDF_dispatch_eval(const varying BSDF *uniform self, const vec3f &wo, const vec3f &wi, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsHandler &ffh) { + const uniform FeatureFlagsOther ffo = getFeatureFlagsOther(ffh); if ((self->bsdfType == BSDF_TYPE_MULTI_BSDF) && (ffo & MTB_MULTI_BSDF)) { - return MultiBSDF_eval(self, wo, wi, ffo); + return MultiBSDF_eval(self, wo, wi, ffh); } else { - return BSDF_dispatch_eval_nomulti(self, wo, wi, ffo); + return BSDF_dispatch_eval_nomulti(self, wo, wi, ffh); } } @@ -125,12 +129,13 @@ SYCL_EXTERNAL BSDF_SampleRes BSDF_dispatch_sample( const vec3f &wo, const vec2f &s, float ss, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsHandler &ffh) { + const uniform FeatureFlagsOther ffo = getFeatureFlagsOther(ffh); if ((self->bsdfType == BSDF_TYPE_MULTI_BSDF) && (ffo & MTB_MULTI_BSDF)) { - return MultiBSDF_sample(self, wo, s, ss, ffo); + return MultiBSDF_sample(self, wo, s, ss, ffh); } 
else { - return BSDF_dispatch_sample_nomulti(self, wo, s, ss, ffo); + return BSDF_dispatch_sample_nomulti(self, wo, s, ss, ffh); } } @@ -138,55 +143,56 @@ SYCL_EXTERNAL BSDF_EvalRes BSDF_dispatch_eval_base( const varying BSDF *uniform self, const vec3f &wo, const vec3f &wi, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsHandler &ffh) { + const uniform FeatureFlagsOther ffo = getFeatureFlagsOther(ffh); if ((self->bsdfType == BSDF_TYPE_CONDUCTOR) && (ffo & MTB_CONDUCTOR)) { - return Conductor_eval(self, wo, wi, ffo); + return Conductor_eval(self, wo, wi, ffh); } else if ((self->bsdfType == BSDF_TYPE_DIELECTRIC) && (ffo & MTB_DIELECTRIC)) { - return Dielectric_eval(self, wo, wi, ffo); + return Dielectric_eval(self, wo, wi, ffh); } else if ((self->bsdfType == BSDF_TYPE_LAMBERT) && (ffo & MTB_LAMBERT)) { - return Lambert_eval(self, wo, wi, ffo); + return Lambert_eval(self, wo, wi, ffh); } else if ((self->bsdfType == BSDF_TYPE_LAMBERT_TRANSMISSION) && (ffo & MTB_LAMBERT_TRANSMISSION)) { - return LambertTransmission_eval(self, wo, wi, ffo); + return LambertTransmission_eval(self, wo, wi, ffh); } else if ((self->bsdfType == BSDF_TYPE_MICROFACET_CONDUCTOR) && (ffo & MTB_MICROFACET_CONDUCTOR)) { - return MicrofacetConductor_eval(self, wo, wi, ffo); + return MicrofacetConductor_eval(self, wo, wi, ffh); } else if ((self->bsdfType == BSDF_TYPE_MICROFACET_DIELECTRIC) && (ffo & MTB_MICROFACET_DIELECTRIC)) { - return MicrofacetDielectric_eval(self, wo, wi, ffo); + return MicrofacetDielectric_eval(self, wo, wi, ffh); } else if ((self->bsdfType == BSDF_TYPE_MINNEART) && (ffo & MTB_MINNEART)) { - return Minneart_eval(self, wo, wi, ffo); + return Minneart_eval(self, wo, wi, ffh); } else if ((self->bsdfType == BSDF_TYPE_SPECULAR) && (ffo & MTB_SPECULAR)) { - return Specular_eval(self, wo, wi, ffo); + return Specular_eval(self, wo, wi, ffh); } else if ((self->bsdfType == BSDF_TYPE_OREN_NAYAR) && (ffo & MTB_OREN_NAYAR)) { - return OrenNayar_eval(self, wo, wi, ffo); + 
return OrenNayar_eval(self, wo, wi, ffh); } else if ((self->bsdfType == BSDF_TYPE_REFLECTION) && (ffo & MTB_REFLECTION)) { - return Reflection_eval(self, wo, wi, ffo); + return Reflection_eval(self, wo, wi, ffh); } else if ((self->bsdfType == BSDF_TYPE_ROBUST_DIELECTRIC) && (ffo & MTB_ROBUST_DIELECTRIC)) { - return RobustDielectric_eval(self, wo, wi, ffo); + return RobustDielectric_eval(self, wo, wi, ffh); } else if ((self->bsdfType == BSDF_TYPE_ROBUST_THIN_DIELECTRIC) && (ffo & MTB_ROBUST_THIN_DIELECTRIC)) { - return RobustThinDielectric_eval(self, wo, wi, ffo); + return RobustThinDielectric_eval(self, wo, wi, ffh); } else if ((self->bsdfType == BSDF_TYPE_THIN_DIELECTRIC) && (ffo & MTB_THIN_DIELECTRIC)) { - return ThinDielectric_eval(self, wo, wi, ffo); + return ThinDielectric_eval(self, wo, wi, ffh); } else if ((self->bsdfType == BSDF_TYPE_THIN_MICROFACET_DIELECTRIC) && (ffo & MTB_THIN_MICROFACET_DIELECTRIC)) { - return ThinMicrofacetDielectric_eval(self, wo, wi, ffo); + return ThinMicrofacetDielectric_eval(self, wo, wi, ffh); } else if ((self->bsdfType == BSDF_TYPE_TRANSMISSION) && (ffo & MTB_TRANSMISSION)) { - return Transmission_eval(self, wo, wi, ffo); + return Transmission_eval(self, wo, wi, ffh); } else if ((self->bsdfType == BSDF_TYPE_VELVETY) && (ffo & MTB_VELVETY)) { - return Velvety_eval(self, wo, wi, ffo); + return Velvety_eval(self, wo, wi, ffh); } else if (self->bsdfType == BSDF_TYPE_REALLY_UNKNOWN) { } else { #ifndef OSPRAY_TARGET_SYCL - return self->eval(self, wo, wi, ffo); + return self->eval(self, wo, wi, ffh); #endif } return make_BSDF_EvalRes_zero(); @@ -197,55 +203,56 @@ SYCL_EXTERNAL BSDF_SampleRes BSDF_dispatch_sample_base( const vec3f &wo, const vec2f &s, float ss, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsHandler &ffh) { + const uniform FeatureFlagsOther ffo = getFeatureFlagsOther(ffh); if ((self->bsdfType == BSDF_TYPE_CONDUCTOR) && (ffo & MTB_CONDUCTOR)) { - return Conductor_sample(self, wo, s, ss, ffo); + return 
Conductor_sample(self, wo, s, ss, ffh); } else if ((self->bsdfType == BSDF_TYPE_DIELECTRIC) && (ffo & MTB_DIELECTRIC)) { - return Dielectric_sample(self, wo, s, ss, ffo); + return Dielectric_sample(self, wo, s, ss, ffh); } else if ((self->bsdfType == BSDF_TYPE_LAMBERT) && (ffo & MTB_LAMBERT)) { - return Lambert_sample(self, wo, s, ss, ffo); + return Lambert_sample(self, wo, s, ss, ffh); } else if ((self->bsdfType == BSDF_TYPE_LAMBERT_TRANSMISSION) && (ffo & MTB_LAMBERT_TRANSMISSION)) { - return LambertTransmission_sample(self, wo, s, ss, ffo); + return LambertTransmission_sample(self, wo, s, ss, ffh); } else if ((self->bsdfType == BSDF_TYPE_MICROFACET_CONDUCTOR) && (ffo & MTB_MICROFACET_CONDUCTOR)) { - return MicrofacetConductor_sample(self, wo, s, ss, ffo); + return MicrofacetConductor_sample(self, wo, s, ss, ffh); } else if ((self->bsdfType == BSDF_TYPE_MICROFACET_DIELECTRIC) && (ffo & MTB_MICROFACET_DIELECTRIC)) { - return MicrofacetDielectric_sample(self, wo, s, ss, ffo); + return MicrofacetDielectric_sample(self, wo, s, ss, ffh); } else if ((self->bsdfType == BSDF_TYPE_MINNEART) && (ffo & MTB_MINNEART)) { - return Minneart_sample(self, wo, s, ss, ffo); + return Minneart_sample(self, wo, s, ss, ffh); } else if ((self->bsdfType == BSDF_TYPE_OREN_NAYAR) && (ffo & MTB_OREN_NAYAR)) { - return OrenNayar_sample(self, wo, s, ss, ffo); + return OrenNayar_sample(self, wo, s, ss, ffh); } else if ((self->bsdfType == BSDF_TYPE_SPECULAR) && (ffo & MTB_SPECULAR)) { - return Specular_sample(self, wo, s, ss, ffo); + return Specular_sample(self, wo, s, ss, ffh); } else if ((self->bsdfType == BSDF_TYPE_REFLECTION) && (ffo & MTB_REFLECTION)) { - return Reflection_sample(self, wo, s, ss, ffo); + return Reflection_sample(self, wo, s, ss, ffh); } else if ((self->bsdfType == BSDF_TYPE_ROBUST_DIELECTRIC) && (ffo & MTB_ROBUST_DIELECTRIC)) { - return RobustDielectric_sample(self, wo, s, ss, ffo); + return RobustDielectric_sample(self, wo, s, ss, ffh); } else if ((self->bsdfType == 
BSDF_TYPE_ROBUST_THIN_DIELECTRIC) && (ffo & MTB_ROBUST_THIN_DIELECTRIC)) { - return RobustThinDielectric_sample(self, wo, s, ss, ffo); + return RobustThinDielectric_sample(self, wo, s, ss, ffh); } else if ((self->bsdfType == BSDF_TYPE_THIN_DIELECTRIC) && (ffo & MTB_THIN_DIELECTRIC)) { - return ThinDielectric_sample(self, wo, s, ss, ffo); + return ThinDielectric_sample(self, wo, s, ss, ffh); } else if ((self->bsdfType == BSDF_TYPE_THIN_MICROFACET_DIELECTRIC) && (ffo & MTB_THIN_MICROFACET_DIELECTRIC)) { - return ThinMicrofacetDielectric_sample(self, wo, s, ss, ffo); + return ThinMicrofacetDielectric_sample(self, wo, s, ss, ffh); } else if ((self->bsdfType == BSDF_TYPE_TRANSMISSION) && (ffo & MTB_TRANSMISSION)) { - return Transmission_sample(self, wo, s, ss, ffo); + return Transmission_sample(self, wo, s, ss, ffh); } else if ((self->bsdfType == BSDF_TYPE_VELVETY) && (ffo & MTB_VELVETY)) { - return Velvety_sample(self, wo, s, ss, ffo); + return Velvety_sample(self, wo, s, ss, ffh); } else if (self->bsdfType == BSDF_TYPE_REALLY_UNKNOWN) { } else { #ifndef OSPRAY_TARGET_SYCL - return self->sample(self, wo, s, ss, ffo); + return self->sample(self, wo, s, ss, ffh); #endif } return make_BSDF_SampleRes_zero(); diff --git a/modules/cpu/render/bsdfs/Conductor.ih b/modules/cpu/render/bsdfs/Conductor.ih index c01dcac0e..48823a178 100644 --- a/modules/cpu/render/bsdfs/Conductor.ih +++ b/modules/cpu/render/bsdfs/Conductor.ih @@ -18,7 +18,7 @@ struct Conductor inline BSDF_EvalRes Conductor_eval(const varying BSDF *uniform, const vec3f &, const vec3f &, - const uniform FeatureFlagsOther) + const uniform FeatureFlagsHandler &) { return make_BSDF_EvalRes_zero(); } @@ -27,7 +27,7 @@ inline BSDF_SampleRes Conductor_sample(const varying BSDF *uniform super, const vec3f &wo, const vec2f &, float, - const uniform FeatureFlagsOther) + const uniform FeatureFlagsHandler &) { const varying Conductor *uniform self = (const varying Conductor *uniform)super; diff --git 
a/modules/cpu/render/bsdfs/Dielectric.ih b/modules/cpu/render/bsdfs/Dielectric.ih index 173add8bc..e3f5dad81 100644 --- a/modules/cpu/render/bsdfs/Dielectric.ih +++ b/modules/cpu/render/bsdfs/Dielectric.ih @@ -19,7 +19,7 @@ struct Dielectric inline BSDF_EvalRes Dielectric_eval(const varying BSDF *uniform, const vec3f &, const vec3f &, - const uniform FeatureFlagsOther) + const uniform FeatureFlagsHandler &) { return make_BSDF_EvalRes_zero(); } @@ -28,7 +28,7 @@ inline BSDF_SampleRes Dielectric_sample(const varying BSDF *uniform super, const vec3f &wo, const vec2f &, float ss, - const uniform FeatureFlagsOther) + const uniform FeatureFlagsHandler &) { const varying Dielectric *uniform self = (const varying Dielectric *uniform)super; diff --git a/modules/cpu/render/bsdfs/DielectricLayer.ih b/modules/cpu/render/bsdfs/DielectricLayer.ih index c0364f5f7..ce334eff6 100644 --- a/modules/cpu/render/bsdfs/DielectricLayer.ih +++ b/modules/cpu/render/bsdfs/DielectricLayer.ih @@ -32,7 +32,7 @@ struct DielectricLayer inline BSDF_EvalRes DielectricLayer_eval(const varying BSDF *uniform super, const vec3f &wo, const vec3f &wi, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsHandler &ffh) { const varying DielectricLayer *uniform self = (const varying DielectricLayer *uniform)super; @@ -60,10 +60,10 @@ inline BSDF_EvalRes DielectricLayer_eval(const varying BSDF *uniform super, // recursion (that we know will terminate) from the SYCL compiler we have // to call MultiBSDF directly here if we have a layer over a MultiBSDF if (f->bsdfType == BSDF_TYPE_MULTI_BSDF) { - substrate = MultiBSDF_eval(f, wo, wi, ffo); // f->eval(f, wo, wi); + substrate = MultiBSDF_eval(f, wo, wi, ffh); // f->eval(f, wo, wi); } else { substrate = - BSDF_dispatch_eval_base(f, wo, wi, ffo); // f->eval(f, wo, wi); + BSDF_dispatch_eval_base(f, wo, wi, ffh); // f->eval(f, wo, wi); } } @@ -101,7 +101,7 @@ inline BSDF_SampleRes DielectricLayer_sample(const varying BSDF *uniform super, const vec3f 
&wo, const vec2f &s, float ss, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsHandler &ffh) { const varying DielectricLayer *uniform self = (const varying DielectricLayer *uniform)super; @@ -136,10 +136,10 @@ inline BSDF_SampleRes DielectricLayer_sample(const varying BSDF *uniform super, // recursion (that we know will terminate) from the SYCL compiler we have // to call MultiBSDF directly here if we have a layer over a MultiBSDF if (f->bsdfType == BSDF_TYPE_MULTI_BSDF) { - res = MultiBSDF_sample(f, wo, s, ss1, ffo); // f->sample(f, wo, s, ss1); + res = MultiBSDF_sample(f, wo, s, ss1, ffh); // f->sample(f, wo, s, ss1); } else { res = BSDF_dispatch_sample_base( - f, wo, s, ss1, ffo); // f->sample(f, wo, s, ss1); + f, wo, s, ss1, ffh); // f->sample(f, wo, s, ss1); } } if (reduce_max(res.weight) <= 0.f) diff --git a/modules/cpu/render/bsdfs/Lambert.ih b/modules/cpu/render/bsdfs/Lambert.ih index 57147a8e2..d5bdce6d8 100644 --- a/modules/cpu/render/bsdfs/Lambert.ih +++ b/modules/cpu/render/bsdfs/Lambert.ih @@ -11,7 +11,7 @@ OSPRAY_BEGIN_ISPC_NAMESPACE inline BSDF_EvalRes Lambert_eval(const varying BSDF *uniform self, const vec3f &, const vec3f &wi, - const uniform FeatureFlagsOther) + const uniform FeatureFlagsHandler &) { BSDF_EvalRes res; float cosThetaI = max(dot(wi, getN(self)), 0.f); @@ -24,7 +24,7 @@ inline BSDF_SampleRes Lambert_sample(const varying BSDF *uniform self, const vec3f &, const vec2f &s, float, - const uniform FeatureFlagsOther) + const uniform FeatureFlagsHandler &) { const vec3f localDir = cosineSampleHemisphere(s); BSDF_SampleRes res; diff --git a/modules/cpu/render/bsdfs/LambertTransmission.ih b/modules/cpu/render/bsdfs/LambertTransmission.ih index 503f82a72..1fbf01be2 100644 --- a/modules/cpu/render/bsdfs/LambertTransmission.ih +++ b/modules/cpu/render/bsdfs/LambertTransmission.ih @@ -10,7 +10,7 @@ OSPRAY_BEGIN_ISPC_NAMESPACE inline BSDF_EvalRes LambertTransmission_eval(const varying BSDF *uniform self, const vec3f &, const 
vec3f &wi, - const uniform FeatureFlagsOther) + const uniform FeatureFlagsHandler &) { BSDF_EvalRes res; float cosThetaI = max(-dot(wi, getN(self)), 0.f); @@ -24,7 +24,7 @@ inline BSDF_SampleRes LambertTransmission_sample( const vec3f &, const vec2f &s, float, - const uniform FeatureFlagsOther) + const uniform FeatureFlagsHandler &) { const vec3f localDir = cosineSampleHemisphere(s); BSDF_SampleRes res; diff --git a/modules/cpu/render/bsdfs/MicrofacetConductor.ih b/modules/cpu/render/bsdfs/MicrofacetConductor.ih index a0f31342d..4841e1090 100644 --- a/modules/cpu/render/bsdfs/MicrofacetConductor.ih +++ b/modules/cpu/render/bsdfs/MicrofacetConductor.ih @@ -31,7 +31,7 @@ struct MicrofacetConductor inline BSDF_EvalRes MicrofacetConductor_eval(const varying BSDF *uniform super, const vec3f &wo, const vec3f &wi, - const uniform FeatureFlagsOther) + const uniform FeatureFlagsHandler &) { const varying MicrofacetConductor *uniform self = (const varying MicrofacetConductor *uniform)super; @@ -76,7 +76,7 @@ inline BSDF_SampleRes MicrofacetConductor_sample( const vec3f &wo, const vec2f &s, float, - const uniform FeatureFlagsOther) + const uniform FeatureFlagsHandler &) { const varying MicrofacetConductor *uniform self = (const varying MicrofacetConductor *uniform)super; diff --git a/modules/cpu/render/bsdfs/MicrofacetDielectric.ih b/modules/cpu/render/bsdfs/MicrofacetDielectric.ih index 28d514b1c..c653f3176 100644 --- a/modules/cpu/render/bsdfs/MicrofacetDielectric.ih +++ b/modules/cpu/render/bsdfs/MicrofacetDielectric.ih @@ -36,7 +36,7 @@ inline BSDF_EvalRes MicrofacetDielectric_evalSingle( const varying BSDF *uniform super, const vec3f &wo, const vec3f &wi, - const uniform FeatureFlagsOther) + const uniform FeatureFlagsHandler &) { const varying MicrofacetDielectric *uniform self = (const varying MicrofacetDielectric *uniform)super; @@ -101,7 +101,7 @@ inline BSDF_EvalRes MicrofacetDielectric_evalSingle( inline BSDF_EvalRes MicrofacetDielectric_eval(const varying BSDF 
*uniform super, const vec3f &wo, const vec3f &wi, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsHandler &ffh) { const varying MicrofacetDielectric *uniform self = (const varying MicrofacetDielectric *uniform)super; @@ -139,7 +139,7 @@ inline BSDF_EvalRes MicrofacetDielectric_eval(const varying BSDF *uniform super, } // Evaluate the single-scattering lobe - BSDF_EvalRes single = MicrofacetDielectric_evalSingle(super, wo, wi, ffo); + BSDF_EvalRes single = MicrofacetDielectric_evalSingle(super, wo, wi, ffh); // Compute the final result float singlePickProb = Eo; @@ -153,7 +153,7 @@ inline BSDF_SampleRes MicrofacetDielectric_sample( const vec3f &wo, const vec2f &s, float ss, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsHandler &ffh) { const varying MicrofacetDielectric *uniform self = (const varying MicrofacetDielectric *uniform)super; @@ -242,7 +242,7 @@ inline BSDF_SampleRes MicrofacetDielectric_sample( } // Evaluate the single-scattering lobe - BSDF_EvalRes single = MicrofacetDielectric_evalSingle(super, wo, res.wi, ffo); + BSDF_EvalRes single = MicrofacetDielectric_evalSingle(super, wo, res.wi, ffh); // Compute the final result res.pdf = singlePickProb * single.pdf + (1.f - singlePickProb) * fms.pdf; diff --git a/modules/cpu/render/bsdfs/MicrofacetDielectricLayer.ih b/modules/cpu/render/bsdfs/MicrofacetDielectricLayer.ih index acc62d982..274882ffe 100644 --- a/modules/cpu/render/bsdfs/MicrofacetDielectricLayer.ih +++ b/modules/cpu/render/bsdfs/MicrofacetDielectricLayer.ih @@ -42,7 +42,7 @@ inline BSDF_EvalRes MicrofacetDielectricLayer_eval( const varying BSDF *uniform super, const vec3f &wo, const vec3f &wi, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsHandler &ffh) { const varying MicrofacetDielectricLayer *uniform self = (const varying MicrofacetDielectricLayer *uniform)super; @@ -65,10 +65,10 @@ inline BSDF_EvalRes MicrofacetDielectricLayer_eval( // recursion (that we know will terminate) from the 
SYCL compiler we have // to call MultiBSDF directly here if we have a layer over a MultiBSDF if (f->bsdfType == BSDF_TYPE_MULTI_BSDF) { - substrate = MultiBSDF_eval(f, wo, wi, ffo); // f->eval(f, wo, wi); + substrate = MultiBSDF_eval(f, wo, wi, ffh); // f->eval(f, wo, wi); } else { substrate = - BSDF_dispatch_eval_base(f, wo, wi, ffo); // f->eval(f, wo, wi); + BSDF_dispatch_eval_base(f, wo, wi, ffh); // f->eval(f, wo, wi); } } @@ -159,7 +159,7 @@ inline BSDF_SampleRes MicrofacetDielectricLayer_sample( const vec3f &wo, const vec2f &s, float ss, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsHandler &ffh) { const varying MicrofacetDielectricLayer *uniform self = (const varying MicrofacetDielectricLayer *uniform)super; @@ -209,10 +209,10 @@ inline BSDF_SampleRes MicrofacetDielectricLayer_sample( // recursion (that we know will terminate) from the SYCL compiler we have // to call MultiBSDF directly here if we have a layer over a MultiBSDF if (f->bsdfType == BSDF_TYPE_MULTI_BSDF) { - substrate = MultiBSDF_eval(f, wo, res.wi, ffo); // f->eval(f, wo, wi); + substrate = MultiBSDF_eval(f, wo, res.wi, ffh); // f->eval(f, wo, wi); } else { substrate = - BSDF_dispatch_eval_base(f, wo, res.wi, ffo); // f->eval(f, wo, wi); + BSDF_dispatch_eval_base(f, wo, res.wi, ffh); // f->eval(f, wo, wi); } } } else { @@ -230,10 +230,10 @@ inline BSDF_SampleRes MicrofacetDielectricLayer_sample( // recursion (that we know will terminate) from the SYCL compiler we have // to call MultiBSDF directly here if we have a layer over a MultiBSDF if (f->bsdfType == BSDF_TYPE_MULTI_BSDF) { - res = MultiBSDF_sample(f, wo, s, ss1, ffo); // f->sample(f, wo, s, ss1); + res = MultiBSDF_sample(f, wo, s, ss1, ffh); // f->sample(f, wo, s, ss1); } else { res = BSDF_dispatch_sample_base( - f, wo, s, ss1, ffo); // f->sample(f, wo, s, ss1); + f, wo, s, ss1, ffh); // f->sample(f, wo, s, ss1); } } if (reduce_max(res.weight) <= 0.f) diff --git a/modules/cpu/render/bsdfs/MicrofacetSheenLayer.ih 
b/modules/cpu/render/bsdfs/MicrofacetSheenLayer.ih index 5b2989b10..94805d656 100644 --- a/modules/cpu/render/bsdfs/MicrofacetSheenLayer.ih +++ b/modules/cpu/render/bsdfs/MicrofacetSheenLayer.ih @@ -28,7 +28,7 @@ struct MicrofacetSheenLayer inline BSDF_EvalRes MicrofacetSheenLayer_eval(const varying BSDF *uniform super, const vec3f &wo, const vec3f &wi, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsHandler &ffh) { const varying MicrofacetSheenLayer *uniform self = (const varying MicrofacetSheenLayer *uniform)super; @@ -50,10 +50,10 @@ inline BSDF_EvalRes MicrofacetSheenLayer_eval(const varying BSDF *uniform super, // recursion (that we know will terminate) from the SYCL compiler we have // to call MultiBSDF directly here if we have a layer over a MultiBSDF if (f->bsdfType == BSDF_TYPE_MULTI_BSDF) { - substrate = MultiBSDF_eval(f, wo, wi, ffo); // f->eval(f, wo, wi); + substrate = MultiBSDF_eval(f, wo, wi, ffh); // f->eval(f, wo, wi); } else { substrate = - BSDF_dispatch_eval_base(f, wo, wi, ffo); // f->eval(f, wo, wi); + BSDF_dispatch_eval_base(f, wo, wi, ffh); // f->eval(f, wo, wi); } } @@ -103,7 +103,7 @@ inline BSDF_SampleRes MicrofacetSheenLayer_sample( const vec3f &wo, const vec2f &s, float ss, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsHandler &ffh) { const varying MicrofacetSheenLayer *uniform self = (const varying MicrofacetSheenLayer *uniform)super; @@ -140,10 +140,10 @@ inline BSDF_SampleRes MicrofacetSheenLayer_sample( // recursion (that we know will terminate) from the SYCL compiler we have // to call MultiBSDF directly here if we have a layer over a MultiBSDF if (f->bsdfType == BSDF_TYPE_MULTI_BSDF) { - substrate = MultiBSDF_eval(f, wo, res.wi, ffo); // f->eval(f, wo, wi); + substrate = MultiBSDF_eval(f, wo, res.wi, ffh); // f->eval(f, wo, wi); } else { substrate = - BSDF_dispatch_eval_base(f, wo, res.wi, ffo); // f->eval(f, wo, wi); + BSDF_dispatch_eval_base(f, wo, res.wi, ffh); // f->eval(f, wo, wi); } } } 
else { @@ -160,10 +160,10 @@ inline BSDF_SampleRes MicrofacetSheenLayer_sample( // recursion (that we know will terminate) from the SYCL compiler we have // to call MultiBSDF directly here if we have a layer over a MultiBSDF if (f->bsdfType == BSDF_TYPE_MULTI_BSDF) { - res = MultiBSDF_sample(f, wo, s, ss1, ffo); // f->sample(f, wo, s, ss1); + res = MultiBSDF_sample(f, wo, s, ss1, ffh); // f->sample(f, wo, s, ss1); } else { res = BSDF_dispatch_sample_base( - f, wo, s, ss1, ffo); // f->sample(f, wo, s, ss1); + f, wo, s, ss1, ffh); // f->sample(f, wo, s, ss1); } } if (reduce_max(res.weight) <= 0.f) diff --git a/modules/cpu/render/bsdfs/Minneart.ih b/modules/cpu/render/bsdfs/Minneart.ih index 4c9d1894c..6c655da3f 100644 --- a/modules/cpu/render/bsdfs/Minneart.ih +++ b/modules/cpu/render/bsdfs/Minneart.ih @@ -19,7 +19,7 @@ struct Minneart inline BSDF_EvalRes Minneart_eval(const varying BSDF *uniform super, const vec3f &wo, const vec3f &wi, - const uniform FeatureFlagsOther) + const uniform FeatureFlagsHandler &) { const varying Minneart *uniform self = (const varying Minneart *uniform)super; BSDF_EvalRes res; @@ -36,14 +36,14 @@ inline BSDF_SampleRes Minneart_sample(const varying BSDF *uniform super, const vec3f &wo, const vec2f &s, float, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsHandler &ffh) { BSDF_SampleRes res; const vec3f localDir = cosineSampleHemisphere(s); res.wi = getFrame(super) * localDir; res.type = BSDF_DIFFUSE_REFLECTION; - BSDF_EvalRes eval = Minneart_eval(super, wo, res.wi, ffo); + BSDF_EvalRes eval = Minneart_eval(super, wo, res.wi, ffh); res.pdf = eval.pdf; res.weight = eval.value * rcp(eval.pdf); return res; diff --git a/modules/cpu/render/bsdfs/MultiBSDF.ih b/modules/cpu/render/bsdfs/MultiBSDF.ih index 2fc2ebd11..950cc9fe4 100644 --- a/modules/cpu/render/bsdfs/MultiBSDF.ih +++ b/modules/cpu/render/bsdfs/MultiBSDF.ih @@ -66,7 +66,7 @@ inline void MultiBSDF_add(varying BSDF *uniform super, inline BSDF_EvalRes 
MultiBSDF_eval(const varying BSDF *uniform super, const vec3f &wo, const vec3f &wi, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsHandler &ffh) { const varying MultiBSDF *uniform self = (const varying MultiBSDF *uniform)super; @@ -76,7 +76,7 @@ inline BSDF_EvalRes MultiBSDF_eval(const varying BSDF *uniform super, for (uniform int i = 0; i < self->numBsdfs; ++i) { if (self->importances[i] > 0.0f) { const varying BSDF *uniform curBsdf = self->bsdfs[i]; - BSDF_EvalRes cur = BSDF_dispatch_eval_base(curBsdf, wo, wi, ffo); + BSDF_EvalRes cur = BSDF_dispatch_eval_base(curBsdf, wo, wi, ffh); cur.value = cur.value * self->weights[i]; res.value = res.value + cur.value; res.pdf += cur.pdf * self->importances[i]; @@ -92,7 +92,7 @@ inline BSDF_SampleRes MultiBSDF_sample(const varying BSDF *uniform super, const vec3f &wo, const vec2f &s, float ss, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsHandler &ffh) { const varying MultiBSDF *uniform self = (const varying MultiBSDF *uniform)super; @@ -104,7 +104,7 @@ inline BSDF_SampleRes MultiBSDF_sample(const varying BSDF *uniform super, if (self->numBsdfs == 1) { const varying BSDF *uniform bsdf = self->bsdfs[0]; - res = BSDF_dispatch_sample_base(bsdf, wo, s, ss, ffo); + res = BSDF_dispatch_sample_base(bsdf, wo, s, ss, ffh); res.weight = res.weight * self->weights[0]; return res; } else { @@ -131,7 +131,7 @@ inline BSDF_SampleRes MultiBSDF_sample(const varying BSDF *uniform super, foreach_unique (i in choice) { #endif const varying BSDF *uniform bsdf = self->bsdfs[i]; - res = BSDF_dispatch_sample_base(bsdf, wo, s, ss, ffo); + res = BSDF_dispatch_sample_base(bsdf, wo, s, ss, ffh); res.weight = res.weight * self->weights[i]; } @@ -155,7 +155,7 @@ inline BSDF_SampleRes MultiBSDF_sample(const varying BSDF *uniform super, for (uniform int i = 0; i < self->numBsdfs; ++i) { if ((i != choice) & (self->importances[i] > 0.0f)) { const varying BSDF *uniform curBsdf = self->bsdfs[i]; - BSDF_EvalRes cur = 
BSDF_dispatch_eval(curBsdf, wo, res.wi, ffo); + BSDF_EvalRes cur = BSDF_dispatch_eval(curBsdf, wo, res.wi, ffh); cur.value = cur.value * self->weights[i]; value = value + cur.value; res.pdf += cur.pdf * self->importances[i]; diff --git a/modules/cpu/render/bsdfs/OrenNayar.ih b/modules/cpu/render/bsdfs/OrenNayar.ih index 73a73adf4..e4d849501 100644 --- a/modules/cpu/render/bsdfs/OrenNayar.ih +++ b/modules/cpu/render/bsdfs/OrenNayar.ih @@ -20,7 +20,7 @@ struct OrenNayar inline BSDF_EvalRes OrenNayar_eval(const varying BSDF *uniform super, const vec3f &wo, const vec3f &wi, - const uniform FeatureFlagsOther) + const uniform FeatureFlagsHandler &) { const varying OrenNayar *uniform self = (const varying OrenNayar *uniform)super; @@ -42,12 +42,12 @@ inline BSDF_SampleRes OrenNayar_sample(const varying BSDF *uniform super, const vec3f &wo, const vec2f &s, float, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsHandler &ffh) { const vec3f localDir = cosineSampleHemisphere(s); BSDF_SampleRes res; res.wi = getFrame(super) * localDir; - BSDF_EvalRes eval = OrenNayar_eval(super, wo, res.wi, ffo); + BSDF_EvalRes eval = OrenNayar_eval(super, wo, res.wi, ffh); res.pdf = eval.pdf; res.type = BSDF_DIFFUSE_REFLECTION; res.weight = eval.value * rcp(eval.pdf); diff --git a/modules/cpu/render/bsdfs/Reflection.ih b/modules/cpu/render/bsdfs/Reflection.ih index 8eabf4903..847eee9c4 100644 --- a/modules/cpu/render/bsdfs/Reflection.ih +++ b/modules/cpu/render/bsdfs/Reflection.ih @@ -10,7 +10,7 @@ OSPRAY_BEGIN_ISPC_NAMESPACE inline BSDF_EvalRes Reflection_eval(const varying BSDF *uniform, const vec3f &, const vec3f &, - const uniform FeatureFlagsOther) + const uniform FeatureFlagsHandler &) { return make_BSDF_EvalRes_zero(); } @@ -19,7 +19,7 @@ inline BSDF_SampleRes Reflection_sample(const varying BSDF *uniform self, const vec3f &wo, const vec2f &, float, - const uniform FeatureFlagsOther) + const uniform FeatureFlagsHandler &) { BSDF_SampleRes res; diff --git 
a/modules/cpu/render/bsdfs/RobustDielectric.ih b/modules/cpu/render/bsdfs/RobustDielectric.ih index a64f8bf01..185e9ff42 100644 --- a/modules/cpu/render/bsdfs/RobustDielectric.ih +++ b/modules/cpu/render/bsdfs/RobustDielectric.ih @@ -27,7 +27,7 @@ struct RobustDielectric inline BSDF_EvalRes RobustDielectric_eval(const varying BSDF *uniform, const vec3f &, const vec3f &, - const uniform FeatureFlagsOther) + const uniform FeatureFlagsHandler &) { return make_BSDF_EvalRes_zero(); } @@ -100,7 +100,7 @@ inline BSDF_SampleRes RobustDielectric_sample(const varying BSDF *uniform super, const vec3f &wo, const vec2f & /*randomV*/, float randomF, - const uniform FeatureFlagsOther) + const uniform FeatureFlagsHandler &) { const varying RobustDielectric *uniform self = (const varying RobustDielectric *uniform)super; diff --git a/modules/cpu/render/bsdfs/RobustThinDielectric.ih b/modules/cpu/render/bsdfs/RobustThinDielectric.ih index ef1f72c33..9860f80b0 100644 --- a/modules/cpu/render/bsdfs/RobustThinDielectric.ih +++ b/modules/cpu/render/bsdfs/RobustThinDielectric.ih @@ -19,7 +19,7 @@ struct RobustThinDielectric inline BSDF_EvalRes RobustThinDielectric_eval(const varying BSDF *uniform, const vec3f &, const vec3f &, - const uniform FeatureFlagsOther) + const uniform FeatureFlagsHandler &) { return make_BSDF_EvalRes_zero(); } @@ -29,7 +29,7 @@ inline BSDF_SampleRes RobustThinDielectric_sample( const vec3f &wo, const vec2f &, float ss, - const uniform FeatureFlagsOther) + const uniform FeatureFlagsHandler &) { const varying RobustThinDielectric *uniform self = (const varying RobustThinDielectric *uniform)super; diff --git a/modules/cpu/render/bsdfs/Scale.ih b/modules/cpu/render/bsdfs/Scale.ih index dd9585026..1e52dd741 100644 --- a/modules/cpu/render/bsdfs/Scale.ih +++ b/modules/cpu/render/bsdfs/Scale.ih @@ -18,11 +18,11 @@ struct Scale inline BSDF_EvalRes Scale_eval(const varying BSDF *uniform super, const vec3f &wo, const vec3f &wi, - const uniform FeatureFlagsOther ffo) + 
const uniform FeatureFlagsHandler &ffh) { const varying Scale *uniform self = (const varying Scale *uniform)super; - BSDF_EvalRes res = BSDF_dispatch_eval_base(self->base, wo, wi, ffo); + BSDF_EvalRes res = BSDF_dispatch_eval_base(self->base, wo, wi, ffh); res.value = res.value * self->factor; return res; @@ -32,11 +32,11 @@ inline BSDF_SampleRes Scale_sample(const varying BSDF *uniform super, const vec3f &wo, const vec2f &s, float ss, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsHandler &ffh) { const varying Scale *uniform self = (const varying Scale *uniform)super; - BSDF_SampleRes res = BSDF_dispatch_sample_base(self->base, wo, s, ss, ffo); + BSDF_SampleRes res = BSDF_dispatch_sample_base(self->base, wo, s, ss, ffh); res.weight = res.weight * self->factor; return res; diff --git a/modules/cpu/render/bsdfs/Specular.ih b/modules/cpu/render/bsdfs/Specular.ih index 34c414ab8..934855698 100644 --- a/modules/cpu/render/bsdfs/Specular.ih +++ b/modules/cpu/render/bsdfs/Specular.ih @@ -21,7 +21,7 @@ struct Specular inline BSDF_EvalRes Specular_eval(const varying BSDF *uniform super, const vec3f &wo, const vec3f &wi, - const uniform FeatureFlagsOther) + const uniform FeatureFlagsHandler &) { const varying Specular *uniform self = (const varying Specular *uniform)super; BSDF_EvalRes res; @@ -45,7 +45,7 @@ inline BSDF_SampleRes Specular_sample(const varying BSDF *uniform super, const vec3f &wo, const vec2f &s, float, - const uniform FeatureFlagsOther) + const uniform FeatureFlagsHandler &) { const varying Specular *uniform self = (const varying Specular *uniform)super; BSDF_SampleRes res; diff --git a/modules/cpu/render/bsdfs/ThinDielectric.ih b/modules/cpu/render/bsdfs/ThinDielectric.ih index 52f900845..4894a5769 100644 --- a/modules/cpu/render/bsdfs/ThinDielectric.ih +++ b/modules/cpu/render/bsdfs/ThinDielectric.ih @@ -20,7 +20,7 @@ struct ThinDielectric inline BSDF_EvalRes ThinDielectric_eval(const varying BSDF *uniform, const vec3f &, const vec3f 
&, - const uniform FeatureFlagsOther) + const uniform FeatureFlagsHandler &) { return make_BSDF_EvalRes_zero(); } @@ -29,7 +29,7 @@ inline BSDF_SampleRes ThinDielectric_sample(const varying BSDF *uniform super, const vec3f &wo, const vec2f &, float ss, - const uniform FeatureFlagsOther) + const uniform FeatureFlagsHandler &) { const varying ThinDielectric *uniform self = (const varying ThinDielectric *uniform)super; diff --git a/modules/cpu/render/bsdfs/ThinMicrofacetDielectric.ih b/modules/cpu/render/bsdfs/ThinMicrofacetDielectric.ih index 1f33673d6..c27a37ba2 100644 --- a/modules/cpu/render/bsdfs/ThinMicrofacetDielectric.ih +++ b/modules/cpu/render/bsdfs/ThinMicrofacetDielectric.ih @@ -34,7 +34,7 @@ inline BSDF_EvalRes ThinMicrofacetDielectric_eval( const varying BSDF *uniform super, const vec3f &wo, const vec3f &wi, - const uniform FeatureFlagsOther) + const uniform FeatureFlagsHandler &) { const varying ThinMicrofacetDielectric *uniform self = (const varying ThinMicrofacetDielectric *uniform)super; @@ -90,7 +90,7 @@ inline BSDF_SampleRes ThinMicrofacetDielectric_sample( const vec3f &wo, const vec2f &s, float ss, - const uniform FeatureFlagsOther) + const uniform FeatureFlagsHandler &) { const varying ThinMicrofacetDielectric *uniform self = (const varying ThinMicrofacetDielectric *uniform)super; diff --git a/modules/cpu/render/bsdfs/Transmission.ih b/modules/cpu/render/bsdfs/Transmission.ih index 8efc18bed..4cd610737 100644 --- a/modules/cpu/render/bsdfs/Transmission.ih +++ b/modules/cpu/render/bsdfs/Transmission.ih @@ -11,7 +11,7 @@ OSPRAY_BEGIN_ISPC_NAMESPACE inline BSDF_EvalRes Transmission_eval(const varying BSDF *uniform, const vec3f &, const vec3f &, - const uniform FeatureFlagsOther) + const uniform FeatureFlagsHandler &) { return make_BSDF_EvalRes_zero(); } @@ -20,7 +20,7 @@ inline BSDF_SampleRes Transmission_sample(const varying BSDF *uniform self, const vec3f &wo, const vec2f &, float, - const uniform FeatureFlagsOther) + const uniform 
FeatureFlagsHandler &) { BSDF_SampleRes res; diff --git a/modules/cpu/render/bsdfs/Velvety.ih b/modules/cpu/render/bsdfs/Velvety.ih index cf0e5b086..2a6f97dac 100644 --- a/modules/cpu/render/bsdfs/Velvety.ih +++ b/modules/cpu/render/bsdfs/Velvety.ih @@ -19,7 +19,7 @@ struct Velvety inline BSDF_EvalRes Velvety_eval(const varying BSDF *uniform super, const vec3f &wo, const vec3f &wi, - const uniform FeatureFlagsOther) + const uniform FeatureFlagsHandler &) { const varying Velvety *uniform self = (const varying Velvety *uniform)super; BSDF_EvalRes res; @@ -38,14 +38,14 @@ inline BSDF_SampleRes Velvety_sample(const varying BSDF *uniform super, const vec3f &wo, const vec2f &s, float, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsHandler &ffh) { BSDF_SampleRes res; const vec3f localDir = cosineSampleHemisphere(s); res.wi = getFrame(super) * localDir; res.type = BSDF_DIFFUSE_REFLECTION; - BSDF_EvalRes eval = Velvety_eval(super, wo, res.wi, ffo); + BSDF_EvalRes eval = Velvety_eval(super, wo, res.wi, ffh); res.pdf = eval.pdf; res.weight = eval.value * rcp(eval.pdf); return res; diff --git a/modules/cpu/render/debug/DebugRenderer.cpp b/modules/cpu/render/debug/DebugRenderer.cpp index a0dd7cd19..df27fd27b 100644 --- a/modules/cpu/render/debug/DebugRenderer.cpp +++ b/modules/cpu/render/debug/DebugRenderer.cpp @@ -12,8 +12,6 @@ #include "render/debug/DebugRenderer_ispc.h" #else #include "DebugRenderer.ih" - -constexpr sycl::specialization_id specFeatureFlags; #endif namespace ospray { @@ -91,31 +89,33 @@ AsyncEvent DebugRenderer::renderTasks(FrameBuffer *fb, const uint32_t *taskIDsPtr = taskIDs.data(); event = device.getSyclQueue().submit([&](sycl::handler &cgh) { FeatureFlags ff = world->getFeatureFlags(); - ff.other |= featureFlags; - ff.other |= fb->getFeatureFlagsOther(); - ff.other |= camera->getFeatureFlagsOther(); - cgh.set_specialization_constant(ff); + ff |= featureFlags; + ff |= fb->getFeatureFlags(); + ff |= camera->getFeatureFlags(); + 
cgh.set_specialization_constant(ff); + + cgh.set_specialization_constant(rendererSh->type); const sycl::nd_range<1> dispatchRange = device.computeDispatchRange(numTasks, 16); cgh.parallel_for(dispatchRange, [=](sycl::nd_item<1> taskIndex, sycl::kernel_handler kh) { if (taskIndex.get_global_id(0) < numTasks) { - const FeatureFlags ff = - kh.get_specialization_constant(); + ispc::FeatureFlagsHandler ffh(kh); ispc::DebugRenderer_renderTask(&rendererSh->super, fbSh, cameraSh, worldSh, taskIDsPtr, taskIndex.get_global_id(0), - ff); + ffh); } }); }); if (wait) event.wait_and_throw(); + #else (void)wait; ispc::DebugRenderer_renderTasks( diff --git a/modules/cpu/render/debug/DebugRenderer.ih b/modules/cpu/render/debug/DebugRenderer.ih index d277adbc2..c4b9bae8d 100644 --- a/modules/cpu/render/debug/DebugRenderer.ih +++ b/modules/cpu/render/debug/DebugRenderer.ih @@ -1,6 +1,8 @@ // Copyright 2022 Intel Corporation // SPDX-License-Identifier: Apache-2.0 +#include "common/FeatureFlags.ih" + OSPRAY_BEGIN_ISPC_NAMESPACE #ifdef OSPRAY_TARGET_SYCL @@ -10,7 +12,7 @@ SYCL_EXTERNAL void DebugRenderer_renderTask(Renderer *uniform self, World *uniform world, const uint32 *uniform taskIDs, const int taskIndex0, - const uniform ospray::FeatureFlags &ff); + const uniform FeatureFlagsHandler &ffh); #endif OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/render/debug/DebugRenderer.ispc b/modules/cpu/render/debug/DebugRenderer.ispc index 98d43ac3f..c87efc6c4 100644 --- a/modules/cpu/render/debug/DebugRenderer.ispc +++ b/modules/cpu/render/debug/DebugRenderer.ispc @@ -4,6 +4,7 @@ // ospray #include "camera/Camera.ih" #include "camera/CameraDispatch.ih" +#include "common/FeatureFlagsEnum.h" #include "common/Intersect.ih" #include "common/World.ih" #include "fb/FrameBuffer.ih" @@ -27,13 +28,13 @@ OSPRAY_BEGIN_ISPC_NAMESPACE inline bool hitBackground(Renderer *uniform self, World *uniform world, varying ScreenSample &sample, - const uniform FeatureFlags &ff) + const uniform 
FeatureFlagsHandler &ffh) { - traceRay(world, sample.ray, ff); + traceRay(world, sample.ray, ffh); sample.z = sample.ray.t; sample.alpha = 1.f; - sample.rgb = make_vec3f(Renderer_getBackground(self, sample.pos, ff.other)); + sample.rgb = make_vec3f(Renderer_getBackground(self, sample.pos, ffh)); return noHit(sample.ray); } @@ -71,9 +72,9 @@ static void DebugRenderer_rayDir( static void DebugRenderer_eyeLight(Renderer *uniform self, World *uniform world, varying ScreenSample &sample, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { - if (hitBackground(self, world, sample, ff)) { + if (hitBackground(self, world, sample, ffh)) { return; } sample.rgb = make_vec3f(eyeLight(sample)); @@ -82,50 +83,50 @@ static void DebugRenderer_eyeLight(Renderer *uniform self, static void DebugRenderer_Ng(Renderer *uniform self, World *uniform world, varying ScreenSample &sample, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { - if (hitBackground(self, world, sample, ff)) + if (hitBackground(self, world, sample, ffh)) return; DifferentialGeometry dg; - postIntersect(world, self, dg, sample.ray, DG_NORMALIZE | DG_NG, ff); + postIntersect(world, self, dg, sample.ray, DG_NORMALIZE | DG_NG, ffh); sample.rgb = absf(dg.Ng); } static void DebugRenderer_Ns(Renderer *uniform self, World *uniform world, varying ScreenSample &sample, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { - if (hitBackground(self, world, sample, ff)) + if (hitBackground(self, world, sample, ffh)) return; DifferentialGeometry dg; - postIntersect(world, self, dg, sample.ray, DG_NORMALIZE | DG_NS, ff); + postIntersect(world, self, dg, sample.ray, DG_NORMALIZE | DG_NS, ffh); sample.rgb = absf(dg.Ns); } static void DebugRenderer_texCoord(Renderer *uniform self, World *uniform world, varying ScreenSample &sample, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { - if (hitBackground(self, world, sample, ff)) + if 
(hitBackground(self, world, sample, ffh)) return; DifferentialGeometry dg; - postIntersect(world, self, dg, sample.ray, DG_TEXCOORD, ff); + postIntersect(world, self, dg, sample.ray, DG_TEXCOORD, ffh); sample.rgb = abs(make_vec3f(dg.st.x, dg.st.y, 0.0f)); } static void DebugRenderer_dPds(Renderer *uniform self, World *uniform world, varying ScreenSample &sample, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { - if (hitBackground(self, world, sample, ff)) + if (hitBackground(self, world, sample, ffh)) return; DifferentialGeometry dg; - postIntersect(world, self, dg, sample.ray, DG_TANGENTS, ff); + postIntersect(world, self, dg, sample.ray, DG_TANGENTS, ffh); sample.rgb = normalize(dg.dPds); if (sample.rgb.x < 0.f) sample.rgb.x = sample.rgb.x * -0.3f; @@ -138,13 +139,13 @@ static void DebugRenderer_dPds(Renderer *uniform self, static void DebugRenderer_dPdt(Renderer *uniform self, World *uniform world, varying ScreenSample &sample, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { - if (hitBackground(self, world, sample, ff)) + if (hitBackground(self, world, sample, ffh)) return; DifferentialGeometry dg; - postIntersect(world, self, dg, sample.ray, DG_TANGENTS, ff); + postIntersect(world, self, dg, sample.ray, DG_TANGENTS, ffh); sample.rgb = normalize(dg.dPdt); if (sample.rgb.x < 0.f) sample.rgb.x = sample.rgb.x * -0.3f; @@ -157,13 +158,13 @@ static void DebugRenderer_dPdt(Renderer *uniform self, static void DebugRenderer_vertexColor(Renderer *uniform self, World *uniform world, varying ScreenSample &sample, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { - if (hitBackground(self, world, sample, ff)) + if (hitBackground(self, world, sample, ffh)) return; DifferentialGeometry dg; - postIntersect(world, self, dg, sample.ray, DG_COLOR | DG_NS, ff); + postIntersect(world, self, dg, sample.ray, DG_COLOR | DG_NS, ffh); sample.rgb = make_vec3f(dg.color) * 
abs(dot(normalize(sample.ray.dir), normalize(dg.Ns))); } @@ -171,9 +172,9 @@ static void DebugRenderer_vertexColor(Renderer *uniform self, static void DebugRenderer_primID(Renderer *uniform self, World *uniform world, varying ScreenSample &sample, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { - if (hitBackground(self, world, sample, ff)) + if (hitBackground(self, world, sample, ffh)) return; sample.rgb = eyeLight(sample) * makeRandomColor(sample.ray.primID); @@ -182,9 +183,9 @@ static void DebugRenderer_primID(Renderer *uniform self, static void DebugRenderer_instID(Renderer *uniform self, World *uniform world, varying ScreenSample &sample, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { - if (hitBackground(self, world, sample, ff)) + if (hitBackground(self, world, sample, ffh)) return; sample.rgb = eyeLight(sample) * makeRandomColor(sample.ray.instID); @@ -193,9 +194,9 @@ static void DebugRenderer_instID(Renderer *uniform self, static void DebugRenderer_geomID(Renderer *uniform self, World *uniform world, varying ScreenSample &sample, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { - if (hitBackground(self, world, sample, ff)) + if (hitBackground(self, world, sample, ffh)) return; sample.rgb = eyeLight(sample) * makeRandomColor(sample.ray.geomID); @@ -204,9 +205,9 @@ static void DebugRenderer_geomID(Renderer *uniform self, static void DebugRenderer_backfacing_Ng(Renderer *uniform self, World *uniform world, varying ScreenSample &sample, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { - if (hitBackground(self, world, sample, ff)) + if (hitBackground(self, world, sample, ffh)) return; sample.rgb = make_vec3f(eyeLight(sample)); @@ -217,13 +218,13 @@ static void DebugRenderer_backfacing_Ng(Renderer *uniform self, static void DebugRenderer_backfacing_Ns(Renderer *uniform self, World *uniform world, varying ScreenSample &sample, - const uniform 
FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { - if (hitBackground(self, world, sample, ff)) + if (hitBackground(self, world, sample, ffh)) return; DifferentialGeometry dg; - postIntersect(world, self, dg, sample.ray, DG_NORMALIZE | DG_NS, ff); + postIntersect(world, self, dg, sample.ray, DG_NORMALIZE | DG_NS, ffh); const float c = dot(dg.Ns, sample.ray.dir); sample.rgb = make_vec3f(.2f + .8f * abs(c)); if (c > 0.f) @@ -234,11 +235,13 @@ static void DebugRenderer_volume(Renderer *uniform self, FrameBuffer *uniform fb, World *uniform world, varying ScreenSample &sample, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { #ifdef OSPRAY_ENABLE_VOLUMES - if (!(ff.volume & FFV_VOLUME)) + const uniform FeatureFlags ff = getFeatureFlags(ffh); + if (!(ff.other & FFO_VOLUME_IN_SCENE)) { return; + } LDSampler ldSamplerObj; varying LDSampler *uniform ldSampler = &ldSamplerObj; @@ -250,15 +253,16 @@ static void DebugRenderer_volume(Renderer *uniform self, vec3f &color = sample.rgb; float &alpha = sample.alpha; - vec4f bgColor = Renderer_getBackground(self, sample.pos, FFO_ALL); + vec4f bgColor = Renderer_getBackground(self, sample.pos, ffh); color = make_vec3f(bgColor); alpha = bgColor.w; VolumeInterval vInterval; traceVolumeRay(world, sample.ray, vInterval); - if (!hasInterval(vInterval)) + if (!hasInterval(vInterval)) { return; + } VolumetricModel *varying model = vInterval.volumetricModel; @@ -279,27 +283,45 @@ static void DebugRenderer_volume(Renderer *uniform self, Volume *uniform volume = m->volume; float time = 0.5f; - VKLIntervalIterator intervalIterator = vklInitIntervalIteratorV( #if OPENVKL_VERSION_MAJOR == 1 - m->vklIntervalContext, + // We know if we have OpenVKL version 1 we're not targetting SYCL + VKLIntervalIterator intervalIterator = + vklInitIntervalIteratorV(m->vklIntervalContext, + (varying vkl_vec3f *)&ray.org, + (varying vkl_vec3f *)&ray.dir, + (varying vkl_range1f *)&vInterval.interval, + &time, + (void 
*uniform)intervalIteratorBuffer); #else - &m->vklIntervalContext, -#endif - (varying vkl_vec3f *)&ray.org, - (varying vkl_vec3f *)&ray.dir, - (varying vkl_range1f *)&vInterval.interval, + VKLIntervalIterator intervalIterator = + vklInitIntervalIteratorV(&m->vklIntervalContext, + (varying vkl_vec3f *)&ray.org, + (varying vkl_vec3f *)&ray.dir, + (varying vkl_range1f *)&vInterval.interval, #ifdef OSPRAY_TARGET_SYCL - time, + time, #else - &time, + &time, +#endif + (void *uniform)intervalIteratorBuffer +#ifdef OSPRAY_TARGET_SYCL + , + ff.volume +#endif + ); #endif - (void *uniform)intervalIteratorBuffer); - VKLInterval interval; static const uniform float samplingRate = 0.5f; - while (vklIterateIntervalV(intervalIterator, &interval) && alpha < 0.99f) { + while (vklIterateIntervalV(intervalIterator, + &interval +#ifdef OSPRAY_TARGET_SYCL + , + ff.volume +#endif + ) + && alpha < 0.99f) { const float nominalSamplingDt = interval.nominalDeltaT / samplingRate; // initial sub interval, based on our renderer-defined sampling rate @@ -315,14 +337,21 @@ static void DebugRenderer_volume(Renderer *uniform self, // Get volume sample vec3f p = ray.org + ray.t0 * ray.dir; - const float sample = vklComputeSampleV( #if OPENVKL_VERSION_MAJOR == 1 - volume->vklSampler, + // We know if we have OpenVKL version 1 we're not targetting SYCL + const float sample = vklComputeSampleV( + volume->vklSampler, (const varying vkl_vec3f *uniform) & p); #else - &volume->vklSampler, + const float sample = vklComputeSampleV(&volume->vklSampler, + (const varying vkl_vec3f *uniform) & p +#ifdef OSPRAY_TARGET_SYCL + , + 0, + time, + ff.volume +#endif + ); #endif - (const varying vkl_vec3f *uniform) & p); - if (!isnan(sample)) { vec4f sampleColorOpacity = TransferFunction_dispatch_get(m->transferFunction, sample); @@ -349,10 +378,16 @@ static void DebugRenderer_renderSample(Renderer *uniform _self, FrameBuffer *uniform fb, World *uniform world, varying ScreenSample &sample, - const uniform FeatureFlags &ff) 
+ const uniform FeatureFlagsHandler &ffh) { +#ifdef OSPRAY_TARGET_SYCL + const DebugRendererType debugType = + ffh.kernel_handler.get_specialization_constant(); +#else DebugRenderer *uniform self = (DebugRenderer * uniform) _self; - switch (self->type) { + const DebugRendererType debugType = self->type; +#endif + switch (debugType) { case TEST_FRAME: DebugRenderer_testFrame(_self, world, sample); break; @@ -360,43 +395,43 @@ static void DebugRenderer_renderSample(Renderer *uniform _self, DebugRenderer_rayDir(_self, world, sample); break; case EYE_LIGHT: - DebugRenderer_eyeLight(_self, world, sample, ff); + DebugRenderer_eyeLight(_self, world, sample, ffh); break; case NG: - DebugRenderer_Ng(_self, world, sample, ff); + DebugRenderer_Ng(_self, world, sample, ffh); break; case NS: - DebugRenderer_Ns(_self, world, sample, ff); + DebugRenderer_Ns(_self, world, sample, ffh); break; case COLOR: - DebugRenderer_vertexColor(_self, world, sample, ff); + DebugRenderer_vertexColor(_self, world, sample, ffh); break; case TEX_COORD: - DebugRenderer_texCoord(_self, world, sample, ff); + DebugRenderer_texCoord(_self, world, sample, ffh); break; case DPDS: - DebugRenderer_dPds(_self, world, sample, ff); + DebugRenderer_dPds(_self, world, sample, ffh); break; case DPDT: - DebugRenderer_dPdt(_self, world, sample, ff); + DebugRenderer_dPdt(_self, world, sample, ffh); break; case PRIM_ID: - DebugRenderer_primID(_self, world, sample, ff); + DebugRenderer_primID(_self, world, sample, ffh); break; case GEOM_ID: - DebugRenderer_geomID(_self, world, sample, ff); + DebugRenderer_geomID(_self, world, sample, ffh); break; case INST_ID: - DebugRenderer_instID(_self, world, sample, ff); + DebugRenderer_instID(_self, world, sample, ffh); break; case BACKFACING_NG: - DebugRenderer_backfacing_Ng(_self, world, sample, ff); + DebugRenderer_backfacing_Ng(_self, world, sample, ffh); break; case BACKFACING_NS: - DebugRenderer_backfacing_Ns(_self, world, sample, ff); + DebugRenderer_backfacing_Ns(_self, 
world, sample, ffh); break; case VOLUME: - DebugRenderer_volume(_self, fb, world, sample, ff); + DebugRenderer_volume(_self, fb, world, sample, ffh); break; default: DebugRenderer_testFrame(_self, world, sample); @@ -415,9 +450,10 @@ SYCL_EXTERNAL void DebugRenderer_renderTask(Renderer *uniform self, World *uniform world, const uint32 *uniform taskIDs, const int taskIndex0, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { - Renderer_default_renderTask(self, fb, camera, world, taskIDs, taskIndex0, ff); + Renderer_default_renderTask( + self, fb, camera, world, taskIDs, taskIndex0, ffh); } #else export void DebugRenderer_renderTasks(void *uniform _self, @@ -432,8 +468,9 @@ export void DebugRenderer_renderTasks(void *uniform _self, Camera *uniform camera = (Camera * uniform) _camera; World *uniform world = (World * uniform) _world; const uint32 *uniform taskIDs = (const uint32 *uniform)_taskIDs; + uniform FeatureFlagsHandler ffh; launch[numTasks] Renderer_default_renderTask( - self, fb, camera, world, taskIDs, ffAll()); + self, fb, camera, world, taskIDs, ffh); sync; } #endif diff --git a/modules/cpu/render/debug/DebugRendererShared.h b/modules/cpu/render/debug/DebugRendererShared.h index 033a8883b..ee0c8535a 100644 --- a/modules/cpu/render/debug/DebugRendererShared.h +++ b/modules/cpu/render/debug/DebugRendererShared.h @@ -39,4 +39,10 @@ struct DebugRenderer }; #ifdef __cplusplus } + +#ifdef OSPRAY_TARGET_SYCL +inline constexpr sycl::specialization_id + debugRendererType; +#endif + #endif diff --git a/modules/cpu/render/materials/OBJ.ih b/modules/cpu/render/materials/OBJ.ih index 566897961..05e623384 100644 --- a/modules/cpu/render/materials/OBJ.ih +++ b/modules/cpu/render/materials/OBJ.ih @@ -20,12 +20,12 @@ SYCL_EXTERNAL const varying BSDF *varying OBJ_getBSDF( const DifferentialGeometry &dg, const Ray &ray, const Medium &currentMedium, - const uniform FeatureFlagsOther ffo); + const uniform FeatureFlagsHandler &ffh); SYCL_EXTERNAL vec3f
OBJ_getTransparency(const uniform Material *uniform super, const DifferentialGeometry &dg, const Ray &ray, const Medium &currentMedium, - const uniform FeatureFlagsOther ffo); + const uniform FeatureFlagsHandler &ffh); OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/render/materials/OBJ.ispc b/modules/cpu/render/materials/OBJ.ispc index d21c9db81..f6220262f 100644 --- a/modules/cpu/render/materials/OBJ.ispc +++ b/modules/cpu/render/materials/OBJ.ispc @@ -22,7 +22,7 @@ SYCL_EXTERNAL const varying BSDF *varying OBJ_getBSDF( const DifferentialGeometry &dg, const Ray &, const Medium &, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsHandler &ffh) { uniform const OBJ *uniform self = (uniform const OBJ *uniform)super; varying BSDF *uniform bsdf = MultiBSDF_create(ctx); @@ -30,10 +30,12 @@ SYCL_EXTERNAL const varying BSDF *varying OBJ_getBSDF( /*! normal map */ varying linear3f *uniform shadingFrame = LinearSpace3f_create( - ctx, makeShadingFrame_ff(dg, self->bumpMap, self->bumpRot, ffo)); + ctx, makeShadingFrame_ff(dg, self->bumpMap, self->bumpRot, ffh)); /*! cut-out opacity */ - float d = self->d * get1f_ff(self->dMap, dg, 1.f, ffo) * dg.color.w; + float d = self->d * get1f_ff(self->dMap, dg, 1.f, ffh) * dg.color.w; + + const uniform FeatureFlagsOther ffo = getFeatureFlagsOther(ffh); /*! diffuse component */ vec3f Kd = self->Kd; @@ -56,8 +58,8 @@ SYCL_EXTERNAL const varying BSDF *varying OBJ_getBSDF( bsdf, Transmission_create(ctx, shadingFrame, T), 1.f, luminance(T)); /*!
specular component */ - float Ns = self->Ns * get1f_ff(self->NsMap, dg, 1.0f, ffo); - vec3f Ks = d * self->Ks * get3f_ff(self->KsMap, dg, make_vec3f(1.f), ffo); + float Ns = self->Ns * get1f_ff(self->NsMap, dg, 1.0f, ffh); + vec3f Ks = d * self->Ks * get3f_ff(self->KsMap, dg, make_vec3f(1.f), ffh); if (reduce_max(Ks) > 0.0f) { MultiBSDF_add( bsdf, Specular_create(ctx, shadingFrame, Ks, Ns), 1.f, luminance(Ks)); @@ -69,12 +71,13 @@ SYCL_EXTERNAL vec3f OBJ_getTransparency(const uniform Material *uniform super, const DifferentialGeometry &dg, const Ray &, const Medium &, - const uniform FeatureFlagsOther ffo) + const uniform FeatureFlagsHandler &ffh) { uniform const OBJ *uniform self = (uniform const OBJ *uniform)super; + const uniform FeatureFlagsOther ffo = getFeatureFlagsOther(ffh); /*! cut-out opacity */ - float d = self->d * get1f_ff(self->dMap, dg, 1.f, ffo) * dg.color.w; + float d = self->d * get1f_ff(self->dMap, dg, 1.f, ffh) * dg.color.w; if (hasAlpha(self->KdMap) && (ffo & FFO_TEXTURE_IN_MATERIAL)) { vec4f Kd_from_map = get4f(self->KdMap, dg); d *= Kd_from_map.w; diff --git a/modules/cpu/render/pathtracer/GeometryLight.ih b/modules/cpu/render/pathtracer/GeometryLight.ih index f22961609..83d6f9fb1 100644 --- a/modules/cpu/render/pathtracer/GeometryLight.ih +++ b/modules/cpu/render/pathtracer/GeometryLight.ih @@ -3,6 +3,7 @@ #pragma once +#include "common/FeatureFlags.ih" #include "rkcommon/math/vec.ih" OSPRAY_BEGIN_ISPC_NAMESPACE @@ -24,6 +25,7 @@ SYCL_EXTERNAL vec3f evaluateGeometryLights(const PathContext &pathContext, SYCL_EXTERNAL Light_SampleRes GeometryLight_sample(const Light *uniform super, const DifferentialGeometry &dg, const vec2f &s, - const float time); + const float time, + const uniform FeatureFlagsHandler &ffh); OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/render/pathtracer/GeometryLight.ispc b/modules/cpu/render/pathtracer/GeometryLight.ispc index 0f14ef1d4..afe32b798 100644 --- a/modules/cpu/render/pathtracer/GeometryLight.ispc +++ 
b/modules/cpu/render/pathtracer/GeometryLight.ispc @@ -57,7 +57,8 @@ SYCL_EXTERNAL vec3f evaluateGeometryLights(const PathContext &pathContext, SYCL_EXTERNAL Light_SampleRes GeometryLight_sample(const Light *uniform super, const DifferentialGeometry &dg, const vec2f &s, - const float time) + const float time, + const uniform FeatureFlagsHandler &ffh) { const GeometryLight *uniform self = (GeometryLight * uniform) super; const GeometricModel *uniform model = self->model; @@ -85,11 +86,12 @@ SYCL_EXTERNAL Light_SampleRes GeometryLight_sample(const Light *uniform super, foreach_unique (utime in time) { #endif const uniform affine3f xfm = Instance_getTransform(instance, utime); - as = Geometry_dispatch_sampleArea(geo, primID, xfm, rcp(xfm), ns, time); + as = Geometry_dispatch_sampleArea( + geo, primID, xfm, rcp(xfm), ns, time, ffh); } } else { as = Geometry_dispatch_sampleArea( - geo, primID, instance->xfm, instance->rcp_xfm, ns, time); + geo, primID, instance->xfm, instance->rcp_xfm, ns, time, ffh); } // note that sample.pdf/primitives * sampleArea(worldspace).pdf == self->pdf diff --git a/modules/cpu/render/pathtracer/NextEventEstimation.ih b/modules/cpu/render/pathtracer/NextEventEstimation.ih index 1d1157fc2..b3d24a4e0 100644 --- a/modules/cpu/render/pathtracer/NextEventEstimation.ih +++ b/modules/cpu/render/pathtracer/NextEventEstimation.ih @@ -15,6 +15,6 @@ struct PathVertex; SYCL_EXTERNAL vec3f nextEventEstimation(const PathContext &pathContext, const PathState &pathState, PathVertex &pathVertex, - const uniform FeatureFlags &ff); + const uniform FeatureFlagsHandler &ffh); OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/render/pathtracer/NextEventEstimation.ispc b/modules/cpu/render/pathtracer/NextEventEstimation.ispc index 956595211..4f41cfb78 100644 --- a/modules/cpu/render/pathtracer/NextEventEstimation.ispc +++ b/modules/cpu/render/pathtracer/NextEventEstimation.ispc @@ -24,8 +24,10 @@ OSPRAY_BEGIN_ISPC_NAMESPACE SYCL_EXTERNAL vec3f 
nextEventEstimation(const PathContext &pathContext, const PathState &pathState, PathVertex &pathVertex, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { + const uniform FeatureFlags ff = getFeatureFlags(ffh); + // direct lighting including shadows and MIS vec3f L = make_vec3f(0.f); @@ -51,8 +53,7 @@ SYCL_EXTERNAL vec3f nextEventEstimation(const PathContext &pathContext, #else foreach_unique (l in light) { #endif - ls = - Light_dispatch_sample(l, pathVertex.dg, s2, pathState.time, ff.other); + ls = Light_dispatch_sample(l, pathVertex.dg, s2, pathState.time, ffh); } // adjust the contibution with the probabiltiy of selecting the light source ls.weight = ls.weight / lightSelectionProb; @@ -64,7 +65,7 @@ SYCL_EXTERNAL vec3f nextEventEstimation(const PathContext &pathContext, // evaluate BSDF Scattering_EvalRes fe; - if (pathVertex.type == SURFACE) { + if (ff.geometry && pathVertex.type == SURFACE) { #ifdef OSPRAY_TARGET_SYCL { const BSDF *f = pathVertex.bsdf; @@ -72,11 +73,11 @@ SYCL_EXTERNAL vec3f nextEventEstimation(const PathContext &pathContext, foreach_unique (f in pathVertex.bsdf) { #endif if (f != NULL) - fe = BSDF_dispatch_eval(f, pathVertex.wo, ls.dir, ff.other); + fe = BSDF_dispatch_eval(f, pathVertex.wo, ls.dir, ffh); } } else { #ifdef OSPRAY_ENABLE_VOLUMES - if (ff.volume & FFV_VOLUME) { + if (ff.other & FFO_VOLUME_IN_SCENE) { #ifdef OSPRAY_TARGET_SYCL { const VolumetricModel *v = pathVertex.volume; @@ -105,7 +106,7 @@ SYCL_EXTERNAL vec3f nextEventEstimation(const PathContext &pathContext, // Trace ray in clipping geometries scene, fill array with ray intervals RayIntervals rayIntervals; - traceClippingRay(pathContext.world, shadowRay, rayIntervals); + traceClippingRay(pathContext.world, shadowRay, rayIntervals, ffh); const vec3f throughput = pathState.throughput * fe.value; @@ -125,14 +126,15 @@ SYCL_EXTERNAL vec3f nextEventEstimation(const PathContext &pathContext, shadowRay, rayIntervals, pathState.currentMedium, - ff); + 
ffh); if (reduce_max(lightContrib) > 0) { #ifdef OSPRAY_ENABLE_VOLUMES - if (ff.volume & FFV_VOLUME) { + if (ff.other & FFO_VOLUME_IN_SCENE) { const float T = volumeTransmittance(pathContext.world, shadowRay, rayIntervals, - pathContext.randomSampler); + pathContext.randomSampler, + ffh); if (reduce_max(T) > 0) { // we have to use an independent transmittance estimate for MIS to get // a correct result @@ -141,7 +143,8 @@ SYCL_EXTERNAL vec3f nextEventEstimation(const PathContext &pathContext, : volumeTransmittance(pathContext.world, shadowRay, rayIntervals, - pathContext.randomSampler); + pathContext.randomSampler, + ffh); L = L + T * lightContrib * misHeuristic(pathState, ls.pdf, fe.pdf * T_mis); diff --git a/modules/cpu/render/pathtracer/PathSampler.ih b/modules/cpu/render/pathtracer/PathSampler.ih index c81f28990..30fddc02a 100644 --- a/modules/cpu/render/pathtracer/PathSampler.ih +++ b/modules/cpu/render/pathtracer/PathSampler.ih @@ -17,6 +17,6 @@ SYCL_EXTERNAL void samplePath(const PathContext &pathContext, PathState &pathState, Ray &ray, ScreenSample &sample, - const uniform FeatureFlags &ff); + const uniform FeatureFlagsHandler &ffh); OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/render/pathtracer/PathSampler.ispc b/modules/cpu/render/pathtracer/PathSampler.ispc index 61e42a54e..bb6532258 100644 --- a/modules/cpu/render/pathtracer/PathSampler.ispc +++ b/modules/cpu/render/pathtracer/PathSampler.ispc @@ -32,20 +32,21 @@ inline void postIntersect(const PathContext &pathContext, const PathState &, PathVertex &pathVertex, Ray &ray, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { const PathTracer *uniform pt = pathContext.context; - if (pathVertex.type == SURFACE) { + const uniform FeatureFlags ff = getFeatureFlags(ffh); + if (ff.geometry && pathVertex.type == SURFACE) { postIntersect(pathContext.world, &pt->super, pathVertex.dg, ray, DG_NS | DG_NG | DG_FACEFORWARD | DG_NORMALIZE | DG_TEXCOORD | DG_COLOR | DG_TANGENTS | 
DG_MOTIONBLUR, - ff); + ffh); } #ifdef OSPRAY_ENABLE_VOLUMES - if ((pathVertex.type == VOLUME) && (ff.volume & FFV_VOLUME)) { + if ((ff.other & FFO_VOLUME_IN_SCENE) && pathVertex.type == VOLUME) { pathVertex.dg.P = ray.org + ray.t * ray.dir; pathVertex.dg.renderer = &pt->super; @@ -79,14 +80,16 @@ inline void postIntersect(const PathContext &pathContext, inline Scattering_SampleRes sampleDirection(const PathContext &pathContext, const PathState &pathState, PathVertex &pathVertex, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { + const uniform FeatureFlags ff = getFeatureFlags(ffh); + const vec2f ss = LDSampler_getFloat2(pathContext.ldSampler, pathState.sampleDim); const float s = LDSampler_getFloat(pathContext.ldSampler, pathState.sampleDim + 2); Scattering_SampleRes fs; - if (pathVertex.type == SURFACE) { + if (ff.geometry && pathVertex.type == SURFACE) { #ifdef OSPRAY_TARGET_SYCL { const BSDF *f = pathVertex.bsdf; @@ -94,14 +97,14 @@ inline Scattering_SampleRes sampleDirection(const PathContext &pathContext, foreach_unique (f in pathVertex.bsdf) { #endif if (f != NULL) { - fs = BSDF_dispatch_sample(f, pathVertex.wo, ss, s, ff.other); + fs = BSDF_dispatch_sample(f, pathVertex.wo, ss, s, ffh); pathVertex.wi = fs.wi; pathVertex.pdf_w = fs.pdf; } } } #ifdef OSPRAY_ENABLE_VOLUMES - if ((pathVertex.type == VOLUME) && (ff.volume & FFV_VOLUME)) { + if ((ff.other & FFO_VOLUME_IN_SCENE) && pathVertex.type == VOLUME) { #ifdef OSPRAY_TARGET_SYCL { const VolumetricModel *v = pathVertex.volume; @@ -123,7 +126,7 @@ SYCL_EXTERNAL void samplePath(const PathContext &pathContext, PathState &pathState, Ray &ray, ScreenSample &sample, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { PathVertex lastVertex; lastVertex.type = CAMERA; @@ -139,6 +142,7 @@ SYCL_EXTERNAL void samplePath(const PathContext &pathContext, uniform ShadingContext ctx; ShadingContext_Constructor(&ctx); + const uniform FeatureFlags ff = 
getFeatureFlags(ffh); if (pathContext.context->shadowCatcher) { const Hit hit = intersectPlane( @@ -153,10 +157,12 @@ SYCL_EXTERNAL void samplePath(const PathContext &pathContext, // Trace ray in clipping geometries scene, fill array with ray intervals RayIntervals rayIntervals; - traceClippingRay(pathContext.world, ray, rayIntervals); + traceClippingRay(pathContext.world, ray, rayIntervals, ffh); - // Trace ray intervals in geometry - traceGeometryRayIntervals(pathContext.world, ray, rayIntervals, ff); + if (ff.geometry) { + // Trace ray intervals in geometry + traceGeometryRayIntervals(pathContext.world, ray, rayIntervals, ffh); + } PathVertex pathVertex; pathVertex.bsdf = NULL; @@ -164,20 +170,20 @@ SYCL_EXTERNAL void samplePath(const PathContext &pathContext, #ifdef OSPRAY_ENABLE_VOLUMES pathVertex.volume = NULL; #endif - if (noHit(ray)) { + if (noHit(ray) || !ff.geometry) { pathVertex.type = ENVIRONMENT; } else { pathVertex.type = SURFACE; } - if (shadowCatcher(pathContext, pathState, pathVertex, ray, sample, ff)) { + if (shadowCatcher(pathContext, pathState, pathVertex, ray, sample, ffh)) { pathVertex.type = ENVIRONMENT; } pathVertex.wo = neg(ray.dir); #ifdef OSPRAY_ENABLE_VOLUMES - if (ff.volume & FFV_VOLUME) { + if (ff.other & FFO_VOLUME_IN_SCENE) { float extinctionCoefficient; float freePath = volumeSampleFreePath(pathContext.world, ray, @@ -185,7 +191,8 @@ SYCL_EXTERNAL void samplePath(const PathContext &pathContext, pathContext.randomSampler, &pathVertex.volume, extinctionCoefficient, - pathVertex.albedo); + pathVertex.albedo, + ffh); if (freePath < inf) { pathVertex.type = VOLUME; pathState.throughput = pathState.throughput * pathVertex.albedo; @@ -203,7 +210,7 @@ SYCL_EXTERNAL void samplePath(const PathContext &pathContext, ? 
pathState.specularTransmissionPath : pathState.straightPath)) { vec4f bg = Renderer_getBackground( - &pathContext.context->super, *pathContext.pixel, ff.other); + &pathContext.context->super, *pathContext.pixel, ffh); pathState.contribution = pathState.contribution + pathState.throughput * make_vec3f(bg); sample.alpha = 1.0f - luminance(pathState.throughput); @@ -214,7 +221,7 @@ SYCL_EXTERNAL void samplePath(const PathContext &pathContext, && pathVertex.type != VOLUME) { pathState.contribution = pathState.contribution + evaluateVirtualLights( - pathContext, pathState, lastVertex, pathVertex, ray, ff); + pathContext, pathState, lastVertex, pathVertex, ray, ffh); } if (pathVertex.type == ENVIRONMENT) { break; @@ -231,7 +238,7 @@ SYCL_EXTERNAL void samplePath(const PathContext &pathContext, break; } - postIntersect(pathContext, pathState, pathVertex, ray, ff); + postIntersect(pathContext, pathState, pathVertex, ray, ffh); if (!pathState.disableFWD && (pathVertex.type != VOLUME) && (ff.other & FFO_LIGHT_GEOMETRY)) { @@ -250,13 +257,14 @@ SYCL_EXTERNAL void samplePath(const PathContext &pathContext, // terminate after evaluation of lights and before next shading to always // have both samples for MIS if (pathState.depth >= pathContext.context->super.maxDepth - || pathState.scatteringEvents >= pathContext.context->maxScatteringEvents) { + || pathState.scatteringEvents + >= pathContext.context->maxScatteringEvents) { break; } // shade surface ShadingContext_Constructor(&ctx); - if (pathVertex.type == SURFACE) { + if (ff.geometry && pathVertex.type == SURFACE) { Material *material = (Material *)pathVertex.dg.material; #ifdef OSPRAY_TARGET_SYCL { @@ -266,7 +274,7 @@ SYCL_EXTERNAL void samplePath(const PathContext &pathContext, #endif if (m != NULL) { pathVertex.bsdf = Material_dispatch_getBSDF( - m, &ctx, pathVertex.dg, ray, pathState.currentMedium, ff.other); + m, &ctx, pathVertex.dg, ray, pathState.currentMedium, ffh); if (pathVertex.bsdf != NULL) { pathVertex.albedo = 
pathVertex.bsdf->albedo; } @@ -281,11 +289,11 @@ SYCL_EXTERNAL void samplePath(const PathContext &pathContext, // next event estimation if (!pathState.disableNEE && isSmooth(pathVertex)) { pathState.contribution = pathState.contribution - + nextEventEstimation(pathContext, pathState, pathVertex, ff); + + nextEventEstimation(pathContext, pathState, pathVertex, ffh); } Scattering_SampleRes fs = - sampleDirection(pathContext, pathState, pathVertex, ff); + sampleDirection(pathContext, pathState, pathVertex, ffh); if (pathState.auxFree && (fs.type & SCATTERING_SMOOTH)) { updateAuxilliaryData(pathState, pathVertex, sample); @@ -318,7 +326,7 @@ SYCL_EXTERNAL void samplePath(const PathContext &pathContext, * expf(pathState.currentMedium.attenuation * ray.t); vec3f ray_org = pathVertex.dg.P; - if (pathVertex.type == SURFACE) { + if (ff.geometry && pathVertex.type == SURFACE) { // update currentMedium if we hit a medium interface // TODO: support nested dielectrics if (fs.type & SCATTERING_TRANSMISSION) { @@ -332,7 +340,7 @@ SYCL_EXTERNAL void samplePath(const PathContext &pathContext, #endif if (m != NULL) { Material_dispatch_selectNextMedium( - m, pathVertex.dg, pathState.currentMedium, ff.other); + m, pathVertex.dg, pathState.currentMedium, ffh); } } } diff --git a/modules/cpu/render/pathtracer/PathTracer.cpp b/modules/cpu/render/pathtracer/PathTracer.cpp index d4afd41ac..0c3c19c1e 100644 --- a/modules/cpu/render/pathtracer/PathTracer.cpp +++ b/modules/cpu/render/pathtracer/PathTracer.cpp @@ -13,6 +13,7 @@ #ifdef OSPRAY_TARGET_SYCL #include +#include "common/FeatureFlags.ih" namespace ispc { SYCL_EXTERNAL void PathTracer_renderTask(Renderer *uniform _self, FrameBuffer *uniform fb, @@ -20,9 +21,8 @@ SYCL_EXTERNAL void PathTracer_renderTask(Renderer *uniform _self, World *uniform world, const uint32 *uniform taskIDs, const int taskIndex0, - const uniform ospray::FeatureFlags &ff); + const uniform FeatureFlagsHandler &ffh); } -constexpr sycl::specialization_id 
specFeatureFlags; #else // ispc exports #include "math/Distribution1D_ispc.h" @@ -79,7 +79,7 @@ void *PathTracer::beginFrame(FrameBuffer *, World *world) rkcommon::make_unique( *world, importanceSampleGeometryLights, *this); if (pathtracerData->getSh()->numGeoLights) - featureFlags |= FFO_LIGHT_GEOMETRY; + featureFlags.other |= FFO_LIGHT_GEOMETRY; world->getSh()->pathtracerData = pathtracerData->getSh(); world->pathtracerData = std::move(pathtracerData); @@ -105,25 +105,24 @@ AsyncEvent PathTracer::renderTasks(FrameBuffer *fb, const uint32_t *taskIDsPtr = taskIDs.data(); event = device.getSyclQueue().submit([&](sycl::handler &cgh) { FeatureFlags ff = world->getFeatureFlags(); - ff.other |= featureFlags; - ff.other |= fb->getFeatureFlagsOther(); - ff.other |= camera->getFeatureFlagsOther(); - cgh.set_specialization_constant(ff); + ff |= featureFlags; + ff |= fb->getFeatureFlags(); + ff |= camera->getFeatureFlags(); + cgh.set_specialization_constant(ff); const sycl::nd_range<1> dispatchRange = device.computeDispatchRange(numTasks, 16); cgh.parallel_for(dispatchRange, [=](sycl::nd_item<1> taskIndex, sycl::kernel_handler kh) { if (taskIndex.get_global_id(0) < numTasks) { - const FeatureFlags ff = - kh.get_specialization_constant(); + ispc::FeatureFlagsHandler ffh(kh); ispc::PathTracer_renderTask(&rendererSh->super, fbSh, cameraSh, worldSh, taskIDsPtr, taskIndex.get_global_id(0), - ff); + ffh); } }); }); diff --git a/modules/cpu/render/pathtracer/PathTracer.ispc b/modules/cpu/render/pathtracer/PathTracer.ispc index e8efb0c83..09cd59151 100644 --- a/modules/cpu/render/pathtracer/PathTracer.ispc +++ b/modules/cpu/render/pathtracer/PathTracer.ispc @@ -6,6 +6,7 @@ #include "PathTracerDefines.ih" #include "PathTracerUtil.ih" +#include "common/FeatureFlagsEnum.h" #include "render/Renderer.ih" #include "render/bsdfs/BSDF.ih" #include "render/bsdfs/MicrofacetAlbedoTables.ih" @@ -37,7 +38,7 @@ static ScreenSample PathTraceIntegrator_Li(const PathTracer *uniform self, Ray &ray, 
varying LDSampler *uniform ldSampler, varying RandomSampler *uniform randomSampler, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { ScreenSample sample; sample.rgb = make_vec3f(0.f); @@ -88,7 +89,7 @@ static ScreenSample PathTraceIntegrator_Li(const PathTracer *uniform self, pathState.debug = false; - samplePath(pathContext, pathState, ray, sample, ff); + samplePath(pathContext, pathState, ray, sample, ffh); return sample; } @@ -100,7 +101,7 @@ static ScreenSample PathTracer_renderPixel(PathTracer *uniform self, const uint32 ix, const uint32 iy, const uint32 accumID, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { ScreenSample screenSample; screenSample.rgb = make_vec3f(0.f); @@ -148,9 +149,9 @@ static ScreenSample PathTracer_renderPixel(PathTracer *uniform self, cameraSample.lens = LDSampler_getFloat2(ldSampler, 2); cameraSample.time = LDSampler_getFloat(ldSampler, 4); - Camera_dispatch_initRay(camera, screenSample.ray, cameraSample, ff.other); + Camera_dispatch_initRay(camera, screenSample.ray, cameraSample, ffh); const float tMax = - Renderer_getMaxDepth(&self->super, cameraSample.screen, ff.other); + Renderer_getMaxDepth(&self->super, cameraSample.screen, ffh); screenSample.ray.t = min(screenSample.ray.t, tMax); @@ -160,7 +161,7 @@ static ScreenSample PathTracer_renderPixel(PathTracer *uniform self, screenSample.ray, ldSampler, randomSampler, - ff); + ffh); screenSample.rgb = screenSample.rgb + min(sample.rgb, make_vec3f(self->maxRadiance)); @@ -196,11 +197,11 @@ task #ifdef OSPRAY_TARGET_SYCL const int taskIndex0, #endif - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { PathTracer *uniform self = (PathTracer * uniform) _self; uniform RenderTaskDesc taskDesc = - FrameBuffer_dispatch_getRenderTaskDesc(fb, taskIDs[taskIndex0], ff.other); + FrameBuffer_dispatch_getRenderTaskDesc(fb, taskIDs[taskIndex0], ffh); if (fb->cancelRender || isEmpty(taskDesc.region)) { return; @@ 
-217,12 +218,11 @@ task x = taskDesc.region.lower.x... taskDesc.region.upper.x) { #endif ScreenSample screenSample = PathTracer_renderPixel( - self, fb, camera, world, x, y, taskDesc.accumID, ff); + self, fb, camera, world, x, y, taskDesc.accumID, ffh); - FrameBuffer_dispatch_accumulateSample( - fb, screenSample, taskDesc, ff.other); + FrameBuffer_dispatch_accumulateSample(fb, screenSample, taskDesc, ffh); } - FrameBuffer_dispatch_completeTask(fb, taskDesc, ff.other); + FrameBuffer_dispatch_completeTask(fb, taskDesc, ffh); } // Exports (called from C++) ////////////////////////////////////////////////// @@ -240,9 +240,8 @@ export void PathTracer_renderTasks(void *uniform _self, Camera *uniform camera = (Camera * uniform) _camera; World *uniform world = (World * uniform) _world; const uint32 *uniform taskIDs = (const uint32 *uniform)_taskIDs; - - launch[numTasks] PathTracer_renderTask( - self, fb, camera, world, taskIDs, ffAll()); + uniform FeatureFlagsHandler ffh; + launch[numTasks] PathTracer_renderTask(self, fb, camera, world, taskIDs, ffh); } #endif diff --git a/modules/cpu/render/pathtracer/ShadowCatcher.ih b/modules/cpu/render/pathtracer/ShadowCatcher.ih index c886fcde8..38afe712a 100644 --- a/modules/cpu/render/pathtracer/ShadowCatcher.ih +++ b/modules/cpu/render/pathtracer/ShadowCatcher.ih @@ -18,6 +18,6 @@ SYCL_EXTERNAL bool shadowCatcher(const PathContext &pathContext, PathVertex &pathVertex, const Ray &ray, ScreenSample &sample, - const uniform FeatureFlags &ff); + const uniform FeatureFlagsHandler &ffh); OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/render/pathtracer/ShadowCatcher.ispc b/modules/cpu/render/pathtracer/ShadowCatcher.ispc index ddd178824..493c00079 100644 --- a/modules/cpu/render/pathtracer/ShadowCatcher.ispc +++ b/modules/cpu/render/pathtracer/ShadowCatcher.ispc @@ -22,7 +22,7 @@ SYCL_EXTERNAL bool shadowCatcher(const PathContext &pathContext, PathVertex &pathVertex, const Ray &ray, ScreenSample &, - const uniform FeatureFlags &ff) + 
const uniform FeatureFlagsHandler &ffh) { if (!(pathContext.context->backgroundRefraction @@ -56,8 +56,8 @@ SYCL_EXTERNAL bool shadowCatcher(const PathContext &pathContext, const Light *uniform light = pathtracerData.lights[i]; const vec2f s = LDSampler_getFloat2( pathContext.ldSampler, pathState.sampleDim + 4 + i * 2); - Light_SampleRes ls = Light_dispatch_sample( - light, pathVertex.dg, s, pathState.time, ff.other); + Light_SampleRes ls = + Light_dispatch_sample(light, pathVertex.dg, s, pathState.time, ffh); // skip when zero contribution from light if (reduce_max(ls.weight) <= 0.0f | ls.pdf <= PDF_CULLING) @@ -77,7 +77,7 @@ SYCL_EXTERNAL bool shadowCatcher(const PathContext &pathContext, // Trace ray in clipping geometries scene, fill array with ray intervals RayIntervals rayIntervals; - traceClippingRay(pathContext.world, shadowRay, rayIntervals); + traceClippingRay(pathContext.world, shadowRay, rayIntervals, ffh); const vec3f unshadedLightContrib = pathState.throughput * ls.weight * brdf; // * misHeuristic(pathState, ls.pdf, brdf); @@ -89,7 +89,7 @@ SYCL_EXTERNAL bool shadowCatcher(const PathContext &pathContext, shadowRay, rayIntervals, pathState.currentMedium, - ff); + ffh); } // order of args important to filter NaNs (in case unshaded.X is zero) const vec3f ratio = min( diff --git a/modules/cpu/render/pathtracer/TransparentShadow.ih b/modules/cpu/render/pathtracer/TransparentShadow.ih index 342a7418b..34cacbbca 100644 --- a/modules/cpu/render/pathtracer/TransparentShadow.ih +++ b/modules/cpu/render/pathtracer/TransparentShadow.ih @@ -20,5 +20,6 @@ SYCL_EXTERNAL vec3f transparentShadow(const uniform PathTracer *uniform self, Ray &shadowRay, RayIntervals &rayIntervals, Medium medium, - const uniform FeatureFlags &ff); + const uniform FeatureFlagsHandler &ffh); + OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/render/pathtracer/TransparentShadow.ispc b/modules/cpu/render/pathtracer/TransparentShadow.ispc index f52241a07..61201d0fb 100644 --- 
a/modules/cpu/render/pathtracer/TransparentShadow.ispc +++ b/modules/cpu/render/pathtracer/TransparentShadow.ispc @@ -21,13 +21,19 @@ SYCL_EXTERNAL vec3f transparentShadow(const uniform PathTracer *uniform self, Ray &shadowRay, RayIntervals &rayIntervals, Medium medium, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { + const uniform FeatureFlags ff = getFeatureFlags(ffh); + // It's not possible to have transpareny shadows if we don't have any geometry + if (!ff.geometry) { + return lightContrib; + } + uniform uint32 maxDepth = self->super.maxDepth; const float tOriginal = shadowRay.t; while (1) { - traceGeometryRayIntervals(world, shadowRay, rayIntervals, ff); + traceGeometryRayIntervals(world, shadowRay, rayIntervals, ffh); if (noHit(shadowRay)) return lightContrib; @@ -39,7 +45,7 @@ SYCL_EXTERNAL vec3f transparentShadow(const uniform PathTracer *uniform self, shadowRay, DG_NS | DG_NG | DG_FACEFORWARD | DG_NORMALIZE | DG_TEXCOORD | DG_COLOR | DG_MOTIONBLUR, - ff); + ffh); uniform Material *material = (uniform Material *)dg.material; vec3f transparency; @@ -52,8 +58,8 @@ SYCL_EXTERNAL vec3f transparentShadow(const uniform PathTracer *uniform self, foreach_unique (m in material) { #endif if (m != NULL) { - transparency = Material_dispatch_getTransparency( - m, dg, shadowRay, medium, ff.other); + transparency = + Material_dispatch_getTransparency(m, dg, shadowRay, medium, ffh); } } @@ -77,7 +83,7 @@ SYCL_EXTERNAL vec3f transparentShadow(const uniform PathTracer *uniform self, foreach_unique (m in material) { #endif if (m != NULL) { - Material_dispatch_selectNextMedium(m, dg, medium, ff.other); + Material_dispatch_selectNextMedium(m, dg, medium, ffh); } } diff --git a/modules/cpu/render/pathtracer/VirtualLight.ih b/modules/cpu/render/pathtracer/VirtualLight.ih index 420c20931..b3be00ef5 100644 --- a/modules/cpu/render/pathtracer/VirtualLight.ih +++ b/modules/cpu/render/pathtracer/VirtualLight.ih @@ -17,6 +17,6 @@ SYCL_EXTERNAL vec3f 
evaluateVirtualLights(const PathContext &pathContext, const PathVertex &lastVertex, const PathVertex &pathVertex, Ray &ray, - const uniform FeatureFlags &ff); + const uniform FeatureFlagsHandler &ffh); OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/render/pathtracer/VirtualLight.ispc b/modules/cpu/render/pathtracer/VirtualLight.ispc index 0b1cd9aa8..7bc06a21d 100644 --- a/modules/cpu/render/pathtracer/VirtualLight.ispc +++ b/modules/cpu/render/pathtracer/VirtualLight.ispc @@ -40,8 +40,10 @@ SYCL_EXTERNAL vec3f evaluateVirtualLights(const PathContext &pathContext, const PathVertex &lastVertex, const PathVertex &pathVertex, Ray &ray, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { + const uniform FeatureFlags ff = getFeatureFlags(ffh); + // add light from *virtual* lights by intersecting them vec3f L = make_vec3f(0.f); @@ -71,7 +73,7 @@ SYCL_EXTERNAL vec3f evaluateVirtualLights(const PathContext &pathContext, intervalLightDist.lower, intervalLightDist.upper, pathState.time, - ff.other); + ffh); if (reduce_max(le.radiance) > 0.0f) { Ray shadowRay; setRay(shadowRay, @@ -83,15 +85,16 @@ SYCL_EXTERNAL vec3f evaluateVirtualLights(const PathContext &pathContext, // Trace ray in clipping geometries scene, fill array with ray intervals RayIntervals rayIntervals; - traceClippingRay(pathContext.world, shadowRay, rayIntervals); + traceClippingRay(pathContext.world, shadowRay, rayIntervals, ffh); float T = 1.f; #ifdef OSPRAY_ENABLE_VOLUMES - if (ff.volume & FFV_VOLUME) { + if (ff.other & FFO_VOLUME_IN_SCENE) { T = volumeTransmittance(pathContext.world, shadowRay, rayIntervals, - pathContext.randomSampler); + pathContext.randomSampler, + ffh); } #endif L = L diff --git a/modules/cpu/render/pathtracer/volumes/VolumeSampler.ih b/modules/cpu/render/pathtracer/volumes/VolumeSampler.ih index 7e9d15266..2819eb8ae 100644 --- a/modules/cpu/render/pathtracer/volumes/VolumeSampler.ih +++ b/modules/cpu/render/pathtracer/volumes/VolumeSampler.ih @@ -4,6 +4,7 
@@ #pragma once +#include "common/FeatureFlagsEnum.h" #include "render/pathtracer/PathStructs.ih" #include "render/pathtracer/volumes/HenyeyGreenstein.ih" #include "rkcommon/math/vec.ih" @@ -22,12 +23,14 @@ SYCL_EXTERNAL float volumeSampleFreePath(const World *uniform world, varying RandomSampler *uniform randomSampler, const VolumetricModel *varying *uniform sampledInstance, float &sampledExtinctionCoefficient, - vec3f &sampledAlbedo); + vec3f &sampledAlbedo, + const uniform FeatureFlagsHandler &ffh); SYCL_EXTERNAL float volumeTransmittance(const World *uniform world, Ray &ray, RayIntervals &rayIntervals, - varying RandomSampler *uniform randomSampler); + varying RandomSampler *uniform randomSampler, + const uniform FeatureFlagsHandler &ffh); inline bool isSmoothVolumeVertex(const PathVertex &pathVertex) { diff --git a/modules/cpu/render/pathtracer/volumes/VolumeSampler.ispc b/modules/cpu/render/pathtracer/volumes/VolumeSampler.ispc index b68a6e11b..fc1bc431d 100644 --- a/modules/cpu/render/pathtracer/volumes/VolumeSampler.ispc +++ b/modules/cpu/render/pathtracer/volumes/VolumeSampler.ispc @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 #ifdef OSPRAY_ENABLE_VOLUMES +#include "common/FeatureFlagsEnum.h" #include "common/World.ih" #include "render/pathtracer/volumes/VolumeSampler.ih" #include "volume/transferFunction/TransferFunctionDispatch.ih" @@ -15,8 +16,11 @@ float delta_tracking(const VolumetricModel *uniform vModel, const vec3f &w, const float &time, float &mu_t, // sampled extinction coefficint - vec3f &albedo) // sampled albedo (color) + vec3f &albedo, + const uniform FeatureFlagsHandler &ffh) // sampled albedo (color) { + const uniform FeatureFlags ff = getFeatureFlags(ffh); + #ifdef OSPRAY_TARGET_SYCL // No alloca on GPU, need to be conservative for now and use the // VKL_MAX_INTERVAL_ITERATOR_SIZE @@ -31,25 +35,42 @@ float delta_tracking(const VolumetricModel *uniform vModel, )); #endif - VKLIntervalIterator intervalIterator = 
vklInitIntervalIteratorV( #if OPENVKL_VERSION_MAJOR == 1 - vModel->vklIntervalContext, + // We know if we have OpenVKL version 1 we're not targetting SYCL + VKLIntervalIterator intervalIterator = + vklInitIntervalIteratorV(vModel->vklIntervalContext, + (varying vkl_vec3f *)&o, + (varying vkl_vec3f *)&w, + (varying vkl_range1f *)&rInterval, + &time, + intervalIteratorBuffer); #else - &vModel->vklIntervalContext, -#endif - (varying vkl_vec3f *)&o, - (varying vkl_vec3f *)&w, - (varying vkl_range1f *)&rInterval, + VKLIntervalIterator intervalIterator = + vklInitIntervalIteratorV(&vModel->vklIntervalContext, + (varying vkl_vec3f *)&o, + (varying vkl_vec3f *)&w, + (varying vkl_range1f *)&rInterval, #ifdef OSPRAY_TARGET_SYCL - time, + time, #else - &time, + &time, +#endif + intervalIteratorBuffer +#ifdef OSPRAY_TARGET_SYCL + , + ff.volume +#endif + ); #endif - intervalIteratorBuffer); - float t = 0.f; VKLInterval interval; - while (vklIterateIntervalV(intervalIterator, &interval)) { + while (vklIterateIntervalV(intervalIterator, + &interval +#ifdef OSPRAY_TARGET_SYCL + , + ff.volume +#endif + )) { t = interval.tRange.lower; const float maxOpacity = @@ -70,13 +91,21 @@ float delta_tracking(const VolumetricModel *uniform vModel, xi = RandomSampler_getFloat(randomSampler); const vec3f p = o + t * w; - const float sample = vklComputeSampleV( #if OPENVKL_VERSION_MAJOR == 1 - vModel->volume->vklSampler, + // We know if we have OpenVKL version 1 we're not targetting SYCL + const float sample = vklComputeSampleV( + vModel->volume->vklSampler, (const varying vkl_vec3f *uniform) & p); #else - &vModel->volume->vklSampler, + const float sample = vklComputeSampleV(&vModel->volume->vklSampler, + (const varying vkl_vec3f *uniform) & p +#ifdef OSPRAY_TARGET_SYCL + , + 0, + time, + ff.volume +#endif + ); #endif - (const varying vkl_vec3f *uniform) & p); if (isnan(sample)) continue; @@ -98,7 +127,8 @@ SYCL_EXTERNAL float volumeSampleFreePath(const World *uniform world, varying 
RandomSampler *uniform randomSampler, const VolumetricModel *varying *uniform sampledInstance, float &sampledExtinctionCoefficient, - vec3f &sampledAlbedo) + vec3f &sampledAlbedo, + const uniform FeatureFlagsHandler &ffh) { #ifdef OSPRAY_TARGET_SYCL // We only support a single volume interval on the GPU @@ -177,7 +207,8 @@ SYCL_EXTERNAL float volumeSampleFreePath(const World *uniform world, transformedVolumeRay.dir, transformedVolumeRay.time, extinctionCoefficient, - albedo); + albedo, + ffh); if (dist < inf) { if (dist < min_dist) { min_dist = dist; @@ -203,7 +234,8 @@ SYCL_EXTERNAL float volumeSampleFreePath(const World *uniform world, SYCL_EXTERNAL float volumeTransmittance(const World *uniform world, Ray &ray, RayIntervals &rayIntervals, - varying RandomSampler *uniform randomSampler) + varying RandomSampler *uniform randomSampler, + const uniform FeatureFlagsHandler &ffh) { #ifdef OSPRAY_TARGET_SYCL // We only support a single volume interval on the GPU @@ -276,7 +308,8 @@ SYCL_EXTERNAL float volumeTransmittance(const World *uniform world, transformedVolumeRay.dir, transformedVolumeRay.time, extinctionCoefficient, - albedo); + albedo, + ffh); transmittance = transmittance * ((dist < rInterval.upper) ? 
0.f : 1.f); } } diff --git a/modules/cpu/render/scivis/SciVis.cpp b/modules/cpu/render/scivis/SciVis.cpp index 09b03b622..a8bc914ea 100644 --- a/modules/cpu/render/scivis/SciVis.cpp +++ b/modules/cpu/render/scivis/SciVis.cpp @@ -12,6 +12,7 @@ // ispc exports #include "render/scivis/SciVis_ispc.h" #else +#include "common/FeatureFlags.ih" namespace ispc { SYCL_EXTERNAL void SciVis_renderTask(Renderer *uniform self, FrameBuffer *uniform fb, @@ -19,9 +20,8 @@ SYCL_EXTERNAL void SciVis_renderTask(Renderer *uniform self, World *uniform world, const uint32 *uniform taskIDs, const int taskIndex0, - const uniform ospray::FeatureFlags &ff); + const uniform FeatureFlagsHandler &ffh); } -constexpr sycl::specialization_id specFeatureFlags; #endif namespace ospray { @@ -81,25 +81,24 @@ AsyncEvent SciVis::renderTasks(FrameBuffer *fb, const uint32_t *taskIDsPtr = taskIDs.data(); event = device.getSyclQueue().submit([&](sycl::handler &cgh) { FeatureFlags ff = world->getFeatureFlags(); - ff.other |= featureFlags; - ff.other |= fb->getFeatureFlagsOther(); - ff.other |= camera->getFeatureFlagsOther(); - cgh.set_specialization_constant(ff); + ff |= featureFlags; + ff |= fb->getFeatureFlags(); + ff |= camera->getFeatureFlags(); + cgh.set_specialization_constant(ff); const sycl::nd_range<1> dispatchRange = device.computeDispatchRange(numTasks, 16); cgh.parallel_for(dispatchRange, [=](sycl::nd_item<1> taskIndex, sycl::kernel_handler kh) { if (taskIndex.get_global_id(0) < numTasks) { - const FeatureFlags ff = - kh.get_specialization_constant(); + ispc::FeatureFlagsHandler ffh(kh); ispc::SciVis_renderTask(&rendererSh->super, fbSh, cameraSh, worldSh, taskIDsPtr, taskIndex.get_global_id(0), - ff); + ffh); } }); }); diff --git a/modules/cpu/render/scivis/SciVis.ih b/modules/cpu/render/scivis/SciVis.ih index 3ed0371e9..958d7f360 100644 --- a/modules/cpu/render/scivis/SciVis.ih +++ b/modules/cpu/render/scivis/SciVis.ih @@ -31,7 +31,7 @@ SYCL_EXTERNAL vec3f lightAlpha(const uniform SciVis 
*uniform self, varying LDSampler *uniform ldSampler, vec3f weight, uniform float quality, - const uniform FeatureFlags &ff); + const uniform FeatureFlagsHandler &ffh); SYCL_EXTERNAL vec3f SciVis_computeAO(const uniform SciVis *uniform self, const World *uniform world, @@ -40,6 +40,6 @@ SYCL_EXTERNAL vec3f SciVis_computeAO(const uniform SciVis *uniform self, const uniform int sampleCnt, const uniform float aoRadius, const varying vec3i &sampleID, - const uniform FeatureFlags &ff); + const uniform FeatureFlagsHandler &ffh); OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/render/scivis/SciVis.ispc b/modules/cpu/render/scivis/SciVis.ispc index fee05db76..d6eeeca02 100644 --- a/modules/cpu/render/scivis/SciVis.ispc +++ b/modules/cpu/render/scivis/SciVis.ispc @@ -4,6 +4,7 @@ #include "SciVis.ih" #include "camera/Camera.ih" #include "camera/CameraDispatch.ih" +#include "common/FeatureFlagsEnum.h" #include "common/World.ih" #include "fb/FrameBuffer.ih" #include "fb/FrameBufferDispatch.ih" @@ -23,10 +24,12 @@ static void SciVis_renderSample(Renderer *uniform _self, FrameBuffer *uniform fb, World *uniform world, varying ScreenSample &sample, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { uniform SciVis *uniform self = (uniform SciVis * uniform) _self; + const uniform FeatureFlags ff = getFeatureFlags(ffh); + LDSampler ldSamplerObj; varying LDSampler *uniform ldSampler = &ldSamplerObj; LDSampler_init(ldSampler, @@ -59,19 +62,21 @@ static void SciVis_renderSample(Renderer *uniform _self, // First trace the ray across clipping scene to calculate ray intervals, // this step should keep ray structure unchanged RayIntervals rayIntervals; - traceClippingRay(world, ray, rayIntervals); + traceClippingRay(world, ray, rayIntervals, ffh); // Iterate over all translucent geometry till we are fully opaque vec3f outColor = make_vec3f(0.f); vec3f outTransmission = make_vec3f(1.f); while (true) { - // Then trace normal geometry using calculated ray 
intervals, - // if hit ray.t will be updated - traceGeometryRayIntervals(world, ray, rayIntervals, ff); + if (ff.geometry) { + // Then trace normal geometry using calculated ray intervals, + // if hit ray.t will be updated + traceGeometryRayIntervals(world, ray, rayIntervals, ffh); + } #ifdef OSPRAY_ENABLE_VOLUMES Ray volumeRay = ray; - if (ff.volume & FFV_VOLUME) { + if (ff.other & FFO_VOLUME_IN_SCENE) { // Determine volume intervals by tracing ray in the volume scene traceVolumeRay(world, volumeRay, volumeIntervals); @@ -90,7 +95,7 @@ static void SciVis_renderSample(Renderer *uniform _self, volumeRay, ldSampler, self->volumeSamplingRate, - ff); + ffh); // Blend volume outColor = outColor + outTransmission * make_vec3f(volumeColor); @@ -135,12 +140,12 @@ static void SciVis_renderSample(Renderer *uniform _self, if (self->visibleLights) { dg.P = ray.org; outColor = - outColor + outTransmission * evaluateLights(world, dg, ray, ff); + outColor + outTransmission * evaluateLights(world, dg, ray, ffh); } // If any geometry has been hit const bool rayHadHit = hadHit(ray); - if (rayHadHit) { + if (ff.geometry && rayHadHit) { // Prepare differential geometry structure postIntersect(world, &self->super, @@ -148,12 +153,12 @@ static void SciVis_renderSample(Renderer *uniform _self, ray, DG_NG | DG_NS | DG_NORMALIZE | DG_FACEFORWARD | DG_COLOR | DG_TEXCOORD, - ff); + ffh); // Shade geometry SSI surfaceShading; surfaceShading = SciVis_computeShading( - self, world, dg, sample, ldSampler, ray.dir, ff); + self, world, dg, sample, ldSampler, ray.dir, ffh); // Initialize other per sample data with first hit values // In addition to considering the first hit, all previous, fully @@ -196,7 +201,7 @@ static void SciVis_renderSample(Renderer *uniform _self, } else { // Blend background with output final color vec4f backgroundColor = - Renderer_getBackground(&self->super, sample.pos, ff.other); + Renderer_getBackground(&self->super, sample.pos, ffh); outColor = outColor + 
outTransmission * make_vec3f(backgroundColor); outTransmission = outTransmission * (1.f - backgroundColor.w); @@ -232,7 +237,7 @@ SYCL_EXTERNAL vec3f SciVis_computeAO(const uniform SciVis *uniform self, const uniform int sampleCnt, const uniform float aoRadius, const varying vec3i &sampleID, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { const uniform int accumID = reduce_max(sampleID.z) * sampleCnt; @@ -266,7 +271,7 @@ SYCL_EXTERNAL vec3f SciVis_computeAO(const uniform SciVis *uniform self, hits = hits + (1.f - lightAlpha( - self, ao_ray, world, ldSampler, make_vec3f(1.f), 0.1f, ff)); + self, ao_ray, world, ldSampler, make_vec3f(1.f), 0.1f, ffh)); } // the cosTheta of cosineSampleHemispherePDF and dot(shadingNormal, ao_dir) @@ -285,9 +290,10 @@ SYCL_EXTERNAL void SciVis_renderTask(Renderer *uniform self, World *uniform world, const uint32 *uniform taskIDs, const int taskIndex0, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { - Renderer_default_renderTask(self, fb, camera, world, taskIDs, taskIndex0, ff); + Renderer_default_renderTask( + self, fb, camera, world, taskIDs, taskIndex0, ffh); } #else export void SciVis_renderTasks(void *uniform _self, @@ -302,8 +308,9 @@ export void SciVis_renderTasks(void *uniform _self, Camera *uniform camera = (Camera * uniform) _camera; World *uniform world = (World * uniform) _world; const uint32 *uniform taskIDs = (const uint32 *uniform)_taskIDs; + uniform FeatureFlagsHandler ffh; launch[numTasks] Renderer_default_renderTask( - self, fb, camera, world, taskIDs, ffAll()); + self, fb, camera, world, taskIDs, ffh); sync; } #endif diff --git a/modules/cpu/render/scivis/lightAlpha.ispc b/modules/cpu/render/scivis/lightAlpha.ispc index abd1b2a10..508a7f932 100644 --- a/modules/cpu/render/scivis/lightAlpha.ispc +++ b/modules/cpu/render/scivis/lightAlpha.ispc @@ -1,6 +1,7 @@ // Copyright 2009 Intel Corporation // SPDX-License-Identifier: Apache-2.0 +#include 
"common/FeatureFlagsEnum.h" #include "math/random.ih" #include "math/sampling.ih" #include "render/util.ih" @@ -21,11 +22,13 @@ SYCL_EXTERNAL vec3f lightAlpha(const uniform SciVis *uniform self, varying LDSampler *uniform ldSampler, vec3f weight, uniform float quality, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { vec3f alpha = make_vec3f(1.f); const float org_t_max = ray.t; + const uniform FeatureFlags ff = getFeatureFlags(ffh); + #ifdef OSPRAY_ENABLE_VOLUMES #ifdef OSPRAY_TARGET_SYCL // We only support a single volume interval on the GPU @@ -40,23 +43,24 @@ SYCL_EXTERNAL vec3f lightAlpha(const uniform SciVis *uniform self, allocVolumeIntervals(volumeIntervals); #endif #endif + // First trace the ray across clipping scene to calculate ray intervals, // this step should keep ray structure unchanged RayIntervals rayIntervals; - traceClippingRay(world, ray, rayIntervals); + traceClippingRay(world, ray, rayIntervals, ffh); while (true) { // Then trace normal geometry using calculated ray intervals, // if hit ray.t will be updated // WA for https://jira.devtools.intel.com/browse/XDEPS-4875 #ifndef OSPRAY_TARGET_SYCL - traceGeometryRayIntervals(world, ray, rayIntervals, ff); + traceGeometryRayIntervals(world, ray, rayIntervals, ffh); #else - traceGeometryRay(world, ray, ff); + traceGeometryRay(world, ray, ffh); #endif #ifdef OSPRAY_ENABLE_VOLUMES - if (ff.volume & FFV_VOLUME) { + if (ff.other & FFO_VOLUME_IN_SCENE) { // Determine volume intervals by tracing ray in the volume scene Ray volumeRay = ray; traceVolumeRay(world, volumeRay, volumeIntervals); @@ -67,7 +71,8 @@ SYCL_EXTERNAL vec3f lightAlpha(const uniform SciVis *uniform self, rayIntervals, volumeRay, ldSampler, - self->volumeSamplingRate * quality); + self->volumeSamplingRate * quality, + ffh); alpha = alpha * make_vec3f(1.f - volumeColor.w); } @@ -79,9 +84,9 @@ SYCL_EXTERNAL vec3f lightAlpha(const uniform SciVis *uniform self, if (rayHadHit) { // Prepare differential geometry 
structure DifferentialGeometry dg; - postIntersect(world, &self->super, dg, ray, DG_COLOR | DG_TEXCOORD, ff); + postIntersect(world, &self->super, dg, ray, DG_COLOR | DG_TEXCOORD, ffh); - const SciVisBSDF bsdf = evalMaterial(dg, ff.other); + const SciVisBSDF bsdf = evalMaterial(dg, ffh); alpha = alpha * bsdf.transmission; // Prepare ray for next loop iteration, diff --git a/modules/cpu/render/scivis/surfaces.ih b/modules/cpu/render/scivis/surfaces.ih index 6809a8a25..5c47671dc 100644 --- a/modules/cpu/render/scivis/surfaces.ih +++ b/modules/cpu/render/scivis/surfaces.ih @@ -4,6 +4,7 @@ #pragma once #include "SciVis.ih" +#include "common/FeatureFlagsEnum.h" #include "render/Material.ih" #include "render/materials/OBJ.ih" // c++ shared @@ -27,12 +28,12 @@ SYCL_EXTERNAL SSI SciVis_computeShading(const SciVis *uniform self, ScreenSample &sample, varying LDSampler *uniform ldSampler, const varying vec3f &inDir, - const uniform FeatureFlags &ff); + const uniform FeatureFlagsHandler &ff); SYCL_EXTERNAL vec3f evaluateLights(const World *uniform world, const DifferentialGeometry &dg, const Ray &ray, - const uniform FeatureFlags &ff); + const uniform FeatureFlagsHandler &ffh); struct SciVisBSDF { @@ -45,8 +46,10 @@ struct SciVisBSDF }; inline SciVisBSDF evalMaterial( - const DifferentialGeometry &dg, const uniform FeatureFlagsOther ffo) + const DifferentialGeometry &dg, const uniform FeatureFlagsHandler &ffh) { + const uniform FeatureFlagsOther ffo = getFeatureFlagsOther(ffh); + const OBJ *mat = (const OBJ *)dg.material; // defaults @@ -64,7 +67,7 @@ inline SciVisBSDF evalMaterial( foreach_unique (m in mat) { #endif if (m != NULL && m->super.type == MATERIAL_TYPE_OBJ) { - float d = m->d * get1f_ff(m->dMap, dg, 1.f, ffo) * dg.color.w; + float d = m->d * get1f_ff(m->dMap, dg, 1.f, ffh) * dg.color.w; res.diffuse = res.diffuse * m->Kd; if (valid(m->KdMap) && (ffo & FFO_TEXTURE_IN_MATERIAL)) { vec4f Kd_from_map = get4f(m->KdMap, dg); @@ -72,8 +75,8 @@ inline SciVisBSDF 
evalMaterial( d *= Kd_from_map.w; } res.diffuse = res.diffuse * d; - res.specular = d * m->Ks * get3f_ff(m->KsMap, dg, make_vec3f(1.f), ffo); - res.shininess = m->Ns * get1f_ff(m->NsMap, dg, 1.f, ffo); + res.specular = d * m->Ks * get3f_ff(m->KsMap, dg, make_vec3f(1.f), ffh); + res.shininess = m->Ns * get1f_ff(m->NsMap, dg, 1.f, ffh); res.transmission = m->Tf * d + make_vec3f(1.f - d); res.opacity = d; } diff --git a/modules/cpu/render/scivis/surfaces.ispc b/modules/cpu/render/scivis/surfaces.ispc index e5e863b79..cb402ba95 100644 --- a/modules/cpu/render/scivis/surfaces.ispc +++ b/modules/cpu/render/scivis/surfaces.ispc @@ -1,6 +1,7 @@ // Copyright 2009 Intel Corporation // SPDX-License-Identifier: Apache-2.0 +#include "common/FeatureFlagsEnum.h" #include "lights/LightDispatch.ih" #include "math/random.ih" #include "math/sampling.ih" @@ -20,7 +21,7 @@ vec3f directIllumination(const uniform SciVis *uniform self, varying LDSampler *uniform ldSampler, const varying SciVisBSDF &bsdf, const varying vec3f &inDir, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { vec3f color = make_vec3f(0.f); @@ -37,7 +38,7 @@ vec3f directIllumination(const uniform SciVis *uniform self, const vec2f s = make_vec2f(0.0f); // sample center of area lights const Light_SampleRes light = - Light_dispatch_sample(l, dg, s, sample.ray.time, ff.other); + Light_dispatch_sample(l, dg, s, sample.ray.time, ffh); if (reduce_max(light.weight) > 0.f) { // any potential contribution? 
const float cosNL = dot(light.dir, dg.Ns); @@ -60,7 +61,7 @@ vec3f directIllumination(const uniform SciVis *uniform self, setRay(shadowRay, P, light.dir, 0.0f, light.dist); vec3f light_alpha = lightAlpha( - self, shadowRay, world, ldSampler, light_contrib, 0.25f, ff); + self, shadowRay, world, ldSampler, light_contrib, 0.25f, ffh); color = color + light_alpha * light_contrib; } @@ -79,15 +80,15 @@ SYCL_EXTERNAL SSI SciVis_computeShading(const SciVis *uniform self, ScreenSample &sample, varying LDSampler *uniform ldSampler, const varying vec3f &inDir, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { SSI retval; - const SciVisBSDF bsdf = evalMaterial(dg, ff.other); + const SciVisBSDF bsdf = evalMaterial(dg, ffh); retval.albedo = bsdf.albedo; vec3f color = - directIllumination(self, world, dg, sample, ldSampler, bsdf, inDir, ff); + directIllumination(self, world, dg, sample, ldSampler, bsdf, inDir, ffh); vec3f ao = make_vec3f(1.f); const uniform SciVisData &scivisData = @@ -101,7 +102,7 @@ SYCL_EXTERNAL SSI SciVis_computeShading(const SciVis *uniform self, self->aoSamples, self->aoRadius, sample.sampleID, - ff); + ffh); color = color + bsdf.diffuse * ao * scivisData.aoColorPi; @@ -115,7 +116,7 @@ SYCL_EXTERNAL SSI SciVis_computeShading(const SciVis *uniform self, SYCL_EXTERNAL vec3f evaluateLights(const World *uniform world, const DifferentialGeometry &dg, const Ray &ray, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { // Iterate through all lights vec3f color = make_vec3f(0.f); @@ -129,7 +130,7 @@ SYCL_EXTERNAL vec3f evaluateLights(const World *uniform world, // Evaluate light contribution const Light_EvalRes le = - Light_dispatch_eval(l, dg, ray.dir, ray.t0, ray.t, ray.time, ff.other); + Light_dispatch_eval(l, dg, ray.dir, ray.t0, ray.t, ray.time, ffh); color = color + le.radiance; } return color; diff --git a/modules/cpu/render/scivis/volumes.ih b/modules/cpu/render/scivis/volumes.ih index 
3d4ae24e9..2a4d032b0 100644 --- a/modules/cpu/render/scivis/volumes.ih +++ b/modules/cpu/render/scivis/volumes.ih @@ -5,6 +5,7 @@ #pragma once #include "common/Clipping.ih" +#include "common/FeatureFlagsEnum.h" #include "common/Ray.ih" #include "common/VolumeIntervals.ih" @@ -18,7 +19,7 @@ SYCL_EXTERNAL vec4f integrateVolumeIntervalsGradient(SciVisRenderContext &rc, Ray &ray, varying LDSampler *uniform ldSampler, const uniform float samplingRate, - const uniform FeatureFlags &ff); + const uniform FeatureFlagsHandler &ffh); OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/cpu/render/scivis/volumes.ispc b/modules/cpu/render/scivis/volumes.ispc index abe79a5b2..fd4548c29 100644 --- a/modules/cpu/render/scivis/volumes.ispc +++ b/modules/cpu/render/scivis/volumes.ispc @@ -1,5 +1,6 @@ // Copyright 2009 Intel Corporation // SPDX-License-Identifier: Apache-2.0 +#include "common/FeatureFlagsEnum.h" #ifdef OSPRAY_ENABLE_VOLUMES #include "math/random.ih" @@ -35,8 +36,9 @@ static void sampleVolume(SciVisRenderContext &rc, Ray &ray, const VolumeInterval &vi, const uniform float samplingRate, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { + const uniform FeatureFlags ff = getFeatureFlags(ffh); // We have to iterate till we get a valid sample value float dt = 0.f; float sampleVal = nan; @@ -47,7 +49,13 @@ static void sampleVolume(SciVisRenderContext &rc, while (vc.iuDistance > vc.iuLength) { // Get next VKL interval const float prevUpper = vc.interval.tRange.upper; - if (vklIterateIntervalV(vc.intervalIterator, &vc.interval)) { + if (vklIterateIntervalV(vc.intervalIterator, + &vc.interval +#ifdef OSPRAY_TARGET_SYCL + , + ff.volume +#endif + )) { // Intervals may not be contiguous, accumulate empty space emptySpace += max(vc.interval.tRange.lower - prevUpper, 0.f); @@ -79,14 +87,21 @@ static void sampleVolume(SciVisRenderContext &rc, p = vc.org + newDistance * vc.dir; // Sample volume value in given point - sampleVal = vklComputeSampleV( #if 
OPENVKL_VERSION_MAJOR == 1 - m->volume->vklSampler, + // We know if we have OpenVKL version 1 we're not targetting SYCL + sampleVal = vklComputeSampleV( + m->volume->vklSampler, (const varying vkl_vec3f *uniform) & p); #else - &m->volume->vklSampler, + sampleVal = vklComputeSampleV(&m->volume->vklSampler, + (const varying vkl_vec3f *uniform) & p +#ifdef OSPRAY_TARGET_SYCL + , + 0, + 0.5f, + ff.volume +#endif + ); #endif - (const varying vkl_vec3f *uniform) & p); - // Go to the next sub-interval vc.iuDistance += 1.f; dt = newDistance - vc.distance - emptySpace; @@ -99,7 +114,7 @@ static void sampleVolume(SciVisRenderContext &rc, // compute gradient shading lighting if (m->gradientShadingScale > 0.0f) { - vec3f ns = Volume_getGradient(m->volume, p); + vec3f ns = Volume_getGradient(m->volume, p, ffh); if (dot(ns, ns) > 1e-6f) { // assume that opacity directly correlates to volume scalar field, i.e. // that "outside" has lower values; because the gradient point towards @@ -117,7 +132,7 @@ static void sampleVolume(SciVisRenderContext &rc, normalize(xfmVector(transposed(vi.instance->rcp_xfm.l), ns)); dg.P = ray.org + vc.distance * ray.dir; SSI shading = SciVis_computeShading( - rc.renderer, rc.world, dg, rc.sample, rc.ldSampler, ray.dir, ff); + rc.renderer, rc.world, dg, rc.sample, rc.ldSampler, ray.dir, ffh); vec4f shadedColor = make_vec4f( shading.shadedColor, 1.f - luminance(shading.transmission)); vc.sample = lerp(m->gradientShadingScale, vc.sample, shadedColor); @@ -134,7 +149,7 @@ static float sampleAllVolumes(SciVisRenderContext &rc, Ray &ray, const uniform float samplingRate, vec4f &sampledColor, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { // Look for the closest sample across all volumes float minDist = inf; @@ -154,7 +169,7 @@ static float sampleAllVolumes(SciVisRenderContext &rc, #else foreach_unique (m in vi.volumetricModel) { #endif - sampleVolume(rc, vc, m, ray, vi, samplingRate, ff); + sampleVolume(rc, vc, m, ray, vi, 
samplingRate, ffh); } vc.ready = 1; } @@ -189,7 +204,7 @@ SYCL_EXTERNAL vec4f integrateVolumeIntervalsGradient(SciVisRenderContext &rc, Ray &ray, varying LDSampler *uniform ldSampler, const uniform float samplingRate, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { #ifdef OSPRAY_TARGET_SYCL // Only a single volume context is supported on the GPU, no dynamic allocation @@ -202,6 +217,7 @@ SYCL_EXTERNAL vec4f integrateVolumeIntervalsGradient(SciVisRenderContext &rc, pushTLS(reduce_max(volumeIntervals.numVolumeIntervals) * sizeof(varying VolumeContext)); #endif + const uniform FeatureFlags ff = getFeatureFlags(ffh); // Sampling position jitter const float jitter = LDSampler_getFloat(ldSampler, 0); @@ -278,12 +294,16 @@ SYCL_EXTERNAL vec4f integrateVolumeIntervalsGradient(SciVisRenderContext &rc, foreach_unique (m in model) { #endif // Create volume interval iterator - vc.intervalIterator = vklInitIntervalIteratorV( #if OPENVKL_VERSION_MAJOR == 1 - m->vklIntervalContext, + // We know if we have OpenVKL version 1 we're not targetting SYCL + vc.intervalIterator = vklInitIntervalIteratorV(m->vklIntervalContext, + (varying vkl_vec3f *)&vc.org, + (varying vkl_vec3f *)&vc.dir, + (varying vkl_range1f *)&rInterval, + &time, + vc.intervalIteratorBuffer); #else - &m->vklIntervalContext, -#endif + vc.intervalIterator = vklInitIntervalIteratorV(&m->vklIntervalContext, (varying vkl_vec3f *)&vc.org, (varying vkl_vec3f *)&vc.dir, (varying vkl_range1f *)&rInterval, @@ -292,7 +312,13 @@ SYCL_EXTERNAL vec4f integrateVolumeIntervalsGradient(SciVisRenderContext &rc, #else &time, #endif - vc.intervalIteratorBuffer); + vc.intervalIteratorBuffer +#ifdef OSPRAY_TARGET_SYCL + , + ff.volume +#endif + ); +#endif } } @@ -306,7 +332,7 @@ SYCL_EXTERNAL vec4f integrateVolumeIntervalsGradient(SciVisRenderContext &rc, ray, samplingRate, sampledColor, - ff); + ffh); // Exit loop if nothing sampled if (dist == inf) diff --git a/modules/cpu/render/util.ih 
b/modules/cpu/render/util.ih index 3ab1fe677..638ce960c 100644 --- a/modules/cpu/render/util.ih +++ b/modules/cpu/render/util.ih @@ -4,6 +4,7 @@ #pragma once #include "OSPConfig.h" +#include "common/FeatureFlagsEnum.h" #include "common/World.ih" #include "rkcommon/math/vec.ih" @@ -15,7 +16,7 @@ SYCL_EXTERNAL float computeAO(const Renderer *uniform, const uniform int sampleCnt, const uniform float aoRadius, const varying vec3i &sampleID, - const uniform FeatureFlags &ff); + const uniform FeatureFlagsHandler &ff); // struct that stores a precomputed z-order for tiles of TILE_SIZE x TILE_SIZE // pixels diff --git a/modules/cpu/render/util.ispc b/modules/cpu/render/util.ispc index 2bf494e3b..1b67e30ee 100644 --- a/modules/cpu/render/util.ispc +++ b/modules/cpu/render/util.ispc @@ -1,6 +1,7 @@ // Copyright 2009 Intel Corporation // SPDX-License-Identifier: Apache-2.0 +#include "common/FeatureFlagsEnum.h" #include "common/World.ih" #include "math/random.ih" #include "math/sampling.ih" @@ -29,7 +30,7 @@ SYCL_EXTERNAL float computeAO(const Renderer *uniform renderer, const uniform int sampleCnt, const uniform float aoRadius, const varying vec3i &sampleID, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { const uniform int accumID = reduce_max(sampleID.z) * sampleCnt; @@ -59,7 +60,7 @@ SYCL_EXTERNAL float computeAO(const Renderer *uniform renderer, Ray ao_ray; setRay(ao_ray, dg.P, ao_dir, dg.epsilon, aoRadius); - if (isOccluded(world, ao_ray, ff)) + if (isOccluded(world, ao_ray, ffh)) hits++; } diff --git a/modules/cpu/volume/Volume.cpp b/modules/cpu/volume/Volume.cpp index 2e7dd805f..9652dfe83 100644 --- a/modules/cpu/volume/Volume.cpp +++ b/modules/cpu/volume/Volume.cpp @@ -29,7 +29,7 @@ namespace ospray { Volume::Volume(api::ISPCDevice &device, const std::string &type) : AddStructShared(device.getIspcrtContext(), device), vklType(type), - featureFlags(FFV_VOLUME) + vklFeatureFlags(VKL_FEATURE_FLAGS_NONE) { // check VKL has default config for 
VDB if (type == "vdb" @@ -93,6 +93,10 @@ void Volume::commit() vklSampler = vklNewSampler(vklVolume); vklCommit(vklSampler); +#if OPENVKL_VERSION_MAJOR > 1 + vklFeatureFlags = vklGetFeatureFlags(vklSampler); +#endif + // Setup Embree user-defined geometry rtcSetGeometryUserData(embreeGeometry, getSh()); rtcSetGeometryUserPrimitiveCount(embreeGeometry, 1); diff --git a/modules/cpu/volume/Volume.h b/modules/cpu/volume/Volume.h index 8fa8afdd8..3717031f7 100644 --- a/modules/cpu/volume/Volume.h +++ b/modules/cpu/volume/Volume.h @@ -5,8 +5,8 @@ #pragma once #include "ISPCDeviceObject.h" -#include "common/StructShared.h" #include "common/FeatureFlagsEnum.h" +#include "common/StructShared.h" // embree #include "common/Embree.h" // openvkl @@ -29,7 +29,7 @@ struct OSPRAY_SDK_INTERFACE Volume std::string toString() const override; void commit() override; - FeatureFlagsVolume getFeatureFlagsVolume() const; + FeatureFlags getFeatureFlags() const; private: void checkDataStride(const Data *) const; @@ -49,14 +49,16 @@ struct OSPRAY_SDK_INTERFACE Volume std::string vklType; - FeatureFlagsVolume featureFlags; + VKLFeatureFlags vklFeatureFlags = VKL_FEATURE_FLAGS_NONE; }; OSPTYPEFOR_SPECIALIZATION(Volume *, OSP_VOLUME); -inline FeatureFlagsVolume Volume::getFeatureFlagsVolume() const +inline FeatureFlags Volume::getFeatureFlags() const { - return featureFlags; + FeatureFlags ff; + ff.volume = vklFeatureFlags; + return ff; } } // namespace ospray diff --git a/modules/cpu/volume/Volume.ih b/modules/cpu/volume/Volume.ih index 8ba3d9f6c..355d8a8d8 100644 --- a/modules/cpu/volume/Volume.ih +++ b/modules/cpu/volume/Volume.ih @@ -18,6 +18,7 @@ #include "openvkl/device/openvkl.h" #endif #endif +#include "common/FeatureFlags.ih" #include "rkcommon/math/box.ih" // c++ shared #include "VolumeShared.h" @@ -37,15 +38,26 @@ inline float Volume_getSample(const Volume *uniform volume, const vec3f &P) &((const vkl_vec3f &)P)); } -inline vec3f Volume_getGradient(const Volume *uniform volume, const 
vec3f &P) +inline vec3f Volume_getGradient(const Volume *uniform volume, + const vec3f &P, + const uniform FeatureFlagsHandler &ffh) { + const uniform FeatureFlags ff = getFeatureFlags(ffh); + vkl_vec3f result = vklComputeGradientV( #if OPENVKL_VERSION_MAJOR == 1 volume->vklSampler, #else &volume->vklSampler, #endif - &((const vkl_vec3f &)P)); + &((const vkl_vec3f &)P) +#ifdef OSPRAY_TARGET_SYCL + , + 0, + 0.f, + ff.volume +#endif + ); // TODO: remove it once VKL no longer returns sporadic NaNs if (isnan(result.x)) diff --git a/modules/cpu/volume/VolumetricModel.h b/modules/cpu/volume/VolumetricModel.h index 38b7f3db3..b6a5ac0c8 100644 --- a/modules/cpu/volume/VolumetricModel.h +++ b/modules/cpu/volume/VolumetricModel.h @@ -31,9 +31,7 @@ struct OSPRAY_SDK_INTERFACE VolumetricModel Ref getVolume() const; - FeatureFlagsGeometry getFeatureFlagsGeometry() const; - FeatureFlagsVolume getFeatureFlagsVolume() const; - FeatureFlagsOther getFeatureFlagsOther() const; + FeatureFlags getFeatureFlags() const; private: box3f volumeBounds; @@ -44,19 +42,9 @@ struct OSPRAY_SDK_INTERFACE VolumetricModel OSPTYPEFOR_SPECIALIZATION(VolumetricModel *, OSP_VOLUMETRIC_MODEL); -inline FeatureFlagsGeometry VolumetricModel::getFeatureFlagsGeometry() const +inline FeatureFlags VolumetricModel::getFeatureFlags() const { - return FFG_NONE; -} - -inline FeatureFlagsVolume VolumetricModel::getFeatureFlagsVolume() const -{ - return volume->getFeatureFlagsVolume(); -} - -inline FeatureFlagsOther VolumetricModel::getFeatureFlagsOther() const -{ - return FFO_NONE; + return volume->getFeatureFlags(); } } // namespace ospray diff --git a/modules/mpi/ospray/CMakeLists.txt b/modules/mpi/ospray/CMakeLists.txt index f110e94bf..6ce9ada22 100644 --- a/modules/mpi/ospray/CMakeLists.txt +++ b/modules/mpi/ospray/CMakeLists.txt @@ -94,7 +94,6 @@ add_definitions_ispc( -DOSPRAY_BEGIN_ISPC_NAMESPACE= -DOSPRAY_END_ISPC_NAMESPACE= -DSYCL_EXTERNAL= - -D__noinline= ) if (OSPRAY_ENABLE_VOLUMES) diff --git 
a/modules/mpi/ospray/fb/DistributedFrameBuffer.cpp b/modules/mpi/ospray/fb/DistributedFrameBuffer.cpp index 35c2824b0..5582bfc52 100644 --- a/modules/mpi/ospray/fb/DistributedFrameBuffer.cpp +++ b/modules/mpi/ospray/fb/DistributedFrameBuffer.cpp @@ -370,7 +370,53 @@ const void *DFB::mapBuffer(OSPFrameBufferChannel channel) "#osp:mpi:dfb: tried to 'ospMap()' a frame " "buffer that doesn't have a host-side correspondence"); } - return localFBonMaster->mapBuffer(channel); + + const void *buf = nullptr; + + // DFB writes directly to the localFB's host-side memory, so we don't want + // to call map/unmap here because it'll copy over the unused/empty GPU + // buffers for the channel. + switch (channel) { + case OSP_FB_COLOR: { + buf = localFBonMaster->colorBuffer ? localFBonMaster->colorBuffer->data() + : nullptr; + } break; + case OSP_FB_DEPTH: { + buf = localFBonMaster->depthBuffer ? localFBonMaster->depthBuffer->data() + : nullptr; + } break; + case OSP_FB_NORMAL: { + buf = localFBonMaster->normalBuffer ? localFBonMaster->normalBuffer->data() + : nullptr; + } break; + case OSP_FB_ALBEDO: { + buf = localFBonMaster->albedoBuffer ? localFBonMaster->albedoBuffer->data() + : nullptr; + } break; + case OSP_FB_ID_PRIMITIVE: { + buf = localFBonMaster->primitiveIDBuffer + ? localFBonMaster->primitiveIDBuffer->data() + : nullptr; + } break; + case OSP_FB_ID_OBJECT: { + buf = localFBonMaster->objectIDBuffer + ? localFBonMaster->objectIDBuffer->data() + : nullptr; + } break; + case OSP_FB_ID_INSTANCE: { + buf = localFBonMaster->instanceIDBuffer + ? 
localFBonMaster->instanceIDBuffer->data() + : nullptr; + } break; + default: + break; + } + + if (buf) { + this->refInc(); + } + + return buf; } void DFB::unmap(const void *mappedMem) @@ -381,7 +427,9 @@ void DFB::unmap(const void *mappedMem) "buffer that doesn't have a host-side color " "buffer"); } - localFBonMaster->unmap(mappedMem); + if (mappedMem) { + this->refDec(); + } } void DFB::waitUntilFinished() diff --git a/modules/mpi/ospray/render/DistributedLoadBalancer.cpp b/modules/mpi/ospray/render/DistributedLoadBalancer.cpp index 310d74553..77985f572 100644 --- a/modules/mpi/ospray/render/DistributedLoadBalancer.cpp +++ b/modules/mpi/ospray/render/DistributedLoadBalancer.cpp @@ -409,7 +409,7 @@ void DistributedLoadBalancer::renderFrameReplicatedDynamicLB( } #ifdef ENABLE_PROFILING - start = ProfilingPoint(); + auto start = ProfilingPoint(); #endif const int sparseFbChannelFlags = @@ -517,12 +517,20 @@ void DistributedLoadBalancer::renderFrameReplicatedStaticLB( const utility::ArrayView tiles = ownedTilesFb->getTiles(); const utility::ArrayView tileIDs = ownedTilesFb->getTileIDs(); +#ifdef ENABLE_PROFILING + auto startRenderTasks = ProfilingPoint(); +#endif + renderer->renderTasks(ownedTilesFb, camera, world, perFrameData, ownedTilesFb->getRenderTaskIDs(renderer->errorThreshold)); +#ifdef ENABLE_PROFILING + auto endRenderTasks = ProfilingPoint(); +#endif + // TODO: Now the tile setting happens as a bulk-sync operation after // rendering, because we still need to send them through the compositing // pipeline. 
The ISPC-side rendering code doesn't know about this and in the @@ -535,6 +543,18 @@ void DistributedLoadBalancer::renderFrameReplicatedStaticLB( } dfb->setTile(tiles[i]); }); +#ifdef ENABLE_PROFILING + auto endWriteTiles = ProfilingPoint(); + + std::cout << "Render tasks took: " + << elapsedTimeMs(startRenderTasks, endRenderTasks) + << "ms, CPU %: " << cpuUtilization(startRenderTasks, endRenderTasks) + << "%\n" + << "Parallel write tiles took: " + << elapsedTimeMs(endRenderTasks, endWriteTiles) + << "ms, CPU %: " << cpuUtilization(endRenderTasks, endWriteTiles) + << "%\n"; +#endif } } // namespace mpi diff --git a/modules/mpi/ospray/render/distributed/DistributedRaycast.cpp b/modules/mpi/ospray/render/distributed/DistributedRaycast.cpp index eb39f43f0..e02e67ca8 100644 --- a/modules/mpi/ospray/render/distributed/DistributedRaycast.cpp +++ b/modules/mpi/ospray/render/distributed/DistributedRaycast.cpp @@ -22,6 +22,7 @@ #ifndef OSPRAY_TARGET_SYCL #include "render/distributed/DistributedRaycast_ispc.h" #else +#include "common/FeatureFlags.ih" namespace ispc { SYCL_EXTERNAL void DistributedRaycast_renderRegionToTileTask(void *_self, void *_fb, @@ -31,9 +32,8 @@ SYCL_EXTERNAL void DistributedRaycast_renderRegionToTileTask(void *_self, void *perFrameData, const void *_taskIDs, const int taskIndex0, - const uniform FeatureFlags &ff); + const uniform FeatureFlagsHandler &ffh); } -constexpr sycl::specialization_id specFeatureFlags; #endif namespace ospray { @@ -120,10 +120,10 @@ void DistributedRaycastRenderer::renderRegionTasks(SparseFrameBuffer *fb, auto event = device.getSyclQueue().submit([&](sycl::handler &cgh) { FeatureFlags ff = world->getFeatureFlags(); - ff.other |= featureFlags; - ff.other |= fb->getFeatureFlagsOther(); - ff.other |= camera->getFeatureFlagsOther(); - cgh.set_specialization_constant(ff); + ff |= featureFlags; + ff |= fb->getFeatureFlags(); + ff |= camera->getFeatureFlags(); + cgh.set_specialization_constant(ff); const sycl::nd_range<1> 
dispatchRange = device.computeDispatchRange(numTasks, 16); @@ -131,8 +131,7 @@ void DistributedRaycastRenderer::renderRegionTasks(SparseFrameBuffer *fb, [=](sycl::nd_item<1> taskIndex, sycl::kernel_handler kh) { const box3f regionCopy = region; if (taskIndex.get_global_id(0) < numTasks) { - const FeatureFlags ff = - kh.get_specialization_constant(); + ispc::FeatureFlagsHandler ffh(kh); ispc::DistributedRaycast_renderRegionToTileTask(&rendererSh->super, fbSh, cameraSh, @@ -141,7 +140,7 @@ void DistributedRaycastRenderer::renderRegionTasks(SparseFrameBuffer *fb, perFrameData, taskIDsPtr, taskIndex.get_global_id(0), - ff); + ffh); } }); }); diff --git a/modules/mpi/ospray/render/distributed/DistributedRaycast.ih b/modules/mpi/ospray/render/distributed/DistributedRaycast.ih deleted file mode 100644 index 94b25feb0..000000000 --- a/modules/mpi/ospray/render/distributed/DistributedRaycast.ih +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "common/DistributedWorldShared.h" -#include "fb/SparseFBShared.h" -#include "render/Renderer.ih" -#include "render/distributed/DistributedRaycastShared.h" - -#include "rkcommon/math/box.ih" -#include "rkcommon/math/vec.ih" - -OSPRAY_BEGIN_ISPC_NAMESPACE - -SYCL_EXTERNAL void DRR_renderRegionSample(DistributedRenderer *uniform _self, - SparseFB *uniform fb, - DistributedWorld *uniform world, - const box3f *uniform region, - const vec2f ®ionInterval, - void *uniform perFrameData, - varying ScreenSample &sample); - -OSPRAY_END_ISPC_NAMESPACE diff --git a/modules/mpi/ospray/render/distributed/DistributedRaycast.ispc b/modules/mpi/ospray/render/distributed/DistributedRaycast.ispc index a7a353e31..40472e076 100644 --- a/modules/mpi/ospray/render/distributed/DistributedRaycast.ispc +++ b/modules/mpi/ospray/render/distributed/DistributedRaycast.ispc @@ -5,6 +5,7 @@ #include "camera/Camera.ih" #include "camera/CameraDispatch.ih" +#include "common/FeatureFlags.ih" #include 
"common/Instance.ih" #include "common/Intersect.ih" #include "common/VolumeIntervals.ih" @@ -39,11 +40,14 @@ OSPRAY_BEGIN_ISPC_NAMESPACE // The distributed raycast renderer uses its own volume interval integration // because we want to apply the jitter before offsetting our step size to stay // inside the region, not after. -vec4f DRR_integrateVolumeInterval(const ScreenSample &sample, +inline vec4f DRR_integrateVolumeInterval(const ScreenSample &sample, const VolumeInterval &interval, Ray &ray, - uniform float samplingRate) + uniform float samplingRate, + const uniform FeatureFlagsHandler &ffh) { + const uniform FeatureFlags ff = getFeatureFlags(ffh); + VolumetricModel *varying volModel = interval.volumetricModel; // Note: required to WA compiler dropping symbols on link incorrectly if (volModel == NULL) { @@ -68,24 +72,44 @@ vec4f DRR_integrateVolumeInterval(const ScreenSample &sample, TransferFunction *uniform tf = vm->transferFunction; float time = 0.5f; - VKLIntervalIterator intervalIterator = vklInitIntervalIteratorV( + #if OPENVKL_VERSION_MAJOR == 1 - vm->vklIntervalContext, + // We know if we have OpenVKL version 1 we're not targetting SYCL + VKLIntervalIterator intervalIterator = + vklInitIntervalIteratorV(vm->vklIntervalContext, + (varying vkl_vec3f *)&ray.org, + (varying vkl_vec3f *)&ray.dir, + (varying vkl_range1f *)&interval.interval, + &time, + (void *uniform)intervalIteratorBuffer); #else - &vm->vklIntervalContext, -#endif - (varying vkl_vec3f *)&ray.org, - (varying vkl_vec3f *)&ray.dir, - (varying vkl_range1f *)&interval.interval, + VKLIntervalIterator intervalIterator = + vklInitIntervalIteratorV(&vm->vklIntervalContext, + (varying vkl_vec3f *)&ray.org, + (varying vkl_vec3f *)&ray.dir, + (varying vkl_range1f *)&interval.interval, #ifdef OSPRAY_TARGET_SYCL - time, + time, #else - &time, + &time, +#endif + (void *uniform)intervalIteratorBuffer +#ifdef OSPRAY_TARGET_SYCL + , + ff.volume +#endif + ); #endif - (void *uniform)intervalIteratorBuffer); 
VKLInterval interval; - while (vklIterateIntervalV(intervalIterator, &interval) && alpha < 0.99f) { + while (vklIterateIntervalV(intervalIterator, + &interval +#ifdef OSPRAY_TARGET_SYCL + , + ff.volume +#endif + ) + && alpha < 0.99f) { const float nominalSamplingDt = interval.nominalDeltaT / samplingRate; // initial sub interval, based on our renderer-defined sampling rate @@ -101,13 +125,21 @@ vec4f DRR_integrateVolumeInterval(const ScreenSample &sample, // Get volume sample vec3f p = transformedRay.org + transformedRay.t0 * transformedRay.dir; - const float sample = vklComputeSampleV( #if OPENVKL_VERSION_MAJOR == 1 - volume->vklSampler, + // We know if we have OpenVKL version 1 we're not targetting SYCL + const float sample = vklComputeSampleV( + volume->vklSampler, (const varying vkl_vec3f *uniform) & p); #else - &volume->vklSampler, + const float sample = vklComputeSampleV(&volume->vklSampler, + (const varying vkl_vec3f *uniform) & p +#ifdef OSPRAY_TARGET_SYCL + , + 0, + time, + ff.volume +#endif + ); #endif - (const varying vkl_vec3f *uniform) & p); if (!isnan(sample)) { vec4f sampleColorOpacity = @@ -140,7 +172,7 @@ inline float computeAO(const DistributedRaycastRenderer *uniform self, const World *uniform world, const varying vec3i &sampleID, const varying DifferentialGeometry &dg, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { const uniform int &sampleCnt = self->aoSamples; const uniform int accumID = reduce_max(sampleID.z) * sampleCnt; @@ -169,7 +201,7 @@ inline float computeAO(const DistributedRaycastRenderer *uniform self, Ray ao_ray; setRay(ao_ray, dg.P, ao_dir, dg.epsilon, self->aoRadius); - if (isOccluded(world, ao_ray, ff)) + if (isOccluded(world, ao_ray, ffh)) hits++; } @@ -178,13 +210,13 @@ inline float computeAO(const DistributedRaycastRenderer *uniform self, return 1.0f - (hits / (float)sampleCnt); } -vec4f DRR_shadeSurface(const DistributedRaycastRenderer *uniform self, +inline vec4f DRR_shadeSurface(const 
DistributedRaycastRenderer *uniform self, const FrameBuffer *uniform fb, const DistributedWorld *uniform world, const vec3i &sampleID, const Ray &ray, const DifferentialGeometry &dg, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { // TODO: DRR should have its own support for OBJ material and lighting model @@ -211,7 +243,7 @@ vec4f DRR_shadeSurface(const DistributedRaycastRenderer *uniform self, const float eyeLightIntensity = absf(dot(dg.Ns, ray.dir)); vec3f color = surfaceColor * eyeLightIntensity; if (self->aoSamples > 0) { - float ao = computeAO(self, fb, &world->super, sampleID, dg, ff); + float ao = computeAO(self, fb, &world->super, sampleID, dg, ffh); color = color * ao; } return make_vec4f(color, opacity); @@ -264,25 +296,26 @@ vec4f DRR_shadeSurface(const DistributedRaycastRenderer *uniform self, } // TODO: Better separate geometry and volume code in this function -SYCL_EXTERNAL void DRR_renderRegionSample(Renderer *uniform _self, +inline void DRR_renderRegionSample(Renderer *uniform _self, SparseFB *uniform fb, DistributedWorld *uniform world, const box3f *uniform region, const vec2f ®ionInterval, void *uniform perFrameData, varying ScreenSample &sample, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { DistributedRaycastRenderer *uniform self = (DistributedRaycastRenderer * uniform) _self; + const uniform FeatureFlags ff = getFeatureFlags(ffh); Ray &geomRay = sample.ray; Ray volumeRay = sample.ray; - traceRay(&world->super, geomRay, ff); + traceRay(&world->super, geomRay, ffh); #ifdef OSPRAY_ENABLE_VOLUMES VolumeInterval volumeInterval; - if (ff.volume & FFV_VOLUME) { + if (ff.volume) { traceVolumeRay(&world->super, volumeRay, volumeInterval); volumeInterval.interval.lower = max(volumeInterval.interval.lower, regionInterval.x); @@ -299,19 +332,18 @@ SYCL_EXTERNAL void DRR_renderRegionSample(Renderer *uniform _self, // TODO: Doesn't seem like much jittering happens with the volume integration 
vec4f outputColor = make_vec4f(0.f); while (outputColor.w < 0.99f) { - const bool haveGeometryHit = hadHit(geomRay) - && geomRay.t >= regionInterval.x && geomRay.t <= regionInterval.y; + const bool haveGeometryHit = ff.geometry ? hadHit(geomRay) + && geomRay.t >= regionInterval.x && geomRay.t <= regionInterval.y + : false; #ifdef OSPRAY_ENABLE_VOLUMES - const bool haveVolumeHit = - (ff.volume & FFV_VOLUME) ? hasInterval(volumeInterval) : false; + const bool haveVolumeHit = ff.volume ? hasInterval(volumeInterval) : false; const bool bothHit = haveGeometryHit && haveVolumeHit; const bool eitherHit = haveGeometryHit || haveVolumeHit; - const bool volumeFirst = (ff.volume & FFV_VOLUME) - ? volumeInterval.interval.lower < geomRay.t - : false; + const bool volumeFirst = + ff.volume ? volumeInterval.interval.lower < geomRay.t : false; #else const bool bothHit = false; const bool eitherHit = haveGeometryHit; @@ -324,21 +356,21 @@ SYCL_EXTERNAL void DRR_renderRegionSample(Renderer *uniform _self, vec4f volumeColor = make_vec4f(0.f); vec4f surfaceColor = make_vec4f(0.f); DifferentialGeometry dg; - if (haveGeometryHit) { + if (haveGeometryHit && ff.geometry) { postIntersect(&world->super, &self->super, dg, geomRay, DG_NG | DG_NS | DG_NORMALIZE | DG_FACEFORWARD | DG_COLOR | DG_TEXCOORD, - ff); + ffh); surfaceColor = DRR_shadeSurface( - self, &fb->super, world, sample.sampleID, geomRay, dg, ff); + self, &fb->super, world, sample.sampleID, geomRay, dg, ffh); } #ifdef OSPRAY_ENABLE_VOLUMES // Always just integrate the volume when it comes in front of the geometry - if (haveVolumeHit && volumeFirst && (ff.volume & FFV_VOLUME)) { + if (haveVolumeHit && volumeFirst && ff.volume) { volumeInterval.interval.upper = min(geomRay.t, volumeInterval.interval.upper); @@ -376,7 +408,7 @@ SYCL_EXTERNAL void DRR_renderRegionSample(Renderer *uniform _self, volumeInterval.interval.lower, volumeInterval.interval.upper); volumeColor = DRR_integrateVolumeInterval( - sample, volumeInterval, 
volumeRay, self->volumeSamplingRate); + sample, volumeInterval, volumeRay, self->volumeSamplingRate, ffh); } #endif @@ -400,10 +432,11 @@ SYCL_EXTERNAL void DRR_renderRegionSample(Renderer *uniform _self, // Step the volume ray forwards as well volumeRay = geomRay; - traceRay(&world->super, geomRay, ff); + traceRay(&world->super, geomRay, ffh); #ifdef OSPRAY_ENABLE_VOLUMES - if (ff.volume & FFV_VOLUME) + if (ff.volume) { traceVolumeRay(&world->super, volumeRay, volumeInterval); + } #endif } sample.rgb = make_vec3f(outputColor); @@ -426,7 +459,7 @@ SYCL_EXTERNAL void DistributedRaycast_renderRegionToTileTask( void *uniform perFrameData, const void *uniform _taskIDs, const int taskIndex0, - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { Renderer *uniform self = (Renderer * uniform) _self; SparseFB *uniform fb = (SparseFB * uniform) _fb; @@ -435,7 +468,7 @@ SYCL_EXTERNAL void DistributedRaycast_renderRegionToTileTask( const box3f *uniform region = (const box3f *uniform)_region; const uint32 *uniform taskIDs = (const uint32 *uniform)_taskIDs; DR_default_renderRegionToTile( - self, fb, camera, world, region, perFrameData, taskIDs, taskIndex0, ff); + self, fb, camera, world, region, perFrameData, taskIDs, taskIndex0, ffh); } #else export void DistributedRaycast_renderRegionToTileTask(void *uniform _self, @@ -453,8 +486,9 @@ export void DistributedRaycast_renderRegionToTileTask(void *uniform _self, DistributedWorld *uniform world = (DistributedWorld * uniform) _world; const box3f *uniform region = (const box3f *uniform)_region; const uint32 *uniform taskIDs = (const uint32 *uniform)_taskIDs; + uniform FeatureFlagsHandler ffh; launch[numTasks] DR_default_renderRegionToTile( - self, fb, camera, world, region, perFrameData, taskIDs, ffAll()); + self, fb, camera, world, region, perFrameData, taskIDs, ffh); sync; } #endif diff --git a/modules/mpi/ospray/render/distributed/DistributedRenderer.cpp 
b/modules/mpi/ospray/render/distributed/DistributedRenderer.cpp index 2a79108cb..1a4e7d437 100644 --- a/modules/mpi/ospray/render/distributed/DistributedRenderer.cpp +++ b/modules/mpi/ospray/render/distributed/DistributedRenderer.cpp @@ -9,6 +9,7 @@ #ifndef OSPRAY_TARGET_SYCL #include "render/distributed/DistributedRenderer_ispc.h" #else +#include "common/FeatureFlags.ih" namespace ispc { SYCL_EXTERNAL void DR_default_computeRegionVisibility(Renderer *uniform self, SparseFB *uniform fb, @@ -18,9 +19,8 @@ SYCL_EXTERNAL void DR_default_computeRegionVisibility(Renderer *uniform self, void *uniform perFrameData, const uint32 *uniform taskIDs, const int taskIndex0, - const uniform ospray::FeatureFlags &ff); + const uniform FeatureFlagsHandler &ff); } -constexpr sycl::specialization_id specFeatureFlags; #endif namespace ospray { @@ -62,18 +62,22 @@ void DistributedRenderer::computeRegionVisibility(SparseFrameBuffer *fb, auto event = device.getSyclQueue().submit([&](sycl::handler &cgh) { FeatureFlags ff = world->getFeatureFlags(); - ff.other |= featureFlags; - ff.other |= fb->getFeatureFlagsOther(); - ff.other |= camera->getFeatureFlagsOther(); - cgh.set_specialization_constant(ff); + ff.other = FFO_NONE; + ff |= fb->getFeatureFlags(); + ff |= camera->getFeatureFlags(); + // Disable features we don't need for the region visibility computation + ff.geometry = FFG_BOX | FFG_USER_GEOMETRY; +#ifdef OSPRAY_ENABLE_VOLUMES + ff.volume = VKL_FEATURE_FLAGS_NONE; +#endif + cgh.set_specialization_constant(ff); const sycl::nd_range<1> dispatchRange = device.computeDispatchRange(numTasks, 16); cgh.parallel_for(dispatchRange, [=](sycl::nd_item<1> taskIndex, sycl::kernel_handler kh) { if (taskIndex.get_global_id(0) < numTasks) { - const FeatureFlags ff = - kh.get_specialization_constant(); + ispc::FeatureFlagsHandler ffh(kh); ispc::DR_default_computeRegionVisibility(rendererSh, fbSh, cameraSh, @@ -82,7 +86,7 @@ void DistributedRenderer::computeRegionVisibility(SparseFrameBuffer *fb, 
perFrameData, taskIDsPtr, taskIndex.get_global_id(0), - ff); + ffh); } }); }); diff --git a/modules/mpi/ospray/render/distributed/DistributedRenderer.ispc b/modules/mpi/ospray/render/distributed/DistributedRenderer.ispc index 9290da0ea..b1f83cacd 100644 --- a/modules/mpi/ospray/render/distributed/DistributedRenderer.ispc +++ b/modules/mpi/ospray/render/distributed/DistributedRenderer.ispc @@ -34,6 +34,7 @@ OSPRAY_BEGIN_ISPC_NAMESPACE struct RayQueryContextRegion { RTCRayQueryContext ectx; + const FeatureFlagsHandler *uniform ffh; uint8 *regionVisible; }; @@ -83,7 +84,7 @@ task #ifdef OSPRAY_TARGET_SYCL const int taskIndex0, #endif - const uniform FeatureFlags &ff) + const uniform FeatureFlagsHandler &ffh) { ScreenSample screenSample; screenSample.z = inf; @@ -92,7 +93,7 @@ task CameraSample cameraSample; uniform RenderTaskDesc taskDesc = FrameBuffer_dispatch_getRenderTaskDesc( - &fb->super, taskIDs[taskIndex0], ff.other); + &fb->super, taskIDs[taskIndex0], ffh); if (fb->super.cancelRender || isEmpty(taskDesc.region)) { return; @@ -154,13 +155,13 @@ task cameraSample.lens.y = 0.0f; cameraSample.time = 0.5f; - Camera_dispatch_initRay( - camera, screenSample.ray, cameraSample, ff.other); + Camera_dispatch_initRay(camera, screenSample.ray, cameraSample, ffh); screenSample.ray.t = min(screenSample.ray.t, tMax); if (world->regionScene) { uniform RayQueryContextRegion context; rtcInitRayQueryContext(&context.ectx); + context.ffh = &ffh; context.regionVisible = tileRegionVisible; uniform RTCIntersectArguments intersectArgs; @@ -194,6 +195,7 @@ export void DistributedRenderer_computeRegionVisibility(void *uniform _self, { Renderer *uniform self = (uniform Renderer * uniform) _self; const uint32 *uniform taskIDs = (const uint32 *uniform)_taskIDs; + uniform FeatureFlagsHandler ffh; launch[numTasks] DR_default_computeRegionVisibility(self, (SparseFB * uniform) fb, (Camera * uniform) camera, @@ -201,9 +203,10 @@ export void DistributedRenderer_computeRegionVisibility(void 
*uniform _self, regionVisible, perFrameData, taskIDs, - ffAll()); + ffh); } +#ifndef OSPRAY_TARGET_SYCL export void DistributedRenderer_pick(const void *uniform _self, const void *uniform _fb, const void *uniform _camera, @@ -216,6 +219,7 @@ export void DistributedRenderer_pick(const void *uniform _self, uniform float &depth, uniform int32 &hit) { + uniform FeatureFlagsHandler ffh; const Renderer *uniform self = (const Renderer *uniform)_self; const FrameBuffer *uniform fb = (const FrameBuffer *uniform)_fb; const Camera *uniform camera = (const Camera *uniform)_camera; @@ -231,8 +235,8 @@ export void DistributedRenderer_pick(const void *uniform _self, cameraSample.time = 0.5f; Ray ray; - Camera_dispatch_initRay(camera, ray, cameraSample, FFO_ALL); - ray.t = min(ray.t, Renderer_getMaxDepth(self, cameraSample.screen, FFO_ALL)); + Camera_dispatch_initRay(camera, ray, cameraSample, ffh); + ray.t = min(ray.t, Renderer_getMaxDepth(self, cameraSample.screen, ffh)); // Clip the ray to each region this rank owns and trace the clipped ray to // find the picked object @@ -249,7 +253,7 @@ export void DistributedRenderer_pick(const void *uniform _self, && isect.entry.t <= regionRay.t) { regionRay.t0 = isect.entry.t; regionRay.t = min(regionRay.t, isect.exit.t); - traceRay(&world->super, regionRay, ffAll()); + traceRay(&world->super, regionRay, ffh); if (hadHit(regionRay)) { closestHit = regionRay.t; @@ -267,6 +271,7 @@ export void DistributedRenderer_pick(const void *uniform _self, } } } +#endif #endif diff --git a/modules/mpi/ospray/render/distributed/DistributedRendererRenderTaskFn.inl b/modules/mpi/ospray/render/distributed/DistributedRendererRenderTaskFn.inl index 0c5c0c5dc..01a30f32c 100644 --- a/modules/mpi/ospray/render/distributed/DistributedRendererRenderTaskFn.inl +++ b/modules/mpi/ospray/render/distributed/DistributedRendererRenderTaskFn.inl @@ -15,7 +15,7 @@ task #ifdef OSPRAY_TARGET_SYCL const int taskIndex0, #endif - const uniform FeatureFlags &ff) + const uniform 
FeatureFlagsHandler &ffh) { const uniform int32 spp = self->spp; @@ -26,7 +26,7 @@ task CameraSample cameraSample; uniform RenderTaskDesc taskDesc = FrameBuffer_dispatch_getRenderTaskDesc( - &fb->super, taskIDs[taskIndex0], ff.other); + &fb->super, taskIDs[taskIndex0], ffh); const uniform int startSampleID = max(taskDesc.accumID, 0) * spp; @@ -58,7 +58,7 @@ task vec2f center = make_vec2f(screenSample.sampleID.x, screenSample.sampleID.y) + 0.5f; const float tMax = - Renderer_getMaxDepth(self, center * fb->super.rcpSize, ff.other); + Renderer_getMaxDepth(self, center * fb->super.rcpSize, ffh); vec3f col = make_vec3f(0.f); float alpha = 0.f; vec3f normal = make_vec3f(0.f); @@ -81,8 +81,7 @@ task cameraSample.lens.y = 0.0f; cameraSample.time = 0.5f; - Camera_dispatch_initRay( - camera, screenSample.ray, cameraSample, ff.other); + Camera_dispatch_initRay(camera, screenSample.ray, cameraSample, ffh); screenSample.ray.t = min(screenSample.ray.t, tMax); // TODO: We could store and use the region t intervals from when @@ -103,7 +102,7 @@ task make_vec2f(regionEnter, regionExit), perFrameData, screenSample, - ff); + ffh); col = col + screenSample.rgb; alpha += screenSample.alpha; @@ -118,7 +117,7 @@ task screenSample.albedo = albedo * rspp; FrameBuffer_dispatch_accumulateSample( - &fb->super, screenSample, taskDesc, ff.other); + &fb->super, screenSample, taskDesc, ffh); } - FrameBuffer_dispatch_completeTask(&fb->super, taskDesc, ff.other); + FrameBuffer_dispatch_completeTask(&fb->super, taskDesc, ffh); } diff --git a/scripts/tests/run_gpu_tests.sh b/scripts/tests/run_gpu_tests.sh index 3c10110cb..f09e853a0 100755 --- a/scripts/tests/run_gpu_tests.sh +++ b/scripts/tests/run_gpu_tests.sh @@ -35,56 +35,34 @@ done mkdir build_regression_tests cd build_regression_tests -cmake -D OSPRAY_TEST_ISA=AVX2 "${SOURCEDIR}/test_image_data" +cmake -D OSPRAY_TEST_ISA=AVX512SKX "${SOURCEDIR}/test_image_data" make -j 4 ospray_test_data # Excluded tests on GPU 
test_filters="ClippingParallel.planes" -test_filters+=":Intersection/SpherePrecision.sphere/8" -test_filters+=":Intersection/SpherePrecision.sphere/9" -test_filters+=":Intersection/SpherePrecision.sphere/10" -test_filters+=":Intersection/SpherePrecision.sphere/11" -test_filters+=":Intersection/SpherePrecision.sphere/20" -test_filters+=":Intersection/SpherePrecision.sphere/21" -test_filters+=":Intersection/SpherePrecision.sphere/22" -test_filters+=":Intersection/SpherePrecision.sphere/23" -test_filters+=":TestScenesGeometry/FromOsprayTesting.test_scenes/3" -test_filters+=":TestScenesGeometry/FromOsprayTesting.test_scenes/4" -test_filters+=":TestScenesGeometry/FromOsprayTesting.test_scenes/5" +# Subdivision surfaces unsupported test_filters+=":TestScenesGeometry/FromOsprayTesting.test_scenes/15" test_filters+=":TestScenesGeometry/FromOsprayTesting.test_scenes/16" test_filters+=":TestScenesGeometry/FromOsprayTesting.test_scenes/17" -test_filters+=":TestScenesGeometry/FromOsprayTesting.test_scenes/21" -test_filters+=":TestScenesGeometry/FromOsprayTesting.test_scenes/22" -test_filters+=":TestScenesGeometry/FromOsprayTesting.test_scenes/23" +# Instancing test also makes use of motion blur, which is unsupported test_filters+=":TestScenesGeometry/FromOsprayTesting.test_scenes/24" test_filters+=":TestScenesGeometry/FromOsprayTesting.test_scenes/25" test_filters+=":TestScenesGeometry/FromOsprayTesting.test_scenes/26" +# Clipping unsupported test_filters+=":TestScenesClipping/FromOsprayTesting.*" -test_filters+=":TestScenesVolumes/FromOsprayTesting.test_scenes/1" +# Multiple volumes, unsupported test_filters+=":TestScenesVolumes/FromOsprayTesting.test_scenes/3" test_filters+=":TestScenesVolumes/FromOsprayTesting.test_scenes/4" test_filters+=":TestScenesVolumes/FromOsprayTesting.test_scenes/5" -test_filters+=":TestScenesVolumes/FromOsprayTesting.test_scenes/6" -test_filters+=":TestScenesVolumes/FromOsprayTesting.test_scenes/7" 
-test_filters+=":TestScenesVolumes/FromOsprayTesting.test_scenes/8" -test_filters+=":TestScenesVolumes/FromOsprayTesting.test_scenes/9" -test_filters+=":TestScenesVolumes/FromOsprayTesting.test_scenes/10" -test_filters+=":TestScenesVolumes/FromOsprayTesting.test_scenes/11" -test_filters+=":TestScenesVolumes/FromOsprayTesting.test_scenes/12" -test_filters+=":TestScenesVolumes/FromOsprayTesting.test_scenes/13" -test_filters+=":TestScenesVolumes/FromOsprayTesting.test_scenes/14" -test_filters+=":TestScenesVolumes/FromOsprayTesting.test_scenes/15" -test_filters+=":TestScenesVolumes/FromOsprayTesting.test_scenes/16" -test_filters+=":TestScenesVolumes/FromOsprayTesting.test_scenes/17" -test_filters+=":TestScenesVolumes/FromOsprayTesting.test_scenes/18" -test_filters+=":TestScenesVolumes/FromOsprayTesting.test_scenes/19" -test_filters+=":TestScenesVolumes/FromOsprayTesting.test_scenes/20" +# Multiple volumes unsupported test_filters+=":TestScenesVolumesStrictParams/FromOsprayTesting.*" -test_filters+=":Transparency/FromOsprayTesting.*" +# Clipping unsupported test_filters+=":TestScenesMaxDepth/FromOsprayTestingMaxDepth.test_scenes/1" test_filters+=":TestScenesMaxDepth/FromOsprayTestingMaxDepth.test_scenes/2" -test_filters+=":TestScenesVolumes/UnstructuredVolume.*" +# Almost all working, some remaining bug on cell-valued unstructured volumes +test_filters+=":TestScenesVolumes/UnstructuredVolume.1" +test_filters+=":TestScenesVolumes/UnstructuredVolume.3" +# Motion blur unsupported test_filters+=":TestMotionBlur/MotionBlurBoxes.*" test_filters+=":CameraRollingShutter/MotionBlurBoxes.*" test_filters+=":CameraStereoRollingShutter/MotionBlurBoxes.*" @@ -92,24 +70,19 @@ test_filters+=":Camera/MotionCamera.*" test_filters+=":CameraOrtho/MotionCamera.*" test_filters+=":CameraStereoRollingShutter/MotionCamera.*" test_filters+=":LightMotionBlur/*" +# Crashing test_filters+=":Primitive/IDBuffer.*" +# Requires non-overlapping multiple volume support on GPU 
test_filters+=":ObjectInstance/IDBuffer.*" +# Subdivision surfaces not supported on GPU test_filters+=":Color/Interpolation.Interpolation/4" test_filters+=":Color/Interpolation.Interpolation/5" test_filters+=":Color/Interpolation.Interpolation/6" test_filters+=":Color/Interpolation.Interpolation/7" test_filters+=":Texcoord/Interpolation.Interpolation/2" test_filters+=":Texcoord/Interpolation.Interpolation/3" -test_filters+=":TestScenesGeometry/FromOsprayTesting.test_scenes/10" -test_filters+=":TestScenesGeometry/Curves.test_scenes/1" -test_filters+=":TestScenesGeometry/Curves.test_scenes/4" -test_filters+=":TestScenesGeometry/Curves.test_scenes/7" -test_filters+=":TestScenesGeometry/Curves.test_scenes/10" -test_filters+=":TestScenesGeometry/Curves.test_scenes/13" -test_filters+=":TestScenesGeometry/Curves.test_scenes/16" +# Variance termination is not quite right test_filters+=":TestScenesVariance/FromOsprayTestingVariance.testScenes/0" -test_filters+=":Appearance/PTBackgroundRefraction.backgroundRefraction/0" -test_filters+=":Appearance/PTBackgroundRefraction.backgroundRefraction/1" export ONEAPI_DEVICE_SELECTOR=level_zero:* From 2f013f89214ef4d65ddca58db733dc1b0c1f171b Mon Sep 17 00:00:00 2001 From: Will Usher Date: Mon, 12 Jun 2023 16:25:13 -0700 Subject: [PATCH 34/42] ispc updated release --- scripts/superbuild/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/superbuild/CMakeLists.txt b/scripts/superbuild/CMakeLists.txt index 22551ef85..8b562901e 100644 --- a/scripts/superbuild/CMakeLists.txt +++ b/scripts/superbuild/CMakeLists.txt @@ -77,7 +77,7 @@ if (DOWNLOAD_ISPC) mark_as_advanced(CLEAR ISPC_VERSION) if (ISPC_VERSION STREQUAL "1.20.0") if (APPLE) - set(ISPC_HASH "a675ac08e6587d6ad7eb563df3db3027a3c18482f404d5b9592bf2ef4a4fab9a") + set(ISPC_HASH "2667fa119761d793c63b6c7ee44e104dcccdaccca8212802b340edb8f8e3027d") elseif (WIN32) set(ISPC_HASH "e212ebfb4e8afb57adc103a2579c52673a3ca49610fbc2a5eae643d3d378548d") else() From 
3656baab16b5202c2f26f4b1306189159221d00d Mon Sep 17 00:00:00 2001 From: Will Usher Date: Mon, 12 Jun 2023 16:40:56 -0700 Subject: [PATCH 35/42] Update CI env --- .github/workflows/gfx-ubuntu22-internal.env | 2 +- .github/workflows/gfx-ubuntu22.env | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/gfx-ubuntu22-internal.env b/.github/workflows/gfx-ubuntu22-internal.env index 6eaa34133..cd34e64bd 100644 --- a/.github/workflows/gfx-ubuntu22-internal.env +++ b/.github/workflows/gfx-ubuntu22-internal.env @@ -1 +1 @@ -GFX_DRIVER_VERSION=neo-builds/ci/master/ci-neo-master-025812/artifacts/linux/ubuntu/22.04 +GFX_DRIVER_VERSION=neo-builds/ci/master/ci-neo-master-026248/artifacts/linux/ubuntu/22.04 diff --git a/.github/workflows/gfx-ubuntu22.env b/.github/workflows/gfx-ubuntu22.env index fb7a3e1d0..c25ff4a49 100644 --- a/.github/workflows/gfx-ubuntu22.env +++ b/.github/workflows/gfx-ubuntu22.env @@ -1 +1 @@ -GFX_DRIVER_VERSION=gfx-driver-builds/ci/comp_igc/gfx-driver-ci-comp_igc-19476/artifacts/Linux/Ubuntu/22.04/Release +GFX_DRIVER_VERSION=gfx-driver-builds/ci/comp_igc/gfx-driver-ci-comp_igc-21027/artifacts/Linux/Ubuntu/22.04/Release From 21234717b3db29e1341baac69c3127c99fe3ea6c Mon Sep 17 00:00:00 2001 From: Will Usher Date: Tue, 13 Jun 2023 14:49:54 -0700 Subject: [PATCH 36/42] Report ospRenderFrame time, and app framerate, exclude GL overhead --- .../benchmarks/GravitySpheresVolume.cpp | 2 +- apps/ospExamples/GLFWOSPRayWindow.cpp | 24 +++++++++++++------ apps/ospExamples/GLFWOSPRayWindow.h | 1 + 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/apps/ospBenchmark/benchmarks/GravitySpheresVolume.cpp b/apps/ospBenchmark/benchmarks/GravitySpheresVolume.cpp index 54b436f38..0a9ce43fd 100644 --- a/apps/ospBenchmark/benchmarks/GravitySpheresVolume.cpp +++ b/apps/ospBenchmark/benchmarks/GravitySpheresVolume.cpp @@ -35,4 +35,4 @@ OSPRAY_DEFINE_BENCHMARK( OSPRAY_DEFINE_BENCHMARK( GravitySpheres, "gravity_spheres_volume", 512, 
"pathtracer"); OSPRAY_DEFINE_SETUP_BENCHMARK( - GravitySpheres, "gravity_spheres_volume", 128, "pathtracer"); \ No newline at end of file + GravitySpheres, "gravity_spheres_volume", 128, "pathtracer"); diff --git a/apps/ospExamples/GLFWOSPRayWindow.cpp b/apps/ospExamples/GLFWOSPRayWindow.cpp index 237b20a99..710e98e9a 100644 --- a/apps/ospExamples/GLFWOSPRayWindow.cpp +++ b/apps/ospExamples/GLFWOSPRayWindow.cpp @@ -10,6 +10,7 @@ #include "imgui.h" // std #include +#include #include #include #include @@ -355,7 +356,8 @@ void GLFWOSPRayWindow::motion(const vec2f &position) void GLFWOSPRayWindow::display() { - static auto displayStart = std::chrono::high_resolution_clock::now(); + static float totalRenderTime = 0.f; + static auto displayStart = std::chrono::steady_clock::now(); if (showUi) buildUI(); @@ -368,17 +370,24 @@ void GLFWOSPRayWindow::display() static bool firstFrame = true; if (firstFrame || currentFrame.isReady()) { // display frame rate in window title - auto displayEnd = std::chrono::high_resolution_clock::now(); - auto durationMilliseconds = + const auto displayEnd = std::chrono::steady_clock::now(); + const auto displayMilliseconds = std::chrono::duration_cast( displayEnd - displayStart); + const auto renderTime = currentFrame.duration(); + totalRenderTime += renderTime; - // update FPS every second + // update fps every 10 frames or every second framesCounter++; - if (durationMilliseconds > std::chrono::seconds(1)) { + if (framesCounter > 9 || totalRenderTime > 1.f + || displayMilliseconds > std::chrono::seconds(1)) { + displayFPS = 1000.f * float(framesCounter) / displayMilliseconds.count(); + latestFPS = float(framesCounter) / totalRenderTime; + displayStart = displayEnd; - latestFPS = 1000.0f * float(framesCounter) / durationMilliseconds.count(); + totalRenderTime = 0.f; framesCounter = 0; + updateTitleBar(); } @@ -516,7 +525,8 @@ void GLFWOSPRayWindow::addObjectToCommit(OSPObject obj) void GLFWOSPRayWindow::updateTitleBar() { std::stringstream 
windowTitle; - windowTitle << "OSPRay: " << std::setprecision(3) << latestFPS << " fps"; + windowTitle << "OSPRay: " << std::setprecision(4) << " render: " << latestFPS + << " fps, app: " << displayFPS << " fps"; if (latestFPS > 0.f && latestFPS < 2.f) { float progress = currentFrame.progress(); windowTitle << " | "; diff --git a/apps/ospExamples/GLFWOSPRayWindow.h b/apps/ospExamples/GLFWOSPRayWindow.h index 0f1b26db7..b45556964 100644 --- a/apps/ospExamples/GLFWOSPRayWindow.h +++ b/apps/ospExamples/GLFWOSPRayWindow.h @@ -127,4 +127,5 @@ class GLFWOSPRayWindow // FPS measurement of last frame uint32_t framesCounter{0}; float latestFPS{0.f}; + float displayFPS{0.f}; }; From e97e0278cee8314b1cabd4f041e5959aed3e70e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=BCnther?= Date: Thu, 15 Jun 2023 16:06:05 +0200 Subject: [PATCH 37/42] Fix CI --- .github/workflows/ci.linux.gpu.yml | 18 +++++------------- scripts/tests/run_gpu_tests.sh | 7 +++++++ 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/.github/workflows/ci.linux.gpu.yml b/.github/workflows/ci.linux.gpu.yml index 1f52a741c..f3b7a9513 100644 --- a/.github/workflows/ci.linux.gpu.yml +++ b/.github/workflows/ci.linux.gpu.yml @@ -21,15 +21,14 @@ jobs: with: force-delete: true # guarantees .gitattributes are respected in working dir image: ubuntu:22.04 - env-from-files: .github/workflows/dpcpp-sycl-nightly.env .github/workflows/gfx-ubuntu22.env + env-from-files: .github/workflows/dpcpp-sycl-nightly.env level-zero-version: public/1.9.9 - install-gfx-driver: false submodules: true cmd: | export SYCL_BUNDLE_ROOT=$DPCPP_ROOT export CC=clang export CXX=clang++ - scripts/build/linux.sh -G Ninja -DBUILD_GLFW=OFF -DBUILD_OSPRAY_MODULE_MULTIDEVICE=OFF -DBUILD_GPU_SUPPORT=ON -DBUILD_OIDN=ON -DOPENVKL_URL=https://${RENDERKIT_GITHUB_TOKEN}@github.com/intel-innersource/libraries.graphics.renderkit.openvkl.git -DOPENVKL_BRANCH=bcca9b98 -DRKCOMMON_VERSION=devel + scripts/build/linux.sh -G Ninja 
-DBUILD_GLFW=OFF -DBUILD_OSPRAY_MODULE_MULTIDEVICE=OFF -DBUILD_GPU_SUPPORT=ON -DBUILD_OIDN=ON -DOPENVKL_URL=https://${RENDERKIT_GITHUB_TOKEN}@github.com/intel-innersource/libraries.graphics.renderkit.openvkl.git -DOPENVKL_BRANCH=gpu-devel -DRKCOMMON_VERSION=devel artifact-out: build-ubuntu2204 artifact-path: build/install/ospray build/install/embree build/CMakeCache.txt build/*/build/CMakeCache.txt @@ -39,9 +38,8 @@ jobs: with: force-delete: true # guarantees .gitattributes are respected in working dir image: ubuntu:22.04 - env-from-files: .github/workflows/dpcpp-sycl-nightly.env .github/workflows/gfx-ubuntu22-internal.env + env-from-files: .github/workflows/dpcpp-sycl-nightly.env level-zero-version: public/1.9.9 - install-gfx-driver: false submodules: true cmd: | export SYCL_BUNDLE_ROOT=$DPCPP_ROOT @@ -57,9 +55,8 @@ jobs: with: force-delete: true # guarantees .gitattributes are respected in working dir image: ubuntu:22.04 - env-from-files: .github/workflows/dpcpp-sycl-public.env .github/workflows/gfx-ubuntu22.env + env-from-files: .github/workflows/dpcpp-sycl-public.env level-zero-version: public/1.9.9 - install-gfx-driver: false submodules: true cmd: | sudo apt remove openmpi-* libopenmpi3 -y @@ -76,9 +73,8 @@ jobs: with: force-delete: true # guarantees .gitattributes are respected in working dir image: opensuse/leap:15.3 - env-from-files: .github/workflows/dpcpp-sycl-nightly.env .github/workflows/gfx-ubuntu22.env + env-from-files: .github/workflows/dpcpp-sycl-nightly.env level-zero-version: public/1.9.9 - install-gfx-driver: false submodules: true cmd: | export SYCL_BUNDLE_ROOT=$DPCPP_ROOT @@ -98,7 +94,6 @@ jobs: with: image: ubuntu:22.04 env-from-files: .github/workflows/dpcpp-sycl-nightly.env .github/workflows/gfx-ubuntu22.env - install-gfx-driver: true submodules: true options: --device=/dev/dri:/dev/dri runs-on: '[ "Linux", "docker", "dg2" ]' @@ -121,7 +116,6 @@ jobs: with: image: ubuntu:22.04 env-from-files: .github/workflows/dpcpp-sycl-nightly.env 
.github/workflows/gfx-ubuntu22.env - install-gfx-driver: true submodules: true options: --device=/dev/dri:/dev/dri runs-on: '[ "Linux", "docker", "pvc" ]' @@ -144,7 +138,6 @@ jobs: with: image: ubuntu:22.04 env-from-files: .github/workflows/dpcpp-sycl-nightly.env .github/workflows/gfx-ubuntu22-internal.env - install-gfx-driver: true submodules: true options: --device=/dev/dri:/dev/dri runs-on: '[ "Linux", "docker", "dg2" ]' @@ -168,7 +161,6 @@ jobs: # allow-failure: true # gfx-driver-version: gfx-driver-builds/ci/comp_igc/gfx-driver-ci-comp_igc-19476/artifacts/Linux/Ubuntu/22.04/Release # dpcpp-version: intel/2023.0 -# install-gfx-driver: true # submodules: true # image: ubuntu:22.04 # options: --device=/dev/dri:/dev/dri diff --git a/scripts/tests/run_gpu_tests.sh b/scripts/tests/run_gpu_tests.sh index f09e853a0..f6a4bb008 100755 --- a/scripts/tests/run_gpu_tests.sh +++ b/scripts/tests/run_gpu_tests.sh @@ -44,16 +44,23 @@ test_filters="ClippingParallel.planes" test_filters+=":TestScenesGeometry/FromOsprayTesting.test_scenes/15" test_filters+=":TestScenesGeometry/FromOsprayTesting.test_scenes/16" test_filters+=":TestScenesGeometry/FromOsprayTesting.test_scenes/17" +# Artifacts on PVC +test_filters+=":TestScenesGeometry/FromOsprayTesting.test_scenes/21" +test_filters+=":TestScenesGeometry/FromOsprayTesting.test_scenes/22" # Instancing test also makes use of motion blur, which is unsupported test_filters+=":TestScenesGeometry/FromOsprayTesting.test_scenes/24" test_filters+=":TestScenesGeometry/FromOsprayTesting.test_scenes/25" test_filters+=":TestScenesGeometry/FromOsprayTesting.test_scenes/26" # Clipping unsupported test_filters+=":TestScenesClipping/FromOsprayTesting.*" +# Different noise +test_filters+=":TestScenesVolumes/FromOsprayTesting.test_scenes/1" # Multiple volumes, unsupported test_filters+=":TestScenesVolumes/FromOsprayTesting.test_scenes/3" test_filters+=":TestScenesVolumes/FromOsprayTesting.test_scenes/4" 
test_filters+=":TestScenesVolumes/FromOsprayTesting.test_scenes/5" +# Line artifacts +test_filters+=":TestScenesVolumes/FromOsprayTesting.test_scenes/18" # Multiple volumes unsupported test_filters+=":TestScenesVolumesStrictParams/FromOsprayTesting.*" # Clipping unsupported From b2517884cb739756e95d3845dabd8dfd7805f7cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=BCnther?= Date: Wed, 14 Jun 2023 18:01:05 +0200 Subject: [PATCH 38/42] When rendering on CPU, let OIDN select the best device (GPU if available) copy buffers if necessary from host to device fix settings: default affinity (on) and HDR mode --- doc/api.md | 13 +- modules/denoiser/DenoiseFrameOp.cpp | 186 ++++++++++++++---- modules/denoiser/DenoiseFrameOp.h | 1 + .../superbuild/dependencies/dep_oidn.cmake | 2 +- .../AVX2/DenoiserOp_DenoiserOp.hdr.md5 | 2 +- .../AVX512SKX/DenoiserOp_DenoiserOp.hdr.md5 | 2 +- 6 files changed, 160 insertions(+), 46 deletions(-) diff --git a/doc/api.md b/doc/api.md index 639053b25..1163b0370 100644 --- a/doc/api.md +++ b/doc/api.md @@ -3054,11 +3054,14 @@ parameters to the values listed in the table below. #### Denoiser -OSPRay comes with a module that adds support for Intel® Open Image Denoise. -This is provided as an optional module as it creates an additional project -dependency at compile time. The module implements a "`denoiser`" -frame operation, which denoises the entire frame before the frame is -completed. +OSPRay comes with a module that adds support for Intel® Open Image +Denoise (OIDN). This is provided as an optional module as it creates an +additional project dependency at compile time. The module implements a +"`denoiser`" frame operation, which denoises the entire frame before the +frame is completed. OIDN will automatically select the fastest device, +using a GPU when available. 
The device selection can be overridden by the +environment variable `OIDN_DEFAULT_DEVICE`; possible values are `cpu`, +`sycl`, `cuda`, `hip`, or a physical device ID. Rendering diff --git a/modules/denoiser/DenoiseFrameOp.cpp b/modules/denoiser/DenoiseFrameOp.cpp index 43776f33d..83da1fe5a 100644 --- a/modules/denoiser/DenoiseFrameOp.cpp +++ b/modules/denoiser/DenoiseFrameOp.cpp @@ -10,63 +10,130 @@ namespace ospray { struct OSPRAY_MODULE_DENOISER_EXPORT LiveDenoiseFrameOp : public LiveFrameOpInterface { - LiveDenoiseFrameOp(FrameBufferView &fbView, OIDNDevice oidnDevice) - : oidnDevice(oidnDevice), + LiveDenoiseFrameOp( + FrameBufferView &fbView, OIDNDevice oidnDevice, const bool sharedMem) + : fbView(fbView), + oidnDevice(oidnDevice), filter(oidnNewFilter(oidnDevice, "RT")), - fbView(fbView) + sharedMem(sharedMem) + { oidnRetainDevice(oidnDevice); - float *fbColor = static_cast(fbView.colorBuffer); - oidnSetSharedFilterImage(filter, - "color", - fbColor, - OIDN_FORMAT_FLOAT3, - fbView.fbDims.x, - fbView.fbDims.y, - 0, - sizeof(float) * 4, - 0); - - if (fbView.normalBuffer) + if (sharedMem) { + float *fbColor = static_cast(fbView.colorBuffer); oidnSetSharedFilterImage(filter, - "normal", - fbView.normalBuffer, + "color", + fbColor, OIDN_FORMAT_FLOAT3, fbView.fbDims.x, fbView.fbDims.y, 0, - 0, + sizeof(float) * 4, 0); - if (fbView.albedoBuffer) oidnSetSharedFilterImage(filter, - "albedo", - fbView.albedoBuffer, + "output", + fbColor, OIDN_FORMAT_FLOAT3, fbView.fbDims.x, fbView.fbDims.y, 0, + sizeof(float) * 4, + 0); + } else { + byteFloatBufferSize = sizeof(float) * fbView.fbDims.product(); + size_t sz = 4 * byteFloatBufferSize; + output = oidnNewBufferWithStorage(oidnDevice, sz, OIDN_STORAGE_DEVICE); + + if (fbView.normalBuffer) { + byteNormalOffset = sz; + sz += 3 * byteFloatBufferSize; + } + if (fbView.albedoBuffer) { + byteAlbedoOffset = sz; + sz += 3 * byteFloatBufferSize; + } + input = oidnNewBufferWithStorage(oidnDevice, sz, OIDN_STORAGE_DEVICE); + + 
oidnSetFilterImage(filter, + "color", + input, + OIDN_FORMAT_FLOAT3, + fbView.fbDims.x, + fbView.fbDims.y, 0, + sizeof(float) * 4, 0); - oidnSetSharedFilterImage(filter, - "output", - fbColor, - OIDN_FORMAT_FLOAT3, - fbView.fbDims.x, - fbView.fbDims.y, - 0, - sizeof(float) * 4, - 0); + oidnSetFilterImage(filter, + "output", + output, + OIDN_FORMAT_FLOAT3, + fbView.fbDims.x, + fbView.fbDims.y, + 0, + sizeof(float) * 4, + 0); + } + + if (fbView.normalBuffer) { + if (sharedMem) + oidnSetSharedFilterImage(filter, + "normal", + fbView.normalBuffer, + OIDN_FORMAT_FLOAT3, + fbView.fbDims.x, + fbView.fbDims.y, + 0, + 0, + 0); + else + oidnSetFilterImage(filter, + "normal", + input, + OIDN_FORMAT_FLOAT3, + fbView.fbDims.x, + fbView.fbDims.y, + byteNormalOffset, + 0, + 0); + } + + if (fbView.albedoBuffer) { + if (sharedMem) + oidnSetSharedFilterImage(filter, + "albedo", + fbView.albedoBuffer, + OIDN_FORMAT_FLOAT3, + fbView.fbDims.x, + fbView.fbDims.y, + 0, + 0, + 0); + else + oidnSetFilterImage(filter, + "albedo", + input, + OIDN_FORMAT_FLOAT3, + fbView.fbDims.x, + fbView.fbDims.y, + byteAlbedoOffset, + 0, + 0); + } - oidnSetFilterBool(filter, "hdr", false); + oidnSetFilterBool(filter, "hdr", true); oidnCommitFilter(filter); } ~LiveDenoiseFrameOp() override { + if (!sharedMem) { + oidnReleaseBuffer(input); + oidnReleaseBuffer(output); + } oidnReleaseFilter(filter); oidnReleaseDevice(oidnDevice); } @@ -75,9 +142,31 @@ struct OSPRAY_MODULE_DENOISER_EXPORT LiveDenoiseFrameOp { if (waitEvent) oidnExecuteSYCLFilterAsync(filter, nullptr, 0, (sycl::event *)waitEvent); - else + else { +// TODO if (!fbView.originalFB->getSh()->numPixelsRendered) +// return; // skip denoising when no new pixels XXX only works on CPU + + if (!sharedMem) { + oidnWriteBuffer(input, 0, 4 * byteFloatBufferSize, fbView.colorBuffer); + + if (fbView.normalBuffer) + oidnWriteBuffer(input, + byteNormalOffset, + 3 * byteFloatBufferSize, + fbView.normalBuffer); + if (fbView.albedoBuffer) + oidnWriteBuffer(input, 
+ byteAlbedoOffset, + 3 * byteFloatBufferSize, + fbView.albedoBuffer); + } + oidnExecuteFilter(filter); + if (!sharedMem) + oidnReadBuffer(output, 0, 4 * byteFloatBufferSize, fbView.colorBuffer); + } + const char *errorMessage = nullptr; auto error = oidnGetDeviceError(oidnDevice, &errorMessage); @@ -87,10 +176,17 @@ struct OSPRAY_MODULE_DENOISER_EXPORT LiveDenoiseFrameOp } } + FrameBufferView fbView; + OIDNDevice oidnDevice; OIDNFilter filter; - - FrameBufferView fbView; + bool sharedMem; + // needed only without shared mem: + OIDNBuffer input; + OIDNBuffer output; + size_t byteFloatBufferSize; + size_t byteNormalOffset; + size_t byteAlbedoOffset; }; DenoiseFrameOp::DenoiseFrameOp(api::Device &device) @@ -101,10 +197,23 @@ DenoiseFrameOp::DenoiseFrameOp(api::Device &device) if (syclQueuePtr) oidnDevice = oidnNewSYCLDevice(syclQueuePtr, 1); else - oidnDevice = oidnNewDevice(OIDN_DEVICE_TYPE_CPU); + oidnDevice = oidnNewDevice(OIDN_DEVICE_TYPE_DEFAULT); + + const char *errorMessage = nullptr; + auto error = oidnGetDeviceError(oidnDevice, &errorMessage); + + if (error != OIDN_ERROR_NONE) { + throw std::runtime_error( + "Error running OIDN: " + std::string(errorMessage)); + } + + // OIDN has inverted verbose levels vs. 
OSPRay + oidnSetDeviceInt(oidnDevice, "verbose", OSP_LOG_NONE - device.logLevel); - oidnSetDeviceBool(oidnDevice, "setAffinity", false); oidnCommitDevice(oidnDevice); + + sharedMem = + syclQueuePtr || oidnGetDeviceBool(oidnDevice, "systemMemorySupported"); } DenoiseFrameOp::~DenoiseFrameOp() @@ -120,7 +229,8 @@ std::unique_ptr DenoiseFrameOp::attach( "DenoiseFrameOp must be used with an RGBA32F " "color format framebuffer!"); - return rkcommon::make_unique(fbView, oidnDevice); + return rkcommon::make_unique( + fbView, oidnDevice, sharedMem); } std::string DenoiseFrameOp::toString() const @@ -128,4 +238,4 @@ std::string DenoiseFrameOp::toString() const return "ospray::DenoiseFrameOp"; } -} // namespace ospray \ No newline at end of file +} // namespace ospray diff --git a/modules/denoiser/DenoiseFrameOp.h b/modules/denoiser/DenoiseFrameOp.h index d2629c2a9..a98fd3612 100644 --- a/modules/denoiser/DenoiseFrameOp.h +++ b/modules/denoiser/DenoiseFrameOp.h @@ -24,6 +24,7 @@ struct OSPRAY_MODULE_DENOISER_EXPORT DenoiseFrameOp : public FrameOpInterface private: OIDNDevice oidnDevice; + bool sharedMem{false}; }; } // namespace ospray diff --git a/scripts/superbuild/dependencies/dep_oidn.cmake b/scripts/superbuild/dependencies/dep_oidn.cmake index 75df364a9..5cf984762 100644 --- a/scripts/superbuild/dependencies/dep_oidn.cmake +++ b/scripts/superbuild/dependencies/dep_oidn.cmake @@ -32,7 +32,7 @@ if (BUILD_OIDN_FROM_SOURCE) -DCMAKE_INSTALL_BINDIR=${CMAKE_INSTALL_BINDIR} $<$:-DTBB_ROOT=${TBB_PATH}> $<$:-DISPC_EXECUTABLE=${ISPC_PATH}> - -DCMAKE_BUILD_TYPE=${DEPENDENCIES_BUILD_TYPE} + -DCMAKE_BUILD_TYPE=Release # XXX debug builds are currently broken -DOIDN_APPS=OFF -DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} -DCMAKE_OSX_DEPLOYMENT_TARGET=${CMAKE_OSX_DEPLOYMENT_TARGET} diff --git a/test_image_data/baseline/AVX2/DenoiserOp_DenoiserOp.hdr.md5 b/test_image_data/baseline/AVX2/DenoiserOp_DenoiserOp.hdr.md5 index f22a849d9..830c70725 100644 --- 
a/test_image_data/baseline/AVX2/DenoiserOp_DenoiserOp.hdr.md5 +++ b/test_image_data/baseline/AVX2/DenoiserOp_DenoiserOp.hdr.md5 @@ -1 +1 @@ -34af3898958f056c55d3bfa09c641584 +b9daebc436edf61e05a3ebff9211bee1 diff --git a/test_image_data/baseline/AVX512SKX/DenoiserOp_DenoiserOp.hdr.md5 b/test_image_data/baseline/AVX512SKX/DenoiserOp_DenoiserOp.hdr.md5 index f22a849d9..830c70725 100644 --- a/test_image_data/baseline/AVX512SKX/DenoiserOp_DenoiserOp.hdr.md5 +++ b/test_image_data/baseline/AVX512SKX/DenoiserOp_DenoiserOp.hdr.md5 @@ -1 +1 @@ -34af3898958f056c55d3bfa09c641584 +b9daebc436edf61e05a3ebff9211bee1 From 51719801a4f20cb76e5b4deedeb6bd74dc2c4dfb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=BCnther?= Date: Thu, 15 Jun 2023 14:46:29 +0200 Subject: [PATCH 39/42] Move to Rocky Linux for release binaries --- .github/workflows/ci.linux.yml | 40 ++++++++++--------- .github/workflows/dpcpp-sycl-public.env | 2 +- .github/workflows/release.yml | 10 ++--- scripts/build/kw.sh | 52 ------------------------- scripts/build/linux.sh | 1 - scripts/build/win.ps1 | 1 - scripts/release/linux.sh | 8 ++-- scripts/release/macosx.sh | 2 - scripts/release/win.ps1 | 2 - scripts/superbuild/CMakeLists.txt | 2 +- 10 files changed, 31 insertions(+), 89 deletions(-) delete mode 100755 scripts/build/kw.sh diff --git a/.github/workflows/ci.linux.yml b/.github/workflows/ci.linux.yml index a5372fd3a..07abc6836 100644 --- a/.github/workflows/ci.linux.yml +++ b/.github/workflows/ci.linux.yml @@ -62,7 +62,7 @@ jobs: force-delete: true # guarantees .gitattributes are respected in working dir image: centos:7 cmd: | - scripts/build/linux.sh -DBUILD_ISA_AVX512=OFF # TODO Embree source build incompatible with older GCC for AVX512 + scripts/build/linux.sh -DBUILD_EMBREE_FROM_SOURCE=ON -DBUILD_ISA_AVX512=OFF artifact-out: build-centos7-gcc artifact-path: build/install/ospray build/CMakeCache.txt build/*/build/CMakeCache.txt @@ -108,11 +108,10 @@ jobs: export CXXFLAGS="-fhonor-infinities 
-fhonor-nans" export CFLAGS=$CXXFLAGS export LDFLAGS="-static-intel" - scripts/build/linux.sh + scripts/build/linux.sh -DBUILD_EMBREE_FROM_SOURCE=ON artifact-out: build-centos7-icx artifact-path: build/install/ospray build/CMakeCache.txt build/*/build/CMakeCache.txt - # TODO: Enable AVX512 testing after Embree4 release build-centos7-mpi-impi: uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker.yml@main secrets: inherit @@ -122,11 +121,10 @@ jobs: cmd: | module load cmake module load impi/2021.6 - scripts/build/linux.sh -DBUILD_OSPRAY_MODULE_MPI=ON -DBUILD_ISA_AVX512=OFF + scripts/build/linux.sh -DBUILD_EMBREE_FROM_SOURCE=ON -DBUILD_OSPRAY_MODULE_MPI=ON -DBUILD_ISA_AVX512=OFF artifact-out: build-centos7-mpi-impi artifact-path: build/install/ospray build/CMakeCache.txt build/*/build/CMakeCache.txt - # TODO: Enable AVX512 testing after Embree4 release build-centos7-mpi-mpich: uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker.yml@main secrets: inherit @@ -136,11 +134,10 @@ jobs: cmd: | module load cmake module load mpi/mpich-x86_64 - scripts/build/linux.sh -DBUILD_OSPRAY_MODULE_MPI=ON -DBUILD_ISA_AVX512=OFF + scripts/build/linux.sh -DBUILD_EMBREE_FROM_SOURCE=ON -DBUILD_OSPRAY_MODULE_MPI=ON -DBUILD_ISA_AVX512=OFF artifact-out: build-centos7-mpi-mpich artifact-path: build/install/ospray build/CMakeCache.txt build/*/build/CMakeCache.txt - # TODO: Enable AVX512 testing after Embree4 release build-centos7-mpi-openmpi: uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker.yml@main secrets: inherit @@ -150,7 +147,7 @@ jobs: cmd: | module load cmake module load mpi/openmpi-x86_64 - scripts/build/linux.sh -DBUILD_OSPRAY_MODULE_MPI=ON -DBUILD_ISA_AVX512=OFF + scripts/build/linux.sh -DBUILD_EMBREE_FROM_SOURCE=ON -DBUILD_OSPRAY_MODULE_MPI=ON -DBUILD_ISA_AVX512=OFF build-centos8: uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker.yml@main @@ 
-197,7 +194,6 @@ jobs: artifact-out: build-linux-debug artifact-path: build/install/ospray build/CMakeCache.txt build/*/build/CMakeCache.txt - # TODO: Enable AVX512 testing after Embree4 release build-ubuntu1804-gcc: uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker.yml@main secrets: inherit @@ -205,7 +201,7 @@ jobs: force-delete: true # guarantees .gitattributes are respected in working dir image: ubuntu:18.04 cmd: | - scripts/build/linux.sh -G Ninja -DBUILD_GLFW=OFF -DBUILD_ISA_AVX512=OFF + scripts/build/linux.sh -G Ninja -DBUILD_GLFW=OFF artifact-out: build-ubuntu1804-gcc artifact-path: build/install/ospray build/CMakeCache.txt build/*/build/CMakeCache.txt @@ -227,7 +223,6 @@ jobs: uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker.yml@main with: image: centos:7 - runs-on: '["Linux", "avx2"]' cmd: | LD_LIBRARY_PATH="$GITHUB_WORKSPACE/build/install/ospray/lib:$LD_LIBRARY_PATH" PATH="$GITHUB_WORKSPACE/build/install/ospray/bin:$PATH" scripts/tests/run_tests.sh "$GITHUB_WORKSPACE" AVX2 artifact-in: build-centos7-gcc @@ -256,7 +251,6 @@ jobs: uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker.yml@main with: image: centos:7 - runs-on: '["Linux", "avx2"]' cmd: | module load cmake module load mpi/mpich-x86_64 @@ -279,6 +273,19 @@ jobs: artifact-path: build_regression_tests/tests*.xml build_regression_tests/failed* artifact-on-failure: true + test-centos7-icx-avx512skx: + needs: build-centos7-icx + uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker.yml@main + with: + image: centos:7 + runs-on: '["Linux", "avx512"]' + cmd: | + LD_LIBRARY_PATH="$GITHUB_WORKSPACE/build/install/ospray/lib:$LD_LIBRARY_PATH" PATH="$GITHUB_WORKSPACE/build/install/ospray/bin:$PATH" scripts/tests/run_tests.sh "$GITHUB_WORKSPACE" AVX512SKX + artifact-in: build-centos7-icx + artifact-out: test-centos7-icx-avx512skx + artifact-path: 
build_regression_tests/tests*.xml build_regression_tests/failed* + artifact-on-failure: true + test-centos8-avx512skx: needs: build-centos8 uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker.yml@main @@ -305,18 +312,14 @@ jobs: cmake --build . artifact-in: build-centos7-gcc - # TODO: Enable AVX512 testing after Embree4 release test-ubuntu1804-gcc-avx512skx: needs: build-ubuntu1804-gcc uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker.yml@main with: image: ubuntu:18.04 - # runs-on: '["Linux", "avx512"]' - runs-on: '["Linux", "avx2"]' - # cmd: | - # LD_LIBRARY_PATH="$GITHUB_WORKSPACE/build/install/ospray/lib:$LD_LIBRARY_PATH" PATH="$GITHUB_WORKSPACE/build/install/ospray/bin:$PATH" scripts/tests/run_tests.sh "$GITHUB_WORKSPACE" AVX512SKX + runs-on: '["Linux", "avx512"]' cmd: | - LD_LIBRARY_PATH="$GITHUB_WORKSPACE/build/install/ospray/lib:$LD_LIBRARY_PATH" PATH="$GITHUB_WORKSPACE/build/install/ospray/bin:$PATH" scripts/tests/run_tests.sh "$GITHUB_WORKSPACE" AVX2 + LD_LIBRARY_PATH="$GITHUB_WORKSPACE/build/install/ospray/lib:$LD_LIBRARY_PATH" PATH="$GITHUB_WORKSPACE/build/install/ospray/bin:$PATH" scripts/tests/run_tests.sh "$GITHUB_WORKSPACE" AVX512SKX artifact-in: build-ubuntu1804-gcc artifact-out: test-ubuntu1804-gcc-avx512skx artifact-path: build_regression_tests/tests*.xml build_regression_tests/failed* @@ -353,7 +356,6 @@ jobs: uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker.yml@main with: image: centos:7 - runs-on: '["Linux", "avx2"]' # TODO: Remove it after Embree4 release cmd: scripts/tests/run_tutorials.sh artifact-in: build-centos7-gcc diff --git a/.github/workflows/dpcpp-sycl-public.env b/.github/workflows/dpcpp-sycl-public.env index d6181b175..40914a687 100644 --- a/.github/workflows/dpcpp-sycl-public.env +++ b/.github/workflows/dpcpp-sycl-public.env @@ -1 +1 @@ -DPCPP_VERSION=intel/2023.0 +DPCPP_VERSION=intel-llvm/sycl-nightly/20230304 diff --git 
a/.github/workflows/release.yml b/.github/workflows/release.yml index 186de8e09..3286f1660 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -39,11 +39,11 @@ jobs: uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker.yml@main with: force-delete: true # guarantees .gitattributes are respected in working dir - image: centos:7 + image: rockylinux:8.7 cmd: | module load cmake/3.15.2 module load intel/2023.0 - module load mpich/3.2 + module load mpi/mpich-x86_64 export CC=icx export CXX=icpx export CXXFLAGS="-fhonor-infinities -fhonor-nans" @@ -95,7 +95,7 @@ jobs: module load cmake module load intel/2021.2 echo "Configure dependencies" - cmake -L -S scripts/superbuild -B build_deps -DBUILD_DEPENDENCIES_ONLY=ON -DBUILD_EMBREE_FROM_SOURCE=ON -DBUILD_ISA_AVX512=OFF -DBUILD_OIDN=ON -DBUILD_OIDN_FROM_SOURCE=OFF -DBUILD_OSPRAY_MODULE_MPI=ON -DINSTALL_IN_SEPARATE_DIRECTORIES=OFF + cmake -L -S scripts/superbuild -B build_deps -DBUILD_DEPENDENCIES_ONLY=ON -DBUILD_EMBREE_FROM_SOURCE=ON -DBUILD_OIDN=ON -DBUILD_OIDN_FROM_SOURCE=ON -DBUILD_OSPRAY_MODULE_MPI=ON -DINSTALL_IN_SEPARATE_DIRECTORIES=OFF -DBUILD_ISA_AVX512=OFF echo "Build dependencies" cmake --build build_deps INSTALL_DIR=$(pwd)/build_deps/install @@ -139,10 +139,10 @@ jobs: needs: release-linux uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker.yml@main with: - image: centos:7 + image: rockylinux:8.7 cmd: | module load cmake/3.15.2 - module load mpich/3.2 + module load mpi/mpich-x86_64 tar -xzf build_release/*.gz export PATH=$GITHUB_WORKSPACE/$(ls -d1 ./ospray-*/bin):$PATH scripts/tests/run_tests.sh $GITHUB_WORKSPACE AVX512SKX TEST_MPI diff --git a/scripts/build/kw.sh b/scripts/build/kw.sh deleted file mode 100755 index 3236b8ef6..000000000 --- a/scripts/build/kw.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/bin/bash -## Copyright 2019 Intel Corporation -## SPDX-License-Identifier: Apache-2.0 - -set -e - 
-KW_SERVER_PATH=$KW_PATH/server -KW_CLIENT_PATH=$KW_PATH/client - -export KLOCWORK_LTOKEN=/tmp/ltoken -echo "$KW_SERVER_IP;$KW_SERVER_PORT;$KW_USER;$KW_LTOKEN" > $KLOCWORK_LTOKEN - -mkdir -p $CI_PROJECT_DIR/klocwork -log_file=$CI_PROJECT_DIR/klocwork/build.log - - -mkdir build -cd build - -# NOTE(jda) - Some Linux OSs need to have lib/ on LD_LIBRARY_PATH at build time -export LD_LIBRARY_PATH=`pwd`/install/lib:${LD_LIBRARY_PATH} - -cmake --version - -cmake -L \ - -DBUILD_DEPENDENCIES_ONLY=ON \ - -DBUILD_EMBREE_FROM_SOURCE=OFF \ - -DBUILD_OIDN=ON \ - -DBUILD_OIDN_FROM_SOURCE=OFF \ - -DBUILD_OSPRAY_MODULE_MPI=ON \ - -DCMAKE_INSTALL_LIBDIR=lib \ - -DINSTALL_IN_SEPARATE_DIRECTORIES=OFF \ - "$@" ../scripts/superbuild - -cmake --build . - -INSTALL_DIR=`pwd`/install - -mkdir ospray_build -cd ospray_build - -export CMAKE_PREFIX_PATH=${INSTALL_DIR} - -cmake -DISPC_EXECUTABLE=$INSTALL_DIR/bin/ispc -DTBB_ROOT=$INSTALL_DIR -DOSPRAY_MODULE_DENOISER=ON -DOSPRAY_MODULE_BILINEAR_PATCH=ON -DOSPRAY_MODULE_MPI=ON ../.. 
- -$KW_CLIENT_PATH/bin/kwinject make -j `nproc` | tee -a $log_file -$KW_SERVER_PATH/bin/kwbuildproject --classic --url http://$KW_SERVER_IP:$KW_SERVER_PORT/$KW_PROJECT_NAME --tables-directory $CI_PROJECT_DIR/kw_tables kwinject.out | tee -a $log_file -$KW_SERVER_PATH/bin/kwadmin --url http://$KW_SERVER_IP:$KW_SERVER_PORT/ load --force --name build-$CI_JOB_ID $KW_PROJECT_NAME $CI_PROJECT_DIR/kw_tables | tee -a $log_file - -# Store kw build name for check status later -echo "build-$CI_JOB_ID" > $CI_PROJECT_DIR/klocwork/build_name - diff --git a/scripts/build/linux.sh b/scripts/build/linux.sh index e7366a7f3..62ff7fe6d 100755 --- a/scripts/build/linux.sh +++ b/scripts/build/linux.sh @@ -11,7 +11,6 @@ cmake --version cmake -L \ -D CMAKE_INSTALL_LIBDIR=lib \ - -D BUILD_EMBREE_FROM_SOURCE=ON \ "$@" \ ../scripts/superbuild diff --git a/scripts/build/win.ps1 b/scripts/build/win.ps1 index 27e6f8178..3e3dfca9d 100755 --- a/scripts/build/win.ps1 +++ b/scripts/build/win.ps1 @@ -9,7 +9,6 @@ cmake --version cmake -L ` -G $($args[0]) ` -T $($args[1]) ` - -D BUILD_EMBREE_FROM_SOURCE=OFF ` -D CMAKE_BUILD_TYPE=$($args[2]) ` -D DEPENDENCIES_BUILD_TYPE=$($args[2]) ` -D BUILD_OSPRAY_MODULE_MPI=$($args[3]) ` diff --git a/scripts/release/linux.sh b/scripts/release/linux.sh index 632395428..19cb681d3 100755 --- a/scripts/release/linux.sh +++ b/scripts/release/linux.sh @@ -73,9 +73,7 @@ cmake \ -D BUILD_DEPENDENCIES_ONLY=ON \ -D CMAKE_INSTALL_PREFIX=$DEP_DIR \ -D CMAKE_INSTALL_LIBDIR=lib \ - -D BUILD_EMBREE_FROM_SOURCE=ON \ -D BUILD_OIDN=ON \ - -D BUILD_OIDN_FROM_SOURCE=OFF \ -D BUILD_OSPRAY_MODULE_MPI=ON \ -D INSTALL_IN_SEPARATE_DIRECTORIES=OFF \ ../scripts/superbuild @@ -116,9 +114,9 @@ make -j $THREADS preinstall # verify libs for lib in libospray.so libospray_module_cpu.so libospray_module_mpi_offload.so libospray_module_mpi_distributed_cpu.so ; do echo "checking $lib..." 
- check_symbols $lib GLIBC 2 17 0 - check_symbols $lib GLIBCXX 3 4 19 - check_symbols $lib CXXABI 1 3 7 + check_symbols $lib GLIBC 2 28 0 + check_symbols $lib GLIBCXX 3 4 22 + check_symbols $lib CXXABI 1 3 11 check_lib_dependency_error $lib libimf.so check_lib_dependency_error $lib libsvml.so done diff --git a/scripts/release/macosx.sh b/scripts/release/macosx.sh index 0fcf701fe..15142c1a9 100755 --- a/scripts/release/macosx.sh +++ b/scripts/release/macosx.sh @@ -45,9 +45,7 @@ cmake \ -D BUILD_DEPENDENCIES_ONLY=ON \ -D CMAKE_INSTALL_PREFIX=$DEP_DIR \ -D CMAKE_INSTALL_LIBDIR=lib \ - -D BUILD_EMBREE_FROM_SOURCE=ON \ -D BUILD_OIDN=ON \ - -D BUILD_OIDN_FROM_SOURCE=OFF \ -D INSTALL_IN_SEPARATE_DIRECTORIES=OFF \ ../scripts/superbuild diff --git a/scripts/release/win.ps1 b/scripts/release/win.ps1 index 2ad35392d..8e411aaa3 100755 --- a/scripts/release/win.ps1 +++ b/scripts/release/win.ps1 @@ -16,10 +16,8 @@ cmake -L ` -D BUILD_DEPENDENCIES_ONLY=ON ` -D CMAKE_INSTALL_PREFIX=$DEP_DIR ` -D CMAKE_INSTALL_LIBDIR=lib ` - -D BUILD_EMBREE_FROM_SOURCE=ON ` -D BUILD_ISA_AVX512=OFF ` -D BUILD_OIDN=ON ` - -D BUILD_OIDN_FROM_SOURCE=OFF ` -D BUILD_OSPRAY_MODULE_MPI=ON ` -D INSTALL_IN_SEPARATE_DIRECTORIES=OFF ` ../scripts/superbuild diff --git a/scripts/superbuild/CMakeLists.txt b/scripts/superbuild/CMakeLists.txt index 8b562901e..0681b4d00 100644 --- a/scripts/superbuild/CMakeLists.txt +++ b/scripts/superbuild/CMakeLists.txt @@ -117,7 +117,7 @@ else() mark_as_advanced(FORCE BUILD_TBB_FROM_SOURCE) endif() -option(BUILD_EMBREE_FROM_SOURCE "Build Embree or use pre-built version?" ON) +option(BUILD_EMBREE_FROM_SOURCE "Build Embree or use pre-built version?" 
OFF) set(EMBREE_VERSION "4.1.0" CACHE STRING "Which version of Embree to build?") if (EMBREE_VERSION STREQUAL "4.1.0") if (BUILD_EMBREE_FROM_SOURCE) From a356451a9ceffd353ecaf1c1d90063579412de27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=BCnther?= Date: Thu, 15 Jun 2023 15:36:23 +0200 Subject: [PATCH 40/42] Update TBB and TPPs --- scripts/superbuild/CMakeLists.txt | 12 +- third-party-programs-OIDN.txt | 304 ++++++++++++++++++++++++++++-- third-party-programs-oneDNN.txt | 238 ++++++++++++++++++----- third-party-programs-oneTBB.txt | 179 ++++++++---------- 4 files changed, 557 insertions(+), 176 deletions(-) diff --git a/scripts/superbuild/CMakeLists.txt b/scripts/superbuild/CMakeLists.txt index 0681b4d00..027334242 100644 --- a/scripts/superbuild/CMakeLists.txt +++ b/scripts/superbuild/CMakeLists.txt @@ -95,20 +95,20 @@ endif() option(DOWNLOAD_TBB "Download TBB or use the one found in the system environment?" ON) if (DOWNLOAD_TBB) - set(TBB_VERSION "2021.8.0" CACHE STRING "Which version of TBB to download?") + set(TBB_VERSION "2021.9.0" CACHE STRING "Which version of TBB to download?") mark_as_advanced(CLEAR TBB_VERSION) option(BUILD_TBB_FROM_SOURCE "Build TBB from source or use pre-built version?" 
OFF) mark_as_advanced(CLEAR BUILD_TBB_FROM_SOURCE) - if (TBB_VERSION STREQUAL "2021.8.0") + if (TBB_VERSION STREQUAL "2021.9.0") if (BUILD_TBB_FROM_SOURCE) - set(TBB_HASH "a99635a411d86315b37c72ab525de59cd47aa5bd765c5544640ab08f971f83fe") + set(TBB_HASH "fcebb93cb9f7e882f62cd351b1c093dbefdcae04b616227dc716b0a5efa9e8ab") else() if (APPLE) - set(TBB_HASH "9d620781d12d36a279bb27f4feb8c3b25d12c133e0d5b8661b867405d2445ee8") + set(TBB_HASH "2892f2a51aa404ba95bf20b6a9d5763bb8b6250aa0dc40a876d72c94f35748f0") elseif (WIN32) - set(TBB_HASH "b9265d4dc5b74e27176c6a6b696882935f605191d014a62c010c9610904e7f65") + set(TBB_HASH "0c4d96edd0469cc93f611f94365ec07f3a0dd529b96d4e6ceac378de63920b1c") else() - set(TBB_HASH "ee410e991bb44ce11437fbf93abedc7c4b1d0cb254e9b91cc2e0ddb0f5375566") + set(TBB_HASH "1e8f4d584c209b1a1d1935e72c0a86c16e65e6d2859cb96736ec6ed72c6123f5") endif() endif() endif() diff --git a/third-party-programs-OIDN.txt b/third-party-programs-OIDN.txt index 79b42ded8..f3d76ae1f 100644 --- a/third-party-programs-OIDN.txt +++ b/third-party-programs-OIDN.txt @@ -8,17 +8,23 @@ limitation, third party license terms, other Intel software license terms, and open source software license terms. These separate license terms govern your use of the third party programs as set forth in the "third-party-programs.txt" or other similarly named text file. - + Third party programs and their corresponding required notices and/or license terms are listed below. -------------------------------------------------------------------------------- 1. 
Intel(R) oneAPI Deep Neural Network Library (oneDNN) - Copyright 2016 Intel Corporation + Copyright 2016-2023 Intel Corporation Intel(R) oneAPI Threading Building Blocks (oneTBB) - Copyright 2005 Intel Corporation + Copyright 2005-2023 Intel Corporation + + Intel® Embree (snippets) + Copyright Intel Corporation + + Intel® OSPRay (snippets) + Copyright Intel Corporation Apache License Version 2.0, January 2004 @@ -244,23 +250,23 @@ terms are listed below. All contributions by Facebook: Copyright (c) 2016 Facebook Inc. - + All contributions by Google: Copyright (c) 2015 Google Inc. All rights reserved. - + All contributions by Yangqing Jia: Copyright (c) 2015 Yangqing Jia All rights reserved. - + All contributions from Caffe: Copyright(c) 2013, 2014, 2015, the respective contributors All rights reserved. - + All other contributions: Copyright(c) 2015, 2016 the respective contributors All rights reserved. - + Caffe2 uses a copyright model similar to Caffe: each contributor holds copyright over their contributions to Caffe2. The project versioning records all such contribution and copyright details. If a contributor wants to further @@ -270,7 +276,6 @@ terms are listed below. All rights reserved. -The 3-Clause BSD License (customized) Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -302,8 +307,18 @@ POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- 3. pytorch-msssim - Copyright (c) 2019 Vainn - + Copyright (c) 2019 Gongfan Fang + + ROCmSoftwarePlatform/composable_kernel + Copyright (c) 2018- , Advanced Micro Devices, Inc. (Chao Liu, Jing Zhang) + Copyright (c) 2019- , Advanced Micro Devices, Inc. (Letao Qin, Qianfeng Zhang, Liang Huang, Shaojie Wang) + Copyright (c) 2022- , Advanced Micro Devices, Inc. 
(Anthony Chang, Chunyu Lai, Illia Silin, Adam Osewski, Poyen Chen, Jehandad Khan) + Copyright (c) 2019-2021, Advanced Micro Devices, Inc. (Hanwen Chang) + Copyright (c) 2019-2020, Advanced Micro Devices, Inc. (Tejash Shah) + Copyright (c) 2020 , Advanced Micro Devices, Inc. (Xiaoyan Zhou) + Copyright (c) 2021-2022, Advanced Micro Devices, Inc. (Jianfeng Yan) + Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved. + MIT License Permission is hereby granted, free of charge, to any person obtaining a copy @@ -327,7 +342,7 @@ SOFTWARE. -------------------------------------------------------------------------------- 4. Catch2 - Copyright 2021 Two Blue Cubes Ltd. All rights reserved. + Copyright (c) 2021 Two Blue Cubes Ltd. All rights reserved. Boost Software License - Version 1.0 - August 17th, 2003 @@ -355,6 +370,266 @@ DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- +5. CUTLASS + Copyright (c) 2017 - 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +SPDX-License-Identifier: BSD-3-Clause + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +6. oneAPI Data Parallel C++ Compiler + Copyright Intel Corporation + +============================================================================== +The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: +============================================================================== + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +---- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. + +-------------------------------------------------------------------------------- + The following third party programs have their own third party programs. These additional third party program files are as follows: @@ -364,6 +639,7 @@ additional third party program files are as follows: 2. Intel(R) oneAPI Threading Building Blocks (oneTBB) ./third-party-programs-oneTBB.txt --------------------------------------------------------------------------------- +3. 
oneAPI DPC++ Compiler + ./third-party-programs-DPCPP.txt -Other names and brands may be claimed as the property of others. +-------------------------------------------------------------------------------- \ No newline at end of file diff --git a/third-party-programs-oneDNN.txt b/third-party-programs-oneDNN.txt index 383060e4d..1b8bbccb3 100644 --- a/third-party-programs-oneDNN.txt +++ b/third-party-programs-oneDNN.txt @@ -12,14 +12,13 @@ govern your use of the third party programs as set forth in in the Third party programs and their corresponding required notices and/or license terms are listed below. -3-clause BSD License -==================== - -XByak (src/cpu/xbyak/) ----------------------- +-------------------------------------------------------------------------------- +1. XByak (src/cpu/xbyak/) Copyright (c) 2007 MITSUNARI Shigeo All rights reserved. +3-Clause BSD License + Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -43,7 +42,8 @@ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ + + ソースコード形式かバイナリ形式か、変更するかしないかを問わず、以下の条件を満た す場合に限り、再頒布および使用が許可されます。 @@ -64,8 +64,8 @@ THE POSSIBILITY OF SUCH DAMAGE. 損害、間接損害、偶発的な損害、特別損害、懲罰的損害、または結果損害について、 一切責任を負わないものとします。 -gtest (tests/gtests/gtest/) ---------------------------- +-------------------------------------------------------------------------------- +2. Googletest (tests/gtests/gtest/) Copyright 2005, Google Inc. Copyright 2006, Google Inc. Copyright 2007, Google Inc. @@ -73,6 +73,8 @@ Copyright 2008, Google Inc. Copyright 2015, Google Inc. All rights reserved. 
+3-Clause BSD License + Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -99,10 +101,12 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -ittnotify (src/cpu/jit_utils/jitprofiling/) -------------------------------------------- -Copyright (c) 2011, Intel Corporation -All rights reserved. +-------------------------------------------------------------------------------- +3. Instrumentation and Tracing Technology API (src/common/ittnotify/) +Copyright (c) 2011, Intel Corporation. All rights reserved. +Copyright (c) 2005-2014 Intel Corporation. All rights reserved. + +3-Clause BSD License Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -114,7 +118,7 @@ modification, are permitted provided that the following conditions are met: this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. -3. Neither the name of the copyright holder nor the names of its +3. Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. @@ -129,12 +133,14 @@ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -CMake (cmake/FindOpenCL.cmake, cmake/FindBLAS.cmake) ------------------------------- +-------------------------------------------------------------------------------- +4. 
CMake (cmake/FindOpenCL.cmake, cmake/FindBLAS.cmake, cmake/FindACL.cmake) CMake - Cross Platform Makefile Generator Copyright 2000-2020 Kitware, Inc. and Contributors All rights reserved. +3-Clause BSD License + Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -162,14 +168,113 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -Apache License, Version 2.0 -=========================== +------------------------------------------------------------------------------ + +The following individuals and institutions are among the Contributors: + +* Aaron C. Meadows +* Adriaan de Groot +* Aleksey Avdeev +* Alexander Neundorf +* Alexander Smorkalov +* Alexey Sokolov +* Alex Merry +* Alex Turbov +* Andreas Pakulat +* Andreas Schneider +* André Rigland Brodtkorb +* Axel Huebl, Helmholtz-Zentrum Dresden - Rossendorf +* Benjamin Eikel +* Bjoern Ricks +* Brad Hards +* Christopher Harvey +* Christoph Grüninger +* Clement Creusot +* Daniel Blezek +* Daniel Pfeifer +* Enrico Scholz +* Eran Ifrah +* Esben Mose Hansen, Ange Optimization ApS +* Geoffrey Viola +* Google Inc +* Gregor Jasny +* Helio Chissini de Castro +* Ilya Lavrenov +* Insight Software Consortium +* Jan Woetzel +* Julien Schueller +* Kelly Thompson +* Konstantin Podsvirov +* Laurent Montel +* Mario Bensi +* Martin Gräßlin +* Mathieu Malaterre +* Matthaeus G. Chajdas +* Matthias Kretz +* Matthias Maennich +* Michael Hirsch, Ph.D. +* Michael Stürmer +* Miguel A. Figueroa-Villanueva +* Mike Jackson +* Mike McQuaid +* Nicolas Bock +* Nicolas Despres +* Nikita Krupen'ko +* NVIDIA Corporation +* OpenGamma Ltd. 
+* Patrick Stotko +* Per Øyvind Karlsen +* Peter Collingbourne +* Petr Gotthard +* Philip Lowman +* Philippe Proulx +* Raffi Enficiaud, Max Planck Society +* Raumfeld +* Roger Leigh +* Rolf Eike Beer +* Roman Donchenko +* Roman Kharitonov +* Ruslan Baratov +* Sebastian Holtermann +* Stephen Kelly +* Sylvain Joubert +* The Qt Company Ltd. +* Thomas Sondergaard +* Tobias Hunger +* Todd Gamblin +* Tristan Carel +* University of Dundee +* Vadim Zhukov +* Will Dicharry + +See version control history for details of individual contributions. + +The above copyright and license notice applies to distributions of +CMake in source and binary form. Third-party software packages supplied +with CMake under compatible licenses provide their own copyright notices +documented in corresponding subdirectories or source files. + +------------------------------------------------------------------------------ + +CMake was initially developed by Kitware with the following sponsorship: + + * National Library of Medicine at the National Institutes of Health + as part of the Insight Segmentation and Registration Toolkit (ITK). + + * US National Labs (Los Alamos, Livermore, Sandia) ASC Parallel + Visualization Initiative. + + * National Alliance for Medical Image Computing (NAMIC) is funded by the + National Institutes of Health through the NIH Roadmap for Medical Research, + Grant U54 EB005149. + + * Kitware, Inc. + +-------------------------------------------------------------------------------- +5. Xbyak_aarch64 (src/cpu/aarch64/xbyak_aarch64/) +Copyright 2019-2020 FUJITSU LIMITED -MathJax (doc/assets/mathjax/) ------------------------------ -Copyright (c) 2009-2018 The MathJax Consortium -Copyright (c) 2015-2017 Martin Hensel -Copyright (c) 2007, Apostolos Syropoulos ( + Copyright (c) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. 
@@ -144,30 +159,30 @@ ________________________________________________________________________________ TERMS AND CONDITIONS 0. Definitions. - This License refers to version 3 of the GNU General Public License. + "This License" refers to version 3 of the GNU General Public License. - Copyright also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. + "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. - The Program refers to any copyrightable work licensed under this License. Each licensee is addressed as you. Licensees and recipients may be individuals or organizations. + "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. - To modify a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a modified version of the earlier work or a work based on the earlier work. + To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. - A covered work means either the unmodified Program or a work based on the Program. + A "covered work" means either the unmodified Program or a work based on the Program. - To propagate a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. 
+ To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. - To convey a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. + To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. - An interactive user interface displays Appropriate Legal Notices to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. + An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. - The source code for a work means the preferred form of the work for making modifications to it. 
Object code means any non-source form of a work. + The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. - A Standard Interface means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. + A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. - The System Libraries of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A Major Component, in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. + The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. 
A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. - The Corresponding Source for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. + The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. 
The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. @@ -194,10 +209,10 @@ ________________________________________________________________________________ You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. - b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to keep intact all notices. + b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. 
- A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an aggregate if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. + A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: @@ -209,9 +224,9 @@ ________________________________________________________________________________ e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. 
- A User Product is either (1) a consumer product, which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, normally used refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. + A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. - Installation Information for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. 
The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. + "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). @@ -220,7 +235,7 @@ ________________________________________________________________________________ Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. - Additional permissions are terms that supplement the terms of this License by making exceptions from one or more of its conditions. 
Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. + "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. @@ -232,7 +247,7 @@ ________________________________________________________________________________ d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. 
- All other non-permissive additional terms are considered further restrictions within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. + All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. @@ -253,24 +268,24 @@ ________________________________________________________________________________ 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. 
- An entity transaction is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. + An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. - A contributor is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. The work thus licensed is called the contributor's contributor version. 
+ A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. The work thus licensed is called the contributor's "contributor version". - A contributor's essential patent claims are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, control includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. + A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. - In the following three paragraphs, a patent license is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To grant such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. 
+ In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. - If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. Knowingly relying means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. + If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. 
"Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. - A patent license is discriminatory if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. + A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. 
You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. @@ -283,14 +298,14 @@ ________________________________________________________________________________ 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. - Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License or any later version applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. + Each version is given a distinguishing version number. 
If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM AS IS WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. 
SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. @@ -303,7 +318,7 @@ ________________________________________________________________________________ How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. - To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the copyright line and a pointer to where the full notice is found. + To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) @@ -328,9 +343,9 @@ ________________________________________________________________________________ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. 
- The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an about box. + The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an "about box". - You should also get your employer (if you work as a programmer) or school, if any, to sign a copyright disclaimer for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see . + You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see . The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read . @@ -339,7 +354,7 @@ ________________________________________________________________________________ Version 3.1, 31 March 2009 - Copyright 2009 Free Software Foundation, Inc. + Copyright (c) 2009 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. @@ -368,53 +383,9 @@ ________________________________________________________________________________ _______________________________________________________________________________________________________ -5. Portable Hardware Locality (hwloc): - Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana University Research and Technology Corporation. All rights reserved. 
- Copyright (c) 2004-2005 The University of Tennessee and The University of Tennessee Research Foundation. All rights reserved. - Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, University of Stuttgart. All rights reserved. - Copyright (c) 2004-2005 The Regents of the University of California. All rights reserved. - Copyright (c) 2009 CNRS - Copyright (c) 2009-2016 Inria. All rights reserved. - Copyright (c) 2009-2015 Universite Bordeaux - Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. - Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved. - Copyright (c) 2010 IBM - Copyright (c) 2010 Jirka Hladky - Copyright (c) 2012 Aleksej Saushev, The NetBSD Foundation - Copyright (c) 2012 Blue Brain Project, EPFL. All rights reserved. - Copyright (c) 2013-2014 University of Wisconsin-La Crosse. All rights reserved. - Copyright (c) 2015 Research Organization for Information Science and Technology (RIST). All rights reserved. - Copyright (c) 2015-2016 Intel, Inc. All rights reserved. - See COPYING in top-level directory. - - New BSD License - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - 3. The name of the author may not be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -_______________________________________________________________________________________________________ - -6. Doctest +5. Doctest - Copyright (c) 2016-2019 Viktor Kirilov + Copyright (c) 2016-2021 Viktor Kirilov The MIT License (MIT) From 73cf4204f1c93ffac2853c5a4b252b834a90bd1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=BCnther?= Date: Thu, 15 Jun 2023 15:40:12 +0200 Subject: [PATCH 41/42] Try OIDN for x64 on ARM cross-compile --- .github/workflows/ci.macos.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.macos.yml b/.github/workflows/ci.macos.yml index af10603a8..5e9e114f9 100644 --- a/.github/workflows/ci.macos.yml +++ b/.github/workflows/ci.macos.yml @@ -44,7 +44,7 @@ jobs: force-delete: true # guarantees .gitattributes are respected in working dir runs-on: '["macOS", "arm", "build"]' cmd: | - scripts/build/macosx.sh -DCMAKE_TOOLCHAIN_FILE=../scripts/superbuild/toolchains/macos-rosetta.cmake -DBUILD_TBB_FROM_SOURCE=ON -DBUILD_EMBREE_FROM_SOURCE=ON -DBUILD_ISA_AVX=OFF -DBUILD_ISA_AVX2=OFF -DBUILD_ISA_AVX512=OFF + scripts/build/macosx.sh -DCMAKE_TOOLCHAIN_FILE=../scripts/superbuild/toolchains/macos-rosetta.cmake -DBUILD_TBB_FROM_SOURCE=ON -DBUILD_EMBREE_FROM_SOURCE=ON -DBUILD_OIDN=ON -DBUILD_OIDN_FROM_SOURCE=ON -DBUILD_ISA_AVX=OFF -DBUILD_ISA_AVX2=OFF -DBUILD_ISA_AVX512=OFF artifact-out: build-osx-x64 artifact-path: build/install/ospray build/CMakeCache.txt build/*/build/CMakeCache.txt From 
aa6534c5e8ddea0f9de2c3d23c16a61ee7bb3aa1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=BCnther?= Date: Thu, 15 Jun 2023 15:37:10 +0200 Subject: [PATCH 42/42] Sync README and CHANGELOG, remove Gitter --- CHANGELOG.md | 10 ++++++++-- README.md | 16 ++++++++-------- doc/Makefile | 4 ++-- doc/gitter.md | 2 -- doc/gitter_badge.svg | 1 - 5 files changed, 18 insertions(+), 15 deletions(-) delete mode 100644 doc/gitter.md delete mode 100644 doc/gitter_badge.svg diff --git a/CHANGELOG.md b/CHANGELOG.md index d300a872c..448569e62 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,11 +3,17 @@ Version History ### Changes in v2.12.0: +- Support denoising on the GPU with OIDN 2.0, which is the new minimum + version. The `denoiser` now uses HDR mode - New parameter `maxScatteringEvents` for the `pathtracer` which limits the number of non-specular (i.e., diffuse and glossy) bounces -- Support denoising on the GPU with OIDN 2.0, which is the new minimum - version +- Optimized dynamic load balancing for MPI devices +- Fix crash when using small image resolution and many MPI ranks +- Fix crash in `pathtracer` when `lightSamples > 0` but no lights in the + scene +- Fix transparent shadows with too high `minContribution` setting - The new minimum version for ISPC is v1.20.0 +- Release binaries on Linux are built on Rocky 8 ### Changes in v2.11.0: diff --git a/README.md b/README.md index 95d2d70e2..726eaafa5 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ OSPRay ====== -This is release v2.12.0 (devel) of Intel® OSPRay. For changes and new -features see the [changelog](CHANGELOG.md). Visit http://www.ospray.org -for more information. +This is release v2.12.0 of Intel® OSPRay. For changes and new features +see the [changelog](CHANGELOG.md). Visit http://www.ospray.org for more +information. 
OSPRay Overview =============== @@ -46,9 +46,6 @@ missing features please contact us via email at To receive release announcements simply [“Watch” the OSPRay repository](https://github.com/ospray/OSPRay) on GitHub. -[![Join the chat at -https://gitter.im/ospray/ospray](https://ospray.github.io/images/gitter_badge.svg)](https://gitter.im/ospray/ospray?utm_source=badge&utm_medium=badge&utm_content=badge) - Building and Finding OSPRay =========================== @@ -3086,10 +3083,13 @@ exposure bias to match 18% middle gray. #### Denoiser OSPRay comes with a module that adds support for Intel® Open Image -Denoise. This is provided as an optional module as it creates an +Denoise (OIDN). This is provided as an optional module as it creates an additional project dependency at compile time. The module implements a “`denoiser`” frame operation, which denoises the entire frame before the -frame is completed. +frame is completed. OIDN will automatically select the fastest device, +using a GPU when available. 
The device selection can be overridden by the +environment variable `OIDN_DEFAULT_DEVICE`, possible values are `cpu`, +`sycl`, `cuda`, `hip`, or a physical device ID. Rendering --------- diff --git a/doc/Makefile b/doc/Makefile index 69265b456..9b28df750 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -6,7 +6,7 @@ tmptexfiles := $(addprefix tmp/, $(addsuffix .tex, overview changelog compilatio images_jpg := $(addprefix images/, $(addsuffix .jpg, exampleViewer $(addprefix camera_, perspective architectural stereo orthographic panoramic) $(addprefix material_, OBJ Principled CarPaint Metal Alloy Glass ThinGlass MetallicPaint Luminous) ColoredWindow $(addprefix ospMPIDistribTutorial, Volume Spheres _firstFrame _accumulatedFrame))) images_png := $(addprefix images/, $(addsuffix .png, diffuse_rooms normalmap_frustum tutorial_accumulatedframe tutorial_firstframe ospExamples renderSunSky)) images_fig := spot_light c-gamma_coords quad_light hdri_light -images_svg := gitter_badge structured_spherical_coords vdb_structure +images_svg := structured_spherical_coords vdb_structure images_fig2pdf := $(addprefix tmp/, $(addsuffix .pdf, $(images_fig))) images_svg2pdf := $(addprefix images/, $(addsuffix .pdf, $(images_svg))) @@ -186,7 +186,7 @@ tmp/compilation.md: prerequisites.md ../scripts/superbuild/README.md compilation tmp/tutorials.md: tutorials.md tmp/links_local.md tmp/images_web.md $(PANDOC) $+ --indented-code-classes=sh -t markdown-fenced_code_attributes -o $@ -../README.md: tmp/readme_head.md tmp/overview.md gitter.md tmp/compilation.md tmp/documentation.md tmp/api.md tmp/tutorials.md +../README.md: tmp/readme_head.md tmp/overview.md tmp/compilation.md tmp/documentation.md tmp/api.md tmp/tutorials.md $(PANDOC) --file-scope $+ --markdown-headings=setext -t gfm | $(convert_nbsp_width) > $@ ### pdf diff --git a/doc/gitter.md b/doc/gitter.md deleted file mode 100644 index 48a3fcc9a..000000000 --- a/doc/gitter.md +++ /dev/null @@ -1,2 +0,0 @@ -[![Join the chat at 
https://gitter.im/ospray/ospray](https://ospray.github.io/images/gitter_badge.svg)](https://gitter.im/ospray/ospray?utm_source=badge&utm_medium=badge&utm_content=badge) - diff --git a/doc/gitter_badge.svg b/doc/gitter_badge.svg deleted file mode 100644 index 7064d7f43..000000000 --- a/doc/gitter_badge.svg +++ /dev/null @@ -1 +0,0 @@ -chatchaton gitteron gitter \ No newline at end of file