From b9a5c389a23b8859246e04936f5e68c606536114 Mon Sep 17 00:00:00 2001 From: Merlin Nimier-David Date: Fri, 30 Sep 2022 15:56:17 +0200 Subject: [PATCH 1/4] Mesh: support naive ray intersection --- include/mitsuba/render/mesh.h | 12 +++++++++++ src/render/mesh.cpp | 39 +++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/include/mitsuba/render/mesh.h b/include/mitsuba/render/mesh.h index 00cc0ad3c..4ab6884ad 100644 --- a/include/mitsuba/render/mesh.h +++ b/include/mitsuba/render/mesh.h @@ -264,6 +264,18 @@ class MI_EXPORT_LIB Mesh : public Shape { MI_DECLARE_RAY_INTERSECT_TRI_PACKET(8) MI_DECLARE_RAY_INTERSECT_TRI_PACKET(16) + template + std::tuple, dr::uint32_array_t, + dr::uint32_array_t> + ray_intersect_preliminary_impl(const Ray3fP &ray, + dr::mask_t active) const; + + template + dr::mask_t ray_test_impl(const Ray3fP &ray, + dr::mask_t active) const; + MI_SHAPE_DEFINE_RAY_INTERSECT_METHODS() + + #if defined(MI_ENABLE_EMBREE) /// Return the Embree version of this shape virtual RTCGeometry embree_geometry(RTCDevice device) override; diff --git a/src/render/mesh.cpp b/src/render/mesh.cpp index 093e9fbe2..f7b4911c0 100644 --- a/src/render/mesh.cpp +++ b/src/render/mesh.cpp @@ -621,6 +621,45 @@ Mesh::barycentric_coordinates(const SurfaceInteraction3f &si, return {w, u, v}; } +MI_VARIANT +template +std::tuple, dr::uint32_array_t, + dr::uint32_array_t> +Mesh::ray_intersect_preliminary_impl( + const Ray3fP &ray, dr::mask_t active) const { + MI_MASK_ARGUMENT(active); + + FloatP t = dr::Infinity; + Point uv = dr::NaN; + dr::uint32_array_t prim_index = (uint32_t) -1; + for (size_t index = 0; index < m_face_count; ++index) { + auto [prim_t, prim_uv] = ray_intersect_triangle_impl(index, ray, active); + dr::mask_t valid = dr::isfinite(prim_t) && (prim_t < t); + dr::masked(t, valid) = prim_t; + dr::masked(uv, valid) = prim_uv; + dr::masked(prim_index, valid) = index; + } + + // Cannot determine the shape index, will be up to the caller. 
+ uint32_t shape_index = (uint32_t) -1; + return { t, uv, shape_index, prim_index }; +} + +MI_VARIANT +template +dr::mask_t +Mesh::ray_test_impl(const Ray3fP &ray, + dr::mask_t active) const { + MI_MASK_ARGUMENT(active); + + dr::mask_t hit = false; + for (size_t index = 0; index < m_face_count; ++index) { + FloatP prim_t = ray_intersect_triangle_impl(index, ray, active).first; + hit |= dr::neq(prim_t, dr::Infinity); + } + + return hit; +} MI_VARIANT typename Mesh::SurfaceInteraction3f Mesh::compute_surface_interaction(const Ray3f &ray, From c06cf69e5f52a5d0e3c00424ec5c8ef7e8caed81 Mon Sep 17 00:00:00 2001 From: Merlin Nimier-David Date: Fri, 30 Sep 2022 15:59:46 +0200 Subject: [PATCH 2/4] Scene: support using naive queries This can be useful to completely bypass OptiX or other heavy acceleration data structures when the scene is trivial (e.g. a single cube). --- include/mitsuba/render/scene.h | 1 + include/mitsuba/render/shape.h | 53 +++++++++-------- src/render/scene.cpp | 105 +++++++++++++++++++++++++++------ 3 files changed, 114 insertions(+), 45 deletions(-) diff --git a/include/mitsuba/render/scene.h b/include/mitsuba/render/scene.h index 86ee4af82..863ebe1e7 100644 --- a/include/mitsuba/render/scene.h +++ b/include/mitsuba/render/scene.h @@ -574,6 +574,7 @@ class MI_EXPORT_LIB Scene : public Object { ScalarFloat m_emitter_pmf; bool m_shapes_grad_enabled; + bool m_use_naive_intersection; }; /// Dummy function which can be called to ensure that the librender shared library is loaded diff --git a/include/mitsuba/render/shape.h b/include/mitsuba/render/shape.h index 83e63f682..42941be04 100644 --- a/include/mitsuba/render/shape.h +++ b/include/mitsuba/render/shape.h @@ -581,56 +581,57 @@ class MI_EXPORT_LIB Shape : public Object { MI_EXTERN_CLASS(Shape) NAMESPACE_END(mitsuba) -#define MI_IMPLEMENT_RAY_INTERSECT_PACKET(N) \ - using typename Base::FloatP##N; \ - using typename Base::UInt32P##N; \ - using typename Base::MaskP##N; \ - using typename 
Base::Point2fP##N; \ - using typename Base::Point3fP##N; \ - using typename Base::Ray3fP##N; \ - std::tuple \ - ray_intersect_preliminary_packet( \ - const Ray3fP##N &ray, MaskP##N active) const override { \ - (void) ray; (void) active; \ - if constexpr (!dr::is_cuda_v) \ - return ray_intersect_preliminary_impl(ray, active); \ +#define MI_IMPLEMENT_RAY_INTERSECT_PACKET(N) \ + std::tuple \ + ray_intersect_preliminary_packet(const typename Base::Ray3fP##N &ray, \ + typename Base::MaskP##N active) \ + const override { \ + (void) ray; \ + (void) active; \ + if constexpr (!dr::is_cuda_v) \ + return ray_intersect_preliminary_impl( \ + ray, active); \ else \ Throw("ray_intersect_preliminary_packet() CUDA not supported"); \ } \ - MaskP##N ray_test_packet(const Ray3fP##N &ray, MaskP##N active) \ + typename Base::MaskP##N ray_test_packet( \ + const typename Base::Ray3fP##N &ray, typename Base::MaskP##N active) \ const override { \ - (void) ray; (void) active; \ - if constexpr (!dr::is_cuda_v) \ - return ray_test_impl(ray, active); \ + (void) ray; \ + (void) active; \ + if constexpr (!dr::is_cuda_v) \ + return ray_test_impl(ray, active); \ else \ Throw("ray_intersect_preliminary_packet() CUDA not supported"); \ } // Macro to define ray intersection methods given an *_impl() templated implementation -#define MI_SHAPE_DEFINE_RAY_INTERSECT_METHODS() \ +#define MI_SHAPE_DEFINE_RAY_INTERSECT_METHODS() \ PreliminaryIntersection3f ray_intersect_preliminary( \ const Ray3f &ray, Mask active) const override { \ - MI_MASK_ARGUMENT(active); \ - PreliminaryIntersection3f pi = dr::zeros(); \ + MI_MASK_ARGUMENT(active); \ + PreliminaryIntersection3f pi = dr::zeros(); \ std::tie(pi.t, pi.prim_uv, pi.shape_index, pi.prim_index) = \ ray_intersect_preliminary_impl(ray, active); \ pi.shape = this; \ return pi; \ } \ Mask ray_test(const Ray3f &ray, Mask active) const override { \ - MI_MASK_ARGUMENT(active); \ + MI_MASK_ARGUMENT(active); \ return ray_test_impl(ray, active); \ } \ - using typename 
Base::ScalarRay3f; \ std::tuple \ - ray_intersect_preliminary_scalar(const ScalarRay3f &ray) const override { \ + ray_intersect_preliminary_scalar(const typename Base::ScalarRay3f &ray) \ + const override { \ return ray_intersect_preliminary_impl(ray, true); \ } \ - ScalarMask ray_test_scalar(const ScalarRay3f &ray) const override { \ + ScalarMask ray_test_scalar(const typename Base::ScalarRay3f &ray) \ + const override { \ return ray_test_impl(ray, true); \ } \ - MI_IMPLEMENT_RAY_INTERSECT_PACKET(4) \ - MI_IMPLEMENT_RAY_INTERSECT_PACKET(8) \ + MI_IMPLEMENT_RAY_INTERSECT_PACKET(4) \ + MI_IMPLEMENT_RAY_INTERSECT_PACKET(8) \ MI_IMPLEMENT_RAY_INTERSECT_PACKET(16) // ----------------------------------------------------------------------- diff --git a/src/render/scene.cpp b/src/render/scene.cpp index 7a13f3bdd..6abb2e6bd 100644 --- a/src/render/scene.cpp +++ b/src/render/scene.cpp @@ -61,10 +61,22 @@ MI_VARIANT Scene::Scene(const Properties &props) { for (Sensor *sensor: m_sensors) sensor->set_scene(this); - if constexpr (dr::is_cuda_v) - accel_init_gpu(props); - else - accel_init_cpu(props); + // Decide whether to use acceleration data structures for ray intersections + // TODO: do we even want a heuristic? Could lead to surprising changes in performance for the user. + bool naive_intersection_desirable = m_shapes.size() <= 5; + m_use_naive_intersection = + props.get("use_naive_intersection", naive_intersection_desirable); + if (m_use_naive_intersection) + Log(Info, "The scene will not use acceleration data structures " + "for ray intersections."); + + // Build acceleration data structures if needed + if (!m_use_naive_intersection) { + if constexpr (dr::is_cuda_v) + accel_init_gpu(props); + else + accel_init_cpu(props); + } if (!m_emitters.empty()) { // Inform environment emitters etc. 
about the scene bounds @@ -84,10 +96,12 @@ MI_VARIANT Scene::Scene(const Properties &props) { } MI_VARIANT Scene::~Scene() { - if constexpr (dr::is_cuda_v) - accel_release_gpu(); - else - accel_release_cpu(); + if (!m_use_naive_intersection) { + if constexpr (dr::is_cuda_v) + accel_release_gpu(); + else + accel_release_cpu(); + } // Trigger deallocation of all instances m_emitters.clear(); @@ -111,6 +125,27 @@ Scene::ray_intersect(const Ray3f &ray, uint32_t ray_flags, Mask MI_MASKED_FUNCTION(ProfilerPhase::RayIntersect, active); DRJIT_MARK_USED(coherent); + if (m_use_naive_intersection) { + // Naive intersection mode: bypass all acceleration data structures, + // test for intersections explicitly against each shape. + PreliminaryIntersection3f pi = dr::zeros(); + + for (size_t shape_index = 0; shape_index < m_shapes.size(); ++shape_index) { + PreliminaryIntersection3f prim_pi = + m_shapes[shape_index]->ray_intersect_preliminary(ray, active); + // TODO: fix masked struct assignment + Mask valid = prim_pi.is_valid() && prim_pi.t < pi.t; + dr::masked(pi.t, valid) = prim_pi.t; + dr::masked(pi.prim_uv, valid) = prim_pi.prim_uv; + dr::masked(pi.prim_index, valid) = prim_pi.prim_index; + dr::masked(pi.shape, valid) = prim_pi.shape; + dr::masked(pi.instance, valid) = prim_pi.instance; + dr::masked(pi.shape_index, valid) = shape_index; + } + return pi.compute_surface_interaction(ray, ray_flags, + active && pi.is_valid()); + } + if constexpr (dr::is_cuda_v) return ray_intersect_gpu(ray, ray_flags, active); else @@ -120,6 +155,28 @@ Scene::ray_intersect(const Ray3f &ray, uint32_t ray_flags, Mask MI_VARIANT typename Scene::PreliminaryIntersection3f Scene::ray_intersect_preliminary(const Ray3f &ray, Mask coherent, Mask active) const { DRJIT_MARK_USED(coherent); + + if (m_use_naive_intersection) { + // Naive intersection mode: bypass all acceleration data structures, + // test for intersections explicitly against each shape. 
+ PreliminaryIntersection3f pi = dr::zeros(); + + for (size_t shape_index = 0; shape_index < m_shapes.size(); ++shape_index) { + PreliminaryIntersection3f prim_pi = + m_shapes[shape_index]->ray_intersect_preliminary(ray, active); + // TODO: fix masked struct assignment + Mask valid = prim_pi.is_valid() && prim_pi.t < pi.t; + dr::masked(pi.t, valid) = prim_pi.t; + dr::masked(pi.prim_uv, valid) = prim_pi.prim_uv; + dr::masked(pi.prim_index, valid) = prim_pi.prim_index; + dr::masked(pi.shape, valid) = prim_pi.shape; + dr::masked(pi.instance, valid) = prim_pi.instance; + dr::masked(pi.shape_index, valid) = shape_index; + } + + return pi; + } + if constexpr (dr::is_cuda_v) return ray_intersect_preliminary_gpu(ray, active); else @@ -131,6 +188,14 @@ Scene::ray_test(const Ray3f &ray, Mask coherent, Mask active) c MI_MASKED_FUNCTION(ProfilerPhase::RayTest, active); DRJIT_MARK_USED(coherent); + if (m_use_naive_intersection) { + Mask hit = false; + for (const auto &shape : m_shapes) { + hit |= shape->ray_test(ray, active); + } + return hit; + } + if constexpr (dr::is_cuda_v) return ray_test_gpu(ray, active); else @@ -299,18 +364,20 @@ MI_VARIANT void Scene::parameters_changed(const std::vectorset_scene(this); // TODO use parameters_changed({"scene"}) - bool accel_is_dirty = false; - for (auto &s : m_shapes) { - accel_is_dirty = s->dirty(); - if (accel_is_dirty) - break; - } + if (!m_use_naive_intersection) { + bool accel_is_dirty = false; + for (auto &s : m_shapes) { + accel_is_dirty = s->dirty(); + if (accel_is_dirty) + break; + } - if (accel_is_dirty) { - if constexpr (dr::is_cuda_v) - accel_parameters_changed_gpu(); - else - accel_parameters_changed_cpu(); + if (accel_is_dirty) { + if constexpr (dr::is_cuda_v) + accel_parameters_changed_gpu(); + else + accel_parameters_changed_cpu(); + } } // Check whether any shape parameters have gradient tracking enabled From dd0c7d4ed5dd799cefe46ef51df22b19e0d62f3a Mon Sep 17 00:00:00 2001 From: Merlin Nimier-David Date: Fri, 30 Sep 
2022 16:14:19 +0200 Subject: [PATCH 3/4] Tests for naive ray intersections Mostly adapted from the existing render tests. --- src/render/tests/test_accel_bypass.py | 214 ++++++++++++++++++++++++++ src/render/tests/test_renders.py | 5 +- 2 files changed, 216 insertions(+), 3 deletions(-) create mode 100644 src/render/tests/test_accel_bypass.py diff --git a/src/render/tests/test_accel_bypass.py b/src/render/tests/test_accel_bypass.py new file mode 100644 index 000000000..538b9d928 --- /dev/null +++ b/src/render/tests/test_accel_bypass.py @@ -0,0 +1,214 @@ +import time + +import pytest +import drjit as dr +import mitsuba as mi +import numpy as np + +from mitsuba.scalar_rgb.test.util import fresolver_append_path +from .test_renders import z_test, bitmap_extract, xyz_to_rgb_bmp + + +def make_test_scene(resx, resy, simple=False, integrator=None, **kwargs): + def color_to_dict(color): + if isinstance(color, (float, int)): + return color + return {'type': 'rgb', 'value': color} + def checkerboard(color0, color1=None): + d = { + 'type': 'checkerboard', + 'to_uv': mi.ScalarTransform4f.scale((4, 4, 4)), + } + if color0 is not None: + d['color0'] = color_to_dict(color0) + if color1 is not None: + d['color1'] = color_to_dict(color1) + return d + + scene = dict({ + 'type': 'scene', + 'shape1': { + 'type': 'sphere', + 'to_world': mi.ScalarTransform4f.translate([1.0, 0, 0]), + 'bsdf': { + 'type': 'dielectric', + } + }, + 'shape2': { + 'type': 'cylinder', + 'to_world': ( + mi.ScalarTransform4f.translate([-0.85, 1.5, 0]) + @ mi.ScalarTransform4f.scale((1, 3, 1)) + @ mi.ScalarTransform4f.rotate(axis=(1, 0, 0), angle=90) + ), + 'bsdf': { + 'type': 'roughconductor', + 'alpha': checkerboard(0.1, 0.01), + } + }, + 'shape3': { + 'type': 'disk', + 'to_world': ( + mi.ScalarTransform4f.translate([-2.5, 0, 1]) + @ mi.ScalarTransform4f.rotate(axis=(1, 0, 0), angle=180) + ), + 'bsdf': { + 'type': 'diffuse', + 'reflectance': checkerboard([0, 0, 1]), + } + }, + 'shape4': { + 'type': 'cube', + 
'to_world': ( + mi.ScalarTransform4f.translate([2.5, 0, 1]) + @ mi.ScalarTransform4f.rotate(axis=(1, 0, 0), angle=180) + ), + 'bsdf': { + 'type': 'diffuse', + 'reflectance': checkerboard([0, 1, 1]), + } + }, + 'shape5': { + 'type': 'rectangle', + 'to_world': ( + mi.ScalarTransform4f.translate([-1.5, 1.0, -0.5]) + @ mi.ScalarTransform4f.rotate(axis=(1, 0, 0), angle=205) + ), + 'bsdf': { + 'type': 'diffuse', + 'reflectance': checkerboard([1, 1, 0]), + } + }, + 'shape6': { + 'type': 'obj', + 'filename': 'resources/data/scenes/cbox/meshes/cbox_smallbox.obj', + 'to_world': ( + mi.ScalarTransform4f.translate([0, -2.5, -0.5]) + @ mi.ScalarTransform4f.scale((0.01, 0.01, 0.01)) + @ mi.ScalarTransform4f.translate([-150, 0, 0]) + ), + 'bsdf': { + 'type': 'diffuse', + 'reflectance': checkerboard([1, 0, 1]), + } + }, + 'sensor': { + 'type': 'perspective', + 'fov': 60, + 'to_world': mi.ScalarTransform4f.look_at( + origin=[0, 0, -10], + target=[0, 0, 0], + up=[0, 1, 0], + ), + 'film': { + 'type': 'hdrfilm', + 'width': resx, + 'height': resy, + } + }, + 'integrator': { + # Moment integrator to estimate variance for the z-test + 'type': 'moment', + 'sub_integrator': { + 'type': 'path', + 'max_depth': 8, + }, + }, + 'emitter': { + 'type': 'envmap', + 'filename': 'resources/data/common/textures/museum.exr', + } + }, **kwargs) + + if simple: + del scene['shape2'], scene['shape3'], scene['shape4'], scene['shape5'], scene['shape6'] + if integrator is not None: + scene['integrator'] = integrator + + return mi.load_dict(scene) + + +@fresolver_append_path +def test01_bypass_correctness(variants_all_backends_once): + """Rendering with and without acceleration data structures should result in the same images.""" + # Adapted from test_renders.test_render() + significance_level = 0.01 + resx, resy = (103, 51) + + # Compute spp budget + sample_budget = int(1e6) + pixel_count = resx * resy + spp = sample_budget // pixel_count + + results = {} + for bypass in (True, False): + scene = 
make_test_scene(resx, resy, use_naive_intersection=bypass) + # Render the scene, including a variance estimate + scene.integrator().render(scene, seed=0, spp=spp) + + bmp = scene.sensors()[0].film().bitmap(raw=False) + img, var_img = bitmap_extract(bmp, require_variance=True) + results[bypass] = (img, var_img) + + # Compute Z-test p-value + p_value = z_test(results[0][0], spp, results[1][0], results[1][1]) + + # Apply the Sidak correction term, since we'll be conducting multiple independent + # hypothesis tests. This accounts for the fact that the probability of a failure + # increases quickly when several hypothesis tests are run in sequence. + alpha = 1.0 - (1.0 - significance_level) ** (1.0 / pixel_count) + + success = (p_value > alpha) + if (np.count_nonzero(success) / 3) >= (0.9975 * pixel_count): + print(f'Accepted the null hypothesis (min(p-value) = {np.min(p_value)}, ' + f'significance level = {alpha})') + else: + print(f'Rejected the null hypothesis (min(p-value) = {np.min(p_value)}, ' + f'significance level = {alpha})') + + # Note: images are in the XYZ color space by default + for bypass in results: + xyz_to_rgb_bmp(results[bypass][0]).write( + f'test_{mi.variant()}_{"naive" if bypass else "accel"}.exr') + + assert False, 'Z-test failed' + + +# Useful to investigate performance, but probably not reliable enough +# to run on the CI. 
+@pytest.mark.skip +@fresolver_append_path +def test02_speed(variants_all_backends_once): + log_level = mi.Thread.thread().logger().log_level() + mi.set_log_level(mi.LogLevel.Warn) + + results = {} + for bypass in (True, False): + results[bypass] = [] + scene = make_test_scene( + 512, 256, simple=True, + integrator={'type': 'direct'}, + use_naive_intersection=bypass) + + for i in range(12): + dr.eval() + dr.sync_thread() + t0 = time.time() + img = mi.render(scene, spp=32) + dr.eval(img) + dr.sync_thread() + + if i >= 2: + elapsed = time.time() - t0 + results[bypass].append(elapsed) + + if i == 0: + mi.Bitmap(img).write(f'test_speed_{mi.variant()}_{bypass}.exr') + + print(f'\n--- {mi.variant()} ---') + for bypass, values in results.items(): + print(f'{"naive" if bypass else "accel"}: {1000 * np.mean(values):.6f} ms') + print(values) + print(f'------------------\n\n') + + mi.set_log_level(log_level) diff --git a/src/render/tests/test_renders.py b/src/render/tests/test_renders.py index b42509e13..2277e98e5 100644 --- a/src/render/tests/test_renders.py +++ b/src/render/tests/test_renders.py @@ -129,10 +129,9 @@ def read_rgb_bmp_to_xyz(fname): def bitmap_extract(bmp, require_variance=True): """Extract different channels from moment integrator AOVs""" - # AVOs from the moment integrator are in XYZ (float32) + # AOVs from the moment integrator are in XYZ (float32) split = bmp.split() if len(split) == 1: - print('hello!') if require_variance: raise RuntimeError( 'Could not extract variance image from bitmap. 
' @@ -231,7 +230,7 @@ def test_render(variant, scene_fname, integrator_type, jit_flags_key): print('Accepted the null hypothesis (min(p-value) = %f, significance level = %f)' % (np.min(p_value), alpha)) else: - print('Reject the null hypothesis (min(p-value) = %f, significance level = %f)' % + print('Rejected the null hypothesis (min(p-value) = %f, significance level = %f)' % (np.min(p_value), alpha)) output_dir = join(dirname(scene_fname), 'error_output') From 52d24f60590df3df96b6f5912d0139fc6229ca47 Mon Sep 17 00:00:00 2001 From: Merlin Nimier-David Date: Thu, 6 Oct 2022 13:34:24 +0200 Subject: [PATCH 4/4] WIP AABB custom shape --- include/mitsuba/render/interaction.h | 25 +- src/render/scene.cpp | 23 +- src/render/tests/test_accel_bypass.py | 16 +- src/shapes/CMakeLists.txt | 9 +- src/shapes/aabb.cpp | 421 ++++++++++++++++++++++++++ src/shapes/optix/aabb.cuh | 52 ++++ 6 files changed, 518 insertions(+), 28 deletions(-) create mode 100644 src/shapes/aabb.cpp create mode 100644 src/shapes/optix/aabb.cuh diff --git a/include/mitsuba/render/interaction.h b/include/mitsuba/render/interaction.h index caed651ba..d112bbfba 100644 --- a/include/mitsuba/render/interaction.h +++ b/include/mitsuba/render/interaction.h @@ -534,6 +534,20 @@ MI_DECLARE_ENUM_OPERATORS(RayFlags) // ----------------------------------------------------------------------------- +// TODO: move to DrJit if this is actually needed +namespace detail { + template + struct unmasked { + using type = T; + }; + template + struct unmasked> { + using type = typename T::Unmasked; + }; + template + using unmasked_t = typename unmasked::type; +} + /** * \brief Stores preliminary information related to a ray intersection * @@ -552,13 +566,16 @@ struct PreliminaryIntersection { // ============================================================= using Float = Float_; - using ShapePtr = dr::replace_scalar_t; MI_IMPORT_CORE_TYPES() - using Index = typename CoreAliases::UInt32; - using Ray3f = typename Shape_::Ray3f; - 
using Spectrum = typename Ray3f::Spectrum; + + using UnmaskedShape = detail::unmasked_t; + using Ray3f = typename UnmaskedShape::Ray3f; + using Spectrum_ = typename Ray3f::Spectrum; + using Spectrum = std::conditional_t, + dr::masked_t, Spectrum_>; + using ShapePtr = dr::replace_scalar_t; //! @} // ============================================================= diff --git a/src/render/scene.cpp b/src/render/scene.cpp index 6abb2e6bd..ff9972fd3 100644 --- a/src/render/scene.cpp +++ b/src/render/scene.cpp @@ -62,10 +62,7 @@ MI_VARIANT Scene::Scene(const Properties &props) { sensor->set_scene(this); // Decide whether to use acceleration data structures for ray intersections - // TODO: do we even want a heuristic? Could lead to surprising changes in performance for the user. - bool naive_intersection_desirable = m_shapes.size() <= 5; - m_use_naive_intersection = - props.get("use_naive_intersection", naive_intersection_desirable); + m_use_naive_intersection = props.get("use_naive_intersection", false); if (m_use_naive_intersection) Log(Info, "The scene will not use acceleration data structures " "for ray intersections."); @@ -133,14 +130,8 @@ Scene::ray_intersect(const Ray3f &ray, uint32_t ray_flags, Mask for (size_t shape_index = 0; shape_index < m_shapes.size(); ++shape_index) { PreliminaryIntersection3f prim_pi = m_shapes[shape_index]->ray_intersect_preliminary(ray, active); - // TODO: fix masked struct assignment Mask valid = prim_pi.is_valid() && prim_pi.t < pi.t; - dr::masked(pi.t, valid) = prim_pi.t; - dr::masked(pi.prim_uv, valid) = prim_pi.prim_uv; - dr::masked(pi.prim_index, valid) = prim_pi.prim_index; - dr::masked(pi.shape, valid) = prim_pi.shape; - dr::masked(pi.instance, valid) = prim_pi.instance; - dr::masked(pi.shape_index, valid) = shape_index; + dr::masked(pi, valid) = prim_pi; } return pi.compute_surface_interaction(ray, ray_flags, active && pi.is_valid()); @@ -164,14 +155,8 @@ Scene::ray_intersect_preliminary(const Ray3f &ray, Mask coheren for (size_t 
shape_index = 0; shape_index < m_shapes.size(); ++shape_index) { PreliminaryIntersection3f prim_pi = m_shapes[shape_index]->ray_intersect_preliminary(ray, active); - // TODO: fix masked struct assignment - Mask valid = prim_pi.is_valid() && prim_pi.t < pi.t; - dr::masked(pi.t, valid) = prim_pi.t; - dr::masked(pi.prim_uv, valid) = prim_pi.prim_uv; - dr::masked(pi.prim_index, valid) = prim_pi.prim_index; - dr::masked(pi.shape, valid) = prim_pi.shape; - dr::masked(pi.instance, valid) = prim_pi.instance; - dr::masked(pi.shape_index, valid) = shape_index; + Mask valid = prim_pi.is_valid() && (prim_pi.t < pi.t) && (prim_pi.t > 0.f); + dr::masked(pi, valid) = prim_pi; } return pi; diff --git a/src/render/tests/test_accel_bypass.py b/src/render/tests/test_accel_bypass.py index 538b9d928..f92851ff1 100644 --- a/src/render/tests/test_accel_bypass.py +++ b/src/render/tests/test_accel_bypass.py @@ -67,6 +67,9 @@ def checkerboard(color0, color1=None): 'type': 'diffuse', 'reflectance': checkerboard([0, 1, 1]), } + # 'bsdf': { + # 'type': 'dielectric', + # }, }, 'shape5': { 'type': 'rectangle', @@ -129,11 +132,13 @@ def checkerboard(color0, color1=None): @fresolver_append_path +# def test01_bypass_correctness(variant_scalar_rgb): def test01_bypass_correctness(variants_all_backends_once): """Rendering with and without acceleration data structures should result in the same images.""" # Adapted from test_renders.test_render() significance_level = 0.01 - resx, resy = (103, 51) + resx, resy = (207, 101) + # resx, resy = (103, 51) # Compute spp budget sample_budget = int(1e6) @@ -150,6 +155,13 @@ def test01_bypass_correctness(variants_all_backends_once): img, var_img = bitmap_extract(bmp, require_variance=True) results[bypass] = (img, var_img) + # TODO: remove this + # b = mi.Bitmap(img, mi.Bitmap.PixelFormat.XYZ) + # breakpoint() + # xyz_to_rgb_bmp(img).write( + bmp.split()[0][1].write( + f'test_{mi.variant()}_{"naive" if bypass else "accel"}.exr') + # Compute Z-test p-value p_value = 
z_test(results[0][0], spp, results[1][0], results[1][1]) @@ -176,6 +188,8 @@ def test01_bypass_correctness(variants_all_backends_once): # Useful to investigate performance, but probably not reliable enough # to run on the CI. +# @fresolver_append_path +# def test02_speed(variant_llvm_ad_rgb): @pytest.mark.skip @fresolver_append_path def test02_speed(variants_all_backends_once): diff --git a/src/shapes/CMakeLists.txt b/src/shapes/CMakeLists.txt index 0ca6fcb34..52245f34c 100644 --- a/src/shapes/CMakeLists.txt +++ b/src/shapes/CMakeLists.txt @@ -1,23 +1,24 @@ set(MI_PLUGIN_PREFIX "shapes") +add_plugin(blender blender.cpp) add_plugin(obj obj.cpp) add_plugin(ply ply.cpp) -add_plugin(blender blender.cpp) add_plugin(serialized serialized.cpp) +# add_plugin(aabb aabb.cpp) # TODO +add_plugin(cube cube.cpp) add_plugin(cylinder cylinder.cpp) add_plugin(disk disk.cpp) add_plugin(rectangle rectangle.cpp) add_plugin(sphere sphere.cpp) -add_plugin(cube cube.cpp) -add_plugin(shapegroup shapegroup.cpp) add_plugin(instance instance.cpp) add_plugin(merge merge.cpp) +add_plugin(shapegroup shapegroup.cpp) if (MI_ENABLE_EMBREE) - target_link_libraries(sphere PRIVATE embree) target_link_libraries(instance PRIVATE embree) + target_link_libraries(sphere PRIVATE embree) endif() set(MI_PLUGIN_TARGETS "${MI_PLUGIN_TARGETS}" PARENT_SCOPE) diff --git a/src/shapes/aabb.cpp b/src/shapes/aabb.cpp new file mode 100644 index 000000000..1f7f9c60b --- /dev/null +++ b/src/shapes/aabb.cpp @@ -0,0 +1,421 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(MI_ENABLE_CUDA) + #include "optix/aabb.cuh" +#endif + +NAMESPACE_BEGIN(mitsuba) + +/**! + +.. _shape-aabb: + +Axis-aligned cube (:monosp:`aabb`) +------------------------------------------------- + +This shape plugin describes a simple axis-aligned cube shape. + +TODO: documentation. 
+ */ + +template +class AxisAlignedBox final : public Shape { +public: + MI_IMPORT_BASE(Shape, m_to_world, m_to_object, m_is_instance, initialize, + mark_dirty, get_children_string, parameters_grad_enabled) + MI_IMPORT_TYPES(ShapePtr) + + using typename Base::ScalarSize; + + AxisAlignedBox(const Properties &props) : Base(props) { + /// Are the box's normals pointing inwards? default: no + m_flip_normals = props.get("flip_normals", false); + + update(); + initialize(); + } + + void update() { + // TODO: ensure that there is only translation and scaling in m_to_world + + m_bbox = BoundingBox3f( + m_to_world.scalar() * ScalarPoint3f(0.f), + m_to_world.scalar() * ScalarPoint3f(1.f) + ); + + m_inv_surface_area = dr::rcp(surface_area()); + + dr::make_opaque(m_bbox); + mark_dirty(); + } + + ScalarBoundingBox3f bbox() const override { + return m_bbox.scalar(); + } + + Float surface_area() const override { + return m_bbox.value().surface_area(); + } + + // ============================================================= + //! 
@{ \name Sampling routines + // ============================================================= + /** Not implemented yet: invokes NotImplementedError("sample_position"). The #if 0 body below is disabled and refers to m_radius / m_center, which this class does not declare. */ + PositionSample3f sample_position(Float time, const Point2f &sample, + Mask active) const override { + MI_MASK_ARGUMENT(active); + + NotImplementedError("sample_position"); +#if 0 + Point3f local = warp::square_to_uniform_sphere(sample); + + PositionSample3f ps = dr::zeros(); + ps.p = dr::fmadd(local, m_radius.value(), m_center.value()); + ps.n = local; + + if (m_flip_normals) + ps.n = -ps.n; + + ps.time = time; + ps.delta = m_radius.value() == 0.f; + ps.pdf = m_inv_surface_area; + ps.uv = sample; + + return ps; +#endif + } + /** Returns the cached m_inv_surface_area, independent of the position sample passed in. */ + Float pdf_position(const PositionSample3f & /*ps*/, Mask active) const override { + MI_MASK_ARGUMENT(active); + return m_inv_surface_area; + } + + DirectionSample3f sample_direction(const Interaction3f &it, const Point2f &sample, + Mask active) const override { + MI_MASK_ARGUMENT(active); + + NotImplementedError("sample_direction"); +#if 0 + DirectionSample3f result = dr::zeros(); + + Vector3f dc_v = m_center.value() - it.p; + Float dc_2 = dr::squared_norm(dc_v); + + Float radius_adj = m_radius.value() * (m_flip_normals ? 
+ (1.f + math::RayEpsilon) : + (1.f - math::RayEpsilon)); + Mask outside_mask = active && dc_2 > dr::sqr(radius_adj); + if (likely(dr::any_or(outside_mask))) { + Float inv_dc = dr::rsqrt(dc_2), + sin_theta_max = m_radius.value() * inv_dc, + sin_theta_max_2 = dr::sqr(sin_theta_max), + inv_sin_theta_max = dr::rcp(sin_theta_max), + cos_theta_max = dr::safe_sqrt(1.f - sin_theta_max_2); + + /* Fall back to a Taylor series expansion for small angles, where + the standard approach suffers from severe cancellation errors */ + Float sin_theta_2 = dr::select(sin_theta_max_2 > 0.00068523f, /* sin^2(1.5 deg) */ + 1.f - dr::sqr(dr::fmadd(cos_theta_max - 1.f, sample.x(), 1.f)), + sin_theta_max_2 * sample.x()), + cos_theta = dr::safe_sqrt(1.f - sin_theta_2); + + // Based on https://www.akalin.com/sampling-visible-sphere + Float cos_alpha = sin_theta_2 * inv_sin_theta_max + + cos_theta * dr::safe_sqrt(dr::fnmadd(sin_theta_2, dr::sqr(inv_sin_theta_max), 1.f)), + sin_alpha = dr::safe_sqrt(dr::fnmadd(cos_alpha, cos_alpha, 1.f)); + + auto [sin_phi, cos_phi] = dr::sincos(sample.y() * (2.f * dr::Pi)); + + Vector3f d = Frame3f(dc_v * -inv_dc).to_world(Vector3f( + cos_phi * sin_alpha, + sin_phi * sin_alpha, + cos_alpha)); + + DirectionSample3f ds = dr::zeros(); + ds.p = dr::fmadd(d, m_radius.value(), m_center.value()); + ds.n = d; + ds.d = ds.p - it.p; + + Float dist2 = dr::squared_norm(ds.d); + ds.dist = dr::sqrt(dist2); + ds.d = ds.d / ds.dist; + ds.pdf = warp::square_to_uniform_cone_pdf(dr::zeros(), cos_theta_max); + dr::masked(ds.pdf, dr::eq(ds.dist, 0.f)) = 0.f; + + dr::masked(result, outside_mask) = ds; + } + + Mask inside_mask = dr::andnot(active, outside_mask); + if (unlikely(dr::any_or(inside_mask))) { + Vector3f d = warp::square_to_uniform_sphere(sample); + DirectionSample3f ds = dr::zeros(); + ds.p = dr::fmadd(d, m_radius.value(), m_center.value()); + ds.n = d; + ds.d = ds.p - it.p; + + Float dist2 = dr::squared_norm(ds.d); + ds.dist = dr::sqrt(dist2); + ds.d = ds.d / ds.dist; 
+ ds.pdf = m_inv_surface_area * dist2 / dr::abs_dot(ds.d, ds.n); + + dr::masked(result, inside_mask) = ds; + } + + result.time = it.time; + result.delta = m_radius.value() == 0.f; + + if (m_flip_normals) + result.n = -result.n; + + return result; + #endif + } + /** Not implemented yet: invokes NotImplementedError("pdf_direction") so the report names this method; the #if 0 body below is disabled. */ + Float pdf_direction(const Interaction3f &it, const DirectionSample3f &ds, + Mask active) const override { + MI_MASK_ARGUMENT(active); + + NotImplementedError("pdf_direction"); +#if 0 + // Sine of the angle of the cone containing the sphere as seen from 'it.p'. + Float sin_alpha = m_radius.value() * dr::rcp(dr::norm(m_center.value() - it.p)), + cos_alpha = dr::safe_sqrt(1.f - sin_alpha * sin_alpha); + + return dr::select(sin_alpha < dr::OneMinusEpsilon, + // Reference point lies outside the sphere + warp::square_to_uniform_cone_pdf(dr::zeros(), cos_alpha), + m_inv_surface_area * dr::sqr(ds.dist) / dr::abs_dot(ds.d, ds.n) + ); +#endif + } + + //! @} + // ============================================================= + + // ============================================================= + //! @{ \name Ray tracing routines + // ============================================================= + + template + std::tuple, dr::uint32_array_t, + dr::uint32_array_t> + ray_intersect_preliminary_impl(const Ray3fP &ray, + dr::mask_t active) const { + MI_MASK_ARGUMENT(active); + using Value = std::conditional_t || + dr::is_diff_v, + dr::float32_array_t, + dr::float64_array_t>; + + const auto &bbox = m_bbox.value(); + auto [hit, mint, maxt] = bbox.ray_intersect(ray); + dr::mask_t starts_outside = mint > 0.f; + Value t = dr::select(starts_outside, mint, maxt); + hit &= active && (t <= ray.maxt) && (t > math::RayEpsilon); + t = dr::select(hit, t, dr::Infinity); + + // TODO: UVs, shape index, instance index? 
+ + return { t, dr::zeros>(), ((uint32_t) -1), 0 }; + } + /** Boolean ray test against m_bbox: takes mint when it is positive and maxt otherwise (as returned by ray_intersect; presumably the near/far distances), and reports a hit only for active lanes whose chosen distance lies in (math::RayEpsilon, ray.maxt]. */ + template + dr::mask_t ray_test_impl(const Ray3fP &ray, + dr::mask_t active) const { + MI_MASK_ARGUMENT(active); + using Value = + std::conditional_t || dr::is_diff_v, + dr::float32_array_t, + dr::float64_array_t>; + using Mask = dr::mask_t; + + auto [hit, mint, maxt] = m_bbox.value().ray_intersect(ray); + Mask starts_outside = mint > 0.f; + Value t = dr::select(starts_outside, mint, maxt); + return active && hit && (t <= ray.maxt) && + (t > math::RayEpsilon); + } + + MI_SHAPE_DEFINE_RAY_INTERSECT_METHODS() + + SurfaceInteraction3f compute_surface_interaction(const Ray3f &ray, + const PreliminaryIntersection3f &pi, + uint32_t ray_flags, + uint32_t recursion_depth, + Mask active) const override { + // using ShapePtr = dr::replace_scalar_t; + MI_MASK_ARGUMENT(active); + // TODO + SurfaceInteraction3f si = dr::zeros(); + si.t = pi.t; + si.time = ray.time; + si.wavelengths = ray.wavelengths; + si.p = ray(si.t); + + // Normal vector: assuming axis-aligned bbox, figure + // out the normal direction based on the relative position + // of the intersection point to the bbox's center. + const auto &bbox = m_bbox.value(); + Point3f p_local = (si.p - bbox.center()) / bbox.extents(); + // The axis with the largest local coordinate (magnitude) + // is the axis of the normal vector. + Point3f p_local_abs = dr::abs(p_local); + Float vmax = dr::max(p_local_abs); + Normal3f n(dr::eq(p_local_abs.x(), vmax), dr::eq(p_local_abs.y(), vmax), + dr::eq(p_local_abs.z(), vmax)); + Mask hit = pi.is_valid(); + // Normal always points to the outside of the bbox, independently + // of the ray direction. 
+ n = dr::normalize(dr::sign(p_local) * n); + si.n = dr::select(hit, n, -ray.d); + + si.shape = dr::select(hit, dr::opaque(this), dr::zeros()); + si.uv = 0.f; // TODO: proper UVs + si.sh_frame.n = si.n; + if (has_flag(ray_flags, RayFlags::ShadingFrame)) + si.initialize_sh_frame(); + si.wi = dr::select(hit, si.to_local(-ray.d), -ray.d); + return si; + +#if 0 + // Early exit when tracing isn't necessary + if (!m_is_instance && recursion_depth > 0) + return dr::zeros(); + + // Recompute ray intersection to get differentiable t + Float t = pi.t; + if constexpr (dr::is_diff_v) + t = dr::replace_grad(t, ray_intersect_preliminary(ray, active).t); + + // TODO handle RayFlags::FollowShape and RayFlags::DetachShape + + // Fields requirement dependencies + bool need_dn_duv = has_flag(ray_flags, RayFlags::dNSdUV) || + has_flag(ray_flags, RayFlags::dNGdUV); + bool need_dp_duv = has_flag(ray_flags, RayFlags::dPdUV) || need_dn_duv; + bool need_uv = has_flag(ray_flags, RayFlags::UV) || need_dp_duv; + + SurfaceInteraction3f si = dr::zeros(); + si.t = dr::select(active, t, dr::Infinity); + + si.sh_frame.n = dr::normalize(ray(t) - m_center.value()); + + // Re-project onto the sphere to improve accuracy + si.p = dr::fmadd(si.sh_frame.n, m_radius.value(), m_center.value()); + + if (likely(need_uv)) { + Vector3f local = m_to_object.value().transform_affine(si.p); + + Float rd_2 = dr::sqr(local.x()) + dr::sqr(local.y()), + theta = unit_angle_z(local), + phi = dr::atan2(local.y(), local.x()); + + dr::masked(phi, phi < 0.f) += 2.f * dr::Pi; + + si.uv = Point2f(phi * dr::InvTwoPi, theta * dr::InvPi); + if (likely(need_dp_duv)) { + si.dp_du = Vector3f(-local.y(), local.x(), 0.f); + + Float rd = dr::sqrt(rd_2), + inv_rd = dr::rcp(rd), + cos_phi = local.x() * inv_rd, + sin_phi = local.y() * inv_rd; + + si.dp_dv = Vector3f(local.z() * cos_phi, + local.z() * sin_phi, + -rd); + + Mask singularity_mask = active && dr::eq(rd, 0.f); + if (unlikely(dr::any_or(singularity_mask))) + 
si.dp_dv[singularity_mask] = Vector3f(1.f, 0.f, 0.f); + + si.dp_du = m_to_world.value() * si.dp_du * (2.f * dr::Pi); + si.dp_dv = m_to_world.value() * si.dp_dv * dr::Pi; + } + } + + if (m_flip_normals) + si.sh_frame.n = -si.sh_frame.n; + + si.n = si.sh_frame.n; + + if (need_dn_duv) { + Float inv_radius = + (m_flip_normals ? -1.f : 1.f) * dr::rcp(m_radius.value()); + si.dn_du = si.dp_du * inv_radius; + si.dn_dv = si.dp_dv * inv_radius; + } + + si.shape = this; + si.instance = nullptr; + + if (unlikely(has_flag(ray_flags, RayFlags::BoundaryTest))) + si.boundary_test = dr::abs(dr::dot(si.sh_frame.n, -ray.d)); + + return si; +#endif + } + + //! @} + // ============================================================= + /** Registers "to_world" with the callback, flagged ParamFlags::NonDifferentiable, then forwards to Base::traverse(). */ + void traverse(TraversalCallback *callback) override { + callback->put_parameter("to_world", *m_to_world.ptr(), +ParamFlags::NonDifferentiable); + Base::traverse(callback); + } + /** When keys is empty or contains "to_world", reassigns m_to_world from its own value and rebuilds the box via update(); Base::parameters_changed() is called in every case. */ + void parameters_changed(const std::vector &keys) override { + if (keys.empty() || string::contains(keys, "to_world")) { + // Update the scalar value of the matrix + m_to_world = m_to_world.value(); + update(); + } + Base::parameters_changed(); + } + +#if defined(MI_ENABLE_CUDA) + using Base::m_optix_data_ptr; + /** CUDA variants only. NOTE(review): NotImplementedError(...) is invoked before the allocation and upload of OptixAABBData below, so that code is presumably unreachable for now -- confirm. */ + void optix_prepare_geometry() override { + if constexpr (dr::is_cuda_v) { + NotImplementedError("optix_prepare_geometry"); + if (!m_optix_data_ptr) + m_optix_data_ptr = jit_malloc(AllocType::Device, sizeof(OptixAABBData)); + + OptixAABBData data = { bbox() }; + jit_memcpy(JitBackend::CUDA, m_optix_data_ptr, &data, + sizeof(OptixAABBData)); + } + } +#endif + /** Builds a multi-line description listing bbox, to_world, surface_area and the children string. */ + std::string to_string() const override { + std::ostringstream oss; + oss << "AxisAlignedBox[" << std::endl + << " bbox = " << string::indent(m_bbox, 13) << "," << std::endl + << " to_world = " << string::indent(m_to_world, 13) << "," << std::endl + << " surface_area = " << surface_area() << "," << std::endl + << " " << string::indent(get_children_string()) << std::endl + << "]"; + return oss.str(); + } + + 
MI_DECLARE_CLASS() +private: + /// Axis-aligned bounding box in world space + field m_bbox; + Float m_inv_surface_area; /* reciprocal of surface_area(), refreshed in update() */ + bool m_flip_normals; /* "flip_normals" property; NOTE(review): only read inside #if 0 code in this class */ +}; + +MI_IMPLEMENT_CLASS_VARIANT(AxisAlignedBox, Shape) +MI_EXPORT_PLUGIN(AxisAlignedBox, "AxisAlignedBox intersection primitive"); +NAMESPACE_END(mitsuba) diff --git a/src/shapes/optix/aabb.cuh b/src/shapes/optix/aabb.cuh new file mode 100644 index 000000000..32fbfeeca --- /dev/null +++ b/src/shapes/optix/aabb.cuh @@ -0,0 +1,52 @@ +#pragma once + +#include +#include +#include + /** Data the OptiX programs below read from the SBT record's data pointer: a single optix::BoundingBox3f. */ +struct OptixAABBData { + optix::BoundingBox3f bbox; +}; + +#ifdef __CUDACC__ + /** Intersection program stub: fetches the OptixAABBData pointer from the SBT record but reports no intersection yet (the body is TODO; the commented-out code refers to a sphere). */ +extern "C" __global__ void __intersection__aabb() { + const OptixHitGroupData *sbt_data = (OptixHitGroupData*) optixGetSbtDataPointer(); + OptixAABBData *aabb = (OptixAABBData *)sbt_data->data; + + // TODO + + // // Ray in instance-space + // Ray3f ray = get_ray(); + + // Vector3f o = ray.o - sphere->center; + // Vector3f d = ray.d; + + // float A = squared_norm(d); + // float B = 2.f * dot(o, d); + // float C = squared_norm(o) - sqr(sphere->radius); + + // float near_t, far_t; + // bool solution_found = solve_quadratic(A, B, C, near_t, far_t); + + // // Sphere doesn't intersect with the segment on the ray + // bool out_bounds = !(near_t <= ray.maxt && far_t >= 0.f); // NaN-aware conditionals + + // // Sphere fully contains the segment of the ray + // bool in_bounds = near_t < 0.f && far_t > ray.maxt; + + // float t = (near_t < 0.f ? far_t: near_t); + + // if (solution_found && !out_bounds && !in_bounds) + // optixReportIntersection(t, OPTIX_HIT_KIND_TRIANGLE_FRONT_FACE); + +} + /** Closest-hit program stub: fetches the SBT record; writing the preliminary intersection to the payload is still TODO (commented out). */ +extern "C" __global__ void __closesthit__aabb() { + const OptixHitGroupData *sbt_data = (OptixHitGroupData *) optixGetSbtDataPointer(); + + // TODO + // set_preliminary_intersection_to_payload( + // optixGetRayTmax(), Vector2f(), 0, sbt_data->shape_registry_id); +} +#endif