From 9c10e06be0b2f81972c0fd014fa86139f7e07856 Mon Sep 17 00:00:00 2001 From: Jacco Bikker Date: Sun, 10 Nov 2024 11:33:46 +0100 Subject: [PATCH] Remove nanort.h. --- external/nanort.h | 2795 --------------------------------------------- 1 file changed, 2795 deletions(-) delete mode 100644 external/nanort.h diff --git a/external/nanort.h b/external/nanort.h deleted file mode 100644 index 659e5ef..0000000 --- a/external/nanort.h +++ /dev/null @@ -1,2795 +0,0 @@ -// -// NanoRT, single header only modern ray tracing kernel. -// - -// -// Notes : The number of primitives are up to 2G. If you want to render large -// data, please split data into chunks(~ 2G prims) and use NanoSG scene graph -// library(`${nanort}/examples/nanosg`). -// - -/* -The MIT License (MIT) - -Copyright (c) 2015 - 2019 Light Transport Entertainment, Inc. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#ifndef NANORT_H_ -#define NANORT_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// compiler macros -// -// NANORT_USE_CPP11_FEATURE : Enable C++11 feature -// NANORT_ENABLE_PARALLEL_BUILD : Enable parallel BVH build. -// NANORT_ENABLE_SERIALIZATION : Enable serialization feature for built BVH. -// -// Parallelized BVH build is supported on C++11 thread version. -// OpenMP version is not fully tested. -// thus turn off if you face a problem when building BVH in parallel. -// #define NANORT_ENABLE_PARALLEL_BUILD - -// Some constants -#define kNANORT_MAX_STACK_DEPTH (512) -#define kNANORT_MIN_PRIMITIVES_FOR_PARALLEL_BUILD (1024 * 8) -#define kNANORT_SHALLOW_DEPTH (4) // will create 2**N subtrees - -#ifdef NANORT_USE_CPP11_FEATURE -// Assume C++11 compiler has thread support. -// In some situation (e.g. embedded system, JIT compilation), thread feature -// may not be available though... -#include -#include -#include - -#define kNANORT_MAX_THREADS (256) - -// Parallel build should work well for C++11 version, thus force enable it. -#ifndef NANORT_ENABLE_PARALLEL_BUILD -#define NANORT_ENABLE_PARALLEL_BUILD -#endif - -#endif - -namespace nanort { - -// RayType -typedef enum { - RAY_TYPE_NONE = 0x0, - RAY_TYPE_PRIMARY = 0x1, - RAY_TYPE_SECONDARY = 0x2, - RAY_TYPE_DIFFUSE = 0x4, - RAY_TYPE_REFLECTION = 0x8, - RAY_TYPE_REFRACTION = 0x10 -} RayType; - -#ifdef __clang__ -#pragma clang diagnostic push -#if __has_warning("-Wzero-as-null-pointer-constant") -#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif - -#if __has_warning("-Wunsafe-buffer-usage") -#pragma clang diagnostic ignored "-Wunsafe-buffer-usage" -#endif -#endif - -// ---------------------------------------------------------------------------- -// Small vector class useful for multi-threaded environment. -// -// stack_container.h -// -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// This allocator can be used with STL containers to provide a stack buffer -// from which to allocate memory and overflows onto the heap. This stack buffer -// would be allocated on the stack and allows us to avoid heap operations in -// some situations. -// -// STL likes to make copies of allocators, so the allocator itself can't hold -// the data. Instead, we make the creator responsible for creating a -// StackAllocator::Source which contains the data. Copying the allocator -// merely copies the pointer to this shared source, so all allocators created -// based on our allocator will share the same stack buffer. -// -// This stack buffer implementation is very simple. The first allocation that -// fits in the stack buffer will use the stack buffer. Any subsequent -// allocations will not use the stack buffer, even if there is unused room. -// This makes it appropriate for array-like containers, but the caller should -// be sure to reserve() in the container up to the stack buffer size. Otherwise -// the container will allocate a small array which will "use up" the stack -// buffer. -template -class StackAllocator : public std::allocator { - public: - typedef T* pointer; - typedef typename std::allocator::size_type size_type; - - // Backing store for the allocator. The container owner is responsible for - // maintaining this for as long as any containers using this allocator are - // live. - struct Source { - Source() : used_stack_buffer_(false) {} - - // Casts the buffer in its right type. - T *stack_buffer() { return reinterpret_cast(stack_buffer_); } - const T *stack_buffer() const { - return reinterpret_cast(stack_buffer_); - } - - // - // IMPORTANT: Take care to ensure that stack_buffer_ is aligned - // since it is used to mimic an array of T. - // Be careful while declaring any unaligned types (like bool) - // before stack_buffer_. - // - - // The buffer itself. It is not of type T because we don't want the - // constructors and destructors to be automatically called. Define a POD - // buffer of the right size instead. - char stack_buffer_[sizeof(T[stack_capacity])]; - - // Set when the stack buffer is used for an allocation. We do not track - // how much of the buffer is used, only that somebody is using it. - bool used_stack_buffer_; - }; - - // Used by containers when they want to refer to an allocator of type U. - template - struct rebind { - typedef StackAllocator other; - }; - - // For the straight up copy c-tor, we can share storage. - StackAllocator(const StackAllocator &rhs) - : source_(rhs.source_) {} - - // ISO C++ requires the following constructor to be defined, - // and std::vector in VC++2008SP1 Release fails with an error - // in the class _Container_base_aux_alloc_real (from ) - // if the constructor does not exist. - // For this constructor, we cannot share storage; there's - // no guarantee that the Source buffer of Ts is large enough - // for Us. - // TODO(Google): If we were fancy pants, perhaps we could share storage - // iff sizeof(T) == sizeof(U). - template - StackAllocator(const StackAllocator &other) - : source_(NULL) { - (void)other; - } - - explicit StackAllocator(Source *source) : source_(source) {} - - // Actually do the allocation. Use the stack buffer if nobody has used it yet - // and the size requested fits. Otherwise, fall through to the standard - // allocator. - pointer allocate(size_type n, void *hint = 0) { - if (source_ != NULL && !source_->used_stack_buffer_ && - n <= stack_capacity) { - source_->used_stack_buffer_ = true; - return source_->stack_buffer(); - } else { -#if __cplusplus >= 201703L - return std::allocator_traits>::allocate(*this, n, hint); -#else - return std::allocator::allocate(n, hint); -#endif - } - } - - // Free: when trying to free the stack buffer, just mark it as free. For - // non-stack-buffer pointers, just fall though to the standard allocator. - void deallocate(pointer p, size_type n) { - if (source_ != NULL && p == source_->stack_buffer()) - source_->used_stack_buffer_ = false; - else - std::allocator::deallocate(p, n); - } - - private: - Source *source_; -}; - -// A wrapper around STL containers that maintains a stack-sized buffer that the -// initial capacity of the vector is based on. Growing the container beyond the -// stack capacity will transparently overflow onto the heap. The container must -// support reserve(). -// -// WATCH OUT: the ContainerType MUST use the proper StackAllocator for this -// type. This object is really intended to be used only internally. You'll want -// to use the wrappers below for different types. -template -class StackContainer { - public: - typedef TContainerType ContainerType; - typedef typename ContainerType::value_type ContainedType; - typedef StackAllocator Allocator; - - // Allocator must be constructed before the container! - StackContainer() : allocator_(&stack_data_), container_(allocator_) { - // Make the container use the stack allocation by reserving our buffer size - // before doing anything else. - container_.reserve(stack_capacity); - } - - // Getters for the actual container. - // - // Danger: any copies of this made using the copy constructor must have - // shorter lifetimes than the source. The copy will share the same allocator - // and therefore the same stack buffer as the original. Use std::copy to - // copy into a "real" container for longer-lived objects. - ContainerType &container() { return container_; } - const ContainerType &container() const { return container_; } - - // Support operator-> to get to the container. This allows nicer syntax like: - // StackContainer<...> foo; - // std::sort(foo->begin(), foo->end()); - ContainerType *operator->() { return &container_; } - const ContainerType *operator->() const { return &container_; } - -#ifdef UNIT_TEST - // Retrieves the stack source so that that unit tests can verify that the - // buffer is being used properly. - const typename Allocator::Source &stack_data() const { return stack_data_; } -#endif - - protected: - typename Allocator::Source stack_data_; - unsigned char pad_[7]; - Allocator allocator_; - ContainerType container_; - - // DISALLOW_EVIL_CONSTRUCTORS(StackContainer); - StackContainer(const StackContainer &); - void operator=(const StackContainer &); -}; - -// StackVector -// -// Example: -// StackVector foo; -// foo->push_back(22); // we have overloaded operator-> -// foo[0] = 10; // as well as operator[] -template -class StackVector - : public StackContainer >, - stack_capacity> { - public: - StackVector() - : StackContainer >, - stack_capacity>() {} - - // We need to put this in STL containers sometimes, which requires a copy - // constructor. We can't call the regular copy constructor because that will - // take the stack buffer from the original. Here, we create an empty object - // and make a stack buffer of its own. - StackVector(const StackVector &other) - : StackContainer >, - stack_capacity>() { - this->container().assign(other->begin(), other->end()); - } - - StackVector &operator=( - const StackVector &other) { - this->container().assign(other->begin(), other->end()); - return *this; - } - - // Vectors are commonly indexed, which isn't very convenient even with - // operator-> (using "->at()" does exception stuff we don't want). - T &operator[](size_t i) { return this->container().operator[](i); } - const T &operator[](size_t i) const { - return this->container().operator[](i); - } -}; - -// ---------------------------------------------------------------------------- - -template -class real3 { - public: - real3() {} - real3(T x) { - v[0] = x; - v[1] = x; - v[2] = x; - } - real3(T xx, T yy, T zz) { - v[0] = xx; - v[1] = yy; - v[2] = zz; - } - explicit real3(const T *p) { - v[0] = p[0]; - v[1] = p[1]; - v[2] = p[2]; - } - - inline T x() const { return v[0]; } - inline T y() const { return v[1]; } - inline T z() const { return v[2]; } - - real3 operator*(T f) const { return real3(x() * f, y() * f, z() * f); } - real3 operator-(const real3 &f2) const { - return real3(x() - f2.x(), y() - f2.y(), z() - f2.z()); - } - real3 operator*(const real3 &f2) const { - return real3(x() * f2.x(), y() * f2.y(), z() * f2.z()); - } - real3 operator+(const real3 &f2) const { - return real3(x() + f2.x(), y() + f2.y(), z() + f2.z()); - } - real3 &operator+=(const real3 &f2) { - v[0] += f2.x(); - v[1] += f2.y(); - v[2] += f2.z(); - return (*this); - } - real3 operator/(const real3 &f2) const { - return real3(x() / f2.x(), y() / f2.y(), z() / f2.z()); - } - real3 operator-() const { return real3(-x(), -y(), -z()); } - T operator[](int i) const { return v[i]; } - T &operator[](int i) { return v[i]; } - - T v[3]; - // T pad; // for alignment (when T = float) -}; - -template -inline real3 operator*(T f, const real3 &v) { - return real3(v.x() * f, v.y() * f, v.z() * f); -} - -template -inline real3 vneg(const real3 &rhs) { - return real3(-rhs.x(), -rhs.y(), -rhs.z()); -} - -template -inline T vlength(const real3 &rhs) { - return std::sqrt(rhs.x() * rhs.x() + rhs.y() * rhs.y() + rhs.z() * rhs.z()); -} - -template -inline real3 vnormalize(const real3 &rhs) { - real3 v = rhs; - T len = vlength(rhs); - if (std::fabs(len) > 1e-30f) { - T inv_len = static_cast(1.0) / len; - v.v[0] *= inv_len; - v.v[1] *= inv_len; - v.v[2] *= inv_len; - } - return v; -} - -template -inline real3 vcross(const real3 a, const real3 b) { - real3 c; - c[0] = a[1] * b[2] - a[2] * b[1]; - c[1] = a[2] * b[0] - a[0] * b[2]; - c[2] = a[0] * b[1] - a[1] * b[0]; - return c; -} - -template -inline T vdot(const real3 a, const real3 b) { - return a[0] * b[0] + a[1] * b[1] + a[2] * b[2]; -} - -template -inline real3 vsafe_inverse(const real3 v) { - real3 r; - -#ifdef NANORT_USE_CPP11_FEATURE - - if (std::fabs(v[0]) < 1e-30f) { - r[0] = 1e30f * - std::copysign(static_cast(1), v[0]); - } else { - r[0] = static_cast(1.0) / v[0]; - } - - if (std::fabs(v[1]) < 1e-30f) { - r[1] = 1e30f * - std::copysign(static_cast(1), v[1]); - } else { - r[1] = static_cast(1.0) / v[1]; - } - - if (std::fabs(v[2]) < 1e-30f) { - r[2] = 1e30f * - std::copysign(static_cast(1), v[2]); - } else { - r[2] = static_cast(1.0) / v[2]; - } -#else - - if (std::fabs(v[0]) < 1e-30f) { - T sgn = (v[0] < static_cast(0)) ? static_cast(-1) : static_cast(1); - r[0] = 1e30f * sgn; - } else { - r[0] = static_cast(1.0) / v[0]; - } - - if (std::fabs(v[1]) < 1e-30f) { - T sgn = (v[1] < static_cast(0)) ? static_cast(-1) : static_cast(1); - r[1] = 1e30f * sgn; - } else { - r[1] = static_cast(1.0) / v[1]; - } - - if (std::fabs(v[2]) < 1e-30f) { - T sgn = (v[2] < static_cast(0)) ? static_cast(-1) : static_cast(1); - r[2] = 1e30f * sgn; - } else { - r[2] = static_cast(1.0) / v[2]; - } -#endif - - return r; -} - -template -inline const real *get_vertex_addr(const real *p, const size_t idx, - const size_t stride_bytes) { - return reinterpret_cast( - reinterpret_cast(p) + idx * stride_bytes); -} - -template -class Ray { - public: - Ray() - : min_t(static_cast(0.0)), - max_t(1e30f), - type(RAY_TYPE_NONE) { - org[0] = static_cast(0.0); - org[1] = static_cast(0.0); - org[2] = static_cast(0.0); - dir[0] = static_cast(0.0); - dir[1] = static_cast(0.0); - dir[2] = static_cast(-1.0); - } - - T org[3]; // must set - T dir[3]; // must set - T min_t; // minimum ray hit distance. - T max_t; // maximum ray hit distance. - unsigned int type; // ray type - - // TODO(LTE): Align sizeof(Ray) -}; - -template -class BVHNode { - public: - BVHNode() {} - BVHNode(const BVHNode &rhs) { - bmin[0] = rhs.bmin[0]; - bmin[1] = rhs.bmin[1]; - bmin[2] = rhs.bmin[2]; - flag = rhs.flag; - - bmax[0] = rhs.bmax[0]; - bmax[1] = rhs.bmax[1]; - bmax[2] = rhs.bmax[2]; - axis = rhs.axis; - - data[0] = rhs.data[0]; - data[1] = rhs.data[1]; - } - - BVHNode &operator=(const BVHNode &rhs) { - bmin[0] = rhs.bmin[0]; - bmin[1] = rhs.bmin[1]; - bmin[2] = rhs.bmin[2]; - flag = rhs.flag; - - bmax[0] = rhs.bmax[0]; - bmax[1] = rhs.bmax[1]; - bmax[2] = rhs.bmax[2]; - axis = rhs.axis; - - data[0] = rhs.data[0]; - data[1] = rhs.data[1]; - - return (*this); - } - - ~BVHNode() {} - - T bmin[3]; - T bmax[3]; - - int flag; // 1 = leaf node, 0 = branch node - int axis; - - // leaf - // data[0] = npoints - // data[1] = index - // - // branch - // data[0] = child[0] - // data[1] = child[1] - unsigned int data[2]; -}; - -template -class IntersectComparator { - public: - bool operator()(const H &a, const H &b) const { return a.t < b.t; } -}; - -/// BVH build option. -template -struct BVHBuildOptions { - T cost_t_aabb; - unsigned int min_leaf_primitives; - unsigned int max_tree_depth; - unsigned int bin_size; - unsigned int shallow_depth; - unsigned int min_primitives_for_parallel_build; - - // Cache bounding box computation. - // Requires more memory, but BVHbuild can be faster. - bool cache_bbox; - unsigned char pad[3]; - - // Set default value: Taabb = 0.2 - BVHBuildOptions() - : cost_t_aabb(static_cast(0.2)), - min_leaf_primitives(4), - max_tree_depth(256), - bin_size(64), - shallow_depth(kNANORT_SHALLOW_DEPTH), - min_primitives_for_parallel_build( - kNANORT_MIN_PRIMITIVES_FOR_PARALLEL_BUILD), - cache_bbox(false) {} -}; - -/// BVH build statistics. -class BVHBuildStatistics { - public: - unsigned int max_tree_depth; - unsigned int num_leaf_nodes; - unsigned int num_branch_nodes; - float build_secs; - - // Set default value: Taabb = 0.2 - BVHBuildStatistics() - : max_tree_depth(0), - num_leaf_nodes(0), - num_branch_nodes(0), - build_secs(0.0f) {} -}; - -/// -/// @brief BVH trace option. -/// -class BVHTraceOptions { - public: - // Hit only for face IDs in indexRange. - // This feature is good to mimic something like glDrawArrays() - unsigned int prim_ids_range[2]; - - // Prim ID to skip for avoiding self-intersection - // -1 = no skipping - unsigned int skip_prim_id; - - bool cull_back_face; - unsigned char pad[3]; ///< Padding (not used) - - BVHTraceOptions() { - prim_ids_range[0] = 0; - prim_ids_range[1] = 0x7FFFFFFF; // Up to 2G face IDs. - - skip_prim_id = static_cast(-1); - cull_back_face = false; - } -}; - -/// -/// @brief Bounding box. -/// -template -class BBox { - public: - real3 bmin; - real3 bmax; - - BBox() { - bmin[0] = bmin[1] = bmin[2] = 1e30f; - bmax[0] = bmax[1] = bmax[2] = -1e30f; - } -}; - -/// -/// @brief Hit class for traversing nodes. -/// -/// Stores hit information of node traversal. -/// Node traversal is used for two-level ray tracing(efficient ray traversal of a scene hierarchy) -/// -template -class NodeHit { - public: - NodeHit() - : t_min(1e30f), - t_max(-1e30f), - node_id(static_cast(-1)) {} - - NodeHit(const NodeHit &rhs) { - t_min = rhs.t_min; - t_max = rhs.t_max; - node_id = rhs.node_id; - } - - NodeHit &operator=(const NodeHit &rhs) { - t_min = rhs.t_min; - t_max = rhs.t_max; - node_id = rhs.node_id; - - return (*this); - } - - ~NodeHit() {} - - T t_min; - T t_max; - unsigned int node_id; -}; - -/// -/// @brief Comparator object for NodeHit. -/// -/// Comparator object for finding nearest hit point in node traversal. -/// -template -class NodeHitComparator { - public: - inline bool operator()(const NodeHit &a, const NodeHit &b) { - return a.t_min < b.t_min; - } -}; - -/// -/// @brief Bounding Volume Hierarchy acceleration. -/// -/// BVHAccel is central part of ray tracing(ray traversal). -/// BVHAccel takes an input geometry(primitive) information and build a data structure -/// for efficient ray tracing(`O(log2 N)` in theory, where N is the number of primitive in the scene). -/// -/// @tparam T real value type(float or double). -/// -template -class BVHAccel { - public: - BVHAccel() : pad0_(0) { (void)pad0_; } - ~BVHAccel() {} - - /// - /// Build BVH for input primitives. - /// - /// @tparam Prim Primitive(e.g. Triangle) accessor class. - /// @tparam Pred Predicator(comparator class object for `Prim` class to find nearest hit point) - /// - /// @param[in] num_primitives The number of primitive. - /// @param[in] p Primitive accessor class object. - /// @param[in] pred Predicator object. - /// - /// @return true upon success. - /// - template - bool Build(const unsigned int num_primitives, const Prim &p, const Pred &pred, - const BVHBuildOptions &options = BVHBuildOptions()); - - /// - /// Get statistics of built BVH tree. Valid after `Build()` - /// - /// @return BVH build statistics. - /// - BVHBuildStatistics GetStatistics() const { return stats_; } - -#if defined(NANORT_ENABLE_SERIALIZATION) - /// - /// Dump built BVH to the file. - /// - bool Dump(const char *filename) const; - bool Dump(FILE *fp) const; - - /// - /// Load BVH binary - /// - bool Load(const char *filename); - bool Load(FILE *fp); -#endif - - void Debug(); - - /// - /// @brief Traverse into BVH along ray and find closest hit point & primitive if - /// found - /// - /// @tparam I Intersector class - /// @tparam H Hit class - /// - /// @param[in] ray Input ray - /// @param[in] intersector Intersector object. This object is called for each possible intersection of ray and BVH during traversal. - /// @param[out] isect Intersection point information(filled when closest hit point was found) - /// @param[in] options Traversal options. - /// - /// @return true if the closest hit point found. - /// - template - bool Traverse(const Ray &ray, const I &intersector, H *isect, - const BVHTraceOptions &options = BVHTraceOptions()) const; - -#if 0 - /// Multi-hit ray traversal - /// Returns `max_intersections` frontmost intersections - template - bool MultiHitTraverse(const Ray &ray, - int max_intersections, - const I &intersector, - StackVector *isects, - const BVHTraceOptions &options = BVHTraceOptions()) const; -#endif - - /// - /// List up nodes which intersects along the ray. - /// This function is useful for two-level BVH traversal. - /// See `examples/nanosg` for example. - /// - /// @tparam I Intersection class - /// - /// - /// - template - bool ListNodeIntersections(const Ray &ray, int max_intersections, - const I &intersector, - StackVector, 128> *hits) const; - - const std::vector > &GetNodes() const { return nodes_; } - const std::vector &GetIndices() const { return indices_; } - - /// - /// Returns bounding box of built BVH. - /// - void BoundingBox(T bmin[3], T bmax[3]) const { - if (nodes_.empty()) { - bmin[0] = bmin[1] = bmin[2] = 1e30f; - bmax[0] = bmax[1] = bmax[2] = -1e30f; - } else { - bmin[0] = nodes_[0].bmin[0]; - bmin[1] = nodes_[0].bmin[1]; - bmin[2] = nodes_[0].bmin[2]; - bmax[0] = nodes_[0].bmax[0]; - bmax[1] = nodes_[0].bmax[1]; - bmax[2] = nodes_[0].bmax[2]; - } - } - - bool IsValid() const { return nodes_.size() > 0; } - - private: -#if defined(NANORT_ENABLE_PARALLEL_BUILD) - typedef struct { - unsigned int left_idx; - unsigned int right_idx; - unsigned int offset; - } ShallowNodeInfo; - - // Used only during BVH construction - std::vector shallow_node_infos_; - - /// Builds shallow BVH tree recursively. - template - unsigned int BuildShallowTree(std::vector > *out_nodes, - unsigned int left_idx, unsigned int right_idx, - unsigned int depth, - unsigned int max_shallow_depth, const P &p, - const Pred &pred); -#endif - - /// Builds BVH tree recursively. - template - unsigned int BuildTree(BVHBuildStatistics *out_stat, - std::vector > *out_nodes, - unsigned int left_idx, unsigned int right_idx, - unsigned int depth, const P &p, const Pred &pred); - - template - bool TestLeafNode(const BVHNode &node, const Ray &ray, - const I &intersector) const; - - template - bool TestLeafNodeIntersections( - const BVHNode &node, const Ray &ray, const int max_intersections, - const I &intersector, - std::priority_queue, std::vector >, - NodeHitComparator > *isect_pq) const; - -#if 0 - template - bool MultiHitTestLeafNode(std::priority_queue, Comp> *isect_pq, - int max_intersections, - const BVHNode &node, const Ray &ray, - const I &intersector) const; -#endif - - std::vector > nodes_; - std::vector indices_; // max 4G triangles. - std::vector > bboxes_; - BVHBuildOptions options_; - BVHBuildStatistics stats_; - unsigned int pad0_; -}; - -// Predefined SAH predicator for triangle. -template -class TriangleSAHPred { - public: - TriangleSAHPred( - const T *vertices, const unsigned int *faces, - size_t vertex_stride_bytes) // e.g. 12 for sizeof(float) * XYZ - : axis_(0), - pos_(static_cast(0.0)), - vertices_(vertices), - faces_(faces), - vertex_stride_bytes_(vertex_stride_bytes) {} - - TriangleSAHPred(const TriangleSAHPred &rhs) - : axis_(rhs.axis_), - pos_(rhs.pos_), - vertices_(rhs.vertices_), - faces_(rhs.faces_), - vertex_stride_bytes_(rhs.vertex_stride_bytes_) {} - - void Set(int axis, T pos) const { - axis_ = axis; - pos_ = pos; - } - - bool operator()(unsigned int i) const { - int axis = axis_; - T pos = pos_; - - unsigned int i0 = faces_[3 * i + 0]; - unsigned int i1 = faces_[3 * i + 1]; - unsigned int i2 = faces_[3 * i + 2]; - - real3 p0(get_vertex_addr(vertices_, i0, vertex_stride_bytes_)); - real3 p1(get_vertex_addr(vertices_, i1, vertex_stride_bytes_)); - real3 p2(get_vertex_addr(vertices_, i2, vertex_stride_bytes_)); - - T center = p0[axis] + p1[axis] + p2[axis]; - return (center < pos * static_cast(3.0)); - } - - private: - mutable int axis_; - mutable T pos_; - const T *vertices_; - const unsigned int *faces_; - const size_t vertex_stride_bytes_; -}; - -// Predefined Triangle mesh geometry. -template -class TriangleMesh { - public: - TriangleMesh( - const T *vertices, const unsigned int *faces, - const size_t vertex_stride_bytes) // e.g. 12 for sizeof(float) * XYZ - : vertices_(vertices), - faces_(faces), - vertex_stride_bytes_(vertex_stride_bytes) {} - - /// Compute bounding box for `prim_index`th triangle. - /// This function is called for each primitive in BVH build. - void BoundingBox(real3 *bmin, real3 *bmax, - unsigned int prim_index) const { - unsigned vertex = faces_[3 * prim_index + 0]; - - (*bmin)[0] = get_vertex_addr(vertices_, vertex, vertex_stride_bytes_)[0]; - (*bmin)[1] = get_vertex_addr(vertices_, vertex, vertex_stride_bytes_)[1]; - (*bmin)[2] = get_vertex_addr(vertices_, vertex, vertex_stride_bytes_)[2]; - (*bmax)[0] = get_vertex_addr(vertices_, vertex, vertex_stride_bytes_)[0]; - (*bmax)[1] = get_vertex_addr(vertices_, vertex, vertex_stride_bytes_)[1]; - (*bmax)[2] = get_vertex_addr(vertices_, vertex, vertex_stride_bytes_)[2]; - - // remaining two vertices of the primitive - for (unsigned int i = 1; i < 3; i++) { - // xyz - for (int k = 0; k < 3; k++) { - T coord = get_vertex_addr(vertices_, faces_[3 * prim_index + i], - vertex_stride_bytes_)[k]; - - (*bmin)[k] = min((*bmin)[k], coord); - (*bmax)[k] = max((*bmax)[k], coord); - } - } - } - - void BoundingBoxAndCenter(real3* bmin, real3* bmax, real3* center, unsigned int prim_index) const { - unsigned int i0 = faces_[3 * prim_index + 0]; - unsigned int i1 = faces_[3 * prim_index + 1]; - unsigned int i2 = faces_[3 * prim_index + 2]; - - real3 p0(get_vertex_addr(vertices_, i0, vertex_stride_bytes_)); - real3 p1(get_vertex_addr(vertices_, i1, vertex_stride_bytes_)); - real3 p2(get_vertex_addr(vertices_, i2, vertex_stride_bytes_)); - for (int k = 0; k < 3; ++k) { - (*bmin)[k] = min(p0[k], min(p1[k], p2[k])); - (*bmax)[k] = max(p0[k], max(p1[k], p2[k])); - } - *center = (p0 + p1 + p2) * (T(1) / T(3)); - } - - const T *vertices_; - const unsigned int *faces_; - const size_t vertex_stride_bytes_; - - // - // Accessors - // - const T *GetVertices() const { - return vertices_; - } - - const unsigned int *GetFaces() const { - return faces_; - } - - size_t GetVertexStrideBytes() const { - return vertex_stride_bytes_; - } -}; - -/// -/// Stores intersection point information for triangle geometry. -/// -template -class TriangleIntersection { - public: - T u; - T v; - - // Required member variables. - T t; - unsigned int prim_id; -}; - -/// -/// Intersector is a template class which implements intersection method and stores -/// intesection point information(`H`) -/// -/// @tparam T Precision(float or double) -/// @tparam H Intersection point information struct -/// -template > -class TriangleIntersector { - public: - - // Initialize from mesh object. - // M: mesh class - template - TriangleIntersector(const M &m) - : vertices_(m.GetVertices()), - faces_(m.GetFaces()), - vertex_stride_bytes_(m.GetVertexStrideBytes()) {} - - template - TriangleIntersector(const M *m) - : vertices_(m->GetVertices()), - faces_(m->GetFaces()), - vertex_stride_bytes_(m->GetVertexStrideBytes()) {} - - TriangleIntersector(const T *vertices, const unsigned int *faces, - const size_t vertex_stride_bytes) // e.g. - // vertex_stride_bytes - // = 12 = sizeof(float) - // * 3 - : vertices_(vertices), - faces_(faces), - vertex_stride_bytes_(vertex_stride_bytes) {} - - // For Watertight Ray/Triangle Intersection. - typedef struct { - T Sx; - T Sy; - T Sz; - int kx; - int ky; - int kz; - } RayCoeff; - - /// Do ray intersection stuff for `prim_index` th primitive and return hit - /// distance `t`, barycentric coordinate `u` and `v`. - /// Returns true if there's intersection. - bool Intersect(T *t_inout, const unsigned int prim_index) const { - if ((prim_index < trace_options_.prim_ids_range[0]) || - (prim_index >= trace_options_.prim_ids_range[1])) { - return false; - } - - // Self-intersection test. - if (prim_index == trace_options_.skip_prim_id) { - return false; - } - - const unsigned int f0 = faces_[3 * prim_index + 0]; - const unsigned int f1 = faces_[3 * prim_index + 1]; - const unsigned int f2 = faces_[3 * prim_index + 2]; - - const real3 p0(get_vertex_addr(vertices_, f0 + 0, vertex_stride_bytes_)); - const real3 p1(get_vertex_addr(vertices_, f1 + 0, vertex_stride_bytes_)); - const real3 p2(get_vertex_addr(vertices_, f2 + 0, vertex_stride_bytes_)); - - const real3 A = p0 - ray_org_; - const real3 B = p1 - ray_org_; - const real3 C = p2 - ray_org_; - - const T Ax = A[ray_coeff_.kx] - ray_coeff_.Sx * A[ray_coeff_.kz]; - const T Ay = A[ray_coeff_.ky] - ray_coeff_.Sy * A[ray_coeff_.kz]; - const T Bx = B[ray_coeff_.kx] - ray_coeff_.Sx * B[ray_coeff_.kz]; - const T By = B[ray_coeff_.ky] - ray_coeff_.Sy * B[ray_coeff_.kz]; - const T Cx = C[ray_coeff_.kx] - ray_coeff_.Sx * C[ray_coeff_.kz]; - const T Cy = C[ray_coeff_.ky] - ray_coeff_.Sy * C[ray_coeff_.kz]; - - T U = Cx * By - Cy * Bx; - T V = Ax * Cy - Ay * Cx; - T W = Bx * Ay - By * Ax; - -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wfloat-equal" -#endif - - // Fall back to test against edges using double precision. - if (U == static_cast(0.0) || V == static_cast(0.0) || - W == static_cast(0.0)) { - double CxBy = static_cast(Cx) * static_cast(By); - double CyBx = static_cast(Cy) * static_cast(Bx); - U = static_cast(CxBy - CyBx); - - double AxCy = static_cast(Ax) * static_cast(Cy); - double AyCx = static_cast(Ay) * static_cast(Cx); - V = static_cast(AxCy - AyCx); - - double BxAy = static_cast(Bx) * static_cast(Ay); - double ByAx = static_cast(By) * static_cast(Ax); - W = static_cast(BxAy - ByAx); - } - - if (U < static_cast(0.0) || V < static_cast(0.0) || - W < static_cast(0.0)) { - if (trace_options_.cull_back_face || - (U > static_cast(0.0) || V > static_cast(0.0) || - W > static_cast(0.0))) { - return false; - } - } - - T det = U + V + W; - if (det == static_cast(0.0)) return false; - -#ifdef __clang__ -#pragma clang diagnostic pop -#endif - - const T Az = ray_coeff_.Sz * A[ray_coeff_.kz]; - const T Bz = ray_coeff_.Sz * B[ray_coeff_.kz]; - const T Cz = ray_coeff_.Sz * C[ray_coeff_.kz]; - const T D = U * Az + V * Bz + W * Cz; - - const T rcpDet = static_cast(1.0) / det; - T tt = D * rcpDet; - - if (tt > (*t_inout)) { - return false; - } - - if (tt < t_min_) { - return false; - } - - (*t_inout) = tt; - // Use Möller-Trumbore style barycentric coordinates - // U + V + W = 1.0 and interp(p) = U * p0 + V * p1 + W * p2 - // We want interp(p) = (1 - u - v) * p0 + u * v1 + v * p2; - // => u = V, v = W. - u_ = V * rcpDet; - v_ = W * rcpDet; - - return true; - } - - /// Returns the nearest hit distance. - T GetT() const { return t_; } - - /// Update is called when initializing intersection and nearest hit is found. - void Update(T t, unsigned int prim_idx) const { - t_ = t; - prim_id_ = prim_idx; - } - - /// Prepare BVH traversal (e.g. compute inverse ray direction) - /// This function is called only once in BVH traversal. - void PrepareTraversal(const Ray &ray, - const BVHTraceOptions &trace_options) const { - ray_org_[0] = ray.org[0]; - ray_org_[1] = ray.org[1]; - ray_org_[2] = ray.org[2]; - - // Calculate dimension where the ray direction is maximal. - ray_coeff_.kz = 0; - T absDir = std::fabs(ray.dir[0]); - if (absDir < std::fabs(ray.dir[1])) { - ray_coeff_.kz = 1; - absDir = std::fabs(ray.dir[1]); - } - if (absDir < std::fabs(ray.dir[2])) { - ray_coeff_.kz = 2; - absDir = std::fabs(ray.dir[2]); - } - - ray_coeff_.kx = ray_coeff_.kz + 1; - if (ray_coeff_.kx == 3) ray_coeff_.kx = 0; - ray_coeff_.ky = ray_coeff_.kx + 1; - if (ray_coeff_.ky == 3) ray_coeff_.ky = 0; - - // Swap kx and ky dimension to preserve winding direction of triangles. - if (ray.dir[ray_coeff_.kz] < static_cast(0.0)) - std::swap(ray_coeff_.kx, ray_coeff_.ky); - - // Calculate shear constants. - ray_coeff_.Sx = ray.dir[ray_coeff_.kx] / ray.dir[ray_coeff_.kz]; - ray_coeff_.Sy = ray.dir[ray_coeff_.ky] / ray.dir[ray_coeff_.kz]; - ray_coeff_.Sz = static_cast(1.0) / ray.dir[ray_coeff_.kz]; - - trace_options_ = trace_options; - - t_min_ = ray.min_t; - - u_ = static_cast(0.0); - v_ = static_cast(0.0); - } - - /// Post BVH traversal stuff. - /// Fill `isect` if there is a hit. - void PostTraversal(const Ray &ray, bool hit, H *isect) const { - if (hit && isect) { - (*isect).t = t_; - (*isect).u = u_; - (*isect).v = v_; - (*isect).prim_id = prim_id_; - } - (void)ray; - } - - private: - const T *vertices_; - const unsigned int *faces_; - const size_t vertex_stride_bytes_; - - mutable real3 ray_org_; - mutable RayCoeff ray_coeff_; - mutable BVHTraceOptions trace_options_; - mutable T t_min_; - - mutable T t_; - mutable T u_; - mutable T v_; - mutable unsigned int prim_id_; -}; - -// -// Robust BVH Ray Traversal : http://jcgt.org/published/0002/02/02/paper.pdf -// - -// NaN-safe min and max function. -template -const T &safemin(const T &a, const T &b) { - return (a < b) ? a : b; -} -template -const T &safemax(const T &a, const T &b) { - return (a > b) ? a : b; -} - -// -// SAH functions -// -template -struct Bin { - BBox bbox; - size_t count; - T cost; - - Bin() - : count(0), cost(0) - { - // Note: bbox is initialized to be empty - } -}; - -template -struct BinBuffer { - explicit BinBuffer(unsigned int size) { - bin_size = size; - bin.resize(3 * size); - clear(); - } - - void clear() { - std::fill(bin.begin(), bin.end(), Bin()); - } - - std::vector > bin; - unsigned int bin_size; - unsigned int pad0; -}; - -template -inline T CalculateSurfaceArea(const real3 &min, const real3 &max) { - real3 box = max - min; - return static_cast(2.0) * - (box[0] * box[1] + box[1] * box[2] + box[2] * box[0]); -} - -template -inline void GetBoundingBoxOfTriangle(real3 *bmin, real3 *bmax, - const T *vertices, - const unsigned int *faces, - unsigned int index) { - unsigned int f0 = faces[3 * index + 0]; - unsigned int f1 = faces[3 * index + 1]; - unsigned int f2 = faces[3 * index + 2]; - - real3 p[3]; - - p[0] = real3(&vertices[3 * f0]); - p[1] = real3(&vertices[3 * f1]); - p[2] = real3(&vertices[3 * f2]); - - (*bmin) = p[0]; - (*bmax) = p[0]; - - for (int i = 1; i < 3; i++) { - (*bmin)[0] = min((*bmin)[0], p[i][0]); - (*bmin)[1] = min((*bmin)[1], p[i][1]); - (*bmin)[2] = min((*bmin)[2], p[i][2]); - - (*bmax)[0] = max((*bmax)[0], p[i][0]); - (*bmax)[1] = max((*bmax)[1], p[i][1]); - (*bmax)[2] = max((*bmax)[2], p[i][2]); - } -} - -template -inline void ContributeBinBuffer(BinBuffer *bins, // [out] - const real3 &scene_min, - const real3 &scene_max, - unsigned int *indices, unsigned int left_idx, - unsigned int right_idx, const P &p) { - T bin_size = static_cast(bins->bin_size); - - // Calculate extent - real3 scene_size, scene_inv_size; - scene_size = scene_max - scene_min; - - for (int i = 0; i < 3; ++i) { - assert(scene_size[i] >= static_cast(0.0)); - - if (scene_size[i] > static_cast(0.0)) { - scene_inv_size[i] = bin_size / scene_size[i]; - } else { - scene_inv_size[i] = static_cast(0.0); - } - } - - bins->clear(); - - for (size_t i = left_idx; i < right_idx; i++) { - // - // Quantize the center position into [0, BIN_SIZE) - // - // q[i] = (int)(p[i] - scene_bmin) / scene_size - // - - real3 bmin, bmax, center; - p.BoundingBoxAndCenter(&bmin, &bmax, ¢er, indices[i]); - real3 quantized_center = (center - scene_min) * scene_inv_size; - - for (int j = 0; j < 3; ++j) { - // idx is now in [0, BIN_SIZE) - unsigned idx = min(bins->bin_size - 1, unsigned(max(0, int(quantized_center[j])))); - - // Increment bin counter + extend bounding box of bin - unsigned int bin_idx = static_cast(j) * bins->bin_size + idx; - - // TODO: assert when out-of-bounds access?. - if (bin_idx < bins->bin_size) { - Bin& bin = bins->bin[static_cast(j) * bins->bin_size + idx]; - bin.count++; - for (int k = 0; k < 3; ++k) { - bin.bbox.bmin[k] = min(bin.bbox.bmin[k], bmin[k]); - bin.bbox.bmax[k] = max(bin.bbox.bmax[k], bmax[k]); - } - } - } - } -} - -template -inline T SAH(size_t ns1, T leftArea, size_t ns2, T rightArea, T invS, T Taabb, - T Ttri) { - T sah; - - sah = static_cast(2.0) * Taabb + - (leftArea * invS) * static_cast(ns1) * Ttri + - (rightArea * invS) * static_cast(ns2) * Ttri; - - return sah; -} - -template -inline bool FindCutFromBinBuffer(T *cut_pos, // [out] xyz - int *minCostAxis, // [out] - BinBuffer *bins, const real3 &bmin, - const real3 &bmax) { - T minCost[3]; - for (int j = 0; j < 3; ++j) { - minCost[j] = 1e30f; - - // Sweep left to accumulate bounding boxes and compute the right-hand side of the cost - size_t count = 0; - BBox accumulated_bbox; - for (size_t i = bins->bin_size - 1; i > 0; --i) { - Bin& bin = bins->bin[static_cast(j) * bins->bin_size + i]; - for (int k = 0; k < 3; ++k) { - accumulated_bbox.bmin[k] = min(bin.bbox.bmin[k], accumulated_bbox.bmin[k]); - accumulated_bbox.bmax[k] = max(bin.bbox.bmax[k], accumulated_bbox.bmax[k]); - } - count += bin.count; - bin.cost = T(count) * CalculateSurfaceArea(accumulated_bbox.bmin, accumulated_bbox.bmax); - } - - // Sweep right to compute the full cost - count = 0; - accumulated_bbox = BBox(); - size_t minBin = 1; - for (size_t i = 0; i < bins->bin_size - 1; i++) { - Bin& bin = bins->bin[static_cast(j) * bins->bin_size + i]; - Bin& next_bin = bins->bin[static_cast(j) * bins->bin_size + i + 1]; - for (int k = 0; k < 3; ++k) { - accumulated_bbox.bmin[k] = min(bin.bbox.bmin[k], accumulated_bbox.bmin[k]); - accumulated_bbox.bmax[k] = max(bin.bbox.bmax[k], accumulated_bbox.bmax[k]); - } - count += bin.count; - // Traversal cost and intersection cost are irrelevant for minimization - T cost = T(count) * CalculateSurfaceArea(accumulated_bbox.bmin, accumulated_bbox.bmax) + next_bin.cost; - if (cost < minCost[j]) { - minCost[j] = cost; - // Store the beginning of the right partition - minBin = i + 1; - } - } - cut_pos[j] = T(minBin) * ((bmax[j] - bmin[j]) / T(bins->bin_size)) + bmin[j]; - } - *minCostAxis = 0; - if (minCost[0] > minCost[1]) *minCostAxis = 1; - if (minCost[*minCostAxis] > minCost[2]) *minCostAxis = 2; - - return true; -} - -#ifdef _OPENMP -template -void ComputeBoundingBoxOMP(real3 *bmin, real3 *bmax, - const unsigned int *indices, unsigned int left_index, - unsigned int right_index, const P &p) { - { p.BoundingBox(bmin, bmax, indices[left_index]); } - - T local_bmin[3] = {(*bmin)[0], (*bmin)[1], (*bmin)[2]}; - T local_bmax[3] = {(*bmax)[0], (*bmax)[1], (*bmax)[2]}; - - unsigned int n = right_index - left_index; - -#pragma omp parallel firstprivate(local_bmin, local_bmax) if (n > (1024 * 128)) - { -#pragma omp parallel for - // for each face - for (int i = int(left_index); i < int(right_index); i++) { - unsigned int idx = indices[i]; - - real3 bbox_min, bbox_max; - - p.BoundingBox(&bbox_min, &bbox_max, idx); - - // xyz - for (int k = 0; k < 3; k++) { - (*bmin)[k] = min((*bmin)[k], bbox_min[k]); - (*bmax)[k] = max((*bmax)[k], bbox_max[k]); - } - } - -#pragma omp critical - { - for (int k = 0; k < 3; k++) { - (*bmin)[k] = min((*bmin)[k], local_bmin[k]); - (*bmax)[k] = max((*bmax)[k], local_bmax[k]); - } - } - } -} -#endif - -#ifdef NANORT_USE_CPP11_FEATURE -template -inline void ComputeBoundingBoxThreaded(real3 *bmin, real3 *bmax, - const unsigned int *indices, - unsigned int left_index, - unsigned int right_index, const P &p) { - unsigned int n = right_index - left_index; - - size_t num_threads = min( - size_t(kNANORT_MAX_THREADS), - max(size_t(1), size_t(std::thread::hardware_concurrency()))); - - if (n < num_threads) { - num_threads = n; - } - - std::vector workers; - - size_t ndiv = n / num_threads; - - std::vector local_bmins(3 * num_threads); // 3 = xyz - std::vector local_bmaxs(3 * num_threads); // 3 = xyz - - for (size_t t = 0; t < num_threads; t++) { - workers.emplace_back(std::thread([&, t]() { - size_t si = left_index + t * ndiv; - size_t ei = (t == (num_threads - 1)) ? size_t(right_index) : min(left_index + (t + 1) * ndiv, size_t(right_index)); - - local_bmins[3 * t + 0] = 1e30f; - local_bmins[3 * t + 1] = 1e30f; - local_bmins[3 * t + 2] = 1e30f; - local_bmaxs[3 * t + 0] = -1e30f; - local_bmaxs[3 * t + 1] = -1e30f; - local_bmaxs[3 * t + 2] = -1e30f; - - // for each face - for (size_t i = si; i < ei; i++) { - unsigned int idx = indices[i]; - - real3 bbox_min, bbox_max; - p.BoundingBox(&bbox_min, &bbox_max, idx); - - // xyz - for (size_t k = 0; k < 3; k++) { - local_bmins[3 * t + k] = - min(local_bmins[3 * t + k], bbox_min[int(k)]); - local_bmaxs[3 * t + k] = - max(local_bmaxs[3 * t + k], bbox_max[int(k)]); - } - } - })); - } - - for (auto &t : workers) { - t.join(); - } - - // merge bbox - for (size_t k = 0; k < 3; k++) { - (*bmin)[int(k)] = local_bmins[k]; - (*bmax)[int(k)] = local_bmaxs[k]; - } - - for (size_t t = 1; t < num_threads; t++) { - for (size_t k = 0; k < 3; k++) { - (*bmin)[int(k)] = min((*bmin)[int(k)], local_bmins[3 * t + k]); - (*bmax)[int(k)] = max((*bmax)[int(k)], local_bmaxs[3 * t + k]); - } - } -} -#endif - -template -inline void ComputeBoundingBox(real3 *bmin, real3 *bmax, - const unsigned int *indices, - unsigned int left_index, - unsigned int right_index, const P &p) { - unsigned int idx = indices[left_index]; - p.BoundingBox(bmin, bmax, idx); - - { - // for each primitive - for (unsigned int i = left_index + 1; i < right_index; i++) { - idx = indices[i]; - real3 bbox_min, bbox_max; - p.BoundingBox(&bbox_min, &bbox_max, idx); - - // xyz - for (int k = 0; k < 3; k++) { - (*bmin)[k] = min((*bmin)[k], bbox_min[k]); - (*bmax)[k] = max((*bmax)[k], bbox_max[k]); - } - } - } -} - -template -inline void GetBoundingBox(real3 *bmin, real3 *bmax, - const std::vector > &bboxes, - unsigned int *indices, unsigned int left_index, - unsigned int right_index) { - unsigned int i = left_index; - unsigned int idx = indices[i]; - - (*bmin)[0] = bboxes[idx].bmin[0]; - (*bmin)[1] = bboxes[idx].bmin[1]; - (*bmin)[2] = bboxes[idx].bmin[2]; - (*bmax)[0] = bboxes[idx].bmax[0]; - (*bmax)[1] = bboxes[idx].bmax[1]; - (*bmax)[2] = bboxes[idx].bmax[2]; - - // for each face - for (i = left_index + 1; i < right_index; i++) { - idx = indices[i]; - - // xyz - for (int k = 0; k < 3; k++) { - (*bmin)[k] = min((*bmin)[k], bboxes[idx].bmin[k]); - (*bmax)[k] = max((*bmax)[k], bboxes[idx].bmax[k]); - } - } -} - -// -// -- -// - -#if defined(NANORT_ENABLE_PARALLEL_BUILD) -template -template -unsigned int BVHAccel::BuildShallowTree(std::vector > *out_nodes, - unsigned int left_idx, - unsigned int right_idx, - unsigned int depth, - unsigned int max_shallow_depth, - const P &p, const Pred &pred) { - assert(left_idx <= right_idx); - - unsigned int offset = static_cast(out_nodes->size()); - - if (stats_.max_tree_depth < depth) { - stats_.max_tree_depth = depth; - } - - real3 bmin, bmax; - -#if defined(NANORT_USE_CPP11_FEATURE) && defined(NANORT_ENABLE_PARALLEL_BUILD) - ComputeBoundingBoxThreaded(&bmin, &bmax, &indices_.at(0), left_idx, right_idx, - p); -#else - ComputeBoundingBox(&bmin, &bmax, &indices_.at(0), left_idx, right_idx, p); -#endif - - unsigned int n = right_idx - left_idx; - if ((n <= options_.min_leaf_primitives) || - (depth >= options_.max_tree_depth)) { - // Create leaf node. - BVHNode leaf; - - leaf.bmin[0] = bmin[0]; - leaf.bmin[1] = bmin[1]; - leaf.bmin[2] = bmin[2]; - - leaf.bmax[0] = bmax[0]; - leaf.bmax[1] = bmax[1]; - leaf.bmax[2] = bmax[2]; - - assert(left_idx < 0xffffffff); - - leaf.flag = 1; // leaf - leaf.data[0] = n; - leaf.data[1] = left_idx; - - out_nodes->push_back(leaf); // atomic update - - stats_.num_leaf_nodes++; - - return offset; - } - - // - // Create branch node. - // - if (depth >= max_shallow_depth) { - // Delay to build tree - ShallowNodeInfo info; - info.left_idx = left_idx; - info.right_idx = right_idx; - info.offset = offset; - shallow_node_infos_.push_back(info); - - // Add dummy node. - BVHNode node; - node.axis = -1; - node.flag = -1; - out_nodes->push_back(node); - - return offset; - - } else { - // - // TODO(LTE): multi-threaded SAH computation, or use simple object median or - // spacial median for shallow tree to speeding up the parallel build. - // - - // - // Compute SAH and find best split axis and position - // - int min_cut_axis = 0; - T cut_pos[3] = {0.0, 0.0, 0.0}; - - BinBuffer bins(options_.bin_size); - ContributeBinBuffer(&bins, bmin, bmax, &indices_.at(0), left_idx, right_idx, - p); - FindCutFromBinBuffer(cut_pos, &min_cut_axis, &bins, bmin, bmax); - - // Try all 3 axis until good cut position avaiable. - unsigned int mid_idx = left_idx; - int cut_axis = min_cut_axis; - - for (int axis_try = 0; axis_try < 3; axis_try++) { - unsigned int *begin = &indices_[left_idx]; - unsigned int *end = - &indices_[right_idx - 1] + 1; // mimics end() iterator - unsigned int *mid = 0; - - // try min_cut_axis first. - cut_axis = (min_cut_axis + axis_try) % 3; - - pred.Set(cut_axis, cut_pos[cut_axis]); - // - // Split at (cut_axis, cut_pos) - // indices_ will be modified. - // - mid = std::partition(begin, end, pred); - - mid_idx = left_idx + static_cast((mid - begin)); - - if ((mid_idx == left_idx) || (mid_idx == right_idx)) { - // Can't split well. - // Switch to object median (which may create unoptimized tree, but - // stable) - mid_idx = left_idx + (n >> 1); - - // Try another axis if there's an axis to try. - - } else { - // Found good cut. exit loop. - break; - } - } - - BVHNode node; - node.axis = cut_axis; - node.flag = 0; // 0 = branch - - out_nodes->push_back(node); - - unsigned int left_child_index = 0; - unsigned int right_child_index = 0; - - left_child_index = BuildShallowTree(out_nodes, left_idx, mid_idx, depth + 1, - max_shallow_depth, p, pred); - - right_child_index = BuildShallowTree(out_nodes, mid_idx, right_idx, - depth + 1, max_shallow_depth, p, pred); - - //std::cout << "shallow[" << offset << "] l and r = " << left_child_index << ", " << right_child_index << std::endl; - (*out_nodes)[offset].data[0] = left_child_index; - (*out_nodes)[offset].data[1] = right_child_index; - - (*out_nodes)[offset].bmin[0] = bmin[0]; - (*out_nodes)[offset].bmin[1] = bmin[1]; - (*out_nodes)[offset].bmin[2] = bmin[2]; - - (*out_nodes)[offset].bmax[0] = bmax[0]; - (*out_nodes)[offset].bmax[1] = bmax[1]; - (*out_nodes)[offset].bmax[2] = bmax[2]; - } - - stats_.num_branch_nodes++; - - return offset; -} -#endif - -template -template -unsigned int BVHAccel::BuildTree(BVHBuildStatistics *out_stat, - std::vector > *out_nodes, - unsigned int left_idx, - unsigned int right_idx, unsigned int depth, - const P &p, const Pred &pred) { - assert(left_idx <= right_idx); - - unsigned int offset = static_cast(out_nodes->size()); - - if (out_stat->max_tree_depth < depth) { - out_stat->max_tree_depth = depth; - } - - real3 bmin, bmax; - if (!bboxes_.empty()) { - GetBoundingBox(&bmin, &bmax, bboxes_, &indices_.at(0), left_idx, right_idx); - } else { - ComputeBoundingBox(&bmin, &bmax, &indices_.at(0), left_idx, right_idx, p); - } - - unsigned int n = right_idx - left_idx; - if ((n <= options_.min_leaf_primitives) || - (depth >= options_.max_tree_depth)) { - // Create leaf node. - BVHNode leaf; - - leaf.bmin[0] = bmin[0]; - leaf.bmin[1] = bmin[1]; - leaf.bmin[2] = bmin[2]; - - leaf.bmax[0] = bmax[0]; - leaf.bmax[1] = bmax[1]; - leaf.bmax[2] = bmax[2]; - - assert(left_idx < 0xffffffff); - - leaf.flag = 1; // leaf - leaf.data[0] = n; - leaf.data[1] = left_idx; - - out_nodes->push_back(leaf); // atomic update - - out_stat->num_leaf_nodes++; - - return offset; - } - - // - // Create branch node. - // - - // - // Compute SAH and find best split axis and position - // - int min_cut_axis = 0; - T cut_pos[3] = {0.0, 0.0, 0.0}; - - BinBuffer bins(options_.bin_size); - ContributeBinBuffer(&bins, bmin, bmax, &indices_.at(0), left_idx, right_idx, - p); - FindCutFromBinBuffer(cut_pos, &min_cut_axis, &bins, bmin, bmax); - - // Try all 3 axis until good cut position avaiable. - unsigned int mid_idx = left_idx; - int cut_axis = min_cut_axis; - - for (int axis_try = 0; axis_try < 3; axis_try++) { - unsigned int *begin = &indices_[left_idx]; - unsigned int *end = &indices_[right_idx - 1] + 1; // mimics end() iterator. - unsigned int *mid = 0; - - // try min_cut_axis first. - cut_axis = (min_cut_axis + axis_try) % 3; - - pred.Set(cut_axis, cut_pos[cut_axis]); - - // - // Split at (cut_axis, cut_pos) - // indices_ will be modified. - // - mid = std::partition(begin, end, pred); - - mid_idx = left_idx + static_cast((mid - begin)); - - if ((mid_idx == left_idx) || (mid_idx == right_idx)) { - // Can't split well. - // Switch to object median(which may create unoptimized tree, but - // stable) - mid_idx = left_idx + (n >> 1); - - // Try another axis to find better cut. - - } else { - // Found good cut. exit loop. - break; - } - } - - BVHNode node; - node.axis = cut_axis; - node.flag = 0; // 0 = branch - - out_nodes->push_back(node); - - unsigned int left_child_index = 0; - unsigned int right_child_index = 0; - - left_child_index = - BuildTree(out_stat, out_nodes, left_idx, mid_idx, depth + 1, p, pred); - - right_child_index = - BuildTree(out_stat, out_nodes, mid_idx, right_idx, depth + 1, p, pred); - - { - (*out_nodes)[offset].data[0] = left_child_index; - (*out_nodes)[offset].data[1] = right_child_index; - - (*out_nodes)[offset].bmin[0] = bmin[0]; - (*out_nodes)[offset].bmin[1] = bmin[1]; - (*out_nodes)[offset].bmin[2] = bmin[2]; - - (*out_nodes)[offset].bmax[0] = bmax[0]; - (*out_nodes)[offset].bmax[1] = bmax[1]; - (*out_nodes)[offset].bmax[2] = bmax[2]; - } - - out_stat->num_branch_nodes++; - - return offset; -} - -template -template -bool BVHAccel::Build(unsigned int num_primitives, const Prim &p, - const Pred &pred, const BVHBuildOptions &options) { - options_ = options; - stats_ = BVHBuildStatistics(); - - nodes_.clear(); - bboxes_.clear(); -#if defined(NANORT_ENABLE_PARALLEL_BUILD) - shallow_node_infos_.clear(); -#endif - - assert(options_.bin_size > 1); - - if (num_primitives == 0) { - return false; - } - - unsigned int n = num_primitives; - - // - // 1. Create triangle indices(this will be permutated in BuildTree) - // - indices_.resize(n); - -#if defined(NANORT_USE_CPP11_FEATURE) - { - size_t num_threads = min( - size_t(kNANORT_MAX_THREADS), - max(size_t(1), size_t(std::thread::hardware_concurrency()))); - - if (n < num_threads) { - num_threads = n; - } - - std::vector workers; - - size_t ndiv = n / num_threads; - - for (size_t t = 0; t < num_threads; t++) { - workers.emplace_back(std::thread([&, t]() { - size_t si = t * ndiv; - size_t ei = (t == (num_threads - 1)) ? n : min((t + 1) * ndiv, size_t(n)); - - for (size_t k = si; k < ei; k++) { - indices_[k] = static_cast(k); - } - })); - } - - for (auto &t : workers) { - t.join(); - } - } - -#else - -#ifdef _OPENMP -#pragma omp parallel for -#endif - for (int i = 0; i < static_cast(n); i++) { - indices_[static_cast(i)] = static_cast(i); - } -#endif // !NANORT_USE_CPP11_FEATURE - - // - // 2. Compute bounding box (optional). - // - real3 bmin, bmax; - - if (options.cache_bbox) { - bmin[0] = bmin[1] = bmin[2] = 1e30f; - bmax[0] = bmax[1] = bmax[2] = -1e30f; - - bboxes_.resize(n); - - for (size_t i = 0; i < n; i++) { // for each primitive - unsigned int idx = indices_[i]; - - BBox bbox; - p.BoundingBox(&(bbox.bmin), &(bbox.bmax), static_cast(i)); - bboxes_[idx] = bbox; - - // xyz - for (int k = 0; k < 3; k++) { - bmin[k] = min(bmin[k], bbox.bmin[k]); - bmax[k] = max(bmax[k], bbox.bmax[k]); - } - } - - } else { -#if defined(NANORT_USE_CPP11_FEATURE) - ComputeBoundingBoxThreaded(&bmin, &bmax, &indices_.at(0), 0, n, p); -#elif defined(_OPENMP) - ComputeBoundingBoxOMP(&bmin, &bmax, &indices_.at(0), 0, n, p); -#else - ComputeBoundingBox(&bmin, &bmax, &indices_.at(0), 0, n, p); -#endif - } - -// -// 3. Build tree -// -#if defined(NANORT_ENABLE_PARALLEL_BUILD) -#if defined(NANORT_USE_CPP11_FEATURE) - - // Do parallel build for large enough datasets. - if (n > options.min_primitives_for_parallel_build) { - BuildShallowTree(&nodes_, 0, n, /* root depth */ 0, options.shallow_depth, - p, pred); // [0, n) - - assert(shallow_node_infos_.size() > 0); - - // Build deeper tree in parallel - std::vector > > local_nodes( - shallow_node_infos_.size()); - std::vector local_stats(shallow_node_infos_.size()); - - size_t num_threads = min( - size_t(kNANORT_MAX_THREADS), - max(size_t(1), size_t(std::thread::hardware_concurrency()))); - if (shallow_node_infos_.size() < num_threads) { - num_threads = shallow_node_infos_.size(); - } - - std::vector workers; - std::atomic i(0); - - for (size_t t = 0; t < num_threads; t++) { - workers.emplace_back(std::thread([&]() { - uint32_t idx = 0; - while ((idx = (i++)) < shallow_node_infos_.size()) { - // Create thread-local copy of Pred since some mutable variables are - // modified during SAH computation. - const Pred local_pred = pred; - unsigned int left_idx = shallow_node_infos_[size_t(idx)].left_idx; - unsigned int right_idx = shallow_node_infos_[size_t(idx)].right_idx; - BuildTree(&(local_stats[size_t(idx)]), &(local_nodes[size_t(idx)]), - left_idx, right_idx, options.shallow_depth, p, local_pred); - } - })); - } - - for (auto &t : workers) { - t.join(); - } - - // Join local nodes - for (size_t ii = 0; ii < local_nodes.size(); ii++) { - assert(!local_nodes[ii].empty()); - size_t offset = nodes_.size(); - - // Add offset to child index (for branch node). - for (size_t j = 0; j < local_nodes[ii].size(); j++) { - if (local_nodes[ii][j].flag == 0) { // branch - local_nodes[ii][j].data[0] += offset - 1; - local_nodes[ii][j].data[1] += offset - 1; - } - } - - // replace - nodes_[shallow_node_infos_[ii].offset] = local_nodes[ii][0]; - - // Skip root element of the local node. - nodes_.insert(nodes_.end(), local_nodes[ii].begin() + 1, - local_nodes[ii].end()); - } - - // Join statistics - for (size_t ii = 0; ii < local_nodes.size(); ii++) { - stats_.max_tree_depth = - max(stats_.max_tree_depth, local_stats[ii].max_tree_depth); - stats_.num_leaf_nodes += local_stats[ii].num_leaf_nodes; - stats_.num_branch_nodes += local_stats[ii].num_branch_nodes; - } - - } else { - // Single thread. - BuildTree(&stats_, &nodes_, 0, n, - /* root depth */ 0, p, pred); // [0, n) - } - -#elif defined(_OPENMP) - - // Do parallel build for large enough datasets. - if (n > options.min_primitives_for_parallel_build) { - BuildShallowTree(&nodes_, 0, n, /* root depth */ 0, options.shallow_depth, - p, pred); // [0, n) - - assert(shallow_node_infos_.size() > 0); - - // Build deeper tree in parallel - std::vector > > local_nodes( - shallow_node_infos_.size()); - std::vector local_stats(shallow_node_infos_.size()); - -#pragma omp parallel for - for (int i = 0; i < static_cast(shallow_node_infos_.size()); i++) { - unsigned int left_idx = shallow_node_infos_[size_t(i)].left_idx; - unsigned int right_idx = shallow_node_infos_[size_t(i)].right_idx; - const Pred local_pred = pred; - BuildTree(&(local_stats[size_t(i)]), &(local_nodes[size_t(i)]), left_idx, - right_idx, options.shallow_depth, p, local_pred); - } - - // Join local nodes - for (size_t i = 0; i < local_nodes.size(); i++) { - assert(!local_nodes[size_t(i)].empty()); - size_t offset = nodes_.size(); - - // Add offset to child index (for branch node). - for (size_t j = 0; j < local_nodes[i].size(); j++) { - if (local_nodes[i][j].flag == 0) { // branch - local_nodes[i][j].data[0] += offset - 1; - local_nodes[i][j].data[1] += offset - 1; - } - } - - // replace - nodes_[shallow_node_infos_[i].offset] = local_nodes[i][0]; - - // Skip root element of the local node. - nodes_.insert(nodes_.end(), local_nodes[i].begin() + 1, - local_nodes[i].end()); - } - - // Join statistics - for (size_t i = 0; i < local_nodes.size(); i++) { - stats_.max_tree_depth = - max(stats_.max_tree_depth, local_stats[i].max_tree_depth); - stats_.num_leaf_nodes += local_stats[i].num_leaf_nodes; - stats_.num_branch_nodes += local_stats[i].num_branch_nodes; - } - - } else { - // Single thread - BuildTree(&stats_, &nodes_, 0, n, - /* root depth */ 0, p, pred); // [0, n) - } - -#else // !NANORT_ENABLE_PARALLEL_BUILD - { - BuildTree(&stats_, &nodes_, 0, n, - /* root depth */ 0, p, pred); // [0, n) - } -#endif -#else // !_OPENMP - - // Single thread BVH build - { - BuildTree(&stats_, &nodes_, 0, n, - /* root depth */ 0, p, pred); // [0, n) - } -#endif - - return true; -} - -template -void BVHAccel::Debug() { - for (size_t i = 0; i < indices_.size(); i++) { - printf("index[%d] = %d\n", int(i), int(indices_[i])); - } - - for (size_t i = 0; i < nodes_.size(); i++) { - printf("node[%d] : bmin %f, %f, %f, bmax %f, %f, %f\n", int(i), - nodes_[i].bmin[0], nodes_[i].bmin[1], nodes_[i].bmin[2], - nodes_[i].bmax[0], nodes_[i].bmax[1], nodes_[i].bmax[2]); - } -} - -#if defined(NANORT_ENABLE_SERIALIZATION) -template -bool BVHAccel::Dump(const char *filename) const { - FILE *fp = fopen(filename, "wb"); - if (!fp) { - // fprintf(stderr, "[BVHAccel] Cannot write a file: %s\n", filename); - return false; - } - - size_t numNodes = nodes_.size(); - assert(nodes_.size() > 0); - - size_t numIndices = indices_.size(); - - size_t r = 0; - r = fwrite(&numNodes, sizeof(size_t), 1, fp); - assert(r == 1); - - r = fwrite(&nodes_.at(0), sizeof(BVHNode), numNodes, fp); - assert(r == numNodes); - - r = fwrite(&numIndices, sizeof(size_t), 1, fp); - assert(r == 1); - - r = fwrite(&indices_.at(0), sizeof(unsigned int), numIndices, fp); - assert(r == numIndices); - - fclose(fp); - - return true; -} - -template -bool BVHAccel::Dump(FILE *fp) const { - size_t numNodes = nodes_.size(); - assert(nodes_.size() > 0); - - size_t numIndices = indices_.size(); - - size_t r = 0; - r = fwrite(&numNodes, sizeof(size_t), 1, fp); - assert(r == 1); - - r = fwrite(&nodes_.at(0), sizeof(BVHNode), numNodes, fp); - assert(r == numNodes); - - r = fwrite(&numIndices, sizeof(size_t), 1, fp); - assert(r == 1); - - r = fwrite(&indices_.at(0), sizeof(unsigned int), numIndices, fp); - assert(r == numIndices); - - return true; -} - -template -bool BVHAccel::Load(const char *filename) { - FILE *fp = fopen(filename, "rb"); - if (!fp) { - // fprintf(stderr, "Cannot open file: %s\n", filename); - return false; - } - - size_t numNodes; - size_t numIndices; - - size_t r = 0; - r = fread(&numNodes, sizeof(size_t), 1, fp); - assert(r == 1); - assert(numNodes > 0); - - nodes_.resize(numNodes); - r = fread(&nodes_.at(0), sizeof(BVHNode), numNodes, fp); - assert(r == numNodes); - - r = fread(&numIndices, sizeof(size_t), 1, fp); - assert(r == 1); - - indices_.resize(numIndices); - - r = fread(&indices_.at(0), sizeof(unsigned int), numIndices, fp); - assert(r == numIndices); - - fclose(fp); - - return true; -} - -template -bool BVHAccel::Load(FILE *fp) { - size_t numNodes; - size_t numIndices; - - size_t r = 0; - r = fread(&numNodes, sizeof(size_t), 1, fp); - assert(r == 1); - assert(numNodes > 0); - - nodes_.resize(numNodes); - r = fread(&nodes_.at(0), sizeof(BVHNode), numNodes, fp); - assert(r == numNodes); - - r = fread(&numIndices, sizeof(size_t), 1, fp); - assert(r == 1); - - indices_.resize(numIndices); - - r = fread(&indices_.at(0), sizeof(unsigned int), numIndices, fp); - assert(r == numIndices); - - return true; -} -#endif - -template -inline bool IntersectRayAABB(T *tminOut, // [out] - T *tmaxOut, // [out] - T min_t, T max_t, const T bmin[3], const T bmax[3], - real3 ray_org, real3 ray_inv_dir, - int ray_dir_sign[3]); -template <> -inline bool IntersectRayAABB(float *tminOut, // [out] - float *tmaxOut, // [out] - float min_t, float max_t, - const float bmin[3], const float bmax[3], - real3 ray_org, - real3 ray_inv_dir, - int ray_dir_sign[3]) { - float tmin, tmax; - - const float min_x = ray_dir_sign[0] ? bmax[0] : bmin[0]; - const float min_y = ray_dir_sign[1] ? bmax[1] : bmin[1]; - const float min_z = ray_dir_sign[2] ? bmax[2] : bmin[2]; - const float max_x = ray_dir_sign[0] ? bmin[0] : bmax[0]; - const float max_y = ray_dir_sign[1] ? bmin[1] : bmax[1]; - const float max_z = ray_dir_sign[2] ? bmin[2] : bmax[2]; - - // X - const float tmin_x = (min_x - ray_org[0]) * ray_inv_dir[0]; - // MaxMult robust BVH traversal(up to 4 ulp). - // 1.0000000000000004 for double precision. - const float tmax_x = (max_x - ray_org[0]) * ray_inv_dir[0] * 1.00000024f; - - // Y - const float tmin_y = (min_y - ray_org[1]) * ray_inv_dir[1]; - const float tmax_y = (max_y - ray_org[1]) * ray_inv_dir[1] * 1.00000024f; - - // Z - const float tmin_z = (min_z - ray_org[2]) * ray_inv_dir[2]; - const float tmax_z = (max_z - ray_org[2]) * ray_inv_dir[2] * 1.00000024f; - - tmin = safemax(tmin_z, safemax(tmin_y, safemax(tmin_x, min_t))); - tmax = safemin(tmax_z, safemin(tmax_y, safemin(tmax_x, max_t))); - - if (tmin <= tmax) { - (*tminOut) = tmin; - (*tmaxOut) = tmax; - - return true; - } - return false; // no hit -} - -template <> -inline bool IntersectRayAABB(double *tminOut, // [out] - double *tmaxOut, // [out] - double min_t, double max_t, - const double bmin[3], const double bmax[3], - real3 ray_org, - real3 ray_inv_dir, - int ray_dir_sign[3]) { - double tmin, tmax; - - const double min_x = ray_dir_sign[0] ? bmax[0] : bmin[0]; - const double min_y = ray_dir_sign[1] ? bmax[1] : bmin[1]; - const double min_z = ray_dir_sign[2] ? bmax[2] : bmin[2]; - const double max_x = ray_dir_sign[0] ? bmin[0] : bmax[0]; - const double max_y = ray_dir_sign[1] ? bmin[1] : bmax[1]; - const double max_z = ray_dir_sign[2] ? bmin[2] : bmax[2]; - - // X - const double tmin_x = (min_x - ray_org[0]) * ray_inv_dir[0]; - // MaxMult robust BVH traversal(up to 4 ulp). - const double tmax_x = - (max_x - ray_org[0]) * ray_inv_dir[0] * 1.0000000000000004; - - // Y - const double tmin_y = (min_y - ray_org[1]) * ray_inv_dir[1]; - const double tmax_y = - (max_y - ray_org[1]) * ray_inv_dir[1] * 1.0000000000000004; - - // Z - const double tmin_z = (min_z - ray_org[2]) * ray_inv_dir[2]; - const double tmax_z = - (max_z - ray_org[2]) * ray_inv_dir[2] * 1.0000000000000004; - - tmin = safemax(tmin_z, safemax(tmin_y, safemax(tmin_x, min_t))); - tmax = safemin(tmax_z, safemin(tmax_y, safemin(tmax_x, max_t))); - - if (tmin <= tmax) { - (*tminOut) = tmin; - (*tmaxOut) = tmax; - - return true; - } - return false; // no hit -} - -template -template -inline bool BVHAccel::TestLeafNode(const BVHNode &node, const Ray &ray, - const I &intersector) const { - bool hit = false; - - unsigned int num_primitives = node.data[0]; - unsigned int offset = node.data[1]; - - T t = intersector.GetT(); // current hit distance - - real3 ray_org; - ray_org[0] = ray.org[0]; - ray_org[1] = ray.org[1]; - ray_org[2] = ray.org[2]; - - real3 ray_dir; - ray_dir[0] = ray.dir[0]; - ray_dir[1] = ray.dir[1]; - ray_dir[2] = ray.dir[2]; - - for (unsigned int i = 0; i < num_primitives; i++) { - unsigned int prim_idx = indices_[i + offset]; - - T local_t = t; - if (intersector.Intersect(&local_t, prim_idx)) { - // Update isect state - t = local_t; - - intersector.Update(t, prim_idx); - hit = true; - } - } - - return hit; -} - -#if 0 // TODO(LTE): Implement -template template -bool BVHAccel::MultiHitTestLeafNode( - std::priority_queue, Comp> *isect_pq, - int max_intersections, - const BVHNode &node, - const Ray &ray, - const I &intersector) const { - bool hit = false; - - unsigned int num_primitives = node.data[0]; - unsigned int offset = node.data[1]; - - T t = 1e30f; - if (isect_pq->size() >= static_cast(max_intersections)) { - t = isect_pq->top().t; // current furthest hit distance - } - - real3 ray_org; - ray_org[0] = ray.org[0]; - ray_org[1] = ray.org[1]; - ray_org[2] = ray.org[2]; - - real3 ray_dir; - ray_dir[0] = ray.dir[0]; - ray_dir[1] = ray.dir[1]; - ray_dir[2] = ray.dir[2]; - - for (unsigned int i = 0; i < num_primitives; i++) { - unsigned int prim_idx = indices_[i + offset]; - - T local_t = t, u = 0.0f, v = 0.0f; - - if (intersector.Intersect(&local_t, &u, &v, prim_idx)) - { - // Update isect state - if ((local_t > ray.min_t)) - { - if (isect_pq->size() < static_cast(max_intersections)) - { - H isect; - t = local_t; - isect.t = t; - isect.u = u; - isect.v = v; - isect.prim_id = prim_idx; - isect_pq->push(isect); - - // Update t to furthest distance. - t = ray.max_t; - - hit = true; - } - else if (local_t < isect_pq->top().t) - { - // delete furthest intersection and add new intersection. - isect_pq->pop(); - - H hit; - hit.t = local_t; - hit.u = u; - hit.v = v; - hit.prim_id = prim_idx; - isect_pq->push(hit); - - // Update furthest hit distance - t = isect_pq->top().t; - - hit = true; - } - } - } - } - - return hit; -} -#endif - -template -template -bool BVHAccel::Traverse(const Ray &ray, const I &intersector, H *isect, - const BVHTraceOptions &options) const { - const int kMaxStackDepth = 512; - (void)kMaxStackDepth; - - T hit_t = ray.max_t; - - int node_stack_index = 0; - unsigned int node_stack[512]; - node_stack[0] = 0; - - // Init isect info as no hit - intersector.Update(hit_t, static_cast(-1)); - - intersector.PrepareTraversal(ray, options); - - int dir_sign[3]; - dir_sign[0] = ray.dir[0] < static_cast(0.0) ? 1 : 0; - dir_sign[1] = ray.dir[1] < static_cast(0.0) ? 1 : 0; - dir_sign[2] = ray.dir[2] < static_cast(0.0) ? 1 : 0; - - real3 ray_inv_dir; - real3 ray_dir; - ray_dir[0] = ray.dir[0]; - ray_dir[1] = ray.dir[1]; - ray_dir[2] = ray.dir[2]; - - ray_inv_dir = vsafe_inverse(ray_dir); - - real3 ray_org; - ray_org[0] = ray.org[0]; - ray_org[1] = ray.org[1]; - ray_org[2] = ray.org[2]; - - T min_t = 1e30f; - T max_t = -1e30f; - - while (node_stack_index >= 0) { - unsigned int index = node_stack[node_stack_index]; - const BVHNode &node = nodes_[index]; - - node_stack_index--; - - bool hit = IntersectRayAABB(&min_t, &max_t, ray.min_t, hit_t, node.bmin, - node.bmax, ray_org, ray_inv_dir, dir_sign); - - if (hit) { - // Branch node - if (node.flag == 0) { - int order_near = dir_sign[node.axis]; - int order_far = 1 - order_near; - - // Traverse near first. - node_stack[++node_stack_index] = node.data[order_far]; - node_stack[++node_stack_index] = node.data[order_near]; - } else if (TestLeafNode(node, ray, intersector)) { // Leaf node - hit_t = intersector.GetT(); - } - } - } - - assert(node_stack_index < kNANORT_MAX_STACK_DEPTH); - - bool hit = (intersector.GetT() < ray.max_t); - intersector.PostTraversal(ray, hit, isect); - - return hit; -} - -template -template -inline bool BVHAccel::TestLeafNodeIntersections( - const BVHNode &node, const Ray &ray, const int max_intersections, - const I &intersector, - std::priority_queue, std::vector >, - NodeHitComparator > *isect_pq) const { - bool hit = false; - - unsigned int num_primitives = node.data[0]; - unsigned int offset = node.data[1]; - - real3 ray_org; - ray_org[0] = ray.org[0]; - ray_org[1] = ray.org[1]; - ray_org[2] = ray.org[2]; - - real3 ray_dir; - ray_dir[0] = ray.dir[0]; - ray_dir[1] = ray.dir[1]; - ray_dir[2] = ray.dir[2]; - - intersector.PrepareTraversal(ray); - - for (unsigned int i = 0; i < num_primitives; i++) { - unsigned int prim_idx = indices_[i + offset]; - - T min_t, max_t; - - if (intersector.Intersect(&min_t, &max_t, prim_idx)) { - // Always add to isect lists. - NodeHit isect; - isect.t_min = min_t; - isect.t_max = max_t; - isect.node_id = prim_idx; - - if (isect_pq->size() < static_cast(max_intersections)) { - isect_pq->push(isect); - } else if (min_t < isect_pq->top().t_min) { - // delete the furthest intersection and add a new intersection. - isect_pq->pop(); - - isect_pq->push(isect); - } - } - } - - return hit; -} - -template -template -bool BVHAccel::ListNodeIntersections( - const Ray &ray, int max_intersections, const I &intersector, - StackVector, 128> *hits) const { - const int kMaxStackDepth = 512; - - T hit_t = ray.max_t; - - int node_stack_index = 0; - unsigned int node_stack[512]; - node_stack[0] = 0; - - // Stores furthest intersection at top - std::priority_queue, std::vector >, - NodeHitComparator > - isect_pq; - - (*hits)->clear(); - - int dir_sign[3]; - dir_sign[0] = ray.dir[0] < static_cast(0.0) ? 1 : 0; - dir_sign[1] = ray.dir[1] < static_cast(0.0) ? 1 : 0; - dir_sign[2] = ray.dir[2] < static_cast(0.0) ? 1 : 0; - - real3 ray_inv_dir; - real3 ray_dir; - - ray_dir[0] = ray.dir[0]; - ray_dir[1] = ray.dir[1]; - ray_dir[2] = ray.dir[2]; - - ray_inv_dir = vsafe_inverse(ray_dir); - - real3 ray_org; - ray_org[0] = ray.org[0]; - ray_org[1] = ray.org[1]; - ray_org[2] = ray.org[2]; - - T min_t, max_t; - - while (node_stack_index >= 0) { - unsigned int index = node_stack[node_stack_index]; - const BVHNode &node = nodes_[static_cast(index)]; - - node_stack_index--; - - bool hit = IntersectRayAABB(&min_t, &max_t, ray.min_t, hit_t, node.bmin, - node.bmax, ray_org, ray_inv_dir, dir_sign); - - if (hit) { - // Branch node - if (node.flag == 0) { - int order_near = dir_sign[node.axis]; - int order_far = 1 - order_near; - - // Traverse near first. - node_stack[++node_stack_index] = node.data[order_far]; - node_stack[++node_stack_index] = node.data[order_near]; - } else { // Leaf node - TestLeafNodeIntersections(node, ray, max_intersections, intersector, - &isect_pq); - } - } - } - - assert(node_stack_index < kMaxStackDepth); - (void)kMaxStackDepth; - - if (!isect_pq.empty()) { - // Store intesection in reverse order (make it frontmost order) - size_t n = isect_pq.size(); - (*hits)->resize(n); - - for (size_t i = 0; i < n; i++) { - const NodeHit &isect = isect_pq.top(); - (*hits)[n - i - 1] = isect; - isect_pq.pop(); - } - - return true; - } - - return false; -} - -#if 0 // TODO(LTE): Implement -template template -bool BVHAccel::MultiHitTraverse(const Ray &ray, - int max_intersections, - const I &intersector, - StackVector *hits, - const BVHTraceOptions& options) const { - const int kMaxStackDepth = 512; - - T hit_t = ray.max_t; - - int node_stack_index = 0; - unsigned int node_stack[512]; - node_stack[0] = 0; - - // Stores furthest intersection at top - std::priority_queue, Comp> isect_pq; - - (*hits)->clear(); - - // Init isect info as no hit - intersector.Update(hit_t, static_cast(-1)); - - intersector.PrepareTraversal(ray, options); - - int dir_sign[3]; - dir_sign[0] = ray.dir[0] < static_cast(0.0) ? static_cast(1) : static_cast(0); - dir_sign[1] = ray.dir[1] < static_cast(0.0) ? static_cast(1) : static_cast(0); - dir_sign[2] = ray.dir[2] < static_cast(0.0) ? static_cast(1) : static_cast(0); - - real3 ray_inv_dir; - real3 ray_dir; - - ray_dir[0] = ray.dir[0]; - ray_dir[1] = ray.dir[1]; - ray_dir[2] = ray.dir[2]; - - ray_inv_dir = vsafe_inverse(ray_dir); - - real3 ray_org; - ray_org[0] = ray.org[0]; - ray_org[1] = ray.org[1]; - ray_org[2] = ray.org[2]; - - T min_t, max_t; - - while (node_stack_index >= 0) - { - unsigned int index = node_stack[node_stack_index]; - const BVHNode &node = nodes_[static_cast(index)]; - - node_stack_index--; - - bool hit = IntersectRayAABB(&min_t, &max_t, ray.min_t, hit_t, node.bmin, - node.bmax, ray_org, ray_inv_dir, dir_sign); - - // branch node - if(hit) - { - if (node.flag == 0) - { - int order_near = dir_sign[node.axis]; - int order_far = 1 - order_near; - - // Traverse near first. - node_stack[++node_stack_index] = node.data[order_far]; - node_stack[++node_stack_index] = node.data[order_near]; - } - else - { - if (MultiHitTestLeafNode(&isect_pq, max_intersections, node, ray, intersector)) - { - // Only update `hit_t` when queue is full. - if (isect_pq.size() >= static_cast(max_intersections)) - { - hit_t = isect_pq.top().t; - } - } - } - } - } - - assert(node_stack_index < kMaxStackDepth); - (void)kMaxStackDepth; - - if (!isect_pq.empty()) - { - // Store intesection in reverse order (make it frontmost order) - size_t n = isect_pq.size(); - (*hits)->resize(n); - - for (size_t i = 0; i < n; i++) - { - const H &isect = isect_pq.top(); - (*hits)[n - i - 1] = isect; - isect_pq.pop(); - } - - return true; - } - - return false; -} -#endif - -#ifdef __clang__ -#pragma clang diagnostic pop -#endif - -} // namespace nanort - -#endif // NANORT_H_ \ No newline at end of file