From d2196b654db3f48806dba7f929b0c8c54a2cb43b Mon Sep 17 00:00:00 2001 From: bradgrantham-lunarg <50641407+bradgrantham-lunarg@users.noreply.github.com> Date: Tue, 20 Aug 2024 16:33:15 -0700 Subject: [PATCH 1/5] add missing C_FLAGS to x86_32 toolchain (#1676) --- cmake/toolchain/linux_x86_32.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/toolchain/linux_x86_32.cmake b/cmake/toolchain/linux_x86_32.cmake index 15700d9998..7ab09e0235 100644 --- a/cmake/toolchain/linux_x86_32.cmake +++ b/cmake/toolchain/linux_x86_32.cmake @@ -1,2 +1,3 @@ +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -m32") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32") set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -m32") From 744c0efcc76b13203065dc1546245e83a9f3d7be Mon Sep 17 00:00:00 2001 From: David Pinedo Date: Wed, 21 Aug 2024 12:57:06 -0600 Subject: [PATCH 2/5] Dump resources: Fix scaling of dump resources images When scaling up a dump resources image, the size of the temp memory used was calculated based on the size of the pre-scaled image, resulting in an attempt to access beyond the end of the temp memory. Also removed method GetImageResourceSizesLinear since it is not referenced anywhere in the code. 
--- framework/graphics/vulkan_resources_util.cpp | 52 ++------------------ framework/graphics/vulkan_resources_util.h | 26 +--------- 2 files changed, 5 insertions(+), 73 deletions(-) diff --git a/framework/graphics/vulkan_resources_util.cpp b/framework/graphics/vulkan_resources_util.cpp index b3ffa0464a..7bd1ba3a66 100644 --- a/framework/graphics/vulkan_resources_util.cpp +++ b/framework/graphics/vulkan_resources_util.cpp @@ -391,49 +391,6 @@ uint64_t VulkanResourcesUtil::GetImageResourceSizesOptimal(VkImage return resource_size; } -uint64_t VulkanResourcesUtil::GetImageResourceSizesLinear(VkImage image, - VkFormat format, - const VkExtent3D& extent, - uint32_t mip_levels, - uint32_t array_layers, - VkImageAspectFlagBits aspect, - std::vector* subresource_offsets, - std::vector* subresource_sizes, - bool all_layers_per_level) -{ - assert(mip_levels <= 1 + floor(log2(std::max(std::max(extent.width, extent.height), extent.depth)))); - - subresource_offsets->clear(); - subresource_sizes->clear(); - - const double texel_size = vkuFormatTexelSizeWithAspect(format, aspect); - // Not expecting a fractional number from a linear image - assert(texel_size == static_cast(texel_size)); - - uint64_t offset = 0; - for (uint32_t m = 0; m < mip_levels; ++m) - { - for (uint32_t l = 0; l < array_layers; ++l) - { - const uint32_t mip_width = std::max(1u, (extent.width >> m)); - const uint32_t mip_height = std::max(1u, (extent.height >> m)); - const uint64_t stride = mip_width * static_cast(texel_size); - const uint64_t size = all_layers_per_level ? 
stride * mip_height * array_layers : stride * mip_height; - - subresource_offsets->push_back(offset); - subresource_sizes->push_back(size); - offset += size; - - if (all_layers_per_level) - { - break; - } - } - } - - return offset; -} - VkResult VulkanResourcesUtil::CreateStagingBuffer(VkDeviceSize size) { assert(size); @@ -1214,6 +1171,9 @@ VkResult VulkanResourcesUtil::ReadFromImageResourceStaging(VkImage subresource_offsets.clear(); subresource_sizes.clear(); + scaled_extent.width = std::max(scaled_extent.width * scale, 1.0f); + scaled_extent.height = std::max(scaled_extent.height * scale, 1.0f); + resource_size = GetImageResourceSizesOptimal(image, format, type, @@ -1609,7 +1569,7 @@ VkResult VulkanResourcesUtil::BlitImage(VkImage image, VkFormat format, VkImageType type, const VkExtent3D& extent, - VkExtent3D& scaled_extent, + const VkExtent3D& scaled_extent, uint32_t mip_levels, uint32_t array_layers, VkImageAspectFlagBits aspect, @@ -1619,7 +1579,6 @@ VkResult VulkanResourcesUtil::BlitImage(VkImage image, VkDeviceMemory& scaled_image_mem, bool& scaling_supported) { - scaled_extent = extent; scaled_image = VK_NULL_HANDLE; scaled_image_mem = VK_NULL_HANDLE; VkImageTiling tiling; @@ -1683,9 +1642,6 @@ VkResult VulkanResourcesUtil::BlitImage(VkImage image, return VK_SUCCESS; } - scaled_extent.width = std::max(scaled_extent.width * scale, 1.0f); - scaled_extent.height = std::max(scaled_extent.height * scale, 1.0f); - // Create a scaled image and then blit to scaled image VkImageCreateInfo create_info = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; create_info.pNext = nullptr; diff --git a/framework/graphics/vulkan_resources_util.h b/framework/graphics/vulkan_resources_util.h index b874c9ae3f..a0a2fad793 100644 --- a/framework/graphics/vulkan_resources_util.h +++ b/framework/graphics/vulkan_resources_util.h @@ -93,30 +93,6 @@ class VulkanResourcesUtil std::vector* subresource_sizes = nullptr, bool all_layers_per_level = false); - // Will return the size requirements 
and offsets for each subresource contained in the specified image. - // Sizes and offsets are calculated in such a way that the each subresource will be tightly packed. - // - // This function will use texel size values from the Vulkan Utilities Library and is intented to be used - // with images that can be created with Linear tiling and therefore it is possible to be accesses directly - // without using a staging buffer. - // - // The sizes are returned in the subresource_sizes vector and will be in the order: - // M0 L0 L1 ... La M1 L0 L1 ... La ... Mm L0 L1 ... La - // Where M denotes the mip map levels and L the array layers. - // The offsets will be returned in the subresource_offsets vector in the same manner. - // all_layers_per_level boolean determines if all array layer per mip map level will be accounted as one. - // - // Return value is the total size of the image. - uint64_t GetImageResourceSizesLinear(VkImage image, - VkFormat format, - const VkExtent3D& extent, - uint32_t mip_levels, - uint32_t array_layers, - VkImageAspectFlagBits aspect, - std::vector* subresource_offsets = nullptr, - std::vector* subresource_sizes = nullptr, - bool all_layers_per_level = false); - // Use this function to dump an image sub resources into data vector. // This function is intented to be used when accessing the image content directly is not possible // and a staging buffer is required. 
@@ -242,7 +218,7 @@ class VulkanResourcesUtil VkFormat format, VkImageType type, const VkExtent3D& extent, - VkExtent3D& scaled_extent, + const VkExtent3D& scaled_extent, uint32_t mip_levels, uint32_t array_layers, VkImageAspectFlagBits aspect, From 9ec3d4015b8a315379d5d516ca82ab9ff803a081 Mon Sep 17 00:00:00 2001 From: Per Date: Thu, 22 Aug 2024 16:48:43 +0200 Subject: [PATCH 3/5] Handle soft signal interrupts during file processing (#1597) * Handle soft signal interrupts during file processing These signals are sometimes randomly encountered on networked filesystems and should be handled by resuming the operation where they left off. --- framework/decode/dx12_dump_resources.cpp | 6 +- framework/decode/file_processor.cpp | 9 ++- framework/decode/file_transformer.cpp | 18 ++++-- framework/decode/json_writer.cpp | 4 +- framework/decode/preload_file_processor.cpp | 2 +- framework/decode/vulkan_cpp_util_datapack.cpp | 3 +- .../decode/vulkan_replay_consumer_base.cpp | 2 +- .../vulkan_replay_dump_resources_json.cpp | 8 +-- framework/graphics/fps_info.cpp | 7 +- framework/util/buffer_writer.cpp | 8 +-- framework/util/file_output_stream.cpp | 8 +-- framework/util/file_output_stream.h | 4 +- framework/util/image_writer.cpp | 28 ++++---- framework/util/json_util.cpp | 11 +--- framework/util/logging.cpp | 2 +- framework/util/memory_output_stream.cpp | 4 +- framework/util/memory_output_stream.h | 2 +- framework/util/output_stream.h | 2 +- framework/util/platform.h | 64 +++++++++++++------ tools/extract/main.cpp | 3 +- 20 files changed, 109 insertions(+), 86 deletions(-) diff --git a/framework/decode/dx12_dump_resources.cpp b/framework/decode/dx12_dump_resources.cpp index efe8343348..419b3101da 100644 --- a/framework/decode/dx12_dump_resources.cpp +++ b/framework/decode/dx12_dump_resources.cpp @@ -1731,7 +1731,7 @@ void DefaultDx12DumpResourcesDelegate::WriteBlockEnd() // Dominates profiling (2/2): const std::string block = json_data_.dump(json_options_.format == 
util::JsonFormat::JSONL ? -1 : util::kJsonIndentWidth); - util::platform::FileWriteNoLock(block.data(), sizeof(std::string::value_type), block.length(), json_file_handle_); + util::platform::FileWriteNoLock(block.data(), block.length() * sizeof(std::string::value_type), json_file_handle_); util::platform::FileFlush(json_file_handle_); /// @todo Implement a FileFlushNoLock() for all platforms. } @@ -1743,9 +1743,9 @@ bool DefaultDx12DumpResourcesDelegate::WriteBinaryFile(const std::string& FILE* file_output = nullptr; if (util::platform::FileOpen(&file_output, filename.c_str(), "wb") == 0) { - util::platform::FileWrite(data.data() + offset, size, 1, file_output); + bool success = util::platform::FileWrite(data.data() + offset, size, file_output); util::platform::FileClose(file_output); - return true; + return success; } return false; } diff --git a/framework/decode/file_processor.cpp b/framework/decode/file_processor.cpp index aa473f7c16..304fe3bc2f 100644 --- a/framework/decode/file_processor.cpp +++ b/framework/decode/file_processor.cpp @@ -467,9 +467,12 @@ bool FileProcessor::ReadCompressedParameterBuffer(size_t compressed_buffer_size bool FileProcessor::ReadBytes(void* buffer, size_t buffer_size) { - size_t bytes_read = util::platform::FileRead(buffer, 1, buffer_size, file_descriptor_); - bytes_read_ += bytes_read; - return (bytes_read == buffer_size); + if (util::platform::FileRead(buffer, buffer_size, file_descriptor_)) + { + bytes_read_ += buffer_size; + return true; + } + return false; } bool FileProcessor::SkipBytes(size_t skip_size) diff --git a/framework/decode/file_transformer.cpp b/framework/decode/file_transformer.cpp index e15ef85543..4d4aefd372 100644 --- a/framework/decode/file_transformer.cpp +++ b/framework/decode/file_transformer.cpp @@ -352,16 +352,22 @@ bool FileTransformer::ReadCompressedParameterBuffer(size_t compressed_buffer_si bool FileTransformer::ReadBytes(void* buffer, size_t buffer_size) { - size_t bytes_read = 
util::platform::FileRead(buffer, 1, buffer_size, input_file_); - bytes_read_ += bytes_read; - return (bytes_read == buffer_size); + if (util::platform::FileRead(buffer, buffer_size, input_file_)) + { + bytes_read_ += buffer_size; + return true; + } + return false; } bool FileTransformer::WriteBytes(const void* buffer, size_t buffer_size) { - size_t bytes_written = util::platform::FileWrite(buffer, 1, buffer_size, output_file_); - bytes_written_ += bytes_written; - return (bytes_written == buffer_size); + if (util::platform::FileWrite(buffer, buffer_size, output_file_)) + { + bytes_written_ += buffer_size; + return true; + } + return false; } bool FileTransformer::SkipBytes(uint64_t skip_size) diff --git a/framework/decode/json_writer.cpp b/framework/decode/json_writer.cpp index 1bfb44296f..5dfc0ade13 100644 --- a/framework/decode/json_writer.cpp +++ b/framework/decode/json_writer.cpp @@ -206,9 +206,9 @@ bool JsonWriter::WriteBinaryFile(const std::string& filename, uint64_t data_size FILE* file_output = nullptr; if (util::platform::FileOpen(&file_output, filename.c_str(), "wb") == 0) { - util::platform::FileWrite(data, static_cast(data_size), 1, file_output); + bool success = util::platform::FileWrite(data, static_cast(data_size), file_output); util::platform::FileClose(file_output); - return true; + return success; } return false; } diff --git a/framework/decode/preload_file_processor.cpp b/framework/decode/preload_file_processor.cpp index 5f3a8b7514..7c9d86f6ae 100644 --- a/framework/decode/preload_file_processor.cpp +++ b/framework/decode/preload_file_processor.cpp @@ -335,7 +335,7 @@ bool PreloadFileProcessor::ReadBytes(void* buffer, size_t buffer_size) } else { - bytes_read = util::platform::FileRead(buffer, 1, buffer_size, file_descriptor_); + bytes_read = util::platform::FileRead(buffer, buffer_size, file_descriptor_) ? buffer_size : 0; bytes_read_ += bytes_read; } return bytes_read == buffer_size; diff --git a/framework/decode/vulkan_cpp_util_datapack.cpp
b/framework/decode/vulkan_cpp_util_datapack.cpp index 9a25e4ca4f..d2dbd16d49 100644 --- a/framework/decode/vulkan_cpp_util_datapack.cpp +++ b/framework/decode/vulkan_cpp_util_datapack.cpp @@ -86,8 +86,7 @@ void DataFilePacker::WriteContentsToFile(const std::string& file_path, util::platform::FileSeek(fp, fileOffset, util::platform::FileSeekCurrent); - size_t written_size = util::platform::FileWrite(data, sizeof(uint8_t), size, fp); - if (written_size != size) + if (!util::platform::FileWrite(data, size, fp)) { fprintf(stderr, "Error while saving data into %s\n", file_path.c_str()); } diff --git a/framework/decode/vulkan_replay_consumer_base.cpp b/framework/decode/vulkan_replay_consumer_base.cpp index bf0eec803c..e7335b0aaf 100644 --- a/framework/decode/vulkan_replay_consumer_base.cpp +++ b/framework/decode/vulkan_replay_consumer_base.cpp @@ -5641,7 +5641,7 @@ VkResult VulkanReplayConsumerBase::OverrideCreateShaderModule( size_t file_size = static_cast(util::platform::FileTell(fp)); file_code = std::make_unique(file_size); util::platform::FileSeek(fp, 0L, util::platform::FileSeekSet); - util::platform::FileRead(file_code.get(), sizeof(char), file_size, fp); + util::platform::FileRead(file_code.get(), file_size, fp); override_info.pCode = (uint32_t*)file_code.get(); override_info.codeSize = file_size; GFXRECON_LOG_INFO("Replacement shader found: %s", file_path.c_str()); diff --git a/framework/decode/vulkan_replay_dump_resources_json.cpp b/framework/decode/vulkan_replay_dump_resources_json.cpp index fcecd76eca..948e5ed166 100644 --- a/framework/decode/vulkan_replay_dump_resources_json.cpp +++ b/framework/decode/vulkan_replay_dump_resources_json.cpp @@ -63,7 +63,7 @@ bool VulkanReplayDumpResourcesJson::InitializeFile(const std::string& filename) return false; } - util::platform::FileWrite("[\n", 2, 1, file_); + util::platform::FileWrite("[\n", 2, file_); BlockStart(); json_data_["header"] = header_; @@ -110,7 +110,7 @@ void VulkanReplayDumpResourcesJson::Close() { if 
(file_ != nullptr) { - util::platform::FileWrite("]", 1, 1, file_); + util::platform::FileWrite("]", 1, file_); gfxrecon::util::platform::FileClose(file_); file_ = nullptr; } @@ -130,13 +130,13 @@ void VulkanReplayDumpResourcesJson::BlockEnd() if (!first_block_) { - util::platform::FileWrite(",\n", 2, 1, file_); + util::platform::FileWrite(",\n", 2, file_); } first_block_ = false; const std::string block = json_data_.dump(util::kJsonIndentWidth); - util::platform::FileWrite(block.c_str(), block.size(), 1, file_); + util::platform::FileWrite(block.c_str(), block.size(), file_); } nlohmann::ordered_json& VulkanReplayDumpResourcesJson::InsertSubEntry(const std::string& entry_name) diff --git a/framework/graphics/fps_info.cpp b/framework/graphics/fps_info.cpp index eb52010309..db20444a16 100644 --- a/framework/graphics/fps_info.cpp +++ b/framework/graphics/fps_info.cpp @@ -147,12 +147,8 @@ void FpsInfo::EndFrame(uint64_t frame) { const std::string json_string = file_content.dump(util::kJsonIndentWidth); - const size_t size_written = - util::platform::FileWrite(json_string.data(), 1, json_string.size(), file_pointer); - util::platform::FileClose(file_pointer); - // It either writes a fully valid file, or it doesn't write anything ! 
- if (size_written != json_string.size()) + if (!util::platform::FileWrite(json_string.data(), json_string.size(), file_pointer)) { GFXRECON_LOG_ERROR("Failed to write to measurements file '%s'.", measurement_file_name_.c_str()); @@ -166,6 +162,7 @@ void FpsInfo::EndFrame(uint64_t frame) remove_result); } } + util::platform::FileClose(file_pointer); } else { diff --git a/framework/util/buffer_writer.cpp b/framework/util/buffer_writer.cpp index 88566009ce..344e0db6e9 100644 --- a/framework/util/buffer_writer.cpp +++ b/framework/util/buffer_writer.cpp @@ -44,15 +44,11 @@ bool WriteBuffer(const std::string& filename, const void* data, size_t size) return false; } - size_t ret = util::platform::FileWrite(data, size, 1, file); - if (ret != 1) - { - return false; - } + bool success = util::platform::FileWrite(data, size, file); util::platform::FileClose(file); - return true; + return success; } GFXRECON_END_NAMESPACE(gfxrecon) diff --git a/framework/util/file_output_stream.cpp b/framework/util/file_output_stream.cpp index c19fffae2d..608301dbe4 100644 --- a/framework/util/file_output_stream.cpp +++ b/framework/util/file_output_stream.cpp @@ -69,14 +69,14 @@ void FileOutputStream::Reset(FILE* file) file_ = file; } -size_t FileOutputStream::Write(const void* data, size_t len) +bool FileOutputStream::Write(const void* data, size_t len) { - return platform::FileWrite(data, 1, len, file_); + return platform::FileWrite(data, len, file_); } -size_t FileNoLockOutputStream::Write(const void* data, size_t len) +bool FileNoLockOutputStream::Write(const void* data, size_t len) { - return platform::FileWriteNoLock(data, 1, len, file_); + return platform::FileWriteNoLock(data, len, file_); } GFXRECON_END_NAMESPACE(util) diff --git a/framework/util/file_output_stream.h b/framework/util/file_output_stream.h index 3c46caedb2..e8c65f1792 100644 --- a/framework/util/file_output_stream.h +++ b/framework/util/file_output_stream.h @@ -54,7 +54,7 @@ class FileOutputStream : public OutputStream 
virtual bool IsValid() override { return (file_ != nullptr); } - virtual size_t Write(const void* data, size_t len) override; + virtual bool Write(const void* data, size_t len) override; virtual void Flush() override { platform::FileFlush(file_); } @@ -75,7 +75,7 @@ class FileNoLockOutputStream : public FileOutputStream {} FileNoLockOutputStream(FILE* file, bool owned = false) : FileOutputStream(file, owned) {} - virtual size_t Write(const void* data, size_t len) override; + virtual bool Write(const void* data, size_t len) override; }; GFXRECON_END_NAMESPACE(util) diff --git a/framework/util/image_writer.cpp b/framework/util/image_writer.cpp index 4ec2e2f6e2..01020adc0e 100644 --- a/framework/util/image_writer.cpp +++ b/framework/util/image_writer.cpp @@ -215,9 +215,9 @@ static float Ufloat10ToFloat(uint16_t val) } } -#define CheckFwriteRetVal(_val_, _expected_, _file_) \ +#define CheckFwriteRetVal(_val_, _file_) \ { \ - if (_val_ != _expected_) \ + if (!_val_) \ { \ GFXRECON_LOG_ERROR("%s() (%u): fwrite failed (%s)", __func__, __LINE__, strerror(errno)); \ util::platform::FileClose(_file_); \ @@ -711,11 +711,11 @@ bool WriteBmpImage(const std::string& filename, info_header.clr_used = 0; info_header.clr_important = 0; - size_t ret = util::platform::FileWrite(&file_header, sizeof(file_header), 1, file); - CheckFwriteRetVal(ret, 1, file); + bool ret = util::platform::FileWrite(&file_header, sizeof(file_header), file); + CheckFwriteRetVal(ret, file); - ret = util::platform::FileWrite(&info_header, sizeof(info_header), 1, file); - CheckFwriteRetVal(ret, 1, file); + ret = util::platform::FileWrite(&info_header, sizeof(info_header), file); + CheckFwriteRetVal(ret, file); // Y needs to be inverted when writing the bitmap data. 
auto height_1 = height - 1; @@ -725,8 +725,8 @@ bool WriteBmpImage(const std::string& filename, for (uint32_t y = 0; y < height; ++y) { const uint8_t* bytes = reinterpret_cast(data); - ret = util::platform::FileWrite(&bytes[(height_1 - y) * data_pitch], 1, data_pitch, file); - CheckFwriteRetVal(ret, bmp_pitch, file); + ret = util::platform::FileWrite(&bytes[(height_1 - y) * data_pitch], data_pitch, file); + CheckFwriteRetVal(ret, file); } } else @@ -735,8 +735,8 @@ bool WriteBmpImage(const std::string& filename, ConvertIntoTemporaryBuffer(width, height, data, data_pitch, format, false, write_alpha); for (uint32_t y = 0; y < height; ++y) { - ret = util::platform::FileWrite(&bytes[(height_1 - y) * bmp_pitch], 1, bmp_pitch, file); - CheckFwriteRetVal(ret, bmp_pitch, file); + ret = util::platform::FileWrite(&bytes[(height_1 - y) * bmp_pitch], bmp_pitch, file); + CheckFwriteRetVal(ret, file); } } @@ -845,12 +845,12 @@ bool WriteAstcImage(const std::string& filename, if (!result && file != nullptr) { // Write the header - int ret = util::platform::FileWrite(&header, sizeof(header), 1, file); - CheckFwriteRetVal(ret, 1, file); + bool ret = util::platform::FileWrite(&header, sizeof(header), file); + CheckFwriteRetVal(ret, file); // Write the binary payload - ret = util::platform::FileWrite(data, size, 1, file); - CheckFwriteRetVal(ret, 1, file); + ret = util::platform::FileWrite(data, size, file); + CheckFwriteRetVal(ret, file); if (!ferror(file)) { diff --git a/framework/util/json_util.cpp b/framework/util/json_util.cpp index 969bc4a31b..e2fda88abd 100644 --- a/framework/util/json_util.cpp +++ b/framework/util/json_util.cpp @@ -352,15 +352,10 @@ static bool WriteBinaryFile(const std::string& filename, uint64_t data_size, con bool written_all = false; if (util::platform::FileOpen(&file_output, filename.c_str(), "wb") == 0) { - const uint64_t written = util::platform::FileWrite(data, 1, static_cast(data_size), file_output); - if (written >= data_size) + written_all = 
util::platform::FileWrite(data, static_cast(data_size), file_output); + if (!written_all) { - written_all = true; - } - else - { - GFXRECON_LOG_ERROR( - "Only wrote %" PRIu64 " bytes of %" PRIu64 " data to file %s.", written, data_size, filename.c_str()); + GFXRECON_LOG_ERROR("Failed to write %" PRIu64 " bytes to file %s.", data_size, filename.c_str()); } util::platform::FileClose(file_output); } diff --git a/framework/util/logging.cpp b/framework/util/logging.cpp index bd3c4e4cd7..2411dd8e44 100644 --- a/framework/util/logging.cpp +++ b/framework/util/logging.cpp @@ -293,7 +293,7 @@ void Log::LogMessage( // Write the newline since we want to separate each log-line but don't // want the messages themselves to have to add it. output_message = "\n"; - platform::FileWrite(output_message.c_str(), 1, 1, log_file_ptr); + platform::FileWrite(output_message.c_str(), 1, log_file_ptr); if (settings_.flush_after_write || settings_.leave_file_open) { diff --git a/framework/util/memory_output_stream.cpp b/framework/util/memory_output_stream.cpp index 966ffa4bfd..855a5c1412 100644 --- a/framework/util/memory_output_stream.cpp +++ b/framework/util/memory_output_stream.cpp @@ -46,12 +46,12 @@ MemoryOutputStream::MemoryOutputStream(const void* initial_data, size_t initial_ MemoryOutputStream::~MemoryOutputStream() {} -size_t MemoryOutputStream::Write(const void* data, size_t len) +bool MemoryOutputStream::Write(const void* data, size_t len) { const uint8_t* bytes = reinterpret_cast(data); buffer_.insert(buffer_.end(), bytes, bytes + len); - return len; + return true; } GFXRECON_END_NAMESPACE(util) diff --git a/framework/util/memory_output_stream.h b/framework/util/memory_output_stream.h index 0fd6b62345..ea28802b47 100644 --- a/framework/util/memory_output_stream.h +++ b/framework/util/memory_output_stream.h @@ -51,7 +51,7 @@ class MemoryOutputStream : public OutputStream virtual void Clear() { buffer_.clear(); }; - virtual size_t Write(const void* data, size_t len) override; + 
virtual bool Write(const void* data, size_t len) override; virtual const uint8_t* GetData() const { return buffer_.data(); } diff --git a/framework/util/output_stream.h b/framework/util/output_stream.h index d9026e6542..83526ffc11 100644 --- a/framework/util/output_stream.h +++ b/framework/util/output_stream.h @@ -39,7 +39,7 @@ class OutputStream virtual bool IsValid() { return false; } - virtual size_t Write(const void* data, size_t len) = 0; + virtual bool Write(const void* data, size_t len) = 0; virtual void Flush() {} }; diff --git a/framework/util/platform.h b/framework/util/platform.h index 598da69eb6..f82db57851 100644 --- a/framework/util/platform.h +++ b/framework/util/platform.h @@ -191,14 +191,14 @@ inline bool FileSeek(FILE* stream, int64_t offset, FileSeekOrigin origin) return (result == 0); } -inline size_t FileWriteNoLock(const void* buffer, size_t element_size, size_t element_count, FILE* stream) +inline bool FileWriteNoLock(const void* buffer, size_t bytes, FILE* stream) { - return _fwrite_nolock(buffer, element_size, element_count, stream); + return _fwrite_nolock(buffer, bytes, 1, stream) == 1; } -inline size_t FileReadNoLock(void* buffer, size_t element_size, size_t element_count, FILE* stream) +inline bool FileReadNoLock(void* buffer, size_t bytes, FILE* stream) { - return _fread_nolock(buffer, element_size, element_count, stream); + return _fread_nolock(buffer, bytes, 1, stream) == 1; } inline int32_t FileVprintf(FILE* stream, const char* format, va_list vlist) @@ -440,22 +440,36 @@ inline bool FileSeek(FILE* stream, int64_t offset, FileSeekOrigin origin) return (result == 0); } -inline size_t FileWriteNoLock(const void* buffer, size_t element_size, size_t element_count, FILE* stream) +inline bool FileWriteNoLock(const void* buffer, size_t bytes, FILE* stream) { + size_t write_count = 0; + int err = 0; + do + { #if defined(__APPLE__) || (defined(__ANDROID__) && (__ANDROID_API__ < 28)) - return fwrite(buffer, element_size, element_count, 
stream); + write_count = fwrite(buffer, bytes, 1, stream); #else - return fwrite_unlocked(buffer, element_size, element_count, stream); + write_count = fwrite_unlocked(buffer, bytes, 1, stream); #endif + err = ferror(stream); + } while (write_count < 1 && (err == EWOULDBLOCK || err == EINTR || err == EAGAIN)); + return (write_count == 1 || bytes == 0); } -inline size_t FileReadNoLock(void* buffer, size_t element_size, size_t element_count, FILE* stream) +inline bool FileReadNoLock(void* buffer, size_t bytes, FILE* stream) { + size_t read_count = 0; + int err = 0; + do + { #if defined(__APPLE__) || (defined(__ANDROID__) && (__ANDROID_API__ < 28)) - return fread(buffer, element_size, element_count, stream); + read_count = fread(buffer, bytes, 1, stream); #else - return fread_unlocked(buffer, element_size, element_count, stream); + read_count = fread_unlocked(buffer, bytes, 1, stream); #endif + err = ferror(stream); + } while (!feof(stream) && read_count < 1 && (err == EWOULDBLOCK || err == EINTR || err == EAGAIN)); + return (read_count == 1 || bytes == 0); } inline int32_t FileVprintf(FILE* stream, const char* format, va_list vlist) @@ -622,24 +636,38 @@ inline bool StringContains(const char* text, const char* substring) return strstr(text, substring) != nullptr; } -inline int32_t FilePuts(const char* char_string, FILE* stream) +inline int32_t FileFlush(FILE* stream) { - return fputs(char_string, stream); + return fflush(stream); } -inline int32_t FileFlush(FILE* stream) +inline bool FileWrite(const void* buffer, size_t bytes, FILE* stream) { - return fflush(stream); + size_t write_count = 0; + int err = 0; + do + { + write_count = fwrite(buffer, bytes, 1, stream); + err = ferror(stream); + } while (write_count < 1 && (err == EWOULDBLOCK || err == EINTR || err == EAGAIN)); + return (write_count == 1 || bytes == 0); } -inline size_t FileWrite(const void* buffer, size_t element_size, size_t element_count, FILE* stream) +inline bool FilePuts(const char* char_string, 
FILE* stream) { - return fwrite(buffer, element_size, element_count, stream); + return FileWrite(char_string, strlen(char_string), stream); } -inline size_t FileRead(void* buffer, size_t element_size, size_t element_count, FILE* stream) +inline bool FileRead(void* buffer, size_t bytes, FILE* stream) { - return fread(buffer, element_size, element_count, stream); + size_t read_count = 0; + int err = 0; + do + { + read_count = fread(buffer, bytes, 1, stream); + err = ferror(stream); + } while (!feof(stream) && read_count < 1 && (err == EWOULDBLOCK || err == EINTR || err == EAGAIN)); + return (read_count == 1 || bytes == 0); } inline int32_t SetFileBufferSize(FILE* stream, size_t buffer_size) diff --git a/tools/extract/main.cpp b/tools/extract/main.cpp index c1cdf1bb75..6f185b481e 100644 --- a/tools/extract/main.cpp +++ b/tools/extract/main.cpp @@ -133,8 +133,7 @@ class VulkanExtractConsumer : public gfxrecon::decode::VulkanConsumer int32_t result = gfxrecon::util::platform::FileOpen(&fp, file_path.c_str(), "wb"); if (result == 0) { - size_t written_size = gfxrecon::util::platform::FileWrite(orig_code, sizeof(char), orig_size, fp); - if (written_size != orig_size) + if (!gfxrecon::util::platform::FileWrite(orig_code, orig_size, fp)) { GFXRECON_WRITE_CONSOLE("Error while writing file %s: Could not complete", file_name.c_str()); } From 77cb11b5971727bf25a2274f7861b38b322000ec Mon Sep 17 00:00:00 2001 From: davidd-lunarg <73848817+davidd-lunarg@users.noreply.github.com> Date: Tue, 7 May 2024 16:20:10 -0700 Subject: [PATCH 4/5] Support D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS Add support for BuildRaytracingAccelerationStructure when D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS is used in the build desc. 
--- .../decode/dx12_resource_value_mapper.cpp | 100 ++++++++++++++++-- framework/format/format.h | 13 +-- 2 files changed, 99 insertions(+), 14 deletions(-) diff --git a/framework/decode/dx12_resource_value_mapper.cpp b/framework/decode/dx12_resource_value_mapper.cpp index 54afd87d0e..71088a59ac 100644 --- a/framework/decode/dx12_resource_value_mapper.cpp +++ b/framework/decode/dx12_resource_value_mapper.cpp @@ -477,11 +477,18 @@ void Dx12ResourceValueMapper::PostProcessBuildRaytracingAccelerationStructure( format::HandleId resource_id = format::kNullHandleId; bool found = false; - reverse_gpu_va_map_.Map(build_desc->Inputs.InstanceDescs, - &resource_id, - &found, - build_desc->Inputs.InstanceDescs + - build_desc->Inputs.NumDescs * sizeof(D3D12_RAYTRACING_INSTANCE_DESC)); + + auto min_end_gpu_va = build_desc->Inputs.InstanceDescs; + if (build_desc->Inputs.DescsLayout == D3D12_ELEMENTS_LAYOUT_ARRAY) + { + min_end_gpu_va += build_desc->Inputs.NumDescs * sizeof(D3D12_RAYTRACING_INSTANCE_DESC); + } + else + { + min_end_gpu_va += build_desc->Inputs.NumDescs * sizeof(D3D12_GPU_VIRTUAL_ADDRESS); + } + + reverse_gpu_va_map_.Map(build_desc->Inputs.InstanceDescs, &resource_id, &found, min_end_gpu_va); if (resource_id != format::kNullHandleId) { @@ -514,11 +521,22 @@ void Dx12ResourceValueMapper::PostProcessBuildRaytracingAccelerationStructure( { nullptr, nullptr, 0 } }); } } + else if (build_desc->Inputs.DescsLayout == D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS) + { + constexpr auto instance_desc_pointer_stride = sizeof(D3D12_GPU_VIRTUAL_ADDRESS); + for (UINT i = 0; i < build_desc->Inputs.NumDescs; ++i) + { + resource_value_infos.insert({ offset_to_instance_descs_start + instance_desc_pointer_stride * i, + ResourceValueType::kRaytracingInstanceDescPointer, + sizeof(D3D12_GPU_VIRTUAL_ADDRESS), + nullptr, + { nullptr, nullptr, 0 } }); + } + } else { - // TODO: Support D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS. 
- GFXRECON_LOG_WARNING("Application built acceleration structure with unsupported layout: " - "D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS"); + GFXRECON_LOG_ERROR("Unknown BuildRaytracingAccelerationStructure DescsLayout: %d", + static_cast(build_desc->Inputs.DescsLayout)); } } else @@ -1290,6 +1308,72 @@ bool Dx12ResourceValueMapper::MapValue(const ResourceValueInfo& value_info, } return false; } + else if (value_info.type == ResourceValueType::kRaytracingInstanceDescPointer) + { + GFXRECON_ASSERT(value_info.size == sizeof(D3D12_GPU_VIRTUAL_ADDRESS)); + + // Map the GPU_VA in the array of instance desc pointers. + ResourceValueInfo rvi = value_info; + rvi.type = ResourceValueType::kGpuVirtualAddress; + MapValue(rvi, result_data, resource_id, resource_info, indirect_values_map); + + // Read instance desc GPU_VA from the array of pointers. + D3D12_GPU_VIRTUAL_ADDRESS instance_desc_gpu_va = 0; + util::platform::MemoryCopy(&instance_desc_gpu_va, + sizeof(instance_desc_gpu_va), + result_data.data() + value_info.offset, + sizeof(instance_desc_gpu_va)); + + GFXRECON_ASSERT(value_info.offset == final_offset); + + // Insert new RV infos for instance desc's AccelerationStructure, which will queue it for mapping. + if (instance_desc_gpu_va != 0) + { + // The spec requires that instance descs are aligned to D3D12_RAYTRACING_INSTANCE_DESCS_BYTE_ALIGNMENT. If + // the instance descs are not aligned behavior may be undefined. + GFXRECON_ASSERT((instance_desc_gpu_va % D3D12_RAYTRACING_INSTANCE_DESCS_BYTE_ALIGNMENT) == 0); + + // Find the resource that contains the address referenced by instance_desc_gpu_va. 
+ format::HandleId instance_desc_resource_id = format::kNullHandleId; + bool found = false; + reverse_gpu_va_map_.Map(instance_desc_gpu_va, + &instance_desc_resource_id, + &found, + instance_desc_gpu_va + sizeof(D3D12_RAYTRACING_INSTANCE_DESC)); + + if (instance_desc_resource_id != format::kNullHandleId) + { + GFXRECON_ASSERT(found); + + auto resource_object_info = get_object_info_func_(instance_desc_resource_id); + GFXRECON_ASSERT(resource_object_info != nullptr); + GFXRECON_ASSERT(resource_object_info->object != nullptr); + + auto instance_desc_resource = static_cast(resource_object_info->object); + GFXRECON_ASSERT(instance_desc_gpu_va >= instance_desc_resource->GetGPUVirtualAddress()); + auto offset_to_instance_desc_start = + instance_desc_gpu_va - instance_desc_resource->GetGPUVirtualAddress(); + + constexpr auto accel_struct_gpu_va_offset = + offsetof(D3D12_RAYTRACING_INSTANCE_DESC, AccelerationStructure); + + auto& resource_value_infos = indirect_values_map[resource_object_info]; + resource_value_infos.insert({ offset_to_instance_desc_start + accel_struct_gpu_va_offset, + ResourceValueType::kGpuVirtualAddress, + sizeof(D3D12_GPU_VIRTUAL_ADDRESS), + nullptr, + { nullptr, nullptr, 0 } }); + } + else + { + GFXRECON_LOG_ERROR("Failed to find the resource containing the D3D12_GPU_VIRTUAL_ADDRESS (%" PRIu64 + ") of InstanceDescs in call to BuildRaytracingAccelerationStructure. GPU addresses " + "pointed to by InstanceDescs may be incorrect.", + instance_desc_gpu_va); + } + } + return true; + } else { GFXRECON_ASSERT(false && "Unrecognized resource value type."); diff --git a/framework/format/format.h b/framework/format/format.h index 31ed94a225..c5abe9f274 100644 --- a/framework/format/format.h +++ b/framework/format/format.h @@ -226,12 +226,13 @@ struct EnabledOptions // Resource values are values contained in resource data that may require special handling (e.g., mapping for replay). 
enum class ResourceValueType : uint8_t { - kUnknown = 0, - kGpuVirtualAddress = 1, - kGpuDescriptorHandle = 2, - kShaderIdentifier = 3, - kIndirectArgumentDispatchRays = 4, - kExecuteIndirectCountBuffer = 5 + kUnknown = 0, + kGpuVirtualAddress = 1, + kGpuDescriptorHandle = 2, + kShaderIdentifier = 3, + kIndirectArgumentDispatchRays = 4, + kExecuteIndirectCountBuffer = 5, + kRaytracingInstanceDescPointer = 6, }; #pragma pack(push) From 3f9f91b8383072db0454cbb890fcda4ca62236b0 Mon Sep 17 00:00:00 2001 From: davidd-lunarg <73848817+davidd-lunarg@users.noreply.github.com> Date: Mon, 15 Jul 2024 17:38:07 -0700 Subject: [PATCH 5/5] Trim support for Build TLAS with ARRAY_OF_POINTERS When tracking acceleration structure build input data for trimming, use D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE_VISUALIZATION_DECODE_FOR_TOOLS to save the inputs for TLAS builds that use DescsLayout value D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS. Also use default heap instead of readback heap for the resources used for saving BuildAS inputs. This is required to support the above change. --- framework/encode/dx12_object_wrapper_info.h | 30 ++- framework/encode/dx12_state_tracker.cpp | 208 +++++++++++++++++--- framework/encode/dx12_state_writer.cpp | 53 ++++- framework/encode/dx12_state_writer.h | 3 +- framework/graphics/dx12_util.cpp | 27 +-- 5 files changed, 261 insertions(+), 60 deletions(-) diff --git a/framework/encode/dx12_object_wrapper_info.h b/framework/encode/dx12_object_wrapper_info.h index 16ed8efdb4..04ed2f4b88 100644 --- a/framework/encode/dx12_object_wrapper_info.h +++ b/framework/encode/dx12_object_wrapper_info.h @@ -190,6 +190,7 @@ struct DxAccelerationStructureBuildInfo std::vector inputs_geometry_descs; uint64_t input_data_size{ 0 }; + uint64_t input_data_header_size{ 0 }; graphics::dx12::ID3D12ResourceComPtr input_data_resource{ nullptr }; // Copy state. 
@@ -199,6 +200,8 @@ struct DxAccelerationStructureBuildInfo D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE copy_mode{}; ///< Copy mode used to create this acceleration structure bool was_copy_source{ false }; ///< Was this acceleration structure copied to another? + + bool is_tlas_with_array_of_pointers{ false }; }; struct IDXGIKeyedMutexInfo : public DxgiWrapperInfo @@ -290,12 +293,35 @@ struct ID3D12QueryHeapInfo : public DxWrapperInfo struct ID3D12CommandSignatureInfo : public DxWrapperInfo {}; +struct AccelerationStructureBuildTrackingObjects +{ + AccelerationStructureBuildTrackingObjects( + graphics::dx12::ID3D12ResourceComPtr _resource, + graphics::dx12::ID3D12CommandAllocatorComPtr _post_build_copy_cmd_allocator, + graphics::dx12::ID3D12GraphicsCommandList4ComPtr _post_build_copy_cmd_list) : + resource(_resource), + post_build_copy_cmd_allocator(_post_build_copy_cmd_allocator), + post_build_copy_cmd_list(_post_build_copy_cmd_list) + {} + + // Target resource for build inputs. + graphics::dx12::ID3D12ResourceComPtr resource = nullptr; + + // Objects used to copy inputs for TLAS builds that use D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS for DescLayout. + graphics::dx12::ID3D12CommandAllocatorComPtr post_build_copy_cmd_allocator = nullptr; + graphics::dx12::ID3D12GraphicsCommandList4ComPtr post_build_copy_cmd_list = nullptr; +}; + struct ID3D12CommandQueueInfo : public DxWrapperInfo { //// Begin state tracking members - graphics::dx12::ID3D12FenceComPtr acceleration_structure_build_fence; - std::map pending_acceleration_structure_build_resources; + // Fence that is signalled on the queue after AS builds. + graphics::dx12::ID3D12FenceComPtr acceleration_structure_build_tracking_fence; + + // Objects that need to be kept alive until the associated AS build has completed on the GPU. Key of the map is the + // fence signal value to indicate the AS build is complete. 
+ std::map acceleration_structure_build_tracking_objects; }; struct ID3D12PipelineLibraryInfo : public DxWrapperInfo diff --git a/framework/encode/dx12_state_tracker.cpp b/framework/encode/dx12_state_tracker.cpp index 3bedbb44db..c377d9cad0 100644 --- a/framework/encode/dx12_state_tracker.cpp +++ b/framework/encode/dx12_state_tracker.cpp @@ -192,6 +192,19 @@ void Dx12StateTracker::TrackExecuteCommandLists(ID3D12CommandQueue_Wrapper* queu UINT num_lists, ID3D12CommandList* const* lists) { + GFXRECON_ASSERT(queue_wrapper != nullptr); + GFXRECON_ASSERT(queue_wrapper->GetObjectInfo() != nullptr); + auto queue_info = queue_wrapper->GetObjectInfo(); + auto queue = queue_wrapper->GetWrappedObjectAs(); + auto device = graphics::dx12::GetDeviceComPtrFromChild(queue); + + graphics::dx12::ID3D12CommandAllocatorComPtr post_build_as_copy_cmd_allocator = nullptr; + graphics::dx12::ID3D12GraphicsCommandList4ComPtr post_build_as_copy_cmd_list = nullptr; + + HRESULT result = S_OK; + uint64_t highest_as_build_id = 0; + bool executing_acceleration_structure_build = false; + for (UINT i = 0; i < num_lists; ++i) { auto list_wrapper = reinterpret_cast(lists[i]); @@ -227,61 +240,169 @@ void Dx12StateTracker::TrackExecuteCommandLists(ID3D12CommandQueue_Wrapper* queu } } - GFXRECON_ASSERT(queue_wrapper != nullptr); - GFXRECON_ASSERT(queue_wrapper->GetObjectInfo() != nullptr); - auto queue_info = queue_wrapper->GetObjectInfo(); - bool has_acceleration_structure_build = !list_info->acceleration_structure_builds.empty() || !list_info->acceleration_structure_copies.empty(); if (has_acceleration_structure_build) { + executing_acceleration_structure_build = true; + bool has_tlas_with_array_of_pointers = false; + for (const auto& as_build_info : list_info->acceleration_structure_builds) + { + if (as_build_info.is_tlas_with_array_of_pointers) + { + has_tlas_with_array_of_pointers = true; + } + } + + // If the command list contains a TLAS build that uses the ARRAY_OF_POINTER DescLayout then use + 
// COPY_MODE_VISUALIZATION to save the inputs to the build. + if (has_tlas_with_array_of_pointers) + { + D3D12_COMMAND_LIST_TYPE list_type = queue->GetDesc().Type; + + if (post_build_as_copy_cmd_allocator == nullptr) + { + result = device->CreateCommandAllocator(list_type, IID_PPV_ARGS(&post_build_as_copy_cmd_allocator)); + } + if (SUCCEEDED(result)) + { + if (post_build_as_copy_cmd_list == nullptr) + { + result = device->CreateCommandList(0, + list_type, + post_build_as_copy_cmd_allocator, + nullptr, + IID_PPV_ARGS(&post_build_as_copy_cmd_list)); + } + if (SUCCEEDED(result)) + { + for (const auto& as_build_info : list_info->acceleration_structure_builds) + { + if (as_build_info.is_tlas_with_array_of_pointers && + as_build_info.input_data_resource != nullptr) + { + { + D3D12_RESOURCE_TRANSITION_BARRIER pre_transition; + pre_transition.pResource = as_build_info.input_data_resource; + pre_transition.Subresource = 0; + pre_transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; + pre_transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + D3D12_RESOURCE_BARRIER pre_barrier; + pre_barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + pre_barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + pre_barrier.Transition = pre_transition; + post_build_as_copy_cmd_list->ResourceBarrier(1, &pre_barrier); + } + + { + auto dst_gpuva = as_build_info.input_data_resource->GetGPUVirtualAddress(); + auto src_gpuva = as_build_info.dest_gpu_va; + post_build_as_copy_cmd_list->CopyRaytracingAccelerationStructure( + dst_gpuva, + src_gpuva, + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE_VISUALIZATION_DECODE_FOR_TOOLS); + } + + { + D3D12_RESOURCE_TRANSITION_BARRIER post_transition; + post_transition.pResource = as_build_info.input_data_resource; + post_transition.Subresource = 0; + post_transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + post_transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST; + D3D12_RESOURCE_BARRIER post_barrier; + post_barrier.Type = 
D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + post_barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + post_barrier.Transition = post_transition; + post_build_as_copy_cmd_list->ResourceBarrier(1, &post_barrier); + } + } + } + } + } + + if (FAILED(result)) + { + GFXRECON_LOG_ERROR("Failed to record command list to copy instance descs for TLAS build."); + } + } + // Add acceleration structure build infos to their destination resources. - uint64_t highest_build_id = 0; for (auto& accel_struct_build : list_info->acceleration_structure_builds) { - auto build_id = CommitAccelerationStructureBuildInfo(accel_struct_build); - highest_build_id = std::max(build_id, highest_build_id); - queue_info->pending_acceleration_structure_build_resources[build_id] = - accel_struct_build.input_data_resource; + auto build_id = CommitAccelerationStructureBuildInfo(accel_struct_build); + highest_as_build_id = std::max(build_id, highest_as_build_id); + GFXRECON_ASSERT(queue_info->acceleration_structure_build_tracking_objects.count(build_id) == 0); + queue_info->acceleration_structure_build_tracking_objects.emplace( + build_id, + AccelerationStructureBuildTrackingObjects(accel_struct_build.input_data_resource, + post_build_as_copy_cmd_allocator, + post_build_as_copy_cmd_list)); } // Add acceleration structure copy infos to their resources. 
for (auto& accel_struct_copy : list_info->acceleration_structure_copies) { graphics::dx12::ID3D12ResourceComPtr inputs_data_resource; - auto build_id = CommitAccelerationStructureCopyInfo(accel_struct_copy, inputs_data_resource); - highest_build_id = std::max(build_id, highest_build_id); - queue_info->pending_acceleration_structure_build_resources[build_id] = inputs_data_resource; + auto build_id = CommitAccelerationStructureCopyInfo(accel_struct_copy, inputs_data_resource); + highest_as_build_id = std::max(build_id, highest_as_build_id); + GFXRECON_ASSERT(queue_info->acceleration_structure_build_tracking_objects.count(build_id) == 0); + queue_info->acceleration_structure_build_tracking_objects.emplace( + build_id, + AccelerationStructureBuildTrackingObjects( + inputs_data_resource, post_build_as_copy_cmd_allocator, post_build_as_copy_cmd_list)); } GFXRECON_ASSERT(queue_wrapper->GetWrappedObject() != nullptr); auto queue = queue_wrapper->GetWrappedObjectAs(); // Create the fence that will be signaled by the queue to indicate that builds are complete. - if (queue_info->acceleration_structure_build_fence == nullptr) + if (queue_info->acceleration_structure_build_tracking_fence == nullptr) { auto device = graphics::dx12::GetDeviceComPtrFromChild(queue); - GFXRECON_ASSERT(device); + GFXRECON_ASSERT(device != nullptr); auto hr = device->CreateFence( - 0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&queue_info->acceleration_structure_build_fence)); + 0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&queue_info->acceleration_structure_build_tracking_fence)); GFXRECON_ASSERT(SUCCEEDED(hr)); } + } + } - // Add a signal to the queue to indicate that the acceleration structure builds have completed. - queue->Signal(queue_info->acceleration_structure_build_fence, highest_build_id); + if (executing_acceleration_structure_build) + { + // Execute the commands to copy the TLAS build inputs. 
+ if (post_build_as_copy_cmd_list != nullptr) + { + result = post_build_as_copy_cmd_list->Close(); + if (SUCCEEDED(result)) + { + ID3D12CommandList* cmd_list[] = { post_build_as_copy_cmd_list }; + queue->ExecuteCommandLists(1, cmd_list); + } + else + { + GFXRECON_LOG_ERROR("Failed to close command list to copy instance descs for TLAS build."); + } } - // Clear out any completed pending_acceleration_structure_build_resources. - if (!queue_info->pending_acceleration_structure_build_resources.empty()) + // Add a signal to the queue to indicate that the acceleration structure builds have completed. + if ((queue_info->acceleration_structure_build_tracking_fence != nullptr) && (highest_as_build_id > 0)) { - GFXRECON_ASSERT(queue_info->acceleration_structure_build_fence != nullptr); - auto& resources_map = queue_info->pending_acceleration_structure_build_resources; - auto completed_end = - resources_map.upper_bound(queue_info->acceleration_structure_build_fence->GetCompletedValue()); - resources_map.erase(resources_map.begin(), completed_end); + GFXRECON_ASSERT(queue_info->acceleration_structure_build_tracking_fence->GetCompletedValue() <= + highest_as_build_id); + queue->Signal(queue_info->acceleration_structure_build_tracking_fence, highest_as_build_id); } } + + // Clear out any completed entries in acceleration_structure_build_tracking_objects. 
+ if (!queue_info->acceleration_structure_build_tracking_objects.empty()) + { + GFXRECON_ASSERT(queue_info->acceleration_structure_build_tracking_fence != nullptr); + auto& objects_map = queue_info->acceleration_structure_build_tracking_objects; + auto completed_end = + objects_map.upper_bound(queue_info->acceleration_structure_build_tracking_fence->GetCompletedValue()); + objects_map.erase(objects_map.begin(), completed_end); + } } void Dx12StateTracker::TrackResourceCreation(ID3D12Resource_Wrapper* resource_wrapper, @@ -599,6 +720,8 @@ void Dx12StateTracker::TrackBuildRaytracingAccelerationStructure( // Save build input arguments. build_info.inputs = desc->Inputs; + build_info.is_tlas_with_array_of_pointers = false; + // Save a copy of the input's geometry desc pointers. if (desc->Inputs.Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL) { @@ -618,6 +741,20 @@ void Dx12StateTracker::TrackBuildRaytracingAccelerationStructure( build_info.inputs.pGeometryDescs = nullptr; build_info.inputs.ppGeometryDescs = nullptr; } + else if (desc->Inputs.Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL) + { + // This code path adds support for top level AS builds where `DescsLayout == + // D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS`. Any top level AS build--regardless of DescLayout value--could + // also use this code path, but this path is newer and not as thoroughly tested so use the original code + // path where possible. + if (desc->Inputs.DescsLayout == D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS) + { + // In order to store TLAS instance descs, use CopyRaytracingAccelerationStructure with + // D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE_VISUALIZATION_DECODE_FOR_TOOLS after the build + // command list has completed. + build_info.is_tlas_with_array_of_pointers = true; + } + } // Compute the required inputs buffer size and entry information. 
uint64_t inputs_buffer_size = 0; @@ -625,10 +762,21 @@ void Dx12StateTracker::TrackBuildRaytracingAccelerationStructure( graphics::dx12::GetAccelerationStructureInputsBufferEntries( build_info.inputs, build_info.inputs_geometry_descs.data(), inputs_buffer_size, inputs_buffer_entries); + // TLAS builds shouldn't have more than one input buffer entry. + GFXRECON_ASSERT((desc->Inputs.Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL) || + (inputs_buffer_entries.size() <= 1)); + // Save input data to a secodary resource. build_info.input_data_size = inputs_buffer_size; - if (inputs_buffer_size > 0) + if (build_info.input_data_size > 0) { + if (build_info.is_tlas_with_array_of_pointers) + { + build_info.input_data_header_size = + sizeof(D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_TOOLS_VISUALIZATION_HEADER); + build_info.input_data_size += build_info.input_data_header_size; + } + // Sort the entries by GPU VA so that entries from the same resource are contiguous. std::sort(inputs_buffer_entries.begin(), inputs_buffer_entries.end(), @@ -644,7 +792,7 @@ void Dx12StateTracker::TrackBuildRaytracingAccelerationStructure( auto existing_accel_struct = resource_info->acceleration_structure_builds.find(build_info.dest_gpu_va); if (existing_accel_struct != resource_info->acceleration_structure_builds.end()) { - if ((existing_accel_struct->second.input_data_size == inputs_buffer_size) && + if ((existing_accel_struct->second.input_data_size == build_info.input_data_size) && (!existing_accel_struct->second.was_copy_source) && (existing_accel_struct->second.copy_source_gpu_va == 0)) { @@ -656,10 +804,10 @@ void Dx12StateTracker::TrackBuildRaytracingAccelerationStructure( if (inputs_data_resource == nullptr) { inputs_data_resource = graphics::dx12::CreateBufferResource(device5, - inputs_buffer_size, - D3D12_HEAP_TYPE_READBACK, + build_info.input_data_size, + D3D12_HEAP_TYPE_DEFAULT, D3D12_RESOURCE_STATE_COPY_DEST, - D3D12_RESOURCE_FLAG_NONE); + 
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); } GFXRECON_ASSERT(inputs_data_resource); build_info.input_data_resource = inputs_data_resource; @@ -674,7 +822,7 @@ void Dx12StateTracker::TrackBuildRaytracingAccelerationStructure( // Add CopyBufferRegion(s) and ResourceBarrier(s) to command list to save the build input resource data. auto curr_entry_iter = inputs_buffer_entries.begin(); auto end_entry_iter = inputs_buffer_entries.end(); - while (curr_entry_iter != end_entry_iter) + while (!build_info.is_tlas_with_array_of_pointers && curr_entry_iter != end_entry_iter) { ID3D12Resource_Wrapper* src_resource_wrapper = nullptr; { diff --git a/framework/encode/dx12_state_writer.cpp b/framework/encode/dx12_state_writer.cpp index 0beb62dc54..fabf0d9b6d 100644 --- a/framework/encode/dx12_state_writer.cpp +++ b/framework/encode/dx12_state_writer.cpp @@ -1373,26 +1373,44 @@ void Dx12StateWriter::WriteEnableDRED() void Dx12StateWriter::WriteAccelerationStructuresState(const Dx12StateTable& state_table) { std::map build_infos; + ID3D12Device_Wrapper* device_wrapper = nullptr; // Find all acceleration structures that exist on resources. 
+ uint64_t max_inputs_buffer_size = 0; state_table.VisitWrappers([&](ID3D12Resource_Wrapper* resource_wrapper) { GFXRECON_ASSERT(resource_wrapper != nullptr); GFXRECON_ASSERT(resource_wrapper->GetObjectInfo() != nullptr); const auto resource_info = resource_wrapper->GetObjectInfo(); + if (device_wrapper == nullptr) + { + device_wrapper = resource_info->device_wrapper; + } + for (const auto& pair : resource_info->acceleration_structure_builds) { GFXRECON_ASSERT(build_infos.count(pair.second.id) == 0); build_infos[pair.second.id] = &pair.second; + max_inputs_buffer_size = std::max(max_inputs_buffer_size, pair.second.input_data_size); } }); - WriteAccelerationStructuresState(build_infos); + if (build_infos.size() > 0) + { + GFXRECON_ASSERT(device_wrapper != nullptr); + ID3D12Device* device = device_wrapper->GetWrappedObjectAs(); + GFXRECON_ASSERT(device != nullptr); + + std::unique_ptr resource_data_util = + std::make_unique(device, max_inputs_buffer_size); + WriteAccelerationStructuresState(build_infos, resource_data_util.get()); + } } void Dx12StateWriter::WriteAccelerationStructuresState( - std::map as_builds) + std::map as_builds, + graphics::Dx12ResourceDataUtil* resource_data_util) { uint64_t accel_struct_file_bytes = 0; std::set blas_addresses; @@ -1418,9 +1436,27 @@ void Dx12StateWriter::WriteAccelerationStructuresState( } uint8_t* inputs_data_ptr = nullptr; - HRESULT hr = graphics::dx12::MapSubresource(as_build.input_data_resource, 0, nullptr, inputs_data_ptr); - if (SUCCEEDED(hr) && (inputs_data_ptr != nullptr)) + temp_subresource_data_.clear(); + temp_subresource_sizes_.clear(); + temp_subresource_offsets_.clear(); + + // Read the build inputs data from the resource. 
+ HRESULT hr = resource_data_util->ReadFromResource( + as_build.input_data_resource, + false, + { { D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_BARRIER_FLAG_NONE } }, + { { D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_BARRIER_FLAG_NONE } }, + temp_subresource_data_, + temp_subresource_offsets_, + temp_subresource_sizes_); + + if (SUCCEEDED(hr)) { + GFXRECON_ASSERT(temp_subresource_sizes_.size() == 1); + GFXRECON_ASSERT(temp_subresource_sizes_[0] == as_build.input_data_size); + + inputs_data_ptr = temp_subresource_data_.data() + as_build.input_data_header_size; + // Check that the instance desc addresses are correct for TLAS. if (as_build.inputs.Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL) { @@ -1498,7 +1534,7 @@ void Dx12StateWriter::WriteAccelerationStructuresState( cmd.inputs_num_geometry_descs = 0; if (write_build_data) { - cmd.inputs_data_size = as_build.input_data_size; + cmd.inputs_data_size = as_build.input_data_size - as_build.input_data_header_size; if (as_build.inputs.Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL) { cmd.inputs_num_geometry_descs = as_build.inputs.NumDescs; @@ -1512,15 +1548,15 @@ void Dx12StateWriter::WriteAccelerationStructuresState( GFXRECON_ASSERT(false && "Invalid D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE."); } - GFXRECON_CHECK_CONVERSION_DATA_LOSS(size_t, as_build.input_data_size); - inputs_data_ptr_file_size = static_cast(as_build.input_data_size); + GFXRECON_CHECK_CONVERSION_DATA_LOSS(size_t, cmd.inputs_data_size); + inputs_data_ptr_file_size = static_cast(cmd.inputs_data_size); if (compressor_ != nullptr) { // Compress block data. 
size_t compressed_size = compressor_->Compress(inputs_data_ptr_file_size, inputs_data_ptr, &compressed_parameter_buffer_, 0); - if ((compressed_size > 0) && (compressed_size < as_build.input_data_size)) + if ((compressed_size > 0) && (compressed_size < cmd.inputs_data_size)) { cmd.meta_header.block_header.type = format::BlockType::kCompressedMetaDataBlock; @@ -1578,7 +1614,6 @@ void Dx12StateWriter::WriteAccelerationStructuresState( // Write inputs data. output_stream_->Write(inputs_data_ptr, inputs_data_ptr_file_size); accel_struct_file_bytes += inputs_data_ptr_file_size; - as_build.input_data_resource->Unmap(0, nullptr); } // Track which accel struct addresses have been written to the trim state block. diff --git a/framework/encode/dx12_state_writer.h b/framework/encode/dx12_state_writer.h index 579c5fcdbe..96969a1256 100644 --- a/framework/encode/dx12_state_writer.h +++ b/framework/encode/dx12_state_writer.h @@ -172,7 +172,8 @@ class Dx12StateWriter void WriteAccelerationStructuresState(const Dx12StateTable& state_table); - void WriteAccelerationStructuresState(std::map build_infos); + void WriteAccelerationStructuresState(std::map build_infos, + graphics::Dx12ResourceDataUtil* resource_data_util); void WriteStateObjectsState(const Dx12StateTable& state_table); diff --git a/framework/graphics/dx12_util.cpp b/framework/graphics/dx12_util.cpp index aebde29d77..658f4c0209 100644 --- a/framework/graphics/dx12_util.cpp +++ b/framework/graphics/dx12_util.cpp @@ -384,25 +384,16 @@ void GetAccelerationStructureInputsBufferEntries(D3D12_BUILD_RAYTRACING_ACCELERA } else if (inputs_desc.Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL) { - if (inputs_desc.DescsLayout == D3D12_ELEMENTS_LAYOUT_ARRAY) + if (inputs_desc.NumDescs > 0) { - if (inputs_desc.NumDescs > 0) - { - GFXRECON_ASSERT(inputs_desc.InstanceDescs != 0); - - inputs_buffer_size = inputs_desc.NumDescs * sizeof(D3D12_RAYTRACING_INSTANCE_DESC); - InputsBufferEntry entry{}; - entry.desc_gpu_va = 
&inputs_desc.InstanceDescs; - entry.offset = 0; - entry.size = inputs_buffer_size; - entries.push_back(entry); - } - } - else - { - GFXRECON_LOG_ERROR( - "Unsupported instance descs layout (DescsLayout=%d) used in BuildRaytracingAccelerationStructure.", - inputs_desc.DescsLayout); + GFXRECON_ASSERT(inputs_desc.InstanceDescs != 0); + + inputs_buffer_size = inputs_desc.NumDescs * sizeof(D3D12_RAYTRACING_INSTANCE_DESC); + InputsBufferEntry entry{}; + entry.desc_gpu_va = &inputs_desc.InstanceDescs; + entry.offset = 0; + entry.size = inputs_buffer_size; + entries.push_back(entry); } } else