From efc2d757803c8d66a4872706ff1f43d0e77ef89c Mon Sep 17 00:00:00 2001 From: chuang13 Date: Thu, 21 Dec 2023 16:14:10 +0800 Subject: [PATCH] Update xgl from commit daeb5249 * Update PAL Version in XGL 841 * Update Khronos Vulkan Headers to 1.3.271 * Update to new DevDriver Settings * Bump Gpurt Version to 41 * Bump LLPC version to 69 * Generate debug file amdvlk64.so.debug to debug with release driver * Power Optimization for ML workloads * Alert, not assert, when ML power opt fails * Add ImageType PAL DMA constraint in XGL * Add setting for debug printf buffer creation * Rename OverrideNumVGPRsAvailable to camelCase * Simplify VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT * Bypass MALL when device_coherent_memory extension is used * Export "NV_COMPUTE_SHADER_DERIVATIVES" for vkd3d engine to run DX12 games on steam * Cleanup VKI_BUILD_NAVI31 and VKI_BUILD_GFX11 * Cleanup VariableName field in settings_xgl.json * Remove ICD_BUILD_{LLPC,SPV}ONLY * Remove the option 'enableRobustUberFetchShader' * Use core names for dynamic rendering instead of KHR names * Pass option cpsFlag to LLPC * Force PwsMode::NoLateAcquirePoint for GfxIp11 and BG3 * Improve CPU performance in QueuePresent * Move ac01WaNotNeeded to UpdatePalSettings * CS2 - App detect for waDisableAc01 on NV3x * CS2 optimizations * Fix incorrect attachment clears for dynamic rendering * Fix pipeline creation failures for mesh shader pipelines during renderdoc replay * Fix graphics pipeline library * Fix RenderPassClearUpfront OOM handling * Fix memory leak and delay BIL conversion again * Fix crash when image is blit via gpu_decode_layer * Fix warning about ignored 'const' qualifier * Fix compilation failure due to designated initializers * Fix RT Apps fail with Wave32 --- CMakeLists.txt | 9 - cmake/XglOptions.cmake | 1 - cmake/XglOverrides.cmake | 5 - cmake/XglVersions.cmake | 6 +- icd/CMakeLists.txt | 31 +- icd/Loader/LunarG/Lnx/amd-icd.json | 4 +- icd/api/app_profile.cpp | 17 +- icd/api/app_shader_optimizer.cpp | 13 +- icd/api/appopt/gpu_decode_layer.cpp | 16 +- icd/api/compiler_solution_llpc.cpp | 1 + icd/api/debug_printf.cpp | 5 +- icd/api/entry.cpp | 4 +- icd/api/graphics_pipeline_common.cpp | 34 +- icd/api/include/app_profile.h | 1 + .../khronos/sdk-1.3/vulkan/vulkan_core.h | 27 +- icd/api/include/vk_cmdbuffer.h | 18 +- icd/api/include/vk_conv.h | 42 +- icd/api/include/vk_device.h | 1 + icd/api/include/vk_extensions.h | 1 + icd/api/include/vk_framebuffer.h | 2 +- icd/api/include/vk_semaphore.h | 1 + icd/api/pipeline_compiler.cpp | 56 +- icd/api/raytrace/ray_tracing_device.cpp | 14 +- icd/api/renderpass/renderpass_builder.cpp | 14 +- icd/api/strings/extensions.txt | 1 + icd/api/vk_buffer.cpp | 2 +- icd/api/vk_cmdbuffer.cpp | 104 ++-- icd/api/vk_cmdbuffer_transfer.cpp | 4 +- icd/api/vk_device.cpp | 20 +- icd/api/vk_fence.cpp | 6 +- icd/api/vk_framebuffer.cpp | 2 +- icd/api/vk_graphics_pipeline_library.cpp | 7 +- icd/api/vk_image.cpp | 18 +- icd/api/vk_instance.cpp | 4 +- icd/api/vk_memory.cpp | 3 +- icd/api/vk_physical_device.cpp | 63 +- icd/api/vk_physical_device_manager.cpp | 3 +- icd/api/vk_queue.cpp | 11 +- icd/api/vk_semaphore.cpp | 6 +- icd/api/vk_swapchain.cpp | 22 +- icd/res/ver.h | 4 +- icd/settings/settings.cpp | 178 ++++-- icd/settings/settings.h | 38 +- icd/settings/settings_xgl.json | 536 ++++++++---------- 44 files changed, 732 insertions(+), 623 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 33ab615f..43739b50 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -103,13 +103,6 @@ endif() # for intellisense used by VScode, YouCompleteMe, etc set(CMAKE_EXPORT_COMPILE_COMMANDS 1) -# Strip the symbols for release build -if(UNIX) - if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") - string(APPEND CMAKE_SHARED_LINKER_FLAGS_RELEASE " -s") - endif() -endif() - ### Set Compiler ###################################################################################################### xgl_set_compiler() @@ -309,9 +302,7 @@ else() endif() # icd -if(NOT ICD_BUILD_LLPCONLY) add_subdirectory(icd) -endif() # VKGC compiler add_subdirectory(${XGL_VKGC_PATH} ${CMAKE_BINARY_DIR}/compiler) diff --git a/cmake/XglOptions.cmake b/cmake/XglOptions.cmake index 4000f672..a788820f 100644 --- a/cmake/XglOptions.cmake +++ b/cmake/XglOptions.cmake @@ -78,7 +78,6 @@ macro(xgl_options) #endif option(ICD_BUILD_LLPC "Build LLPC?" ON) - option(ICD_BUILD_LLPCONLY "Build LLPC Only?" OFF) option(XGL_LLVM_UPSTREAM "Build with upstreamed LLVM?" OFF) diff --git a/cmake/XglOverrides.cmake b/cmake/XglOverrides.cmake index 042315f9..5bbefe76 100644 --- a/cmake/XglOverrides.cmake +++ b/cmake/XglOverrides.cmake @@ -206,11 +206,6 @@ macro(xgl_overrides) set(XGL_BUILD_TOOLS ON CACHE BOOL "XGL_BUILD_TOOLS override by XGL_BUILD_TESTS." FORCE) endif() - if(ICD_BUILD_LLPCONLY) - set(ICD_BUILD_LLPC ON CACHE BOOL "ICD_BUILD_LLPC override." FORCE) - set(XGL_BUILD_TOOLS ON CACHE BOOL "XGL_BUILD_TOOLS override by ICD_BUILD_LLPCONLY." FORCE) - endif() - if(NOT ICD_BUILD_LLPC) set(XGL_LLVM_UPSTREAM OFF CACHE BOOL "XGL_LLVM_UPSTREAM is overrided to false." FORCE) endif() diff --git a/cmake/XglVersions.cmake b/cmake/XglVersions.cmake index 35657268..65022cf9 100644 --- a/cmake/XglVersions.cmake +++ b/cmake/XglVersions.cmake @@ -28,7 +28,7 @@ include_guard() # This will become the value of PAL_CLIENT_INTERFACE_MAJOR_VERSION. It describes the version of the PAL interface # that the ICD supports. PAL uses this value to enable backwards-compatibility for older interface versions. # It must be updated on each PAL promotion after handling all of the interface changes described in palLib.h. -set(ICD_PAL_CLIENT_MAJOR_VERSION "834") +set(ICD_PAL_CLIENT_MAJOR_VERSION "841") # This will become the value of GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION if ICD_GPUOPEN_DEVMODE_BUILD=1. # It describes the interface version of the gpuopen shared module (part of PAL) that the ICD supports. @@ -37,9 +37,9 @@ set(ICD_GPUOPEN_CLIENT_MAJOR_VERSION "42") #if VKI_RAY_TRACING # This will become the value of GPURT_CLIENT_INTERFACE_MAJOR_VERSION if VKI_RAY_TRACING=1. # It describes the interface version of the GpuRT shared module that the ICD supports. -set(ICD_GPURT_CLIENT_MAJOR_VERSION "40") +set(ICD_GPURT_CLIENT_MAJOR_VERSION "41") #endif # This will become the value of LLPC_CLIENT_INTERFACE_MAJOR_VERSION if ICD_BUILD_LLPC=1. # It describes the version of the interface version of LLPC that the ICD supports. -set(ICD_LLPC_CLIENT_MAJOR_VERSION "68") +set(ICD_LLPC_CLIENT_MAJOR_VERSION "69") diff --git a/icd/CMakeLists.txt b/icd/CMakeLists.txt index 8589d2c9..d23cf120 100644 --- a/icd/CMakeLists.txt +++ b/icd/CMakeLists.txt @@ -261,19 +261,26 @@ target_sources(xgl PRIVATE api/strings/strings.cpp) ### ICD Auto-generated Settings Files ######################################### # ICD settings code generation main script -set(ICD_GEN_SETTINGS ${ICD_GENDIR}/genSettingsCode.py) -set(ICD_GEN_SETTINGS_FILES ${ICD_GEN_SETTINGS} ${ICD_GENDIR}/vulkanSettingsCodeTemplates.py) +set(XGL_DEVDRIVER_PATH ${XGL_PAL_PATH}/shared/devdriver) +set(ICD_DD_GENDIR ${XGL_DEVDRIVER_PATH}/apis/settings/codegen) -set(ICD_SETTINGS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/settings) +set(ICD_GEN_SETTINGS ${ICD_DD_GENDIR}/settings_codegen.py) -set(PAL_GENDIR ${XGL_PAL_PATH}/tools/generate) +set(ICD_GEN_SETTINGS_FILES ${ICD_GEN_SETTINGS}) -set(COMPONENT_NAME vulkan) +set(ICD_SETTINGS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/settings) add_custom_command( OUTPUT ${ICD_SETTINGS_DIR}/g_settings.cpp ${ICD_SETTINGS_DIR}/g_settings.h - COMMAND ${PYTHON_CMD} ${ICD_GEN_SETTINGS} ${XGL_PAL_PATH} ${COMPONENT_NAME} + COMMAND ${PYTHON_CMD} ${ICD_GEN_SETTINGS} + -i ${ICD_SETTINGS_DIR}/settings_xgl.json + -o ${ICD_SETTINGS_DIR} + -g settings + -s settings/settings.h + --namespaces vk + --settings-struct-name RuntimeSettings + --include-headers pal.h palImage.h DEPENDS ${ICD_GEN_SETTINGS_FILES} ${ICD_SETTINGS_DIR}/settings_xgl.json COMMENT "Generating Vulkan settings code from settings_xgl.json" ) @@ -381,6 +388,18 @@ target_link_libraries(xgl_cache_support INTERFACE pal) target_link_libraries(xgl PRIVATE xgl_cache_support) +# Strip the symbols for release build, generate debug file +if(UNIX) + if("${CMAKE_BUILD_TYPE}" STREQUAL "Release") + add_custom_command( + TARGET xgl POST_BUILD + COMMAND objcopy --only-keep-debug ${CMAKE_CURRENT_BINARY_DIR}/${ICD_TARGET}.so ${CMAKE_CURRENT_BINARY_DIR}/${ICD_TARGET}.so.debug + COMMAND strip ${CMAKE_CURRENT_BINARY_DIR}/${ICD_TARGET}.so + COMMAND objcopy --add-gnu-debuglink=${CMAKE_CURRENT_BINARY_DIR}/${ICD_TARGET}.so.debug ${CMAKE_CURRENT_BINARY_DIR}/${ICD_TARGET}.so + ) + endif() +endif() + ### ICD loader configuration ########################################################################################### if(UNIX) include(GNUInstallDirs) diff --git a/icd/Loader/LunarG/Lnx/amd-icd.json b/icd/Loader/LunarG/Lnx/amd-icd.json index 07e518d3..39860a5a 100644 --- a/icd/Loader/LunarG/Lnx/amd-icd.json +++ b/icd/Loader/LunarG/Lnx/amd-icd.json @@ -2,13 +2,13 @@ "file_format_version": "1.0.0", "ICD": { "library_path": "@AMDVLK_INSTALL_PATH@/amdvlk@ISABITS@.so", - "api_version": "1.3.269" + "api_version": "1.3.271" }, "layer": { "name": "VK_LAYER_AMD_switchable_graphics_@ISABITS@", "type": "GLOBAL", "library_path": "@AMDVLK_INSTALL_PATH@/amdvlk@ISABITS@.so", - "api_version": "1.3.269", + "api_version": "1.3.271", "implementation_version": "1", "description": "AMD switchable graphics layer", "functions": { diff --git a/icd/api/app_profile.cpp b/icd/api/app_profile.cpp index 8449e4d6..c55643ee 100644 --- a/icd/api/app_profile.cpp +++ b/icd/api/app_profile.cpp @@ -406,18 +406,13 @@ constexpr AppProfilePatternEntry AppNameSpidermanRemastered = "spider-man.exe" }; -#if VKI_RAY_TRACING constexpr AppProfilePatternEntry AppEngineVKD3D = { PatternEngineNameLower, - Util::MetroHash::Hash{{{ - 0x32778d0a, - 0x05b56a84, - 0x8f0c25bc, - 0x1d75f3eb - }}} + "vkd3d" }; +#if VKI_RAY_TRACING constexpr AppProfilePatternEntry AppNameControlDX12 = { PatternAppNameLower, @@ -1435,6 +1430,14 @@ AppProfilePattern AppPatternTable[] = AppNameEnscape, PatternEnd } + }, + + { + AppProfile::Vkd3dEngine, + { + AppEngineVKD3D, + PatternEnd + } } }; diff --git a/icd/api/app_shader_optimizer.cpp b/icd/api/app_shader_optimizer.cpp index 739969e1..7fe347ce 100644 --- a/icd/api/app_shader_optimizer.cpp +++ b/icd/api/app_shader_optimizer.cpp @@ -903,10 +903,10 @@ void ShaderOptimizer::BuildTuningProfile() pPattern->codeHash.lower = m_settings.overrideShaderHashLower; pPattern->codeHash.upper = m_settings.overrideShaderHashUpper; - if (m_settings.overrideNumVGPRsAvailable != 0) + if (m_settings.overrideNumVgprsAvailable != 0) { pAction->shaderCreate.apply.vgprLimit = true; - pAction->shaderCreate.tuningOptions.vgprLimit = m_settings.overrideNumVGPRsAvailable; + pAction->shaderCreate.tuningOptions.vgprLimit = m_settings.overrideNumVgprsAvailable; } if (m_settings.overrideMaxLdsSpillDwords != 0) @@ -1180,15 +1180,6 @@ void ShaderOptimizer::BuildAppProfileLlpc() pEntry->action.shaders[ShaderStage::ShaderStageVertex].shaderCreate.tuningOptions.disableFastMathFlags = 8u | 32u; } - if (appProfile == AppProfile::CSGO) - { - i = m_appProfile.entryCount++; - PipelineProfileEntry *pEntry = &m_appProfile.pEntries[i]; - pEntry->pattern.match.always = true; - pEntry->action.shaders[ShaderStage::ShaderStageFragment].shaderCreate.apply.disableFastMathFlags = true; - pEntry->action.shaders[ShaderStage::ShaderStageFragment].shaderCreate.tuningOptions.disableFastMathFlags = 32u; - } - if (appProfile == AppProfile::WarHammerIII) { i = m_appProfile.entryCount++; diff --git a/icd/api/appopt/gpu_decode_layer.cpp b/icd/api/appopt/gpu_decode_layer.cpp index 362cb9d7..a8d695ae 100755 --- a/icd/api/appopt/gpu_decode_layer.cpp +++ b/icd/api/appopt/gpu_decode_layer.cpp @@ -545,7 +545,7 @@ static VkResult gpuBlitImage( uint32_t maxObj = pCmdBuffer->EstimateMaxObjectsOnVirtualStack(sizeof(Pal::ImageCopyRegion)); VkFormat dstFormat = pDstImage->GetFormat(); - if(settings.enableBC3Encoder) + if(settings.enableBc3Encoder) { if (type == GpuTexDecoder::InternalTexConvertCsType::ConvertRGBA8ToBc3) { @@ -661,7 +661,7 @@ static VkResult gpuBlitBuffer( GpuTexDecoder::CompileTimeConstants constInfo = {}; Pal::SwizzledFormat sourceViewFormat = {}; - if(settings.enableBC3Encoder) + if(settings.enableBc3Encoder) { if (type != GpuTexDecoder::InternalTexConvertCsType::ConvertETC2ToBc3) { @@ -722,8 +722,8 @@ static VkResult gpuBlitBuffer( pPalRegions[i] = VkToPalMemoryImageCopyRegion( pRegions[regionIdx + i], dstSwzFormat.format, - pDstImage->GetArraySize(), plane, + pDstImage->GetArraySize(), pSrcBuffer->MemOffset()); } @@ -765,7 +765,7 @@ VKAPI_ATTR void VKAPI_CALL vkCmdCopyImage( if (Formats::IsASTCFormat(pDstImage->GetFormat())) { - if (settings.enableBC3Encoder != 0) + if (settings.enableBc3Encoder != 0) { // ASTC one step convert to BC3 haven't implemented, so force two steps if Bc3 encoder is enabled twoStepsOp = true; @@ -774,7 +774,7 @@ VKAPI_ATTR void VKAPI_CALL vkCmdCopyImage( } else if (Formats::IsEtc2Format(pDstImage->GetFormat())) { - switch (settings.enableBC3Encoder) + switch (settings.enableBc3Encoder) { case 0: convType = GpuTexDecoder::InternalTexConvertCsType::ConvertETC2ToRGBA8; @@ -896,7 +896,7 @@ VKAPI_ATTR void VKAPI_CALL vkCmdCopyBufferToImage( if (Formats::IsASTCFormat(pDstImage->GetFormat())) { - if (settings.enableBC3Encoder != 0) + if (settings.enableBc3Encoder != 0) { // ASTC one step convert to BC3 haven't implemented, so force two steps if Bc3 encoder is enabled twoStepsOp = true; @@ -905,7 +905,7 @@ VKAPI_ATTR void VKAPI_CALL vkCmdCopyBufferToImage( } else if (Formats::IsEtc2Format(pDstImage->GetFormat())) { - switch (settings.enableBC3Encoder) + switch (settings.enableBc3Encoder) { case 0: convType = GpuTexDecoder::InternalTexConvertCsType::ConvertETC2ToRGBA8; @@ -1101,7 +1101,7 @@ VKAPI_ATTR void VKAPI_CALL vkDestroyImage( GpuDecoderLayer* pDecodeWrapper = pDevice->GetGpuDecoderLayer(); const RuntimeSettings& settings = pDevice->GetRuntimeSettings(); - if (settings.enableBC3Encoder) + if (settings.enableBc3Encoder) { pDevice->GetGpuDecoderLayer()->ClearStagingResources(image); } diff --git a/icd/api/compiler_solution_llpc.cpp b/icd/api/compiler_solution_llpc.cpp index 4ca0f430..11909c0a 100644 --- a/icd/api/compiler_solution_llpc.cpp +++ b/icd/api/compiler_solution_llpc.cpp @@ -192,6 +192,7 @@ void CompilerSolutionLlpc::FreeShaderModule(ShaderModuleHandle* pShaderModule) auto pInstance = m_pPhysicalDevice->Manager()->VkInstance(); pInstance->FreeMem(pShaderModule->pLlpcShaderModule); + pShaderModule->pLlpcShaderModule = nullptr; } // ===================================================================================================================== diff --git a/icd/api/debug_printf.cpp b/icd/api/debug_printf.cpp index 13a5c8ed..fd7df8c6 100644 --- a/icd/api/debug_printf.cpp +++ b/icd/api/debug_printf.cpp @@ -98,7 +98,10 @@ void DebugPrintf::BindPipeline( allocInfo.pal.size = Util::Pow2Align(settings.debugPrintfBufferSize, PAL_PAGE_BYTES); allocInfo.pal.alignment = PAL_PAGE_BYTES; allocInfo.pal.priority = Pal::GpuMemPriority::Normal; - pDevice->MemMgr()->GetCommonPool(InternalPoolCpuCacheableGpuUncached, &allocInfo); + InternalSubAllocPool bufferType = settings.enableHangOutput ? + InternalPoolCpuCacheableGpuUncached : InternalPoolDebugCpuRead; + + pDevice->MemMgr()->GetCommonPool(bufferType, &allocInfo); VkResult result = pDevice->MemMgr()->AllocGpuMem(allocInfo, &m_printfMemory, pDevice->GetPalDeviceMask(), diff --git a/icd/api/entry.cpp b/icd/api/entry.cpp index a84a301d..0a731e7a 100644 --- a/icd/api/entry.cpp +++ b/icd/api/entry.cpp @@ -1219,8 +1219,8 @@ VKAPI_ATTR void VKAPI_CALL vkCmdWriteBufferMarker2AMD( // ===================================================================================================================== VKAPI_ATTR void VKAPI_CALL vkCmdBeginRendering( - VkCommandBuffer commandBuffer, - const VkRenderingInfoKHR* pRenderingInfo) + VkCommandBuffer commandBuffer, + const VkRenderingInfo* pRenderingInfo) { ApiCmdBuffer::ObjectFromHandle(commandBuffer)->BeginRendering(pRenderingInfo); } diff --git a/icd/api/graphics_pipeline_common.cpp b/icd/api/graphics_pipeline_common.cpp index 93b6a8f1..61626720 100644 --- a/icd/api/graphics_pipeline_common.cpp +++ b/icd/api/graphics_pipeline_common.cpp @@ -329,9 +329,9 @@ void GraphicsPipelineCommon::GetSubpassSampleCount( // ===================================================================================================================== static VkFormat GetDepthFormat( - const RenderPass* pRenderPass, - const uint32_t subpassIndex, - const VkPipelineRenderingCreateInfoKHR* pPipelineRenderingCreateInfoKHR + const RenderPass* pRenderPass, + const uint32_t subpassIndex, + const VkPipelineRenderingCreateInfo* pPipelineRenderingCreateInfo ) { VkFormat format = VK_FORMAT_UNDEFINED; @@ -340,11 +340,11 @@ static VkFormat GetDepthFormat( { format = pRenderPass->GetDepthStencilAttachmentFormat(subpassIndex); } - else if (pPipelineRenderingCreateInfoKHR != nullptr) + else if (pPipelineRenderingCreateInfo != nullptr) { - format = (pPipelineRenderingCreateInfoKHR->depthAttachmentFormat != VK_FORMAT_UNDEFINED) ? - pPipelineRenderingCreateInfoKHR->depthAttachmentFormat : - pPipelineRenderingCreateInfoKHR->stencilAttachmentFormat; + format = (pPipelineRenderingCreateInfo->depthAttachmentFormat != VK_FORMAT_UNDEFINED) ? + pPipelineRenderingCreateInfo->depthAttachmentFormat : + pPipelineRenderingCreateInfo->stencilAttachmentFormat; } return format; @@ -352,13 +352,13 @@ static VkFormat GetDepthFormat( // ===================================================================================================================== static uint32_t GetColorAttachmentCount( - const RenderPass* pRenderPass, - const uint32_t subpassIndex, - const VkPipelineRenderingCreateInfoKHR* pPipelineRenderingCreateInfoKHR + const RenderPass* pRenderPass, + const uint32_t subpassIndex, + const VkPipelineRenderingCreateInfo* pPipelineRenderingCreateInfo ) { return (pRenderPass != nullptr) ? pRenderPass->GetSubpassColorReferenceCount(subpassIndex) : - (pPipelineRenderingCreateInfoKHR != nullptr) ? pPipelineRenderingCreateInfoKHR->colorAttachmentCount : + (pPipelineRenderingCreateInfo != nullptr) ? pPipelineRenderingCreateInfo->colorAttachmentCount : 0u; } @@ -1537,7 +1537,7 @@ static void BuildDepthStencilState( // ===================================================================================================================== static void BuildColorBlendState( const Device* pDevice, - const VkPipelineRenderingCreateInfoKHR* pRendering, + const VkPipelineRenderingCreateInfo* pRendering, const VkPipelineColorBlendStateCreateInfo* pCb, const RenderPass* pRenderPass, const uint32_t subpass, @@ -1705,7 +1705,7 @@ static void BuildColorBlendState( // ===================================================================================================================== static void BuildRenderingState( const Device* pDevice, - const VkPipelineRenderingCreateInfoKHR* pRendering, + const VkPipelineRenderingCreateInfo* pRendering, const VkPipelineColorBlendStateCreateInfo* pCb, const RenderPass* pRenderPass, GraphicsPipelineObjectCreateInfo* pInfo) @@ -1750,7 +1750,7 @@ static void BuildVertexInputInterfaceState( pInfo->immedInfo.inputAssemblyState.topology = Pal::PrimitiveTopology::TriangleList; pInfo->pipeline.iaState.topologyInfo.primitiveType = Pal::PrimitiveType::Triangle; - if (pIa != nullptr) + if ((pIa != nullptr) && (hasMesh == false)) { pInfo->immedInfo.inputAssemblyState.topology = VkToPalPrimitiveTopology(pIa->topology); pInfo->pipeline.iaState.topologyInfo.primitiveType = VkToPalPrimitiveType(pIa->topology); @@ -1923,7 +1923,7 @@ static void BuildFragmentOutputInterfaceState( EXTRACT_VK_STRUCTURES_0( renderingCreateInfo, PipelineRenderingCreateInfoKHR, - static_cast(pIn->pNext), + static_cast(pIn->pNext), PIPELINE_RENDERING_CREATE_INFO_KHR); pInfo->dbFormat = GetDepthFormat(pRenderPass, subpass, pPipelineRenderingCreateInfoKHR); @@ -2806,7 +2806,7 @@ void GraphicsPipelineCommon::GenerateHashForFragmentShaderState( EXTRACT_VK_STRUCTURES_0( renderingCreateInfo, PipelineRenderingCreateInfoKHR, - static_cast(pCreateInfo->pNext), + static_cast(pCreateInfo->pNext), PIPELINE_RENDERING_CREATE_INFO_KHR); if ((pCreateInfo->pDepthStencilState != nullptr) && @@ -2839,7 +2839,7 @@ void GraphicsPipelineCommon::GenerateHashForFragmentOutputInterfaceState( EXTRACT_VK_STRUCTURES_0( renderingCreateInfo, PipelineRenderingCreateInfoKHR, - static_cast(pCreateInfo->pNext), + static_cast(pCreateInfo->pNext), PIPELINE_RENDERING_CREATE_INFO_KHR); uint32 colorAttachmentCount = 0; diff --git a/icd/api/include/app_profile.h b/icd/api/include/app_profile.h index 42c09c89..dd47f6af 100644 --- a/icd/api/include/app_profile.h +++ b/icd/api/include/app_profile.h @@ -138,6 +138,7 @@ enum class AppProfile : uint32_t TheSurge2, // The Surge 2 AsyncPostProcessLVr, // AsyncPostProcessing sample app for AMD Liquid VR SDK Enscape, // Enscape by Chaos + Vkd3dEngine, // vkd3d-proton for steam games }; struct ProfileSettings diff --git a/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_core.h b/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_core.h index 904ac6fc..bc0949c8 100644 --- a/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_core.h +++ b/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_core.h @@ -69,7 +69,7 @@ extern "C" { #define VK_API_VERSION_1_0 VK_MAKE_API_VERSION(0, 1, 0, 0)// Patch version should always be set to 0 // Version of this file -#define VK_HEADER_VERSION 269 +#define VK_HEADER_VERSION 271 // Complete version of this file #define VK_HEADER_VERSION_COMPLETE VK_MAKE_API_VERSION(0, 1, 3, VK_HEADER_VERSION) @@ -611,6 +611,7 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT = 1000102000, VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT = 1000102001, VK_STRUCTURE_TYPE_HDR_METADATA_EXT = 1000105000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RELAXED_LINE_RASTERIZATION_FEATURES_IMG = 1000110000, VK_STRUCTURE_TYPE_SHARED_PRESENT_SURFACE_CAPABILITIES_KHR = 1000111000, VK_STRUCTURE_TYPE_IMPORT_FENCE_WIN32_HANDLE_INFO_KHR = 1000114000, VK_STRUCTURE_TYPE_EXPORT_FENCE_WIN32_HANDLE_INFO_KHR = 1000114001, @@ -10778,8 +10779,8 @@ typedef enum VkDebugReportObjectTypeEXT { VK_DEBUG_REPORT_OBJECT_TYPE_CU_FUNCTION_NVX_EXT = 1000029001, VK_DEBUG_REPORT_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR_EXT = 1000150000, VK_DEBUG_REPORT_OBJECT_TYPE_ACCELERATION_STRUCTURE_NV_EXT = 1000165000, - VK_DEBUG_REPORT_OBJECT_TYPE_CUDA_MODULE_NV = 1000307000, - VK_DEBUG_REPORT_OBJECT_TYPE_CUDA_FUNCTION_NV = 1000307001, + VK_DEBUG_REPORT_OBJECT_TYPE_CUDA_MODULE_NV_EXT = 1000307000, + VK_DEBUG_REPORT_OBJECT_TYPE_CUDA_FUNCTION_NV_EXT = 1000307001, VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_COLLECTION_FUCHSIA_EXT = 1000366000, VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT_EXT, VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT_EXT, @@ -11913,6 +11914,18 @@ VKAPI_ATTR void VKAPI_CALL vkSetHdrMetadataEXT( #endif +// VK_IMG_relaxed_line_rasterization is a preprocessor guard. Do not pass it to API calls. +#define VK_IMG_relaxed_line_rasterization 1 +#define VK_IMG_RELAXED_LINE_RASTERIZATION_SPEC_VERSION 1 +#define VK_IMG_RELAXED_LINE_RASTERIZATION_EXTENSION_NAME "VK_IMG_relaxed_line_rasterization" +typedef struct VkPhysicalDeviceRelaxedLineRasterizationFeaturesIMG { + VkStructureType sType; + void* pNext; + VkBool32 relaxedLineRasterization; +} VkPhysicalDeviceRelaxedLineRasterizationFeaturesIMG; + + + // VK_EXT_external_memory_dma_buf is a preprocessor guard. Do not pass it to API calls. #define VK_EXT_external_memory_dma_buf 1 #define VK_EXT_EXTERNAL_MEMORY_DMA_BUF_SPEC_VERSION 1 @@ -17278,7 +17291,7 @@ typedef struct VkDirectDriverLoadingInfoLUNARG { typedef struct VkDirectDriverLoadingListLUNARG { VkStructureType sType; - void* pNext; + const void* pNext; VkDirectDriverLoadingModeLUNARG mode; uint32_t driverCount; const VkDirectDriverLoadingInfoLUNARG* pDrivers; @@ -17777,7 +17790,7 @@ typedef struct VkPhysicalDeviceDynamicRenderingUnusedAttachmentsFeaturesEXT { // VK_NV_low_latency2 is a preprocessor guard. Do not pass it to API calls. #define VK_NV_low_latency2 1 -#define VK_NV_LOW_LATENCY_2_SPEC_VERSION 1 +#define VK_NV_LOW_LATENCY_2_SPEC_VERSION 2 #define VK_NV_LOW_LATENCY_2_EXTENSION_NAME "VK_NV_low_latency2" typedef enum VkLatencyMarkerNV { @@ -17845,6 +17858,7 @@ typedef struct VkLatencyTimingsFrameReportNV { typedef struct VkGetLatencyMarkerInfoNV { VkStructureType sType; const void* pNext; + uint32_t timingCount; VkLatencyTimingsFrameReportNV* pTimings; } VkGetLatencyMarkerInfoNV; @@ -17876,7 +17890,7 @@ typedef struct VkLatencySurfaceCapabilitiesNV { typedef VkResult (VKAPI_PTR *PFN_vkSetLatencySleepModeNV)(VkDevice device, VkSwapchainKHR swapchain, const VkLatencySleepModeInfoNV* pSleepModeInfo); typedef VkResult (VKAPI_PTR *PFN_vkLatencySleepNV)(VkDevice device, VkSwapchainKHR swapchain, const VkLatencySleepInfoNV* pSleepInfo); typedef void (VKAPI_PTR *PFN_vkSetLatencyMarkerNV)(VkDevice device, VkSwapchainKHR swapchain, const VkSetLatencyMarkerInfoNV* pLatencyMarkerInfo); -typedef void (VKAPI_PTR *PFN_vkGetLatencyTimingsNV)(VkDevice device, VkSwapchainKHR swapchain, uint32_t* pTimingCount, VkGetLatencyMarkerInfoNV* pLatencyMarkerInfo); +typedef void (VKAPI_PTR *PFN_vkGetLatencyTimingsNV)(VkDevice device, VkSwapchainKHR swapchain, VkGetLatencyMarkerInfoNV* pLatencyMarkerInfo); typedef void (VKAPI_PTR *PFN_vkQueueNotifyOutOfBandNV)(VkQueue queue, const VkOutOfBandQueueTypeInfoNV* pQueueTypeInfo); #ifndef VK_NO_PROTOTYPES @@ -17898,7 +17912,6 @@ VKAPI_ATTR void VKAPI_CALL vkSetLatencyMarkerNV( VKAPI_ATTR void VKAPI_CALL vkGetLatencyTimingsNV( VkDevice device, VkSwapchainKHR swapchain, - uint32_t* pTimingCount, VkGetLatencyMarkerInfoNV* pLatencyMarkerInfo); VKAPI_ATTR void VKAPI_CALL vkQueueNotifyOutOfBandNV( diff --git a/icd/api/include/vk_cmdbuffer.h b/icd/api/include/vk_cmdbuffer.h index bfef652f..a8e1676b 100644 --- a/icd/api/include/vk_cmdbuffer.h +++ b/icd/api/include/vk_cmdbuffer.h @@ -814,7 +814,7 @@ class CmdBuffer const VkDependencyInfoKHR* pDependencyInfo); void BeginRendering( - const VkRenderingInfoKHR* pRenderingInfo); + const VkRenderingInfo* pRenderingInfo); void EndRendering(); @@ -1358,7 +1358,7 @@ class CmdBuffer } #endif - const uint32_t GetPipelineScratchSize(uint32_t deviceIdx) const; + uint32_t GetPipelineScratchSize(uint32_t deviceIdx) const; void BindDescriptorBuffers( uint32_t bufferCount, @@ -1527,7 +1527,7 @@ class CmdBuffer void RPSyncPostLoadOpColorClear(); void BindTargets( - const VkRenderingInfoKHR* pRenderingInfo, + const VkRenderingInfo* pRenderingInfo, const VkRenderingFragmentShadingRateAttachmentInfoKHR* pRenderingFragmentShadingRateAttachmentInfoKHR); void ResolveImage( @@ -1536,11 +1536,11 @@ class CmdBuffer void LoadOpClearColor( const Pal::Rect* pDeviceGroupRenderArea, - const VkRenderingInfoKHR* pRenderingInfo); + const VkRenderingInfo* pRenderingInfo); void LoadOpClearDepthStencil( const Pal::Rect* pDeviceGroupRenderArea, - const VkRenderingInfoKHR* pRenderingInfo); + const VkRenderingInfo* pRenderingInfo); void GetImageLayout( VkImageView imageView, @@ -1550,7 +1550,7 @@ class CmdBuffer Pal::ImageLayout* palImageLayout); void StoreAttachmentInfo( - const VkRenderingAttachmentInfoKHR& renderingAttachmentInfo, + const VkRenderingAttachmentInfo& renderingAttachmentInfo, DynamicRenderingAttachments* pDynamicRenderingAttachement); Pal::ImageLayout RPGetAttachmentLayout( @@ -2523,11 +2523,11 @@ VKAPI_ATTR void VKAPI_CALL vkCmdWriteBufferMarker2AMD( uint32_t marker); VKAPI_ATTR void VKAPI_CALL vkCmdBeginRendering( - VkCommandBuffer commandBuffer, - const VkRenderingInfoKHR* pRenderingInfo); + VkCommandBuffer commandBuffer, + const VkRenderingInfo* pRenderingInfo); VKAPI_ATTR void VKAPI_CALL vkCmdEndRendering( - VkCommandBuffer commandBuffer); + VkCommandBuffer commandBuffer); VKAPI_ATTR void VKAPI_CALL vkCmdSetCullMode( VkCommandBuffer commandBuffer, diff --git a/icd/api/include/vk_conv.h b/icd/api/include/vk_conv.h index a9651116..df1d2537 100755 --- a/icd/api/include/vk_conv.h +++ b/icd/api/include/vk_conv.h @@ -1817,7 +1817,7 @@ inline Pal::SwizzledFormat VkToPalFormat(VkFormat format, const RuntimeSettings& #if VKI_GPU_DECOMPRESS if (settings.enableShaderDecode) { - format = convertCompressedFormat(format, settings.enableBC3Encoder); + format = convertCompressedFormat(format, settings.enableBc3Encoder); } #endif return convert::VkToPalSwizzledFormatLookupTableStorage[format]; @@ -2484,7 +2484,7 @@ inline uint32_t VkToPalPipelineStageFlags( if (stageMask & VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT_KHR) { palPipelineStageMask |= Pal::PipelineStageFetchIndices | - Pal::PipelineStageVs; + Pal::PipelineStagePostPrefetch; } if (stageMask & VK_PIPELINE_STAGE_2_TRANSFORM_FEEDBACK_BIT_EXT) @@ -2492,8 +2492,12 @@ inline uint32_t VkToPalPipelineStageFlags( palPipelineStageMask |= Pal::PipelineStageStreamOut; } - if (stageMask & (VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT_KHR | - VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT_KHR)) + if (stageMask & VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT_KHR) + { + palPipelineStageMask |= Pal::PipelineStagePostPrefetch; + } + + if (stageMask & VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT_KHR) { palPipelineStageMask |= Pal::PipelineStageVs; } @@ -2514,8 +2518,7 @@ inline uint32_t VkToPalPipelineStageFlags( palPipelineStageMask |= Pal::PipelineStageGs; } - if (stageMask & (VK_PIPELINE_STAGE_2_PRE_RASTERIZATION_SHADERS_BIT_KHR | - VK_PIPELINE_STAGE_2_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR)) + if (stageMask & VK_PIPELINE_STAGE_2_PRE_RASTERIZATION_SHADERS_BIT_KHR) { palPipelineStageMask |= Pal::PipelineStageVs | Pal::PipelineStageHs | @@ -2523,6 +2526,11 @@ inline uint32_t VkToPalPipelineStageFlags( Pal::PipelineStageGs; } + if (stageMask & VK_PIPELINE_STAGE_2_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR) + { + palPipelineStageMask |= Pal::PipelineStageSampleRate; + } + if (stageMask & VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT_KHR) { palPipelineStageMask |= Pal::PipelineStagePs; @@ -2547,12 +2555,14 @@ inline uint32_t VkToPalPipelineStageFlags( { palPipelineStageMask |= Pal::PipelineStageTopOfPipe | Pal::PipelineStageFetchIndirectArgs | + Pal::PipelineStagePostPrefetch | Pal::PipelineStageFetchIndices | Pal::PipelineStageVs | Pal::PipelineStageHs | Pal::PipelineStageDs | Pal::PipelineStageGs | Pal::PipelineStagePs | + Pal::PipelineStageSampleRate | Pal::PipelineStageEarlyDsTarget | Pal::PipelineStageLateDsTarget | Pal::PipelineStageColorTarget; @@ -2727,7 +2737,9 @@ inline VkCompositeAlphaFlagsKHR PalToVkSupportedCompositeAlphaMode(uint32 compos // for the image creation flags so we have to return the constructed flag set as a uint32_t) inline uint32_t VkToPalImageCreateFlags(VkImageCreateFlags imageCreateFlags, VkFormat format, - VkImageUsageFlags imageUsage) + VkImageUsageFlags imageUsage, + VkImageType type, + uint32_t mipLevels) { Pal::ImageCreateFlags flags = {}; @@ -2735,8 +2747,20 @@ inline uint32_t VkToPalImageCreateFlags(VkImageCreateFlags imageCreateFlags, flags.prt = (imageCreateFlags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) ? 1 : 0; flags.invariant = (imageCreateFlags & VK_IMAGE_CREATE_ALIAS_BIT) ? 1 : 0; flags.tmzProtected = (imageCreateFlags & VK_IMAGE_CREATE_PROTECTED_BIT) ? 1 : 0; - flags.view3dAs2dArray = (imageCreateFlags & - (VK_IMAGE_CREATE_2D_VIEW_COMPATIBLE_BIT_EXT | VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT)) ? 1 : 0; + + { + const bool is2dCompatible = imageCreateFlags & + (VK_IMAGE_CREATE_2D_VIEW_COMPATIBLE_BIT_EXT | VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT); + const bool isBlockTexelViewCompatible = imageCreateFlags & VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT; + const bool is3dImage = type == VK_IMAGE_TYPE_3D; + // Originally, VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT was not permitted by 3D images, but the + // restriction was later removed from the spec. Our solution to HW degrading mip levels differently for + // uncompressed texels vs compressed blocks only works for 2D block sizes, and view3dAs2dArray will make sure + // to use them even for 3D images. + const bool hasMipMaps = mipLevels > 1; + + flags.view3dAs2dArray = (is2dCompatible || (isBlockTexelViewCompatible && is3dImage && hasMipMaps)) ? 1 : 0; + } // Always provide pQuadSamplePattern to PalCmdResolveImage for depth formats to allow optimizations flags.sampleLocsAlwaysKnown = Formats::HasDepth(format) ? 1 : 0; diff --git a/icd/api/include/vk_device.h b/icd/api/include/vk_device.h index 55965230..8e6d2924 100644 --- a/icd/api/include/vk_device.h +++ b/icd/api/include/vk_device.h @@ -114,6 +114,7 @@ class RayTracingDevice; // VkImportSemaphoreWin32HandleInfoKHR. Please refer to the vkspec for the defination of members. struct ImportSemaphoreInfo { + const void* pNext; VkExternalSemaphoreHandleTypeFlagBits handleType; Pal::OsExternalHandle handle; VkSemaphoreImportFlags importFlags; diff --git a/icd/api/include/vk_extensions.h b/icd/api/include/vk_extensions.h index 873e1190..2f8f91ae 100644 --- a/icd/api/include/vk_extensions.h +++ b/icd/api/include/vk_extensions.h @@ -456,6 +456,7 @@ class DeviceExtensions final : public Extensions GOOGLE_HLSL_FUNCTIONALITY1, GOOGLE_USER_TYPE, + NV_COMPUTE_SHADER_DERIVATIVES, VALVE_MUTABLE_DESCRIPTOR_TYPE, Count }; diff --git a/icd/api/include/vk_framebuffer.h b/icd/api/include/vk_framebuffer.h index eb24cc4a..4959e899 100644 --- a/icd/api/include/vk_framebuffer.h +++ b/icd/api/include/vk_framebuffer.h @@ -90,7 +90,7 @@ class Framebuffer final : public NonDispatchable void SetImageViews(const VkRenderPassAttachmentBeginInfo* pRenderPassAttachmentBeginInfo); - void SetImageViews(const VkRenderingInfoKHR* pRenderingInfo); + void SetImageViews(const VkRenderingInfo* pRenderingInfo); const Pal::GlobalScissorParams& GetGlobalScissorParams() const { diff --git a/icd/api/include/vk_semaphore.h b/icd/api/include/vk_semaphore.h index 9d8316ea..8070c58e 100644 --- a/icd/api/include/vk_semaphore.h +++ b/icd/api/include/vk_semaphore.h @@ -102,6 +102,7 @@ class Semaphore final : public NonDispatchable VkResult GetShareHandle( Device* device, + const void* pNext, VkExternalSemaphoreHandleTypeFlagBits handleType, Pal::OsExternalHandle* pHandle); diff --git a/icd/api/pipeline_compiler.cpp b/icd/api/pipeline_compiler.cpp index 81263dd1..08a60367 100644 --- a/icd/api/pipeline_compiler.cpp +++ b/icd/api/pipeline_compiler.cpp @@ -293,7 +293,7 @@ VkResult PipelineCompiler::Initialize() 0, 0, nullptr, - settings.enableOnDiskInternalPipelineCaches); + settings.enableInternalPipelineCachingToDisk); // This isn't a terminal failure, the device can continue without the pipeline cache if need be. VK_ALERT(m_pBinaryCache == nullptr); @@ -641,12 +641,10 @@ VkResult PipelineCompiler::BuildShaderModule( StoreShaderModuleToCache(flags, internalShaderFlags, compilerMask, uniqueHash, pBinaryCache, pShaderModule); } - else + else if ((pSettings->enablePipelineDump) + ) { - if (pSettings->enablePipelineDump) - { - Vkgc::IPipelineDumper::DumpSpirvBinary(pSettings->pipelineDumpDir, &finalData); - } + Vkgc::IPipelineDumper::DumpSpirvBinary(pSettings->pipelineDumpDir, &finalData); } if (findReplaceShader) @@ -2343,7 +2341,7 @@ static void BuildPipelineShadersInfo( static void BuildColorBlendState( const Device* pDevice, const VkPipelineColorBlendStateCreateInfo* pCb, - const VkPipelineRenderingCreateInfoKHR* pRendering, + const VkPipelineRenderingCreateInfo* pRendering, uint64_t dynamicStateFlags, const RenderPass* pRenderPass, const uint32_t subpass, @@ -2495,7 +2493,7 @@ static void BuildVertexInputInterfaceState( GraphicsPipelineBinaryCreateInfo* pCreateInfo) { pCreateInfo->pipelineInfo.iaState.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; - if (pIn->pInputAssemblyState) + if ((pIn->pInputAssemblyState) && (Util::TestAnyFlagSet(activeStages, VK_SHADER_STAGE_MESH_BIT_EXT) == false)) { pCreateInfo->pipelineInfo.iaState.topology = pIn->pInputAssemblyState->topology; pCreateInfo->pipelineInfo.iaState.disableVertexReuse = false; @@ -2629,7 +2627,7 @@ static void BuildFragmentOutputInterfaceState( EXTRACT_VK_STRUCTURES_0( dynamicRendering, PipelineRenderingCreateInfoKHR, - reinterpret_cast(pIn->pNext), + reinterpret_cast(pIn->pNext), PIPELINE_RENDERING_CREATE_INFO_KHR) BuildMultisampleStateInFoi(pIn->pMultisampleState, dynamicStateFlags, pCreateInfo); @@ -3489,6 +3487,9 @@ VkResult PipelineCompiler::ConvertRayTracingPipelineInfo( static_assert(RaytracingContinuations == static_cast(Vkgc::LlpcRaytracingMode::Continuations)); pCreateInfo->pipelineInfo.mode = static_cast(settings.llpcRaytracingMode); + static_assert(CpsFlagStackInGlobalMem == Vkgc::CpsFlagStackInGlobalMem); + pCreateInfo->pipelineInfo.cpsFlags = settings.cpsFlags; + pCreateInfo->pipelineInfo.isReplay = isReplay; // pLibraryInterface must be populated (per spec) if the pipeline is a library or has libraries @@ -4861,33 +4862,30 @@ uint32_t PipelineCompiler::BuildUberFetchShaderInternalDataImp( uint32_t maxLocation = 0; void* pAttribInternalBase = pUberFetchShaderInternalData; - if (settings.enableRobustUberFetchShader) + uint64_t locationMask = 0; + for (uint32_t i = 0; i < vertexAttributeDescriptionCount; i++) { - uint64_t locationMask = 0; - for (uint32_t i = 0; i < vertexAttributeDescriptionCount; i++) + Vkgc::UberFetchShaderAttribInfo attribInfo = {}; + auto pAttrib = &pVertexAttributeDescriptions[i]; + bool hasDualLocation = Formats::IsDvec3Or4(pAttrib->format); + if (pAttrib->location >= maxLocation) { - Vkgc::UberFetchShaderAttribInfo attribInfo = {}; - auto pAttrib = &pVertexAttributeDescriptions[i]; - bool hasDualLocation = Formats::IsDvec3Or4(pAttrib->format); - if (pAttrib->location >= maxLocation) - { - maxLocation = hasDualLocation ? (pAttrib->location + 1) : pAttrib->location; - } - locationMask |= (1ull << pAttrib->location); - if (hasDualLocation) - { - locationMask |= (1ull << (pAttrib->location + 1)); - } + maxLocation = hasDualLocation ? (pAttrib->location + 1) : pAttrib->location; } - - pAttribInternalBase = Util::VoidPtrInc(pUberFetchShaderInternalData, sizeof(uint64_t)); - if (vertexAttributeDescriptionCount > 0) + locationMask |= (1ull << pAttrib->location); + if (hasDualLocation) { - memcpy(pUberFetchShaderInternalData, &locationMask, sizeof(uint64_t)); - memset(pAttribInternalBase, 0, (maxLocation + 1) * sizeof(Vkgc::UberFetchShaderAttribInfo)); + locationMask |= (1ull << (pAttrib->location + 1)); } } + pAttribInternalBase = Util::VoidPtrInc(pUberFetchShaderInternalData, sizeof(uint64_t)); + if (vertexAttributeDescriptionCount > 0) + { + memcpy(pUberFetchShaderInternalData, &locationMask, sizeof(uint64_t)); + memset(pAttribInternalBase, 0, (maxLocation + 1) * sizeof(Vkgc::UberFetchShaderAttribInfo)); + } + for (uint32_t i = 0; i < vertexAttributeDescriptionCount; i++) { Vkgc::UberFetchShaderAttribInfo attribInfo = {}; diff --git a/icd/api/raytrace/ray_tracing_device.cpp b/icd/api/raytrace/ray_tracing_device.cpp index d7bba065..f951bc99 100644 --- a/icd/api/raytrace/ray_tracing_device.cpp +++ b/icd/api/raytrace/ray_tracing_device.cpp @@ -158,7 +158,7 @@ void RayTracingDevice::CreateGpuRtDeviceSettings( *pDeviceSettings = {}; const RuntimeSettings& settings = m_pDevice->GetRuntimeSettings(); - pDeviceSettings->bvhCollapse = settings.rtEnableBVHCollapse; + pDeviceSettings->bvhCollapse = settings.rtEnableBvhCollapse; pDeviceSettings->topDownBuild = settings.rtEnableTopDownBuild; pDeviceSettings->rebraidType = ConvertGpuRtRebraidType(settings.rtEnableTreeRebraid); @@ -181,7 +181,7 @@ void RayTracingDevice::CreateGpuRtDeviceSettings( pDeviceSettings->fp16BoxModeMixedSaThresh = Util::Clamp(fp16BoxMixedThreshold, 1.0f, 8.0f); pDeviceSettings->enableMortonCode30 = settings.rtEnableMortonCode30; pDeviceSettings->enableVariableBitsMortonCodes = settings.enableVariableBitsMortonCodes; - pDeviceSettings->enablePrefixScanDLB = settings.rtEnablePrefixScanDLB; + pDeviceSettings->enablePrefixScanDLB = settings.rtEnablePrefixScanDlb; switch (settings.rtTriangleCompressionMode) { @@ -204,8 +204,8 @@ void RayTracingDevice::CreateGpuRtDeviceSettings( pDeviceSettings->bvhBuildModeDefault = ConvertGpuRtBvhBuildMode(settings.rtBvhBuildModeDefault); pDeviceSettings->bvhBuildModeFastTrace = ConvertGpuRtBvhBuildMode(settings.rtBvhBuildModeFastTrace); pDeviceSettings->bvhBuildModeFastBuild = ConvertGpuRtBvhBuildMode(settings.rtBvhBuildModeFastBuild); - pDeviceSettings->bvhBuildModeOverrideBLAS = ConvertGpuRtBvhBuildMode(settings.bvhBuildModeOverrideBLAS); - pDeviceSettings->bvhBuildModeOverrideTLAS = ConvertGpuRtBvhBuildMode(settings.bvhBuildModeOverrideTLAS); + pDeviceSettings->bvhBuildModeOverrideBLAS = ConvertGpuRtBvhBuildMode(settings.bvhBuildModeOverrideBlas); + pDeviceSettings->bvhBuildModeOverrideTLAS = ConvertGpuRtBvhBuildMode(settings.bvhBuildModeOverrideTlas); pDeviceSettings->enableParallelUpdate = settings.rtEnableUpdateParallel; pDeviceSettings->enableParallelBuild = settings.rtEnableBuildParallel; pDeviceSettings->parallelBuildWavesPerSimd = settings.buildParallelWavesPerSimd; @@ -226,10 +226,10 @@ void RayTracingDevice::CreateGpuRtDeviceSettings( pDeviceSettings->enableMergeSort = settings.enableMergeSort; pDeviceSettings->fastBuildThreshold = settings.fastBuildThreshold; pDeviceSettings->lbvhBuildThreshold = settings.lbvhBuildThreshold; - pDeviceSettings->enableBVHBuildDebugCounters = settings.enableBVHBuildDebugCounters; - pDeviceSettings->enableInsertBarriersInBuildAS = settings.enableInsertBarriersInBuildAS; + pDeviceSettings->enableBVHBuildDebugCounters = settings.enableBvhBuildDebugCounters; + pDeviceSettings->enableInsertBarriersInBuildAS = settings.enableInsertBarriersInBuildAs; pDeviceSettings->numMortonSizeBits = settings.numMortonSizeBits; - pDeviceSettings->allowFp16BoxNodesInUpdatableBvh = settings.rtAllowFp16BoxNodesInUpdatableBVH; + pDeviceSettings->allowFp16BoxNodesInUpdatableBvh = settings.rtAllowFp16BoxNodesInUpdatableBvh; pDeviceSettings->enableBuildAccelStructScratchDumping = pDeviceSettings->enableBuildAccelStructDumping && settings.rtEnableAccelerationStructureScratchMemoryDump; diff --git a/icd/api/renderpass/renderpass_builder.cpp b/icd/api/renderpass/renderpass_builder.cpp index 04147a9e..2a644035 100644 --- a/icd/api/renderpass/renderpass_builder.cpp +++ b/icd/api/renderpass/renderpass_builder.cpp @@ -324,15 +324,15 @@ VkResult RenderPassBuilder::Build( result = BuildSubpass(subpass); } - if (m_pInfo->doClearsUpfront && - (m_pSubpasses[0].syncTop.barrier.flags.preColorClearSync || - m_pSubpasses[0].syncTop.barrier.flags.preDsClearSync)) - { - PostProcessSyncPoint(&m_pSubpasses[0].syncTop); - } - if (result == Pal::Result::Success) { + if (m_pInfo->doClearsUpfront && + (m_pSubpasses[0].syncTop.barrier.flags.preColorClearSync || + m_pSubpasses[0].syncTop.barrier.flags.preDsClearSync)) + { + PostProcessSyncPoint(&m_pSubpasses[0].syncTop); + } + result = BuildEndState(); } diff --git a/icd/api/strings/extensions.txt b/icd/api/strings/extensions.txt index 224aca61..d30c74d7 100644 --- a/icd/api/strings/extensions.txt +++ b/icd/api/strings/extensions.txt @@ -167,6 +167,7 @@ VK_EXT_dynamic_rendering_unused_attachments VK_KHR_format_feature_flags2 VK_EXT_extended_dynamic_state2 VK_EXT_descriptor_buffer +VK_NV_compute_shader_derivatives VK_EXT_graphics_pipeline_library VK_KHR_copy_commands2 VK_EXT_ycbcr_image_arrays diff --git a/icd/api/vk_buffer.cpp b/icd/api/vk_buffer.cpp index 78edd608..586f2efa 100644 --- a/icd/api/vk_buffer.cpp +++ b/icd/api/vk_buffer.cpp @@ -150,7 +150,7 @@ VkResult Buffer::Create( VK_ASSERT(palResult == Pal::Result::Success); } -#if defined(__unix__) +#if PAL_AMDGPU_BUILD gpuMemoryCreateInfo.flags.initializeToZero = pDevice->GetRuntimeSettings().initializeVramToZero; #endif diff --git a/icd/api/vk_cmdbuffer.cpp b/icd/api/vk_cmdbuffer.cpp index afcb3b55..bdaf4951 100644 --- a/icd/api/vk_cmdbuffer.cpp +++ b/icd/api/vk_cmdbuffer.cpp @@ -1181,7 +1181,6 @@ void CmdBuffer::PalCmdDrawMeshTasksIndirect( // The indirect argument should be in the range of the given buffer size VK_ASSERT((stride + offset) <= pBuffer->PalMemory(DefaultDeviceIndex)->Desc().size); - const Pal::gpusize paramOffset = pBuffer->MemOffset() + offset; Pal::gpusize countVirtAddr = 0; utils::IterateMask deviceGroup(m_curDeviceMask); @@ -1196,10 +1195,14 @@ void CmdBuffer::PalCmdDrawMeshTasksIndirect( countVirtAddr = pCountBuffer->GpuVirtAddr(deviceIdx) + countOffset; } + Pal::GpuVirtAddrAndStride gpuVirtAddrAndStride = + { + pBuffer->GpuVirtAddr(deviceIdx) + static_cast(offset), + stride + }; + PalCmdBuffer(deviceIdx)->CmdDispatchMeshIndirectMulti( - *pBuffer->PalMemory(deviceIdx), - paramOffset, - stride, + gpuVirtAddrAndStride, count, countVirtAddr); } @@ -1252,9 +1255,7 @@ void CmdBuffer::PalCmdDispatchIndirect( // TODO use device group dispatch offsets here. // Note: check spec to see if offset setting is applications' responsibility. - PalCmdBuffer(deviceIdx)->CmdDispatchIndirect( - *pBuffer->PalMemory(deviceIdx), - pBuffer->MemOffset() + offset); + PalCmdBuffer(deviceIdx)->CmdDispatchIndirect(pBuffer->GpuVirtAddr(deviceIdx) + offset); } while (deviceGroup.IterateNext()); } @@ -1291,7 +1292,7 @@ VkResult CmdBuffer::Begin( RenderPass* pRenderPass = nullptr; Framebuffer* pFramebuffer = nullptr; - const VkCommandBufferInheritanceRenderingInfoKHR* pInheritanceRenderingInfoKHR = nullptr; + const VkCommandBufferInheritanceRenderingInfo* pInheritanceRenderingInfo = nullptr; m_cbBeginDeviceMask = m_pDevice->GetPalDeviceMask(); @@ -1360,21 +1361,21 @@ VkResult CmdBuffer::Begin( inheritedStateParams.stateFlags.predication = pExtInfo->conditionalRenderingEnable; m_flags.hasConditionalRendering = pExtInfo->conditionalRenderingEnable; } - else if (pHeader->sType == VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_RENDERING_INFO_KHR) + else if (pHeader->sType == VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_RENDERING_INFO) { VK_ASSERT(m_flags.is2ndLvl); - pInheritanceRenderingInfoKHR = static_cast(pNext); + pInheritanceRenderingInfo = static_cast(pNext); - inheritedStateParams.colorTargetCount = pInheritanceRenderingInfoKHR->colorAttachmentCount; + inheritedStateParams.colorTargetCount = pInheritanceRenderingInfo->colorAttachmentCount; inheritedStateParams.stateFlags.targetViewState = 1; for (uint32_t i = 0; i < inheritedStateParams.colorTargetCount; i++) { inheritedStateParams.colorTargetSwizzledFormats[i] = - VkToPalFormat(pInheritanceRenderingInfoKHR->pColorAttachmentFormats[i], settings); + VkToPalFormat(pInheritanceRenderingInfo->pColorAttachmentFormats[i], settings); - inheritedStateParams.sampleCount[i] = pInheritanceRenderingInfoKHR->rasterizationSamples; + inheritedStateParams.sampleCount[i] = pInheritanceRenderingInfo->rasterizationSamples; } } @@ -1461,13 +1462,13 @@ VkResult CmdBuffer::Begin( m_renderPassInstance.subpass = currentSubPass; } - if (pInheritanceRenderingInfoKHR != nullptr) + if (pInheritanceRenderingInfo != nullptr) { m_allGpuState.dynamicRenderingInstance.viewMask = - pInheritanceRenderingInfoKHR->viewMask; + pInheritanceRenderingInfo->viewMask; m_allGpuState.dynamicRenderingInstance.colorAttachmentCount = - pInheritanceRenderingInfoKHR->colorAttachmentCount; + pInheritanceRenderingInfo->colorAttachmentCount; for (uint32_t i = 0; i < m_allGpuState.dynamicRenderingInstance.colorAttachmentCount; ++i) { @@ -1475,17 +1476,17 @@ VkResult CmdBuffer::Begin( &m_allGpuState.dynamicRenderingInstance.colorAttachments[i]; pDynamicAttachment->pImageView = nullptr; - pDynamicAttachment->attachmentFormat = pInheritanceRenderingInfoKHR->pColorAttachmentFormats[i]; - pDynamicAttachment->rasterizationSamples = pInheritanceRenderingInfoKHR->rasterizationSamples; + pDynamicAttachment->attachmentFormat = pInheritanceRenderingInfo->pColorAttachmentFormats[i]; + pDynamicAttachment->rasterizationSamples = pInheritanceRenderingInfo->rasterizationSamples; } m_allGpuState.dynamicRenderingInstance.depthAttachment.attachmentFormat = - (pInheritanceRenderingInfoKHR->depthAttachmentFormat != VK_FORMAT_UNDEFINED) ? - pInheritanceRenderingInfoKHR->depthAttachmentFormat : - pInheritanceRenderingInfoKHR->stencilAttachmentFormat; + (pInheritanceRenderingInfo->depthAttachmentFormat != VK_FORMAT_UNDEFINED) ? + pInheritanceRenderingInfo->depthAttachmentFormat : + pInheritanceRenderingInfo->stencilAttachmentFormat; m_allGpuState.dynamicRenderingInstance.depthAttachment.rasterizationSamples = - pInheritanceRenderingInfoKHR->rasterizationSamples; + pInheritanceRenderingInfo->rasterizationSamples; } // if input frame buffer object pointer is NULL, it means @@ -1500,7 +1501,7 @@ VkResult CmdBuffer::Begin( m_flags.isRecording = true; - if ((pRenderPass != nullptr) || (pInheritanceRenderingInfoKHR != nullptr)) + if ((pRenderPass != nullptr) || (pInheritanceRenderingInfo != nullptr)) // secondary VkCommandBuffer will be used inside VkRenderPass { VK_ASSERT(m_flags.is2ndLvl); @@ -3116,7 +3117,6 @@ void CmdBuffer::DrawIndirect( if ((stride + offset) <= pBuffer->PalMemory(DefaultDeviceIndex)->Desc().size) { - const Pal::gpusize paramOffset = pBuffer->MemOffset() + offset; Pal::gpusize countVirtAddr = 0; utils::IterateMask deviceGroup(m_curDeviceMask); @@ -3125,6 +3125,12 @@ void CmdBuffer::DrawIndirect( { const uint32_t deviceIdx = deviceGroup.Index(); + Pal::GpuVirtAddrAndStride gpuVirtAddrAndStride = + { + pBuffer->GpuVirtAddr(deviceIdx) + static_cast(offset), + stride + }; + if (useBufferCount) { Buffer* pCountBuffer = Buffer::ObjectFromHandle(countBuffer); @@ -3134,18 +3140,14 @@ void CmdBuffer::DrawIndirect( if (indexed == false) { PalCmdBuffer(deviceIdx)->CmdDrawIndirectMulti( - *pBuffer->PalMemory(deviceIdx), - paramOffset, - stride, + gpuVirtAddrAndStride, count, countVirtAddr); } else { PalCmdBuffer(deviceIdx)->CmdDrawIndexedIndirectMulti( - *pBuffer->PalMemory(deviceIdx), - paramOffset, - stride, + gpuVirtAddrAndStride, count, countVirtAddr); } @@ -4351,11 +4353,11 @@ LoadOpClearSubresRanges( // Clear Color for VK_KHR_dynamic_rendering void CmdBuffer::LoadOpClearColor( const Pal::Rect* pDeviceGroupRenderArea, - const VkRenderingInfoKHR* pRenderingInfo) + const VkRenderingInfo* pRenderingInfo) { for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; ++i) { - const VkRenderingAttachmentInfoKHR& attachmentInfo = pRenderingInfo->pColorAttachments[i]; + const VkRenderingAttachmentInfo& attachmentInfo = pRenderingInfo->pColorAttachments[i]; if (attachmentInfo.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) { @@ -4424,7 +4426,7 @@ void CmdBuffer::LoadOpClearColor( // Clear Depth Stencil for VK_KHR_dynamic_rendering void CmdBuffer::LoadOpClearDepthStencil( const Pal::Rect* pDeviceGroupRenderArea, - const VkRenderingInfoKHR* pRenderingInfo) + const VkRenderingInfo* pRenderingInfo) { // Note that no allocation will be performed, so Util::Vector allocator is nullptr. Util::Vector clearSubresRanges{ nullptr }; @@ -4438,8 +4440,8 @@ void CmdBuffer::LoadOpClearDepthStencil( float clearDepth = 0.0f; uint8 clearStencil = 0; - const VkRenderingAttachmentInfoKHR* pDepthAttachmentInfo = pRenderingInfo->pDepthAttachment; - const VkRenderingAttachmentInfoKHR* pStencilAttachmentInfo = pRenderingInfo->pStencilAttachment; + const VkRenderingAttachmentInfo* pDepthAttachmentInfo = pRenderingInfo->pDepthAttachment; + const VkRenderingAttachmentInfo* pStencilAttachmentInfo = pRenderingInfo->pStencilAttachment; if ((pStencilAttachmentInfo != nullptr) && (pStencilAttachmentInfo->imageView != VK_NULL_HANDLE)) @@ -4519,7 +4521,7 @@ void CmdBuffer::LoadOpClearDepthStencil( // ===================================================================================================================== // StoreAttachment for VK_KHR_dynamic_rendering void CmdBuffer::StoreAttachmentInfo( - const VkRenderingAttachmentInfoKHR& renderingAttachmentInfo, + const VkRenderingAttachmentInfo& renderingAttachmentInfo, DynamicRenderingAttachments* pDynamicRenderingAttachement) { const ImageView* const pImageView = ImageView::ObjectFromHandle(renderingAttachmentInfo.imageView); @@ -4533,7 +4535,7 @@ void CmdBuffer::StoreAttachmentInfo( 0, this); - pDynamicRenderingAttachement->attachmentFormat = pColorImage->GetFormat(); + pDynamicRenderingAttachement->attachmentFormat = pImageView->GetViewFormat(); pDynamicRenderingAttachement->resolveMode = renderingAttachmentInfo.resolveMode; pDynamicRenderingAttachement->pImageView = pImageView; pDynamicRenderingAttachement->imageLayout = colorImageLayout; @@ -4557,14 +4559,14 @@ void CmdBuffer::StoreAttachmentInfo( // ===================================================================================================================== // vkCmdBeginRendering for VK_KHR_dynamic_rendering void CmdBuffer::BeginRendering( - const VkRenderingInfoKHR* pRenderingInfo) + const VkRenderingInfo* pRenderingInfo) { VK_ASSERT(pRenderingInfo != nullptr); DbgBarrierPreCmd(DbgBarrierBeginRendering); - bool isResuming = (pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT_KHR); - bool isSuspended = (pRenderingInfo->flags & VK_RENDERING_SUSPENDING_BIT_KHR); + bool isResuming = (pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT); + bool isSuspended = (pRenderingInfo->flags & VK_RENDERING_SUSPENDING_BIT); bool skipEverything = isResuming && m_flags.isRenderingSuspended; bool skipClears = isResuming && (m_flags.isRenderingSuspended == false); @@ -4670,7 +4672,7 @@ void CmdBuffer::BeginRendering( for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; ++i) { - const VkRenderingAttachmentInfoKHR& colorAttachmentInfo = pRenderingInfo->pColorAttachments[i]; + const VkRenderingAttachmentInfo& colorAttachmentInfo = pRenderingInfo->pColorAttachments[i]; m_allGpuState.dynamicRenderingInstance.enableResolveTarget |= (colorAttachmentInfo.resolveImageView != VK_NULL_HANDLE); @@ -4682,7 +4684,7 @@ void CmdBuffer::BeginRendering( if (pRenderingInfo->pDepthAttachment != nullptr) { - const VkRenderingAttachmentInfoKHR& depthAttachmentInfo = *pRenderingInfo->pDepthAttachment; + const VkRenderingAttachmentInfo& depthAttachmentInfo = *pRenderingInfo->pDepthAttachment; m_allGpuState.dynamicRenderingInstance.enableResolveTarget |= (depthAttachmentInfo.resolveImageView != VK_NULL_HANDLE); @@ -4694,7 +4696,7 @@ void CmdBuffer::BeginRendering( if (pRenderingInfo->pStencilAttachment != nullptr) { - const VkRenderingAttachmentInfoKHR& stencilAttachmentInfo = *pRenderingInfo->pStencilAttachment; + const VkRenderingAttachmentInfo& stencilAttachmentInfo = *pRenderingInfo->pStencilAttachment; m_allGpuState.dynamicRenderingInstance.enableResolveTarget |= (stencilAttachmentInfo.resolveImageView != VK_NULL_HANDLE); @@ -8504,7 +8506,7 @@ void CmdBuffer::GetImageLayout( // ===================================================================================================================== // Binds color/depth targets for VK_KHR_dynamic_rendering void CmdBuffer::BindTargets( - const VkRenderingInfoKHR* pRenderingInfo, + const VkRenderingInfo* pRenderingInfo, const VkRenderingFragmentShadingRateAttachmentInfoKHR* pRenderingFragmentShadingRateAttachmentInfoKHR) { Pal::BindTargetParams params = {}; @@ -8520,7 +8522,7 @@ void CmdBuffer::BindTargets( for (uint32_t i = 0; i < params.colorTargetCount; ++i) { - const VkRenderingAttachmentInfoKHR& renderingAttachmentInfo = pRenderingInfo->pColorAttachments[i]; + const VkRenderingAttachmentInfo& renderingAttachmentInfo = pRenderingInfo->pColorAttachments[i]; if (renderingAttachmentInfo.imageView != VK_NULL_HANDLE) { @@ -8553,7 +8555,7 @@ void CmdBuffer::BindTargets( } } - const VkRenderingAttachmentInfoKHR* pStencilAttachmentInfo = pRenderingInfo->pStencilAttachment; + const VkRenderingAttachmentInfo* pStencilAttachmentInfo = pRenderingInfo->pStencilAttachment; if ((pStencilAttachmentInfo != nullptr) && (pStencilAttachmentInfo->imageView != VK_NULL_HANDLE)) @@ -8575,7 +8577,7 @@ void CmdBuffer::BindTargets( params.depthTarget.stencilLayout = stencilLayout; } - const VkRenderingAttachmentInfoKHR* pDepthAttachmentInfo = pRenderingInfo->pDepthAttachment; + const VkRenderingAttachmentInfo* pDepthAttachmentInfo = pRenderingInfo->pDepthAttachment; if ((pDepthAttachmentInfo != nullptr) && (pDepthAttachmentInfo->imageView != VK_NULL_HANDLE)) @@ -10717,8 +10719,8 @@ void CmdBuffer::TraceRaysIndirectPerDevice( initUserData.constantsVa = initConstantsVa; initUserData.inputBufferVa = indirectDeviceAddress; - initUserData.outputBufferVa = pScratchMemory->GpuVirtAddr(deviceIdx); - initUserData.outputConstantsVa = constGpuAddr + offsetof(GpuRt::DispatchRaysConstants, constData);; + initUserData.outputBufferVa = static_cast(pScratchMemory->GpuVirtAddr(deviceIdx)); + initUserData.outputConstantsVa = constGpuAddr + offsetof(GpuRt::DispatchRaysConstants, constData); initUserData.outputCounterMetaVa = 0uLL; m_pDevice->RayTrace()->TraceIndirectDispatch(deviceIdx, @@ -10770,7 +10772,7 @@ void CmdBuffer::TraceRaysIndirectPerDevice( 1, &constGpuAddrLow); - PalCmdBuffer(deviceIdx)->CmdDispatchIndirect(*pScratchMemory->PalMemory(deviceIdx), pScratchMemory->Offset()); + PalCmdBuffer(deviceIdx)->CmdDispatchIndirect(pScratchMemory->GpuVirtAddr(deviceIdx)); DbgBarrierPostCmd(DbgTraceRays); } @@ -11032,7 +11034,7 @@ void CmdBuffer::InsertDebugMarker( } // ===================================================================================================================== -const uint32_t CmdBuffer::GetPipelineScratchSize( +uint32_t CmdBuffer::GetPipelineScratchSize( uint32_t deviceIdx) const { uint32_t scratchSize = 0; diff --git a/icd/api/vk_cmdbuffer_transfer.cpp b/icd/api/vk_cmdbuffer_transfer.cpp index 3caecc21..9b5b2d60 100644 --- a/icd/api/vk_cmdbuffer_transfer.cpp +++ b/icd/api/vk_cmdbuffer_transfer.cpp @@ -118,8 +118,8 @@ void CmdBuffer::PalCmdCopyImage( uint32_t regionCount, Pal::ImageCopyRegion* pRegions) { - if ((pSrcImage->GetImageSamples() == pDstImage->GetImageSamples()) && - (pSrcImage->GetImageSamples() > 1) && + if ((((pSrcImage->GetImageSamples() == pDstImage->GetImageSamples()) && (pSrcImage->GetImageSamples() > 1)) || + (pSrcImage->GetImageType() != pDstImage->GetImageType())) && (m_palQueueType == Pal::QueueType::QueueTypeDma)) { SwitchToBackupCmdBuffer(); diff --git a/icd/api/vk_device.cpp b/icd/api/vk_device.cpp index 1365a814..89abed31 100644 --- a/icd/api/vk_device.cpp +++ b/icd/api/vk_device.cpp @@ -1408,6 +1408,13 @@ VkResult Device::Initialize( result = AllocBorderColorPalette(); } + if (IsExtensionEnabled(DeviceExtensions::KHR_COOPERATIVE_MATRIX)) + { + VkResult powerRes = PalToVkResult(PalDevice(DefaultDeviceIndex)->SetMlPowerOptimization(true)); + + VK_ALERT(powerRes != VK_SUCCESS); + } + return result; } @@ -1648,6 +1655,13 @@ uint32_t Device::GetDefaultSamplePatternIndex( // Destroy Vulkan device. Destroy underlying PAL device, call destructor and free memory. VkResult Device::Destroy(const VkAllocationCallbacks* pAllocator) { + if (IsExtensionEnabled(DeviceExtensions::KHR_COOPERATIVE_MATRIX)) + { + VkResult powerRes = PalToVkResult(PalDevice(DefaultDeviceIndex)->SetMlPowerOptimization(false)); + + VK_ALERT(powerRes != VK_SUCCESS); + } + #if ICD_GPUOPEN_DEVMODE_BUILD if (VkInstance()->GetDevModeMgr() != nullptr) { @@ -1866,7 +1880,8 @@ VkResult Device::CreateInternalComputePipeline( if (forceWave64) { - pShaderInfo->options.waveSize = 64; + pShaderInfo->options.waveSize = 64; + pShaderInfo->options.subgroupSize = 64; } Pal::ShaderHash codeHash = ShaderModule::GetCodeHash( @@ -3743,7 +3758,7 @@ void Device::GetDeviceAccelerationStructureCompatibility( const uint8_t* pData, VkAccelerationStructureCompatibilityKHR* pCompatibility) { - if (m_settings.disableASCompatibilityCheck) + if (m_settings.disableAsCompatibilityCheck) { *pCompatibility = VkAccelerationStructureCompatibilityKHR::VK_ACCELERATION_STRUCTURE_COMPATIBILITY_COMPATIBLE_KHR; } @@ -4685,6 +4700,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkImportSemaphoreFdKHR( const VkImportSemaphoreFdInfoKHR* pImportSemaphoreFdInfo) { ImportSemaphoreInfo importInfo = {}; + importInfo.pNext = pImportSemaphoreFdInfo->pNext; importInfo.handleType = pImportSemaphoreFdInfo->handleType; importInfo.handle = pImportSemaphoreFdInfo->fd; importInfo.importFlags = pImportSemaphoreFdInfo->flags; diff --git a/icd/api/vk_fence.cpp b/icd/api/vk_fence.cpp index 78a49d07..ce29f567 100644 --- a/icd/api/vk_fence.cpp +++ b/icd/api/vk_fence.cpp @@ -66,6 +66,9 @@ VkResult Fence::Create( { case VK_STRUCTURE_TYPE_EXPORT_FENCE_CREATE_INFO: { + // Mark this fence as shareable. + palFenceCreateInfo.flags.shareable = 1; + break; } default: @@ -246,7 +249,8 @@ VkResult Fence::ImportFenceFd( (static_cast(pImportFenceFdInfo->fd) == InvalidFd)) { Pal::FenceCreateInfo palFenceCreateInfo = {}; - palFenceCreateInfo.flags.signaled = 1; + palFenceCreateInfo.flags.signaled = 1; + palFenceCreateInfo.flags.shareable = 1; result = PalToVkResult(pDevice->PalDevice(DefaultDeviceIndex)->CreateFence( palFenceCreateInfo, diff --git a/icd/api/vk_framebuffer.cpp b/icd/api/vk_framebuffer.cpp index 5b2df6a5..1530a2fb 100644 --- a/icd/api/vk_framebuffer.cpp +++ b/icd/api/vk_framebuffer.cpp @@ -212,7 +212,7 @@ void Framebuffer::SetImageViews( // ===================================================================================================================== // Set ImageViews for a Framebuffer attachment void Framebuffer::SetImageViews( - const VkRenderingInfoKHR* pRenderingInfo) + const VkRenderingInfo* pRenderingInfo) { Attachment* pAttachments = static_cast(Util::VoidPtrInc(this, GetAttachmentsOffset())); diff --git a/icd/api/vk_graphics_pipeline_library.cpp b/icd/api/vk_graphics_pipeline_library.cpp index 4ba52659..b3bef155 100644 --- a/icd/api/vk_graphics_pipeline_library.cpp +++ b/icd/api/vk_graphics_pipeline_library.cpp @@ -630,6 +630,7 @@ VkResult GraphicsPipelineLibrary::Destroy( { PipelineCompiler* pCompiler = pDevice->GetCompiler(DefaultDeviceIndex); + uint32_t libraryMask = 0; for (uint32_t i = 0; i < ShaderStage::ShaderStageGfxCount; ++i) { if (m_tempModuleStates[i].stage != ShaderStage::ShaderStageInvalid) @@ -642,12 +643,14 @@ VkResult GraphicsPipelineLibrary::Destroy( { pCompiler->FreeShaderModule(m_tempModules + i); } + libraryMask |= (1 << GetGraphicsLibraryType(m_tempModuleStates[i].stage)); } } - for (Pal::IShaderLibrary* pShaderLib : m_pBinaryCreateInfo->pShaderLibraries) + for (uint32_t i = 0; i < ArrayLen(m_pBinaryCreateInfo->pShaderLibraries); ++i) { - if (pShaderLib != nullptr) + Pal::IShaderLibrary* pShaderLib = m_pBinaryCreateInfo->pShaderLibraries[i]; + if (Util::TestAnyFlagSet(libraryMask, 1 << i) && (pShaderLib != nullptr)) { pShaderLib->Destroy(); pAllocator->pfnFree(pAllocator->pUserData, pShaderLib); diff --git a/icd/api/vk_image.cpp b/icd/api/vk_image.cpp index 4e252ebb..7179bb65 100644 --- a/icd/api/vk_image.cpp +++ b/icd/api/vk_image.cpp @@ -271,7 +271,12 @@ void Image::ConvertImageCreateInfo( pPalCreateInfo->tilingPreference = settings.imageTilingPreference; } - pPalCreateInfo->flags.u32All = VkToPalImageCreateFlags(pCreateInfo->flags, createInfoFormat, imageUsage); + pPalCreateInfo->flags.u32All = VkToPalImageCreateFlags( + pCreateInfo->flags, + createInfoFormat, + imageUsage, + pCreateInfo->imageType, + pCreateInfo->mipLevels); pPalCreateInfo->usageFlags = VkToPalImageUsageFlags( imageUsage, pCreateInfo->samples, @@ -296,15 +301,6 @@ void Image::ConvertImageCreateInfo( pPalCreateInfo->flags.optimalShareable = 1; } - if (((pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) == 0) && - ((pCreateInfo->flags & VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT) != 0) && - (pCreateInfo->mipLevels > 1) && - Pal::Formats::IsBlockCompressed(pPalCreateInfo->swizzledFormat.format) && - (pCreateInfo->imageType == VK_IMAGE_TYPE_3D)) - { - pPalCreateInfo->flags.view3dAs2dArray = 1; - } - ExternalMemoryFlags externalFlags; externalFlags.u32All = 0; @@ -608,7 +604,7 @@ static VkResult InitSparseVirtualMemory( sparseMemCreateInfo.heapCount = 0; sparseMemCreateInfo.heapAccess = Pal::GpuHeapAccess::GpuHeapAccessExplicit; -#if defined(__unix__) +#if PAL_AMDGPU_BUILD sparseMemCreateInfo.flags.initializeToZero = pDevice->GetRuntimeSettings().initializeVramToZero; #endif diff --git a/icd/api/vk_instance.cpp b/icd/api/vk_instance.cpp index 8c07ffa5..5cd3f0de 100644 --- a/icd/api/vk_instance.cpp +++ b/icd/api/vk_instance.cpp @@ -458,7 +458,7 @@ VkResult Instance::Init( PhysicalDevice* pPhysicalDevice = ApiPhysicalDevice::ObjectFromHandle(devices[DefaultDeviceIndex]); Pal::DeviceProperties info; pPhysicalDevice->PalDevice()->GetProperties(&info); - if (pPhysicalDevice->GetRuntimeSettings().enableSPP && info.gfxipProperties.flags.supportSpp) + if (pPhysicalDevice->GetRuntimeSettings().enableSpp && info.gfxipProperties.flags.supportSpp) { wchar_t executableName[PATH_MAX]; wchar_t executablePath[PATH_MAX]; @@ -658,7 +658,7 @@ void Instance::UpdateSettingsWithAppProfile( ReloadAppProfileSettings(nullptr, this, &profileSettings, - pSettings->appGpuID); + pSettings->appGpuId); pSettings->vulkanTexFilterQuality = static_cast(profileSettings.texFilterQuality); diff --git a/icd/api/vk_memory.cpp b/icd/api/vk_memory.cpp index 97976891..562fe7fd 100644 --- a/icd/api/vk_memory.cpp +++ b/icd/api/vk_memory.cpp @@ -95,7 +95,7 @@ VkResult Memory::Create( VK_ASSERT(pAllocInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO); createInfo.size = pAllocInfo->allocationSize; -#if defined(__unix__) +#if PAL_AMDGPU_BUILD createInfo.flags.initializeToZero = settings.initializeVramToZero; #endif @@ -167,6 +167,7 @@ VkResult Memory::Create( pDevice->GetEnabledFeatures().deviceCoherentMemory) { createInfo.flags.gl2Uncached = 1; + createInfo.mallPolicy = Pal::GpuMemMallPolicy::Never; } if ((propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) diff --git a/icd/api/vk_physical_device.cpp b/icd/api/vk_physical_device.cpp index 201f3c20..32ff4a20 100644 --- a/icd/api/vk_physical_device.cpp +++ b/icd/api/vk_physical_device.cpp @@ -840,7 +840,7 @@ VkResult PhysicalDevice::Initialize() Util::Max(heapProperties[Pal::GpuHeapInvisible].physicalSize, heapProperties[Pal::GpuHeapInvisible].logicalSize); - if (settings.forceUMA) + if (settings.forceUma) { heapProperties[Pal::GpuHeapInvisible].physicalSize = 0; heapProperties[Pal::GpuHeapLocal].physicalSize = 0; @@ -1958,6 +1958,7 @@ VkResult PhysicalDevice::GetImageFormatProperties( // increase our exposed limits for compressed formats even though PAL/HW operating in terms of // blocks makes that possible. const uint64_t bytesPerPixel = Pal::Formats::BytesPerPixel(palFormat.format); + const uint64_t bitsPerPixel = Pal::Formats::BitsPerPixel(palFormat.format); // Block-compressed formats are not supported for 1D textures (PAL image creation will fail). if (Pal::Formats::IsBlockCompressed(palFormat.format) && (type == VK_IMAGE_TYPE_1D)) @@ -1971,8 +1972,12 @@ VkResult PhysicalDevice::GetImageFormatProperties( return VK_ERROR_FORMAT_NOT_SUPPORTED; } - // Currently we just disable the support of linear 3d surfaces, since they aren't required by spec. - if (type == VK_IMAGE_TYPE_3D && tiling == VK_IMAGE_TILING_LINEAR) + // 3D images have a different, interleaved memory layout which requires special handling + // to be able to access each mipLevel and slice. Furthermore, PAL code handles 3D images with 96bpp differently + // by scaling the original image to use X32Y32Z32_Uint/X32Y32Z32_Sint/X32Y32Z32_Float image as X32_Uint formatted view, + // which is somehow broken and needs debugging. + if ((type == VK_IMAGE_TYPE_3D) && (tiling == VK_IMAGE_TILING_LINEAR) && + (settings.disable3dLinearImageFormatSupport || (bitsPerPixel == 96))) { return VK_ERROR_FORMAT_NOT_SUPPORTED; } @@ -2007,6 +2012,14 @@ VkResult PhysicalDevice::GetImageFormatProperties( } } + // We are setting `view3dAs2dArray=1` in vk_conv.cpp::VkToPalImageCreateFlags() for uncompressed views + // of compressed 3d images. view3dAs2dArray is not going to work in combination with sparse images. + // So we disable block texel views of sparse 3d images upfront. + if ((flags & VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT) && (type == VK_IMAGE_TYPE_3D)) + { + return VK_ERROR_FORMAT_NOT_SUPPORTED; + } + const bool supported = // Currently we only support optimally tiled sparse images (tiling == VK_IMAGE_TILING_OPTIMAL) @@ -4349,6 +4362,18 @@ DeviceExtensions::Supported PhysicalDevice::GetAvailableExtensions( availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_COOPERATIVE_MATRIX)); } + bool exposeNvComputeShaderDerivatives = false; + + if ((pPhysicalDevice == nullptr) || (pPhysicalDevice->GetRuntimeSettings().exportNvComputeShaderDerivatives)) + { + exposeNvComputeShaderDerivatives = true; + } + + if (exposeNvComputeShaderDerivatives) + { + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(NV_COMPUTE_SHADER_DERIVATIVES)); + } + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_MAINTENANCE5)); availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_PUSH_DESCRIPTOR)); @@ -4383,7 +4408,7 @@ DeviceExtensions::Supported PhysicalDevice::GetAvailableExtensions( if (pPhysicalDevice != nullptr) { const RuntimeSettings& settings = pPhysicalDevice->GetRuntimeSettings(); - disableAMDVendorExtensions = settings.disableAMDVendorExtensions; + disableAMDVendorExtensions = settings.disableAmdVendorExtensions; } // AMD Extensions @@ -4806,9 +4831,13 @@ VkResult PhysicalDevice::GetRandROutputDisplay( VkResult PhysicalDevice::ReleaseDisplay( VkDisplayKHR display) { +#if PAL_AMDGPU_BUILD Pal::IScreen* pScreen = reinterpret_cast(display); return PalToVkResult(pScreen->ReleaseScreenAccess()); +#else + return VK_ERROR_UNKNOWN; +#endif } #endif @@ -4845,7 +4874,7 @@ void PhysicalDevice::GetPhysicalDeviceIDProperties( uint32_t* pDeviceNumber = nullptr; uint32_t* pFunctionNumber = nullptr; - if (GetRuntimeSettings().useOldDeviceUUIDCalculation == false) + if (GetRuntimeSettings().useOldDeviceUuidCalculation == false) { pDomainNumber = reinterpret_cast(pDeviceUUID); pBusNumber = reinterpret_cast(pDeviceUUID + 4); @@ -4863,7 +4892,7 @@ void PhysicalDevice::GetPhysicalDeviceIDProperties( memset(pDeviceUUID, 0, VK_UUID_SIZE); memset(pDriverUUID, 0, VK_UUID_SIZE); - if (GetRuntimeSettings().useOldDeviceUUIDCalculation == false) + if (GetRuntimeSettings().useOldDeviceUuidCalculation == false) { *pDomainNumber = props.pciProperties.domainNumber; } @@ -6650,9 +6679,9 @@ size_t PhysicalDevice::GetFeatures2( break; } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES_KHR: + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES: { - auto* pExtInfo = reinterpret_cast(pHeader); + auto* pExtInfo = reinterpret_cast(pHeader); if (updateFeatures) { @@ -6718,6 +6747,20 @@ size_t PhysicalDevice::GetFeatures2( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV: + { + auto* pExtInfo = reinterpret_cast(pHeader); + + if (updateFeatures) + { + pExtInfo->computeDerivativeGroupQuads = VK_TRUE; + pExtInfo->computeDerivativeGroupLinear = VK_TRUE; + } + + structSize = sizeof(*pExtInfo); + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GRAPHICS_PIPELINE_LIBRARY_FEATURES_EXT: { auto* pExtInfo = reinterpret_cast(pHeader); @@ -8345,8 +8388,8 @@ void PhysicalDevice::GetExternalSemaphoreProperties( VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT; } else if ((pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) && - isTimeline == false && - (props.osProperties.supportSyncFileSemaphore)) + props.osProperties.supportSyncFileSemaphore && + (isTimeline == false)) { pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT | VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT; diff --git a/icd/api/vk_physical_device_manager.cpp b/icd/api/vk_physical_device_manager.cpp index 0aed04e0..f812d401 100644 --- a/icd/api/vk_physical_device_manager.cpp +++ b/icd/api/vk_physical_device_manager.cpp @@ -273,7 +273,8 @@ VkResult PhysicalDeviceManager::UpdateLockedPhysicalDeviceList(void) if (pLoader != nullptr) { - settingsArray[i] = VK_PLACEMENT_NEW(pLoader) VulkanSettingsLoader(pPalDeviceList[i], m_pInstance->PalPlatform(), i); + settingsArray[i] = VK_PLACEMENT_NEW(pLoader) VulkanSettingsLoader(pPalDeviceList[i], + m_pInstance->PalPlatform()); } else { diff --git a/icd/api/vk_queue.cpp b/icd/api/vk_queue.cpp index c7614fd7..69ceed44 100644 --- a/icd/api/vk_queue.cpp +++ b/icd/api/vk_queue.cpp @@ -430,7 +430,7 @@ void Queue::ConstructQueueCreateInfo( pQueueCreateInfo->queueType = palQueueType; pQueueCreateInfo->priority = VkToPalGlobalPriority(queuePriority, palProperties.engineProperties[pQueueCreateInfo->engineType].capabilities[pQueueCreateInfo->engineIndex]); -#if defined(__unix__) +#if PAL_AMDGPU_BUILD pQueueCreateInfo->enableGpuMemoryPriorities = 1; #endif } @@ -478,6 +478,7 @@ Pal::Result Queue::CreatePalQueue( if ((palResult == Pal::Result::Unsupported) || (palResult == Pal::Result::ErrorInvalidValue) || + (palResult == Pal::Result::ErrorUnknown) || (palResult == Pal::Result::ErrorUnavailable)) { palResult = Pal::Result::Success; @@ -2593,6 +2594,14 @@ bool Queue::BuildPostProcessCommands( frameInfo.debugOverlay.presentMode = Pal::PresentMode::Unknown; } + frameInfo.srcImageLayout = + { + .usages = Pal::LayoutPresentWindowed | Pal::LayoutPresentFullscreen, + .engines = ((PalQueue(DefaultDeviceIndex)->GetEngineType() == Pal::EngineTypeCompute) ? + Pal::LayoutComputeEngine : + Pal::LayoutUniversalEngine) + }; + frameInfo.fullScreenFrameMetadataControlFlags.u32All = m_palFrameMetadataControl.flags.u32All; bool wasGpuWorkAdded = false; diff --git a/icd/api/vk_semaphore.cpp b/icd/api/vk_semaphore.cpp index 0a348297..6729cd30 100644 --- a/icd/api/vk_semaphore.cpp +++ b/icd/api/vk_semaphore.cpp @@ -242,6 +242,7 @@ VkResult Semaphore::Create( // Get external handle from the semaphore object. VkResult Semaphore::GetShareHandle( Device* device, + const void* pNext, VkExternalSemaphoreHandleTypeFlagBits handleType, Pal::OsExternalHandle* pHandle) { @@ -251,6 +252,7 @@ VkResult Semaphore::GetShareHandle( Pal::QueueSemaphoreExportInfo palExportInfo = {}; palExportInfo.flags.isReference = (handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT); + *pHandle = m_pPalSemaphores[0]->ExportExternalHandle(palExportInfo); #endif @@ -277,6 +279,7 @@ VkResult Semaphore::ImportSemaphore( PAL_ASSERT((handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) || (handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT)); palOpenInfo.flags.isReference = (handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT); + #endif //Todo: Check whether pDevice is the same as the one created the semaphore. @@ -296,7 +299,7 @@ VkResult Semaphore::ImportSemaphore( if (pMemory) { Pal::IQueueSemaphore* pPalSemaphores[MaxPalDevices] = { nullptr }; -#if defined(__unix__) +#if PAL_AMDGPU_BUILD // According to the spec, If handleType is VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, the special value -1 // for fd is treated like a valid sync file descriptor referring to an object that has already signaled. @@ -526,6 +529,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetSemaphoreFdKHR( VkResult result = Semaphore::ObjectFromHandle(pGetFdInfo->semaphore)->GetShareHandle( ApiDevice::ObjectFromHandle(device), + pGetFdInfo->pNext, pGetFdInfo->handleType, &handle); diff --git a/icd/api/vk_swapchain.cpp b/icd/api/vk_swapchain.cpp index a9724851..51b6189e 100644 --- a/icd/api/vk_swapchain.cpp +++ b/icd/api/vk_swapchain.cpp @@ -145,7 +145,7 @@ VkResult SwapChain::Create( properties.imageCreateInfo.swizzledFormat = VkToPalFormat(pCreateInfo->imageFormat, settings); properties.imageCreateInfo.flags.stereo = properties.flags.stereo; properties.imageCreateInfo.flags.peerWritable = (pDevice->NumPalDevices() > 1) ? 1 : 0; -#if defined(__unix__) +#if PAL_AMDGPU_BUILD properties.imageCreateInfo.flags.initializeToZero = settings.initializeVramToZero; #endif @@ -1064,7 +1064,7 @@ Pal::IQueue* SwapChain::PrePresent( bool SwapChain::IsSuboptimal(uint32_t deviceIdx) { bool suboptimal = false; - VkSurfaceCapabilitiesKHR surfaceCapabilities = { }; + VkExtent2D currentExtent = { }; Pal::OsDisplayHandle displayHandle = 0; VkResult result = VK_SUCCESS; @@ -1074,21 +1074,24 @@ bool SwapChain::IsSuboptimal(uint32_t deviceIdx) if (m_pPalSwapChain->NeedWindowSizeChangedCheck()) { + VkSurfaceCapabilitiesKHR surfaceCapabilities = { }; + result = m_pDevice->VkPhysicalDevice(deviceIdx)->GetSurfaceCapabilities( Surface::HandleFromObject(m_properties.pSurface), displayHandle, &surfaceCapabilities); + currentExtent = surfaceCapabilities.currentExtent; + if (result == VK_SUCCESS) { // Magic width/height value meaning that the surface is resized to match the swapchain's extent. constexpr uint32_t SwapchainBasedSize = 0xFFFFFFFF; - if ((surfaceCapabilities.currentExtent.width != SwapchainBasedSize) || - (surfaceCapabilities.currentExtent.height != SwapchainBasedSize)) + if ((currentExtent.width != SwapchainBasedSize) || (currentExtent.height != SwapchainBasedSize)) { - suboptimal = ((surfaceCapabilities.currentExtent.width != m_properties.imageCreateInfo.extent.width) - || (surfaceCapabilities.currentExtent.height != m_properties.imageCreateInfo.extent.height)); + suboptimal = ((currentExtent.width != m_properties.imageCreateInfo.extent.width) + || (currentExtent.height != m_properties.imageCreateInfo.extent.height)); } } } @@ -1529,11 +1532,8 @@ void FullscreenMgr::UpdatePresentInfo( // Present mode does not matter in DXGI as it is completely OS handled. This is for our internal tracking only if (pSwapChain->IsDxgiEnabled()) { - // If KMD reported we're in Indpendent Flip and our window is fullscreen compatible, it is safe to assume - // that DXGI acquired FSE. - bool isFullscreen = (IsFullscreenOwnershipSafe() == Pal::Result::Success) && flipFlags.iFlip; - - pPresentInfo->presentMode = isFullscreen ? Pal::PresentMode::Fullscreen : Pal::PresentMode::Windowed; + // If KMD reported we're in Indpendent Flip we can assume that DXGI acquired FSE. + pPresentInfo->presentMode = flipFlags.iFlip ? Pal::PresentMode::Fullscreen : Pal::PresentMode::Windowed; } // Try to enter (or remain in) exclusive access mode on this swap chain's screen for this present else diff --git a/icd/res/ver.h b/icd/res/ver.h index e80e5777..bb34e42e 100644 --- a/icd/res/ver.h +++ b/icd/res/ver.h @@ -36,7 +36,7 @@ #define VERSION_MAJOR_STR MAKE_VERSION_STRING(VULKAN_ICD_MAJOR_VERSION) "\0" // Bump up after each promotion to mainline -#define VULKAN_ICD_BUILD_VERSION 291 +#define VULKAN_ICD_BUILD_VERSION 295 // String version is needed with leading zeros and extra termination (unicode) #define VERSION_NUMBER_MINOR VULKAN_ICD_BUILD_VERSION @@ -45,7 +45,7 @@ // These values specify the driver ID and driver info string #define VULKAN_DRIVER_ID VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR // "AMDOPEN" #define VULKAN_DRIVER_NAME_STR "AMD open-source driver" -#define VULKAN_DRIVER_INFO_STR "2023.Q4.2" +#define VULKAN_DRIVER_INFO_STR "2023.Q4.3" #define VULKAN_DRIVER_INFO_STR_LLPC "(LLPC)" // These values tell which version of the conformance test the driver is compliant against diff --git a/icd/settings/settings.cpp b/icd/settings/settings.cpp index cf7ce6a7..97b6c296 100644 --- a/icd/settings/settings.cpp +++ b/icd/settings/settings.cpp @@ -38,9 +38,11 @@ #include "palAssert.h" #include "palInlineFuncs.h" #include "palSysMemory.h" +#include "palPlatform.h" #include "devDriverServer.h" #include "protocols/ddSettingsService.h" +#include "dd_settings_service.h" #include "../layers/include/query_dlist.h" @@ -59,47 +61,22 @@ namespace vk // Constructor for the SettingsLoader object. VulkanSettingsLoader::VulkanSettingsLoader( Pal::IDevice* pDevice, - Pal::IPlatform* pPlatform, - uint32_t deviceId) + Pal::IPlatform* pPlatform) : - ISettingsLoader(pPlatform, static_cast(&m_settings), g_vulkanNumSettings), + DevDriver::SettingsBase(&m_settings, sizeof(m_settings)), m_pDevice(pDevice), m_pPlatform(pPlatform) { - Util::Snprintf(m_pComponentName, sizeof(m_pComponentName), "Vulkan%d", deviceId); - memset(&m_settings, 0, sizeof(RuntimeSettings)); } // ===================================================================================================================== VulkanSettingsLoader::~VulkanSettingsLoader() { - auto* pDevDriverServer = m_pPlatform->GetDevDriverServer(); - if (pDevDriverServer != nullptr) - { - auto* pSettingsService = pDevDriverServer->GetSettingsService(); - if (pSettingsService != nullptr) - { - pSettingsService->UnregisterComponent(m_pComponentName); - } - } } Result VulkanSettingsLoader::Init() { - Result ret = m_settingsInfoMap.Init(); - - if (ret == Result::Success) - { - // Init Settings Info HashMap - InitSettingsInfo(); - - // Setup default values for the settings - SetupDefaults(); - - m_state = Pal::SettingsLoaderState::EarlyInit; - } - - return ret; + return (SetupDefaultsAndPopulateMap() == DD_RESULT_SUCCESS) ? Result::Success : Result::ErrorUnknown; } // ===================================================================================================================== // Append sub path to root path to generate an absolute path. @@ -205,6 +182,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( ForceDccFor3DShaderStorage | ForceDccFor32BppShaderStorage | ForceDccFor64BppShaderStorage); + m_settings.optImgMaskToApplyShaderReadUsageForTransferSrc |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; #if VKI_RAY_TRACING @@ -520,6 +498,73 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( if (appProfile == AppProfile::CSGO) { + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) + { + + if (pInfo->revision == Pal::AsicRevision::Navi21) + { + m_settings.csWaveSize = 32; + m_settings.fsWaveSize = 32; + + m_settings.mallNoAllocCtPolicy = MallNoAllocCtPolicy::MallNoAllocCtAsSnsr; + m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrPolicy::MallNoAllocSsrAsSnsr; + } + + if (pInfo->revision == Pal::AsicRevision::Navi22) + { + m_settings.mallNoAllocDsPolicy = MallNoAllocDsPolicy::MallNoAllocDsAsSnsr; + m_settings.mallNoAllocCtPolicy = MallNoAllocCtPolicy::MallNoAllocCtAsSnsr; + } + + if (pInfo->revision == Pal::AsicRevision::Navi23) + { + m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrPolicy::MallNoAllocCtSsrAsSnsr; + m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrPolicy::MallNoAllocSsrAsSnsr; + } + + if (pInfo->revision == Pal::AsicRevision::Navi24) + { + m_settings.csWaveSize = 64; + m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrPolicy::MallNoAllocSsrAsSnsr; + } + } + +#if VKI_BUILD_GFX11 + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) + { + m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrPolicy::MallNoAllocSsrAsSnsr; + m_settings.ac01WaNotNeeded = true; + + if (pInfo->gpuType == Pal::GpuType::Discrete) + { + m_settings.rpmViewsBypassMall = RpmViewBypassMall::RpmViewBypassMallOnCbDbWrite | + RpmViewBypassMall::RpmViewBypassMallOnRead; + } + +#if VKI_BUILD_NAVI31 + if (pInfo->revision == Pal::AsicRevision::Navi31) + { + m_settings.mallNoAllocCtPolicy = MallNoAllocCtPolicy::MallNoAllocCtAsSnsr; + } +#endif + +#if VKI_BUILD_NAVI32 + if (pInfo->revision == Pal::AsicRevision::Navi32) + { + m_settings.mallNoAllocCtPolicy = MallNoAllocCtPolicy::MallNoAllocCtAsSnsr; + } +#endif + +#if VKI_BUILD_NAVI33 + if (pInfo->revision == Pal::AsicRevision::Navi33) + { + m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrPolicy::MallNoAllocCtSsrAsSnsr; + } +#endif + } +#endif + + m_settings.enableUberFetchShader = true; } if (appProfile == AppProfile::Source2Engine) @@ -816,6 +861,8 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( } m_settings.ac01WaNotNeeded = true; + + m_settings.disable3dLinearImageFormatSupport = false; } if (appProfile == AppProfile::GhostReconBreakpoint) @@ -855,12 +902,11 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrAsSnsr; m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrAsSnsr; } - - if (pInfo->revision == Pal::AsicRevision::Navi22) + else if (pInfo->revision == Pal::AsicRevision::Navi22) { m_settings.forceEnableDcc = (ForceDccFor2DShaderStorage | - ForceDccFor3DShaderStorage | - ForceDccForColorAttachments); + ForceDccFor3DShaderStorage | + ForceDccForColorAttachments); m_settings.mallNoAllocDsPolicy = MallNoAllocDsAsSnsr; m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrAsSnsr; @@ -875,6 +921,8 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( { m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; m_settings.mallNoAllocDsPolicy = MallNoAllocDsAsSnsr; + + m_settings.memoryDeviceOverallocationAllowed = true; } } @@ -1054,7 +1102,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( if (appProfile == AppProfile::IdTechLauncher) { - m_settings.enableOnDiskInternalPipelineCaches = false; + m_settings.enableInternalPipelineCachingToDisk = false; } if (appProfile == AppProfile::SaschaWillemsExamples) @@ -1293,6 +1341,11 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( #endif } + if (appProfile == AppProfile::Vkd3dEngine) + { + m_settings.exportNvComputeShaderDerivatives = true; + } + pAllocCb->pfnFree(pAllocCb->pUserData, pInfo); } @@ -1339,11 +1392,11 @@ VkResult VulkanSettingsLoader::ProcessSettings( VkResult result = VkResult::VK_SUCCESS; // The following lines to load profile settings have been copied from g_settings.cpp - static_cast(m_pDevice)->ReadSetting(pForceAppProfileEnableStr, + static_cast(m_pDevice)->ReadSetting(pForceAppProfileEnableHashStr, Pal::SettingScope::Driver, Util::ValueType::Boolean, &m_settings.forceAppProfileEnable); - static_cast(m_pDevice)->ReadSetting(pForceAppProfileValueStr, + static_cast(m_pDevice)->ReadSetting(pForceAppProfileValueHashStr, Pal::SettingScope::Driver, Util::ValueType::Uint, &m_settings.forceAppProfileValue); @@ -1383,17 +1436,14 @@ VkResult VulkanSettingsLoader::ProcessSettings( m_settings.pipelineLayoutMode = PipelineLayoutMode::PipelineLayoutAngle; } - if (m_settings.ac01WaNotNeeded) - { - Pal::PalPublicSettings* pPalSettings = m_pDevice->GetPublicSettings(); - pPalSettings->ac01WaNotNeeded = true; - } - DumpAppProfileChanges(*pAppProfile); - // Register with the DevDriver settings service - DevDriverRegister(); - m_state = Pal::SettingsLoaderState::LateInit; + auto pSettingsRpcService = m_pPlatform->GetSettingsRpcService(); + + if (pSettingsRpcService != nullptr) + { + pSettingsRpcService->RegisterSettingsComponent(this); + } } return result; @@ -1412,7 +1462,7 @@ void VulkanSettingsLoader::ReadPublicSettings() &appGpuID, sizeof(appGpuID))) { - m_settings.appGpuID = appGpuID; + m_settings.appGpuId = appGpuID; } // Read TFQ global key @@ -1493,12 +1543,12 @@ void VulkanSettingsLoader::ValidateSettings() buildMode = BvhBuildModePLOC; } - m_settings.bvhBuildModeOverrideBLAS = buildMode; - m_settings.bvhBuildModeOverrideTLAS = buildMode; + m_settings.bvhBuildModeOverrideBlas = buildMode; + m_settings.bvhBuildModeOverrideTlas = buildMode; } // Compression is not compatible with collapse or triangle splitting. - if (m_settings.rtEnableBVHCollapse || m_settings.rtEnableTriangleSplitting) + if (m_settings.rtEnableBvhCollapse || m_settings.rtEnableTriangleSplitting) { m_settings.rtTriangleCompressionMode = NoTriangleCompression; } @@ -1576,6 +1626,8 @@ void VulkanSettingsLoader::UpdatePalSettings() // The color cache fetch size is limited to 256Bytes MAX regardless of other register settings. pPalSettings->limitCbFetch256B = m_settings.limitCbFetch256B; + pPalSettings->rpmViewsBypassMall = static_cast(m_settings.rpmViewsBypassMall); + // Controls PWS enable mode: disabled, fully enabled or partially enabled. Only takes effect if HW supports PWS and // Acq-rel barriers if (m_settings.useAcquireReleaseInterface) @@ -1588,6 +1640,11 @@ void VulkanSettingsLoader::UpdatePalSettings() pPalSettings->pwsMode = static_cast(m_settings.forcePwsMode); } + if (m_settings.ac01WaNotNeeded) + { + pPalSettings->ac01WaNotNeeded = true; + } + } // ===================================================================================================================== @@ -1598,17 +1655,17 @@ void VulkanSettingsLoader::UpdatePalSettings() void VulkanSettingsLoader::GenerateSettingHash() { // Temporarily ignore these CCC settings when computing a settings hash as described in the function header. - uint32 appGpuID = m_settings.appGpuID; - m_settings.appGpuID = 0; + uint32 appGpuID = m_settings.appGpuId; + m_settings.appGpuId = 0; TextureFilterOptimizationSettings vulkanTexFilterQuality = m_settings.vulkanTexFilterQuality; m_settings.vulkanTexFilterQuality = TextureFilterOptimizationsDisabled; MetroHash128::Hash( reinterpret_cast(&m_settings), sizeof(RuntimeSettings), - m_settingHash.bytes); + m_settingsHash.bytes); - m_settings.appGpuID = appGpuID; + m_settings.appGpuId = appGpuID; m_settings.vulkanTexFilterQuality = vulkanTexFilterQuality; } @@ -1624,9 +1681,22 @@ void VulkanSettingsLoader::FinalizeSettings( m_settings.enableFmaskBasedMsaaRead = false; } - m_state = Pal::SettingsLoaderState::Final; - GenerateSettingHash(); } +// ===================================================================================================================== +bool VulkanSettingsLoader::ReadSetting( + const char* pSettingName, + Util::ValueType valueType, + void* pValue, + size_t bufferSize) +{ + return m_pDevice->ReadSetting( + pSettingName, + Pal::SettingScope::Driver, + valueType, + pValue, + bufferSize); +} + }; diff --git a/icd/settings/settings.h b/icd/settings/settings.h index 6411ed9c..8c03b514 100644 --- a/icd/settings/settings.h +++ b/icd/settings/settings.h @@ -34,7 +34,8 @@ #ifndef __SETTINGS_SETTINGS_H__ #define __SETTINGS_SETTINGS_H__ -#include "palSettingsLoader.h" +#include "palMetroHash.h" +#include // g_settings.h is generated in the same dir on Linux and Windows. // However, if g_settings.h is generated out of source tree, @@ -54,13 +55,13 @@ namespace vk // ===================================================================================================================== // This class is responsible for loading and processing the Vulkan runtime settings structure encapsulated in the Vulkan // Settings Loader object. -class VulkanSettingsLoader : public Pal::ISettingsLoader +class VulkanSettingsLoader : public DevDriver::SettingsBase { public: - explicit VulkanSettingsLoader(Pal::IDevice* pDevice, Pal::IPlatform* pPlatform, uint32_t deviceId); + explicit VulkanSettingsLoader(Pal::IDevice* pDevice, Pal::IPlatform* pPlatform); virtual ~VulkanSettingsLoader(); - virtual Util::Result Init() override; + Pal::Result Init(); VkResult ProcessSettings( const VkAllocationCallbacks* pAllocCb, @@ -74,9 +75,17 @@ class VulkanSettingsLoader : public Pal::ISettingsLoader void FinalizeSettings( const DeviceExtensions::Enabled& enabledExtensions); + Util::MetroHash::Hash GetSettingsHash() const { return m_settingsHash; } + const RuntimeSettings& GetSettings() const { return m_settings; }; RuntimeSettings* GetSettingsPtr() { return &m_settings; } + // auto-generated functions + virtual const char* GetComponentName() const override; + virtual DD_RESULT SetupDefaultsAndPopulateMap() override; + virtual void ReadSettings() override; + virtual uint64_t GetSettingsBlobHash() const override; + private: PAL_DISALLOW_COPY_AND_ASSIGN(VulkanSettingsLoader); PAL_DISALLOW_DEFAULT_CTOR(VulkanSettingsLoader); @@ -84,6 +93,12 @@ class VulkanSettingsLoader : public Pal::ISettingsLoader // Generate the settings hash void GenerateSettingHash(); + bool ReadSetting( + const char* pSettingName, + Util::ValueType valueType, + void* pValue, + size_t bufferSize = 0); + VkResult OverrideProfiledSettings( const VkAllocationCallbacks* pAllocCb, uint32_t appVersion, @@ -96,17 +111,10 @@ class VulkanSettingsLoader : public Pal::ISettingsLoader void ReadPublicSettings(); - Pal::IDevice* m_pDevice; - Pal::IPlatform* m_pPlatform; - RuntimeSettings m_settings; - - // auto-generated functions - virtual void SetupDefaults() override; - virtual void ReadSettings() override; - virtual void InitSettingsInfo() override; - virtual void DevDriverRegister() override; - - char m_pComponentName[10]; + Pal::IDevice* m_pDevice; + Pal::IPlatform* m_pPlatform; + RuntimeSettings m_settings; + Util::MetroHash::Hash m_settingsHash; }; } //vk diff --git a/icd/settings/settings_xgl.json b/icd/settings/settings_xgl.json index e8e20a73..8dbd6b95 100644 --- a/icd/settings/settings_xgl.json +++ b/icd/settings/settings_xgl.json @@ -1,8 +1,5 @@ { "ComponentName": "Vulkan", - "DriverState": [ - "HaltedOnDeviceInit" - ], "Tags": [ "Render Passes", "Command Buffer Options", @@ -467,7 +464,9 @@ "Value": 3 } ], - "Name": "Pal::GpuHeap" + "Name": "Pal::GpuHeap", + "IsEnum": true, + "SkipGen": true }, "Description": "The primary heap for the internal CmdAllocator for command data.", "Tags": [ @@ -525,7 +524,9 @@ "Value": 3 } ], - "Name": "Pal::GpuHeap" + "Name": "Pal::GpuHeap", + "IsEnum": true, + "SkipGen": true }, "Description": "The primary heap for the internal CmdAllocator for embedded data.", "Tags": [ @@ -626,7 +627,9 @@ "Value": 3 } ], - "Name": "Pal::GpuHeap" + "Name": "Pal::GpuHeap", + "IsEnum": true, + "SkipGen": true }, "Scope": "Driver", "Type": "enum" @@ -926,12 +929,11 @@ }, "Defaults": { "Default": "vkDump/appShaderProfile", - "WinDefault": "vkDump\\appShaderProfile.json", - "LnxDefault": "vkDump/appShaderProfile.json" + "Windows": "vkDump\\appShaderProfile.json", + "Linux": "vkDump/appShaderProfile.json" }, "Name": "PipelineProfileDumpFile", "Type": "string", - "Size": 260, "Scope": "Driver" }, { @@ -941,14 +943,13 @@ "Pipeline Options" ], "Flags": { - "IsPath": true + "IsDir": true }, "Defaults": { "Default": "" }, "Scope": "Driver", - "Type": "string", - "Size": 512 + "Type": "string" }, { "Name": "PipelineProfileDbgPrintProfileMatch", @@ -1071,8 +1072,8 @@ "Scope": "Driver" }, { - "Name": "FullscreenFrameMetadataSupport", - "Description": "Support Fullscreen Frame Metadata.", + "Name": "FullScreenFrameMetadataSupport", + "Description": "Support FullScreen Frame Metadata.", "Tags": [ "Present" ], @@ -1080,8 +1081,7 @@ "Default": true }, "Scope": "Driver", - "Type": "bool", - "VariableName": "fullScreenFrameMetadataSupport" + "Type": "bool" }, { "Name": "UseSdmaCompositingBlt", @@ -1134,11 +1134,6 @@ { "Name": "DxgiPresent", "Description": "Enable Presentation via DXGI", - "DependsOn": { - "OS": [ - "Windows" - ] - }, "BuildTypes": [ "defined(_WIN32)" ], @@ -1175,11 +1170,6 @@ { "Name": "DxgiSettings", "Description": "Bitmask of various DXGI presentation settings.", - "DependsOn": { - "OS": [ - "Windows" - ] - }, "BuildTypes": [ "defined(_WIN32)" ], @@ -1271,18 +1261,6 @@ "Scope": "Driver", "Type": "bool" }, - { - "Name": "EnableRobustUberFetchShader", - "Description": "Enable robust access in uber fetch shder.", - "Tags": [ - "SPIRV Options" - ], - "Defaults": { - "Default": true - }, - "Scope": "Driver", - "Type": "bool" - }, { "Name": "EnableEarlyCompile", "Description": "Enable pipeline early compile.", @@ -1400,17 +1378,16 @@ "SPIRV Options" ], "Flags": { - "IsPath": true + "IsDir": true }, "Defaults": { "Default": "spvPipeline", - "WinDefault": "SpvPipeline", - "LnxDefault": "spvPipeline" + "Windows": "SpvPipeline", + "Linux": "spvPipeline" }, "Scope": "Driver", "Name": "PipelineDumpDir", - "Type": "string", - "Size": 256 + "Type": "string" }, { "Description": "Append Executable Name in PipelineDumpDir", @@ -1473,8 +1450,7 @@ }, "Scope": "Driver", "Name": "LogFileName", - "Type": "string", - "Size": 256 + "Type": "string" }, { "Description": "Enable output diagnostic info. This info may added to AMD IL or external debug log files.", @@ -1501,8 +1477,7 @@ }, "Scope": "Driver", "Name": "DebugLogFileName", - "Type": "string", - "Size": 256 + "Type": "string" }, { "ValidValues": { @@ -1558,17 +1533,16 @@ "SPIRV Options" ], "Flags": { - "IsPath": true + "IsDir": true }, "Defaults": { "Default": "ShaderReplace", - "WinDefault": "ShaderReplace", - "LnxDefault": "ShaderReplace" + "Windows": "ShaderReplace", + "Linux": "ShaderReplace" }, "Scope": "Driver", "Name": "ShaderReplaceDir", - "Type": "string", - "Size": 256 + "Type": "string" }, { "Description": "Only valid if shaderReplaceMode is set to 2 and 4. This is a comma separated pipeline hash list. Hash number is in big case, example hash list looks like: 0xAD033E031BF7CB6C,0x1B707F37B7DA34E3", @@ -1580,8 +1554,7 @@ }, "Scope": "Driver", "Name": "ShaderReplacePipelineHashes", - "Type": "string", - "Size": 512 + "Type": "string" }, { "Description": "Drop specified instruction in pipeline binary for quick debugging shader. It replaces all matching instruction opcodes with NOPs and needs to work with DropPipelineBinaryInstToken and DropPipelineBinaryInstSize together.", @@ -2107,16 +2080,6 @@ "Defaults": { "Default": 0 }, - "DependsOn": { - "Settings": [ - { - "Values": [ - true - ], - "Name": "EnableDropPipelineBinaryInst" - } - ] - }, "Scope": "Driver", "Type": "uint32", "Name": "DropPipelineBinaryInstToken" @@ -2129,16 +2092,6 @@ "Defaults": { "Default": 1 }, - "DependsOn": { - "Settings": [ - { - "Values": [ - true - ], - "Name": "EnableDropPipelineBinaryInst" - } - ] - }, "Scope": "Driver", "Type": "uint32", "Name": "DropPipelineBinaryInstSize" @@ -2168,7 +2121,7 @@ "Name": "EnableSpvValidation" }, { - "Description": "GFX11 plus-specific tessellation factor optimization.", + "Description": "Tessellation factor optimization.", "Tags": [ "SPIRV Options" ], @@ -2177,10 +2130,7 @@ }, "Scope": "Driver", "Type": "bool", - "Name": "OptimizeTessFactor", - "BuildTypes": [ - "VKI_BUILD_GFX11" - ] + "Name": "OptimizeTessFactor" }, { "Description": "[BIL ONLY] Zero initialize AMD IL registers. ", @@ -2192,8 +2142,7 @@ }, "Scope": "Driver", "Type": "bool", - "VariableName": "zeroInitIlRegs", - "Name": "ZeroInitILRegs" + "Name": "ZeroInitIlRegs" }, { "Description": "[BIL ONLY] Skip unsupported SPIR-V instructions. ", @@ -2269,8 +2218,7 @@ }, "Scope": "Driver", "Name": "LlpcOptions", - "Type": "string", - "Size": 256 + "Type": "string" }, { "ValidValues": { @@ -2349,7 +2297,6 @@ "AndroidDefault": false }, "Type": "bool", - "VariableName": "enableOnDiskInternalPipelineCaches", "Scope": "Driver" }, { @@ -2386,8 +2333,7 @@ "Default": "AMD_VK_USE_PIPELINE_CACHE" }, "Scope": "Driver", - "Type": "string", - "Size": 256 + "Type": "string" }, { "Name": "UsePipelineCachingDefaultLocation", @@ -2409,12 +2355,11 @@ ], "Defaults": { "Default": "/AMD/VkCache/", - "WinDefault": "\\AMD\\VkCache\\", - "LnxDefault": "/AMD/VkCache/" + "Windows": "\\AMD\\VkCache\\", + "Linux": "/AMD/VkCache/" }, "Scope": "Driver", - "Type": "string", - "Size": 256 + "Type": "string" }, { "Name": "PipelineCacheDefaultLocationLimitation", @@ -2439,8 +2384,8 @@ ], "Defaults": { "Default": false, - "WinDefault": false, - "LnxDefault": true + "Windows": false, + "Linux": true }, "Type": "bool", "Scope": "Driver" @@ -2704,8 +2649,7 @@ "Name": "RtInternalPipelineSpvPassMask" }, { - "Name": "TraceRayCounterMode", - "VariableName": "rtTraceRayCounterMode", + "Name": "RtTraceRayCounterMode", "Type": "enum", "Description": "Enable ray tracing counters. Written to the directory specified by RayTracingCapturePath. Press the RayTracingCaptureHotKey to dump when enabled.", "Scope": "Driver", @@ -2756,7 +2700,7 @@ } }, { - "Name": "RaytracingThreadGroupSizeX", + "Name": "RtThreadGroupSizeX", "Description": "Thread group size in x-dimension for ray tracing. This value is ignored if RaytracingFlattenThreadGroupSize is not 0.", "Tags": [ "Ray Tracing" @@ -2768,11 +2712,10 @@ "Default": 8 }, "Type": "uint32", - "Scope": "Driver", - "VariableName": "rtThreadGroupSizeX" + "Scope": "Driver" }, { - "Name": "RaytracingThreadGroupSizeY", + "Name": "RtThreadGroupSizeY", "Description": "Thread group size in y-dimension for ray tracing. This value is ignored if RaytracingFlattenThreadGroupSize is not 0.", "Tags": [ "Ray Tracing" @@ -2784,11 +2727,10 @@ "Default": 4 }, "Type": "uint32", - "Scope": "Driver", - "VariableName": "rtThreadGroupSizeY" + "Scope": "Driver" }, { - "Name": "RaytracingThreadGroupSizeZ", + "Name": "RtThreadGroupSizeZ", "Description": "Thread group size in z-dimension for ray tracing. This value is ignored if RaytracingFlattenThreadGroupSize is not 0.", "Tags": [ "Ray Tracing" @@ -2800,11 +2742,10 @@ "Default": 1 }, "Type": "uint32", - "Scope": "Driver", - "VariableName": "rtThreadGroupSizeZ" + "Scope": "Driver" }, { - "Name": "RaytracingFlattenThreadGroupSize", + "Name": "RtFlattenThreadGroupSize", "Description": "Ray tracing ThreadGroup Size in mode which flatten width and height. 0 indicates this mode is off. Otherwise, it is on and indicates the size in x-dimension of thread group size. If the mode is on, RaytracingThreadGroupSizeX/Y/Z are ignored.", "Tags": [ "Ray Tracing" @@ -2816,11 +2757,10 @@ "Default": 0 }, "Type": "uint32", - "Scope": "Driver", - "VariableName": "rtFlattenThreadGroupSize" + "Scope": "Driver" }, { - "Name": "TraceRayProfileMaxIteration", + "Name": "RtProfileMaxIteration", "Description": "Maximum trace ray loop iteration count when TraceRayProfileForceMaxIteration is set.", "Tags": [ "Ray Tracing" @@ -2832,11 +2772,10 @@ "Default": 4294967295 }, "Type": "uint32", - "Scope": "Driver", - "VariableName": "rtProfileMaxIteration" + "Scope": "Driver" }, { - "Name": "TraceRayProfileFlags", + "Name": "RtTraceRayProfileFlags", "Description": "Trace ray profile flags", "Tags": [ "Ray Tracing" @@ -2886,8 +2825,7 @@ "Default": "TraceRayProfileDisable" }, "Type": "enum", - "Scope": "Driver", - "VariableName": "rtTraceRayProfileFlags" + "Scope": "Driver" }, { "Name": "RtMaxRayRecursionDepth", @@ -2905,7 +2843,7 @@ "Scope": "Driver" }, { - "Name": "IndirectStageMask", + "Name": "RtIndirectStageMask", "Description": "Indicate which stages should use indirect call for recursive ray-tracing pipeline or inlining is disabled. This flag is a hint to compiler, final indirect call stages may different with initial setting.", "Tags": [ "Ray Tracing" @@ -2917,8 +2855,7 @@ "Default": "0xffffffff" }, "Type": "uint32", - "Scope": "Driver", - "VariableName": "rtIndirectStageMask" + "Scope": "Driver" }, { "Name": "IndirectCallTargetOccupancyPerSimd", @@ -2951,37 +2888,19 @@ "Scope": "Driver" }, { - "Name": "EnableHwIntersectRay", - "Description": "Enables use of the BVH intersect ray instructions if they are supported by the HW.", - "Tags": [ - "Ray Tracing" - ], - "BuildTypes": [ - "VKI_RAY_TRACING" - ], - "Defaults": { - "Default": true - }, - "Type": "bool", - "Scope": "Driver", - "VariableName": "rtEnableHwIntersectRay" - }, - { - "Name": "EnableNodePointerFlags", + "Name": "RtEnableNodePointerFlags", "Description": "Encode flags into pointer bits", "Tags": [ "Ray Tracing" ], "BuildTypes": [ - "VKI_RAY_TRACING", - "VKI_BUILD_GFX11" + "VKI_RAY_TRACING" ], "Defaults": { "Default": true }, "Type": "bool", - "Scope": "Driver", - "VariableName": "rtEnableNodePointerFlags" + "Scope": "Driver" }, { "Description": "Box sorting heuristic", @@ -3040,7 +2959,7 @@ "Scope": "Driver" }, { - "Name": "EnableBVHCollapse", + "Name": "RtEnableBVHCollapse", "Description": "Enable BVH Collapse", "Tags": [ "Ray Tracing" @@ -3052,8 +2971,7 @@ "Default": false }, "Type": "bool", - "Scope": "Driver", - "VariableName": "rtEnableBVHCollapse" + "Scope": "Driver" }, { "Description": "Mode for which interior box nodes in BLAS use fp16 bounding boxes", @@ -3093,8 +3011,7 @@ ] }, "Type": "enum", - "VariableName": "rtFp16BoxNodesInBlasMode", - "Name": "Fp16BoxNodesInBlasMode", + "Name": "RtFp16BoxNodesInBlasMode", "Scope": "Driver" }, { @@ -3113,7 +3030,7 @@ "Scope": "Driver" }, { - "Name": "AllowFp16BoxNodesInUpdatableBVH", + "Name": "RtAllowFp16BoxNodesInUpdatableBVH", "Description": "Allow fp16BoxNodesInBlasMode to take effect in updatable BVHs", "Tags": [ "Ray Tracing" @@ -3125,8 +3042,7 @@ "Default": false }, "Type": "bool", - "Scope": "Driver", - "VariableName": "rtAllowFp16BoxNodesInUpdatableBVH" + "Scope": "Driver" }, { "Description": "Surface area (sa) threshold used by Fp16BoxNodesInBlasMode mode DxcFp16BoxNodesInBlasModeMixed. An interior BLAS node is converted from fp32 to fp16 when (saAsFp16 < (threshold * saAsFp32)). A value of 0 means use the default 1.5. Value is clamped from 1.0f - 8.0f", @@ -3140,8 +3056,7 @@ "Default": 0.0 }, "Type": "float", - "VariableName": "rtFp16BoxNodesInBlasModeMixedThreshold", - "Name": "Fp16BoxNodesInBlasModeMixedThreshold", + "Name": "RtFp16BoxNodesInBlasModeMixedThreshold", "Scope": "Driver" }, { @@ -3156,8 +3071,7 @@ "Default": true }, "Type": "bool", - "VariableName": "rtEnableTopDownBuild", - "Name": "EnableTopDownBuild", + "Name": "RtEnableTopDownBuild", "Scope": "Driver" }, { @@ -3193,8 +3107,7 @@ ] }, "Type": "enum", - "VariableName": "rtEnableTreeRebraid", - "Name": "EnableTreeRebraid", + "Name": "RtEnableTreeRebraid", "Scope": "Driver" }, { @@ -3224,8 +3137,7 @@ "Default": false }, "Type": "bool", - "VariableName": "rtEnableTriangleSplitting", - "Name": "EnableTriangleSplitting", + "Name": "RtEnableTriangleSplitting", "Scope": "Driver" }, { @@ -3237,15 +3149,14 @@ "VKI_RAY_TRACING" ], "Defaults": { - "Default": "1.3f" + "Default": 1.3 }, "Type": "float", - "VariableName": "rtTriangleSplittingFactor", - "Name": "TriangleSplittingFactor", + "Name": "RtTriangleSplittingFactor", "Scope": "Driver" }, { - "Name": "EnableMortonCode30", + "Name": "RtEnableMortonCode30", "Description": "Enable Morton Code 30 bits", "Tags": [ "Ray Tracing" @@ -3257,8 +3168,7 @@ "Default": true }, "Type": "bool", - "Scope": "Driver", - "VariableName": "rtEnableMortonCode30" + "Scope": "Driver" }, { "Description": "Enable Variable Bits Morton Codes - Selects the largest axis per bit rather than rotating xyzxyz", @@ -3276,7 +3186,7 @@ "Scope": "Driver" }, { - "Name": "EnablePrefixScanDLB", + "Name": "RtEnablePrefixScanDLB", "Description": "Enable Prefix Scan with Decoupled Look-back", "Tags": [ "Ray Tracing" @@ -3288,11 +3198,10 @@ "Default": false }, "Type": "bool", - "Scope": "Driver", - "VariableName": "rtEnablePrefixScanDLB" + "Scope": "Driver" }, { - "Name": "TriangleCompressionAutoMode", + "Name": "RtTriangleCompressionAutoMode", "Description": "Set which BVH build flags are required to enable triangle compression when TriangleCompressionMode is Auto.", "Tags": [ "Ray Tracing" @@ -3355,7 +3264,6 @@ ] }, "Type": "enum", - "VariableName": "rtTriangleCompressionAutoMode", "Scope": "Driver" }, { @@ -3572,7 +3480,7 @@ "Scope": "Driver" }, { - "Name": "TriangleCompressionMode", + "Name": "RtTriangleCompressionMode", "Description": "Modifies triangle compression algorithm if AccelerationStructureType is set to the HW-supported format DxcAccelTypeBVH4.", "Tags": [ "Ray Tracing" @@ -3605,8 +3513,7 @@ ] }, "Type": "enum", - "Scope": "Driver", - "VariableName": "rtTriangleCompressionMode" + "Scope": "Driver" }, { "Description": "Use a cost calculation when batching triangles for pair compression.", @@ -3656,8 +3563,7 @@ ] }, "Type": "enum", - "VariableName": "rtBvhBuildModeOverride", - "Name": "BvhBuildModeOverride", + "Name": "RtBvhBuildModeOverride", "Scope": "Driver" }, { @@ -3697,7 +3603,7 @@ "Scope": "Driver" }, { - "Name": "BvhBuildModeDefault", + "Name": "RtBvhBuildModeDefault", "Description": "BVH build mode for default acceleration structure builds", "Tags": [ "Ray Tracing" @@ -3712,11 +3618,10 @@ "Name": "BvhBuildMode" }, "Type": "enum", - "Scope": "Driver", - "VariableName": "rtBvhBuildModeDefault" + "Scope": "Driver" }, { - "Name": "BvhBuildModeFastTrace", + "Name": "RtBvhBuildModeFastTrace", "Description": "BVH build mode for acceleration structure builds for fast trace", "Tags": [ "Ray Tracing" @@ -3731,11 +3636,10 @@ "Name": "BvhBuildMode" }, "Type": "enum", - "Scope": "Driver", - "VariableName": "rtBvhBuildModeFastTrace" + "Scope": "Driver" }, { - "Name": "BvhBuildModeFastBuild", + "Name": "RtBvhBuildModeFastBuild", "Description": "BVH build mode for fast acceleration structure builds", "Tags": [ "Ray Tracing" @@ -3750,8 +3654,7 @@ "Name": "BvhBuildMode" }, "Type": "enum", - "Scope": "Driver", - "VariableName": "rtBvhBuildModeFastBuild" + "Scope": "Driver" }, { "Description": "Disable validating image SRD type in shader code. Only in ray tracing shader code. Image resource type checking is neccessary depending on the HW.", @@ -3891,7 +3794,7 @@ "Scope": "Driver" }, { - "Name": "BvhCpuBuildMode", + "Name": "RtBvhCpuBuildMode", "Description": "BVH build mode for default acceleration structure host builds", "Tags": [ "Ray Tracing" @@ -3919,11 +3822,10 @@ ] }, "Type": "enum", - "Scope": "Driver", - "VariableName": "rtBvhCpuBuildMode" + "Scope": "Driver" }, { - "Name": "EnableUpdateParallel", + "Name": "RtEnableUpdateParallel", "Description": "Enable update parallel optimization during acceleration structure builds", "Tags": [ "Ray Tracing" @@ -3935,8 +3837,7 @@ "Default": true }, "Type": "bool", - "Scope": "Driver", - "VariableName": "rtEnableUpdateParallel" + "Scope": "Driver" }, { "Description": "Enable parallel BVH build (no barriers)", @@ -3950,8 +3851,7 @@ "Default": true }, "Type": "bool", - "VariableName": "rtEnableBuildParallel", - "Name": "EnableBuildParallel", + "Name": "RtEnableBuildParallel", "Scope": "Driver" }, { @@ -3970,7 +3870,7 @@ "Scope": "Driver" }, { - "Name": "EnableAcquireReleaseInterface", + "Name": "RtEnableAcquireReleaseInterface", "Description": "Enable Acquire/release-based barrier interface if PAL reports the ASIC supports it.", "Tags": [ "Ray Tracing" @@ -3982,8 +3882,7 @@ "Default": false }, "Type": "bool", - "Scope": "Driver", - "VariableName": "rtEnableAcquireReleaseInterface" + "Scope": "Driver" }, { "Name": "EnableFusedInstanceNode", @@ -4013,7 +3912,7 @@ } }, { - "Name": "EnableAccelerationStructureScratchMemoryDump", + "Name": "RtEnableAccelerationStructureScratchMemoryDump", "Description": "Dumps scratch memory from acceleration structures. Written to the directory specified by BaseLogDirPath.", "Tags": [ "Ray Tracing" @@ -4025,11 +3924,10 @@ "Default": false }, "Type": "bool", - "Scope": "Driver", - "VariableName": "rtEnableAccelerationStructureScratchMemoryDump" + "Scope": "Driver" }, { - "Name": "EnableBuildAccelStructStats", + "Name": "RtEnableBuildAccelStructStats", "Description": "Dump built acceleration stats. (Pending implementation)", "Tags": [ "Ray Tracing" @@ -4041,8 +3939,7 @@ "Default": false }, "Type": "bool", - "Scope": "Driver", - "VariableName": "rtEnableBuildAccelStructStats" + "Scope": "Driver" }, { "Name": "IndirectCallConvention", @@ -4196,7 +4093,7 @@ "Scope": "Driver" }, { - "Name": "EnableCompilePipelineLibrary", + "Name": "RtEnableCompilePipelineLibrary", "Description": "Compile pipeline library as a ShaderLibrary.", "Tags": [ "Ray Tracing" @@ -4208,11 +4105,10 @@ "Default": true }, "Type": "bool", - "Scope": "Driver", - "VariableName": "rtEnableCompilePipelineLibrary" + "Scope": "Driver" }, { - "Name": "EnableRayTracingAccelerationStructureIndirectBuild", + "Name": "RtEnableAccelStructIndirectBuild", "Description": "Enable ray tracing acceleration structure indirect build", "Tags": [ "Ray Tracing" @@ -4224,11 +4120,10 @@ "Default": true }, "Type": "bool", - "Scope": "Driver", - "VariableName": "rtEnableAccelStructIndirectBuild" + "Scope": "Driver" }, { - "Name": "EnableRayQueryCsSwizzle", + "Name": "RtEnableRayQueryCsSwizzle", "Description": "Determines if the driver performs swizzling logic on the thread + group indices for compute shaders that use the ray query feature.", "Tags": [ "Ray Tracing" @@ -4240,8 +4135,7 @@ "Default": false }, "Type": "bool", - "Scope": "Driver", - "VariableName": "rtEnableRayQueryCsSwizzle" + "Scope": "Driver" }, { "Description": "Swizzling mode on the thread + group indices for compute shaders that use the ray query feature.", @@ -4258,12 +4152,11 @@ "Name": "ThreadGroupSwizzleMode" }, "Type": "enum", - "VariableName": "rayQueryCsSwizzle", "Name": "RayQueryCsSwizzle", "Scope": "Driver" }, { - "Name": "EnableDispatchRaysOuterSwizzle", + "Name": "RtEnableDispatchRaysOuterSwizzle", "Description": "Determines if the driver performs swizzling logic on the thread group indices used for ray tracing shaders. When this is false, a standard row major pattern is used.", "Tags": [ "Ray Tracing" @@ -4275,11 +4168,10 @@ "Default": true }, "Type": "bool", - "Scope": "Driver", - "VariableName": "rtEnableDispatchRaysOuterSwizzle" + "Scope": "Driver" }, { - "Name": "EnableDispatchRaysInnerSwizzle", + "Name": "RtEnableDispatchRaysInnerSwizzle", "Description": "Determines if the driver performs swizzling logic on the thread indices inside ray tracing thread groups. When this is false, a standard row major pattern is used.", "Tags": [ "Ray Tracing" @@ -4291,11 +4183,10 @@ "Default": true }, "Type": "bool", - "Scope": "Driver", - "VariableName": "rtEnableDispatchRaysInnerSwizzle" + "Scope": "Driver" }, { - "Name": "OuterTileSize", + "Name": "RtOuterTileSize", "Description": "The size of outer tile for ray tracing shader thread swizzling.", "Tags": [ "Ray Tracing" @@ -4307,8 +4198,7 @@ "Default": 4 }, "Type": "uint32", - "Scope": "Driver", - "VariableName": "rtOuterTileSize" + "Scope": "Driver" }, { "Description": "Enable ray tracing support", @@ -4379,10 +4269,7 @@ { "Name": "EmulatedRtIpLevel2_0", "Value": 3, - "Description": "Emulate ray tracing IP level 2.0 feature set.", - "BuildTypes": [ - "VKI_BUILD_GFX11" - ] + "Description": "Emulate ray tracing IP level 2.0 feature set." } ] }, @@ -4400,7 +4287,7 @@ "VKI_RAY_TRACING" ], "Defaults": { - "Default": "0.1f" + "Default": 0.1 }, "Type": "float", "Scope": "Driver" @@ -4429,7 +4316,7 @@ "VKI_RAY_TRACING" ], "Defaults": { - "Default": "6" + "Default": 6 }, "Type": "uint32", "Name": "NumMortonSizeBits", @@ -4444,10 +4331,9 @@ "VKI_RAY_TRACING" ], "Defaults": { - "Default": "10" + "Default": 10 }, "Type": "uint32", - "VariableName": "plocRadius", "Name": "PlocRadius", "Scope": "Driver" }, @@ -4463,12 +4349,11 @@ "Default": true }, "Type": "bool", - "VariableName": "rtAutoSkipAabbIntersections", - "Name": "AutoSkipAabbIntersections", + "Name": "RtAutoSkipAabbIntersections", "Scope": "Driver" }, { - "Name": "EmitRayTracingShaderDataToken", + "Name": "RtEmitRayTracingShaderDataToken", "Description": "Emit Ray Tracing Shader Data Token", "Tags": [ "Ray Tracing" @@ -4480,8 +4365,7 @@ "Default": false }, "Type": "bool", - "Scope": "Driver", - "VariableName": "rtEmitRayTracingShaderDataToken" + "Scope": "Driver" }, { "Description": "Enable using LDS for function arguments. This is enabled for non-recursive indirect pipelines only", @@ -4528,6 +4412,7 @@ }, "ValidValues": { "IsEnum": true, + "Is64Bit": true, "Values": [ { "Name": "DbgBarrierDrawNonIndexed", @@ -5171,7 +5056,7 @@ "VKI_RAY_TRACING" ], "Defaults": { - "Default": "0" + "Default": 0 }, "Type": "uint32", "Name": "FastBuildThreshold", @@ -5186,7 +5071,7 @@ "VKI_RAY_TRACING" ], "Defaults": { - "Default": "0" + "Default": 0 }, "Type": "uint32", "Name": "lbvhBuildThreshold", @@ -5278,14 +5163,13 @@ "Optimization" ], "Flags": { - "IsPath": true + "IsDir": true }, "Defaults": { "Default": "" }, "Name": "AppProfileDumpDir", "Type": "string", - "Size": 512, "Scope": "Driver" }, { @@ -5731,7 +5615,7 @@ "Memory" ], "Defaults": { - "Default": "0.1f" + "Default": 0.1 }, "Scope": "Driver", "Type": "float", @@ -6093,16 +5977,6 @@ "Defaults": { "Default": 0 }, - "DependsOn": { - "Settings": [ - { - "Values": [ - true - ], - "Name": "ForceAppProfileEnable" - } - ] - }, "Scope": "Driver", "Type": "uint32", "Name": "ForceAppProfileValue" @@ -6208,11 +6082,6 @@ "Defaults": { "Default": true }, - "DependsOn": { - "OS": [ - "Linux" - ] - }, "Scope": "Driver", "Type": "bool", "Name": "UseFlipHint" @@ -6225,11 +6094,6 @@ "Defaults": { "Default": false }, - "DependsOn": { - "OS": [ - "Linux" - ] - }, "Scope": "Driver", "Type": "bool", "Name": "IgnorePreferredPresentMode" @@ -6361,7 +6225,9 @@ "Description": "Optimize tiling mode for rendering performance." } ], - "Name": "Pal::TilingOptMode" + "Name": "Pal::TilingOptMode", + "SkipGen": true, + "IsEnum": true }, "Description": "Hints to PAL to select the appropriate tiling mode for an optimization target. 0: Balanced 1: OptForSpace 2: OptForSpeed", "Tags": [ @@ -6405,7 +6271,6 @@ }, "Scope": "Driver", "Type": "uint32", - "VariableName": "strictImageSizeRequirements", "Name": "StrictImageSizeRequirements" }, { @@ -6437,7 +6302,9 @@ "Description": "Prefer interleaved coordinate swizzle modes." } ], - "Name": "Pal::ImageTilingPattern" + "Name": "Pal::ImageTilingPattern", + "IsEnum": true, + "SkipGen": true }, "Description": "Hints to pal to identify a preference for how this image is organized. 0: Default 1: Standard 2: XMajor 3: YMajor 4: Interleaved", "Tags": [ @@ -6479,7 +6346,9 @@ "Description": "Prefer interleaved coordinate swizzle modes." } ], - "Name": "Pal::ImageTilingPattern" + "Name": "Pal::ImageTilingPattern", + "IsEnum": true, + "SkipGen": true }, "Description": "Override the ImageTilingPreference setting value for 3D images used as color attachments or shader storage.", "Tags": [ @@ -6574,8 +6443,7 @@ }, "Scope": "Driver", "Type": "uint32", - "VariableName": "optImgMaskToApplyShaderReadUsageForTransferSrc", - "Name": "TransferSrcUsageAsShaderReadMask" + "Name": "OptImgMaskToApplyShaderReadUsageForTransferSrc" }, { "Description": "For image created with usage of transfer dst, add shader_write in PAL usage if VkImageUsageFlagBits in this mask is set.", @@ -6587,8 +6455,7 @@ }, "Scope": "Driver", "Type": "uint32", - "VariableName": "optImgMaskToApplyShaderWriteUsageForTransferDst", - "Name": "TransferDstUsageAsShaderWriteMask" + "Name": "OptImgMaskToApplyShaderWriteUsageForTransferDst" }, { "Description": "If set, an image with color target usage bit does not implicitly allow to be in resolve_src or resolve_dst layout.", @@ -6599,8 +6466,7 @@ "Default": false }, "Type": "bool", - "VariableName": "optColorTargetUsageDoesNotContainResolveLayout", - "Name": "ColorTargetUsageDoesNotContainResolveLayout", + "Name": "OptColorTargetUsageDoesNotContainResolveLayout", "Scope": "Driver" }, { @@ -6616,7 +6482,7 @@ "Scope": "Driver" }, { - "Name": "RenderStateCacheEnable", + "Name": "OptRenderStateCacheEnable", "Description": "This bitmask denotes which subset of pipeline render state is cached at the device-level. Caching render state enables more efficient redundancy checking when recording command buffers. This redundancy checking is mainly limited to pipeline state that is either invisible to the API (e.g. certain PAL objects) or state that the application has marked in the pipeline as non-dynamic, meaning its values are programmed during pipeline bind.", "Tags": [ "Optimization" @@ -6688,8 +6554,7 @@ ] }, "Scope": "Driver", - "Type": "uint32", - "VariableName": "optRenderStateCacheEnable" + "Type": "uint32" }, { "Description": "If set, the MUTABLE flag on image is ignored.", @@ -7023,7 +6888,7 @@ "Optimization" ], "Defaults": { - "Default": "false" + "Default": false }, "Scope": "Driver", "Type": "bool", @@ -7035,7 +6900,7 @@ "Optimization" ], "Defaults": { - "Default": "false" + "Default": false }, "Scope": "Driver", "Type": "bool", @@ -7333,8 +7198,7 @@ }, "Scope": "Driver", "Type": "string", - "Name": "DevModeStartFrameDebugUtilsLabel", - "Size": 512 + "Name": "DevModeStartFrameDebugUtilsLabel" }, { "Description": "Determines the string that's used to trigger an end-frame delimiter via vkQueueInsertDebugUtilsLabelEXT. This string is \"AmdFrameEnd\" by default", @@ -7346,8 +7210,7 @@ }, "Scope": "Driver", "Type": "string", - "Name": "DevModeEndFrameDebugUtilsLabel", - "Size": 512 + "Name": "DevModeEndFrameDebugUtilsLabel" }, { "Description": "Injects a queue WaitIdle() at the end of a debug utils end-frame delimiter to avoid overlapping frames.", @@ -7391,17 +7254,16 @@ "Developer Mode" ], "Flags": { - "IsPath": true + "IsDir": true }, "Defaults": { "Default": "PipelineReplace", - "WinDefault": "PipelineReplace", - "LnxDefault": "PipelineReplace" + "Windows": "PipelineReplace", + "Linux": "PipelineReplace" }, "Scope": "Driver", "Name": "DevModeElfReplacementDirectory", - "Type": "string", - "Size": 512 + "Type": "string" }, { "Description": "This controls whether RGP traces will include shader code of created pipelines.", @@ -7592,16 +7454,6 @@ "Defaults": { "Default": 0 }, - "DependsOn": { - "Settings": [ - { - "Values": [ - true - ], - "Name": "DevModeSqttTraceBeginEndTagEnable" - } - ] - }, "Scope": "Driver", "Type": "uint64", "Name": "DevModeSqttTraceBeginTagValue" @@ -7614,16 +7466,6 @@ "Defaults": { "Default": 0 }, - "DependsOn": { - "Settings": [ - { - "Values": [ - true - ], - "Name": "DevModeSqttTraceBeginEndTagEnable" - } - ] - }, "Flags": { "IsHex": true }, @@ -7701,13 +7543,12 @@ }, "Defaults": { "Default": "trace.rgp", - "WinDefault": "trace.rgp", - "LnxDefault": "trace.rgp" + "Windows": "trace.rgp", + "Linux": "trace.rgp" }, "Scope": "Driver", "Name": "DevModeRgpTraceDumpFile", - "Type": "string", - "Size": 256 + "Type": "string" }, { "Name": "RgpInstTraceBarrierEnabled", @@ -7772,7 +7613,7 @@ "Shader Tuning" ], "Defaults": { - "Default": "FragmentShader" + "Default": 6 }, "ValidValues": { "IsEnum": true, @@ -7832,7 +7673,7 @@ "Default": 0 }, "Type": "uint32", - "Name": "OverrideNumVGPRsAvailable", + "Name": "OverrideNumVgprsAvailable", "Scope": "Driver" }, { @@ -8547,11 +8388,10 @@ "Default": false }, "Type": "bool", - "VariableName": "forceInvalidAccelStruct", "Scope": "Driver" }, { - "Name": "MaxRayLength", + "Name": "RtMaxRayLength", "Description": "Override the maximum extent of the ray, this value overrides the application specified rayDesc.tMax value.", "Tags": [ "Ray Tracing" @@ -8563,7 +8403,6 @@ "Default": 0.0 }, "Type": "float", - "VariableName": "rtMaxRayLength", "Scope": "Driver" }, { @@ -8594,8 +8433,7 @@ }, "Scope": "Driver", "Name": "WaitForDebuggerExecutableName", - "Type": "string", - "Size": 256 + "Type": "string" }, { "Description": "Timeout the driver in millisecond to give debuggers a chance to load all of the symbols", @@ -8636,19 +8474,30 @@ "Scope": "Driver", "Name": "DebugPrintfBufferSize" }, + { + "Description": "Enable print output for GPU hang", + "Tags": [ + "Debug Printf" + ], + "Defaults": { + "Default": false + }, + "Type": "bool", + "Scope": "Driver", + "Name": "EnableHangOutput" + }, { "Description": "Relative directory where printf is dumped. Root directory is determined by AMD_DEBUG_DIR environment variable. ", "Tags": [ "Debug Printf" ], "Flags": { - "IsPath": true + "IsDir": true }, "Defaults": { "Default": "DebugPrintfDump" }, "Type": "string", - "Size": 256, "Scope": "Driver", "Name": "DebugPrintfDumpFolder" }, @@ -8684,11 +8533,6 @@ "Defaults": { "Default": true }, - "DependsOn": { - "OS": [ - "Linux" - ] - }, "Type": "bool", "Scope": "Driver", "Name": "InitializeVramToZero" @@ -8754,6 +8598,72 @@ "Type": "bool", "Scope": "Driver", "Name": "ReportSuboptimalPresentAsOutOfDate" + }, + { + "Name": "ExportNVComputeShaderDerivatives", + "Description": "Export extension NV_compute_shader_derivatives", + "Tags": [ + "General" + ], + "Defaults": { + "Default": false + }, + "Type": "bool", + "Scope": "Driver" + }, + { + "Description": "[GFX10+ only] MALL RPM Views policy.", + "Tags": [ + "Memory" + ], + "Defaults": { + "Default": "RpmViewBypassMallOff" + }, + "Flags": { + "IsHex": true, + "IsBitmask": true + }, + "ValidValues": { + "IsEnum": true, + "Name": "RpmViewBypassMall", + "Values": [ + { + "Name": "RpmViewBypassMallOff", + "Value": 0, + "Description": "Disable MALL bypass" + }, + { + "Name": "RpmViewBypassMallOnRead", + "Value": 1, + "Description": "Skip MALL for read access of views created in RPM" + }, + { + "Name": "RpmViewBypassMallOnWrite", + "Value": 2, + "Description": "Skip MALL for write access of views created in RPM" + }, + { + "Name": "RpmViewBypassMallOnCbDbWrite", + "Value": 4, + "Description": "Skipp MALL for CB and DB writes." + } + ] + }, + "Type": "uint32", + "Name": "RpmViewsBypassMall", + "Scope": "Driver" + }, + { + "Description": "Disable exposing image format support if image type is 3D and tiling is linear", + "Tags": [ + "General" + ], + "Defaults": { + "Default": true + }, + "Type": "bool", + "Scope": "Driver", + "Name": "Disable3dLinearImageFormatSupport" } ] } \ No newline at end of file