From a367518e0bf308056492d994c5713e06af9429af Mon Sep 17 00:00:00 2001
From: qiaojbao
Date: Wed, 27 Nov 2024 14:00:51 +0800
Subject: [PATCH] Update xgl from commit 36e1c4ef

Update Khronos Vulkan Headers to 1.3.301
Update compliant CTS version to 1.3.9.2
Fix buffer overflow when the count of display modes exceeds MaxModePerScreen (64) (xgl issue #179)
Expose extension VK_EXT_swapchain_colorspace
Fix command buffer stack usage estimates
Add persistent waves to CPS path
Extend GFX11 app opts to Strix
Copy cbState struct as a whole
Add BG3 layer to modify a barrier
Disable DCC when the application transitions to the feedback loop image layout (AMDVLK issue #375)
Fix NSA threshold for World War Z and Sniper Elite 5
Get WaveSize from GPURT compiler options
Remove unnecessary UpdateFrameTraceController call
Update RtFp16BoxNodes values
Change VK_ASSERT_MSG to VK_ASSERT_ALWAYS_MSG
Set AllowVaryWaveSize for GPURT shaders
Remove VK_ALLOC_A macro
Fix nsaThreshold definitions in app_profile
Bail out on error when parsing shader profile
Enable the option OptimizePointSizeWrite
Fix CTS failures in dEQP-VK.pipeline.pipeline_library.graphics_library.misc.other.*
Disable descriptorBufferCaptureReplay
Disable DCC for VK_IMAGE_LAYOUT_RENDERING_LOCAL_READ_KHR
Update some settings' values
[RT] Fix the use of the 'forceInvalidAccelStruct' setting
Fix usage of LayoutUncompressed for LAYOUT_RENDERING_LOCAL_READ
Fix coding standard issues in vk_defines
Expose more graphics and compute queues
Disable htile for external memory when creating an image, to work around the corruption issue in interop with Mesa GL
Initialize the fence count to 0 for every device index
[DPRINTF] Fix debug printf crash
Fix sample shading being enabled incorrectly for POPS
Expose VK_KHR_compute_shader_derivatives
Fix menu list corruption in Blender 4.3.0 Beta
Emit no RGP markers for RGD
Bump up GPURT version to 51
Enable GpuRt setting for persistent waves
Update PAL version in XGL to 909
VK_EXT_shader_replicated_composites: driver implementation
Fix "Unknown()" events appearing in RGP captures
Specify SRD stride for SrvTable and TypedSrvTables as 4
Fix memory leaks
Reset plane index according to the latest aspectMask, to fix corruption in Blender 4.3.0 Beta: EEVEE render engine
Fix an assertion on an incorrect primitive type
Remove m_stringTableId from devmode
Bug fixes for Proton ray tracing games
Use acquire/release for CmdWaitEvents
Clean up macros and build options
---
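Note on the display-mode overflow fix listed above: the bug class is copying an
unbounded, queried mode count into a fixed-size array. The sketch below is
illustrative only (MaxModePerScreen, DisplayMode, and CopyDisplayModes are
stand-in names, not the actual xgl code); it shows the clamp that keeps the
copy inside the destination array:

    #include <algorithm>
    #include <cstdint>

    constexpr uint32_t MaxModePerScreen = 64; // fixed capacity named in the changelog

    struct DisplayMode
    {
        uint32_t width;
        uint32_t height;
        uint32_t refreshRate;
    };

    // Hypothetical helper: copy at most MaxModePerScreen entries instead of
    // trusting the reported count, which may exceed the destination capacity.
    static uint32_t CopyDisplayModes(
        const DisplayMode* pReported,
        uint32_t           reportedCount,
        DisplayMode        (&dst)[MaxModePerScreen])
    {
        const uint32_t count = (reportedCount < MaxModePerScreen) ? reportedCount
                                                                  : MaxModePerScreen;
        std::copy(pReported, pReported + count, dst);
        return count; // callers iterate over the clamped count, never past the array
    }

Clamping at the copy site keeps every later consumer of the array in bounds,
even when a screen reports more than 64 display modes.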
 CMakeLists.txt | 108 +--
 cmake/XglCompileDefinitions.cmake | 29 +-
 cmake/XglCompilerOptions.cmake | 6 +-
 cmake/XglHelper.cmake | 24 +-
 cmake/XglOptions.cmake | 31 +-
 cmake/XglOverrides.cmake | 120 ++--
 cmake/XglVersions.cmake | 10 +-
 icd/CMakeLists.txt | 117 ++--
 icd/Loader/LunarG/Lnx/amd-icd.json | 4 +-
 icd/api/app_profile.cpp | 28 +-
 icd/api/app_resource_optimizer.cpp | 1 +
 icd/api/appopt/bvh_batch_layer.cpp | 2 +-
 icd/api/appopt/gpu_decode_layer.cpp | 2 +-
 icd/api/appopt/gravity_mark_layer.cpp | 119 ----
 icd/api/appopt/gravity_mark_layer.h | 57 --
 .../llpc/generic/SniperElite5/profile.json | 23 +
 .../llpc/generic/WorldWarZ/profile.json | 17 +-
 icd/api/barrier_policy.cpp | 19 +-
 icd/api/debug_printf.cpp | 176 ++--
 icd/api/devmode/devmode_mgr.h | 3 -
 icd/api/devmode/devmode_rgp.cpp | 8 +-
 icd/api/devmode/devmode_rgp.h | 7 -
 icd/api/devmode/devmode_ubertrace.cpp | 10 +-
 icd/api/devmode/devmode_ubertrace.h | 6 -
 icd/api/include/app_profile.h | 4 +-
 icd/api/include/barrier_policy.h | 8 +-
 icd/api/include/graphics_pipeline_common.h | 6 +-
 icd/api/include/khronos/GLSL.ext.AMD.h | 37 -
 .../khronos/sdk-1.3/vulkan/vulkan_beta.h | 28 +-
 .../khronos/sdk-1.3/vulkan/vulkan_core.h | 226 ++--
 icd/api/include/khronos/vulkan.h | 3 -
 icd/api/include/pipeline_binary_cache.h | 10 -
 icd/api/include/pipeline_compiler.h | 2 -
 icd/api/include/vk_alloccb.h | 2 +-
 icd/api/include/vk_cmdbuffer.h | 34 +-
 icd/api/include/vk_compute_pipeline.h | 1 +
 icd/api/include/vk_defines.h | 44 +-
 icd/api/include/vk_device.h | 2 +-
 icd/api/include/vk_dispatch.h | 2 +-
 icd/api/include/vk_extensions.h | 1 +
 icd/api/include/vk_graphics_pipeline.h | 4 +-
 icd/api/include/vk_physical_device.h | 6 +
 icd/api/include/vk_pipeline_layout.h | 25 +-
 icd/api/include/vk_queue.h | 12 +-
 icd/api/include/vk_utils.h | 15 +-
 icd/api/pipeline_binary_cache.cpp | 49 +-
 icd/api/pipeline_compiler.cpp | 43 +-
 icd/api/raytrace/ray_tracing_device.cpp | 53 +-
 icd/api/raytrace/vk_acceleration_structure.h | 1 -
 icd/api/raytrace/vk_ray_tracing_pipeline.cpp | 68 +-
 icd/api/raytrace/vk_ray_tracing_pipeline.h | 6 +
 icd/api/sqtt/sqtt_layer.cpp | 95 ++-
 icd/api/sqtt/sqtt_layer.h | 12 +-
 icd/api/sqtt/sqtt_mgr.cpp | 11 +
 icd/api/sqtt/sqtt_rgp_annotations.h | 1 +
 icd/api/strings/extensions.txt | 1 +
 icd/api/strings/strings.h | 1 +
 icd/api/vk_alloccb.cpp | 10 +-
 icd/api/vk_cmd_pool.cpp | 2 -
 icd/api/vk_cmdbuffer.cpp | 650 +++++++++++-------
 icd/api/vk_cmdbuffer_transfer.cpp | 14 +-
 icd/api/vk_compute_pipeline.cpp | 6 +-
 icd/api/vk_device.cpp | 244 ++--
 icd/api/vk_graphics_pipeline.cpp | 45 +-
 icd/api/vk_graphics_pipeline_library.cpp | 15 +
 icd/api/vk_image.cpp | 42 +-
 icd/api/vk_instance.cpp | 14 +-
 icd/api/vk_physical_device.cpp | 129 +++-
 icd/api/vk_pipeline.cpp | 40 +-
 icd/api/vk_pipeline_cache.cpp | 2 -
 icd/api/vk_pipeline_layout.cpp | 103 ++-
 icd/api/vk_queue.cpp | 46 +-
 icd/api/vk_swapchain.cpp | 5 +-
 icd/imported/gputexdecoder/gpuTexDecoder.cpp | 10 +-
 icd/res/ver.h | 6 +-
 icd/settings/settings.cpp | 80 ++-
 icd/settings/settings_xgl.json | 134 ++--
 icd/tools/generate/genShaderProfile.py | 10 +-
 icd/tools/generate/shaderProfileTemplate.py | 6 +
 79 files changed, 1812 insertions(+), 1541 deletions(-)
 delete mode 100644 icd/api/appopt/gravity_mark_layer.cpp
 delete mode 100644 icd/api/appopt/gravity_mark_layer.h
 create mode 100644 icd/api/appopt/shader_profiles/llpc/generic/SniperElite5/profile.json

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 40cd61cf..0d941189 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -28,8 +28,8 @@ cmake_policy(SET CMP0091 NEW)
 
 # This part set before "project(XGL VERSION 1 LANGUAGES C CXX)".
 # In a system has both gcc and clang compiler.
-option(XGL_USE_CLANG "Build with clang?" OFF)
-if(UNIX AND XGL_USE_CLANG)
+option(VKI_USE_CLANG "Build with clang?" 
OFF) +if(UNIX AND VKI_USE_CLANG) list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) include(XglCompilerOptions) xgl_use_clang_compiler() @@ -108,75 +108,75 @@ xgl_set_compiler() ### Sanitizers ######################################################################################################## # Temporary variables -set(ICD_SANITIZER_LINK_FLAGS "") -set(ICD_SANITIZER_COMPILE_FLAGS "") +set(VKI_SANITIZER_LINK_FLAGS "") +set(VKI_SANITIZER_COMPILE_FLAGS "") -if(XGL_USE_SANITIZER) +if(VKI_USE_SANITIZER) # -pthread is needed sometimes to fix a cmake bug: https://gitlab.kitware.com/cmake/cmake/issues/16609 - string(APPEND ICD_SANITIZER_LINK_FLAGS " -pthread") + string(APPEND VKI_SANITIZER_LINK_FLAGS " -pthread") if(UNIX) - if(XGL_USE_SANITIZER MATCHES "Address") - string(APPEND ICD_SANITIZER_LINK_FLAGS " -shared-libasan") + if(VKI_USE_SANITIZER MATCHES "Address") + string(APPEND VKI_SANITIZER_LINK_FLAGS " -shared-libasan") endif() - if(XGL_USE_SANITIZER STREQUAL "Address") + if(VKI_USE_SANITIZER STREQUAL "Address") xgl_append_common_sanitizer_flags() - string(APPEND ICD_SANITIZER_COMPILE_FLAGS " -fsanitize=address") - string(APPEND ICD_SANITIZER_LINK_FLAGS " -fsanitize=address") - elseif (XGL_USE_SANITIZER MATCHES "Memory(WithOrigins)?") + string(APPEND VKI_SANITIZER_COMPILE_FLAGS " -fsanitize=address") + string(APPEND VKI_SANITIZER_LINK_FLAGS " -fsanitize=address") + elseif (VKI_USE_SANITIZER MATCHES "Memory(WithOrigins)?") xgl_append_common_sanitizer_flags() - string(APPEND ICD_SANITIZER_COMPILE_FLAGS " -fsanitize=memory") - string(APPEND ICD_SANITIZER_LINK_FLAGS " -fsanitize=memory") - if(XGL_USE_SANITIZER STREQUAL "MemoryWithOrigins") - string(APPEND ICD_SANITIZER_COMPILE_FLAGS " -fsanitize-memory-track-origins") - string(APPEND ICD_SANITIZER_LINK_FLAGS " -fsanitize-memory-track-origins") + string(APPEND VKI_SANITIZER_COMPILE_FLAGS " -fsanitize=memory") + string(APPEND VKI_SANITIZER_LINK_FLAGS " -fsanitize=memory") + if(VKI_USE_SANITIZER STREQUAL "MemoryWithOrigins") + string(APPEND VKI_SANITIZER_COMPILE_FLAGS " -fsanitize-memory-track-origins") + string(APPEND VKI_SANITIZER_LINK_FLAGS " -fsanitize-memory-track-origins") endif() - elseif(XGL_USE_SANITIZER STREQUAL "Undefined") + elseif(VKI_USE_SANITIZER STREQUAL "Undefined") xgl_append_common_sanitizer_flags() - string(APPEND ICD_SANITIZER_COMPILE_FLAGS " -fsanitize=undefined -fno-sanitize=vptr,function \ + string(APPEND VKI_SANITIZER_COMPILE_FLAGS " -fsanitize=undefined -fno-sanitize=vptr,function \ -fno-sanitize-recover=all") - string(APPEND ICD_SANITIZER_LINK_FLAGS " -fsanitize=undefined") - elseif(XGL_USE_SANITIZER STREQUAL "Thread") + string(APPEND VKI_SANITIZER_LINK_FLAGS " -fsanitize=undefined") + elseif(VKI_USE_SANITIZER STREQUAL "Thread") xgl_append_common_sanitizer_flags() - string(APPEND ICD_SANITIZER_COMPILE_FLAGS " -fsanitize=thread") - string(APPEND ICD_SANITIZER_LINK_FLAGS " -fsanitize=thread") - elseif(XGL_USE_SANITIZER STREQUAL "Address;Undefined" OR - XGL_USE_SANITIZER STREQUAL "Undefined;Address") + string(APPEND VKI_SANITIZER_COMPILE_FLAGS " -fsanitize=thread") + string(APPEND VKI_SANITIZER_LINK_FLAGS " -fsanitize=thread") + elseif(VKI_USE_SANITIZER STREQUAL "Address;Undefined" OR + VKI_USE_SANITIZER STREQUAL "Undefined;Address") xgl_append_common_sanitizer_flags() - string(APPEND ICD_SANITIZER_COMPILE_FLAGS " -fsanitize=address,undefined -fno-sanitize=vptr,function \ + string(APPEND VKI_SANITIZER_COMPILE_FLAGS " -fsanitize=address,undefined -fno-sanitize=vptr,function \ -fno-sanitize-recover=all") - 
string(APPEND ICD_SANITIZER_LINK_FLAGS " -fsanitize=address,undefined") - elseif(XGL_USE_SANITIZER STREQUAL "Leaks") + string(APPEND VKI_SANITIZER_LINK_FLAGS " -fsanitize=address,undefined") + elseif(VKI_USE_SANITIZER STREQUAL "Leaks") xgl_append_common_sanitizer_flags() - string(APPEND ICD_SANITIZER_COMPILE_FLAGS " -fsanitize=leak") - string(APPEND ICD_SANITIZER_LINK_FLAGS " -fsanitize=leak") + string(APPEND VKI_SANITIZER_COMPILE_FLAGS " -fsanitize=leak") + string(APPEND VKI_SANITIZER_LINK_FLAGS " -fsanitize=leak") else() - message(FATAL_ERROR "Unsupported value of XGL_USE_SANITIZER: ${XGL_USE_SANITIZER}") + message(FATAL_ERROR "Unsupported value of VKI_USE_SANITIZER: ${VKI_USE_SANITIZER}") endif() elseif(MSVC) - if(XGL_USE_SANITIZER STREQUAL "Address") + if(VKI_USE_SANITIZER STREQUAL "Address") xgl_append_common_sanitizer_flags() - string(APPEND ICD_SANITIZER_COMPILE_FLAGS " -fsanitize=address") - string(APPEND ICD_SANITIZER_LINK_FLAGS " -fsanitize=address") + string(APPEND VKI_SANITIZER_COMPILE_FLAGS " -fsanitize=address") + string(APPEND VKI_SANITIZER_LINK_FLAGS " -fsanitize=address") else() - message(FATAL_ERROR "This sanitizer not yet supported in the MSVC environment: ${XGL_USE_SANITIZER}") + message(FATAL_ERROR "This sanitizer not yet supported in the MSVC environment: ${VKI_USE_SANITIZER}") endif() else() - message(FATAL_ERROR "XGL_USE_SANITIZER is not supported on this platform.") + message(FATAL_ERROR "VKI_USE_SANITIZER is not supported on this platform.") endif() - if(XGL_USE_SANITIZER MATCHES "(Undefined;)?Address(;Undefined)?") - string(APPEND ICD_SANITIZER_COMPILE_FLAGS " -fsanitize-address-use-after-scope") + if(VKI_USE_SANITIZER MATCHES "(Undefined;)?Address(;Undefined)?") + string(APPEND VKI_SANITIZER_COMPILE_FLAGS " -fsanitize-address-use-after-scope") endif() endif() -string(APPEND CMAKE_EXE_LINKER_FLAGS "${ICD_SANITIZER_LINK_FLAGS}") -string(APPEND CMAKE_SHARED_LINKER_FLAGS "${ICD_SANITIZER_LINK_FLAGS}") -string(APPEND CMAKE_C_FLAGS "${ICD_SANITIZER_COMPILE_FLAGS}") -string(APPEND CMAKE_CXX_FLAGS "${ICD_SANITIZER_COMPILE_FLAGS}") +string(APPEND CMAKE_EXE_LINKER_FLAGS "${VKI_SANITIZER_LINK_FLAGS}") +string(APPEND CMAKE_SHARED_LINKER_FLAGS "${VKI_SANITIZER_LINK_FLAGS}") +string(APPEND CMAKE_C_FLAGS "${VKI_SANITIZER_COMPILE_FLAGS}") +string(APPEND CMAKE_CXX_FLAGS "${VKI_SANITIZER_COMPILE_FLAGS}") # LLVM libc++ -if(XGL_ENABLE_LIBCXX) +if(VKI_ENABLE_LIBCXX) if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") string(APPEND CMAKE_EXE_LINKER_FLAGS " -stdlib=libc++") string(APPEND CMAKE_SHARED_LINKER_FLAGS " -stdlib=libc++") @@ -189,24 +189,24 @@ endif() ### GCOV source code coverage ######################################################################################### # Temporary variables -set(ICD_GCOV_LINK_FLAGS "") -set(ICD_GCOV_COMPILE_FLAGS "") +set(VKI_GCOV_LINK_FLAGS "") +set(VKI_GCOV_COMPILE_FLAGS "") -if(XGL_ENABLE_GCOV) +if(VKI_ENABLE_GCOV) if(UNIX) - message(STATUS "This gcov is supported on the UNIX environment: ${XGL_ENABLE_GCOV}") + message(STATUS "This gcov is supported on the UNIX environment: ${VKI_ENABLE_GCOV}") xgl_append_gcov_coverage_flags() elseif(MSVC) - message(FATAL_ERROR "This gcov not yet supported in the MSVC environment: ${XGL_ENABLE_GCOV}") + message(FATAL_ERROR "This gcov not yet supported in the MSVC environment: ${VKI_ENABLE_GCOV}") else() - message(FATAL_ERROR "XGL_ENABLE_GCOV is not supported on this platform.") + message(FATAL_ERROR "VKI_ENABLE_GCOV is not supported on this platform.") endif() endif() -string(APPEND CMAKE_EXE_LINKER_FLAGS 
"${ICD_GCOV_LINK_FLAGS}") -string(APPEND CMAKE_SHARED_LINKER_FLAGS "${ICD_GCOV_LINK_FLAGS}") -string(APPEND CMAKE_C_FLAGS "${ICD_GCOV_COMPILE_FLAGS}") -string(APPEND CMAKE_CXX_FLAGS "${ICD_GCOV_COMPILE_FLAGS}") +string(APPEND CMAKE_EXE_LINKER_FLAGS "${VKI_GCOV_LINK_FLAGS}") +string(APPEND CMAKE_SHARED_LINKER_FLAGS "${VKI_GCOV_LINK_FLAGS}") +string(APPEND CMAKE_C_FLAGS "${VKI_GCOV_COMPILE_FLAGS}") +string(APPEND CMAKE_CXX_FLAGS "${VKI_GCOV_COMPILE_FLAGS}") ### Generator Dependencies ############################################################################################ # Python3 @@ -228,7 +228,7 @@ endif() set(PERL_CMD ${PERL_EXECUTABLE}) # Wayland required -if (BUILD_WAYLAND_SUPPORT) +if (VKI_BUILD_WAYLAND) find_package(PkgConfig REQUIRED) pkg_check_modules(WAYLAND REQUIRED wayland-client) endif() diff --git a/cmake/XglCompileDefinitions.cmake b/cmake/XglCompileDefinitions.cmake index 797ac5ed..fbf82015 100644 --- a/cmake/XglCompileDefinitions.cmake +++ b/cmake/XglCompileDefinitions.cmake @@ -30,20 +30,13 @@ macro(xgl_set_compile_definitions) target_compile_definitions(xgl PRIVATE ${TARGET_ARCHITECTURE_ENDIANESS}ENDIAN_CPU) if(TARGET_ARCHITECTURE_BITS EQUAL 32) - target_compile_definitions(xgl PRIVATE ICD_X86_BUILD) + target_compile_definitions(xgl PRIVATE VKI_X86_BUILD) elseif(TARGET_ARCHITECTURE_BITS EQUAL 64) - target_compile_definitions(xgl PRIVATE ICD_X64_BUILD) + target_compile_definitions(xgl PRIVATE VKI_X64_BUILD) endif() # Turn on the memory tracker if enabled. - if(ICD_MEMTRACK) - target_compile_definitions(xgl PRIVATE ICD_MEMTRACK) - endif() - - # Enable relevant GPUOpen preprocessor definitions - if(ICD_GPUOPEN_DEVMODE_BUILD) - target_compile_definitions(xgl PRIVATE ICD_GPUOPEN_DEVMODE_BUILD) - endif() + target_compile_definitions(xgl PRIVATE $<$:VKI_MEMTRACK>) if(ICD_BUILD_LLPC) target_compile_definitions(xgl PRIVATE ICD_BUILD_LLPC) @@ -68,13 +61,13 @@ macro(xgl_set_compile_definitions) target_compile_definitions(xgl PRIVATE PAL_BUILD_GFX9=1) #if VKI_BUILD_GFX115 - if(XGL_BUILD_GFX115) + if(VKI_BUILD_GFX115) target_compile_definitions(xgl PRIVATE VKI_BUILD_GFX115=1) endif() #endif #if VKI_BUILD_STRIX1 - if(XGL_BUILD_STRIX1) + if(VKI_BUILD_STRIX1) target_compile_definitions(xgl PRIVATE VKI_BUILD_STRIX1=1) endif() #endif @@ -87,17 +80,11 @@ macro(xgl_set_compile_definitions) endif() #endif -#if VKI_NORMALIZED_TRIG_FUNCTIONS - if(VKI_NORMALIZED_TRIG_FUNCTIONS) - target_compile_definitions(xgl PRIVATE VKI_NORMALIZED_TRIG_FUNCTIONS) - endif() -#endif - #if VKI_RAY_TRACING #endif - if (XGL_ENABLE_GCOV) - target_compile_definitions(xgl PRIVATE ICD_ENABLE_GCOV) + if (VKI_ENABLE_GCOV) + target_compile_definitions(xgl PRIVATE VKI_ENABLE_GCOV) endif() #if VKI_GPU_DECOMPRESS @@ -109,7 +96,7 @@ macro(xgl_set_compile_definitions) #if VKI_RAY_TRACING #endif - if(BUILD_WAYLAND_SUPPORT) + if(VKI_BUILD_WAYLAND) target_compile_definitions(xgl PRIVATE VK_USE_PLATFORM_WAYLAND_KHR) endif() diff --git a/cmake/XglCompilerOptions.cmake b/cmake/XglCompilerOptions.cmake index 293d5c0e..0195a326 100644 --- a/cmake/XglCompilerOptions.cmake +++ b/cmake/XglCompilerOptions.cmake @@ -156,7 +156,7 @@ macro(xgl_set_compiler) endif() # Assertions - if(XGL_ENABLE_ASSERTIONS) + if(VKI_ENABLE_ASSERTIONS) # MSVC doesn't like _DEBUG on release builds. 
if(NOT MSVC) add_definitions(-D_DEBUG) @@ -221,7 +221,7 @@ function(xgl_compiler_options TARGET) -Wno-unused-parameter ) - if(ICD_ANALYSIS_WARNINGS_AS_ERRORS) + if(VKI_ANALYSIS_WARNINGS_AS_ERRORS) target_compile_options(${TARGET} PRIVATE -Werror -Wno-error=comment @@ -304,7 +304,7 @@ function(xgl_compiler_options TARGET) if(CMAKE_BUILD_TYPE_RELEASE) target_compile_options(${TARGET} PRIVATE -O3) - if(XGL_ENABLE_LTO) + if(VKI_ENABLE_LTO) if(${CMAKE_CXX_COMPILER_ID} MATCHES "GNU") execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) if(GCC_VERSION VERSION_GREATER 5.3 OR GCC_VERSION VERSION_EQUAL 5.3) diff --git a/cmake/XglHelper.cmake b/cmake/XglHelper.cmake index d8afa3f4..0cfcbf62 100644 --- a/cmake/XglHelper.cmake +++ b/cmake/XglHelper.cmake @@ -28,19 +28,19 @@ include_guard() macro(xgl_append_common_sanitizer_flags) if(NOT MSVC) # Append -fno-omit-frame-pointer and turn on debug info to get better stack traces. - string(APPEND ICD_SANITIZER_COMPILE_FLAGS " -fno-omit-frame-pointer") + string(APPEND VKI_SANITIZER_COMPILE_FLAGS " -fno-omit-frame-pointer") if (NOT CMAKE_BUILD_TYPE_DEBUG) - string(APPEND ICD_SANITIZER_COMPILE_FLAGS " -gline-tables-only") + string(APPEND VKI_SANITIZER_COMPILE_FLAGS " -gline-tables-only") else() # Use -O1 even in debug mode, otherwise sanitizers slowdown is too large. - string(APPEND ICD_SANITIZER_COMPILE_FLAGS " -O1") + string(APPEND VKI_SANITIZER_COMPILE_FLAGS " -O1") endif() elseif(CLANG_CL) # Keep frame pointers around. - string(APPEND ICD_SANITIZER_COMPILE_FLAGS " /Oy-") + string(APPEND VKI_SANITIZER_COMPILE_FLAGS " /Oy-") # Always ask the linker to produce symbols with asan. - string(APPEND ICD_SANITIZER_COMPILE_FLAGS " /Z7") - string(APPEND ICD_SANITIZER_LINK_FLAGS " -debug") + string(APPEND VKI_SANITIZER_COMPILE_FLAGS " /Z7") + string(APPEND VKI_SANITIZER_LINK_FLAGS " -debug") endif() endmacro() @@ -49,18 +49,18 @@ macro(xgl_append_gcov_coverage_flags) # This option is used to compile and link code instrumented for coverage analysis. # The option --coverage is a synonym for -fprofile-arcs -ftest-coverage (when compiling) and -lgcov (when linking) # Ref link: https://gcc.gnu.org/onlinedocs/gcc/Instrumentation-Options.html#Instrumentation-Options - string(APPEND ICD_GCOV_COMPILE_FLAGS " --coverage") - string(APPEND ICD_GCOV_LINK_FLAGS " --coverage") + string(APPEND VKI_GCOV_COMPILE_FLAGS " --coverage") + string(APPEND VKI_GCOV_LINK_FLAGS " --coverage") if (NOT CMAKE_BUILD_TYPE_DEBUG) # Use -O0 even in not debug mode, otherwise code coverage is not accurate. - string(APPEND ICD_GCOV_COMPILE_FLAGS " -O0") + string(APPEND VKI_GCOV_COMPILE_FLAGS " -O0") endif() if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") - string(APPEND ICD_GCOV_COMPILE_FLAGS " -Xclang -coverage-cfg-checksum") - string(APPEND ICD_GCOV_COMPILE_FLAGS " -Xclang -coverage-no-function-names-in-data") - string(APPEND ICD_GCOV_COMPILE_FLAGS " -Xclang -coverage-version='408*'") + string(APPEND VKI_GCOV_COMPILE_FLAGS " -Xclang -coverage-cfg-checksum") + string(APPEND VKI_GCOV_COMPILE_FLAGS " -Xclang -coverage-no-function-names-in-data") + string(APPEND VKI_GCOV_COMPILE_FLAGS " -Xclang -coverage-version='408*'") endif() else() message(FATAL_ERROR "Unknown compiler ID: ${CMAKE_CXX_COMPILER_ID}") diff --git a/cmake/XglOptions.cmake b/cmake/XglOptions.cmake index e018fe7c..9b950304 100644 --- a/cmake/XglOptions.cmake +++ b/cmake/XglOptions.cmake @@ -39,21 +39,18 @@ macro(xgl_options) option(VKI_DEVMODE_COMPILER_SETTINGS "Build with devmode compiler settings?" 
OFF) #endif - option(XGL_ENABLE_PRINTS_ASSERTS "Build with debug print enabled?" OFF) + option(VKI_ENABLE_PRINTS_ASSERTS "Build with debug print enabled?" OFF) - option(XGL_ENABLE_LTO "Build with LTO enabled?" ON) + option(VKI_ENABLE_LTO "Build with LTO enabled?" ON) - option(XGL_ENABLE_GCOV "Build with gcov source code coverage?" OFF) -#if VKI_BUILD_GFX115 - option(XGL_BUILD_GFX115 "Build vulkan for GFX115" ON) -#endif + option(VKI_ENABLE_GCOV "Build with gcov source code coverage?" OFF) #if VKI_BUILD_STRIX1 - option(XGL_BUILD_STRIX1 "Build vulkan for STRIX1" ON) + option(VKI_BUILD_STRIX1 "Build vulkan for STRIX1" ON) #endif - option(XGL_BUILD_TESTS "Build tests?" OFF) + option(VKI_BUILD_TESTS "Build tests?" OFF) - option(XGL_BUILD_TOOLS "Build tools?" OFF) + option(VKI_BUILD_TOOLS "Build tools?" OFF) #if VKI_RAY_TRACING option(VKI_RAY_TRACING "Build vulkan with RAY_TRACING" ON) @@ -65,22 +62,16 @@ macro(xgl_options) option(ICD_BUILD_LLPC "Build LLPC?" ON) - option(XGL_LLVM_UPSTREAM "Build with upstreamed LLVM?" OFF) - - option(XGL_ENABLE_ASSERTIONS "Enable assertions in release builds" OFF) - - option(XGL_ENABLE_LIBCXX "Use libc++. This is intended for MemorySanitizer support only." OFF) - - option(ICD_GPUOPEN_DEVMODE_BUILD "Build ${PROJECT_NAME} with GPU Open Developer Mode driver support?" ON) + option(VKI_ENABLE_ASSERTIONS "Enable assertions in release builds" OFF) - option(ICD_MEMTRACK "Turn on memory tracking?" ${CMAKE_BUILD_TYPE_DEBUG}) + option(VKI_ENABLE_LIBCXX "Use libc++. This is intended for MemorySanitizer support only." OFF) if(UNIX AND (NOT ANDROID)) - option(BUILD_WAYLAND_SUPPORT "Build XGL with Wayland support" ON) + option(VKI_BUILD_WAYLAND "Build XGL with Wayland support" ON) - option(BUILD_DRI3_SUPPORT "Build XGL with Dri3 support" ON) + option(VKI_BUILD_DRI3 "Build XGL with Dri3 support" ON) endif() - option(ICD_ANALYSIS_WARNINGS_AS_ERRORS "Warnings as errors?" OFF) + option(VKI_ANALYSIS_WARNINGS_AS_ERRORS "Warnings as errors?" OFF) endmacro() diff --git a/cmake/XglOverrides.cmake b/cmake/XglOverrides.cmake index e4ee602d..e717a0bb 100644 --- a/cmake/XglOverrides.cmake +++ b/cmake/XglOverrides.cmake @@ -46,7 +46,7 @@ macro(xgl_get_path) set(GPURT_DEVELOPER_MODE ON CACHE BOOL "GPURT_DEVELOPER_MODE override." FORCE) set(GPURT_CLIENT_API "VULKAN" CACHE STRING "GPURT_CLIENT_API_VULKAN override." FORCE) - set(GPURT_CLIENT_INTERFACE_MAJOR_VERSION ${ICD_GPURT_CLIENT_MAJOR_VERSION} CACHE STRING "GPURT_CLIENT_INTERFACE_MAJOR_VERSION override." FORCE) + set(GPURT_CLIENT_INTERFACE_MAJOR_VERSION ${VKI_GPURT_CLIENT_MAJOR_VERSION} CACHE STRING "GPURT_CLIENT_INTERFACE_MAJOR_VERSION override." FORCE) endif() #endif @@ -90,34 +90,32 @@ macro(xgl_overrides_pal) ### For PAL ########################################################################################################### set(PAL_BUILD_JEMALLOC OFF CACHE BOOL "Force jemalloc off" FORCE) - set(PAL_CLIENT_INTERFACE_MAJOR_VERSION ${ICD_PAL_CLIENT_MAJOR_VERSION} CACHE STRING "${PROJECT_NAME} override." FORCE) + set(PAL_CLIENT_INTERFACE_MAJOR_VERSION ${VKI_PAL_CLIENT_MAJOR_VERSION} CACHE STRING "${PROJECT_NAME} override." FORCE) set(PAL_CLIENT "VULKAN" CACHE STRING "${PROJECT_NAME} override." FORCE) - set(PAL_ENABLE_PRINTS_ASSERTS ${XGL_ENABLE_PRINTS_ASSERTS} CACHE BOOL "${PROJECT_NAME} override." FORCE) + set(PAL_ENABLE_PRINTS_ASSERTS ${VKI_ENABLE_PRINTS_ASSERTS} CACHE BOOL "${PROJECT_NAME} override." 
FORCE)
 
-    set(PAL_ENABLE_LTO ${XGL_ENABLE_LTO} CACHE BOOL "XGL override to build PAL with LTO support" FORCE)
+    set(PAL_ENABLE_LTO ${VKI_ENABLE_LTO} CACHE BOOL "XGL override to build PAL with LTO support" FORCE)
 
-    set(PAL_MEMTRACK ${ICD_MEMTRACK} CACHE BOOL "${PROJECT_NAME} override." FORCE)
-
-    set(PAL_BUILD_GPUOPEN ${ICD_GPUOPEN_DEVMODE_BUILD} CACHE BOOL "${PROJECT_NAME} override." FORCE)
+    set(PAL_MEMTRACK ${VKI_MEMTRACK} CACHE BOOL "${PROJECT_NAME} override." FORCE)
 
     set(PAL_BUILD_GFX11 ON CACHE BOOL "${PROJECT_NAME} override." FORCE)
 
     set(PAL_BUILD_PHOENIX2 ON CACHE BOOL "${PROJECT_NAME} override." FORCE)
 
 #if VKI_BUILD_GFX115
-    set(PAL_BUILD_GFX115 ${XGL_BUILD_GFX115} CACHE BOOL "${PROJECT_NAME} override." FORCE)
+    set(PAL_BUILD_GFX115 ${VKI_BUILD_GFX115} CACHE BOOL "${PROJECT_NAME} override." FORCE)
 #endif
 
 #if VKI_BUILD_STRIX1
-    set(PAL_BUILD_STRIX1 ${XGL_BUILD_STRIX1} CACHE BOOL "${PROJECT_NAME} override." FORCE)
+    set(PAL_BUILD_STRIX1 ${VKI_BUILD_STRIX1} CACHE BOOL "${PROJECT_NAME} override." FORCE)
 #endif
 
     # Wayland
-    set(PAL_BUILD_WAYLAND ${BUILD_WAYLAND_SUPPORT} CACHE BOOL "Build PAL with Wayland support" FORCE)
+    set(PAL_BUILD_WAYLAND ${VKI_BUILD_WAYLAND} CACHE BOOL "Build PAL with Wayland support" FORCE)
 
     # Dri3
-    set(PAL_BUILD_DRI3 ${BUILD_DRI3_SUPPORT} CACHE BOOL "PAL build with Dri3 enabled" FORCE)
+    set(PAL_BUILD_DRI3 ${VKI_BUILD_DRI3} CACHE BOOL "PAL build with Dri3 enabled" FORCE)
 
     if(EXISTS ${XGL_METROHASH_PATH})
         set(PAL_METROHASH_PATH ${XGL_METROHASH_PATH} CACHE PATH "${PROJECT_NAME} override." FORCE)
@@ -129,86 +127,86 @@ macro(xgl_overrides_pal)
 
 endmacro()
 
-macro(xgl_overrides_vkgc)
+macro(xgl_overrides_llpc)
     ### For LLPC ##########################################################################################################
-    set(LLPC_CLIENT_INTERFACE_MAJOR_VERSION ${ICD_LLPC_CLIENT_MAJOR_VERSION} CACHE STRING "${PROJECT_NAME} override." FORCE)
+    if(VKI_ENABLE_LIBCXX)
+        set(LLVM_ENABLE_LIBCXX ${VKI_ENABLE_LIBCXX} CACHE BOOL "LLVM_ENABLE_LIBCXX is overridden." FORCE)
+    endif()
+
+    if(VKI_ENABLE_ASSERTIONS)
+        set(LLVM_ENABLE_ASSERTIONS "${VKI_ENABLE_ASSERTIONS}" CACHE BOOL "LLVM_ENABLE_ASSERTIONS is overridden." FORCE)
+    endif()
+
+    set(LLVM_INCLUDE_BENCHMARKS OFF CACHE BOOL "LLVM_INCLUDE_BENCHMARKS is overridden." FORCE)
+
+    set(LLVM_INCLUDE_DOCS OFF CACHE BOOL "LLVM_INCLUDE_DOCS is overridden." FORCE)
 
-    set(LLPC_BUILD_TOOLS ${XGL_BUILD_TOOLS} CACHE BOOL "${PROJECT_NAME} override." FORCE)
+    set(LLVM_INCLUDE_EXAMPLES OFF CACHE BOOL "LLVM_INCLUDE_EXAMPLES is overridden." FORCE)
 
-    set(LLPC_BUILD_TESTS ${XGL_BUILD_TESTS} CACHE BOOL "${PROJECT_NAME} override." FORCE)
+    if(VKI_USE_SANITIZER)
+        set(LLPC_USE_SANITIZER "${VKI_USE_SANITIZER}" CACHE STRING "LLPC_USE_SANITIZER is overridden." FORCE)
+    endif()
+    set(LLPC_ENABLE_LTO ${VKI_ENABLE_LTO} CACHE BOOL "XGL override to build LLPC with LTO support" FORCE)
+    set(LLPC_MEMTRACK ${VKI_MEMTRACK} CACHE BOOL "${PROJECT_NAME} override." FORCE)
+    # LLPC still uses the build options below; they will be removed after LLPC finishes the update and promotes
+    set(XGL_ENABLE_LTO ${VKI_ENABLE_LTO})
+    set(ICD_MEMTRACK ${VKI_MEMTRACK})
+    set(XGL_USE_SANITIZER "${VKI_USE_SANITIZER}")
 
-    set(LLPC_BUILD_NAVI12 ON CACHE BOOL "${PROJECT_NAME} override." FORCE)
-    set(LLPC_BUILD_REMBRANDT ON CACHE BOOL "${PROJECT_NAME} override." FORCE)
-    set(LLPC_BUILD_RAPHAEL ON CACHE BOOL "${PROJECT_NAME} override." FORCE)
-    set(LLPC_BUILD_MENDOCINO ON CACHE BOOL "${PROJECT_NAME} override." FORCE)
-    set(LLPC_BUILD_GFX11 ON CACHE BOOL "${PROJECT_NAME} override." FORCE)
-    set(LLPC_BUILD_NAVI31 ON CACHE BOOL "${PROJECT_NAME} override." FORCE)
-    set(LLPC_BUILD_NAVI32 ON CACHE BOOL "${PROJECT_NAME} override." FORCE)
-    set(LLPC_BUILD_NAVI33 ON CACHE BOOL "${PROJECT_NAME} override." FORCE)
-    set(LLPC_BUILD_PHOENIX1 ON CACHE BOOL "${PROJECT_NAME} override." FORCE)
-    set(LLPC_BUILD_PHOENIX2 ON CACHE BOOL "${PROJECT_NAME} override." FORCE)
+    set(LLPC_BUILD_GFX11 ON CACHE BOOL "${PROJECT_NAME} override." FORCE)
 
 #if VKI_BUILD_GFX115
-    set(LLPC_BUILD_GFX115 ${XGL_BUILD_GFX115} CACHE BOOL "${PROJECT_NAME} override." FORCE)
+    if(VKI_BUILD_GFX115)
+        set(LLPC_BUILD_GFX115 ${VKI_BUILD_GFX115} CACHE BOOL "${PROJECT_NAME} override." FORCE)
+    endif()
 #endif
 
 #if VKI_BUILD_STRIX1
-    set(LLPC_BUILD_STRIX1 ${XGL_BUILD_STRIX1} CACHE BOOL "${PROJECT_NAME} override." FORCE)
+    set(LLPC_BUILD_STRIX1 ${VKI_BUILD_STRIX1} CACHE BOOL "${PROJECT_NAME} override." FORCE)
 #endif
 
-    set(LLPC_ENABLE_WERROR ${ICD_ANALYSIS_WARNINGS_AS_ERRORS} CACHE BOOL "${PROJECT_NAME} override." FORCE)
+    set(LLPC_CLIENT_INTERFACE_MAJOR_VERSION ${VKI_LLPC_CLIENT_MAJOR_VERSION} CACHE STRING "${PROJECT_NAME} override." FORCE)
+
+    set(LLPC_BUILD_TOOLS ${VKI_BUILD_TOOLS} CACHE BOOL "${PROJECT_NAME} override." FORCE)
+
+    set(LLPC_BUILD_TESTS ${VKI_BUILD_TESTS} CACHE BOOL "${PROJECT_NAME} override." FORCE)
+
+    set(LLPC_ENABLE_WERROR ${VKI_ANALYSIS_WARNINGS_AS_ERRORS} CACHE BOOL "${PROJECT_NAME} override." FORCE)
+
+#if VKI_RAY_TRACING
+    set(LLPC_RAY_TRACING ${VKI_RAY_TRACING} CACHE BOOL "${PROJECT_NAME} override." FORCE)
+#endif
 
 endmacro()
 
 macro(xgl_overrides)
-    if(ICD_GPUOPEN_DEVMODE_BUILD)
-        set(GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION ${ICD_GPUOPEN_CLIENT_MAJOR_VERSION})
-    endif()
+    set(GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION ${VKI_GPUOPEN_CLIENT_MAJOR_VERSION})
 
 #if VKI_BUILD_GFX115
+    set(VKI_BUILD_GFX115 OFF)
 #if VKI_BUILD_STRIX1
-    if(XGL_BUILD_STRIX1)
-        set(XGL_BUILD_GFX115 ON CACHE BOOL "XGL_BUILD_GFX115 override." FORCE)
+    if(VKI_BUILD_STRIX1)
+        set(VKI_BUILD_GFX115 ON)
     endif()
 #endif
 #endif
 
     xgl_get_path()
 
-    if(XGL_BUILD_TESTS)
-        set(XGL_BUILD_TOOLS ON CACHE BOOL "XGL_BUILD_TOOLS override by XGL_BUILD_TESTS." FORCE)
-    endif()
-
-    if(NOT ICD_BUILD_LLPC)
-        set(XGL_LLVM_UPSTREAM OFF CACHE BOOL "XGL_LLVM_UPSTREAM is overrided to false." FORCE)
-    endif()
-
-    set(XGL_USE_SANITIZER "" CACHE STRING "Build with sanitizers, e.g. Address;Undefined")
-
-    if(XGL_USE_SANITIZER)
-        set(LLVM_USE_SANITIZER "${XGL_USE_SANITIZER}" CACHE STRING "LLVM_USE_SANITIZER is overridden." FORCE)
-    endif()
+    set(VKI_MEMTRACK ${CMAKE_BUILD_TYPE_DEBUG})
 
-    if(XGL_ENABLE_LIBCXX)
-        set(LLVM_ENABLE_LIBCXX "${XGL_ENABLE_LIBCXX}" CACHE BOOL "LLVM_ENABLE_LIBCXX is overridden." FORCE)
+    if(VKI_BUILD_TESTS)
+        set(VKI_BUILD_TOOLS ON CACHE BOOL "VKI_BUILD_TOOLS overridden by VKI_BUILD_TESTS." FORCE)
     endif()
 
-    if(XGL_ENABLE_ASSERTIONS)
-        set(LLVM_ENABLE_ASSERTIONS "${XGL_ENABLE_ASSERTIONS}" CACHE BOOL "LLVM_ENABLE_ASSERTIONS is overridden." FORCE)
-    endif()
-
-    set(LLVM_INCLUDE_BENCHMARKS OFF CACHE BOOL "LLVM_INCLUDE_BENCHMARKS is overriden." FORCE)
-
-    set(LLVM_INCLUDE_DOCS OFF CACHE BOOL "LLVM_INCLUDE_DOCS is overriden." FORCE)
-
-    set(LLVM_INCLUDE_EXAMPLES OFF CACHE BOOL "LLVM_INCLUDE_EXAMPLES is overriden." FORCE)
+    set(VKI_USE_SANITIZER "" CACHE STRING "Build with sanitizers, e.g. Address;Undefined")
 
-    set(VAM_ENABLE_WERROR ${ICD_ANALYSIS_WARNINGS_AS_ERRORS} CACHE BOOL "${PROJECT_NAME} override." FORCE)
+    set(VAM_ENABLE_WERROR ${VKI_ANALYSIS_WARNINGS_AS_ERRORS} CACHE BOOL "${PROJECT_NAME} override." 
FORCE) - set(ADDR_ENABLE_WERROR ${ICD_ANALYSIS_WARNINGS_AS_ERRORS} CACHE BOOL "${PROJECT_NAME} override." FORCE) + set(ADDR_ENABLE_WERROR ${VKI_ANALYSIS_WARNINGS_AS_ERRORS} CACHE BOOL "${PROJECT_NAME} override." FORCE) - set(METROHASH_ENABLE_WERROR ${ICD_ANALYSIS_WARNINGS_AS_ERRORS} CACHE BOOL "${PROJECT_NAME} override." FORCE) + set(METROHASH_ENABLE_WERROR ${VKI_ANALYSIS_WARNINGS_AS_ERRORS} CACHE BOOL "${PROJECT_NAME} override." FORCE) ### XCB required ###################################################################################################### set(XCB_REQUIRED OFF) @@ -218,6 +216,6 @@ macro(xgl_overrides) xgl_overrides_pal() - xgl_overrides_vkgc() + xgl_overrides_llpc() endmacro() diff --git a/cmake/XglVersions.cmake b/cmake/XglVersions.cmake index 52f9a3b9..e731978b 100644 --- a/cmake/XglVersions.cmake +++ b/cmake/XglVersions.cmake @@ -30,18 +30,18 @@ include_guard() # This will become the value of PAL_CLIENT_INTERFACE_MAJOR_VERSION. It describes the version of the PAL interface # that the ICD supports. PAL uses this value to enable backwards-compatibility for older interface versions. # It must be updated on each PAL promotion after handling all of the interface changes described in palLib.h. -set(ICD_PAL_CLIENT_MAJOR_VERSION "905") +set(VKI_PAL_CLIENT_MAJOR_VERSION "909") -# This will become the value of GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION if ICD_GPUOPEN_DEVMODE_BUILD=1. +# This will become the value of GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION. # It describes the interface version of the gpuopen shared module (part of PAL) that the ICD supports. -set(ICD_GPUOPEN_CLIENT_MAJOR_VERSION "42") +set(VKI_GPUOPEN_CLIENT_MAJOR_VERSION "42") #if VKI_RAY_TRACING # This will become the value of GPURT_CLIENT_INTERFACE_MAJOR_VERSION if VKI_RAY_TRACING=1. # It describes the interface version of the GpuRT shared module that the ICD supports. -set(ICD_GPURT_CLIENT_MAJOR_VERSION "49") +set(VKI_GPURT_CLIENT_MAJOR_VERSION "51") #endif # This will become the value of LLPC_CLIENT_INTERFACE_MAJOR_VERSION if ICD_BUILD_LLPC=1. # It describes the version of the interface version of LLPC that the ICD supports. 
-set(ICD_LLPC_CLIENT_MAJOR_VERSION "75") +set(VKI_LLPC_CLIENT_MAJOR_VERSION "75") diff --git a/icd/CMakeLists.txt b/icd/CMakeLists.txt index e800d879..e6fe9515 100644 --- a/icd/CMakeLists.txt +++ b/icd/CMakeLists.txt @@ -26,8 +26,8 @@ ### Create XGL Library ################################################################################################# add_library(xgl SHARED) -set(ICD_TARGET amdvlk${TARGET_ARCHITECTURE_BITS}) -set_target_properties(xgl PROPERTIES OUTPUT_NAME ${ICD_TARGET}) +set(VKI_TARGET amdvlk${TARGET_ARCHITECTURE_BITS}) +set_target_properties(xgl PROPERTIES OUTPUT_NAME ${VKI_TARGET}) set_target_properties(xgl PROPERTIES PREFIX "") install(TARGETS xgl DESTINATION ${CMAKE_SOURCE_DIR}/lib/${CMAKE_BUILD_TYPE}) @@ -44,7 +44,7 @@ endif() xgl_set_compile_definitions() ### Include Directories ################################################################################################ -if (BUILD_WAYLAND_SUPPORT) +if (VKI_BUILD_WAYLAND) target_include_directories(xgl PUBLIC ${WAYLAND_INCLUDE_DIRS}) endif() @@ -155,7 +155,6 @@ target_sources(xgl PRIVATE api/appopt/barrier_filter_layer.cpp api/appopt/strange_brigade_layer.cpp api/appopt/baldurs_gate3_layer.cpp - api/appopt/gravity_mark_layer.cpp api/appopt/g_shader_profile.cpp api/render_state_cache.cpp api/renderpass/renderpass_builder.cpp @@ -193,65 +192,65 @@ if(ICD_BUILD_LLPC) endif() ### ICD Auto-generated Shader Profiles Files ################################## -# ICD_GENDIR Path to the code generation tools -set(ICD_GENDIR ${CMAKE_CURRENT_SOURCE_DIR}/tools/generate) +# VKI_GENDIR Path to the code generation tools +set(VKI_GENDIR ${CMAKE_CURRENT_SOURCE_DIR}/tools/generate) # ICD shader profile code generation main script -set(ICD_GEN_SHADER_PROFILE_SCRIPTS ${ICD_GENDIR}/genShaderProfile.py ${ICD_GENDIR}/shaderProfileTemplate.py) +set(VKI_GEN_SHADER_PROFILE_SCRIPTS ${VKI_GENDIR}/genShaderProfile.py ${VKI_GENDIR}/shaderProfileTemplate.py) -set(ICD_SHADER_PROFILE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/api/appopt) +set(VKI_SHADER_PROFILE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/api/appopt) -file(GLOB_RECURSE ICD_ALL_SHADER_PROFILE_FILES - ${ICD_SHADER_PROFILE_DIR}/shader_profiles/profile.json +file(GLOB_RECURSE VKI_ALL_SHADER_PROFILE_FILES + ${VKI_SHADER_PROFILE_DIR}/shader_profiles/profile.json ) add_custom_command( - OUTPUT ${ICD_SHADER_PROFILE_DIR}/g_shader_profile.cpp ${ICD_SHADER_PROFILE_DIR}/g_shader_profile.h - COMMAND ${PYTHON_CMD} ${ICD_GENDIR}/genShaderProfile.py ${ICD_SHADER_PROFILE_DIR}/shader_profiles - DEPENDS ${ICD_GEN_SHADER_PROFILE_SCRIPTS} ${ICD_ALL_SHADER_PROFILE_FILES} + OUTPUT ${VKI_SHADER_PROFILE_DIR}/g_shader_profile.cpp ${VKI_SHADER_PROFILE_DIR}/g_shader_profile.h + COMMAND ${PYTHON_CMD} ${VKI_GENDIR}/genShaderProfile.py ${VKI_SHADER_PROFILE_DIR}/shader_profiles + DEPENDS ${VKI_GEN_SHADER_PROFILE_SCRIPTS} ${VKI_ALL_SHADER_PROFILE_FILES} COMMENT "Generating shader profiles code from all profile.json files" ) add_custom_target( GenerateShaderProfiles - DEPENDS ${ICD_GEN_SHADER_PROFILE_SCRIPTS} ${ICD_ALL_SHADER_PROFILE_FILES} + DEPENDS ${VKI_GEN_SHADER_PROFILE_SCRIPTS} ${VKI_ALL_SHADER_PROFILE_FILES} COMMENT "Checking if re-generation is required for shader profiles" ) add_dependencies(xgl GenerateShaderProfiles) ### ICD Auto-generated String Files ########################################### -set(ICD_STRING_DIR ${CMAKE_CURRENT_SOURCE_DIR}/api/strings) +set(VKI_STRING_DIR ${CMAKE_CURRENT_SOURCE_DIR}/api/strings) # ICD settings code generation main script -set(ICD_GEN_STRINGS ${ICD_STRING_DIR}/generate_strings.py) 
+set(VKI_GEN_STRINGS ${VKI_STRING_DIR}/generate_strings.py) -set(ICD_GEN_STRINGS_FILES ${ICD_GEN_STRINGS} ${ICD_STRING_DIR}/func_table_template.py) +set(VKI_GEN_STRINGS_FILES ${VKI_GEN_STRINGS} ${VKI_STRING_DIR}/func_table_template.py) -set(ICD_STRING_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/api/strings) +set(VKI_STRING_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/api/strings) -set(ICD_GEN_STRINGS_OPT -w ${ICD_STRING_DIR} -d ${ICD_STRING_OUTPUT_DIR}) +set(VKI_GEN_STRINGS_OPT -w ${VKI_STRING_DIR} -d ${VKI_STRING_OUTPUT_DIR}) -set(ICD_STRING_OUTPUT_FILES ${ICD_STRING_OUTPUT_DIR}/g_entry_points_decl.h - ${ICD_STRING_OUTPUT_DIR}/g_entry_points_impl.h - ${ICD_STRING_OUTPUT_DIR}/g_extensions_decl.h - ${ICD_STRING_OUTPUT_DIR}/g_extensions_impl.h - ${ICD_STRING_OUTPUT_DIR}/g_func_table.h +set(VKI_STRING_OUTPUT_FILES ${VKI_STRING_OUTPUT_DIR}/g_entry_points_decl.h + ${VKI_STRING_OUTPUT_DIR}/g_entry_points_impl.h + ${VKI_STRING_OUTPUT_DIR}/g_extensions_decl.h + ${VKI_STRING_OUTPUT_DIR}/g_extensions_impl.h + ${VKI_STRING_OUTPUT_DIR}/g_func_table.h ) -set(ICD_STRING_SOURCE_FILES ${ICD_STRING_DIR}/entry_points.txt - ${ICD_STRING_DIR}/extensions.txt +set(VKI_STRING_SOURCE_FILES ${VKI_STRING_DIR}/entry_points.txt + ${VKI_STRING_DIR}/extensions.txt ) add_custom_command( - OUTPUT ${ICD_STRING_OUTPUT_FILES} - COMMAND ${PYTHON_CMD} ${ICD_GEN_STRINGS} ${ICD_GEN_STRINGS_OPT} - DEPENDS ${ICD_GEN_STRINGS_FILES} ${ICD_STRING_SOURCE_FILES} - COMMENT "Generating Vulkan api strings ${ICD_GEN_STRINGS_OPT}" + OUTPUT ${VKI_STRING_OUTPUT_FILES} + COMMAND ${PYTHON_CMD} ${VKI_GEN_STRINGS} ${VKI_GEN_STRINGS_OPT} + DEPENDS ${VKI_GEN_STRINGS_FILES} ${VKI_STRING_SOURCE_FILES} + COMMENT "Generating Vulkan api strings ${VKI_GEN_STRINGS_OPT}" ) add_custom_target( RunVKStringsGenerator - DEPENDS ${ICD_STRING_OUTPUT_FILES} + DEPENDS ${VKI_STRING_OUTPUT_FILES} COMMENT "Checking if re-generation is required for strings" ) @@ -263,31 +262,31 @@ target_sources(xgl PRIVATE api/strings/strings.cpp) # ICD settings code generation main script set(XGL_DEVDRIVER_PATH ${XGL_PAL_PATH}/shared/devdriver) -set(ICD_DD_GENDIR ${XGL_DEVDRIVER_PATH}/apis/settings/codegen) +set(VKI_DD_GENDIR ${XGL_DEVDRIVER_PATH}/apis/settings/codegen) -set(ICD_GEN_SETTINGS ${ICD_DD_GENDIR}/settings_codegen.py) +set(VKI_GEN_SETTINGS ${VKI_DD_GENDIR}/settings_codegen.py) -set(ICD_GEN_SETTINGS_FILES ${ICD_GEN_SETTINGS}) +set(VKI_GEN_SETTINGS_FILES ${VKI_GEN_SETTINGS}) -set(ICD_SETTINGS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/settings) +set(VKI_SETTINGS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/settings) add_custom_command( - OUTPUT ${ICD_SETTINGS_DIR}/g_settings.cpp ${ICD_SETTINGS_DIR}/g_settings.h - COMMAND ${PYTHON_CMD} ${ICD_GEN_SETTINGS} - -i ${ICD_SETTINGS_DIR}/settings_xgl.json - -o ${ICD_SETTINGS_DIR} + OUTPUT ${VKI_SETTINGS_DIR}/g_settings.cpp ${VKI_SETTINGS_DIR}/g_settings.h + COMMAND ${PYTHON_CMD} ${VKI_GEN_SETTINGS} + -i ${VKI_SETTINGS_DIR}/settings_xgl.json + -o ${VKI_SETTINGS_DIR} -g settings -s settings/settings.h --namespaces vk --settings-struct-name RuntimeSettings --include-headers pal.h palImage.h - DEPENDS ${ICD_GEN_SETTINGS_FILES} ${ICD_SETTINGS_DIR}/settings_xgl.json + DEPENDS ${VKI_GEN_SETTINGS_FILES} ${VKI_SETTINGS_DIR}/settings_xgl.json COMMENT "Generating Vulkan settings code from settings_xgl.json" ) add_custom_target( RunVKSettingsGenerator - DEPENDS ${ICD_GEN_SETTINGS_FILES} ${ICD_SETTINGS_DIR}/settings_xgl.json + DEPENDS ${VKI_GEN_SETTINGS_FILES} ${VKI_SETTINGS_DIR}/settings_xgl.json COMMENT "Checking if re-generation is required for settings" ) @@ -300,22 +299,22 
@@ target_sources(xgl PRIVATE ) add_custom_command( - OUTPUT ${ICD_SETTINGS_DIR}/g_experiments.cpp ${ICD_SETTINGS_DIR}/g_experiments.h - COMMAND ${PYTHON_CMD} ${ICD_GEN_SETTINGS} - -i ${ICD_SETTINGS_DIR}/experiments_settings_xgl.json - -o ${ICD_SETTINGS_DIR} + OUTPUT ${VKI_SETTINGS_DIR}/g_experiments.cpp ${VKI_SETTINGS_DIR}/g_experiments.h + COMMAND ${PYTHON_CMD} ${VKI_GEN_SETTINGS} + -i ${VKI_SETTINGS_DIR}/experiments_settings_xgl.json + -o ${VKI_SETTINGS_DIR} -g experiments -s settings/experimentsLoader.h --namespaces vk --settings-struct-name ExpSettings --classname ExperimentsLoader - DEPENDS ${ICD_GEN_SETTINGS_FILES} ${ICD_SETTINGS_DIR}/experiments_settings_xgl.json + DEPENDS ${VKI_GEN_SETTINGS_FILES} ${VKI_SETTINGS_DIR}/experiments_settings_xgl.json COMMENT "Generating Vulkan settings code from experiments_settings_xgl.json" ) add_custom_target( RunVKExperimentsGenerator - DEPENDS ${ICD_GEN_SETTINGS_FILES} ${ICD_SETTINGS_DIR}/experiments_settings_xgl.json + DEPENDS ${VKI_GEN_SETTINGS_FILES} ${VKI_SETTINGS_DIR}/experiments_settings_xgl.json COMMENT "Checking if re-generation is required for settings" ) @@ -335,12 +334,10 @@ target_sources(xgl PRIVATE ) ### ICD api/devmode ########################################################### -if(ICD_GPUOPEN_DEVMODE_BUILD) - target_sources(xgl PRIVATE - api/devmode/devmode_rgp.cpp - api/devmode/devmode_ubertrace.cpp - ) -endif() +target_sources(xgl PRIVATE + api/devmode/devmode_rgp.cpp + api/devmode/devmode_ubertrace.cpp +) ### ICD layer ################################################################## target_sources(xgl PRIVATE @@ -362,7 +359,7 @@ if (UNIX) target_link_libraries(xgl PRIVATE -fabi-version=0 -static-intel) endif() - if(CMAKE_BUILD_TYPE_RELEASE AND XGL_ENABLE_LTO) + if(CMAKE_BUILD_TYPE_RELEASE AND VKI_ENABLE_LTO) if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") target_link_options(xgl PRIVATE -Wno-stringop-overflow) execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) @@ -379,8 +376,8 @@ if (UNIX) target_link_options(xgl PRIVATE -Wl,-Bdynamic -Wl,-z,noexecstack - -Wl,-Map=$/${ICD_TARGET}.map - -Wl,-soname=${ICD_TARGET}.so.1 + -Wl,-Map=$/${VKI_TARGET}.map + -Wl,-soname=${VKI_TARGET}.so.1 ) target_link_options(xgl PRIVATE -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/make/amdicd.so.def) @@ -390,7 +387,7 @@ if (UNIX) target_link_options(xgl PRIVATE -static-libgcc -static-libstdc++) endif() - if(NOT XGL_USE_SANITIZER) + if(NOT VKI_USE_SANITIZER) # -Wl,--no-undefined is incompatible with asan target_link_libraries(xgl PRIVATE -Wl,--no-undefined @@ -440,9 +437,9 @@ if(UNIX) endif() add_custom_command( TARGET xgl POST_BUILD - COMMAND ${TOOLCHAIN_TARGET_PREFIX}objcopy --only-keep-debug ${CMAKE_CURRENT_BINARY_DIR}/${ICD_TARGET}.so ${CMAKE_CURRENT_BINARY_DIR}/${ICD_TARGET}.so.debug - COMMAND ${TOOLCHAIN_TARGET_PREFIX}strip ${CMAKE_CURRENT_BINARY_DIR}/${ICD_TARGET}.so - COMMAND ${TOOLCHAIN_TARGET_PREFIX}objcopy --add-gnu-debuglink=${CMAKE_CURRENT_BINARY_DIR}/${ICD_TARGET}.so.debug ${CMAKE_CURRENT_BINARY_DIR}/${ICD_TARGET}.so + COMMAND ${TOOLCHAIN_TARGET_PREFIX}objcopy --only-keep-debug ${CMAKE_CURRENT_BINARY_DIR}/${VKI_TARGET}.so ${CMAKE_CURRENT_BINARY_DIR}/${VKI_TARGET}.so.debug + COMMAND ${TOOLCHAIN_TARGET_PREFIX}strip ${CMAKE_CURRENT_BINARY_DIR}/${VKI_TARGET}.so + COMMAND ${TOOLCHAIN_TARGET_PREFIX}objcopy --add-gnu-debuglink=${CMAKE_CURRENT_BINARY_DIR}/${VKI_TARGET}.so.debug ${CMAKE_CURRENT_BINARY_DIR}/${VKI_TARGET}.so ) endif() endif() diff --git a/icd/Loader/LunarG/Lnx/amd-icd.json 
b/icd/Loader/LunarG/Lnx/amd-icd.json index 9248602a..9adea9cf 100644 --- a/icd/Loader/LunarG/Lnx/amd-icd.json +++ b/icd/Loader/LunarG/Lnx/amd-icd.json @@ -2,13 +2,13 @@ "file_format_version": "1.0.0", "ICD": { "library_path": "@AMDVLK_INSTALL_PATH@/amdvlk@ISABITS@.so", - "api_version": "1.3.297" + "api_version": "1.3.301" }, "layer": { "name": "VK_LAYER_AMD_switchable_graphics_@ISABITS@", "type": "GLOBAL", "library_path": "@AMDVLK_INSTALL_PATH@/amdvlk@ISABITS@.so", - "api_version": "1.3.297", + "api_version": "1.3.301", "implementation_version": "1", "description": "AMD switchable graphics layer", "functions": { diff --git a/icd/api/app_profile.cpp b/icd/api/app_profile.cpp index 44c1f3ef..beae8f8e 100644 --- a/icd/api/app_profile.cpp +++ b/icd/api/app_profile.cpp @@ -780,12 +780,6 @@ constexpr AppProfilePatternEntry AppNameHoudini = "houdini" }; -constexpr AppProfilePatternEntry AppNameGravityMark = -{ - PatternAppNameLower, - "clayapp" -}; - constexpr AppProfilePatternEntry AppNameSevenDaysToDie = { PatternAppNameLower, @@ -798,6 +792,12 @@ constexpr AppProfilePatternEntry AppNameGgmlVulkan = "ggml-vulkan" }; +constexpr AppProfilePatternEntry AppNameBlender = +{ + PatternAppNameLower, + "blender" +}; + // Section END of AppProfilePatternEntry for all games // This is a table of patterns. The first matching pattern in this table will be returned. @@ -1326,14 +1326,6 @@ AppProfilePattern AppPatternTable[] = } }, - { - AppProfile::GravityMark, - { - AppNameGravityMark, - PatternEnd - } - }, - { AppProfile::SOTTR, { @@ -1646,6 +1638,14 @@ AppProfilePattern AppPatternTable[] = } }, + { + AppProfile::Blender, + { + AppNameBlender, + PatternEnd + } + }, + }; static char* GetExecutableName(size_t* pLength, bool includeExtension = false); diff --git a/icd/api/app_resource_optimizer.cpp b/icd/api/app_resource_optimizer.cpp index dd4a8644..6b89ebfb 100644 --- a/icd/api/app_resource_optimizer.cpp +++ b/icd/api/app_resource_optimizer.cpp @@ -115,6 +115,7 @@ void ResourceOptimizer::ApplyProfileToImageViewCreateInfo( pViewInfo->flags.bypassMallRead = 1; pViewInfo->flags.bypassMallWrite = 1; } + } } } diff --git a/icd/api/appopt/bvh_batch_layer.cpp b/icd/api/appopt/bvh_batch_layer.cpp index 9fc0d5d7..dd217926 100644 --- a/icd/api/appopt/bvh_batch_layer.cpp +++ b/icd/api/appopt/bvh_batch_layer.cpp @@ -118,7 +118,7 @@ VkResult BvhBatchLayer::CreateLayer( } } - if (result == VK_SUCCESS) + if ((result == VK_SUCCESS) && (pLayer != nullptr)) { result = pLayer->Init(pDevice); } diff --git a/icd/api/appopt/gpu_decode_layer.cpp b/icd/api/appopt/gpu_decode_layer.cpp index afb6cedc..bcb81dfb 100755 --- a/icd/api/appopt/gpu_decode_layer.cpp +++ b/icd/api/appopt/gpu_decode_layer.cpp @@ -122,7 +122,7 @@ Pal::Result ClientCreateInternalComputePipeline( rootNodeCount, &rootNode[0], 0, - false, // forceWave64, + vk::ShaderWaveSize::WaveSizeAuto, &specializationInfo, &pDevice->GetInternalTexDecodePipeline()); diff --git a/icd/api/appopt/gravity_mark_layer.cpp b/icd/api/appopt/gravity_mark_layer.cpp deleted file mode 100644 index 8aeea3d6..00000000 --- a/icd/api/appopt/gravity_mark_layer.cpp +++ /dev/null @@ -1,119 +0,0 @@ -/* - *********************************************************************************************************************** - * - * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - **********************************************************************************************************************/ -/** -*********************************************************************************************************************** -* @file gravity_mark_layer.cpp -* @brief Implementation Gravity Mark Layer. -*********************************************************************************************************************** -*/ - -#include "gravity_mark_layer.h" - -#include "include/vk_image.h" -#include "include/vk_cmdbuffer.h" -#include "include/vk_device.h" - -namespace vk -{ - -namespace entry -{ - -namespace gravity_mark_layer -{ - -// ===================================================================================================================== -VKAPI_ATTR void VKAPI_CALL vkCmdPipelineBarrier( - VkCommandBuffer cmdBuffer, - VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - VkDependencyFlags dependencyFlags, - uint32_t memoryBarrierCount, - const VkMemoryBarrier* pMemoryBarriers, - uint32_t bufferMemoryBarrierCount, - const VkBufferMemoryBarrier* pBufferMemoryBarriers, - uint32_t imageMemoryBarrierCount, - const VkImageMemoryBarrier* pImageMemoryBarriers) -{ - CmdBuffer* pCmdBuffer = ApiCmdBuffer::ObjectFromHandle(cmdBuffer); - OptLayer* pLayer = pCmdBuffer->VkDevice()->GetAppOptLayer(); - - // - corruption caused by incorrect barrier between CmdDispatch and CmdDrawIndexed calls which access the same - // R16G16B16A16_SFLOAT image - // - existing barrier from app specifies srcStageMask = TOP_OF_PIPE which is equivalent to VK_PIPELINE_STAGE_2_NONE - // - changing this to BOTTOM_OF_PIPE will correctly sync between the dispatch and draw calls, resolving corruption - - if ((imageMemoryBarrierCount == 1) && - (srcStageMask == VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT) && - (dstStageMask == (VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT - | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT - | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT - | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT)) && - (pImageMemoryBarriers != nullptr) && - (Image::ObjectFromHandle(pImageMemoryBarriers[0].image)->GetFormat() == VK_FORMAT_R16G16B16A16_SFLOAT) && - (Image::ObjectFromHandle(pImageMemoryBarriers[0].image)->GetImageSamples() == VK_SAMPLE_COUNT_1_BIT) && - (pImageMemoryBarriers[0].srcAccessMask == VK_ACCESS_NONE) && - (pImageMemoryBarriers[0].dstAccessMask == VK_ACCESS_SHADER_READ_BIT) && - (pImageMemoryBarriers[0].oldLayout == 
VK_IMAGE_LAYOUT_GENERAL) && - (pImageMemoryBarriers[0].newLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL)) - { - srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; - } - - // Pass the barrier call on to the Vulkan driver - pLayer->GetNextLayer()->GetEntryPoints().vkCmdPipelineBarrier( - cmdBuffer, - srcStageMask, - dstStageMask, - dependencyFlags, - memoryBarrierCount, - pMemoryBarriers, - bufferMemoryBarrierCount, - pBufferMemoryBarriers, - imageMemoryBarrierCount, - pImageMemoryBarriers); -} - -} // namespace gravity_mark_layer - -} // namespace entry - -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#define GRAVITY_MARK_OVERRIDE_ALIAS(entry_name, func_name) \ - pDispatchTable->OverrideEntryPoints()->entry_name = vk::entry::gravity_mark_layer::func_name - -#define GRAVITY_MARK_OVERRIDE_ENTRY(entry_name) GRAVITY_MARK_OVERRIDE_ALIAS(entry_name, entry_name) - -// ===================================================================================================================== -void GravityMarkLayer::OverrideDispatchTable( - DispatchTable* pDispatchTable) -{ - // Save current device dispatch table to use as the next layer. - m_nextLayer = *pDispatchTable; - - GRAVITY_MARK_OVERRIDE_ENTRY(vkCmdPipelineBarrier); -} - -} // namespace vk diff --git a/icd/api/appopt/gravity_mark_layer.h b/icd/api/appopt/gravity_mark_layer.h deleted file mode 100644 index 725b0654..00000000 --- a/icd/api/appopt/gravity_mark_layer.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - *********************************************************************************************************************** - * - * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - **********************************************************************************************************************/ -/** -*********************************************************************************************************************** -* @file gravity_mark_layer.h -* @brief Contains shadowed entry points related to Gravity Mark. 
-*********************************************************************************************************************** -*/ - -#ifndef __GRAVITY_MARK_LAYER_H__ -#define __GRAVITY_MARK_LAYER_H__ - -#pragma once - -#include "opt_layer.h" - -namespace vk -{ -// ===================================================================================================================== -// Class for the Gravity Mark Layer to simplify calls to the overriden dispatch table from the layer's entrypoints -class GravityMarkLayer final : public OptLayer -{ -public: - GravityMarkLayer() {} - virtual ~GravityMarkLayer() {} - - virtual void OverrideDispatchTable(DispatchTable* pDispatchTable) override; - -private: - PAL_DISALLOW_COPY_AND_ASSIGN(GravityMarkLayer); -}; - -}; // namespace vk - -#endif /* __GRAVITY_MARK_LAYER_H__ */ diff --git a/icd/api/appopt/shader_profiles/llpc/generic/SniperElite5/profile.json b/icd/api/appopt/shader_profiles/llpc/generic/SniperElite5/profile.json new file mode 100644 index 00000000..32fd85c2 --- /dev/null +++ b/icd/api/appopt/shader_profiles/llpc/generic/SniperElite5/profile.json @@ -0,0 +1,23 @@ +{ + "entries": [ + { + "pattern": { + "always": true + }, + "action": { + "cs": { + "nsaThreshold": 3 + }, + "gs": { + "nsaThreshold": 3 + }, + "vs": { + "nsaThreshold": 3 + }, + "ps": { + "nsaThreshold": 3 + } + } + } + ] +} \ No newline at end of file diff --git a/icd/api/appopt/shader_profiles/llpc/generic/WorldWarZ/profile.json b/icd/api/appopt/shader_profiles/llpc/generic/WorldWarZ/profile.json index 51303768..d2ca1014 100644 --- a/icd/api/appopt/shader_profiles/llpc/generic/WorldWarZ/profile.json +++ b/icd/api/appopt/shader_profiles/llpc/generic/WorldWarZ/profile.json @@ -6,17 +6,18 @@ }, "action": { "cs": { + "nsaThreshold": 3, "unrollThreshold": 5000 - } - } - }, - { - "pattern": { - "always": true - }, - "action": { + }, "gs": { + "nsaThreshold": 3, "unrollThreshold": 5000 + }, + "vs": { + "nsaThreshold": 3 + }, + "ps": { + "nsaThreshold": 3 } } }, diff --git a/icd/api/barrier_policy.cpp b/icd/api/barrier_policy.cpp index 3e6519b1..92b733fa 100644 --- a/icd/api/barrier_policy.cpp +++ b/icd/api/barrier_policy.cpp @@ -110,11 +110,15 @@ class LayoutUsageHelper InitEntry(VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL, Pal::LayoutDepthStencilTarget | Pal::LayoutShaderRead); + // Disable metadata for avoiding corruption if one image is sampled and rendered + // in the same draw. InitEntry(VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT, - Pal::LayoutShaderRead | Pal::LayoutShaderWrite); + Pal::LayoutShaderRead | Pal::LayoutShaderWrite| Pal::LayoutUncompressed); + // Disable metadata for avoiding corruption if one image is read and rendered + // in the same draw. InitEntry(VK_IMAGE_LAYOUT_RENDERING_LOCAL_READ_KHR, - Pal::LayoutShaderRead); + Pal::LayoutShaderRead | Pal::LayoutShaderWrite | Pal::LayoutUncompressed); } // Return layout usage index corresponding to the specified layout. @@ -918,6 +922,11 @@ void ImageBarrierPolicy::InitImageLayoutUsagePolicy( m_supportedLayoutUsageMask |= Pal::LayoutSampleRate; } + if (usage & VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT) + { + m_supportedLayoutUsageMask |= Pal::LayoutUncompressed; + } + // We don't do anything special in case of transient attachment images } @@ -1086,7 +1095,7 @@ Pal::ImageLayout ImageBarrierPolicy::GetTransferLayout( // Mask determined layout usage flags by the supported layout usage mask on the given queue family index. 
result.usages = g_LayoutUsageHelper.GetLayoutUsage(0, usageIndex) - & GetSupportedLayoutUsageMask(queueFamilyIndex); + & GetSupportedLayoutUsageMask(queueFamilyIndex, layout); // If the layout usage is 0, it likely means that an application is trying to transition to an image layout that // is not supported by that image's usage flags. @@ -1112,7 +1121,7 @@ Pal::ImageLayout ImageBarrierPolicy::GetAspectLayout( // Mask determined layout usage flags by the supported layout usage mask on the given queue family index. result.usages = g_LayoutUsageHelper.GetLayoutUsage(plane, usageIndex) - & GetSupportedLayoutUsageMask(queueFamilyIndex); + & GetSupportedLayoutUsageMask(queueFamilyIndex, layout); // If the layout usage is 0, it likely means that an application is trying to transition to an image layout that // is not supported by that image's usage flags. @@ -1135,7 +1144,7 @@ void ImageBarrierPolicy::GetLayouts( uint32_t usageIndex = g_LayoutUsageHelper.GetLayoutUsageIndex(layout, format); // Mask determined layout usage flags by the supported layout usage mask on the corresponding queue family index. - const uint32_t supportedLayoutUsageMask = GetSupportedLayoutUsageMask(queueFamilyIndex); + const uint32_t supportedLayoutUsageMask = GetSupportedLayoutUsageMask(queueFamilyIndex, layout); results[0].usages = g_LayoutUsageHelper.GetLayoutUsage(0, usageIndex) & supportedLayoutUsageMask; results[1].usages = g_LayoutUsageHelper.GetLayoutUsage(1, usageIndex) & supportedLayoutUsageMask; results[2].usages = g_LayoutUsageHelper.GetLayoutUsage(2, usageIndex) & supportedLayoutUsageMask; diff --git a/icd/api/debug_printf.cpp b/icd/api/debug_printf.cpp index 2976d8cb..5529c67c 100644 --- a/icd/api/debug_printf.cpp +++ b/icd/api/debug_printf.cpp @@ -673,108 +673,116 @@ const void* GetMetaData( // Retrieve the format string section from the ELF void DebugPrintf::DecodeFormatStringsFromElf( const Device* pDevice, - uint32_t code, + uint32_t codeSize, const char* pCode, PrintfFormatMap* pFormatStrings) { - Util::Abi::PipelineAbiReader abiReader(pDevice->VkInstance()->Allocator(), pCode); - auto& elfReader = abiReader.GetElfReader(); - auto noteId = abiReader.GetElfReader().FindSection(".note"); - auto& noteSection = abiReader.GetElfReader().GetSection(noteId); - VK_ASSERT(noteId != 0); - VK_ASSERT(noteSection.sh_type == static_cast<uint32>(Elf::SectionHeaderType::Note)); - ElfReader::Notes notes(elfReader, noteId); - unsigned noteLength = 0; - auto noteData = GetMetaData(notes, Abi::MetadataNoteType, &noteLength); - MsgPackReader docReader; - Result result = docReader.InitFromBuffer(noteData, noteLength); - VK_ASSERT(docReader.Type() == CWP_ITEM_MAP); - const auto hashFormatStr = HashLiteralString("amdpal.format_strings"); - const auto hashIndex = HashLiteralString(".index"); - const auto hashString = HashLiteralString(".string"); - const auto hashVarsCount = HashLiteralString(".argument_count"); - const auto hashBitsPos = HashLiteralString(".64bit_arguments"); - const auto hashStrings = HashLiteralString(".strings"); - - Util::StringView<char> key; - uint32_t palmetaSize = docReader.Get().as.map.size; - for (uint32 i = 0; i < palmetaSize; ++i) + // The ELF code size is zero when a graphics shader library is used, so return early.
+ if (codeSize == 0) { - result = docReader.Next(CWP_ITEM_STR); - const char* itemString = static_cast<const char*>(docReader.Get().as.str.start); - if (Util::HashString(itemString, docReader.Get().as.str.length) == hashFormatStr) + return; + } + + Util::Abi::PipelineAbiReader abiReader(pDevice->VkInstance()->Allocator(), + Util::Span<const void>{ pCode, static_cast<size_t>(codeSize) }); + if (abiReader.Init() == Result::Success) + { + auto& elfReader = abiReader.GetElfReader(); + auto noteId = abiReader.GetElfReader().FindSection(".note"); + auto& noteSection = abiReader.GetElfReader().GetSection(noteId); + VK_ASSERT(noteId != 0); + VK_ASSERT(noteSection.sh_type == static_cast<uint32>(Elf::SectionHeaderType::Note)); + ElfReader::Notes notes(elfReader, noteId); + unsigned noteLength = 0; + auto noteData = GetMetaData(notes, Abi::MetadataNoteType, &noteLength); + MsgPackReader docReader; + Result result = docReader.InitFromBuffer(noteData, noteLength); + VK_ASSERT(docReader.Type() == CWP_ITEM_MAP); + const auto hashFormatStr = HashLiteralString("amdpal.format_strings"); + const auto hashIndex = HashLiteralString(".index"); + const auto hashString = HashLiteralString(".string"); + const auto hashVarsCount = HashLiteralString(".argument_count"); + const auto hashBitsPos = HashLiteralString(".64bit_arguments"); + const auto hashStrings = HashLiteralString(".strings"); + + Util::StringView<char> key; + uint32_t palmetaSize = docReader.Get().as.map.size; + for (uint32 i = 0; i < palmetaSize; ++i) { - result = docReader.Next(CWP_ITEM_MAP); - VK_ASSERT(docReader.Get().as.map.size == 2); - uint32_t formatStringsMap = docReader.Get().as.map.size; - for (uint32 j = 0; j < formatStringsMap; ++j) + result = docReader.Next(CWP_ITEM_STR); + const char* itemString = static_cast<const char*>(docReader.Get().as.str.start); + if (Util::HashString(itemString, docReader.Get().as.str.length) == hashFormatStr) { - result = docReader.UnpackNext(&key); - itemString = static_cast<const char*>(docReader.Get().as.str.start); - if (Util::HashString(key) == hashStrings) + result = docReader.Next(CWP_ITEM_MAP); + VK_ASSERT(docReader.Get().as.map.size == 2); + uint32_t formatStringsMap = docReader.Get().as.map.size; + for (uint32 j = 0; j < formatStringsMap; ++j) { - result = docReader.Next(CWP_ITEM_ARRAY); - uint32_t stringsSize = docReader.Get().as.array.size; - for (uint32 k = 0; k < stringsSize; ++k) + result = docReader.UnpackNext(&key); + itemString = static_cast<const char*>(docReader.Get().as.str.start); + if (Util::HashString(key) == hashStrings) { - result = docReader.Next(CWP_ITEM_MAP); - uint64_t hashValue = 0; - uint64_t outputCount = 0; - StringView<char> formatString; - Vector bitPos(nullptr); - uint32_t stringMap = docReader.Get().as.map.size; - for (uint32 l = 0; l < stringMap; ++l) + result = docReader.Next(CWP_ITEM_ARRAY); + uint32_t stringsSize = docReader.Get().as.array.size; + for (uint32 k = 0; k < stringsSize; ++k) { - result = docReader.UnpackNext(&key); - auto hashKey = Util::HashString(key); - switch (hashKey) - { - case hashIndex: - docReader.UnpackNext(&hashValue); - break; - case hashString: - docReader.UnpackNext(&formatString); - break; - case hashVarsCount: - docReader.UnpackNext(&outputCount); - break; - default: + result = docReader.Next(CWP_ITEM_MAP); + uint64_t hashValue = 0; + uint64_t outputCount = 0; + StringView<char> formatString; + Vector bitPos(nullptr); + uint32_t stringMap = docReader.Get().as.map.size; + for (uint32 l = 0; l < stringMap; ++l) { - VK_ASSERT(hashKey == hashBitsPos); - docReader.UnpackNext(&bitPos); - break; + result = docReader.UnpackNext(&key); + auto
hashKey = Util::HashString(key); + switch (hashKey) + { + case hashIndex: + docReader.UnpackNext(&hashValue); + break; + case hashString: + docReader.UnpackNext(&formatString); + break; + case hashVarsCount: + docReader.UnpackNext(&outputCount); + break; + default: + VK_ASSERT(hashKey == hashBitsPos); + docReader.UnpackNext(&bitPos); + break; + } } - } - bool found = true; - PrintfElfString* pElfString = nullptr; - result = pFormatStrings->FindAllocate(hashValue, &found, &pElfString); - if ((result == Pal::Result::Success) && (found == false)) - { - pElfString->printStr.Reserve(formatString.Length()); - for (auto& elem : formatString) - { - pElfString->printStr.PushBack(elem); - } - pElfString->bit64s.Reserve(outputCount); - for (uint32 bitIndex = 0; bitIndex < outputCount; ++bitIndex) + bool found = true; + PrintfElfString* pElfString = nullptr; + result = pFormatStrings->FindAllocate(hashValue, &found, &pElfString); + if ((result == Pal::Result::Success) && (found == false)) { - bool bitValue = (bitPos[bitIndex / 64] >> (bitIndex % 64)) & 1; - pElfString->bit64s.PushBack(bitValue); + pElfString->printStr.Reserve(formatString.Length()); + for (auto& elem : formatString) + { + pElfString->printStr.PushBack(elem); + } + pElfString->bit64s.Reserve(outputCount); + for (uint32 bitIndex = 0; bitIndex < outputCount; ++bitIndex) + { + bool bitValue = (bitPos[bitIndex / 64] >> (bitIndex % 64)) & 1; + pElfString->bit64s.PushBack(bitValue); + } } } } - } - else - { - docReader.Skip(1); + else + { + docReader.Skip(1); + } } } - } - else - { - docReader.Skip(1); + else + { + docReader.Skip(1); + } } } } diff --git a/icd/api/devmode/devmode_mgr.h b/icd/api/devmode/devmode_mgr.h index 634533dc..ff8f213a 100644 --- a/icd/api/devmode/devmode_mgr.h +++ b/icd/api/devmode/devmode_mgr.h @@ -69,7 +69,6 @@ struct AccelStructUserMarkerString // of the driver. class IDevMode { -#if ICD_GPUOPEN_DEVMODE_BUILD public: // Pipeline hash used for instruction tracing whenever no pipeline is being targeted.
static constexpr uint64_t InvalidTargetPipelineHash = 0; @@ -160,8 +159,6 @@ class IDevMode virtual void LabelAccelStruct( uint64_t deviceAddress, const char* pString) {} - -#endif }; } diff --git a/icd/api/devmode/devmode_rgp.cpp b/icd/api/devmode/devmode_rgp.cpp index bab86493..055e7bb0 100644 --- a/icd/api/devmode/devmode_rgp.cpp +++ b/icd/api/devmode/devmode_rgp.cpp @@ -29,7 +29,6 @@ *********************************************************************************************************************** */ -#if ICD_GPUOPEN_DEVMODE_BUILD // Vulkan headers #include "devmode/devmode_rgp.h" #include "include/vk_cmdbuffer.h" @@ -121,7 +120,7 @@ class DevModeRgpStringTableTraceSource : public GpuUtil::StringTableTraceSource stringIdx++; } - uint32_t tableId = m_pDevMode->AcquireStringTableId(); + uint32_t tableId = AcquireTableId(); AddStringTable(tableId, numStrings, stringOffsets.Data(), stringData.Data(), stringData.size()); } @@ -401,7 +400,6 @@ DevModeRgp::DevModeRgp( m_crashAnalysisEnabled(false), m_perfCounterIds(pInstance->Allocator()), m_pipelineCaches(pInstance->Allocator()), - m_stringTableId(0), m_pStringTableTraceSource(nullptr), m_pUserMarkerHistoryTraceSource(nullptr), m_accelStructNames(64, m_pInstance->Allocator()) @@ -2929,7 +2927,7 @@ void DevModeRgp::ProcessMarkerTable( uint32 markerStringDataSize, const char* pMarkerStringData) { - uint32_t tableId = AcquireStringTableId(); + uint32_t tableId = m_pStringTableTraceSource->AcquireTableId(); m_pStringTableTraceSource->AddStringTable(tableId, numMarkerStrings, pMarkerStringOffsets, @@ -2963,5 +2961,3 @@ void DevModeRgp::LabelAccelStruct( } }; // namespace vk - -#endif diff --git a/icd/api/devmode/devmode_rgp.h b/icd/api/devmode/devmode_rgp.h index 8a83e478..b5d14c89 100644 --- a/icd/api/devmode/devmode_rgp.h +++ b/icd/api/devmode/devmode_rgp.h @@ -42,10 +42,8 @@ // gpuutil headers #include "gpuUtil/palGpaSession.h" -#if ICD_GPUOPEN_DEVMODE_BUILD // gpuopen headers #include "gpuopen.h" -#endif #include @@ -84,7 +82,6 @@ namespace vk // of the driver. class DevModeRgp final : public IDevMode { -#if ICD_GPUOPEN_DEVMODE_BUILD public: // Number of frames to wait before collecting a hardware trace. // Note: This will be replaced in the future by a remotely configurable value provided by the RGP server. 
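For context on the string-table changes in the hunks around here: the patch deletes the per-DevMode m_stringTableId counter and instead takes table IDs from the trace source's own AcquireTableId(). A minimal sketch of that ownership move, using a hypothetical StringTableSource type in place of GpuUtil::StringTableTraceSource (the class and member names below are illustrative assumptions, not the driver's actual API):

    #include <atomic>
    #include <cstdint>

    // Hypothetical stand-in for GpuUtil::StringTableTraceSource: the source owns a
    // monotonically increasing table ID, so DevModeRgp/DevModeUberTrace no longer
    // need their own m_stringTableId member.
    class StringTableSource
    {
    public:
        uint32_t AcquireTableId() { return ++m_tableId; } // first ID handed out is 1; thread-safe
    private:
        std::atomic<uint32_t> m_tableId{ 0 };
    };

Each ProcessMarkerTable call then asks the trace source for the next ID rather than reaching back into the DevMode object, which is the refactor the removed AcquireStringTableId() lines reflect.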
@@ -183,8 +180,6 @@ class DevModeRgp final : public IDevMode const AccelStructUserMarkerTable& GetAccelStructUserMarkerTable() const { return m_accelStructNames; } - uint32_t AcquireStringTableId() { return ++m_stringTableId; } - private: static constexpr uint32_t MaxTraceQueueFamilies = Queue::MaxQueueFamilies; static constexpr uint32_t MaxTraceQueues = MaxTraceQueueFamilies * Queue::MaxQueuesPerFamily; @@ -352,13 +347,11 @@ class DevModeRgp final : public IDevMode PipelineCacheList m_pipelineCaches; Util::RWLock m_pipelineReinjectionLock; - std::atomic m_stringTableId; GpuUtil::StringTableTraceSource* m_pStringTableTraceSource; GpuUtil::UserMarkerHistoryTraceSource* m_pUserMarkerHistoryTraceSource; AccelStructUserMarkerTable m_accelStructNames; Util::Mutex m_mutex; -#endif }; } diff --git a/icd/api/devmode/devmode_ubertrace.cpp b/icd/api/devmode/devmode_ubertrace.cpp index 1111b597..63d8fcfa 100644 --- a/icd/api/devmode/devmode_ubertrace.cpp +++ b/icd/api/devmode/devmode_ubertrace.cpp @@ -29,7 +29,6 @@ *********************************************************************************************************************** */ -#if ICD_GPUOPEN_DEVMODE_BUILD // Vulkan headers #include "devmode/devmode_ubertrace.h" #include "include/vk_cmdbuffer.h" @@ -118,7 +117,7 @@ class DevModeUberTraceStringTableTraceSource : public GpuUtil::StringTableTraceS stringIdx++; } - uint32_t tableId = m_pDevMode->AcquireStringTableId(); + uint32_t tableId = AcquireTableId(); AddStringTable(tableId, numStrings, stringOffsets.Data(), stringData.Data(), stringData.size()); } @@ -144,7 +143,6 @@ DevModeUberTrace::DevModeUberTrace( m_pStringTableTraceSource(nullptr), m_pUserMarkerHistoryTraceSource(nullptr), m_pRenderOpTraceController(nullptr), - m_stringTableId(0), m_accelStructNames(64, m_pInstance->Allocator()) { m_accelStructNames.Init(); @@ -216,8 +214,6 @@ void DevModeUberTrace::NotifyFrameBegin( { // Wait for the driver to be resumed in case it's been paused. WaitForDriverResume(); - - m_pInstance->PalPlatform()->UpdateFrameTraceController(pQueue->PalQueue(DefaultDeviceIndex)); } // ===================================================================================================================== @@ -726,7 +722,7 @@ void DevModeUberTrace::ProcessMarkerTable( uint32 markerStringDataSize, const char* pMarkerStringData) { - uint32_t tableId = AcquireStringTableId(); + uint32_t tableId = m_pStringTableTraceSource->AcquireTableId(); m_pStringTableTraceSource->AddStringTable(tableId, numMarkerStrings, pMarkerStringOffsets, @@ -760,5 +756,3 @@ void DevModeUberTrace::LabelAccelStruct( } } // namespace vk - -#endif diff --git a/icd/api/devmode/devmode_ubertrace.h b/icd/api/devmode/devmode_ubertrace.h index 0f81b0c6..ce06f8aa 100644 --- a/icd/api/devmode/devmode_ubertrace.h +++ b/icd/api/devmode/devmode_ubertrace.h @@ -64,7 +64,6 @@ namespace vk // of the driver. 
class DevModeUberTrace final : public IDevMode { -#if ICD_GPUOPEN_DEVMODE_BUILD public: ~DevModeUberTrace(); @@ -154,9 +153,6 @@ class DevModeUberTrace final : public IDevMode const AccelStructUserMarkerTable& GetAccelStructUserMarkerTable() const { return m_accelStructNames; } - uint32_t AcquireStringTableId() - { return ++m_stringTableId; } - private: DevModeUberTrace(Instance* pInstance); @@ -177,11 +173,9 @@ class DevModeUberTrace final : public IDevMode GpuUtil::StringTableTraceSource* m_pStringTableTraceSource; GpuUtil::UserMarkerHistoryTraceSource* m_pUserMarkerHistoryTraceSource; GpuUtil::RenderOpTraceController* m_pRenderOpTraceController; - std::atomic m_stringTableId; AccelStructUserMarkerTable m_accelStructNames; Util::Mutex m_mutex; -#endif }; } diff --git a/icd/api/include/app_profile.h b/icd/api/include/app_profile.h index b6c51aef..fdd41153 100644 --- a/icd/api/include/app_profile.h +++ b/icd/api/include/app_profile.h @@ -112,7 +112,6 @@ enum class AppProfile : uint32_t BaldursGate3, // Baldur's Gate by Larian Studios Enshrouded, // Enshrouded by Keen Games HolisticEngine, // Holistic Engine by Keen Games - GravityMark, // Tellusim GravityMark IdTechEngine, // id Tech Engine (Default) #if VKI_RAY_TRACING ControlDX12, // VKD3D Control Ultimate Edition @@ -148,7 +147,8 @@ enum class AppProfile : uint32_t Archean, // Archean by batcholi Houdini, // Houdini SevenDaysToDie, // 7 Days to Die - GgmlVulkan // ggml-vulkan + GgmlVulkan, // ggml-vulkan + Blender // Blender }; struct ProfileSettings diff --git a/icd/api/include/barrier_policy.h b/icd/api/include/barrier_policy.h index 7316e418..730b01a4 100644 --- a/icd/api/include/barrier_policy.h +++ b/icd/api/include/barrier_policy.h @@ -261,13 +261,17 @@ class ImageBarrierPolicy final : public ResourceBarrierPolicy { return m_supportedLayoutUsageMask; } VK_FORCEINLINE uint32_t GetSupportedLayoutUsageMask( - uint32_t queueFamilyIndex) const + uint32_t queueFamilyIndex, + VkImageLayout layout) const { + const uint32_t extraLayoutUsages = (layout == VK_IMAGE_LAYOUT_RENDERING_LOCAL_READ_KHR) ? + Pal::LayoutUncompressed : 0u; + // This version of the function returns the supported layout usage masks in the scope of the specified queue // family. Accordingly, the image's supported layout usage mask is limited to the layout usage mask that // is supported by the specified queue family or by other queue families that are allowed to concurrently // access the image. 
- return m_supportedLayoutUsageMask & + return (m_supportedLayoutUsageMask | extraLayoutUsages) & (GetQueueFamilyPolicy(queueFamilyIndex).supportedLayoutUsageMask | m_concurrentLayoutUsageMask); } diff --git a/icd/api/include/graphics_pipeline_common.h b/icd/api/include/graphics_pipeline_common.h index 931985b9..d0b92988 100644 --- a/icd/api/include/graphics_pipeline_common.h +++ b/icd/api/include/graphics_pipeline_common.h @@ -120,15 +120,15 @@ union GraphicsPipelineObjectFlags uint32_t isPointSizeUsed : 1; uint32_t bindColorBlendObject : 1; uint32_t bindMsaaObject : 1; - uint32_t viewIndexFromDeviceIndex : 1; uint32_t perpLineEndCapsEnable : 1; uint32_t shadingRateUsedInShader : 1; uint32_t fragmentShadingRateEnable : 1; + uint32_t viewIndexFromDeviceIndex : 2; #if VKI_RAY_TRACING uint32_t hasRayTracing : 1; - uint32_t reserved : 15; + uint32_t reserved : 14; #else - uint32_t reserved : 16; + uint32_t reserved : 15; #endif }; uint32_t value; diff --git a/icd/api/include/khronos/GLSL.ext.AMD.h b/icd/api/include/khronos/GLSL.ext.AMD.h index 297a6f98..8d60d679 100644 --- a/icd/api/include/khronos/GLSL.ext.AMD.h +++ b/icd/api/include/khronos/GLSL.ext.AMD.h @@ -84,41 +84,4 @@ enum GcnShaderAMD { GcnShaderCountAMD }; -#if VKI_TEXEL_BUFFER_EXPLICIT_FORMAT_SUPPORT -// SPV_AMD_shader_texel_buffer_explicit_format -static const Capability CapabilityImageBufferReadWriteWithFormatAMD = static_cast<Capability>(5024); - -static const Op OpImageBufferReadAMD = static_cast<Op>(5025); -static const Op OpImageBufferWriteAMD = static_cast<Op>(5026); - -static const ImageFormat ImageFormatRgb32fAMD = static_cast<ImageFormat>(5028); -static const ImageFormat ImageFormatRgb32uiAMD = static_cast<ImageFormat>(5029); -static const ImageFormat ImageFormatRgb32iAMD = static_cast<ImageFormat>(5030); -static const ImageFormat ImageFormatR10G11B11fAMD = static_cast<ImageFormat>(5031); -static const ImageFormat ImageFormatRgb10A2SnormAMD = static_cast<ImageFormat>(5032); -static const ImageFormat ImageFormatRgb10A2iAMD = static_cast<ImageFormat>(5033); -static const ImageFormat ImageFormatRgba16SscaledAMD = static_cast<ImageFormat>(5034); -static const ImageFormat ImageFormatRgb10A2SscaledAMD = static_cast<ImageFormat>(5035); -static const ImageFormat ImageFormatRg16SscaledAMD = static_cast<ImageFormat>(5036); -static const ImageFormat ImageFormatRgba8SscaledAMD = static_cast<ImageFormat>(5037); -static const ImageFormat ImageFormatRg8SscaledAMD = static_cast<ImageFormat>(5038); -static const ImageFormat ImageFormatR16SscaledAMD = static_cast<ImageFormat>(5039); -static const ImageFormat ImageFormatR8SscaledAMD = static_cast<ImageFormat>(5040); -static const ImageFormat ImageFormatRgba16UscaledAMD = static_cast<ImageFormat>(5041); -static const ImageFormat ImageFormatRgb10A2UscaledAMD = static_cast<ImageFormat>(5042); -static const ImageFormat ImageFormatRg16UscaledAMD = static_cast<ImageFormat>(5043); -static const ImageFormat ImageFormatRgba8USscaledAMD = static_cast<ImageFormat>(5044); -static const ImageFormat ImageFormatRg8UscaledAMD = static_cast<ImageFormat>(5045); -static const ImageFormat ImageFormatR16UscaledAMD = static_cast<ImageFormat>(5046); -static const ImageFormat ImageFormatR8UscaledAMD = static_cast<ImageFormat>(5047); -#endif - -#if VKI_NORMALIZED_TRIG_FUNCTIONS -// SPV_AMD_normalized_trig - Internal Use Only -static const Capability CapabilityTrigNormalizedAMD = static_cast<Capability>(5058); - -static const Op OpSinNormalizedAMD = static_cast<Op>(5059); -static const Op OpCosNormalizedAMD = static_cast<Op>(5060); -#endif - #endif diff --git a/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_beta.h b/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_beta.h index df18b404..f5e94750 100644 --- a/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_beta.h +++
b/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_beta.h @@ -53,13 +53,14 @@ typedef struct VkPhysicalDevicePortabilitySubsetPropertiesKHR { // VK_AMDX_shader_enqueue is a preprocessor guard. Do not pass it to API calls. #define VK_AMDX_shader_enqueue 1 -#define VK_AMDX_SHADER_ENQUEUE_SPEC_VERSION 1 +#define VK_AMDX_SHADER_ENQUEUE_SPEC_VERSION 2 #define VK_AMDX_SHADER_ENQUEUE_EXTENSION_NAME "VK_AMDX_shader_enqueue" #define VK_SHADER_INDEX_UNUSED_AMDX (~0U) typedef struct VkPhysicalDeviceShaderEnqueueFeaturesAMDX { VkStructureType sType; void* pNext; VkBool32 shaderEnqueue; + VkBool32 shaderMeshEnqueue; } VkPhysicalDeviceShaderEnqueueFeaturesAMDX; typedef struct VkPhysicalDeviceShaderEnqueuePropertiesAMDX { @@ -70,12 +71,16 @@ typedef struct VkPhysicalDeviceShaderEnqueuePropertiesAMDX { uint32_t maxExecutionGraphShaderPayloadSize; uint32_t maxExecutionGraphShaderPayloadCount; uint32_t executionGraphDispatchAddressAlignment; + uint32_t maxExecutionGraphWorkgroupCount[3]; + uint32_t maxExecutionGraphWorkgroups; } VkPhysicalDeviceShaderEnqueuePropertiesAMDX; typedef struct VkExecutionGraphPipelineScratchSizeAMDX { VkStructureType sType; void* pNext; - VkDeviceSize size; + VkDeviceSize minSize; + VkDeviceSize maxSize; + VkDeviceSize sizeGranularity; } VkExecutionGraphPipelineScratchSizeAMDX; typedef struct VkExecutionGraphPipelineCreateInfoAMDX { @@ -116,12 +121,12 @@ typedef struct VkPipelineShaderStageNodeCreateInfoAMDX { } VkPipelineShaderStageNodeCreateInfoAMDX; typedef VkResult (VKAPI_PTR *PFN_vkCreateExecutionGraphPipelinesAMDX)(VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount, const VkExecutionGraphPipelineCreateInfoAMDX* pCreateInfos, const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines); -typedef VkResult (VKAPI_PTR *PFN_vkGetExecutionGraphPipelineScratchSizeAMDX)(VkDevice device, VkPipeline executionGraph, VkExecutionGraphPipelineScratchSizeAMDX* pSizeInfo); -typedef VkResult (VKAPI_PTR *PFN_vkGetExecutionGraphPipelineNodeIndexAMDX)(VkDevice device, VkPipeline executionGraph, const VkPipelineShaderStageNodeCreateInfoAMDX* pNodeInfo, uint32_t* pNodeIndex); -typedef void (VKAPI_PTR *PFN_vkCmdInitializeGraphScratchMemoryAMDX)(VkCommandBuffer commandBuffer, VkDeviceAddress scratch); -typedef void (VKAPI_PTR *PFN_vkCmdDispatchGraphAMDX)(VkCommandBuffer commandBuffer, VkDeviceAddress scratch, const VkDispatchGraphCountInfoAMDX* pCountInfo); -typedef void (VKAPI_PTR *PFN_vkCmdDispatchGraphIndirectAMDX)(VkCommandBuffer commandBuffer, VkDeviceAddress scratch, const VkDispatchGraphCountInfoAMDX* pCountInfo); -typedef void (VKAPI_PTR *PFN_vkCmdDispatchGraphIndirectCountAMDX)(VkCommandBuffer commandBuffer, VkDeviceAddress scratch, VkDeviceAddress countInfo); +typedef VkResult (VKAPI_PTR *PFN_vkGetExecutionGraphPipelineScratchSizeAMDX)(VkDevice device, VkPipeline executionGraph, VkExecutionGraphPipelineScratchSizeAMDX* pSizeInfo); +typedef VkResult (VKAPI_PTR *PFN_vkGetExecutionGraphPipelineNodeIndexAMDX)(VkDevice device, VkPipeline executionGraph, const VkPipelineShaderStageNodeCreateInfoAMDX* pNodeInfo, uint32_t* pNodeIndex); +typedef void (VKAPI_PTR *PFN_vkCmdInitializeGraphScratchMemoryAMDX)(VkCommandBuffer commandBuffer, VkPipeline executionGraph, VkDeviceAddress scratch, VkDeviceSize scratchSize); +typedef void (VKAPI_PTR *PFN_vkCmdDispatchGraphAMDX)(VkCommandBuffer commandBuffer, VkDeviceAddress scratch, VkDeviceSize scratchSize, const VkDispatchGraphCountInfoAMDX* pCountInfo); +typedef void (VKAPI_PTR *PFN_vkCmdDispatchGraphIndirectAMDX)(VkCommandBuffer 
commandBuffer, VkDeviceAddress scratch, VkDeviceSize scratchSize, const VkDispatchGraphCountInfoAMDX* pCountInfo); +typedef void (VKAPI_PTR *PFN_vkCmdDispatchGraphIndirectCountAMDX)(VkCommandBuffer commandBuffer, VkDeviceAddress scratch, VkDeviceSize scratchSize, VkDeviceAddress countInfo); #ifndef VK_NO_PROTOTYPES VKAPI_ATTR VkResult VKAPI_CALL vkCreateExecutionGraphPipelinesAMDX( @@ -145,21 +150,26 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetExecutionGraphPipelineNodeIndexAMDX( VKAPI_ATTR void VKAPI_CALL vkCmdInitializeGraphScratchMemoryAMDX( VkCommandBuffer commandBuffer, - VkDeviceAddress scratch); + VkPipeline executionGraph, + VkDeviceAddress scratch, + VkDeviceSize scratchSize); VKAPI_ATTR void VKAPI_CALL vkCmdDispatchGraphAMDX( VkCommandBuffer commandBuffer, VkDeviceAddress scratch, + VkDeviceSize scratchSize, const VkDispatchGraphCountInfoAMDX* pCountInfo); VKAPI_ATTR void VKAPI_CALL vkCmdDispatchGraphIndirectAMDX( VkCommandBuffer commandBuffer, VkDeviceAddress scratch, + VkDeviceSize scratchSize, const VkDispatchGraphCountInfoAMDX* pCountInfo); VKAPI_ATTR void VKAPI_CALL vkCmdDispatchGraphIndirectCountAMDX( VkCommandBuffer commandBuffer, VkDeviceAddress scratch, + VkDeviceSize scratchSize, VkDeviceAddress countInfo); #endif diff --git a/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_core.h b/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_core.h index 1dc7e23e..d9cdd137 100644 --- a/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_core.h +++ b/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_core.h @@ -69,7 +69,7 @@ extern "C" { #define VK_API_VERSION_1_0 VK_MAKE_API_VERSION(0, 1, 0, 0)// Patch version should always be set to 0 // Version of this file -#define VK_HEADER_VERSION 297 +#define VK_HEADER_VERSION 301 // Complete version of this file #define VK_HEADER_VERSION_COMPLETE VK_MAKE_API_VERSION(0, 1, 3, VK_HEADER_VERSION) @@ -508,10 +508,6 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_ADD_INFO_KHR = 1000040005, VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR = 1000040006, VK_STRUCTURE_TYPE_TEXTURE_LOD_GATHER_FORMAT_PROPERTIES_AMD = 1000041000, - VK_STRUCTURE_TYPE_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR = 1000044006, - VK_STRUCTURE_TYPE_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_INFO_EXT = 1000044007, - VK_STRUCTURE_TYPE_ATTACHMENT_SAMPLE_COUNT_INFO_AMD = 1000044008, - VK_STRUCTURE_TYPE_MULTIVIEW_PER_VIEW_ATTRIBUTES_INFO_NVX = 1000044009, VK_STRUCTURE_TYPE_STREAM_DESCRIPTOR_SURFACE_CREATE_INFO_GGP = 1000049000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CORNER_SAMPLED_IMAGE_FEATURES_NV = 1000050000, VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO_NV = 1000056000, @@ -553,6 +549,7 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_SWAPCHAIN_COUNTER_CREATE_INFO_EXT = 1000091003, VK_STRUCTURE_TYPE_PRESENT_TIMES_INFO_GOOGLE = 1000092000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PER_VIEW_ATTRIBUTES_PROPERTIES_NVX = 1000097000, + VK_STRUCTURE_TYPE_MULTIVIEW_PER_VIEW_ATTRIBUTES_INFO_NVX = 1000044009, VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV = 1000098000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT = 1000099000, VK_STRUCTURE_TYPE_PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT = 1000099001, @@ -612,6 +609,7 @@ typedef enum VkStructureType { #ifdef VK_ENABLE_BETA_EXTENSIONS VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_NODE_CREATE_INFO_AMDX = 1000134004, #endif + VK_STRUCTURE_TYPE_ATTACHMENT_SAMPLE_COUNT_INFO_AMD = 1000044008, VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT = 1000143000, 
VK_STRUCTURE_TYPE_RENDER_PASS_SAMPLE_LOCATIONS_BEGIN_INFO_EXT = 1000143001, VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT = 1000143002, @@ -703,6 +701,8 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXCLUSIVE_SCISSOR_FEATURES_NV = 1000205002, VK_STRUCTURE_TYPE_CHECKPOINT_DATA_NV = 1000206000, VK_STRUCTURE_TYPE_QUEUE_FAMILY_CHECKPOINT_PROPERTIES_NV = 1000206001, + VK_STRUCTURE_TYPE_QUEUE_FAMILY_CHECKPOINT_PROPERTIES_2_NV = 1000314008, + VK_STRUCTURE_TYPE_CHECKPOINT_DATA_2_NV = 1000314009, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_FUNCTIONS_2_FEATURES_INTEL = 1000209000, VK_STRUCTURE_TYPE_QUERY_POOL_PERFORMANCE_QUERY_CREATE_INFO_INTEL = 1000210000, VK_STRUCTURE_TYPE_INITIALIZE_PERFORMANCE_API_INFO_INTEL = 1000210001, @@ -718,11 +718,13 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_FEATURES_EXT = 1000218000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_PROPERTIES_EXT = 1000218001, VK_STRUCTURE_TYPE_RENDER_PASS_FRAGMENT_DENSITY_MAP_CREATE_INFO_EXT = 1000218002, + VK_STRUCTURE_TYPE_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_INFO_EXT = 1000044007, VK_STRUCTURE_TYPE_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR = 1000226000, VK_STRUCTURE_TYPE_PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR = 1000226001, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR = 1000226002, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR = 1000226003, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_KHR = 1000226004, + VK_STRUCTURE_TYPE_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR = 1000044006, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_2_AMD = 1000227000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD = 1000229000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_LOCAL_READ_FEATURES_KHR = 1000232000, @@ -848,8 +850,6 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_IMPORT_METAL_IO_SURFACE_INFO_EXT = 1000311009, VK_STRUCTURE_TYPE_EXPORT_METAL_SHARED_EVENT_INFO_EXT = 1000311010, VK_STRUCTURE_TYPE_IMPORT_METAL_SHARED_EVENT_INFO_EXT = 1000311011, - VK_STRUCTURE_TYPE_QUEUE_FAMILY_CHECKPOINT_PROPERTIES_2_NV = 1000314008, - VK_STRUCTURE_TYPE_CHECKPOINT_DATA_2_NV = 1000314009, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_PROPERTIES_EXT = 1000316000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_DENSITY_MAP_PROPERTIES_EXT = 1000316001, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT = 1000316002, @@ -1163,6 +1163,11 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_IMAGE_ALIGNMENT_CONTROL_CREATE_INFO_MESA = 1000575002, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLAMP_CONTROL_FEATURES_EXT = 1000582000, VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_DEPTH_CLAMP_CONTROL_CREATE_INFO_EXT = 1000582001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HDR_VIVID_FEATURES_HUAWEI = 1000590000, + VK_STRUCTURE_TYPE_HDR_VIVID_DYNAMIC_METADATA_HUAWEI = 1000590001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_2_FEATURES_NV = 1000593000, + VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_FLEXIBLE_DIMENSIONS_PROPERTIES_NV = 1000593001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_2_PROPERTIES_NV = 1000593002, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES, // VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT is a deprecated alias @@ 
-1172,7 +1177,6 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO_KHR = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES, VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_RENDERING_INFO_KHR = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_RENDERING_INFO, - VK_STRUCTURE_TYPE_ATTACHMENT_SAMPLE_COUNT_INFO_NV = VK_STRUCTURE_TYPE_ATTACHMENT_SAMPLE_COUNT_INFO_AMD, VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO_KHR = VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES, @@ -1246,6 +1250,7 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2_KHR = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, VK_STRUCTURE_TYPE_SPARSE_IMAGE_MEMORY_REQUIREMENTS_2_KHR = VK_STRUCTURE_TYPE_SPARSE_IMAGE_MEMORY_REQUIREMENTS_2, VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO, + VK_STRUCTURE_TYPE_ATTACHMENT_SAMPLE_COUNT_INFO_NV = VK_STRUCTURE_TYPE_ATTACHMENT_SAMPLE_COUNT_INFO_AMD, VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO_KHR = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO, VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO_KHR = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO, VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO_KHR = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO, @@ -2647,8 +2652,6 @@ typedef enum VkPipelineCreateFlagBits { VK_PIPELINE_CREATE_DISPATCH_BASE_BIT = 0x00000010, VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT = 0x00000100, VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT = 0x00000200, - VK_PIPELINE_CREATE_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR = 0x00200000, - VK_PIPELINE_CREATE_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT = 0x00400000, VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_ANY_HIT_SHADERS_BIT_KHR = 0x00004000, VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR = 0x00008000, VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR = 0x00010000, @@ -2657,6 +2660,8 @@ typedef enum VkPipelineCreateFlagBits { VK_PIPELINE_CREATE_RAY_TRACING_SKIP_AABBS_BIT_KHR = 0x00002000, VK_PIPELINE_CREATE_RAY_TRACING_SHADER_GROUP_HANDLE_CAPTURE_REPLAY_BIT_KHR = 0x00080000, VK_PIPELINE_CREATE_DEFER_COMPILE_BIT_NV = 0x00000020, + VK_PIPELINE_CREATE_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT = 0x00400000, + VK_PIPELINE_CREATE_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR = 0x00200000, VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR = 0x00000040, VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR = 0x00000080, VK_PIPELINE_CREATE_INDIRECT_BINDABLE_BIT_NV = 0x00040000, @@ -2674,12 +2679,12 @@ typedef enum VkPipelineCreateFlagBits { VK_PIPELINE_CREATE_NO_PROTECTED_ACCESS_BIT_EXT = 0x08000000, VK_PIPELINE_CREATE_PROTECTED_ACCESS_ONLY_BIT_EXT = 0x40000000, VK_PIPELINE_CREATE_DISPATCH_BASE = VK_PIPELINE_CREATE_DISPATCH_BASE_BIT, - // VK_PIPELINE_RASTERIZATION_STATE_CREATE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR is a deprecated alias - VK_PIPELINE_RASTERIZATION_STATE_CREATE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR = VK_PIPELINE_CREATE_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR, - // VK_PIPELINE_RASTERIZATION_STATE_CREATE_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT is a deprecated alias - 
VK_PIPELINE_RASTERIZATION_STATE_CREATE_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT = VK_PIPELINE_CREATE_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT, VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT_KHR = VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT, VK_PIPELINE_CREATE_DISPATCH_BASE_KHR = VK_PIPELINE_CREATE_DISPATCH_BASE, + // VK_PIPELINE_RASTERIZATION_STATE_CREATE_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT is a deprecated alias + VK_PIPELINE_RASTERIZATION_STATE_CREATE_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT = VK_PIPELINE_CREATE_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT, + // VK_PIPELINE_RASTERIZATION_STATE_CREATE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR is a deprecated alias + VK_PIPELINE_RASTERIZATION_STATE_CREATE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR = VK_PIPELINE_CREATE_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR, VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT = VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT, VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT = VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT, VK_PIPELINE_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF @@ -8903,38 +8908,6 @@ typedef VkPhysicalDeviceDynamicRenderingFeatures VkPhysicalDeviceDynamicRenderin typedef VkCommandBufferInheritanceRenderingInfo VkCommandBufferInheritanceRenderingInfoKHR; -typedef struct VkRenderingFragmentShadingRateAttachmentInfoKHR { - VkStructureType sType; - const void* pNext; - VkImageView imageView; - VkImageLayout imageLayout; - VkExtent2D shadingRateAttachmentTexelSize; -} VkRenderingFragmentShadingRateAttachmentInfoKHR; - -typedef struct VkRenderingFragmentDensityMapAttachmentInfoEXT { - VkStructureType sType; - const void* pNext; - VkImageView imageView; - VkImageLayout imageLayout; -} VkRenderingFragmentDensityMapAttachmentInfoEXT; - -typedef struct VkAttachmentSampleCountInfoAMD { - VkStructureType sType; - const void* pNext; - uint32_t colorAttachmentCount; - const VkSampleCountFlagBits* pColorAttachmentSamples; - VkSampleCountFlagBits depthStencilAttachmentSamples; -} VkAttachmentSampleCountInfoAMD; - -typedef VkAttachmentSampleCountInfoAMD VkAttachmentSampleCountInfoNV; - -typedef struct VkMultiviewPerViewAttributesInfoNVX { - VkStructureType sType; - const void* pNext; - VkBool32 perViewAttributes; - VkBool32 perViewAttributesPositionXOnly; -} VkMultiviewPerViewAttributesInfoNVX; - typedef void (VKAPI_PTR *PFN_vkCmdBeginRenderingKHR)(VkCommandBuffer commandBuffer, const VkRenderingInfo* pRenderingInfo); typedef void (VKAPI_PTR *PFN_vkCmdEndRenderingKHR)(VkCommandBuffer commandBuffer); @@ -10309,6 +10282,14 @@ typedef struct VkPhysicalDeviceFragmentShadingRateKHR { VkExtent2D fragmentSize; } VkPhysicalDeviceFragmentShadingRateKHR; +typedef struct VkRenderingFragmentShadingRateAttachmentInfoKHR { + VkStructureType sType; + const void* pNext; + VkImageView imageView; + VkImageLayout imageLayout; + VkExtent2D shadingRateAttachmentTexelSize; +} VkRenderingFragmentShadingRateAttachmentInfoKHR; + typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceFragmentShadingRatesKHR)(VkPhysicalDevice physicalDevice, uint32_t* pFragmentShadingRateCount, VkPhysicalDeviceFragmentShadingRateKHR* pFragmentShadingRates); typedef void (VKAPI_PTR *PFN_vkCmdSetFragmentShadingRateKHR)(VkCommandBuffer commandBuffer, const VkExtent2D* pFragmentSize, const VkFragmentShadingRateCombinerOpKHR combinerOps[2]); @@ -10890,27 +10871,12 @@ typedef VkCommandBufferSubmitInfo VkCommandBufferSubmitInfoKHR; typedef VkPhysicalDeviceSynchronization2Features 
VkPhysicalDeviceSynchronization2FeaturesKHR; -typedef struct VkQueueFamilyCheckpointProperties2NV { - VkStructureType sType; - void* pNext; - VkPipelineStageFlags2 checkpointExecutionStageMask; -} VkQueueFamilyCheckpointProperties2NV; - -typedef struct VkCheckpointData2NV { - VkStructureType sType; - void* pNext; - VkPipelineStageFlags2 stage; - void* pCheckpointMarker; -} VkCheckpointData2NV; - typedef void (VKAPI_PTR *PFN_vkCmdSetEvent2KHR)(VkCommandBuffer commandBuffer, VkEvent event, const VkDependencyInfo* pDependencyInfo); typedef void (VKAPI_PTR *PFN_vkCmdResetEvent2KHR)(VkCommandBuffer commandBuffer, VkEvent event, VkPipelineStageFlags2 stageMask); typedef void (VKAPI_PTR *PFN_vkCmdWaitEvents2KHR)(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent* pEvents, const VkDependencyInfo* pDependencyInfos); typedef void (VKAPI_PTR *PFN_vkCmdPipelineBarrier2KHR)(VkCommandBuffer commandBuffer, const VkDependencyInfo* pDependencyInfo); typedef void (VKAPI_PTR *PFN_vkCmdWriteTimestamp2KHR)(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 stage, VkQueryPool queryPool, uint32_t query); typedef VkResult (VKAPI_PTR *PFN_vkQueueSubmit2KHR)(VkQueue queue, uint32_t submitCount, const VkSubmitInfo2* pSubmits, VkFence fence); -typedef void (VKAPI_PTR *PFN_vkCmdWriteBufferMarker2AMD)(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 stage, VkBuffer dstBuffer, VkDeviceSize dstOffset, uint32_t marker); -typedef void (VKAPI_PTR *PFN_vkGetQueueCheckpointData2NV)(VkQueue queue, uint32_t* pCheckpointDataCount, VkCheckpointData2NV* pCheckpointData); #ifndef VK_NO_PROTOTYPES VKAPI_ATTR void VKAPI_CALL vkCmdSetEvent2KHR( @@ -10944,18 +10910,6 @@ VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit2KHR( uint32_t submitCount, const VkSubmitInfo2* pSubmits, VkFence fence); - -VKAPI_ATTR void VKAPI_CALL vkCmdWriteBufferMarker2AMD( - VkCommandBuffer commandBuffer, - VkPipelineStageFlags2 stage, - VkBuffer dstBuffer, - VkDeviceSize dstOffset, - uint32_t marker); - -VKAPI_ATTR void VKAPI_CALL vkGetQueueCheckpointData2NV( - VkQueue queue, - uint32_t* pCheckpointDataCount, - VkCheckpointData2NV* pCheckpointData); #endif @@ -11198,6 +11152,9 @@ typedef VkFlags64 VkPipelineCreateFlagBits2KHR; static const VkPipelineCreateFlagBits2KHR VK_PIPELINE_CREATE_2_DISABLE_OPTIMIZATION_BIT_KHR = 0x00000001ULL; static const VkPipelineCreateFlagBits2KHR VK_PIPELINE_CREATE_2_ALLOW_DERIVATIVES_BIT_KHR = 0x00000002ULL; static const VkPipelineCreateFlagBits2KHR VK_PIPELINE_CREATE_2_DERIVATIVE_BIT_KHR = 0x00000004ULL; +#ifdef VK_ENABLE_BETA_EXTENSIONS +static const VkPipelineCreateFlagBits2KHR VK_PIPELINE_CREATE_2_EXECUTION_GRAPH_BIT_AMDX = 0x100000000ULL; +#endif static const VkPipelineCreateFlagBits2KHR VK_PIPELINE_CREATE_2_ENABLE_LEGACY_DITHERING_BIT_EXT = 0x400000000ULL; static const VkPipelineCreateFlagBits2KHR VK_PIPELINE_CREATE_2_VIEW_INDEX_FROM_DEVICE_INDEX_BIT_KHR = 0x00000008ULL; static const VkPipelineCreateFlagBits2KHR VK_PIPELINE_CREATE_2_DISPATCH_BASE_BIT_KHR = 0x00000010ULL; @@ -11243,7 +11200,9 @@ static const VkBufferUsageFlagBits2KHR VK_BUFFER_USAGE_2_STORAGE_BUFFER_BIT_KHR static const VkBufferUsageFlagBits2KHR VK_BUFFER_USAGE_2_INDEX_BUFFER_BIT_KHR = 0x00000040ULL; static const VkBufferUsageFlagBits2KHR VK_BUFFER_USAGE_2_VERTEX_BUFFER_BIT_KHR = 0x00000080ULL; static const VkBufferUsageFlagBits2KHR VK_BUFFER_USAGE_2_INDIRECT_BUFFER_BIT_KHR = 0x00000100ULL; +#ifdef VK_ENABLE_BETA_EXTENSIONS static const VkBufferUsageFlagBits2KHR VK_BUFFER_USAGE_2_EXECUTION_GRAPH_SCRATCH_BIT_AMDX = 0x02000000ULL; +#endif 
static const VkBufferUsageFlagBits2KHR VK_BUFFER_USAGE_2_CONDITIONAL_RENDERING_BIT_EXT = 0x00000200ULL; static const VkBufferUsageFlagBits2KHR VK_BUFFER_USAGE_2_SHADER_BINDING_TABLE_BIT_KHR = 0x00000400ULL; static const VkBufferUsageFlagBits2KHR VK_BUFFER_USAGE_2_RAY_TRACING_BIT_NV = 0x00000400ULL; @@ -13008,6 +12967,13 @@ typedef struct VkPhysicalDeviceMultiviewPerViewAttributesPropertiesNVX { VkBool32 perViewPositionAllComponents; } VkPhysicalDeviceMultiviewPerViewAttributesPropertiesNVX; +typedef struct VkMultiviewPerViewAttributesInfoNVX { + VkStructureType sType; + const void* pNext; + VkBool32 perViewAttributes; + VkBool32 perViewAttributesPositionXOnly; +} VkMultiviewPerViewAttributesInfoNVX; + // VK_NV_viewport_swizzle is a preprocessor guard. Do not pass it to API calls. @@ -13376,6 +13342,14 @@ typedef VkPhysicalDeviceSamplerFilterMinmaxProperties VkPhysicalDeviceSamplerFil #define VK_AMD_mixed_attachment_samples 1 #define VK_AMD_MIXED_ATTACHMENT_SAMPLES_SPEC_VERSION 1 #define VK_AMD_MIXED_ATTACHMENT_SAMPLES_EXTENSION_NAME "VK_AMD_mixed_attachment_samples" +typedef struct VkAttachmentSampleCountInfoAMD { + VkStructureType sType; + const void* pNext; + uint32_t colorAttachmentCount; + const VkSampleCountFlagBits* pColorAttachmentSamples; + VkSampleCountFlagBits depthStencilAttachmentSamples; +} VkAttachmentSampleCountInfoAMD; + // VK_AMD_shader_fragment_mask is a preprocessor guard. Do not pass it to API calls. @@ -13555,6 +13529,8 @@ typedef struct VkPipelineCoverageModulationStateCreateInfoNV { const float* pCoverageModulationTable; } VkPipelineCoverageModulationStateCreateInfoNV; +typedef VkAttachmentSampleCountInfoAMD VkAttachmentSampleCountInfoNV; + // VK_NV_fill_rectangle is a preprocessor guard. Do not pass it to API calls. @@ -14312,6 +14288,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetMemoryHostPointerPropertiesEXT( #define VK_AMD_BUFFER_MARKER_SPEC_VERSION 1 #define VK_AMD_BUFFER_MARKER_EXTENSION_NAME "VK_AMD_buffer_marker" typedef void (VKAPI_PTR *PFN_vkCmdWriteBufferMarkerAMD)(VkCommandBuffer commandBuffer, VkPipelineStageFlagBits pipelineStage, VkBuffer dstBuffer, VkDeviceSize dstOffset, uint32_t marker); +typedef void (VKAPI_PTR *PFN_vkCmdWriteBufferMarker2AMD)(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 stage, VkBuffer dstBuffer, VkDeviceSize dstOffset, uint32_t marker); #ifndef VK_NO_PROTOTYPES VKAPI_ATTR void VKAPI_CALL vkCmdWriteBufferMarkerAMD( @@ -14320,6 +14297,13 @@ VKAPI_ATTR void VKAPI_CALL vkCmdWriteBufferMarkerAMD( VkBuffer dstBuffer, VkDeviceSize dstOffset, uint32_t marker); + +VKAPI_ATTR void VKAPI_CALL vkCmdWriteBufferMarker2AMD( + VkCommandBuffer commandBuffer, + VkPipelineStageFlags2 stage, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + uint32_t marker); #endif @@ -14590,8 +14574,22 @@ typedef struct VkCheckpointDataNV { void* pCheckpointMarker; } VkCheckpointDataNV; +typedef struct VkQueueFamilyCheckpointProperties2NV { + VkStructureType sType; + void* pNext; + VkPipelineStageFlags2 checkpointExecutionStageMask; +} VkQueueFamilyCheckpointProperties2NV; + +typedef struct VkCheckpointData2NV { + VkStructureType sType; + void* pNext; + VkPipelineStageFlags2 stage; + void* pCheckpointMarker; +} VkCheckpointData2NV; + typedef void (VKAPI_PTR *PFN_vkCmdSetCheckpointNV)(VkCommandBuffer commandBuffer, const void* pCheckpointMarker); typedef void (VKAPI_PTR *PFN_vkGetQueueCheckpointDataNV)(VkQueue queue, uint32_t* pCheckpointDataCount, VkCheckpointDataNV* pCheckpointData); +typedef void (VKAPI_PTR *PFN_vkGetQueueCheckpointData2NV)(VkQueue queue, 
uint32_t* pCheckpointDataCount, VkCheckpointData2NV* pCheckpointData); #ifndef VK_NO_PROTOTYPES VKAPI_ATTR void VKAPI_CALL vkCmdSetCheckpointNV( @@ -14602,6 +14600,11 @@ VKAPI_ATTR void VKAPI_CALL vkGetQueueCheckpointDataNV( VkQueue queue, uint32_t* pCheckpointDataCount, VkCheckpointDataNV* pCheckpointData); + +VKAPI_ATTR void VKAPI_CALL vkGetQueueCheckpointData2NV( + VkQueue queue, + uint32_t* pCheckpointDataCount, + VkCheckpointData2NV* pCheckpointData); #endif @@ -14823,6 +14826,13 @@ typedef struct VkRenderPassFragmentDensityMapCreateInfoEXT { VkAttachmentReference fragmentDensityMapAttachment; } VkRenderPassFragmentDensityMapCreateInfoEXT; +typedef struct VkRenderingFragmentDensityMapAttachmentInfoEXT { + VkStructureType sType; + const void* pNext; + VkImageView imageView; + VkImageLayout imageLayout; +} VkRenderingFragmentDensityMapAttachmentInfoEXT; + // VK_EXT_scalar_block_layout is a preprocessor guard. Do not pass it to API calls. @@ -19641,7 +19651,7 @@ typedef struct VkPhysicalDeviceDeviceGeneratedCommandsPropertiesEXT { typedef struct VkGeneratedCommandsMemoryRequirementsInfoEXT { VkStructureType sType; - void* pNext; + const void* pNext; VkIndirectExecutionSetEXT indirectExecutionSet; VkIndirectCommandsLayoutEXT indirectCommandsLayout; uint32_t maxSequenceCount; @@ -19893,6 +19903,74 @@ typedef struct VkPipelineViewportDepthClampControlCreateInfoEXT { +// VK_HUAWEI_hdr_vivid is a preprocessor guard. Do not pass it to API calls. +#define VK_HUAWEI_hdr_vivid 1 +#define VK_HUAWEI_HDR_VIVID_SPEC_VERSION 1 +#define VK_HUAWEI_HDR_VIVID_EXTENSION_NAME "VK_HUAWEI_hdr_vivid" +typedef struct VkPhysicalDeviceHdrVividFeaturesHUAWEI { + VkStructureType sType; + void* pNext; + VkBool32 hdrVivid; +} VkPhysicalDeviceHdrVividFeaturesHUAWEI; + +typedef struct VkHdrVividDynamicMetadataHUAWEI { + VkStructureType sType; + const void* pNext; + size_t dynamicMetadataSize; + const void* pDynamicMetadata; +} VkHdrVividDynamicMetadataHUAWEI; + + + +// VK_NV_cooperative_matrix2 is a preprocessor guard. Do not pass it to API calls. 
+#define VK_NV_cooperative_matrix2 1 +#define VK_NV_COOPERATIVE_MATRIX_2_SPEC_VERSION 1 +#define VK_NV_COOPERATIVE_MATRIX_2_EXTENSION_NAME "VK_NV_cooperative_matrix2" +typedef struct VkCooperativeMatrixFlexibleDimensionsPropertiesNV { + VkStructureType sType; + void* pNext; + uint32_t MGranularity; + uint32_t NGranularity; + uint32_t KGranularity; + VkComponentTypeKHR AType; + VkComponentTypeKHR BType; + VkComponentTypeKHR CType; + VkComponentTypeKHR ResultType; + VkBool32 saturatingAccumulation; + VkScopeKHR scope; + uint32_t workgroupInvocations; +} VkCooperativeMatrixFlexibleDimensionsPropertiesNV; + +typedef struct VkPhysicalDeviceCooperativeMatrix2FeaturesNV { + VkStructureType sType; + void* pNext; + VkBool32 cooperativeMatrixWorkgroupScope; + VkBool32 cooperativeMatrixFlexibleDimensions; + VkBool32 cooperativeMatrixReductions; + VkBool32 cooperativeMatrixConversions; + VkBool32 cooperativeMatrixPerElementOperations; + VkBool32 cooperativeMatrixTensorAddressing; + VkBool32 cooperativeMatrixBlockLoads; +} VkPhysicalDeviceCooperativeMatrix2FeaturesNV; + +typedef struct VkPhysicalDeviceCooperativeMatrix2PropertiesNV { + VkStructureType sType; + void* pNext; + uint32_t cooperativeMatrixWorkgroupScopeMaxWorkgroupSize; + uint32_t cooperativeMatrixFlexibleDimensionsMaxDimension; + uint32_t cooperativeMatrixWorkgroupScopeReservedSharedMemory; +} VkPhysicalDeviceCooperativeMatrix2PropertiesNV; + +typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceCooperativeMatrixFlexibleDimensionsPropertiesNV)(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkCooperativeMatrixFlexibleDimensionsPropertiesNV* pProperties); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceCooperativeMatrixFlexibleDimensionsPropertiesNV( + VkPhysicalDevice physicalDevice, + uint32_t* pPropertyCount, + VkCooperativeMatrixFlexibleDimensionsPropertiesNV* pProperties); +#endif + + // VK_KHR_acceleration_structure is a preprocessor guard. Do not pass it to API calls. 
#define VK_KHR_acceleration_structure 1 #define VK_KHR_ACCELERATION_STRUCTURE_SPEC_VERSION 13 diff --git a/icd/api/include/khronos/vulkan.h b/icd/api/include/khronos/vulkan.h index 11faf725..b81cc1b3 100644 --- a/icd/api/include/khronos/vulkan.h +++ b/icd/api/include/khronos/vulkan.h @@ -61,9 +61,6 @@ // Internal (under development) extension definitions #include "devext/vk_amd_gpa_interface.h" -#if VKI_TEXEL_BUFFER_EXPLICIT_FORMAT_SUPPORT -#include "devext/vk_amd_shader_texel_buffer_explicit_format.h" -#endif #if VKI_RAY_TRACING #endif diff --git a/icd/api/include/pipeline_binary_cache.h b/icd/api/include/pipeline_binary_cache.h index 372476e4..6e8c619f 100644 --- a/icd/api/include/pipeline_binary_cache.h +++ b/icd/api/include/pipeline_binary_cache.h @@ -41,9 +41,7 @@ namespace Util { class IPlatformKey; -#if ICD_GPUOPEN_DEVMODE_BUILD class IDevMode; -#endif } // namespace Util namespace vk @@ -63,9 +61,7 @@ class PipelineBinaryCache const Vkgc::GfxIpVersion& gfxIp, const vk::RuntimeSettings& settings, const char* pDefaultCacheFilePath, -#if ICD_GPUOPEN_DEVMODE_BUILD vk::IDevMode* pDevMode, -#endif uint32_t expectedEntries, size_t initDataSize, const void* pInitData, @@ -128,7 +124,6 @@ class PipelineBinaryCache uint32_t srcCacheCount, const PipelineBinaryCache** ppSrcCaches); -#if ICD_GPUOPEN_DEVMODE_BUILD Util::Result LoadReinjectionBinary( const CacheId* pInternalPipelineHash, size_t* pPipelineBinarySize, @@ -153,7 +148,6 @@ class PipelineBinaryCache Util::RWLock* GetHashMappingLock() { return &m_hashMappingLock; } -#endif void FreePipelineBinary(const void* pPipelineBinary); @@ -198,13 +192,11 @@ class PipelineBinaryCache bool createArchiveLayers, const RuntimeSettings& settings); -#if ICD_GPUOPEN_DEVMODE_BUILD VkResult InitReinjectionLayer( const RuntimeSettings& settings); Util::Result InjectBinariesFromDirectory( const RuntimeSettings& settings); -#endif VkResult InitMemoryCacheLayer( const RuntimeSettings& settings); @@ -237,13 +229,11 @@ class PipelineBinaryCache Util::ICacheLayer* m_pTopLayer; // Top layer of the cache chain where queries are submitted -#if ICD_GPUOPEN_DEVMODE_BUILD vk::IDevMode* m_pDevMode; Util::ICacheLayer* m_pReinjectionLayer; // Reinjection interface layer HashMapping m_hashMapping; // Maps the internalPipelineHash to the appropriate CacheId Util::RWLock m_hashMappingLock; // Prevents collisions during writes to the map -#endif Util::ICacheLayer* m_pMemoryLayer; diff --git a/icd/api/include/pipeline_compiler.h b/icd/api/include/pipeline_compiler.h index 91ca3f51..681428fd 100644 --- a/icd/api/include/pipeline_compiler.h +++ b/icd/api/include/pipeline_compiler.h @@ -315,14 +315,12 @@ class PipelineCompiler #endif -#if ICD_GPUOPEN_DEVMODE_BUILD Util::Result RegisterAndLoadReinjectionBinary( const Pal::PipelineHash* pInternalPipelineHash, const Util::MetroHash::Hash* pCacheId, size_t* pBinarySize, const void** ppPipelineBinary, PipelineCache* pPipelineCache = nullptr); -#endif template PipelineCompilerType CheckCompilerType( diff --git a/icd/api/include/vk_alloccb.h b/icd/api/include/vk_alloccb.h index 05584969..83be9620 100644 --- a/icd/api/include/vk_alloccb.h +++ b/icd/api/include/vk_alloccb.h @@ -84,7 +84,7 @@ class PalAllocator private: PAL_DISALLOW_COPY_AND_ASSIGN(PalAllocator); -#if PAL_MEMTRACK +#if VKI_MEMTRACK // NOTE: Memory leak tracking requires an allocator in order to perform the actual allocations. We can't provide // this platform because that would result in a stack overflow. 
Instead, we define this simple allocator // structure which contains the necessary methods to allocate and free system memory. diff --git a/icd/api/include/vk_cmdbuffer.h b/icd/api/include/vk_cmdbuffer.h index 1860b0ec..a1e443fa 100644 --- a/icd/api/include/vk_cmdbuffer.h +++ b/icd/api/include/vk_cmdbuffer.h @@ -279,9 +279,10 @@ struct AllGpuRenderState // changed for all GPUs if it is changed for any GPU. Put DirtyGraphicsState management here will be easier to manage. DirtyGraphicsState dirtyGraphics; - // Value of VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT - // defined by the last bound GraphicsPipeline, which was not nullptr. - bool viewIndexFromDeviceIndex; + // A bit mask determining if the shader stages are making use of VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT. + // 1: pre-raster stages + // 2: fragment stage + uint32_t viewIndexFromDeviceIndex; DynamicRenderingInstance dynamicRenderingInstance; @@ -1118,7 +1119,8 @@ class CmdBuffer void PalCmdAcquire( Pal::AcquireReleaseInfo* pAcquireReleaseInfo, - const VkEvent event, + uint32_t eventCount, + const VkEvent* pEvents, Pal::MemBarrier* const pBufferBarriers, const Buffer** const ppBuffers, Pal::ImgBarrier* const pImageBarriers, @@ -1602,7 +1604,21 @@ class CmdBuffer const VkImageMemoryBarrier* pImageMemoryBarriers, Pal::BarrierInfo* pBarrier); - void ExecuteReleaseThenAcquire( + void FlushAcquireReleaseBarriers( + Pal::AcquireReleaseInfo* pAcquireReleaseInfo, + uint32_t eventCount, + const VkEvent* pEvents, + Pal::MemBarrier* const pBufferBarriers, + const Buffer** const ppBuffers, + Pal::ImgBarrier* const pImageBarriers, + const Image** const ppImages, + VirtualStackFrame* pVirtStackFrame, + const AcquireReleaseMode acquireReleaseMode, + uint32_t deviceMask); + + void ExecuteAcquireRelease( + uint32_t eventCount, + const VkEvent* pEvents, PipelineStageFlags srcStageMask, PipelineStageFlags dstStageMask, uint32_t memBarrierCount, @@ -1610,13 +1626,15 @@ class CmdBuffer uint32_t bufferMemoryBarrierCount, const VkBufferMemoryBarrier* pBufferMemoryBarriers, uint32_t imageMemoryBarrierCount, - const VkImageMemoryBarrier* pImageMemoryBarriers); + const VkImageMemoryBarrier* pImageMemoryBarriers, + const AcquireReleaseMode acquireReleaseMode, + uint32_t rgpBarrierReasonType); - void ExecuteAcquireRelease( + void ExecuteAcquireRelease2( uint32_t dependencyCount, const VkEvent* pEvents, const VkDependencyInfoKHR* pDependencyInfos, - AcquireReleaseMode acquireReleaseMode, + const AcquireReleaseMode acquireReleaseMode, uint32_t rgpBarrierReasonType); enum RebindUserDataFlag : uint32_t diff --git a/icd/api/include/vk_compute_pipeline.h b/icd/api/include/vk_compute_pipeline.h index 5f01590f..72f983f9 100644 --- a/icd/api/include/vk_compute_pipeline.h +++ b/icd/api/include/vk_compute_pipeline.h @@ -161,6 +161,7 @@ class ComputePipeline final : public Pipeline, public NonDispatchable(handle); #else return handle; diff --git a/icd/api/include/vk_extensions.h b/icd/api/include/vk_extensions.h index f3ea14ed..7b77073d 100644 --- a/icd/api/include/vk_extensions.h +++ b/icd/api/include/vk_extensions.h @@ -446,6 +446,7 @@ class DeviceExtensions final : public Extensions EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION, EXT_SHADER_IMAGE_ATOMIC_INT64, EXT_SHADER_MODULE_IDENTIFIER, + EXT_SHADER_REPLICATED_COMPOSITES, EXT_SHADER_STENCIL_EXPORT, EXT_SHADER_SUBGROUP_BALLOT, EXT_SHADER_SUBGROUP_VOTE, diff --git a/icd/api/include/vk_graphics_pipeline.h b/icd/api/include/vk_graphics_pipeline.h index 796d965a..96e45ab3 100644 --- 
a/icd/api/include/vk_graphics_pipeline.h +++ b/icd/api/include/vk_graphics_pipeline.h @@ -215,9 +215,7 @@ class GraphicsPipeline final : public GraphicsPipelineCommon, public NonDispatch bool IsPointSizeUsed() const { return m_flags.isPointSizeUsed; } - // Returns value of VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT - // defined by flags member of VkGraphicsPipelineCreateInfo. - bool ViewIndexFromDeviceIndex() const + uint32_t StageMaskForViewIndexUseDeviceIndex() const { return m_flags.viewIndexFromDeviceIndex; } GraphicsPipelineObjectFlags GetPipelineFlags() const diff --git a/icd/api/include/vk_physical_device.h b/icd/api/include/vk_physical_device.h index 7cb9ea74..f7d2940c 100644 --- a/icd/api/include/vk_physical_device.h +++ b/icd/api/include/vk_physical_device.h @@ -481,6 +481,12 @@ template uint32_t* pPropertyCount, utils::ArrayView properties) const; + void GetPhysicalDevicePipelineRobustnessProperties( + VkPipelineRobustnessBufferBehaviorEXT* defaultRobustnessStorageBuffers, + VkPipelineRobustnessBufferBehaviorEXT* defaultRobustnessUniformBuffers, + VkPipelineRobustnessBufferBehaviorEXT* defaultRobustnessVertexInputs, + VkPipelineRobustnessImageBehaviorEXT* defaultRobustnessImages) const; + void GetPhysicalDevice16BitStorageFeatures( VkBool32* pStorageBuffer16BitAccess, VkBool32* pUniformAndStorageBuffer16BitAccess, diff --git a/icd/api/include/vk_pipeline_layout.h b/icd/api/include/vk_pipeline_layout.h index b78a2b06..a4978d00 100644 --- a/icd/api/include/vk_pipeline_layout.h +++ b/icd/api/include/vk_pipeline_layout.h @@ -66,6 +66,15 @@ struct UserDataLayout // Number of user data registers used for push constants uint32_t pushConstRegCount; +#if VKI_RAY_TRACING + // Base user data register index to use for buffer storing ray tracing dispatch arguments + // The number of user data registers used is always 1 + uint32_t dispatchRaysArgsPtrRegBase; + + // Base user data register index to use for ray tracing capture replay VA mapping internal buffer + uint32_t rtCaptureReplayConstBufRegBase; +#endif + } common; union { @@ -90,11 +99,6 @@ struct UserDataLayout // Base user data register for debug printf uint32_t debugPrintfRegBase; -#if VKI_RAY_TRACING - // Base user data register index to use for ray tracing capture replay VA mapping internal buffer - uint32_t rtCaptureReplayConstBufRegBase; -#endif - // Base user data register index to use for thread group order reversal state uint32_t threadGroupReversalRegBase; @@ -115,21 +119,10 @@ struct UserDataLayout // Base user data register for debug printf uint32_t debugPrintfRegBase; -#if VKI_RAY_TRACING - // Base user data register index to use for buffer storing ray tracing dispatch arguments - // The number of user data registers used is always 1 - uint32_t dispatchRaysArgsPtrRegBase; -#endif - // Base user data register index to use for the constant buffer used in uber-fetch shader // The number of user data register used is always 2 uint32_t uberFetchConstBufRegBase; -#if VKI_RAY_TRACING - // Base user data register index to use for ray tracing capture replay VA mapping internal buffer - uint32_t rtCaptureReplayConstBufRegBase; -#endif - // Base user data register index to use for thread group order reversal state uint32_t threadGroupReversalRegBase; diff --git a/icd/api/include/vk_queue.h b/icd/api/include/vk_queue.h index 87add35f..03a2a7ee 100644 --- a/icd/api/include/vk_queue.h +++ b/icd/api/include/vk_queue.h @@ -209,15 +209,9 @@ class Queue VkResult CreateSqttState( void* pMemory); - enum - { - MaxQueueFamilies =
Pal::QueueTypeCount // Maximum number of queue families - , - MaxQueuesPerFamily = 8, // Maximum number of queues per family - MaxMultiQueues = 4, - - MaxSubQueuesInGroup = MaxQueueFamilies * MaxQueuesPerFamily // Maximum number of queues per group - }; + static constexpr uint32_t MaxQueueFamilies = Pal::QueueTypeCount // Maximum number of queue families + ; + static constexpr uint32_t MaxQueuesPerFamily = 8; // Maximum number of queues per family VK_FORCEINLINE Pal::IQueue* PalQueue(int32_t idx) const { diff --git a/icd/api/include/vk_utils.h b/icd/api/include/vk_utils.h index cda14997..25a0d182 100644 --- a/icd/api/include/vk_utils.h +++ b/icd/api/include/vk_utils.h @@ -59,6 +59,7 @@ // Reuse some PAL macros here #define VK_ASSERT PAL_ASSERT #define VK_ASSERT_MSG PAL_ASSERT_MSG +#define VK_ASSERT_ALWAYS_MSG PAL_ASSERT_ALWAYS_MSG #define VK_DEBUG_BUILD_ONLY_ASSERT PAL_DEBUG_BUILD_ONLY_ASSERT #define VK_ALERT PAL_ALERT #define VK_ALERT_ALWAYS_MSG PAL_ALERT_ALWAYS_MSG @@ -85,9 +86,6 @@ #define VK_FORCEINLINE inline #endif -// Wrap _malloca and _freea for compilers other than MSVS -#define VK_ALLOC_A(_numBytes) alloca(_numBytes) - // Default alignment for memory allocation #define VK_DEFAULT_MEM_ALIGN 16 @@ -131,6 +129,17 @@ constexpr uint32_t RayTraceShaderStages = typedef VkPipelineStageFlags2KHR PipelineStageFlags; typedef VkAccessFlags2KHR AccessFlags; +// ===================================================================================================================== +constexpr bool IsGfx11( + Pal::GfxIpLevel gfxLevel) +{ + return ((gfxLevel == Pal::GfxIpLevel::GfxIp11_0) +#if VKI_BUILD_GFX115 + || (gfxLevel == Pal::GfxIpLevel::GfxIp11_5) +#endif + ); +} + namespace utils { diff --git a/icd/api/pipeline_binary_cache.cpp b/icd/api/pipeline_binary_cache.cpp index bd438d7e..32ffb253 100644 --- a/icd/api/pipeline_binary_cache.cpp +++ b/icd/api/pipeline_binary_cache.cpp @@ -39,11 +39,8 @@ #include "palHashMapImpl.h" #include "palFile.h" #include "palLiterals.h" - -#if ICD_GPUOPEN_DEVMODE_BUILD #include "palPipelineAbiReader.h" #include "devmode/devmode_mgr.h" -#endif #include using namespace Util::Literals; @@ -62,9 +59,7 @@ static constexpr size_t ElfTypeStringLen = sizeof(ElfTypeString); const uint32_t PipelineBinaryCache::ArchiveType = Util::HashString(ArchiveTypeString, ArchiveTypeStringLen); const uint32_t PipelineBinaryCache::ElfType = Util::HashString(ElfTypeString, ElfTypeStringLen); -#if ICD_GPUOPEN_DEVMODE_BUILD static Util::Hash128 ParseHash128(const char* str); -#endif bool PipelineBinaryCache::IsValidBlob( VkAllocationCallbacks* pAllocationCallbacks, @@ -108,9 +103,7 @@ PipelineBinaryCache* PipelineBinaryCache::Create( const Vkgc::GfxIpVersion& gfxIp, const RuntimeSettings& settings, const char* pDefaultCacheFilePath, -#if ICD_GPUOPEN_DEVMODE_BUILD vk::IDevMode* pDevMode, -#endif uint32_t expectedEntries, size_t initDataSize, const void* pInitData, @@ -127,10 +120,7 @@ PipelineBinaryCache* PipelineBinaryCache::Create( if (pMem != nullptr) { pObj = VK_PLACEMENT_NEW(pMem) PipelineBinaryCache(pAllocationCallbacks, gfxIp, expectedEntries); - -#if ICD_GPUOPEN_DEVMODE_BUILD pObj->m_pDevMode = pDevMode; -#endif if (pObj->Initialize(settings, createArchiveLayers, pDefaultCacheFilePath, pKey) != VK_SUCCESS) { @@ -192,11 +182,9 @@ PipelineBinaryCache::PipelineBinaryCache( m_palAllocator { pAllocationCallbacks }, m_pPlatformKey { nullptr }, m_pTopLayer { nullptr }, -#if ICD_GPUOPEN_DEVMODE_BUILD m_pDevMode { nullptr }, m_pReinjectionLayer { nullptr }, m_hashMapping { 32, 
&m_palAllocator }, -#endif m_pMemoryLayer { nullptr }, m_pCompressingLayer { nullptr }, m_expectedEntries { expectedEntries }, @@ -247,12 +235,10 @@ PipelineBinaryCache::~PipelineBinaryCache() FreeMem(m_pCompressingLayer); } -#if ICD_GPUOPEN_DEVMODE_BUILD if (m_pReinjectionLayer != nullptr) { m_pReinjectionLayer->Destroy(); } -#endif } // ===================================================================================================================== @@ -405,7 +391,6 @@ Util::Result PipelineBinaryCache::GetPipelineBinary( return m_pTopLayer->Load(pQeuryId, pPipelineBinary); } -#if ICD_GPUOPEN_DEVMODE_BUILD // ===================================================================================================================== // Introduces a mapping from an internal pipeline hash to a cache ID void PipelineBinaryCache::RegisterHashMapping( @@ -495,7 +480,8 @@ Util::Result PipelineBinaryCache::StoreReinjectionBinary( uint32_t gfxIpMinor = 0u; uint32_t gfxIpStepping = 0u; - Util::Abi::PipelineAbiReader reader(&m_palAllocator, pPipelineBinary); + Util::Abi::PipelineAbiReader reader(&m_palAllocator, + Util::Span{ pPipelineBinary, pipelineBinarySize}); reader.GetGfxIpVersion(&gfxIpMajor, &gfxIpMinor, &gfxIpStepping); if (gfxIpMajor == m_gfxIp.major && @@ -516,7 +502,6 @@ Util::Result PipelineBinaryCache::StoreReinjectionBinary( return result; } -#endif // ===================================================================================================================== // Free memory allocated by our allocator void PipelineBinaryCache::FreePipelineBinary( @@ -529,12 +514,10 @@ void PipelineBinaryCache::FreePipelineBinary( // Destroy PipelineBinaryCache itself void PipelineBinaryCache::Destroy() { -#if ICD_GPUOPEN_DEVMODE_BUILD if (m_pDevMode != nullptr) { m_pDevMode->DeregisterPipelineCache(this); } -#endif VkAllocationCallbacks* pAllocationCallbacks = m_pAllocationCallbacks; void* pMem = this; @@ -571,7 +554,6 @@ VkResult PipelineBinaryCache::Initialize( result = OrderLayers(settings); } -#if ICD_GPUOPEN_DEVMODE_BUILD if ((result == VK_SUCCESS) && (m_pReinjectionLayer != nullptr)) { @@ -593,7 +575,6 @@ VkResult PipelineBinaryCache::Initialize( PAL_ASSERT_ALWAYS(); } } -#endif if (result == VK_SUCCESS) { @@ -604,7 +585,6 @@ VkResult PipelineBinaryCache::Initialize( return result; } -#if ICD_GPUOPEN_DEVMODE_BUILD // ===================================================================================================================== // Initialize reinjection cache layer VkResult PipelineBinaryCache::InitReinjectionLayer( @@ -805,7 +785,6 @@ Util::Result PipelineBinaryCache::InjectBinariesFromDirectory( return result; } -#endif // ===================================================================================================================== // Initialize memory layer @@ -825,7 +804,7 @@ VkResult PipelineBinaryCache::InitMemoryCacheLayer( // Reason: CTS generates a large number of cache applications and cause insufficient memory in 32-bit system. // Purpose: To limit the maximun value of MemorySize in 32-bit system. 
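// =====================================================================================================================
// A minimal sketch of the 32-bit size cap chosen below, assuming stand-in _MiB/_GiB literal operators in place of
// Util::Literals; the 192 MiB / 4 GiB values and the VKI_X86_BUILD define come from this patch, everything else here
// is illustrative, not driver code.
#include <cstdint>
namespace cache_cap_sketch
{
    constexpr std::uint64_t operator""_MiB(unsigned long long v) { return v << 20; }
    constexpr std::uint64_t operator""_GiB(unsigned long long v) { return v << 30; }

    constexpr std::uint64_t MaxMemoryCacheSize()
    {
#ifdef VKI_X86_BUILD
        return 192_MiB;  // 32-bit processes exhaust address space under heavy CTS caching
#else
        return 4_GiB;
#endif
    }
}
// =====================================================================================================================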
-#ifdef ICD_X86_BUILD +#ifdef VKI_X86_BUILD createInfo.maxMemorySize = 192_MiB; #else createInfo.maxMemorySize = 4_GiB; @@ -929,14 +908,7 @@ Util::IArchiveFile* PipelineBinaryCache::OpenReadOnlyArchive( { Util::Result openResult = Util::OpenArchiveFile(&info, pMem, &pFile); - if (openResult == Util::Result::Success) - { - if (info.useBufferedReadMemory) - { - pFile->Preload(0, info.maxReadBufferMem); - } - } - else + if (openResult != Util::Result::Success) { FreeMem(pMem); pFile = nullptr; @@ -992,14 +964,7 @@ Util::IArchiveFile* PipelineBinaryCache::OpenWritableArchive( } } - if (openResult == Util::Result::Success) - { - if (info.useBufferedReadMemory) - { - pFile->Preload(0, info.maxReadBufferMem); - } - } - else + if (openResult != Util::Result::Success) { FreeMem(pMem); pFile = nullptr; @@ -1275,11 +1240,7 @@ VkResult PipelineBinaryCache::InitLayers( bool createArchiveLayers, const RuntimeSettings& settings) { -#if ICD_GPUOPEN_DEVMODE_BUILD bool injectionLayerOnline = (InitReinjectionLayer(settings) >= VK_SUCCESS); -#else - bool injectionLayerOnline = false; -#endif bool memoryLayerOnline = (InitMemoryCacheLayer(settings) >= VK_SUCCESS); diff --git a/icd/api/pipeline_compiler.cpp b/icd/api/pipeline_compiler.cpp index 168832b3..69fbc4d2 100644 --- a/icd/api/pipeline_compiler.cpp +++ b/icd/api/pipeline_compiler.cpp @@ -206,7 +206,7 @@ static bool SupportInternalModuleCache( supportInternalModuleCache = false; } -#if ICD_X86_BUILD +#if VKI_X86_BUILD supportInternalModuleCache = false; #endif @@ -363,9 +363,7 @@ VkResult PipelineCompiler::Initialize() m_gfxIp, settings, m_pPhysicalDevice->PalDevice()->GetCacheFilePath(), -#if ICD_GPUOPEN_DEVMODE_BUILD m_pPhysicalDevice->VkInstance()->GetDevModeMgr(), -#endif 0, 0, nullptr, @@ -890,7 +888,8 @@ void PipelineCompiler::ReplacePipelineIsaCode( return; } - Util::Abi::PipelineAbiReader abiReader(pDevice->VkInstance()->Allocator(), pipelineBinary.pCode); + Util::Abi::PipelineAbiReader abiReader(pDevice->VkInstance()->Allocator(), + Util::Span{ pipelineBinary.pCode, pipelineBinary.codeSize}); Pal::Result palResult = abiReader.Init(); if (palResult != Pal::Result::Success) { @@ -919,7 +918,7 @@ void PipelineCompiler::ReplacePipelineIsaCode( }; for (const auto& simbolTypeEntry : stageSymbolTypes) { - const Util::Elf::SymbolTableEntry* pEntry = abiReader.GetPipelineSymbol(simbolTypeEntry); + const Util::Elf::SymbolTableEntry* pEntry = abiReader.GetSymbolHeader(simbolTypeEntry); if (pEntry != nullptr) { shaderStageSymbols.push_back(pEntry); @@ -1772,6 +1771,10 @@ static void CopyPipelineShadersInfo( if ((shaderMask & (1 << stage)) != 0) { *pShaderInfosDst[stage] = *pShaderInfosSrc[stage]; + if (libInfo.flags & VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT) + { + pShaderInfosDst[stage]->options.viewIndexFromDeviceIndex = true; + } } } @@ -1837,6 +1840,7 @@ static void MergePipelineOptions( pDst->pageMigrationEnabled |= src.pageMigrationEnabled; pDst->optimizationLevel |= src.optimizationLevel; pDst->glState.disableTruncCoordForGather |= src.glState.disableTruncCoordForGather; + pDst->optimizePointSizeWrite |= src.optimizePointSizeWrite; pDst->shadowDescriptorTableUsage = src.shadowDescriptorTableUsage; pDst->shadowDescriptorTablePtrHigh = src.shadowDescriptorTablePtrHigh; pDst->overrideThreadGroupSizeX = src.overrideThreadGroupSizeX; @@ -1899,10 +1903,7 @@ static void CopyFragmentOutputInterfaceState( { const GraphicsPipelineBinaryCreateInfo& libInfo = pLibrary->GetPipelineBinaryCreateInfo(); - for (uint32_t i = 0; i < Vkgc::MaxColorTargets; 
++i) - { - pCreateInfo->pipelineInfo.cbState.target[i] = libInfo.pipelineInfo.cbState.target[i]; - } + pCreateInfo->pipelineInfo.cbState = libInfo.pipelineInfo.cbState; pCreateInfo->pipelineInfo.rsState.perSampleShading = libInfo.pipelineInfo.rsState.perSampleShading; pCreateInfo->pipelineInfo.rsState.dynamicSampleInfo = libInfo.pipelineInfo.rsState.dynamicSampleInfo; @@ -1911,9 +1912,6 @@ static void CopyFragmentOutputInterfaceState( pCreateInfo->pipelineInfo.rsState.pixelShaderSamples = libInfo.pipelineInfo.rsState.pixelShaderSamples; pCreateInfo->dbFormat = libInfo.dbFormat; - pCreateInfo->pipelineInfo.cbState.alphaToCoverageEnable = libInfo.pipelineInfo.cbState.alphaToCoverageEnable; - pCreateInfo->pipelineInfo.cbState.dualSourceBlendEnable = libInfo.pipelineInfo.cbState.dualSourceBlendEnable; - pCreateInfo->pipelineInfo.cbState.dualSourceBlendDynamic = libInfo.pipelineInfo.cbState.dualSourceBlendDynamic; pCreateInfo->pipelineInfo.iaState.enableMultiView = libInfo.pipelineInfo.iaState.enableMultiView; pCreateInfo->cbStateHash = libInfo.cbStateHash; } @@ -3319,6 +3317,7 @@ void PipelineCompiler::ApplyPipelineOptions( pOptions->enableRelocatableShaderElf = settings.enableRelocatableShaders; pOptions->disableImageResourceCheck = settings.disableImageResourceTypeCheck; pOptions->optimizeTessFactor = settings.optimizeTessFactor != OptimizeTessFactorDisable; + pOptions->optimizePointSizeWrite = true; pOptions->forceCsThreadIdSwizzling = settings.forceCsThreadIdSwizzling; pOptions->overrideThreadGroupSizeX = settings.overrideThreadGroupSizeX; pOptions->overrideThreadGroupSizeY = settings.overrideThreadGroupSizeY; @@ -4444,6 +4443,7 @@ void PipelineCompiler::SetRayTracingState( pRtState->enableRayQueryCsSwizzle = settings.rtEnableRayQueryCsSwizzle; pRtState->enableDispatchRaysInnerSwizzle = settings.rtEnableDispatchRaysInnerSwizzle; pRtState->enableDispatchRaysOuterSwizzle = settings.rtEnableDispatchRaysOuterSwizzle; + pRtState->forceInvalidAccelStruct = settings.forceInvalidAccelStruct; pRtState->ldsStackSize = settings.ldsStackSize; pRtState->enableOptimalLdsStackSizeForIndirect = settings.enableOptimalLdsStackSizeForIndirect; pRtState->enableOptimalLdsStackSizeForUnified = settings.enableOptimalLdsStackSizeForUnified; @@ -4788,7 +4788,6 @@ bool PipelineCompiler::BuildRayTracingPipelineBinary( #endif // ===================================================================================================================== -#if ICD_GPUOPEN_DEVMODE_BUILD Util::Result PipelineCompiler::RegisterAndLoadReinjectionBinary( const Pal::PipelineHash* pInternalPipelineHash, const Util::MetroHash::Hash* pCacheId, @@ -4829,7 +4828,6 @@ Util::Result PipelineCompiler::RegisterAndLoadReinjectionBinary( return result; } -#endif // ===================================================================================================================== // Filter VkPipelineCreateFlags2KHR to only values used for pipeline caching @@ -5236,8 +5234,25 @@ uint32_t PipelineCompiler::BuildUberFetchShaderInternalDataImp( case VK_FORMAT_R8G8_SINT: case VK_FORMAT_R8G8B8A8_UINT: case VK_FORMAT_R8G8B8A8_SNORM: + case VK_FORMAT_R16G16_UNORM: + case VK_FORMAT_R16G16_SNORM: + case VK_FORMAT_R16G16_USCALED: + case VK_FORMAT_R16G16_SSCALED: + case VK_FORMAT_R16G16_UINT: + case VK_FORMAT_R16G16_SINT: case VK_FORMAT_R16G16_SFLOAT: + case VK_FORMAT_R16G16B16_UNORM: + case VK_FORMAT_R16G16B16_SNORM: + case VK_FORMAT_R16G16B16_USCALED: + case VK_FORMAT_R16G16B16_SSCALED: + case VK_FORMAT_R16G16B16_UINT: + case VK_FORMAT_R16G16B16_SINT: + 
case VK_FORMAT_R16G16B16A16_UNORM: + case VK_FORMAT_R16G16B16A16_SNORM: case VK_FORMAT_R16G16B16A16_USCALED: + case VK_FORMAT_R16G16B16A16_SSCALED: + case VK_FORMAT_R16G16B16A16_UINT: + case VK_FORMAT_R16G16B16A16_SINT: stride = 1; break; default: diff --git a/icd/api/raytrace/ray_tracing_device.cpp b/icd/api/raytrace/ray_tracing_device.cpp index 06e85c1f..65d0cab0 100644 --- a/icd/api/raytrace/ray_tracing_device.cpp +++ b/icd/api/raytrace/ray_tracing_device.cpp @@ -38,10 +38,7 @@ #include "palArchiveFile.h" #include "gpurt/gpurtLib.h" #include "g_gpurtOptions.h" - -#if ICD_GPUOPEN_DEVMODE_BUILD #include "devmode/devmode_mgr.h" -#endif namespace vk { @@ -233,7 +230,6 @@ void RayTracingDevice::CreateGpuRtDeviceSettings( pDeviceSettings->enableParallelUpdate = settings.rtEnableUpdateParallel; pDeviceSettings->enableParallelBuild = settings.rtEnableBuildParallel; pDeviceSettings->parallelBuildWavesPerSimd = settings.buildParallelWavesPerSimd; - pDeviceSettings->enableAcquireReleaseInterface = settings.rtEnableAcquireReleaseInterface; pDeviceSettings->bvhCpuBuildModeFastTrace = static_cast(settings.rtBvhCpuBuildMode); pDeviceSettings->bvhCpuBuildModeDefault = static_cast(settings.rtBvhCpuBuildMode); pDeviceSettings->bvhCpuBuildModeFastBuild = static_cast(settings.rtBvhCpuBuildMode); @@ -257,6 +253,7 @@ void RayTracingDevice::CreateGpuRtDeviceSettings( pDeviceSettings->enableInsertBarriersInBuildAS = settings.enableInsertBarriersInBuildAs; pDeviceSettings->numMortonSizeBits = settings.numMortonSizeBits; pDeviceSettings->allowFp16BoxNodesInUpdatableBvh = settings.rtAllowFp16BoxNodesInUpdatableBvh; + pDeviceSettings->fp16BoxNodesRequireCompaction = settings.fp16BoxNodesRequireCompactionFlag; // Enable AS stats based on panel setting pDeviceSettings->enableBuildAccelStructStats = settings.rtEnableBuildAccelStructStats; @@ -308,6 +305,9 @@ void RayTracingDevice::CollectGpurtOptions( } *optionMap.FindKey(GpuRt::ThreadTraceEnabledOptionNameHash) = threadTraceEnabled; + *optionMap.FindKey(GpuRt::PersistentLaunchEnabledOptionNameHash) = + (settings.rtPersistentDispatchRaysFactor > 0.0f) ? 1 : 0; + pGpurtOptions->Clear(); for (auto it = optionMap.Begin(); it.Get() != nullptr; it.Next()) { @@ -574,6 +574,15 @@ Pal::Result RayTracingDevice::InitCmdContext( cmdBufInfo.queueType = Pal::QueueTypeUniversal; queueHandle = m_pDevice->GetQueue(cmdBufInfo.engineType, cmdBufInfo.queueType); + + if (queueHandle == VK_NULL_HANDLE) + { + // Could not find a universal queue, try transfer + cmdBufInfo.engineType = Pal::EngineTypeDma; + cmdBufInfo.queueType = Pal::QueueTypeDma; + + queueHandle = m_pDevice->GetQueue(cmdBufInfo.engineType, cmdBufInfo.queueType); + } } Pal::Result result = (queueHandle != VK_NULL_HANDLE) ? 
Pal::Result::Success : Pal::Result::ErrorUnknown; @@ -914,8 +923,12 @@ Pal::Result RayTracingDevice::ClientCreateInternalComputePipeline( pSubNode->type = Vkgc::ResourceMappingNodeType::DescriptorConstBuffer; break; case GpuRt::NodeType::SrvTable: + pSubNode->type = Vkgc::ResourceMappingNodeType::DescriptorResource; + pSubNode->srdRange.strideInDwords = untypedBufferSrdSizeDw; + break; case GpuRt::NodeType::TypedSrvTable: pSubNode->type = Vkgc::ResourceMappingNodeType::DescriptorResource; + pSubNode->srdRange.strideInDwords = typedBufferSrdSizeDw; break; default: VK_NEVER_CALLED(); @@ -952,15 +965,31 @@ Pal::Result RayTracingDevice::ClientCreateInternalComputePipeline( compileConstants.pConstants }; - bool forceWave64 = false; + constexpr uint32_t CompilerOptionWaveSize = Util::HashLiteralString("waveSize"); + constexpr uint32_t CompilerOptionValueWave32 = Util::HashLiteralString("Wave32"); + constexpr uint32_t CompilerOptionValueWave64 = Util::HashLiteralString("Wave64"); - // Overide wave size for these GpuRT shader types - if (((buildInfo.shaderType == GpuRt::InternalRayTracingCsType::BuildBVHTD) || - (buildInfo.shaderType == GpuRt::InternalRayTracingCsType::BuildBVHTDTR) || - (buildInfo.shaderType == GpuRt::InternalRayTracingCsType::BuildParallel) || - (buildInfo.shaderType == GpuRt::InternalRayTracingCsType::BuildQBVH))) + ShaderWaveSize waveSize = ShaderWaveSize::WaveSizeAuto; + + for (uint32_t i = 0; i < buildInfo.hashedCompilerOptionCount; ++i) { - forceWave64 = true; + const GpuRt::PipelineCompilerOption& compilerOption = buildInfo.pHashedCompilerOptions[i]; + + switch (compilerOption.hashedOptionName) + { + case CompilerOptionWaveSize: + if (compilerOption.value == CompilerOptionValueWave32) + { + waveSize = ShaderWaveSize::WaveSize32; + } + else if (compilerOption.value == CompilerOptionValueWave64) + { + waveSize = ShaderWaveSize::WaveSize64; + } + break; + default: + VK_ASSERT_ALWAYS_MSG("Unknown GPURT setting! Handle it!"); + } } result = pDevice->CreateInternalComputePipeline(spvBin.codeSize, @@ -968,7 +997,7 @@ Pal::Result RayTracingDevice::ClientCreateInternalComputePipeline( buildInfo.nodeCount, nodes, ShaderModuleInternalRayTracingShader, - forceWave64, + waveSize, &specializationInfo, &pDevice->GetInternalRayTracingPipeline()); diff --git a/icd/api/raytrace/vk_acceleration_structure.h b/icd/api/raytrace/vk_acceleration_structure.h index a245a7e6..7391efd7 100644 --- a/icd/api/raytrace/vk_acceleration_structure.h +++ b/icd/api/raytrace/vk_acceleration_structure.h @@ -39,7 +39,6 @@ class Buffer; class DeferredHostOperation; class Device; struct GeometryConvertHelper; -class VirtualStackFrame; // ===================================================================================================================== // VkAccelerationStructureKHR (VK_KHR_acceleration_structure) diff --git a/icd/api/raytrace/vk_ray_tracing_pipeline.cpp b/icd/api/raytrace/vk_ray_tracing_pipeline.cpp index e75aad8d..cdbb344a 100644 --- a/icd/api/raytrace/vk_ray_tracing_pipeline.cpp +++ b/icd/api/raytrace/vk_ray_tracing_pipeline.cpp @@ -1708,7 +1708,6 @@ VkResult RayTracingPipeline::CreateImpl( pAllocator); } } -#if ICD_GPUOPEN_DEVMODE_BUILD // Temporarily reinject post Pal pipeline creation (when the internal pipeline hash is available). // The reinjection cache layer can be linked back into the pipeline cache chain once the // Vulkan pipeline cache key can be stored (and read back) inside the ELF as metadata. 
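// =====================================================================================================================
// A hedged sketch of the hashed-option matching added above: GPURT hands the driver pre-hashed name/value pairs, and
// the driver compares them against compile-time hashes of "waveSize"/"Wave32"/"Wave64". The FNV-1a helper below is a
// stand-in for Util::HashLiteralString (the real hash may differ), and the types are simplified for illustration.
#include <cstddef>
#include <cstdint>

constexpr std::uint32_t HashLiteral(const char* s, std::uint32_t h = 2166136261u)
{
    return (*s == '\0') ? h : HashLiteral(s + 1, (h ^ static_cast<std::uint32_t>(*s)) * 16777619u);
}

enum class SketchWaveSize { Auto, Wave32, Wave64 };

struct SketchHashedOption { std::uint32_t hashedOptionName; std::uint32_t value; };

SketchWaveSize PickWaveSize(const SketchHashedOption* pOptions, std::size_t count)
{
    constexpr std::uint32_t NameWaveSize = HashLiteral("waveSize");
    constexpr std::uint32_t ValueWave32  = HashLiteral("Wave32");
    constexpr std::uint32_t ValueWave64  = HashLiteral("Wave64");

    SketchWaveSize waveSize = SketchWaveSize::Auto;  // Auto lets the compiler choose

    for (std::size_t i = 0; i < count; ++i)
    {
        if (pOptions[i].hashedOptionName == NameWaveSize)
        {
            if      (pOptions[i].value == ValueWave32) { waveSize = SketchWaveSize::Wave32; }
            else if (pOptions[i].value == ValueWave64) { waveSize = SketchWaveSize::Wave64; }
        }
    }
    return waveSize;
}
// =====================================================================================================================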
@@ -1739,7 +1738,6 @@ VkResult RayTracingPipeline::CreateImpl( palResult = Util::Result::Success; } } -#endif } result = PalToVkResult(palResult); @@ -1985,7 +1983,6 @@ static int32_t DeferredCreateRayTracingPipelineCallback( pState->pAllocator, pOperation->Workload(index)); -#if ICD_GPUOPEN_DEVMODE_BUILD if (localResult == VK_SUCCESS) { IDevMode* pDevMode = pDevice->VkInstance()->GetDevModeMgr(); @@ -2000,7 +1997,6 @@ static int32_t DeferredCreateRayTracingPipelineCallback( } } } -#endif } if (localResult != VK_SUCCESS) @@ -2244,6 +2240,7 @@ VkResult RayTracingPipeline::GetPipelineExecutableProperties( // ===================================================================================================================== VkResult RayTracingPipeline::GetRayTracingShaderDisassembly( Util::Abi::PipelineSymbolType pipelineSymbolType, + size_t binarySize, const void* pBinaryCode, size_t* pBufferSize, void* pBuffer @@ -2251,7 +2248,8 @@ VkResult RayTracingPipeline::GetRayTracingShaderDisassembly( { // To extract the shader code, we can re-parse the saved ELF binary and lookup the shader's program // instructions by examining the symbol table entry for that shader's entrypoint. - Util::Abi::PipelineAbiReader abiReader(m_pDevice->VkInstance()->Allocator(), pBinaryCode); + Util::Abi::PipelineAbiReader abiReader(m_pDevice->VkInstance()->Allocator(), + Util::Span{pBinaryCode, binarySize}); VkResult result = VK_SUCCESS; Pal::Result palResult = abiReader.Init(); @@ -2264,32 +2262,34 @@ VkResult RayTracingPipeline::GetRayTracingShaderDisassembly( VK_ASSERT((pipelineSymbolType == Util::Abi::PipelineSymbolType::ShaderDisassembly) || (pipelineSymbolType == Util::Abi::PipelineSymbolType::ShaderAmdIl)); - const Util::Elf::SymbolTableEntry* pSymbolEntry = nullptr; const char* pSectionName = nullptr; if (pipelineSymbolType == Util::Abi::PipelineSymbolType::ShaderDisassembly) { - pSymbolEntry = abiReader.GetPipelineSymbol( - Util::Abi::GetSymbolForStage( - Util::Abi::PipelineSymbolType::ShaderDisassembly, - Util::Abi::HardwareStage::Cs)); + palResult = abiReader.CopySymbol( + Util::Abi::GetSymbolForStage( + Util::Abi::PipelineSymbolType::ShaderDisassembly, + Util::Abi::HardwareStage::Cs), + pBufferSize, + pBuffer); + pSectionName = Util::Abi::AmdGpuDisassemblyName; + symbolValid = palResult == Util::Result::Success; } else if (pipelineSymbolType == Util::Abi::PipelineSymbolType::ShaderAmdIl) { - pSymbolEntry = abiReader.GetPipelineSymbol( - Util::Abi::GetSymbolForStage( - Util::Abi::PipelineSymbolType::ShaderAmdIl, - Util::Abi::ApiShaderType::Cs)); + palResult = abiReader.CopySymbol( + Util::Abi::GetSymbolForStage( + Util::Abi::PipelineSymbolType::ShaderAmdIl, + Util::Abi::ApiShaderType::Cs), + pBufferSize, + pBuffer); + pSectionName = Util::Abi::AmdGpuCommentLlvmIrName; + symbolValid = palResult == Util::Result::Success; } - if (pSymbolEntry != nullptr) - { - palResult = abiReader.GetElfReader().CopySymbol(*pSymbolEntry, pBufferSize, pBuffer); - symbolValid = palResult == Util::Result::Success; - } - else if (pSectionName != nullptr) + if ((symbolValid == false) && (pSectionName != nullptr)) { const auto& elfReader = abiReader.GetElfReader(); Util::ElfReader::SectionId disassemblySectionId = elfReader.FindSection(pSectionName); @@ -2406,6 +2406,7 @@ VkResult RayTracingPipeline::GetPipelineExecutableInternalRepresentations( // Get the text based ISA disassembly of the shader VkResult result = GetRayTracingShaderDisassembly( Util::Abi::PipelineSymbolType::ShaderDisassembly, + static_cast(binarySize), 
pBinaryCode, &(pInternalRepresentations[entry].dataSize), pInternalRepresentations[entry].pData); @@ -2608,9 +2609,7 @@ void RayTracingPipeline::BindToCmdBuffer( uint32_t* pCpuAddr = pPalCmdBuf->CmdAllocateEmbeddedData(dwordSize, 1, &gpuAddress); memcpy(pCpuAddr, m_captureReplayVaMappingBufferInfo.pData, m_captureReplayVaMappingBufferInfo.dataSize); - uint32_t rtCaptureReplayConstBufRegBase = (m_userDataLayout.scheme == PipelineLayoutScheme::Compact) ? - m_userDataLayout.compact.rtCaptureReplayConstBufRegBase : - m_userDataLayout.indirect.rtCaptureReplayConstBufRegBase; + const uint32_t rtCaptureReplayConstBufRegBase = m_userDataLayout.common.rtCaptureReplayConstBufRegBase; pPalCmdBuf->CmdSetUserData(Pal::PipelineBindPoint::Compute, rtCaptureReplayConstBufRegBase, @@ -2802,6 +2801,29 @@ uint32_t RayTracingPipeline::UpdateShaderGroupIndex( return (shader == VK_SHADER_UNUSED_KHR) ? VK_SHADER_UNUSED_KHR : idx; } +// ===================================================================================================================== +uint32_t RayTracingPipeline::PersistentDispatchSize( + uint32_t width, + uint32_t height, + uint32_t depth + ) const +{ + const Pal::DispatchDims dispatchSize = GetDispatchSize({ .x = width, .y = height, .z = depth }); + + // Groups needed to cover the x, y, and z dimension of a persistent dispatch + // For large dispatches, this will be limited by the size of the GPU because we want just enough groups to fill it + // For small dispatches, there will be even fewer groups; don't launch groups that will have nothing to do + const RuntimeSettings& settings = m_pDevice->GetRuntimeSettings(); + const Pal::DeviceProperties& deviceProp = m_pDevice->VkPhysicalDevice(DefaultDeviceIndex)->PalProperties(); + const auto& props = deviceProp.gfxipProperties.shaderCore; + const uint32 rayDispatchMaxGroups = settings.rtPersistentDispatchRaysFactor * + (props.numAvailableCus * props.numSimdsPerCu * props.numWavefrontsPerSimd); + const uint32 persistentDispatchSize = Util::Min(rayDispatchMaxGroups, + (dispatchSize.x * dispatchSize.y * dispatchSize.z)); + + return persistentDispatchSize; +} + // ===================================================================================================================== Pal::DispatchDims RayTracingPipeline::GetDispatchSize( Pal::DispatchDims size) const diff --git a/icd/api/raytrace/vk_ray_tracing_pipeline.h b/icd/api/raytrace/vk_ray_tracing_pipeline.h index 2cf2d6ed..d7148fab 100644 --- a/icd/api/raytrace/vk_ray_tracing_pipeline.h +++ b/icd/api/raytrace/vk_ray_tracing_pipeline.h @@ -296,11 +296,17 @@ class RayTracingPipeline final : public Pipeline, public NonDispatchableVkInstance()->Allocator()), m_userMarkerOpHistory(pCmdBuf->VkInstance()->Allocator()), m_userMarkerStrings(pCmdBuf->VkInstance()->Allocator()) @@ -325,12 +323,10 @@ void SqttCmdBufferState::Begin( m_userMarkerOpHistory.Clear(); m_userMarkerStrings.Clear(); -#if ICD_GPUOPEN_DEVMODE_BUILD if (m_pDevMode != nullptr) { m_instructionTrace.targetHash = m_pDevMode->GetInstructionTraceTargetHash(); } -#endif m_cbId = m_pSqttMgr->GetNextCmdBufID(m_pCmdBuf->GetQueueFamilyIndex(), pBeginInfo); @@ -349,7 +345,6 @@ void SqttCmdBufferState::Begin( // Inserts a CbEnd marker when command buffer building has finished. void SqttCmdBufferState::End() { -#if ICD_GPUOPEN_DEVMODE_BUILD // If instruction tracing was enabled for this Command List, // insert a barrier used to wait for all trace data to finish writing. 
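// =====================================================================================================================
// A worked sketch of the PersistentDispatchSize() computation added above: cap the launch at "enough groups to fill
// the GPU" (CUs x SIMDs x waves per SIMD, scaled by rtPersistentDispatchRaysFactor), but never launch more groups
// than the dispatch actually needs. The hardware counts in the usage note are made up for illustration.
#include <algorithm>
#include <cstdint>

std::uint32_t PersistentGroupCount(std::uint32_t neededGroups,   // dispatchSize.x * y * z
                                   float         factor,         // rtPersistentDispatchRaysFactor
                                   std::uint32_t numCus,
                                   std::uint32_t simdsPerCu,
                                   std::uint32_t wavesPerSimd)
{
    const std::uint32_t maxGroups =
        static_cast<std::uint32_t>(factor * (numCus * simdsPerCu * wavesPerSimd));

    return std::min(maxGroups, neededGroups);
}
// e.g. factor 1.0 on a hypothetical 48 CU x 2 SIMD x 16 wave part caps at 1536 groups: a 4096-group trace launches
// 1536 persistent groups, while a 64-group trace launches only 64.
// =====================================================================================================================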
if (m_instructionTrace.started && m_settings.rgpInstTraceBarrierEnabled) @@ -378,18 +373,15 @@ void SqttCmdBufferState::End() m_pCmdBuf->PalCmdBuffer(DefaultDeviceIndex)->CmdBarrier(barrierInfo); } -#endif WriteCbEndMarker(); -#if ICD_GPUOPEN_DEVMODE_BUILD if ((m_pDevMode != nullptr) && (m_instructionTrace.started)) { m_pDevMode->StopInstructionTrace(m_pCmdBuf); m_instructionTrace.started = false; } -#endif } // ===================================================================================================================== @@ -415,10 +407,25 @@ void SqttCmdBufferState::WriteMarker( ) const { VK_ASSERT(m_enabledMarkers != 0); + + WriteMarker(m_pCmdBuf->PalCmdBuffer(DefaultDeviceIndex), + pData, + dataSize, + subQueueFlags); +} + +// ===================================================================================================================== +void SqttCmdBufferState::WriteMarker( + Pal::ICmdBuffer* pPalCmdBuffer, + const void* pData, + size_t dataSize, + Pal::RgpMarkerSubQueueFlags subQueueFlags + ) +{ VK_ASSERT((dataSize % sizeof(uint32_t)) == 0); VK_ASSERT((dataSize / sizeof(uint32_t)) > 0); - m_pCmdBuf->PalCmdBuffer(DefaultDeviceIndex)->CmdInsertRgpTraceMarker( + pPalCmdBuffer->CmdInsertRgpTraceMarker( subQueueFlags, static_cast(dataSize / sizeof(uint32_t)), pData); @@ -554,6 +561,28 @@ void SqttCmdBufferState::WriteUserEventMarker( } } +// ===================================================================================================================== +// Insert an event marker for a PAL internal event such as a dispatch initiated from PAL. +void SqttCmdBufferState::WritePalInternalEventMarker( + Pal::ICmdBuffer* pPalCmdBuffer, + Pal::DispatchInfoFlags infoFlags, + Pal::RgpMarkerSubQueueFlags subQueueFlags) +{ + RgpSqttMarkerEventType apiType = RgpSqttMarkerEventType::CmdUnknown; + + if (infoFlags.devDriverOverlay) + { + apiType = RgpSqttMarkerEventType::CmdDispatchDevDriverOverlay; + } + + RgpSqttMarkerEvent marker = {}; + + marker.identifier = RgpSqttMarkerIdentifierEvent; + marker.apiType = static_cast(apiType); + + WriteMarker(pPalCmdBuffer, &marker, sizeof(marker), subQueueFlags); +} + // ==================================================================================================================== void SqttCmdBufferState::RgdAnnotateCmdBuf() { @@ -1055,7 +1084,6 @@ void SqttCmdBufferState::PipelineBound( { const Pipeline* pPipeline = Pipeline::BaseObjectFromHandle(pipeline); -#if ICD_GPUOPEN_DEVMODE_BUILD if (m_pDevMode != nullptr) { if ((m_instructionTrace.started == false) && @@ -1066,7 +1094,6 @@ void SqttCmdBufferState::PipelineBound( m_instructionTrace.started = true; } } -#endif } } @@ -1143,27 +1170,33 @@ void SqttCmdBufferState::DebugMarkerInsert( void SqttCmdBufferState::DebugLabelBegin( const VkDebugUtilsLabelEXT* pMarkerInfo) { - DevUserMarkerString userMarkerString = {}; - userMarkerString.length = static_cast(strlen(pMarkerInfo->pLabelName)) + 1; - Util::Strncpy(userMarkerString.string, pMarkerInfo->pLabelName, sizeof(userMarkerString.string)); - m_userMarkerStrings.PushBack(userMarkerString); + if (m_pDevMode->IsCrashAnalysisEnabled() == false) + { + DevUserMarkerString userMarkerString = {}; + userMarkerString.length = static_cast(strlen(pMarkerInfo->pLabelName)) + 1; + Util::Strncpy(userMarkerString.string, pMarkerInfo->pLabelName, sizeof(userMarkerString.string)); + m_userMarkerStrings.PushBack(userMarkerString); - Pal::Developer::UserMarkerOpInfo opInfo = {}; - opInfo.opType = static_cast(Pal::Developer::UserMarkerOpType::Push); - 
opInfo.strIndex = static_cast(m_userMarkerStrings.size()); - m_userMarkerOpHistory.PushBack(opInfo.u32All); + Pal::Developer::UserMarkerOpInfo opInfo = {}; + opInfo.opType = static_cast(Pal::Developer::UserMarkerOpType::Push); + opInfo.strIndex = static_cast(m_userMarkerStrings.size()); + m_userMarkerOpHistory.PushBack(opInfo.u32All); - WriteUserEventMarker(RgpSqttMarkerUserEventPush, pMarkerInfo->pLabelName); + WriteUserEventMarker(RgpSqttMarkerUserEventPush, pMarkerInfo->pLabelName); + } } // ===================================================================================================================== void SqttCmdBufferState::DebugLabelEnd() { - Pal::Developer::UserMarkerOpInfo opInfo = {}; - opInfo.opType = static_cast(Pal::Developer::UserMarkerOpType::Pop); - m_userMarkerOpHistory.PushBack(opInfo.u32All); + if (m_pDevMode->IsCrashAnalysisEnabled() == false) + { + Pal::Developer::UserMarkerOpInfo opInfo = {}; + opInfo.opType = static_cast(Pal::Developer::UserMarkerOpType::Pop); + m_userMarkerOpHistory.PushBack(opInfo.u32All); - WriteUserEventMarker(RgpSqttMarkerUserEventPop, nullptr); + WriteUserEventMarker(RgpSqttMarkerUserEventPop, nullptr); + } } // ===================================================================================================================== @@ -2347,9 +2380,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateGraphicsPipelines( } } -#if ICD_GPUOPEN_DEVMODE_BUILD pDevMode->PipelineCreated(pDevice, pPipeline); -#endif } } } @@ -2395,9 +2426,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateComputePipelines( pCreateInfos[i].stage.module; } -#if ICD_GPUOPEN_DEVMODE_BUILD pDevMode->PipelineCreated(pDevice, pPipeline); -#endif } } } @@ -2449,7 +2478,6 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateRayTracingPipelinesKHR( } } -#if ICD_GPUOPEN_DEVMODE_BUILD if (result != VK_OPERATION_DEFERRED_KHR) { pDevMode->PipelineCreated(pDevice, pPipeline); @@ -2459,7 +2487,6 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateRayTracingPipelinesKHR( pDevMode->ShaderLibrariesCreated(pDevice, pPipeline); } } -#endif } } } @@ -2621,7 +2648,6 @@ VKAPI_ATTR void VKAPI_CALL vkDestroyPipeline( SqttMgr* pSqtt = pDevice->GetSqttMgr(); IDevMode* pDevMode = pDevice->VkInstance()->GetDevModeMgr(); -#if ICD_GPUOPEN_DEVMODE_BUILD if (pDevice->GetRuntimeSettings().devModeShaderIsaDbEnable && (pDevMode != nullptr)) { if (VK_NULL_HANDLE != pipeline) @@ -2643,7 +2669,6 @@ VKAPI_ATTR void VKAPI_CALL vkDestroyPipeline( #endif } } -#endif return SQTT_CALL_NEXT_LAYER(vkDestroyPipeline)(device, pipeline, pAllocator); } @@ -2799,7 +2824,6 @@ VKAPI_ATTR VkResult VKAPI_CALL vkSetDebugUtilsObjectTagEXT( return SQTT_CALL_NEXT_LAYER(vkSetDebugUtilsObjectTagEXT)(device, pTagInfo); } -#if ICD_GPUOPEN_DEVMODE_BUILD // ===================================================================================================================== // This function looks for specific tags in a submit's command buffers to identify when to force an RGP trace start // rather than during it during vkQueuePresent(). 
This is done for applications that explicitly do not make present @@ -2871,7 +2895,6 @@ static void CheckRGPFrameEnd( } } } -#endif // ===================================================================================================================== VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit( @@ -2884,11 +2907,9 @@ VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit( SqttMgr* pSqtt = pQueue->VkDevice()->GetSqttMgr(); IDevMode* pDevMode = pQueue->VkDevice()->VkInstance()->GetDevModeMgr(); -#if ICD_GPUOPEN_DEVMODE_BUILD pDevMode->NotifyPreSubmit(); CheckRGPFrameBegin(pQueue, pDevMode, submitCount, pSubmits); -#endif if (pDevMode->IsTraceRunning()) { @@ -2935,9 +2956,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit( VkResult result = SQTT_CALL_NEXT_LAYER(vkQueueSubmit)(queue, submitCount, pSubmits, fence); -#if ICD_GPUOPEN_DEVMODE_BUILD CheckRGPFrameEnd(pQueue, pDevMode, submitCount, pSubmits); -#endif return result; } diff --git a/icd/api/sqtt/sqtt_layer.h b/icd/api/sqtt/sqtt_layer.h index e105e193..0378ab5a 100644 --- a/icd/api/sqtt/sqtt_layer.h +++ b/icd/api/sqtt/sqtt_layer.h @@ -187,6 +187,11 @@ class SqttCmdBufferState void WriteUserEventMarker(RgpSqttMarkerUserEventType eventType, const char* pString) const; + static void WritePalInternalEventMarker( + Pal::ICmdBuffer* pPalCmdBuffer, + Pal::DispatchInfoFlags infoFlags, + Pal::RgpMarkerSubQueueFlags subQueueFlags); + void AddDebugTag(uint64_t tag); bool HasDebugTag(uint64_t tag) const; @@ -200,6 +205,11 @@ class SqttCmdBufferState void WriteCbEndMarker() const; void WritePipelineBindMarker(const Pal::Developer::BindPipelineData& data) const; void WriteMarker(const void* pData, size_t dataSize, Pal::RgpMarkerSubQueueFlags subQueueFlags) const; + static void WriteMarker( + Pal::ICmdBuffer* pPalCmdBuffer, + const void* pData, + size_t dataSize, + Pal::RgpMarkerSubQueueFlags subQueueFlags); void WriteBeginGeneralApiMarker(RgpSqttMarkerGeneralApiType apiType) const; void WriteEndGeneralApiMarker(RgpSqttMarkerGeneralApiType apiType) const; void WriteBarrierStartMarker(const Pal::Developer::BarrierData& data) const; @@ -233,14 +243,12 @@ class SqttCmdBufferState RgpSqttMarkerEventType m_currentEventType; // Current API type for pre-draw/dispatch event markers uint32_t m_enabledMarkers; -#if ICD_GPUOPEN_DEVMODE_BUILD struct { bool started; // True if a pipeline is currently being traced uint64_t targetHash; // Determines target pipeline used to trigger instruction tracing VkPipelineBindPoint bindPoint; // Bind point of the target pipeline } m_instructionTrace; -#endif RgpSqttMarkerUserEventWithString* m_pUserEvent; diff --git a/icd/api/sqtt/sqtt_mgr.cpp b/icd/api/sqtt/sqtt_mgr.cpp index 02005042..cb6b8e29 100644 --- a/icd/api/sqtt/sqtt_mgr.cpp +++ b/icd/api/sqtt/sqtt_mgr.cpp @@ -199,6 +199,17 @@ void SqttMgr::PalDeveloperCallback( pSqtt->PalDrawDispatchCallback(drawDispatch); } } + else if (drawDispatch.dispatch.infoFlags.u32All != 0) + { + // Handle a dispatch initiated by PAL. + // For this dispatch we have a command buffer created by PAL + // so we depend on the PAL-provided context and information + // to instrument the dispatch.
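// =====================================================================================================================
// A minimal sketch of the event marker built by the WritePalInternalEventMarker call just below: a plain RGP event
// whose apiType tags the dispatch as the dev-driver overlay (value 46 per the enum added in this patch). The struct
// here is a simplified stand-in; the real RgpSqttMarkerEvent is the packed RGP-defined layout.
#include <cstdint>

struct SketchRgpEventMarker
{
    std::uint32_t identifier;  // stand-in for RgpSqttMarkerIdentifierEvent
    std::uint32_t apiType;     // RgpSqttMarkerEventType value
};

SketchRgpEventMarker MakePalInternalEventMarker(bool isDevDriverOverlay)
{
    SketchRgpEventMarker marker = {};
    marker.apiType = isDevDriverOverlay ? 46u   // CmdDispatchDevDriverOverlay
                                        : 0u;   // assumed value for CmdUnknown
    return marker;
}
// =====================================================================================================================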
+ SqttCmdBufferState::WritePalInternalEventMarker( + drawDispatch.pCmdBuffer, + drawDispatch.dispatch.infoFlags, + drawDispatch.subQueueFlags); + } } break; diff --git a/icd/api/sqtt/sqtt_rgp_annotations.h b/icd/api/sqtt/sqtt_rgp_annotations.h index 0c3cf3a2..e2d3083f 100644 --- a/icd/api/sqtt/sqtt_rgp_annotations.h +++ b/icd/api/sqtt/sqtt_rgp_annotations.h @@ -219,6 +219,7 @@ enum class RgpSqttMarkerEventType : uint32_t CmdDrawMeshTasksIndirectEXT = 43, // vkCmdDrawMeshTasksIndirectEXT CmdDrawIndirectCount = 44, // vkCmdDrawIndirectCount CmdDrawIndexedIndirectCount = 45, // vkCmdDrawIndexedIndirectCount + CmdDispatchDevDriverOverlay = 46, // DevDriverOverlay dispatch #if VKI_RAY_TRACING ShaderIndirectModeMask = 0x800000, // Used to mark whether the shader is compiled in indirect mode or not // This mask can only be used with CmdTraceRaysKHR and CmdTraceRaysIndirectKHR diff --git a/icd/api/strings/extensions.txt b/icd/api/strings/extensions.txt index 50f32292..21745df1 100644 --- a/icd/api/strings/extensions.txt +++ b/icd/api/strings/extensions.txt @@ -229,3 +229,4 @@ VK_EXT_depth_bias_control VK_MESA_image_alignment_control VK_EXT_pipeline_protected_access VK_EXT_pipeline_robustness +VK_EXT_shader_replicated_composites diff --git a/icd/api/strings/strings.h b/icd/api/strings/strings.h index 230de578..fd3bfb0d 100644 --- a/icd/api/strings/strings.h +++ b/icd/api/strings/strings.h @@ -42,6 +42,7 @@ namespace vk namespace strings { + namespace entry { #include "strings/g_entry_points_decl.h" diff --git a/icd/api/vk_alloccb.cpp b/icd/api/vk_alloccb.cpp index c652b750..4225a3ff 100644 --- a/icd/api/vk_alloccb.cpp +++ b/icd/api/vk_alloccb.cpp @@ -189,7 +189,7 @@ void PAL_STDCALL PalFreeFuncDelegator( PalAllocator::PalAllocator( VkAllocationCallbacks* pCallbacks) : -#if PAL_MEMTRACK +#if VKI_MEMTRACK m_memTrackerAlloc(pCallbacks), m_memTracker(&m_memTrackerAlloc), #endif @@ -200,7 +200,7 @@ PalAllocator::PalAllocator( // ===================================================================================================================== void PalAllocator::Init() { -#if PAL_MEMTRACK +#if VKI_MEMTRACK m_memTracker.Init(); #endif } @@ -211,7 +211,7 @@ void* PalAllocator::Alloc( { void* pMem = nullptr; -#if PAL_MEMTRACK +#if VKI_MEMTRACK pMem = m_memTracker.Alloc(allocInfo); #else pMem = allocator::PalAllocFuncDelegator( @@ -234,7 +234,7 @@ void PalAllocator::Free( { if (freeInfo.pClientMem != nullptr) { -#if PAL_MEMTRACK +#if VKI_MEMTRACK m_memTracker.Free(freeInfo); #else allocator::PalFreeFuncDelegator(m_pCallbacks, freeInfo.pClientMem); @@ -242,7 +242,7 @@ void PalAllocator::Free( } } -#if PAL_MEMTRACK +#if VKI_MEMTRACK // ===================================================================================================================== void PalAllocator::MemTrackerAllocator::Free( const Util::FreeInfo& freeInfo) diff --git a/icd/api/vk_cmd_pool.cpp b/icd/api/vk_cmd_pool.cpp index 44643bcc..70c2af88 100644 --- a/icd/api/vk_cmd_pool.cpp +++ b/icd/api/vk_cmd_pool.cpp @@ -40,9 +40,7 @@ #include "palIntrusiveListImpl.h" #include "palVectorImpl.h" -#if ICD_GPUOPEN_DEVMODE_BUILD #include "devmode/devmode_mgr.h" -#endif namespace vk { diff --git a/icd/api/vk_cmdbuffer.cpp b/icd/api/vk_cmdbuffer.cpp index 4f223b9d..ea7b5db2 100644 --- a/icd/api/vk_cmdbuffer.cpp +++ b/icd/api/vk_cmdbuffer.cpp @@ -71,9 +71,7 @@ #include -#if ICD_GPUOPEN_DEVMODE_BUILD #include "devmode/devmode_mgr.h" -#endif namespace vk { @@ -358,6 +356,10 @@ Pal::Result CreateClearSubresRanges( { subresRange.startSubres.plane = 1; } + 
else if (clearInfo.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT) + { + subresRange.startSubres.plane = 0; + } else { hasPlaneDepthAndStencil = (clearInfo.aspectMask == @@ -814,7 +816,6 @@ VkResult CmdBuffer::Initialize( VK_ASSERT(palSize == pPalDevice->GetCmdBufferSize(groupCreateInfo, &result)); VK_ASSERT(result == Pal::Result::Success); } - } if (result == Pal::Result::Success) @@ -1267,7 +1268,7 @@ void CmdBuffer::PalCmdDispatch( utils::IterateMask deviceGroup(m_curDeviceMask); do { - PalCmdBuffer(deviceGroup.Index())->CmdDispatch({ x, y, z }); + PalCmdBuffer(deviceGroup.Index())->CmdDispatch({ x, y, z }, {}); } while (deviceGroup.IterateNext()); } @@ -2453,7 +2454,6 @@ void CmdBuffer::ReleaseResources() m_pStackAllocator = nullptr; } - } // ===================================================================================================================== @@ -3081,70 +3081,60 @@ void CmdBuffer::BindVertexBuffers( { if (bindingCount > 0) { + VK_ASSERT((firstBinding + bindingCount) <= VK_ARRAY_SIZE(PerGpuRenderState::vbBindings)); DbgBarrierPreCmd(DbgBarrierBindIndexVertexBuffer); - constexpr uint32_t MaxLowBindings = VK_ARRAY_SIZE(PerGpuRenderState::vbBindings); - - const uint32_t lowBindingCount = - (firstBinding < MaxLowBindings) ? Util::Min(bindingCount, MaxLowBindings - firstBinding) : 0u; - utils::IterateMask deviceGroup(GetDeviceMask()); do { const uint32_t deviceIdx = deviceGroup.Index(); - if (lowBindingCount > 0) - { - Pal::BufferViewInfo* const pBinding = &PerGpuState(deviceIdx)->vbBindings[firstBinding]; + Pal::BufferViewInfo* const pBinding = &PerGpuState(deviceIdx)->vbBindings[firstBinding]; - BindVertexBuffersUpdateBindingRange( - deviceIdx, - pBinding, - pBinding + lowBindingCount, - 0, - pBuffers, - pOffsets, - pSizes, - pStrides); + BindVertexBuffersUpdateBindingRange( + deviceIdx, + pBinding, + pBinding + bindingCount, + 0, + pBuffers, + pOffsets, + pSizes, + pStrides); - if (m_flags.offsetMode) + if (m_flags.offsetMode) + { + Pal::VertexBufferView vertexViews[Pal::MaxVertexBuffers] = {}; + for (uint32_t idx = 0; idx < bindingCount; idx++) { - Pal::VertexBufferView vertexViews[Pal::MaxVertexBuffers] = {}; - for (uint32_t idx = 0; idx < lowBindingCount; idx++) - { - vertexViews[idx].gpuva = pBinding[idx].gpuAddr; - vertexViews[idx].sizeInBytes = pBinding[idx].range; - vertexViews[idx].strideInBytes = pBinding[idx].stride; - } - - const Pal::VertexBufferViews bufferViews = - { - .firstBuffer = firstBinding, - .bufferCount = lowBindingCount, - .offsetMode = true, - .pVertexBufferViews = vertexViews - }; - PalCmdBuffer(deviceIdx)->CmdSetVertexBuffers(bufferViews); + vertexViews[idx].gpuva = pBinding[idx].gpuAddr; + vertexViews[idx].sizeInBytes = pBinding[idx].range; + vertexViews[idx].strideInBytes = pBinding[idx].stride; } - else + + const Pal::VertexBufferViews bufferViews = { - const Pal::VertexBufferViews bufferViews = - { - .firstBuffer = firstBinding, - .bufferCount = lowBindingCount, - .offsetMode = false, - .pBufferViewInfos = pBinding - }; - PalCmdBuffer(deviceIdx)->CmdSetVertexBuffers(bufferViews); - } + .firstBuffer = firstBinding, + .bufferCount = bindingCount, + .offsetMode = true, + .pVertexBufferViews = vertexViews + }; + PalCmdBuffer(deviceIdx)->CmdSetVertexBuffers(bufferViews); + } + else + { + const Pal::VertexBufferViews bufferViews = + { + .firstBuffer = firstBinding, + .bufferCount = bindingCount, + .offsetMode = false, + .pBufferViewInfos = pBinding + }; + PalCmdBuffer(deviceIdx)->CmdSetVertexBuffers(bufferViews); } - } while (deviceGroup.IterateNext()); 
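// =====================================================================================================================
// A hedged sketch of the offset-mode path above, with simplified stand-ins for the PAL types: each bound vertex
// buffer is repacked from the cached BufferViewInfo into the {gpuva, size, stride} view that PAL consumes.
#include <cstdint>

struct SketchBufferViewInfo   { std::uint64_t gpuAddr; std::uint64_t range; std::uint32_t stride; };
struct SketchVertexBufferView { std::uint64_t gpuva; std::uint64_t sizeInBytes; std::uint32_t strideInBytes; };

void PackVertexViews(const SketchBufferViewInfo* pBindings, std::uint32_t count, SketchVertexBufferView* pOut)
{
    for (std::uint32_t i = 0; i < count; ++i)
    {
        pOut[i] = { pBindings[i].gpuAddr, pBindings[i].range, pBindings[i].stride };
    }
}
// =====================================================================================================================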
- m_vbWatermark = Util::Max( - m_vbWatermark, - Util::Min(firstBinding + bindingCount, MaxLowBindings)); + m_vbWatermark = Util::Max(m_vbWatermark, firstBinding + bindingCount); DbgBarrierPostCmd(DbgBarrierBindIndexVertexBuffer); } @@ -3822,7 +3812,8 @@ void CmdBuffer::ClearColorImage( VirtualStackFrame virtStackFrame(m_pStackAllocator); - const auto maxRanges = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(*pRanges)), MaxPalColorAspectsPerMask); + const auto maxRanges = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(Pal::SubresRange)), + MaxPalColorAspectsPerMask); auto rangeBatch = Util::Min(rangeCount * MaxPalColorAspectsPerMask, maxRanges); // Allocate space to store image subresource ranges @@ -3890,7 +3881,8 @@ void CmdBuffer::ClearDepthStencilImage( VirtualStackFrame virtStackFrame(m_pStackAllocator); - const auto maxRanges = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(*pRanges)), MaxPalDepthAspectsPerMask); + const auto maxRanges = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(Pal::SubresRange)), + MaxPalDepthAspectsPerMask); auto rangeBatch = Util::Min(rangeCount * MaxPalDepthAspectsPerMask, maxRanges); // Allocate space to store image subresource ranges (we need a separate region per PAL aspect) @@ -3992,7 +3984,7 @@ void CmdBuffer::ClearDynamicRenderingImages( // Note: Bound target clears are pipelined by the HW, so we do not have to insert any barriers VirtualStackFrame virtStackFrame(m_pStackAllocator); - const auto maxRects = EstimateMaxObjectsOnVirtualStack(sizeof(*pRects)); + constexpr uint32 MinRects = 8; for (uint32_t idx = 0; idx < attachmentCount; ++idx) { @@ -4012,9 +4004,12 @@ void CmdBuffer::ClearDynamicRenderingImages( const Pal::SwizzledFormat palFormat = VkToPalFormat(attachment.attachmentFormat, m_pDevice->GetRuntimeSettings()); - Util::Vector clearBoxes{ &virtStackFrame }; - Util::Vector clearSubresRanges{ &virtStackFrame }; + Util::Vector clearBoxes { &virtStackFrame }; + Util::Vector clearSubresRanges{ &virtStackFrame }; + const auto maxRects = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(Pal::Box) + + sizeof(Pal::SubresRange)), + MinRects); auto rectBatch = Util::Min(rectCount, maxRects); const auto palResult1 = clearBoxes.Reserve(rectBatch); const auto palResult2 = clearSubresRanges.Reserve(rectBatch); @@ -4091,10 +4086,13 @@ void CmdBuffer::ClearDynamicRenderingImages( // Clear only if the referenced attachment index is active if (pDepthStencilView != nullptr) { - Util::Vector clearRects{ &virtStackFrame }; - Util::Vector clearSubresRanges{ &virtStackFrame }; + Util::Vector clearRects { &virtStackFrame }; + Util::Vector clearSubresRanges{ &virtStackFrame }; - auto rectBatch = Util::Min((rectCount * MaxPalDepthAspectsPerMask), maxRects); + const auto maxRects = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(Pal::Rect) + + sizeof(Pal::SubresRange)), + MinRects); + auto rectBatch = Util::Min((rectCount * MaxPalDepthAspectsPerMask), maxRects); const auto palResult1 = clearRects.Reserve(rectBatch); const auto palResult2 = clearSubresRanges.Reserve(rectBatch); @@ -4157,10 +4155,14 @@ void CmdBuffer::ClearDynamicRenderingBoundAttachments( // Note: Bound target clears are pipelined by the HW, so we do not have to insert any barriers VirtualStackFrame virtStackFrame(m_pStackAllocator); - Util::Vector clearRegions{ &virtStackFrame }; - Util::Vector colorTargets{ &virtStackFrame }; + constexpr uint32 MinRects = 8; + + Util::Vector clearRegions{ &virtStackFrame }; + Util::Vector colorTargets{ &virtStackFrame }; - const auto maxRects = 
EstimateMaxObjectsOnVirtualStack(sizeof(*pRects)); + const auto maxRects = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(Pal::ClearBoundTargetRegion) + + sizeof(Pal::BoundColorTarget)), + MinRects); auto rectBatch = Util::Min(rectCount, maxRects); const auto palResult1 = clearRegions.Reserve(rectBatch); const auto palResult2 = colorTargets.Reserve(attachmentCount); @@ -4287,10 +4289,14 @@ void CmdBuffer::ClearBoundAttachments( const RenderPass* pRenderPass = m_allGpuState.pRenderPass; const uint32_t subpass = m_renderPassInstance.subpass; - Util::Vector clearRegions { &virtStackFrame }; - Util::Vector colorTargets { &virtStackFrame }; + constexpr uint32 MinRects = 8; - const auto maxRects = EstimateMaxObjectsOnVirtualStack(sizeof(*pRects)); + Util::Vector clearRegions { &virtStackFrame }; + Util::Vector colorTargets { &virtStackFrame }; + + const auto maxRects = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(Pal::ClearBoundTargetRegion) + + sizeof(Pal::BoundColorTarget)), + MinRects); auto rectBatch = Util::Min(rectCount, maxRects); const auto palResult1 = clearRegions.Reserve(rectBatch); const auto palResult2 = colorTargets.Reserve(attachmentCount); @@ -4550,10 +4556,11 @@ void CmdBuffer::ClearImageAttachments( { VirtualStackFrame virtStackFrame(m_pStackAllocator); + constexpr uint32 MinRects = 8; + // Get the current renderpass and subpass const RenderPass* pRenderPass = m_allGpuState.pRenderPass; const uint32_t subpass = m_renderPassInstance.subpass; - const auto maxRects = EstimateMaxObjectsOnVirtualStack(sizeof(*pRects)); // Go through each of the clear attachment infos for (uint32_t idx = 0; idx < attachmentCount; ++idx) @@ -4581,9 +4588,12 @@ void CmdBuffer::ClearImageAttachments( // Get the layout that this color attachment is currently in within the render pass const Pal::ImageLayout targetLayout = RPGetAttachmentLayout(attachmentIdx, 0); - Util::Vector clearBoxes { &virtStackFrame }; - Util::Vector clearSubresRanges { &virtStackFrame }; + Util::Vector clearBoxes { &virtStackFrame }; + Util::Vector clearSubresRanges { &virtStackFrame }; + const auto maxRects = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(Pal::Box) + + sizeof(Pal::SubresRange)), + MinRects); auto rectBatch = Util::Min(rectCount, maxRects); const auto palResult1 = clearBoxes.Reserve(rectBatch); const auto palResult2 = clearSubresRanges.Reserve(rectBatch); @@ -4652,9 +4662,12 @@ void CmdBuffer::ClearImageAttachments( const Pal::ImageLayout depthLayout = RPGetAttachmentLayout(attachmentIdx, 0); const Pal::ImageLayout stencilLayout = RPGetAttachmentLayout(attachmentIdx, 1); - Util::Vector clearRects { &virtStackFrame }; - Util::Vector clearSubresRanges { &virtStackFrame }; + Util::Vector clearRects { &virtStackFrame }; + Util::Vector clearSubresRanges { &virtStackFrame }; + const auto maxRects = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(Pal::Rect) + + sizeof(Pal::SubresRange)), + MinRects); auto rectBatch = Util::Min(rectCount, maxRects); const auto palResult1 = clearRects.Reserve(rectBatch); const auto palResult2 = clearSubresRanges.Reserve(rectBatch); @@ -4716,7 +4729,8 @@ void CmdBuffer::ResolveImage( VirtualStackFrame virtStackFrame(m_pStackAllocator); - const auto maxRects = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(*pRects)), MaxRangePerAttachment); + const auto maxRects = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(Pal::ImageResolveRegion)), + MaxRangePerAttachment); auto rectBatch = Util::Min(rectCount * MaxRangePerAttachment, maxRects); // Allocate space to store image resolve 
regions (we need a separate region per PAL aspect) @@ -4807,17 +4821,17 @@ void CmdBuffer::SetEvent2( if (m_flags.useSplitReleaseAcquire) { - ExecuteAcquireRelease(1, - &event, - pDependencyInfo, - Release, - RgpBarrierExternalCmdWaitEvents); + ExecuteAcquireRelease2(1, + &event, + pDependencyInfo, + Release, + RgpBarrierExternalCmdWaitEvents); } else { PipelineStageFlags stageMask = 0; - for(uint32_t i = 0; i < pDependencyInfo->memoryBarrierCount; i++) + for (uint32_t i = 0; i < pDependencyInfo->memoryBarrierCount; i++) { stageMask |= pDependencyInfo->pMemoryBarriers[i].srcStageMask; } @@ -5809,47 +5823,87 @@ void CmdBuffer::WaitEvents( { DbgBarrierPreCmd(DbgBarrierPipelineBarrierWaitEvents); - VirtualStackFrame virtStackFrame(m_pStackAllocator); + if (m_flags.useSplitReleaseAcquire) + { + uint32_t eventRangeCount = 0; - // Allocate space to store signaled event pointers (automatically rewound on unscope) - const Pal::IGpuEvent** ppGpuEvents = virtStackFrame.AllocArray(NumDeviceEvents(eventCount)); + for (uint32_t i = 0; i < eventCount; i += eventRangeCount) + { + eventRangeCount = 1; - if (ppGpuEvents != nullptr) + bool usesToken = Event::ObjectFromHandle(pEvents[i])->IsUseToken(); + + for (uint32_t j = i + 1; j < eventCount; j++) + { + if (Event::ObjectFromHandle(pEvents[j])->IsUseToken() == usesToken) + { + eventRangeCount++; + } + else + { + break; + } + } + + ExecuteAcquireRelease(eventRangeCount, + pEvents + i, + srcStageMask, + dstStageMask, + memoryBarrierCount, + pMemoryBarriers, + bufferMemoryBarrierCount, + pBufferMemoryBarriers, + imageMemoryBarrierCount, + pImageMemoryBarriers, + Acquire, + RgpBarrierExternalCmdWaitEvents); + } + } + else { - const uint32_t multiDeviceStride = eventCount; + VirtualStackFrame virtStackFrame(m_pStackAllocator); - for (uint32_t i = 0; i < eventCount; ++i) + // Allocate space to store signaled event pointers (automatically rewound on unscope) + const Pal::IGpuEvent** ppGpuEvents = + virtStackFrame.AllocArray(NumDeviceEvents(eventCount)); + + if (ppGpuEvents != nullptr) { - const Event* pEvent = Event::ObjectFromHandle(pEvents[i]); + const uint32_t multiDeviceStride = eventCount; - InsertDeviceEvents(ppGpuEvents, pEvent, i, multiDeviceStride); - } + for (uint32_t i = 0; i < eventCount; ++i) + { + const Event* pEvent = Event::ObjectFromHandle(pEvents[i]); - Pal::BarrierInfo barrier = {}; + InsertDeviceEvents(ppGpuEvents, pEvent, i, multiDeviceStride); + } - // Tell PAL to wait at a specific point until the given set of GpuEvent objects is signaled. - // We intentionally ignore the source stage flags (srcStagemask) as they are irrelevant in the - // presence of event objects + Pal::BarrierInfo barrier = {}; - barrier.reason = RgpBarrierExternalCmdWaitEvents; - barrier.waitPoint = VkToPalWaitPipePoint(dstStageMask); - barrier.gpuEventWaitCount = eventCount; - barrier.ppGpuEvents = ppGpuEvents; + // Tell PAL to wait at a specific point until the given set of GpuEvent objects is signaled. 
+ // We intentionally ignore the source stage flags (srcStageMask) as they are irrelevant in the + // presence of event objects - ExecuteBarriers(&virtStackFrame, - memoryBarrierCount, - pMemoryBarriers, - bufferMemoryBarrierCount, - pBufferMemoryBarriers, - imageMemoryBarrierCount, - pImageMemoryBarriers, - &barrier); + barrier.reason = RgpBarrierExternalCmdWaitEvents; + barrier.waitPoint = VkToPalWaitPipePoint(dstStageMask); + barrier.gpuEventWaitCount = eventCount; + barrier.ppGpuEvents = ppGpuEvents; - virtStackFrame.FreeArray(ppGpuEvents); - } - else - { - m_recordingResult = VK_ERROR_OUT_OF_HOST_MEMORY; + ExecuteBarriers(&virtStackFrame, + memoryBarrierCount, + pMemoryBarriers, + bufferMemoryBarrierCount, + pBufferMemoryBarriers, + imageMemoryBarrierCount, + pImageMemoryBarriers, + &barrier); + + virtStackFrame.FreeArray(ppGpuEvents); + } + else + { + m_recordingResult = VK_ERROR_OUT_OF_HOST_MEMORY; + } } DbgBarrierPostCmd(DbgBarrierPipelineBarrierWaitEvents); @@ -5877,40 +5931,25 @@ void CmdBuffer::WaitEvents2( { eventRangeCount = 1; - if (Event::ObjectFromHandle(pEvents[i])->IsUseToken()) + bool usesToken = Event::ObjectFromHandle(pEvents[i])->IsUseToken(); + + for (uint32_t j = i + 1; j < eventCount; j++) { - for (uint32_t j = i + 1; j < eventCount; j++) + if (Event::ObjectFromHandle(pEvents[j])->IsUseToken() == usesToken) { - if (Event::ObjectFromHandle(pEvents[j])->IsUseToken()) - { - eventRangeCount++; - } - else - { - break; - } + eventRangeCount++; } - } - else - { - for (uint32_t j = i + 1; j < eventCount; j++) + else { - if (Event::ObjectFromHandle(pEvents[j])->IsUseToken()) - { - break; - } - else - { - eventRangeCount++; - } + break; } } - ExecuteAcquireRelease(eventRangeCount, - pEvents + i, - pDependencyInfos + i, - Acquire, - RgpBarrierExternalCmdWaitEvents); + ExecuteAcquireRelease2(eventRangeCount, + pEvents + i, + pDependencyInfos + i, + Acquire, + RgpBarrierExternalCmdWaitEvents); } } else @@ -6058,12 +6097,106 @@ void CmdBuffer::WaitEventsSync2ToSync1( } // ===================================================================================================================== -// Based on Dependency Info, execute Acquire or Release according to the mode. +// Helper function called from ExecuteAcquireRelease* to route barrier calls based on AcquireReleaseMode -void CmdBuffer::ExecuteAcquireRelease( +void CmdBuffer::FlushAcquireReleaseBarriers( + Pal::AcquireReleaseInfo* pAcquireReleaseInfo, + uint32_t eventCount, + const VkEvent* pEvents, + Pal::MemBarrier* const pBufferBarriers, + const Buffer** const ppBuffers, + Pal::ImgBarrier* const pImageBarriers, + const Image** const ppImages, + VirtualStackFrame* pVirtStackFrame, + const AcquireReleaseMode acquireReleaseMode, + uint32_t deviceMask) +{ + if (acquireReleaseMode == Release) + { + pAcquireReleaseInfo->dstGlobalStageMask = 0; + pAcquireReleaseInfo->dstGlobalAccessMask = 0; + + // If memoryBarrierCount is 0, set srcStageMask to Pal::PipelineStageTopOfPipe.
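// =====================================================================================================================
// A minimal sketch of the routing FlushAcquireReleaseBarriers performs, on a reduced barrier struct: a Release
// (event signal) keeps only the producer half of each barrier, an Acquire (event wait) keeps only the consumer half,
// and ReleaseThenAcquire submits both halves in one call. Names here are illustrative, not the PAL types.
#include <cstdint>

struct SketchBarrier
{
    std::uint32_t srcStageMask;
    std::uint32_t srcAccessMask;
    std::uint32_t dstStageMask;
    std::uint32_t dstAccessMask;
};

enum class SketchMode { Release, Acquire, ReleaseThenAcquire };

void SplitBarrierForMode(SketchBarrier* pBarrier, SketchMode mode)
{
    if (mode == SketchMode::Release)
    {
        pBarrier->dstStageMask  = 0;  // the signal only describes what must finish
        pBarrier->dstAccessMask = 0;
    }
    else if (mode == SketchMode::Acquire)
    {
        pBarrier->srcStageMask  = 0;  // the wait only describes who consumes
        pBarrier->srcAccessMask = 0;
    }
    // ReleaseThenAcquire: leave both halves intact for the single combined call.
}
// =====================================================================================================================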
+ if (pAcquireReleaseInfo->srcGlobalStageMask == 0) + { + pAcquireReleaseInfo->srcGlobalStageMask |= Pal::PipelineStageTopOfPipe; + } + + for (uint32 i = 0; i < pAcquireReleaseInfo->memoryBarrierCount; i++) + { + pBufferBarriers[i].dstStageMask = 0; + pBufferBarriers[i].dstAccessMask = 0; + } + + for (uint32 i = 0; i < pAcquireReleaseInfo->imageBarrierCount; i++) + { + pImageBarriers[i].dstStageMask = 0; + pImageBarriers[i].dstAccessMask = 0; + } + + // The only way we can be here is as a result of vkCmdSetEvent2, in which case eventCount must be 1 + VK_ASSERT(eventCount == 1); + + PalCmdRelease( + pAcquireReleaseInfo, + pEvents[0], + pBufferBarriers, + ppBuffers, + pImageBarriers, + ppImages, + deviceMask); + } + else if (acquireReleaseMode == Acquire) + { + pAcquireReleaseInfo->srcGlobalStageMask = 0; + pAcquireReleaseInfo->srcGlobalAccessMask = 0; + + for (uint32 i = 0; i < pAcquireReleaseInfo->memoryBarrierCount; i++) + { + pBufferBarriers[i].srcStageMask = 0; + pBufferBarriers[i].srcAccessMask = 0; + } + + for (uint32 i = 0; i < pAcquireReleaseInfo->imageBarrierCount; i++) + { + pImageBarriers[i].srcStageMask = 0; + pImageBarriers[i].srcAccessMask = 0; + } + + // The only way we can be here is as a result of vkCmdWaitEvents*, in which case eventCount + // must be non-zero + VK_ASSERT(eventCount != 0); + + PalCmdAcquire( + pAcquireReleaseInfo, + eventCount, + pEvents, + pBufferBarriers, + ppBuffers, + pImageBarriers, + ppImages, + pVirtStackFrame, + deviceMask); + } + else + { + PalCmdReleaseThenAcquire( + pAcquireReleaseInfo, + pBufferBarriers, + ppBuffers, + pImageBarriers, + ppImages, + deviceMask); + } +} + +// ===================================================================================================================== +// Based on Dependency Info, execute Acquire or Release according to the mode. This function handles the +// VK_KHR_synchronization2 barrier API calls +void CmdBuffer::ExecuteAcquireRelease2( uint32_t dependencyCount, const VkEvent* pEvents, const VkDependencyInfoKHR* pDependencyInfos, - AcquireReleaseMode acquireReleaseMode, + const AcquireReleaseMode acquireReleaseMode, uint32_t rgpBarrierReasonType) { VK_ASSERT((acquireReleaseMode == ReleaseThenAcquire) || (pEvents != nullptr)); @@ -6138,7 +6271,7 @@ uint32_t bufferMemoryBarrierCount = pThisDependencyInfo->bufferMemoryBarrierCount; uint32_t imageMemoryBarrierCount = pThisDependencyInfo->imageMemoryBarrierCount; - while ((memoryBarrierIdx < memBarrierCount) || + while ((memoryBarrierIdx < memBarrierCount) || (bufferMemoryBarrierIdx < bufferMemoryBarrierCount) || (imageMemoryBarrierIdx < imageMemoryBarrierCount)) { @@ -6213,8 +6346,8 @@ // Accounting for the max sub ranges, if we do not have enough space left for another image, // break from this loop. The info for remaining barriers will be passed to PAL in subsequent calls.
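// =====================================================================================================================
// A hedged illustration of the budgeting described in the comment above (constants and names here are illustrative):
// an image barrier may expand into one transition per PAL aspect, so an image is only consumed while the worst-case
// expansion still fits the per-call array; whatever remains is flushed to PAL in a follow-up call.
#include <cstdint>

constexpr std::uint32_t SketchMaxAspects     = 3;   // stand-in for MaxPalAspectsPerMask
constexpr std::uint32_t SketchMaxTransitions = 32;  // stand-in for the per-call barrier budget

std::uint32_t ConsumeImageBarriers(std::uint32_t pendingImages, std::uint32_t* pUsedTransitions)
{
    std::uint32_t taken = 0;

    while ((taken < pendingImages) && ((*pUsedTransitions + SketchMaxAspects) < SketchMaxTransitions))
    {
        *pUsedTransitions += SketchMaxAspects;  // reserve the worst-case aspect expansion
        ++taken;
    }

    return taken;  // the caller flushes this batch, resets *pUsedTransitions, and loops on the rest
}
// =====================================================================================================================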
while (((MaxPalAspectsPerMask + acquireReleaseInfo.imageBarrierCount) < maxImageBarrierCount) && - (locationIndex < maxLocationCount) && - (imageMemoryBarrierIdx < imageMemoryBarrierCount)) + (locationIndex < maxLocationCount) && + (imageMemoryBarrierIdx < imageMemoryBarrierCount)) { Pal::BarrierTransition tempTransition = {}; @@ -6317,7 +6450,7 @@ void CmdBuffer::ExecuteAcquireRelease( else if (pLocations != nullptr) // Could be null due to an OOM error { VK_ASSERT(static_cast(pSampleLocationsInfoEXT->sType) == - VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT); + VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT); VK_ASSERT(pImage->IsSampleLocationsCompatibleDepth()); ConvertToPalMsaaQuadSamplePattern(pSampleLocationsInfoEXT, &pLocations[locationIndex]); @@ -6340,75 +6473,17 @@ void CmdBuffer::ExecuteAcquireRelease( imageMemoryBarrierIdx++; } - if (acquireReleaseMode == Release) - { - acquireReleaseInfo.dstGlobalStageMask = 0; - acquireReleaseInfo.dstGlobalAccessMask = 0; - - // If memoryBarrierCount is 0, set srcStageMask to Pal::PipelineStageTopOfPipe. - if (acquireReleaseInfo.srcGlobalStageMask == 0) - { - acquireReleaseInfo.srcGlobalStageMask |= Pal::PipelineStageTopOfPipe; - } - - for (uint32 i = 0; i < acquireReleaseInfo.memoryBarrierCount; i++) - { - pPalBufferMemoryBarriers[i].dstStageMask = 0; - pPalBufferMemoryBarriers[i].dstAccessMask = 0; - } - - for (uint32 i = 0; i < acquireReleaseInfo.imageBarrierCount; i++) - { - pPalImageBarriers[i].dstStageMask = 0; - pPalImageBarriers[i].dstAccessMask = 0; - } - - PalCmdRelease( - &acquireReleaseInfo, - pEvents[j], - pPalBufferMemoryBarriers, - ppBuffers, - pPalImageBarriers, - ppImages, - m_curDeviceMask); - } - else if (acquireReleaseMode == Acquire) - { - acquireReleaseInfo.srcGlobalStageMask = 0; - acquireReleaseInfo.srcGlobalAccessMask = 0; - - for (uint32 i = 0; i < acquireReleaseInfo.memoryBarrierCount; i++) - { - pPalBufferMemoryBarriers[i].srcStageMask = 0; - pPalBufferMemoryBarriers[i].srcAccessMask = 0; - } - - for (uint32 i = 0; i < acquireReleaseInfo.imageBarrierCount; i++) - { - pPalImageBarriers[i].srcStageMask = 0; - pPalImageBarriers[i].srcAccessMask = 0; - } - - PalCmdAcquire( - &acquireReleaseInfo, - pEvents[j], - pPalBufferMemoryBarriers, - ppBuffers, - pPalImageBarriers, - ppImages, - &virtStackFrame, - m_curDeviceMask); - } - else - { - PalCmdReleaseThenAcquire( - &acquireReleaseInfo, - pPalBufferMemoryBarriers, - ppBuffers, - pPalImageBarriers, - ppImages, - m_curDeviceMask); - } + FlushAcquireReleaseBarriers( + &acquireReleaseInfo, + ((pEvents != nullptr) ? 1u : 0u), + ((pEvents != nullptr) ? &pEvents[j] : nullptr), + pPalBufferMemoryBarriers, + ppBuffers, + pPalImageBarriers, + ppImages, + &virtStackFrame, + acquireReleaseMode, + m_curDeviceMask); } } } @@ -6445,8 +6520,11 @@ void CmdBuffer::ExecuteAcquireRelease( } // ===================================================================================================================== -// Execute Release then acquire mode -void CmdBuffer::ExecuteReleaseThenAcquire( +// Records acquire-release barriers into PAL structures and passes them to PAL. 
This funtion handles the +// Synchronization_1 barrier API calls +void CmdBuffer::ExecuteAcquireRelease( + uint32_t eventCount, + const VkEvent* pEvents, PipelineStageFlags srcStageMask, PipelineStageFlags dstStageMask, uint32_t memBarrierCount, @@ -6454,9 +6532,13 @@ void CmdBuffer::ExecuteReleaseThenAcquire( uint32_t bufferMemoryBarrierCount, const VkBufferMemoryBarrier* pBufferMemoryBarriers, uint32_t imageMemoryBarrierCount, - const VkImageMemoryBarrier* pImageMemoryBarriers) + const VkImageMemoryBarrier* pImageMemoryBarriers, + const AcquireReleaseMode acquireReleaseMode, + uint32_t rgpBarrierReasonType) { - if ((memBarrierCount + bufferMemoryBarrierCount + imageMemoryBarrierCount) > 0) + VK_ASSERT((acquireReleaseMode == ReleaseThenAcquire) || (pEvents != nullptr)); + + if ((memBarrierCount + bufferMemoryBarrierCount + imageMemoryBarrierCount + eventCount) > 0) { VirtualStackFrame virtStackFrame(m_pStackAllocator); @@ -6471,6 +6553,7 @@ void CmdBuffer::ExecuteReleaseThenAcquire( uint32_t bufferMemoryBarrierIdx = 0; uint32_t imageMemoryBarrierIdx = 0; + uint32_t gpuEventCount = eventCount; uint32_t maxLocationCount = Util::Min(imageMemoryBarrierCount, MaxSampleLocationCount); uint32_t maxBufferBarrierCount = Util::Min(bufferMemoryBarrierCount, MaxTransitionCount); uint32_t maxImageBarrierCount = Util::Min((MaxPalAspectsPerMask * imageMemoryBarrierCount) + 1, @@ -6504,15 +6587,16 @@ void CmdBuffer::ExecuteReleaseThenAcquire( if (bufferAllocSuccess && imageAllocSuccess) { - while ((memoryBarrierIdx < memBarrierCount) || + while ((memoryBarrierIdx < memBarrierCount) || (bufferMemoryBarrierIdx < bufferMemoryBarrierCount) || - (imageMemoryBarrierIdx < imageMemoryBarrierCount)) + (imageMemoryBarrierIdx < imageMemoryBarrierCount) || + (gpuEventCount > 0)) { Pal::AcquireReleaseInfo acquireReleaseInfo = {}; acquireReleaseInfo.pMemoryBarriers = pPalBufferMemoryBarriers; acquireReleaseInfo.pImageBarriers = pPalImageBarriers; - acquireReleaseInfo.reason = RgpBarrierExternalCmdPipelineBarrier; + acquireReleaseInfo.reason = rgpBarrierReasonType; uint32_t palSrcStageMask = VkToPalPipelineStageFlags(srcStageMask, true); uint32_t palDstStageMask = VkToPalPipelineStageFlags(dstStageMask, false); @@ -6694,13 +6778,19 @@ void CmdBuffer::ExecuteReleaseThenAcquire( imageMemoryBarrierIdx++; } - PalCmdReleaseThenAcquire( + FlushAcquireReleaseBarriers( &acquireReleaseInfo, + gpuEventCount, + pEvents, pPalBufferMemoryBarriers, ppBuffers, pPalImageBarriers, ppImages, + &virtStackFrame, + acquireReleaseMode, m_curDeviceMask); + + gpuEventCount = 0; } } else @@ -6749,16 +6839,30 @@ void CmdBuffer::PipelineBarrier( { DbgBarrierPreCmd(DbgBarrierPipelineBarrierWaitEvents); + const RuntimeSettings& settings = m_pDevice->GetRuntimeSettings(); + + if (settings.syncPreviousDrawForTransferStage && + (srcStageMask == VK_PIPELINE_STAGE_TRANSFER_BIT) && + (destStageMask == VK_PIPELINE_STAGE_TRANSFER_BIT)) + { + srcStageMask |= (VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR | + VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT_KHR); + } + if (m_flags.useReleaseAcquire) { - ExecuteReleaseThenAcquire(srcStageMask, - destStageMask, - memBarrierCount, - pMemoryBarriers, - bufferMemoryBarrierCount, - pBufferMemoryBarriers, - imageMemoryBarrierCount, - pImageMemoryBarriers); + ExecuteAcquireRelease(0, + nullptr, + srcStageMask, + destStageMask, + memBarrierCount, + pMemoryBarriers, + bufferMemoryBarrierCount, + pBufferMemoryBarriers, + imageMemoryBarrierCount, + pImageMemoryBarriers, + ReleaseThenAcquire, + 
RgpBarrierExternalCmdPipelineBarrier); } else { @@ -6798,11 +6902,11 @@ void CmdBuffer::PipelineBarrier2( if (m_flags.useReleaseAcquire) { - ExecuteAcquireRelease(1, - nullptr, - pDependencyInfo, - ReleaseThenAcquire, - RgpBarrierExternalCmdPipelineBarrier); + ExecuteAcquireRelease2(1, + nullptr, + pDependencyInfo, + ReleaseThenAcquire, + RgpBarrierExternalCmdPipelineBarrier); } else { @@ -7509,7 +7613,8 @@ void CmdBuffer::PalCmdReleaseThenAcquire( // ===================================================================================================================== void CmdBuffer::PalCmdAcquire( Pal::AcquireReleaseInfo* pAcquireReleaseInfo, - const VkEvent event, + uint32_t eventCount, + const VkEvent* pEvents, Pal::MemBarrier* const pBufferBarriers, const Buffer** const ppBuffers, Pal::ImgBarrier* const pImageBarriers, @@ -7522,7 +7627,7 @@ void CmdBuffer::PalCmdAcquire( // in the header, but temporarily you may use the generic "unknown" reason so as not to block you. VK_ASSERT(pAcquireReleaseInfo->reason != 0); - Event* pEvent = Event::ObjectFromHandle(event); + Event* pEvent = Event::ObjectFromHandle(pEvents[0]); utils::IterateMask deviceGroup(deviceMask); do @@ -7540,19 +7645,50 @@ void CmdBuffer::PalCmdAcquire( pAcquireReleaseInfo->pImageBarriers = pImageBarriers; pAcquireReleaseInfo->pMemoryBarriers = pBufferBarriers; + // Whether syncToken is used or not is decided by the setting 'SyncTokenEnabled' if (pEvent->IsUseToken()) { - Pal::ReleaseToken syncToken = {}; + // Allocate space to store sync token values (automatically rewound on unscope) + Pal::ReleaseToken* pSyncTokens = (eventCount > 0) ? + pVirtStackFrame->AllocArray(eventCount) : nullptr; + + if (pSyncTokens != nullptr) + { + for (uint32_t i = 0; i < eventCount; ++i) + { + pSyncTokens[i] = Event::ObjectFromHandle(pEvents[i])->GetSyncToken(); + } - syncToken = pEvent->GetSyncToken(); - PalCmdBuffer(deviceIdx)->CmdAcquire(*pAcquireReleaseInfo, 1u, &syncToken); + PalCmdBuffer(deviceIdx)->CmdAcquire(*pAcquireReleaseInfo, eventCount, pSyncTokens); + + pVirtStackFrame->FreeArray(pSyncTokens); + } + else + { + m_recordingResult = VK_ERROR_OUT_OF_HOST_MEMORY; + } } else { - const Pal::IGpuEvent* pGpuEvent = {}; + // Allocate space to store signaled event pointers (automatically rewound on unscope) + const Pal::IGpuEvent** ppGpuEvents = (eventCount > 0) ? 
+ pVirtStackFrame->AllocArray(eventCount) : nullptr; + + if (ppGpuEvents != nullptr) + { + for (uint32_t i = 0; i < eventCount; ++i) + { + ppGpuEvents[i] = Event::ObjectFromHandle(pEvents[i])->PalEvent(deviceIdx); + } + + PalCmdBuffer(deviceIdx)->CmdAcquireEvent(*pAcquireReleaseInfo, eventCount, ppGpuEvents); - pGpuEvent = pEvent->PalEvent(deviceIdx); - PalCmdBuffer(deviceIdx)->CmdAcquireEvent(*pAcquireReleaseInfo, 1u, &pGpuEvent); + pVirtStackFrame->FreeArray(ppGpuEvents); + } + else + { + m_recordingResult = VK_ERROR_OUT_OF_HOST_MEMORY; + } } } while (deviceGroup.IterateNext()); @@ -8131,6 +8267,16 @@ void CmdBuffer::RPSyncPostLoadOpColorClear( pPalTransitions, ppImages, GetRpDeviceMask()); + + if (pPalTransitions != nullptr) + { + virtStack.FreeArray(pPalTransitions); + } + + if (ppImages != nullptr) + { + virtStack.FreeArray(ppImages); + } } else { @@ -8630,9 +8776,11 @@ void CmdBuffer::RPLoadOpClearColor( VirtualStackFrame virtStackFrame(m_pStackAllocator); - Util::Vector clearRegions{ &virtStackFrame }; + constexpr uint32 MinRects = 8; - const auto maxRects = EstimateMaxObjectsOnVirtualStack(sizeof(VkClearRect)); + Util::Vector clearRegions{ &virtStackFrame }; + + const auto maxRects = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(Pal::ClearBoundTargetRegion)), MinRects); auto rectBatch = Util::Min(count, maxRects); const auto palResult = clearRegions.Reserve(rectBatch); @@ -8779,9 +8927,11 @@ void CmdBuffer::RPLoadOpClearDepthStencil( VirtualStackFrame virtStackFrame(m_pStackAllocator); - Util::Vector clearRegions{ &virtStackFrame }; + constexpr uint32 MinRects = 8; + + Util::Vector clearRegions{ &virtStackFrame }; - const auto maxRects = EstimateMaxObjectsOnVirtualStack(sizeof(VkClearRect)); + const auto maxRects = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(Pal::ClearBoundTargetRegion)), MinRects); auto rectBatch = Util::Min(count, maxRects); for (uint32_t i = 0; i < count; ++i) @@ -9237,8 +9387,7 @@ void CmdBuffer::SetViewInstanceMask( const uint32_t deviceViewMask = uint32_t { 0x1 } << deviceIdx; uint32_t viewMask = 0x0; - - if (m_allGpuState.viewIndexFromDeviceIndex) + if (m_allGpuState.viewIndexFromDeviceIndex && (Util::CountSetBits(deviceMask) > 1)) { // VK_KHR_multiview interaction with VK_KHR_device_group. // When GraphicsPipeline is created with flag @@ -9266,7 +9415,6 @@ void CmdBuffer::SetViewInstanceMask( // Basically each device renders all views. 
viewMask = subpassViewMask; } - PalCmdBuffer(deviceIdx)->CmdSetViewInstanceMask(viewMask); } while (deviceGroup.IterateNext()); @@ -11152,6 +11300,8 @@ void CmdBuffer::GetRayTracingDispatchArgs( pConstants->constData.rayDispatchWidth = width; pConstants->constData.rayDispatchHeight = height; pConstants->constData.rayDispatchDepth = depth; + pConstants->constData.rayDispatchMaxGroups = pPipeline->PersistentDispatchSize(width, height, depth); + pConstants->constData.missTableBaseAddressLo = Util::LowPart(missSbt.deviceAddress); pConstants->constData.missTableBaseAddressHi = Util::HighPart(missSbt.deviceAddress); pConstants->constData.missTableStrideInBytes = static_cast(missSbt.stride); @@ -11393,7 +11543,7 @@ void CmdBuffer::TraceRaysDispatchPerDevice( { const RayTracingPipeline* pPipeline = pCmdBuffer->m_allGpuState.pRayTracingPipeline; const Pal::DispatchDims dispatchSize = pPipeline->GetDispatchSize({ .x = width, .y = height, .z = depth }); - pCmdBuffer->PalCmdBuffer(deviceIdx)->CmdDispatch(dispatchSize); + pCmdBuffer->PalCmdBuffer(deviceIdx)->CmdDispatch(dispatchSize, {}); } // ===================================================================================================================== @@ -11791,7 +11941,6 @@ void CmdBuffer::InsertDebugMarker( const char* pLabelName, bool isBegin) { -#if ICD_GPUOPEN_DEVMODE_BUILD constexpr uint8 MarkerSourceApplication = 0; const IDevMode* pDevMode = m_pDevice->VkInstance()->GetDevModeMgr(); @@ -11806,7 +11955,6 @@ void CmdBuffer::InsertDebugMarker( Util::StringLength(pLabelName) : 0); } -#endif } // ===================================================================================================================== diff --git a/icd/api/vk_cmdbuffer_transfer.cpp b/icd/api/vk_cmdbuffer_transfer.cpp index 9b03c795..7a641e13 100644 --- a/icd/api/vk_cmdbuffer_transfer.cpp +++ b/icd/api/vk_cmdbuffer_transfer.cpp @@ -279,7 +279,7 @@ void CmdBuffer::CopyBuffer( VirtualStackFrame virtStackFrame(m_pStackAllocator); - const auto maxRegions = EstimateMaxObjectsOnVirtualStack(sizeof(*pRegions)); + const auto maxRegions = EstimateMaxObjectsOnVirtualStack(sizeof(Pal::MemoryCopyRegion)); auto regionBatch = Util::Min(regionCount, maxRegions); // Allocate space to store memory copy regions @@ -332,7 +332,8 @@ void CmdBuffer::CopyImage( VirtualStackFrame virtStackFrame(m_pStackAllocator); - const auto maxRegions = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(*pRegions)), MaxPalAspectsPerMask); + const auto maxRegions = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(Pal::ImageCopyRegion)), + MaxPalAspectsPerMask); auto regionBatch = Util::Min(regionCount * MaxPalAspectsPerMask, maxRegions); Pal::ImageCopyRegion* pPalRegions = @@ -392,7 +393,8 @@ void CmdBuffer::BlitImage( VirtualStackFrame virtStackFrame(m_pStackAllocator); - const auto maxRegions = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(*pRegions)), MaxPalAspectsPerMask); + const auto maxRegions = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(Pal::ImageScaledCopyRegion)), + MaxPalAspectsPerMask); auto regionBatch = Util::Min(regionCount * MaxPalAspectsPerMask, maxRegions); // Allocate space to store scaled image copy regions (we need a separate region per PAL aspect) @@ -521,7 +523,7 @@ void CmdBuffer::CopyBufferToImage( VirtualStackFrame virtStackFrame(m_pStackAllocator); - const auto maxRegions = EstimateMaxObjectsOnVirtualStack(sizeof(*pRegions)); + const auto maxRegions = EstimateMaxObjectsOnVirtualStack(sizeof(Pal::MemoryImageCopyRegion)); auto regionBatch = Util::Min(regionCount, 
maxRegions); // Allocate space to store memory image copy regions @@ -587,7 +589,7 @@ void CmdBuffer::CopyImageToBuffer( VirtualStackFrame virtStackFrame(m_pStackAllocator); - const auto maxRegions = EstimateMaxObjectsOnVirtualStack(sizeof(*pRegions)); + const auto maxRegions = EstimateMaxObjectsOnVirtualStack(sizeof(Pal::MemoryImageCopyRegion)); auto regionBatch = Util::Min(regionCount, maxRegions); // Allocate space to store memory image copy regions @@ -889,7 +891,7 @@ void CmdBuffer::QueryCopy( uint32_t threadGroupCount = Util::Max(1U, (queryCount + ThreadsPerGroup - 1) / ThreadsPerGroup); - PalCmdBuffer(deviceIdx)->CmdDispatch({ threadGroupCount, 1, 1 }); + PalCmdBuffer(deviceIdx)->CmdDispatch({ threadGroupCount, 1, 1 }, {}); // Restore compute state PalCmdBuffer(deviceIdx)->CmdRestoreComputeState(Pal::ComputeStatePipelineAndUserData); diff --git a/icd/api/vk_compute_pipeline.cpp b/icd/api/vk_compute_pipeline.cpp index 312583b6..bbf447ee 100644 --- a/icd/api/vk_compute_pipeline.cpp +++ b/icd/api/vk_compute_pipeline.cpp @@ -266,9 +266,10 @@ void ComputePipeline::ConvertComputePipelineInfo( void ComputePipeline::FetchPalMetadata( PalAllocator* pAllocator, const void* pBinary, + size_t binarySize, uint32_t* pOrigThreadgroupDims) { - Util::Abi::PipelineAbiReader abiReader(pAllocator, pBinary); + Util::Abi::PipelineAbiReader abiReader(pAllocator, Util::Span{pBinary, binarySize}); Util::Result result = abiReader.Init(); if (result == Util::Result::Success) @@ -593,7 +594,6 @@ VkResult ComputePipeline::Create( Util::VoidPtrInc(pPalMem, deviceIdx * pipelineSize), &pPalPipeline[deviceIdx]); -#if ICD_GPUOPEN_DEVMODE_BUILD // Temporarily reinject post Pal pipeline creation (when the internal pipeline hash is available). // The reinjection cache layer can be linked back into the pipeline cache chain once the // Vulkan pipeline cache key can be stored (and read back) inside the ELF as metadata. @@ -624,7 +624,6 @@ VkResult ComputePipeline::Create( palResult = Util::Result::Success; } } -#endif } result = PalToVkResult(palResult); @@ -650,6 +649,7 @@ VkResult ComputePipeline::Create( uint32_t origThreadgroupDims[3]; FetchPalMetadata(pDevice->VkInstance()->Allocator(), pipelineBinaries[DefaultDeviceIndex].pCode, + pipelineBinaries[DefaultDeviceIndex].codeSize, origThreadgroupDims); // On success, wrap it up in a Vulkan object and return. 
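The vk_compute_pipeline.cpp hunks above thread an explicit codeSize through FetchPalMetadata so that Util::Abi::PipelineAbiReader is constructed from a pointer/size pair (a span) rather than a bare pointer, letting the ELF parser bounds-check its reads instead of trusting the binary's self-description. Below is a minimal sketch of that pattern; BinarySpan and GetSectionHeaderOffset are invented stand-ins for illustration, not PAL's actual declarations.

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    // Simplified stand-in for a span type: a pointer paired with the caller-known size.
    struct BinarySpan
    {
        const void* pData;
        size_t      size;
    };

    // Hypothetical helper: read the ELF64 e_shoff field out of a pipeline binary.
    // Because the size travels with the pointer, a truncated blob fails cleanly
    // instead of causing an out-of-bounds read.
    static bool GetSectionHeaderOffset(
        const BinarySpan& binary,
        uint64_t*         pOffset)
    {
        constexpr size_t ElfHeaderSize = 64; // sizeof(Elf64_Ehdr)
        constexpr size_t ShOffPosition = 40; // offsetof(Elf64_Ehdr, e_shoff)

        if ((binary.pData == nullptr) || (binary.size < ElfHeaderSize))
        {
            return false;
        }

        std::memcpy(pOffset,
                    static_cast<const uint8_t*>(binary.pData) + ShOffPosition,
                    sizeof(uint64_t));

        // A section header table that starts past the end of the blob indicates
        // a corrupt or truncated binary.
        return (*pOffset <= binary.size);
    }

The same reasoning applies to the Pipeline::GetShaderDisassembly and GetAvailableAmdIlSymbol call sites later in this patch, which now pass pipelineBinary.codeSize alongside pCode.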
diff --git a/icd/api/vk_device.cpp b/icd/api/vk_device.cpp index 66237a87..32438c8d 100644 --- a/icd/api/vk_device.cpp +++ b/icd/api/vk_device.cpp @@ -89,12 +89,9 @@ #include "appopt/barrier_filter_layer.h" #include "appopt/strange_brigade_layer.h" -#include "appopt/gravity_mark_layer.h" #include "appopt/baldurs_gate3_layer.h" -#if ICD_GPUOPEN_DEVMODE_BUILD #include "devmode/devmode_mgr.h" -#endif #include "palCmdBuffer.h" #include "palCmdAllocator.h" @@ -354,8 +351,7 @@ VkResult Device::Create( } // Dedicated Compute Units - static constexpr uint32_t MaxEngineCount = 8; - uint32_t dedicatedComputeUnits[Queue::MaxQueueFamilies][MaxEngineCount] = {}; + uint32_t dedicatedComputeUnits[Queue::MaxQueueFamilies][Queue::MaxQueuesPerFamily] = {}; VkResult vkResult = VK_SUCCESS; void* pMemory = nullptr; @@ -1311,21 +1307,6 @@ VkResult Device::Initialize( break; } - case AppProfile::GravityMark: - { - void* pMemory = VkInstance()->AllocMem(sizeof(GravityMarkLayer), VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - - if (pMemory != nullptr) - { - m_pAppOptLayer = VK_PLACEMENT_NEW(pMemory) GravityMarkLayer(); - } - else - { - result = VK_ERROR_OUT_OF_HOST_MEMORY; - } - - break; - } case AppProfile::BaldursGate3: { void* pMemory = VkInstance()->AllocMem(sizeof(BaldursGate3Layer), VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); @@ -1453,12 +1434,10 @@ VkResult Device::Initialize( } } -#if ICD_GPUOPEN_DEVMODE_BUILD if ((result == VK_SUCCESS) && (VkInstance()->GetDevModeMgr() != nullptr)) { VkInstance()->GetDevModeMgr()->PostDeviceCreate(this); } -#endif if (result == VK_SUCCESS) { @@ -1766,12 +1745,10 @@ VkResult Device::Destroy(const VkAllocationCallbacks* pAllocator) VK_ALERT(powerRes != VK_SUCCESS); } -#if ICD_GPUOPEN_DEVMODE_BUILD if (VkInstance()->GetDevModeMgr() != nullptr) { VkInstance()->GetDevModeMgr()->PreDeviceDestroy(this); } -#endif #if VKI_RAY_TRACING if (m_pRayTrace != nullptr) @@ -1924,7 +1901,7 @@ VkResult Device::CreateInternalComputePipeline( uint32_t numUserDataNodes, Vkgc::ResourceMappingRootNode* pUserDataNodes, ShaderModuleFlags flags, - bool forceWave64, + ShaderWaveSize waveSize, const VkSpecializationInfo* pSpecializationInfo, InternalPipeline* pInternalPipeline) { @@ -1979,14 +1956,30 @@ VkResult Device::CreateInternalComputePipeline( pCompiler->ApplyDefaultShaderOptions(ShaderStage::ShaderStageCompute, 0, &pShaderInfo->options); // forceWave64 is currently true for only GpuRT shaders and shouldForceWave32 should not affect GpuRT shaders - bool shouldForceWave32 = (((GetRuntimeSettings().deprecateWave64 & DeprecateWave64::DeprecateWave64Cs) || - (GetRuntimeSettings().deprecateWave64 & DeprecateWave64::DeprecateWave64All)) && - (forceWave64 == false)); + bool shouldForceWave32 = ((GetRuntimeSettings().deprecateWave64 & DeprecateWave64::DeprecateWave64Cs) || + (GetRuntimeSettings().deprecateWave64 & DeprecateWave64::DeprecateWave64All)); - if (forceWave64) + switch (waveSize) { + case ShaderWaveSize::WaveSizeAuto: + pShaderInfo->options.allowVaryWaveSize = true; + + // Only apply if the wave size was not already specified. 
+ if (shouldForceWave32) + { + pShaderInfo->options.waveSize = 32; + } + break; + case ShaderWaveSize::WaveSize32: + pShaderInfo->options.waveSize = 32; + pShaderInfo->options.subgroupSize = 32; + break; + case ShaderWaveSize::WaveSize64: pShaderInfo->options.waveSize = 64; pShaderInfo->options.subgroupSize = 64; + break; + default: + VK_NEVER_CALLED(); } Pal::ShaderHash codeHash = ShaderModule::GetCodeHash( @@ -2012,8 +2005,6 @@ VkResult Device::CreateInternalComputePipeline( 0, options); - options.pOptions->waveSize = (shouldForceWave32) ? 32 : options.pOptions->waveSize; - // PAL Pipeline caching Util::Result cacheResult = Util::Result::NotFound; Util::MetroHash::Hash cacheId = {}; @@ -2215,7 +2206,7 @@ VkResult Device::CreateInternalPipelines() VK_ARRAY_SIZE(userDataNodes), userDataNodes, 0, - false, + ShaderWaveSize::WaveSizeAuto, nullptr, &m_timestampQueryCopyPipeline); @@ -2230,7 +2221,7 @@ VkResult Device::CreateInternalPipelines() VK_ARRAY_SIZE(userDataNodes), userDataNodes, 0, - false, + ShaderWaveSize::WaveSizeAuto, nullptr, &m_accelerationStructureQueryCopyPipeline); } @@ -2478,54 +2469,77 @@ VkResult Device::WaitForFences( VkBool32 waitAll, uint64_t timeout) { - Pal::Result palResult = Pal::Result::Success; + VirtualStackAllocator* pStackAllocator = nullptr; - Pal::IFence** ppPalFences = static_cast(VK_ALLOC_A(sizeof(Pal::IFence*) * fenceCount)); + Pal::Result palResult = m_pInstance->StackMgr()->AcquireAllocator(&pStackAllocator); - if (IsMultiGpu() == false) + if (palResult == Pal::Result::Success) { - for (uint32_t i = 0; i < fenceCount; ++i) + VirtualStackFrame virtStackFrame(pStackAllocator); + + Pal::IFence** ppPalFences = virtStackFrame.AllocArray(fenceCount); + + if (ppPalFences == nullptr) { - ppPalFences[i] = Fence::ObjectFromHandle(pFences[i])->PalFence(DefaultDeviceIndex); + palResult = Pal::Result::ErrorOutOfMemory; } - palResult = PalDevice(DefaultDeviceIndex)->WaitForFences(fenceCount, - ppPalFences, - waitAll != VK_FALSE, - Uint64ToChronoNano(timeout)); - } - else - { - for (uint32_t deviceIdx = 0; - (deviceIdx < NumPalDevices()) && (palResult == Pal::Result::Success); - deviceIdx++) + if (IsMultiGpu() == false) { - const uint32_t currentDeviceMask = 1 << deviceIdx; - - uint32_t perDeviceFenceCount = 0; - for (uint32_t i = 0; i < fenceCount; ++i) + if (palResult == Pal::Result::Success) { - Fence* pFence = Fence::ObjectFromHandle(pFences[i]); + for (uint32_t i = 0; i < fenceCount; ++i) + { + ppPalFences[i] = Fence::ObjectFromHandle(pFences[i])->PalFence(DefaultDeviceIndex); + } - // Some conformance tests will wait on fences that were never submitted, so use only the first device - // for these cases. - const bool forceWait = (pFence->GetActiveDeviceMask() == 0) && (deviceIdx == DefaultDeviceIndex); + palResult = PalDevice(DefaultDeviceIndex)->WaitForFences(fenceCount, + ppPalFences, + waitAll != VK_FALSE, + Uint64ToChronoNano(timeout)); + } + } + else + { + for (uint32_t deviceIdx = 0; + (deviceIdx < NumPalDevices()) && (palResult == Pal::Result::Success); + deviceIdx++) + { + const uint32_t currentDeviceMask = 1 << deviceIdx; - if (forceWait || ((currentDeviceMask & pFence->GetActiveDeviceMask()) != 0)) + uint32_t perDeviceFenceCount = 0; + for (uint32_t i = 0; i < fenceCount; ++i) { - ppPalFences[perDeviceFenceCount++] = pFence->PalFence(deviceIdx); + Fence* pFence = Fence::ObjectFromHandle(pFences[i]); + + // Some conformance tests will wait on fences that were never submitted, so use only the first + // device for these cases. 
+ const bool forceWait = (pFence->GetActiveDeviceMask() == 0) && (deviceIdx == DefaultDeviceIndex); + + if (forceWait || ((currentDeviceMask & pFence->GetActiveDeviceMask()) != 0)) + { + ppPalFences[perDeviceFenceCount++] = pFence->PalFence(deviceIdx); + } } - } - if (perDeviceFenceCount > 0) - { - palResult = PalDevice(deviceIdx)->WaitForFences(perDeviceFenceCount, - ppPalFences, - waitAll != VK_FALSE, - Uint64ToChronoNano(timeout)); + if (perDeviceFenceCount > 0) + { + palResult = PalDevice(deviceIdx)->WaitForFences(perDeviceFenceCount, + ppPalFences, + waitAll != VK_FALSE, + Uint64ToChronoNano(timeout)); + } } } + + virtStackFrame.FreeArray(ppPalFences); } + + if (pStackAllocator != nullptr) + { + m_pInstance->StackMgr()->ReleaseAllocator(pStackAllocator); + } + return PalToVkResult(palResult); } @@ -2535,28 +2549,52 @@ VkResult Device::ResetFences( uint32_t fenceCount, const VkFence* pFences) { - Pal::IFence** ppPalFences = static_cast(VK_ALLOC_A(sizeof(Pal::IFence*) * fenceCount)); + VirtualStackAllocator* pStackAllocator = nullptr; - Pal::Result palResult = Pal::Result::Success; + Pal::Result palResult = m_pInstance->StackMgr()->AcquireAllocator(&pStackAllocator); - // Clear the wait masks for each fence - for (uint32_t i = 0; i < fenceCount; ++i) + if (palResult == Pal::Result::Success) { - Fence::ObjectFromHandle(pFences[i])->ClearActiveDeviceMask(); - Fence::ObjectFromHandle(pFences[i])->RestoreFence(this); - } + VirtualStackFrame virtStackFrame(pStackAllocator); - for (uint32_t deviceIdx = 0; - (deviceIdx < NumPalDevices()) && (palResult == Pal::Result::Success); - deviceIdx++) - { - for (uint32_t i = 0; i < fenceCount; ++i) + Pal::IFence** ppPalFences = virtStackFrame.AllocArray(fenceCount); + + if (ppPalFences == nullptr) + { + palResult = Pal::Result::ErrorOutOfMemory; + } + else + { + // Clear the wait masks for each fence + for (uint32_t i = 0; i < fenceCount; ++i) + { + Fence::ObjectFromHandle(pFences[i])->ClearActiveDeviceMask(); + Fence::ObjectFromHandle(pFences[i])->RestoreFence(this); + } + } + + for (uint32_t deviceIdx = 0; + (deviceIdx < NumPalDevices()) && (palResult == Pal::Result::Success); + deviceIdx++) + { + for (uint32_t i = 0; i < fenceCount; ++i) + { + Fence* pFence = Fence::ObjectFromHandle(pFences[i]); + ppPalFences[i] = pFence->PalFence(deviceIdx); + } + + palResult = PalDevice(deviceIdx)->ResetFences(fenceCount, ppPalFences); + } + + if (ppPalFences != nullptr) { - Fence* pFence = Fence::ObjectFromHandle(pFences[i]); - ppPalFences[i] = pFence->PalFence(deviceIdx); + virtStackFrame.FreeArray(ppPalFences); } + } - palResult = PalDevice(deviceIdx)->ResetFences(fenceCount, ppPalFences); + if (pStackAllocator != nullptr) + { + m_pInstance->StackMgr()->ReleaseAllocator(pStackAllocator); } return PalToVkResult(palResult); @@ -3251,27 +3289,47 @@ VkResult Device::WaitSemaphores( const VkSemaphoreWaitInfo* pWaitInfo, uint64_t timeout) { - Pal::Result palResult = Pal::Result::Success; - uint32_t flags = 0; + VirtualStackAllocator* pStackAllocator = nullptr; - Pal::IQueueSemaphore** ppPalSemaphores = static_cast(VK_ALLOC_A( - sizeof(Pal::IQueueSemaphore*) * pWaitInfo->semaphoreCount)); + Pal::Result palResult = m_pInstance->StackMgr()->AcquireAllocator(&pStackAllocator); - for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) + if (palResult == Pal::Result::Success) { - Semaphore* currentSemaphore = Semaphore::ObjectFromHandle(pWaitInfo->pSemaphores[i]); - ppPalSemaphores[i] = currentSemaphore->PalSemaphore(DefaultDeviceIndex); - 
currentSemaphore->RestoreSemaphore(); + VirtualStackFrame virtStackFrame(pStackAllocator); + + Pal::IQueueSemaphore** ppPalSemaphores = + virtStackFrame.AllocArray(pWaitInfo->semaphoreCount); + + if (ppPalSemaphores == nullptr) + { + palResult = Pal::Result::ErrorOutOfMemory; + } + else + { + for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) + { + Semaphore* currentSemaphore = Semaphore::ObjectFromHandle(pWaitInfo->pSemaphores[i]); + ppPalSemaphores[i] = currentSemaphore->PalSemaphore(DefaultDeviceIndex); + currentSemaphore->RestoreSemaphore(); + } + + const uint32 flags = (pWaitInfo->flags == VK_SEMAPHORE_WAIT_ANY_BIT) ? Pal::HostWaitFlags::HostWaitAny : 0; + + palResult = PalDevice(DefaultDeviceIndex)->WaitForSemaphores(pWaitInfo->semaphoreCount, ppPalSemaphores, + pWaitInfo->pValues, flags, Uint64ToChronoNano(timeout)); + } + + if (ppPalSemaphores != nullptr) + { + virtStackFrame.FreeArray(ppPalSemaphores); + } } - if (pWaitInfo->flags == VK_SEMAPHORE_WAIT_ANY_BIT) + if (pStackAllocator != nullptr) { - flags |= Pal::HostWaitFlags::HostWaitAny; + m_pInstance->StackMgr()->ReleaseAllocator(pStackAllocator); } - palResult = PalDevice(DefaultDeviceIndex)->WaitForSemaphores(pWaitInfo->semaphoreCount, ppPalSemaphores, - pWaitInfo->pValues, flags, Uint64ToChronoNano(timeout)); - return PalToVkResult(palResult); } diff --git a/icd/api/vk_graphics_pipeline.cpp b/icd/api/vk_graphics_pipeline.cpp index 478c1c80..89cad731 100644 --- a/icd/api/vk_graphics_pipeline.cpp +++ b/icd/api/vk_graphics_pipeline.cpp @@ -311,7 +311,6 @@ VkResult GraphicsPipeline::CreatePalPipelineObjects( Util::VoidPtrInc(pSystemMem, palOffset), &pPalPipeline[deviceIdx]); -#if ICD_GPUOPEN_DEVMODE_BUILD // Temporarily reinject post Pal pipeline creation (when the internal pipeline hash is available). // The reinjection cache layer can be linked back into the pipeline cache chain once the // Vulkan pipeline cache key can be stored (and read back) inside the ELF as metadata. @@ -342,7 +341,6 @@ VkResult GraphicsPipeline::CreatePalPipelineObjects( palResult = Util::Result::Success; } } -#endif VK_ASSERT(palSize == pPalDevice->GetGraphicsPipelineSize(pObjectCreateInfo->pipeline, nullptr)); palOffset += palSize; @@ -444,10 +442,9 @@ VkResult GraphicsPipeline::CreatePipelineObjects( const auto& palProperties = pDevice->VkPhysicalDevice(DefaultDeviceIndex)->PalProperties(); const auto& info = pPalPipeline[deviceIdx]->GetInfo(); - if ((info.ps.flags.perSampleShading == 1) || - (info.ps.flags.enablePops == 1)) + if (info.ps.flags.perSampleShading == 1) { - // Override the shader rate to 1x1 if SampleId used in shader, or POPS is enabled. + // Override the shader rate to 1x1 if SampleId used in shader. Device::SetDefaultVrsRateParams(&pObjectCreateInfo->immedInfo.vrsRateParams); pObjectCreateInfo->flags.force1x1ShaderRate = true; @@ -480,6 +477,14 @@ VkResult GraphicsPipeline::CreatePipelineObjects( pObjectCreateInfo->flags.force1x1ShaderRate = true; pObjectCreateInfo->immedInfo.msaaCreateInfo.pixelShaderSamples = 1; } + else if (info.ps.flags.enablePops == 1) + { + // Override the shader rate to 1x1 if POPS is enabled and + // fragmentShadingRateWithFragmentShaderInterlock is not supported. 
+ Device::SetDefaultVrsRateParams(&pObjectCreateInfo->immedInfo.vrsRateParams); + + pObjectCreateInfo->flags.force1x1ShaderRate = true; + } } if (pObjectCreateInfo->flags.bindMsaaObject) @@ -1051,8 +1056,30 @@ VkResult GraphicsPipeline::Create( #endif objectCreateInfo.flags.isPointSizeUsed = binaryMetadata.pointSizeUsed; objectCreateInfo.flags.shadingRateUsedInShader = binaryMetadata.shadingRateUsedInShader; - objectCreateInfo.flags.viewIndexFromDeviceIndex = Util::TestAnyFlagSet(flags, - VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT); + + if (libInfo.pPreRasterizationShaderLib != nullptr) + { + if (libInfo.pPreRasterizationShaderLib->GetPipelineBinaryCreateInfo().flags & + VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT) + { + objectCreateInfo.flags.viewIndexFromDeviceIndex |= 1 << GraphicsLibraryPreRaster; + } + } + + if (libInfo.pFragmentShaderLib != nullptr) + { + if (libInfo.pFragmentShaderLib->GetPipelineBinaryCreateInfo().flags & + VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT) + { + objectCreateInfo.flags.viewIndexFromDeviceIndex |= 1 << GraphicsLibraryFragment; + } + } + + if (Util::TestAnyFlagSet(flags, VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT)) + { + objectCreateInfo.flags.viewIndexFromDeviceIndex |= + ((1 << GraphicsLibraryPreRaster) | (1 << GraphicsLibraryFragment)); + } #if VKI_RAY_TRACING objectCreateInfo.dispatchRaysUserDataOffset = pPipelineLayout->GetDispatchRaysUserData(); @@ -2232,8 +2259,8 @@ void GraphicsPipeline::BindToCmdBuffer( // because when VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT is specified // ViewMask for each VkPhysicalDevice is defined by DeviceIndex // not by current subpass during a render pass instance. - const bool oldViewIndexFromDeviceIndex = pRenderState->viewIndexFromDeviceIndex; - const bool newViewIndexFromDeviceIndex = ViewIndexFromDeviceIndex(); + const uint32_t oldViewIndexFromDeviceIndex = pRenderState->viewIndexFromDeviceIndex; + const uint32_t newViewIndexFromDeviceIndex = StageMaskForViewIndexUseDeviceIndex(); if (oldViewIndexFromDeviceIndex != newViewIndexFromDeviceIndex) { diff --git a/icd/api/vk_graphics_pipeline_library.cpp b/icd/api/vk_graphics_pipeline_library.cpp index 30cc392e..6f512ffd 100644 --- a/icd/api/vk_graphics_pipeline_library.cpp +++ b/icd/api/vk_graphics_pipeline_library.cpp @@ -500,6 +500,21 @@ VkResult GraphicsPipelineLibrary::Create( GplModuleState tempModuleStates[ShaderStage::ShaderStageGfxCount] = {}; binaryCreateInfo.pipelineInfo.iaState.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + if (pCreateInfo->pInputAssemblyState != nullptr) + { + binaryCreateInfo.pipelineInfo.iaState.topology = pCreateInfo->pInputAssemblyState->topology; + } + else if (pCreateInfo->stageCount > 0) + { + for (uint32_t stage = 0; stage < pCreateInfo->stageCount; ++stage) + { + if ((pCreateInfo->pStages[stage].stage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) || + (pCreateInfo->pStages[stage].stage == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)) + { + binaryCreateInfo.pipelineInfo.iaState.topology = VK_PRIMITIVE_TOPOLOGY_PATCH_LIST; + } + } + } if ((internalFlags & VK_GRAPHICS_PIPELINE_LIBRARY_FORCE_LLPC) != 0) { diff --git a/icd/api/vk_image.cpp b/icd/api/vk_image.cpp index e69d1743..a023a61b 100644 --- a/icd/api/vk_image.cpp +++ b/icd/api/vk_image.cpp @@ -432,6 +432,9 @@ void Image::ConvertImageCreateInfo( const Pal::GfxIpLevel gfxLevel = palProperties.gfxLevel; + const uint32_t forceEnableDccMask = settings.forceEnableDcc; + const uint32_t forceDisableCompressionMask = 
settings.forceDisableCompression; + { // Don't force DCC to be enabled for performance reasons unless the image is larger than the minimum size set for // compression, another performance optimization. @@ -439,14 +442,11 @@ void Image::ConvertImageCreateInfo( (settings.disableSmallSurfColorCompressionSize * settings.disableSmallSurfColorCompressionSize)) && (Formats::IsColorFormat(createInfoFormat))) { - const uint32_t forceEnableDccMask = settings.forceEnableDcc; - const uint32_t bpp = Pal::Formats::BitsPerPixel(pPalCreateInfo->swizzledFormat.format); const bool isShaderStorage = (pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT); - if (isShaderStorage && ((forceEnableDccMask & (ForceDccDefault | - ForceDisableCompression | - ForceDisableCompressionForColor)) == 0)) + if (isShaderStorage && (forceEnableDccMask != 0) && + ((forceDisableCompressionMask & DisableCompressionForColor) == 0)) { const bool isColorAttachment = (pCreateInfo->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); @@ -520,25 +520,20 @@ void Image::ConvertImageCreateInfo( pPalCreateInfo->metadataTcCompatMode = Pal::MetadataTcCompatMode::Disabled; } - const uint32_t disableBits = - ForceDisableCompression | - ((Formats::IsColorFormat(createInfoFormat)) ? ForceDisableCompressionForColor : 0) | - ((Formats::IsDepthStencilFormat(createInfoFormat)) ? ForceDisableCompressionForDepthStencil : 0) | - (externalFlags.externallyShareable ? ForceDisableCompressionForSharedImages : 0); - - // We must not use any metadata if sparse aliasing is enabled or - // settings.forceEnableDcc matches any of the disableBits. - if ((pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_ALIASED_BIT) || - ((settings.forceEnableDcc & disableBits) != 0)) + // We must not use any metadata if sparse aliasing is enabled + if ((pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_ALIASED_BIT) != 0) { pPalCreateInfo->metadataMode = Pal::MetadataMode::Disabled; } - // Disable metadata for avoiding corruption if one image is sampled and rendered - // in the same draw. - if ((pCreateInfo->usage & VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT) != 0) + const uint32_t disableBits = + (externalFlags.externallyShareable ? DisableCompressionForSharedImages : 0) | + ((Formats::IsColorFormat(createInfoFormat)) ? DisableCompressionForColor : 0) | + ((Formats::IsDepthStencilFormat(createInfoFormat)) ? 
DisableCompressionForDepthStencil : 0); + + if ((forceDisableCompressionMask & disableBits) != 0) { - pPalCreateInfo->metadataMode = Pal::MetadataMode::Disabled; + pPalCreateInfo->metadataMode = Pal::MetadataMode::Disabled; } // Apply per application (or run-time) options @@ -550,12 +545,17 @@ void Image::ConvertImageCreateInfo( if ((extStructs.pImageCompressionControl->sType == VK_STRUCTURE_TYPE_IMAGE_COMPRESSION_CONTROL_EXT) && (extStructs.pImageCompressionControl->flags == VK_IMAGE_COMPRESSION_DISABLED_EXT)) { - pPalCreateInfo->metadataMode = Pal::MetadataMode::Disabled; - pPalCreateInfo->metadataTcCompatMode = Pal::MetadataTcCompatMode::Disabled; + pPalCreateInfo->metadataMode = Pal::MetadataMode::Disabled; + pPalCreateInfo->metadataTcCompatMode = Pal::MetadataTcCompatMode::Disabled; } } #if defined(__unix__) + if (pPalCreateInfo->flags.optimalShareable && pPalCreateInfo->usageFlags.depthStencil) + { + pPalCreateInfo->metadataMode = Pal::MetadataMode::Disabled; + } + pPalCreateInfo->modifier = DRM_FORMAT_MOD_INVALID; if (pCreateInfo->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) diff --git a/icd/api/vk_instance.cpp b/icd/api/vk_instance.cpp index d963de9d..9bb64749 100644 --- a/icd/api/vk_instance.cpp +++ b/icd/api/vk_instance.cpp @@ -44,11 +44,9 @@ #include "include/internal_layer_hooks.h" -#if ICD_GPUOPEN_DEVMODE_BUILD #include "devmode/devmode_mgr.h" #include "devmode/devmode_rgp.h" #include "devmode/devmode_ubertrace.h" -#endif #include "res/ver.h" @@ -374,9 +372,7 @@ VkResult Instance::Init( m_nullGpuId = createInfo.nullGpuId; } -#if ICD_GPUOPEN_DEVMODE_BUILD createInfo.flags.supportRgpTraces = 1; -#endif //Check the KHR_DISPALY extension, and then determine whether to open the primaryNode. if (IsExtensionEnabled(InstanceExtensions::KHR_DISPLAY) == false) @@ -628,13 +624,11 @@ VkResult Instance::LoadAndCommitSettings( } } -#if ICD_GPUOPEN_DEVMODE_BUILD // Inform developer mode manager of settings. This also finalizes the developer mode manager. if (m_pDevMode != nullptr) { m_pDevMode->Finalize(deviceCount, settingsLoaders); } -#endif // After all of the settings have been finalized, initialize each device for (uint32_t deviceIdx = 0; ((deviceIdx < deviceCount) && (result == VK_SUCCESS)); ++deviceIdx) @@ -670,7 +664,6 @@ VkResult Instance::Destroy(void) { AmdvlkLog(m_logTagIdMask, GeneralPrint, "%s End ********\n", GetApplicationName()); -#if ICD_GPUOPEN_DEVMODE_BUILD // Pipeline binary cache is required to be freed before destroying DevMode // because DevMode manages the state of pipeline binary cache. uint32_t deviceCount = PhysicalDeviceManager::MaxPhysicalDevices; @@ -685,7 +678,6 @@ VkResult Instance::Destroy(void) { m_pDevMode->Destroy(); } -#endif // Destroy physical device manager if (m_pPhysicalDeviceManager != nullptr) @@ -784,6 +776,8 @@ const InstanceExtensions::Supported& Instance::GetSupportedExtensions() supportedExtensions.AddExtension(VK_INSTANCE_EXTENSION(KHR_GET_SURFACE_CAPABILITIES2)); + supportedExtensions.AddExtension(VK_INSTANCE_EXTENSION(EXT_SWAPCHAIN_COLORSPACE)); + supportedExtensions.AddExtension(VK_INSTANCE_EXTENSION(KHR_DEVICE_GROUP_CREATION)); supportedExtensions.AddExtension(VK_INSTANCE_EXTENSION(KHR_EXTERNAL_SEMAPHORE_CAPABILITIES)); @@ -1028,7 +1022,6 @@ void Instance::EnableCrashAnalysisSupport() // PAL devices (before physical device manager is created). 
void Instance::DevModeEarlyInitialize() { -#if ICD_GPUOPEN_DEVMODE_BUILD VK_ASSERT(m_pPhysicalDeviceManager == nullptr); VK_ASSERT(m_pDevMode == nullptr); @@ -1051,7 +1044,6 @@ void Instance::DevModeEarlyInitialize() VK_ASSERT(result == VK_SUCCESS); } -#endif } // ===================================================================================================================== @@ -1059,7 +1051,6 @@ void Instance::DevModeEarlyInitialize() // PAL devices (after physical device manager is created). void Instance::DevModeLateInitialize() { -#if ICD_GPUOPEN_DEVMODE_BUILD VK_ASSERT(m_pPhysicalDeviceManager != nullptr); VK_ASSERT(m_pDevMode != nullptr); @@ -1074,7 +1065,6 @@ void Instance::DevModeLateInitialize() { EnableCrashAnalysisSupport(); } -#endif } // ===================================================================================================================== diff --git a/icd/api/vk_physical_device.cpp b/icd/api/vk_physical_device.cpp index 5ec83b79..0fb34279 100644 --- a/icd/api/vk_physical_device.cpp +++ b/icd/api/vk_physical_device.cpp @@ -427,6 +427,8 @@ PhysicalDevice::PhysicalDevice( memset(&m_limits, 0, sizeof(m_limits)); memset(m_formatFeatureMsaaTarget, 0, sizeof(m_formatFeatureMsaaTarget)); memset(&m_queueFamilies, 0, sizeof(m_queueFamilies)); + memset(&m_compQueueEnginesNdx, 0, sizeof(m_compQueueEnginesNdx)); + memset(&m_universalQueueEnginesNdx, 0, sizeof(m_universalQueueEnginesNdx)); memset(&m_memoryProperties, 0, sizeof(m_memoryProperties)); memset(&m_gpaProps, 0, sizeof(m_gpaProps)); @@ -2568,6 +2570,20 @@ void PhysicalDevice::GetSparseImageFormatProperties( } } +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDevicePipelineRobustnessProperties( + VkPipelineRobustnessBufferBehaviorEXT* defaultRobustnessStorageBuffers, + VkPipelineRobustnessBufferBehaviorEXT* defaultRobustnessUniformBuffers, + VkPipelineRobustnessBufferBehaviorEXT* defaultRobustnessVertexInputs, + VkPipelineRobustnessImageBehaviorEXT* defaultRobustnessImages +) const +{ + *defaultRobustnessStorageBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT; + *defaultRobustnessUniformBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT; + *defaultRobustnessVertexInputs = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT; + *defaultRobustnessImages = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DISABLED_EXT; +} + // ===================================================================================================================== VkResult PhysicalDevice::GetPhysicalDeviceCalibrateableTimeDomainsEXT( uint32_t* pTimeDomainCount, @@ -4436,6 +4452,8 @@ DeviceExtensions::Supported PhysicalDevice::GetAvailableExtensions( availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_COOPERATIVE_MATRIX)); } + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_COMPUTE_SHADER_DERIVATIVES)); + bool exposeNvComputeShaderDerivatives = false; if ((pPhysicalDevice == nullptr) || (pPhysicalDevice->GetRuntimeSettings().exportNvComputeShaderDerivatives)) { @@ -4677,7 +4695,7 @@ void PhysicalDevice::PopulateQueueFamilies() enabledQueueFlags |= VK_QUEUE_PROTECTED_BIT; } - // find out the sub engine index of VrHighPriority and indices for compute engines that aren't exclusive. + // find out the sub engine index of VrHighPriority and indices for compute engines that are exclusive. 
{ const auto& computeProps = m_properties.engineProperties[Pal::EngineTypeCompute]; uint32_t engineIndex = 0u; @@ -4704,23 +4722,6 @@ void PhysicalDevice::PopulateQueueFamilies() m_vrHighPrioritySubEngineIndex = subEngineIndex; } } - else if (IsNormalQueue(computeProps.capabilities[subEngineIndex])) - { - m_compQueueEnginesNdx[engineIndex++] = subEngineIndex; - } - } - } - - // find out universal engines that aren't exclusive. - { - const auto& universalProps = m_properties.engineProperties[Pal::EngineTypeUniversal]; - uint32_t engineIndex = 0u; - for (uint32_t subEngineIndex = 0; subEngineIndex < universalProps.engineCount; subEngineIndex++) - { - if (IsNormalQueue(universalProps.capabilities[subEngineIndex])) - { - m_universalQueueEnginesNdx[engineIndex++] = subEngineIndex; - } } } @@ -4808,9 +4809,64 @@ void PhysicalDevice::PopulateQueueFamilies() pQueueFamilyProps->queueCount++; } } - pQueueFamilyProps->queueCount = (engineType == Pal::EngineTypeCompute) - ? Util::Min(settings.asyncComputeQueueLimit, pQueueFamilyProps->queueCount) - : pQueueFamilyProps->queueCount; + + // if the engineType is Universal or Compute, adjust the queue count based on the settings. + // and find pal engine indices for the queues + if (pQueueFamilyProps->queueCount != 0) + { + switch (engineType) + { + case Pal::EngineTypeUniversal: + { + if (settings.forceGraphicsQueueCount != UINT32_MAX) + { + VK_ASSERT(settings.forceGraphicsQueueCount <= Queue::MaxQueuesPerFamily); + pQueueFamilyProps->queueCount = settings.forceGraphicsQueueCount; + } + + // find out pal engine indices for universal queues that aren't exclusive. + uint32_t index = 0; + while (index < pQueueFamilyProps->queueCount) + { + for (uint32_t engineIndex = 0u; engineIndex < engineProps.engineCount; ++engineIndex) + { + if (IsNormalQueue(engineProps.capabilities[engineIndex])) + { + m_universalQueueEnginesNdx[index] = engineIndex; + index++; + } + } + } + break; + } + + case Pal::EngineTypeCompute: + { + if (settings.forceComputeQueueCount != UINT32_MAX) + { + VK_ASSERT(settings.forceComputeQueueCount <= Queue::MaxQueuesPerFamily); + pQueueFamilyProps->queueCount = settings.forceComputeQueueCount; + } + + // find out pal engine indices for compute queues that aren't exclusive. + uint32_t index = 0; + while (index < pQueueFamilyProps->queueCount) + { + for (uint32_t engineIndex = 0u; engineIndex < engineProps.engineCount; ++engineIndex) + { + if (IsNormalQueue(engineProps.capabilities[engineIndex])) + { + m_compQueueEnginesNdx[index] = engineIndex; + index++; + } + } + } + break; + } + default: + break; // no-op + } + } pQueueFamilyProps->timestampValidBits = (engineProps.flags.supportsTimestamps != 0) ? 64 : 0; pQueueFamilyProps->minImageTransferGranularity = PalToVkExtent3d(engineProps.minTiledImageCopyAlignment); @@ -6949,9 +7005,8 @@ size_t PhysicalDevice::GetFeatures2( if (updateFeatures) { - const bool captureReplay = PalProperties().gfxipProperties.flags.supportCaptureReplay; pExtInfo->descriptorBuffer = VK_TRUE; - pExtInfo->descriptorBufferCaptureReplay = captureReplay ? 
VK_TRUE : VK_FALSE; + pExtInfo->descriptorBufferCaptureReplay = VK_FALSE; pExtInfo->descriptorBufferImageLayoutIgnored = VK_FALSE; pExtInfo->descriptorBufferPushDescriptors = VK_TRUE; } @@ -7711,6 +7766,18 @@ size_t PhysicalDevice::GetFeatures2( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_REPLICATED_COMPOSITES_FEATURES_EXT: + { + auto* pExtInfo = reinterpret_cast(pHeader); + if (updateFeatures) + { + pExtInfo->shaderReplicatedComposites = VK_TRUE; + } + + structSize = sizeof(*pExtInfo); + break; + } + default: { // skip any unsupported extension structures @@ -7856,12 +7923,11 @@ VkResult PhysicalDevice::GetImageFormatProperties2( pImageCompressionProps->imageCompressionFixedRateFlags = VK_IMAGE_COMPRESSION_FIXED_RATE_NONE_EXT; const uint32_t disableBits = - ForceDisableCompression | - ((Formats::IsColorFormat(createInfoFormat)) ? ForceDisableCompressionForColor : 0) | - ((Formats::IsDepthStencilFormat(createInfoFormat)) ? ForceDisableCompressionForDepthStencil : 0); + ((Formats::IsColorFormat(createInfoFormat)) ? DisableCompressionForColor : 0) | + ((Formats::IsDepthStencilFormat(createInfoFormat)) ? DisableCompressionForDepthStencil : 0); pImageCompressionProps->imageCompressionFlags = - ((GetRuntimeSettings().forceEnableDcc & disableBits) == 0) ? + ((GetRuntimeSettings().forceDisableCompression & disableBits) == 0) ? VK_IMAGE_COMPRESSION_DEFAULT_EXT : VK_IMAGE_COMPRESSION_DISABLED_EXT; } @@ -8479,10 +8545,11 @@ void PhysicalDevice::GetDeviceProperties2( { auto* pProps = static_cast(pNext); - pProps->defaultRobustnessStorageBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT; - pProps->defaultRobustnessUniformBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT; - pProps->defaultRobustnessVertexInputs = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT; - pProps->defaultRobustnessImages = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DISABLED_EXT; + GetPhysicalDevicePipelineRobustnessProperties(&pProps->defaultRobustnessStorageBuffers, + &pProps->defaultRobustnessUniformBuffers, + &pProps->defaultRobustnessVertexInputs, + &pProps->defaultRobustnessImages); + break; } diff --git a/icd/api/vk_pipeline.cpp b/icd/api/vk_pipeline.cpp index 5a3c4ba2..ec1379d8 100644 --- a/icd/api/vk_pipeline.cpp +++ b/icd/api/vk_pipeline.cpp @@ -796,7 +796,8 @@ VkResult Pipeline::GetShaderDisassembly( // To extract the shader code, we can re-parse the saved ELF binary and lookup the shader's program // instructions by examining the symbol table entry for that shader's entrypoint. 
- Util::Abi::PipelineAbiReader abiReader(pDevice->VkInstance()->Allocator(), binaryInfo.pipelineBinary.pCode); + Util::Abi::PipelineAbiReader abiReader(pDevice->VkInstance()->Allocator(), + Util::Span{binaryInfo.pipelineBinary.pCode, binaryInfo.pipelineBinary.codeSize}); VkResult result = VK_SUCCESS; Pal::Result palResult = abiReader.Init(); @@ -846,32 +847,34 @@ VkResult Pipeline::GetShaderDisassembly( uint32_t hwStage = 0; if (Util::BitMaskScanForward(&hwStage, apiToHwShader.apiShaders[static_cast(apiShaderType)])) { - const Util::Elf::SymbolTableEntry* pSymbolEntry = nullptr; const char* pSectionName = nullptr; if (pipelineSymbolType == Util::Abi::PipelineSymbolType::ShaderDisassembly) { - pSymbolEntry = abiReader.GetPipelineSymbol( - Util::Abi::GetSymbolForStage( - Util::Abi::PipelineSymbolType::ShaderDisassembly, - static_cast(hwStage))); + palResult = abiReader.CopySymbol( + Util::Abi::GetSymbolForStage( + Util::Abi::PipelineSymbolType::ShaderDisassembly, + static_cast(hwStage)), + pBufferSize, + pBuffer); + pSectionName = Util::Abi::AmdGpuDisassemblyName; + symbolValid = palResult == Util::Result::Success; } else if (pipelineSymbolType == Util::Abi::PipelineSymbolType::ShaderAmdIl) { - pSymbolEntry = abiReader.GetPipelineSymbol( - Util::Abi::GetSymbolForStage( - Util::Abi::PipelineSymbolType::ShaderAmdIl, - apiShaderType)); + palResult = abiReader.CopySymbol( + Util::Abi::GetSymbolForStage( + Util::Abi::PipelineSymbolType::ShaderAmdIl, + apiShaderType), + pBufferSize, + pBuffer); + pSectionName = Util::Abi::AmdGpuCommentLlvmIrName; + symbolValid = palResult == Util::Result::Success; } - if (pSymbolEntry != nullptr) - { - palResult = abiReader.GetElfReader().CopySymbol(*pSymbolEntry, pBufferSize, pBuffer); - symbolValid = palResult == Util::Result::Success; - } - else if (pSectionName != nullptr) + if ((symbolValid == false) && (pSectionName != nullptr)) { // NOTE: LLVM doesn't add disassemble symbol in ELF disassemble section, instead, it contains // the entry name in disassemble section. 
so we have to search the entry name to split per @@ -1016,7 +1019,8 @@ uint32_t Pipeline::GetAvailableAmdIlSymbol( bool hasBinary = GetBinary(shaderType, &binaryInfo); if (hasBinary) { - Util::Abi::PipelineAbiReader abiReader(m_pDevice->VkInstance()->Allocator(), binaryInfo.pipelineBinary.pCode); + Util::Abi::PipelineAbiReader abiReader(m_pDevice->VkInstance()->Allocator(), + Util::Span{binaryInfo.pipelineBinary.pCode, binaryInfo.pipelineBinary.codeSize}); Pal::Result result = abiReader.Init(); if (result == Pal::Result::Success) @@ -1036,7 +1040,7 @@ uint32_t Pipeline::GetAvailableAmdIlSymbol( const Util::Elf::SymbolTableEntry* pSymbolEntry = nullptr; const char* pSectionName = nullptr; - pSymbolEntry = abiReader.GetPipelineSymbol( + pSymbolEntry = abiReader.GetSymbolHeader( Util::Abi::GetSymbolForStage( Util::Abi::PipelineSymbolType::ShaderAmdIl, abiShaderType)); diff --git a/icd/api/vk_pipeline_cache.cpp b/icd/api/vk_pipeline_cache.cpp index 98938439..5fc28f46 100644 --- a/icd/api/vk_pipeline_cache.cpp +++ b/icd/api/vk_pipeline_cache.cpp @@ -129,9 +129,7 @@ VkResult PipelineCache::Create( pDevice->GetCompiler(DefaultDeviceIndex)->GetGfxIp(), pDefaultPhysicalDevice->GetRuntimeSettings(), pDefaultPhysicalDevice->PalDevice()->GetCacheFilePath(), -#if ICD_GPUOPEN_DEVMODE_BUILD pDefaultPhysicalDevice->VkInstance()->GetDevModeMgr(), -#endif expectedEntries, initialDataSize, pInitialData, diff --git a/icd/api/vk_pipeline_layout.cpp b/icd/api/vk_pipeline_layout.cpp index 3c029508..0d166d00 100644 --- a/icd/api/vk_pipeline_layout.cpp +++ b/icd/api/vk_pipeline_layout.cpp @@ -361,9 +361,9 @@ VkResult PipelineLayout::BuildCompactSchemeInfo( // Reserve user data nodes for vertex buffer table pPipelineInfo->numUserDataNodes += 1; - pInfo->userDataRegCount += VbTablePtrRegCount; + pInfo->userDataRegCount += VbTablePtrRegCount; // In case we need an internal vertex buffer table, add nodes required for its entries, and its set pointer. - pPipelineInfo->numRsrcMapNodes += Pal::MaxVertexBuffers; + pPipelineInfo->numRsrcMapNodes += Pal::MaxVertexBuffers; // If uber-fetch shader is not enabled for early compile, the user data entries for uber-fetch shader const // buffer is appended at the bottom of user data table. Just following vertex buffer table. @@ -371,27 +371,27 @@ VkResult PipelineLayout::BuildCompactSchemeInfo( { VK_ASSERT(pUserDataLayout->uberFetchConstBufRegBase == InvalidReg); - pUserDataLayout->uberFetchConstBufRegBase = pInfo->userDataRegCount; - pInfo->userDataRegCount += 1; - pPipelineInfo->numUserDataNodes += 1; - pPipelineInfo->numRsrcMapNodes += 1; + pUserDataLayout->uberFetchConstBufRegBase = pInfo->userDataRegCount; + pInfo->userDataRegCount += 1; + pPipelineInfo->numUserDataNodes += 1; + pPipelineInfo->numRsrcMapNodes += 1; } // Reserve an user-data to store the VA of buffer for transform feedback. 
if (ReserveXfbNode(pDevice)) { - pUserDataLayout->transformFeedbackRegBase = pInfo->userDataRegCount; - pUserDataLayout->transformFeedbackRegCount = 1; - pInfo->userDataRegCount += pUserDataLayout->transformFeedbackRegCount; - pPipelineInfo->numUserDataNodes += 1; + pUserDataLayout->transformFeedbackRegBase = pInfo->userDataRegCount; + pUserDataLayout->transformFeedbackRegCount = 1; + pInfo->userDataRegCount += pUserDataLayout->transformFeedbackRegCount; + pPipelineInfo->numUserDataNodes += 1; } if (pDevice->GetEnabledFeatures().enableDebugPrintf) { - pPipelineInfo->numUserDataNodes += 1; - pUserDataLayout->debugPrintfRegBase = pInfo->userDataRegCount; - pInfo->userDataRegCount += 1; - pPipelineInfo->numRsrcMapNodes += 1; + pPipelineInfo->numUserDataNodes += 1; + pUserDataLayout->debugPrintfRegBase = pInfo->userDataRegCount; + pInfo->userDataRegCount += 1; + pPipelineInfo->numRsrcMapNodes += 1; } // Allocate user data for the thread group reversal state @@ -402,29 +402,34 @@ VkResult PipelineLayout::BuildCompactSchemeInfo( &pInfo->userDataRegCount, &pUserDataLayout->threadGroupReversalRegBase); + // Allocate user data for push constants + pPipelineInfo->numUserDataNodes += pushConstantsUserDataNodeCount; + + pCommonUserDataLayout->pushConstRegBase = pInfo->userDataRegCount; + pCommonUserDataLayout->pushConstRegCount = pushConstRegCount; + pInfo->userDataRegCount += pushConstRegCount; + #if VKI_RAY_TRACING if (HasRayTracing(pDevice, pIn)) { // Reserve one node for indirect RT capture replay. - pPipelineInfo->numUserDataNodes += 1; - pUserDataLayout->rtCaptureReplayConstBufRegBase = pInfo->userDataRegCount; - pInfo->userDataRegCount += InternalConstBufferRegCount; - - // Dispatch ray args - pInfo->userDataRegCount += MaxTraceRayUserDataRegCount; - pPipelineInfo->numUserDataNodes += MaxTraceRayUserDataNodeCount; - pPipelineInfo->numRsrcMapNodes += MaxTraceRayResourceNodeCount; - pPipelineInfo->hasRayTracing = true; + pPipelineInfo->numUserDataNodes += 1; + pCommonUserDataLayout->rtCaptureReplayConstBufRegBase = pInfo->userDataRegCount; + pInfo->userDataRegCount += InternalConstBufferRegCount; + + // NOTE: In certain Proton games, the dispatchRaysArgsPtrRegBase must be positioned carefully within the user + // data entry list. Experimental results indicate that these games work without a GPU hang when + // dispatchRaysArgsPtrRegBase is placed after the pushConst user data entry. The root cause of this behavior is + // currently unknown and may be due to a potential bug in Proton. Exercise caution when changing the location of + // dispatchRaysArgsPtrRegBase. 
+ pCommonUserDataLayout->dispatchRaysArgsPtrRegBase = pInfo->userDataRegCount; + pInfo->userDataRegCount += MaxTraceRayUserDataRegCount; + pPipelineInfo->numUserDataNodes += MaxTraceRayUserDataNodeCount; + pPipelineInfo->numRsrcMapNodes += MaxTraceRayResourceNodeCount; + pPipelineInfo->hasRayTracing = true; } #endif - // Allocate user data for push constants - pPipelineInfo->numUserDataNodes += pushConstantsUserDataNodeCount; - - pCommonUserDataLayout->pushConstRegBase = pInfo->userDataRegCount; - pCommonUserDataLayout->pushConstRegCount = pushConstRegCount; - pInfo->userDataRegCount += pushConstRegCount; - // Populate user data layouts for each descriptor set that is active pUserDataLayout->setBindingRegBase = pInfo->userDataRegCount; @@ -656,16 +661,16 @@ VkResult PipelineLayout::BuildIndirectSchemeInfo( #if VKI_RAY_TRACING if (HasRayTracing(pDevice, pIn)) { - pUserDataLayout->dispatchRaysArgsPtrRegBase = pInfo->userDataRegCount; - pPipelineInfo->numUserDataNodes += MaxTraceRayUserDataNodeCount; - pPipelineInfo->numRsrcMapNodes += MaxTraceRayResourceNodeCount; - pInfo->userDataRegCount += MaxTraceRayUserDataRegCount; - pPipelineInfo->hasRayTracing = true; + pCommonUserDataLayout->dispatchRaysArgsPtrRegBase = pInfo->userDataRegCount; + pPipelineInfo->numUserDataNodes += MaxTraceRayUserDataNodeCount; + pPipelineInfo->numRsrcMapNodes += MaxTraceRayResourceNodeCount; + pInfo->userDataRegCount += MaxTraceRayUserDataRegCount; + pPipelineInfo->hasRayTracing = true; // Reserve one node for indirect RT capture replay. - pUserDataLayout->rtCaptureReplayConstBufRegBase = pInfo->userDataRegCount; - pPipelineInfo->numUserDataNodes += 1; - pInfo->userDataRegCount += InternalConstBufferRegCount; + pCommonUserDataLayout->rtCaptureReplayConstBufRegBase = pInfo->userDataRegCount; + pPipelineInfo->numUserDataNodes += 1; + pInfo->userDataRegCount += InternalConstBufferRegCount; } #endif @@ -1191,21 +1196,7 @@ uint32_t PipelineLayout::GetDispatchRaysUserData() const if (m_pipelineInfo.hasRayTracing) { - if (userDataLayout.scheme == PipelineLayoutScheme::Compact) - { - // The dispatch rays args is always the last entry - // TODO #raytracing: This means it spills first. Probably bad for perf. 
- dispatchRaysUserData = m_info.userDataRegCount; - } - else if (userDataLayout.scheme == PipelineLayoutScheme::Indirect) - { - dispatchRaysUserData = userDataLayout.indirect.dispatchRaysArgsPtrRegBase; - } - else - { - VK_NEVER_CALLED(); - dispatchRaysUserData = 0; - } + dispatchRaysUserData = userDataLayout.common.dispatchRaysArgsPtrRegBase; } return dispatchRaysUserData; @@ -1366,7 +1357,7 @@ VkResult PipelineLayout::BuildCompactSchemeLlpcPipelineMapping( { BuildLlpcInternalConstantBufferMapping( stageMask, - userDataLayout.rtCaptureReplayConstBufRegBase, + commonUserDataLayout.rtCaptureReplayConstBufRegBase, Vkgc::RtCaptureReplayInternalBufferBinding, &pUserDataNodes[userDataNodeCount], &userDataNodeCount); @@ -1374,7 +1365,7 @@ VkResult PipelineLayout::BuildCompactSchemeLlpcPipelineMapping( BuildLlpcRayTracingDispatchArgumentsMapping( stageMask, - m_info.userDataRegCount, + commonUserDataLayout.dispatchRaysArgsPtrRegBase, MaxTraceRayUserDataRegCount, &pUserDataNodes[userDataNodeCount], &userDataNodeCount, @@ -1677,7 +1668,7 @@ void PipelineLayout::BuildIndirectSchemeLlpcPipelineMapping( if (appendRtCaptureReplayCb) { - VK_ASSERT(rtCaptureReplayCbRegBase == userDataLayout.rtCaptureReplayConstBufRegBase); + VK_ASSERT(rtCaptureReplayCbRegBase == commonUserDataLayout.rtCaptureReplayConstBufRegBase); BuildLlpcInternalConstantBufferMapping( stageMask, rtCaptureReplayCbRegBase, diff --git a/icd/api/vk_queue.cpp b/icd/api/vk_queue.cpp index 291b6bae..3e941fb0 100644 --- a/icd/api/vk_queue.cpp +++ b/icd/api/vk_queue.cpp @@ -43,9 +43,7 @@ #include "include/vk_swapchain.h" #include "include/vk_utils.h" -#if ICD_GPUOPEN_DEVMODE_BUILD #include "devmode/devmode_mgr.h" -#endif #if VKI_RAY_TRACING #include "raytrace/ray_tracing_device.h" @@ -1089,13 +1087,10 @@ VkResult Queue::Submit( const SubmitInfoType* pSubmits, VkFence fence) { -#if ICD_GPUOPEN_DEVMODE_BUILD IDevMode* pDevMode = m_pDevice->VkInstance()->GetDevModeMgr(); bool timedQueueEvents = ((pDevMode != nullptr) && pDevMode->IsQueueTimingActive(m_pDevice)); -#else - bool timedQueueEvents = false; -#endif + Fence* pFence = Fence::ObjectFromHandle(fence); VirtualStackFrame virtStackFrame(m_pStackAllocator); @@ -1469,8 +1464,9 @@ VkResult Queue::Submit( pDevMode->RecordRenderOps(deviceIdx, this, drawCallCount, dispatchCallCount); } - Pal::IFence* iFence[2] = {nullptr, nullptr}; + Pal::IFence* iFence[2] = { nullptr, nullptr }; palSubmitInfo.ppFences = iFence; + palSubmitInfo.fenceCount = 0; #if VKI_RAY_TRACING if (pCpsMemFence != nullptr) @@ -1573,7 +1569,6 @@ VkResult Queue::Submit( } else { -#if ICD_GPUOPEN_DEVMODE_BUILD // TMZ is NOT supported for GPUOPEN path. 
VK_ASSERT((*pCommandBuffers[0])->IsProtected() == false); @@ -1584,9 +1579,6 @@ VkResult Queue::Submit( pCmdBuffers, palSubmitInfo, &virtStackFrame); -#else - VK_NEVER_CALLED(); -#endif } result = PalToVkResult(palResult); @@ -1696,14 +1688,10 @@ VkResult Queue::PalSignalSemaphores( const uint32_t semaphoreDeviceIndicesCount, const uint32_t* pSemaphoreDeviceIndices) { -#if ICD_GPUOPEN_DEVMODE_BUILD IDevMode* pDevMode = m_pDevice->VkInstance()->GetDevModeMgr(); bool timedQueueEvents = ((pDevMode != nullptr) && pDevMode->IsQueueTimingActive(m_pDevice)); -#else - bool timedQueueEvents = false; -#endif Pal::Result palResult = Pal::Result::Success; uint32_t deviceIdx = DefaultDeviceIndex; @@ -1744,14 +1732,8 @@ VkResult Queue::PalSignalSemaphores( } else { -#if ICD_GPUOPEN_DEVMODE_BUILD palResult = pDevMode->TimedSignalQueueSemaphore(deviceIdx, this, pSemaphores[i], pointValue, pPalSemaphore); -#else - VK_NEVER_CALLED(); - - palResult = Pal::Result::ErrorUnknown; -#endif } } } @@ -1773,14 +1755,10 @@ VkResult Queue::PalWaitSemaphores( Pal::Result palResult = Pal::Result::Success; uint32_t deviceIdx = DefaultDeviceIndex; -#if ICD_GPUOPEN_DEVMODE_BUILD IDevMode* pDevMode = m_pDevice->VkInstance()->GetDevModeMgr(); bool timedQueueEvents = ((pDevMode != nullptr) && pDevMode->IsQueueTimingActive(m_pDevice)); -#else - bool timedQueueEvents = false; -#endif for (uint32_t i = 0; (i < semaphoreCount) && (palResult == Pal::Result::Success); ++i) { @@ -1823,14 +1801,8 @@ VkResult Queue::PalWaitSemaphores( } else { -#if ICD_GPUOPEN_DEVMODE_BUILD palResult = pDevMode->TimedWaitQueueSemaphore(deviceIdx, this, pSemaphores[i], pointValue, pPalSemaphore); -#else - VK_NEVER_CALLED(); - - palResult = Pal::Result::ErrorUnknown; -#endif } } } @@ -1889,7 +1861,6 @@ VkResult Queue::Present( if (pPresentInfo == nullptr) { -#if ICD_GPUOPEN_DEVMODE_BUILD if (m_pDevice->VkInstance()->GetDevModeMgr() != nullptr) { m_pDevice->VkInstance()->GetDevModeMgr()->NotifyFrameEnd(this, @@ -1897,7 +1868,6 @@ VkResult Queue::Present( m_pDevice->VkInstance()->GetDevModeMgr()->NotifyFrameBegin(this, IDevMode::FrameDelimiterType::QueuePresent); } -#endif return VK_ERROR_INITIALIZATION_FAILED; } @@ -2048,13 +2018,11 @@ VkResult Queue::Present( m_pDevice->VkInstance()->PalPlatform()->UpdateFrameTraceController(pPresentQueue); // Notify gpuopen developer mode that we're about to present (frame-end boundary) -#if ICD_GPUOPEN_DEVMODE_BUILD if (m_pDevice->VkInstance()->GetDevModeMgr() != nullptr) { m_pDevice->VkInstance()->GetDevModeMgr()->NotifyFrameEnd(this, IDevMode::FrameDelimiterType::QueuePresent); } -#endif bool syncFlip = false; bool postFrameTimerSubmission = false; @@ -2092,13 +2060,11 @@ VkResult Queue::Present( pSwapChain->PostPresent(presentInfo, &palResult); // Notify gpuopen developer mode that a present occurred (frame-begin boundary) -#if ICD_GPUOPEN_DEVMODE_BUILD if (m_pDevice->VkInstance()->GetDevModeMgr() != nullptr) { m_pDevice->VkInstance()->GetDevModeMgr()->NotifyFrameBegin(this, IDevMode::FrameDelimiterType::QueuePresent); } -#endif VkResult curResult = PalToVkResult(palResult); @@ -2736,7 +2702,6 @@ void Queue::InsertDebugUtilsLabel( if (strcmp(pLabelInfo->pLabelName, settings.devModeEndFrameDebugUtilsLabel) == 0) { -#if ICD_GPUOPEN_DEVMODE_BUILD if (m_pDevice->VkInstance()->GetDevModeMgr() != nullptr) { m_pDevice->VkInstance()->GetDevModeMgr()->NotifyFrameEnd(this, IDevMode::FrameDelimiterType::QueueLabel); @@ -2749,18 +2714,15 @@ void Queue::InsertDebugUtilsLabel( VK_ASSERT(tempResult == VK_SUCCESS); } } -#endif } 
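// --- Editorial sketch (illustration only, not part of this patch) ---
// With the ICD_GPUOPEN_DEVMODE_BUILD guards removed, a queue label whose name matches the
// devModeEndFrameDebugUtilsLabel / devModeStartFrameDebugUtilsLabel settings is always
// forwarded to the IDevMode frame delimiters above. The snippet shows how an application
// could emit such labels through the standard VK_EXT_debug_utils entry point; the function
// name and the two label strings are hypothetical stand-ins for whatever the settings hold
// in a given configuration.
#include <vulkan/vulkan.h>

static void MarkDevModeFrameBoundary(
    VkQueue                             queue,
    PFN_vkQueueInsertDebugUtilsLabelEXT pfnInsertLabel) // fetched via vkGetDeviceProcAddr
{
    VkDebugUtilsLabelEXT label = {};
    label.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT;

    label.pLabelName = "frame_end";   // assumed devModeEndFrameDebugUtilsLabel value
    pfnInsertLabel(queue, &label);    // driver side: NotifyFrameEnd(..., QueueLabel)

    label.pLabelName = "frame_begin"; // assumed devModeStartFrameDebugUtilsLabel value
    pfnInsertLabel(queue, &label);    // driver side: NotifyFrameBegin(..., QueueLabel)
}
// --- End editorial sketch ---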
if (strcmp(pLabelInfo->pLabelName, settings.devModeStartFrameDebugUtilsLabel) == 0) { -#if ICD_GPUOPEN_DEVMODE_BUILD if (m_pDevice->VkInstance()->GetDevModeMgr() != nullptr) { m_pDevice->VkInstance()->GetDevModeMgr()->NotifyFrameBegin(this, IDevMode::FrameDelimiterType::QueueLabel); } -#endif } } @@ -2771,7 +2733,6 @@ void Queue::DevModeFrameBoundary( IDevMode* pDevMode, const VkFrameBoundaryEXT* pFrameBoundaryInfo) { -#if ICD_GPUOPEN_DEVMODE_BUILD if ((pDevMode != nullptr) && (pFrameBoundaryInfo != nullptr)) { @@ -2783,7 +2744,6 @@ void Queue::DevModeFrameBoundary( IDevMode::FrameDelimiterType::QueuePresent); } } -#endif } #if VKI_RAY_TRACING diff --git a/icd/api/vk_swapchain.cpp b/icd/api/vk_swapchain.cpp index 7b936c3b..046189f7 100644 --- a/icd/api/vk_swapchain.cpp +++ b/icd/api/vk_swapchain.cpp @@ -668,7 +668,7 @@ VkResult SwapChain::SetupAutoStereo( 3, userDataNodes, 0, - false, + ShaderWaveSize::WaveSizeAuto, nullptr, &m_pAutoStereoPipeline); @@ -963,7 +963,6 @@ bool SwapChain::BuildPostProcessingCommands( imageViewInfo[0].subresRange.startSubres.plane = 0; imageViewInfo[0].possibleLayouts.usages = Pal::LayoutShaderRead | Pal::LayoutShaderWrite; imageViewInfo[0].possibleLayouts.engines = Pal::ImageLayoutEngineFlags::LayoutUniversalEngine; - // Update array slice for right eye SRD imageViewInfo[1] = imageViewInfo[0]; imageViewInfo[1].subresRange.startSubres.arraySlice = 1; @@ -1009,7 +1008,7 @@ bool SwapChain::BuildPostProcessingCommands( dispatchDimensions.y = Util::RoundUpToMultiple(imageCreateInfo.extent.width, workGroupSize[1]) / workGroupSize[1]; dispatchDimensions.z = 1; - pCmdBuf->CmdDispatch(dispatchDimensions); + pCmdBuf->CmdDispatch(dispatchDimensions, {}); Pal::AcquireReleaseInfo acquireRelInfo = {}; diff --git a/icd/imported/gputexdecoder/gpuTexDecoder.cpp b/icd/imported/gputexdecoder/gpuTexDecoder.cpp index 0c4d95c5..40265842 100755 --- a/icd/imported/gputexdecoder/gpuTexDecoder.cpp +++ b/icd/imported/gputexdecoder/gpuTexDecoder.cpp @@ -515,7 +515,7 @@ Pal::Result Device::GpuDecodeImage( const uint32 threadGroupsZ = Util::Max(pPalImageRegions[idx].extent.depth, pPalImageRegions[idx].numSlices); - m_pPalCmdBuffer->CmdDispatch({ threadGroupsX, threadGroupsY, threadGroupsZ }); + m_pPalCmdBuffer->CmdDispatch({ threadGroupsX, threadGroupsY, threadGroupsZ }, {}); } } else if ((type == InternalTexConvertCsType::ConvertETC2ToRGBA8) || @@ -589,7 +589,7 @@ Pal::Result Device::GpuDecodeImage( const uint32 threadGroupsZ = Util::Max(pPalImageRegions[idx].extent.depth, pPalImageRegions[idx].numSlices); - m_pPalCmdBuffer->CmdDispatch({ threadGroupsX, threadGroupsY, threadGroupsZ }); + m_pPalCmdBuffer->CmdDispatch({ threadGroupsX, threadGroupsY, threadGroupsZ }, {}); } } else @@ -641,7 +641,7 @@ Pal::Result Device::GpuDecodeImage( uint32 height = pPalImageRegions[idx].extent.height * 4; const uint32 threadGroupsX = (width * height + 63) / 64; - m_pPalCmdBuffer->CmdDispatch({ threadGroupsX, 1, 1 }); + m_pPalCmdBuffer->CmdDispatch({ threadGroupsX, 1, 1 }, {}); } } @@ -743,7 +743,7 @@ Pal::Result Device::GpuDecodeBuffer( const uint32 threadGroupsZ = Util::Max(pPalBufferRegionsIn[idx].imageExtent.depth, pPalBufferRegionsIn[idx].numSlices); - m_pPalCmdBuffer->CmdDispatch({ threadGroupsX, threadGroupsY, threadGroupsZ }); + m_pPalCmdBuffer->CmdDispatch({ threadGroupsX, threadGroupsY, threadGroupsZ }, {}); } } else @@ -843,7 +843,7 @@ Pal::Result Device::GpuDecodeBuffer( const uint32 threadGroupsZ = Util::Max(pPalBufferRegionsIn[idx].imageExtent.depth, pPalBufferRegionsIn[idx].numSlices); - 
m_pPalCmdBuffer->CmdDispatch({ threadGroupsX, threadGroupsY, threadGroupsZ }); + m_pPalCmdBuffer->CmdDispatch({ threadGroupsX, threadGroupsY, threadGroupsZ }, {}); } } diff --git a/icd/res/ver.h b/icd/res/ver.h index 52eb7fb5..911aa4c2 100644 --- a/icd/res/ver.h +++ b/icd/res/ver.h @@ -36,7 +36,7 @@ #define VERSION_MAJOR_STR MAKE_VERSION_STRING(VULKAN_ICD_MAJOR_VERSION) "\0" // Bump up after each promotion to mainline -#define VULKAN_ICD_BUILD_VERSION 325 +#define VULKAN_ICD_BUILD_VERSION 328 // String version is needed with leading zeros and extra termination (unicode) #define VERSION_NUMBER_MINOR VULKAN_ICD_BUILD_VERSION @@ -45,11 +45,11 @@ // These values specify the driver ID and driver info string #define VULKAN_DRIVER_ID VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR // "AMDOPEN" #define VULKAN_DRIVER_NAME_STR "AMD open-source driver" -#define VULKAN_DRIVER_INFO_STR "2024.Q4.1" +#define VULKAN_DRIVER_INFO_STR "2024.Q4.2" #define VULKAN_DRIVER_INFO_STR_LLPC "(LLPC)" // These values tell which version of the conformance test the driver is compliant against #define CTS_VERSION_MAJOR 1 #define CTS_VERSION_MINOR 3 -#define CTS_VERSION_SUBMINOR 5 +#define CTS_VERSION_SUBMINOR 9 #define CTS_VERSION_PATCH 2 diff --git a/icd/settings/settings.cpp b/icd/settings/settings.cpp index 0c663cf9..66211c4d 100644 --- a/icd/settings/settings.cpp +++ b/icd/settings/settings.cpp @@ -232,7 +232,7 @@ void VulkanSettingsLoader::OverrideDefaultsExperimentInfo() VK_SET_VAL_IF_EXPERIMENT_ENABLED(AmdVendorExtensions, disableAmdVendorExtensions, true); - VK_SET_VAL_IF_EXPERIMENT_ENABLED(ComputeQueueSupport, asyncComputeQueueLimit, 0); + VK_SET_VAL_IF_EXPERIMENT_ENABLED(ComputeQueueSupport, forceComputeQueueCount, 0); if (pExpSettings->expBarrierOptimizations.ValueOr(false)) { @@ -304,7 +304,7 @@ void VulkanSettingsLoader::OverrideDefaultsExperimentInfo() m_settings.allowExternalPipelineCacheObject = false; } - VK_SET_VAL_IF_EXPERIMENT_ENABLED(TextureColorCompression, forceEnableDcc, ForceDisableCompressionForColor); + VK_SET_VAL_IF_EXPERIMENT_ENABLED(TextureColorCompression, forceDisableCompression, DisableCompressionForColor); PAL_SET_VAL_IF_EXPERIMENT_ENABLED(ZeroUnboundDescriptors, zeroUnboundDescDebugSrd, true); @@ -348,7 +348,7 @@ void VulkanSettingsLoader::FinalizeExperiments() pExpSettings->expAmdVendorExtensions = m_settings.disableAmdVendorExtensions; - pExpSettings->expComputeQueueSupport = (m_settings.asyncComputeQueueLimit == 0); + pExpSettings->expComputeQueueSupport = (m_settings.forceComputeQueueCount == 0); pExpSettings->expBarrierOptimizations = ((pPalSettings->pwsMode == Pal::PwsMode::Disabled) && (m_settings.useAcquireReleaseInterface == false)); @@ -371,7 +371,7 @@ void VulkanSettingsLoader::FinalizeExperiments() pExpSettings->expRayTracingPipelineCompilationMode = (m_settings.rtCompileMode == RtCompileModeIndirect); #endif - pExpSettings->expTextureColorCompression = m_settings.forceEnableDcc == ForceDisableCompressionForColor; + pExpSettings->expTextureColorCompression = m_settings.forceDisableCompression == DisableCompressionForColor; pExpSettings->expZeroUnboundDescriptors = pPalSettings->zeroUnboundDescDebugSrd; @@ -484,7 +484,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( m_settings.nggCompactVertex = false; } - else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) + else if (IsGfx11(pInfo->gfxLevel)) { // Enable NGG compactionless mode for Navi3x m_settings.nggCompactVertex = false; @@ -528,12 +528,14 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( 
m_settings.optColorTargetUsageDoesNotContainResolveLayout = true; m_settings.barrierFilterOptions = SkipStrayExecutionDependencies | - SkipImageLayoutUndefined | - SkipDuplicateResourceBarriers; + SkipImageLayoutUndefined | + SkipDuplicateResourceBarriers; m_settings.modifyResourceKeyForAppProfile = true; m_settings.forceImageSharingMode = ForceImageSharingMode::ForceImageSharingModeExclusive; + m_settings.asyncComputeQueueMaxWavesPerCu = 20; + // id games are known to query instance-level functions with vkGetDeviceProcAddr illegally thus we // can't do any better than returning a non-null function pointer for them. m_settings.lenientInstanceFuncQuery = true; @@ -595,7 +597,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( { } } - else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) + else if (IsGfx11(pInfo->gfxLevel)) { if (pInfo->revision == Pal::AsicRevision::Navi31) { @@ -615,25 +617,26 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( m_settings.optColorTargetUsageDoesNotContainResolveLayout = true; m_settings.barrierFilterOptions = SkipStrayExecutionDependencies | - SkipImageLayoutUndefined; + SkipImageLayoutUndefined; m_settings.modifyResourceKeyForAppProfile = true; m_settings.forceImageSharingMode = ForceImageSharingMode::ForceImageSharingModeExclusive; - m_settings.asyncComputeQueueLimit = 1; + m_settings.forceComputeQueueCount = 1; + + m_settings.asyncComputeQueueMaxWavesPerCu = 20; // id games are known to query instance-level functions with vkGetDeviceProcAddr illegally thus we // can't do any better than returning a non-null function pointer for them. m_settings.lenientInstanceFuncQuery = true; } - if (((appProfile == AppProfile::WolfensteinII) || - (appProfile == AppProfile::WolfensteinYoungblood) || - (appProfile == AppProfile::Doom)) && + if (((appProfile == AppProfile::WolfensteinII) || + (appProfile == AppProfile::WolfensteinYoungblood) || + (appProfile == AppProfile::Doom)) && ((pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_1) || - (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3))) + (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3))) { - m_settings.asyncComputeQueueMaxWavesPerCu = 20; m_settings.nggSubgroupSizing = NggSubgroupExplicit; m_settings.nggVertsPerSubgroup = 254; m_settings.nggPrimsPerSubgroup = 128; @@ -704,7 +707,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( } } - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) + if (IsGfx11(pInfo->gfxLevel)) { m_settings.resourceBarrierOptions &= ~ResourceBarrierOptions::SkipDstCacheInv; } @@ -780,7 +783,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( } } - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) + if (IsGfx11(pInfo->gfxLevel)) { m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrPolicy::MallNoAllocSsrAsSnsr; m_settings.ac01WaNotNeeded = true; @@ -970,8 +973,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( ForceDccForColorAttachments | ForceDccFor32BppShaderStorage); } - - else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) + else if (IsGfx11(pInfo->gfxLevel)) { if (pInfo->revision == Pal::AsicRevision::Navi31) { @@ -1086,7 +1088,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( if (appProfile == AppProfile::RainbowSixExtraction) { - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) + if (IsGfx11(pInfo->gfxLevel)) { if (pInfo->revision == Pal::AsicRevision::Navi31) { @@ -1162,7 +1164,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( m_settings.fsWaveSize = 64; } } - else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) 
+ else if (IsGfx11(pInfo->gfxLevel)) { m_settings.pipelineBinningMode = PipelineBinningModeDisable; m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; @@ -1202,7 +1204,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( ForceDccForColorAttachments | ForceDccFor64BppShaderStorage); - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) + if (IsGfx11(pInfo->gfxLevel)) { m_settings.forceEnableDcc |= ForceDccForNonColorAttachmentShaderStorage; } @@ -1273,7 +1275,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( } } - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) + if (IsGfx11(pInfo->gfxLevel)) { } } @@ -1331,6 +1333,9 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( m_settings.rtUnifiedVgprLimit = 64; } } + + // Turn off FP16 for this application to fix 5% perf drop + m_settings.rtFp16BoxNodesInBlasMode = Fp16BoxNodesInBlasMode::Fp16BoxNodesInBlasModeNone; } #endif @@ -1356,6 +1361,8 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( m_settings.alwaysReportHdrFormats = true; + m_settings.asyncComputeQueueMaxWavesPerCu = 20; + if (pInfo->gpuType == Pal::GpuType::Discrete) { m_settings.cmdAllocatorDataHeap = Pal::GpuHeapLocal; @@ -1389,15 +1396,12 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( // triangle culling and there are no options in the game to turn it off making NGG somewhat redundant. m_settings.enableNgg = false; - m_settings.asyncComputeQueueMaxWavesPerCu = 20; - m_settings.enableWgpMode = Vkgc::ShaderStageBit::ShaderStageComputeBit; m_settings.csWaveSize = 64; } else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) { - m_settings.asyncComputeQueueMaxWavesPerCu = 20; m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrAsSnsr; if (pInfo->revision != Pal::AsicRevision::Navi21) @@ -1408,7 +1412,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( m_settings.csWaveSize = 64; } - else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) + else if (IsGfx11(pInfo->gfxLevel)) { // Navi31 Mall and Tiling Settings if ((pInfo->revision == Pal::AsicRevision::Navi31) || (pInfo->revision == Pal::AsicRevision::Navi32)) @@ -1436,7 +1440,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( if ((appProfile == AppProfile::DxvkHaloInfiniteLauncher) || (appProfile == AppProfile::DxvkTf2) -#ifndef ICD_X64_BUILD +#ifndef VKI_X64_BUILD || (appProfile == AppProfile::DXVK) #endif ) @@ -1569,7 +1573,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrAsSnsr; } } - else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) + else if (IsGfx11(pInfo->gfxLevel)) { if (pInfo->revision == Pal::AsicRevision::Navi31) { @@ -1663,11 +1667,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( { OverrideVkd3dCommonSettings(&m_settings); - if ((pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) -#if VKI_BUILD_GFX115 - || (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_5) -#endif - ) + if (IsGfx11(pInfo->gfxLevel)) { m_settings.fsWaveSize = 32; } @@ -1695,6 +1695,12 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( m_settings.memoryDeviceOverallocationAllowed = true; } + if (appProfile == AppProfile::Blender) + { + m_settings.memoryDeviceOverallocationAllowed = true; + m_settings.syncPreviousDrawForTransferStage = true; + } + if (appProfile == AppProfile::SevenDaysToDie) { m_settings.disableDisplayDcc = DisplayableDcc::DisplayableDccDisabled; @@ -1762,7 +1768,7 @@ VkResult VulkanSettingsLoader::ProcessSettings( *pAppProfile = static_cast(m_settings.forceAppProfileValue); } -#if 
ICD_X86_BUILD +#if VKI_X86_BUILD if (m_settings.shaderCacheMode == ShaderCacheEnableRuntimeOnly) { m_settings.shaderCacheMode = ShaderCacheDisable; @@ -1966,10 +1972,10 @@ void VulkanSettingsLoader::UpdatePalSettings() switch (m_settings.disableBinningPsKill) { - case DisableBinningPsKillEnable: + case DisableBinningPsKillTrue: pPalSettings->disableBinningPsKill = Pal::OverrideMode::Enabled; break; - case DisableBinningPsKillDisable: + case DisableBinningPsKillFalse: pPalSettings->disableBinningPsKill = Pal::OverrideMode::Disabled; break; case DisableBinningPsKillDefault: diff --git a/icd/settings/settings_xgl.json b/icd/settings/settings_xgl.json index b50e462d..ae52cc05 100644 --- a/icd/settings/settings_xgl.json +++ b/icd/settings/settings_xgl.json @@ -885,6 +885,18 @@ "Scope": "Driver", "Type": "bool" }, + { + "Name": "SyncPreviousDrawForTransferStage", + "Description": "Whether to sync the previous draw for the pipeline transfer stage barrier", + "Tags": [ + "Pipeline Options" + ], + "Defaults": { + "Default": false + }, + "Scope": "Driver", + "Type": "bool" + }, { "Name": "PipelineBinningMode", "Description": "Specifies whether to override binning setting for pipeline.", @@ -930,19 +942,19 @@ "IsEnum": true, "Values": [ { - "Name": "DisableBinningPsKillDisable", + "Name": "DisableBinningPsKillDefault", "Value": 0, - "Description": "Enable Binning." + "Description": "Default PAL values" }, { - "Name": "DisableBinningPsKillEnable", + "Name": "DisableBinningPsKillFalse", "Value": 1, - "Description": "Disable Binning" + "Description": "Enable Binning." }, { - "Name": "DisableBinningPsKillDefault", + "Name": "DisableBinningPsKillTrue", "Value": 2, - "Description": "Default PAL values" + "Description": "Disable Binning" } ], "Name": "DisableBinningPsKill" @@ -3095,7 +3107,7 @@ "VKI_RAY_TRACING" ], "Defaults": { - "Default": "Fp16BoxNodesInBlasModeNone" + "Default": "Fp16BoxNodesInBlasModeMixed" }, "ValidValues": { "IsEnum": true, @@ -3166,7 +3178,7 @@ "VKI_RAY_TRACING" ], "Defaults": { - "Default": 0.0 + "Default": 1.1 }, "Type": "float", "Name": "RtFp16BoxNodesInBlasModeMixedThreshold", @@ -3277,7 +3289,7 @@ "VKI_RAY_TRACING" ], "Defaults": { - "Default": 0 + "Default": 4 }, "Type": "uint32", "Name": "RtTriangleSplittingBudgetPerTriangle", @@ -3382,7 +3394,7 @@ "VKI_RAY_TRACING" ], "Defaults": { - "Default": false + "Default": true }, "Type": "bool", "Name": "EnableVariableBitsMortonCodes", @@ -3398,7 +3410,7 @@ "VKI_RAY_TRACING" ], "Defaults": { - "Default": false + "Default": true }, "Type": "bool", "Scope": "Driver" @@ -3742,7 +3754,7 @@ "VKI_RAY_TRACING" ], "Defaults": { - "Default": true + "Default": false }, "Type": "bool", "Name": "EnablePairCompressionCostCheck", @@ -4081,7 +4093,7 @@ "VKI_RAY_TRACING" ], "Defaults": { - "Default": false + "Default": true }, "Type": "bool", "Name": "RtEnableFastLBVH", @@ -4102,21 +4114,6 @@ "Name": "BuildParallelWavesPerSimd", "Scope": "Driver" }, - { - "Name": "RtEnableAcquireReleaseInterface", - "Description": "Enable Acquire/release-based barrier interface if PAL reports the ASIC supports it.", - "Tags": [ - "Ray Tracing" - ], - "BuildTypes": [ - "VKI_RAY_TRACING" - ], - "Defaults": { - "Default": false - }, - "Type": "bool", - "Scope": "Driver" - }, { "Name": "EnableFusedInstanceNode", "Description": "Enable fused instance node for BVH builder", @@ -4144,6 +4141,18 @@ "Default": 32 } }, + { + "Name": "RtPersistentDispatchRaysFactor", + "Type": "float", + "Description": "Controls the number of groups launched for a persistent DispatchRays, or 0.0 to 
disable persistent launch", + "Scope": "Driver", + "Tags": [ + "Ray Tracing" + ], + "Defaults": { + "Default": 0.0 + } + }, { "Name": "RtEnableBuildAccelStructStats", "Description": "Dump built acceleration stats. (Pending implementation)", @@ -5401,7 +5410,7 @@ { "Description": "Enable pair compression in early build stage, i.e., During Encode phase.", "Tags": [ - "RayTracing" + "Ray Tracing" ], "BuildTypes": [ "VKI_RAY_TRACING" @@ -5416,7 +5425,7 @@ { "Description": "Triangle pair search radius during EarlyPairCompression.", "Tags": [ - "RayTracing" + "Ray Tracing" ], "BuildTypes": [ "VKI_RAY_TRACING" @@ -5580,16 +5589,25 @@ "Type": "uint32" }, { - "Name": "AsyncComputeQueueLimit", - "Description": "Limit the number of async compute queues that are reported.", + "Name": "ForceGraphicsQueueCount", + "Description": "Sets the number of graphics/universal queues reported by the driver. If set to UINT32_MAX, this setting will not be used. The maximum allowable queue count is currently 8, as defined by 'MaxQueuesPerFamily' in the driver.", "Tags": [ "General" ], "Defaults": { "Default": 4294967295 }, - "Flags": { - "IsHex": true + "Scope": "Driver", + "Type": "uint32" + }, + { + "Name": "ForceComputeQueueCount", + "Description": "Sets the number of compute queues reported by the driver. If set to UINT32_MAX, this setting will not be used. The maximum allowable queue count is currently 8, as defined by 'MaxQueuesPerFamily' in the driver.", + "Tags": [ + "General" + ], + "Defaults": { + "Default": 4294967295 }, "Scope": "Driver", "Type": "uint32" @@ -7050,7 +7068,7 @@ }, { "Name": "ForceEnableDcc", - "Description": "If not default, force enables/disables compression on the basis of resource and/or BPP. NOTE: To force enable shader storage DCC, at least one of 2D/3D and one of CA/non-CA need to be set", + "Description": "If not default, force enables compression on the basis of resource and/or BPP. NOTE: To force enable shader storage DCC, at least one of 2D/3D and one of CA/non-CA need to be set", "Tags": [ "Optimization" ], @@ -7094,29 +7112,51 @@ "Name": "ForceDccFor64BppShaderStorage", "Value": 32, "Description": "Force enable DCC for shader storage resources with 64 BPP or deeper." - }, + } + ], + "Name": "ForceEnableDcc" + }, + "Flags": { + "IsHex": true, + "IsBitmask": true + }, + "Scope": "Driver", + "Type": "uint32" + }, + { + "Name": "ForceDisableCompression", + "Description": "If not default, force disables metadata and compression for appropriate resource types", + "Tags": [ + "Optimization" + ], + "Defaults": { + "Default": "DisableCompressionDefault" + }, + "ValidValues": { + "Name": "ForceDisableCompression", + "IsEnum": true, + "Values": [ { - "Name": "ForceDisableCompression", - "Value": 64, - "Description": "Force disable compression for every resource irrespective of PAL heuristics." + "Name": "DisableCompressionDefault", + "Value": 0, + "Description": "Don't force anything. Let PAL heuristics decide what's best." }, { - "Name": "ForceDisableCompressionForSharedImages", - "Value": 128, + "Name": "DisableCompressionForSharedImages", + "Value": 1, "Description": "Force disable compression for externally sharable resources." }, { - "Name": "ForceDisableCompressionForColor", - "Value": 256, + "Name": "DisableCompressionForColor", + "Value": 2, "Description": "Force disable compression for all color format images." 
}, { - "Name": "ForceDisableCompressionForDepthStencil", - "Value": 512, + "Name": "DisableCompressionForDepthStencil", + "Value": 4, "Description": "Force disable compression for all depth and stencil format images." } - ], - "Name": "ForceEnableDcc" + ] }, "Flags": { "IsHex": true, diff --git a/icd/tools/generate/genShaderProfile.py b/icd/tools/generate/genShaderProfile.py index 325ffb3a..d26e88db 100644 --- a/icd/tools/generate/genShaderProfile.py +++ b/icd/tools/generate/genShaderProfile.py @@ -52,7 +52,7 @@ FUN_DEC_CLASS_SHADER_PROFILE_PUBLIC, FUNC_DEC_PARSE_JSON_PROFILE, FUNC_DEC_BUILD_APP_PROFILE_LLPC, \ BUILD_APP_PROFILE_LLPC_FUNC, JSON_WRITER_GENERIC_DEF, JSON_READER_GENERIC_DEF, NAMESPACE_VK, CPP_INCLUDE, \ CopyrightAndWarning, CONDITION_DYNAMIC_SHADER_INFO_APPLY, CLASS_TEMPLATE, ShaderTuningStructsAndVars, \ - HEADER_INCLUDES, PARSE_DWORD_ARRAY_FUNC, CONDITION_SHADER_CREATE_TUNING_OPTION_FLAGS + HEADER_INCLUDES, PARSE_DWORD_ARRAY_FUNC, CONDITION_SHADER_CREATE_TUNING_OPTION_FLAGS, CONDITION_GFX_IP_11 OUTPUT_FILE = "g_shader_profile" CONFIG_FILE_NAME = "profile.json" @@ -341,6 +341,8 @@ def gen_profile(input_json, compiler): if not success: raise ValueError("JSON parsing failed") action_result, cpp_action = parse_json_profile_entry_action(action) + if not action_result['success']: + raise ValueError("JSON parsing failed") for branch, result in action_result.items(): if result: result_ret[branch] = True @@ -1186,7 +1188,11 @@ def main(): if_gfxip_body = indent(if_asic_group_dict[title] + if_asic_generic_dict[title]) else: if_gfxip_body = indent(if_asic_group_dict[title]) - if_gfxip = CONDITION_GFX_IP.replace("%Gfxip%", gfxip[0].upper() + gfxip[1:]) + if gfxip == "gfxIp11_0": + # Use the IsGfx11 method instead of the explicit CONDITION_GFX_IP. + if_gfxip = CONDITION_GFX_IP_11 + else: + if_gfxip = CONDITION_GFX_IP.replace("%Gfxip%", gfxip[0].upper() + gfxip[1:]) if_gfxip = if_gfxip.replace("%Defs%", if_gfxip_body) if gfxip in BuildTypesTemplate: if_gfxip = wrap_with_directive(if_gfxip, BuildTypesTemplate[gfxip]) diff --git a/icd/tools/generate/shaderProfileTemplate.py b/icd/tools/generate/shaderProfileTemplate.py index c2bab504..b2da7372 100644 --- a/icd/tools/generate/shaderProfileTemplate.py +++ b/icd/tools/generate/shaderProfileTemplate.py @@ -417,6 +417,12 @@ class JsonOutputStream; } """ +CONDITION_GFX_IP_11 = """if (IsGfx11(gfxIpLevel)) +{ +%Defs%\ +} +""" + CONDITION_ASIC = """if (asicRevision == Pal::AsicRevision::%Asic%) { SetAppProfile%FuncName%(pPipelineProfile);