From 4a564e244726381ef8aa7e50dcba6e2ad183072e Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Wed, 1 Mar 2023 15:36:57 +0100 Subject: [PATCH 1/4] Latte/Vulkan: Set shader rounding mode via VK_KHR_SHADER_FLOAT_CONTROLS --- src/Cafe/HW/Latte/Core/LatteShader.cpp | 44 ++++++++++++---- src/Cafe/HW/Latte/Core/LatteShader.h | 1 + src/Cafe/HW/Latte/Core/LatteShaderCache.cpp | 18 +++++-- .../LatteDecompiler.cpp | 34 +++++------- .../LegacyShaderDecompiler/LatteDecompiler.h | 19 +++++-- .../LatteDecompilerEmitGLSLHeader.hpp | 5 ++ .../LatteDecompilerInternal.h | 8 ++- .../Renderer/Vulkan/RendererShaderVk.cpp | 14 ++--- .../Latte/Renderer/Vulkan/VulkanRenderer.cpp | 52 ++++++++++++++----- .../HW/Latte/Renderer/Vulkan/VulkanRenderer.h | 20 ++++--- .../Renderer/Vulkan/VulkanRendererCore.cpp | 2 +- 11 files changed, 142 insertions(+), 75 deletions(-) diff --git a/src/Cafe/HW/Latte/Core/LatteShader.cpp b/src/Cafe/HW/Latte/Core/LatteShader.cpp index 4ac6c1fea..ccb4c282c 100644 --- a/src/Cafe/HW/Latte/Core/LatteShader.cpp +++ b/src/Cafe/HW/Latte/Core/LatteShader.cpp @@ -1,20 +1,25 @@ #include "Cafe/HW/Latte/Core/LatteConst.h" #include "Cafe/HW/Latte/Core/LatteShaderAssembly.h" #include "Cafe/HW/Latte/ISA/RegDefines.h" -#include "Cafe/OS/libs/gx2/GX2.h" // todo - remove dependency #include "Cafe/HW/Latte/ISA/LatteReg.h" #include "Cafe/HW/Latte/Core/LatteShader.h" #include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h" #include "Cafe/HW/Latte/Core/FetchShader.h" #include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h" +#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h" +#include "Cafe/OS/libs/gx2/GX2.h" // todo - remove dependency #include "Cafe/GraphicPack/GraphicPack2.h" #include "util/helpers/StringParser.h" #include "config/ActiveSettings.h" -#include "util/Zir/EmitterGLSL/ZpIREmitGLSL.h" -#include "util/Zir/Core/ZpIRDebug.h" #include "util/containers/flat_hash_map.hpp" #include +// experimental new decompiler (WIP) +#include "util/Zir/EmitterGLSL/ZpIREmitGLSL.h" +#include "util/Zir/Core/ZpIRDebug.h" +#include "Cafe/HW/Latte/Transcompiler/LatteTC.h" +#include "Cafe/HW/Latte/ShaderInfo/ShaderInfo.h" + struct _ShaderHashCache { uint64 prevHash1; @@ -672,10 +677,18 @@ LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompi return shader; } -#include "Cafe/HW/Latte/Transcompiler/LatteTC.h" -#include "Cafe/HW/Latte/ShaderInfo/ShaderInfo.h" +void LatteShader_GetDecompilerOptions(LatteDecompilerOptions& options, LatteConst::ShaderType shaderType, bool geometryShaderEnabled) +{ + options.usesGeometryShader = geometryShaderEnabled; + options.spirvInstrinsics.hasRoundingModeRTEFloat32 = false; + if (g_renderer->GetType() == RendererAPI::Vulkan) + { + options.useTFViaSSBO = VulkanRenderer::GetInstance()->UseTFViaSSBO(); + options.spirvInstrinsics.hasRoundingModeRTEFloat32 = VulkanRenderer::GetInstance()->HasSPRIVRoundingModeRTE32(); + } +} -LatteDecompilerShader* LatteShader_compileSeparableVertexShader(uint64 baseHash, uint64& vsAuxHash, uint8* vertexShaderPtr, uint32 vertexShaderSize, bool usesGeometryShader, LatteFetchShader* fetchShader) +LatteDecompilerShader* LatteShader_CompileSeparableVertexShader2(uint64 baseHash, uint64& vsAuxHash, uint8* vertexShaderPtr, uint32 vertexShaderSize, bool usesGeometryShader, LatteFetchShader* fetchShader) { /* Analyze shader to gather general information about inputs/outputs */ Latte::ShaderDescription shaderDescription; @@ -725,14 +738,17 @@ LatteDecompilerShader* LatteShader_compileSeparableVertexShader(uint64 baseHash, // compile new vertex shader (relies partially on current state) LatteDecompilerShader* LatteShader_CompileSeparableVertexShader(uint64 baseHash, uint64& vsAuxHash, uint8* vertexShaderPtr, uint32 vertexShaderSize, bool usesGeometryShader, LatteFetchShader* fetchShader) { - // new decompiler - //LatteShader_compileSeparableVertexShader(baseHash, vsAuxHash, vertexShaderPtr, vertexShaderSize, usesGeometryShader, fetchShader); + // new decompiler test + //LatteShader_CompileSeparableVertexShader2(baseHash, vsAuxHash, vertexShaderPtr, vertexShaderSize, usesGeometryShader, fetchShader); // legacy decompiler + LatteDecompilerOptions options; + LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Vertex, usesGeometryShader); + LatteDecompilerOutput_t decompilerOutput{}; LatteFetchShader* fetchShaderList[1]; fetchShaderList[0] = fetchShader; - LatteDecompiler_DecompileVertexShader(_shaderBaseHash_vs, LatteGPUState.contextRegister, vertexShaderPtr, vertexShaderSize, fetchShaderList, 1, LatteGPUState.contextNew.GetSpecialStateValues(), usesGeometryShader, &decompilerOutput); + LatteDecompiler_DecompileVertexShader(_shaderBaseHash_vs, LatteGPUState.contextRegister, vertexShaderPtr, vertexShaderSize, fetchShaderList, 1, LatteGPUState.contextNew.GetSpecialStateValues(), options, &decompilerOutput); LatteDecompilerShader* vertexShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, baseHash, true, 0, LatteGPUState.contextRegister); vsAuxHash = vertexShader->auxHash; if (vertexShader->hasError == false) @@ -759,10 +775,13 @@ LatteDecompilerShader* LatteShader_CompileSeparableVertexShader(uint64 baseHash, LatteDecompilerShader* LatteShader_CompileSeparableGeometryShader(uint64 baseHash, uint8* geometryShaderPtr, uint32 geometryShaderSize, uint8* geometryCopyShader, uint32 geometryCopyShaderSize) { + LatteDecompilerOptions options; + LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Geometry, true); + LatteDecompilerOutput_t decompilerOutput{}; LatteFetchShader* fetchShaderList[1]; fetchShaderList[0] = _activeFetchShader; - LatteDecompiler_DecompileGeometryShader(_shaderBaseHash_gs, LatteGPUState.contextRegister, geometryShaderPtr, geometryShaderSize, geometryCopyShader, geometryCopyShaderSize, LatteGPUState.contextNew.GetSpecialStateValues(), _activeVertexShader->ringParameterCount, &decompilerOutput); + LatteDecompiler_DecompileGeometryShader(_shaderBaseHash_gs, LatteGPUState.contextRegister, geometryShaderPtr, geometryShaderSize, geometryCopyShader, geometryCopyShaderSize, LatteGPUState.contextNew.GetSpecialStateValues(), _activeVertexShader->ringParameterCount, options, &decompilerOutput); LatteDecompilerShader* geometryShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, baseHash, true, 0, LatteGPUState.contextRegister); if (geometryShader->hasError == false) { @@ -787,8 +806,11 @@ LatteDecompilerShader* LatteShader_CompileSeparableGeometryShader(uint64 baseHas LatteDecompilerShader* LatteShader_CompileSeparablePixelShader(uint64 baseHash, uint64& psAuxHash, uint8* pixelShaderPtr, uint32 pixelShaderSize, bool usesGeometryShader) { + LatteDecompilerOptions options; + LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Pixel, usesGeometryShader); + LatteDecompilerOutput_t decompilerOutput{}; - LatteDecompiler_DecompilePixelShader(baseHash, LatteGPUState.contextRegister, pixelShaderPtr, pixelShaderSize, LatteGPUState.contextNew.GetSpecialStateValues(), usesGeometryShader, &decompilerOutput); + LatteDecompiler_DecompilePixelShader(baseHash, LatteGPUState.contextRegister, pixelShaderPtr, pixelShaderSize, LatteGPUState.contextNew.GetSpecialStateValues(), options, &decompilerOutput); LatteDecompilerShader* pixelShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, baseHash, true, 0, LatteGPUState.contextRegister); psAuxHash = pixelShader->auxHash; LatteShader_DumpShader(_shaderBaseHash_ps, psAuxHash, pixelShader); diff --git a/src/Cafe/HW/Latte/Core/LatteShader.h b/src/Cafe/HW/Latte/Core/LatteShader.h index 0fba0322d..eb623d85a 100644 --- a/src/Cafe/HW/Latte/Core/LatteShader.h +++ b/src/Cafe/HW/Latte/Core/LatteShader.h @@ -94,6 +94,7 @@ extern uint64 _shaderBaseHash_vs; extern uint64 _shaderBaseHash_gs; extern uint64 _shaderBaseHash_ps; +void LatteShader_GetDecompilerOptions(struct LatteDecompilerOptions& options, LatteConst::ShaderType shaderType, bool geometryShaderEnabled); LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompilerOutput_t& decompilerOutput, uint64 baseHash, bool calculateAuxHash, uint64 optionalAuxHash, uint32* contextRegister); void LatteShader_CreateRendererShader(LatteDecompilerShader* shader, bool compileAsync); diff --git a/src/Cafe/HW/Latte/Core/LatteShaderCache.cpp b/src/Cafe/HW/Latte/Core/LatteShaderCache.cpp index 07a0d461a..9a6c3f84b 100644 --- a/src/Cafe/HW/Latte/Core/LatteShaderCache.cpp +++ b/src/Cafe/HW/Latte/Core/LatteShaderCache.cpp @@ -641,16 +641,19 @@ bool LatteShaderCache_readSeparableVertexShader(MemStreamReader& streamReader, u return false; if (streamReader.hasError() || !streamReader.isEndOfStream()) return false; - // update PS inputs (influence VS shader outputs) + // update PS inputs (affects VS shader outputs) LatteShader_UpdatePSInputs(lcr->GetRawView()); // get fetch shader LatteFetchShader::CacheHash fsHash = LatteFetchShader::CalculateCacheHash((uint32*)fetchShaderData.data(), fetchShaderData.size()); LatteFetchShader* fetchShader = LatteShaderRecompiler_createFetchShader(fsHash, lcr->GetRawView(), (uint32*)fetchShaderData.data(), fetchShaderData.size()); + // determine decompiler options + LatteDecompilerOptions options; + LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Vertex, usesGeometryShader); // decompile vertex shader LatteDecompilerOutput_t decompilerOutput{}; LatteFetchShader* fetchShaderList[1]; fetchShaderList[0] = fetchShader; - LatteDecompiler_DecompileVertexShader(shaderBaseHash, lcr->GetRawView(), vertexShaderData.data(), vertexShaderData.size(), fetchShaderList, 1, lcr->GetSpecialStateValues(), usesGeometryShader, &decompilerOutput); + LatteDecompiler_DecompileVertexShader(shaderBaseHash, lcr->GetRawView(), vertexShaderData.data(), vertexShaderData.size(), fetchShaderList, 1, lcr->GetSpecialStateValues(), options, &decompilerOutput); LatteDecompilerShader* vertexShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, shaderBaseHash, false, shaderAuxHash, lcr->GetRawView()); // compile LatteShader_DumpShader(shaderBaseHash, shaderAuxHash, vertexShader); @@ -688,15 +691,17 @@ bool LatteShaderCache_readSeparableGeometryShader(MemStreamReader& streamReader, return false; // update PS inputs LatteShader_UpdatePSInputs(lcr->GetRawView()); + // determine decompiler options + LatteDecompilerOptions options; + LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Geometry, true); // decompile geometry shader LatteDecompilerOutput_t decompilerOutput{}; - LatteDecompiler_DecompileGeometryShader(shaderBaseHash, lcr->GetRawView(), geometryShaderData.data(), geometryShaderData.size(), geometryCopyShaderData.data(), geometryCopyShaderData.size(), lcr->GetSpecialStateValues(), vsRingParameterCount, &decompilerOutput); + LatteDecompiler_DecompileGeometryShader(shaderBaseHash, lcr->GetRawView(), geometryShaderData.data(), geometryShaderData.size(), geometryCopyShaderData.data(), geometryCopyShaderData.size(), lcr->GetSpecialStateValues(), vsRingParameterCount, options, &decompilerOutput); LatteDecompilerShader* geometryShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, shaderBaseHash, false, shaderAuxHash, lcr->GetRawView()); // compile LatteShader_DumpShader(shaderBaseHash, shaderAuxHash, geometryShader); LatteShader_DumpRawShader(shaderBaseHash, shaderAuxHash, SHADER_DUMP_TYPE_GEOMETRY, geometryShaderData.data(), geometryShaderData.size()); LatteShaderCache_loadOrCompileSeparableShader(geometryShader, shaderBaseHash, shaderAuxHash); - catchOpenGLError(); LatteSHRC_RegisterShader(geometryShader, shaderBaseHash, shaderAuxHash); return true; } @@ -724,9 +729,12 @@ bool LatteShaderCache_readSeparablePixelShader(MemStreamReader& streamReader, ui return false; // update PS inputs LatteShader_UpdatePSInputs(lcr->GetRawView()); + // determine decompiler options + LatteDecompilerOptions options; + LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Pixel, usesGeometryShader); // decompile pixel shader LatteDecompilerOutput_t decompilerOutput{}; - LatteDecompiler_DecompilePixelShader(shaderBaseHash, lcr->GetRawView(), pixelShaderData.data(), pixelShaderData.size(), lcr->GetSpecialStateValues(), usesGeometryShader, &decompilerOutput); + LatteDecompiler_DecompilePixelShader(shaderBaseHash, lcr->GetRawView(), pixelShaderData.data(), pixelShaderData.size(), lcr->GetSpecialStateValues(), options, &decompilerOutput); LatteDecompilerShader* pixelShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, shaderBaseHash, false, shaderAuxHash, lcr->GetRawView()); // compile LatteShader_DumpShader(shaderBaseHash, shaderAuxHash, pixelShader); diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.cpp index 919c3094d..ee7e14bd6 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.cpp @@ -1066,35 +1066,34 @@ void _LatteDecompiler_Process(LatteDecompilerShaderContext* shaderContext, uint8 _LatteDecompiler_GenerateDataForFastAccess(shaderContext->shader); } -void LatteDecompiler_InitContext(LatteDecompilerShaderContext& dCtx, LatteDecompilerOutput_t* output, LatteConst::ShaderType shaderType, uint64 shaderBaseHash, uint32* contextRegisters) +void LatteDecompiler_InitContext(LatteDecompilerShaderContext& dCtx, const LatteDecompilerOptions& options, LatteDecompilerOutput_t* output, LatteConst::ShaderType shaderType, uint64 shaderBaseHash, uint32* contextRegisters) { dCtx.output = output; dCtx.shaderType = shaderType; + dCtx.options = &options; output->shaderType = shaderType; dCtx.shaderBaseHash = shaderBaseHash; dCtx.contextRegisters = contextRegisters; dCtx.contextRegistersNew = (LatteContextRegister*)contextRegisters; + + // set context parameters (redundant stuff since options can be accessed directly) + dCtx.usesGeometryShader = options.usesGeometryShader; + dCtx.useTFViaSSBO = options.useTFViaSSBO; } -void LatteDecompiler_DecompileVertexShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, LatteFetchShader** fetchShaderList, sint32 fetchShaderCount, uint32* hleSpecialState, bool usesGeometryShader, LatteDecompilerOutput_t* output, bool useTFViaSSBO) +void LatteDecompiler_DecompileVertexShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, struct LatteFetchShader** fetchShaderList, sint32 fetchShaderCount, uint32* hleSpecialState, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output) { cemu_assert_debug((programSize & 3) == 0); performanceMonitor.gpuTime_shaderCreate.beginMeasuring(); // prepare decompiler context LatteDecompilerShaderContext shaderContext = { 0 }; - LatteDecompiler_InitContext(shaderContext, output, LatteConst::ShaderType::Vertex, shaderBaseHash, contextRegisters); + LatteDecompiler_InitContext(shaderContext, options, output, LatteConst::ShaderType::Vertex, shaderBaseHash, contextRegisters); cemu_assert_debug(fetchShaderCount == 1); for (sint32 i = 0; i < fetchShaderCount; i++) { shaderContext.fetchShaderList[i] = fetchShaderList[i]; } shaderContext.fetchShaderCount = fetchShaderCount; - // ugly hack to get tf mode from Vulkan renderer - shaderContext.useTFViaSSBO = useTFViaSSBO; - if (g_renderer->GetType() == RendererAPI::Vulkan) - { - shaderContext.useTFViaSSBO = VulkanRenderer::GetInstance()->useTFViaSSBO(); - } // prepare shader (deprecated) LatteDecompilerShader* shader = new LatteDecompilerShader(); shader->shaderType = LatteConst::ShaderType::Vertex; @@ -1103,7 +1102,6 @@ void LatteDecompiler_DecompileVertexShader(uint64 shaderBaseHash, uint32* contex output->shaderType = LatteConst::ShaderType::Vertex; shaderContext.shader = shader; output->shader = shader; - shaderContext.usesGeometryShader = usesGeometryShader; for (sint32 i = 0; i < LATTE_NUM_MAX_TEX_UNITS; i++) { shader->textureUnitSamplerAssignment[i] = LATTE_DECOMPILER_SAMPLER_NONE; @@ -1114,14 +1112,14 @@ void LatteDecompiler_DecompileVertexShader(uint64 shaderBaseHash, uint32* contex performanceMonitor.gpuTime_shaderCreate.endMeasuring(); } -void LatteDecompiler_DecompileGeometryShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint8* gsCopyProgramData, uint32 gsCopyProgramSize, uint32* hleSpecialState, uint32 vsRingParameterCount, LatteDecompilerOutput_t* output, bool useTFViaSSBO) +void LatteDecompiler_DecompileGeometryShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint8* gsCopyProgramData, uint32 gsCopyProgramSize, uint32* hleSpecialState, uint32 vsRingParameterCount, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output) { cemu_assert_debug((programSize & 3) == 0); performanceMonitor.gpuTime_shaderCreate.beginMeasuring(); // prepare decompiler context LatteDecompilerShaderContext shaderContext = { 0 }; shaderContext.fetchShaderCount = 0; - LatteDecompiler_InitContext(shaderContext, output, LatteConst::ShaderType::Geometry, shaderBaseHash, contextRegisters); + LatteDecompiler_InitContext(shaderContext, options, output, LatteConst::ShaderType::Geometry, shaderBaseHash, contextRegisters); // prepare shader LatteDecompilerShader* shader = new LatteDecompilerShader(); shaderContext.output = output; @@ -1131,7 +1129,6 @@ void LatteDecompiler_DecompileGeometryShader(uint64 shaderBaseHash, uint32* cont output->shaderType = LatteConst::ShaderType::Geometry; shaderContext.shader = shader; output->shader = shader; - shaderContext.usesGeometryShader = true; if (gsCopyProgramData == NULL) { shader->hasError = true; @@ -1145,24 +1142,18 @@ void LatteDecompiler_DecompileGeometryShader(uint64 shaderBaseHash, uint32* cont shader->textureUnitSamplerAssignment[i] = LATTE_DECOMPILER_SAMPLER_NONE; shader->textureUsesDepthCompare[i] = false; } - // ugly hack to get tf mode from Vulkan renderer - shaderContext.useTFViaSSBO = useTFViaSSBO; - if (g_renderer->GetType() == RendererAPI::Vulkan) - { - shaderContext.useTFViaSSBO = VulkanRenderer::GetInstance()->useTFViaSSBO(); - } // parse & compile _LatteDecompiler_Process(&shaderContext, programData, programSize); performanceMonitor.gpuTime_shaderCreate.endMeasuring(); } -void LatteDecompiler_DecompilePixelShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint32* hleSpecialState, bool usesGeometryShader, LatteDecompilerOutput_t* output) +void LatteDecompiler_DecompilePixelShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint32* hleSpecialState, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output) { cemu_assert_debug((programSize & 3) == 0); performanceMonitor.gpuTime_shaderCreate.beginMeasuring(); // prepare decompiler context LatteDecompilerShaderContext shaderContext = { 0 }; - LatteDecompiler_InitContext(shaderContext, output, LatteConst::ShaderType::Pixel, shaderBaseHash, contextRegisters); + LatteDecompiler_InitContext(shaderContext, options, output, LatteConst::ShaderType::Pixel, shaderBaseHash, contextRegisters); shaderContext.contextRegisters = contextRegisters; // prepare shader LatteDecompilerShader* shader = new LatteDecompilerShader(); @@ -1172,7 +1163,6 @@ void LatteDecompiler_DecompilePixelShader(uint64 shaderBaseHash, uint32* context output->shaderType = LatteConst::ShaderType::Pixel; shaderContext.shader = shader; output->shader = shader; - shaderContext.usesGeometryShader = usesGeometryShader; for (sint32 i = 0; i < LATTE_NUM_MAX_TEX_UNITS; i++) { shader->textureUnitSamplerAssignment[i] = LATTE_DECOMPILER_SAMPLER_NONE; diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h index 4f0d3ff29..febec1a42 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h @@ -218,7 +218,7 @@ struct LatteDecompilerShader std::vector entries; }; std::vector list_remappedUniformEntries_register; - std::vector<_RemappedUniformBufferGroup> list_remappedUniformEntries_bufferGroups; + std::vector<_RemappedUniformBufferGroup> list_remappedUniformEntries_bufferGroups; }; struct LatteDecompilerOutputUniformOffsets @@ -250,6 +250,17 @@ struct LatteDecompilerOutputUniformOffsets } }; +struct LatteDecompilerOptions +{ + bool usesGeometryShader{ false }; + // Vulkan-specific + bool useTFViaSSBO{ false }; + struct + { + bool hasRoundingModeRTEFloat32{ false }; + }spirvInstrinsics; +}; + struct LatteDecompilerOutput_t { LatteDecompilerShader* shader; @@ -272,9 +283,9 @@ struct LatteDecompilerOutput_t struct LatteDecompilerSubroutineInfo; -void LatteDecompiler_DecompileVertexShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, struct LatteFetchShader** fetchShaderList, sint32 fetchShaderCount, uint32* hleSpecialState, bool usesGeometryShader, LatteDecompilerOutput_t* output, bool useTFViaSSBO = false); -void LatteDecompiler_DecompileGeometryShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint8* gsCopyProgramData, uint32 gsCopyProgramSize, uint32* hleSpecialState, uint32 vsRingParameterCount, LatteDecompilerOutput_t* output, bool useTFViaSSBO = false); -void LatteDecompiler_DecompilePixelShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint32* hleSpecialState, bool usesGeometryShader, LatteDecompilerOutput_t* output); +void LatteDecompiler_DecompileVertexShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, struct LatteFetchShader** fetchShaderList, sint32 fetchShaderCount, uint32* hleSpecialState, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output); +void LatteDecompiler_DecompileGeometryShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint8* gsCopyProgramData, uint32 gsCopyProgramSize, uint32* hleSpecialState, uint32 vsRingParameterCount, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output); +void LatteDecompiler_DecompilePixelShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint32* hleSpecialState, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output); // specialized shader parsers diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSLHeader.hpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSLHeader.hpp index 677399c26..a2545d1d9 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSLHeader.hpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSLHeader.hpp @@ -306,6 +306,11 @@ namespace LatteDecompiler { src->add("#define GET_FRAGCOORD() vec4(gl_FragCoord.xy*uf_fragCoordScale.xy,gl_FragCoord.z, 1.0/gl_FragCoord.w)" _CRLF); } + if (decompilerContext->options->spirvInstrinsics.hasRoundingModeRTEFloat32) + { + src->add("#extension GL_EXT_spirv_intrinsics: enable" _CRLF); + src->add("spirv_execution_mode(4462, 32);" _CRLF); // RoundingModeRTE 32 + } src->add("#else" _CRLF); // OpenGL defines src->add("#define ATTR_LAYOUT(__vkSet, __location) layout(location = __location)" _CRLF); diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h index 3dc3eca65..9e3120e65 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h @@ -70,8 +70,6 @@ struct LatteDecompilerTEXInstruction uint8 nfa{}; uint8 isSigned{}; }memRead; - // custom shadow function - sint32 shadowFunctionIndex{}; }; struct LatteDecompilerCFInstruction @@ -116,7 +114,7 @@ struct LatteDecompilerCFInstruction ~LatteDecompilerCFInstruction() { - cemu_assert_debug(!(instructionsALU.size() != 0 && instructionsTEX.size() != 0)); // make sure we dont accidentally added the wrong instruction type + cemu_assert_debug(!(instructionsALU.size() != 0 && instructionsTEX.size() != 0)); // make sure we haven't accidentally added the wrong instruction type } #if BOOST_OS_WINDOWS @@ -148,6 +146,7 @@ struct LatteDecompilerShaderContext LatteDecompilerOutput_t* output; LatteDecompilerShader* shader; LatteConst::ShaderType shaderType; + const class LatteDecompilerOptions* options; uint32* contextRegisters; // deprecated struct LatteContextRegister* contextRegistersNew; uint64 shaderBaseHash; @@ -217,10 +216,9 @@ struct LatteDecompilerShaderContext bool hasUniformVarBlock; sint32 currentBindingPointVK{}; - // unsorted + // misc bool usesGeometryShader; // for VS bool useTFViaSSBO; - sint32 currentShadowFunctionIndex; std::vector list_subroutines; }; diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.cpp index 7c5779034..672b9c802 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.cpp @@ -1,17 +1,19 @@ #include "Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.h" - -#include -#include - #include "Cafe/HW/Latte/Renderer/Vulkan/VulkanAPI.h" #include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h" - #include "config/ActiveSettings.h" #include "config/CemuConfig.h" #include "util/helpers/ConcurrentQueue.h" - #include "Cemu/FileCache/FileCache.h" +#include +#include + +// required for modifying SPIR-V +#include + + + bool s_isLoadingShadersVk{ false }; class FileCache* s_spirvCache{nullptr}; diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp index 3c85baac0..df37ae7eb 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp @@ -37,13 +37,13 @@ extern std::atomic_int g_compiling_pipelines; const std::vector kOptionalDeviceExtensions = { - //VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, VK_NV_FILL_RECTANGLE_EXTENSION_NAME, VK_EXT_PIPELINE_CREATION_FEEDBACK_EXTENSION_NAME, VK_EXT_FILTER_CUBIC_EXTENSION_NAME, // not supported by any device yet VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME, VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, + VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME }; const std::vector kRequiredDeviceExtensions = @@ -236,25 +236,51 @@ void VulkanRenderer::DetermineVendor() void VulkanRenderer::GetDeviceFeatures() { + /* Get Vulkan features via GetPhysicalDeviceFeatures2 */ + void* prevStruct = nullptr; VkPhysicalDeviceCustomBorderColorFeaturesEXT bcf{}; bcf.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT; + bcf.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT; + prevStruct = &bcf; VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT pcc{}; pcc.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES_EXT; - pcc.pNext = &bcf; + pcc.pNext = prevStruct; + prevStruct = &pcc; VkPhysicalDeviceFeatures2 physicalDeviceFeatures2{}; physicalDeviceFeatures2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; - physicalDeviceFeatures2.pNext = &pcc; + physicalDeviceFeatures2.pNext = prevStruct; vkGetPhysicalDeviceFeatures2(m_physicalDevice, &physicalDeviceFeatures2); + /* Get Vulkan device properties and limits */ + VkPhysicalDeviceFloatControlsPropertiesKHR pfcp{}; + prevStruct = nullptr; + if (m_featureControl.deviceExtensions.shader_float_controls) + { + pfcp.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR; + pfcp.pNext = prevStruct; + prevStruct = &pfcp; + } + + VkPhysicalDeviceProperties2 prop2{}; + prop2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + prop2.pNext = prevStruct; + + vkGetPhysicalDeviceProperties2(m_physicalDevice, &prop2); + + /* Determine which subfeatures we can use */ + m_featureControl.deviceExtensions.pipeline_creation_cache_control = pcc.pipelineCreationCacheControl; m_featureControl.deviceExtensions.custom_border_color_without_format = m_featureControl.deviceExtensions.custom_border_color && bcf.customBorderColorWithoutFormat; + m_featureControl.shaderFloatControls.shaderRoundingModeRTEFloat32 = m_featureControl.deviceExtensions.shader_float_controls && pfcp.shaderRoundingModeRTEFloat32; + if(!m_featureControl.shaderFloatControls.shaderRoundingModeRTEFloat32) + cemuLog_log(LogType::Force, "Shader round mode control not available on this device or driver. Some rendering issues might occur."); if (!m_featureControl.deviceExtensions.pipeline_creation_cache_control) { - forceLogDebug_printf("VK_EXT_pipeline_creation_cache_control not supported. Cannot use asynchronous shader and pipeline compilation"); + cemuLog_log(LogType::Force, "VK_EXT_pipeline_creation_cache_control not supported. Cannot use asynchronous shader and pipeline compilation"); // if async shader compilation is enabled show warning message if (GetConfig().async_compile) wxMessageBox(_("The currently installed graphics driver does not support the Vulkan extension necessary for asynchronous shader compilation. Asynchronous compilation cannot be used.\n \nRequired extension: VK_EXT_pipeline_creation_cache_control\n\nInstalling the latest graphics driver may solve this error."), _("Information"), wxOK | wxCENTRE); @@ -270,13 +296,11 @@ void VulkanRenderer::GetDeviceFeatures() forceLog_printf("VK_EXT_custom_border_color not supported. Cannot emulate arbitrary border color"); } } - // retrieve limits - VkPhysicalDeviceProperties2 p2{}; - p2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; - vkGetPhysicalDeviceProperties2(m_physicalDevice, &p2); - m_featureControl.limits.minUniformBufferOffsetAlignment = std::max(p2.properties.limits.minUniformBufferOffsetAlignment, (VkDeviceSize)4); - m_featureControl.limits.nonCoherentAtomSize = std::max(p2.properties.limits.nonCoherentAtomSize, (VkDeviceSize)4); - cemuLog_log(LogType::Force, fmt::format("VulkanLimits: UBAlignment {0} nonCoherentAtomSize {1}", p2.properties.limits.minUniformBufferOffsetAlignment, p2.properties.limits.nonCoherentAtomSize)); + + // get limits + m_featureControl.limits.minUniformBufferOffsetAlignment = std::max(prop2.properties.limits.minUniformBufferOffsetAlignment, (VkDeviceSize)4); + m_featureControl.limits.nonCoherentAtomSize = std::max(prop2.properties.limits.nonCoherentAtomSize, (VkDeviceSize)4); + cemuLog_log(LogType::Force, fmt::format("VulkanLimits: UBAlignment {0} nonCoherentAtomSize {1}", prop2.properties.limits.minUniformBufferOffsetAlignment, prop2.properties.limits.nonCoherentAtomSize)); } VulkanRenderer::VulkanRenderer() @@ -1025,6 +1049,8 @@ VkDeviceCreateInfo VulkanRenderer::CreateDeviceCreateInfo(const std::vector& data, cons { try { - // g_imgui_textures.emplace_back(texture); std::vector tmp(size.x * size.y * 4); for (size_t i = 0; i < data.size() / 3; ++i) { diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h index 81c3801d6..147b6c159 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h @@ -463,8 +463,14 @@ class VulkanRenderer : public Renderer bool external_memory_host = false; // VK_EXT_external_memory_host bool synchronization2 = false; // VK_KHR_synchronization2 bool dynamic_rendering = false; // VK_KHR_dynamic_rendering + bool shader_float_controls = false; // VK_KHR_shader_float_controls }deviceExtensions; + struct + { + bool shaderRoundingModeRTEFloat32{ false }; + }shaderFloatControls; // from VK_KHR_shader_float_controls + struct { bool debug_utils = false; // VK_EXT_DEBUG_UTILS @@ -482,8 +488,8 @@ class VulkanRenderer : public Renderer uint32 nonCoherentAtomSize = 256; }limits; - bool debugMarkersSupported = false; // frame debugger is attached - bool disableMultithreadedCompilation = false; // for old nvidia drivers + bool debugMarkersSupported{ false }; // frame debugger is attached + bool disableMultithreadedCompilation{ false }; // for old nvidia drivers }m_featureControl{}; static bool CheckDeviceExtensionSupport(const VkPhysicalDevice device, FeatureControl& info); @@ -936,12 +942,10 @@ class VulkanRenderer : public Renderer public: - bool GetDisableMultithreadedCompilation() { return m_featureControl.disableMultithreadedCompilation; } - bool useTFViaSSBO() { return m_featureControl.mode.useTFEmulationViaSSBO; } - bool IsDebugUtilsEnabled() const - { - return m_featureControl.debugMarkersSupported && m_featureControl.instanceExtensions.debug_utils; - } + bool GetDisableMultithreadedCompilation() const { return m_featureControl.disableMultithreadedCompilation; } + bool UseTFViaSSBO() const { return m_featureControl.mode.useTFEmulationViaSSBO; } + bool HasSPRIVRoundingModeRTE32() const { return m_featureControl.shaderFloatControls.shaderRoundingModeRTEFloat32; } + bool IsDebugUtilsEnabled() const { return m_featureControl.debugMarkersSupported && m_featureControl.instanceExtensions.debug_utils; } private: diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp index f3104e772..5d826b618 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp @@ -1579,7 +1579,7 @@ void VulkanRenderer::draw_updateVertexBuffersDirectAccess() if (bufferAddress == MPTR_NULL) { - cemu_assert_unimplemented(); + bufferAddress = 0x10000000; } if (m_state.currentVertexBinding[bufferIndex].offset == bufferAddress) continue; From f8ea59444757e349247c4d1d58965c998dc27a3d Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Wed, 1 Mar 2023 22:16:57 +0100 Subject: [PATCH 2/4] Latte: Small refactor for shader decompiler Latte: Small refactor for shader decompiler --- src/Cafe/HW/Latte/Core/LatteShader.cpp | 14 +-- src/Cafe/HW/Latte/Core/LatteShaderCache.cpp | 10 +- src/Cafe/HW/Latte/Core/LatteStreamoutGPU.cpp | 6 +- .../LatteDecompiler.cpp | 37 ++----- .../LegacyShaderDecompiler/LatteDecompiler.h | 16 +-- .../LatteDecompilerAnalyzer.cpp | 15 ++- .../LatteDecompilerEmitGLSL.cpp | 97 ++++--------------- .../LatteDecompilerEmitGLSLHeader.hpp | 14 +-- .../LatteDecompilerInternal.h | 7 +- 9 files changed, 67 insertions(+), 149 deletions(-) diff --git a/src/Cafe/HW/Latte/Core/LatteShader.cpp b/src/Cafe/HW/Latte/Core/LatteShader.cpp index ccb4c282c..fe8056daa 100644 --- a/src/Cafe/HW/Latte/Core/LatteShader.cpp +++ b/src/Cafe/HW/Latte/Core/LatteShader.cpp @@ -549,7 +549,7 @@ uint64 LatteSHRC_CalcVSAuxHash(LatteDecompilerShader* vertexShader, uint32* cont // hash stride for streamout buffers for (uint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++) { - if(!vertexShader->streamoutBufferWriteMask2[i]) + if(!vertexShader->streamoutBufferWriteMask[i]) continue; uint32 bufferStride = contextRegisters[mmVGT_STRMOUT_VTX_STRIDE_0 + i * 4]; auxHash = std::rotl(auxHash, 7); @@ -617,7 +617,7 @@ LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompi // copy texture info shader->textureUnitMask2 = decompilerOutput.textureUnitMask; // copy streamout info - shader->streamoutBufferWriteMask2 = decompilerOutput.streamoutBufferWriteMask; + shader->streamoutBufferWriteMask = decompilerOutput.streamoutBufferWriteMask; shader->hasStreamoutBufferWrite = decompilerOutput.streamoutBufferWriteMask.any(); // copy uniform offsets // for OpenGL these are retrieved in _prepareSeparableUniforms() @@ -746,9 +746,7 @@ LatteDecompilerShader* LatteShader_CompileSeparableVertexShader(uint64 baseHash, LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Vertex, usesGeometryShader); LatteDecompilerOutput_t decompilerOutput{}; - LatteFetchShader* fetchShaderList[1]; - fetchShaderList[0] = fetchShader; - LatteDecompiler_DecompileVertexShader(_shaderBaseHash_vs, LatteGPUState.contextRegister, vertexShaderPtr, vertexShaderSize, fetchShaderList, 1, LatteGPUState.contextNew.GetSpecialStateValues(), options, &decompilerOutput); + LatteDecompiler_DecompileVertexShader(_shaderBaseHash_vs, LatteGPUState.contextRegister, vertexShaderPtr, vertexShaderSize, fetchShader, options, &decompilerOutput); LatteDecompilerShader* vertexShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, baseHash, true, 0, LatteGPUState.contextRegister); vsAuxHash = vertexShader->auxHash; if (vertexShader->hasError == false) @@ -779,9 +777,7 @@ LatteDecompilerShader* LatteShader_CompileSeparableGeometryShader(uint64 baseHas LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Geometry, true); LatteDecompilerOutput_t decompilerOutput{}; - LatteFetchShader* fetchShaderList[1]; - fetchShaderList[0] = _activeFetchShader; - LatteDecompiler_DecompileGeometryShader(_shaderBaseHash_gs, LatteGPUState.contextRegister, geometryShaderPtr, geometryShaderSize, geometryCopyShader, geometryCopyShaderSize, LatteGPUState.contextNew.GetSpecialStateValues(), _activeVertexShader->ringParameterCount, options, &decompilerOutput); + LatteDecompiler_DecompileGeometryShader(_shaderBaseHash_gs, LatteGPUState.contextRegister, geometryShaderPtr, geometryShaderSize, geometryCopyShader, geometryCopyShaderSize, _activeVertexShader->ringParameterCount, options, &decompilerOutput); LatteDecompilerShader* geometryShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, baseHash, true, 0, LatteGPUState.contextRegister); if (geometryShader->hasError == false) { @@ -810,7 +806,7 @@ LatteDecompilerShader* LatteShader_CompileSeparablePixelShader(uint64 baseHash, LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Pixel, usesGeometryShader); LatteDecompilerOutput_t decompilerOutput{}; - LatteDecompiler_DecompilePixelShader(baseHash, LatteGPUState.contextRegister, pixelShaderPtr, pixelShaderSize, LatteGPUState.contextNew.GetSpecialStateValues(), options, &decompilerOutput); + LatteDecompiler_DecompilePixelShader(baseHash, LatteGPUState.contextRegister, pixelShaderPtr, pixelShaderSize, options, &decompilerOutput); LatteDecompilerShader* pixelShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, baseHash, true, 0, LatteGPUState.contextRegister); psAuxHash = pixelShader->auxHash; LatteShader_DumpShader(_shaderBaseHash_ps, psAuxHash, pixelShader); diff --git a/src/Cafe/HW/Latte/Core/LatteShaderCache.cpp b/src/Cafe/HW/Latte/Core/LatteShaderCache.cpp index 9a6c3f84b..d33c3b506 100644 --- a/src/Cafe/HW/Latte/Core/LatteShaderCache.cpp +++ b/src/Cafe/HW/Latte/Core/LatteShaderCache.cpp @@ -651,15 +651,12 @@ bool LatteShaderCache_readSeparableVertexShader(MemStreamReader& streamReader, u LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Vertex, usesGeometryShader); // decompile vertex shader LatteDecompilerOutput_t decompilerOutput{}; - LatteFetchShader* fetchShaderList[1]; - fetchShaderList[0] = fetchShader; - LatteDecompiler_DecompileVertexShader(shaderBaseHash, lcr->GetRawView(), vertexShaderData.data(), vertexShaderData.size(), fetchShaderList, 1, lcr->GetSpecialStateValues(), options, &decompilerOutput); + LatteDecompiler_DecompileVertexShader(shaderBaseHash, lcr->GetRawView(), vertexShaderData.data(), vertexShaderData.size(), fetchShader, options, &decompilerOutput); LatteDecompilerShader* vertexShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, shaderBaseHash, false, shaderAuxHash, lcr->GetRawView()); // compile LatteShader_DumpShader(shaderBaseHash, shaderAuxHash, vertexShader); LatteShader_DumpRawShader(shaderBaseHash, shaderAuxHash, SHADER_DUMP_TYPE_VERTEX, vertexShaderData.data(), vertexShaderData.size()); LatteShaderCache_loadOrCompileSeparableShader(vertexShader, shaderBaseHash, shaderAuxHash); - catchOpenGLError(); LatteSHRC_RegisterShader(vertexShader, shaderBaseHash, shaderAuxHash); return true; } @@ -696,7 +693,7 @@ bool LatteShaderCache_readSeparableGeometryShader(MemStreamReader& streamReader, LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Geometry, true); // decompile geometry shader LatteDecompilerOutput_t decompilerOutput{}; - LatteDecompiler_DecompileGeometryShader(shaderBaseHash, lcr->GetRawView(), geometryShaderData.data(), geometryShaderData.size(), geometryCopyShaderData.data(), geometryCopyShaderData.size(), lcr->GetSpecialStateValues(), vsRingParameterCount, options, &decompilerOutput); + LatteDecompiler_DecompileGeometryShader(shaderBaseHash, lcr->GetRawView(), geometryShaderData.data(), geometryShaderData.size(), geometryCopyShaderData.data(), geometryCopyShaderData.size(), vsRingParameterCount, options, &decompilerOutput); LatteDecompilerShader* geometryShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, shaderBaseHash, false, shaderAuxHash, lcr->GetRawView()); // compile LatteShader_DumpShader(shaderBaseHash, shaderAuxHash, geometryShader); @@ -734,13 +731,12 @@ bool LatteShaderCache_readSeparablePixelShader(MemStreamReader& streamReader, ui LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Pixel, usesGeometryShader); // decompile pixel shader LatteDecompilerOutput_t decompilerOutput{}; - LatteDecompiler_DecompilePixelShader(shaderBaseHash, lcr->GetRawView(), pixelShaderData.data(), pixelShaderData.size(), lcr->GetSpecialStateValues(), options, &decompilerOutput); + LatteDecompiler_DecompilePixelShader(shaderBaseHash, lcr->GetRawView(), pixelShaderData.data(), pixelShaderData.size(), options, &decompilerOutput); LatteDecompilerShader* pixelShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, shaderBaseHash, false, shaderAuxHash, lcr->GetRawView()); // compile LatteShader_DumpShader(shaderBaseHash, shaderAuxHash, pixelShader); LatteShader_DumpRawShader(shaderBaseHash, shaderAuxHash, SHADER_DUMP_TYPE_PIXEL, pixelShaderData.data(), pixelShaderData.size()); LatteShaderCache_loadOrCompileSeparableShader(pixelShader, shaderBaseHash, shaderAuxHash); - catchOpenGLError(); LatteSHRC_RegisterShader(pixelShader, shaderBaseHash, shaderAuxHash); return true; } diff --git a/src/Cafe/HW/Latte/Core/LatteStreamoutGPU.cpp b/src/Cafe/HW/Latte/Core/LatteStreamoutGPU.cpp index e212dd186..ccea31b81 100644 --- a/src/Cafe/HW/Latte/Core/LatteStreamoutGPU.cpp +++ b/src/Cafe/HW/Latte/Core/LatteStreamoutGPU.cpp @@ -101,16 +101,16 @@ void LatteStreamout_PrepareDrawcall(uint32 count, uint32 instanceCount) if (geometryShader) { #ifdef CEMU_DEBUG_ASSERT - cemu_assert_debug(vertexShader->streamoutBufferWriteMask2.any() == false); + cemu_assert_debug(vertexShader->streamoutBufferWriteMask.any() == false); #endif for (sint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++) - if (geometryShader->streamoutBufferWriteMask2[i]) + if (geometryShader->streamoutBufferWriteMask[i]) streamoutWriteMask |= (1 << i); } else { for (sint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++) - if (vertexShader->streamoutBufferWriteMask2[i]) + if (vertexShader->streamoutBufferWriteMask[i]) streamoutWriteMask |= (1 << i); } activeStreamoutOperation.streamoutWriteMask = streamoutWriteMask; diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.cpp index ee7e14bd6..52254a2ac 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.cpp @@ -1071,34 +1071,24 @@ void LatteDecompiler_InitContext(LatteDecompilerShaderContext& dCtx, const Latte dCtx.output = output; dCtx.shaderType = shaderType; dCtx.options = &options; - output->shaderType = shaderType; dCtx.shaderBaseHash = shaderBaseHash; dCtx.contextRegisters = contextRegisters; dCtx.contextRegistersNew = (LatteContextRegister*)contextRegisters; - - // set context parameters (redundant stuff since options can be accessed directly) - dCtx.usesGeometryShader = options.usesGeometryShader; - dCtx.useTFViaSSBO = options.useTFViaSSBO; + output->shaderType = shaderType; } -void LatteDecompiler_DecompileVertexShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, struct LatteFetchShader** fetchShaderList, sint32 fetchShaderCount, uint32* hleSpecialState, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output) +void LatteDecompiler_DecompileVertexShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, struct LatteFetchShader* fetchShader, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output) { + cemu_assert_debug(fetchShader); cemu_assert_debug((programSize & 3) == 0); performanceMonitor.gpuTime_shaderCreate.beginMeasuring(); // prepare decompiler context LatteDecompilerShaderContext shaderContext = { 0 }; LatteDecompiler_InitContext(shaderContext, options, output, LatteConst::ShaderType::Vertex, shaderBaseHash, contextRegisters); - cemu_assert_debug(fetchShaderCount == 1); - for (sint32 i = 0; i < fetchShaderCount; i++) - { - shaderContext.fetchShaderList[i] = fetchShaderList[i]; - } - shaderContext.fetchShaderCount = fetchShaderCount; + shaderContext.fetchShader = fetchShader; // prepare shader (deprecated) - LatteDecompilerShader* shader = new LatteDecompilerShader(); - shader->shaderType = LatteConst::ShaderType::Vertex; - shader->compatibleFetchShader = shaderContext.fetchShaderList[0]; - shaderContext.shaderType = LatteConst::ShaderType::Vertex; + LatteDecompilerShader* shader = new LatteDecompilerShader(LatteConst::ShaderType::Vertex); + shader->compatibleFetchShader = shaderContext.fetchShader; output->shaderType = LatteConst::ShaderType::Vertex; shaderContext.shader = shader; output->shader = shader; @@ -1112,20 +1102,16 @@ void LatteDecompiler_DecompileVertexShader(uint64 shaderBaseHash, uint32* contex performanceMonitor.gpuTime_shaderCreate.endMeasuring(); } -void LatteDecompiler_DecompileGeometryShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint8* gsCopyProgramData, uint32 gsCopyProgramSize, uint32* hleSpecialState, uint32 vsRingParameterCount, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output) +void LatteDecompiler_DecompileGeometryShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint8* gsCopyProgramData, uint32 gsCopyProgramSize, uint32 vsRingParameterCount, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output) { cemu_assert_debug((programSize & 3) == 0); performanceMonitor.gpuTime_shaderCreate.beginMeasuring(); // prepare decompiler context LatteDecompilerShaderContext shaderContext = { 0 }; - shaderContext.fetchShaderCount = 0; LatteDecompiler_InitContext(shaderContext, options, output, LatteConst::ShaderType::Geometry, shaderBaseHash, contextRegisters); // prepare shader - LatteDecompilerShader* shader = new LatteDecompilerShader(); - shaderContext.output = output; - shader->shaderType = LatteConst::ShaderType::Geometry; + LatteDecompilerShader* shader = new LatteDecompilerShader(LatteConst::ShaderType::Geometry); shader->ringParameterCountFromPrevStage = vsRingParameterCount; - shaderContext.shaderType = LatteConst::ShaderType::Geometry; output->shaderType = LatteConst::ShaderType::Geometry; shaderContext.shader = shader; output->shader = shader; @@ -1147,7 +1133,7 @@ void LatteDecompiler_DecompileGeometryShader(uint64 shaderBaseHash, uint32* cont performanceMonitor.gpuTime_shaderCreate.endMeasuring(); } -void LatteDecompiler_DecompilePixelShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint32* hleSpecialState, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output) +void LatteDecompiler_DecompilePixelShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output) { cemu_assert_debug((programSize & 3) == 0); performanceMonitor.gpuTime_shaderCreate.beginMeasuring(); @@ -1156,10 +1142,7 @@ void LatteDecompiler_DecompilePixelShader(uint64 shaderBaseHash, uint32* context LatteDecompiler_InitContext(shaderContext, options, output, LatteConst::ShaderType::Pixel, shaderBaseHash, contextRegisters); shaderContext.contextRegisters = contextRegisters; // prepare shader - LatteDecompilerShader* shader = new LatteDecompilerShader(); - shaderContext.output = output; - shader->shaderType = LatteConst::ShaderType::Pixel; - shaderContext.shaderType = LatteConst::ShaderType::Pixel; + LatteDecompilerShader* shader = new LatteDecompilerShader(LatteConst::ShaderType::Pixel); output->shaderType = LatteConst::ShaderType::Pixel; shaderContext.shader = shader; output->shader = shader; diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h index febec1a42..90c92eac8 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h @@ -148,6 +148,8 @@ struct LatteDecompilerShaderResourceMapping struct LatteDecompilerShader { + LatteDecompilerShader(LatteConst::ShaderType shaderType) : shaderType(shaderType) {} + LatteDecompilerShader* next; LatteConst::ShaderType shaderType; uint64 baseHash; @@ -167,21 +169,21 @@ struct LatteDecompilerShader Latte::E_DIM textureUnitDim[LATTE_NUM_MAX_TEX_UNITS]; // dimension of texture unit, from the currently set texture bool textureIsIntegerFormat[LATTE_NUM_MAX_TEX_UNITS]{}; // analyzer stage (uniforms) - uint8 uniformMode; // determines how uniforms are managed within the shader (see GPU7_DECOMPILER_UNIFORM_MODE_* constants) + uint8 uniformMode; // determines how uniforms are managed within the shader (see LATTE_DECOMPILER_UNIFORM_MODE_* constants) uint64 uniformDataHash64[2]; // used to avoid redundant calls to glUniform* std::vector list_remappedUniformEntries; // analyzer stage (textures) std::bitset textureUnitMask2; - uint16 textureUnitSamplerAssignment[LATTE_NUM_MAX_TEX_UNITS]; // GPU7_SAMPLER_NONE means undefined + uint16 textureUnitSamplerAssignment[LATTE_NUM_MAX_TEX_UNITS]; // LATTE_DECOMPILER_SAMPLER_NONE means undefined bool textureUsesDepthCompare[LATTE_NUM_MAX_TEX_UNITS]; // analyzer stage (pixel outputs) - uint32 pixelColorOutputMask; // from LSB to MSB, 1 bit per written output. 1 if written (indices of color attachments, may differ from export index inside the pixel shader) + uint32 pixelColorOutputMask; // from LSB to MSB, 1 bit per written output. 1 if written (indices of color attachments) // analyzer stage (geometry shader parameters/inputs) uint32 ringParameterCount; uint32 ringParameterCountFromPrevStage; // used in geometry shader to hold VS ringParameterCount // analyzer stage (misc) - std::bitset streamoutBufferWriteMask2; + std::bitset streamoutBufferWriteMask; bool hasStreamoutBufferWrite; // output code class StringBuf* strBuf_shaderSource{nullptr}; @@ -283,9 +285,9 @@ struct LatteDecompilerOutput_t struct LatteDecompilerSubroutineInfo; -void LatteDecompiler_DecompileVertexShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, struct LatteFetchShader** fetchShaderList, sint32 fetchShaderCount, uint32* hleSpecialState, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output); -void LatteDecompiler_DecompileGeometryShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint8* gsCopyProgramData, uint32 gsCopyProgramSize, uint32* hleSpecialState, uint32 vsRingParameterCount, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output); -void LatteDecompiler_DecompilePixelShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint32* hleSpecialState, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output); +void LatteDecompiler_DecompileVertexShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, struct LatteFetchShader* fetchShader, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output); +void LatteDecompiler_DecompileGeometryShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint8* gsCopyProgramData, uint32 gsCopyProgramSize, uint32 vsRingParameterCount, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output); +void LatteDecompiler_DecompilePixelShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output); // specialized shader parsers diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp index 665628963..dd179ac00 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp @@ -630,7 +630,7 @@ namespace LatteDecompiler if (decompilerContext->shaderType == LatteConst::ShaderType::Geometry && decompilerContext->analyzer.outputPointSize && decompilerContext->analyzer.writesPointSize == false) decompilerContext->hasUniformVarBlock = true; // uf_pointSize if (decompilerContext->analyzer.useSSBOForStreamout && - (decompilerContext->shaderType == LatteConst::ShaderType::Vertex && decompilerContext->usesGeometryShader == false) || + (decompilerContext->shaderType == LatteConst::ShaderType::Vertex && !decompilerContext->options->usesGeometryShader) || (decompilerContext->shaderType == LatteConst::ShaderType::Geometry)) { decompilerContext->hasUniformVarBlock = true; // uf_verticesPerInstance and uf_streamoutBufferBase* @@ -735,7 +735,7 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD // analyze render state shaderContext->analyzer.isPointsPrimitive = shaderContext->contextRegistersNew->VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE() == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::POINTS; shaderContext->analyzer.hasStreamoutEnable = shaderContext->contextRegisters[mmVGT_STRMOUT_EN] != 0; // set if the shader is used for transform feedback operations - if (shaderContext->shaderType == LatteConst::ShaderType::Vertex && shaderContext->usesGeometryShader == false) + if (shaderContext->shaderType == LatteConst::ShaderType::Vertex && !shaderContext->options->usesGeometryShader) shaderContext->analyzer.outputPointSize = shaderContext->analyzer.isPointsPrimitive; else if (shaderContext->shaderType == LatteConst::ShaderType::Geometry) { @@ -746,10 +746,9 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD // analyze input attributes for vertex/geometry shader if (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry) { - for (sint32 f = 0; f < shaderContext->fetchShaderCount; f++) + if(shaderContext->fetchShader) { - LatteFetchShader* parsedFetchShader = (LatteFetchShader*)shaderContext->fetchShaderList[f]; - + LatteFetchShader* parsedFetchShader = shaderContext->fetchShader; for(auto& bufferGroup : parsedFetchShader->bufferGroups) { for (sint32 i = 0; i < bufferGroup.attribCount; i++) @@ -938,9 +937,9 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD // analyze input attributes again (if shader has relative GPR read) if(shaderContext->analyzer.usesRelativeGPRRead && (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry) ) { - for (sint32 f = 0; f < shaderContext->fetchShaderCount; f++) + if(shaderContext->fetchShader) { - LatteFetchShader* parsedFetchShader = (LatteFetchShader*)shaderContext->fetchShaderList[f]; + LatteFetchShader* parsedFetchShader = shaderContext->fetchShader; for(auto& bufferGroup : parsedFetchShader->bufferGroups) { for (sint32 i = 0; i < bufferGroup.attribCount; i++) @@ -1077,7 +1076,7 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD if(list_subroutineAddrs.empty() == false) forceLogDebug_printf("Todo - analyze shader subroutine CF stack"); // TF mode - if (shaderContext->useTFViaSSBO && shaderContext->output->streamoutBufferWriteMask.any()) + if (shaderContext->options->useTFViaSSBO && shaderContext->output->streamoutBufferWriteMask.any()) { shaderContext->analyzer.useSSBOForStreamout = true; } diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSL.cpp index 2fb269fb8..1e1d853fe 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSL.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSL.cpp @@ -3363,7 +3363,7 @@ void _emitCFRingWriteCode(LatteDecompilerShaderContext* shaderContext, LatteDeco if ((cfInstruction->memWriteCompMask&(1 << i)) == 0) continue; - if (shaderContext->useTFViaSSBO) + if (shaderContext->options->useTFViaSSBO) { uint32 u32Offset = streamWrite->exportArrayBase + i; src->addFmt("sb_buffer[sbBase{} + {}]", streamWrite->bufferIndex, u32Offset); @@ -3483,7 +3483,7 @@ void _emitStreamWriteCode(LatteDecompilerShaderContext* shaderContext, LatteDeco if ((cfInstruction->memWriteCompMask&(1 << i)) == 0) continue; - if (shaderContext->useTFViaSSBO) + if (shaderContext->options->useTFViaSSBO) { uint32 u32Offset = cfInstruction->exportArrayBase + i; src->addFmt("sb_buffer[sbBase{} + {}]", streamoutBufferIndex, u32Offset); @@ -3831,7 +3831,7 @@ void LatteDecompiler_emitGLSLHelperFunctions(LatteDecompilerShaderContext* shade //fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){float r = a*b;r = intBitsToFloat(floatBitsToInt(r)&(((floatBitsToInt(a) != 0) && (floatBitsToInt(b) != 0))?0xFFFFFFFF:0));return r;}" STR_LINEBREAK); works if( LatteGPUState.glVendor == GLVENDOR_NVIDIA && !ActiveSettings::DumpShadersEnabled()) - fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){return mix(0.0, a*b, (a != 0.0) && (b != 0.0));}" _CRLF); // compiles faster on Nvidia and also results in lower RAM usage + fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){return mix(0.0, a*b, (a != 0.0) && (b != 0.0));}" _CRLF); // compiles faster on Nvidia and also results in lower RAM usage (OpenGL) else fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){ if( a == 0.0 || b == 0.0 ) return 0.0; return a*b; }" _CRLF); } @@ -3841,51 +3841,6 @@ void LatteDecompiler_emitGLSLHelperFunctions(LatteDecompilerShaderContext* shade } } -void _addPixelShaderExtraDebugInfo(LatteDecompilerShaderContext* shaderContext, StringBuf* fCStr_shaderSource) -{ -#ifdef CEMU_DEBUG_ASSERT - fCStr_shaderSource->add("// Color buffers:" _CRLF); - for(uint32 i=0; i<8; i++) - { - uint32 regColorBuffer = shaderContext->contextRegisters[mmCB_COLOR0_BASE+i]; - uint32 regColorSize = shaderContext->contextRegisters[mmCB_COLOR0_SIZE+i]; - uint32 regColorInfo = shaderContext->contextRegisters[mmCB_COLOR0_INFO+i]; - uint32 regColorView = shaderContext->contextRegisters[mmCB_COLOR0_VIEW+i]; - MPTR colorBufferPhysMem = regColorBuffer; - if( regColorBuffer == MPTR_NULL ) - continue; - - uint32 colorBufferFormat = (regColorInfo>>2)&0x3F; // format - uint32 colorBufferTileMode = 0; - colorBufferTileMode = (regColorInfo >> 8) & 0xF; - switch ( (regColorInfo >> 12) & 7 ) - { - case 4: - colorBufferFormat |= 0x100; - break; - case 1: - colorBufferFormat |= 0x200; - break; - case 5: - colorBufferFormat |= 0x300; - break; - case 6: - colorBufferFormat |= 0x400; - break; - case 7: - colorBufferFormat |= 0x800; - break; - default: - break; - } - - uint32 colorBufferWidth = (regColorSize>>0)&0xFFFF; - uint32 colorBufferHeight = (regColorSize>>16)&0xFFFF; - fCStr_shaderSource->addFmt("// Color{}: {}x{} at 0x{:08x} fmt {:04x} tm {}" _CRLF, i, colorBufferWidth, colorBufferHeight, colorBufferPhysMem, colorBufferFormat, colorBufferTileMode); - } -#endif -} - #include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSLHeader.hpp" void LatteDecompiler_emitAttributeImport(LatteDecompilerShaderContext* shaderContext, LatteParsedFetchShaderAttribute_t& attrib) @@ -3954,18 +3909,14 @@ void LatteDecompiler_emitGLSLShader(LatteDecompilerShaderContext* shaderContext, src->add("#extension GL_ARB_texture_gather : enable" _CRLF); src->add("#extension GL_ARB_separate_shader_objects : enable" _CRLF); - if (shaderContext->analyzer.hasStreamoutWrite || shaderContext->usesGeometryShader ) + if (shaderContext->analyzer.hasStreamoutWrite || shaderContext->options->usesGeometryShader ) src->add("#extension GL_ARB_enhanced_layouts : enable" _CRLF); // debug info - src->addFmt("// shader %08x%08x" _CRLF, (uint32)(shaderContext->shaderBaseHash >> 32), (uint32)(shaderContext->shaderBaseHash & 0xFFFFFFFF)); + src->addFmt("// shader {:016x}" _CRLF, shaderContext->shaderBaseHash); #ifdef CEMU_DEBUG_ASSERT src->addFmt("// usesIntegerValues: {}" _CRLF, shaderContext->analyzer.usesIntegerValues?"true":"false"); src->addFmt(_CRLF); - - if( shader->shaderType == LatteConst::ShaderType::Pixel ) - _addPixelShaderExtraDebugInfo(shaderContext, src); - #endif // header part (definitions for inputs and outputs) LatteDecompiler::emitHeader(shaderContext); @@ -3982,7 +3933,6 @@ void LatteDecompiler_emitGLSLShader(LatteDecompilerShaderContext* shaderContext, { if (shaderContext->analyzer.usesRelativeGPRRead || (shaderContext->analyzer.gprUseMask[i / 8] & (1 << (i & 7))) != 0) { - //fCStr_appendFormatted(fCStr_shaderSource, "ivec4 R{}i, R{}i, R{}i, R{}i;" STR_LINEBREAK, i*4+0, i*4+1, i*4+2, i*4+3); if (shaderContext->typeTracker.genIntReg) src->addFmt("ivec4 R{}i = ivec4(0);" _CRLF, i); else if (shaderContext->typeTracker.genFloatReg) @@ -4035,7 +3985,6 @@ void LatteDecompiler_emitGLSLShader(LatteDecompilerShaderContext* shaderContext, src->add("bool predResult = true;" _CRLF); if(shaderContext->analyzer.modifiesPixelActiveState ) { - // cemu_assert_debug(shaderContext->analyzer.activeStackMaxDepth == 0); src->addFmt("bool activeMaskStack[{}];" _CRLF, shaderContext->analyzer.activeStackMaxDepth+1); src->addFmt("bool activeMaskStackC[{}];" _CRLF, shaderContext->analyzer.activeStackMaxDepth+2); for (sint32 i = 0; i < shaderContext->analyzer.activeStackMaxDepth; i++) @@ -4058,8 +4007,11 @@ void LatteDecompiler_emitGLSLShader(LatteDecompilerShaderContext* shaderContext, } } // helper variables for cube maps (todo: Only emit when used) - src->addFmt("vec3 cubeMapSTM;" _CRLF); - src->addFmt("int cubeMapFaceId;" _CRLF); + if (shaderContext->analyzer.hasRedcCUBE) + { + src->add("vec3 cubeMapSTM;" _CRLF); + src->add("int cubeMapFaceId;" _CRLF); + } for(sint32 i=0; ioutput->textureUnitMask[i]) @@ -4106,25 +4058,18 @@ void LatteDecompiler_emitGLSLShader(LatteDecompilerShaderContext* shaderContext, cemu_assert_unimplemented(); } - if (shaderContext->fetchShaderCount == 1) + LatteFetchShader* parsedFetchShader = shaderContext->fetchShader; + for(auto& bufferGroup : parsedFetchShader->bufferGroups) { - LatteFetchShader* parsedFetchShader = shaderContext->fetchShaderList[0]; - for(auto& bufferGroup : parsedFetchShader->bufferGroups) - { - for(sint32 i=0; ibufferGroupsInvalid) - { - // these attributes point to non-existent buffers - // todo - figure out how the hardware actually handles this, currently we assume the input values are zero - for (sint32 i = 0; i < bufferGroup.attribCount; i++) - LatteDecompiler_emitAttributeImport(shaderContext, bufferGroup.attrib[i]); - } + for(sint32 i=0; ibufferGroupsInvalid) { - cemu_assert_unimplemented(); + // these attributes point to non-existent buffers + // todo - figure out how the hardware actually handles this, currently we assume the input values are zero + for (sint32 i = 0; i < bufferGroup.attribCount; i++) + LatteDecompiler_emitAttributeImport(shaderContext, bufferGroup.attrib[i]); } } else if (shader->shaderType == LatteConst::ShaderType::Pixel) @@ -4172,7 +4117,7 @@ void LatteDecompiler_emitGLSLShader(LatteDecompilerShaderContext* shaderContext, continue; } - if (shaderContext->usesGeometryShader) + if (shaderContext->options->usesGeometryShader) { // import from geometry shader if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) @@ -4216,7 +4161,7 @@ void LatteDecompiler_emitGLSLShader(LatteDecompilerShaderContext* shaderContext, // vertex shader should write renderstate point size at the end if required but not modified by shader if (shaderContext->analyzer.outputPointSize && shaderContext->analyzer.writesPointSize == false) { - if (shader->shaderType == LatteConst::ShaderType::Vertex && shaderContext->usesGeometryShader == false) + if (shader->shaderType == LatteConst::ShaderType::Vertex && shaderContext->options->usesGeometryShader == false) src->add("gl_PointSize = uf_pointSize;" _CRLF); } // end of shader main diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSLHeader.hpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSLHeader.hpp index a2545d1d9..581bfee50 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSLHeader.hpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSLHeader.hpp @@ -95,7 +95,7 @@ namespace LatteDecompiler } if (decompilerContext->analyzer.outputPointSize && decompilerContext->analyzer.writesPointSize == false) { - if ((decompilerContext->shaderType == LatteConst::ShaderType::Vertex && !decompilerContext->usesGeometryShader) || + if ((decompilerContext->shaderType == LatteConst::ShaderType::Vertex && !decompilerContext->options->usesGeometryShader) || decompilerContext->shaderType == LatteConst::ShaderType::Geometry) { uniformCurrentOffset = (uniformCurrentOffset + 3)&~3; @@ -135,7 +135,7 @@ namespace LatteDecompiler } // define uf_verticesPerInstance + uf_streamoutBufferBaseX if (decompilerContext->analyzer.useSSBOForStreamout && - (shader->shaderType == LatteConst::ShaderType::Vertex && decompilerContext->usesGeometryShader == false) || + (shader->shaderType == LatteConst::ShaderType::Vertex && decompilerContext->options->usesGeometryShader == false) || (shader->shaderType == LatteConst::ShaderType::Geometry) ) { shaderSrc->add("uniform int uf_verticesPerInstance;" _CRLF); @@ -298,7 +298,7 @@ namespace LatteDecompiler if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex || decompilerContext->shaderType == LatteConst::ShaderType::Geometry) { - if (decompilerContext->usesGeometryShader) + if (decompilerContext->options->usesGeometryShader) src->add("#define V2G_LAYOUT layout(location = 0)" _CRLF); } } @@ -322,7 +322,7 @@ namespace LatteDecompiler src->add("#define XFB_BLOCK_LAYOUT(__bufferIndex, __stride, __location) layout(xfb_buffer = __bufferIndex, xfb_stride = __stride)" _CRLF); src->add("#define SET_POSITION(_v) gl_Position = _v\r\n"); - if (decompilerContext->usesGeometryShader) + if (decompilerContext->options->usesGeometryShader) src->add("#define V2G_LAYOUT" _CRLF); } else if (decompilerContext->shaderType == LatteConst::ShaderType::Pixel) @@ -430,7 +430,7 @@ namespace LatteDecompiler { auto src = decompilerContext->shaderSource; // per-vertex output (VS or GS) - if ((decompilerContext->shaderType == LatteConst::ShaderType::Vertex && decompilerContext->usesGeometryShader == false) || + if ((decompilerContext->shaderType == LatteConst::ShaderType::Vertex && !decompilerContext->options->usesGeometryShader) || (decompilerContext->shaderType == LatteConst::ShaderType::Geometry)) { src->add("out gl_PerVertex" _CRLF); @@ -441,7 +441,7 @@ namespace LatteDecompiler src->add("};" _CRLF); } // varyings (variables passed from vertex to pixel shader, only if geometry stage is disabled - if (decompilerContext->usesGeometryShader == false) + if (decompilerContext->options->usesGeometryShader == false) { if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex) { @@ -537,7 +537,7 @@ namespace LatteDecompiler // streamout buffer (transform feedback) if ((decompilerContext->shaderType == LatteConst::ShaderType::Vertex || decompilerContext->shaderType == LatteConst::ShaderType::Geometry) && decompilerContext->analyzer.hasStreamoutEnable) { - if (decompilerContext->useTFViaSSBO) + if (decompilerContext->options->useTFViaSSBO) { if (decompilerContext->analyzer.useSSBOForStreamout && decompilerContext->analyzer.hasStreamoutWrite) { diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h index 9e3120e65..49bd3a644 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h @@ -150,11 +150,10 @@ struct LatteDecompilerShaderContext uint32* contextRegisters; // deprecated struct LatteContextRegister* contextRegistersNew; uint64 shaderBaseHash; - StringBuf* shaderSource; // move to output struct + StringBuf* shaderSource; std::vector cfInstructions; // fetch shader (required for vertex shader) - LatteFetchShader* fetchShaderList[32]; - sint32 fetchShaderCount; + LatteFetchShader* fetchShader{}; // geometry copy shader (only present when geometry shader is active) LatteParsedGSCopyShader* parsedGSCopyShader; // state @@ -217,8 +216,6 @@ struct LatteDecompilerShaderContext sint32 currentBindingPointVK{}; // misc - bool usesGeometryShader; // for VS - bool useTFViaSSBO; std::vector list_subroutines; }; From 466ad8d35e99315fabb8a8491cb71879794e0e84 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Thu, 2 Mar 2023 16:18:50 +0100 Subject: [PATCH 3/4] Latte/Vulkan: Set RoundingModeRTE for all float types Seems like GLSL's unpackHalf2x16() is affected by the 16bit rounding setting --- .../LegacyShaderDecompiler/LatteDecompiler.h | 42 +++++++++---------- .../LatteDecompilerEmitGLSLHeader.hpp | 5 ++- src/Cafe/HW/Latte/Renderer/RendererShader.cpp | 2 +- .../TextureRelationWindow.cpp | 2 +- 4 files changed, 27 insertions(+), 24 deletions(-) diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h index 90c92eac8..3264f5d3a 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h @@ -150,50 +150,50 @@ struct LatteDecompilerShader { LatteDecompilerShader(LatteConst::ShaderType shaderType) : shaderType(shaderType) {} - LatteDecompilerShader* next; + LatteDecompilerShader* next{nullptr}; LatteConst::ShaderType shaderType; - uint64 baseHash; - uint64 auxHash; + uint64 baseHash{0}; + uint64 auxHash{0}; // vertex shader struct LatteFetchShader* compatibleFetchShader{}; // error tracking - bool hasError; // if set, the shader cannot be used + bool hasError{false}; // if set, the shader cannot be used // optimized access / iteration // list of uniform buffers used uint8 uniformBufferList[LATTE_NUM_MAX_UNIFORM_BUFFERS]; - uint8 uniformBufferListCount; + uint8 uniformBufferListCount{ 0 }; // list of used texture units (faster access than iterating textureUnitMask) uint8 textureUnitList[LATTE_NUM_MAX_TEX_UNITS]; - uint8 textureUnitListCount; + uint8 textureUnitListCount{ 0 }; // input - Latte::E_DIM textureUnitDim[LATTE_NUM_MAX_TEX_UNITS]; // dimension of texture unit, from the currently set texture + Latte::E_DIM textureUnitDim[LATTE_NUM_MAX_TEX_UNITS]{}; // dimension of texture unit, from the currently set texture bool textureIsIntegerFormat[LATTE_NUM_MAX_TEX_UNITS]{}; // analyzer stage (uniforms) - uint8 uniformMode; // determines how uniforms are managed within the shader (see LATTE_DECOMPILER_UNIFORM_MODE_* constants) - uint64 uniformDataHash64[2]; // used to avoid redundant calls to glUniform* + uint8 uniformMode{0}; // determines how uniforms are managed within the shader (see LATTE_DECOMPILER_UNIFORM_MODE_* constants) + uint64 uniformDataHash64[2]{0}; // used to avoid redundant calls to glUniform* std::vector list_remappedUniformEntries; // analyzer stage (textures) std::bitset textureUnitMask2; - uint16 textureUnitSamplerAssignment[LATTE_NUM_MAX_TEX_UNITS]; // LATTE_DECOMPILER_SAMPLER_NONE means undefined - bool textureUsesDepthCompare[LATTE_NUM_MAX_TEX_UNITS]; + uint16 textureUnitSamplerAssignment[LATTE_NUM_MAX_TEX_UNITS]{ 0 }; // LATTE_DECOMPILER_SAMPLER_NONE means undefined + bool textureUsesDepthCompare[LATTE_NUM_MAX_TEX_UNITS]{}; // analyzer stage (pixel outputs) - uint32 pixelColorOutputMask; // from LSB to MSB, 1 bit per written output. 1 if written (indices of color attachments) + uint32 pixelColorOutputMask{ 0 }; // from LSB to MSB, 1 bit per written output. 1 if written (indices of color attachments) // analyzer stage (geometry shader parameters/inputs) - uint32 ringParameterCount; - uint32 ringParameterCountFromPrevStage; // used in geometry shader to hold VS ringParameterCount + uint32 ringParameterCount{ 0 }; + uint32 ringParameterCountFromPrevStage{ 0 }; // used in geometry shader to hold VS ringParameterCount // analyzer stage (misc) std::bitset streamoutBufferWriteMask; - bool hasStreamoutBufferWrite; + bool hasStreamoutBufferWrite{ false }; // output code - class StringBuf* strBuf_shaderSource{nullptr}; + class StringBuf* strBuf_shaderSource{ nullptr }; // separable shaders - RendererShader* shader; - bool isCustomShader; + RendererShader* shader{ nullptr }; + bool isCustomShader{ false }; - uint32 outputParameterMask; + uint32 outputParameterMask{ 0 }; // resource mapping (binding points) - LatteDecompilerShaderResourceMapping resourceMapping; + LatteDecompilerShaderResourceMapping resourceMapping{}; // uniforms struct { @@ -210,7 +210,7 @@ struct LatteDecompilerShader sint32 loc_verticesPerInstance; sint32 loc_streamoutBufferBase[LATTE_NUM_STREAMOUT_BUFFER]; sint32 uniformRangeSize; // entire size of uniform variable block - }uniform; + }uniform{ 0 }; // fast access struct _RemappedUniformBufferGroup { diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSLHeader.hpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSLHeader.hpp index 581bfee50..0bd4eb6f7 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSLHeader.hpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSLHeader.hpp @@ -309,7 +309,10 @@ namespace LatteDecompiler if (decompilerContext->options->spirvInstrinsics.hasRoundingModeRTEFloat32) { src->add("#extension GL_EXT_spirv_intrinsics: enable" _CRLF); - src->add("spirv_execution_mode(4462, 32);" _CRLF); // RoundingModeRTE 32 + // set RoundingModeRTE + src->add("spirv_execution_mode(4462, 16);" _CRLF); + src->add("spirv_execution_mode(4462, 32);" _CRLF); + src->add("spirv_execution_mode(4462, 64);" _CRLF); } src->add("#else" _CRLF); // OpenGL defines diff --git a/src/Cafe/HW/Latte/Renderer/RendererShader.cpp b/src/Cafe/HW/Latte/Renderer/RendererShader.cpp index 67b76b4e0..f66dc9f4b 100644 --- a/src/Cafe/HW/Latte/Renderer/RendererShader.cpp +++ b/src/Cafe/HW/Latte/Renderer/RendererShader.cpp @@ -17,7 +17,7 @@ uint32 RendererShader::GeneratePrecompiledCacheId() v += (EMULATOR_VERSION_MINOR * 100u); // settings that can influence shaders - v += (uint32)g_current_game_profile->GetAccurateShaderMul() * 133; // this option modifies shaders + v += (uint32)g_current_game_profile->GetAccurateShaderMul() * 133; return v; } diff --git a/src/gui/windows/TextureRelationViewer/TextureRelationWindow.cpp b/src/gui/windows/TextureRelationViewer/TextureRelationWindow.cpp index 718bca3a4..5b79ffd86 100644 --- a/src/gui/windows/TextureRelationViewer/TextureRelationWindow.cpp +++ b/src/gui/windows/TextureRelationViewer/TextureRelationWindow.cpp @@ -228,7 +228,7 @@ void TextureRelationViewerWindow::_setTextureRelationListItemTexture(wxListCtrl* uiList->SetItem(rowIndex, columnIndex, tempStr); columnIndex++; // tilemode - sprintf(tempStr, "%d", texInfo->tileMode); + sprintf(tempStr, "%d", (int)texInfo->tileMode); uiList->SetItem(rowIndex, columnIndex, tempStr); columnIndex++; // sliceRange From a2c518354fc764d4eac65d33950568e0b5253380 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Thu, 2 Mar 2023 16:44:22 +0100 Subject: [PATCH 4/4] Remove unnecessary include --- src/Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.cpp index 672b9c802..352ca4e4c 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.cpp @@ -9,11 +9,6 @@ #include #include -// required for modifying SPIR-V -#include - - - bool s_isLoadingShadersVk{ false }; class FileCache* s_spirvCache{nullptr};