Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better shaders #3

Merged
merged 8 commits into from
Sep 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 22 additions & 3 deletions src/Cafe/HW/Latte/Core/FetchShader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,23 @@ void LatteFetchShader::CalculateFetchShaderVkHash()
this->vkPipelineHashFragment = h;
}

void LatteFetchShader::CalculateFetchShaderMtlObjectShaderHash(uint32* contextRegister)
{uint64 key = 0;
for (sint32 g = 0; g < bufferGroups.size(); g++)
{
LatteParsedFetchShaderBufferGroup_t& group = bufferGroups[g];
uint32 bufferIndex = group.attributeBufferIndex;
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
uint32 bufferStride = (contextRegister[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;

key += (uint64)bufferIndex;
key = std::rotl<uint64>(key, 5);
key += (uint64)bufferStride;
key = std::rotl<uint64>(key, 5);
}
mtlShaderHashObject = key;
}

void _fetchShaderDecompiler_parseInstruction_VTX_SEMANTIC(LatteFetchShader* parsedFetchShader, uint32* contextRegister, const LatteClauseInstruction_VTX* instr)
{
uint32 semanticId = instr->getFieldSEM_SEMANTIC_ID(); // location (attribute index inside shader)
Expand All @@ -161,7 +178,7 @@ void _fetchShaderDecompiler_parseInstruction_VTX_SEMANTIC(LatteFetchShader* pars
auto nfa = instr->getField_NUM_FORMAT_ALL();
bool isSigned = instr->getField_FORMAT_COMP_ALL() == LatteClauseInstruction_VTX::FORMAT_COMP::COMP_SIGNED;
auto endianSwap = instr->getField_ENDIAN_SWAP();

// get buffer
cemu_assert_debug(bufferId >= 0xA0 && bufferId < 0xB0);
uint32 bufferIndex = (bufferId - 0xA0);
Expand Down Expand Up @@ -316,7 +333,7 @@ LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::Cach
// {0x00000002, 0x01800c00, 0x00000000, 0x8a000000, 0x2c00a001, 0x2c151000, 0x000a0000, ...} // size 0x50
// {0x00000002, 0x01801000, 0x00000000, 0x8a000000, 0x1c00a001, 0x280d1000, 0x00090000, ...} // size 0x60
// {0x00000002, 0x01801c00, 0x00000000, 0x8a000000, 0x1c00a001, 0x280d1000, 0x00090000, ...} // size 0x90

// our new implementation:
// {0x00000002, 0x01800400, 0x00000000, 0x8a000000, 0x0000a001, 0x2c151000, 0x00020000, ...}

Expand All @@ -328,6 +345,7 @@ LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::Cach
// these only make sense when vertex shader does not call FS?
LatteShader_calculateFSKey(newFetchShader);
newFetchShader->CalculateFetchShaderVkHash();
newFetchShader->CalculateFetchShaderMtlObjectShaderHash(contextRegister);
return newFetchShader;
}

Expand Down Expand Up @@ -387,6 +405,7 @@ LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::Cach
}
LatteShader_calculateFSKey(newFetchShader);
newFetchShader->CalculateFetchShaderVkHash();
newFetchShader->CalculateFetchShaderMtlObjectShaderHash(contextRegister);

// register in cache
// its possible that during multi-threaded shader cache loading, two identical (same hash) fetch shaders get created simultaneously
Expand All @@ -411,7 +430,7 @@ LatteFetchShader::~LatteFetchShader()
UnregisterInCache();
}

struct FetchShaderLookupInfo
struct FetchShaderLookupInfo
{
LatteFetchShader* fetchShader;
uint32 programSize;
Expand Down
7 changes: 6 additions & 1 deletion src/Cafe/HW/Latte/Core/FetchShader.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,18 @@ struct LatteFetchShader
// Vulkan
uint64 vkPipelineHashFragment{}; // hash of all fetch shader state that influences the Vulkan graphics pipeline

// Metal
uint64 mtlShaderHashObject{};

// cache info
CacheHash m_cacheHash{};
bool m_isRegistered{}; // if true, fetch shader is referenced by cache (RegisterInCache() succeeded)


void CalculateFetchShaderVkHash();

void CalculateFetchShaderMtlObjectShaderHash(uint32* contextRegister);

uint64 getVkPipelineHashFragment() const { return vkPipelineHashFragment; };

static bool isValidBufferIndex(const uint32 index) { return index < 0x10; };
Expand All @@ -69,4 +74,4 @@ struct LatteFetchShader
static std::unordered_map<CacheHash, LatteFetchShader*> s_fetchShaderByHash;
};

LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::CacheHash fsHash, uint32* contextRegister, uint32* fsProgramCode, uint32 fsProgramSize);
LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::CacheHash fsHash, uint32* contextRegister, uint32* fsProgramCode, uint32 fsProgramSize);
19 changes: 19 additions & 0 deletions src/Cafe/HW/Latte/Core/LatteShader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
#include "config/ActiveSettings.h"
#include "Cafe/GameProfile/GameProfile.h"
#include "util/containers/flat_hash_map.hpp"
#if BOOST_OS_MACOS
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
#endif
#include <cinttypes>

// experimental new decompiler (WIP)
Expand Down Expand Up @@ -498,6 +501,8 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize,
uint64 vsHash2 = 0;
_calculateShaderProgramHash(vsProgramCode, vertexShaderSize, &hashCacheVS, &vsHash1, &vsHash2);
uint64 vsHash = vsHash1 + vsHash2 + _activeFetchShader->key + _activePSImportTable.key + (usesGeometryShader ? 0x1111ULL : 0ULL);
if (g_renderer->GetType() == RendererAPI::Metal && usesGeometryShader)
vsHash += _activeFetchShader->mtlShaderHashObject;

uint32 tmp = LatteGPUState.contextNew.PA_CL_VTE_CNTL.getRawValue() ^ 0x43F;
vsHash += tmp;
Expand Down Expand Up @@ -542,6 +547,20 @@ void LatteSHRC_UpdatePSBaseHash(uint8* pixelShaderPtr, uint32 pixelShaderSize, b
_calculateShaderProgramHash(psProgramCode, pixelShaderSize, &hashCachePS, &psHash1, &psHash2);
// get vertex shader
uint64 psHash = psHash1 + psHash2 + _activePSImportTable.key + (usesGeometryShader ? hashCacheGS.prevHash1 : 0ULL);

#if BOOST_OS_MACOS
if (g_renderer->GetType() == RendererAPI::Metal)
{
for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
{
auto format = LatteMRT::GetColorBufferFormat(i, LatteGPUState.contextNew);
uint8 dataType = (uint8)GetMtlPixelFormatInfo(format, false).dataType;
psHash += (uint64)dataType;
psHash = std::rotl<uint64>(psHash, 7);
}
}
#endif

_shaderBaseHash_ps = psHash;
}

Expand Down
1 change: 1 addition & 0 deletions src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ struct LatteDecompilerShaderResourceMapping
sint8 attributeMapping[LATTE_NUM_MAX_ATTRIBUTE_LOCATIONS];
// Metal exclusive
sint8 indexBufferBinding{-1};
sint8 indexTypeBinding{-1};

sint32 getTextureCount()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1020,4 +1020,5 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
LatteDecompiler::_initUniformBindingPoints(shaderContext);
LatteDecompiler::_initAttributeBindingPoints(shaderContext);
shaderContext->output->resourceMappingMTL.indexBufferBinding = shaderContext->currentBufferBindingPointMTL++;
shaderContext->output->resourceMappingMTL.indexTypeBinding = shaderContext->currentBufferBindingPointMTL++;
}
137 changes: 128 additions & 9 deletions src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "Cafe/HW/Latte/Core/FetchShader.h"
#include "Cafe/HW/Latte/Renderer/Renderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
#include "config/ActiveSettings.h"
#include "util/helpers/StringBuf.h"

Expand Down Expand Up @@ -3214,11 +3215,13 @@ static void _emitExportCode(LatteDecompilerShaderContext* shaderContext, LatteDe
src->add(") == false) discard_fragment();" _CRLF);
}
// pixel color output
src->addFmt("#ifdef {}" _CRLF, GetColorAttachmentTypeStr(pixelColorOutputIndex));
src->addFmt("out.passPixelColor{} = as_type<{}>(", pixelColorOutputIndex, GetColorAttachmentTypeStr(pixelColorOutputIndex));
_emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, i);
src->add(");" _CRLF);
src->add("#endif" _CRLF);
auto dataType = GetColorBufferDataType(pixelColorOutputIndex, *shaderContext->contextRegistersNew);
if (dataType != MetalDataType::NONE)
{
src->addFmt("out.passPixelColor{} = as_type<{}>(", pixelColorOutputIndex, GetDataTypeStr(dataType));
_emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, i);
src->add(");" _CRLF);
}

if( cfInstruction->exportArrayBase+i >= 8 )
cemu_assert_unimplemented();
Expand Down Expand Up @@ -3881,9 +3884,125 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
case LatteConst::ShaderType::Vertex:
if (shaderContext->options->usesGeometryShader || isRectVertexShader)
{
// Defined just-in-time
// Will also modify vid in case of an indexed draw
src->add("VertexIn fetchInput(thread uint& vid VERTEX_BUFFER_DEFINITIONS);" _CRLF);
// TODO: clean this up
// fetchVertex will modify vid in case of an indexed draw

// Vertex buffers
std::string vertexBufferDefinitions = "#define VERTEX_BUFFER_DEFINITIONS ";
std::string vertexBuffers = "#define VERTEX_BUFFERS ";
std::string inputFetchDefinition = "VertexIn fetchVertex(thread uint& vid, device uint* indexBuffer, uint indexType VERTEX_BUFFER_DEFINITIONS) {\n";

// Index buffer
inputFetchDefinition += "if (indexType == 1) // UShort\n";
inputFetchDefinition += "vid = ((device ushort*)indexBuffer)[vid];\n";
inputFetchDefinition += "else if (indexType == 2)\n";
inputFetchDefinition += "vid = ((device uint*)indexBuffer)[vid]; // UInt\n";

inputFetchDefinition += "VertexIn in;\n";
for (auto& bufferGroup : shaderContext->fetchShader->bufferGroups)
{
std::optional<LatteConst::VertexFetchType2> fetchType;

uint32 bufferIndex = bufferGroup.attributeBufferIndex;
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
uint32 bufferStride = (shaderContext->contextRegisters[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;

for (sint32 j = 0; j < bufferGroup.attribCount; ++j)
{
auto& attr = bufferGroup.attrib[j];

uint32 semanticId = shaderContext->output->resourceMappingMTL.attributeMapping[attr.semanticId];
if (semanticId == (uint32)-1)
continue; // attribute not used?

std::string formatName;
uint8 componentCount = 0;
switch (GetMtlVertexFormat(attr.format))
{
case MTL::VertexFormatUChar:
formatName = "uchar";
componentCount = 1;
break;
case MTL::VertexFormatUChar2:
formatName = "uchar2";
componentCount = 2;
break;
case MTL::VertexFormatUChar3:
formatName = "uchar3";
componentCount = 3;
break;
case MTL::VertexFormatUChar4:
formatName = "uchar4";
componentCount = 4;
break;
case MTL::VertexFormatUShort:
formatName = "ushort";
componentCount = 1;
break;
case MTL::VertexFormatUShort2:
formatName = "ushort2";
componentCount = 2;
break;
case MTL::VertexFormatUShort3:
formatName = "ushort3";
componentCount = 3;
break;
case MTL::VertexFormatUShort4:
formatName = "ushort4";
componentCount = 4;
break;
case MTL::VertexFormatUInt:
formatName = "uint";
componentCount = 1;
break;
case MTL::VertexFormatUInt2:
formatName = "uint2";
componentCount = 2;
break;
case MTL::VertexFormatUInt3:
formatName = "uint3";
componentCount = 3;
break;
case MTL::VertexFormatUInt4:
formatName = "uint4";
componentCount = 4;
break;
}

// Fetch the attribute
inputFetchDefinition += fmt::format("in.ATTRIBUTE_NAME{} = ", semanticId);
inputFetchDefinition += fmt::format("uint4(*(device {}*)", formatName);
inputFetchDefinition += fmt::format("(vertexBuffer{}", attr.attributeBufferIndex);
inputFetchDefinition += fmt::format(" + vid * {} + {})", bufferStride, attr.offset);
for (uint8 i = 0; i < (4 - componentCount); i++)
inputFetchDefinition += ", 0";
inputFetchDefinition += ");\n";

if (fetchType.has_value())
cemu_assert_debug(fetchType == attr.fetchType);
else
fetchType = attr.fetchType;

if (attr.fetchType == LatteConst::INSTANCE_DATA)
{
cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported
}
}

// TODO: fetch type

vertexBufferDefinitions += fmt::format(", device uchar* vertexBuffer{} [[buffer({})]]", bufferIndex, GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex));
vertexBuffers += fmt::format(", vertexBuffer{}", bufferIndex);
}

inputFetchDefinition += "return in;\n";
inputFetchDefinition += "}\n";

src->add(vertexBufferDefinitions.c_str());
src->add("\n");
src->add(vertexBuffers.c_str());
src->add("\n");
src->add(inputFetchDefinition.c_str());

functionType = "[[object, max_total_threads_per_threadgroup(VERTICES_PER_VERTEX_PRIMITIVE), max_total_threadgroups_per_mesh_grid(1)]]";
outputTypeName = "void";
Expand Down Expand Up @@ -3916,7 +4035,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
// TODO: don't hardcode the instance index
src->add("uint iid = 0;" _CRLF);
// Fetch the input
src->add("VertexIn in = fetchInput(vid VERTEX_BUFFERS);" _CRLF);
src->add("VertexIn in = fetchVertex(vid, indexBuffer, indexType VERTEX_BUFFERS);" _CRLF);
// Output is defined as object payload
src->add("object_data VertexOut& out = objectPayload.vertexOut[tid];" _CRLF);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -281,9 +281,11 @@ namespace LatteDecompiler
{
if ((decompilerContext->shader->pixelColorOutputMask & (1 << i)) != 0)
{
src->addFmt("#ifdef {}" _CRLF, GetColorAttachmentTypeStr(i));
src->addFmt("{} passPixelColor{} [[color({})]];" _CRLF, GetColorAttachmentTypeStr(i), i, i);
src->add("#endif" _CRLF);
auto dataType = GetColorBufferDataType(i, *decompilerContext->contextRegistersNew);
if (dataType != MetalDataType::NONE)
{
src->addFmt("{} passPixelColor{} [[color({})]];" _CRLF, GetDataTypeStr(dataType), i, i);
}
}
}

Expand Down Expand Up @@ -495,6 +497,10 @@ namespace LatteDecompiler
src->add(", mesh_grid_properties meshGridProperties");
src->add(", uint tig [[threadgroup_position_in_grid]]");
src->add(", uint tid [[thread_index_in_threadgroup]]");
// TODO: inly include index buffer if needed
src->addFmt(", device uint* indexBuffer [[buffer({})]]", decompilerContext->output->resourceMappingMTL.indexBufferBinding);
// TODO: use uchar?
src->addFmt(", constant uint& indexType [[buffer({})]]", decompilerContext->output->resourceMappingMTL.indexTypeBinding);
src->add(" VERTEX_BUFFER_DEFINITIONS");
}
else
Expand Down
1 change: 1 addition & 0 deletions src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ LatteTextureMtl::LatteTextureMtl(class MetalRenderer* mtlRenderer, Latte::E_DIM
auto pixelFormat = GetMtlPixelFormat(format, isDepth, m_mtlr->GetPixelFormatSupport());
desc->setPixelFormat(pixelFormat);

// TODO: using MTL::TextureUsageShaderWrite as well fixes Mario Tennis: Ultra Smash, investigate why
MTL::TextureUsage usage = MTL::TextureUsageShaderRead;
if (!Latte::IsCompressedFormat(format))
{
Expand Down
Loading
Loading