Skip to content

Commit

Permalink
implement async shader compilation
Browse files Browse the repository at this point in the history
  • Loading branch information
SamoZ256 committed Oct 10, 2024
1 parent bfd9059 commit bdfac96
Show file tree
Hide file tree
Showing 3 changed files with 177 additions and 20 deletions.
12 changes: 11 additions & 1 deletion src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,14 +223,16 @@ void MetalRenderer::ResizeLayer(const Vector2i& size, bool mainWindow)
// Initializes the base renderer first, then the Metal shader subsystem
// (RendererShaderMtl::Initialize starts the shader compilation threads).
void MetalRenderer::Initialize()
{
Renderer::Initialize();
RendererShaderMtl::Initialize();
}

// Shuts down the ImGui Metal backend, the base renderer and finally the shader
// subsystem (RendererShaderMtl::Shutdown stops the shader compilation threads).
void MetalRenderer::Shutdown()
{
// TODO: should shutdown both layers
ImGui_ImplMetal_Shutdown();
// NOTE(review): Renderer::Shutdown() appears both before and after
// CommitCommandBuffer() here. This looks like the removed and the added side of
// the diff rendered together - confirm that only the later call should remain.
Renderer::Shutdown();
CommitCommandBuffer();
Renderer::Shutdown();
RendererShaderMtl::Shutdown();
}

bool MetalRenderer::IsPadWindowActive()
Expand Down Expand Up @@ -935,13 +937,21 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
return;
}

// TODO: special state 8 and 5

auto& encoderState = m_state.m_encoderState;

// Shaders
// Shaders are compiled asynchronously; if any active stage has not finished
// compiling yet, the draw is skipped.
LatteDecompilerShader* vertexShader = LatteSHRC_GetActiveVertexShader();
if (vertexShader && !vertexShader->shader->IsCompiled())
    return;
LatteDecompilerShader* geometryShader = LatteSHRC_GetActiveGeometryShader();
if (geometryShader && !geometryShader->shader->IsCompiled())
    return;
LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader();
const auto fetchShader = LatteSHRC_GetActiveFetchShader();
// Guard on pixelShader itself (the check used to test vertexShader): a draw
// without a pixel shader must not dereference a null pointer, and a draw
// without a vertex shader must still wait for its pixel shader.
if (pixelShader && !pixelShader->shader->IsCompiled())
    return;

bool neverSkipAccurateBarrier = false;

Expand Down
151 changes: 144 additions & 7 deletions src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp
Original file line number Diff line number Diff line change
@@ -1,42 +1,179 @@
#include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
//#include "Cemu/FileCache/FileCache.h"
//#include "config/ActiveSettings.h"

#include "Cemu/Logging/CemuLogging.h"
#include "Common/precompiled.h"
#include "config/CemuConfig.h"
#include "util/helpers/helpers.h"

extern std::atomic_int g_compiled_shaders_total;
extern std::atomic_int g_compiled_shaders_async;

// Worker pool that compiles queued RendererShaderMtl objects in the background.
//
// Producers push a shader onto s_compilationQueue while holding
// s_compilationQueueMutex and increment s_compilationQueueCount. Each worker
// waits on that counter, pops one job and compiles it outside the lock.
class ShaderMtlThreadPool
{
public:
    // Spawns the worker threads. Does nothing if the pool is already running.
    void StartThreads()
    {
        if (m_threadsActive.exchange(true))
            return;
        // create thread pool
        const uint32 threadCount = 2;
        s_threads.reserve(threadCount);
        for (uint32 i = 0; i < threadCount; ++i)
            s_threads.emplace_back(&ShaderMtlThreadPool::CompilerThreadFunc, this);
    }

    // Clears the running flag, wakes the workers and joins them.
    // Does nothing if the pool is not running.
    void StopThreads()
    {
        if (!m_threadsActive.exchange(false))
            return;
        // One increment per worker so that a worker waiting on the counter
        // wakes up and gets to re-check m_threadsActive.
        for (size_t i = 0; i < s_threads.size(); ++i)
            s_compilationQueueCount.increment();
        for (auto& worker : s_threads)
            worker.join();
        s_threads.clear();
    }

    ~ShaderMtlThreadPool()
    {
        StopThreads();
    }

    // Worker thread body: take one queued shader at a time and compile it
    // until the pool is stopped.
    void CompilerThreadFunc()
    {
        SetThreadName("mtlShaderComp");
        while (m_threadsActive.load(std::memory_order::relaxed))
        {
            s_compilationQueueCount.decrementWithWait();

            RendererShaderMtl* job = nullptr;
            {
                // RAII lock: released on every exit from this scope, including the continue
                std::lock_guard<std::mutex> queueLock(s_compilationQueueMutex);
                if (s_compilationQueue.empty())
                {
                    // queue empty again, shaders compiled synchronously via PreponeCompilation()
                    continue;
                }
                job = s_compilationQueue.front();
                s_compilationQueue.pop_front();
                // set compilation state while still holding the queue lock, so
                // PreponeCompilation() never sees a dequeued job as QUEUED
                cemu_assert_debug(job->m_compilationState.getValue() == RendererShaderMtl::COMPILATION_STATE::QUEUED);
                job->m_compilationState.setValue(RendererShaderMtl::COMPILATION_STATE::COMPILING);
            }

            // compile (outside the lock)
            job->CompileInternal();
            ++g_compiled_shaders_async;
            // mark as compiled
            cemu_assert_debug(job->m_compilationState.getValue() == RendererShaderMtl::COMPILATION_STATE::COMPILING);
            job->m_compilationState.setValue(RendererShaderMtl::COMPILATION_STATE::DONE);
        }
    }

    // True between a successful StartThreads() and the next StopThreads().
    bool HasThreadsRunning() const { return m_threadsActive.load(); }

public:
    std::vector<std::thread> s_threads;

    // Pending jobs; access only while holding s_compilationQueueMutex.
    std::deque<RendererShaderMtl*> s_compilationQueue;
    // Incremented once per queued job (and once per worker on shutdown).
    CounterSemaphore s_compilationQueueCount;
    std::mutex s_compilationQueueMutex;

private:
    // Explicitly initialized so the flag does not depend on the instance
    // having static storage duration.
    std::atomic<bool> m_threadsActive{false};
} shaderMtlThreadPool;

// Starts the background shader compilation threads.
// StartThreads() returns early if the pool is already running.
void RendererShaderMtl::Initialize()
{
shaderMtlThreadPool.StartThreads();
}

// Stops and joins the background shader compilation threads.
// StopThreads() returns early if the pool is not running.
void RendererShaderMtl::Shutdown()
{
shaderMtlThreadPool.StopThreads();
}

// Creates the shader and immediately queues it for asynchronous compilation.
// The MSL source is copied into m_mslCode so the compile can run later.
// NOTE(review): two member-initializer lines follow the signature (without and
// with m_mslCode). This looks like the removed and the added side of the diff
// rendered together - confirm that only the m_mslCode variant should remain.
RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode)
: RendererShader(type, baseHash, auxHash, isGameShader, isGfxPackShader), m_mtlr{mtlRenderer}
: RendererShader(type, baseHash, auxHash, isGameShader, isGfxPackShader), m_mtlr{mtlRenderer}, m_mslCode{mslCode}
{
// start async compilation
// the state change and the push happen under the queue mutex, the same mutex
// the worker threads and PreponeCompilation() take before touching the queue
shaderMtlThreadPool.s_compilationQueueMutex.lock();
m_compilationState.setValue(COMPILATION_STATE::QUEUED);
shaderMtlThreadPool.s_compilationQueue.push_back(this);
// one increment per queued job; the workers wait on this counter
shaderMtlThreadPool.s_compilationQueueCount.increment();
shaderMtlThreadPool.s_compilationQueueMutex.unlock();
cemu_assert_debug(shaderMtlThreadPool.HasThreadsRunning()); // make sure .StartThreads() was called
}

// Releases the Metal function object if compilation produced one.
RendererShaderMtl::~RendererShaderMtl()
{
    // m_function stays null when compilation failed or never ran
    if (m_function == nullptr)
        return;
    m_function->release();
}

// Ensures this shader has finished compiling before returning.
// If the shader is still waiting in the pool's queue it is removed from the
// queue and compiled on the calling thread; otherwise the call blocks until
// the compilation state becomes DONE.
// isRenderThread is not used by this implementation.
void RendererShaderMtl::PreponeCompilation(bool isRenderThread)
{
    bool claimedFromQueue = false;
    {
        std::lock_guard<std::mutex> queueLock(shaderMtlThreadPool.s_compilationQueueMutex);
        claimedFromQueue = m_compilationState.hasState(COMPILATION_STATE::QUEUED);
        if (claimedFromQueue)
        {
            // remove from queue; the semaphore count is left alone, the worker
            // loop tolerates waking up to an empty queue
            auto& pending = shaderMtlThreadPool.s_compilationQueue;
            pending.erase(std::remove(pending.begin(), pending.end(), this), pending.end());
            m_compilationState.setValue(COMPILATION_STATE::COMPILING);
        }
    }

    if (claimedFromQueue)
    {
        // compile synchronously
        CompileInternal();
        m_compilationState.setValue(COMPILATION_STATE::DONE);
        return;
    }

    m_compilationState.waitUntilValue(COMPILATION_STATE::DONE);
    --g_compiled_shaders_async; // compilation caused a stall so we don't consider this one async
}

// Reports whether the compilation state is currently DONE.
bool RendererShaderMtl::IsCompiled()
{
    return m_compilationState.hasState(COMPILATION_STATE::DONE);
}

// Waits until the compilation state is DONE, then returns true.
// This implementation never returns false.
bool RendererShaderMtl::WaitForCompiled()
{
m_compilationState.waitUntilValue(COMPILATION_STATE::DONE);
return true;
}

// Compiles the stored MSL source into a Metal library and stores its "main0"
// function in m_function. If an error is reported, it is logged together with
// the source and m_function is left unchanged. FinishCompilation() is called on
// both paths; g_compiled_shaders_total is only incremented on the success path.
// NOTE(review): the newLibrary(...) and cemuLog_log(...) statements each appear
// twice below (once with mslCode, once with m_mslCode). This looks like the
// removed and the added side of the diff rendered together - confirm that only
// the m_mslCode variants should remain.
void RendererShaderMtl::CompileInternal()
{
MTL::CompileOptions* options = MTL::CompileOptions::alloc()->init();
// TODO: always disable fast math for problematic shaders
if (GetConfig().fast_math)
options->setFastMathEnabled(true);

NS::Error* error = nullptr;
MTL::Library* library = m_mtlr->GetDevice()->newLibrary(ToNSString(mslCode), options, &error);
MTL::Library* library = m_mtlr->GetDevice()->newLibrary(ToNSString(m_mslCode), options, &error);
// the options object is no longer needed once newLibrary has returned
options->release();
if (error)
{
cemuLog_log(LogType::Force, "failed to create library: {} -> {}", error->localizedDescription()->utf8String(), mslCode.c_str());
cemuLog_log(LogType::Force, "failed to create library: {} -> {}", error->localizedDescription()->utf8String(), m_mslCode.c_str());
error->release();
FinishCompilation();
return;
}
m_function = library->newFunction(ToNSString("main0"));
// only the function is kept; the library reference is dropped here
library->release();

FinishCompilation();

// Count shader compilation
g_compiled_shaders_total++;
}

// NOTE(review): this span shows two function headers and two bodies together
// (the old destructor and FinishCompilation). This looks like the removed and
// the added side of the diff rendered together - presumably only
// FinishCompilation() with the m_mslCode lines should remain; confirm.
RendererShaderMtl::~RendererShaderMtl()
void RendererShaderMtl::FinishCompilation()
{
if (m_function)
m_function->release();
// empty the stored MSL source and request that its capacity be released
m_mslCode.clear();
m_mslCode.shrink_to_fit();
}
34 changes: 22 additions & 12 deletions src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,26 @@
#include "HW/Latte/Renderer/Metal/CachedFBOMtl.h"
#include "HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "util/helpers/ConcurrentQueue.h"
#include "util/helpers/Semaphore.h"

#include <Metal/Metal.hpp>

class RendererShaderMtl : public RendererShader
{
//enum class COMPILATION_STATE : uint32
//{
// NONE,
// QUEUED,
// COMPILING,
// DONE
//};
friend class ShaderMtlThreadPool;

// Stages of a shader's compilation, tracked in m_compilationState.
enum class COMPILATION_STATE : uint32
{
NONE, // initial value of m_compilationState
QUEUED, // set by the constructor when the shader is pushed onto the compilation queue
COMPILING, // set by a worker thread or PreponeCompilation() when it takes the shader off the queue
DONE // set after CompileInternal() has returned
};

public:
static void Initialize();
static void Shutdown();

RendererShaderMtl(class MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode);
virtual ~RendererShaderMtl();

Expand All @@ -42,15 +48,19 @@ class RendererShaderMtl : public RendererShader
cemu_assert_suspicious();
}

// TODO: implement this
void PreponeCompilation(bool isRenderThread) override {}
bool IsCompiled() override { return true; }
bool WaitForCompiled() override { return true; }
void PreponeCompilation(bool isRenderThread) override;
bool IsCompiled() override;
bool WaitForCompiled() override;

private:
class MetalRenderer* m_mtlr;

MTL::Function* m_function = nullptr;

void Compile(const std::string& mslCode);
StateSemaphore<COMPILATION_STATE> m_compilationState{ COMPILATION_STATE::NONE };

std::string m_mslCode;

void CompileInternal();
void FinishCompilation();
};

0 comments on commit bdfac96

Please sign in to comment.