Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Experimental] PPU LLVM: Recycle identical functions #15308

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 132 additions & 0 deletions rpcs3/Emu/Cell/PPUAnalyser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2078,6 +2078,138 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
}

ppu_log.notice("Block analysis: %zu blocks (%zu enqueued)", funcs.size(), block_queue.size());

std::unordered_map<std::string_view, std::pair<u32, u32>> duplicate_data_map;
duplicate_map.clear();

for (auto& func : funcs)
{
if (func.size == 0 || func.size > 10000u)
{
continue;
}

auto& data = duplicate_data_map[std::string_view{get_ptr<char>(func.addr), func.size}];

const usz count = data.first;

if (!count)
{
data.first++;
data.second = func.addr;
continue;
}

if (!data.second)
{
continue;
}

if (count == 1)
{
const u32 faddr = func.addr;
const u32 fend = func.addr + func.size;

bool fail = false;

//for (const auto [addr, size] : func.blocks)
const u32 addr = func.addr;
const u32 size = func.size;
{
if (size == 0)
{
continue;
}

auto i_ptr = ensure(get_ptr<u32>(addr));

for (u32 i = addr; i < fend; i += 4, i_ptr++)
{
const ppu_opcode_t op{*i_ptr};
const auto itype = s_ppu_itype.decode(op.opcode);

if (itype != ppu_itype::BC && itype != ppu_itype::B)
{
if (i == fend - 4)
{
if (!(itype & ppu_itype::branch))
{
// Last instruction is not a branch, so a branch to the following code would have to be inserted
fail = true;
break;
}
}

continue;
}

if (!op.aa)
{
fail = true;
break;
}

if (itype == ppu_itype::BC && (op.bo & 0x14) != 0x14)
{
if (i == fend - 4)
{
// Conditional branch as the last instruction: can fall through to the next instruction
fail = true;
break;
}
}
}
}

if (fail)
{
data.first = 1;
data.second = 0;
continue;
}
}

data.first++;

// Choose the lowest-addressed function as the source
data.second = std::min<u32>(data.second, func.addr);
}

usz dups_count = 0;

for (auto& func : funcs)
{
if (func.size == 0 || func.size > 10000u)
{
continue;
}

const auto data = ::at32(duplicate_data_map, std::string_view{get_ptr<char>(func.addr), func.size});

if (data.first > 1)
{
duplicate_map[func.addr] = data.second;

for (const auto [addr, size] : func.blocks)
{
if (size == 0 || addr >= func.addr + func.size)
{
continue;
}

duplicate_map[addr] = data.second + (addr - func.addr);
}

if (func.addr != data.second)
{
dups_count++;
}

ppu_log.trace("Found PPU function duplicate: func 0x%x vs 0x%x (%d times) (size=%d)", func.addr, data.second, data.first, func.size);
}
}

ppu_log.success("Function duplication count: %d/%d (%g%)", dups_count, duplicate_data_map.size(), dups_count * 100.0 / duplicate_data_map.size());
return true;
}

Expand Down
2 changes: 2 additions & 0 deletions rpcs3/Emu/Cell/PPUAnalyser.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ struct ppu_module
std::vector<ppu_function> funcs{};
std::deque<std::shared_ptr<void>> allocations;
std::map<u32, u32> addr_to_seg_index;
std::unordered_map<u32, u32> duplicate_map;

// Copy info without functions
void copy_part(const ppu_module& info)
Expand All @@ -107,6 +108,7 @@ struct ppu_module
secs = info.secs;
allocations = info.allocations;
addr_to_seg_index = info.addr_to_seg_index;
duplicate_map = info.duplicate_map;
}

bool analyse(u32 lib_toc, u32 entry, u32 end, const std::basic_string<u32>& applied, std::function<bool()> check_aborted = {});
Expand Down
36 changes: 30 additions & 6 deletions rpcs3/Emu/Cell/PPUThread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3918,13 +3918,13 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
mself_header hdr{};

if (mself.read(hdr) && hdr.get_count(mself.size()))
{
{
std::set<u64> offs;

for (u32 j = 0; j < hdr.count; j++)
{
mself_record rec{};

std::set<u64> offs;

if (mself.read(rec) && rec.get_pos(mself.size()))
{
if (rec.size <= 0x20)
Expand Down Expand Up @@ -4165,7 +4165,7 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
}

ppu_log.notice("Failed to precompile '%s' (prx: %s, ovl: %s): Attempting compilation as executable file", path, prx_err, ovl_err);
possible_exec_file_paths.push(path, offset, file_size);
possible_exec_file_paths.push(file_queue[func_i]);
inc_fdone = 0;
}

Expand Down Expand Up @@ -4680,8 +4680,15 @@ bool ppu_initialize(const ppu_module& info, bool check_only, u64 file_size)
// Copy block or function entry
ppu_function& entry = part.funcs.emplace_back(func);

u32 og_func = entry.addr;

if (auto it = info.duplicate_map.find(entry.addr); it != info.duplicate_map.end())
{
og_func = it->second;
}

// Fixup some information
entry.name = fmt::format("__0x%x", entry.addr - reloc);
entry.name = fmt::format("__0x%x", og_func - reloc);

if (has_mfvscr && g_cfg.core.ppu_set_sat_bit)
{
Expand Down Expand Up @@ -4848,7 +4855,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only, u64 file_size)
settings += ppu_settings::contains_symbol_resolver; // Avoid invalidating all modules for this purpose

// Write version, hash, CPU, settings
fmt::append(obj_name, "v6-kusa-%s-%s-%s.obj", fmt::base57(output, 16), fmt::base57(settings), jit_compiler::cpu(g_cfg.core.llvm_cpu));
fmt::append(obj_name, "v7-kusa-%s-%s-%s.obj", fmt::base57(output, 16), fmt::base57(settings), jit_compiler::cpu(g_cfg.core.llvm_cpu));
}

if (cpu ? cpu->state.all_of(cpu_flag::exit) : Emu.IsStopped())
Expand Down Expand Up @@ -5086,6 +5093,9 @@ bool ppu_initialize(const ppu_module& info, bool check_only, u64 file_size)
jit_mod.symbol_resolver(vm::g_exec_addr, info.segs[0].addr);

// Find a BLR-only function in order to copy it to all BLRs (some games need it)
bool early_exit = false;

// Get and install function addresses
for (const auto& func : info.funcs)
{
if (func.size == 4 && *info.get_ptr<u32>(func.addr) == ppu_instructions::BLR())
Expand Down Expand Up @@ -5156,6 +5166,11 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co
{
if (func.size)
{
if (auto it = module_part.duplicate_map.find(func.addr); it != module_part.duplicate_map.end() && it->second != it->first)
{
continue;
}

const auto f = cast<Function>(_module->getOrInsertFunction(func.name, _func).getCallee());
f->setCallingConv(CallingConv::GHC);
f->addParamAttr(1, llvm::Attribute::NoAlias);
Expand Down Expand Up @@ -5229,6 +5244,15 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co

if (module_part.funcs[fi].size)
{
const u32 faddr = module_part.funcs[fi].addr;
auto it = module_part.duplicate_map.find(faddr);

if (it != module_part.duplicate_map.end() && it->second != faddr)
{
ppu_log.trace("LLVM: Function 0x%x was skipped (duplicate)", faddr);
continue;
}

// Translate
if (const auto func = translator.Translate(module_part.funcs[fi]))
{
Expand Down
Loading