// Patch summary (descriptive preamble; the patch text below is unchanged).
//
// Files touched: PPUThread.cpp, SPUASMJITRecompiler.cpp/.h, SPURecompiler.cpp/.h.
//
// PPUThread.cpp
//   - ppu_initialize() now formats a cache directory path from fs::get_config_dir(),
//     Emu.GetTitleID(), fmt::base57(_main->sha1) and the part of Emu.GetBoot() after
//     the last '/', creates it with fs::create_path() (calling fmt::throw_exception on
//     failure), and calls spu_cache::initialize(ppu_cache) before ppu_initialize(*_main).
//
// SPURecompiler.h / SPURecompiler.cpp: new helper class spu_cache
//   - Constructor opens m_file at the given location with fs::read + fs::write + fs::create.
//   - operator bool forwards to m_file.
//   - get(): seeks to offset 0 and reads records until a read fails. Each record is a
//     be_t `size`, a be_t `addr`, then `size` 4-byte words. Each returned vector holds
//     `addr` at index 0 followed by those words. A short read simply ends the loop
//     (see the TODO in the code about signalling broken files).
//   - add(): writes one record in the same layout (size = func.size() - 1, addr = func[0],
//     then the remaining words) at the current file position.
//   - initialize(): returns early when fxm::check() yields null or
//     g_cfg.core.spu_shared_runtime is off. Opens "<ppu_cache>spu-§<block size>-v0.dat",
//     logging an error and returning if that fails. If get() returns any functions and the
//     configured decoder is asmjit or llvm, it creates a recompiler and, per function:
//     copies the function words into a 0x10000-entry "fake LS" vector, runs
//     compiler->block() on it, compiles the cached function, then walks the analyser
//     result (func2), logging "SPU Analyser failed" on a mismatch and zeroing those LS
//     entries. Finally it registers the cache object through fxm::import.
//
// spu_recompiler_base and both recompilers
//   - compile() now takes the function vector by rvalue reference. The vector is moved
//     into m_spurt->m_map via emplace(), and the rest of the body works on the key/value
//     stored in the map (fn_info.first->first / ->second).
//   - get()/compile() also fetch m_cache when m_spurt is first initialised.
//   - The ASMJIT compile() calls m_cache->add(func) before returning when m_cache is set.
//     The LLVM compile() has the same call disabled by `&& false` (see its TODO).
//   - block() changes from a static function taking SPUThread& and an optional bitset
//     to a member function taking an LS pointer and lsa. It resets and uses m_block_info,
//     and reads instruction words as ls[pos / 4]. The giga-mode branch that seeded
//     vflags[0]/values[0] from spu.gpr[0] is removed.
//
// NOTE(review): this text appears to have lost its line breaks and everything that was
//   inside angle brackets (e.g. "std::vector& func", "fxm::get()", bare "#include").
//   Recover template arguments and header names from the original commit before
//   applying or compiling - TODO confirm.
// NOTE(review): spu_cache::get() passes the on-disk `size` straight to func.resize(), and
//   spu_cache::initialize() indexes ls[pos / 4] starting from the on-disk address with no
//   visible range check against the 0x10000-entry vector. A truncated or corrupt cache
//   file looks able to trigger a huge allocation or an out-of-range write - verify and
//   consider validating addr/size in get().
// NOTE(review): the fake LS is cleared over the range of func2 (the analyser result), not
//   the range that was filled from func. If func2 is shorter, stale words would presumably
//   remain for the next iteration - confirm.
diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index c89d6fa6f087..866387e33e3a 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -9,6 +9,7 @@ #include "PPUInterpreter.h" #include "PPUAnalyser.h" #include "PPUModule.h" +#include "SPURecompiler.h" #include "lv2/sys_sync.h" #include "lv2/sys_prx.h" #include "Utilities/GDBDebugServer.h" @@ -1086,6 +1087,21 @@ extern void ppu_initialize() return; } + // New PPU cache location + const std::string ppu_cache = fmt::format("%sdata/%s/ppu-%s-%s/", + fs::get_config_dir(), + Emu.GetTitleID(), + fmt::base57(_main->sha1), + Emu.GetBoot().substr(Emu.GetBoot().find_last_of('/') + 1)); + + if (!fs::create_path(ppu_cache)) + { + fmt::throw_exception("Failed to create cache directory: %s (%s)", ppu_cache, fs::g_tls_error); + } + + // Initialize SPU cache + spu_cache::initialize(ppu_cache); + // Initialize main module ppu_initialize(*_main); diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp index 33dedf2143f1..8c43cc1ee61b 100644 --- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp @@ -57,6 +57,7 @@ spu_function_t spu_recompiler::get(u32 lsa) // Initialize if necessary if (!m_spurt) { + m_cache = fxm::get(); m_spurt = fxm::get_always(); } @@ -64,11 +65,12 @@ spu_function_t spu_recompiler::get(u32 lsa) return m_spurt->m_dispatcher[lsa / 4]; } -spu_function_t spu_recompiler::compile(const std::vector& func) +spu_function_t spu_recompiler::compile(std::vector&& func_rv) { // Initialize if necessary if (!m_spurt) { + m_cache = fxm::get(); m_spurt = fxm::get_always(); } @@ -80,16 +82,18 @@ spu_function_t spu_recompiler::compile(const std::vector& func) lock.lock(); } - // Try to find existing function - { - const auto found = m_spurt->m_map.find(func); + // Try to find existing function, register new one if necessary + const auto fn_info = m_spurt->m_map.emplace(std::move(func_rv), nullptr); - if (found 
!= m_spurt->m_map.end() && found->second) - { - return found->second; - } + auto& fn_location = fn_info.first->second; + + if (fn_location) + { + return fn_location; } + auto& func = fn_info.first->first; + using namespace asmjit; SPUDisAsm dis_asm(CPUDisAsm_InterpreterMode); @@ -811,7 +815,7 @@ spu_function_t spu_recompiler::compile(const std::vector& func) } // Register function - m_spurt->m_map[func] = fn; + fn_location = fn; // Generate a dispatcher (übertrampoline) std::vector addrv{func[0]}; @@ -1043,6 +1047,11 @@ spu_function_t spu_recompiler::compile(const std::vector& func) fs::file(Emu.GetCachePath() + "SPUJIT.log", fs::write + fs::append).write(log); } + if (m_cache) + { + m_cache->add(func); + } + return fn; } diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.h b/rpcs3/Emu/Cell/SPUASMJITRecompiler.h index ac393ca48b62..358863ec563b 100644 --- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.h +++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.h @@ -39,7 +39,7 @@ class spu_recompiler : public spu_recompiler_base virtual spu_function_t get(u32 lsa) override; - virtual spu_function_t compile(const std::vector& func) override; + virtual spu_function_t compile(std::vector&&) override; private: // emitter: diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index e83ede6f128c..d1be6a5bd0af 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -1,14 +1,16 @@ -#include "stdafx.h" +#include "stdafx.h" #include "Emu/System.h" #include "Emu/IdManager.h" #include "Emu/Memory/Memory.h" #include "Crypto/sha1.h" +#include "Utilities/StrUtil.h" #include "SPUThread.h" #include "SPUAnalyser.h" #include "SPUInterpreter.h" #include "SPUDisAsm.h" #include "SPURecompiler.h" +#include "PPUAnalyser.h" #include #include #include @@ -17,6 +19,146 @@ extern u64 get_system_time(); const spu_decoder s_spu_itype; +spu_cache::spu_cache(const std::string& loc) + : m_file(loc, fs::read + fs::write + fs::create) +{ +} + +spu_cache::~spu_cache() +{ 
+} + +std::vector> spu_cache::get() +{ + std::vector> result; + + if (!m_file) + { + return result; + } + + m_file.seek(0); + + // TODO: signal truncated or otherwise broken file + while (true) + { + be_t size; + be_t addr; + std::vector func; + + if (!m_file.read(size) || !m_file.read(addr)) + { + break; + } + + func.resize(size + 1); + func[0] = addr; + + if (m_file.read(func.data() + 1, func.size() * 4 - 4) != func.size() * 4 - 4) + { + break; + } + + result.emplace_back(std::move(func)); + } + + return result; +} + +void spu_cache::add(const std::vector& func) +{ + if (!m_file) + { + return; + } + + be_t size = ::size32(func) - 1; + be_t addr = func[0]; + m_file.write(size); + m_file.write(addr); + m_file.write(func.data() + 1, func.size() * 4 - 4); +} + +void spu_cache::initialize(const std::string& ppu_cache) +{ + const auto _main = fxm::check(); + + if (!_main || !g_cfg.core.spu_shared_runtime) + { + return; + } + + // SPU cache file (version + block size type) + const std::string loc = ppu_cache + u8"spu-§" + fmt::to_lower(g_cfg.core.spu_block_size.to_string()) + "-v0.dat"; + + auto cache = std::make_shared(loc); + + if (!*cache) + { + LOG_ERROR(SPU, "Failed to initialize SPU cache: %s", loc); + return; + } + + // Read cache + auto func_list = cache->get(); + + if (!func_list.empty()) + { + // Recompiler instance for cache initialization + std::unique_ptr compiler; + + if (g_cfg.core.spu_decoder == spu_decoder_type::asmjit) + { + compiler = spu_recompiler_base::make_asmjit_recompiler(); + } + + if (g_cfg.core.spu_decoder == spu_decoder_type::llvm) + { + compiler = spu_recompiler_base::make_llvm_recompiler(); + } + + if (compiler) + { + // Fake LS + std::vector> ls(0x10000); + + // Build functions + for (auto&& func : func_list) + { + // Initialize LS with function data only + for (u32 i = 1, pos = func[0]; i < func.size(); i++, pos += 4) + { + ls[pos / 4] = se_storage::swap(func[i]); + } + + // Call analyser + std::vector func2 = compiler->block(ls.data(), 
func[0]); + + compiler->compile(std::move(func)); + + // Clear fake LS + for (u32 i = 1, pos = func2[0]; i < func2.size(); i++, pos += 4) + { + if (se_storage::swap(func2[i]) != ls[pos / 4]) + { + LOG_ERROR(SPU, "[0x%05x] SPU Analyser failed at 0x%x", func2[0], pos); + } + + ls[pos / 4] = 0; + } + } + + LOG_SUCCESS(SPU, "SPU Runtime: Built %u cached functions.", func_list.size()); + } + } + + // Register cache instance + fxm::import([&]() -> std::shared_ptr&& + { + return std::move(cache); + }); +} + spu_recompiler_base::spu_recompiler_base() { } @@ -54,14 +196,14 @@ void spu_recompiler_base::dispatch(SPUThread& spu, void*, u8* rip) } // Compile - verify(HERE), spu.jit->compile(block(spu, spu.pc, &spu.jit->m_block_info)); + verify(HERE), spu.jit->compile(spu.jit->block(spu._ptr(0), spu.pc)); spu.jit_dispatcher[spu.pc / 4] = spu.jit->get(spu.pc); } void spu_recompiler_base::branch(SPUThread& spu, void*, u8* rip) { // Compile - const auto func = verify(HERE, spu.jit->compile(block(spu, spu.pc, &spu.jit->m_block_info))); + const auto func = verify(HERE, spu.jit->compile(spu.jit->block(spu._ptr(0), spu.pc))); spu.jit_dispatcher[spu.pc / 4] = spu.jit->get(spu.pc); // Overwrite jump to this function with jump to the compiled function @@ -102,23 +244,16 @@ void spu_recompiler_base::branch(SPUThread& spu, void*, u8* rip) #endif } -std::vector spu_recompiler_base::block(SPUThread& spu, u32 lsa, std::bitset<0x10000>* out_info) +std::vector spu_recompiler_base::block(const be_t* ls, u32 lsa) { - // Block info (local) - std::bitset<0x10000> block_info{}; - - // Select one to use - std::bitset<0x10000>& blocks = out_info ? 
*out_info : block_info; - - if (out_info) - { - out_info->reset(); - } - // Result: addr + raw instruction data std::vector result; result.reserve(256); result.push_back(lsa); + + // Initialize block entries + std::bitset<0x10000>& blocks = m_block_info; + blocks.reset(); blocks.set(lsa / 4); // Simple block entry workload list @@ -140,13 +275,6 @@ std::vector spu_recompiler_base::block(SPUThread& spu, u32 lsa, std::bitset // Associated constant values for 32-bit preferred slot std::array values; - if (spu.pc == lsa && g_cfg.core.spu_block_size == spu_block_size_type::giga) - { - // TODO: use current register values for speculations - vflags[0] = +vf::is_const; - values[0] = spu.gpr[0]._u32[3]; - } - for (u32 wi = 0; wi < wl.size();) { const auto next_block = [&] @@ -172,7 +300,7 @@ std::vector spu_recompiler_base::block(SPUThread& spu, u32 lsa, std::bitset }; const u32 pos = wl[wi]; - const u32 data = spu._ref(pos); + const u32 data = ls[pos / 4]; const auto op = spu_opcode_t{data}; wl[wi] += 4; @@ -272,7 +400,7 @@ std::vector spu_recompiler_base::block(SPUThread& spu, u32 lsa, std::bitset for (u32 i = start; i < limit; i += 4) { - const u32 target = spu._ref(i); + const u32 target = ls[i / 4]; if (target == 0 || target % 4) { @@ -542,7 +670,7 @@ std::vector spu_recompiler_base::block(SPUThread& spu, u32 lsa, std::bitset if (result[i] == 0) { const u32 pos = lsa + (i - 1) * 4; - const u32 data = spu._ref(pos); + const u32 data = ls[pos / 4]; const auto type = s_spu_itype.decode(data); // Allow only NOP or LNOP instructions in holes @@ -795,6 +923,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator // Initialize if necessary if (!m_spurt) { + m_cache = fxm::get(); m_spurt = fxm::get_always(); m_context = m_spurt->m_jit.get_context(); } @@ -803,11 +932,12 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator return m_spurt->m_dispatcher[lsa / 4]; } - virtual spu_function_t compile(const std::vector& func) 
override + virtual spu_function_t compile(std::vector&& func_rv) override { // Initialize if necessary if (!m_spurt) { + m_cache = fxm::get(); m_spurt = fxm::get_always(); m_context = m_spurt->m_jit.get_context(); } @@ -820,14 +950,18 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator lock.lock(); } - // Try to find existing function, register new - auto& fn_location = m_spurt->m_map[func]; + // Try to find existing function, register new one if necessary + const auto fn_info = m_spurt->m_map.emplace(std::move(func_rv), nullptr); + + auto& fn_location = fn_info.first->second; if (fn_location) { return fn_location; } + auto& func = fn_info.first->first; + std::string hash; { sha1_context ctx; @@ -1262,6 +1396,12 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator fs::file(Emu.GetCachePath() + "SPU.log", fs::write + fs::append).write(log); } + // Cache function bytes (TODO: enable once severe bugs are fixed) + if (m_cache && false) + { + m_cache->add(func); + } + return fn; } diff --git a/rpcs3/Emu/Cell/SPURecompiler.h b/rpcs3/Emu/Cell/SPURecompiler.h index 83ca5a1e5a6a..df8478ca288e 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.h +++ b/rpcs3/Emu/Cell/SPURecompiler.h @@ -1,7 +1,33 @@ #pragma once +#include "Utilities/File.h" #include "SPUThread.h" +#include +#include #include +#include + +// Helper class +class spu_cache +{ + fs::file m_file; + +public: + spu_cache(const std::string& loc); + + ~spu_cache(); + + operator bool() const + { + return m_file.operator bool(); + } + + std::vector> get(); + + void add(const std::vector& func); + + static void initialize(const std::string& ppu_cache); +}; // SPU Recompiler instance base class class spu_recompiler_base @@ -11,6 +37,8 @@ class spu_recompiler_base std::bitset<0x10000> m_block_info; + std::shared_ptr m_cache; + public: spu_recompiler_base(); @@ -20,7 +48,7 @@ class spu_recompiler_base virtual spu_function_t get(u32 lsa) = 0; // Compile function - virtual 
spu_function_t compile(const std::vector& func) = 0; + virtual spu_function_t compile(std::vector&&) = 0; // Default dispatch function fallback (second arg is unused) static void dispatch(SPUThread&, void*, u8* rip); @@ -29,7 +57,7 @@ class spu_recompiler_base static void branch(SPUThread&, void*, u8* rip); // Get the block at specified address - static std::vector block(SPUThread&, u32 lsa, std::bitset<0x10000>* = nullptr); + std::vector block(const be_t* ls, u32 lsa); // Create recompiler instance (ASMJIT) static std::unique_ptr make_asmjit_recompiler();