diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index 7ba3d519f..6aeac2c1b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -11,13 +11,26 @@ static x86Assembler64::GPR32 _reg32(IMLReg physReg) { cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::I32); - return (x86Assembler64::GPR32)physReg.GetRegID(); + IMLRegID regId = physReg.GetRegID(); + cemu_assert_debug(regId < 16); + return (x86Assembler64::GPR32)regId; } static uint32 _reg64(IMLReg physReg) { cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::I64); - return physReg.GetRegID(); + IMLRegID regId = physReg.GetRegID(); + cemu_assert_debug(regId < 16); + return regId; +} + +uint32 _regF64(IMLReg physReg) +{ + cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::F64); + IMLRegID regId = physReg.GetRegID(); + cemu_assert_debug(regId >= IMLArchX86::PHYSREG_FPR_BASE && regId < IMLArchX86::PHYSREG_FPR_BASE+16); + regId -= IMLArchX86::PHYSREG_FPR_BASE; + return regId; } static x86Assembler64::GPR8_REX _reg8(IMLReg physReg) @@ -1233,111 +1246,192 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { uint32 name = imlInstruction->op_r_name.name; - auto regR = _reg64(imlInstruction->op_r_name.regR); - - if( name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0+32 ) - { - x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0)); - } - else if( name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0+999 ) + if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::I64) { - sint32 sprIndex = (name - PPCREC_NAME_SPR0); - if (sprIndex == SPR_LR) - 
x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.LR)); - else if (sprIndex == SPR_CTR) - x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.CTR)); - else if (sprIndex == SPR_XER) - x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER)); - else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7) + auto regR = _reg64(imlInstruction->op_r_name.regR); + if (name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0 + 32) + { + x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, gpr) + sizeof(uint32) * (name - PPCREC_NAME_R0)); + } + else if (name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0 + 999) + { + sint32 sprIndex = (name - PPCREC_NAME_SPR0); + if (sprIndex == SPR_LR) + x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.LR)); + else if (sprIndex == SPR_CTR) + x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.CTR)); + else if (sprIndex == SPR_XER) + x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER)); + else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7) + { + sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0); + x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, memOffset); + } + else + assert_dbg(); + } + else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4) + { + x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY)); + } + else if (name == PPCREC_NAME_XER_CA) + { + x64Emit_movZX_reg64_mem8(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); + } + else if (name == PPCREC_NAME_XER_SO) + { + x64Emit_movZX_reg64_mem8(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so)); + } 
+ else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST) + { + x64Emit_movZX_reg64_mem8(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR)); + } + else if (name == PPCREC_NAME_CPU_MEMRES_EA) { - sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0); - x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, memOffset); + x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr)); + } + else if (name == PPCREC_NAME_CPU_MEMRES_VAL) + { + x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue)); } else assert_dbg(); } - else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4) - { - x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY)); - } - else if (name == PPCREC_NAME_XER_CA) - { - x64Emit_movZX_reg64_mem8(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); - } - else if (name == PPCREC_NAME_XER_SO) - { - x64Emit_movZX_reg64_mem8(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so)); - } - else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST) - { - x64Emit_movZX_reg64_mem8(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR)); - } - else if (name == PPCREC_NAME_CPU_MEMRES_EA) - { - x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr)); - } - else if (name == PPCREC_NAME_CPU_MEMRES_VAL) + else if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::F64) { - x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue)); + auto regR = _regF64(imlInstruction->op_r_name.regR); + if (name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0 + 32)) + { + 
x64Gen_movupd_xmmReg_memReg128(x64GenContext, regR, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * (name - PPCREC_NAME_FPR0)); + } + else if (name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0 + 8)) + { + x64Gen_movupd_xmmReg_memReg128(x64GenContext, regR, X86_REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0)); + } + else + { + cemu_assert_debug(false); + } } else - assert_dbg(); + DEBUG_BREAK; + } void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { uint32 name = imlInstruction->op_r_name.name; - auto regR = _reg64(imlInstruction->op_r_name.regR); - - if( name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0+32 ) - { - x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0), regR); - } - else if( name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0+999 ) + + if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::I64) { - uint32 sprIndex = (name - PPCREC_NAME_SPR0); - if (sprIndex == SPR_LR) - x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.LR), regR); - else if (sprIndex == SPR_CTR) - x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.CTR), regR); - else if (sprIndex == SPR_XER) - x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER), regR); - else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7) + auto regR = _reg64(imlInstruction->op_r_name.regR); + if (name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0 + 32) + { + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, gpr) + sizeof(uint32) * (name - PPCREC_NAME_R0), regR); + } + else if (name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0 + 999) + { + uint32 sprIndex = (name - PPCREC_NAME_SPR0); +
if (sprIndex == SPR_LR) + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.LR), regR); + else if (sprIndex == SPR_CTR) + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.CTR), regR); + else if (sprIndex == SPR_XER) + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER), regR); + else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7) + { + sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0); + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, memOffset, regR); + } + else + assert_dbg(); + } + else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4) { - sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0); - x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, memOffset, regR); + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), regR); + } + else if (name == PPCREC_NAME_XER_CA) + { + x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), X86_REG_NONE, 0, _reg8_from_reg64(regR)); + } + else if (name == PPCREC_NAME_XER_SO) + { + x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so), X86_REG_NONE, 0, _reg8_from_reg64(regR)); + } + else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST) + { + x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR), X86_REG_NONE, 0, _reg8_from_reg64(regR)); + } + else if (name == PPCREC_NAME_CPU_MEMRES_EA) + { + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr), regR); + } + else if (name == PPCREC_NAME_CPU_MEMRES_VAL) + { + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue), regR); } 
else - assert_dbg(); - } - else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4) - { - x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), regR); - } - else if (name == PPCREC_NAME_XER_CA) - { - x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), X86_REG_NONE, 0, _reg8_from_reg64(regR)); - } - else if (name == PPCREC_NAME_XER_SO) - { - x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so), X86_REG_NONE, 0, _reg8_from_reg64(regR)); - } - else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST) - { - x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR), X86_REG_NONE, 0, _reg8_from_reg64(regR)); - } - else if (name == PPCREC_NAME_CPU_MEMRES_EA) - { - x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr), regR); + assert_dbg(); } - else if (name == PPCREC_NAME_CPU_MEMRES_VAL) + else if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::F64) { - x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue), regR); + auto regR = _regF64(imlInstruction->op_r_name.regR); + uint32 name = imlInstruction->op_r_name.name; + if (name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0 + 32)) + { + x64Gen_movupd_memReg128_xmmReg(x64GenContext, regR, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * (name - PPCREC_NAME_FPR0)); + } + else if (name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0 + 8)) + { + x64Gen_movupd_memReg128_xmmReg(x64GenContext, regR, X86_REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0)); + } + else + { + cemu_assert_debug(false); + } } else - assert_dbg(); + DEBUG_BREAK; + + } +//void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* 
PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +//{ +// uint32 name = imlInstruction->op_r_name.name; +// uint32 fprReg = _regF64(imlInstruction->op_r_name.regR); +// if (name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0 + 32)) +// { +// x64Gen_movupd_xmmReg_memReg128(x64GenContext, fprReg, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * (name - PPCREC_NAME_FPR0)); +// } +// else if (name >= PPCREC_NAME_TEMPORARY_FPR0 || name < (PPCREC_NAME_TEMPORARY_FPR0 + 8)) +// { +// x64Gen_movupd_xmmReg_memReg128(x64GenContext, fprReg, X86_REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0)); +// } +// else +// { +// cemu_assert_debug(false); +// } +//} +// +//void PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +//{ +// uint32 name = imlInstruction->op_r_name.name; +// uint32 fprReg = _regF64(imlInstruction->op_r_name.regR); +// if (name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0 + 32)) +// { +// x64Gen_movupd_memReg128_xmmReg(x64GenContext, fprReg, X86_REG_ESP, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * (name - PPCREC_NAME_FPR0)); +// } +// else if (name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0 + 8)) +// { +// x64Gen_movupd_memReg128_xmmReg(x64GenContext, fprReg, X86_REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0)); +// } +// else +// { +// cemu_assert_debug(false); +// } +//} + uint8* codeMemoryBlock = nullptr; sint32 codeMemoryBlockIndex = 0; sint32 codeMemoryBlockSize = 0; diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp index 0942842de..8db27e41e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp 
+++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp @@ -5,11 +5,7 @@ #include "asm/x64util.h" // for recompiler_fres / frsqrte -uint32 _regF64(IMLReg r) -{ - cemu_assert_debug(r.GetRegFormat() == IMLRegFormat::F64); - return (uint32)r.GetRegID(); -} +uint32 _regF64(IMLReg physReg); uint32 _regI32(IMLReg r) { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h index f1820f570..0f1a0803b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h @@ -15,8 +15,8 @@ bool IMLAnalyzer_CanTypeWriteCR(IMLInstruction* imlInstruction); // optimizer passes // todo - rename -bool PPCRecompiler_reduceNumberOfFPRRegisters(struct ppcImlGenContext_t* ppcImlGenContext); -bool PPCRecompiler_manageFPRRegisters(struct ppcImlGenContext_t* ppcImlGenContext); +//bool PPCRecompiler_reduceNumberOfFPRRegisters(struct ppcImlGenContext_t* ppcImlGenContext); +//bool PPCRecompiler_manageFPRRegisters(struct ppcImlGenContext_t* ppcImlGenContext); void PPCRecompiler_optimizeDirectFloatCopies(struct ppcImlGenContext_t* ppcImlGenContext); void PPCRecompiler_optimizeDirectIntegerCopies(struct ppcImlGenContext_t* ppcImlGenContext); void PPCRecompiler_optimizePSQLoadAndStore(struct ppcImlGenContext_t* ppcImlGenContext); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp index 9511a5a7c..a6b4925c8 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -41,22 +41,36 @@ const char* IMLDebug_GetOpcodeName(const IMLInstruction* iml) return _tempOpcodename; } -void IMLDebug_AppendRegisterParam(StringBuf& strOutput, IMLReg virtualRegister, bool isLast = false) +std::string IMLDebug_GetRegName(IMLReg r) { - uint32 regId = virtualRegister.GetRegID(); - DEBUG_BREAK; // todo (print type) - if (isLast) + std::string regName; + uint32 regId = r.GetRegID(); + switch (r.GetRegFormat()) { - 
if (regId < 10) - strOutput.addFmt("t{} ", regId); - else - strOutput.addFmt("t{}", regId); - return; + case IMLRegFormat::F32: + regName.append("f"); + break; + case IMLRegFormat::F64: + regName.append("fd"); + break; + case IMLRegFormat::I32: + regName.append("i"); + break; + case IMLRegFormat::I64: + regName.append("r"); + break; + default: + DEBUG_BREAK; } - if (regId < 10) - strOutput.addFmt("t{} , ", regId); - else - strOutput.addFmt("t{}, ", regId); + regName.append(fmt::format("{}", regId)); + return regName; +} + +void IMLDebug_AppendRegisterParam(StringBuf& strOutput, IMLReg virtualRegister, bool isLast = false) +{ + strOutput.add(IMLDebug_GetRegName(virtualRegister)); + if (!isLast) + strOutput.add(", "); } void IMLDebug_AppendS32Param(StringBuf& strOutput, sint32 val, bool isLast = false) @@ -149,12 +163,6 @@ std::string IMLDebug_GetConditionName(IMLCondition cond) return "ukn"; } -std::string IMLDebug_GetRegName(IMLReg r) -{ - cemu_assert_unimplemented(); - return ""; -} - void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool printLivenessRangeInfo) { StringBuf strOutput(1024); @@ -197,19 +205,24 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool if (inst.type == PPCREC_IML_TYPE_R_NAME || inst.type == PPCREC_IML_TYPE_NAME_R) { if (inst.type == PPCREC_IML_TYPE_R_NAME) - strOutput.add("LD_NAME"); + strOutput.add("R_NAME"); else - strOutput.add("ST_NAME"); + strOutput.add("NAME_R"); while ((sint32)strOutput.getLen() < lineOffsetParameters) strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_name.regR); + if(inst.type == PPCREC_IML_TYPE_R_NAME) + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_name.regR); - strOutput.addFmt("name_{} (", inst.op_r_name.regR.GetRegID()); + strOutput.add("name_"); if (inst.op_r_name.name >= PPCREC_NAME_R0 && inst.op_r_name.name < (PPCREC_NAME_R0 + 999)) { strOutput.addFmt("r{}", inst.op_r_name.name - PPCREC_NAME_R0); } + else if
(inst.op_r_name.name >= PPCREC_NAME_FPR0 && inst.op_r_name.name < (PPCREC_NAME_FPR0 + 999)) + { + strOutput.addFmt("f{}", inst.op_r_name.name - PPCREC_NAME_FPR0); + } else if (inst.op_r_name.name >= PPCREC_NAME_SPR0 && inst.op_r_name.name < (PPCREC_NAME_SPR0 + 999)) { strOutput.addFmt("spr{}", inst.op_r_name.name - PPCREC_NAME_SPR0); @@ -227,8 +240,15 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool else if (inst.op_r_name.name == PPCREC_NAME_CPU_MEMRES_VAL) strOutput.add("cpuReservation.value"); else - strOutput.add("ukn"); - strOutput.add(")"); + { + strOutput.addFmt("ukn{}", inst.op_r_name.name); + } + if (inst.type != PPCREC_IML_TYPE_R_NAME) + { + strOutput.add(", "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_name.regR, true); + } + } else if (inst.type == PPCREC_IML_TYPE_R_R) { @@ -281,7 +301,7 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool } else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) { - strOutput.add("CJUMP2 "); + strOutput.add("CJUMP "); while ((sint32)strOutput.getLen() < lineOffsetParameters) strOutput.add(" "); IMLDebug_AppendRegisterParam(strOutput, inst.op_conditional_jump.registerBool, true); @@ -342,9 +362,9 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool IMLDebug_AppendRegisterParam(strOutput, inst.op_storeLoad.registerData); if (inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED) - strOutput.addFmt("[t{}+t{}]", inst.op_storeLoad.registerMem.GetRegID(), inst.op_storeLoad.registerMem2.GetRegID()); + strOutput.addFmt("[{}+{}]", IMLDebug_GetRegName(inst.op_storeLoad.registerMem), IMLDebug_GetRegName(inst.op_storeLoad.registerMem2)); else - strOutput.addFmt("[t{}+{}]", inst.op_storeLoad.registerMem.GetRegID(), inst.op_storeLoad.immS32); + strOutput.addFmt("[{}+{}]", IMLDebug_GetRegName(inst.op_storeLoad.registerMem), inst.op_storeLoad.immS32); } else if (inst.type ==
PPCREC_IML_TYPE_ATOMIC_CMP_STORE) { @@ -366,7 +386,7 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool { if (inst.operation == PPCREC_IML_MACRO_B_TO_REG) { - strOutput.addFmt("MACRO B_TO_REG t{}", inst.op_macro.param); + strOutput.addFmt("MACRO B_TO_REG {}", IMLDebug_GetRegName(inst.op_macro.paramReg)); } else if (inst.operation == PPCREC_IML_MACRO_BL) { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index 61939a244..d50ed1052 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -638,78 +638,64 @@ void IMLInstruction::RewriteGPR(const std::unordered_map& tr } else if (type == PPCREC_IML_TYPE_FPR_R_NAME) { - + op_r_name.regR = replaceRegisterIdMultiple(op_r_name.regR, translationTable); } else if (type == PPCREC_IML_TYPE_FPR_NAME_R) { - + op_r_name.regR = replaceRegisterIdMultiple(op_r_name.regR, translationTable); } else if (type == PPCREC_IML_TYPE_FPR_LOAD) { - if (op_storeLoad.registerMem.IsValid()) - { - op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); - } - if (op_storeLoad.registerGQR.IsValid()) - { - op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable); - } + op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable); + op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); + op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable); } else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED) { - if (op_storeLoad.registerMem.IsValid()) - { - op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); - } - if (op_storeLoad.registerMem2.IsValid()) - { - op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, 
translationTable); - } - if (op_storeLoad.registerGQR.IsValid()) - { - op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable); - } + op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable); + op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); + op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable); + op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable); } else if (type == PPCREC_IML_TYPE_FPR_STORE) { - if (op_storeLoad.registerMem.IsValid()) - { - op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); - } - if (op_storeLoad.registerGQR.IsValid()) - { - op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable); - } + op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable); + op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); + op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable); } else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED) { - if (op_storeLoad.registerMem.IsValid()) - { - op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); - } - if (op_storeLoad.registerMem2.IsValid()) - { - op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable); - } - if (op_storeLoad.registerGQR.IsValid()) - { - op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable); - } + op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable); + op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); + op_storeLoad.registerMem2 = 
replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable); + op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable); + } + else if (type == PPCREC_IML_TYPE_FPR_R) + { + op_fpr_r.regR = replaceRegisterIdMultiple(op_fpr_r.regR, translationTable); } else if (type == PPCREC_IML_TYPE_FPR_R_R) { + op_fpr_r_r.regR = replaceRegisterIdMultiple(op_fpr_r_r.regR, translationTable); + op_fpr_r_r.regA = replaceRegisterIdMultiple(op_fpr_r_r.regA, translationTable); } else if (type == PPCREC_IML_TYPE_FPR_R_R_R) { + op_fpr_r_r_r.regR = replaceRegisterIdMultiple(op_fpr_r_r_r.regR, translationTable); + op_fpr_r_r_r.regA = replaceRegisterIdMultiple(op_fpr_r_r_r.regA, translationTable); + op_fpr_r_r_r.regB = replaceRegisterIdMultiple(op_fpr_r_r_r.regB, translationTable); } else if (type == PPCREC_IML_TYPE_FPR_R_R_R_R) { - } - else if (type == PPCREC_IML_TYPE_FPR_R) - { + op_fpr_r_r_r_r.regR = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regR, translationTable); + op_fpr_r_r_r_r.regA = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regA, translationTable); + op_fpr_r_r_r_r.regB = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regB, translationTable); + op_fpr_r_r_r_r.regC = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regC, translationTable); } else if (type == PPCREC_IML_TYPE_FPR_COMPARE) { + op_fpr_compare.regA = replaceRegisterIdMultiple(op_fpr_compare.regA, translationTable); + op_fpr_compare.regB = replaceRegisterIdMultiple(op_fpr_compare.regB, translationTable); op_fpr_compare.regR = replaceRegisterIdMultiple(op_fpr_compare.regR, translationTable); } else diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index a3f0f652d..8b49cd22e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -388,6 +388,7 @@ struct IMLUsedRegisters template void ForEachAccessedGPR(Fn F) const { + // GPRs if (readGPR1.IsValid()) 
F(readGPR1, false); if (readGPR2.IsValid()) @@ -398,22 +399,33 @@ struct IMLUsedRegisters F(writtenGPR1, true); if (writtenGPR2.IsValid()) F(writtenGPR2, true); - } - - bool HasSameBaseFPRRegId(IMLRegID regId) const - { - if (readFPR1.IsValid() && readFPR1.GetRegID() == regId) - return true; - if (readFPR2.IsValid() && readFPR2.GetRegID() == regId) - return true; - if (readFPR3.IsValid() && readFPR3.GetRegID() == regId) - return true; - if (readFPR4.IsValid() && readFPR4.GetRegID() == regId) - return true; - if (writtenFPR1.IsValid() && writtenFPR1.GetRegID() == regId) - return true; - return false; - } + // FPRs + if (readFPR1.IsValid()) + F(readFPR1, false); + if (readFPR2.IsValid()) + F(readFPR2, false); + if (readFPR3.IsValid()) + F(readFPR3, false); + if (readFPR4.IsValid()) + F(readFPR4, false); + if (writtenFPR1.IsValid()) + F(writtenFPR1, true); + } + + //bool HasSameBaseFPRRegId(IMLRegID regId) const + //{ + // if (readFPR1.IsValid() && readFPR1.GetRegID() == regId) + // return true; + // if (readFPR2.IsValid() && readFPR2.GetRegID() == regId) + // return true; + // if (readFPR3.IsValid() && readFPR3.GetRegID() == regId) + // return true; + // if (readFPR4.IsValid() && readFPR4.GetRegID() == regId) + // return true; + // if (writtenFPR1.IsValid() && writtenFPR1.GetRegID() == regId) + // return true; + // return false; + //} }; struct IMLInstruction @@ -765,4 +777,11 @@ struct IMLInstruction void ReplaceFPRs(IMLReg fprRegisterSearched[4], IMLReg fprRegisterReplaced[4]); void ReplaceFPR(IMLRegID fprRegisterSearched, IMLRegID fprRegisterReplaced); +}; + +// architecture specific constants +namespace IMLArchX86 +{ + static constexpr int PHYSREG_GPR_BASE = 0; + static constexpr int PHYSREG_FPR_BASE = 16; }; \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp index 568c0b791..b9449c949 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp +++ 
b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp @@ -6,369 +6,369 @@ #include "../PPCRecompilerIml.h" #include "../BackendX64/BackendX64.h" -bool _RegExceedsFPRSpace(IMLReg r) -{ - if (r.IsInvalid()) - return false; - if (r.GetRegID() >= PPC_X64_FPR_USABLE_REGISTERS) - return true; - return false; -} +//bool _RegExceedsFPRSpace(IMLReg r) +//{ +// if (r.IsInvalid()) +// return false; +// if (r.GetRegID() >= PPC_X64_FPR_USABLE_REGISTERS) +// return true; +// return false; +//} IMLReg _FPRRegFromID(IMLRegID regId) { return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, regId); } -bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenContext) -{ - // only xmm0 to xmm14 may be used, xmm15 is reserved - // this method will reduce the number of fpr registers used - // inefficient algorithm for optimizing away excess registers - // we simply load, use and store excess registers into other unused registers when we need to - // first we remove all name load and store instructions that involve out-of-bounds registers - for (IMLSegment* segIt : ppcImlGenContext->segmentList2) - { - size_t imlIndex = 0; - while( imlIndex < segIt->imlList.size() ) - { - IMLInstruction& imlInstructionItr = segIt->imlList[imlIndex]; - if( imlInstructionItr.type == PPCREC_IML_TYPE_FPR_R_NAME || imlInstructionItr.type == PPCREC_IML_TYPE_FPR_NAME_R ) - { - if(_RegExceedsFPRSpace(imlInstructionItr.op_r_name.regR)) - { - imlInstructionItr.make_no_op(); - } - } - imlIndex++; - } - } - // replace registers - for (IMLSegment* segIt : ppcImlGenContext->segmentList2) - { - size_t imlIndex = 0; - while( imlIndex < segIt->imlList.size() ) - { - IMLUsedRegisters registersUsed; - while( true ) - { - segIt->imlList[imlIndex].CheckRegisterUsage(&registersUsed); - if(_RegExceedsFPRSpace(registersUsed.readFPR1) || _RegExceedsFPRSpace(registersUsed.readFPR2) || _RegExceedsFPRSpace(registersUsed.readFPR3) || _RegExceedsFPRSpace(registersUsed.readFPR4) ||
_RegExceedsFPRSpace(registersUsed.writtenFPR1) ) - { - // get index of register to replace - sint32 fprToReplace = -1; - if(_RegExceedsFPRSpace(registersUsed.readFPR1) ) - fprToReplace = registersUsed.readFPR1.GetRegID(); - else if(_RegExceedsFPRSpace(registersUsed.readFPR2) ) - fprToReplace = registersUsed.readFPR2.GetRegID(); - else if (_RegExceedsFPRSpace(registersUsed.readFPR3)) - fprToReplace = registersUsed.readFPR3.GetRegID(); - else if (_RegExceedsFPRSpace(registersUsed.readFPR4)) - fprToReplace = registersUsed.readFPR4.GetRegID(); - else if(_RegExceedsFPRSpace(registersUsed.writtenFPR1) ) - fprToReplace = registersUsed.writtenFPR1.GetRegID(); - if (fprToReplace >= 0) - { - // generate mask of useable registers - uint8 useableRegisterMask = 0x7F; // lowest bit is fpr register 0 - if (registersUsed.readFPR1.IsValid()) - useableRegisterMask &= ~(1 << (registersUsed.readFPR1.GetRegID())); - if (registersUsed.readFPR2.IsValid()) - useableRegisterMask &= ~(1 << (registersUsed.readFPR2.GetRegID())); - if (registersUsed.readFPR3.IsValid()) - useableRegisterMask &= ~(1 << (registersUsed.readFPR3.GetRegID())); - if (registersUsed.readFPR4.IsValid()) - useableRegisterMask &= ~(1 << (registersUsed.readFPR4.GetRegID())); - if (registersUsed.writtenFPR1.IsValid()) - useableRegisterMask &= ~(1 << (registersUsed.writtenFPR1.GetRegID())); - // get highest unused register index (0-6 range) - sint32 unusedRegisterIndex = -1; - for (sint32 f = 0; f < PPC_X64_FPR_USABLE_REGISTERS; f++) - { - if (useableRegisterMask & (1 << f)) - { - unusedRegisterIndex = f; - } - } - if (unusedRegisterIndex == -1) - assert_dbg(); - // determine if the placeholder register is actually used (if not we must not load/store it) - uint32 unusedRegisterName = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex]; - bool replacedRegisterIsUsed = true; - if (unusedRegisterName >= PPCREC_NAME_FPR0 && unusedRegisterName < (PPCREC_NAME_FPR0 + 32)) - { - replacedRegisterIsUsed = 
segIt->ppcFPRUsed[unusedRegisterName - PPCREC_NAME_FPR0]; - } - // replace registers that are out of range - segIt->imlList[imlIndex].ReplaceFPR(fprToReplace, unusedRegisterIndex); - // add load/store name after instruction - PPCRecompiler_pushBackIMLInstructions(segIt, imlIndex + 1, 2); - // add load/store before current instruction - PPCRecompiler_pushBackIMLInstructions(segIt, imlIndex, 2); - // name_unusedRegister = unusedRegister - IMLInstruction* imlInstructionItr = segIt->imlList.data() + (imlIndex + 0); - memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); - if (replacedRegisterIsUsed) - { - imlInstructionItr->type = PPCREC_IML_TYPE_FPR_NAME_R; - imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex); - imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex]; - } - else - imlInstructionItr->make_no_op(); - imlInstructionItr = segIt->imlList.data() + (imlIndex + 1); - memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); - imlInstructionItr->type = PPCREC_IML_TYPE_FPR_R_NAME; - imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex); - imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[fprToReplace]; - // name_gprToReplace = unusedRegister - imlInstructionItr = segIt->imlList.data() + (imlIndex + 3); - memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); - imlInstructionItr->type = PPCREC_IML_TYPE_FPR_NAME_R; - imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex); - imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[fprToReplace]; - // unusedRegister = name_unusedRegister - imlInstructionItr = segIt->imlList.data() + (imlIndex + 4); - memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); - if (replacedRegisterIsUsed) - { - imlInstructionItr->type = 
PPCREC_IML_TYPE_FPR_R_NAME; - imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex); - imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex]; - } - else - imlInstructionItr->make_no_op(); - } - } - else - break; - } - imlIndex++; - } - } - return true; -} - -typedef struct -{ - bool isActive; - uint32 virtualReg; - sint32 lastUseIndex; -}ppcRecRegisterMapping_t; - -typedef struct -{ - ppcRecRegisterMapping_t currentMapping[PPC_X64_FPR_USABLE_REGISTERS]; - sint32 ppcRegToMapping[64]; - sint32 currentUseIndex; -}ppcRecManageRegisters_t; - -ppcRecRegisterMapping_t* PPCRecompiler_findAvailableRegisterDepr(ppcRecManageRegisters_t* rCtx, IMLUsedRegisters* instructionUsedRegisters) -{ - // find free register - for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++) - { - if (rCtx->currentMapping[i].isActive == false) - { - rCtx->currentMapping[i].isActive = true; - rCtx->currentMapping[i].virtualReg = -1; - rCtx->currentMapping[i].lastUseIndex = rCtx->currentUseIndex; - return rCtx->currentMapping + i; - } - } - // all registers are used - return nullptr; -} - -ppcRecRegisterMapping_t* PPCRecompiler_findUnloadableRegister(ppcRecManageRegisters_t* rCtx, IMLUsedRegisters* instructionUsedRegisters, uint32 unloadLockedMask) -{ - // find unloadable register (with lowest lastUseIndex) - sint32 unloadIndex = -1; - sint32 unloadIndexLastUse = 0x7FFFFFFF; - for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++) - { - if (rCtx->currentMapping[i].isActive == false) - continue; - if( (unloadLockedMask&(1<currentMapping[i].virtualReg; - bool isReserved = instructionUsedRegisters->HasSameBaseFPRRegId(virtualReg); - if (isReserved) - continue; - if (rCtx->currentMapping[i].lastUseIndex < unloadIndexLastUse) - { - unloadIndexLastUse = rCtx->currentMapping[i].lastUseIndex; - unloadIndex = i; - } - } - cemu_assert(unloadIndex != -1); - return rCtx->currentMapping + 
unloadIndex; -} - -bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenContext, sint32 segmentIndex) -{ - ppcRecManageRegisters_t rCtx = { 0 }; - for (sint32 i = 0; i < 64; i++) - rCtx.ppcRegToMapping[i] = -1; - IMLSegment* imlSegment = ppcImlGenContext->segmentList2[segmentIndex]; - size_t idx = 0; - sint32 currentUseIndex = 0; - IMLUsedRegisters registersUsed; - while (idx < imlSegment->imlList.size()) - { - IMLInstruction& idxInst = imlSegment->imlList[idx]; - if (idxInst.IsSuffixInstruction()) - break; - idxInst.CheckRegisterUsage(®istersUsed); - IMLReg fprMatch[4]; - IMLReg fprReplace[4]; - fprMatch[0] = IMLREG_INVALID; - fprMatch[1] = IMLREG_INVALID; - fprMatch[2] = IMLREG_INVALID; - fprMatch[3] = IMLREG_INVALID; - fprReplace[0] = IMLREG_INVALID; - fprReplace[1] = IMLREG_INVALID; - fprReplace[2] = IMLREG_INVALID; - fprReplace[3] = IMLREG_INVALID; - // generate a mask of registers that we may not free - sint32 numReplacedOperands = 0; - uint32 unloadLockedMask = 0; - for (sint32 f = 0; f < 5; f++) - { - IMLReg virtualFpr; - if (f == 0) - virtualFpr = registersUsed.readFPR1; - else if (f == 1) - virtualFpr = registersUsed.readFPR2; - else if (f == 2) - virtualFpr = registersUsed.readFPR3; - else if (f == 3) - virtualFpr = registersUsed.readFPR4; - else if (f == 4) - virtualFpr = registersUsed.writtenFPR1; - if(virtualFpr.IsInvalid()) - continue; - cemu_assert_debug(virtualFpr.GetBaseFormat() == IMLRegFormat::F64); - cemu_assert_debug(virtualFpr.GetRegFormat() == IMLRegFormat::F64); - cemu_assert_debug(virtualFpr.GetRegID() < 64); - // check if this virtual FPR is already loaded in any real register - ppcRecRegisterMapping_t* regMapping; - if (rCtx.ppcRegToMapping[virtualFpr.GetRegID()] == -1) - { - // not loaded - // find available register - while (true) - { - regMapping = PPCRecompiler_findAvailableRegisterDepr(&rCtx, ®istersUsed); - if (regMapping == NULL) - { - // unload least recently used register and try again - 
ppcRecRegisterMapping_t* unloadRegMapping = PPCRecompiler_findUnloadableRegister(&rCtx, ®istersUsed, unloadLockedMask); - // mark as locked - unloadLockedMask |= (1<<(unloadRegMapping- rCtx.currentMapping)); - // create unload instruction - PPCRecompiler_pushBackIMLInstructions(imlSegment, idx, 1); - IMLInstruction* imlInstructionTemp = imlSegment->imlList.data() + idx; - memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction)); - imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_NAME_R; - imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionTemp->op_r_name.regR = _FPRRegFromID((uint8)(unloadRegMapping - rCtx.currentMapping)); - imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unloadRegMapping->virtualReg]; - idx++; - // update mapping - unloadRegMapping->isActive = false; - rCtx.ppcRegToMapping[unloadRegMapping->virtualReg] = -1; - } - else - break; - } - // create load instruction - PPCRecompiler_pushBackIMLInstructions(imlSegment, idx, 1); - IMLInstruction* imlInstructionTemp = imlSegment->imlList.data() + idx; - memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction)); - imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_R_NAME; - imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionTemp->op_r_name.regR = _FPRRegFromID((uint8)(regMapping-rCtx.currentMapping)); - imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[virtualFpr.GetRegID()]; - idx++; - // update mapping - regMapping->virtualReg = virtualFpr.GetRegID(); - rCtx.ppcRegToMapping[virtualFpr.GetRegID()] = (sint32)(regMapping - rCtx.currentMapping); - regMapping->lastUseIndex = rCtx.currentUseIndex; - rCtx.currentUseIndex++; - } - else - { - regMapping = rCtx.currentMapping + rCtx.ppcRegToMapping[virtualFpr.GetRegID()]; - regMapping->lastUseIndex = rCtx.currentUseIndex; - rCtx.currentUseIndex++; - } - // replace FPR - bool entryFound = false; - for (sint32 t = 0; t < numReplacedOperands; t++) - { - if (fprMatch[t].IsValid() && 
fprMatch[t].GetRegID() == virtualFpr.GetRegID()) - { - cemu_assert_debug(fprReplace[t] == _FPRRegFromID(regMapping - rCtx.currentMapping)); - entryFound = true; - break; - } - } - if (entryFound == false) - { - cemu_assert_debug(numReplacedOperands != 4); - fprMatch[numReplacedOperands] = virtualFpr; - fprReplace[numReplacedOperands] = _FPRRegFromID(regMapping - rCtx.currentMapping); - numReplacedOperands++; - } - } - if (numReplacedOperands > 0) - { - imlSegment->imlList[idx].ReplaceFPRs(fprMatch, fprReplace); - } - // next - idx++; - } - // count loaded registers - sint32 numLoadedRegisters = 0; - for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++) - { - if (rCtx.currentMapping[i].isActive) - numLoadedRegisters++; - } - // store all loaded registers - if (numLoadedRegisters > 0) - { - PPCRecompiler_pushBackIMLInstructions(imlSegment, idx, numLoadedRegisters); - for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++) - { - if (rCtx.currentMapping[i].isActive == false) - continue; - IMLInstruction* imlInstructionTemp = imlSegment->imlList.data() + idx; - memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction)); - imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_NAME_R; - imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionTemp->op_r_name.regR = _FPRRegFromID(i); - imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[rCtx.currentMapping[i].virtualReg]; - idx++; - } - } - return true; -} - -bool PPCRecompiler_manageFPRRegisters(ppcImlGenContext_t* ppcImlGenContext) -{ - for (sint32 s = 0; s < ppcImlGenContext->segmentList2.size(); s++) - { - if (PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext, s) == false) - return false; - } - return true; -} +//bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenContext) +//{ +// // only xmm0 to xmm14 may be used, xmm15 is reserved +// // this method will reduce the number of fpr registers used +// // inefficient algorithm for optimizing away excess 
registers +// // we simply load, use and store excess registers into other unused registers when we need to +// // first we remove all name load and store instructions that involve out-of-bounds registers +// for (IMLSegment* segIt : ppcImlGenContext->segmentList2) +// { +// size_t imlIndex = 0; +// while( imlIndex < segIt->imlList.size() ) +// { +// IMLInstruction& imlInstructionItr = segIt->imlList[imlIndex]; +// if( imlInstructionItr.type == PPCREC_IML_TYPE_FPR_R_NAME || imlInstructionItr.type == PPCREC_IML_TYPE_FPR_NAME_R ) +// { +// if(_RegExceedsFPRSpace(imlInstructionItr.op_r_name.regR)) +// { +// imlInstructionItr.make_no_op(); +// } +// } +// imlIndex++; +// } +// } +// // replace registers +// for (IMLSegment* segIt : ppcImlGenContext->segmentList2) +// { +// size_t imlIndex = 0; +// while( imlIndex < segIt->imlList.size() ) +// { +// IMLUsedRegisters registersUsed; +// while( true ) +// { +// segIt->imlList[imlIndex].CheckRegisterUsage(®istersUsed); +// if(_RegExceedsFPRSpace(registersUsed.readFPR1) || _RegExceedsFPRSpace(registersUsed.readFPR2) || _RegExceedsFPRSpace(registersUsed.readFPR3) || _RegExceedsFPRSpace(registersUsed.readFPR4) || _RegExceedsFPRSpace(registersUsed.writtenFPR1) ) +// { +// // get index of register to replace +// sint32 fprToReplace = -1; +// if(_RegExceedsFPRSpace(registersUsed.readFPR1) ) +// fprToReplace = registersUsed.readFPR1.GetRegID(); +// else if(_RegExceedsFPRSpace(registersUsed.readFPR2) ) +// fprToReplace = registersUsed.readFPR2.GetRegID(); +// else if (_RegExceedsFPRSpace(registersUsed.readFPR3)) +// fprToReplace = registersUsed.readFPR3.GetRegID(); +// else if (_RegExceedsFPRSpace(registersUsed.readFPR4)) +// fprToReplace = registersUsed.readFPR4.GetRegID(); +// else if(_RegExceedsFPRSpace(registersUsed.writtenFPR1) ) +// fprToReplace = registersUsed.writtenFPR1.GetRegID(); +// if (fprToReplace >= 0) +// { +// // generate mask of useable registers +// uint8 useableRegisterMask = 0x7F; // lowest bit is fpr register 
0 +// if (registersUsed.readFPR1.IsValid()) +// useableRegisterMask &= ~(1 << (registersUsed.readFPR1.GetRegID())); +// if (registersUsed.readFPR2.IsValid()) +// useableRegisterMask &= ~(1 << (registersUsed.readFPR2.GetRegID())); +// if (registersUsed.readFPR3.IsValid()) +// useableRegisterMask &= ~(1 << (registersUsed.readFPR3.GetRegID())); +// if (registersUsed.readFPR4.IsValid()) +// useableRegisterMask &= ~(1 << (registersUsed.readFPR4.GetRegID())); +// if (registersUsed.writtenFPR1.IsValid()) +// useableRegisterMask &= ~(1 << (registersUsed.writtenFPR1.GetRegID())); +// // get highest unused register index (0-6 range) +// sint32 unusedRegisterIndex = -1; +// for (sint32 f = 0; f < PPC_X64_FPR_USABLE_REGISTERS; f++) +// { +// if (useableRegisterMask & (1 << f)) +// { +// unusedRegisterIndex = f; +// } +// } +// if (unusedRegisterIndex == -1) +// assert_dbg(); +// // determine if the placeholder register is actually used (if not we must not load/store it) +// uint32 unusedRegisterName = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex]; +// bool replacedRegisterIsUsed = true; +// if (unusedRegisterName >= PPCREC_NAME_FPR0 && unusedRegisterName < (PPCREC_NAME_FPR0 + 32)) +// { +// replacedRegisterIsUsed = segIt->ppcFPRUsed[unusedRegisterName - PPCREC_NAME_FPR0]; +// } +// // replace registers that are out of range +// segIt->imlList[imlIndex].ReplaceFPR(fprToReplace, unusedRegisterIndex); +// // add load/store name after instruction +// PPCRecompiler_pushBackIMLInstructions(segIt, imlIndex + 1, 2); +// // add load/store before current instruction +// PPCRecompiler_pushBackIMLInstructions(segIt, imlIndex, 2); +// // name_unusedRegister = unusedRegister +// IMLInstruction* imlInstructionItr = segIt->imlList.data() + (imlIndex + 0); +// memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); +// if (replacedRegisterIsUsed) +// { +// imlInstructionItr->type = PPCREC_IML_TYPE_FPR_NAME_R; +// imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; +// 
imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex); +// imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex]; +// } +// else +// imlInstructionItr->make_no_op(); +// imlInstructionItr = segIt->imlList.data() + (imlIndex + 1); +// memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); +// imlInstructionItr->type = PPCREC_IML_TYPE_FPR_R_NAME; +// imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; +// imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex); +// imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[fprToReplace]; +// // name_gprToReplace = unusedRegister +// imlInstructionItr = segIt->imlList.data() + (imlIndex + 3); +// memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); +// imlInstructionItr->type = PPCREC_IML_TYPE_FPR_NAME_R; +// imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; +// imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex); +// imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[fprToReplace]; +// // unusedRegister = name_unusedRegister +// imlInstructionItr = segIt->imlList.data() + (imlIndex + 4); +// memset(imlInstructionItr, 0x00, sizeof(IMLInstruction)); +// if (replacedRegisterIsUsed) +// { +// imlInstructionItr->type = PPCREC_IML_TYPE_FPR_R_NAME; +// imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; +// imlInstructionItr->op_r_name.regR = _FPRRegFromID(unusedRegisterIndex); +// imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex]; +// } +// else +// imlInstructionItr->make_no_op(); +// } +// } +// else +// break; +// } +// imlIndex++; +// } +// } +// return true; +//} +// +//typedef struct +//{ +// bool isActive; +// uint32 virtualReg; +// sint32 lastUseIndex; +//}ppcRecRegisterMapping_t; +// +//typedef struct +//{ +// ppcRecRegisterMapping_t currentMapping[PPC_X64_FPR_USABLE_REGISTERS]; +// sint32 ppcRegToMapping[64]; +// sint32 currentUseIndex; 
+//}ppcRecManageRegisters_t; +// +//ppcRecRegisterMapping_t* PPCRecompiler_findAvailableRegisterDepr(ppcRecManageRegisters_t* rCtx, IMLUsedRegisters* instructionUsedRegisters) +//{ +// // find free register +// for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++) +// { +// if (rCtx->currentMapping[i].isActive == false) +// { +// rCtx->currentMapping[i].isActive = true; +// rCtx->currentMapping[i].virtualReg = -1; +// rCtx->currentMapping[i].lastUseIndex = rCtx->currentUseIndex; +// return rCtx->currentMapping + i; +// } +// } +// // all registers are used +// return nullptr; +//} +// +//ppcRecRegisterMapping_t* PPCRecompiler_findUnloadableRegister(ppcRecManageRegisters_t* rCtx, IMLUsedRegisters* instructionUsedRegisters, uint32 unloadLockedMask) +//{ +// // find unloadable register (with lowest lastUseIndex) +// sint32 unloadIndex = -1; +// sint32 unloadIndexLastUse = 0x7FFFFFFF; +// for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++) +// { +// if (rCtx->currentMapping[i].isActive == false) +// continue; +// if( (unloadLockedMask&(1<currentMapping[i].virtualReg; +// bool isReserved = instructionUsedRegisters->HasSameBaseFPRRegId(virtualReg); +// if (isReserved) +// continue; +// if (rCtx->currentMapping[i].lastUseIndex < unloadIndexLastUse) +// { +// unloadIndexLastUse = rCtx->currentMapping[i].lastUseIndex; +// unloadIndex = i; +// } +// } +// cemu_assert(unloadIndex != -1); +// return rCtx->currentMapping + unloadIndex; +//} +// +//bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenContext, sint32 segmentIndex) +//{ +// ppcRecManageRegisters_t rCtx = { 0 }; +// for (sint32 i = 0; i < 64; i++) +// rCtx.ppcRegToMapping[i] = -1; +// IMLSegment* imlSegment = ppcImlGenContext->segmentList2[segmentIndex]; +// size_t idx = 0; +// sint32 currentUseIndex = 0; +// IMLUsedRegisters registersUsed; +// while (idx < imlSegment->imlList.size()) +// { +// IMLInstruction& idxInst = imlSegment->imlList[idx]; +// if 
(idxInst.IsSuffixInstruction()) +// break; +// idxInst.CheckRegisterUsage(®istersUsed); +// IMLReg fprMatch[4]; +// IMLReg fprReplace[4]; +// fprMatch[0] = IMLREG_INVALID; +// fprMatch[1] = IMLREG_INVALID; +// fprMatch[2] = IMLREG_INVALID; +// fprMatch[3] = IMLREG_INVALID; +// fprReplace[0] = IMLREG_INVALID; +// fprReplace[1] = IMLREG_INVALID; +// fprReplace[2] = IMLREG_INVALID; +// fprReplace[3] = IMLREG_INVALID; +// // generate a mask of registers that we may not free +// sint32 numReplacedOperands = 0; +// uint32 unloadLockedMask = 0; +// for (sint32 f = 0; f < 5; f++) +// { +// IMLReg virtualFpr; +// if (f == 0) +// virtualFpr = registersUsed.readFPR1; +// else if (f == 1) +// virtualFpr = registersUsed.readFPR2; +// else if (f == 2) +// virtualFpr = registersUsed.readFPR3; +// else if (f == 3) +// virtualFpr = registersUsed.readFPR4; +// else if (f == 4) +// virtualFpr = registersUsed.writtenFPR1; +// if(virtualFpr.IsInvalid()) +// continue; +// cemu_assert_debug(virtualFpr.GetBaseFormat() == IMLRegFormat::F64); +// cemu_assert_debug(virtualFpr.GetRegFormat() == IMLRegFormat::F64); +// cemu_assert_debug(virtualFpr.GetRegID() < 64); +// // check if this virtual FPR is already loaded in any real register +// ppcRecRegisterMapping_t* regMapping; +// if (rCtx.ppcRegToMapping[virtualFpr.GetRegID()] == -1) +// { +// // not loaded +// // find available register +// while (true) +// { +// regMapping = PPCRecompiler_findAvailableRegisterDepr(&rCtx, ®istersUsed); +// if (regMapping == NULL) +// { +// // unload least recently used register and try again +// ppcRecRegisterMapping_t* unloadRegMapping = PPCRecompiler_findUnloadableRegister(&rCtx, ®istersUsed, unloadLockedMask); +// // mark as locked +// unloadLockedMask |= (1<<(unloadRegMapping- rCtx.currentMapping)); +// // create unload instruction +// PPCRecompiler_pushBackIMLInstructions(imlSegment, idx, 1); +// IMLInstruction* imlInstructionTemp = imlSegment->imlList.data() + idx; +// memset(imlInstructionTemp, 0x00, 
sizeof(IMLInstruction)); +// imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_NAME_R; +// imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN; +// imlInstructionTemp->op_r_name.regR = _FPRRegFromID((uint8)(unloadRegMapping - rCtx.currentMapping)); +// imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unloadRegMapping->virtualReg]; +// idx++; +// // update mapping +// unloadRegMapping->isActive = false; +// rCtx.ppcRegToMapping[unloadRegMapping->virtualReg] = -1; +// } +// else +// break; +// } +// // create load instruction +// PPCRecompiler_pushBackIMLInstructions(imlSegment, idx, 1); +// IMLInstruction* imlInstructionTemp = imlSegment->imlList.data() + idx; +// memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction)); +// imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_R_NAME; +// imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN; +// imlInstructionTemp->op_r_name.regR = _FPRRegFromID((uint8)(regMapping-rCtx.currentMapping)); +// imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[virtualFpr.GetRegID()]; +// idx++; +// // update mapping +// regMapping->virtualReg = virtualFpr.GetRegID(); +// rCtx.ppcRegToMapping[virtualFpr.GetRegID()] = (sint32)(regMapping - rCtx.currentMapping); +// regMapping->lastUseIndex = rCtx.currentUseIndex; +// rCtx.currentUseIndex++; +// } +// else +// { +// regMapping = rCtx.currentMapping + rCtx.ppcRegToMapping[virtualFpr.GetRegID()]; +// regMapping->lastUseIndex = rCtx.currentUseIndex; +// rCtx.currentUseIndex++; +// } +// // replace FPR +// bool entryFound = false; +// for (sint32 t = 0; t < numReplacedOperands; t++) +// { +// if (fprMatch[t].IsValid() && fprMatch[t].GetRegID() == virtualFpr.GetRegID()) +// { +// cemu_assert_debug(fprReplace[t] == _FPRRegFromID(regMapping - rCtx.currentMapping)); +// entryFound = true; +// break; +// } +// } +// if (entryFound == false) +// { +// cemu_assert_debug(numReplacedOperands != 4); +// fprMatch[numReplacedOperands] = virtualFpr; +// 
fprReplace[numReplacedOperands] = _FPRRegFromID(regMapping - rCtx.currentMapping); +// numReplacedOperands++; +// } +// } +// if (numReplacedOperands > 0) +// { +// imlSegment->imlList[idx].ReplaceFPRs(fprMatch, fprReplace); +// } +// // next +// idx++; +// } +// // count loaded registers +// sint32 numLoadedRegisters = 0; +// for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++) +// { +// if (rCtx.currentMapping[i].isActive) +// numLoadedRegisters++; +// } +// // store all loaded registers +// if (numLoadedRegisters > 0) +// { +// PPCRecompiler_pushBackIMLInstructions(imlSegment, idx, numLoadedRegisters); +// for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++) +// { +// if (rCtx.currentMapping[i].isActive == false) +// continue; +// IMLInstruction* imlInstructionTemp = imlSegment->imlList.data() + idx; +// memset(imlInstructionTemp, 0x00, sizeof(IMLInstruction)); +// imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_NAME_R; +// imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN; +// imlInstructionTemp->op_r_name.regR = _FPRRegFromID(i); +// imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[rCtx.currentMapping[i].virtualReg]; +// idx++; +// } +// } +// return true; +//} +// +//bool PPCRecompiler_manageFPRRegisters(ppcImlGenContext_t* ppcImlGenContext) +//{ +// for (sint32 s = 0; s < ppcImlGenContext->segmentList2.size(); s++) +// { +// if (PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext, s) == false) +// return false; +// } +// return true; +//} /* @@ -663,11 +663,13 @@ void PPCRecompiler_optimizeDirectIntegerCopies(ppcImlGenContext_t* ppcImlGenCont } } +IMLName PPCRecompilerImlGen_GetRegName(ppcImlGenContext_t* ppcImlGenContext, IMLReg reg); + sint32 _getGQRIndexFromRegister(ppcImlGenContext_t* ppcImlGenContext, IMLReg gqrReg) { if (gqrReg.IsInvalid()) return -1; - sint32 namedReg = ppcImlGenContext->mappedRegister[gqrReg.GetRegID()]; + sint32 namedReg = PPCRecompilerImlGen_GetRegName(ppcImlGenContext, gqrReg); if 
(namedReg >= (PPCREC_NAME_SPR0 + SPR_UGQR0) && namedReg <= (PPCREC_NAME_SPR0 + SPR_UGQR7)) { return namedReg - (PPCREC_NAME_SPR0 + SPR_UGQR0); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index 4b6100f72..2fb55c801 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -1100,7 +1100,7 @@ void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IML if(it.second.isProcessed) continue; IMLRegID regId = it.first; - raLivenessRange_t* range = PPCRecRA_createRangeBase(ctx.deprGenContext, regId, ctx.deprGenContext->mappedRegister[regId]); + raLivenessRange_t* range = PPCRecRA_createRangeBase(ctx.deprGenContext, regId, ctx.raParam->regIdToName.find(regId)->second); PPCRecRA_convertToMappedRanges(ctx, imlSegment, regId, range); } // fill created ranges with read/write location indices diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h index 5e0d0f044..52b203970 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h @@ -93,7 +93,8 @@ struct IMLRegisterAllocatorParameters return perTypePhysPool[stdx::to_underlying(regFormat)]; } - IMLPhysRegisterSet perTypePhysPool[stdx::to_underlying(IMLRegFormat::TYPE_COUNT)];// physicalRegisterPool; + IMLPhysRegisterSet perTypePhysPool[stdx::to_underlying(IMLRegFormat::TYPE_COUNT)]; + std::unordered_map regIdToName; }; void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLRegisterAllocatorParameters& raParam); \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h index a530c85f3..70151422e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h +++ 
b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h @@ -91,7 +91,7 @@ struct IMLSegment bool isEnterable{}; // this segment can be entered from outside the recompiler (no preloaded registers necessary) uint32 enterPPCAddress{}; // used if isEnterable is true // PPC FPR use mask - bool ppcFPRUsed[32]{}; // same as ppcGPRUsed, but for FPR + //bool ppcFPRUsed[32]{}; // same as ppcGPRUsed, but for FPR // CR use mask uint32 crBitsInput{}; // bits that are expected to be set from the previous segment (read in this segment but not overwritten) uint32 crBitsRead{}; // all bits that are read in this segment diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index 7af38c69a..07ba15db4 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -206,8 +206,19 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP // return nullptr; //} - // Large functions for testing (botw): - // 3B4049C + //if (ppcRecFunc->ppcAddress == 0x03C26844) + //{ + // __debugbreak(); + // IMLDebug_Dump(&ppcImlGenContext); + // __debugbreak(); + //} + // 31A8778 + + // Functions for testing (botw): + // 3B4049C (large with switch case) + // 30BF118 (has a bndz copy loop + some float instructions at the end) + + // emit x64 code bool x64GenerationSuccess = PPCRecompiler_generateX64Code(ppcRecFunc, &ppcImlGenContext); @@ -217,8 +228,6 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP } // collect list of PPC-->x64 entry points - cemuLog_log(LogType::Force, "[Recompiler] Successfully compiled {:08x} - {:08x} Segments: {}", ppcRecFunc->ppcAddress, ppcRecFunc->ppcAddress + ppcRecFunc->ppcSize, ppcImlGenContext.segmentList2.size()); - entryPointsOut.clear(); for(IMLSegment* imlSegment : ppcImlGenContext.segmentList2) { @@ -230,6 +239,9 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP 
entryPointsOut.emplace_back(ppcEnterOffset, x64Offset); } + + cemuLog_log(LogType::Force, "[Recompiler] Successfully compiled {:08x} - {:08x} Segments: {} Entrypoints: {}", ppcRecFunc->ppcAddress, ppcRecFunc->ppcAddress + ppcRecFunc->ppcSize, ppcImlGenContext.segmentList2.size(), entryPointsOut.size()); + return ppcRecFunc; } @@ -242,72 +254,85 @@ bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext) // if GQRs can be predicted, optimize PSQ load/stores PPCRecompiler_optimizePSQLoadAndStore(&ppcImlGenContext); - // count number of used registers - uint32 numLoadedFPRRegisters = 0; - for (uint32 i = 0; i < 255; i++) - { - if (ppcImlGenContext.mappedFPRRegister[i]) - numLoadedFPRRegisters++; - } - // insert name store instructions at the end of each segment but before branch instructions - for (IMLSegment* segIt : ppcImlGenContext.segmentList2) - { - if (segIt->imlList.size() == 0) - continue; // ignore empty segments - // analyze segment for register usage - IMLUsedRegisters registersUsed; - for (sint32 i = 0; i < segIt->imlList.size(); i++) - { - segIt->imlList[i].CheckRegisterUsage(®istersUsed); - IMLReg accessedTempReg[5]; - // intermediate FPRs - accessedTempReg[0] = registersUsed.readFPR1; - accessedTempReg[1] = registersUsed.readFPR2; - accessedTempReg[2] = registersUsed.readFPR3; - accessedTempReg[3] = registersUsed.readFPR4; - accessedTempReg[4] = registersUsed.writtenFPR1; - for (sint32 f = 0; f < 5; f++) - { - if (accessedTempReg[f].IsInvalid()) - continue; - uint32 regName = ppcImlGenContext.mappedFPRRegister[accessedTempReg[f].GetRegID()]; - if (regName >= PPCREC_NAME_FPR0 && regName < PPCREC_NAME_FPR0 + 32) - { - segIt->ppcFPRUsed[regName - PPCREC_NAME_FPR0] = true; - } - } - } - } + //for (IMLSegment* segIt : ppcImlGenContext.segmentList2) + //{ + // if (segIt->imlList.size() == 0) + // continue; // ignore empty segments + // // analyze segment for register usage + // IMLUsedRegisters registersUsed; + // for (sint32 i = 0; i < 
segIt->imlList.size(); i++) + // { + // segIt->imlList[i].CheckRegisterUsage(®istersUsed); + // IMLReg accessedTempReg[5]; + // // intermediate FPRs + // accessedTempReg[0] = registersUsed.readFPR1; + // accessedTempReg[1] = registersUsed.readFPR2; + // accessedTempReg[2] = registersUsed.readFPR3; + // accessedTempReg[3] = registersUsed.readFPR4; + // accessedTempReg[4] = registersUsed.writtenFPR1; + // for (sint32 f = 0; f < 5; f++) + // { + // if (accessedTempReg[f].IsInvalid()) + // continue; + // uint32 regName = ppcImlGenContext.mappedFPRRegister[accessedTempReg[f].GetRegID()]; + // if (regName >= PPCREC_NAME_FPR0 && regName < PPCREC_NAME_FPR0 + 32) + // { + // segIt->ppcFPRUsed[regName - PPCREC_NAME_FPR0] = true; + // } + // } + // } + //} // merge certain float load+store patterns (must happen before FPR register remapping) PPCRecompiler_optimizeDirectFloatCopies(&ppcImlGenContext); // delay byte swapping for certain load+store patterns PPCRecompiler_optimizeDirectIntegerCopies(&ppcImlGenContext); - if (numLoadedFPRRegisters > 0) - { - if (PPCRecompiler_manageFPRRegisters(&ppcImlGenContext) == false) - { - return false; - } - } + //if (numLoadedFPRRegisters > 0) + //{ + // if (PPCRecompiler_manageFPRRegisters(&ppcImlGenContext) == false) + // { + // return false; + // } + //} IMLRegisterAllocatorParameters raParam; + for (auto& it : ppcImlGenContext.mappedRegs) + raParam.regIdToName.try_emplace(it.second.GetRegID(), it.first); + auto& gprPhysPool = raParam.GetPhysRegPool(IMLRegFormat::I64); - gprPhysPool.SetAvailable(X86_REG_RAX); - gprPhysPool.SetAvailable(X86_REG_RDX); - gprPhysPool.SetAvailable(X86_REG_RBX); - gprPhysPool.SetAvailable(X86_REG_RBP); - gprPhysPool.SetAvailable(X86_REG_RSI); - gprPhysPool.SetAvailable(X86_REG_RDI); - gprPhysPool.SetAvailable(X86_REG_R8); - gprPhysPool.SetAvailable(X86_REG_R9); - gprPhysPool.SetAvailable(X86_REG_R10); - gprPhysPool.SetAvailable(X86_REG_R11); - gprPhysPool.SetAvailable(X86_REG_R12); - 
gprPhysPool.SetAvailable(X86_REG_RCX); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RAX); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RDX); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RBX); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RBP); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RSI); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RDI); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R8); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R9); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R10); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R11); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R12); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RCX); + + // add XMM registers, except XMM15 which is the temporary register + auto& fprPhysPool = raParam.GetPhysRegPool(IMLRegFormat::F64); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 0); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 1); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 2); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 3); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 4); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 5); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 6); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 7); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 8); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 9); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 10); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 11); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 12); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 13); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 14); 
IMLRegisterAllocator_AllocateRegisters(&ppcImlGenContext, raParam); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h index 7f9817aac..080ce2fa0 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h @@ -42,9 +42,12 @@ struct ppcImlGenContext_t // cycle counter uint32 cyclesSinceLastBranch; // used to track ppc cycles // temporary general purpose registers - uint32 mappedRegister[PPC_REC_MAX_VIRTUAL_GPR]; + //uint32 mappedRegister[PPC_REC_MAX_VIRTUAL_GPR]; // temporary floating point registers (single and double precision) - uint32 mappedFPRRegister[256]; + //uint32 mappedFPRRegister[256]; + + std::unordered_map mappedRegs; + // list of segments std::vector segmentList2; // code generation control diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h index 042cf0675..5d30267d5 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h @@ -102,4 +102,3 @@ bool PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext_t* ppcImlGenContext, uint32 o // IML general void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenContext); - diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index f89edfe35..f474b0156 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -134,74 +134,73 @@ void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, P basicBlockInfo.appendSegment = segMerge; } -uint32 PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) -{ - if( mappedName == PPCREC_NAME_NONE ) +IMLReg PPCRecompilerImlGen_LookupReg(ppcImlGenContext_t* ppcImlGenContext, IMLName mappedName, IMLRegFormat 
regFormat) +{ + auto it = ppcImlGenContext->mappedRegs.find(mappedName); + if (it != ppcImlGenContext->mappedRegs.end()) + return it->second; + // create new reg entry + IMLRegFormat baseFormat; + if (regFormat == IMLRegFormat::F64) + baseFormat = IMLRegFormat::F64; + else if (regFormat == IMLRegFormat::I32) + baseFormat = IMLRegFormat::I64; + else { - debug_printf("PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(): Invalid mappedName parameter\n"); - return PPC_REC_INVALID_REGISTER; + cemu_assert_suspicious(); } - for(uint32 i=0; i<(PPC_REC_MAX_VIRTUAL_GPR-1); i++) - { - if( ppcImlGenContext->mappedRegister[i] == PPCREC_NAME_NONE ) - { - ppcImlGenContext->mappedRegister[i] = mappedName; - return i; - } - } - return 0; + IMLRegID newRegId = ppcImlGenContext->mappedRegs.size(); + IMLReg newReg(baseFormat, regFormat, 0, newRegId); + ppcImlGenContext->mappedRegs.try_emplace(mappedName, newReg); + return newReg; } -uint32 PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) +IMLName PPCRecompilerImlGen_GetRegName(ppcImlGenContext_t* ppcImlGenContext, IMLReg reg) { - for(uint32 i=0; i< PPC_REC_MAX_VIRTUAL_GPR; i++) + for (auto& it : ppcImlGenContext->mappedRegs) { - if( ppcImlGenContext->mappedRegister[i] == mappedName ) - { - return i; - } + if (it.second.GetRegID() == reg.GetRegID()) + return it.first; } - return PPC_REC_INVALID_REGISTER; + cemu_assert(false); + return 0; } uint32 PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) { - if( mappedName == PPCREC_NAME_NONE ) - { - debug_printf("PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(): Invalid mappedName parameter\n"); - return PPC_REC_INVALID_REGISTER; - } - for(uint32 i=0; i<255; i++) - { - if( ppcImlGenContext->mappedFPRRegister[i] == PPCREC_NAME_NONE ) - { - ppcImlGenContext->mappedFPRRegister[i] = mappedName; - return i; - } - } + __debugbreak(); + //if( mappedName == PPCREC_NAME_NONE ) + //{ + // 
debug_printf("PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(): Invalid mappedName parameter\n"); + // return PPC_REC_INVALID_REGISTER; + //} + //for(uint32 i=0; i<255; i++) + //{ + // if( ppcImlGenContext->mappedFPRRegister[i] == PPCREC_NAME_NONE ) + // { + // ppcImlGenContext->mappedFPRRegister[i] = mappedName; + // return i; + // } + //} return 0; } uint32 PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) { - for(uint32 i=0; i<255; i++) - { - if( ppcImlGenContext->mappedFPRRegister[i] == mappedName ) - { - return i; - } - } + __debugbreak(); + //for(uint32 i=0; i<255; i++) + //{ + // if( ppcImlGenContext->mappedFPRRegister[i] == mappedName ) + // { + // return i; + // } + //} return PPC_REC_INVALID_REGISTER; } IMLReg PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) { - uint32 loadedRegisterIndex = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, mappedName); - if (loadedRegisterIndex != PPC_REC_INVALID_REGISTER) - return IMLReg(IMLRegFormat::I64, IMLRegFormat::I32, 0, loadedRegisterIndex); - - uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, mappedName); - return IMLReg(IMLRegFormat::I64, IMLRegFormat::I32, 0, registerIndex); + return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, mappedName, IMLRegFormat::I32); } IMLReg _GetRegGPR(ppcImlGenContext_t* ppcImlGenContext, uint32 index) @@ -243,14 +242,15 @@ IMLReg _GetRegTemporaryS8(ppcImlGenContext_t* ppcImlGenContext, uint32 index) */ IMLReg PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew) { - if( loadNew == false ) - { - uint32 loadedRegisterIndex = PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext, mappedName); - if( loadedRegisterIndex != PPC_REC_INVALID_REGISTER ) - return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, loadedRegisterIndex); - } - uint32 registerIndex = 
PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(ppcImlGenContext, mappedName); - return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, registerIndex); + //if( loadNew == false ) + //{ + // uint32 loadedRegisterIndex = PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext, mappedName); + // if( loadedRegisterIndex != PPC_REC_INVALID_REGISTER ) + // return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, loadedRegisterIndex); + //} + //uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(ppcImlGenContext, mappedName); + //return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, registerIndex); + return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, mappedName, IMLRegFormat::F64); } /* @@ -259,11 +259,12 @@ IMLReg PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, */ IMLReg PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) { - uint32 loadedRegisterIndex = PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext, mappedName); - if( loadedRegisterIndex != PPC_REC_INVALID_REGISTER ) - return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, loadedRegisterIndex); - uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(ppcImlGenContext, mappedName); - return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, registerIndex); + //uint32 loadedRegisterIndex = PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext, mappedName); + //if( loadedRegisterIndex != PPC_REC_INVALID_REGISTER ) + // return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, loadedRegisterIndex); + //uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(ppcImlGenContext, mappedName); + //return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, registerIndex); + return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, mappedName, IMLRegFormat::F64); } bool PPCRecompiler_canInlineFunction(MPTR functionPtr, sint32* functionInstructionCount) diff --git 
a/src/util/helpers/StringBuf.h b/src/util/helpers/StringBuf.h index 432fa7a1d..8b34e54f7 100644 --- a/src/util/helpers/StringBuf.h +++ b/src/util/helpers/StringBuf.h @@ -44,7 +44,8 @@ class StringBuf void add(std::string_view appendedStr) { - size_t remainingLen = this->limit - this->length; + if (this->length + appendedStr.size() + 1 >= this->limit) + _reserve(std::max(this->length + appendedStr.size() + 64, this->limit + this->limit / 2)); size_t copyLen = appendedStr.size(); if (remainingLen < copyLen) copyLen = remainingLen; @@ -80,6 +81,13 @@ class StringBuf } private: + void _reserve(uint32 newLimit) + { + cemu_assert_debug(newLimit > length); + this->str = (uint8*)realloc(this->str, newLimit + 4); + this->limit = newLimit; + } + uint8* str; uint32 length; /* in bytes */ uint32 limit; /* in bytes */