diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index ffd635a03c..67585ed2a9 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -82,6 +82,36 @@ X86Cond _x86Cond(IMLCondition imlCond) return X86_CONDITION_Z; } +X86Cond _x86CondInverted(IMLCondition imlCond) +{ + switch (imlCond) + { + case IMLCondition::EQ: + return X86_CONDITION_NZ; + case IMLCondition::NEQ: + return X86_CONDITION_Z; + case IMLCondition::UNSIGNED_GT: + return X86_CONDITION_BE; + case IMLCondition::UNSIGNED_LT: + return X86_CONDITION_NB; + case IMLCondition::SIGNED_GT: + return X86_CONDITION_LE; + case IMLCondition::SIGNED_LT: + return X86_CONDITION_NL; + default: + break; + } + cemu_assert_suspicious(); + return X86_CONDITION_Z; +} + +X86Cond _x86Cond(IMLCondition imlCond, bool condIsInverted) +{ + if (condIsInverted) + return _x86CondInverted(imlCond); + return _x86Cond(imlCond); +} + /* * Remember current instruction output offset for reloc * The instruction generated after this method has been called will be adjusted @@ -638,6 +668,10 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); } } + else if( imlInstruction->operation == PPCREC_IML_OP_X86_CMP) + { + x64GenContext->emitter->CMP_dd(regR, regA); + } else if( imlInstruction->operation == PPCREC_IML_OP_DCBZ ) { if( regR != regA ) @@ -680,6 +714,11 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, cemu_assert_debug((imlInstruction->op_r_immS32.immS32 & 0x80) == 0); x64Gen_rol_reg64Low32_imm8(x64GenContext, regR, (uint8)imlInstruction->op_r_immS32.immS32); } + else if( imlInstruction->operation == PPCREC_IML_OP_X86_CMP) + { + sint32 imm = imlInstruction->op_r_immS32.immS32; + 
x64GenContext->emitter->CMP_di32(regR, imm); + } else { debug_printf("PPCRecompilerX64Gen_imlInstruction_r_s32(): Unsupported operation 0x%x\n", imlInstruction->operation); @@ -1082,6 +1121,13 @@ bool PPCRecompilerX64Gen_imlInstruction_cjump2(PPCRecFunction_t* PPCRecFunction, return true; } +void PPCRecompilerX64Gen_imlInstruction_x86_eflags_jcc(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, IMLSegment* imlSegment) +{ + X86Cond cond = _x86Cond(imlInstruction->op_x86_eflags_jcc.cond, imlInstruction->op_x86_eflags_jcc.invertedCondition); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); + x64GenContext->emitter->Jcc_j32(cond, 0); +} + bool PPCRecompilerX64Gen_imlInstruction_jump2(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, IMLSegment* imlSegment) { PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); @@ -1504,6 +1550,10 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo if (PPCRecompilerX64Gen_imlInstruction_cjump2(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, segIt) == false) codeGenerationFailed = true; } + else if(imlInstruction->type == PPCREC_IML_TYPE_X86_EFLAGS_JCC) + { + PPCRecompilerX64Gen_imlInstruction_x86_eflags_jcc(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, segIt); + } else if (imlInstruction->type == PPCREC_IML_TYPE_JUMP) { if (PPCRecompilerX64Gen_imlInstruction_jump2(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, segIt) == false) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h index b58fdfa8d7..98c48a849b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h @@ -11,6 +11,9 @@ void 
IMLOptimizer_OptimizeDirectFloatCopies(struct ppcImlGenContext_t* ppcImlGen void IMLOptimizer_OptimizeDirectIntegerCopies(struct ppcImlGenContext_t* ppcImlGenContext); void PPCRecompiler_optimizePSQLoadAndStore(struct ppcImlGenContext_t* ppcImlGenContext); +void IMLOptimizer_StandardOptimizationPass(ppcImlGenContext_t& ppcImlGenContext); + // debug +void IMLDebug_DisassembleInstruction(const IMLInstruction& inst, std::string& disassemblyLineOut); void IMLDebug_DumpSegment(struct ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool printLivenessRangeInfo = false); void IMLDebug_Dump(struct ppcImlGenContext_t* ppcImlGenContext, bool printLivenessRangeInfo = false); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp index d295f0aa86..192f06a1aa 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -75,12 +75,14 @@ void IMLDebug_AppendRegisterParam(StringBuf& strOutput, IMLReg virtualRegister, void IMLDebug_AppendS32Param(StringBuf& strOutput, sint32 val, bool isLast = false) { - if (isLast) + if (val < 0) { - strOutput.addFmt("0x{:08x}", val); - return; + strOutput.add("-"); + val = -val; } - strOutput.addFmt("0x{:08x}, ", val); + strOutput.addFmt("0x{:08x}", val); + if (!isLast) + strOutput.add(", "); } void IMLDebug_PrintLivenessRangeInfo(StringBuf& currentLineText, IMLSegment* imlSegment, sint32 offset) @@ -163,323 +165,332 @@ std::string IMLDebug_GetConditionName(IMLCondition cond) return "ukn"; } -void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool printLivenessRangeInfo) +void IMLDebug_DisassembleInstruction(const IMLInstruction& inst, std::string& disassemblyLineOut) { - StringBuf strOutput(1024); - - strOutput.addFmt("SEGMENT {} | PPC=0x{:08x} Loop-depth {}", IMLDebug_GetSegmentName(ctx, imlSegment), imlSegment->ppcAddress, imlSegment->loopDepth); - if (imlSegment->isEnterable) - { - strOutput.addFmt(" 
ENTERABLE (0x{:08x})", imlSegment->enterPPCAddress); - } - //else if (imlSegment->isJumpDestination) - //{ - // strOutput.addFmt(" JUMP-DEST (0x{:08x})", imlSegment->jumpDestinationPPCAddress); - //} - - debug_printf("%s\n", strOutput.c_str()); - - //strOutput.reset(); - //strOutput.addFmt("SEGMENT NAME 0x{:016x}", (uintptr_t)imlSegment); - //debug_printf("%s", strOutput.c_str()); + const sint32 lineOffsetParameters = 10;//18; - if (printLivenessRangeInfo) - { - strOutput.reset(); - IMLDebug_PrintLivenessRangeInfo(strOutput, imlSegment, RA_INTER_RANGE_START); - debug_printf("%s\n", strOutput.c_str()); - } - //debug_printf("\n"); + StringBuf strOutput(1024); strOutput.reset(); - - sint32 lineOffsetParameters = 18; - - for (sint32 i = 0; i < imlSegment->imlList.size(); i++) + if (inst.type == PPCREC_IML_TYPE_R_NAME || inst.type == PPCREC_IML_TYPE_NAME_R) { - const IMLInstruction& inst = imlSegment->imlList[i]; - // don't log NOP instructions - if (inst.type == PPCREC_IML_TYPE_NO_OP) - continue; - strOutput.reset(); - strOutput.addFmt("{:02x} ", i); - if (inst.type == PPCREC_IML_TYPE_R_NAME || inst.type == PPCREC_IML_TYPE_NAME_R) - { - if (inst.type == PPCREC_IML_TYPE_R_NAME) - strOutput.add("R_NAME"); - else - strOutput.add("NAME_R"); - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); + if (inst.type == PPCREC_IML_TYPE_R_NAME) + strOutput.add("R_NAME"); + else + strOutput.add("NAME_R"); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); - if(inst.type == PPCREC_IML_TYPE_R_NAME) - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_name.regR); + if(inst.type == PPCREC_IML_TYPE_R_NAME) + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_name.regR); - strOutput.add("name_"); - if (inst.op_r_name.name >= PPCREC_NAME_R0 && inst.op_r_name.name < (PPCREC_NAME_R0 + 999)) - { - strOutput.addFmt("r{}", inst.op_r_name.name - PPCREC_NAME_R0); - } - else if (inst.op_r_name.name >= PPCREC_NAME_FPR0 && inst.op_r_name.name 
< (PPCREC_NAME_FPR0 + 999)) - { - strOutput.addFmt("f{}", inst.op_r_name.name - PPCREC_NAME_FPR0); - } - else if (inst.op_r_name.name >= PPCREC_NAME_SPR0 && inst.op_r_name.name < (PPCREC_NAME_SPR0 + 999)) - { - strOutput.addFmt("spr{}", inst.op_r_name.name - PPCREC_NAME_SPR0); - } - else if (inst.op_r_name.name >= PPCREC_NAME_CR && inst.op_r_name.name <= PPCREC_NAME_CR_LAST) - strOutput.addFmt("cr{}", inst.op_r_name.name - PPCREC_NAME_CR); - else if (inst.op_r_name.name == PPCREC_NAME_XER_CA) - strOutput.add("xer.ca"); - else if (inst.op_r_name.name == PPCREC_NAME_XER_SO) - strOutput.add("xer.so"); - else if (inst.op_r_name.name == PPCREC_NAME_XER_OV) - strOutput.add("xer.ov"); - else if (inst.op_r_name.name == PPCREC_NAME_CPU_MEMRES_EA) - strOutput.add("cpuReservation.ea"); - else if (inst.op_r_name.name == PPCREC_NAME_CPU_MEMRES_VAL) - strOutput.add("cpuReservation.value"); - else - { - strOutput.addFmt("name_ukn{}", inst.op_r_name.name); - } - if (inst.type != PPCREC_IML_TYPE_R_NAME) - { - strOutput.add(", "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_name.regR, true); - } - - } - else if (inst.type == PPCREC_IML_TYPE_R_R) - { - strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r.regR); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r.regA, true); - } - else if (inst.type == PPCREC_IML_TYPE_R_R_R) - { - strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.regR); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.regA); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.regB, true); - } - else if (inst.type == PPCREC_IML_TYPE_R_R_R_CARRY) + strOutput.add("name_"); + if (inst.op_r_name.name >= PPCREC_NAME_R0 && inst.op_r_name.name < (PPCREC_NAME_R0 + 999)) { - 
strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regR); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regA); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regB); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regCarry, true); + strOutput.addFmt("r{}", inst.op_r_name.name - PPCREC_NAME_R0); } - else if (inst.type == PPCREC_IML_TYPE_COMPARE) + else if (inst.op_r_name.name >= PPCREC_NAME_FPR0 && inst.op_r_name.name < (PPCREC_NAME_FPR0 + 999)) { - strOutput.add("CMP "); - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.regA); - IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.regB); - strOutput.addFmt(", {}", IMLDebug_GetConditionName(inst.op_compare.cond)); - strOutput.add(" -> "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.regR, true); + strOutput.addFmt("f{}", inst.op_r_name.name - PPCREC_NAME_FPR0); } - else if (inst.type == PPCREC_IML_TYPE_COMPARE_S32) + else if (inst.op_r_name.name >= PPCREC_NAME_SPR0 && inst.op_r_name.name < (PPCREC_NAME_SPR0 + 999)) { - strOutput.add("CMP "); - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_compare_s32.regA); - strOutput.addFmt("{}", inst.op_compare_s32.immS32); - strOutput.addFmt(", {}", IMLDebug_GetConditionName(inst.op_compare_s32.cond)); - strOutput.add(" -> "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_compare_s32.regR, true); + strOutput.addFmt("spr{}", inst.op_r_name.name - PPCREC_NAME_SPR0); } - else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) + else if (inst.op_r_name.name >= PPCREC_NAME_CR && inst.op_r_name.name <= PPCREC_NAME_CR_LAST) + strOutput.addFmt("cr{}", inst.op_r_name.name - PPCREC_NAME_CR); + else if 
(inst.op_r_name.name == PPCREC_NAME_XER_CA) + strOutput.add("xer.ca"); + else if (inst.op_r_name.name == PPCREC_NAME_XER_SO) + strOutput.add("xer.so"); + else if (inst.op_r_name.name == PPCREC_NAME_XER_OV) + strOutput.add("xer.ov"); + else if (inst.op_r_name.name == PPCREC_NAME_CPU_MEMRES_EA) + strOutput.add("cpuReservation.ea"); + else if (inst.op_r_name.name == PPCREC_NAME_CPU_MEMRES_VAL) + strOutput.add("cpuReservation.value"); + else { - strOutput.add("CJUMP "); - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_conditional_jump.registerBool, true); - if (!inst.op_conditional_jump.mustBeTrue) - strOutput.add("(inverted)"); + strOutput.addFmt("name_ukn{}", inst.op_r_name.name); } - else if (inst.type == PPCREC_IML_TYPE_JUMP) + if (inst.type != PPCREC_IML_TYPE_R_NAME) { - strOutput.add("JUMP"); + strOutput.add(", "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_name.regR, true); } - else if (inst.type == PPCREC_IML_TYPE_R_R_S32) - { - strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32.regR); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32.regA); - IMLDebug_AppendS32Param(strOutput, inst.op_r_r_s32.immS32, true); - } - else if (inst.type == PPCREC_IML_TYPE_R_R_S32_CARRY) - { - strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); + } + else if (inst.type == PPCREC_IML_TYPE_R_R) + { + strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r.regR); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r.regA, true); + } + else if (inst.type == PPCREC_IML_TYPE_R_R_R) + { + strOutput.addFmt("{}", 
IMLDebug_GetOpcodeName(&inst)); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.regR); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.regA); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.regB, true); + } + else if (inst.type == PPCREC_IML_TYPE_R_R_R_CARRY) + { + strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regR); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regA); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regB); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regCarry, true); + } + else if (inst.type == PPCREC_IML_TYPE_COMPARE) + { + strOutput.add("CMP "); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.regA); + IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.regB); + strOutput.addFmt("{}", IMLDebug_GetConditionName(inst.op_compare.cond)); + strOutput.add(" -> "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.regR, true); + } + else if (inst.type == PPCREC_IML_TYPE_COMPARE_S32) + { + strOutput.add("CMP "); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_compare_s32.regA); + strOutput.addFmt("{}", inst.op_compare_s32.immS32); + strOutput.addFmt(", {}", IMLDebug_GetConditionName(inst.op_compare_s32.cond)); + strOutput.add(" -> "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_compare_s32.regR, true); + } + else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) + { + strOutput.add("CJUMP "); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_conditional_jump.registerBool, true); + if 
(!inst.op_conditional_jump.mustBeTrue) + strOutput.add("(inverted)"); + } + else if (inst.type == PPCREC_IML_TYPE_JUMP) + { + strOutput.add("JUMP"); + } + else if (inst.type == PPCREC_IML_TYPE_R_R_S32) + { + strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regR); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regA); - IMLDebug_AppendS32Param(strOutput, inst.op_r_r_s32_carry.immS32); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regCarry, true); - } - else if (inst.type == PPCREC_IML_TYPE_R_S32) - { - strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32.regR); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32.regA); + IMLDebug_AppendS32Param(strOutput, inst.op_r_r_s32.immS32, true); + } + else if (inst.type == PPCREC_IML_TYPE_R_R_S32_CARRY) + { + strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_r_immS32.regR); - IMLDebug_AppendS32Param(strOutput, inst.op_r_immS32.immS32, true); - } - else if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_STORE || - inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED) - { - if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_LOAD_INDEXED) - strOutput.add("LD_"); - else - strOutput.add("ST_"); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regR); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regA); + IMLDebug_AppendS32Param(strOutput, inst.op_r_r_s32_carry.immS32); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regCarry, true); + } + else if 
(inst.type == PPCREC_IML_TYPE_R_S32) + { + strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_immS32.regR); + IMLDebug_AppendS32Param(strOutput, inst.op_r_immS32.immS32, true); + } + else if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_STORE || + inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED) + { + if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_LOAD_INDEXED) + strOutput.add("LD_"); + else + strOutput.add("ST_"); - if (inst.op_storeLoad.flags2.signExtend) - strOutput.add("S"); - else - strOutput.add("U"); - strOutput.addFmt("{}", inst.op_storeLoad.copyWidth); + if (inst.op_storeLoad.flags2.signExtend) + strOutput.add("S"); + else + strOutput.add("U"); + strOutput.addFmt("{}", inst.op_storeLoad.copyWidth); - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_storeLoad.registerData); + IMLDebug_AppendRegisterParam(strOutput, inst.op_storeLoad.registerData); - if (inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED) - strOutput.addFmt("[{}+{}]", IMLDebug_GetRegName(inst.op_storeLoad.registerMem), IMLDebug_GetRegName(inst.op_storeLoad.registerMem2)); - else - strOutput.addFmt("[{}+{}]", IMLDebug_GetRegName(inst.op_storeLoad.registerMem), inst.op_storeLoad.immS32); - } - else if (inst.type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) - { - strOutput.add("ATOMIC_ST_U32"); + if (inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED) + strOutput.addFmt("[{}+{}]", IMLDebug_GetRegName(inst.op_storeLoad.registerMem), IMLDebug_GetRegName(inst.op_storeLoad.registerMem2)); + else + strOutput.addFmt("[{}+{}]", 
IMLDebug_GetRegName(inst.op_storeLoad.registerMem), inst.op_storeLoad.immS32); + } + else if (inst.type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) + { + strOutput.add("ATOMIC_ST_U32"); - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regEA); - IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regCompareValue); - IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regWriteValue); - IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regBoolOut, true); - } - else if (inst.type == PPCREC_IML_TYPE_NO_OP) + IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regEA); + IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regCompareValue); + IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regWriteValue); + IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regBoolOut, true); + } + else if (inst.type == PPCREC_IML_TYPE_NO_OP) + { + strOutput.add("NOP"); + } + else if (inst.type == PPCREC_IML_TYPE_MACRO) + { + if (inst.operation == PPCREC_IML_MACRO_B_TO_REG) { - strOutput.add("NOP"); + strOutput.addFmt("MACRO B_TO_REG {}", IMLDebug_GetRegName(inst.op_macro.paramReg)); } - else if (inst.type == PPCREC_IML_TYPE_MACRO) + else if (inst.operation == PPCREC_IML_MACRO_BL) { - if (inst.operation == PPCREC_IML_MACRO_B_TO_REG) - { - strOutput.addFmt("MACRO B_TO_REG {}", IMLDebug_GetRegName(inst.op_macro.paramReg)); - } - else if (inst.operation == PPCREC_IML_MACRO_BL) - { - strOutput.addFmt("MACRO BL 0x{:08x} -> 0x{:08x} cycles (depr): {}", inst.op_macro.param, inst.op_macro.param2, (sint32)inst.op_macro.paramU16); - } - else if (inst.operation == PPCREC_IML_MACRO_B_FAR) - { - strOutput.addFmt("MACRO B_FAR 0x{:08x} -> 0x{:08x} cycles (depr): {}", inst.op_macro.param, 
inst.op_macro.param2, (sint32)inst.op_macro.paramU16); - } - else if (inst.operation == PPCREC_IML_MACRO_LEAVE) - { - strOutput.addFmt("MACRO LEAVE ppc: 0x{:08x}", inst.op_macro.param); - } - else if (inst.operation == PPCREC_IML_MACRO_HLE) - { - strOutput.addFmt("MACRO HLE ppcAddr: 0x{:08x} funcId: 0x{:08x}", inst.op_macro.param, inst.op_macro.param2); - } - else if (inst.operation == PPCREC_IML_MACRO_MFTB) - { - strOutput.addFmt("MACRO MFTB ppcAddr: 0x{:08x} sprId: 0x{:08x}", inst.op_macro.param, inst.op_macro.param2); - } - else if (inst.operation == PPCREC_IML_MACRO_COUNT_CYCLES) - { - strOutput.addFmt("MACRO COUNT_CYCLES cycles: {}", inst.op_macro.param); - } - else - { - strOutput.addFmt("MACRO ukn operation {}", inst.operation); - } + strOutput.addFmt("MACRO BL 0x{:08x} -> 0x{:08x} cycles (depr): {}", inst.op_macro.param, inst.op_macro.param2, (sint32)inst.op_macro.paramU16); } - else if (inst.type == PPCREC_IML_TYPE_FPR_LOAD) + else if (inst.operation == PPCREC_IML_MACRO_B_FAR) { - strOutput.addFmt("{} = ", IMLDebug_GetRegName(inst.op_storeLoad.registerData)); - if (inst.op_storeLoad.flags2.signExtend) - strOutput.add("S"); - else - strOutput.add("U"); - strOutput.addFmt("{} [{}+{}] mode {}", inst.op_storeLoad.copyWidth / 8, IMLDebug_GetRegName(inst.op_storeLoad.registerMem), inst.op_storeLoad.immS32, inst.op_storeLoad.mode); - if (inst.op_storeLoad.flags2.notExpanded) - { - strOutput.addFmt(" "); - } + strOutput.addFmt("MACRO B_FAR 0x{:08x} -> 0x{:08x} cycles (depr): {}", inst.op_macro.param, inst.op_macro.param2, (sint32)inst.op_macro.paramU16); } - else if (inst.type == PPCREC_IML_TYPE_FPR_STORE) + else if (inst.operation == PPCREC_IML_MACRO_LEAVE) { - if (inst.op_storeLoad.flags2.signExtend) - strOutput.add("S"); - else - strOutput.add("U"); - strOutput.addFmt("{} [t{}+{}]", inst.op_storeLoad.copyWidth / 8, inst.op_storeLoad.registerMem.GetRegID(), inst.op_storeLoad.immS32); - strOutput.addFmt(" = {} mode {}", 
IMLDebug_GetRegName(inst.op_storeLoad.registerData), inst.op_storeLoad.mode); + strOutput.addFmt("MACRO LEAVE ppc: 0x{:08x}", inst.op_macro.param); } - else if (inst.type == PPCREC_IML_TYPE_FPR_R_R) + else if (inst.operation == PPCREC_IML_MACRO_HLE) { - strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst)); - strOutput.addFmt("{}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r.regA)); + strOutput.addFmt("MACRO HLE ppcAddr: 0x{:08x} funcId: 0x{:08x}", inst.op_macro.param, inst.op_macro.param2); } - else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R_R) + else if (inst.operation == PPCREC_IML_MACRO_MFTB) { - strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst)); - strOutput.addFmt("{}, {}, {}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regA), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regB), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regC)); + strOutput.addFmt("MACRO MFTB ppcAddr: 0x{:08x} sprId: 0x{:08x}", inst.op_macro.param, inst.op_macro.param2); } - else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R) + else if (inst.operation == PPCREC_IML_MACRO_COUNT_CYCLES) { - strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst)); - strOutput.addFmt("{}, {}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r_r.regA), IMLDebug_GetRegName(inst.op_fpr_r_r_r.regB)); + strOutput.addFmt("MACRO COUNT_CYCLES cycles: {}", inst.op_macro.param); } - else if (inst.type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) + else { - strOutput.addFmt("CYCLE_CHECK"); + strOutput.addFmt("MACRO ukn operation {}", inst.operation); } - else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) + } + else if (inst.type == PPCREC_IML_TYPE_FPR_LOAD) + { + strOutput.addFmt("{} = ", IMLDebug_GetRegName(inst.op_storeLoad.registerData)); + if (inst.op_storeLoad.flags2.signExtend) + strOutput.add("S"); + else + strOutput.add("U"); + strOutput.addFmt("{} [{}+{}] mode {}", inst.op_storeLoad.copyWidth / 
8, IMLDebug_GetRegName(inst.op_storeLoad.registerMem), inst.op_storeLoad.immS32, inst.op_storeLoad.mode); + if (inst.op_storeLoad.flags2.notExpanded) { - strOutput.addFmt("{} ", IMLDebug_GetRegName(inst.op_conditional_r_s32.regR)); - bool displayAsHex = false; - if (inst.operation == PPCREC_IML_OP_ASSIGN) - { - displayAsHex = true; - strOutput.add("="); - } - else - strOutput.addFmt("(unknown operation CONDITIONAL_R_S32 {})", inst.operation); - if (displayAsHex) - strOutput.addFmt(" 0x{:x}", inst.op_conditional_r_s32.immS32); - else - strOutput.addFmt(" {}", inst.op_conditional_r_s32.immS32); - strOutput.add(" (conditional)"); + strOutput.addFmt(" "); } + } + else if (inst.type == PPCREC_IML_TYPE_FPR_STORE) + { + if (inst.op_storeLoad.flags2.signExtend) + strOutput.add("S"); else + strOutput.add("U"); + strOutput.addFmt("{} [t{}+{}]", inst.op_storeLoad.copyWidth / 8, inst.op_storeLoad.registerMem.GetRegID(), inst.op_storeLoad.immS32); + strOutput.addFmt(" = {} mode {}", IMLDebug_GetRegName(inst.op_storeLoad.registerData), inst.op_storeLoad.mode); + } + else if (inst.type == PPCREC_IML_TYPE_FPR_R_R) + { + strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst)); + strOutput.addFmt("{}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r.regA)); + } + else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R_R) + { + strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst)); + strOutput.addFmt("{}, {}, {}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regA), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regB), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regC)); + } + else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R) + { + strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst)); + strOutput.addFmt("{}, {}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r_r.regA), IMLDebug_GetRegName(inst.op_fpr_r_r_r.regB)); + } + else if (inst.type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) + 
{ + strOutput.addFmt("CYCLE_CHECK"); + } + else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) + { + strOutput.addFmt("{} ", IMLDebug_GetRegName(inst.op_conditional_r_s32.regR)); + bool displayAsHex = false; + if (inst.operation == PPCREC_IML_OP_ASSIGN) { - strOutput.addFmt("Unknown iml type {}", inst.type); + displayAsHex = true; + strOutput.add("="); } - debug_printf("%s", strOutput.c_str()); + else + strOutput.addFmt("(unknown operation CONDITIONAL_R_S32 {})", inst.operation); + if (displayAsHex) + strOutput.addFmt(" 0x{:x}", inst.op_conditional_r_s32.immS32); + else + strOutput.addFmt(" {}", inst.op_conditional_r_s32.immS32); + strOutput.add(" (conditional)"); + } + else if (inst.type == PPCREC_IML_TYPE_X86_EFLAGS_JCC) + { + strOutput.addFmt("X86_JCC {}", IMLDebug_GetConditionName(inst.op_x86_eflags_jcc.cond)); + } + else + { + strOutput.addFmt("Unknown iml type {}", inst.type); + } + disassemblyLineOut.assign(strOutput.c_str()); +} + +void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool printLivenessRangeInfo) +{ + StringBuf strOutput(1024); + + strOutput.addFmt("SEGMENT {} | PPC=0x{:08x} Loop-depth {}", IMLDebug_GetSegmentName(ctx, imlSegment), imlSegment->ppcAddress, imlSegment->loopDepth); + if (imlSegment->isEnterable) + { + strOutput.addFmt(" ENTERABLE (0x{:08x})", imlSegment->enterPPCAddress); + } + if (imlSegment->deadCodeEliminationHintSeg) + { + strOutput.addFmt(" InheritOverwrite: {}", IMLDebug_GetSegmentName(ctx, imlSegment->deadCodeEliminationHintSeg)); + } + debug_printf("%s\n", strOutput.c_str()); + + if (printLivenessRangeInfo) + { + strOutput.reset(); + IMLDebug_PrintLivenessRangeInfo(strOutput, imlSegment, RA_INTER_RANGE_START); + debug_printf("%s\n", strOutput.c_str()); + } + //debug_printf("\n"); + strOutput.reset(); + + std::string disassemblyLine; + for (sint32 i = 0; i < imlSegment->imlList.size(); i++) + { + const IMLInstruction& inst = imlSegment->imlList[i]; + // don't log NOP instructions + if 
(inst.type == PPCREC_IML_TYPE_NO_OP) + continue; + //strOutput.addFmt("{:02x} ", i); + debug_printf(fmt::format("{:02x} ", i).c_str()); + disassemblyLine.clear(); + IMLDebug_DisassembleInstruction(inst, disassemblyLine); + debug_printf("%s", disassemblyLine.c_str()); if (printLivenessRangeInfo) { IMLDebug_PrintLivenessRangeInfo(strOutput, imlSegment, i); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index f2476e6124..53841bafcf 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -26,7 +26,8 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const } else if (type == PPCREC_IML_TYPE_R_R) { - if (operation == PPCREC_IML_OP_DCBZ) + if (operation == PPCREC_IML_OP_DCBZ || + operation == PPCREC_IML_OP_X86_CMP) { // both operands are read only registersUsed->readGPR1 = op_r_r.regR; @@ -58,13 +59,18 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const if (operation == PPCREC_IML_OP_LEFT_ROTATE) { - // operand register is read and write + // register operand is read and write registersUsed->readGPR1 = op_r_immS32.regR; registersUsed->writtenGPR1 = op_r_immS32.regR; } + else if (operation == PPCREC_IML_OP_X86_CMP) + { + // register operand is read only + registersUsed->readGPR1 = op_r_immS32.regR; + } else { - // operand register is write only + // register operand is write only // todo - use explicit lists, avoid default cases registersUsed->writtenGPR1 = op_r_immS32.regR; } @@ -453,6 +459,10 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const registersUsed->readFPR1 = op_fpr_compare.regA; registersUsed->readFPR2 = op_fpr_compare.regB; } + else if (type == PPCREC_IML_TYPE_X86_EFLAGS_JCC) + { + // no registers read or written (except for the implicit eflags) + } else { cemu_assert_unimplemented(); @@ -675,6 +685,10 @@ void 
IMLInstruction::RewriteGPR(const std::unordered_map& tr op_fpr_compare.regB = replaceRegisterIdMultiple(op_fpr_compare.regB, translationTable); op_fpr_compare.regR = replaceRegisterIdMultiple(op_fpr_compare.regR, translationTable); } + else if (type == PPCREC_IML_TYPE_X86_EFLAGS_JCC) + { + // no registers read or written (except for the implicit eflags) + } else { cemu_assert_unimplemented(); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 817fef190b..aea6750c2a 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -180,6 +180,9 @@ enum // R_R_R_carry PPCREC_IML_OP_ADD_WITH_CARRY, // similar to ADD but also adds carry bit (0 or 1) + + // X86 extension + PPCREC_IML_OP_X86_CMP, // R_R and R_S32 }; #define PPCREC_IML_OP_FPR_COPY_PAIR (PPCREC_IML_OP_ASSIGN) @@ -261,6 +264,9 @@ enum PPCREC_IML_TYPE_FPR_R, PPCREC_IML_TYPE_FPR_COMPARE, // r* = r* CMP[cond] r* + + // X86 specific + PPCREC_IML_TYPE_X86_EFLAGS_JCC, }; enum // IMLName @@ -350,13 +356,29 @@ struct IMLUsedRegisters }; }; + bool IsWrittenByRegId(IMLRegID regId) const + { + if (writtenGPR1.IsValid() && writtenGPR1.GetRegID() == regId) + return true; + if (writtenGPR2.IsValid() && writtenGPR2.GetRegID() == regId) + return true; + return false; + } + bool IsBaseGPRWritten(IMLReg imlReg) const { cemu_assert_debug(imlReg.IsValid()); auto regId = imlReg.GetRegID(); - if (writtenGPR1.IsValid() && writtenGPR1.GetRegID() == regId) + return IsWrittenByRegId(regId); + } + + bool IsRegIdRead(IMLRegID regId) const + { + if (readGPR1.IsValid() && readGPR1.GetRegID() == regId) return true; - if (writtenGPR2.IsValid() && writtenGPR2.GetRegID() == regId) + if (readGPR2.IsValid() && readGPR2.GetRegID() == regId) + return true; + if (readGPR3.IsValid() && readGPR3.GetRegID() == regId) return true; return false; } @@ -556,6 +578,12 @@ struct IMLInstruction uint8 crBitIndex; bool 
bitMustBeSet; }op_conditional_r_s32; + // X86 specific + struct + { + IMLCondition cond; + bool invertedCondition; + }op_x86_eflags_jcc; }; bool IsSuffixInstruction() const @@ -568,7 +596,8 @@ struct IMLInstruction type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_MFTB || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK || type == PPCREC_IML_TYPE_JUMP || - type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) + type == PPCREC_IML_TYPE_CONDITIONAL_JUMP || + type == PPCREC_IML_TYPE_X86_EFLAGS_JCC) return true; return false; } @@ -753,6 +782,15 @@ struct IMLInstruction this->op_fpr_compare.cond = cond; } + /* X86 specific */ + void make_x86_eflags_jcc(IMLCondition cond, bool invertedCondition) + { + this->type = PPCREC_IML_TYPE_X86_EFLAGS_JCC; + this->operation = -999; + this->op_x86_eflags_jcc.cond = cond; + this->op_x86_eflags_jcc.invertedCondition = invertedCondition; + } + void CheckRegisterUsage(IMLUsedRegisters* registersUsed) const; void RewriteGPR(const std::unordered_map& translationTable); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp index cdf922ce72..2856eb2479 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp @@ -6,6 +6,11 @@ #include "../PPCRecompilerIml.h" #include "../BackendX64/BackendX64.h" +#include "Common/FileStream.h" + +#include +#include + IMLReg _FPRRegFromID(IMLRegID regId) { return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, regId); @@ -328,3 +333,464 @@ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext) } } } + +// analyses register dependencies across the entire function +// per segment this will generate information about which registers need to be preserved and which ones don't (e.g. 
are overwritten) +class IMLOptimizerRegIOAnalysis +{ + public: + // constructor with segment pointer list as span + IMLOptimizerRegIOAnalysis(std::span segmentList, uint32 maxRegId) : m_segmentList(segmentList), m_maxRegId(maxRegId) + { + m_segRegisterInOutList.resize(segmentList.size()); + } + + struct IMLSegmentRegisterInOut + { + // todo - since our register ID range is usually pretty small (<64) we could use integer bitmasks to accelerate this? There is a helper class used in RA code already + std::unordered_set regWritten; // registers which are modified in this segment + std::unordered_set regImported; // registers which are read in this segment before they are written (importing value from previous segments) + std::unordered_set regForward; // registers which are not read or written in this segment, but are imported into a later segment (propagated info) + }; + + // calculate which registers are imported (read-before-written) and forwarded (read-before-written by a later segment) per segment + // then in a second step propagate the dependencies across linked segments + void ComputeDepedencies() + { + std::vector& segRegisterInOutList = m_segRegisterInOutList; + IMLSegmentRegisterInOut* segIO = segRegisterInOutList.data(); + uint32 index = 0; + for(auto& seg : m_segmentList) + { + seg->momentaryIndex = index; + index++; + for(auto& instr : seg->imlList) + { + IMLUsedRegisters registerUsage; + instr.CheckRegisterUsage(®isterUsage); + // registers are considered imported if they are read before being written in this seg + registerUsage.ForEachReadGPR([&](IMLReg gprReg) { + IMLRegID gprId = gprReg.GetRegID(); + if (!segIO->regWritten.contains(gprId)) + { + segIO->regImported.insert(gprId); + } + }); + registerUsage.ForEachWrittenGPR([&](IMLReg gprReg) { + IMLRegID gprId = gprReg.GetRegID(); + segIO->regWritten.insert(gprId); + }); + } + segIO++; + } + // for every exit segment, import all registers + for(auto& seg : m_segmentList) + { + if 
(!seg->nextSegmentIsUncertain) + continue; + if(seg->deadCodeEliminationHintSeg) + continue; + IMLSegmentRegisterInOut& segIO = segRegisterInOutList[seg->momentaryIndex]; + for(uint32 i=0; i<=m_maxRegId; i++) + { + segIO.regImported.insert((IMLRegID)i); + } + } + // broadcast dependencies across segment chains + std::unordered_set segIdsWhichNeedUpdate; + for (uint32 i = 0; i < m_segmentList.size(); i++) + { + segIdsWhichNeedUpdate.insert(i); + } + while(!segIdsWhichNeedUpdate.empty()) + { + auto firstIt = segIdsWhichNeedUpdate.begin(); + uint32 segId = *firstIt; + segIdsWhichNeedUpdate.erase(firstIt); + // forward regImported and regForward to earlier segments into their regForward, unless the register is written + auto& curSeg = m_segmentList[segId]; + IMLSegmentRegisterInOut& curSegIO = segRegisterInOutList[segId]; + for(auto& prevSeg : curSeg->list_prevSegments) + { + IMLSegmentRegisterInOut& prevSegIO = segRegisterInOutList[prevSeg->momentaryIndex]; + bool prevSegChanged = false; + for(auto& regId : curSegIO.regImported) + { + if (!prevSegIO.regWritten.contains(regId)) + prevSegChanged |= prevSegIO.regForward.insert(regId).second; + } + for(auto& regId : curSegIO.regForward) + { + if (!prevSegIO.regWritten.contains(regId)) + prevSegChanged |= prevSegIO.regForward.insert(regId).second; + } + if(prevSegChanged) + segIdsWhichNeedUpdate.insert(prevSeg->momentaryIndex); + } + // same for hint links + for(auto& prevSeg : curSeg->list_deadCodeHintBy) + { + IMLSegmentRegisterInOut& prevSegIO = segRegisterInOutList[prevSeg->momentaryIndex]; + bool prevSegChanged = false; + for(auto& regId : curSegIO.regImported) + { + if (!prevSegIO.regWritten.contains(regId)) + prevSegChanged |= prevSegIO.regForward.insert(regId).second; + } + for(auto& regId : curSegIO.regForward) + { + if (!prevSegIO.regWritten.contains(regId)) + prevSegChanged |= prevSegIO.regForward.insert(regId).second; + } + if(prevSegChanged) + segIdsWhichNeedUpdate.insert(prevSeg->momentaryIndex); + } + } + } 
+ + std::unordered_set GetRegistersNeededAtEndOfSegment(IMLSegment& seg) + { + std::unordered_set regsNeeded; + if(seg.nextSegmentIsUncertain) + { + if(seg.deadCodeEliminationHintSeg) + { + auto& nextSegIO = m_segRegisterInOutList[seg.deadCodeEliminationHintSeg->momentaryIndex]; + regsNeeded.insert(nextSegIO.regImported.begin(), nextSegIO.regImported.end()); + regsNeeded.insert(nextSegIO.regForward.begin(), nextSegIO.regForward.end()); + } + else + { + // add all regs + for(uint32 i = 0; i <= m_maxRegId; i++) + regsNeeded.insert(i); + } + return regsNeeded; + } + if(seg.nextSegmentBranchTaken) + { + auto& nextSegIO = m_segRegisterInOutList[seg.nextSegmentBranchTaken->momentaryIndex]; + regsNeeded.insert(nextSegIO.regImported.begin(), nextSegIO.regImported.end()); + regsNeeded.insert(nextSegIO.regForward.begin(), nextSegIO.regForward.end()); + } + if(seg.nextSegmentBranchNotTaken) + { + auto& nextSegIO = m_segRegisterInOutList[seg.nextSegmentBranchNotTaken->momentaryIndex]; + regsNeeded.insert(nextSegIO.regImported.begin(), nextSegIO.regImported.end()); + regsNeeded.insert(nextSegIO.regForward.begin(), nextSegIO.regForward.end()); + } + return regsNeeded; + } + + bool IsRegisterNeededAtEndOfSegment(IMLSegment& seg, IMLRegID regId) + { + if(seg.nextSegmentIsUncertain) + { + if(!seg.deadCodeEliminationHintSeg) + return true; + auto& nextSegIO = m_segRegisterInOutList[seg.deadCodeEliminationHintSeg->momentaryIndex]; + if(nextSegIO.regImported.contains(regId)) + return true; + if(nextSegIO.regForward.contains(regId)) + return true; + return false; + } + if(seg.nextSegmentBranchTaken) + { + auto& nextSegIO = m_segRegisterInOutList[seg.nextSegmentBranchTaken->momentaryIndex]; + if(nextSegIO.regImported.contains(regId)) + return true; + if(nextSegIO.regForward.contains(regId)) + return true; + } + if(seg.nextSegmentBranchNotTaken) + { + auto& nextSegIO = m_segRegisterInOutList[seg.nextSegmentBranchNotTaken->momentaryIndex]; + if(nextSegIO.regImported.contains(regId)) + 
return true;
+			if(nextSegIO.regForward.contains(regId))
+				return true;
+		}
+		return false;
+	}
+
+  private:
+	std::span m_segmentList;
+	uint32 m_maxRegId;
+
+	std::vector m_segRegisterInOutList;
+
+};
+
+// scan backwards starting from startIndex (inclusive) and return the index of the first found instruction which writes to the given register (by id)
+// startIndex itself plus at most maxScanDistance earlier instructions are inspected. A negative maxScanDistance (the default) means no limit
+// returns -1 if no writing instruction was found in the scanned range
+sint32 IMLUtil_FindInstructionWhichWritesRegister(IMLSegment& seg, sint32 startIndex, IMLReg reg, sint32 maxScanDistance = -1)
+{
+	// a negative distance must not go through the subtraction: startIndex - (-1) lands above startIndex and the loop below would never execute
+	sint32 endIndex = (maxScanDistance < 0) ? 0 : std::max(startIndex - maxScanDistance, 0);
+	for (sint32 i = startIndex; i >= endIndex; i--)
+	{
+		IMLInstruction& imlInstruction = seg.imlList[i];
+		IMLUsedRegisters registersUsed;
+		imlInstruction.CheckRegisterUsage(&registersUsed);
+		if (registersUsed.IsBaseGPRWritten(reg))
+			return i;
+	}
+	return -1;
+}
+
+// returns true if the instruction can safely be moved while keeping ordering constraints and data dependencies intact
+// initialIndex is inclusive, targetIndex is exclusive
+bool IMLUtil_CanMoveInstructionTo(IMLSegment& seg, sint32 initialIndex, sint32 targetIndex)
+{
+	boost::container::static_vector regsWritten;
+	boost::container::static_vector regsRead;
+	// get list of read and written registers
+	IMLUsedRegisters registersUsed;
+	seg.imlList[initialIndex].CheckRegisterUsage(&registersUsed);
+	registersUsed.ForEachAccessedGPR([&](IMLReg reg, bool isWritten) {
+		if (isWritten)
+			regsWritten.push_back(reg.GetRegID());
+		else
+			regsRead.push_back(reg.GetRegID());
+	});
+	// check all the instructions inbetween
+	if(initialIndex < targetIndex)
+	{
+		sint32 scanStartIndex = initialIndex+1; // +1 to skip the moving instruction itself
+		sint32 scanEndIndex = targetIndex;
+		for (sint32 i = scanStartIndex; i < scanEndIndex; i++)
+		{
+			IMLUsedRegisters registersUsed;
+			seg.imlList[i].CheckRegisterUsage(&registersUsed);
+			// in order to be able to move an instruction past another instruction, any of the read registers must not be modified (written)
+			// and any of its written registers must
not be read + bool canMove = true; + registersUsed.ForEachAccessedGPR([&](IMLReg reg, bool isWritten) { + IMLRegID regId = reg.GetRegID(); + if (!isWritten) + canMove = canMove && std::find(regsWritten.begin(), regsWritten.end(), regId) == regsWritten.end(); + else + canMove = canMove && std::find(regsRead.begin(), regsRead.end(), regId) == regsRead.end(); + }); + if(!canMove) + return false; + } + } + else + { + cemu_assert_unimplemented(); // backwards scan is todo + return false; + } + return true; +} + +sint32 IMLUtil_CountRegisterReadsInRange(IMLSegment& seg, sint32 scanStartIndex, sint32 scanEndIndex, IMLRegID regId) +{ + cemu_assert_debug(scanStartIndex <= scanEndIndex); + cemu_assert_debug(scanEndIndex < seg.imlList.size()); + sint32 count = 0; + for (sint32 i = scanStartIndex; i <= scanEndIndex; i++) + { + IMLUsedRegisters registersUsed; + seg.imlList[i].CheckRegisterUsage(®istersUsed); + registersUsed.ForEachReadGPR([&](IMLReg reg) { + if (reg.GetRegID() == regId) + count++; + }); + } + return count; +} + +// move instruction from one index to another +// instruction will be inserted before the instruction at targetIndex +// returns the new instruction index of the moved instruction +sint32 IMLUtil_MoveInstructionTo(IMLSegment& seg, sint32 initialIndex, sint32 targetIndex) +{ + cemu_assert_debug(initialIndex != targetIndex); + IMLInstruction temp = seg.imlList[initialIndex]; + if (initialIndex < targetIndex) + { + cemu_assert_debug(targetIndex > 0); + targetIndex--; + std::copy_backward(seg.imlList.begin() + initialIndex + 1, seg.imlList.begin() + targetIndex + 1, seg.imlList.begin() + targetIndex); + seg.imlList[targetIndex] = temp; + return targetIndex; + } + else + { + cemu_assert_unimplemented(); // testing needed + std::copy(seg.imlList.begin() + targetIndex, seg.imlList.begin() + initialIndex, seg.imlList.begin() + targetIndex + 1); + seg.imlList[targetIndex] = temp; + return targetIndex; + } +} + +// x86 specific +bool 
IMLOptimizerX86_ModifiesEFlags(IMLInstruction& inst) +{ + // this is a very conservative implementation. There are more cases but this is good enough for now + if(inst.type == PPCREC_IML_TYPE_NAME_R || inst.type == PPCREC_IML_TYPE_R_NAME) + return false; + if((inst.type == PPCREC_IML_TYPE_R_R || inst.type == PPCREC_IML_TYPE_R_S32) && inst.operation == PPCREC_IML_OP_ASSIGN) + return false; + return true; // if we dont know for sure, assume it does +} + +void IMLOptimizer_DebugPrintSeg(ppcImlGenContext_t& ppcImlGenContext, IMLSegment& seg) +{ + printf("----------------\n"); + IMLDebug_DumpSegment(&ppcImlGenContext, &seg); + fflush(stdout); +} + +void IMLOptimizer_RemoveDeadCodeFromSegment(IMLOptimizerRegIOAnalysis& regIoAnalysis, IMLSegment& seg) +{ + // algorithm works like this: + // Calculate which registers need to be preserved at the end of each segment + // Then for each segment: + // - Iterate instructions backwards + // - Maintain a list of registers which are read at a later point (initially this is the list from the first step) + // - If an instruction only modifies registers which are not in the read list, then it is dead code and can be replaced with a no-op + + std::unordered_set regsNeeded = regIoAnalysis.GetRegistersNeededAtEndOfSegment(seg); + + // start with suffix instruction + if(seg.HasSuffixInstruction()) + { + IMLInstruction& imlInstruction = seg.imlList[seg.GetSuffixInstructionIndex()]; + IMLUsedRegisters registersUsed; + imlInstruction.CheckRegisterUsage(®istersUsed); + registersUsed.ForEachWrittenGPR([&](IMLReg reg) { + regsNeeded.erase(reg.GetRegID()); + }); + registersUsed.ForEachReadGPR([&](IMLReg reg) { + regsNeeded.insert(reg.GetRegID()); + }); + } + // iterate instructions backwards + for (sint32 i = seg.imlList.size() - (seg.HasSuffixInstruction() ? 
2:1); i >= 0; i--) + { + IMLInstruction& imlInstruction = seg.imlList[i]; + IMLUsedRegisters registersUsed; + imlInstruction.CheckRegisterUsage(®istersUsed); + // register read -> remove from overwritten list + // register written -> add to overwritten list + + // check if this instruction only writes registers which will never be read + bool onlyWritesRedundantRegisters = true; + registersUsed.ForEachWrittenGPR([&](IMLReg reg) { + if (regsNeeded.contains(reg.GetRegID())) + onlyWritesRedundantRegisters = false; + }); + // check if any of the written registers are read after this point + registersUsed.ForEachWrittenGPR([&](IMLReg reg) { + regsNeeded.erase(reg.GetRegID()); + }); + registersUsed.ForEachReadGPR([&](IMLReg reg) { + regsNeeded.insert(reg.GetRegID()); + }); + // for now we only allow some instruction types to be deleted, eventually we should find a safer way to identify side effects that can't be judged by register usage alone + if(imlInstruction.type != PPCREC_IML_TYPE_R_R && imlInstruction.type != PPCREC_IML_TYPE_R_R_S32 && imlInstruction.type != PPCREC_IML_TYPE_COMPARE && imlInstruction.type != PPCREC_IML_TYPE_COMPARE_S32) + continue; + if(onlyWritesRedundantRegisters) + { + imlInstruction.make_no_op(); + } + } +} + +void IMLOptimizerX86_SubstituteCJumpForEflagsJump(IMLOptimizerRegIOAnalysis& regIoAnalysis, IMLSegment& seg) +{ + // convert and optimize bool condition jumps to eflags condition jumps + // - Moves eflag setter (e.g. cmp) closer to eflags consumer (conditional jump) if necessary. 
If not possible but required then exit early + // - Since we only rely on eflags, the boolean register can be optimized out if DCE considers it unused + // - Further detect and optimize patterns like DEC + CMP + JCC into fused ops (todo) + + // check if this segment ends with a conditional jump + if(!seg.HasSuffixInstruction()) + return; + sint32 cjmpInstIndex = seg.GetSuffixInstructionIndex(); + if(cjmpInstIndex < 0) + return; + IMLInstruction& cjumpInstr = seg.imlList[cjmpInstIndex]; + if( cjumpInstr.type != PPCREC_IML_TYPE_CONDITIONAL_JUMP ) + return; + IMLReg regCondBool = cjumpInstr.op_conditional_jump.registerBool; + bool invertedCondition = !cjumpInstr.op_conditional_jump.mustBeTrue; + // find the instruction which sets the bool + sint32 cmpInstrIndex = IMLUtil_FindInstructionWhichWritesRegister(seg, cjmpInstIndex-1, regCondBool, 20); + if(cmpInstrIndex < 0) + return; + // check if its an instruction combo which can be optimized (currently only cmp + cjump) and get the condition + IMLInstruction& condSetterInstr = seg.imlList[cmpInstrIndex]; + IMLCondition cond; + if(condSetterInstr.type == PPCREC_IML_TYPE_COMPARE) + cond = condSetterInstr.op_compare.cond; + else if(condSetterInstr.type == PPCREC_IML_TYPE_COMPARE_S32) + cond = condSetterInstr.op_compare_s32.cond; + else + return; + // check if instructions inbetween modify eflags + sint32 indexEflagsSafeStart = -1; // index of the first instruction which does not modify eflags up to cjump + for(sint32 i = cjmpInstIndex-1; i > cmpInstrIndex; i--) + { + if(IMLOptimizerX86_ModifiesEFlags(seg.imlList[i])) + { + indexEflagsSafeStart = i+1; + break; + } + } + if(indexEflagsSafeStart >= 0) + { + cemu_assert(indexEflagsSafeStart > 0); + // there are eflags-modifying instructions inbetween the bool setter and cjump + // try to move the eflags setter close enough to the cjump (to indexEflagsSafeStart) + bool canMove = IMLUtil_CanMoveInstructionTo(seg, cmpInstrIndex, indexEflagsSafeStart); + if(!canMove) + { + return; 
+ } + else + { + cmpInstrIndex = IMLUtil_MoveInstructionTo(seg, cmpInstrIndex, indexEflagsSafeStart); + } + } + // we can turn the jump into an eflags jump + cjumpInstr.make_x86_eflags_jcc(cond, invertedCondition); + + if (IMLUtil_CountRegisterReadsInRange(seg, cmpInstrIndex, cjmpInstIndex, regCondBool.GetRegID()) > 1 || regIoAnalysis.IsRegisterNeededAtEndOfSegment(seg, regCondBool.GetRegID())) + return; // bool register is used beyond the CMP, we can't drop it + + auto& cmpInstr = seg.imlList[cmpInstrIndex]; + cemu_assert_debug(cmpInstr.type == PPCREC_IML_TYPE_COMPARE || cmpInstr.type == PPCREC_IML_TYPE_COMPARE_S32); + if(cmpInstr.type == PPCREC_IML_TYPE_COMPARE) + { + IMLReg regA = cmpInstr.op_compare.regA; + IMLReg regB = cmpInstr.op_compare.regB; + seg.imlList[cmpInstrIndex].make_r_r(PPCREC_IML_OP_X86_CMP, regA, regB); + } + else + { + IMLReg regA = cmpInstr.op_compare_s32.regA; + sint32 val = cmpInstr.op_compare_s32.immS32; + seg.imlList[cmpInstrIndex].make_r_s32(PPCREC_IML_OP_X86_CMP, regA, val); + } + +} + +void IMLOptimizer_StandardOptimizationPassForSegment(IMLOptimizerRegIOAnalysis& regIoAnalysis, IMLSegment& seg) +{ + IMLOptimizer_RemoveDeadCodeFromSegment(regIoAnalysis, seg); + + // x86 specific optimizations + IMLOptimizerX86_SubstituteCJumpForEflagsJump(regIoAnalysis, seg); // this pass should be applied late since it creates invisible eflags dependencies (which would break further register dependency analysis) +} + +void IMLOptimizer_StandardOptimizationPass(ppcImlGenContext_t& ppcImlGenContext) +{ + IMLOptimizerRegIOAnalysis regIoAnalysis(ppcImlGenContext.segmentList2, ppcImlGenContext.GetMaxRegId()); + regIoAnalysis.ComputeDepedencies(); + for (IMLSegment* segIt : ppcImlGenContext.segmentList2) + { + IMLOptimizer_StandardOptimizationPassForSegment(regIoAnalysis, *segIt); + } +} diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index 42fe619b2e..96f8d9f0f4 100644 
--- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -75,14 +75,14 @@ bool _detectLoop(IMLSegment* currentSegment, sint32 depth, uint32 iterationIndex { if (currentSegment->nextSegmentBranchNotTaken->momentaryIndex > currentSegment->momentaryIndex) { - currentSegment->raInfo.isPartOfProcessedLoop = _detectLoop(currentSegment->nextSegmentBranchNotTaken, depth + 1, iterationIndex, imlSegmentLoopBase); + currentSegment->raInfo.isPartOfProcessedLoop |= _detectLoop(currentSegment->nextSegmentBranchNotTaken, depth + 1, iterationIndex, imlSegmentLoopBase); } } if (currentSegment->nextSegmentBranchTaken) { if (currentSegment->nextSegmentBranchTaken->momentaryIndex > currentSegment->momentaryIndex) { - currentSegment->raInfo.isPartOfProcessedLoop = _detectLoop(currentSegment->nextSegmentBranchTaken, depth + 1, iterationIndex, imlSegmentLoopBase); + currentSegment->raInfo.isPartOfProcessedLoop |= _detectLoop(currentSegment->nextSegmentBranchTaken, depth + 1, iterationIndex, imlSegmentLoopBase); } } if (currentSegment->raInfo.isPartOfProcessedLoop) @@ -341,8 +341,8 @@ void IMLRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment { // this works as a pre-pass to actual register allocation. Assigning registers in advance based on fixed requirements (e.g. 
calling conventions and operations with fixed-reg input/output like x86 DIV/MUL) // algorithm goes as follows: - // 1) Iterate all instructions from beginning to end and keep a list of covering ranges - // 2) If we encounter an instruction with a fixed register we: + // 1) Iterate all instructions in the function from beginning to end and keep a list of active ranges for the currently iterated instruction + // 2) If we encounter an instruction with a fixed register requirement we: // 2.0) Check if there are any other ranges already using the same fixed-register and if yes, we split them and unassign the register for any follow-up instructions just prior to the current instruction // 2.1) For inputs: Split the range that needs to be assigned a phys reg on the current instruction. Basically creating a 1-instruction long subrange that we can assign the physical register. RA will then schedule register allocation around that and avoid moves // 2.2) For outputs: Split the range that needs to be assigned a phys reg on the current instruction diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h index bf1868cf8f..f0420b0119 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h @@ -81,6 +81,10 @@ struct IMLSegment IMLSegment* nextSegmentBranchTaken{}; bool nextSegmentIsUncertain{}; std::vector list_prevSegments{}; + // source for overwrite analysis (if nextSegmentIsUncertain is true) + // sometimes a segment is marked as an exit point, but for the purposes of dead code elimination we know the next segment + IMLSegment* deadCodeEliminationHintSeg{}; + std::vector list_deadCodeHintBy{}; // enterable segments bool isEnterable{}; // this segment can be entered from outside the recompiler (no preloaded registers necessary) uint32 enterPPCAddress{}; // used if isEnterable is true @@ -101,6 +105,14 @@ struct IMLSegment return nextSegmentBranchNotTaken; } + 
void SetNextSegmentForOverwriteHints(IMLSegment* seg) + { + cemu_assert_debug(!deadCodeEliminationHintSeg); + deadCodeEliminationHintSeg = seg; + if (seg) + seg->list_deadCodeHintBy.push_back(this); + } + // instruction API IMLInstruction* AppendInstruction(); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index db6659b490..c429dfc31d 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -139,7 +139,6 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP cemuLog_log(LogType::Force, "Attempting to recompile function outside of allowed code area"); return nullptr; } - uint32 codeGenRangeStart; uint32 codeGenRangeSize = 0; coreinit::OSGetCodegenVirtAddrRangeInternal(codeGenRangeStart, codeGenRangeSize); @@ -160,6 +159,7 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP // generate intermediate code ppcImlGenContext_t ppcImlGenContext = { 0 }; + ppcImlGenContext.debug_entryPPCAddress = range.startAddress; bool compiledSuccessfully = PPCRecompiler_generateIntermediateCode(ppcImlGenContext, ppcRecFunc, entryAddresses, boundaryTracker); if (compiledSuccessfully == false) { @@ -240,7 +240,9 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP entryPointsOut.emplace_back(ppcEnterOffset, x64Offset); } - cemuLog_log(LogType::Force, "[Recompiler] Successfully compiled {:08x} - {:08x} Segments: {} Entrypoints: {}", ppcRecFunc->ppcAddress, ppcRecFunc->ppcAddress + ppcRecFunc->ppcSize, ppcImlGenContext.segmentList2.size(), entryPointsOut.size()); + //cemuLog_log(LogType::Force, "[Recompiler] Successfully compiled {:08x} - {:08x} Segments: {} Entrypoints: {}", ppcRecFunc->ppcAddress, ppcRecFunc->ppcAddress + ppcRecFunc->ppcSize, ppcImlGenContext.segmentList2.size(), entryPointsOut.size()); + + cemuLog_logDebug(LogType::Force, "[Recompiler] PPC 0x{:08x} 
-> x64: 0x{:x}", (uint32)ppcRecFunc->ppcAddress, (uint64)(uintptr_t)ppcRecFunc->x86Code); return ppcRecFunc; } @@ -301,11 +303,19 @@ bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext) // delay byte swapping for certain load+store patterns IMLOptimizer_OptimizeDirectIntegerCopies(&ppcImlGenContext); + IMLOptimizer_StandardOptimizationPass(ppcImlGenContext); + PPCRecompiler_NativeRegisterAllocatorPass(ppcImlGenContext); //PPCRecompiler_reorderConditionModifyInstructions(&ppcImlGenContext); //PPCRecompiler_removeRedundantCRUpdates(&ppcImlGenContext); +// if(ppcImlGenContext.debug_entryPPCAddress == 0x0200E1E8) +// { +// IMLDebug_Dump(&ppcImlGenContext); +// __debugbreak(); +// } + return true; } diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h index 080ce2fa05..6cce5eb24b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h @@ -41,13 +41,16 @@ struct ppcImlGenContext_t bool PSE{ true }; // cycle counter uint32 cyclesSinceLastBranch; // used to track ppc cycles - // temporary general purpose registers - //uint32 mappedRegister[PPC_REC_MAX_VIRTUAL_GPR]; - // temporary floating point registers (single and double precision) - //uint32 mappedFPRRegister[256]; std::unordered_map mappedRegs; + uint32 GetMaxRegId() const + { + if (mappedRegs.empty()) + return 0; + return mappedRegs.size()-1; + } + // list of segments std::vector segmentList2; // code generation control diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index 38a20a24ed..b89b7f7c7a 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -2935,6 +2935,10 @@ void PPCRecompiler_HandleCycleCheckCount(ppcImlGenContext_t& ppcImlGenContext, P splitSeg->SetLinkBranchTaken(exitSegment); 
exitSegment->AppendInstruction()->make_macro(PPCREC_IML_MACRO_LEAVE, basicBlockInfo.startAddress, 0, 0, IMLREG_INVALID); + + cemu_assert_debug(splitSeg->nextSegmentBranchNotTaken); + // let the IML optimizer and RA know that the original segment should be used during analysis for dead code elimination + exitSegment->SetNextSegmentForOverwriteHints(splitSeg->nextSegmentBranchNotTaken); } void PPCRecompiler_SetSegmentsUncertainFlow(ppcImlGenContext_t& ppcImlGenContext)