From 8eb6b57024edf41590e56e926cae2149362620fa Mon Sep 17 00:00:00 2001 From: offtkp Date: Mon, 2 Sep 2024 04:39:49 +0300 Subject: [PATCH 1/7] Implement overflow flag for S_ADD_I32 --- src/shader_recompiler/frontend/translate/scalar_alu.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index af258cd1964..8193193a19d 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -373,8 +373,13 @@ void Translator::S_AND_B64(NegateMode negate, const GcnInst& inst) { void Translator::S_ADD_I32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; - SetDst(inst.dst[0], ir.IAdd(src0, src1)); - // TODO: Overflow flag + const IR::U32 result{ir.IAdd(src0, src1)}; + SetDst(inst.dst[0], result); + const IR::U32 sign_mask = ir.Imm32(1 << 31); + const IR::U32 sign0 = ir.BitwiseAnd(src0, sign_mask); + const IR::U32 sign1 = ir.BitwiseAnd(src1, sign_mask); + const IR::U32 signr = ir.BitwiseAnd(result, sign_mask); + ir.SetScc(ir.LogicalAnd(ir.IEqual(sign0, sign1), ir.INotEqual(sign0, signr))); } void Translator::S_AND_B32(const GcnInst& inst) { From b331f241645fce74a4377863baa3a2135854c457 Mon Sep 17 00:00:00 2001 From: offtkp Date: Mon, 2 Sep 2024 04:47:11 +0300 Subject: [PATCH 2/7] Implement S_SUB_I32 and flags for S_SUB_U32 --- .../frontend/translate/scalar_alu.cpp | 18 +++++++++++++++--- .../frontend/translate/translate.h | 1 + 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index 8193193a19d..8e25790bc7f 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -70,8 +70,9 @@ void Translator::EmitScalarAlu(const 
GcnInst& inst) { case Opcode::S_ADDC_U32: return S_ADDC_U32(inst); case Opcode::S_SUB_U32: - case Opcode::S_SUB_I32: return S_SUB_U32(inst); + case Opcode::S_SUB_I32: + return S_SUB_I32(inst); case Opcode::S_MIN_U32: return S_MIN_U32(inst); case Opcode::S_MAX_U32: @@ -519,8 +520,19 @@ void Translator::S_SUB_U32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; SetDst(inst.dst[0], ir.ISub(src0, src1)); - // TODO: Carry out - ir.SetScc(ir.Imm1(false)); + ir.SetScc(ir.IGreaterThan(src1, src0, false)); +} + +void Translator::S_SUB_I32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + const IR::U32 result = ir.ISub(src0, src1); + SetDst(inst.dst[0], result); + const IR::U32 sign_mask = ir.Imm32(1 << 31); + const IR::U32 sign0 = ir.BitwiseAnd(src0, sign_mask); + const IR::U32 sign1 = ir.BitwiseAnd(src1, sign_mask); + const IR::U32 signr = ir.BitwiseAnd(result, sign_mask); + ir.SetScc(ir.LogicalAnd(ir.INotEqual(sign0, sign1), ir.INotEqual(sign0, signr))); } void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) { diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index f1619e810a6..e29f9838487 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -95,6 +95,7 @@ class Translator { void S_NOT_B64(const GcnInst& inst); void S_BREV_B32(const GcnInst& inst); void S_ADD_U32(const GcnInst& inst); + void S_SUB_I32(const GcnInst& inst); void S_SUB_U32(const GcnInst& inst); void S_GETPC_B64(u32 pc, const GcnInst& inst); void S_ADDC_U32(const GcnInst& inst); From 0491cebdb34e6900841651392fee08f9e7299ae1 Mon Sep 17 00:00:00 2001 From: offtkp Date: Mon, 2 Sep 2024 04:54:05 +0300 Subject: [PATCH 3/7] Set unsigned overflow flag for S_ADD_U32 --- src/shader_recompiler/frontend/translate/scalar_alu.cpp | 6 +++--- 1 file changed, 3 
insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index 8e25790bc7f..f5ec996b53a 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -511,9 +511,9 @@ void Translator::S_BREV_B32(const GcnInst& inst) { void Translator::S_ADD_U32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; - SetDst(inst.dst[0], ir.IAdd(src0, src1)); - // TODO: Carry out - ir.SetScc(ir.Imm1(false)); + const IR::U32 result = ir.IAdd(src0, src1); + SetDst(inst.dst[0], result); + ir.SetScc(ir.ILessThan(result, src0, false)); } void Translator::S_SUB_U32(const GcnInst& inst) { From aee53079ca151faae0662542f0f50811dc6d3084 Mon Sep 17 00:00:00 2001 From: offtkp Date: Mon, 2 Sep 2024 04:57:01 +0300 Subject: [PATCH 4/7] Match the coding style a bit more --- .../frontend/translate/scalar_alu.cpp | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index f5ec996b53a..c97c38ba70a 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -376,10 +376,10 @@ void Translator::S_ADD_I32(const GcnInst& inst) { const IR::U32 src1{GetSrc(inst.src[1])}; const IR::U32 result{ir.IAdd(src0, src1)}; SetDst(inst.dst[0], result); - const IR::U32 sign_mask = ir.Imm32(1 << 31); - const IR::U32 sign0 = ir.BitwiseAnd(src0, sign_mask); - const IR::U32 sign1 = ir.BitwiseAnd(src1, sign_mask); - const IR::U32 signr = ir.BitwiseAnd(result, sign_mask); + const IR::U32 sign_mask{ir.Imm32(1 << 31)}; + const IR::U32 sign0{ir.BitwiseAnd(src0, sign_mask)}; + const IR::U32 sign1{ir.BitwiseAnd(src1, sign_mask)}; + const IR::U32 signr{ir.BitwiseAnd(result, sign_mask)}; 
ir.SetScc(ir.LogicalAnd(ir.IEqual(sign0, sign1), ir.INotEqual(sign0, signr))); } @@ -511,7 +511,7 @@ void Translator::S_BREV_B32(const GcnInst& inst) { void Translator::S_ADD_U32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; - const IR::U32 result = ir.IAdd(src0, src1); + const IR::U32 result{ir.IAdd(src0, src1)}; SetDst(inst.dst[0], result); ir.SetScc(ir.ILessThan(result, src0, false)); } @@ -526,12 +526,12 @@ void Translator::S_SUB_U32(const GcnInst& inst) { void Translator::S_SUB_I32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; - const IR::U32 result = ir.ISub(src0, src1); + const IR::U32 result{ir.ISub(src0, src1)}; SetDst(inst.dst[0], result); - const IR::U32 sign_mask = ir.Imm32(1 << 31); - const IR::U32 sign0 = ir.BitwiseAnd(src0, sign_mask); - const IR::U32 sign1 = ir.BitwiseAnd(src1, sign_mask); - const IR::U32 signr = ir.BitwiseAnd(result, sign_mask); + const IR::U32 sign_mask{ir.Imm32(1 << 31)}; + const IR::U32 sign0{ir.BitwiseAnd(src0, sign_mask)}; + const IR::U32 sign1{ir.BitwiseAnd(src1, sign_mask)}; + const IR::U32 signr{ir.BitwiseAnd(result, sign_mask)}; ir.SetScc(ir.LogicalAnd(ir.INotEqual(sign0, sign1), ir.INotEqual(sign0, signr))); } From 5f6a9d07ac93754e1f04b3054a2a7118c25d3965 Mon Sep 17 00:00:00 2001 From: offtkp Date: Mon, 2 Sep 2024 05:08:30 +0300 Subject: [PATCH 5/7] Set carry for S_ADDC_U32 --- src/shader_recompiler/frontend/translate/scalar_alu.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index c97c38ba70a..89a859dd832 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -547,7 +547,11 @@ void Translator::S_ADDC_U32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 
src1{GetSrc(inst.src[1])}; const IR::U32 carry{ir.Select(ir.GetScc(), ir.Imm32(1U), ir.Imm32(0U))}; - SetDst(inst.dst[0], ir.IAdd(ir.IAdd(src0, src1), carry)); + const IR::U32 sum{ir.IAdd(src0, src1)}; + const IR::U32 result{ir.IAdd(sum, carry)}; + SetDst(inst.dst[0], result); + // SCC = carry-out of either add; (result < src) alone misses 0xFFFFFFFF + 0xFFFFFFFF + 1. + ir.SetScc(ir.LogicalOr(ir.ILessThan(sum, src0, false), ir.ILessThan(result, sum, false))); } void Translator::S_MAX_U32(const GcnInst& inst) { From 0d81774464251659bc6e12d2064f005acaa9c5a9 Mon Sep 17 00:00:00 2001 From: offtkp Date: Mon, 2 Sep 2024 05:25:53 +0300 Subject: [PATCH 6/7] Set overflow flag for V_ADD_I32 --- src/shader_recompiler/frontend/translate/vector_alu.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 7fef913775c..2aa19ab81cc 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -411,8 +411,13 @@ void Translator::V_ADD_I32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))}; const IR::VectorReg dst_reg{inst.dst[0].code}; - ir.SetVectorReg(dst_reg, ir.IAdd(src0, src1)); - // TODO: Carry + const IR::U32 result{ir.IAdd(src0, src1)}; + ir.SetVectorReg(dst_reg, result); + // Despite the I32 suffix, the hardware writes the unsigned carry-out to VCC here, not a + // signed-overflow flag: this is the low half of a 64-bit add whose high half (V_ADDC_U32) + // consumes VCC as carry-in. The 32-bit sum wrapped exactly when it is smaller than an + // operand, so an unsigned compare against src0 yields the carry. + ir.SetVcc(ir.ILessThan(result, src0, false)); } void Translator::V_ADDC_U32(const GcnInst& inst) { From 616910f3ad13cbc6b8ce5f1a3d6eb54c53385ff5 Mon Sep 17 00:00:00 2001 From: offtkp Date: Mon, 2 Sep 2024 05:28:09 +0300 Subject: [PATCH 7/7] Set overflow flag for V_SUBREV_I32 --- 
src/shader_recompiler/frontend/translate/vector_alu.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 2aa19ab81cc..bfbb84f527c 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -665,8 +665,13 @@ void Translator::V_SUBREV_F32(const GcnInst& inst) { void Translator::V_SUBREV_I32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; - SetDst(inst.dst[0], ir.ISub(src1, src0)); - // TODO: Carry-out + const IR::U32 result{ir.ISub(src1, src0)}; + SetDst(inst.dst[0], result); + // Reversed subtract: dst = src1 - src0 (src1 is the minuend, src0 the subtrahend). + // Despite the I32 suffix, VCC receives the unsigned borrow-out, not a signed-overflow + // flag; it feeds the borrow-in of the subtract-with-borrow opcodes in 64-bit math. + // A borrow occurs exactly when src0 is greater than src1 as unsigned values. + ir.SetVcc(ir.IGreaterThan(src0, src1, false)); } void Translator::V_MAD_U64_U32(const GcnInst& inst) {