From 6cac7554cf901db35e959c53ff9ca13d57d0566e Mon Sep 17 00:00:00 2001 From: Matt Keeter Date: Fri, 22 Mar 2024 09:28:28 -0400 Subject: [PATCH] Use vcmpunordps instead of vcmpps (#45) This is equivalent, but more obvious. --- fidget/src/jit/x86_64/float_slice.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fidget/src/jit/x86_64/float_slice.rs b/fidget/src/jit/x86_64/float_slice.rs index c2bb1c85..29578501 100644 --- a/fidget/src/jit/x86_64/float_slice.rs +++ b/fidget/src/jit/x86_64/float_slice.rs @@ -241,8 +241,8 @@ impl Assembler for FloatSliceAssembler { fn build_max(&mut self, out_reg: u8, lhs_reg: u8, rhs_reg: u8) { dynasm!(self.0.ops // Build a mask of NANs; conveniently, all 1s is a NAN - ; vcmpps ymm1, Ry(reg(lhs_reg)), Ry(reg(lhs_reg)), 3 - ; vcmpps ymm2, Ry(reg(rhs_reg)), Ry(reg(rhs_reg)), 3 + ; vcmpunordps ymm1, Ry(reg(lhs_reg)), Ry(reg(lhs_reg)) + ; vcmpunordps ymm2, Ry(reg(rhs_reg)), Ry(reg(rhs_reg)) ; vorps ymm1, ymm2, ymm1 // Calculate the max, which ignores NANs @@ -255,8 +255,8 @@ impl Assembler for FloatSliceAssembler { fn build_min(&mut self, out_reg: u8, lhs_reg: u8, rhs_reg: u8) { dynasm!(self.0.ops // Build a mask of NANs; conveniently, all 1s is a NAN - ; vcmpps ymm1, Ry(reg(lhs_reg)), Ry(reg(lhs_reg)), 3 - ; vcmpps ymm2, Ry(reg(rhs_reg)), Ry(reg(rhs_reg)), 3 + ; vcmpunordps ymm1, Ry(reg(lhs_reg)), Ry(reg(lhs_reg)) + ; vcmpunordps ymm2, Ry(reg(rhs_reg)), Ry(reg(rhs_reg)) ; vorps ymm1, ymm2, ymm1 // Calculate the min, which ignores NANs