Fix NAN handling in x86 SIMD

mkeeter · Mar 16, 2024 · 4b6418f · 4b6418f
1 parent 072d627
commit 4b6418f
Showing 1 changed file with 20 additions and 2 deletions.
diff --git a/fidget/src/jit/x86_64/float_slice.rs b/fidget/src/jit/x86_64/float_slice.rs
@@ -239,15 +239,33 @@ impl Assembler for FloatSliceAssembler {
         );
     }
     fn build_max(&mut self, out_reg: u8, lhs_reg: u8, rhs_reg: u8) {
-        // TODO: does this handle NaN correctly?
         dynasm!(self.0.ops
+            // Mask NAN lanes (self-compare, predicate 3 = unordered); all 1s is a NAN
+            ; vcmpps ymm1, Ry(reg(lhs_reg)), Ry(reg(lhs_reg)), 3
+            ; vcmpps ymm2, Ry(reg(rhs_reg)), Ry(reg(rhs_reg)), 3
+            ; vorps ymm1, ymm2, ymm1
+
+            // Calculate the max; vmaxps alone returns rhs if either lane is NAN
             ; vmaxps Ry(reg(out_reg)), Ry(reg(lhs_reg)), Ry(reg(rhs_reg))
+
+            // OR in the mask: lanes where lhs or rhs was NAN become all 1s (NAN)
+            ; vorps Ry(reg(out_reg)), Ry(reg(out_reg)), ymm1
         );
     }
     fn build_min(&mut self, out_reg: u8, lhs_reg: u8, rhs_reg: u8) {
-        // TODO: does this handle NaN correctly?
         dynasm!(self.0.ops
+            // Mask NAN lanes (self-compare, predicate 3 = unordered); all 1s is a NAN
+            ; vcmpps ymm1, Ry(reg(lhs_reg)), Ry(reg(lhs_reg)), 3
+            ; vcmpps ymm2, Ry(reg(rhs_reg)), Ry(reg(rhs_reg)), 3
+            ; vorps ymm1, ymm2, ymm1
+
+            // Calculate the min; vminps alone returns rhs if either lane is NAN
             ; vminps Ry(reg(out_reg)), Ry(reg(lhs_reg)), Ry(reg(rhs_reg))
+
+            // OR in the mask: lanes where lhs or rhs was NAN become all 1s (NAN)
+            // (lanes without a NAN are ORed with 0, so they keep the min
+            // result unchanged)
+            ; vorps Ry(reg(out_reg)), Ry(reg(out_reg)), ymm1
         );
     }
     fn load_imm(&mut self, imm: f32) -> u8 {