Skip to content

Commit

Permalink
Fix NAN handling in x86 SIMD
Browse files Browse the repository at this point in the history
  • Loading branch information
mkeeter committed Mar 16, 2024
1 parent 1d941f4 commit 91be1d2
Showing 1 changed file with 20 additions and 2 deletions.
22 changes: 20 additions & 2 deletions fidget/src/jit/x86_64/float_slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -239,15 +239,33 @@ impl Assembler for FloatSliceAssembler {
);
}
/// Emits code for the lane-wise maximum of two registers, producing NAN in
/// every lane where either input is NAN.
///
/// `out_reg`, `lhs_reg` and `rhs_reg` are translated to `ymm` registers by
/// `reg`; `ymm1` is overwritten as scratch.
fn build_max(&mut self, out_reg: u8, lhs_reg: u8, rhs_reg: u8) {
    dynasm!(self.0.ops
        // Build a mask of NANs with a single unordered compare
        // (predicate 3): a lane becomes all 1s when either input is NAN
        // and all 0s otherwise.  Conveniently, all 1s is itself a NAN.
        //
        // The mask is captured before the max is written, so it stays
        // correct even if `out_reg` aliases one of the inputs.
        ; vcmpps ymm1, Ry(reg(lhs_reg)), Ry(reg(rhs_reg)), 3

        // Calculate the max.  On its own this is not NAN-safe: when
        // either lane is NAN, `vmaxps` returns its second source, so a
        // NAN that is only present in `lhs` would be silently dropped.
        ; vmaxps Ry(reg(out_reg)), Ry(reg(lhs_reg)), Ry(reg(rhs_reg))

        // Set the NAN bits: flagged lanes become all 1s (a NAN), while
        // the remaining lanes are OR'd with 0 and keep the max.
        ; vorps Ry(reg(out_reg)), Ry(reg(out_reg)), ymm1
    );
}
/// Emits code for the lane-wise minimum of two registers, producing NAN in
/// every lane where either input is NAN.
///
/// `out_reg`, `lhs_reg` and `rhs_reg` are translated to `ymm` registers by
/// `reg`; `ymm1` is overwritten as scratch.
fn build_min(&mut self, out_reg: u8, lhs_reg: u8, rhs_reg: u8) {
    dynasm!(self.0.ops
        // Build a mask of NANs with a single unordered compare
        // (predicate 3): a lane becomes all 1s when either input is NAN
        // and all 0s otherwise.  Conveniently, all 1s is itself a NAN.
        //
        // The mask is captured before the min is written, so it stays
        // correct even if `out_reg` aliases one of the inputs.
        ; vcmpps ymm1, Ry(reg(lhs_reg)), Ry(reg(rhs_reg)), 3

        // Calculate the min.  On its own this is not NAN-safe: when
        // either lane is NAN, `vminps` returns its second source, so a
        // NAN that is only present in `lhs` would be silently dropped.
        ; vminps Ry(reg(out_reg)), Ry(reg(lhs_reg)), Ry(reg(rhs_reg))

        // Set the NAN bits: flagged lanes become all 1s (a NAN), while
        // the remaining lanes are OR'd with 0 and keep the min.
        ; vorps Ry(reg(out_reg)), Ry(reg(out_reg)), ymm1
    );
}
fn load_imm(&mut self, imm: f32) -> u8 {
Expand Down

0 comments on commit 91be1d2

Please sign in to comment.