// Patch summary: rewrites `load_imm` in four AArch64 JIT assemblers under
// fidget/src/jit/aarch64/ (float_slice.rs, grad_slice.rs, interval.rs, point.rs).
//
// Before: every immediate was built with a fixed two-instruction sequence,
// `movz <imm_u32 >> 16>, lsl 16` followed by `movk`, and `movk` was handed the
// whole 32-bit `imm_u32` value.
//
// After: the f32 bit pattern (`imm.to_bits()`) is inspected and one of three
// sequences is emitted into the scratch W register:
//   * low 16 bits zero  -> a single `movz <imm_u32 >> 16>, lsl 16`
//                          (an all-zero pattern also takes this branch)
//   * high 16 bits zero -> a single `movz <imm_u32 & 0xFFFF>`
//   * otherwise         -> `movz <imm_u32 >> 16>, lsl 16` then `movk`, whose
//                          operand is now masked with `& 0xFFFF`
//
// The final move into the FP/SIMD register is unchanged in every file:
// `dup V(IMM_REG as u32).s4` (float_slice), `fmov S(IMM_REG as u32)` (grad_slice
// and point), `dup V(IMM_REG as u32).s2` (interval). The return expression
// `IMM_REG.wrapping_sub(OFFSET)` is untouched context.
//
// The scratch register is w9 in three files and w15 in interval.rs; the patch
// also corrects the interval.rs doc comment from "W9" to "W15" to match.
//
// NOTE(review): the interval.rs doc comment still says "register S4" although
// the instruction emitted there is `dup V(IMM_REG as u32).s2` - confirm whether
// the comment should name the vector register instead.
// NOTE(review): the hunk bodies appear flattened onto single lines in this copy;
// the original line breaks would need restoring before the patch can be applied.
diff --git a/fidget/src/jit/aarch64/float_slice.rs b/fidget/src/jit/aarch64/float_slice.rs index d7af2670..01dbd322 100644 --- a/fidget/src/jit/aarch64/float_slice.rs +++ b/fidget/src/jit/aarch64/float_slice.rs @@ -402,11 +402,23 @@ impl Assembler for FloatSliceAssembler { /// Loads an immediate into register V4, using W9 as an intermediary fn load_imm(&mut self, imm: f32) -> u8 { let imm_u32 = imm.to_bits(); - dynasm!(self.0.ops - ; movz w9, imm_u32 >> 16, lsl 16 - ; movk w9, imm_u32 - ; dup V(IMM_REG as u32).s4, w9 - ); + if imm_u32 & 0xFFFF == 0 { + dynasm!(self.0.ops + ; movz w9, imm_u32 >> 16, lsl 16 + ; dup V(IMM_REG as u32).s4, w9 + ); + } else if imm_u32 & 0xFFFF_0000 == 0 { + dynasm!(self.0.ops + ; movz w9, imm_u32 & 0xFFFF + ; dup V(IMM_REG as u32).s4, w9 + ); + } else { + dynasm!(self.0.ops + ; movz w9, imm_u32 >> 16, lsl 16 + ; movk w9, imm_u32 & 0xFFFF + ; dup V(IMM_REG as u32).s4, w9 + ); + } IMM_REG.wrapping_sub(OFFSET) } diff --git a/fidget/src/jit/aarch64/grad_slice.rs b/fidget/src/jit/aarch64/grad_slice.rs index c09e240e..09a87086 100644 --- a/fidget/src/jit/aarch64/grad_slice.rs +++ b/fidget/src/jit/aarch64/grad_slice.rs @@ -488,11 +488,23 @@ impl Assembler for GradSliceAssembler { /// Loads an immediate into register S4, using W9 as an intermediary fn load_imm(&mut self, imm: f32) -> u8 { let imm_u32 = imm.to_bits(); - dynasm!(self.0.ops - ; movz w9, imm_u32 >> 16, lsl 16 - ; movk w9, imm_u32 - ; fmov S(IMM_REG as u32), w9 - ); + if imm_u32 & 0xFFFF == 0 { + dynasm!(self.0.ops + ; movz w9, imm_u32 >> 16, lsl 16 + ; fmov S(IMM_REG as u32), w9 + ); + } else if imm_u32 & 0xFFFF_0000 == 0 { + dynasm!(self.0.ops + ; movz w9, imm_u32 & 0xFFFF + ; fmov S(IMM_REG as u32), w9 + ); + } else { + dynasm!(self.0.ops + ; movz w9, imm_u32 >> 16, lsl 16 + ; movk w9, imm_u32 & 0xFFFF + ; fmov S(IMM_REG as u32), w9 + ); + } IMM_REG.wrapping_sub(OFFSET) } diff --git a/fidget/src/jit/aarch64/interval.rs b/fidget/src/jit/aarch64/interval.rs index d4b97ec1..6f4c0a17 
100644 --- a/fidget/src/jit/aarch64/interval.rs +++ b/fidget/src/jit/aarch64/interval.rs @@ -729,14 +729,26 @@ impl Assembler for IntervalAssembler { ); } - /// Loads an immediate into register S4, using W9 as an intermediary + /// Loads an immediate into register S4, using W15 as an intermediary fn load_imm(&mut self, imm: f32) -> u8 { let imm_u32 = imm.to_bits(); - dynasm!(self.0.ops - ; movz w15, imm_u32 >> 16, lsl 16 - ; movk w15, imm_u32 - ; dup V(IMM_REG as u32).s2, w15 - ); + if imm_u32 & 0xFFFF == 0 { + dynasm!(self.0.ops + ; movz w15, imm_u32 >> 16, lsl 16 + ; dup V(IMM_REG as u32).s2, w15 + ); + } else if imm_u32 & 0xFFFF_0000 == 0 { + dynasm!(self.0.ops + ; movz w15, imm_u32 & 0xFFFF + ; dup V(IMM_REG as u32).s2, w15 + ); + } else { + dynasm!(self.0.ops + ; movz w15, imm_u32 >> 16, lsl 16 + ; movk w15, imm_u32 & 0xFFFF + ; dup V(IMM_REG as u32).s2, w15 + ); + } IMM_REG.wrapping_sub(OFFSET) } diff --git a/fidget/src/jit/aarch64/point.rs b/fidget/src/jit/aarch64/point.rs index db6a281e..949d520c 100644 --- a/fidget/src/jit/aarch64/point.rs +++ b/fidget/src/jit/aarch64/point.rs @@ -434,11 +434,23 @@ impl Assembler for PointAssembler { /// Loads an immediate into register S4, using W9 as an intermediary fn load_imm(&mut self, imm: f32) -> u8 { let imm_u32 = imm.to_bits(); - dynasm!(self.0.ops - ; movz w9, imm_u32 >> 16, lsl 16 - ; movk w9, imm_u32 - ; fmov S(IMM_REG as u32), w9 - ); + if imm_u32 & 0xFFFF == 0 { + dynasm!(self.0.ops + ; movz w9, imm_u32 >> 16, lsl 16 + ; fmov S(IMM_REG as u32), w9 + ); + } else if imm_u32 & 0xFFFF_0000 == 0 { + dynasm!(self.0.ops + ; movz w9, imm_u32 & 0xFFFF + ; fmov S(IMM_REG as u32), w9 + ); + } else { + dynasm!(self.0.ops + ; movz w9, imm_u32 >> 16, lsl 16 + ; movk w9, imm_u32 & 0xFFFF + ; fmov S(IMM_REG as u32), w9 + ); + } IMM_REG.wrapping_sub(OFFSET) }