diff --git a/fidget/src/jit/x86_64/float_slice.rs b/fidget/src/jit/x86_64/float_slice.rs index fc87a083..bc6f8786 100644 --- a/fidget/src/jit/x86_64/float_slice.rs +++ b/fidget/src/jit/x86_64/float_slice.rs @@ -35,7 +35,9 @@ pub const SIMD_WIDTH: usize = 8; /// | -0x28 | `r15` | | /// |----------|--------------|---------------------------------------------| /// | ... | ... | Register spills live up here | +/// | 0x220 | | | /// |----------|--------------|---------------------------------------------| +/// | 0x200 | function in | Stashed arguments for function calls | /// | 0x180 | function i/o | Inputs and outputs for function calls | /// |----------|--------------|---------------------------------------------| /// | 0x160 | ymm15 | Caller-saved registers during functions | @@ -52,7 +54,7 @@ pub const SIMD_WIDTH: usize = 8; /// | 0x00 | ymm4 | | /// ``` const STACK_SIZE_UPPER: usize = 0x28; // Positions relative to `rbp` -const STACK_SIZE_LOWER: usize = 0x200; // Positions relative to `rsp` +const STACK_SIZE_LOWER: usize = 0x220; // Positions relative to `rsp` impl Assembler for FloatSliceAssembler { type Data = f32; @@ -456,6 +458,101 @@ impl FloatSliceAssembler { // Get the output value from the stack ; vmovups Ry(reg(out_reg)), [rsp + 0x180] + // Restore pointers + ; mov rdi, [rbp - 0x8] + ; mov rsi, [rbp - 0x10] + ; mov rdx, [rbp - 0x18] + ; mov rcx, [rbp - 0x20] + ; mov r15, [rbp - 0x28] + ); + } + fn call_fn_binary( + &mut self, + out_reg: u8, + lhs_reg: u8, + rhs_reg: u8, + f: extern "sysv64" fn(f32, f32) -> f32, + ) { + let addr = f as usize; + dynasm!(self.0.ops + // Back up all of our pointers to the stack + ; mov [rbp - 0x8], rdi + ; mov [rbp - 0x10], rsi + ; mov [rbp - 0x18], rdx + ; mov [rbp - 0x20], rcx + ; mov [rbp - 0x28], r15 + + // Back up register values to the stack, saving all 256 bits of each ymm register + ; vmovups [rsp], ymm4 + ; vmovups [rsp + 0x20], ymm5 + ; vmovups [rsp + 0x40], ymm6 + ; vmovups [rsp + 0x60], ymm7 + ; vmovups [rsp + 0x80], ymm8 + ; vmovups 
[rsp + 0xa0], ymm9 + ; vmovups [rsp + 0xc0], ymm10 + ; vmovups [rsp + 0xe0], ymm11 + ; vmovups [rsp + 0x100], ymm12 + ; vmovups [rsp + 0x120], ymm13 + ; vmovups [rsp + 0x140], ymm14 + ; vmovups [rsp + 0x160], ymm15 + + // Put the function pointer into a callee-saved register, so it survives each call + ; mov r15, QWORD addr as _ + + // Copy our input arguments to the stack for safe-keeping + ; vmovups [rsp + 0x180], Ry(reg(lhs_reg)) + ; vmovups [rsp + 0x200], Ry(reg(rhs_reg)) + + ; movd xmm0, [rsp + 0x180] + ; movd xmm1, [rsp + 0x200] + ; call r15 + ; movd [rsp + 0x180], xmm0 + ; movd xmm0, [rsp + 0x184] + ; movd xmm1, [rsp + 0x204] + ; call r15 + ; movd [rsp + 0x184], xmm0 + ; movd xmm0, [rsp + 0x188] + ; movd xmm1, [rsp + 0x208] + ; call r15 + ; movd [rsp + 0x188], xmm0 + ; movd xmm0, [rsp + 0x18c] + ; movd xmm1, [rsp + 0x20c] + ; call r15 + ; movd [rsp + 0x18c], xmm0 + ; movd xmm0, [rsp + 0x190] + ; movd xmm1, [rsp + 0x210] + ; call r15 + ; movd [rsp + 0x190], xmm0 + ; movd xmm0, [rsp + 0x194] + ; movd xmm1, [rsp + 0x214] + ; call r15 + ; movd [rsp + 0x194], xmm0 + ; movd xmm0, [rsp + 0x198] + ; movd xmm1, [rsp + 0x218] + ; call r15 + ; movd [rsp + 0x198], xmm0 + ; movd xmm0, [rsp + 0x19c] + ; movd xmm1, [rsp + 0x21c] + ; call r15 + ; movd [rsp + 0x19c], xmm0 + + // Restore float registers + ; vmovups ymm4, [rsp] + ; vmovups ymm5, [rsp + 0x20] + ; vmovups ymm6, [rsp + 0x40] + ; vmovups ymm7, [rsp + 0x60] + ; vmovups ymm8, [rsp + 0x80] + ; vmovups ymm9, [rsp + 0xa0] + ; vmovups ymm10, [rsp + 0xc0] + ; vmovups ymm11, [rsp + 0xe0] + ; vmovups ymm12, [rsp + 0x100] + ; vmovups ymm13, [rsp + 0x120] + ; vmovups ymm14, [rsp + 0x140] + ; vmovups ymm15, [rsp + 0x160] + + // Get the output value from the stack + ; vmovups Ry(reg(out_reg)), [rsp + 0x180] + // Restore pointers ; mov rdi, [rbp - 0x8] ; mov rsi, [rbp - 0x10] diff --git a/fidget/src/jit/x86_64/point.rs b/fidget/src/jit/x86_64/point.rs index 0104a227..1dce19b3 100644 --- a/fidget/src/jit/x86_64/point.rs +++ 
b/fidget/src/jit/x86_64/point.rs @@ -479,6 +479,71 @@ impl PointAssembler { ; mov rsi, r13 ; mov rdx, r14 + ; movss Rx(reg(out_reg)), xmm0 + ); + } + fn call_fn_binary( + &mut self, + out_reg: u8, + lhs_reg: u8, + rhs_reg: u8, + f: extern "sysv64" fn(f32, f32) -> f32, + ) { + // Back up a few callee-saved registers that we're about to use + if !self.0.saved_callee_regs { + dynasm!(self.0.ops + ; mov [rbp - 0x8], r12 + ; mov [rbp - 0x10], r13 + ; mov [rbp - 0x18], r14 + ); + self.0.saved_callee_regs = true + } + let addr = f as usize; + dynasm!(self.0.ops + // Back up pointers into callee-saved registers (r12-r14), which the sysv64 callee preserves + ; mov r12, rdi + ; mov r13, rsi + ; mov r14, rdx + + // Back up all register values to the stack (movss stores the low 32 bits of each) + ; movss [rsp], xmm4 + ; movss [rsp + 0x4], xmm5 + ; movss [rsp + 0x8], xmm6 + ; movss [rsp + 0xc], xmm7 + ; movss [rsp + 0x10], xmm8 + ; movss [rsp + 0x14], xmm9 + ; movss [rsp + 0x18], xmm10 + ; movss [rsp + 0x1c], xmm11 + ; movss [rsp + 0x20], xmm12 + ; movss [rsp + 0x24], xmm13 + ; movss [rsp + 0x28], xmm14 + ; movss [rsp + 0x2c], xmm15 + + // Call the function with lhs in xmm0 and rhs in xmm1 + ; movss xmm0, Rx(reg(lhs_reg)) + ; movss xmm1, Rx(reg(rhs_reg)) + ; mov rsi, QWORD addr as _ + ; call rsi + + // Restore float registers + ; movss xmm4, [rsp] + ; movss xmm5, [rsp + 0x4] + ; movss xmm6, [rsp + 0x8] + ; movss xmm7, [rsp + 0xc] + ; movss xmm8, [rsp + 0x10] + ; movss xmm9, [rsp + 0x14] + ; movss xmm10, [rsp + 0x18] + ; movss xmm11, [rsp + 0x1c] + ; movss xmm12, [rsp + 0x20] + ; movss xmm13, [rsp + 0x24] + ; movss xmm14, [rsp + 0x28] + ; movss xmm15, [rsp + 0x2c] + + // Restore pointers + ; mov rdi, r12 + ; mov rsi, r13 + ; mov rdx, r14 + ; movss Rx(reg(out_reg)), xmm0 ); }