Skip to content

Commit

Permalink
x86 impl (untested)
Browse files Browse the repository at this point in the history
  • Loading branch information
mkeeter committed Aug 4, 2024
1 parent b79e40d commit f2bc281
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 31 deletions.
15 changes: 10 additions & 5 deletions fidget/src/jit/x86_64/float_slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,14 +99,22 @@ impl Assembler for FloatSliceAssembler {
; vmovups [rsp + sp_offset], Ry(reg(src_reg))
);
}

/// Loads the input selected by `src_arg` into the vector register `out_reg`.
///
/// `rdi` points at an array of `*const f32` input pointers (8 bytes per
/// entry).  The pointer for `src_arg` is read into `r8`, and the value is
/// then loaded from that pointer at byte offset `rcx`.
///
/// # Panics
/// Panics if `src_arg` does not fit in an `i32`.
fn build_input(&mut self, out_reg: u8, src_arg: u32) {
    let pos = 8 * i32::try_from(src_arg).unwrap();
    dynasm!(self.0.ops
        ; mov r8, [rdi + pos] // read the *const float from the array
        // The array offset is applied exactly once, in the addressing mode.
        // Adding `rcx` to `r8` beforehand as well would read from twice the
        // intended offset.
        ; vmovups Ry(reg(out_reg)), [r8 + rcx] // offset by array
    );
}

fn build_output(&mut self, arg_reg: u8, out_index: u32) {
assert_eq!(out_index, 0);
dynasm!(self.0.ops
; vmovups [rsi + rcx], Ry(reg(arg_reg))
);
}

fn build_sin(&mut self, out_reg: u8, lhs_reg: u8) {
extern "sysv64" fn float_sin(f: f32) -> f32 {
f.sin()
Expand Down Expand Up @@ -368,9 +376,6 @@ impl Assembler for FloatSliceAssembler {
}
fn finalize(mut self, out_reg: u8) -> Result<Mmap, Error> {
dynasm!(self.0.ops
// Copy data from out_reg into the out array, then adjust it
; vmovups [rsi], Ry(reg(out_reg))
; add rsi, 32
; sub rdx, 8
; add rcx, 32
; jmp ->L
Expand Down
14 changes: 9 additions & 5 deletions fidget/src/jit/x86_64/grad_slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,17 @@ impl Assembler for GradSliceAssembler {
let pos = 8 * i32::try_from(src_arg).unwrap();
dynasm!(self.0.ops
; mov r8, [rdi + pos] // read the *const float from the array
; add r8, rcx // offset it by array position
; vmovaps Rx(reg(out_reg)), [r8]
; vmovaps Rx(reg(out_reg)), [r8 + rcx] // offset by array
);
}

fn build_output(&mut self, arg_reg: u8, out_index: u32) {
assert_eq!(out_index, 0);
dynasm!(self.0.ops
; vmovups [rsi + rcx], Rx(reg(arg_reg))
);
}

fn build_sin(&mut self, out_reg: u8, lhs_reg: u8) {
extern "sysv64" fn grad_sin(v: Grad) -> Grad {
v.sin()
Expand Down Expand Up @@ -468,9 +475,6 @@ impl Assembler for GradSliceAssembler {
}
fn finalize(mut self, out_reg: u8) -> Result<Mmap, Error> {
dynasm!(self.0.ops
// Copy data from out_reg into the out array, then adjust it
; vmovups [rsi], Rx(reg(out_reg))
; add rsi, 16 // 4x float
; sub rdx, 1 // we process one element at a time
; add rcx, 16 // input array is Grad (f32 x 4)
; jmp ->L
Expand Down
18 changes: 13 additions & 5 deletions fidget/src/jit/x86_64/interval.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,12 @@ use dynasmrt::{dynasm, DynasmApi, DynasmLabelApi};
///
/// Registers are passed in as follows:
///
/// | Variable | Register | Type |
/// |------------|----------|-----------------------|
/// | `vars` | `rdi` | `*const [f32; 2]` |
/// | `choices` | `rsi` | `*mut u8` (array) |
/// | `simplify` | `rdx` | `*mut u8` (single) |
/// | Variable | Register | Type |
/// |------------|----------|-------------------------------|
/// | `vars` | `rdi` | `*const [f32; 2]` |
/// | `choices` | `rsi` | `*mut u8` (array) |
/// | `simplify` | `rdx` | `*mut u8` (single) |
/// | `output` | `rcx` | `*mut [f32; 2]` (single) |
///
/// The stack is configured as follows
///
Expand All @@ -29,6 +30,7 @@ use dynasmrt::{dynasm, DynasmApi, DynasmLabelApi};
/// | -0x08 | `r12` | During function calls, we use these |
/// | -0x10 | `r13` | as temporary storage so must preserve their |
/// | -0x18 | `r14` | previous values on the stack |
/// | -0x20 | `r15` | |
/// |----------|--------------|---------------------------------------------|
/// | ... | ... | Register spills live up here |
/// |----------|--------------|---------------------------------------------|
Expand Down Expand Up @@ -760,6 +762,7 @@ impl Assembler for IntervalAssembler {
; mov r12, [rbp - 0x8]
; mov r13, [rbp - 0x10]
; mov r14, [rbp - 0x18]
; mov r15, [rbp - 0x20]
);
}
dynasm!(self.0.ops
Expand All @@ -786,6 +789,7 @@ impl IntervalAssembler {
; mov [rbp - 0x8], r12
; mov [rbp - 0x10], r13
; mov [rbp - 0x18], r14
; mov [rbp - 0x20], r15
);
self.0.saved_callee_regs = true
}
Expand All @@ -795,6 +799,7 @@ impl IntervalAssembler {
; mov r12, rdi
; mov r13, rsi
; mov r14, rdx
; mov r15, rcx

// Back up register values to the stack, treating them as doubles
// (since we want to back up all 64 bits)
Expand Down Expand Up @@ -836,6 +841,7 @@ impl IntervalAssembler {
; mov rdi, r12
; mov rsi, r13
; mov rdx, r14
; mov rcx, r15

// Unpack the interval result
; vmovq Rx(reg(out_reg)), xmm0
Expand Down Expand Up @@ -863,6 +869,7 @@ impl IntervalAssembler {
; mov r12, rdi
; mov r13, rsi
; mov r14, rdx
; mov r15, rcx

// Back up register values to the stack, treating them as doubles
// (since we want to back up all 64 bits)
Expand Down Expand Up @@ -906,6 +913,7 @@ impl IntervalAssembler {
; mov rdi, r12
; mov rsi, r13
; mov rdx, r14
; mov rcx, r15

// Unpack the interval result
; vmovq Rx(reg(out_reg)), xmm0
Expand Down
34 changes: 18 additions & 16 deletions fidget/src/jit/x86_64/point.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use dynasmrt::{dynasm, DynasmApi, DynasmLabelApi};
/// | `vars` | `rdi` | `*const f32` |
/// | `choices` | `rsi` | `*mut u8` (array) |
/// | `simplify` | `rdx` | `*mut u8` (single) |
/// | `output` | `rcx` | `*mut f32` (single) |
///
/// The stack is configured as follows
///
Expand All @@ -28,6 +29,7 @@ use dynasmrt::{dynasm, DynasmApi, DynasmLabelApi};
/// | -0x08 | `r12` | During function calls, we use these |
/// | -0x10 | `r13` | as temporary storage so must preserve their |
/// | -0x18 | `r14` | previous values on the stack |
/// | -0x20 | `r15` | |
/// |----------|--------------|---------------------------------------------|
/// | ... | ... | Register spills live up here |
/// |----------|--------------|---------------------------------------------|
Expand All @@ -44,7 +46,7 @@ use dynasmrt::{dynasm, DynasmApi, DynasmLabelApi};
/// | 0x04 | xmm5 | |
/// | 0x00 | xmm4 | |
/// ```
// The upper region holds the callee-saved slots addressed below `rbp`; the
// lowest one is `r15` at `rbp - 0x20`, so the region must span 0x20 bytes.
const STACK_SIZE_UPPER: usize = 0x20; // Positions relative to `rbp`
const STACK_SIZE_LOWER: usize = 0x30; // Positions relative to `rsp`

impl Assembler for PointAssembler {
Expand Down Expand Up @@ -423,27 +425,32 @@ impl Assembler for PointAssembler {
}

impl PointAssembler {
fn call_fn_unary(
&mut self,
out_reg: u8,
arg_reg: u8,
f: extern "sysv64" fn(f32) -> f32,
) {
/// Emits stores that back up `r12` through `r15` below `rbp`, at most once.
///
/// The first call emits the four stores and sets `saved_callee_regs`;
/// later calls see the flag and emit nothing.
fn ensure_callee_regs_saved(&mut self) {
    if self.0.saved_callee_regs {
        // The backup stores have already been emitted
        return;
    }
    dynasm!(self.0.ops
        ; mov [rbp - 0x8], r12
        ; mov [rbp - 0x10], r13
        ; mov [rbp - 0x18], r14
        ; mov [rbp - 0x20], r15
    );
    self.0.saved_callee_regs = true;
}
fn call_fn_unary(
&mut self,
out_reg: u8,
arg_reg: u8,
f: extern "sysv64" fn(f32) -> f32,
) {
self.ensure_callee_regs_saved();
let addr = f as usize;
dynasm!(self.0.ops
// Back up pointers to caller-saved registers
; mov r12, rdi
; mov r13, rsi
; mov r14, rdx
; mov r15, rcx

// Back up all register values to the stack
; movss [rsp], xmm4
Expand Down Expand Up @@ -482,6 +489,7 @@ impl PointAssembler {
; mov rdi, r12
; mov rsi, r13
; mov rdx, r14
; mov rcx, r15

; movss Rx(reg(out_reg)), xmm0
);
Expand All @@ -493,21 +501,14 @@ impl PointAssembler {
rhs_reg: u8,
f: extern "sysv64" fn(f32, f32) -> f32,
) {
// Back up a few callee-saved registers that we're about to use
if !self.0.saved_callee_regs {
dynasm!(self.0.ops
; mov [rbp - 0x8], r12
; mov [rbp - 0x10], r13
; mov [rbp - 0x18], r14
);
self.0.saved_callee_regs = true
}
self.ensure_callee_regs_saved();
let addr = f as usize;
dynasm!(self.0.ops
// Back up pointers to caller-saved registers
; mov r12, rdi
; mov r13, rsi
; mov r14, rdx
; mov r15, rcx

// Back up all register values to the stack
; movss [rsp], xmm4
Expand Down Expand Up @@ -549,6 +550,7 @@ impl PointAssembler {
; mov rdi, r12
; mov rsi, r13
; mov rdx, r14
; mov rcx, r15

; movss Rx(reg(out_reg)), xmm0
);
Expand Down

0 comments on commit f2bc281

Please sign in to comment.