Skip to content

Commit

Permalink
Beginning to clean up x86 stacks
Browse files Browse the repository at this point in the history
  • Loading branch information
mkeeter committed Mar 11, 2024
1 parent a51678f commit 992a1f9
Show file tree
Hide file tree
Showing 5 changed files with 13 additions and 19 deletions.
18 changes: 6 additions & 12 deletions fidget/src/jit/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,6 @@ impl<T> AssemblerData<T> {
}
}

#[cfg(target_arch = "aarch64")]
fn prepare_stack(&mut self, slot_count: usize, stack_size: usize) {
// We always use the stack, if only to store callee-saved registers
let mem = slot_count.saturating_sub(REGISTER_LIMIT)
Expand All @@ -289,24 +288,19 @@ impl<T> AssemblerData<T> {

// Round up to the nearest multiple of 16 bytes, for alignment
self.mem_offset = ((mem + 15) / 16) * 16;
self.push_stack();
}

/// Emits the instruction that moves the stack pointer down by
/// `self.mem_offset`.
///
/// `self.mem_offset` is expected to have been set already (`prepare_stack`
/// rounds it up to a multiple of 16 before calling this function).
///
/// # Panics
/// Panics if `self.mem_offset` is 4096 or larger.
#[cfg(target_arch = "aarch64")]
fn push_stack(&mut self) {
// NOTE(review): the 4096 limit presumably reflects the 12-bit unsigned
// immediate of the AArch64 `sub` instruction -- confirm against the ISA
// reference before relaxing it.
assert!(self.mem_offset < 4096);
dynasm!(self.ops
// sp -= mem_offset; the cast cannot truncate because of the assert above
; sub sp, sp, #(self.mem_offset as u32)
);
}

#[cfg(target_arch = "x86_64")]
fn prepare_stack(&mut self, slot_count: usize) {
// We always use the stack on x86_64, if only to store X/Y/Z
let stack_slots = slot_count.saturating_sub(REGISTER_LIMIT);

// We put X/Y/Z values at the top of the stack, where they can be
// accessed with `movss [rbp - i*size_of(T)] xmm`. This frees up the
// incoming registers (xmm0-2) in the point evaluator.
let mem = (stack_slots + 4) * std::mem::size_of::<T>();

// Round up to the nearest multiple of 16 bytes, for alignment
self.mem_offset = ((mem + 15) / 16) * 16;
fn push_stack(&mut self) {
dynasm!(self.ops
; sub rsp, self.mem_offset as i32
);
Expand Down
4 changes: 2 additions & 2 deletions fidget/src/jit/x86_64/float_slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,14 @@ impl Assembler for FloatSliceAssembler {
let mut out = AssemblerData::new(mmap);
dynasm!(out.ops
; push rbp
; mov rbp, rsp
; push r12
; push r13
; push r14
; push r15
; mov rbp, rsp
; vzeroupper
);
out.prepare_stack(slot_count);
out.prepare_stack(slot_count, 4);
dynasm!(out.ops
// The loop returns here, and we check whether to keep looping
; ->L:
Expand Down
2 changes: 1 addition & 1 deletion fidget/src/jit/x86_64/grad_slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ impl Assembler for GradSliceAssembler {
; mov rbp, rsp
; vzeroupper
);
out.prepare_stack(slot_count);
out.prepare_stack(slot_count, 4);
dynasm!(out.ops
// The loop returns here, and we check whether to keep looping
; ->L:
Expand Down
4 changes: 2 additions & 2 deletions fidget/src/jit/x86_64/interval.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,18 @@ impl Assembler for IntervalAssembler {
let mut out = AssemblerData::new(mmap);
dynasm!(out.ops
; push rbp
; mov rbp, rsp
; push r12
; push r13
; push r14
; mov rbp, rsp
; vzeroupper

// Put X/Y/Z on the stack so we can use those registers
; movq [rbp - 8], xmm0
; movq [rbp - 16], xmm1
; movq [rbp - 24], xmm2
);
out.prepare_stack(slot_count);
out.prepare_stack(slot_count, 4);
Self(out)
}
fn build_load(&mut self, dst_reg: u8, src_mem: u32) {
Expand Down
4 changes: 2 additions & 2 deletions fidget/src/jit/x86_64/point.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,18 @@ impl Assembler for PointAssembler {
let mut out = AssemblerData::new(mmap);
dynasm!(out.ops
; push rbp
; mov rbp, rsp
; push r12
; push r13
; push r14
; push r15
; vzeroupper
; mov rbp, rsp
// Put X/Y/Z on the stack so we can use those registers
; vmovss [rbp - 4], xmm0
; vmovss [rbp - 8], xmm1
; vmovss [rbp - 12], xmm2
);
out.prepare_stack(slot_count);
out.prepare_stack(slot_count, 4);
Self(out)
}

Expand Down

0 comments on commit 992a1f9

Please sign in to comment.