From 30f01db32cf02b4e657e1a6c56eea39d593a6977 Mon Sep 17 00:00:00 2001
From: Vacantron Chen
Date: Sun, 10 Nov 2024 21:43:18 +0800
Subject: [PATCH 1/3] Add "src/rv32_jit.c"

This file was generated by "tools/gen-jit-template.py".

To bring up the Linux kernel under just-in-time (JIT) compilation, we
need to route the memory-related operations through the memory
management unit (MMU). However, the current "src/rv32_jit.c" was
generated from a template. That template reduced the rework for the
repeated statements, but it also reduced the flexibility and the
intuitiveness needed to bring up new features. In this commit, we
deprecate the template and simply use a regular source file for
flexibility.
---
 .gitignore                |   1 -
 Makefile                  |   5 +-
 src/rv32_jit.c            | 713 ++++++++++++++++++++++++++++++++++++++
 tools/gen-jit-template.py | 274 ---------------
 4 files changed, 714 insertions(+), 279 deletions(-)
 create mode 100644 src/rv32_jit.c
 delete mode 100755 tools/gen-jit-template.py

diff --git a/.gitignore b/.gitignore
index 647c8dec..9a1596b2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,4 +28,3 @@ tests/arch-test-target/config.ini
 tests/arch-test-target/sail_cSim/riscv_sim_RV32
 tests/scimark2/
 __pycache__/
-src/rv32_jit.c
diff --git a/Makefile b/Makefile
index f0928fde..b16a1629 100644
--- a/Makefile
+++ b/Makefile
@@ -194,9 +194,6 @@ ifeq ($(call has, JIT), 1)
 $(error JIT mode only supports for x64 and arm64 target currently.)
 endif
 
-src/rv32_jit.c:
-	$(Q)tools/gen-jit-template.py $(CFLAGS) > $@
-
 $(OUT)/jit.o: src/jit.c src/rv32_jit.c
 	$(VECHO) "  CC\t$@\n"
 	$(Q)$(CC) -o $@ $(CFLAGS) -c -MMD -MF $@.d $<
@@ -353,7 +350,7 @@ endif
 endif
 
 clean:
-	$(RM) $(BIN) $(OBJS) $(DEV_OBJS) $(BUILD_DTB) $(HIST_BIN) $(HIST_OBJS) $(deps) $(WEB_FILES) $(CACHE_OUT) src/rv32_jit.c
+	$(RM) $(BIN) $(OBJS) $(DEV_OBJS) $(BUILD_DTB) $(HIST_BIN) $(HIST_OBJS) $(deps) $(WEB_FILES) $(CACHE_OUT)
 distclean: clean
 	-$(RM) $(DOOM_DATA) $(QUAKE_DATA) $(BUILDROOT_DATA) $(LINUX_DATA)
 	$(RM) -r $(OUT)/linux-image
diff --git a/src/rv32_jit.c b/src/rv32_jit.c
new file mode 100644
index 00000000..4c1dad9f
--- /dev/null
+++ b/src/rv32_jit.c
@@ -0,0 +1,713 @@
+GEN(nop, {})
+GEN(lui, {
+    vm_reg[0] = map_vm_reg(state, ir->rd);
+    emit_load_imm(state, vm_reg[0], ir->imm);
+})
+GEN(auipc, {
+    vm_reg[0] = map_vm_reg(state, ir->rd);
+    emit_load_imm(state, vm_reg[0], ir->pc + ir->imm);
+})
+GEN(jal, {
+    if (ir->rd) {
+        vm_reg[0] = map_vm_reg(state, ir->rd);
+        emit_load_imm(state, vm_reg[0], ir->pc + 4);
+    }
+    store_back(state);
+    emit_jmp(state, ir->pc + ir->imm);
+    emit_load_imm(state, temp_reg, ir->pc + ir->imm);
+    emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
+    emit_exit(state);
+})
+GEN(jalr, {
+    vm_reg[0] = ra_load(state, ir->rs1);
+    emit_mov(state, vm_reg[0], temp_reg);
+    emit_alu32_imm32(state, 0x81, 0, temp_reg, ir->imm);
+    emit_alu32_imm32(state, 0x81, 4, temp_reg, ~1U);
+    if (ir->rd) {
+        vm_reg[1] = map_vm_reg(state, ir->rd);
+        emit_load_imm(state, vm_reg[1], ir->pc + 4);
+    }
+    store_back(state);
+    parse_branch_history_table(state, ir);
+    emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
+    emit_exit(state);
+})
+GEN(beq, {
+    ra_load2(state, ir->rs1, ir->rs2);
+    emit_cmp32(state, vm_reg[1], vm_reg[0]);
+    store_back(state);
+    uint32_t jump_loc = state->offset;
+    emit_jcc_offset(state, 0x84);
+    if (ir->branch_untaken) {
+        emit_jmp(state, ir->pc + 4);
+    }
+    emit_load_imm(state, temp_reg, ir->pc + 4);
+    emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
+    emit_exit(state);
+
+    
emit_jump_target_offset(state, JUMP_LOC, state->offset); + if (ir->branch_taken) { + emit_jmp(state, ir->pc + ir->imm); + } + emit_load_imm(state, temp_reg, ir->pc + ir->imm); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); +}) +GEN(bne, { + ra_load2(state, ir->rs1, ir->rs2); + emit_cmp32(state, vm_reg[1], vm_reg[0]); + store_back(state); + uint32_t jump_loc = state->offset; + emit_jcc_offset(state, 0x85); + if (ir->branch_untaken) { + emit_jmp(state, ir->pc + 4); + } + emit_load_imm(state, temp_reg, ir->pc + 4); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); + emit_jump_target_offset(state, JUMP_LOC, state->offset); + if (ir->branch_taken) { + emit_jmp(state, ir->pc + ir->imm); + } + emit_load_imm(state, temp_reg, ir->pc + ir->imm); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); +}) +GEN(blt, { + ra_load2(state, ir->rs1, ir->rs2); + emit_cmp32(state, vm_reg[1], vm_reg[0]); + store_back(state); + uint32_t jump_loc = state->offset; + emit_jcc_offset(state, 0x8c); + if (ir->branch_untaken) { + emit_jmp(state, ir->pc + 4); + } + emit_load_imm(state, temp_reg, ir->pc + 4); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); + emit_jump_target_offset(state, JUMP_LOC, state->offset); + if (ir->branch_taken) { + emit_jmp(state, ir->pc + ir->imm); + } + emit_load_imm(state, temp_reg, ir->pc + ir->imm); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); +}) +GEN(bge, { + ra_load2(state, ir->rs1, ir->rs2); + emit_cmp32(state, vm_reg[1], vm_reg[0]); + store_back(state); + uint32_t jump_loc = state->offset; + emit_jcc_offset(state, 0x8d); + if (ir->branch_untaken) { + emit_jmp(state, ir->pc + 4); + } + emit_load_imm(state, temp_reg, ir->pc + 4); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); + emit_jump_target_offset(state, JUMP_LOC, state->offset); + if (ir->branch_taken) { + emit_jmp(state, ir->pc + ir->imm); + } + emit_load_imm(state, temp_reg, ir->pc + ir->imm); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); +}) +GEN(bltu, { + ra_load2(state, ir->rs1, ir->rs2); + emit_cmp32(state, vm_reg[1], vm_reg[0]); + store_back(state); + uint32_t jump_loc = state->offset; + emit_jcc_offset(state, 0x82); + if (ir->branch_untaken) { + emit_jmp(state, ir->pc + 4); + } + emit_load_imm(state, temp_reg, ir->pc + 4); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); + emit_jump_target_offset(state, JUMP_LOC, state->offset); + if (ir->branch_taken) { + emit_jmp(state, ir->pc + ir->imm); + } + emit_load_imm(state, temp_reg, ir->pc + ir->imm); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); +}) +GEN(bgeu, { + ra_load2(state, ir->rs1, ir->rs2); + emit_cmp32(state, vm_reg[1], vm_reg[0]); + store_back(state); + uint32_t jump_loc = state->offset; + emit_jcc_offset(state, 0x83); + if (ir->branch_untaken) { + emit_jmp(state, ir->pc + 4); + } + emit_load_imm(state, temp_reg, ir->pc + 4); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); + emit_jump_target_offset(state, JUMP_LOC, state->offset); + if (ir->branch_taken) { + emit_jmp(state, ir->pc + ir->imm); + } + emit_load_imm(state, temp_reg, ir->pc + ir->imm); + emit_store(state, S32, temp_reg, 
parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); +}) +GEN(lb, { + memory_t *m = PRIV(rv)->mem; + vm_reg[0] = ra_load(state, ir->rs1); + emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm)); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = map_vm_reg(state, ir->rd); + emit_load_sext(state, S8, temp_reg, vm_reg[1], 0); +}) +GEN(lh, { + memory_t *m = PRIV(rv)->mem; + vm_reg[0] = ra_load(state, ir->rs1); + emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm)); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = map_vm_reg(state, ir->rd); + emit_load_sext(state, S16, temp_reg, vm_reg[1], 0); +}) +GEN(lw, { + memory_t *m = PRIV(rv)->mem; + vm_reg[0] = ra_load(state, ir->rs1); + emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm)); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = map_vm_reg(state, ir->rd); + emit_load(state, S32, temp_reg, vm_reg[1], 0); +}) +GEN(lbu, { + memory_t *m = PRIV(rv)->mem; + vm_reg[0] = ra_load(state, ir->rs1); + emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm)); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = map_vm_reg(state, ir->rd); + emit_load(state, S8, temp_reg, vm_reg[1], 0); +}) +GEN(lhu, { + memory_t *m = PRIV(rv)->mem; + vm_reg[0] = ra_load(state, ir->rs1); + emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm)); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = map_vm_reg(state, ir->rd); + emit_load(state, S16, temp_reg, vm_reg[1], 0); +}) +GEN(sb, { + memory_t *m = PRIV(rv)->mem; + vm_reg[0] = ra_load(state, ir->rs1); + emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm)); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = ra_load(state, ir->rs2); + emit_store(state, S8, vm_reg[1], temp_reg, 0); +}) +GEN(sh, { + memory_t *m = PRIV(rv)->mem; + vm_reg[0] = ra_load(state, ir->rs1); + emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm)); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = ra_load(state, ir->rs2); + emit_store(state, S16, vm_reg[1], temp_reg, 0); +}) +GEN(sw, { + memory_t *m = PRIV(rv)->mem; + vm_reg[0] = ra_load(state, ir->rs1); + emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm)); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = ra_load(state, ir->rs2); + emit_store(state, S32, vm_reg[1], temp_reg, 0); +}) +GEN(addi, { + vm_reg[0] = ra_load(state, ir->rs1); + vm_reg[1] = map_vm_reg(state, ir->rd); + if (vm_reg[0] != vm_reg[1]) { + emit_mov(state, vm_reg[0], vm_reg[1]); + } + emit_alu32_imm32(state, 0x81, 0, vm_reg[1], ir->imm); +}) +GEN(slti, { + vm_reg[0] = ra_load(state, ir->rs1); + emit_cmp_imm32(state, vm_reg[0], ir->imm); + vm_reg[1] = map_vm_reg(state, ir->rd); + emit_load_imm(state, vm_reg[1], 1); + uint32_t jump_loc = state->offset; + emit_jcc_offset(state, 0x8c); + emit_load_imm(state, vm_reg[1], 0); + emit_jump_target_offset(state, JUMP_LOC, state->offset); +}) +GEN(sltiu, { + vm_reg[0] = ra_load(state, ir->rs1); + emit_cmp_imm32(state, vm_reg[0], ir->imm); + vm_reg[1] = map_vm_reg(state, ir->rd); + emit_load_imm(state, vm_reg[1], 1); + uint32_t jump_loc = state->offset; + emit_jcc_offset(state, 0x82); + emit_load_imm(state, vm_reg[1], 0); + emit_jump_target_offset(state, JUMP_LOC, state->offset); +}) +GEN(xori, { + vm_reg[0] = ra_load(state, ir->rs1); + vm_reg[1] = map_vm_reg(state, ir->rd); + if (vm_reg[0] != vm_reg[1]) { + emit_mov(state, vm_reg[0], vm_reg[1]); + } + emit_alu32_imm32(state, 0x81, 6, vm_reg[1], 
ir->imm); +}) +GEN(ori, { + vm_reg[0] = ra_load(state, ir->rs1); + vm_reg[1] = map_vm_reg(state, ir->rd); + if (vm_reg[0] != vm_reg[1]) { + emit_mov(state, vm_reg[0], vm_reg[1]); + } + emit_alu32_imm32(state, 0x81, 1, vm_reg[1], ir->imm); +}) +GEN(andi, { + vm_reg[0] = ra_load(state, ir->rs1); + vm_reg[1] = map_vm_reg(state, ir->rd); + if (vm_reg[0] != vm_reg[1]) { + emit_mov(state, vm_reg[0], vm_reg[1]); + } + emit_alu32_imm32(state, 0x81, 4, vm_reg[1], ir->imm); +}) +GEN(slli, { + vm_reg[0] = ra_load(state, ir->rs1); + vm_reg[1] = map_vm_reg(state, ir->rd); + if (vm_reg[0] != vm_reg[1]) { + emit_mov(state, vm_reg[0], vm_reg[1]); + } + emit_alu32_imm8(state, 0xc1, 4, vm_reg[1], ir->imm & 0x1f); +}) +GEN(srli, { + vm_reg[0] = ra_load(state, ir->rs1); + vm_reg[1] = map_vm_reg(state, ir->rd); + if (vm_reg[0] != vm_reg[1]) { + emit_mov(state, vm_reg[0], vm_reg[1]); + } + emit_alu32_imm8(state, 0xc1, 5, vm_reg[1], ir->imm & 0x1f); +}) +GEN(srai, { + vm_reg[0] = ra_load(state, ir->rs1); + vm_reg[1] = map_vm_reg(state, ir->rd); + if (vm_reg[0] != vm_reg[1]) { + emit_mov(state, vm_reg[0], vm_reg[1]); + } + emit_alu32_imm8(state, 0xc1, 7, vm_reg[1], ir->imm & 0x1f); +}) +GEN(add, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + emit_alu32(state, 0x01, temp_reg, vm_reg[2]); +}) +GEN(sub, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + emit_alu32(state, 0x29, temp_reg, vm_reg[2]); +}) +GEN(sll, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + emit_alu32_imm32(state, 0x81, 4, temp_reg, 0x1f); + emit_alu32(state, 0xd3, 4, vm_reg[2]); +}) +GEN(slt, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_cmp32(state, vm_reg[1], vm_reg[0]); + emit_load_imm(state, vm_reg[2], 1); + uint32_t jump_loc = state->offset; + emit_jcc_offset(state, 0x8c); + emit_load_imm(state, vm_reg[2], 0); + emit_jump_target_offset(state, JUMP_LOC, state->offset); +}) +GEN(sltu, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_cmp32(state, vm_reg[1], vm_reg[0]); + emit_load_imm(state, vm_reg[2], 1); + uint32_t jump_loc = state->offset; + emit_jcc_offset(state, 0x82); + emit_load_imm(state, vm_reg[2], 0); + emit_jump_target_offset(state, JUMP_LOC, state->offset); +}) +GEN(xor, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + emit_alu32(state, 0x31, temp_reg, vm_reg[2]); +}) +GEN(srl, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + emit_alu32_imm32(state, 0x81, 4, temp_reg, 0x1f); + emit_alu32(state, 0xd3, 5, vm_reg[2]); +}) +GEN(sra, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + emit_alu32_imm32(state, 0x81, 4, temp_reg, 0x1f); + emit_alu32(state, 0xd3, 7, vm_reg[2]); +}) +GEN(or, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + emit_alu32(state, 0x09, temp_reg, vm_reg[2]); 
+}) +GEN(and, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + emit_alu32(state, 0x21, temp_reg, vm_reg[2]); +}) +GEN(fence, { assert(NULL); }) +GEN(ecall, { + store_back(state); + emit_load_imm(state, temp_reg, ir->pc); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_call(state, (intptr_t) rv->io.on_ecall); + emit_exit(state); +}) +GEN(ebreak, { + store_back(state); + emit_load_imm(state, temp_reg, ir->pc); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_call(state, (intptr_t) rv->io.on_ebreak); + emit_exit(state); +}) +GEN(wfi, { assert(NULL); }) +GEN(uret, { assert(NULL); }) +GEN(sret, { assert(NULL); }) +GEN(hret, { assert(NULL); }) +GEN(mret, { assert(NULL); }) +GEN(sfencevma, { assert(NULL); }) +#if RV32_HAS(Zifencei) /* RV32 Zifencei Standard Extension */ +GEN(fencei, { assert(NULL); }) +#endif +#if RV32_HAS(Zicsr) /* RV32 Zicsr Standard Extension */ +GEN(csrrw, { assert(NULL); }) +GEN(csrrs, { assert(NULL); }) +GEN(csrrc, { assert(NULL); }) +GEN(csrrwi, { assert(NULL); }) +GEN(csrrsi, { assert(NULL); }) +GEN(csrrci, { assert(NULL); }) +#endif +#if RV32_HAS(EXT_M) +GEN(mul, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + muldivmod(state, 0x28, temp_reg, vm_reg[2], 0); +}) +GEN(mulh, { + ra_load2_sext(state, ir->rs1, ir->rs2, true, true); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + muldivmod(state, 0x2f, temp_reg, vm_reg[2], 0); + emit_alu64_imm8(state, 0xc1, 5, vm_reg[2], 32); +}) +GEN(mulhsu, { + ra_load2_sext(state, ir->rs1, ir->rs2, true, false); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + muldivmod(state, 0x2f, temp_reg, vm_reg[2], 0); + emit_alu64_imm8(state, 0xc1, 5, vm_reg[2], 32); +}) +GEN(mulhu, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + muldivmod(state, 0x2f, temp_reg, vm_reg[2], 0); + emit_alu64_imm8(state, 0xc1, 5, vm_reg[2], 32); +}) +GEN(div, { + ra_load2_sext(state, ir->rs1, ir->rs2, true, true); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + muldivmod(state, 0x38, temp_reg, vm_reg[2], 1); +}) +GEN(divu, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + muldivmod(state, 0x38, temp_reg, vm_reg[2], 0); +}) +GEN(rem, { + ra_load2_sext(state, ir->rs1, ir->rs2, true, true); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + muldivmod(state, 0x98, temp_reg, vm_reg[2], 1); +}) +GEN(remu, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + muldivmod(state, 0x98, temp_reg, vm_reg[2], 0); +}) +#endif +#if RV32_HAS(EXT_A) +GEN(lrw, { assert(NULL); }) +GEN(scw, { assert(NULL); }) +GEN(amoswapw, { assert(NULL); }) +GEN(amoaddw, { assert(NULL); }) +GEN(amoxorw, { assert(NULL); }) +GEN(amoandw, { assert(NULL); }) +GEN(amoorw, { assert(NULL); }) 
+GEN(amominw, { assert(NULL); }) +GEN(amomaxw, { assert(NULL); }) +GEN(amominuw, { assert(NULL); }) +GEN(amomaxuw, { assert(NULL); }) +#endif +#if RV32_HAS(EXT_F) +GEN(flw, { assert(NULL); }) +GEN(fsw, { assert(NULL); }) +GEN(fmadds, { assert(NULL); }) +GEN(fmsubs, { assert(NULL); }) +GEN(fnmsubs, { assert(NULL); }) +GEN(fnmadds, { assert(NULL); }) +GEN(fadds, { assert(NULL); }) +GEN(fsubs, { assert(NULL); }) +GEN(fmuls, { assert(NULL); }) +GEN(fdivs, { assert(NULL); }) +GEN(fsqrts, { assert(NULL); }) +GEN(fsgnjs, { assert(NULL); }) +GEN(fsgnjns, { assert(NULL); }) +GEN(fsgnjxs, { assert(NULL); }) +GEN(fmins, { assert(NULL); }) +GEN(fmaxs, { assert(NULL); }) +GEN(fcvtws, { assert(NULL); }) +GEN(fcvtwus, { assert(NULL); }) +GEN(fmvxw, { assert(NULL); }) +GEN(feqs, { assert(NULL); }) +GEN(flts, { assert(NULL); }) +GEN(fles, { assert(NULL); }) +GEN(fclasss, { assert(NULL); }) +GEN(fcvtsw, { assert(NULL); }) +GEN(fcvtswu, { assert(NULL); }) +GEN(fmvwx, { assert(NULL); }) +#endif +#if RV32_HAS(EXT_C) +GEN(caddi4spn, { + vm_reg[0] = ra_load(state, rv_reg_sp); + vm_reg[1] = map_vm_reg(state, ir->rd); + if (vm_reg[0] != vm_reg[1]) { + emit_mov(state, vm_reg[0], vm_reg[1]); + } + emit_alu32_imm32(state, 0x81, 0, vm_reg[1], (uint16_t) ir->imm); +}) +GEN(clw, { + memory_t *m = PRIV(rv)->mem; + vm_reg[0] = ra_load(state, ir->rs1); + emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm)); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = map_vm_reg(state, ir->rd); + emit_load(state, S32, temp_reg, vm_reg[1], 0); +}) +GEN(csw, { + memory_t *m = PRIV(rv)->mem; + vm_reg[0] = ra_load(state, ir->rs1); + emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm)); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = ra_load(state, ir->rs2); + emit_store(state, S32, vm_reg[1], temp_reg, 0); +}) +GEN(cnop, {}) +GEN(caddi, { + vm_reg[0] = ra_load(state, ir->rd); + emit_alu32_imm32(state, 0x81, 0, vm_reg[0], (int16_t) ir->imm); +}) +GEN(cjal, { + vm_reg[0] = map_vm_reg(state, rv_reg_ra); + emit_load_imm(state, vm_reg[0], ir->pc + 2); + store_back(state); + emit_jmp(state, ir->pc + ir->imm); + emit_load_imm(state, temp_reg, ir->pc + ir->imm); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); +}) +GEN(cli, { + vm_reg[0] = map_vm_reg(state, ir->rd); + emit_load_imm(state, vm_reg[0], ir->imm); +}) +GEN(caddi16sp, { + vm_reg[0] = ra_load(state, ir->rd); + emit_alu32_imm32(state, 0x81, 0, vm_reg[0], ir->imm); +}) +GEN(clui, { + vm_reg[0] = map_vm_reg(state, ir->rd); + emit_load_imm(state, vm_reg[0], ir->imm); +}) +GEN(csrli, { + vm_reg[0] = ra_load(state, ir->rs1); + emit_alu32_imm8(state, 0xc1, 5, vm_reg[0], ir->shamt); +}) +GEN(csrai, { + vm_reg[0] = ra_load(state, ir->rs1); + emit_alu32_imm8(state, 0xc1, 7, vm_reg[0], ir->shamt); +}) +GEN(candi, { + vm_reg[0] = ra_load(state, ir->rs1); + emit_alu32_imm32(state, 0x81, 4, vm_reg[0], ir->imm); +}) +GEN(csub, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + emit_alu32(state, 0x29, temp_reg, vm_reg[2]); +}) +GEN(cxor, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + emit_alu32(state, 0x31, temp_reg, vm_reg[2]); +}) +GEN(cor, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, 
vm_reg[0], vm_reg[2]); + emit_alu32(state, 0x09, temp_reg, vm_reg[2]); +}) +GEN(cand, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + emit_alu32(state, 0x21, temp_reg, vm_reg[2]); +}) +GEN(cj, { + store_back(state); + emit_jmp(state, ir->pc + ir->imm); + emit_load_imm(state, temp_reg, ir->pc + ir->imm); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); +}) +GEN(cbeqz, { + vm_reg[0] = ra_load(state, ir->rs1); + emit_cmp_imm32(state, vm_reg[0], 0); + store_back(state); + uint32_t jump_loc = state->offset; + emit_jcc_offset(state, 0x84); + if (ir->branch_untaken) { + emit_jmp(state, ir->pc + 2); + } + emit_load_imm(state, temp_reg, ir->pc + 2); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); + emit_jump_target_offset(state, JUMP_LOC, state->offset); + if (ir->branch_taken) { + emit_jmp(state, ir->pc + ir->imm); + } + emit_load_imm(state, temp_reg, ir->pc + ir->imm); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); +}) +GEN(cbnez, { + vm_reg[0] = ra_load(state, ir->rs1); + emit_cmp_imm32(state, vm_reg[0], 0); + store_back(state); + uint32_t jump_loc = state->offset; + emit_jcc_offset(state, 0x85); + if (ir->branch_untaken) { + emit_jmp(state, ir->pc + 2); + } + emit_load_imm(state, temp_reg, ir->pc + 2); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); + emit_jump_target_offset(state, JUMP_LOC, state->offset); + if (ir->branch_taken) { + emit_jmp(state, ir->pc + ir->imm); + } + emit_load_imm(state, temp_reg, ir->pc + ir->imm); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); +}) +GEN(cslli, { + vm_reg[0] = ra_load(state, ir->rd); + emit_alu32_imm8(state, 0xc1, 4, vm_reg[0], (uint8_t) ir->imm); +}) +GEN(clwsp, { + memory_t *m = PRIV(rv)->mem; + vm_reg[0] = ra_load(state, rv_reg_sp); + emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm)); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = map_vm_reg(state, ir->rd); + emit_load(state, S32, temp_reg, vm_reg[1], 0); +}) +GEN(cjr, { + vm_reg[0] = ra_load(state, ir->rs1); + emit_mov(state, vm_reg[0], temp_reg); + store_back(state); + parse_branch_history_table(state, ir); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); +}) +GEN(cmv, { + vm_reg[0] = ra_load(state, ir->rs2); + vm_reg[1] = map_vm_reg(state, ir->rd); + if (vm_reg[0] != vm_reg[1]) { + emit_mov(state, vm_reg[0], vm_reg[1]); + } else { + set_dirty(vm_reg[1], true); + } +}) +GEN(cebreak, { + store_back(state); + emit_load_imm(state, temp_reg, ir->pc); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_call(state, (intptr_t) rv->io.on_ebreak); + emit_exit(state); +}) +GEN(cjalr, { + vm_reg[0] = ra_load(state, ir->rs1); + emit_mov(state, vm_reg[0], temp_reg); + vm_reg[1] = map_vm_reg(state, rv_reg_ra); + emit_load_imm(state, vm_reg[1], ir->pc + 2); + store_back(state); + parse_branch_history_table(state, ir); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); +}) +GEN(cadd, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + emit_alu32(state, 0x01, temp_reg, vm_reg[2]); +}) +GEN(cswsp, 
{ + memory_t *m = PRIV(rv)->mem; + vm_reg[0] = ra_load(state, rv_reg_sp); + emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm)); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = ra_load(state, ir->rs2); + emit_store(state, S32, vm_reg[1], temp_reg, 0); +}) +#endif +#if RV32_HAS(EXT_C) && RV32_HAS(EXT_F) +GEN(cflwsp, { assert(NULL); }) +GEN(cfswsp, { assert(NULL); }) +GEN(cflw, { assert(NULL); }) +GEN(cfsw, { assert(NULL); }) +#endif diff --git a/tools/gen-jit-template.py b/tools/gen-jit-template.py deleted file mode 100755 index ff605b07..00000000 --- a/tools/gen-jit-template.py +++ /dev/null @@ -1,274 +0,0 @@ -#!/usr/bin/env python3 - -''' -This script serves as a code generator for creating JIT code templates -based on existing code files in the 'src' directory, eliminating the need -for writing duplicated code. -''' - -import re -import sys - -INSN = { - "Zifencei": ["fencei"], - "Zicsr": [ - "csrrw", - "csrrs", - "csrrc", - "csrrw", - "csrrsi", - "csrrci"], - "EXT_M": [ - "mul", - "mulh", - "mulhsu", - "mulhu", - "div", - "divu", - "rem", - "remu"], - "EXT_A": [ - "lrw", - "scw", - "amoswapw", - "amoaddw", - "amoxorw", - "amoandw", - "amoorw", - "amominw", - "amomaxw", - "amominuw", - "amomaxuw"], - "EXT_F": [ - "flw", - "fsw", - "fmadds", - "fmsubs", - "fnmsubs", - "fnmadds", - "fadds", - "fsubs", - "fmuls", - "fdivs", - "fsqrts", - "fsgnjs", - "fsgnjns", - "fsgnjxs", - "fmins", - "fmaxs", - "fcvtws", - "fcvtwus", - "fmvxw", - "feqs", - "flts", - "fles", - "fclasss", - "fcvtsw", - "fcvtswu", - "fmvwx"], - "EXT_C": [ - "caddi4spn", - "clw", - "csw", - "cnop", - "caddi", - "cjal", - "cli", - "caddi16sp", - "clui", - "csrli", - "csrai", - "candi", - "csub", - "cxor", - "cor", - "cand", - "cj", - "cbeqz", - "cbnez", - "cslli", - "clwsp", - "cjr", - "cmv", - "cebreak", - "cjalr", - "cadd", - "cswsp", - ], - "EXT_FC": [ - "cflwsp", - "cfswsp", - "cflw", - "cfsw", - ], -} -EXT_LIST = ["Zifencei", "Zicsr", "EXT_M", "EXT_A", "EXT_F", "EXT_C"] -SKIP_LIST = [] -# check enabled extension in Makefile - - -def parse_argv(EXT_LIST, SKIP_LIST): - for argv in sys.argv: - if argv.find("RV32_FEATURE_") != -1: - ext = argv[argv.find("RV32_FEATURE_") + 13:-2] - if argv[-1:] == "1" and EXT_LIST.count(ext): - EXT_LIST.remove(ext) - for ext in EXT_LIST: - SKIP_LIST += INSN[ext] - if "EXT_F" in EXT_LIST or "EXT_C" in EXT_LIST: - SKIP_LIST += INSN["EXT_FC"] - -parse_argv(EXT_LIST, SKIP_LIST) -# prepare PROLOGUE -output = "" -f = open('src/rv32_template.c', 'r') -lines = f.read() -# remove_comment -lines = re.sub(r'/\*[\s|\S]+?\*/', "", lines) -# remove exception handler -lines = re.sub(r'RV_EXC[\S]+?\([\S|\s]+?\);\s', "", lines) -# collect functions -emulate_funcs = re.findall(r'RVOP\([\s|\S]+?}\)', lines) -codegen_funcs = re.findall(r'GEN\([\s|\S]+?}\)', lines) -op = [] -impl = [] -for i in range(len(emulate_funcs)): - op.append(emulate_funcs[i][5:emulate_funcs[i].find(',')].strip()) - impl.append(codegen_funcs[i]) - -f.close() - -fields = {"imm", "pc", "rs1", "rs2", "rd", "shamt", "branch_taken", "branch_untaken"} -virt_regs = {"VR0", "VR1", "VR2"} -# generate jit template -for i in range(len(op)): - if (not SKIP_LIST.count(op[i])): - output += impl[i][0:4] + op[i] + ", {" - IRs = re.findall(r'[\s|\S]+?;', impl[i][5:]) - # parse_and_translate_IRs - for i in range(len(IRs)): - IR = IRs[i].strip()[:-1] - items = [s.strip() for s in IR.split(',')] - asm = "" - for i in range(len(items)): - if items[i] in fields: - items[i] = "ir->" + items[i] - if items[i] in virt_regs: - items[i] = 
"vm_reg[" + items[i][-1] + "]" - if items[i] == "TMP": - items[i] = "temp_reg" - if items[0] == "alu32imm": - if len(items) == 8: - asm = "emit_alu32_imm{}(state, {}, {}, {}, ({}{}_t) {});".format( - items[1], items[2], items[3], items[4], items[5], items[6], items[7]) - elif len(items) == 7: - asm = "emit_alu32_imm{}(state, {}, {}, {}, {} & {});".format( - items[1], items[2], items[3], items[4], items[5], items[6]) - else: - asm = "emit_alu32_imm{}(state, {}, {}, {}, {});".format( - items[1], items[2], items[3], items[4], items[5]) - elif items[0] == "alu64imm": - asm = "emit_alu64_imm{}(state, {}, {}, {}, {});".format( - items[1], items[2], items[3], items[4], items[5]) - elif items[0] == "alu64": - asm = "emit_alu64(state, {}, {}, {});".format( - items[1], items[2], items[3]) - elif items[0] == "alu32": - asm = "emit_alu32(state, {}, {}, {});".format( - items[1], items[2], items[3]) - elif items[0] == "ldimm": - if items[2] == "mem": - asm = "emit_load_imm(state, {}, (intptr_t) (m->mem_base + ir->imm));".format( - items[1]) - elif len(items) == 4: - asm = "emit_load_imm(state, {}, {} + {});".format( - items[1], items[2], items[3]) - else: - asm = "emit_load_imm(state, {}, {});".format( - items[1], items[2]) - elif items[0] == "lds": - if (items[3] == "X"): - asm = "emit_load_sext(state, {}, parameter_reg[0], {}, offsetof(riscv_t, X) + 4 * {});".format( - items[1], items[2], items[4]) - else: - asm = "emit_load_sext(state, {}, {}, {}, {});".format( - items[1], items[2], items[3], items[4]) - elif items[0] == "rald": - asm = "{} = ra_load(state, {});".format(items[1], items[2]) - elif items[0] == "rald2": - asm = "ra_load2(state, {}, {});".format(items[1], items[2]) - elif items[0] == "rald2s": - asm = "ra_load2_sext(state, {}, {}, {}, {});".format(items[1], items[2], items[3], items[4]) - elif items[0] == "map": - asm = "{} = map_vm_reg(state, {});".format(items[1], items[2]) - elif items[0] == "ld": - if (items[3] == "X"): - asm = "emit_load(state, {}, parameter_reg[0], {}, offsetof(riscv_t, X) + 4 * {});".format( - items[1], items[2], items[4]) - else: - asm = "emit_load(state, {}, {}, {}, {});".format( - items[1], items[2], items[3], items[4]) - elif items[0] == "st": - if (items[3] == "X"): - asm = "emit_store(state, {}, {}, parameter_reg[0], offsetof(riscv_t, X) + 4 * {});".format( - items[1], items[2], items[4]) - elif items[3] == "PC" or items[3] == "compressed": - asm = "emit_store(state, {}, {}, parameter_reg[0], offsetof(riscv_t, {}));".format( - items[1], items[2], items[3]) - else: - asm = "emit_store(state, {}, {}, {}, {});".format( - items[1], items[2], items[3], items[4]) - elif items[0] == "mov": - asm = "emit_mov(state, {}, {});".format( - items[1], items[2]) - elif items[0] == "cmp": - asm = "emit_cmp32(state, {}, {});".format( - items[1], items[2]) - elif items[0] == "cmpimm": - asm = "emit_cmp_imm32(state, {}, {});".format( - items[1], items[2]) - elif items[0] == "jmp": - asm = "emit_jmp(state, {} + {});".format( - items[1], items[2]) - elif items[0] == "jcc": - asm = "emit_jcc_offset(state, {});".format(items[1]) - elif items[0] == "setjmpoff": - asm = "uint32_t jump_loc = state->offset;" - elif items[0] == "jmpoff": - asm = "emit_jump_target_offset(state, JUMP_LOC, state->offset);" - elif items[0] == "mem": - asm = "memory_t *m = PRIV(rv)->mem;" - elif items[0] == "call": - asm = "emit_call(state, (intptr_t) rv->io.on_{});".format( - items[1]) - elif items[0] == "exit": - asm = "emit_exit(state);" - elif items[0] == "mul": - asm = "muldivmod(state, {}, {}, {}, 
{});".format( - items[1], items[2], items[3], items[4]) - elif items[0] == "div": - asm = "muldivmod(state, {}, {}, {}, {});".format( - items[1], items[2], items[3], items[4]) - elif items[0] == "mod": - asm = "muldivmod(state, {}, {}, {}, {});".format( - items[1], items[2], items[3], items[4]) - elif items[0] == "cond": - if items[1] == "regneq": - items[1] = "vm_reg[0] != vm_reg[1]" - asm = "if({})".format(items[1]) + "{" - elif items[0] == "else": - asm = "} else {" - elif items[0] == "end": - asm = "}" - elif items[0] == "pollute": - asm = "set_dirty({}, true);".format(items[1]) - elif items[0] == "break": - asm = "store_back(state);" - elif items[0] == "assert": - asm = "assert(NULL);" - elif items[0] == "predict": - asm = "parse_branch_history_table(state, ir);" - output += asm + "\n" - output += "})\n" - -sys.stdout.write(output) From eca48102df902fa83a98c71a512680acee37df85 Mon Sep 17 00:00:00 2001 From: Vacantron Chen Date: Sat, 16 Nov 2024 06:53:50 +0800 Subject: [PATCH 2/3] Refactor "src/system.c" to enhence reusability --- Makefile | 7 +- src/system.c | 222 ++++++++++++++------------------------------------- src/system.h | 134 +++++++++++++++++++++++++++++++ 3 files changed, 201 insertions(+), 162 deletions(-) create mode 100644 src/system.h diff --git a/Makefile b/Makefile index b16a1629..f73075ff 100644 --- a/Makefile +++ b/Makefile @@ -11,6 +11,8 @@ CFLAGS = -std=gnu99 -O2 -Wall -Wextra CFLAGS += -Wno-unused-label CFLAGS += -include src/common.h -Isrc/ +OBJS_EXT := + # In the system test suite, the executable is an ELF file (e.g., MMU). # However, the Linux kernel emulation includes the Image, DT, and # root filesystem (rootfs). Therefore, the test suite needs this @@ -29,6 +31,9 @@ $(call set-feature, BLOCK_CHAINING) # Enable system emulation ENABLE_SYSTEM ?= 0 $(call set-feature, SYSTEM) +ifeq ($(call has, SYSTEM), 1) +OBJS_EXT += system.o +endif # Enable link-time optimization (LTO) ENABLE_LTO ?= 1 @@ -59,8 +64,6 @@ endif # Disable Intel's Control-flow Enforcement Technology (CET) CFLAGS += $(CFLAGS_NO_CET) -OBJS_EXT := - # Integer Multiplication and Division instructions ENABLE_EXT_M ?= 1 $(call set-feature, EXT_M) diff --git a/src/system.c b/src/system.c index 62ecbc11..9ff50ee4 100644 --- a/src/system.c +++ b/src/system.c @@ -3,17 +3,14 @@ * "LICENSE" for information on usage and redistribution of this file. */ -#if !RV32_HAS(SYSTEM) -#error "Do not manage to build this file unless you enable system support." 
-#endif - #include #include "devices/plic.h" #include "devices/uart.h" #include "riscv_private.h" +#include "system.h" -#if RV32_HAS(SYSTEM) && !RV32_HAS(ELF_LOADER) +#if !RV32_HAS(ELF_LOADER) void emu_update_uart_interrupts(riscv_t *rv) { vm_attr_t *attr = PRIV(rv); @@ -24,84 +21,6 @@ void emu_update_uart_interrupts(riscv_t *rv) attr->plic->active &= ~IRQ_UART_BIT; plic_update_interrupts(attr->plic); } - -#define MMIO_R 1 -#define MMIO_W 0 - -enum SUPPORTED_MMIO { - MMIO_PLIC, - MMIO_UART, -}; - -/* clang-format off */ -#define MMIO_OP(io, rw) \ - switch(io){ \ - case MMIO_PLIC: \ - IIF(rw)( /* read */ \ - mmio_read_val = plic_read(PRIV(rv)->plic, addr & 0x3FFFFFF); \ - plic_update_interrupts(PRIV(rv)->plic); \ - return mmio_read_val; \ - , /* write */ \ - plic_write(PRIV(rv)->plic, addr & 0x3FFFFFF, val); \ - plic_update_interrupts(PRIV(rv)->plic); \ - return; \ - ) \ - break; \ - case MMIO_UART: \ - IIF(rw)( /* read */ \ - mmio_read_val = u8250_read(PRIV(rv)->uart, addr & 0xFFFFF); \ - emu_update_uart_interrupts(rv); \ - return mmio_read_val; \ - , /* write */ \ - u8250_write(PRIV(rv)->uart, addr & 0xFFFFF, val); \ - emu_update_uart_interrupts(rv); \ - return; \ - ) \ - break; \ - default: \ - fprintf(stderr, "unknown MMIO type %d\n", io); \ - break; \ - } -/* clang-format on */ - -#define MMIO_READ() \ - do { \ - uint32_t mmio_read_val; \ - if ((addr >> 28) == 0xF) { /* MMIO at 0xF_______ */ \ - /* 256 regions of 1MiB */ \ - switch ((addr >> 20) & MASK(8)) { \ - case 0x0: \ - case 0x2: /* PLIC (0 - 0x3F) */ \ - MMIO_OP(MMIO_PLIC, MMIO_R); \ - break; \ - case 0x40: /* UART */ \ - MMIO_OP(MMIO_UART, MMIO_R); \ - break; \ - default: \ - __UNREACHABLE; \ - break; \ - } \ - } \ - } while (0) - -#define MMIO_WRITE() \ - do { \ - if ((addr >> 28) == 0xF) { /* MMIO at 0xF_______ */ \ - /* 256 regions of 1MiB */ \ - switch ((addr >> 20) & MASK(8)) { \ - case 0x0: \ - case 0x2: /* PLIC (0 - 0x3F) */ \ - MMIO_OP(MMIO_PLIC, MMIO_W); \ - break; \ - case 0x40: /* UART */ \ - MMIO_OP(MMIO_UART, MMIO_W); \ - break; \ - default: \ - __UNREACHABLE; \ - break; \ - } \ - } \ - } while (0) #endif static bool ppn_is_valid(riscv_t *rv, uint32_t ppn) @@ -116,14 +35,7 @@ static bool ppn_is_valid(riscv_t *rv, uint32_t ppn) ? 
(uint32_t *) (attr->mem->mem_base + (ppn << (RV_PG_SHIFT))) \ : NULL -/* Walk through page tables and get the corresponding PTE by virtual address if - * exists - * @rv: RISC-V emulator - * @addr: virtual address - * @level: the level of which the PTE is located - * @return: NULL if a not found or fault else the corresponding PTE - */ -static uint32_t *mmu_walk(riscv_t *rv, const uint32_t addr, uint32_t *level) +uint32_t *mmu_walk(riscv_t *rv, const uint32_t addr, uint32_t *level) { vm_attr_t *attr = PRIV(rv); uint32_t ppn = rv->csr_satp & MASK(22); @@ -178,81 +90,71 @@ static uint32_t *mmu_walk(riscv_t *rv, const uint32_t addr, uint32_t *level) /* FIXME: handle access fault, addr out of range check */ #define MMU_FAULT_CHECK(op, rv, pte, addr, access_bits) \ mmu_##op##_fault_check(rv, pte, addr, access_bits) -#define MMU_FAULT_CHECK_IMPL(op, pgfault) \ - static bool mmu_##op##_fault_check(riscv_t *rv, pte_t *pte, uint32_t addr, \ - uint32_t access_bits) \ - { \ - uint32_t scause; \ - uint32_t stval = addr; \ - switch (access_bits) { \ - case PTE_R: \ - scause = PAGEFAULT_LOAD; \ - break; \ - case PTE_W: \ - scause = PAGEFAULT_STORE; \ - break; \ - case PTE_X: \ - scause = PAGEFAULT_INSN; \ - break; \ - default: \ - __UNREACHABLE; \ - break; \ - } \ - if (pte && (!(*pte & PTE_V))) { \ - SET_CAUSE_AND_TVAL_THEN_TRAP(rv, scause, stval); \ - return false; \ - } \ - if (!(pte && (*pte & access_bits))) { \ - SET_CAUSE_AND_TVAL_THEN_TRAP(rv, scause, stval); \ - return false; \ - } \ - /* \ - * (1) When MXR=0, only loads from pages marked readable (R=1) will \ - * succeed. \ - * \ - * (2) When MXR=1, loads from pages marked either readable or \ - * executable (R=1 or X=1) will succeed. \ - */ \ - if (pte && ((!(SSTATUS_MXR & rv->csr_sstatus) && !(*pte & PTE_R) && \ - (access_bits == PTE_R)) || \ - ((SSTATUS_MXR & rv->csr_sstatus) && \ - !((*pte & PTE_R) | (*pte & PTE_X)) && \ - (access_bits == PTE_R)))) { \ - SET_CAUSE_AND_TVAL_THEN_TRAP(rv, scause, stval); \ - return false; \ - } \ - /* \ - * When SUM=0, S-mode memory accesses to pages that are accessible by \ - * U-mode will fault. \ - */ \ - if (pte && rv->priv_mode == RV_PRIV_S_MODE && \ - !(SSTATUS_SUM & rv->csr_sstatus) && (*pte & PTE_U)) { \ - SET_CAUSE_AND_TVAL_THEN_TRAP(rv, scause, stval); \ - return false; \ - } \ - /* PTE not found, map it in handler */ \ - if (!pte) { \ - SET_CAUSE_AND_TVAL_THEN_TRAP(rv, scause, stval); \ - return false; \ - } \ - /* valid PTE */ \ - return true; \ +#define MMU_FAULT_CHECK_IMPL(op, pgfault) \ + bool mmu_##op##_fault_check(riscv_t *rv, pte_t *pte, uint32_t addr, \ + uint32_t access_bits) \ + { \ + uint32_t scause; \ + uint32_t stval = addr; \ + switch (access_bits) { \ + case PTE_R: \ + scause = PAGEFAULT_LOAD; \ + break; \ + case PTE_W: \ + scause = PAGEFAULT_STORE; \ + break; \ + case PTE_X: \ + scause = PAGEFAULT_INSN; \ + break; \ + default: \ + __UNREACHABLE; \ + break; \ + } \ + if (pte && (!(*pte & PTE_V))) { \ + SET_CAUSE_AND_TVAL_THEN_TRAP(rv, scause, stval); \ + return false; \ + } \ + if (!(pte && (*pte & access_bits))) { \ + SET_CAUSE_AND_TVAL_THEN_TRAP(rv, scause, stval); \ + return false; \ + } \ + /* \ + * (1) When MXR=0, only loads from pages marked readable (R=1) will \ + * succeed. \ + * \ + * (2) When MXR=1, loads from pages marked either readable or \ + * executable (R=1 or X=1) will succeed. 
\ + */ \ + if (pte && ((!(SSTATUS_MXR & rv->csr_sstatus) && !(*pte & PTE_R) && \ + (access_bits == PTE_R)) || \ + ((SSTATUS_MXR & rv->csr_sstatus) && \ + !((*pte & PTE_R) | (*pte & PTE_X)) && \ + (access_bits == PTE_R)))) { \ + SET_CAUSE_AND_TVAL_THEN_TRAP(rv, scause, stval); \ + return false; \ + } \ + /* \ + * When SUM=0, S-mode memory accesses to pages that are accessible by \ + * U-mode will fault. \ + */ \ + if (pte && rv->priv_mode == RV_PRIV_S_MODE && \ + !(SSTATUS_SUM & rv->csr_sstatus) && (*pte & PTE_U)) { \ + SET_CAUSE_AND_TVAL_THEN_TRAP(rv, scause, stval); \ + return false; \ + } \ + /* PTE not found, map it in handler */ \ + if (!pte) { \ + SET_CAUSE_AND_TVAL_THEN_TRAP(rv, scause, stval); \ + return false; \ + } \ + /* valid PTE */ \ + return true; \ } MMU_FAULT_CHECK_IMPL(ifetch, pagefault_insn) MMU_FAULT_CHECK_IMPL(read, pagefault_load) MMU_FAULT_CHECK_IMPL(write, pagefault_store) -#define get_ppn_and_offset() \ - uint32_t ppn; \ - uint32_t offset; \ - do { \ - assert(pte); \ - ppn = *pte >> (RV_PG_SHIFT - 2) << RV_PG_SHIFT; \ - offset = level == 1 ? addr & MASK((RV_PG_SHIFT + 10)) \ - : addr & MASK(RV_PG_SHIFT); \ - } while (0) - /* The IO handler that operates when the Memory Management Unit (MMU) * is enabled during system emulation is responsible for managing * input/output operations. These callbacks are designed to implement diff --git a/src/system.h b/src/system.h new file mode 100644 index 00000000..9432b9bb --- /dev/null +++ b/src/system.h @@ -0,0 +1,134 @@ +/* + * rv32emu is freely redistributable under the MIT License. See the file + * "LICENSE" for information on usage and redistribution of this file. + */ + +#pragma once + +#if !RV32_HAS(SYSTEM) +#error "Do not manage to build this file unless you enable system support." +#endif + +#include "devices/plic.h" +#include "devices/uart.h" +#include "riscv_private.h" + +#if !RV32_HAS(ELF_LOADER) + +#define MMIO_R 1 +#define MMIO_W 0 + +enum SUPPORTED_MMIO { + MMIO_PLIC, + MMIO_UART, +}; + +/* clang-format off */ +#define MMIO_OP(io, rw) \ + switch(io){ \ + case MMIO_PLIC: \ + IIF(rw)( /* read */ \ + mmio_read_val = plic_read(PRIV(rv)->plic, addr & 0x3FFFFFF); \ + plic_update_interrupts(PRIV(rv)->plic); \ + return mmio_read_val; \ + , /* write */ \ + plic_write(PRIV(rv)->plic, addr & 0x3FFFFFF, val); \ + plic_update_interrupts(PRIV(rv)->plic); \ + return; \ + ) \ + break; \ + case MMIO_UART: \ + IIF(rw)( /* read */ \ + mmio_read_val = u8250_read(PRIV(rv)->uart, addr & 0xFFFFF); \ + emu_update_uart_interrupts(rv); \ + return mmio_read_val; \ + , /* write */ \ + u8250_write(PRIV(rv)->uart, addr & 0xFFFFF, val); \ + emu_update_uart_interrupts(rv); \ + return; \ + ) \ + break; \ + default: \ + fprintf(stderr, "unknown MMIO type %d\n", io); \ + break; \ + } +/* clang-format on */ + +#define MMIO_READ() \ + do { \ + uint32_t mmio_read_val; \ + if ((addr >> 28) == 0xF) { /* MMIO at 0xF_______ */ \ + /* 256 regions of 1MiB */ \ + switch ((addr >> 20) & MASK(8)) { \ + case 0x0: \ + case 0x2: /* PLIC (0 - 0x3F) */ \ + MMIO_OP(MMIO_PLIC, MMIO_R); \ + break; \ + case 0x40: /* UART */ \ + MMIO_OP(MMIO_UART, MMIO_R); \ + break; \ + default: \ + __UNREACHABLE; \ + break; \ + } \ + } \ + } while (0) + +#define MMIO_WRITE() \ + do { \ + if ((addr >> 28) == 0xF) { /* MMIO at 0xF_______ */ \ + /* 256 regions of 1MiB */ \ + switch ((addr >> 20) & MASK(8)) { \ + case 0x0: \ + case 0x2: /* PLIC (0 - 0x3F) */ \ + MMIO_OP(MMIO_PLIC, MMIO_W); \ + break; \ + case 0x40: /* UART */ \ + MMIO_OP(MMIO_UART, MMIO_W); \ + break; \ + default: \ + 
__UNREACHABLE;                                     \
+                break;                                             \
+            }                                                      \
+        }                                                          \
+    } while (0)
+
+void emu_update_uart_interrupts(riscv_t *rv);
+#endif
+
+/* Walk through the page tables and get the corresponding PTE by virtual
+ * address, if it exists
+ * @rv: RISC-V emulator
+ * @addr: virtual address
+ * @level: the level at which the PTE is located
+ * @return: NULL if the PTE is not found or a fault is raised, otherwise the
+ *          corresponding PTE
+ */
+uint32_t *mmu_walk(riscv_t *rv, const uint32_t addr, uint32_t *level);
+
+/* Verify the PTE and generate the corresponding faults if needed
+ * @op: the operation
+ * @rv: RISC-V emulator
+ * @pte: the PTE to be verified
+ * @addr: the corresponding virtual address that causes the fault
+ * @return: false if any fault caused by violating the access permission is
+ *          generated, otherwise true
+ */
+/* FIXME: handle access fault, addr out of range check */
+#define MMU_FAULT_CHECK_DECL(op)                                           \
+    bool mmu_##op##_fault_check(riscv_t *rv, uint32_t *pte, uint32_t addr, \
+                                uint32_t access_bits);
+
+MMU_FAULT_CHECK_DECL(ifetch);
+MMU_FAULT_CHECK_DECL(read);
+MMU_FAULT_CHECK_DECL(write);
+
+#define get_ppn_and_offset()                                      \
+    uint32_t ppn;                                                 \
+    uint32_t offset;                                              \
+    do {                                                          \
+        ppn = *pte >> (RV_PG_SHIFT - 2) << RV_PG_SHIFT;           \
+        offset = level == 1 ? addr & MASK((RV_PG_SHIFT + 10))     \
+                            : addr & MASK(RV_PG_SHIFT);           \
+    } while (0)

From 09fc94535f5b62231ffce1041a447744a1572b8e Mon Sep 17 00:00:00 2001
From: Vacantron Chen
Date: Mon, 16 Dec 2024 14:31:08 +0800
Subject: [PATCH 3/3] Support just-in-time (JIT) compilation with system simulation

This commit introduces the "satp" field to the block structure in JIT
mode to ensure the block cache is replaced correctly. MOP fusion and
T2C are temporarily disabled.

Use the following commands to boot the Linux kernel:
$ make ENABLE_SYSTEM=1 ENABLE_MOP_FUSION=0 ENABLE_JIT=1 ENABLE_T2C=0
$ ./build/rv32emu -k -i -b
---
 Makefile            |   2 +-
 src/decode.h        |   3 +
 src/emulate.c       |  71 ++++++--
 src/jit.c           | 335 ++++++++++++++++++++++++++++++----
 src/jit.h           |   6 +
 src/riscv.c         |   6 +-
 src/riscv_private.h |  15 ++
 src/rv32_jit.c      | 410 ++++++++++++++++++++++++++++++------
 src/rv32_template.c | 192 +++++++++++++--------
 src/utils.c         |  28 ++-
 src/utils.h         |  31 ++++
 11 files changed, 918 insertions(+), 181 deletions(-)

diff --git a/Makefile b/Makefile
index f73075ff..0798d9f6 100644
--- a/Makefile
+++ b/Makefile
@@ -188,7 +188,7 @@ ifeq ($(call has, JIT), 1)
 ifeq ("$(CHECK_LLVM_LIBS)", "0")
 OBJS_EXT += t2c.o
 CFLAGS += -g $(shell $(LLVM_CONFIG) --cflags)
-LDFLAGS += $(shell $(LLVM_CONFIG) --libs)
+LDFLAGS += $(shell $(LLVM_CONFIG) --libfiles)
 else
 $(error No llvm-config-18 installed. Check llvm-config-18 installation in advance, or use "ENABLE_T2C=0" to disable tier-2 LLVM compiler)
 endif
diff --git a/src/decode.h b/src/decode.h
index e2b2984c..edb88f42 100644
--- a/src/decode.h
+++ b/src/decode.h
@@ -288,6 +288,9 @@ typedef struct {
     struct rv_insn *target[HISTORY_SIZE];
 #else
     uint32_t times[HISTORY_SIZE];
+#if RV32_HAS(SYSTEM)
+    uint32_t satp[HISTORY_SIZE];
+#endif
 #endif
 } branch_history_table_t;
 
diff --git a/src/emulate.c b/src/emulate.c
index 293ce031..e706925c 100644
--- a/src/emulate.c
+++ b/src/emulate.c
@@ -42,7 +42,9 @@ extern struct target_ops gdbstub_ops;
 #define IF_imm(i, v) (i->imm == v)
 
 #if RV32_HAS(SYSTEM)
+#if !RV32_HAS(JIT)
 static bool need_clear_block_map = false;
+#endif
 static uint32_t reloc_enable_mmu_jalr_addr;
 static bool reloc_enable_mmu = false;
 bool need_retranslate = false;
@@ -704,6 +706,7 @@ static inline void remove_next_nth_ir(const riscv_t *rv,
  * Strategies are being devised to increase the number of instructions that
  * match the pattern, including possible instruction reordering.
  */
+#if RV32_HAS(MOP_FUSION)
 static void match_pattern(riscv_t *rv, block_t *block)
 {
     uint32_t i;
@@ -795,7 +798,7 @@ static void match_pattern(riscv_t *rv, block_t *block)
         }
     }
 }
-
+#endif
 typedef struct {
     bool is_constant[N_RV_REGS];
     uint32_t const_val[N_RV_REGS];
@@ -838,12 +841,11 @@ static block_t *block_find_or_translate(riscv_t *rv)
     block_t *next_blk = block_find(map, rv->PC);
 #else
     /* lookup the next block in the block cache */
-    /*
-     * The function "cache_get()" gets the cached block by the given "key (PC)".
-     * In system simulation, the returned block might be dropped because it is
-     * not the one from the current process (by checking SATP CSR register).
-     */
     block_t *next_blk = (block_t *) cache_get(rv->block_cache, rv->PC, true);
+#if RV32_HAS(SYSTEM)
+    if (next_blk && next_blk->satp != rv->csr_satp)
+        next_blk = NULL;
+#endif
 #endif
 
     if (next_blk)
@@ -861,12 +863,20 @@ static block_t *block_find_or_translate(riscv_t *rv)
 
     block_translate(rv, next_blk);
 
+#if RV32_HAS(JIT) && RV32_HAS(SYSTEM)
+    /*
+     * There may be an ifetch fault which changes satp, so do not do this
+     * in block_alloc().
+     */
+    next_blk->satp = rv->csr_satp;
+#endif
+
     optimize_constant(rv, next_blk);
+
 #if RV32_HAS(GDBSTUB)
     if (likely(!rv->debug_mode))
 #endif
-#if RV32_HAS(MOP_FUSION)
-    /* macro operation fusion */
+#if RV32_HAS(GDBSTUB) || RV32_HAS(MOP_FUSION)
     match_pattern(rv, next_blk);
 #endif
 
@@ -890,8 +900,6 @@ static block_t *block_find_or_translate(riscv_t *rv)
             return next_blk;
         }
 
-        list_del_init(&replaced_blk->list);
-
         if (prev == replaced_blk)
             prev = NULL;
 
@@ -910,6 +918,32 @@
             if (untaken == replaced_blk_entry) {
                 entry->ir_tail->branch_untaken = NULL;
             }
+
+            /* update JALR LUT */
+            if (!entry->ir_tail->branch_table)
+                continue;
+
+#if 0
+            /*
+             * This branch lookup update is unused since we get the PC from
+             * it and use cache_get() to achieve the branch prediction of
+             * T1C. However, if branch_table_t is going to reference the
+             * block directly, this update is necessary to avoid using freed
+             * blocks.
+             
+ */ + for (int i = 0; i < HISTORY_SIZE; i++) { + if (entry->ir_tail->branch_table->PC[i] == replaced_blk->pc_start) { + IIF(RV32_HAS(SYSTEM)) + (if (entry->ir_tail->branch_table->satp[i] == + replaced_blk->satp), ) + { + entry->ir_tail->branch_table->PC[i] = + entry->ir_tail->branch_table->satp[i] = + entry->ir_tail->branch_table->times[i] = 0; + } + } + } +#endif } /* free IRs in replaced block */ @@ -923,6 +957,7 @@ static block_t *block_find_or_translate(riscv_t *rv) mpool_free(rv->block_ir_mp, ir); } + list_del_init(&replaced_blk->list); mpool_free(rv->block_mp, replaced_blk); #if RV32_HAS(T2C) pthread_mutex_unlock(&rv->cache_lock); @@ -941,6 +976,10 @@ static bool runtime_profiler(riscv_t *rv, block_t *block) * we posit that our profiler could effectively identify hotspots using * three key indicators. */ +#if RV32_HAS(SYSTEM) + if (block->satp != rv->csr_satp) + return false; +#endif uint32_t freq = cache_freq(rv->block_cache, block->pc_start); /* To profile a block after chaining, it must first be executed. */ if (unlikely(freq >= 2 && block->has_loops)) @@ -1022,15 +1061,21 @@ void rv_step(void *arg) block_t *block = block_find_or_translate(rv); /* by now, a block should be available */ assert(block); +#if RV32_HAS(JIT) && RV32_HAS(SYSTEM) + assert(block->satp == rv->csr_satp); +#endif /* After emulating the previous block, it is determined whether the * branch is taken or not. The IR array of the current block is then * assigned to either the branch_taken or branch_untaken pointer of * the previous block. */ - #if RV32_HAS(BLOCK_CHAINING) - if (prev) { + if (prev +#if RV32_HAS(JIT) && RV32_HAS(SYSTEM) + && prev->satp == rv->csr_satp +#endif + ) { rv_insn_t *last_ir = prev->ir_tail; /* chain block */ if (!insn_is_unconditional_branch(last_ir->opcode)) { @@ -1048,7 +1093,7 @@ void rv_step(void *arg) #endif last_pc = rv->PC; #if RV32_HAS(JIT) -#if RV32_HAS(T2C) +#if RV32_HAS(T2C) && !RV32_HAS(SYSTEM) /* executed through the tier-2 JIT compiler */ if (block->hot2) { ((exec_t2c_func_t) block->func)(rv); diff --git a/src/jit.c b/src/jit.c index da5eb079..fa23b5e2 100644 --- a/src/jit.c +++ b/src/jit.c @@ -45,6 +45,9 @@ #include "riscv.h" #include "riscv_private.h" #include "utils.h" +#if RV32_HAS(SYSTEM) +#include "system.h" +#endif #define JIT_CLS_MASK 0x07 #define JIT_ALU_OP_MASK 0xf0 @@ -267,12 +270,15 @@ static inline void set_dirty(int reg_idx, bool is_dirty) } } -static inline void offset_map_insert(struct jit_state *state, int32_t target_pc) +static inline void offset_map_insert(struct jit_state *state, block_t *block) { struct offset_map *map_entry = &state->offset_map[state->n_blocks++]; - assert(state->n_blocks < MAX_BLOCKS); - map_entry->pc = target_pc; + assert(state->n_blocks <= MAX_BLOCKS); + map_entry->pc = block->pc_start; map_entry->offset = state->offset; +#if RV32_HAS(SYSTEM) + map_entry->satp = block->satp; +#endif } #if !defined(__APPLE__) @@ -287,6 +293,10 @@ static void emit_bytes(struct jit_state *state, void *data, uint32_t len) should_flush = true; return; } + if (unlikely(state->n_blocks == MAX_BLOCKS)) { + should_flush = true; + return; + } #if defined(__APPLE__) && defined(__aarch64__) pthread_jit_write_protect_np(false); #endif @@ -330,11 +340,12 @@ static inline void emit_modrm_and_displacement(struct jit_state *state, int m, int32_t d) { + /* + * Do not use short encoding even if the offset is one byte value since the + * length of operation is undetermined. 
+ */ if (d == 0 && (m & 7) != RBP) { emit_modrm(state, 0x00, r, m); - } else if ((int8_t) d == d) { - emit_modrm(state, 0x40, r, m); - emit1(state, d); } else { emit_modrm(state, 0x80, r, m); emit4(state, d); @@ -377,12 +388,16 @@ static inline void emit_pop(struct jit_state *state, int r) } static inline void emit_jump_target_address(struct jit_state *state, - int32_t target_pc) + int32_t target_pc, + uint32_t target_satp UNUSED) { struct jump *jump = &state->jumps[state->n_jumps++]; - assert(state->n_jumps < MAX_JUMPS); + assert(state->n_jumps <= MAX_JUMPS); jump->offset_loc = state->offset; jump->target_pc = target_pc; +#if RV32_HAS(SYSTEM) + jump->target_satp = target_satp; +#endif emit4(state, 0); } #elif defined(__aarch64__) @@ -591,7 +606,7 @@ static inline void emit_jump_target_offset(struct jit_state *state, uint32_t jump_state_offset) { struct jump *jump = &state->jumps[state->n_jumps++]; - assert(state->n_jumps < MAX_JUMPS); + assert(state->n_jumps <= MAX_JUMPS); jump->offset_loc = jump_loc; jump->target_offset = jump_state_offset; } @@ -968,17 +983,22 @@ static inline void emit_store(struct jit_state *state, set_dirty(src, false); } -static inline void emit_jmp(struct jit_state *state, uint32_t target_pc) +static inline void emit_jmp(struct jit_state *state, + uint32_t target_pc, + uint32_t target_satp) { #if defined(__x86_64__) emit1(state, 0xe9); - emit_jump_target_address(state, target_pc); + emit_jump_target_address(state, target_pc, target_satp); #elif defined(__aarch64__) struct jump *jump = &state->jumps[state->n_jumps++]; - assert(state->n_jumps < MAX_JUMPS); + assert(state->n_jumps <= MAX_JUMPS); jump->offset_loc = state->offset; jump->target_pc = target_pc; emit_a64(state, UBR_B); +#if RV32_HAS(SYSTEM) + jump->target_satp = target_satp; +#endif #endif } @@ -1017,7 +1037,7 @@ static inline void emit_exit(struct jit_state *state) emit_jump_target_offset(state, state->offset, state->exit_loc); emit4(state, 0); #elif defined(__aarch64__) - emit_jmp(state, TARGET_PC_EXIT); + emit_jmp(state, TARGET_PC_EXIT, 0); #endif } @@ -1233,6 +1253,211 @@ static void muldivmod(struct jit_state *state, } #endif /* RV32_HAS(EXT_M) */ +#if RV32_HAS(SYSTEM) +uint32_t jit_mmio_read_wrapper(riscv_t *rv, uint32_t addr) +{ + MMIO_READ(); + __UNREACHABLE; +} + +void jit_mmu_handler(riscv_t *rv, uint32_t vreg_idx) +{ + assert(vreg_idx < 32); + + uint32_t addr = rv->jit_mmu.vaddr; + + if (!rv->csr_satp) { + rv->jit_mmu.paddr = addr; + return; + } + + bool ok; + uint32_t level, *pte = mmu_walk(rv, addr, &level); + + if (rv->jit_mmu.type == rv_insn_sb || rv->jit_mmu.type == rv_insn_sh || + rv->jit_mmu.type == rv_insn_sw) + ok = mmu_write_fault_check(rv, pte, addr, PTE_W); + else + ok = mmu_read_fault_check(rv, pte, addr, PTE_R); + + if (unlikely(!ok)) + pte = mmu_walk(rv, addr, &level); + + get_ppn_and_offset(); + addr = ppn | offset; + + if (likely(addr < PRIV(rv)->mem->mem_size)) { + rv->jit_mmu.is_mmio = 0; + rv->jit_mmu.paddr = addr; + return; + } + + uint32_t val; + rv->jit_mmu.is_mmio = 1; + + switch (rv->jit_mmu.type) { + case rv_insn_sb: + val = rv->X[vreg_idx] & 0xff; + MMIO_WRITE(); + break; + case rv_insn_sh: + val = rv->X[vreg_idx] & 0xffff; + MMIO_WRITE(); + break; + case rv_insn_sw: + val = rv->X[vreg_idx]; + MMIO_WRITE(); + break; + case rv_insn_lb: + rv->X[vreg_idx] = (int8_t) jit_mmio_read_wrapper(rv, addr); + break; + case rv_insn_lh: + rv->X[vreg_idx] = (int16_t) jit_mmio_read_wrapper(rv, addr); + break; + case rv_insn_lw: + rv->X[vreg_idx] = jit_mmio_read_wrapper(rv, addr); + 
break; + case rv_insn_lbu: + rv->X[vreg_idx] = (uint8_t) jit_mmio_read_wrapper(rv, addr); + break; + case rv_insn_lhu: + rv->X[vreg_idx] = (uint16_t) jit_mmio_read_wrapper(rv, addr); + break; + default: + assert(NULL); + __UNREACHABLE; + } +} + +void emit_jit_mmu_handler(struct jit_state *state, uint8_t vreg_idx) +{ + assert(vreg_idx < 32); + +#if defined(__x86_64__) + /* push $rdi */ + emit1(state, 0xff); + emit_modrm(state, 0x3 << 6, 0x6, parameter_reg[0]); + + /* mov $vreg_idx, %rsi */ + emit1(state, 0xbe); + emit4(state, vreg_idx); + + /* call jit_mmu_handler */ + emit_load_imm(state, temp_reg, (uintptr_t) &jit_mmu_handler); + emit1(state, 0xff); + emit_modrm(state, 0x3 << 6, 0x2, temp_reg); + + /* pop rv to $rdi */ + emit1(state, 0x8f); + emit_modrm(state, 0x3 << 6, 0x0, parameter_reg[0]); +#elif defined(__aarch64__) + uint32_t insn; + + /* push rv into stack */ + insn = (0xf81f0fe << 4) | R0; + emit_a64(state, insn); + + emit_movewide_imm(state, false, R1, vreg_idx); + + /* blr jit_mmu_handler */ + emit_movewide_imm(state, true, temp_reg, (uintptr_t) &jit_mmu_handler); + insn = (0xd63f << 16) | (temp_reg << 5); + emit_a64(state, insn); + + /* pop from stack */ + insn = (0xf84107e << 4) | R0; + emit_a64(state, insn); +#endif +} + +void emit_jit_mmio_escape_load(struct jit_state *state) +{ +#if defined(__x86_64__) + /* JE */ + emit1(state, 0x0f); + emit1(state, 0x84); + + /* pre-calculated jump offset */ + emit4(state, 0xb); + return; +#elif defined(__aarch64__) + /* b.eq (3 instructions) */ + emit_a64(state, (0x54 << 24) | (0x3 << 5)); +#endif +} + +void emit_jit_mmio_escape_load_end(struct jit_state *state, + int rv_insn_type UNUSED) +{ +#if defined(__x86_64__) + /* JMP */ + emit1(state, 0xe9); + + /* pre-calculated jump offset */ + switch (rv_insn_type) { + case rv_insn_sb: + case rv_insn_sh: + emit4(state, 0x1c); + return; + case rv_insn_sw: + emit4(state, 0x1b); + return; + case rv_insn_lb: + case rv_insn_lh: + case rv_insn_lbu: + case rv_insn_lhu: + emit4(state, 0x16); + return; + case rv_insn_lw: + emit4(state, 0x15); + return; + default: + assert(NULL); + __UNREACHABLE; + } +#elif defined(__aarch64__) + /* b (5 instructions) */ + emit_a64(state, (0x54 << 24) | (0x6 << 5) | 0xe); +#endif +} + +void emit_jit_mmio_escape_store(struct jit_state *state, + int rv_insn_type UNUSED) +{ +#if defined(__x86_64__) + /* JE */ + emit1(state, 0x0f); + emit1(state, 0x84); + + /* pre-calculated jump offset */ + switch (rv_insn_type) { + case rv_insn_sb: + case rv_insn_sh: + emit4(state, 0x1c); + return; + case rv_insn_sw: + emit4(state, 0x1b); + return; + case rv_insn_lb: + case rv_insn_lh: + case rv_insn_lbu: + case rv_insn_lhu: + emit4(state, 0x16); + return; + case rv_insn_lw: + emit4(state, 0x15); + return; + default: + assert(NULL); + __UNREACHABLE; + } +#elif defined(__aarch64__) + /* b.eq (7 instructions) */ + emit_a64(state, (0x54 << 24) | (0x7 << 5)); +#endif +} +#endif + static void prepare_translate(struct jit_state *state) { #if defined(__x86_64__) @@ -1734,7 +1959,9 @@ static void ra_load2_sext(struct jit_state *state, } } -void parse_branch_history_table(struct jit_state *state, rv_insn_t *ir) +void parse_branch_history_table(struct jit_state *state, + riscv_t *rv UNUSED, + rv_insn_t *ir) { int max_idx = 0; branch_history_table_t *bt = ir->branch_table; @@ -1745,14 +1972,21 @@ void parse_branch_history_table(struct jit_state *state, rv_insn_t *ir) max_idx = i; } if (bt->PC[max_idx] && bt->times[max_idx] >= IN_JUMP_THRESHOLD) { - save_reg(state, 0); - unmap_vm_reg(0); - 
emit_load_imm(state, register_map[0].reg_idx, bt->PC[max_idx]);
-        emit_cmp32(state, temp_reg, register_map[0].reg_idx);
-        uint32_t jump_loc = state->offset;
-        emit_jcc_offset(state, 0x85);
-        emit_jmp(state, bt->PC[max_idx]);
-        emit_jump_target_offset(state, JUMP_LOC, state->offset);
+        IIF(RV32_HAS(SYSTEM))(if (bt->satp[max_idx] == rv->csr_satp), )
+        {
+            save_reg(state, 0);
+            unmap_vm_reg(0);
+            emit_load_imm(state, register_map[0].reg_idx, bt->PC[max_idx]);
+            emit_cmp32(state, temp_reg, register_map[0].reg_idx);
+            uint32_t jump_loc = state->offset;
+            emit_jcc_offset(state, 0x85);
+#if RV32_HAS(SYSTEM)
+            emit_jmp(state, bt->PC[max_idx], bt->satp[max_idx]);
+#else
+            emit_jmp(state, bt->PC[max_idx], 0);
+#endif
+            emit_jump_target_offset(state, JUMP_LOC, state->offset);
+        }
     }
 }
@@ -1914,8 +2148,12 @@ static void resolve_jumps(struct jit_state *state)
             target_loc = jump.offset_loc + sizeof(uint32_t);
             for (int i = 0; i < state->n_blocks; i++) {
                 if (jump.target_pc == state->offset_map[i].pc) {
-                    target_loc = state->offset_map[i].offset;
-                    break;
+                    IIF(RV32_HAS(SYSTEM))
+                    (if (jump.target_satp == state->offset_map[i].satp), )
+                    {
+                        target_loc = state->offset_map[i].offset;
+                        break;
+                    }
                 }
             }
         }
@@ -1936,11 +2174,14 @@ static void translate_chained_block(struct jit_state *state,
                                     riscv_t *rv,
                                     block_t *block)
 {
-    if (set_has(&state->set, block->pc_start))
+    if (set_has(&state->set, COMPOSED_KEY(block)))
         return;
-    set_add(&state->set, block->pc_start);
-    offset_map_insert(state, block->pc_start);
+    if (state->n_blocks == MAX_BLOCKS)
+        return;
+
+    bool added = set_add(&state->set, COMPOSED_KEY(block));
+    assert(added); /* the block must not have been translated before */
+    (void) added;
+    offset_map_insert(state, block);
     translate(state, rv, block);
     if (unlikely(should_flush))
         return;
@@ -1948,15 +2189,22 @@
     if (ir->branch_untaken && !set_has(&state->set, ir->branch_untaken->pc)) {
         block_t *block1 =
             cache_get(rv->block_cache, ir->branch_untaken->pc, false);
-        if (block1->translatable)
-            translate_chained_block(state, rv, block1);
+        if (block1->translatable) {
+            IIF(RV32_HAS(SYSTEM))
+            (if (block1->satp == rv->csr_satp), )
+            translate_chained_block(state, rv, block1);
+        }
     }
     if (ir->branch_taken && !set_has(&state->set, ir->branch_taken->pc)) {
         block_t *block1 =
             cache_get(rv->block_cache, ir->branch_taken->pc, false);
-        if (block1->translatable)
-            translate_chained_block(state, rv, block1);
+        if (block1->translatable) {
+            IIF(RV32_HAS(SYSTEM))
+            (if (block1->satp == rv->csr_satp), )
+            translate_chained_block(state, rv, block1);
+        }
     }
+
     branch_history_table_t *bt = ir->branch_table;
     if (bt) {
         int max_idx = 0;
@@ -1968,10 +2216,16 @@
         if (bt->PC[max_idx] && bt->times[max_idx] >= IN_JUMP_THRESHOLD &&
             !set_has(&state->set, bt->PC[max_idx])) {
-            block_t *block1 =
-                cache_get(rv->block_cache, bt->PC[max_idx], false);
-            if (block1 && block1->translatable)
-                translate_chained_block(state, rv, block1);
+            IIF(RV32_HAS(SYSTEM))(if (bt->satp[max_idx] == rv->csr_satp), )
+            {
+                block_t *block1 =
+                    cache_get(rv->block_cache, bt->PC[max_idx], false);
+                if (block1 && block1->translatable) {
+                    IIF(RV32_HAS(SYSTEM))
+                    (if (block1->satp == rv->csr_satp), )
+                    translate_chained_block(state, rv, block1);
+                }
+            }
         }
     }
 }
@@ -1979,18 +2233,23 @@ void jit_translate(riscv_t *rv, block_t *block)
 {
     struct jit_state *state = rv->jit_state;
-    if (set_has(&state->set, block->pc_start)) {
+    if (set_has(&state->set, COMPOSED_KEY(block))) {
         for (int i =
0; i < state->n_blocks; i++) {
-            if (block->pc_start == state->offset_map[i].pc) {
+            if (block->pc_start == state->offset_map[i].pc
+#if RV32_HAS(SYSTEM)
+                && block->satp == state->offset_map[i].satp
+#endif
+            ) {
                 block->offset = state->offset_map[i].offset;
                 block->hot = true;
                 return;
             }
         }
+        assert(NULL);
         __UNREACHABLE;
     }
 restart:
-    memset(state->jumps, 0, 1024 * sizeof(struct jump));
+    memset(state->jumps, 0, MAX_JUMPS * sizeof(struct jump));
     state->n_jumps = 0;
     block->offset = state->offset;
     translate_chained_block(state, rv, block);
diff --git a/src/jit.h b/src/jit.h
index 3967a1df..4bbafa2f 100644
--- a/src/jit.h
+++ b/src/jit.h
@@ -14,11 +14,17 @@ struct jump {
     uint32_t offset_loc;
     uint32_t target_pc;
     uint32_t target_offset;
+#if RV32_HAS(SYSTEM)
+    uint32_t target_satp;
+#endif
 };
 
 struct offset_map {
     uint32_t pc;
     uint32_t offset;
+#if RV32_HAS(SYSTEM)
+    uint32_t satp;
+#endif
 };
 
 struct jit_state {
diff --git a/src/riscv.c b/src/riscv.c
index a06a2f33..8e1d103b 100644
--- a/src/riscv.c
+++ b/src/riscv.c
@@ -463,7 +463,7 @@ riscv_t *rv_create(riscv_user_t rv_attr)
     rv->jit_state = jit_state_init(CODE_CACHE_SIZE);
     rv->block_cache = cache_create(BLOCK_MAP_CAPACITY_BITS);
     assert(rv->block_cache);
-#if RV32_HAS(T2C)
+#if RV32_HAS(T2C) && !RV32_HAS(SYSTEM)
     rv->quit = false;
     rv->jit_cache = jit_cache_init();
     /* prepare wait queue. */
@@ -566,8 +566,10 @@ bool rv_has_halted(riscv_t *rv)
 void rv_delete(riscv_t *rv)
 {
     assert(rv);
-#if !RV32_HAS(JIT)
+#if !RV32_HAS(JIT) || (RV32_HAS(SYSTEM) && !RV32_HAS(ELF_LOADER))
     vm_attr_t *attr = PRIV(rv);
+#endif
+#if !RV32_HAS(JIT)
     map_delete(attr->fd_map);
     memory_delete(attr->mem);
     block_map_destroy(rv);
diff --git a/src/riscv_private.h b/src/riscv_private.h
index 0ae6f279..684426dd 100644
--- a/src/riscv_private.h
+++ b/src/riscv_private.h
@@ -90,6 +90,9 @@ typedef struct block {
     bool translatable; /**< Determine whether the block has RV32AF instructions */
     bool has_loops;    /**< Determine whether the block has a loop */
+#if RV32_HAS(SYSTEM)
+    uint32_t satp;
+#endif
 #if RV32_HAS(T2C)
     bool compiled; /**< The T2C request is enqueued or not */
 #endif
@@ -126,6 +129,18 @@ struct riscv_internal {
     riscv_word_t X[N_RV_REGS];
     riscv_word_t PC;
 
+#if RV32_HAS(JIT) && RV32_HAS(SYSTEM)
+    /*
+     * The AArch64 encoder only accepts a 9-bit signed offset, so do not
+     * place this structure near the bottom.
+     */
+    struct {
+        uint32_t is_mmio; /* whether the access is MMIO */
+        uint32_t type;    /* the rv_insn_* type of the memory access */
+        uint32_t vaddr;
+        uint32_t paddr;
+    } jit_mmu;
+#endif
 
     /* user provided data */
     riscv_user_t data;
diff --git a/src/rv32_jit.c b/src/rv32_jit.c
index 4c1dad9f..50ec8c03 100644
--- a/src/rv32_jit.c
+++ b/src/rv32_jit.c
@@ -13,7 +13,7 @@ GEN(jal, {
         emit_load_imm(state, vm_reg[0], ir->pc + 4);
     }
     store_back(state);
-    emit_jmp(state, ir->pc + ir->imm);
+    emit_jmp(state, ir->pc + ir->imm, rv->csr_satp);
     emit_load_imm(state, temp_reg, ir->pc + ir->imm);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
@@ -28,7 +28,7 @@ GEN(jalr, {
         emit_load_imm(state, vm_reg[1], ir->pc + 4);
     }
     store_back(state);
-    parse_branch_history_table(state, ir);
+    parse_branch_history_table(state, rv, ir);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
 })
@@ -39,14 +39,14 @@ GEN(beq, {
     uint32_t jump_loc = state->offset;
     emit_jcc_offset(state, 0x84);
     if (ir->branch_untaken) {
-        emit_jmp(state, ir->pc + 4);
+        emit_jmp(state, ir->pc + 4, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + 4);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
     emit_jump_target_offset(state, JUMP_LOC, state->offset);
     if (ir->branch_taken) {
-        emit_jmp(state, ir->pc + ir->imm);
+        emit_jmp(state, ir->pc + ir->imm, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + ir->imm);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
@@ -59,14 +59,14 @@ GEN(bne, {
     uint32_t jump_loc = state->offset;
     emit_jcc_offset(state, 0x85);
     if (ir->branch_untaken) {
-        emit_jmp(state, ir->pc + 4);
+        emit_jmp(state, ir->pc + 4, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + 4);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
     emit_jump_target_offset(state, JUMP_LOC, state->offset);
     if (ir->branch_taken) {
-        emit_jmp(state, ir->pc + ir->imm);
+        emit_jmp(state, ir->pc + ir->imm, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + ir->imm);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
@@ -79,14 +79,14 @@ GEN(blt, {
     uint32_t jump_loc = state->offset;
     emit_jcc_offset(state, 0x8c);
     if (ir->branch_untaken) {
-        emit_jmp(state, ir->pc + 4);
+        emit_jmp(state, ir->pc + 4, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + 4);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
     emit_jump_target_offset(state, JUMP_LOC, state->offset);
     if (ir->branch_taken) {
-        emit_jmp(state, ir->pc + ir->imm);
+        emit_jmp(state, ir->pc + ir->imm, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + ir->imm);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
@@ -99,14 +99,14 @@ GEN(bge, {
     uint32_t jump_loc = state->offset;
     emit_jcc_offset(state, 0x8d);
     if (ir->branch_untaken) {
-        emit_jmp(state, ir->pc + 4);
+        emit_jmp(state, ir->pc + 4, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + 4);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
     emit_jump_target_offset(state, JUMP_LOC, state->offset);
     if (ir->branch_taken) {
-        emit_jmp(state, ir->pc + ir->imm);
+        emit_jmp(state, ir->pc + ir->imm, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + ir->imm);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
@@ -119,14 +119,14 @@ GEN(bltu, {
     uint32_t jump_loc = state->offset;
     emit_jcc_offset(state,
0x82);
     if (ir->branch_untaken) {
-        emit_jmp(state, ir->pc + 4);
+        emit_jmp(state, ir->pc + 4, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + 4);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
     emit_jump_target_offset(state, JUMP_LOC, state->offset);
     if (ir->branch_taken) {
-        emit_jmp(state, ir->pc + ir->imm);
+        emit_jmp(state, ir->pc + ir->imm, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + ir->imm);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
@@ -139,14 +139,14 @@ GEN(bgeu, {
     uint32_t jump_loc = state->offset;
     emit_jcc_offset(state, 0x83);
     if (ir->branch_untaken) {
-        emit_jmp(state, ir->pc + 4);
+        emit_jmp(state, ir->pc + 4, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + 4);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
     emit_jump_target_offset(state, JUMP_LOC, state->offset);
     if (ir->branch_taken) {
-        emit_jmp(state, ir->pc + ir->imm);
+        emit_jmp(state, ir->pc + ir->imm, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + ir->imm);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
@@ -155,66 +155,366 @@ GEN(lb, {
     memory_t *m = PRIV(rv)->mem;
     vm_reg[0] = ra_load(state, ir->rs1);
-    emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm));
-    emit_alu64(state, 0x01, vm_reg[0], temp_reg);
-    vm_reg[1] = map_vm_reg(state, ir->rd);
-    emit_load_sext(state, S8, temp_reg, vm_reg[1], 0);
+    IIF(RV32_HAS(SYSTEM))
+    (
+        {
+            emit_load_imm(state, temp_reg, ir->imm);
+            emit_alu32(state, 0x01, vm_reg[0], temp_reg);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.vaddr));
+            emit_load_imm(state, temp_reg, rv_insn_lb);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.type));
+
+            store_back(state);
+            emit_jit_mmu_handler(state, ir->rd);
+            /* clear register mapping */
+            reset_reg();
+
+            /*
+             * If it's MMIO, move the value read by the handler into the
+             * host register; otherwise, load it from memory.
+             */
+            emit_load(state, S32, parameter_reg[0], temp_reg,
+                      offsetof(riscv_t, jit_mmu.is_mmio));
+            emit_cmp_imm32(state, temp_reg, 0);
+            emit_jit_mmio_escape_load(state);
+            vm_reg[1] = map_vm_reg(state, ir->rd);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[1],
+                      offsetof(riscv_t, X) + 4 * ir->rd);
+            /* jump over regular loading */
+            emit_jit_mmio_escape_load_end(state, rv_insn_lb);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[0],
+                      offsetof(riscv_t, jit_mmu.paddr));
+            emit_load_imm(state, temp_reg, (uintptr_t) m->mem_base);
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            emit_load_sext(state, S8, temp_reg, vm_reg[1], 0);
+        },
+        {
+            emit_load_imm(state, temp_reg, (uintptr_t) (m->mem_base + ir->imm));
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            vm_reg[1] = map_vm_reg(state, ir->rd);
+            emit_load_sext(state, S8, temp_reg, vm_reg[1], 0);
+        })
 })
 GEN(lh, {
     memory_t *m = PRIV(rv)->mem;
     vm_reg[0] = ra_load(state, ir->rs1);
-    emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm));
-    emit_alu64(state, 0x01, vm_reg[0], temp_reg);
-    vm_reg[1] = map_vm_reg(state, ir->rd);
-    emit_load_sext(state, S16, temp_reg, vm_reg[1], 0);
+    IIF(RV32_HAS(SYSTEM))
+    (
+        {
+            emit_load_imm(state, temp_reg, ir->imm);
+            emit_alu32(state, 0x01, vm_reg[0], temp_reg);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.vaddr));
+            emit_load_imm(state, temp_reg, rv_insn_lh);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.type));
+
+            store_back(state);
+            emit_jit_mmu_handler(state, ir->rd);
+            /* clear register mapping */
+            reset_reg();
+
+            /*
+             * If it's MMIO, move the value read by the handler into the
+             * host register; otherwise, load it from memory.
+             */
+            emit_load(state, S32, parameter_reg[0], temp_reg,
+                      offsetof(riscv_t, jit_mmu.is_mmio));
+            emit_cmp_imm32(state, temp_reg, 0);
+            emit_jit_mmio_escape_load(state);
+            vm_reg[1] = map_vm_reg(state, ir->rd);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[1],
+                      offsetof(riscv_t, X) + 4 * ir->rd);
+            /* jump over regular loading */
+            emit_jit_mmio_escape_load_end(state, rv_insn_lh);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[0],
+                      offsetof(riscv_t, jit_mmu.paddr));
+            emit_load_imm(state, temp_reg, (uintptr_t) m->mem_base);
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            emit_load_sext(state, S16, temp_reg, vm_reg[1], 0);
+        },
+        {
+            emit_load_imm(state, temp_reg, (uintptr_t) (m->mem_base + ir->imm));
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            vm_reg[1] = map_vm_reg(state, ir->rd);
+            emit_load_sext(state, S16, temp_reg, vm_reg[1], 0);
+        })
 })
 GEN(lw, {
     memory_t *m = PRIV(rv)->mem;
     vm_reg[0] = ra_load(state, ir->rs1);
-    emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm));
-    emit_alu64(state, 0x01, vm_reg[0], temp_reg);
-    vm_reg[1] = map_vm_reg(state, ir->rd);
-    emit_load(state, S32, temp_reg, vm_reg[1], 0);
+    IIF(RV32_HAS(SYSTEM))
+    (
+        {
+            emit_load_imm(state, temp_reg, ir->imm);
+            emit_alu32(state, 0x01, vm_reg[0], temp_reg);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.vaddr));
+            emit_load_imm(state, temp_reg, rv_insn_lw);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.type));
+
+            store_back(state);
+            emit_jit_mmu_handler(state, ir->rd);
+            /* clear register mapping */
+            reset_reg();
+
+            /*
+             * If it's MMIO, move the value read by the handler into the
+             * host register; otherwise, load it from memory.
+             */
+            emit_load(state, S32, parameter_reg[0], temp_reg,
+                      offsetof(riscv_t, jit_mmu.is_mmio));
+            emit_cmp_imm32(state, temp_reg, 0);
+            emit_jit_mmio_escape_load(state);
+            vm_reg[1] = map_vm_reg(state, ir->rd);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[1],
+                      offsetof(riscv_t, X) + 4 * ir->rd);
+            /* jump over regular loading */
+            emit_jit_mmio_escape_load_end(state, rv_insn_lw);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[0],
+                      offsetof(riscv_t, jit_mmu.paddr));
+            emit_load_imm(state, temp_reg, (uintptr_t) m->mem_base);
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            emit_load(state, S32, temp_reg, vm_reg[1], 0);
+        },
+        {
+            emit_load_imm(state, temp_reg, (uintptr_t) (m->mem_base + ir->imm));
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            vm_reg[1] = map_vm_reg(state, ir->rd);
+            emit_load(state, S32, temp_reg, vm_reg[1], 0);
+        })
 })
 GEN(lbu, {
     memory_t *m = PRIV(rv)->mem;
     vm_reg[0] = ra_load(state, ir->rs1);
-    emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm));
-    emit_alu64(state, 0x01, vm_reg[0], temp_reg);
-    vm_reg[1] = map_vm_reg(state, ir->rd);
-    emit_load(state, S8, temp_reg, vm_reg[1], 0);
+    IIF(RV32_HAS(SYSTEM))
+    (
+        {
+            emit_load_imm(state, temp_reg, ir->imm);
+            emit_alu32(state, 0x01, vm_reg[0], temp_reg);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.vaddr));
+            emit_load_imm(state, temp_reg, rv_insn_lbu);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.type));
+
+            store_back(state);
+            emit_jit_mmu_handler(state, ir->rd);
+            /* clear register mapping */
+            reset_reg();
+
+            /*
+             * If it's MMIO, move the value read by the handler into the
+             * host register; otherwise, load it from memory.
+             */
+            emit_load(state, S32, parameter_reg[0], temp_reg,
+                      offsetof(riscv_t, jit_mmu.is_mmio));
+            emit_cmp_imm32(state, temp_reg, 0);
+            emit_jit_mmio_escape_load(state);
+            vm_reg[1] = map_vm_reg(state, ir->rd);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[1],
+                      offsetof(riscv_t, X) + 4 * ir->rd);
+            /* jump over regular loading */
+            emit_jit_mmio_escape_load_end(state, rv_insn_lbu);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[0],
+                      offsetof(riscv_t, jit_mmu.paddr));
+            emit_load_imm(state, temp_reg, (uintptr_t) m->mem_base);
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            emit_load(state, S8, temp_reg, vm_reg[1], 0);
+        },
+        {
+            emit_load_imm(state, temp_reg, (uintptr_t) (m->mem_base + ir->imm));
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            vm_reg[1] = map_vm_reg(state, ir->rd);
+            emit_load(state, S8, temp_reg, vm_reg[1], 0);
+        })
 })
 GEN(lhu, {
     memory_t *m = PRIV(rv)->mem;
     vm_reg[0] = ra_load(state, ir->rs1);
-    emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm));
-    emit_alu64(state, 0x01, vm_reg[0], temp_reg);
-    vm_reg[1] = map_vm_reg(state, ir->rd);
-    emit_load(state, S16, temp_reg, vm_reg[1], 0);
+    IIF(RV32_HAS(SYSTEM))
+    (
+        {
+            emit_load_imm(state, temp_reg, ir->imm);
+            emit_alu32(state, 0x01, vm_reg[0], temp_reg);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.vaddr));
+            emit_load_imm(state, temp_reg, rv_insn_lhu);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.type));
+
+            store_back(state);
+            emit_jit_mmu_handler(state, ir->rd);
+            /* clear register mapping */
+            reset_reg();
+
+            /*
+             * If it's MMIO, move the value read by the handler into the
+             * host register; otherwise, load it from memory.
+             */
+            emit_load(state, S32, parameter_reg[0], temp_reg,
+                      offsetof(riscv_t, jit_mmu.is_mmio));
+            emit_cmp_imm32(state, temp_reg, 0);
+            emit_jit_mmio_escape_load(state);
+            vm_reg[1] = map_vm_reg(state, ir->rd);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[1],
+                      offsetof(riscv_t, X) + 4 * ir->rd);
+            /* jump over regular loading */
+            emit_jit_mmio_escape_load_end(state, rv_insn_lhu);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[0],
+                      offsetof(riscv_t, jit_mmu.paddr));
+            emit_load_imm(state, temp_reg, (uintptr_t) m->mem_base);
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            emit_load(state, S16, temp_reg, vm_reg[1], 0);
+        },
+        {
+            emit_load_imm(state, temp_reg, (uintptr_t) (m->mem_base + ir->imm));
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            vm_reg[1] = map_vm_reg(state, ir->rd);
+            emit_load(state, S16, temp_reg, vm_reg[1], 0);
+        })
 })
 GEN(sb, {
     memory_t *m = PRIV(rv)->mem;
     vm_reg[0] = ra_load(state, ir->rs1);
-    emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm));
-    emit_alu64(state, 0x01, vm_reg[0], temp_reg);
-    vm_reg[1] = ra_load(state, ir->rs2);
-    emit_store(state, S8, vm_reg[1], temp_reg, 0);
+    IIF(RV32_HAS(SYSTEM))
+    (
+        {
+            emit_load_imm(state, temp_reg, ir->imm);
+            emit_alu32(state, 0x01, vm_reg[0], temp_reg);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.vaddr));
+            emit_load_imm(state, temp_reg, rv_insn_sb);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.type));
+            store_back(state);
+            emit_jit_mmu_handler(state, ir->rs2);
+            /* clear register mapping */
+            reset_reg();
+
+            /*
+             * If it's MMIO, the store has already been performed by the
+             * MMIO handler; otherwise, store the value into the memory.
+             */
+            emit_load(state, S32, parameter_reg[0], temp_reg,
+                      offsetof(riscv_t, jit_mmu.is_mmio));
+            emit_cmp_imm32(state, temp_reg, 1);
+            emit_jit_mmio_escape_store(state, rv_insn_sb);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[0],
+                      offsetof(riscv_t, jit_mmu.paddr));
+            emit_load_imm(state, temp_reg, (uintptr_t) m->mem_base);
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            vm_reg[1] = ra_load(state, ir->rs2);
+            emit_store(state, S8, vm_reg[1], temp_reg, 0);
+            reset_reg();
+        },
+        {
+            emit_load_imm(state, temp_reg, (uintptr_t) (m->mem_base + ir->imm));
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            vm_reg[1] = ra_load(state, ir->rs2);
+            emit_store(state, S8, vm_reg[1], temp_reg, 0);
+        })
 })
 GEN(sh, {
     memory_t *m = PRIV(rv)->mem;
     vm_reg[0] = ra_load(state, ir->rs1);
-    emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm));
-    emit_alu64(state, 0x01, vm_reg[0], temp_reg);
-    vm_reg[1] = ra_load(state, ir->rs2);
-    emit_store(state, S16, vm_reg[1], temp_reg, 0);
+    IIF(RV32_HAS(SYSTEM))
+    (
+        {
+            emit_load_imm(state, temp_reg, ir->imm);
+            emit_alu32(state, 0x01, vm_reg[0], temp_reg);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.vaddr));
+            emit_load_imm(state, temp_reg, rv_insn_sh);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.type));
+            store_back(state);
+            emit_jit_mmu_handler(state, ir->rs2);
+            /* clear register mapping */
+            reset_reg();
+
+            /*
+             * If it's MMIO, the store has already been performed by the
+             * MMIO handler; otherwise, store the value into the memory.
+             */
+            emit_load(state, S32, parameter_reg[0], temp_reg,
+                      offsetof(riscv_t, jit_mmu.is_mmio));
+            emit_cmp_imm32(state, temp_reg, 1);
+            emit_jit_mmio_escape_store(state, rv_insn_sh);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[0],
+                      offsetof(riscv_t, jit_mmu.paddr));
+            emit_load_imm(state, temp_reg, (uintptr_t) m->mem_base);
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            vm_reg[1] = ra_load(state, ir->rs2);
+            emit_store(state, S16, vm_reg[1], temp_reg, 0);
+            reset_reg();
+        },
+        {
+            emit_load_imm(state, temp_reg, (uintptr_t) (m->mem_base + ir->imm));
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            vm_reg[1] = ra_load(state, ir->rs2);
+            emit_store(state, S16, vm_reg[1], temp_reg, 0);
+        })
 })
 GEN(sw, {
     memory_t *m = PRIV(rv)->mem;
     vm_reg[0] = ra_load(state, ir->rs1);
-    emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm));
-    emit_alu64(state, 0x01, vm_reg[0], temp_reg);
-    vm_reg[1] = ra_load(state, ir->rs2);
-    emit_store(state, S32, vm_reg[1], temp_reg, 0);
+    IIF(RV32_HAS(SYSTEM))
+    (
+        {
+            emit_load_imm(state, temp_reg, ir->imm);
+            emit_alu32(state, 0x01, vm_reg[0], temp_reg);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.vaddr));
+            emit_load_imm(state, temp_reg, rv_insn_sw);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.type));
+            store_back(state);
+            emit_jit_mmu_handler(state, ir->rs2);
+            /* clear register mapping */
+            reset_reg();
+
+            /*
+             * If it's MMIO, the store has already been performed by the
+             * MMIO handler; otherwise, store the value into the memory.
+             */
+            emit_load(state, S32, parameter_reg[0], temp_reg,
+                      offsetof(riscv_t, jit_mmu.is_mmio));
+            emit_cmp_imm32(state, temp_reg, 1);
+            emit_jit_mmio_escape_store(state, rv_insn_sw);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[0],
+                      offsetof(riscv_t, jit_mmu.paddr));
+            emit_load_imm(state, temp_reg, (uintptr_t) m->mem_base);
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            vm_reg[1] = ra_load(state, ir->rs2);
+            emit_store(state, S32, vm_reg[1], temp_reg, 0);
+            reset_reg();
+        },
+        {
+            emit_load_imm(state, temp_reg, (uintptr_t) (m->mem_base + ir->imm));
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            vm_reg[1] = ra_load(state, ir->rs2);
+            emit_store(state, S32, vm_reg[1], temp_reg, 0);
+        })
 })
 GEN(addi, {
     vm_reg[0] = ra_load(state, ir->rs1);
@@ -388,7 +688,9 @@ GEN(ebreak, {
 })
 GEN(wfi, { assert(NULL); })
 GEN(uret, { assert(NULL); })
+#if RV32_HAS(SYSTEM)
 GEN(sret, { assert(NULL); })
+#endif
 GEN(hret, { assert(NULL); })
 GEN(mret, { assert(NULL); })
 GEN(sfencevma, { assert(NULL); })
@@ -539,7 +841,7 @@ GEN(cjal, {
     vm_reg[0] = map_vm_reg(state, rv_reg_ra);
     emit_load_imm(state, vm_reg[0], ir->pc + 2);
     store_back(state);
-    emit_jmp(state, ir->pc + ir->imm);
+    emit_jmp(state, ir->pc + ir->imm, rv->csr_satp);
     emit_load_imm(state, temp_reg, ir->pc + ir->imm);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
@@ -598,7 +900,7 @@ GEN(cand, {
 })
 GEN(cj, {
     store_back(state);
-    emit_jmp(state, ir->pc + ir->imm);
+    emit_jmp(state, ir->pc + ir->imm, rv->csr_satp);
     emit_load_imm(state, temp_reg, ir->pc + ir->imm);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
@@ -610,14 +912,14 @@ GEN(cbeqz, {
     uint32_t jump_loc = state->offset;
     emit_jcc_offset(state, 0x84);
     if (ir->branch_untaken) {
-        emit_jmp(state, ir->pc + 2);
+        emit_jmp(state, ir->pc + 2, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + 2);
     emit_store(state, S32, temp_reg,
parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
     emit_jump_target_offset(state, JUMP_LOC, state->offset);
     if (ir->branch_taken) {
-        emit_jmp(state, ir->pc + ir->imm);
+        emit_jmp(state, ir->pc + ir->imm, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + ir->imm);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
@@ -630,14 +932,14 @@ GEN(cbnez, {
     uint32_t jump_loc = state->offset;
     emit_jcc_offset(state, 0x85);
     if (ir->branch_untaken) {
-        emit_jmp(state, ir->pc + 2);
+        emit_jmp(state, ir->pc + 2, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + 2);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
     emit_jump_target_offset(state, JUMP_LOC, state->offset);
     if (ir->branch_taken) {
-        emit_jmp(state, ir->pc + ir->imm);
+        emit_jmp(state, ir->pc + ir->imm, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + ir->imm);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
@@ -659,7 +961,7 @@ GEN(cjr, {
     vm_reg[0] = ra_load(state, ir->rs1);
     emit_mov(state, vm_reg[0], temp_reg);
     store_back(state);
-    parse_branch_history_table(state, ir);
+    parse_branch_history_table(state, rv, ir);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
 })
@@ -685,7 +987,7 @@ GEN(cjalr, {
     vm_reg[1] = map_vm_reg(state, rv_reg_ra);
     emit_load_imm(state, vm_reg[1], ir->pc + 2);
     store_back(state);
-    parse_branch_history_table(state, ir);
+    parse_branch_history_table(state, rv, ir);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
 })
diff --git a/src/rv32_template.c b/src/rv32_template.c
index e0e41cf6..30d5b14e 100644
--- a/src/rv32_template.c
+++ b/src/rv32_template.c
@@ -167,11 +167,18 @@ RVOP(
     struct rv_insn *taken = ir->branch_taken;
     if (taken) {
 #if RV32_HAS(JIT)
-        cache_get(rv->block_cache, PC, true);
-        if (!set_add(&pc_set, PC))
-            has_loops = true;
-        if (cache_hot(rv->block_cache, PC))
-            goto end_op;
+        IIF(RV32_HAS(SYSTEM))(if (!rv->is_trapped && !reloc_enable_mmu), )
+        {
+            IIF(RV32_HAS(SYSTEM))
+            (block_t *next =, ) cache_get(rv->block_cache, PC, true);
+            IIF(RV32_HAS(SYSTEM))(if (next->satp == rv->csr_satp), )
+            {
+                if (!set_add(&pc_set, PC))
+                    has_loops = true;
+                if (cache_hot(rv->block_cache, PC))
+                    goto end_op;
+            }
+        }
 #endif
 #if RV32_HAS(SYSTEM)
         if (!rv->is_trapped)
@@ -244,32 +251,45 @@
         }                                                                 \
     }
 #else
-#define LOOKUP_OR_UPDATE_BRANCH_HISTORY_TABLE()                           \
-    block_t *block = cache_get(rv->block_cache, PC, true);                \
-    if (block) {                                                          \
-        for (int i = 0; i < HISTORY_SIZE; i++) {                          \
-            if (ir->branch_table->PC[i] == PC) {                          \
-                ir->branch_table->times[i]++;                             \
-                if (cache_hot(rv->block_cache, PC))                       \
-                    goto end_op;                                          \
-            }                                                             \
-        }                                                                 \
-        /* update branch history table */                                 \
-        int min_idx = 0;                                                  \
-        for (int i = 0; i < HISTORY_SIZE; i++) {                          \
-            if (!ir->branch_table->times[i]) {                            \
-                min_idx = i;                                              \
-                break;                                                    \
-            } else if (ir->branch_table->times[min_idx] >                 \
-                       ir->branch_table->times[i]) {                      \
-                min_idx = i;                                              \
-            }                                                             \
-        }                                                                 \
-        ir->branch_table->times[min_idx] = 1;                             \
-        ir->branch_table->PC[min_idx] = PC;                               \
-        if (cache_hot(rv->block_cache, PC))                               \
-            goto end_op;                                                  \
-        MUST_TAIL return block->ir_head->impl(rv, block->ir_head, cycle, PC); \
+#define LOOKUP_OR_UPDATE_BRANCH_HISTORY_TABLE()                           \
+    IIF(RV32_HAS(SYSTEM))(if (!rv->is_trapped && !reloc_enable_mmu), )    \
+    {                                                                     \
+        block_t *block = cache_get(rv->block_cache, PC, true);            \
+        if (block) {                                                      \
+            IIF(RV32_HAS(SYSTEM))(if (block->satp == rv->csr_satp), )     \
+            {                                                             \
+                for (int i = 0; i < HISTORY_SIZE; i++) {                  \
+
if (ir->branch_table->PC[i] == PC) { \ + IIF(RV32_HAS(SYSTEM)) \ + (if (ir->branch_table->satp[i] == rv->csr_satp), ) \ + { \ + ir->branch_table->times[i]++; \ + if (cache_hot(rv->block_cache, PC)) \ + goto end_op; \ + } \ + } \ + } \ + /* update branch history table */ \ + int min_idx = 0; \ + for (int i = 0; i < HISTORY_SIZE; i++) { \ + if (!ir->branch_table->times[i]) { \ + min_idx = i; \ + break; \ + } else if (ir->branch_table->times[min_idx] > \ + ir->branch_table->times[i]) { \ + min_idx = i; \ + } \ + } \ + ir->branch_table->times[min_idx] = 1; \ + ir->branch_table->PC[min_idx] = PC; \ + IIF(RV32_HAS(SYSTEM)) \ + (ir->branch_table->satp[min_idx] = rv->csr_satp, ); \ + if (cache_hot(rv->block_cache, PC)) \ + goto end_op; \ + MUST_TAIL return block->ir_head->impl(rv, block->ir_head, \ + cycle, PC); \ + } \ + } \ } #endif @@ -359,11 +379,14 @@ RVOP( IIF(RV32_HAS(JIT)) \ ( \ { \ - cache_get(rv->block_cache, PC + 4, true); \ - if (!set_add(&pc_set, PC + 4)) \ - has_loops = true; \ - if (cache_hot(rv->block_cache, PC + 4)) \ - goto nextop; \ + block_t *next = cache_get(rv->block_cache, PC + 4, true); \ + if (next IIF(RV32_HAS(SYSTEM))( \ + &&next->satp == rv->csr_satp, )) { \ + if (!set_add(&pc_set, PC + 4)) \ + has_loops = true; \ + if (cache_hot(rv->block_cache, PC + 4)) \ + goto nextop; \ + } \ }, ); \ PC += 4; \ IIF(RV32_HAS(SYSTEM)) \ @@ -393,11 +416,14 @@ RVOP( IIF(RV32_HAS(JIT)) \ ( \ { \ - cache_get(rv->block_cache, PC, true); \ - if (!set_add(&pc_set, PC)) \ - has_loops = true; \ - if (cache_hot(rv->block_cache, PC)) \ - goto end_op; \ + block_t *next = cache_get(rv->block_cache, PC, true); \ + if (next IIF(RV32_HAS(SYSTEM))( \ + &&next->satp == rv->csr_satp, )) { \ + if (!set_add(&pc_set, PC)) \ + has_loops = true; \ + if (cache_hot(rv->block_cache, PC)) \ + goto end_op; \ + } \ }, ); \ IIF(RV32_HAS(SYSTEM)) \ ( \ @@ -2079,11 +2105,15 @@ RVOP( struct rv_insn *taken = ir->branch_taken; if (taken) { #if RV32_HAS(JIT) - cache_get(rv->block_cache, PC, true); - if (!set_add(&pc_set, PC)) - has_loops = true; - if (cache_hot(rv->block_cache, PC)) - goto end_op; + IIF(RV32_HAS(SYSTEM)) + (block_t *next =, ) cache_get(rv->block_cache, PC, true); + IIF(RV32_HAS(SYSTEM))(if (next->satp == rv->csr_satp), ) + { + if (!set_add(&pc_set, PC)) + has_loops = true; + if (cache_hot(rv->block_cache, PC)) + goto end_op; + } #endif #if RV32_HAS(SYSTEM) @@ -2246,11 +2276,15 @@ RVOP( struct rv_insn *taken = ir->branch_taken; if (taken) { #if RV32_HAS(JIT) - cache_get(rv->block_cache, PC, true); - if (!set_add(&pc_set, PC)) - has_loops = true; - if (cache_hot(rv->block_cache, PC)) - goto end_op; + IIF(RV32_HAS(SYSTEM)) + (block_t *next =, ) cache_get(rv->block_cache, PC, true); + IIF(RV32_HAS(SYSTEM))(if (next->satp == rv->csr_satp), ) + { + if (!set_add(&pc_set, PC)) + has_loops = true; + if (cache_hot(rv->block_cache, PC)) + goto end_op; + } #endif #if RV32_HAS(SYSTEM) if (!rv->is_trapped) @@ -2284,11 +2318,15 @@ RVOP( if (!untaken) goto nextop; #if RV32_HAS(JIT) - cache_get(rv->block_cache, PC + 2, true); - if (!set_add(&pc_set, PC + 2)) - has_loops = true; - if (cache_hot(rv->block_cache, PC + 2)) - goto nextop; + IIF(RV32_HAS(SYSTEM)) + (block_t *next =, ) cache_get(rv->block_cache, PC + 2, true); + IIF(RV32_HAS(SYSTEM))(if (next->satp == rv->csr_satp), ) + { + if (!set_add(&pc_set, PC + 2)) + has_loops = true; + if (cache_hot(rv->block_cache, PC + 2)) + goto nextop; + } #endif PC += 2; #if RV32_HAS(SYSTEM) @@ -2306,11 +2344,15 @@ RVOP( struct rv_insn *taken = ir->branch_taken; if (taken) { #if 
RV32_HAS(JIT)
-        cache_get(rv->block_cache, PC, true);
-        if (!set_add(&pc_set, PC))
-            has_loops = true;
-        if (cache_hot(rv->block_cache, PC))
-            goto end_op;
+        IIF(RV32_HAS(SYSTEM))
+        (block_t *next =, ) cache_get(rv->block_cache, PC, true);
+        IIF(RV32_HAS(SYSTEM))(if (next->satp == rv->csr_satp), )
+        {
+            if (!set_add(&pc_set, PC))
+                has_loops = true;
+            if (cache_hot(rv->block_cache, PC))
+                goto end_op;
+        }
 #endif
 #if RV32_HAS(SYSTEM)
         if (!rv->is_trapped)
@@ -2353,11 +2395,15 @@ RVOP(
     if (!untaken)
         goto nextop;
 #if RV32_HAS(JIT)
-    cache_get(rv->block_cache, PC + 2, true);
-    if (!set_add(&pc_set, PC + 2))
-        has_loops = true;
-    if (cache_hot(rv->block_cache, PC + 2))
-        goto nextop;
+    IIF(RV32_HAS(SYSTEM))
+    (block_t *next =, ) cache_get(rv->block_cache, PC + 2, true);
+    IIF(RV32_HAS(SYSTEM))(if (next->satp == rv->csr_satp), )
+    {
+        if (!set_add(&pc_set, PC + 2))
+            has_loops = true;
+        if (cache_hot(rv->block_cache, PC + 2))
+            goto nextop;
+    }
 #endif
     PC += 2;
 #if RV32_HAS(SYSTEM)
@@ -2375,11 +2421,15 @@ RVOP(
     struct rv_insn *taken = ir->branch_taken;
     if (taken) {
 #if RV32_HAS(JIT)
-        cache_get(rv->block_cache, PC, true);
-        if (!set_add(&pc_set, PC))
-            has_loops = true;
-        if (cache_hot(rv->block_cache, PC))
-            goto end_op;
+        IIF(RV32_HAS(SYSTEM))
+        (block_t *next =, ) cache_get(rv->block_cache, PC, true);
+        IIF(RV32_HAS(SYSTEM))(if (next->satp == rv->csr_satp), )
+        {
+            if (!set_add(&pc_set, PC))
+                has_loops = true;
+            if (cache_hot(rv->block_cache, PC))
+                goto end_op;
+        }
 #endif
 #if RV32_HAS(SYSTEM)
         if (!rv->is_trapped)
diff --git a/src/utils.c b/src/utils.c
index 3199235d..13696855 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -3,6 +3,7 @@
  * "LICENSE" for information on usage and redistribution of this file.
  */
 
+#include <assert.h>
@@ -174,7 +175,11 @@ char *sanitize_path(const char *input)
     return ret;
 }
 
+#if RV32_HAS(SYSTEM) && RV32_HAS(JIT)
+HASH_FUNC_IMPL_64(set_hash_64, SET_SIZE_BITS, 1 << SET_SIZE_BITS);
+#else
 HASH_FUNC_IMPL(set_hash, SET_SIZE_BITS, 1 << SET_SIZE_BITS);
+#endif
 
 void set_reset(set_t *set)
 {
@@ -186,15 +191,25 @@
 * @set: a pointer to the target set
 * @key: the key of the inserted entry
 */
+#if RV32_HAS(SYSTEM) && RV32_HAS(JIT)
+bool set_add(set_t *set, uint64_t key)
+#else
 bool set_add(set_t *set, uint32_t key)
+#endif
 {
+#if RV32_HAS(SYSTEM) && RV32_HAS(JIT)
+    const uint64_t index = set_hash_64(key);
+#else
     const uint32_t index = set_hash(key);
+#endif
+
     uint8_t count = 0;
-    while (set->table[index][count]) {
+    while (count < SET_SLOTS_SIZE && set->table[index][count]) {
         if (set->table[index][count++] == key)
             return false;
     }
+    assert(count < SET_SLOTS_SIZE);
     set->table[index][count] = key;
     return true;
 }
@@ -204,10 +219,19 @@ bool set_add(set_t *set, uint32_t key)
 * @set: a pointer to the target set
 * @key: the key of the inserted entry
 */
+#if RV32_HAS(SYSTEM) && RV32_HAS(JIT)
+bool set_has(set_t *set, uint64_t key)
+#else
 bool set_has(set_t *set, uint32_t key)
+#endif
 {
+#if RV32_HAS(SYSTEM) && RV32_HAS(JIT)
+    const uint64_t index = set_hash_64(key);
+#else
     const uint32_t index = set_hash(key);
-    for (uint8_t count = 0; set->table[index][count]; count++) {
+#endif
+    for (uint8_t count = 0; count < SET_SLOTS_SIZE && set->table[index][count];
+         count++) {
         if (set->table[index][count] == key)
             return true;
     }
diff --git a/src/utils.h b/src/utils.h
index 78f68985..e6f42228 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -24,6 +24,14 @@ void rv_clock_gettime(struct timespec *tp);
         return (val * 0x61C88647 >> (32 - size_bits)) & ((size)
- (1)); \
     }
 
+#define HASH_FUNC_IMPL_64(name, size_bits, size)                        \
+    FORCE_INLINE uint64_t name(uint64_t val)                            \
+    {                                                                   \
+        /* 0x61c8864680b583eb is the 64-bit golden ratio */             \
+        return (val * 0x61c8864680b583ebull >> (64 - size_bits)) &      \
+               ((size) - (1));                                          \
+    }
+
 /* sanitize_path returns the shortest path name equivalent to path
  * by purely lexical processing. It applies the following rules
  * iteratively until no further processing can be done:
@@ -133,11 +141,26 @@ static inline void list_del_init(struct list_head *node)
 #define SET_SIZE (1 << SET_SIZE_BITS)
 #define SET_SLOTS_SIZE 32
 
+/*
+ * Use a composed key in JIT-enabled system simulation. The upper 32 bits
+ * store the value of the supervisor address translation and protection
+ * (SATP) register, and the lower 32 bits store the program counter (PC),
+ * the same as in user-space simulation.
+ */
+#define COMPOSED_KEY(block)                                              \
+    IIF(RV32_HAS(SYSTEM))                                                \
+    (((((uint64_t) (block)->satp) << 32) | (uint64_t) (block)->pc_start), \
+     (uint32_t) (block)->pc_start)
+
 /* The set consists of SET_SIZE buckets, with each bucket containing
  * SET_SLOTS_SIZE slots.
  */
 typedef struct {
+#if RV32_HAS(SYSTEM) && RV32_HAS(JIT)
+    uint64_t table[SET_SIZE][SET_SLOTS_SIZE];
+#else
     uint32_t table[SET_SIZE][SET_SLOTS_SIZE];
+#endif
 } set_t;
 
 /**
@@ -151,11 +174,19 @@ void set_reset(set_t *set);
 * @set: a pointer to the target set
 * @key: the key of the inserted entry
 */
+#if RV32_HAS(SYSTEM) && RV32_HAS(JIT)
+bool set_add(set_t *set, uint64_t key);
+#else
 bool set_add(set_t *set, uint32_t key);
+#endif
 
 /**
 * set_has - check whether the element exists in the set or not
 * @set: a pointer to the target set
 * @key: the key of the entry to look up
 */
+#if RV32_HAS(SYSTEM) && RV32_HAS(JIT)
+bool set_has(set_t *set, uint64_t key);
+#else
 bool set_has(set_t *set, uint32_t key);
+#endif
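
Note: the COMPOSED_KEY and HASH_FUNC_IMPL_64 changes above work together. The following is a minimal standalone sketch of the same idea; the SET_SIZE_BITS value here is illustrative, not the emulator's actual configuration:

    #include <stdint.h>
    #include <stdio.h>

    #define SET_SIZE_BITS 10
    #define SET_SIZE (1 << SET_SIZE_BITS)

    /* Upper 32 bits carry satp, lower 32 bits carry pc_start, so two
     * blocks at the same PC in different address spaces get distinct
     * keys (their hash buckets may still collide). */
    static uint64_t composed_key(uint32_t satp, uint32_t pc_start)
    {
        return ((uint64_t) satp << 32) | pc_start;
    }

    /* Fibonacci hashing with the 64-bit golden-ratio constant, mirroring
     * HASH_FUNC_IMPL_64: multiply, keep the top size_bits bits, mask. */
    static uint64_t set_hash_64(uint64_t val)
    {
        return (val * 0x61c8864680b583ebull >> (64 - SET_SIZE_BITS)) &
               (SET_SIZE - 1);
    }

    int main(void)
    {
        uint32_t pc = 0x00010000;
        /* Same PC under two hypothetical satp values yields two keys. */
        printf("%llu\n",
               (unsigned long long) set_hash_64(composed_key(0x80001, pc)));
        printf("%llu\n",
               (unsigned long long) set_hash_64(composed_key(0x80002, pc)));
        return 0;
    }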
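Several hunks also rely on IIF(RV32_HAS(SYSTEM))(a, b) expanding to a when the feature is enabled and to b otherwise. A sketch of the conventional preprocessor pattern this assumes follows; the emulator defines its own equivalents in its common headers, so the names below are illustrative only:

    #include <stdio.h>

    /* Token-pasting helper: concatenation happens after the argument
     * has been fully macro-expanded. */
    #define PRIMITIVE_CAT(a, ...) a##__VA_ARGS__

    /* IIF(1)(t, f) expands to t; IIF(0)(t, f) expands to f. */
    #define IIF(c) PRIMITIVE_CAT(IIF_, c)
    #define IIF_0(t, f) f
    #define IIF_1(t, f) t

    /* Feature flags must expand to a literal 0 or 1 for this to work. */
    #define RV32_FEATURE_SYSTEM 1
    #define RV32_HAS(x) RV32_FEATURE_##x

    int main(void)
    {
        /* With SYSTEM enabled, the first argument is kept; the second
         * disappears entirely from the token stream. */
        IIF(RV32_HAS(SYSTEM))(puts("system build"), puts("user build"));
        return 0;
    }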
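Finally, the contract between the emitted load/store sequences and jit_mmu_handler() reduces to a plain function call: the JITed code publishes vaddr and the access type, calls the handler, then either takes the MMIO result or loads from mem_base + paddr. A hedged C sketch of that handshake (field names follow the patch; mmu_handler_stub and jit_lw_sketch are hypothetical stand-ins, not emulator functions):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Mirrors the jit_mmu block added to struct riscv_internal. */
    struct jit_mmu_ctx {
        uint32_t is_mmio; /* filled by the handler */
        uint32_t type;    /* rv_insn_* opcode of the access */
        uint32_t vaddr;   /* filled by the emitted code */
        uint32_t paddr;   /* filled by the handler */
    };

    /* Stand-in for jit_mmu_handler(): identity translation, never MMIO,
     * which matches the bare-metal case where csr_satp == 0. */
    static void mmu_handler_stub(struct jit_mmu_ctx *mmu)
    {
        mmu->paddr = mmu->vaddr;
        mmu->is_mmio = 0;
    }

    /* What the code emitted for `lw` effectively performs. */
    static uint32_t jit_lw_sketch(struct jit_mmu_ctx *mmu,
                                  const uint8_t *mem_base,
                                  uint32_t vaddr)
    {
        mmu->vaddr = vaddr;
        mmu->type = 0; /* would be rv_insn_lw in the emulator */
        mmu_handler_stub(mmu);
        /* An MMIO access would already have been completed by the
         * handler, with the value placed in rv->X[rd]. */
        assert(!mmu->is_mmio);
        uint32_t v;
        memcpy(&v, mem_base + mmu->paddr, sizeof(v));
        return v;
    }

    int main(void)
    {
        uint8_t mem[8] = {0xef, 0xbe, 0xad, 0xde};
        struct jit_mmu_ctx mmu = {0};
        /* Prints 0xdeadbeef on a little-endian host. */
        printf("0x%08x\n", (unsigned) jit_lw_sketch(&mmu, mem, 0));
        return 0;
    }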