Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow JIT compilation for system emulation #521

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,3 @@ tests/arch-test-target/config.ini
tests/arch-test-target/sail_cSim/riscv_sim_RV32
tests/scimark2/
__pycache__/
src/rv32_jit.c
vacantron marked this conversation as resolved.
Show resolved Hide resolved
14 changes: 7 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ CFLAGS = -std=gnu99 -O2 -Wall -Wextra
CFLAGS += -Wno-unused-label
CFLAGS += -include src/common.h -Isrc/

OBJS_EXT :=

# In the system test suite, the executable is an ELF file (e.g., MMU).
# However, the Linux kernel emulation includes the Image, DT, and
# root filesystem (rootfs). Therefore, the test suite needs this
Expand All @@ -29,6 +31,9 @@ $(call set-feature, BLOCK_CHAINING)
# Enable system emulation
ENABLE_SYSTEM ?= 0
$(call set-feature, SYSTEM)
ifeq ($(call has, SYSTEM), 1)
OBJS_EXT += system.o
endif

# Enable link-time optimization (LTO)
ENABLE_LTO ?= 1
Expand Down Expand Up @@ -59,8 +64,6 @@ endif
# Disable Intel's Control-flow Enforcement Technology (CET)
CFLAGS += $(CFLAGS_NO_CET)

OBJS_EXT :=

# Integer Multiplication and Division instructions
ENABLE_EXT_M ?= 1
$(call set-feature, EXT_M)
Expand Down Expand Up @@ -185,7 +188,7 @@ ifeq ($(call has, JIT), 1)
ifeq ("$(CHECK_LLVM_LIBS)", "0")
OBJS_EXT += t2c.o
CFLAGS += -g $(shell $(LLVM_CONFIG) --cflags)
LDFLAGS += $(shell $(LLVM_CONFIG) --libs)
LDFLAGS += $(shell $(LLVM_CONFIG) --libfiles)
else
$(error No llvm-config-18 installed. Check llvm-config-18 installation in advance, or use "ENABLE_T2C=0" to disable tier-2 LLVM compiler)
endif
Expand All @@ -194,9 +197,6 @@ ifeq ($(call has, JIT), 1)
$(error JIT mode only supports for x64 and arm64 target currently.)
endif

src/rv32_jit.c:
$(Q)tools/gen-jit-template.py $(CFLAGS) > $@

$(OUT)/jit.o: src/jit.c src/rv32_jit.c
$(VECHO) " CC\t$@\n"
$(Q)$(CC) -o $@ $(CFLAGS) -c -MMD -MF $@.d $<
Expand Down Expand Up @@ -353,7 +353,7 @@ endif
endif

clean:
$(RM) $(BIN) $(OBJS) $(DEV_OBJS) $(BUILD_DTB) $(HIST_BIN) $(HIST_OBJS) $(deps) $(WEB_FILES) $(CACHE_OUT) src/rv32_jit.c
$(RM) $(BIN) $(OBJS) $(DEV_OBJS) $(BUILD_DTB) $(HIST_BIN) $(HIST_OBJS) $(deps) $(WEB_FILES) $(CACHE_OUT)
distclean: clean
-$(RM) $(DOOM_DATA) $(QUAKE_DATA) $(BUILDROOT_DATA) $(LINUX_DATA)
$(RM) -r $(OUT)/linux-image
Expand Down
3 changes: 3 additions & 0 deletions src/decode.h
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,9 @@ typedef struct {
struct rv_insn *target[HISTORY_SIZE];
#else
uint32_t times[HISTORY_SIZE];
#if RV32_HAS(SYSTEM)
uint32_t satp[HISTORY_SIZE];
#endif
#endif
} branch_history_table_t;

Expand Down
71 changes: 58 additions & 13 deletions src/emulate.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,9 @@ extern struct target_ops gdbstub_ops;
#define IF_imm(i, v) (i->imm == v)

#if RV32_HAS(SYSTEM)
#if !RV32_HAS(JIT)
static bool need_clear_block_map = false;
#endif
static uint32_t reloc_enable_mmu_jalr_addr;
static bool reloc_enable_mmu = false;
bool need_retranslate = false;
Expand Down Expand Up @@ -704,6 +706,7 @@ static inline void remove_next_nth_ir(const riscv_t *rv,
* Strategies are being devised to increase the number of instructions that
* match the pattern, including possible instruction reordering.
*/
#if RV32_HAS(MOP_FUSION)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wrap the preceding comment inside the `#if` guard as well.

static void match_pattern(riscv_t *rv, block_t *block)
{
uint32_t i;
Expand Down Expand Up @@ -795,7 +798,7 @@ static void match_pattern(riscv_t *rv, block_t *block)
}
}
}

#endif
typedef struct {
bool is_constant[N_RV_REGS];
uint32_t const_val[N_RV_REGS];
Expand Down Expand Up @@ -838,12 +841,11 @@ static block_t *block_find_or_translate(riscv_t *rv)
block_t *next_blk = block_find(map, rv->PC);
#else
/* lookup the next block in the block cache */
/*
* The function "cache_get()" gets the cached block by the given "key (PC)".
* In system simulation, the returned block might be dropped because it is
* not the one from the current process (by checking SATP CSR register).
*/
block_t *next_blk = (block_t *) cache_get(rv->block_cache, rv->PC, true);
#if RV32_HAS(SYSTEM)
if (next_blk && next_blk->satp != rv->csr_satp)
next_blk = NULL;
#endif
Comment on lines +845 to +848
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add comments for SATP.

#endif

if (next_blk)
Expand All @@ -861,12 +863,20 @@ static block_t *block_find_or_translate(riscv_t *rv)

block_translate(rv, next_blk);

#if RV32_HAS(JIT) && RV32_HAS(SYSTEM)
/*
* There may be an ifetch fault which changes satp, so do not do this
* in "block_alloc()".
*/
next_blk->satp = rv->csr_satp;
#endif

optimize_constant(rv, next_blk);

#if RV32_HAS(GDBSTUB)
if (likely(!rv->debug_mode))
#endif
#if RV32_HAS(MOP_FUSION)
/* macro operation fusion */
#if RV32_HAS(GDBSTUB) || RV32_HAS(MOP_FUSION)
match_pattern(rv, next_blk);
#endif

Expand All @@ -890,8 +900,6 @@ static block_t *block_find_or_translate(riscv_t *rv)
return next_blk;
}

list_del_init(&replaced_blk->list);

if (prev == replaced_blk)
prev = NULL;

Expand All @@ -910,6 +918,32 @@ static block_t *block_find_or_translate(riscv_t *rv)
if (untaken == replaced_blk_entry) {
entry->ir_tail->branch_untaken = NULL;
}

/* update JALR LUT */
if (!entry->ir_tail->branch_table)
continue;

#if 0
/*
* This branch lookup update is currently unused, since we only get the PC
* from the table and use the function "cache_get()" to achieve the branch
* prediction of T1C. However, if the structure "branch_table_t" is changed
* to reference blocks directly, this update becomes necessary to avoid
* using freed blocks.
*/
Comment on lines +926 to +933
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't get it. When is it safe to release the resources?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As the comment above says, the current workflow is safe. However, if the implementation of branch_table is changed (e.g., to store the candidate blocks directly instead of their program counters), this update can no longer be skipped.

for (int i = 0; i < HISTORY_SIZE; i++) {
if (entry->ir_tail->branch_table->PC[i] == replaced_blk->pc_start) {
IIF(RV32_HAS(SYSTEM))
(if (entry->ir_tail->branch_table->satp[i] ==
replaced_blk->satp), )
{
entry->ir_tail->branch_table->PC[i] =
entry->ir_tail->branch_table->satp[i] =
entry->ir_tail->branch_table->times[i] = 0;
}
}
}
#endif
}

/* free IRs in replaced block */
Expand All @@ -923,6 +957,7 @@ static block_t *block_find_or_translate(riscv_t *rv)
mpool_free(rv->block_ir_mp, ir);
}

list_del_init(&replaced_blk->list);
mpool_free(rv->block_mp, replaced_blk);
#if RV32_HAS(T2C)
pthread_mutex_unlock(&rv->cache_lock);
Expand All @@ -941,6 +976,10 @@ static bool runtime_profiler(riscv_t *rv, block_t *block)
* we posit that our profiler could effectively identify hotspots using
* three key indicators.
*/
#if RV32_HAS(SYSTEM)
if (block->satp != rv->csr_satp)
return false;
#endif
uint32_t freq = cache_freq(rv->block_cache, block->pc_start);
/* To profile a block after chaining, it must first be executed. */
if (unlikely(freq >= 2 && block->has_loops))
Expand Down Expand Up @@ -1022,15 +1061,21 @@ void rv_step(void *arg)
block_t *block = block_find_or_translate(rv);
/* by now, a block should be available */
assert(block);
#if RV32_HAS(JIT) && RV32_HAS(SYSTEM)
assert(block->satp == rv->csr_satp);
#endif

/* After emulating the previous block, it is determined whether the
* branch is taken or not. The IR array of the current block is then
* assigned to either the branch_taken or branch_untaken pointer of
* the previous block.
*/

#if RV32_HAS(BLOCK_CHAINING)
if (prev) {
if (prev
#if RV32_HAS(JIT) && RV32_HAS(SYSTEM)
&& prev->satp == rv->csr_satp
#endif
) {
rv_insn_t *last_ir = prev->ir_tail;
/* chain block */
if (!insn_is_unconditional_branch(last_ir->opcode)) {
Expand All @@ -1048,7 +1093,7 @@ void rv_step(void *arg)
#endif
last_pc = rv->PC;
#if RV32_HAS(JIT)
#if RV32_HAS(T2C)
#if RV32_HAS(T2C) && !RV32_HAS(SYSTEM)
/* executed through the tier-2 JIT compiler */
if (block->hot2) {
((exec_t2c_func_t) block->func)(rv);
Expand Down
Loading
Loading