From e466d418a938d5575da4109b0d5bf1cb37119ab3 Mon Sep 17 00:00:00 2001 From: Zoltan Herczeg Date: Wed, 3 Apr 2024 07:23:18 +0000 Subject: [PATCH] JIT compiler update --- .../allocator_src/sljitExecAllocatorApple.c | 6 +- .../allocator_src/sljitExecAllocatorCore.c | 19 +- src/sljit/sljitConfigInternal.h | 47 ++++- src/sljit/sljitLir.c | 67 ++++--- src/sljit/sljitLir.h | 25 ++- src/sljit/sljitNativeARM_32.c | 132 +++++++----- src/sljit/sljitNativeARM_64.c | 103 ++++++---- src/sljit/sljitNativeARM_T2_32.c | 98 +++++---- src/sljit/sljitNativeLOONGARCH_64.c | 189 +++++++++--------- src/sljit/sljitNativeMIPS_32.c | 6 +- src/sljit/sljitNativeMIPS_64.c | 6 +- src/sljit/sljitNativeMIPS_common.c | 118 ++++++----- src/sljit/sljitNativePPC_common.c | 94 +++++---- src/sljit/sljitNativeRISCV_common.c | 150 +++++++------- src/sljit/sljitNativeS390X.c | 55 +++-- src/sljit/sljitNativeX86_32.c | 99 +++++++-- src/sljit/sljitNativeX86_64.c | 42 +++- src/sljit/sljitNativeX86_common.c | 175 ++++++++-------- src/sljit/sljitSerialize.c | 20 +- 19 files changed, 885 insertions(+), 566 deletions(-) diff --git a/src/sljit/allocator_src/sljitExecAllocatorApple.c b/src/sljit/allocator_src/sljitExecAllocatorApple.c index 95b9842fa..9bd2094f4 100644 --- a/src/sljit/allocator_src/sljitExecAllocatorApple.c +++ b/src/sljit/allocator_src/sljitExecAllocatorApple.c @@ -41,9 +41,10 @@ #include #include -#define SLJIT_MAP_JIT (get_map_jit_flag()) #define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) +#ifdef MAP_JIT +#define SLJIT_MAP_JIT (get_map_jit_flag()) static SLJIT_INLINE int get_map_jit_flag(void) { size_t page_size; @@ -70,6 +71,9 @@ static SLJIT_INLINE int get_map_jit_flag(void) } return map_jit_flag; } +#else /* !defined(MAP_JIT) */ +#define SLJIT_MAP_JIT (0) +#endif #elif defined(SLJIT_CONFIG_ARM) && SLJIT_CONFIG_ARM diff --git a/src/sljit/allocator_src/sljitExecAllocatorCore.c b/src/sljit/allocator_src/sljitExecAllocatorCore.c index 85f3a9d1e..4e1119bc4 100644 --- a/src/sljit/allocator_src/sljitExecAllocatorCore.c +++ b/src/sljit/allocator_src/sljitExecAllocatorCore.c @@ -181,8 +181,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size) header->executable_offset = free_block->header.executable_offset; #endif /* SLJIT_HAS_EXECUTABLE_OFFSET */ AS_BLOCK_HEADER(header, size)->prev_size = size; - } - else { + } else { sljit_remove_free_block(free_block); header = (struct block_header*)free_block; size = chunk_size; @@ -230,26 +229,25 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size) #endif /* SLJIT_HAS_EXECUTABLE_OFFSET */ sljit_insert_free_block(free_block, chunk_size); next_header = AS_BLOCK_HEADER(free_block, chunk_size); - } - else { + } else { /* All space belongs to this allocation. */ allocated_size += chunk_size; header->size = chunk_size; next_header = AS_BLOCK_HEADER(header, chunk_size); } - SLJIT_ALLOCATOR_UNLOCK(); next_header->size = 1; next_header->prev_size = chunk_size; #ifdef SLJIT_HAS_EXECUTABLE_OFFSET next_header->executable_offset = executable_offset; #endif /* SLJIT_HAS_EXECUTABLE_OFFSET */ + SLJIT_ALLOCATOR_UNLOCK(); return MEM_START(header); } -SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr) +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void *ptr) { struct block_header *header; - struct free_block* free_block; + struct free_block *free_block; SLJIT_ALLOCATOR_LOCK(); header = AS_BLOCK_HEADER(ptr, -(sljit_sw)sizeof(struct block_header)); @@ -269,8 +267,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr) free_block->size += header->size; header = AS_BLOCK_HEADER(free_block, free_block->size); header->prev_size = free_block->size; - } - else { + } else { free_block = (struct free_block*)header; sljit_insert_free_block(free_block, header->size); } @@ -323,8 +320,8 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void) } #ifdef SLJIT_HAS_EXECUTABLE_OFFSET -SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr) +SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void *code) { - return ((struct block_header *)(ptr))[-1].executable_offset; + return ((struct block_header*)SLJIT_CODE_TO_PTR(code))[-1].executable_offset; } #endif /* SLJIT_HAS_EXECUTABLE_OFFSET */ diff --git a/src/sljit/sljitConfigInternal.h b/src/sljit/sljitConfigInternal.h index f12505e9c..de06dd8e0 100644 --- a/src/sljit/sljitConfigInternal.h +++ b/src/sljit/sljitConfigInternal.h @@ -49,8 +49,8 @@ extern "C" { sljit_s16, sljit_u16 : signed and unsigned 16 bit integer type sljit_s32, sljit_u32 : signed and unsigned 32 bit integer type sljit_sw, sljit_uw : signed and unsigned machine word, enough to store a pointer - sljit_p : unsgined pointer value (usually the same as sljit_uw, but - some 64 bit ABIs may use 32 bit pointers) + sljit_sp, sljit_up : signed and unsigned pointer value (usually the same as + sljit_uw, but some 64 bit ABIs may use 32 bit pointers) sljit_f32 : 32 bit single precision floating point value sljit_f64 : 64 bit double precision floating point value @@ -98,6 +98,10 @@ extern "C" { SLJIT_TMP_R(i) : accessing temporary registers SLJIT_TMP_FR0 .. FR9 : accessing temporary floating point registers SLJIT_TMP_FR(i) : accessing temporary floating point registers + SLJIT_TMP_DEST_REG : a temporary register for results + SLJIT_TMP_MEM_REG : a temporary base register for accessing memory + (can be the same as SLJIT_TMP_DEST_REG) + SLJIT_TMP_DEST_FREG : a temporary register for float results SLJIT_FUNC : calling convention attribute for both calling JIT from C and C calling back from JIT SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (platform independent helper) SLJIT_F64_SECOND(reg) : provides the register index of the second 32 bit part of a 64 bit @@ -358,7 +362,8 @@ typedef long int sljit_sw; #endif /* _WIN32 */ #endif -typedef sljit_uw sljit_p; +typedef sljit_sw sljit_sp; +typedef sljit_uw sljit_up; /* Floating point types. */ typedef float sljit_f32; @@ -561,9 +566,9 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void); #endif /* SLJIT_FREE_EXEC */ #if (defined SLJIT_PROT_EXECUTABLE_ALLOCATOR && SLJIT_PROT_EXECUTABLE_ALLOCATOR) -SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); -#define SLJIT_EXEC_OFFSET(ptr) sljit_exec_offset(ptr) -#endif +SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void *code); +#define SLJIT_EXEC_OFFSET(code) sljit_exec_offset(code) +#endif /* SLJIT_PROT_EXECUTABLE_ALLOCATOR */ #endif /* SLJIT_EXECUTABLE_ALLOCATOR */ @@ -583,6 +588,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 7 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0 #define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 1 +#define SLJIT_TMP_DEST_REG SLJIT_TMP_R0 +#define SLJIT_TMP_MEM_REG SLJIT_TMP_R0 +#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0 #define SLJIT_LOCALS_OFFSET_BASE (8 * SSIZE_OF(sw)) #define SLJIT_PREF_SHIFT_REG SLJIT_R2 #define SLJIT_MASKED_SHIFT 1 @@ -603,6 +611,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 10 #define SLJIT_LOCALS_OFFSET_BASE (4 * SSIZE_OF(sw)) #endif /* !_WIN64 */ +#define SLJIT_TMP_DEST_REG SLJIT_TMP_R0 +#define SLJIT_TMP_MEM_REG SLJIT_TMP_R0 +#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0 #define SLJIT_PREF_SHIFT_REG SLJIT_R3 #define SLJIT_MASKED_SHIFT 1 #define SLJIT_MASKED_SHIFT32 1 @@ -615,6 +626,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 14 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 #define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2 +#define SLJIT_TMP_DEST_REG SLJIT_TMP_R1 +#define SLJIT_TMP_MEM_REG SLJIT_TMP_R1 +#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0 #define SLJIT_LOCALS_OFFSET_BASE 0 #elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) @@ -625,6 +639,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 #define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2 +#define SLJIT_TMP_DEST_REG SLJIT_TMP_R0 +#define SLJIT_TMP_MEM_REG SLJIT_TMP_R0 +#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0 #define SLJIT_LOCALS_OFFSET_BASE (2 * (sljit_s32)sizeof(sljit_sw)) #define SLJIT_MASKED_SHIFT 1 #define SLJIT_MASKED_SHIFT32 1 @@ -637,6 +654,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 18 #define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2 +#define SLJIT_TMP_DEST_REG SLJIT_TMP_R1 +#define SLJIT_TMP_MEM_REG SLJIT_TMP_R1 +#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) || (defined _AIX) #define SLJIT_LOCALS_OFFSET_BASE ((6 + 8) * (sljit_s32)sizeof(sljit_sw)) #elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) @@ -661,6 +681,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #endif #define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 5 #define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 3 +#define SLJIT_TMP_DEST_REG SLJIT_TMP_R1 +#define SLJIT_TMP_MEM_REG SLJIT_TMP_R1 +#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0 #define SLJIT_MASKED_SHIFT 1 #define SLJIT_MASKED_SHIFT32 1 @@ -672,6 +695,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 12 #define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2 +#define SLJIT_TMP_DEST_REG SLJIT_TMP_R1 +#define SLJIT_TMP_MEM_REG SLJIT_TMP_R1 +#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0 #define SLJIT_LOCALS_OFFSET_BASE 0 #define SLJIT_MASKED_SHIFT 1 #define SLJIT_MASKED_SHIFT32 1 @@ -705,6 +731,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 15 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 #define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 1 +#define SLJIT_TMP_DEST_REG SLJIT_TMP_R0 +#define SLJIT_TMP_MEM_REG SLJIT_TMP_R2 +#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0 #define SLJIT_LOCALS_OFFSET_BASE SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE #define SLJIT_MASKED_SHIFT 1 @@ -716,6 +745,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 12 #define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2 +#define SLJIT_TMP_DEST_REG SLJIT_TMP_R1 +#define SLJIT_TMP_MEM_REG SLJIT_TMP_R1 +#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0 #define SLJIT_LOCALS_OFFSET_BASE 0 #define SLJIT_MASKED_SHIFT 1 #define SLJIT_MASKED_SHIFT32 1 @@ -729,6 +761,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 0 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0 #define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 0 +#define SLJIT_TMP_DEST_REG 0 +#define SLJIT_TMP_MEM_REG 0 +#define SLJIT_TMP_DEST_FREG 0 #define SLJIT_LOCALS_OFFSET_BASE 0 #endif diff --git a/src/sljit/sljitLir.c b/src/sljit/sljitLir.c index 71ad11091..6228f10b7 100644 --- a/src/sljit/sljitLir.c +++ b/src/sljit/sljitLir.c @@ -318,6 +318,14 @@ /* Utils can still be used even if SLJIT_CONFIG_UNSUPPORTED is set. */ #include "sljitUtils.c" +#if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) +#define SLJIT_CODE_TO_PTR(code) ((void*)((sljit_up)(code) & ~(sljit_up)0x1)) +#elif (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) +#define SLJIT_CODE_TO_PTR(code) ((void*)(*(sljit_up*)code)) +#else /* !SLJIT_CONFIG_ARM_THUMB2 && !SLJIT_INDIRECT_CALL */ +#define SLJIT_CODE_TO_PTR(code) ((void*)(code)) +#endif /* SLJIT_CONFIG_ARM_THUMB2 || SLJIT_INDIRECT_CALL */ + #if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) #if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) @@ -457,10 +465,11 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allo sizeof(sljit_s8) == 1 && sizeof(sljit_u8) == 1 && sizeof(sljit_s16) == 2 && sizeof(sljit_u16) == 2 && sizeof(sljit_s32) == 4 && sizeof(sljit_u32) == 4 - && (sizeof(sljit_p) == 4 || sizeof(sljit_p) == 8) - && sizeof(sljit_p) <= sizeof(sljit_sw) + && (sizeof(sljit_up) == 4 || sizeof(sljit_up) == 8) + && sizeof(sljit_up) <= sizeof(sljit_sw) + && sizeof(sljit_up) == sizeof(sljit_sp) && (sizeof(sljit_sw) == 4 || sizeof(sljit_sw) == 8) - && (sizeof(sljit_uw) == 4 || sizeof(sljit_uw) == 8), + && (sizeof(sljit_uw) == sizeof(sljit_sw)), invalid_integer_types); SLJIT_COMPILE_ASSERT(SLJIT_REWRITABLE_JUMP != SLJIT_32, rewritable_jump_and_single_op_must_not_be_the_same); @@ -565,31 +574,12 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_compiler_memory_error(struct sljit_compi compiler->error = SLJIT_ERR_ALLOC_FAILED; } -#if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) -SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code, void *exec_allocator_data) -{ - SLJIT_UNUSED_ARG(exec_allocator_data); - - /* Remove thumb mode flag. */ - SLJIT_FREE_EXEC((void*)((sljit_uw)code & ~(sljit_uw)0x1), exec_allocator_data); -} -#elif (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) -SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code, void *exec_allocator_data) -{ - SLJIT_UNUSED_ARG(exec_allocator_data); - - /* Resolve indirection. */ - code = (void*)(*(sljit_uw*)code); - SLJIT_FREE_EXEC(code, exec_allocator_data); -} -#else SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code, void *exec_allocator_data) { SLJIT_UNUSED_ARG(exec_allocator_data); - SLJIT_FREE_EXEC(code, exec_allocator_data); + SLJIT_FREE_EXEC(SLJIT_CODE_TO_PTR(code), exec_allocator_data); } -#endif SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label) { @@ -1163,6 +1153,10 @@ static const char* op2_names[] = { "ashr", "mashr", "rotl", "rotr" }; +static const char* op2r_names[] = { + "muladd" +}; + static const char* op_src_dst_names[] = { "fast_return", "skip_frames_before_fast_return", "prefetch_l1", "prefetch_l2", @@ -1693,6 +1687,33 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler CHECK_RETURN_OK; } +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT((op | SLJIT_32) == SLJIT_MULADD32); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg)); + FUNCTION_CHECK_SRC(src1, src1w); + FUNCTION_CHECK_SRC(src2, src2w); + compiler->last_flags = 0; +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s ", op2r_names[GET_OPCODE(op) - SLJIT_OP2R_BASE], !(op & SLJIT_32) ? "" : "32"); + + sljit_verbose_reg(compiler, dst_reg); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src2, src2w); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst_reg, sljit_s32 src1_reg, diff --git a/src/sljit/sljitLir.h b/src/sljit/sljitLir.h index 3877bb995..b73dd16bc 100644 --- a/src/sljit/sljitLir.h +++ b/src/sljit/sljitLir.h @@ -872,7 +872,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *c int | 4 byte (physical_address & 0x3 == 0) word | 4 byte if SLJIT_32BIT_ARCHITECTURE is defined and its value is 1 | 8 byte if SLJIT_64BIT_ARCHITECTURE is defined and its value is 1 - pointer | size of sljit_p type (4 byte on 32 bit machines, 4 or 8 byte + pointer | size of sljit_up type (4 byte on 32 bit machines, 4 or 8 byte | on 64 bit machines) Note: Different architectures have different addressing limitations. @@ -1108,7 +1108,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile S16 - signed 16 bit data transfer U32 - unsigned int (32 bit) data transfer S32 - signed int (32 bit) data transfer - P - pointer (sljit_p) data transfer + P - pointer (sljit_up) data transfer */ /* Flags: - (does not modify flags) */ @@ -1275,6 +1275,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w); +/* Starting index of opcodes for sljit_emit_op2r. */ +#define SLJIT_OP2R_BASE 96 + +/* Flags: - (may destroy flags) */ +#define SLJIT_MULADD (SLJIT_OP2R_BASE + 0) +#define SLJIT_MULADD32 (SLJIT_MULADD | SLJIT_32) + +/* Similar to sljit_emit_fop2, except the destination is always a register. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w); + /* Emit a left or right shift operation, where the bits shifted in comes from a separate source operand. All operands are interpreted as unsigned integers. @@ -1322,7 +1335,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler * /* Starting index of opcodes for sljit_emit_op_src and sljit_emit_op_dst. */ -#define SLJIT_OP_SRC_DST_BASE 96 +#define SLJIT_OP_SRC_DST_BASE 112 /* Fast return, see SLJIT_FAST_CALL for more details. Note: src cannot be an immedate value @@ -1374,7 +1387,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *comp sljit_s32 dst, sljit_sw dstw); /* Starting index of opcodes for sljit_emit_fop1. */ -#define SLJIT_FOP1_BASE 128 +#define SLJIT_FOP1_BASE 144 /* Flags: - (does not modify flags) */ #define SLJIT_MOV_F64 (SLJIT_FOP1_BASE + 0) @@ -1419,7 +1432,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil sljit_s32 src, sljit_sw srcw); /* Starting index of opcodes for sljit_emit_fop2. */ -#define SLJIT_FOP2_BASE 160 +#define SLJIT_FOP2_BASE 176 /* Flags: - (may destroy flags) */ #define SLJIT_ADD_F64 (SLJIT_FOP2_BASE + 0) @@ -1440,7 +1453,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil sljit_s32 src2, sljit_sw src2w); /* Starting index of opcodes for sljit_emit_fop2r. */ -#define SLJIT_FOP2R_BASE 168 +#define SLJIT_FOP2R_BASE 192 /* Flags: - (may destroy flags) */ #define SLJIT_COPYSIGN_F64 (SLJIT_FOP2R_BASE + 0) diff --git a/src/sljit/sljitNativeARM_32.c b/src/sljit/sljitNativeARM_32.c index 90444de26..a253c06f0 100644 --- a/src/sljit/sljitNativeARM_32.c +++ b/src/sljit/sljitNativeARM_32.c @@ -120,6 +120,7 @@ static const sljit_u8 freg_ebit_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) #define LDREX 0xe1900f9f #define LDREXB 0xe1d00f9f #define LDREXH 0xe1f00f9f +#define MLA 0xe0200090 #define MOV 0xe1a00000 #define MUL 0xe0000090 #define MVN 0xe1e00000 @@ -968,7 +969,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } while (buf); if (label && label->size == word_count) { - label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); label->size = (sljit_uw)(code_ptr - code); label = label->next; } @@ -1207,7 +1208,7 @@ static const sljit_ins data_transfer_insts[16] = { /* Inverted immediate. */ #define INV_IMM 0x02 /* Source and destination is register. */ -#define MOVE_REG_CONV 0x04 +#define REGISTER_OP 0x04 /* Unused return value. */ #define UNUSED_RETURN 0x08 /* SET_FLAGS must be (1 << 20) as it is also the value of S bit (can be used for optimization). */ @@ -1592,7 +1593,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *c static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, sljit_uw dst, sljit_uw src1, sljit_uw src2) { - sljit_s32 is_masked; + sljit_s32 reg, is_masked; sljit_uw shift_type; switch (op) { @@ -1609,7 +1610,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_MOV_U8: case SLJIT_MOV_S8: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); - if (flags & MOVE_REG_CONV) + if (flags & REGISTER_OP) return push_inst(compiler, (op == SLJIT_MOV_U8 ? UXTB : SXTB) | RD(dst) | RM(src2)); if (dst != src2) { @@ -1621,7 +1622,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_MOV_U16: case SLJIT_MOV_S16: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); - if (flags & MOVE_REG_CONV) + if (flags & REGISTER_OP) return push_inst(compiler, (op == SLJIT_MOV_U16 ? UXTH : SXTH) | RD(dst) | RM(src2)); if (dst != src2) { @@ -1637,11 +1638,11 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_CTZ: SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM)); - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); + SLJIT_ASSERT(src1 == TMP_REG1 && src2 != TMP_REG2 && !(flags & ARGS_SWAPPED)); #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) - FAIL_IF(push_inst(compiler, RSB | SRC2_IMM | RD(TMP_REG1) | RN(src2) | 0)); - FAIL_IF(push_inst(compiler, AND | RD(TMP_REG2) | RN(src2) | RM(TMP_REG1))); - FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(TMP_REG2))); + FAIL_IF(push_inst(compiler, RSB | SRC2_IMM | RD(TMP_REG2) | RN(src2) | 0)); + FAIL_IF(push_inst(compiler, AND | RD(TMP_REG1) | RN(src2) | RM(TMP_REG2))); + FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(TMP_REG1))); FAIL_IF(push_inst(compiler, CMP | SET_FLAGS | SRC2_IMM | RN(dst) | 32)); return push_inst(compiler, (EOR ^ 0xf0000000) | SRC2_IMM | RD(dst) | RN(dst) | 0x1f); #else /* !SLJIT_CONFIG_ARM_V6 */ @@ -1657,9 +1658,9 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_REV_U16: case SLJIT_REV_S16: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED) && src2 != TMP_REG1 && dst != TMP_REG1); + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); FAIL_IF(push_inst(compiler, REV16 | RD(dst) | RM(src2))); - if (dst == TMP_REG2 || (src2 == TMP_REG2 && op == SLJIT_REV_U16)) + if (!(flags & REGISTER_OP)) return SLJIT_SUCCESS; return push_inst(compiler, (op == SLJIT_REV_U16 ? UXTH : SXTH) | RD(dst) | RM(dst)); case SLJIT_ADD: @@ -1695,10 +1696,11 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl if (!(flags & SET_FLAGS)) return push_inst(compiler, MUL | RN(dst) | RM8(src2) | RM(src1)); - FAIL_IF(push_inst(compiler, SMULL | RN(TMP_REG1) | RD(dst) | RM8(src2) | RM(src1))); + reg = dst == TMP_REG1 ? TMP_REG2 : TMP_REG1; + FAIL_IF(push_inst(compiler, SMULL | RN(reg) | RD(dst) | RM8(src2) | RM(src1))); /* cmp TMP_REG1, dst asr #31. */ - return push_inst(compiler, CMP | SET_FLAGS | RN(TMP_REG1) | RM(dst) | 0xfc0); + return push_inst(compiler, CMP | SET_FLAGS | RN(reg) | RM(dst) | 0xfc0); case SLJIT_AND: if ((flags & (UNUSED_RETURN | INV_IMM)) == UNUSED_RETURN) @@ -1748,6 +1750,9 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl is_masked = 0; break; + case SLJIT_MULADD: + return push_inst(compiler, MLA | RN(dst) | RD(dst) | RM8(src2) | RM(src1)); + default: SLJIT_UNREACHABLE(); return SLJIT_SUCCESS; @@ -2067,6 +2072,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 sljit_s32 dst_reg; sljit_s32 src1_reg = 0; sljit_s32 src2_reg = 0; + sljit_s32 src2_tmp_reg = 0; sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0; sljit_s32 neg_op = 0; sljit_u32 imm2; @@ -2076,7 +2082,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 if (flags & SET_FLAGS) inp_flags &= ~ALLOW_DOUBLE_IMM; - if (dst == TMP_REG2) + if (dst == TMP_REG1) flags |= UNUSED_RETURN; SLJIT_ASSERT(!(inp_flags & ALLOW_INV_IMM) || (inp_flags & ALLOW_IMM)); @@ -2162,17 +2168,6 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 } } while(0); - /* Source 1. */ - if (FAST_IS_REG(src1)) - src1_reg = src1; - else if (src1 & SLJIT_MEM) { - FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1)); - src1_reg = TMP_REG1; - } else if (!(inp_flags & ALLOW_DOUBLE_IMM) || src2_reg != 0 || op == SLJIT_SUB || op == SLJIT_SUBC) { - FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w)); - src1_reg = TMP_REG1; - } - /* Destination. */ dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG2; @@ -2182,21 +2177,44 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 inp_flags &= ~SIGNED; if (FAST_IS_REG(src2)) - return emit_op_mem(compiler, inp_flags, src2, dst, dstw, TMP_REG2); + return emit_op_mem(compiler, inp_flags, src2, dst, dstw, TMP_REG1); } if (FAST_IS_REG(src2) && dst_reg != TMP_REG2) - flags |= MOVE_REG_CONV; + flags |= REGISTER_OP; + + src2_tmp_reg = dst_reg; + } else { + if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) { + if (!(dst & SLJIT_MEM) && (!(src2 & SLJIT_MEM) || op == SLJIT_REV_S16)) + flags |= REGISTER_OP; + } + + src2_tmp_reg = FAST_IS_REG(src1) ? TMP_REG1 : TMP_REG2; + } + + if (src2_reg == 0 && (src2 & SLJIT_MEM)) { + src2_reg = src2_tmp_reg; + FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, src2_reg, src2, src2w, TMP_REG1)); + } + + /* Source 1. */ + if (FAST_IS_REG(src1)) + src1_reg = src1; + else if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1)); + src1_reg = TMP_REG1; + } else if (!(inp_flags & ALLOW_DOUBLE_IMM) || src2_reg != 0 || op == SLJIT_SUB || op == SLJIT_SUBC) { + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w)); + src1_reg = TMP_REG1; } /* Source 2. */ if (src2_reg == 0) { - src2_reg = (op <= SLJIT_MOV_P) ? dst_reg : TMP_REG2; + src2_reg = src2_tmp_reg; if (FAST_IS_REG(src2)) src2_reg = src2; - else if (src2 & SLJIT_MEM) - FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, src2_reg, src2, src2w, TMP_REG2)); else if (!(inp_flags & ALLOW_DOUBLE_IMM)) FAIL_IF(load_immediate(compiler, src2_reg, (sljit_uw)src2w)); else { @@ -2216,8 +2234,8 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 } if (src2_reg == 0) { - FAIL_IF(load_immediate(compiler, TMP_REG2, (sljit_uw)src2w)); - src2_reg = TMP_REG2; + FAIL_IF(load_immediate(compiler, src2_tmp_reg, (sljit_uw)src2w)); + src2_reg = src2_tmp_reg; } else { FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src1_reg, (sljit_uw)src2_reg)); src1_reg = dst_reg; @@ -2462,7 +2480,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); SLJIT_SKIP_CHECKS(compiler); - return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w); + return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + switch (GET_OPCODE(op)) { + case SLJIT_MULADD: + return emit_op(compiler, op, 0, dst_reg, 0, src1, src1w, src2, src2w); + } + + return SLJIT_SUCCESS; } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, @@ -2627,8 +2663,8 @@ static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, arg &= ~SLJIT_MEM; if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { - FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (((sljit_ins)argw & 0x3) << 7))); - arg = TMP_REG2; + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (((sljit_ins)argw & 0x3) << 7))); + arg = TMP_REG1; argw = 0; } @@ -2641,25 +2677,25 @@ static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, imm = get_imm((sljit_uw)argw & ~(sljit_uw)0x3fc); if (imm) { - FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | imm)); - return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, (argw & 0x3fc) >> 2)); + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | imm)); + return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, (argw & 0x3fc) >> 2)); } imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0x3fc); if (imm) { argw = -argw; - FAIL_IF(push_inst(compiler, SUB | RD(TMP_REG2) | RN(arg & REG_MASK) | imm)); - return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG2, reg, (argw & 0x3fc) >> 2)); + FAIL_IF(push_inst(compiler, SUB | RD(TMP_REG1) | RN(arg & REG_MASK) | imm)); + return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG1, reg, (argw & 0x3fc) >> 2)); } } if (arg) { - FAIL_IF(load_immediate(compiler, TMP_REG2, (sljit_uw)argw)); - FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(TMP_REG2))); + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)argw)); + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(TMP_REG1))); } else - FAIL_IF(load_immediate(compiler, TMP_REG2, (sljit_uw)argw)); + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)argw)); - return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, 0)); + return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, 0)); } static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, @@ -2769,7 +2805,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil switch (GET_OPCODE(op)) { case SLJIT_MOV_F64: if (src != dst_r) { - if (dst_r != TMP_FREG1) + if (!(dst & SLJIT_MEM)) FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_32, dst_r, src, 0))); else dst_r = src; @@ -2839,7 +2875,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil return push_inst(compiler, EMIT_FPU_OPERATION((VNEG_F32 & ~COND_MASK) | 0xb0000000, op & SLJIT_32, dst_r, dst_r, 0)); } - if (dst_r == TMP_FREG1) + if (dst_r != dst) FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw)); return SLJIT_SUCCESS; @@ -3520,7 +3556,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *comp } if (src1 & SLJIT_MEM) { - FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, (src2_reg != dst_reg) ? dst_reg : TMP_REG1, src1, src1w, TMP_REG2)); + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, (src2_reg != dst_reg) ? dst_reg : TMP_REG1, src1, src1w, TMP_REG1)); if (src2_reg != dst_reg) { src1 = src2_reg; @@ -3583,8 +3619,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *com } if (src1 & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w)); - src1 = TMP_FREG1; + FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) | FPU_LOAD, TMP_FREG2, src1, src1w)); + src1 = TMP_FREG2; } cc = get_cc(compiler, type & ~SLJIT_32); diff --git a/src/sljit/sljitNativeARM_64.c b/src/sljit/sljitNativeARM_64.c index 0c9f2201a..967ff5d6e 100644 --- a/src/sljit/sljitNativeARM_64.c +++ b/src/sljit/sljitNativeARM_64.c @@ -840,7 +840,6 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s imm = (flags & ARG2_IMM) ? arg2 : arg1; switch (op) { - case SLJIT_MUL: case SLJIT_CLZ: case SLJIT_CTZ: case SLJIT_REV: @@ -850,6 +849,8 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s case SLJIT_REV_S32: case SLJIT_ADDC: case SLJIT_SUBC: + case SLJIT_MUL: + case SLJIT_MULADD: /* No form with immediate operand (except imm 0, which is represented by a ZERO register). */ break; @@ -1104,6 +1105,9 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s /* fallthrough */ case SLJIT_ROTR: return push_inst(compiler, (RORV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); + case SLJIT_MULADD: + compiler->status_flags_state = 0; + return push_inst(compiler, (MADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2) | RT2(dst)); default: SLJIT_UNREACHABLE(); return SLJIT_SUCCESS; @@ -1575,7 +1579,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile op = GET_OPCODE(op); if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) { /* Both operands are registers. */ - if (dst_r != TMP_REG1 && FAST_IS_REG(src)) + if (FAST_IS_REG(dst) && FAST_IS_REG(src)) return emit_op_imm(compiler, op | ((op_flags & SLJIT_32) ? INT_OP : 0), dst_r, TMP_REG1, src); switch (op) { @@ -1625,7 +1629,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile else if (!(src & SLJIT_MEM)) dst_r = src; else - FAIL_IF(emit_op_mem(compiler, mem_flags, dst_r, src, srcw, TMP_REG1)); + FAIL_IF(emit_op_mem(compiler, mem_flags, dst_r, src, srcw, TMP_REG2)); if (dst & SLJIT_MEM) return emit_op_mem(compiler, mem_flags | STORE, dst_r, dst, dstw, TMP_REG2); @@ -1687,7 +1691,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile mem_flags = INT_SIZE; } - if (dst == TMP_REG1) + if (dst == TMP_REG2) flags |= UNUSED_RETURN; if (src1 & SLJIT_MEM) { @@ -1725,7 +1729,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); SLJIT_SKIP_CHECKS(compiler); - return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w); + return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w)); + + switch (GET_OPCODE(op)) { + case SLJIT_MULADD: + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, op, dst_reg, 0, src1, src1w, src2, src2w); + } + + return SLJIT_SUCCESS; } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, @@ -1906,18 +1927,18 @@ static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, return push_inst(compiler, STR_FR | type | VT(reg) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0)); - FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((sljit_ins)argw << 10))); - return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG1)); + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((sljit_ins)argw << 10))); + return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG2)); } arg &= REG_MASK; if (!arg) { - FAIL_IF(load_immediate(compiler, TMP_REG1, argw & ~(0xfff << shift))); + FAIL_IF(load_immediate(compiler, TMP_REG2, argw & ~(0xfff << shift))); argw = (argw >> shift) & 0xfff; - return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG1) | ((sljit_ins)argw << 10)); + return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG2) | ((sljit_ins)argw << 10)); } if (argw >= 0 && (argw & ((1 << shift) - 1)) == 0) { @@ -1925,18 +1946,18 @@ static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, return push_inst(compiler, STR_FI | type | VT(reg) | RN(arg) | ((sljit_ins)argw << (10 - shift))); if (argw <= 0xffffff) { - FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(TMP_REG1) | RN(arg) | (((sljit_ins)argw >> 12) << 10))); + FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(TMP_REG2) | RN(arg) | (((sljit_ins)argw >> 12) << 10))); argw = ((argw & 0xfff) >> shift); - return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG1) | ((sljit_ins)argw << 10)); + return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG2) | ((sljit_ins)argw << 10)); } } if (argw <= 255 && argw >= -256) return push_inst(compiler, STUR_FI | type | VT(reg) | RN(arg) | (((sljit_ins)argw & 0x1ff) << 12)); - FAIL_IF(load_immediate(compiler, TMP_REG1, argw)); - return push_inst(compiler, STR_FR | type | VT(reg) | RN(arg) | RM(TMP_REG1)); + FAIL_IF(load_immediate(compiler, TMP_REG2, argw)); + return push_inst(compiler, STR_FR | type | VT(reg) | RN(arg) | RM(TMP_REG2)); } static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, @@ -2063,7 +2084,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil switch (GET_OPCODE(op)) { case SLJIT_MOV_F64: if (src != dst_r) { - if (dst_r != TMP_FREG1) + if (!(dst & SLJIT_MEM)) FAIL_IF(push_inst(compiler, (FMOV ^ inv_bits) | VD(dst_r) | VN(src))); else dst_r = src; @@ -2337,7 +2358,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile jump->flags |= IS_BL; jump->addr = compiler->size; - PTR_FAIL_IF(push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG1))); + PTR_FAIL_IF(push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG2))); /* Maximum number of instructions required for generating a constant. */ compiler->size += JUMP_MAX_SIZE - 1; @@ -2390,7 +2411,7 @@ static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compi PTR_FAIL_IF(push_inst(compiler, (CBZ ^ inv_bits) | (6 << 5) | RT(src))); jump->addr = compiler->size; - PTR_FAIL_IF(push_inst(compiler, BR | RN(TMP_REG1))); + PTR_FAIL_IF(push_inst(compiler, BR | RN(TMP_REG2))); /* Maximum number of instructions required for generating a constant. */ compiler->size += JUMP_MAX_SIZE - 1; @@ -2422,7 +2443,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi jump->addr = compiler->size; /* Maximum number of instructions required for generating a constant. */ compiler->size += JUMP_MAX_SIZE - 1; - return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG1)); + return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG2)); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, @@ -2470,7 +2491,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co if (GET_OPCODE(op) < SLJIT_ADD) { FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(dst_r) | RN(TMP_ZERO) | RM(TMP_ZERO))); - if (dst_r == TMP_REG1) { + if (dst & SLJIT_MEM) { mem_flags = (GET_OPCODE(op) == SLJIT_MOV ? WORD_SIZE : INT_SIZE) | STORE; return emit_op_mem(compiler, mem_flags, TMP_REG1, dst, dstw, TMP_REG2); } @@ -2517,11 +2538,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *comp if (src1 == SLJIT_IMM) { if (type & SLJIT_32) src1w = (sljit_s32)src1w; - FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); - src1 = TMP_REG1; + FAIL_IF(load_immediate(compiler, TMP_REG2, src1w)); + src1 = TMP_REG2; } else if (src1 & SLJIT_MEM) { - FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src1, src1w, TMP_REG2)); - src1 = TMP_REG1; + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src1, src1w, TMP_REG2)); + src1 = TMP_REG2; } cc = get_cc(compiler, type & ~SLJIT_32); @@ -2542,8 +2563,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *com ADJUST_LOCAL_OFFSET(src1, src1w); if (src1 & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src1, src1w)); - src1 = TMP_FREG1; + FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) ? INT_SIZE : WORD_SIZE, TMP_FREG2, src1, src1w)); + src1 = TMP_FREG2; } cc = get_cc(compiler, type & ~SLJIT_32); @@ -2710,13 +2731,13 @@ static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, slj sljit_s32 mem = *mem_ptr; if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { - *mem_ptr = TMP_REG1; - return push_inst(compiler, ADD | RD(TMP_REG1) | RN(mem & REG_MASK) | RM(OFFS_REG(mem)) | ((sljit_ins)(memw & 0x3) << 10)); + *mem_ptr = TMP_REG2; + return push_inst(compiler, ADD | RD(TMP_REG2) | RN(mem & REG_MASK) | RM(OFFS_REG(mem)) | ((sljit_ins)(memw & 0x3) << 10)); } if (!(mem & REG_MASK)) { - *mem_ptr = TMP_REG1; - return load_immediate(compiler, TMP_REG1, memw); + *mem_ptr = TMP_REG2; + return load_immediate(compiler, TMP_REG2, memw); } mem &= REG_MASK; @@ -2726,11 +2747,11 @@ static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, slj return SLJIT_SUCCESS; } - *mem_ptr = TMP_REG1; + *mem_ptr = TMP_REG2; if (memw < -0xffffff || memw > 0xffffff) { - FAIL_IF(load_immediate(compiler, TMP_REG1, memw)); - return push_inst(compiler, ADD | RD(TMP_REG1) | RN(TMP_REG1) | RM(mem)); + FAIL_IF(load_immediate(compiler, TMP_REG2, memw)); + return push_inst(compiler, ADD | RD(TMP_REG2) | RN(TMP_REG2) | RM(mem)); } ins = ADDI; @@ -2741,16 +2762,16 @@ static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, slj } if (memw > 0xfff) { - FAIL_IF(push_inst(compiler, ins | (1 << 22) | RD(TMP_REG1) | RN(mem) | ((sljit_ins)(memw >> 12) << 10))); + FAIL_IF(push_inst(compiler, ins | (1 << 22) | RD(TMP_REG2) | RN(mem) | ((sljit_ins)(memw >> 12) << 10))); memw &= 0xfff; if (memw == 0) return SLJIT_SUCCESS; - mem = TMP_REG1; + mem = TMP_REG2; } - return push_inst(compiler, ins | RD(TMP_REG1) | RN(mem) | ((sljit_ins)memw << 10)); + return push_inst(compiler, ins | RD(TMP_REG2) | RN(mem) | ((sljit_ins)memw << 10)); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, @@ -2958,8 +2979,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil return push_inst(compiler, MOVI | imm | VD(freg)); } - FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); - src = TMP_REG1; + FAIL_IF(load_immediate(compiler, TMP_REG2, srcw)); + src = TMP_REG2; } return push_inst(compiler, DUP_g | ins | VD(freg) | RN(src)); @@ -3028,8 +3049,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile if (elem_size < 3) srcdstw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1; - FAIL_IF(load_immediate(compiler, TMP_REG1, srcdstw)); - srcdst = TMP_REG1; + FAIL_IF(load_immediate(compiler, TMP_REG2, srcdstw)); + srcdst = TMP_REG2; } if (type & SLJIT_SIMD_STORE) { @@ -3186,7 +3207,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *c FAIL_IF(push_inst(compiler, USRA | (1 << 30) | (imms << 16) | VD(TMP_FREG1) | VN(TMP_FREG1))); - dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; ins = (0x1 << 16); if (reg_size == 4 && elem_size == 0) { @@ -3196,8 +3217,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *c FAIL_IF(push_inst(compiler, UMOV | ins | RD(dst_r) | VN(TMP_FREG1))); - if (dst_r == TMP_REG1) - return emit_op_mem(compiler, STORE | ((type & SLJIT_32) ? INT_SIZE : WORD_SIZE), TMP_REG1, dst, dstw, TMP_REG2); + if (dst_r == TMP_REG2) + return emit_op_mem(compiler, STORE | ((type & SLJIT_32) ? INT_SIZE : WORD_SIZE), TMP_REG2, dst, dstw, TMP_REG1); return SLJIT_SUCCESS; } diff --git a/src/sljit/sljitNativeARM_T2_32.c b/src/sljit/sljitNativeARM_T2_32.c index 03a3beaa6..799954a85 100644 --- a/src/sljit/sljitNativeARM_T2_32.c +++ b/src/sljit/sljitNativeARM_T2_32.c @@ -157,6 +157,7 @@ static const sljit_u8 freg_ebit_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) #define LSRSI 0x0800 #define LSR_W 0xfa20f000 #define LSR_WI 0xea4f0010 +#define MLA 0xfb000000 #define MOV 0x4600 #define MOVS 0x0000 #define MOVSI 0x2000 @@ -785,10 +786,11 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, /* SET_FLAGS must be 0x100000 as it is also the value of S bit (can be used for optimization). */ #define SET_FLAGS 0x0100000 #define UNUSED_RETURN 0x0200000 +#define REGISTER_OP 0x0400000 static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 dst, sljit_uw arg1, sljit_uw arg2) { - /* dst must be register, TMP_REG1 + /* dst must be register arg1 must be register, imm arg2 must be register, imm */ sljit_s32 reg; @@ -814,6 +816,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s case SLJIT_REV_U32: case SLJIT_REV_S32: case SLJIT_MUL: + case SLJIT_MULADD: /* No form with immediate operand. */ break; case SLJIT_MOV: @@ -1049,17 +1052,17 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s return push_inst32(compiler, REV_W | RN4(arg2) | RD4(dst) | RM4(arg2)); case SLJIT_REV_U16: case SLJIT_REV_S16: - SLJIT_ASSERT(arg1 == TMP_REG2 && dst != TMP_REG2); + SLJIT_ASSERT(arg1 == TMP_REG2); - flags &= 0xffff; if (IS_2_LO_REGS(dst, arg2)) FAIL_IF(push_inst16(compiler, REV16 | RD3(dst) | RN3(arg2))); else FAIL_IF(push_inst32(compiler, REV16_W | RN4(arg2) | RD4(dst) | RM4(arg2))); - if (dst == TMP_REG1 || (arg2 == TMP_REG1 && flags == SLJIT_REV_U16)) + if (!(flags & REGISTER_OP)) return SLJIT_SUCCESS; + flags &= 0xffff; if (reg_map[dst] <= 7) return push_inst16(compiler, (flags == SLJIT_REV_U16 ? UXTH : SXTH) | RD3(dst) | RN3(dst)); return push_inst32(compiler, (flags == SLJIT_REV_U16 ? UXTH_W : SXTH_W) | RD4(dst) | RM4(dst)); @@ -1094,10 +1097,10 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s compiler->status_flags_state = 0; if (!(flags & SET_FLAGS)) return push_inst32(compiler, MUL | RD4(dst) | RN4(arg1) | RM4(arg2)); - SLJIT_ASSERT(dst != TMP_REG2); - FAIL_IF(push_inst32(compiler, SMULL | RT4(dst) | RD4(TMP_REG2) | RN4(arg1) | RM4(arg2))); + reg = (dst == TMP_REG2) ? TMP_REG1 : TMP_REG2; + FAIL_IF(push_inst32(compiler, SMULL | RT4(dst) | RD4(reg) | RN4(arg1) | RM4(arg2))); /* cmp TMP_REG2, dst asr #31. */ - return push_inst32(compiler, CMP_W | RN4(TMP_REG2) | 0x70e0 | RM4(dst)); + return push_inst32(compiler, CMP_W | RN4(reg) | 0x70e0 | RM4(dst)); case SLJIT_AND: if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, ANDS | RD3(dst) | RN3(arg2)); @@ -1148,6 +1151,9 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, RORS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, ROR_W | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_MULADD: + compiler->status_flags_state = 0; + return push_inst32(compiler, MLA | RD4(dst) | RN4(arg1) | RM4(arg2) | RT4(dst)); } SLJIT_UNREACHABLE(); @@ -1911,14 +1917,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile sljit_s32 src, sljit_sw srcw) { sljit_s32 dst_r, flags; - sljit_s32 op_flags = GET_ALL_FLAGS(op); CHECK_ERROR(); CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src, srcw); - dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; op = GET_OPCODE(op); if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) { @@ -1958,35 +1963,37 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile if (src == SLJIT_IMM) FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG2, (sljit_uw)srcw)); - else if (src & SLJIT_MEM) { + else if (src & SLJIT_MEM) FAIL_IF(emit_op_mem(compiler, flags, dst_r, src, srcw, TMP_REG1)); - } else { - if (dst_r != TMP_REG1) - return emit_op_imm(compiler, op, dst_r, TMP_REG2, (sljit_uw)src); + else if (FAST_IS_REG(dst)) + return emit_op_imm(compiler, op, dst_r, TMP_REG2, (sljit_uw)src); + else dst_r = src; - } if (!(dst & SLJIT_MEM)) return SLJIT_SUCCESS; - return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG2); + return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG1); } SLJIT_COMPILE_ASSERT(WORD_SIZE == 0, word_size_must_be_0); - flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0; + flags = WORD_SIZE; - if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) + if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) { + if (!(dst & SLJIT_MEM) && (!(src & SLJIT_MEM) || op == SLJIT_REV_S16)) + op |= REGISTER_OP; flags |= HALF_SIZE; + } if (src & SLJIT_MEM) { FAIL_IF(emit_op_mem(compiler, flags, TMP_REG1, src, srcw, TMP_REG1)); src = TMP_REG1; } - emit_op_imm(compiler, flags | op, dst_r, TMP_REG2, (sljit_uw)src); + emit_op_imm(compiler, op, dst_r, TMP_REG2, (sljit_uw)src); if (SLJIT_UNLIKELY(dst & SLJIT_MEM)) - return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG2); + return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG1); return SLJIT_SUCCESS; } @@ -1995,7 +2002,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { - sljit_s32 dst_reg, flags, src2_reg; + sljit_s32 dst_reg, src2_tmp_reg, flags; CHECK_ERROR(); CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); @@ -2003,36 +2010,34 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); - dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1; + dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG2; flags = HAS_FLAGS(op) ? SET_FLAGS : 0; if (dst == TMP_REG1) flags |= UNUSED_RETURN; + if (src2 == SLJIT_IMM) + flags |= ARG2_IMM; + else if (src2 & SLJIT_MEM) { + src2_tmp_reg = FAST_IS_REG(src1) ? TMP_REG1 : TMP_REG2; + emit_op_mem(compiler, WORD_SIZE, src2_tmp_reg, src2, src2w, TMP_REG1); + src2w = src2_tmp_reg; + } else + src2w = src2; + if (src1 == SLJIT_IMM) flags |= ARG1_IMM; else if (src1 & SLJIT_MEM) { emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src1, src1w, TMP_REG1); src1w = TMP_REG1; - } - else + } else src1w = src1; - if (src2 == SLJIT_IMM) - flags |= ARG2_IMM; - else if (src2 & SLJIT_MEM) { - src2_reg = (!(flags & ARG1_IMM) && (src1w == TMP_REG1)) ? TMP_REG2 : TMP_REG1; - emit_op_mem(compiler, WORD_SIZE, src2_reg, src2, src2w, src2_reg); - src2w = src2_reg; - } - else - src2w = src2; - emit_op_imm(compiler, flags | GET_OPCODE(op), dst_reg, (sljit_uw)src1w, (sljit_uw)src2w); if (!(dst & SLJIT_MEM)) return SLJIT_SUCCESS; - return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG2); + return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG1); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, @@ -2046,6 +2051,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w); } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w)); + + switch (GET_OPCODE(op)) { + case SLJIT_MULADD: + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, op, dst_reg, 0, src1, src1w, src2, src2w); + } + + return SLJIT_SUCCESS; +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst_reg, sljit_s32 src1_reg, @@ -2360,7 +2382,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil switch (GET_OPCODE(op)) { case SLJIT_MOV_F64: if (src != dst_r) { - if (dst_r != TMP_FREG1) + if (!(dst & SLJIT_MEM)) FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src))); else dst_r = src; @@ -3102,7 +3124,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *comp } if (src1 & SLJIT_MEM) { - FAIL_IF(emit_op_mem(compiler, WORD_SIZE, (src2_reg != dst_reg) ? dst_reg : TMP_REG1, src1, src1w, TMP_REG2)); + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, (src2_reg != dst_reg) ? dst_reg : TMP_REG1, src1, src1w, TMP_REG1)); if (src2_reg != dst_reg) { src1 = src2_reg; @@ -3174,8 +3196,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *com } if (src1 & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w)); - src1 = TMP_FREG1; + FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) | FPU_LOAD, TMP_FREG2, src1, src1w)); + src1 = TMP_FREG2; } FAIL_IF(push_inst16(compiler, IT | (get_cc(compiler, type & ~SLJIT_32) << 4) | 0x8)); diff --git a/src/sljit/sljitNativeLOONGARCH_64.c b/src/sljit/sljitNativeLOONGARCH_64.c index 16d5e90dc..ed1d5a94f 100644 --- a/src/sljit/sljitNativeLOONGARCH_64.c +++ b/src/sljit/sljitNativeLOONGARCH_64.c @@ -807,6 +807,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) #define SLOW_SRC1 0x08000 #define SLOW_SRC2 0x10000 #define SLOW_DEST 0x20000 +#define MEM_USE_TMP2 0x40000 #define STACK_STORE ST_D #define STACK_LOAD LD_D @@ -1139,7 +1140,7 @@ static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, slj static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) { sljit_s32 base = arg & REG_MASK; - sljit_s32 tmp_r = TMP_REG1; + sljit_s32 tmp_r = (flags & MEM_USE_TMP2) ? TMP_REG2 : TMP_REG1; sljit_sw offset; SLJIT_ASSERT(arg & SLJIT_MEM); @@ -1148,11 +1149,6 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl next_argw = 0; } - /* Since tmp can be the same as base or offset registers, - * these might be unavailable after modifying tmp. */ - if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) - tmp_r = reg; - if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { argw &= 0x3; @@ -1249,8 +1245,7 @@ static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, slji FAIL_IF(push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(dst))); \ } \ } \ - } \ - else { \ + } else { \ if (op & SLJIT_SET_Z) \ FAIL_IF(push_inst(compiler, op_reg | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); \ if (!(flags & UNUSED_DEST)) \ @@ -1265,88 +1260,88 @@ static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, slji static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, sljit_s32 dst, sljit_s32 src1, sljit_sw src2) { - sljit_s32 is_overflow, is_carry, carry_src_r, is_handled; + sljit_s32 is_overflow, is_carry, carry_src_r, is_handled, reg; sljit_ins op_imm, op_reg; sljit_ins word_size = ((op & SLJIT_32) ? 32 : 64); switch (GET_OPCODE(op)) { case SLJIT_MOV: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); if (dst != src2) return push_inst(compiler, INST(ADD, op) | RD(dst) | RJ(src2) | IMM_I12(0)); return SLJIT_SUCCESS; case SLJIT_MOV_U8: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) return push_inst(compiler, ANDI | RD(dst) | RJ(src2) | IMM_I12(0xff)); SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; case SLJIT_MOV_S8: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) return push_inst(compiler, EXT_W_B | RD(dst) | RJ(src2)); SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; case SLJIT_MOV_U16: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) return push_inst(compiler, INST(BSTRPICK, op) | RD(dst) | RJ(src2) | (15 << 16)); SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; case SLJIT_MOV_S16: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) return push_inst(compiler, EXT_W_H | RD(dst) | RJ(src2)); SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; case SLJIT_MOV_U32: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) return push_inst(compiler, BSTRPICK_D | RD(dst) | RJ(src2) | (31 << 16)); SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; case SLJIT_MOV_S32: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) return push_inst(compiler, SLLI_W | RD(dst) | RJ(src2) | IMM_I12(0)); SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; case SLJIT_CLZ: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); return push_inst(compiler, INST(CLZ, op) | RD(dst) | RJ(src2)); case SLJIT_CTZ: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); return push_inst(compiler, INST(CTZ, op) | RD(dst) | RJ(src2)); case SLJIT_REV: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); return push_inst(compiler, ((op & SLJIT_32) ? REVB_2W : REVB_D) | RD(dst) | RJ(src2)); case SLJIT_REV_S16: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); FAIL_IF(push_inst(compiler, REVB_2H | RD(dst) | RJ(src2))); return push_inst(compiler, EXT_W_H | RD(dst) | RJ(dst)); case SLJIT_REV_U16: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); FAIL_IF(push_inst(compiler, REVB_2H | RD(dst) | RJ(src2))); return push_inst(compiler, INST(BSTRPICK, op) | RD(dst) | RJ(dst) | (15 << 16)); case SLJIT_REV_S32: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && dst != TMP_REG1); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM) && dst != TMP_REG1); FAIL_IF(push_inst(compiler, REVB_2W | RD(dst) | RJ(src2))); return push_inst(compiler, SLLI_W | RD(dst) | RJ(dst) | IMM_I12(0)); case SLJIT_REV_U32: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && dst != TMP_REG1); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM) && dst != TMP_REG1); FAIL_IF(push_inst(compiler, REVB_2W | RD(dst) | RJ(src2))); return push_inst(compiler, BSTRPICK_D | RD(dst) | RJ(dst) | (31 << 16)); @@ -1363,15 +1358,13 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(TMP_ZERO) | IMM_I12(-1))); FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG))); } - } - else if (op & SLJIT_SET_Z) + } else if (op & SLJIT_SET_Z) FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2))); /* Only the zero flag is needed. */ if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(src2))); - } - else { + } else { if (is_overflow) FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); else if (op & SLJIT_SET_Z) @@ -1461,8 +1454,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl if (GET_FLAG_TYPE(op) == SLJIT_LESS) { FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2))); is_handled = 1; - } - else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS) { + } else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS) { FAIL_IF(push_inst(compiler, SLTI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2))); is_handled = 1; } @@ -1472,8 +1464,9 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl is_handled = 1; if (flags & SRC2_IMM) { - FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG2) | RJ(TMP_ZERO) | IMM_I12(src2))); - src2 = TMP_REG2; + reg = (src1 == TMP_REG1) ? TMP_REG2 : TMP_REG1; + FAIL_IF(push_inst(compiler, ADDI_D | RD(reg) | RJ(TMP_ZERO) | IMM_I12(src2))); + src2 = reg; flags &= ~SRC2_IMM; } @@ -1499,8 +1492,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-src2))); if (!(flags & UNUSED_DEST)) return push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2)); - } - else { + } else { if (op & SLJIT_SET_Z) FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); if (!(flags & UNUSED_DEST)) @@ -1520,8 +1512,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-1))); FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG))); } - } - else if (op & SLJIT_SET_Z) + } else if (op & SLJIT_SET_Z) FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-src2))); if (is_overflow || is_carry) @@ -1530,8 +1521,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl /* Only the zero flag is needed. */ if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2))); - } - else { + } else { if (is_overflow) FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); else if (op & SLJIT_SET_Z) @@ -1568,8 +1558,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl FAIL_IF(push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2))); FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2))); - } - else { + } else { if (is_carry) FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); @@ -1650,7 +1639,6 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl if (GET_OPCODE(op) == SLJIT_ROTL) src2 = word_size - src2; return push_inst(compiler, INST(ROTRI, op) | RD(dst) | RJ(src1) | IMM_I12(src2)); - } if (src2 == TMP_ZERO) { @@ -1701,7 +1689,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 sljit_s32 dst_r = TMP_REG2; sljit_s32 src1_r; sljit_sw src2_r = 0; - sljit_s32 sugg_src2_r = TMP_REG2; + sljit_s32 src2_tmp_reg = (GET_OPCODE(op) >= SLJIT_OP2_BASE && FAST_IS_REG(src1)) ? TMP_REG1 : TMP_REG2; if (!(flags & ALT_KEEP_CACHE)) { compiler->cache_arg = 0; @@ -1712,22 +1700,19 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 SLJIT_ASSERT(HAS_FLAGS(op)); flags |= UNUSED_DEST; dst = TMP_REG2; - } - else if (FAST_IS_REG(dst)) { + } else if (FAST_IS_REG(dst)) { dst_r = dst; flags |= REG_DEST; if (flags & MOVE_OP) - sugg_src2_r = dst_r; - } - else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw)) + src2_tmp_reg = dst_r; + } else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw)) flags |= SLOW_DEST; if (flags & IMM_OP) { if (src2 == SLJIT_IMM && src2w != 0 && src2w <= I12_MAX && src2w >= I12_MIN) { flags |= SRC2_IMM; src2_r = src2w; - } - else if ((flags & CUMULATIVE_OP) && src1 == SLJIT_IMM && src1w != 0 && src1w <= I12_MAX && src1w >= I12_MIN) { + } else if ((flags & CUMULATIVE_OP) && src1 == SLJIT_IMM && src1w != 0 && src1w <= I12_MAX && src1w >= I12_MIN) { flags |= SRC2_IMM; src2_r = src1w; @@ -1743,16 +1728,14 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 if (FAST_IS_REG(src1)) { src1_r = src1; flags |= REG1_SOURCE; - } - else if (src1 == SLJIT_IMM) { + } else if (src1 == SLJIT_IMM) { if (src1w) { FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); src1_r = TMP_REG1; } else src1_r = TMP_ZERO; - } - else { + } else { if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w)) FAIL_IF(compiler->error); else @@ -1766,14 +1749,12 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 flags |= REG2_SOURCE; if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP) dst_r = (sljit_s32)src2_r; - } - else if (src2 == SLJIT_IMM) { + } else if (src2 == SLJIT_IMM) { if (!(flags & SRC2_IMM)) { if (src2w) { - FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w)); - src2_r = sugg_src2_r; - } - else { + FAIL_IF(load_immediate(compiler, src2_tmp_reg, src2w)); + src2_r = src2_tmp_reg; + } else { src2_r = TMP_ZERO; if (flags & MOVE_OP) { if (dst & SLJIT_MEM) @@ -1783,31 +1764,29 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 } } } - } - else { - if (getput_arg_fast(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w)) + } else { + if (getput_arg_fast(compiler, flags | LOAD_DATA, src2_tmp_reg, src2, src2w)) FAIL_IF(compiler->error); else flags |= SLOW_SRC2; - src2_r = sugg_src2_r; + src2_r = src2_tmp_reg; } if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { SLJIT_ASSERT(src2_r == TMP_REG2); - if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + if ((flags & SLOW_DEST) && !can_cache(src2, src2w, src1, src1w) && can_cache(src2, src2w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA | MEM_USE_TMP2, TMP_REG2, src2, src2w, dst, dstw)); + } else { FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w)); FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); } - else { - FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); - FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw)); - } } else if (flags & SLOW_SRC1) FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); else if (flags & SLOW_SRC2) - FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA | ((src1_r == TMP_REG1) ? MEM_USE_TMP2 : 0), src2_tmp_reg, src2, src2w, dst, dstw)); FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); @@ -1878,40 +1857,40 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile switch (GET_OPCODE(op)) { case SLJIT_MOV: case SLJIT_MOV_P: - return emit_op(compiler, SLJIT_MOV, WORD_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, SLJIT_MOV, WORD_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, srcw); case SLJIT_MOV_U32: - return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u32)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u32)srcw : srcw); case SLJIT_MOV_S32: /* Logical operators have no W variant, so sign extended input is necessary for them. */ case SLJIT_MOV32: - return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s32)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s32)srcw : srcw); case SLJIT_MOV_U8: - return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw); + return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw); case SLJIT_MOV_S8: - return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw); + return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw); case SLJIT_MOV_U16: - return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw); + return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw); case SLJIT_MOV_S16: - return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s16)srcw : srcw); + return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s16)srcw : srcw); case SLJIT_CLZ: case SLJIT_CTZ: case SLJIT_REV: - return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, op, flags, dst, dstw, TMP_ZERO, 0, src, srcw); case SLJIT_REV_U16: case SLJIT_REV_S16: - return emit_op(compiler, op, HALF_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, op, HALF_DATA, dst, dstw, TMP_ZERO, 0, src, srcw); case SLJIT_REV_U32: case SLJIT_REV_S32: - return emit_op(compiler, op | SLJIT_32, INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, op | SLJIT_32, INT_DATA, dst, dstw, TMP_ZERO, 0, src, srcw); } SLJIT_UNREACHABLE(); @@ -1993,6 +1972,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil return sljit_emit_op2(compiler, op, 0, 0, src1, src1w, src2, src2w); } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w)); + + switch (GET_OPCODE(op)) { + case SLJIT_MULADD: + SLJIT_SKIP_CHECKS(compiler); + FAIL_IF(sljit_emit_op2(compiler, SLJIT_MUL | (op & SLJIT_32), TMP_REG2, 0, src1, src1w, src2, src2w)); + return push_inst(compiler, ADD_D | RD(dst_reg) | RJ(dst_reg) | RK(TMP_REG2)); + } + + return SLJIT_SUCCESS; +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst_reg, sljit_s32 src1_reg, @@ -2392,7 +2389,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil switch (GET_OPCODE(op)) { case SLJIT_MOV_F64: if (src != dst_r) { - if (dst_r != TMP_FREG1) + if (!(dst & SLJIT_MEM)) FAIL_IF(push_inst(compiler, FINST(FMOV, op) | FRD(dst_r) | FRJ(src))); else dst_r = src; @@ -2451,11 +2448,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil } if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { - if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + if ((dst & SLJIT_MEM) && !can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w)); FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); - } - else { + } else { FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); } @@ -2485,7 +2481,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil break; } - if (dst_r == TMP_FREG2) + if (dst_r != dst) FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0)); return SLJIT_SUCCESS; } @@ -2700,6 +2696,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler struct sljit_jump *jump; sljit_s32 flags; sljit_ins inst; + sljit_s32 src2_tmp_reg = FAST_IS_REG(src1) ? TMP_REG1 : TMP_REG2; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w)); @@ -2717,8 +2714,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler } if (src2 & SLJIT_MEM) { - PTR_FAIL_IF(emit_op_mem2(compiler, flags, TMP_REG2, src2, src2w, 0, 0)); - src2 = TMP_REG2; + PTR_FAIL_IF(emit_op_mem2(compiler, flags, src2_tmp_reg, src2, src2w, 0, 0)); + src2 = src2_tmp_reg; } if (src1 == SLJIT_IMM) { @@ -2732,8 +2729,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler if (src2 == SLJIT_IMM) { if (src2w != 0) { - PTR_FAIL_IF(load_immediate(compiler, TMP_REG2, src2w)); - src2 = TMP_REG2; + PTR_FAIL_IF(load_immediate(compiler, src2_tmp_reg, src2w)); + src2 = src2_tmp_reg; } else src2 = TMP_ZERO; @@ -2959,13 +2956,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *comp type ^= 0x1; } else { if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) { - FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG2) | RJ(dst_reg) | IMM_I12(0))); + FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(dst_reg) | IMM_I12(0))); if ((src1 & REG_MASK) == dst_reg) - src1 = (src1 & ~REG_MASK) | TMP_REG2; + src1 = (src1 & ~REG_MASK) | TMP_REG1; if (OFFS_REG(src1) == dst_reg) - src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG2); + src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1); } FAIL_IF(push_inst(compiler, ADDI_D | RD(dst_reg) | RJ(src2_reg) | IMM_I12(0))); @@ -3007,15 +3004,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *com if ((type & ~SLJIT_32) == SLJIT_EQUAL) invert = 1; FAIL_IF(push_inst(compiler, MOVGR2CF | FCD(F_OTHER_FLAG) | RJ(EQUAL_FLAG))); - } - else + } else { + if (get_jump_instruction(type & ~SLJIT_32) == (BNE | RJ(OTHER_FLAG) | RD(TMP_ZERO))) + invert = 1; FAIL_IF(push_inst(compiler, MOVGR2CF | FCD(F_OTHER_FLAG) | RJ(OTHER_FLAG))); + } if (src1 & SLJIT_MEM) { - FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, dst_freg, src1, src1w)); + FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, TMP_FREG2, src1, src1w)); if (invert) - return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(dst_freg) | FRK(src2_freg) | FCA(F_OTHER_FLAG)); - return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src2_freg) | FRK(dst_freg) | FCA(F_OTHER_FLAG)); + return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(TMP_FREG2) | FRK(src2_freg) | FCA(F_OTHER_FLAG)); + return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src2_freg) | FRK(TMP_FREG2) | FCA(F_OTHER_FLAG)); } else { if (invert) return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src1) | FRK(src2_freg) | FCA(F_OTHER_FLAG)); diff --git a/src/sljit/sljitNativeMIPS_32.c b/src/sljit/sljitNativeMIPS_32.c index 9620b945f..91153e5f2 100644 --- a/src/sljit/sljitNativeMIPS_32.c +++ b/src/sljit/sljitNativeMIPS_32.c @@ -225,7 +225,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t sljit_ins f64_hi = TA(6), f64_lo = TA(7); #endif /* SLJIT_LITTLE_ENDIAN */ - SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12); + SLJIT_ASSERT(reg_map[TMP_REG2] == 4 && freg_map[TMP_FREG1] == 12); arg_types >>= SLJIT_ARG_SHIFT; @@ -370,7 +370,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile } else if (type & SLJIT_CALL_RETURN) PTR_FAIL_IF(emit_stack_frame_release(compiler, 0, &ins)); - SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); + SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25); if (ins == NOP && compiler->delay_slot != UNMOVABLE_INS) jump->flags |= IS_MOVABLE; @@ -441,7 +441,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi return sljit_emit_ijump(compiler, type, src, srcw); } - SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); + SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25); if (src == SLJIT_IMM) FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw)); diff --git a/src/sljit/sljitNativeMIPS_64.c b/src/sljit/sljitNativeMIPS_64.c index 52a0d3fb7..b9f03a7bd 100644 --- a/src/sljit/sljitNativeMIPS_64.c +++ b/src/sljit/sljitNativeMIPS_64.c @@ -225,7 +225,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t sljit_ins prev_ins = *ins_ptr; sljit_ins ins = NOP; - SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12); + SLJIT_ASSERT(reg_map[TMP_REG2] == 4 && freg_map[TMP_FREG1] == 12); arg_types >>= SLJIT_ARG_SHIFT; @@ -309,7 +309,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile if ((type & 0xff) != SLJIT_CALL_REG_ARG) PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins)); - SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); + SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25); if (ins == NOP && compiler->delay_slot != UNMOVABLE_INS) jump->flags |= IS_MOVABLE; @@ -366,7 +366,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi return sljit_emit_ijump(compiler, type, src, srcw); } - SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); + SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG1); if (src == SLJIT_IMM) FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw)); diff --git a/src/sljit/sljitNativeMIPS_common.c b/src/sljit/sljitNativeMIPS_common.c index 97ce33efd..88eb30b7f 100644 --- a/src/sljit/sljitNativeMIPS_common.c +++ b/src/sljit/sljitNativeMIPS_common.c @@ -83,7 +83,7 @@ typedef sljit_u32 sljit_ins; #define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) /* For position independent code, t9 must contain the function address. */ -#define PIC_ADDR_REG TMP_REG2 +#define PIC_ADDR_REG TMP_REG1 /* Floating point status register. */ #define FCSR_REG 31 @@ -95,7 +95,7 @@ typedef sljit_u32 sljit_ins; #define OTHER_FLAG 1 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = { - 0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 4, 25, 31, 3, 1 + 0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 25, 4, 31, 3, 1 }; #define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) @@ -916,9 +916,9 @@ static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, s static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size, sljit_ins *ins_ptr); #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -#define SELECT_OP(a, b) (b) +#define SELECT_OP(d, w) (w) #else -#define SELECT_OP(a, b) (!(op & SLJIT_32) ? a : b) +#define SELECT_OP(d, w) (!(op & SLJIT_32) ? (d) : (w)) #endif #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) @@ -985,9 +985,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi offset = local_size - SSIZE_OF(sw); } else { FAIL_IF(load_immediate(compiler, OTHER_FLAG, local_size)); - FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_SP) | TA(OTHER_FLAG) | D(SLJIT_SP), DR(SLJIT_SP))); - base = S(TMP_REG2); + base = S(TMP_REG1); offset = -SSIZE_OF(sw); #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) local_size = 0; @@ -1196,8 +1196,8 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit if (tmp < frame_size) tmp = frame_size; - FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size - tmp)); - FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | T(TMP_REG1) | D(SLJIT_SP), DR(SLJIT_SP))); + FAIL_IF(load_immediate(compiler, DR(TMP_REG2), local_size - tmp)); + FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | T(TMP_REG2) | D(SLJIT_SP), DR(SLJIT_SP))); local_size = tmp; } @@ -1695,7 +1695,7 @@ static sljit_s32 emit_rev16(struct sljit_compiler *compiler, sljit_s32 op, sljit static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, sljit_s32 dst, sljit_s32 src1, sljit_sw src2) { - sljit_s32 is_overflow, is_carry, carry_src_ar, is_handled; + sljit_s32 is_overflow, is_carry, carry_src_ar, is_handled, reg; sljit_ins op_imm, op_v; #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) sljit_ins ins, op_dimm, op_dimm32, op_dv; @@ -1947,8 +1947,9 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl is_handled = 1; if (flags & SRC2_IMM) { - FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2))); - src2 = TMP_REG2; + reg = (src1 == TMP_REG1) ? TMP_REG2 : TMP_REG1; + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(reg) | IMM(src2), DR(reg))); + src2 = reg; flags &= ~SRC2_IMM; } @@ -2267,7 +2268,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 sljit_s32 dst_r = TMP_REG2; sljit_s32 src1_r; sljit_sw src2_r = 0; - sljit_s32 sugg_src2_r = TMP_REG2; + sljit_s32 src2_tmp_reg = (GET_OPCODE(op) >= SLJIT_OP2_BASE && FAST_IS_REG(src1)) ? TMP_REG1 : TMP_REG2; if (!(flags & ALT_KEEP_CACHE)) { compiler->cache_arg = 0; @@ -2283,7 +2284,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 dst_r = dst; flags |= REG_DEST; if (flags & MOVE_OP) - sugg_src2_r = dst_r; + src2_tmp_reg = dst_r; } else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, DR(TMP_REG1), dst, dstw)) flags |= SLOW_DEST; @@ -2335,8 +2336,8 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 else if (src2 == SLJIT_IMM) { if (!(flags & SRC2_IMM)) { if (src2w) { - FAIL_IF(load_immediate(compiler, DR(sugg_src2_r), src2w)); - src2_r = sugg_src2_r; + FAIL_IF(load_immediate(compiler, DR(src2_tmp_reg), src2w)); + src2_r = src2_tmp_reg; } else { src2_r = 0; @@ -2350,16 +2351,16 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 } } else { - if (getput_arg_fast(compiler, flags | LOAD_DATA, DR(sugg_src2_r), src2, src2w)) + if (getput_arg_fast(compiler, flags | LOAD_DATA, DR(src2_tmp_reg), src2, src2w)) FAIL_IF(compiler->error); else flags |= SLOW_SRC2; - src2_r = sugg_src2_r; + src2_r = src2_tmp_reg; } if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { SLJIT_ASSERT(src2_r == TMP_REG2); - if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + if ((flags & SLOW_DEST) && !can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG2), src2, src2w, src1, src1w)); FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w, dst, dstw)); } @@ -2371,7 +2372,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 else if (flags & SLOW_SRC1) FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w, dst, dstw)); else if (flags & SLOW_SRC2) - FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(sugg_src2_r), src2, src2w, dst, dstw)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(src2_tmp_reg), src2, src2w, dst, dstw)); FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); @@ -2643,12 +2644,28 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) #define SELECT_OP3(op, src2w, D, D32, W) (((op & SLJIT_32) ? (W) : ((src2w) < 32) ? (D) : (D32)) | (((sljit_ins)src2w & 0x1f) << 6)) -#define SELECT_OP2(op, D, W) ((op & SLJIT_32) ? (W) : (D)) #else /* !SLJIT_CONFIG_MIPS_64 */ #define SELECT_OP3(op, src2w, D, D32, W) ((W) | ((sljit_ins)(src2w) << 6)) -#define SELECT_OP2(op, D, W) (W) #endif /* SLJIT_CONFIG_MIPS_64 */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w)); + + switch (GET_OPCODE(op)) { + case SLJIT_MULADD: + SLJIT_SKIP_CHECKS(compiler); + FAIL_IF(sljit_emit_op2(compiler, SLJIT_MUL | (op & SLJIT_32), TMP_REG2, 0, src1, src1w, src2, src2w)); + return push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst_reg) | T(TMP_REG2) | D(dst_reg), DR(dst_reg)); + } + + return SLJIT_SUCCESS; +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst_reg, sljit_s32 src1_reg, @@ -2702,18 +2719,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler * FAIL_IF(emit_op_mem(compiler, inp_flags, DR(TMP_REG2), src3, src3w)); src3 = TMP_REG2; } else if (dst_reg == src3) { - FAIL_IF(push_inst(compiler, SELECT_OP2(op, DADDU, ADDU) | S(src3) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src3) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); src3 = TMP_REG2; } if (is_left) { - ins1 = SELECT_OP2(op, DSRL, SRL); - ins2 = SELECT_OP2(op, DSLLV, SLLV); - ins3 = SELECT_OP2(op, DSRLV, SRLV); + ins1 = SELECT_OP(DSRL, SRL); + ins2 = SELECT_OP(DSLLV, SLLV); + ins3 = SELECT_OP(DSRLV, SRLV); } else { - ins1 = SELECT_OP2(op, DSLL, SLL); - ins2 = SELECT_OP2(op, DSRLV, SRLV); - ins3 = SELECT_OP2(op, DSLLV, SLLV); + ins1 = SELECT_OP(DSLL, SLL); + ins2 = SELECT_OP(DSRLV, SRLV); + ins3 = SELECT_OP(DSLLV, SLLV); } FAIL_IF(push_inst(compiler, ins2 | S(src3) | T(src1_reg) | D(dst_reg), DR(dst_reg))); @@ -2723,14 +2740,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler * FAIL_IF(push_inst(compiler, XORI | S(src3) | T(TMP_REG2) | ((sljit_ins)bit_length - 1), DR(TMP_REG2))); src2_reg = TMP_REG1; } else - FAIL_IF(push_inst(compiler, SELECT_OP2(op, DSUBU, SUBU) | SA(0) | T(src3) | D(TMP_REG2), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(0) | T(src3) | D(TMP_REG2), DR(TMP_REG2))); FAIL_IF(push_inst(compiler, ins3 | S(TMP_REG2) | T(src2_reg) | D(TMP_REG1), DR(TMP_REG1))); return push_inst(compiler, OR | S(dst_reg) | T(TMP_REG1) | D(dst_reg), DR(dst_reg)); } #undef SELECT_OP3 -#undef SELECT_OP2 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) @@ -3087,7 +3103,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil switch (GET_OPCODE(op)) { case SLJIT_MOV_F64: if (src != dst_r) { - if (dst_r != TMP_FREG1) + if (!(dst & SLJIT_MEM)) FAIL_IF(push_inst(compiler, MOV_fmt(FMT(op)) | FS(src) | FD(dst_r), MOVABLE_INS)); else dst_r = src; @@ -3146,11 +3162,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil } if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { - if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + if ((dst & SLJIT_MEM) && !can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG2), src2, src2w, src1, src1w)); FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src1, src1w, dst, dstw)); - } - else { + } else { FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src1, src1w, src2, src2w)); FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG2), src2, src2w, dst, dstw)); } @@ -3345,10 +3360,10 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile PTR_FAIL_IF(push_inst(compiler, inst, UNMOVABLE_INS)); if (type <= SLJIT_JUMP) - PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS)); + PTR_FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS)); else { jump->flags |= IS_JAL; - PTR_FAIL_IF(push_inst(compiler, JALR | S(TMP_REG2) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); } jump->addr = compiler->size; @@ -3376,8 +3391,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile #define RESOLVE_IMM2() \ if (src2 == SLJIT_IMM) { \ if (src2w) { \ - PTR_FAIL_IF(load_immediate(compiler, DR(TMP_REG2), src2w)); \ - src2 = TMP_REG2; \ + PTR_FAIL_IF(load_immediate(compiler, DR(src2_tmp_reg), src2w)); \ + src2 = src2_tmp_reg; \ } \ else \ src2 = 0; \ @@ -3390,6 +3405,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler struct sljit_jump *jump; sljit_s32 flags; sljit_ins inst; + sljit_s32 src2_tmp_reg = FAST_IS_REG(src1) ? TMP_REG1 : TMP_REG2; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w)); @@ -3410,8 +3426,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler } if (src2 & SLJIT_MEM) { - PTR_FAIL_IF(emit_op_mem2(compiler, flags, DR(TMP_REG2), src2, src2w, 0, 0)); - src2 = TMP_REG2; + PTR_FAIL_IF(emit_op_mem2(compiler, flags, DR(src2_tmp_reg), src2, src2w, 0, 0)); + src2 = src2_tmp_reg; } jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); @@ -3499,7 +3515,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler PTR_FAIL_IF(push_inst(compiler, (type == SLJIT_EQUAL ? BNE : BEQ) | S(TMP_REG1) | TA(0) | BRANCH_LENGTH, UNMOVABLE_INS)); } - PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS)); + PTR_FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS)); jump->addr = compiler->size; PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); @@ -3537,11 +3553,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi if (compiler->delay_slot != UNMOVABLE_INS) jump->flags |= IS_MOVABLE; - src = TMP_REG2; + src = PIC_ADDR_REG; } else if (src & SLJIT_MEM) { ADJUST_LOCAL_OFFSET(src, srcw); - FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(TMP_REG2), src, srcw)); - src = TMP_REG2; + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw)); + src = PIC_ADDR_REG; } if (type <= SLJIT_JUMP) @@ -3739,8 +3755,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *comp #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6) if (src1 & SLJIT_MEM) { - FAIL_IF(emit_op_mem(compiler, inp_flags, DR(TMP_REG2), src1, src1w)); - src1 = TMP_REG2; + FAIL_IF(emit_op_mem(compiler, inp_flags, DR(TMP_REG1), src1, src1w)); + src1 = TMP_REG1; } else if (src1 == SLJIT_IMM) { #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) if (type & SLJIT_32) @@ -3768,13 +3784,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *comp type ^= 0x1; } else { if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) { - FAIL_IF(push_inst(compiler, ADDU_W | S(dst_reg) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, ADDU_W | S(dst_reg) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); if ((src1 & REG_MASK) == dst_reg) - src1 = (src1 & ~REG_MASK) | TMP_REG2; + src1 = (src1 & ~REG_MASK) | TMP_REG1; if (OFFS_REG(src1) == dst_reg) - src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG2); + src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1); } FAIL_IF(push_inst(compiler, mov_ins | S(src2_reg) | TA(0) | D(dst_reg), DR(dst_reg))); @@ -3831,8 +3847,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *com #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6) if (src1 & SLJIT_MEM) { - FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, FR(TMP_FREG1), src1, src1w)); - src1 = TMP_FREG1; + FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, FR(TMP_FREG2), src1, src1w)); + src1 = TMP_FREG2; } return push_inst(compiler, get_select_cc(type, 1) | FMT(type) | FS(src1) | FD(dst_freg), MOVABLE_INS); diff --git a/src/sljit/sljitNativePPC_common.c b/src/sljit/sljitNativePPC_common.c index b0a7fcee7..1f17d9042 100644 --- a/src/sljit/sljitNativePPC_common.c +++ b/src/sljit/sljitNativePPC_common.c @@ -98,7 +98,7 @@ static void ppc_cache_flush(sljit_ins *from, sljit_ins *to) #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) #define TMP_CALL_REG (SLJIT_NUMBER_OF_REGISTERS + 5) #else -#define TMP_CALL_REG TMP_REG2 +#define TMP_CALL_REG TMP_REG1 #endif #define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) @@ -984,14 +984,16 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit sljit_s32 i, tmp, base, offset; sljit_s32 local_size = compiler->local_size; + SLJIT_ASSERT(TMP_CALL_REG != TMP_REG2); + base = SLJIT_SP; if (local_size > STACK_MAX_DISTANCE) { - base = TMP_REG1; + base = TMP_REG2; if (local_size > 2 * STACK_MAX_DISTANCE + LR_SAVE_OFFSET) { FAIL_IF(push_inst(compiler, STACK_LOAD | D(base) | A(SLJIT_SP) | IMM(0))); local_size = 0; } else { - FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG1) | A(SLJIT_SP) | IMM(local_size - STACK_MAX_DISTANCE))); + FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG2) | A(SLJIT_SP) | IMM(local_size - STACK_MAX_DISTANCE))); local_size = STACK_MAX_DISTANCE; } } @@ -1033,7 +1035,7 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit if (local_size > 0) return push_inst(compiler, ADDI | D(SLJIT_SP) | A(base) | IMM(local_size)); - SLJIT_ASSERT(base == TMP_REG1); + SLJIT_ASSERT(base == TMP_REG2); return push_inst(compiler, OR | S(base) | A(SLJIT_SP) | B(base)); } @@ -1300,7 +1302,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 sljit_s32 dst_r = TMP_REG2; sljit_s32 src1_r; sljit_s32 src2_r; - sljit_s32 sugg_src2_r = TMP_REG2; + sljit_s32 src2_tmp_reg = (!(input_flags & ALT_SIGN_EXT) && GET_OPCODE(op) >= SLJIT_OP2_BASE && FAST_IS_REG(src1)) ? TMP_REG1 : TMP_REG2; sljit_s32 flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_SIGN_EXT | ALT_SET_FLAGS); /* Destination check. */ @@ -1311,24 +1313,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 flags |= REG_DEST; if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) - sugg_src2_r = dst_r; - } - - /* Source 1. */ - if (FAST_IS_REG(src1)) { - src1_r = src1; - flags |= REG1_SOURCE; - } - else if (src1 == SLJIT_IMM) { - src1_r = TMP_ZERO; - if (src1w != 0) { - FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); - src1_r = TMP_REG1; - } - } - else { - FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1)); - src1_r = TMP_REG1; + src2_tmp_reg = dst_r; } /* Source 2. */ @@ -1338,17 +1323,30 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOV_P) dst_r = src2_r; - } - else if (src2 == SLJIT_IMM) { + } else if (src2 == SLJIT_IMM) { src2_r = TMP_ZERO; if (src2w != 0) { - FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w)); - src2_r = sugg_src2_r; + FAIL_IF(load_immediate(compiler, src2_tmp_reg, src2w)); + src2_r = src2_tmp_reg; } + } else { + FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, src2_tmp_reg, src2, src2w, TMP_REG1)); + src2_r = src2_tmp_reg; } - else { - FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, TMP_REG2)); - src2_r = sugg_src2_r; + + /* Source 1. */ + if (FAST_IS_REG(src1)) { + src1_r = src1; + flags |= REG1_SOURCE; + } else if (src1 == SLJIT_IMM) { + src1_r = TMP_ZERO; + if (src1w != 0) { + FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); + src1_r = TMP_REG1; + } + } else { + FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1)); + src1_r = TMP_REG1; } FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); @@ -1804,7 +1802,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; if (GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_LESS_EQUAL) { - if (dst == TMP_REG2) { + if (dst == TMP_REG1) { if (TEST_UL_IMM(src2, src2w)) { compiler->imm = (sljit_ins)src2w & 0xffff; return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); @@ -1819,7 +1817,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM3, dst, dstw, src1, src1w, src2, src2w); } - if (dst == TMP_REG2 && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { + if (dst == TMP_REG1 && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { if (TEST_SL_IMM(src2, src2w)) { compiler->imm = (sljit_ins)src2w & 0xffff; return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); @@ -1958,13 +1956,31 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); SLJIT_SKIP_CHECKS(compiler); - return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w); + return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w); } #undef TEST_ADD_FORM1 #undef TEST_SUB_FORM2 #undef TEST_SUB_FORM3 +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w)); + + switch (GET_OPCODE(op)) { + case SLJIT_MULADD: + SLJIT_SKIP_CHECKS(compiler); + FAIL_IF(sljit_emit_op2(compiler, SLJIT_MUL | (op & SLJIT_32), TMP_REG2, 0, src1, src1w, src2, src2w)); + return push_inst(compiler, ADD | D(dst_reg) | A(dst_reg) | B(TMP_REG2)); + } + + return SLJIT_SUCCESS; +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst_reg, sljit_s32 src1_reg, @@ -2275,7 +2291,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil /* Fall through. */ case SLJIT_MOV_F64: if (src != dst_r) { - if (dst_r != TMP_FREG1) + if (!(dst & SLJIT_MEM)) FAIL_IF(push_inst(compiler, FMR | FD(dst_r) | FB(src))); else dst_r = src; @@ -2315,7 +2331,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil } if (src2 & SLJIT_MEM) { - FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, TMP_REG2)); + FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, TMP_REG1)); src2 = TMP_FREG2; } @@ -2498,7 +2514,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile type &= 0xff; if ((type | 0x1) == SLJIT_NOT_CARRY) - PTR_FAIL_IF(push_inst(compiler, ADDE | RC(ALT_SET_FLAGS) | D(TMP_REG1) | A(TMP_ZERO) | B(TMP_ZERO))); + PTR_FAIL_IF(push_inst(compiler, ADDE | RC(ALT_SET_FLAGS) | D(TMP_REG2) | A(TMP_ZERO) | B(TMP_ZERO))); /* In PPC, we don't need to touch the arguments. */ if (type < SLJIT_JUMP) @@ -2799,13 +2815,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *comp type ^= 0x1; } else { if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) { - FAIL_IF(push_inst(compiler, OR | S(dst_reg) | A(TMP_REG2) | B(dst_reg))); + FAIL_IF(push_inst(compiler, OR | S(dst_reg) | A(TMP_REG1) | B(dst_reg))); if ((src1 & REG_MASK) == dst_reg) - src1 = (src1 & ~REG_MASK) | TMP_REG2; + src1 = (src1 & ~REG_MASK) | TMP_REG1; if (OFFS_REG(src1) == dst_reg) - src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG2); + src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1); } FAIL_IF(push_inst(compiler, OR | S(src2_reg) | A(dst_reg) | B(src2_reg))); diff --git a/src/sljit/sljitNativeRISCV_common.c b/src/sljit/sljitNativeRISCV_common.c index 717d5a7f2..d86100a80 100644 --- a/src/sljit/sljitNativeRISCV_common.c +++ b/src/sljit/sljitNativeRISCV_common.c @@ -703,6 +703,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) #define SLOW_SRC1 0x08000 #define SLOW_SRC2 0x10000 #define SLOW_DEST 0x20000 +#define MEM_USE_TMP2 0x40000 #if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) #define STACK_STORE SW @@ -987,7 +988,6 @@ static sljit_s32 push_mem_inst(struct sljit_compiler *compiler, sljit_s32 flags, /* Can perform an operation using at most 1 instruction. */ static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) { - SLJIT_ASSERT(arg & SLJIT_MEM); if (!(arg & OFFS_REG_MASK) && argw <= SIMM_MAX && argw >= SIMM_MIN) { @@ -1032,7 +1032,7 @@ static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, slj static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) { sljit_s32 base = arg & REG_MASK; - sljit_s32 tmp_r = TMP_REG1; + sljit_s32 tmp_r = (flags & MEM_USE_TMP2) ? TMP_REG2 : TMP_REG1; sljit_sw offset, argw_hi; SLJIT_ASSERT(arg & SLJIT_MEM); @@ -1041,11 +1041,6 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl next_argw = 0; } - /* Since tmp can be the same as base or offset registers, - * these might be unavailable after modifying tmp. */ - if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA) && reg == TMP_REG2) - tmp_r = reg; - if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { argw &= 0x3; @@ -1291,7 +1286,7 @@ static sljit_s32 emit_rev16(struct sljit_compiler *compiler, sljit_s32 op, sljit static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, sljit_s32 dst, sljit_s32 src1, sljit_sw src2) { - sljit_s32 is_overflow, is_carry, carry_src_r, is_handled; + sljit_s32 is_overflow, is_carry, carry_src_r, is_handled, reg; sljit_ins op_imm, op_reg; #if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) sljit_ins word = (sljit_ins)(op & SLJIT_32) >> 5; @@ -1301,20 +1296,20 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl switch (GET_OPCODE(op)) { case SLJIT_MOV: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); if (dst != src2) return push_inst(compiler, ADDI | RD(dst) | RS1(src2) | IMM_I(0)); return SLJIT_SUCCESS; case SLJIT_MOV_U8: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) return push_inst(compiler, ANDI | RD(dst) | RS1(src2) | IMM_I(0xff)); SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; case SLJIT_MOV_S8: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { FAIL_IF(push_inst(compiler, SLLI | WORD | RD(dst) | RS1(src2) | IMM_EXTEND(24))); return push_inst(compiler, SRAI | WORD | RD(dst) | RS1(dst) | IMM_EXTEND(24)); @@ -1323,7 +1318,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return SLJIT_SUCCESS; case SLJIT_MOV_U16: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { FAIL_IF(push_inst(compiler, SLLI | WORD | RD(dst) | RS1(src2) | IMM_EXTEND(16))); return push_inst(compiler, SRLI | WORD | RD(dst) | RS1(dst) | IMM_EXTEND(16)); @@ -1332,7 +1327,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return SLJIT_SUCCESS; case SLJIT_MOV_S16: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { FAIL_IF(push_inst(compiler, SLLI | WORD | RD(dst) | RS1(src2) | IMM_EXTEND(16))); return push_inst(compiler, SRAI | WORD | RD(dst) | RS1(dst) | IMM_EXTEND(16)); @@ -1342,7 +1337,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl #if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) case SLJIT_MOV_U32: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { FAIL_IF(push_inst(compiler, SLLI | RD(dst) | RS1(src2) | IMM_I(32))); return push_inst(compiler, SRLI | RD(dst) | RS1(dst) | IMM_I(32)); @@ -1351,7 +1346,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return SLJIT_SUCCESS; case SLJIT_MOV_S32: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) return push_inst(compiler, ADDI | 0x8 | RD(dst) | RS1(src2) | IMM_I(0)); SLJIT_ASSERT(dst == src2); @@ -1360,7 +1355,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_CLZ: case SLJIT_CTZ: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); return emit_clz_ctz(compiler, op, dst, src2); case SLJIT_REV: @@ -1368,17 +1363,17 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl #if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) case SLJIT_REV_U32: #endif /* SLJIT_CONFIG_RISCV_32 */ - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); return emit_rev(compiler, op, dst, src2); case SLJIT_REV_U16: case SLJIT_REV_S16: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); return emit_rev16(compiler, op, dst, src2); #if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) case SLJIT_REV_U32: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && dst != TMP_REG1); + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM) && dst != TMP_REG1); FAIL_IF(emit_rev(compiler, op, dst, src2)); if (dst == TMP_REG2) return SLJIT_SUCCESS; @@ -1506,8 +1501,9 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl is_handled = 1; if (flags & SRC2_IMM) { - FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG2) | RS1(TMP_ZERO) | IMM_I(src2))); - src2 = TMP_REG2; + reg = (src1 == TMP_REG1) ? TMP_REG2 : TMP_REG1; + FAIL_IF(push_inst(compiler, ADDI | RD(reg) | RS1(TMP_ZERO) | IMM_I(src2))); + src2 = reg; flags &= ~SRC2_IMM; } @@ -1735,7 +1731,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 sljit_s32 dst_r = TMP_REG2; sljit_s32 src1_r; sljit_sw src2_r = 0; - sljit_s32 sugg_src2_r = TMP_REG2; + sljit_s32 src2_tmp_reg = (GET_OPCODE(op) >= SLJIT_OP2_BASE && FAST_IS_REG(src1)) ? TMP_REG1 : TMP_REG2; if (!(flags & ALT_KEEP_CACHE)) { compiler->cache_arg = 0; @@ -1751,7 +1747,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 dst_r = dst; flags |= REG_DEST; if (flags & MOVE_OP) - sugg_src2_r = dst_r; + src2_tmp_reg = dst_r; } else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw)) flags |= SLOW_DEST; @@ -1777,16 +1773,14 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 if (FAST_IS_REG(src1)) { src1_r = src1; flags |= REG1_SOURCE; - } - else if (src1 == SLJIT_IMM) { + } else if (src1 == SLJIT_IMM) { if (src1w) { FAIL_IF(load_immediate(compiler, TMP_REG1, src1w, TMP_REG3)); src1_r = TMP_REG1; } else src1_r = TMP_ZERO; - } - else { + } else { if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w)) FAIL_IF(compiler->error); else @@ -1800,14 +1794,12 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 flags |= REG2_SOURCE; if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP) dst_r = (sljit_s32)src2_r; - } - else if (src2 == SLJIT_IMM) { + } else if (src2 == SLJIT_IMM) { if (!(flags & SRC2_IMM)) { if (src2w) { - FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w, TMP_REG3)); - src2_r = sugg_src2_r; - } - else { + FAIL_IF(load_immediate(compiler, src2_tmp_reg, src2w, TMP_REG3)); + src2_r = src2_tmp_reg; + } else { src2_r = TMP_ZERO; if (flags & MOVE_OP) { if (dst & SLJIT_MEM) @@ -1817,30 +1809,28 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 } } } - } - else { - if (getput_arg_fast(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w)) + } else { + if (getput_arg_fast(compiler, flags | LOAD_DATA, src2_tmp_reg, src2, src2w)) FAIL_IF(compiler->error); else flags |= SLOW_SRC2; - src2_r = sugg_src2_r; + src2_r = src2_tmp_reg; } if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { SLJIT_ASSERT(src2_r == TMP_REG2); - if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + if ((flags & SLOW_DEST) && !can_cache(src2, src2w, src1, src1w) && can_cache(src2, src2w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA | MEM_USE_TMP2, TMP_REG2, src2, src2w, dst, dstw)); + } else { FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w)); FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); } - else { - FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); - FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw)); - } } else if (flags & SLOW_SRC1) FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); else if (flags & SLOW_SRC2) - FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA | ((src1_r == TMP_REG1) ? MEM_USE_TMP2 : 0), src2_tmp_reg, src2, src2w, dst, dstw)); FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); @@ -1923,42 +1913,42 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile case SLJIT_MOV32: #endif case SLJIT_MOV_P: - return emit_op(compiler, SLJIT_MOV, WORD_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, SLJIT_MOV, WORD_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, srcw); #if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) case SLJIT_MOV_U32: - return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u32)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u32)srcw : srcw); case SLJIT_MOV_S32: /* Logical operators have no W variant, so sign extended input is necessary for them. */ case SLJIT_MOV32: - return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s32)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s32)srcw : srcw); #endif case SLJIT_MOV_U8: - return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw); + return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw); case SLJIT_MOV_S8: - return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw); + return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw); case SLJIT_MOV_U16: - return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw); + return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw); case SLJIT_MOV_S16: - return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s16)srcw : srcw); + return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s16)srcw : srcw); case SLJIT_CLZ: case SLJIT_CTZ: case SLJIT_REV: - return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, op, flags, dst, dstw, TMP_ZERO, 0, src, srcw); case SLJIT_REV_U16: case SLJIT_REV_S16: - return emit_op(compiler, op, HALF_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, op, HALF_DATA, dst, dstw, TMP_ZERO, 0, src, srcw); case SLJIT_REV_U32: case SLJIT_REV_S32: - return emit_op(compiler, op | SLJIT_32, INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, op | SLJIT_32, INT_DATA, dst, dstw, TMP_ZERO, 0, src, srcw); } SLJIT_UNREACHABLE(); @@ -2045,6 +2035,30 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil return sljit_emit_op2(compiler, op, 0, 0, src1, src1w, src2, src2w); } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + sljit_ins word = (sljit_ins)(op & SLJIT_32) >> 5; +#endif /* SLJIT_CONFIG_RISCV_64 */ + + CHECK_ERROR(); + CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w)); + + SLJIT_ASSERT(WORD == 0 || WORD == 0x8); + + switch (GET_OPCODE(op)) { + case SLJIT_MULADD: + SLJIT_SKIP_CHECKS(compiler); + FAIL_IF(sljit_emit_op2(compiler, SLJIT_MUL | (op & SLJIT_32), TMP_REG2, 0, src1, src1w, src2, src2w)); + return push_inst(compiler, ADD | WORD | RD(dst_reg) | RS1(dst_reg) | RS2(TMP_REG2)); + } + + return SLJIT_SUCCESS; +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst_reg, sljit_s32 src1_reg, @@ -2396,7 +2410,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil switch (GET_OPCODE(op)) { case SLJIT_MOV_F64: if (src != dst_r) { - if (dst_r != TMP_FREG1) + if (!(dst & SLJIT_MEM)) FAIL_IF(push_inst(compiler, FSGNJ_S | FMT(op) | FRD(dst_r) | FRS1(src) | FRS2(src))); else dst_r = src; @@ -2455,11 +2469,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil } if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { - if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + if ((dst & SLJIT_MEM) && !can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w)); FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); - } - else { + } else { FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); } @@ -2495,7 +2508,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil return push_inst(compiler, FSGNJ_S | FMT(op) | FRD(dst_r) | FRS1(src1) | FRS2(src2)); } - if (dst_r == TMP_FREG2) + if (dst_r != dst) FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0)); return SLJIT_SUCCESS; @@ -2653,6 +2666,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler struct sljit_jump *jump; sljit_s32 flags; sljit_ins inst; + sljit_s32 src2_tmp_reg = FAST_IS_REG(src1) ? TMP_REG1 : TMP_REG2; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w)); @@ -2673,8 +2687,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler } if (src2 & SLJIT_MEM) { - PTR_FAIL_IF(emit_op_mem2(compiler, flags, TMP_REG2, src2, src2w, 0, 0)); - src2 = TMP_REG2; + PTR_FAIL_IF(emit_op_mem2(compiler, flags, src2_tmp_reg, src2, src2w, 0, 0)); + src2 = src2_tmp_reg; } if (src1 == SLJIT_IMM) { @@ -2688,8 +2702,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler if (src2 == SLJIT_IMM) { if (src2w != 0) { - PTR_FAIL_IF(load_immediate(compiler, TMP_REG2, src2w, TMP_REG3)); - src2 = TMP_REG2; + PTR_FAIL_IF(load_immediate(compiler, src2_tmp_reg, src2w, TMP_REG3)); + src2 = src2_tmp_reg; } else src2 = TMP_ZERO; @@ -2918,13 +2932,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *comp type ^= 0x1; } else { if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) { - FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG2) | RS1(dst_reg) | IMM_I(0))); + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(dst_reg) | IMM_I(0))); if ((src1 & REG_MASK) == dst_reg) - src1 = (src1 & ~REG_MASK) | TMP_REG2; + src1 = (src1 & ~REG_MASK) | TMP_REG1; if (OFFS_REG(src1) == dst_reg) - src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG2); + src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1); } FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst_reg) | RS1(src2_reg) | IMM_I(0))); @@ -2948,7 +2962,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *comp } else FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst_reg) | RS1(src1) | IMM_I(0))); - *ptr = get_jump_instruction(type & ~SLJIT_32) | (sljit_ins)((compiler->size - size) << 9); + size = compiler->size - size; + *ptr = get_jump_instruction(type & ~SLJIT_32) | (sljit_ins)((size & 0x7) << 9) | (sljit_ins)((size >> 3) << 25); return SLJIT_SUCCESS; } @@ -2987,7 +3002,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *com else FAIL_IF(push_inst(compiler, FSGNJ_S | FMT(type) | FRD(dst_freg) | FRS1(src1) | FRS2(src1))); - *ptr = get_jump_instruction(type & ~SLJIT_32) | (sljit_ins)((compiler->size - size) << 9); + size = compiler->size - size; + *ptr = get_jump_instruction(type & ~SLJIT_32) | (sljit_ins)((size & 0x7) << 9) | (sljit_ins)((size >> 3) << 25); return SLJIT_SUCCESS; } diff --git a/src/sljit/sljitNativeS390X.c b/src/sljit/sljitNativeS390X.c index 647f54200..3d709678c 100644 --- a/src/sljit/sljitNativeS390X.c +++ b/src/sljit/sljitNativeS390X.c @@ -1573,6 +1573,14 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil buf = buf->next; } while (buf); + if (next_label_size == half_count) { + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label = label->next; + } + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); SLJIT_ASSERT(code + (ins_size >> 1) == code_ptr); SLJIT_ASSERT((sljit_u8 *)pool + pool_size == (sljit_u8 *)pool_ptr); @@ -2456,7 +2464,7 @@ static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op, const struct ins_forms *forms; sljit_ins ins; - if (dst == (sljit_s32)tmp0 && flag_type <= SLJIT_SIG_LESS_EQUAL) { + if (dst == TMP_REG2 && flag_type <= SLJIT_SIG_LESS_EQUAL) { int compare_signed = flag_type >= SLJIT_SIG_LESS; compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_COMPARE; @@ -2556,7 +2564,7 @@ static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op, - the first operand is less if the sign bit of the result is not set The -result operation sets the corrent sign, because the result cannot be zero. The overflow is considered greater, since the result must be equal to INT_MIN so its sign bit is set. */ - FAIL_IF(push_inst(compiler, brc(0xe, 2 + 2))); + FAIL_IF(push_inst(compiler, brc(0xe, (op & SLJIT_32) ? (2 + 1) : (2 + 2)))); FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? lcr(tmp1, dst_r) : lcgr(tmp1, dst_r))); } else if (op & SLJIT_SET_Z) @@ -2718,7 +2726,7 @@ static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 o sljit_s32 type = GET_OPCODE(op); const struct ins_forms *forms; - if (src2 == SLJIT_IMM && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == (sljit_s32)tmp0))) { + if (src2 == SLJIT_IMM && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == TMP_REG2))) { sljit_s32 count16 = 0; sljit_uw imm = (sljit_uw)src2w; @@ -2734,13 +2742,13 @@ static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 o if ((imm & 0xffff000000000000ull) != 0) count16++; - if (type == SLJIT_AND && dst == (sljit_s32)tmp0 && count16 == 1) { - sljit_gpr src_r = tmp0; + if (type == SLJIT_AND && dst == TMP_REG2 && count16 == 1) { + sljit_gpr src_r = tmp1; if (FAST_IS_REG(src1)) src_r = gpr(src1 & REG_MASK); else - FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); + FAIL_IF(emit_move(compiler, tmp1, src1, src1w)); if ((imm & 0x000000000000ffffull) != 0 || imm == 0) return push_inst(compiler, 0xa7010000 /* tmll */ | R20A(src_r) | imm); @@ -2961,11 +2969,31 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { + sljit_s32 dst_reg = (GET_OPCODE(op) == SLJIT_SUB || GET_OPCODE(op) == SLJIT_AND) ? TMP_REG2 : TMP_REG1; + CHECK_ERROR(); CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); SLJIT_SKIP_CHECKS(compiler); - return sljit_emit_op2(compiler, op, (sljit_s32)tmp0, 0, src1, src1w, src2, src2w); + return sljit_emit_op2(compiler, op, dst_reg, 0, src1, src1w, src2, src2w); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w)); + + switch (GET_OPCODE(op)) { + case SLJIT_MULADD: + SLJIT_SKIP_CHECKS(compiler); + FAIL_IF(sljit_emit_op2(compiler, SLJIT_MUL | (op & SLJIT_32), 0 /* tmp0 */, 0, src1, src1w, src2, src2w)); + return push_inst(compiler, ((op & SLJIT_32) ? 0x1a00 /* ar */ : 0xb9080000 /* agr */) | R4A(gpr(dst_reg)) | R0A(tmp0)); + } + + return SLJIT_SUCCESS; } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, @@ -3374,12 +3402,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil FAIL_IF(push_inst(compiler, ins | F4(dst_r) | F0(src))); } - if (!(dst & SLJIT_MEM)) - return SLJIT_SUCCESS; - - SLJIT_ASSERT(dst_r == TMP_FREG1); + if (dst & SLJIT_MEM) + return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw); - return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw); + return SLJIT_SUCCESS; } #define FLOAT_MOV(op, dst_r, src_r) \ @@ -3450,7 +3476,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil if (dst & SLJIT_MEM) return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw); - SLJIT_ASSERT(dst_r != TMP_FREG1); return SLJIT_SUCCESS; } @@ -3796,8 +3821,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *comp return push_inst(compiler, ins | R36A(dst_r) | (mask << 32) | (sljit_ins)(src1w & 0xffff) << 16); } - FAIL_IF(push_load_imm_inst(compiler, tmp0, src1w)); - src_r = tmp0; + FAIL_IF(push_load_imm_inst(compiler, tmp1, src1w)); + src_r = tmp1; } else src_r = gpr(src1); diff --git a/src/sljit/sljitNativeX86_32.c b/src/sljit/sljitNativeX86_32.c index d7399f821..59ea04a5c 100644 --- a/src/sljit/sljitNativeX86_32.c +++ b/src/sljit/sljitNativeX86_32.c @@ -1246,6 +1246,68 @@ static sljit_s32 sljit_emit_get_return_address(struct sljit_compiler *compiler, /* Other operations */ /* --------------------------------------------------------------------- */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_reg) +{ + sljit_s32 dst = dst_reg; + sljit_sw dstw = 0; + sljit_sw src2w = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + + CHECK_EXTRA_REGS(dst, dstw, (void)0); + CHECK_EXTRA_REGS(src1, src1w, (void)0); + CHECK_EXTRA_REGS(src2_reg, src2w, (void)0); + + type &= ~SLJIT_32; + + if (dst & SLJIT_MEM) { + if (src1 == SLJIT_IMM || (!(src1 & SLJIT_MEM) && (src2_reg & SLJIT_MEM))) { + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + src1 = src2_reg; + src1w = src2w; + type ^= 0x1; + } else + EMIT_MOV(compiler, TMP_REG1, 0, src2_reg, src2w); + + dst_reg = TMP_REG1; + } else { + if (dst_reg != src2_reg) { + if (dst_reg == src1) { + src1 = src2_reg; + src1w = src2w; + type ^= 0x1; + } else if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) { + EMIT_MOV(compiler, dst_reg, 0, src1, src1w); + src1 = src2_reg; + src1w = src2w; + type ^= 0x1; + } else + EMIT_MOV(compiler, dst_reg, 0, src2_reg, src2w); + } + } + + if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && (src1 != SLJIT_IMM || dst_reg != TMP_REG1)) { + if (SLJIT_UNLIKELY(src1 == SLJIT_IMM)) { + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + src1 = TMP_REG1; + src1w = 0; + } + + FAIL_IF(emit_groupf(compiler, U8(get_jump_code((sljit_uw)type) - 0x40), dst_reg, src1, src1w)); + } else + FAIL_IF(emit_cmov_generic(compiler, type, dst_reg, src1, src1w)); + + if (dst & SLJIT_MEM) + return emit_mov(compiler, dst, dstw, TMP_REG1, 0); + return SLJIT_SUCCESS; +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 reg, sljit_s32 mem, sljit_sw memw) @@ -1446,10 +1508,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *comp if (u.imm == 0) { inst[2] = PXOR_x_xm; - inst[3] = U8(freg | (freg << 3) | MOD_REG); + inst[3] = U8(freg_map[freg] | (freg_map[freg] << 3) | MOD_REG); } else { inst[2] = MOVD_x_rm; - inst[3] = U8(reg_map[TMP_REG1] | (freg << 3) | MOD_REG); + inst[3] = U8(reg_map[TMP_REG1] | (freg_map[freg] << 3) | MOD_REG); } return SLJIT_SUCCESS; @@ -1459,7 +1521,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *comp sljit_s32 freg, sljit_f64 value) { sljit_u8 *inst; - sljit_s32 tmp_freg = freg; union { sljit_s32 imm[2]; sljit_f64 value; @@ -1475,8 +1536,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *comp return emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, freg, freg, 0); EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm[1]); - } else + } else { + SLJIT_ASSERT(cpu_feature_list != 0); + + if (!(cpu_feature_list & CPU_FEATURE_SSE41) && u.imm[1] != 0 && u.imm[0] != u.imm[1]) { + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_IMM, u.imm[0]); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_IMM, u.imm[1]); + + return emit_groupf(compiler, MOVLPD_x_m | EX86_SSE2, freg, SLJIT_MEM1(SLJIT_SP), 0); + } + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm[0]); + } FAIL_IF(emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, freg, TMP_REG1, 0)); @@ -1490,23 +1561,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *comp inst[0] = GROUP_0F; inst[1] = SHUFPS_x_xm; - inst[2] = U8(MOD_REG | (freg << 3) | freg); + inst[2] = U8(MOD_REG | (freg_map[freg] << 3) | freg_map[freg]); inst[3] = 0x51; return SLJIT_SUCCESS; } if (u.imm[0] != u.imm[1]) { - SLJIT_ASSERT(u.imm[1] != 0 && cpu_feature_list != 0); - + SLJIT_ASSERT(cpu_feature_list & CPU_FEATURE_SSE41); EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm[1]); - if (cpu_feature_list & CPU_FEATURE_SSE41) { - FAIL_IF(emit_groupf_ext(compiler, PINSRD_x_rm_i8 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2_OP1, freg, TMP_REG1, 0)); - return emit_byte(compiler, 1); - } - - FAIL_IF(emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, TMP_FREG, TMP_REG1, 0)); - tmp_freg = TMP_FREG; + FAIL_IF(emit_groupf_ext(compiler, PINSRD_x_rm_i8 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2_OP1, freg, TMP_REG1, 0)); + return emit_byte(compiler, 1); } inst = (sljit_u8*)ensure_buf(compiler, 1 + 3); @@ -1515,7 +1580,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *comp inst[0] = GROUP_0F; inst[1] = UNPCKLPS_x_xm; - inst[2] = U8(MOD_REG | (freg << 3) | tmp_freg); + inst[2] = U8(MOD_REG | (freg_map[freg] << 3) | freg_map[freg]); return SLJIT_SUCCESS; } @@ -1578,7 +1643,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compi inst[0] = GROUP_66; inst[1] = GROUP_0F; inst[2] = PSHUFD_x_xm; - inst[3] = U8(MOD_REG | (TMP_FREG << 3) | freg); + inst[3] = U8(MOD_REG | (TMP_FREG << 3) | freg_map[freg]); inst[4] = 1; } else if (reg != 0) FAIL_IF(emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, TMP_FREG, reg, regw)); @@ -1594,7 +1659,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compi inst[0] = GROUP_0F; inst[1] = UNPCKLPS_x_xm; - inst[2] = U8(MOD_REG | (freg << 3) | (reg == 0 ? freg : TMP_FREG)); + inst[2] = U8(MOD_REG | (freg_map[freg] << 3) | freg_map[reg == 0 ? freg : TMP_FREG]); } else FAIL_IF(emit_groupf(compiler, MOVD_rm_x | EX86_PREF_66 | EX86_SSE2_OP1, TMP_FREG, reg, regw)); diff --git a/src/sljit/sljitNativeX86_64.c b/src/sljit/sljitNativeX86_64.c index b537a1a3f..1ab79293c 100644 --- a/src/sljit/sljitNativeX86_64.c +++ b/src/sljit/sljitNativeX86_64.c @@ -365,7 +365,7 @@ static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_pt int short_addr = !(jump->flags & SLJIT_REWRITABLE_JUMP) && (jump->flags & JUMP_ADDR) && (jump->u.target <= 0xffffffff); /* The relative jump below specialized for this case. */ - SLJIT_ASSERT(reg_map[TMP_REG2] >= 8); + SLJIT_ASSERT(reg_map[TMP_REG2] >= 8 && TMP_REG2 != SLJIT_TMP_DEST_REG); if (type < SLJIT_JUMP) { /* Invert type. */ @@ -1007,6 +1007,46 @@ static sljit_s32 sljit_emit_get_return_address(struct sljit_compiler *compiler, /* Other operations */ /* --------------------------------------------------------------------- */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_reg) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + + compiler->mode32 = type & SLJIT_32; + type &= ~SLJIT_32; + + if (dst_reg != src2_reg) { + if (dst_reg == src1) { + src1 = src2_reg; + src1w = 0; + type ^= 0x1; + } else if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) { + EMIT_MOV(compiler, dst_reg, 0, src1, src1w); + src1 = src2_reg; + src1w = 0; + type ^= 0x1; + } else + EMIT_MOV(compiler, dst_reg, 0, src2_reg, 0); + } + + if (sljit_has_cpu_feature(SLJIT_HAS_CMOV)) { + if (SLJIT_UNLIKELY(src1 == SLJIT_IMM)) { + EMIT_MOV(compiler, TMP_REG2, 0, src1, src1w); + src1 = TMP_REG2; + src1w = 0; + } + + return emit_groupf(compiler, U8(get_jump_code((sljit_uw)type) - 0x40), dst_reg, src1, src1w); + } + + return emit_cmov_generic(compiler, type, dst_reg, src1, src1w); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 reg, sljit_s32 mem, sljit_sw memw) diff --git a/src/sljit/sljitNativeX86_common.c b/src/sljit/sljitNativeX86_common.c index b5e08d4d1..de519e6b9 100644 --- a/src/sljit/sljitNativeX86_common.c +++ b/src/sljit/sljitNativeX86_common.c @@ -391,6 +391,7 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = { #define CPU_FEATURE_CMOV 0x020 #define CPU_FEATURE_AVX 0x040 #define CPU_FEATURE_AVX2 0x080 +#define CPU_FEATURE_OSXSAVE 0x100 static sljit_u32 cpu_feature_list = 0; @@ -491,6 +492,42 @@ static void execute_cpu_id(sljit_u32 info[4]) #endif /* _MSC_VER && _MSC_VER >= 1400 */ } +static sljit_u32 execute_get_xcr0_low(void) +{ + sljit_u32 xcr0; + +#if defined(_MSC_VER) && _MSC_VER >= 1400 + + xcr0 = (sljit_u32)_xgetbv(0); + +#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C) || defined(__TINYC__) + + /* AT&T syntax. */ + __asm__ ( + "xorl %%ecx, %%ecx\n" + "xgetbv\n" + : "=a" (xcr0) + : +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + : "ecx", "edx" +#else /* !SLJIT_CONFIG_X86_32 */ + : "rcx", "rdx" +#endif /* SLJIT_CONFIG_X86_32 */ + ); + +#else /* _MSC_VER < 1400 */ + + /* Intel syntax. */ + __asm { + mov ecx, 0 + xgetbv + mov xcr0, eax + } + +#endif /* _MSC_VER && _MSC_VER >= 1400 */ + return xcr0; +} + static void get_cpu_features(void) { sljit_u32 feature_list = CPU_FEATURE_DETECTED; @@ -518,6 +555,8 @@ static void get_cpu_features(void) if (info[2] & 0x80000) feature_list |= CPU_FEATURE_SSE41; + if (info[2] & 0x8000000) + feature_list |= CPU_FEATURE_OSXSAVE; if (info[2] & 0x10000000) feature_list |= CPU_FEATURE_AVX; #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) @@ -535,6 +574,9 @@ static void get_cpu_features(void) if (info[2] & 0x20) feature_list |= CPU_FEATURE_LZCNT; + if ((feature_list & CPU_FEATURE_OSXSAVE) && (execute_get_xcr0_low() & 0x4) == 0) + feature_list &= ~(sljit_u32)(CPU_FEATURE_AVX | CPU_FEATURE_AVX2); + cpu_feature_list = feature_list; } @@ -1031,8 +1073,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) BINARY_IMM32(op_imm, immw, arg, argw); \ } \ else { \ - FAIL_IF(emit_load_imm64(compiler, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, immw)); \ - inst = emit_x86_instruction(compiler, 1, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \ + FAIL_IF(emit_load_imm64(compiler, FAST_IS_REG(arg) ? TMP_REG2 : TMP_REG1, immw)); \ + inst = emit_x86_instruction(compiler, 1, FAST_IS_REG(arg) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \ FAIL_IF(!inst); \ *inst = (op_mr); \ } \ @@ -2358,10 +2400,9 @@ static sljit_s32 emit_test_binary(struct sljit_compiler *compiler, inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w); FAIL_IF(!inst); *inst = GROUP_F7; - } - else { - FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src2w)); - inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src1, src1w); + } else { + FAIL_IF(emit_load_imm64(compiler, FAST_IS_REG(src1) ? TMP_REG2 : TMP_REG1, src2w)); + inst = emit_x86_instruction(compiler, 1, FAST_IS_REG(src1) ? TMP_REG2 : TMP_REG1, 0, src1, src1w); FAIL_IF(!inst); *inst = TEST_rm_r; } @@ -2598,8 +2639,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile compiler->mode32 = op & SLJIT_32; #endif - SLJIT_ASSERT(dst != TMP_REG1 || HAS_FLAGS(op)); - switch (GET_OPCODE(op)) { case SLJIT_ADD: if (!HAS_FLAGS(op)) { @@ -2693,12 +2732,44 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil compiler->mode32 = op & SLJIT_32; #endif - if (opcode == SLJIT_SUB) { + if (opcode == SLJIT_SUB) return emit_cmp_binary(compiler, src1, src1w, src2, src2w); - } + return emit_test_binary(compiler, src1, src1w, src2, src2w); } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_u8* inst; + sljit_sw dstw = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + CHECK_EXTRA_REGS(dst_reg, dstw, (void)0); + CHECK_EXTRA_REGS(src1, src1w, (void)0); + CHECK_EXTRA_REGS(src2, src2w, (void)0); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = op & SLJIT_32; +#endif + + switch (GET_OPCODE(op)) { + case SLJIT_MULADD: + FAIL_IF(emit_mul(compiler, TMP_REG1, 0, src1, src1w, src2, src2w)); + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst_reg, dstw); + FAIL_IF(!inst); + *inst = ADD_rm_r; + return SLJIT_SUCCESS; + } + + return SLJIT_SUCCESS; +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst_reg, sljit_s32 src1_reg, @@ -3231,15 +3302,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil /* Swap arguments. */ src2 = src1; src2w = src1w; - } - else if (dst != src2) + } else if (dst != src2) FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src1, src1w)); else { dst_r = TMP_FREG; FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w)); } - } - else { + } else { dst_r = TMP_FREG; FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w)); } @@ -3262,7 +3331,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil break; } - if (dst_r == TMP_FREG) + if (dst_r != dst) return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); return SLJIT_SUCCESS; } @@ -3512,82 +3581,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co #endif /* SLJIT_CONFIG_X86_64 */ } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 dst_reg, - sljit_s32 src1, sljit_sw src1w, - sljit_s32 src2_reg) -{ -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - sljit_s32 dst = dst_reg; - sljit_sw dstw = 0; -#endif /* SLJIT_CONFIG_X86_32 */ - sljit_sw src2w = 0; - - CHECK_ERROR(); - CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg)); - - ADJUST_LOCAL_OFFSET(src1, src1w); - - CHECK_EXTRA_REGS(dst, dstw, (void)0); - CHECK_EXTRA_REGS(src1, src1w, (void)0); - CHECK_EXTRA_REGS(src2_reg, src2w, (void)0); - -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - compiler->mode32 = type & SLJIT_32; -#endif /* SLJIT_CONFIG_X86_64 */ - type &= ~SLJIT_32; - -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - if (dst & SLJIT_MEM) { - if (src1 == SLJIT_IMM || (!(src1 & SLJIT_MEM) && (src2_reg & SLJIT_MEM))) { - EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); - src1 = src2_reg; - src1w = src2w; - type ^= 0x1; - } else - EMIT_MOV(compiler, TMP_REG1, 0, src2_reg, src2w); - - dst_reg = TMP_REG1; - } else { -#endif /* SLJIT_CONFIG_X86_32 */ - if (dst_reg != src2_reg) { - if (dst_reg == src1) { - src1 = src2_reg; - src1w = src2w; - type ^= 0x1; - } else { - if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) { - EMIT_MOV(compiler, dst_reg, 0, src1, src1w); - src1 = src2_reg; - src1w = src2w; - type ^= 0x1; - } else - EMIT_MOV(compiler, dst_reg, 0, src2_reg, src2w); - } - } - - if (SLJIT_UNLIKELY(src1 == SLJIT_IMM)) { - SLJIT_ASSERT(dst_reg != TMP_REG1); - EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); - src1 = TMP_REG1; - src1w = 0; - } -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - } -#endif /* SLJIT_CONFIG_X86_32 */ - - if (sljit_has_cpu_feature(SLJIT_HAS_CMOV)) - FAIL_IF(emit_groupf(compiler, U8(get_jump_code((sljit_uw)type) - 0x40), dst_reg, src1, src1w)); - else - FAIL_IF(emit_cmov_generic(compiler, type, dst_reg, src1, src1w)); - -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - if (dst_reg == TMP_REG1) - return emit_mov(compiler, dst, dstw, TMP_REG1, 0); -#endif /* SLJIT_CONFIG_X86_32 */ - return SLJIT_SUCCESS; -} - SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 dst_freg, sljit_s32 src1, sljit_sw src1w, diff --git a/src/sljit/sljitSerialize.c b/src/sljit/sljitSerialize.c index 790ddb2d0..6ef161fd4 100644 --- a/src/sljit/sljitSerialize.c +++ b/src/sljit/sljitSerialize.c @@ -115,7 +115,7 @@ struct sljit_serialized_const { SLJIT_API_FUNC_ATTRIBUTE sljit_uw* sljit_serialize_compiler(struct sljit_compiler *compiler, sljit_s32 options, sljit_uw *size) { - sljit_uw total_size = sizeof(struct sljit_serialized_compiler); + sljit_uw serialized_size = sizeof(struct sljit_serialized_compiler); struct sljit_memory_fragment *buf; struct sljit_label *label; struct sljit_jump *jump; @@ -141,39 +141,39 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_uw* sljit_serialize_compiler(struct sljit_compile #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ || (defined SLJIT_DEBUG && SLJIT_DEBUG) if (!(options & SLJIT_SERIALIZE_IGNORE_DEBUG)) - total_size += sizeof(struct sljit_serialized_debug_info); + serialized_size += sizeof(struct sljit_serialized_debug_info); #endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */ #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) - total_size += SLJIT_SERIALIZE_ALIGN(compiler->cpool_fill * (sizeof(sljit_uw) + 1)); + serialized_size += SLJIT_SERIALIZE_ALIGN(compiler->cpool_fill * (sizeof(sljit_uw) + 1)); #endif /* SLJIT_CONFIG_ARM_V6 */ /* Compute the size of the data. */ buf = compiler->buf; while (buf != NULL) { - total_size += sizeof(sljit_uw) + SLJIT_SERIALIZE_ALIGN(buf->used_size); + serialized_size += sizeof(sljit_uw) + SLJIT_SERIALIZE_ALIGN(buf->used_size); buf = buf->next; } - total_size += compiler->label_count * sizeof(struct sljit_serialized_label); + serialized_size += compiler->label_count * sizeof(struct sljit_serialized_label); jump = compiler->jumps; while (jump != NULL) { - total_size += sizeof(struct sljit_serialized_jump); + serialized_size += sizeof(struct sljit_serialized_jump); jump = jump->next; } const_ = compiler->consts; while (const_ != NULL) { - total_size += sizeof(struct sljit_serialized_const); + serialized_size += sizeof(struct sljit_serialized_const); const_ = const_->next; } - result = (sljit_u8*)SLJIT_MALLOC(total_size, compiler->allocator_data); + result = (sljit_u8*)SLJIT_MALLOC(serialized_size, compiler->allocator_data); PTR_FAIL_IF_NULL(result); if (size != NULL) - *size = total_size; + *size = serialized_size; ptr = result; serialized_compiler = (struct sljit_serialized_compiler*)ptr; @@ -281,7 +281,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_uw* sljit_serialize_compiler(struct sljit_compile } #endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */ - SLJIT_ASSERT((sljit_uw)(ptr - result) == total_size); + SLJIT_ASSERT((sljit_uw)(ptr - result) == serialized_size); return (sljit_uw*)result; }