/* ir/ir_x86.dasc (8318 lines, 225 KiB, plaintext) */

#include "ir.h"
#include "ir_x86.h"
#include "ir_private.h"
/*
 * IR_SET_ALIGNED(alignment, decl): attach a compiler-specific alignment
 * attribute to the declaration `decl`. When neither the MSVC nor the GCC
 * style attribute is selected, `decl` is emitted unchanged (no alignment).
 */
#ifdef _WIN32
# define IR_SET_ALIGNED(alignment, decl) __declspec(align(alignment)) decl
#elif defined(HAVE_ATTRIBUTE_ALIGNED)
# define IR_SET_ALIGNED(alignment, decl) decl __attribute__ ((__aligned__ (alignment)))
#else
# define IR_SET_ALIGNED(alignment, decl) decl
#endif
/*
 * DynASM memory hooks, defined before the DynASM headers are included.
 *
 * DASM_M_GROW(ctx, t, p, sz, need): if the current size `sz` is smaller than
 * `need`, grow it (starting from at least 16 and doubling until it covers
 * `need`), reallocate `p` through ir_mem_realloc() and store the new size
 * back into `sz`. The `ctx` argument is not used. The result of
 * ir_mem_realloc() is assigned without a check here.
 */
#define DASM_M_GROW(ctx, t, p, sz, need) \
do { \
size_t _sz = (sz), _need = (need); \
if (_sz < _need) { \
if (_sz < 16) _sz = 16; \
while (_sz < _need) _sz += _sz; \
(p) = (t *)ir_mem_realloc((p), _sz); \
(sz) = _sz; \
} \
} while(0)
/* DASM_M_FREE(ctx, p, sz): release `p` through ir_mem_free(); `ctx` and `sz` are ignored. */
#define DASM_M_FREE(ctx, p, sz) ir_mem_free(p)
/* Debug builds turn on DynASM's DASM_CHECKS. */
#if IR_DEBUG
# define DASM_CHECKS
#endif
#include "dynasm/dasm_proto.h"
#include "dynasm/dasm_x86.h"
/* With GCC-compatible compilers, ignore -Warray-bounds diagnostics from this point on. */
#if defined(__GNUC__)
# pragma GCC diagnostic ignored "-Warray-bounds"
#endif
/*
 * DynASM configuration: pick the x64 or x86 instruction set depending on
 * the X64 DynASM define, name the generated action list (dasm_actions) and
 * the globals prefix (ir_lb), and declare the output sections.
 */
|.if X64
|.arch x64
|.else
|.arch x86
|.endif
|.actionlist dasm_actions
|.globals ir_lb
|.section code, cold_code, rodata, jmp_table
/* True when `val`, converted to intptr_t, lies in the signed 32-bit range. */
#define IR_IS_SIGNED_32BIT(val) ((((intptr_t)(val)) <= 0x7fffffff) && (((intptr_t)(val)) >= (-2147483647 - 1)))
/* True when `val`, converted to uintptr_t, does not exceed 0xffffffff. */
#define IR_IS_UNSIGNED_32BIT(val) (((uintptr_t)(val)) <= 0xffffffff)
/*
 * Range check selected by the signedness of `type`: uses the .i64 member of
 * `val` for signed types and the .u64 member otherwise.
 */
#define IR_IS_32BIT(type, val) (IR_IS_TYPE_SIGNED(type) ? IR_IS_SIGNED_32BIT((val).i64) : IR_IS_UNSIGNED_32BIT((val).u64))
/*
 * True when a code buffer is set and the distance from `addr` to both the
 * start and the end of that buffer fits into a signed 32-bit value.
 * Expects a variable `ctx` with code_buffer/code_buffer_size in scope.
 *
 * The macro argument is now actually used (and parenthesized); previously
 * the body referred to an identifier named `addr` regardless of what was
 * passed in.
 */
#define IR_MAY_USE_32BIT_ADDR(addr) \
	(ctx->code_buffer && \
	IR_IS_SIGNED_32BIT((char*)(addr) - (char*)ctx->code_buffer) && \
	IR_IS_SIGNED_32BIT((char*)(addr) - ((char*)ctx->code_buffer + ctx->code_buffer_size)))
/*
 * Convert a spill position into a frame offset. With IR_USE_FRAME_POINTER
 * the offset is shifted down by (stack_frame_size - stack_frame_alignment);
 * otherwise call_stack_size is added. Expects `ctx` and `data` in scope.
 */
#define IR_SPILL_POS_TO_OFFSET(offset) \
	((ctx->flags & IR_USE_FRAME_POINTER) ? \
		((offset) - (data->ra_data.stack_frame_size - data->stack_frame_alignment)) : \
		((offset) + data->call_stack_size))
/*
 * Integer operand-size dispatch macros.
 *
 * Each macro below switches on ir_type_size[type] at code-generation time
 * and emits `op` with the matching operand width (byte/word/dword, plus
 * qword only when DynASM is run with X64). Any other size hits IR_ASSERT(0).
 */
/* ASM_REG_OP: single register operand. */
|.macro ASM_REG_OP, op, type, reg
|| switch (ir_type_size[type]) {
|| case 1:
| op Rb(reg)
|| break;
|| case 2:
| op Rw(reg)
|| break;
|| case 4:
| op Rd(reg)
|| break;
|.if X64
|| case 8:
| op Rq(reg)
|| break;
|.endif
|| default:
|| IR_ASSERT(0);
|| }
|.endmacro
/* ASM_MEM_OP: single memory operand with an explicit size prefix. */
|.macro ASM_MEM_OP, op, type, mem
|| switch (ir_type_size[type]) {
|| case 1:
| op byte mem
|| break;
|| case 2:
| op word mem
|| break;
|| case 4:
| op dword mem
|| break;
|.if X64
|| case 8:
| op qword mem
|| break;
|.endif
|| default:
|| IR_ASSERT(0);
|| }
|.endmacro
/* ASM_REG_REG_OP: register destination, register source, same width. */
|.macro ASM_REG_REG_OP, op, type, dst, src
|| switch (ir_type_size[type]) {
|| case 1:
| op Rb(dst), Rb(src)
|| break;
|| case 2:
| op Rw(dst), Rw(src)
|| break;
|| case 4:
| op Rd(dst), Rd(src)
|| break;
|.if X64
|| case 8:
| op Rq(dst), Rq(src)
|| break;
|.endif
|| default:
|| IR_ASSERT(0);
|| }
|.endmacro
/*
 * ASM_REG_REG_OP2: like ASM_REG_REG_OP, except that 1-byte types share the
 * 16-bit (word) register form with 2-byte types.
 */
|.macro ASM_REG_REG_OP2, op, type, dst, src
|| switch (ir_type_size[type]) {
|| case 1:
|| case 2:
| op Rw(dst), Rw(src)
|| break;
|| case 4:
| op Rd(dst), Rd(src)
|| break;
|.if X64
|| case 8:
| op Rq(dst), Rq(src)
|| break;
|.endif
|| default:
|| IR_ASSERT(0);
|| }
|.endmacro
/* ASM_REG_IMM_OP: register destination; `src` is passed through unchanged as the second operand. */
|.macro ASM_REG_IMM_OP, op, type, dst, src
|| switch (ir_type_size[type]) {
|| case 1:
| op Rb(dst), src
|| break;
|| case 2:
| op Rw(dst), src
|| break;
|| case 4:
| op Rd(dst), src
|| break;
|.if X64
|| case 8:
| op Rq(dst), src
|| break;
|.endif
|| default:
|| IR_ASSERT(0);
|| }
|.endmacro
/* ASM_MEM_REG_OP: memory destination, register source. */
|.macro ASM_MEM_REG_OP, op, type, dst, src
|| switch (ir_type_size[type]) {
|| case 1:
| op byte dst, Rb(src)
|| break;
|| case 2:
| op word dst, Rw(src)
|| break;
|| case 4:
| op dword dst, Rd(src)
|| break;
|.if X64
|| case 8:
| op qword dst, Rq(src)
|| break;
|.endif
|| default:
|| IR_ASSERT(0);
|| }
|.endmacro
/* ASM_MEM_IMM_OP: memory destination; `src` is passed through unchanged as the second operand. */
|.macro ASM_MEM_IMM_OP, op, type, dst, src
|| switch (ir_type_size[type]) {
|| case 1:
| op byte dst, src
|| break;
|| case 2:
| op word dst, src
|| break;
|| case 4:
| op dword dst, src
|| break;
|.if X64
|| case 8:
| op qword dst, src
|| break;
|.endif
|| default:
|| IR_ASSERT(0);
|| }
|.endmacro
/* ASM_REG_MEM_OP: register destination, memory source. */
|.macro ASM_REG_MEM_OP, op, type, dst, src
|| switch (ir_type_size[type]) {
|| case 1:
| op Rb(dst), byte src
|| break;
|| case 2:
| op Rw(dst), word src
|| break;
|| case 4:
| op Rd(dst), dword src
|| break;
|.if X64
|| case 8:
| op Rq(dst), qword src
|| break;
|.endif
|| default:
|| IR_ASSERT(0);
|| }
|.endmacro
/*
 * IMUL emission macros. They dispatch on ir_type_size[type] like the generic
 * ASM_*_OP macros but provide no 1-byte case: a 1-byte type falls into the
 * default branch and hits IR_ASSERT(0). The 8-byte case exists only for X64.
 */
/* ASM_REG_REG_IMUL: imul dst_reg, src_reg. */
|.macro ASM_REG_REG_IMUL, type, dst, src
|| switch (ir_type_size[type]) {
|| case 2:
| imul Rw(dst), Rw(src)
|| break;
|| case 4:
| imul Rd(dst), Rd(src)
|| break;
|.if X64
|| case 8:
| imul Rq(dst), Rq(src)
|| break;
|.endif
|| default:
|| IR_ASSERT(0);
|| }
|.endmacro
/* ASM_REG_IMM_IMUL: imul dst_reg, src (src passed through unchanged). */
|.macro ASM_REG_IMM_IMUL, type, dst, src
|| switch (ir_type_size[type]) {
|| case 2:
| imul Rw(dst), src
|| break;
|| case 4:
| imul Rd(dst), src
|| break;
|.if X64
|| case 8:
| imul Rq(dst), src
|| break;
|.endif
|| default:
|| IR_ASSERT(0);
|| }
|.endmacro
/* ASM_REG_MEM_IMUL: imul dst_reg, sized memory operand. */
|.macro ASM_REG_MEM_IMUL, type, dst, src
|| switch (ir_type_size[type]) {
|| case 2:
| imul Rw(dst), word src
|| break;
|| case 4:
| imul Rd(dst), dword src
|| break;
|.if X64
|| case 8:
| imul Rq(dst), qword src
|| break;
|.endif
|| default:
|| IR_ASSERT(0);
|| }
|.endmacro
/*
 * Floating-point emission macros.
 *
 * `fop` is the instruction used for IR_FLOAT, `dop` the one used for
 * IR_DOUBLE; any other type trips IR_ASSERT. Register arguments are IR
 * register numbers; the xmm index is obtained by subtracting
 * IR_REG_FP_FIRST.
 */
/* ASM_SSE2_REG_REG_OP: two-operand form, xmm destination and xmm source. */
|.macro ASM_SSE2_REG_REG_OP, fop, dop, type, dst, src
|| if (type == IR_DOUBLE) {
| dop xmm(dst-IR_REG_FP_FIRST), xmm(src-IR_REG_FP_FIRST)
|| } else {
|| IR_ASSERT(type == IR_FLOAT);
| fop xmm(dst-IR_REG_FP_FIRST), xmm(src-IR_REG_FP_FIRST)
|| }
|.endmacro
/* ASM_SSE2_REG_MEM_OP: two-operand form, xmm destination and qword/dword memory source. */
|.macro ASM_SSE2_REG_MEM_OP, fop, dop, type, dst, src
|| if (type == IR_DOUBLE) {
| dop xmm(dst-IR_REG_FP_FIRST), qword src
|| } else {
|| IR_ASSERT(type == IR_FLOAT);
| fop xmm(dst-IR_REG_FP_FIRST), dword src
|| }
|.endmacro
/* ASM_AVX_REG_REG_REG_OP: three-operand form with all operands in xmm registers. */
|.macro ASM_AVX_REG_REG_REG_OP, fop, dop, type, dst, op1, op2
|| if (type == IR_DOUBLE) {
| dop xmm(dst-IR_REG_FP_FIRST), xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST)
|| } else {
|| IR_ASSERT(type == IR_FLOAT);
| fop xmm(dst-IR_REG_FP_FIRST), xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST)
|| }
|.endmacro
/* ASM_AVX_REG_REG_MEM_OP: three-operand form whose last operand is qword/dword memory. */
|.macro ASM_AVX_REG_REG_MEM_OP, fop, dop, type, dst, op1, op2
|| if (type == IR_DOUBLE) {
| dop xmm(dst-IR_REG_FP_FIRST), xmm(op1-IR_REG_FP_FIRST), qword op2
|| } else {
|| IR_ASSERT(type == IR_FLOAT);
| fop xmm(dst-IR_REG_FP_FIRST), xmm(op1-IR_REG_FP_FIRST), dword op2
|| }
|.endmacro
/*
 * ASM_FP_REG_REG_OP: register-to-register two-operand op; uses the
 * avx_fop/avx_dop mnemonics when ctx->flags has IR_AVX set, otherwise
 * fop/dop.
 */
|.macro ASM_FP_REG_REG_OP, fop, dop, avx_fop, avx_dop, type, dst, src
|| if (ctx->flags & IR_AVX) {
| ASM_SSE2_REG_REG_OP avx_fop, avx_dop, type, dst, src
|| } else {
| ASM_SSE2_REG_REG_OP fop, dop, type, dst, src
|| }
|.endmacro
/*
 * ASM_FP_MEM_REG_OP: memory destination, xmm source; selects the AVX or
 * non-AVX mnemonic at code-generation time based on IR_AVX in ctx->flags.
 */
|.macro ASM_FP_MEM_REG_OP, fop, dop, avx_fop, avx_dop, type, dst, src
|| if (type == IR_DOUBLE) {
|| if (ctx->flags & IR_AVX) {
| avx_dop qword dst, xmm(src-IR_REG_FP_FIRST)
|| } else {
| dop qword dst, xmm(src-IR_REG_FP_FIRST)
|| }
|| } else {
|| IR_ASSERT(type == IR_FLOAT);
|| if (ctx->flags & IR_AVX) {
| avx_fop dword dst, xmm(src-IR_REG_FP_FIRST)
|| } else {
| fop dword dst, xmm(src-IR_REG_FP_FIRST)
|| }
|| }
|.endmacro
/*
 * ASM_FP_REG_MEM_OP: xmm destination, memory source; AVX vs non-AVX
 * mnemonic chosen from ctx->flags as above.
 */
|.macro ASM_FP_REG_MEM_OP, fop, dop, avx_fop, avx_dop, type, dst, src
|| if (ctx->flags & IR_AVX) {
| ASM_SSE2_REG_MEM_OP avx_fop, avx_dop, type, dst, src
|| } else {
| ASM_SSE2_REG_MEM_OP fop, dop, type, dst, src
|| }
|.endmacro
/*
 * Per-compilation state of the x86 backend.
 *
 * ra_data.stack_frame_size, stack_frame_alignment and call_stack_size are
 * read by IR_SPILL_POS_TO_OFFSET. The meaning of the remaining fields is
 * not visible in this part of the file; the notes below are inferred from
 * their names only.
 */
typedef struct _ir_backend_data {
ir_reg_alloc_data ra_data;
int32_t stack_frame_alignment;
int32_t call_stack_size;
int32_t param_stack_size;
/* Only present when the target defines no FP return register (IR_REG_FP_RET1). */
#ifndef IR_REG_FP_RET1
int32_t float_ret_slot;
int32_t double_ret_slot;
#endif
ir_regset used_preserved_regs;
uint32_t dessa_from_block;
dasm_State *dasm_state;
int rodata_label, jmp_table_label;
/* NOTE(review): presumably track which FP neg/abs mask constants are needed - confirm at the use sites. */
bool double_neg_const;
bool float_neg_const;
bool double_abs_const;
bool float_abs_const;
} ir_backend_data;
/*
 * X-macro callbacks that stringify one column of a register description:
 * the 64-bit, 32-bit, 16-bit or 8-bit GP register name, or the FP register
 * name. Each expands to a string literal followed by a comma.
 */
#define IR_GP_REG_NAME(code, name64, name32, name16, name8, name8h) \
#name64,
#define IR_GP_REG_NAME32(code, name64, name32, name16, name8, name8h) \
#name32,
#define IR_GP_REG_NAME16(code, name64, name32, name16, name8, name8h) \
#name16,
#define IR_GP_REG_NAME8(code, name64, name32, name16, name8, name8h) \
#name8,
#define IR_FP_REG_NAME(code, name) \
#name,
/* Register names indexed by register number: GP (64-bit names) followed by FP registers. */
static const char *_ir_reg_name[IR_REG_NUM] = {
IR_GP_REGS(IR_GP_REG_NAME)
IR_FP_REGS(IR_FP_REG_NAME)
};
/*
 * 32-, 16- and 8-bit register names. Only GP registers are listed; any
 * remaining array entries are implicitly zero-initialized (NULL).
 */
static const char *_ir_reg_name32[IR_REG_NUM] = {
IR_GP_REGS(IR_GP_REG_NAME32)
};
static const char *_ir_reg_name16[IR_REG_NUM] = {
IR_GP_REGS(IR_GP_REG_NAME16)
};
static const char *_ir_reg_name8[IR_REG_NUM] = {
IR_GP_REGS(IR_GP_REG_NAME8)
};
/*
 * Calling Convention
 *
 * Tables of the registers used for passing integer and floating-point
 * arguments, in argument order. Three configurations are selected at
 * compile time: _WIN64 (4 + 4 registers), other __x86_64__ targets
 * (6 + 8 registers), and everything else, where the default tables are NULL
 * and only a separate two-register integer "fastcall" table exists.
 */
#ifdef _WIN64
static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS] = {
IR_REG_INT_ARG1,
IR_REG_INT_ARG2,
IR_REG_INT_ARG3,
IR_REG_INT_ARG4,
};
static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS] = {
IR_REG_FP_ARG1,
IR_REG_FP_ARG2,
IR_REG_FP_ARG3,
IR_REG_FP_ARG4,
};
#elif defined(__x86_64__)
static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS] = {
IR_REG_INT_ARG1,
IR_REG_INT_ARG2,
IR_REG_INT_ARG3,
IR_REG_INT_ARG4,
IR_REG_INT_ARG5,
IR_REG_INT_ARG6,
};
static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS] = {
IR_REG_FP_ARG1,
IR_REG_FP_ARG2,
IR_REG_FP_ARG3,
IR_REG_FP_ARG4,
IR_REG_FP_ARG5,
IR_REG_FP_ARG6,
IR_REG_FP_ARG7,
IR_REG_FP_ARG8,
};
#else
/* No register arguments by default; the *_fc_* tables are used for fastcall functions. */
static const int8_t *_ir_int_reg_params = NULL;
static const int8_t *_ir_fp_reg_params = NULL;
static const int8_t _ir_int_fc_reg_params[IR_REG_INT_FCARGS] = {
IR_REG_INT_FCARG1,
IR_REG_INT_FCARG2,
};
static const int8_t *_ir_fp_fc_reg_params = NULL;
#endif
/*
 * Return the printable name of register `reg` as seen by a value of `type`.
 *
 * reg == IR_REG_NUM yields "SCRATCH". With type == IR_VOID the type is
 * derived from the register class (IR_ADDR below IR_REG_FP_FIRST, IR_DOUBLE
 * otherwise). FP types and 8-byte types use the full-width name table;
 * 4-, 2- and 1-byte types use the corresponding sub-register tables.
 * Any other size asserts and returns NULL.
 */
const char *ir_reg_name(int8_t reg, ir_type type)
{
	if (reg == IR_REG_NUM) {
		return "SCRATCH";
	}
	IR_ASSERT(reg >= 0 && reg < IR_REG_NUM);

	if (type == IR_VOID) {
		if (reg < IR_REG_FP_FIRST) {
			type = IR_ADDR;
		} else {
			type = IR_DOUBLE;
		}
	}

	if (IR_IS_TYPE_FP(type)) {
		return _ir_reg_name[reg];
	}

	switch (ir_type_size[type]) {
		case 8:
			return _ir_reg_name[reg];
		case 4:
			return _ir_reg_name32[reg];
		case 2:
			return _ir_reg_name16[reg];
		case 1:
			return _ir_reg_name8[reg];
		default:
			break;
	}

	IR_ASSERT(0);
	return NULL;
}
#if !defined(_WIN64) && !defined(__x86_64__)
/*
 * Tell whether the call instruction `insn` targets a fastcall function.
 *
 * Only meaningful with 4-byte pointers. The callee is insn->op2: for a
 * constant its const_flags are tested for IR_CONST_FASTCALL_FUNC, for an
 * IR_BITCAST instruction the same bit is tested in that instruction's op2.
 * Everything else is reported as not fastcall.
 */
static bool ir_is_fastcall(ir_ctx *ctx, ir_insn *insn)
{
	ir_insn *callee;

	if (sizeof(void*) != 4) {
		return 0;
	}

	callee = &ctx->ir_base[insn->op2];
	if (IR_IS_CONST_REF(insn->op2)) {
		return (callee->const_flags & IR_CONST_FASTCALL_FUNC) != 0;
	}
	if (callee->op == IR_BITCAST) {
		return (callee->op2 & IR_CONST_FASTCALL_FUNC) != 0;
	}
	return 0;
}
#else
/* Fastcall is never used on 64-bit targets. */
# define ir_is_fastcall(ctx, insn) 0
#endif
/*
 * Backend-specific instruction-selection rules stored in ctx->rules[].
 *
 * Numbering starts at IR_LAST_OP, so these values do not overlap with the
 * IR opcodes; the code in this file compares ctx->rules[] entries against
 * both kinds of constants (e.g. IR_RLOAD, IR_CALL as well as IR_LEA_OB).
 * The relative order of the enumerators defines their values - do not
 * reorder without checking every user.
 */
typedef enum _ir_rule {
IR_SKIP = IR_LAST_OP,
IR_SKIP_REG,
IR_SKIP_MEM,
IR_CMP_INT,
IR_CMP_FP,
IR_MUL_INT,
IR_DIV_INT,
IR_MOD_INT,
IR_TEST_INT,
IR_SETCC_INT,
/* LEA addressing-mode variants (O = offset, B = base, I = index, S = scale; see the comments in ir_match_insn). */
IR_LEA_OB,
IR_LEA_SI,
IR_LEA_SIB,
IR_LEA_IB,
IR_LEA_SI_O,
IR_LEA_SIB_O,
IR_LEA_IB_O,
IR_LEA_I_OB,
IR_LEA_OB_I,
IR_LEA_OB_SI,
IR_LEA_SI_OB,
IR_LEA_B_SI,
IR_LEA_SI_B,
IR_INC,
IR_DEC,
IR_MUL_PWR2,
IR_DIV_PWR2,
IR_MOD_PWR2,
IR_BOOL_NOT_INT,
IR_ABS_INT,
IR_OP_INT,
IR_OP_FP,
IR_IMUL3,
IR_BINOP_INT,
IR_BINOP_SSE2,
IR_BINOP_AVX,
IR_SHIFT,
IR_SHIFT_CONST,
IR_COPY_INT,
IR_COPY_FP,
IR_CMP_AND_BRANCH_INT,
IR_CMP_AND_BRANCH_FP,
IR_TEST_AND_BRANCH_INT,
IR_JCC_INT,
IR_GUARD_CMP_INT,
IR_GUARD_CMP_FP,
IR_GUARD_OVERFLOW,
IR_OVERFLOW_AND_BRANCH,
IR_MIN_MAX_INT,
IR_MEM_OP_INT,
IR_MEM_INC,
IR_MEM_DEC,
IR_MEM_MUL_PWR2,
IR_MEM_DIV_PWR2,
IR_MEM_MOD_PWR2,
IR_MEM_BINOP_INT,
IR_MEM_SHIFT,
IR_MEM_SHIFT_CONST,
IR_REG_BINOP_INT,
/* IR_SKIP_* rules (except IR_SKIP_REG) mark instructions that get no virtual register, see ir_needs_vreg(). */
IR_SKIP_MEM_BINOP_INT,
IR_SKIP_REG_BINOP_INT,
IR_SKIP_TEST_INT,
IR_SKIP_SHIFT,
IR_VSTORE_INT,
IR_VSTORE_FP,
IR_LOAD_INT,
IR_LOAD_FP,
IR_STORE_INT,
IR_STORE_FP,
IR_IF_INT,
IR_RETURN_VOID,
IR_RETURN_INT,
IR_RETURN_FP,
} ir_rule;
/* instruction selection */
/*
 * Report whether instruction `ref` needs a virtual register.
 *
 * Instructions whose rule is one of the "skip" rules listed below get none;
 * every other rule (including IR_SKIP_REG) does. Requires ctx->rules.
 */
bool ir_needs_vreg(ir_ctx *ctx, ir_ref ref)
{
	IR_ASSERT(ctx->rules);

	switch (ctx->rules[ref]) {
		case IR_SKIP:
		case IR_SKIP_MEM:
		case IR_SKIP_MEM_BINOP_INT:
		case IR_SKIP_REG_BINOP_INT:
		case IR_SKIP_TEST_INT:
		case IR_SKIP_SHIFT:
			return 0;
		default:
			return 1;
	}
}
/*
 * Return the set of registers clobbered by instruction `ref` and store the
 * sub-positions between which they are clobbered into *start / *end.
 *
 * Rules without fixed clobbers return IR_REGSET_EMPTY and leave *start and
 * *end untouched.
 */
ir_regset ir_get_scratch_regset(ir_ctx *ctx, ir_ref ref, ir_live_pos *start, ir_live_pos *end)
{
	ir_ref rule = ctx->rules[ref];

	switch (rule) {
		case IR_SHIFT:
		case IR_SKIP_SHIFT:
			*start = IR_LOAD_SUB_REF;
			*end = IR_DEF_SUB_REF;
			return IR_REGSET(IR_REG_RCX);

		case IR_MUL_INT:
			*start = IR_USE_SUB_REF;
			*end = IR_DEF_SUB_REF;
			/* %rax - used as input and result */
			return IR_REGSET(IR_REG_RDX);

		case IR_DIV_INT:
			*start = IR_LOAD_SUB_REF;
			*end = IR_DEF_SUB_REF;
			/* %rax - used as input and result */
			return IR_REGSET(IR_REG_RDX);

		case IR_MOD_INT:
			*start = IR_LOAD_SUB_REF;
			*end = IR_DEF_SUB_REF;
			/* %rdx - used as result */
			return IR_REGSET(IR_REG_RAX) | IR_REGSET(IR_REG_RDX);

		case IR_CALL:
			*start = IR_USE_SUB_REF;
			*end = IR_DEF_SUB_REF;
			return IR_REGSET_SCRATCH;

		default:
			break;
	}

	return IR_REGSET_EMPTY;
}
/*
 * Find the register in which the IR_PARAM instruction `ref` arrives.
 *
 * Walks the users of instruction 1, counting integer and floating-point
 * IR_PARAMs separately in use-list order. When `ref` is reached, its
 * position within its class selects the register from the active
 * calling-convention table. Returns IR_REG_NONE when the position is past
 * the register arguments or when `ref` is not found.
 */
static ir_reg ir_get_param_reg(ir_ctx *ctx, ir_ref ref)
{
	const int8_t *int_regs = _ir_int_reg_params;
	const int8_t *fp_regs = _ir_fp_reg_params;
	int int_regs_count = IR_REG_INT_ARGS;
	int fp_regs_count = IR_REG_FP_ARGS;
	int int_seen = 0;
	int fp_seen = 0;
	ir_use_list *start_uses = &ctx->use_lists[1];
	ir_ref *edges;
	int k;

#if !defined(_WIN64) && !defined(__x86_64__)
	if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) {
		/* the function being compiled is fastcall: switch tables */
		int_regs = _ir_int_fc_reg_params;
		fp_regs = _ir_fp_fc_reg_params;
		int_regs_count = IR_REG_INT_FCARGS;
		fp_regs_count = IR_REG_FP_FCARGS;
	}
#endif

	edges = &ctx->use_edges[start_uses->refs];
	for (k = 0; k < start_uses->count; k++) {
		ir_ref user = edges[k];
		ir_insn *param = &ctx->ir_base[user];

		if (param->op != IR_PARAM) {
			continue;
		}
		if (IR_IS_TYPE_INT(param->type)) {
			if (user == ref) {
				return (int_seen < int_regs_count) ? int_regs[int_seen] : IR_REG_NONE;
			}
			int_seen++;
		} else if (IR_IS_TYPE_FP(param->type)) {
			if (user == ref) {
				return (fp_seen < fp_regs_count) ? fp_regs[fp_seen] : IR_REG_NONE;
			}
			fp_seen++;
		} else {
			IR_ASSERT(0);
		}
	}

	return IR_REG_NONE;
}
/*
 * Find the register used to pass operand `op_num` of call instruction `insn`.
 *
 * Call arguments start at operand 3. Integer and floating-point arguments
 * are counted separately; the position of the requested operand within its
 * class selects the register from the active calling-convention table.
 * Returns IR_REG_NONE when the argument is not passed in a register or
 * `op_num` is not among the arguments.
 */
static ir_reg ir_get_arg_reg(ir_ctx *ctx, ir_insn *insn, int op_num)
{
	const int8_t *int_regs = _ir_int_reg_params;
	const int8_t *fp_regs = _ir_fp_reg_params;
	int int_regs_count = IR_REG_INT_ARGS;
	int fp_regs_count = IR_REG_FP_ARGS;
	int int_seen = 0;
	int fp_seen = 0;
	int idx, last;

#if !defined(_WIN64) && !defined(__x86_64__)
	if (sizeof(void*) == 4 && ir_is_fastcall(ctx, insn)) {
		/* the callee is fastcall: switch tables */
		int_regs = _ir_int_fc_reg_params;
		fp_regs = _ir_fp_fc_reg_params;
		int_regs_count = IR_REG_INT_FCARGS;
		fp_regs_count = IR_REG_FP_FCARGS;
	}
#endif

	last = ir_input_edges_count(ctx, insn);
	for (idx = 3; idx <= last; idx++) {
		ir_type arg_type = ctx->ir_base[insn->ops[idx]].type;

		if (IR_IS_TYPE_INT(arg_type)) {
			if (idx == op_num) {
				return (int_seen < int_regs_count) ? int_regs[int_seen] : IR_REG_NONE;
			}
			int_seen++;
		} else if (IR_IS_TYPE_FP(arg_type)) {
			if (idx == op_num) {
				return (fp_seen < fp_regs_count) ? fp_regs[fp_seen] : IR_REG_NONE;
			}
			fp_seen++;
		} else {
			IR_ASSERT(0);
		}
	}

	return IR_REG_NONE;
}
/*
 * Decide whether emitting call `ref` needs a temporary integer register.
 *
 * Returns 1 as soon as one of the arguments (operands 3..n) is:
 *  - an integer constant that is a string (IR_STR) or does not fit a
 *    signed 32-bit value;
 *  - a non-constant integer argument that is passed in a register and is
 *    not the first integer argument (a swap may be required);
 *  - a non-zero IR_DOUBLE constant.
 * Otherwise returns 0.
 */
static bool ir_call_needs_tmp_int_reg(ir_ctx *ctx, ir_ref ref)
{
	ir_insn *call = &ctx->ir_base[ref];
	int int_regs_count = IR_REG_INT_ARGS;
	int int_seen = 0;
	int idx, last;

#if !defined(_WIN64) && !defined(__x86_64__)
	if (sizeof(void*) == 4 && ir_is_fastcall(ctx, call)) {
		int_regs_count = IR_REG_INT_FCARGS;
	}
#endif

	last = ir_input_edges_count(ctx, call);
	for (idx = 3; idx <= last; idx++) {
		ir_ref arg_ref = call->ops[idx];
		ir_insn *arg = &ctx->ir_base[arg_ref];
		ir_type arg_type = arg->type;

		if (IR_IS_TYPE_INT(arg_type)) {
			if (IR_IS_CONST_REF(arg_ref)) {
				if (arg->op == IR_STR || !IR_IS_SIGNED_32BIT(arg->val.i64)) {
					return 1;
				}
			} else if (int_seen > 0 && int_seen < int_regs_count) {
				return 1; /* for swap */
			}
			int_seen++;
			continue;
		}

		if (arg_type == IR_DOUBLE
		 && IR_IS_CONST_REF(arg_ref)
		 && arg->val.i64 != 0) {
			return 1;
		}
	}

	return 0;
}
/*
 * Describe register constraints for the result of instruction `ref`.
 *
 * *reg receives a fixed register for the result, or IR_REG_NONE when there
 * is none. The return value is a combination of IR_DEF_* / IR_USE_* flags
 * selected by the instruction's rule; rules not listed get
 * IR_USE_MUST_BE_IN_REG with no fixed register.
 */
uint8_t ir_get_def_flags(ir_ctx *ctx, ir_ref ref, ir_reg *reg)
{
	ir_ref rule = ctx->rules[ref];
	ir_insn *insn;

	*reg = IR_REG_NONE;

	switch (rule) {
		case IR_MUL_INT:
		case IR_DIV_INT:
			/* result is produced in %rax, which is also the first operand */
			*reg = IR_REG_RAX;
			return IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG;

		case IR_BINOP_INT:
		case IR_BINOP_SSE2:
		case IR_SHIFT:
		case IR_SHIFT_CONST:
		case IR_COPY_INT:
		case IR_COPY_FP:
		case IR_INC:
		case IR_DEC:
		case IR_MUL_PWR2:
		case IR_DIV_PWR2:
		case IR_MOD_PWR2:
		case IR_OP_INT:
		case IR_OP_FP:
		case IR_MIN_MAX_INT:
		case IR_COPY:
			return IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG;

		case IR_ABS_INT:
			return IR_DEF_CONFLICTS_WITH_INPUT_REGS | IR_USE_MUST_BE_IN_REG;

		case IR_SKIP_REG: /* PARAM PHI PI */
			insn = &ctx->ir_base[ref];
			if (insn->op == IR_PARAM) {
				*reg = ir_get_param_reg(ctx, ref);
				if (*reg == IR_REG_NONE) {
					return IR_USE_MUST_BE_IN_REG;
				}
			}
			return IR_USE_SHOULD_BE_IN_REG;

		case IR_MOD_INT:
			*reg = IR_REG_RDX;
			break;

		case IR_CALL:
			insn = &ctx->ir_base[ref];
			if (IR_IS_TYPE_INT(insn->type)) {
				*reg = IR_REG_INT_RET1;
			}
#ifdef IR_REG_FP_RET1
			else {
				*reg = IR_REG_FP_RET1;
			}
#endif
			break;

		case IR_EXITCALL:
			*reg = IR_REG_INT_RET1;
			break;

		default:
			break;
	}

	return IR_USE_MUST_BE_IN_REG;
}
uint8_t ir_get_use_flags(ir_ctx *ctx, ir_ref ref, int op_num, ir_reg *reg)
{
ir_ref rule = ctx->rules[ref];
ir_insn *insn;
IR_ASSERT(op_num > 0);
*reg = IR_REG_NONE;
switch (rule) {
case IR_BINOP_INT:
case IR_SKIP_REG_BINOP_INT:
case IR_BINOP_SSE2:
case IR_BINOP_AVX:
case IR_IF_INT:
case IR_CMP_FP:
case IR_GUARD:
case IR_GUARD_NOT:
return (op_num == 2) ? IR_USE_SHOULD_BE_IN_REG : IR_USE_MUST_BE_IN_REG;
case IR_CMP_INT:
case IR_SKIP_TEST_INT:
if (op_num == 2) {
insn = &ctx->ir_base[ref];
if (IR_IS_CONST_REF(insn->op1)) {
ir_insn *val_insn = &ctx->ir_base[insn->op1];
if (ir_type_size[val_insn->type] != 8 || IR_IS_32BIT(val_insn->type, val_insn->val)) {
return IR_USE_SHOULD_BE_IN_REG;
}
} else if (ctx->rules[insn->op1] != IR_SKIP_MEM) {
return IR_USE_SHOULD_BE_IN_REG;
}
} else if (op_num == 1) {
insn = &ctx->ir_base[ref];
if (IR_IS_CONST_REF(insn->op2)) {
ir_insn *val_insn = &ctx->ir_base[insn->op2];
if (ir_type_size[val_insn->type] != 8 || IR_IS_32BIT(val_insn->type, val_insn->val)) {
return IR_USE_SHOULD_BE_IN_REG;
}
}
}
return IR_USE_MUST_BE_IN_REG;
case IR_MIN_MAX_INT:
return (op_num == 1) ? IR_USE_SHOULD_BE_IN_REG : IR_USE_MUST_BE_IN_REG;
case IR_CALL:
case IR_TAILCALL:
if (op_num > 2) {
insn = &ctx->ir_base[ref];
*reg = ir_get_arg_reg(ctx, insn, op_num);
return IR_USE_SHOULD_BE_IN_REG;
}
return IR_USE_SHOULD_BE_IN_REG;
case IR_IMUL3:
case IR_IJMP:
case IR_RSTORE:
case IR_SKIP_REG: /* PARAM PHI PI */
return IR_USE_SHOULD_BE_IN_REG;
// case IR_VLOAD:
// case IR_VSTORE_INT:
// case IR_VSTORE_FP:
// return (op_num == 2) ? 0 : IR_USE_MUST_BE_IN_REG;
case IR_SKIP:
insn = &ctx->ir_base[ref];
switch (insn->op) {
case IR_EQ:
case IR_NE:
case IR_LT:
case IR_GE:
case IR_LE:
case IR_GT:
case IR_ULT:
case IR_UGE:
case IR_ULE:
case IR_UGT:
if (IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) {
if (op_num == 2) {
if (IR_IS_CONST_REF(insn->op1)) {
ir_insn *val_insn = &ctx->ir_base[insn->op1];
if (ir_type_size[val_insn->type] != 8 || IR_IS_32BIT(val_insn->type, val_insn->val)) {
return IR_USE_SHOULD_BE_IN_REG;
}
} else if (ctx->rules[insn->op1] != IR_SKIP_MEM) {
return IR_USE_SHOULD_BE_IN_REG;
}
} else if (op_num == 1) {
if (IR_IS_CONST_REF(insn->op2)) {
ir_insn *val_insn = &ctx->ir_base[insn->op2];
if (ir_type_size[val_insn->type] != 8 || IR_IS_32BIT(val_insn->type, val_insn->val)) {
return IR_USE_SHOULD_BE_IN_REG;
}
}
}
} else {
return (op_num == 2) ? IR_USE_SHOULD_BE_IN_REG : IR_USE_MUST_BE_IN_REG;
}
break;
case IR_SNAPSHOT:
return IR_USE_SHOULD_BE_IN_REG;
}
break;
case IR_SHIFT:
case IR_SKIP_SHIFT:
if (op_num == 2) {
*reg = IR_REG_RCX;
}
break;
case IR_MUL_INT:
case IR_DIV_INT:
if (op_num == 1) {
*reg = IR_REG_RAX;
}
return IR_USE_SHOULD_BE_IN_REG;
case IR_MOD_INT:
if (op_num == 1) {
*reg = IR_REG_RAX;
}
return IR_USE_SHOULD_BE_IN_REG;
case IR_RETURN_INT:
IR_ASSERT(op_num == 2);
*reg = IR_REG_INT_RET1;
return IR_USE_SHOULD_BE_IN_REG;
case IR_RETURN_FP:
#ifdef IR_REG_FP_RET1
IR_ASSERT(op_num == 2);
*reg = IR_REG_FP_RET1;
#endif
return IR_USE_SHOULD_BE_IN_REG;
}
return IR_USE_MUST_BE_IN_REG;
}
int ir_get_temporary_regs(ir_ctx *ctx, ir_ref ref, ir_tmp_reg *tmp_regs)
{
ir_ref rule;
ir_insn *insn;
int n;
rule = ctx->rules[ref];
switch (rule) {
case IR_BINOP_INT:
case IR_SKIP_MEM_BINOP_INT:
case IR_SKIP_REG_BINOP_INT:
case IR_SKIP_TEST_INT:
insn = &ctx->ir_base[ref];
if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) {
insn = &ctx->ir_base[insn->op2];
if (ir_type_size[insn->type] == 8 && !IR_IS_32BIT(insn->type, insn->val)) {
tmp_regs[0].num = 2;
tmp_regs[0].type = insn->type;
tmp_regs[0].start = IR_LOAD_SUB_REF;
tmp_regs[0].end = IR_DEF_SUB_REF;
return 1;
}
}
break;
case IR_DIV_INT:
case IR_MOD_INT:
case IR_MIN_MAX_INT:
insn = &ctx->ir_base[ref];
if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) {
tmp_regs[0].num = 2;
tmp_regs[0].type = insn->type;
tmp_regs[0].start = IR_LOAD_SUB_REF;
tmp_regs[0].end = IR_DEF_SUB_REF;
return 1;
}
break;
case IR_CMP_INT:
insn = &ctx->ir_base[ref];
n = 0;
cmp_int:
if (IR_IS_CONST_REF(insn->op1)) {
ir_insn *val_insn = &ctx->ir_base[insn->op1];
if (ir_type_size[val_insn->type] == 8 && !IR_IS_32BIT(val_insn->type, val_insn->val)) {
tmp_regs[n].num = 1;
tmp_regs[n].type = val_insn->type;
tmp_regs[n].start = IR_LOAD_SUB_REF;
tmp_regs[n].end = IR_DEF_SUB_REF;
n++;
}
}
if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) {
ir_insn *val_insn = &ctx->ir_base[insn->op2];
if (ir_type_size[val_insn->type] == 8 && !IR_IS_32BIT(val_insn->type, val_insn->val)) {
tmp_regs[n].num = 2;
tmp_regs[n].type = val_insn->type;
tmp_regs[n].start = IR_LOAD_SUB_REF;
tmp_regs[n].end = IR_DEF_SUB_REF;
n++;
}
}
if (n == 0 && IR_IS_CONST_REF(insn->op1) && IR_IS_CONST_REF(insn->op2)) {
insn = &ctx->ir_base[insn->op1];
tmp_regs[n].num = 2;
tmp_regs[n].type = insn->type;
tmp_regs[n].start = IR_LOAD_SUB_REF;
tmp_regs[n].end = IR_DEF_SUB_REF;
n++;
}
return n;
case IR_CMP_FP:
insn = &ctx->ir_base[ref];
tmp_regs[0].num = 3;
tmp_regs[0].type = IR_BOOL;
tmp_regs[0].start = IR_DEF_SUB_REF;
tmp_regs[0].end = IR_SAVE_SUB_REF;
n = 1;
cmp_fp:
if (IR_IS_CONST_REF(insn->op1)) {
ir_insn *val_insn = &ctx->ir_base[insn->op1];
tmp_regs[n].num = 1;
tmp_regs[n].type = val_insn->type;
tmp_regs[n].start = IR_LOAD_SUB_REF;
tmp_regs[n].end = IR_DEF_SUB_REF;
n++;
}
return n;
case IR_BINOP_AVX:
insn = &ctx->ir_base[ref];
if (IR_IS_CONST_REF(insn->op1)) {
tmp_regs[0].num = 1;
tmp_regs[0].type = insn->type;
tmp_regs[0].start = IR_LOAD_SUB_REF;
tmp_regs[0].end = IR_DEF_SUB_REF;
return 1;
}
break;
case IR_VSTORE_INT:
insn = &ctx->ir_base[ref];
if (IR_IS_CONST_REF(insn->op3)) {
insn = &ctx->ir_base[insn->op3];
if (ir_type_size[insn->type] == 8 && !IR_IS_32BIT(insn->type, insn->val)) {
tmp_regs[0].num = 3;
tmp_regs[0].type = insn->type;
tmp_regs[0].start = IR_LOAD_SUB_REF;
tmp_regs[0].end = IR_DEF_SUB_REF;
return 1;
}
}
break;
case IR_STORE_INT:
insn = &ctx->ir_base[ref];
n = 0;
if (IR_IS_CONST_REF(insn->op2)) {
IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR);
if (ir_type_size[insn->type] == 8 && !IR_IS_SIGNED_32BIT(ctx->ir_base[insn->op2].val.i64)) {
tmp_regs[n].num = 2;
tmp_regs[n].type = IR_ADDR;
tmp_regs[n].start = IR_LOAD_SUB_REF;
tmp_regs[n].end = IR_DEF_SUB_REF;
n++;
}
}
if (IR_IS_CONST_REF(insn->op3)) {
insn = &ctx->ir_base[insn->op3];
if (ir_type_size[insn->type] == 8 && !IR_IS_32BIT(insn->type, insn->val)) {
tmp_regs[n].num = 3;
tmp_regs[n].type = insn->type;
tmp_regs[n].start = IR_LOAD_SUB_REF;
tmp_regs[n].end = IR_DEF_SUB_REF;
n++;
}
}
return n;
case IR_VSTORE_FP:
insn = &ctx->ir_base[ref];
if (IR_IS_CONST_REF(insn->op3)) {
insn = &ctx->ir_base[insn->op3];
tmp_regs[0].num = 3;
tmp_regs[0].type = insn->type;
tmp_regs[0].start = IR_LOAD_SUB_REF;
tmp_regs[0].end = IR_DEF_SUB_REF;
return 1;
}
break;
case IR_LOAD_FP:
case IR_MEM_BINOP_INT:
insn = &ctx->ir_base[ref];
n = 0;
if (IR_IS_CONST_REF(insn->op2)) {
IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR);
tmp_regs[n].num = 2;
tmp_regs[n].type = IR_ADDR;
tmp_regs[n].start = IR_LOAD_SUB_REF;
tmp_regs[n].end = IR_DEF_SUB_REF;
n++;
}
return n;
case IR_STORE_FP:
insn = &ctx->ir_base[ref];
n = 0;
if (IR_IS_CONST_REF(insn->op2)) {
IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR);
tmp_regs[n].num = 2;
tmp_regs[n].type = IR_ADDR;
tmp_regs[n].start = IR_LOAD_SUB_REF;
tmp_regs[n].end = IR_DEF_SUB_REF;
n++;
}
if (IR_IS_CONST_REF(insn->op3)) {
insn = &ctx->ir_base[insn->op3];
tmp_regs[n].num = 3;
tmp_regs[n].type = insn->type;
tmp_regs[n].start = IR_LOAD_SUB_REF;
tmp_regs[n].end = IR_DEF_SUB_REF;
n++;
}
return n;
case IR_SWITCH:
insn = &ctx->ir_base[ref];
n = 0;
if (IR_IS_CONST_REF(insn->op2)) {
insn = &ctx->ir_base[insn->op2];
tmp_regs[n].num = 2;
tmp_regs[n].type = insn->type;
tmp_regs[n].start = IR_LOAD_SUB_REF;
tmp_regs[n].end = IR_DEF_SUB_REF;
n++;
}
if (sizeof(void*) == 8) {
tmp_regs[n].num = 3;
tmp_regs[n].type = IR_ADDR;
tmp_regs[n].start = IR_LOAD_SUB_REF;
tmp_regs[n].end = IR_DEF_SUB_REF;
n++;
}
return n;
case IR_CALL:
case IR_TAILCALL:
if (ir_call_needs_tmp_int_reg(ctx, ref)) {
tmp_regs[0].num = 1;
tmp_regs[0].type = IR_ADDR;
tmp_regs[0].start = IR_LOAD_SUB_REF;
tmp_regs[0].end = IR_USE_SUB_REF;
return 1;
}
break;
case IR_SKIP:
insn = &ctx->ir_base[ref];
switch (insn->op) {
case IR_EQ:
case IR_NE:
case IR_LT:
case IR_GE:
case IR_LE:
case IR_GT:
case IR_ULT:
case IR_UGE:
case IR_ULE:
case IR_UGT:
if (IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) {
/* part of IR_CMP_AND_BRANCH_INT or IR_GUARD_CMP_INT */
n = 0;
goto cmp_int;
} else {
/* part of IR_CMP_AND_BRANCH_FP or IR_GUARD_CMP_IFP */
n = 0;
goto cmp_fp;
}
break;
}
break;
}
return 0;
}
static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref, ir_block *bb);

/*
 * Try to fold the address computation `addr_ref` into the memory operand
 * of its user by marking it IR_SKIP_MEM.
 *
 * Constants are ignored. The address instruction is matched first if it
 * has no rule yet. An IR_RLOAD is always fused. An IR_LEA_OB is fused
 * unless it has several users and one of them is neither an IR_LOAD nor an
 * IR_STORE that uses it as something other than the stored value (op3).
 */
static void ir_match_fuse_addr(ir_ctx *ctx, ir_ref addr_ref, ir_block *bb)
{
	ir_use_list *users;
	ir_ref *edge;
	ir_ref left;

	if (IR_IS_CONST_REF(addr_ref)) {
		return;
	}

	if (!ctx->rules[addr_ref]) {
		ctx->rules[addr_ref] = ir_match_insn(ctx, addr_ref, bb);
	}

	if (ctx->rules[addr_ref] == IR_RLOAD) {
		ctx->rules[addr_ref] = IR_SKIP_MEM;
		return;
	}
	if (ctx->rules[addr_ref] != IR_LEA_OB) {
		return;
	}

	users = &ctx->use_lists[addr_ref];
	left = users->count;
	if (left > 1) {
		/* check if address is used only in LOAD and STORE */
		for (edge = &ctx->use_edges[users->refs]; left > 0; left--, edge++) {
			ir_insn *user = &ctx->ir_base[*edge];
			bool is_mem_access = user->op == IR_LOAD
				|| (user->op == IR_STORE && user->op3 != addr_ref);

			if (!is_mem_access) {
				return;
			}
		}
	}
	ctx->rules[addr_ref] = IR_SKIP_MEM;
}
/*
 * Try to fold the load `ref` into the instruction that consumes it by
 * marking it IR_SKIP_MEM.
 *
 * Only an IR_LOAD located after the start of block `bb` with exactly two
 * entries in its use list qualifies. A constant address must be an
 * IR_C_ADDR that fits 32 bits (always true with 4-byte pointers); for a
 * non-constant address the address computation is offered for fusion too.
 */
static void ir_match_fuse_load(ir_ctx *ctx, ir_ref ref, ir_block *bb)
{
	ir_ref addr_ref;
	ir_insn *addr_insn;

	if (ref <= bb->start) {
		return;
	}
	if (ctx->ir_base[ref].op != IR_LOAD || ctx->use_lists[ref].count != 2) {
		return;
	}

	addr_ref = ctx->ir_base[ref].op2;
	if (!IR_IS_CONST_REF(addr_ref)) {
		ir_match_fuse_addr(ctx, addr_ref, bb);
		ctx->rules[ref] = IR_SKIP_MEM;
		return;
	}

	addr_insn = &ctx->ir_base[addr_ref];
	if (addr_insn->op == IR_C_ADDR
	 && (sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr_insn->val.i64))) {
		ctx->rules[ref] = IR_SKIP_MEM;
	}
}
/*
 * Optionally swap the operands of comparison `insn` (only with
 * IR_OPT_CODEGEN).
 *
 * - IR_LT / IR_LE on floating-point operands are always swapped, to avoid
 *   a P flag check.
 * - Otherwise, when neither operand is a constant, operand 1 is a
 *   (V)LOAD and operand 2 is not, the operands are swapped so the load
 *   ends up second, for better load fusion.
 * Whenever operands are swapped the opcode is adjusted with `op ^= 3`
 * (left untouched for IR_EQ / IR_NE in the second case); presumably this
 * maps each ordering opcode to its mirrored counterpart.
 */
static void ir_match_swap_cmp(ir_ctx *ctx, ir_insn *insn)
{
	ir_insn *lhs, *rhs;
	ir_ref saved;
	bool lhs_is_load, rhs_is_load;

	if (!(ctx->flags & IR_OPT_CODEGEN)) {
		return;
	}

	if ((insn->op == IR_LT || insn->op == IR_LE)
	 && IR_IS_TYPE_FP(ctx->ir_base[insn->op1].type)) {
		/* swap operands to avoid P flag check */
		saved = insn->op1;
		insn->op1 = insn->op2;
		insn->op2 = saved;
		insn->op ^= 3;
		return;
	}

	if (IR_IS_CONST_REF(insn->op2) || IR_IS_CONST_REF(insn->op1)) {
		return;
	}

	lhs = &ctx->ir_base[insn->op1];
	rhs = &ctx->ir_base[insn->op2];
	lhs_is_load = (lhs->op == IR_VLOAD || lhs->op == IR_LOAD);
	rhs_is_load = (rhs->op == IR_VLOAD || rhs->op == IR_LOAD);
	if (lhs_is_load && !rhs_is_load) {
		/* swap for better load fusion */
		saved = insn->op1;
		insn->op1 = insn->op2;
		insn->op2 = saved;
		if (insn->op != IR_EQ && insn->op != IR_NE) {
			insn->op ^= 3;
		}
	}
}
/*
 * For a commutative instruction (only with IR_OPT_CODEGEN): when neither
 * operand is a constant, operand 1 is a (V)LOAD and operand 2 is not, swap
 * the operands so the load becomes operand 2, for better load fusion.
 */
static void ir_match_swap_commutative(ir_ctx *ctx, ir_insn *insn)
{
	ir_insn *lhs, *rhs;
	bool lhs_is_load, rhs_is_load;

	if (!(ctx->flags & IR_OPT_CODEGEN)) {
		return;
	}
	if (!(ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE)) {
		return;
	}
	if (IR_IS_CONST_REF(insn->op2) || IR_IS_CONST_REF(insn->op1)) {
		return;
	}

	lhs = &ctx->ir_base[insn->op1];
	rhs = &ctx->ir_base[insn->op2];
	lhs_is_load = (lhs->op == IR_VLOAD || lhs->op == IR_LOAD);
	rhs_is_load = (rhs->op == IR_VLOAD || rhs->op == IR_LOAD);

	if (lhs_is_load && !rhs_is_load) {
		/* swap for better load fusion */
		ir_ref saved = insn->op1;

		insn->op1 = insn->op2;
		insn->op2 = saved;
	}
}
static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref, ir_block *bb)
{
ir_insn *op2_insn;
ir_insn *insn = &ctx->ir_base[ref];
uint32_t store_rule;
ir_op load_op;
switch (insn->op) {
case IR_EQ:
case IR_NE:
case IR_LT:
case IR_GE:
case IR_LE:
case IR_GT:
case IR_ULT:
case IR_UGE:
case IR_ULE:
case IR_UGT:
ir_match_swap_cmp(ctx, insn);
if (insn->op2 > bb->start
&& insn->op2 < ref
&& !ctx->rules[insn->op2]) {
ir_match_fuse_load(ctx, insn->op2, bb);
}
if (IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) {
if (IR_IS_CONST_REF(insn->op2)
&& ctx->ir_base[insn->op2].val.i64 == 0
&& insn->op1 == ref - 1) { /* prevoius instruction */
ir_insn *op1_insn = &ctx->ir_base[insn->op1];
if (op1_insn->op == IR_ADD ||
op1_insn->op == IR_SUB ||
// op1_insn->op == IR_MUL ||
op1_insn->op == IR_OR ||
op1_insn->op == IR_AND ||
op1_insn->op == IR_XOR) {
ir_match_swap_commutative(ctx, op1_insn);
ir_match_fuse_load(ctx, op1_insn->op2, bb);
if (op1_insn->op == IR_AND && ctx->use_lists[insn->op1].count == 1) {
if (IR_IS_CONST_REF(op1_insn->op2)
&& op1_insn->op1 > bb->start
&& op1_insn->op1 < ref
&& !ctx->rules[op1_insn->op1]) {
ir_match_fuse_load(ctx, op1_insn->op1, bb);
}
ctx->rules[insn->op1] = IR_SKIP_TEST_INT;
return IR_TEST_INT;
} else {
ctx->rules[insn->op1] = IR_BINOP_INT;
return IR_SETCC_INT;
}
}
}
if (IR_IS_CONST_REF(insn->op2)
&& insn->op1 > bb->start
&& insn->op1 < ref
&& !ctx->rules[insn->op1]) {
ir_match_fuse_load(ctx, insn->op1, bb);
}
return IR_CMP_INT;
} else {
return IR_CMP_FP;
}
break;
case IR_ADD:
case IR_SUB:
if (IR_IS_TYPE_INT(insn->type)) {
if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
op2_insn = &ctx->ir_base[insn->op2];
if (IR_IS_CONST_REF(insn->op1)) {
// const
} else if (op2_insn->val.i64 == 0) {
return IR_COPY_INT;
} else if ((ir_type_size[insn->type] >= 4 && insn->op == IR_ADD && IR_IS_SIGNED_32BIT(op2_insn->val.i64)) ||
(ir_type_size[insn->type] >= 4 && insn->op == IR_SUB && IR_IS_SIGNED_32BIT(-op2_insn->val.i64))) {
if (insn->op1 > bb->start && insn->op1 < ref && ctx->use_lists[insn->op1].count == 1) {
if (!ctx->rules[insn->op1]) {
ctx->rules[insn->op1] = ir_match_insn(ctx, insn->op1, bb);
}
}
if (ctx->rules[insn->op1] == IR_LEA_SI) {
ctx->rules[insn->op1] = IR_SKIP;
return IR_LEA_SI_O; // lea ret, [op1.op1.reg*op1.op2.scale+op2.offset]
} else if (ctx->rules[insn->op1] == IR_LEA_SIB) {
ctx->rules[insn->op1] = IR_SKIP;
return IR_LEA_SIB_O; // lea ret, [op1.op1.reg+op1.op1.reg*op1.op2.scale+op2.offset]
} else if (ctx->rules[insn->op1] == IR_LEA_IB) {
ctx->rules[insn->op1] = IR_SKIP;
return IR_LEA_IB_O; // lea ret, [op1.op1.reg+op1.op2.reg+op2.offset]
}
return IR_LEA_OB; // lea ret, [op1.reg+op2.offset]
} else if (op2_insn->val.i64 == 1 || op2_insn->val.i64 == -1) {
if (insn->op == IR_ADD) {
if (op2_insn->val.i64 == 1) {
return IR_INC; // inc op1
} else {
return IR_DEC; // dec op1
}
} else {
if (op2_insn->val.i64 == 1) {
return IR_DEC; // dec op1
} else {
return IR_INC; // inc op1
}
}
}
} else if ((ctx->flags & IR_OPT_CODEGEN) && insn->op == IR_ADD && ir_type_size[insn->type] >= 4) {
if (insn->op1 > bb->start && insn->op1 < ref && ctx->use_lists[insn->op1].count == 1) {
if (!ctx->rules[insn->op1]) {
ctx->rules[insn->op1] = ir_match_insn(ctx, insn->op1, bb);
}
}
if (insn->op2 > bb->start && insn->op2 < ref && ctx->use_lists[insn->op2].count == 1) {
if (!ctx->rules[insn->op2]) {
ctx->rules[insn->op2] = ir_match_insn(ctx, insn->op2, bb);
}
}
if (ctx->rules[insn->op1] == IR_LEA_OB) {
ctx->rules[insn->op1] = IR_SKIP;
if (ctx->rules[insn->op2] == IR_LEA_SI) {
ctx->rules[insn->op2] = IR_SKIP;
return IR_LEA_OB_SI; // lea ret, [op1.op1.reg+op1.op2.offset+op2.op1.reg*op2.op2.scale]
}
return IR_LEA_OB_I; // lea ret, [op1.op1.reg+op1.op2.offset+op2.reg]
}
if (ctx->rules[insn->op2] == IR_LEA_OB) {
ctx->rules[insn->op2] = IR_SKIP;
if (ctx->rules[insn->op1] == IR_LEA_SI) {
ctx->rules[insn->op1] = IR_SKIP;
return IR_LEA_SI_OB; // lea ret, [op1.op1.reg*op1.op2.scale+op2.op1.reg+op2.op2.offset]
}
return IR_LEA_I_OB; // lea ret, [op1.reg+op2.op1.reg+op2.op2.offset]
}
return IR_LEA_IB; // lea ret, [op1.reg+op2.reg]
}
binop_int:
ir_match_swap_commutative(ctx, insn);
ir_match_fuse_load(ctx, insn->op2, bb);
return IR_BINOP_INT;
} else {
binop_fp:
ir_match_swap_commutative(ctx, insn);
ir_match_fuse_load(ctx, insn->op2, bb);
if (ctx->flags & IR_AVX) {
return IR_BINOP_AVX;
} else {
return IR_BINOP_SSE2;
}
}
break;
case IR_MUL:
if (IR_IS_TYPE_INT(insn->type)) {
if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
op2_insn = &ctx->ir_base[insn->op2];
if (IR_IS_CONST_REF(insn->op1)) {
// const
} else if (op2_insn->val.u64 == 0) {
// 0
} else if (op2_insn->val.u64 == 1) {
return IR_COPY_INT;
} else if (ir_type_size[insn->type] >= 4 &&
(op2_insn->val.u64 == 2 || op2_insn->val.u64 == 4 || op2_insn->val.u64 == 8)) {
return IR_LEA_SI; // lea ret, [op1.reg*op2.scale]
} else if (ir_type_size[insn->type] >= 4 &&
(op2_insn->val.u64 == 3 || op2_insn->val.u64 == 5 || op2_insn->val.u64 == 9)) {
return IR_LEA_SIB; // lea ret, [op1.reg+op1.reg*op2.scale]
} else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) {
return IR_MUL_PWR2; // shl op1, IR_LOG2(op2_insn->val.u64)
} else if (IR_IS_TYPE_SIGNED(insn->type)
&& ir_type_size[insn->type] != 1
&& IR_IS_SIGNED_32BIT(op2_insn->val.i64)) {
ir_match_fuse_load(ctx, insn->op1, bb);
return IR_IMUL3;
}
}
if (IR_IS_TYPE_SIGNED(insn->type) && ir_type_size[insn->type] != 1) {
goto binop_int;
}
ir_match_fuse_load(ctx, insn->op2, bb);
return IR_MUL_INT;
} else {
goto binop_fp;
}
break;
case IR_ADD_OV:
case IR_SUB_OV:
IR_ASSERT(IR_IS_TYPE_INT(insn->type));
goto binop_int;
case IR_MUL_OV:
IR_ASSERT(IR_IS_TYPE_INT(insn->type));
if (IR_IS_TYPE_SIGNED(insn->type) && ir_type_size[insn->type] != 1) {
if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
op2_insn = &ctx->ir_base[insn->op2];
if (IR_IS_SIGNED_32BIT(op2_insn->val.i64)) {
ir_match_fuse_load(ctx, insn->op1, bb);
return IR_IMUL3;
}
}
goto binop_int;
}
ir_match_fuse_load(ctx, insn->op2, bb);
return IR_MUL_INT;
case IR_DIV:
if (IR_IS_TYPE_INT(insn->type)) {
if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
op2_insn = &ctx->ir_base[insn->op2];
if (IR_IS_CONST_REF(insn->op1)) {
// const
} else if (op2_insn->val.u64 == 1) {
return IR_COPY_INT;
} else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) {
return IR_DIV_PWR2; // shr op1, IR_LOG2(op2_insn->val.u64)
}
}
ir_match_fuse_load(ctx, insn->op2, bb);
return IR_DIV_INT;
} else {
goto binop_fp;
}
break;
case IR_MOD:
if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
op2_insn = &ctx->ir_base[insn->op2];
if (IR_IS_CONST_REF(insn->op1)) {
// const
} else if (IR_IS_TYPE_UNSIGNED(insn->type)
&& IR_IS_POWER_OF_TWO(op2_insn->val.u64)
&& IR_IS_UNSIGNED_32BIT(op2_insn->val.u64 - 1)) {
return IR_MOD_PWR2; // and op1, op2_insn->val.u64-1
}
}
ir_match_fuse_load(ctx, insn->op2, bb);
return IR_MOD_INT;
case IR_BSWAP:
case IR_NOT:
if (insn->type == IR_BOOL) {
if (IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) {
return IR_BOOL_NOT_INT;
} else {
IR_ASSERT(0); // TODO: IR_BOOL_NOT_FP
}
} else if (IR_IS_TYPE_INT(insn->type)) {
return IR_OP_INT;
} else {
IR_ASSERT(0);
}
break;
case IR_NEG:
if (IR_IS_TYPE_INT(insn->type)) {
return IR_OP_INT;
} else {
return IR_OP_FP;
}
case IR_ABS:
if (IR_IS_TYPE_INT(insn->type)) {
return IR_ABS_INT; // movl %edi, %eax; negl %eax; cmovs %edi, %eax
} else {
return IR_OP_FP;
}
case IR_OR:
if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
op2_insn = &ctx->ir_base[insn->op2];
if (IR_IS_CONST_REF(insn->op1)) {
// const
} else if (op2_insn->val.i64 == 0) {
return IR_COPY_INT;
} else if (op2_insn->val.i64 == -1) {
// -1
}
}
goto binop_int;
case IR_AND:
if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
op2_insn = &ctx->ir_base[insn->op2];
if (IR_IS_CONST_REF(insn->op1)) {
// const
} else if (op2_insn->val.i64 == 0) {
// 0
} else if (op2_insn->val.i64 == -1) {
return IR_COPY_INT;
}
}
goto binop_int;
case IR_XOR:
if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
op2_insn = &ctx->ir_base[insn->op2];
if (IR_IS_CONST_REF(insn->op1)) {
// const
}
}
goto binop_int;
case IR_SHL:
if (IR_IS_CONST_REF(insn->op2)) {
if (ctx->flags & IR_OPT_CODEGEN) {
op2_insn = &ctx->ir_base[insn->op2];
if (IR_IS_CONST_REF(insn->op1)) {
// const
} else if (op2_insn->val.u64 == 0) {
return IR_COPY_INT;
} else if (ir_type_size[insn->type] >= 4) {
if (op2_insn->val.u64 == 1) {
// lea [op1*2]
} else if (op2_insn->val.u64 == 2) {
// lea [op1*4]
} else if (op2_insn->val.u64 == 3) {
// lea [op1*8]
}
}
}
return IR_SHIFT_CONST;
}
return IR_SHIFT;
case IR_SHR:
case IR_SAR:
case IR_ROL:
case IR_ROR:
if (IR_IS_CONST_REF(insn->op2)) {
if (ctx->flags & IR_OPT_CODEGEN) {
op2_insn = &ctx->ir_base[insn->op2];
if (IR_IS_CONST_REF(insn->op1)) {
// const
} else if (op2_insn->val.u64 == 0) {
return IR_COPY_INT;
}
}
return IR_SHIFT_CONST;
}
return IR_SHIFT;
case IR_MIN:
case IR_MAX:
if (IR_IS_TYPE_INT(insn->type)) {
return IR_MIN_MAX_INT;
} else {
goto binop_fp;
}
break;
// case IR_COND:
case IR_COPY:
if (IR_IS_TYPE_INT(insn->type)) {
return IR_COPY_INT;
} else {
return IR_COPY_FP;
}
break;
case IR_PHI:
case IR_PI:
case IR_PARAM:
case IR_RLOAD:
return IR_SKIP_REG;
case IR_CALL:
ctx->flags |= IR_HAS_CALLS;
case IR_TAILCALL:
if (ir_input_edges_count(ctx, insn) == 2 /* no arguments */
&& insn->op2 > bb->start
&& insn->op2 < ref
&& !ctx->rules[insn->op2]) {
ir_match_fuse_load(ctx, insn->op2, bb);
}
return insn->op;
case IR_VAR:
if (ctx->use_lists[ref].count > 0) {
return IR_VAR;
} else {
return IR_SKIP;
}
break;
case IR_ALLOCA:
/* alloca() may be use only in functions */
IR_ASSERT(ctx->flags & IR_FUNCTION);
ctx->flags |= IR_USE_FRAME_POINTER | IR_HAS_ALLOCA;
return IR_ALLOCA;
case IR_VSTORE:
if (IR_IS_TYPE_INT(ctx->ir_base[insn->op3].type)) {
store_rule = IR_VSTORE_INT;
load_op = IR_VLOAD;
store_int:
if ((ctx->flags & IR_OPT_CODEGEN)
&& insn->op3 > bb->start
&& insn->op3 < ref
&& (ctx->use_lists[insn->op3].count == 1 ||
(ctx->use_lists[insn->op3].count == 2
&& (ctx->ir_base[insn->op3].op == IR_ADD_OV ||
ctx->ir_base[insn->op3].op == IR_SUB_OV)))
&& IR_IS_TYPE_INT(ctx->ir_base[insn->op3].type)) {
ir_insn *op_insn = &ctx->ir_base[insn->op3];
if (!ctx->rules[insn->op3]) {
ctx->rules[insn->op3] = ir_match_insn(ctx, insn->op3, bb);
}
if ((ctx->rules[insn->op3] == IR_BINOP_INT && op_insn->op != IR_MUL) ||
ctx->rules[insn->op3] == IR_LEA_OB ||
ctx->rules[insn->op3] == IR_LEA_IB) {
if (ctx->ir_base[op_insn->op1].op == load_op
&& ctx->ir_base[op_insn->op1].op2 == insn->op2) {
if (op_insn->op1 > bb->start
&& ctx->use_lists[op_insn->op1].count == 2
&& insn->op1 == op_insn->op1) {
ctx->rules[insn->op3] = IR_SKIP_MEM_BINOP_INT;
ctx->rules[op_insn->op1] = IR_SKIP;
ir_ref addr_ref = insn->op2;
if (!IR_IS_CONST_REF(addr_ref)) {
if (!ctx->rules[addr_ref]) {
ctx->rules[addr_ref] = ir_match_insn(ctx, addr_ref, bb);
}
if (ctx->rules[addr_ref] == IR_LEA_OB) {
ctx->rules[addr_ref] = IR_SKIP_MEM;
}
}
return IR_MEM_BINOP_INT;
}
} else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE)
&& ctx->ir_base[op_insn->op2].op == load_op
&& ctx->ir_base[op_insn->op2].op2 == insn->op2) {
if (op_insn->op2 > bb->start
&& ctx->use_lists[op_insn->op2].count == 2
&& insn->op1 == op_insn->op2) {
ir_ref tmp = op_insn->op1;
op_insn->op1 = op_insn->op2;
op_insn->op2 = tmp;
ctx->rules[insn->op3] = IR_SKIP_MEM_BINOP_INT;
ctx->rules[op_insn->op1] = IR_SKIP;
ir_ref addr_ref = insn->op2;
if (!IR_IS_CONST_REF(addr_ref)) {
if (!ctx->rules[addr_ref]) {
ctx->rules[addr_ref] = ir_match_insn(ctx, addr_ref, bb);
}
if (ctx->rules[addr_ref] == IR_LEA_OB) {
ctx->rules[addr_ref] = IR_SKIP_MEM;
}
}
return IR_MEM_BINOP_INT;
}
}
} else if (ctx->rules[insn->op3] == IR_INC) {
if (ctx->ir_base[op_insn->op1].op == load_op
&& ctx->ir_base[op_insn->op1].op2 == insn->op2) {
if (op_insn->op1 > bb->start
&& ctx->use_lists[op_insn->op1].count == 2
&& insn->op1 == op_insn->op1) {
ctx->rules[insn->op3] = IR_SKIP;
ctx->rules[op_insn->op1] = IR_SKIP;
return IR_MEM_INC;
}
}
} else if (ctx->rules[insn->op3] == IR_DEC) {
if (ctx->ir_base[op_insn->op1].op == load_op
&& ctx->ir_base[op_insn->op1].op2 == insn->op2) {
if (op_insn->op1 > bb->start
&& ctx->use_lists[op_insn->op1].count == 2
&& insn->op1 == op_insn->op1) {
ctx->rules[insn->op3] = IR_SKIP;
ctx->rules[op_insn->op1] = IR_SKIP;
return IR_MEM_DEC;
}
}
} else if (ctx->rules[insn->op3] == IR_MUL_PWR2) {
if (ctx->ir_base[op_insn->op1].op == load_op
&& ctx->ir_base[op_insn->op1].op2 == insn->op2) {
if (op_insn->op1 > bb->start
&& ctx->use_lists[op_insn->op1].count == 2
&& insn->op1 == op_insn->op1) {
ctx->rules[insn->op3] = IR_SKIP;
ctx->rules[op_insn->op1] = IR_SKIP;
return IR_MEM_MUL_PWR2;
}
}
} else if (ctx->rules[insn->op3] == IR_DIV_PWR2) {
if (ctx->ir_base[op_insn->op1].op == load_op
&& ctx->ir_base[op_insn->op1].op2 == insn->op2) {
if (op_insn->op1 > bb->start
&& ctx->use_lists[op_insn->op1].count == 2
&& insn->op1 == op_insn->op1) {
ctx->rules[insn->op3] = IR_SKIP;
ctx->rules[op_insn->op1] = IR_SKIP;
return IR_MEM_DIV_PWR2;
}
}
} else if (ctx->rules[insn->op3] == IR_MOD_PWR2) {
if (ctx->ir_base[op_insn->op1].op == load_op
&& ctx->ir_base[op_insn->op1].op2 == insn->op2) {
if (op_insn->op1 > bb->start
&& ctx->use_lists[op_insn->op1].count == 2
&& insn->op1 == op_insn->op1) {
ctx->rules[insn->op3] = IR_SKIP;
ctx->rules[op_insn->op1] = IR_SKIP;
return IR_MEM_MOD_PWR2;
}
}
} else if (ctx->rules[insn->op3] == IR_SHIFT) {
if (ctx->ir_base[op_insn->op1].op == load_op
&& ctx->ir_base[op_insn->op1].op2 == insn->op2) {
if (op_insn->op1 > bb->start
&& ctx->use_lists[op_insn->op1].count == 2
&& insn->op1 == op_insn->op1) {
ctx->rules[insn->op3] = IR_SKIP_SHIFT;
ctx->rules[op_insn->op1] = IR_SKIP;
return IR_MEM_SHIFT;
}
}
} else if (ctx->rules[insn->op3] == IR_SHIFT_CONST) {
if (ctx->ir_base[op_insn->op1].op == load_op
&& ctx->ir_base[op_insn->op1].op2 == insn->op2) {
if (op_insn->op1 > bb->start
&& ctx->use_lists[op_insn->op1].count == 2
&& insn->op1 == op_insn->op1) {
ctx->rules[insn->op3] = IR_SKIP;
ctx->rules[op_insn->op1] = IR_SKIP;
return IR_MEM_SHIFT_CONST;
}
}
} else if (ctx->rules[insn->op3] == IR_OP_INT && op_insn->op != IR_BSWAP) {
if (ctx->ir_base[op_insn->op1].op == load_op
&& ctx->ir_base[op_insn->op1].op2 == insn->op2) {
if (op_insn->op1 > bb->start
&& ctx->use_lists[op_insn->op1].count == 2
&& insn->op1 == op_insn->op1) {
ctx->rules[insn->op3] = IR_SKIP;
ctx->rules[op_insn->op1] = IR_SKIP;
return IR_MEM_OP_INT;
}
}
}
}
return store_rule;
} else {
return IR_VSTORE_FP;
}
break;
case IR_LOAD:
ir_match_fuse_addr(ctx, insn->op2, bb);
if (IR_IS_TYPE_INT(insn->type)) {
return IR_LOAD_INT;
} else {
return IR_LOAD_FP;
}
break;
case IR_STORE:
ir_match_fuse_addr(ctx, insn->op2, bb);
if (IR_IS_TYPE_INT(ctx->ir_base[insn->op3].type)) {
store_rule = IR_STORE_INT;
load_op = IR_LOAD;
goto store_int;
} else {
return IR_STORE_FP;
}
break;
case IR_RSTORE:
if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) {
if ((ctx->flags & IR_OPT_CODEGEN)
&& insn->op2 > bb->start
&& insn->op2 < ref
&& ctx->use_lists[insn->op2].count == 1
&& IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) {
ir_insn *op_insn = &ctx->ir_base[insn->op2];
if (op_insn->op == IR_ADD ||
op_insn->op == IR_SUB ||
// op_insn->op == IR_MUL ||
op_insn->op == IR_OR ||
op_insn->op == IR_AND ||
op_insn->op == IR_XOR) {
if (ctx->ir_base[op_insn->op1].op == IR_RLOAD
&& ctx->ir_base[op_insn->op1].op2 == insn->op3) {
ctx->rules[insn->op2] = IR_SKIP_REG_BINOP_INT;
ctx->rules[op_insn->op1] = IR_SKIP;
return IR_REG_BINOP_INT;
} else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE)
&& ctx->ir_base[op_insn->op2].op == IR_RLOAD
&& ctx->ir_base[op_insn->op2].op2 == insn->op3) {
ir_ref tmp = op_insn->op1;
op_insn->op1 = op_insn->op2;
op_insn->op2 = tmp;
ctx->rules[insn->op2] = IR_SKIP_REG_BINOP_INT;
ctx->rules[op_insn->op1] = IR_SKIP;
return IR_REG_BINOP_INT;
}
}
}
}
if (insn->op2 > bb->start
&& insn->op2 < ref
&& !ctx->rules[insn->op2]) {
ir_match_fuse_load(ctx, insn->op2, bb);
}
return IR_RSTORE;
case IR_START:
case IR_BEGIN:
// case IR_END:
case IR_IF_TRUE:
case IR_IF_FALSE:
case IR_CASE_VAL:
case IR_CASE_DEFAULT:
case IR_MERGE:
case IR_LOOP_BEGIN:
// case IR_LOOP_END:
case IR_UNREACHABLE:
case IR_SNAPSHOT:
return IR_SKIP;
case IR_RETURN:
if (!insn->op2) {
return IR_RETURN_VOID;
} else if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) {
return IR_RETURN_INT;
} else {
return IR_RETURN_FP;
}
case IR_IF:
if (insn->op2 > bb->start && insn->op2 < ref && ctx->use_lists[insn->op2].count == 1) {
op2_insn = &ctx->ir_base[insn->op2];
if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT) {
ir_match_swap_cmp(ctx, op2_insn);
if (op2_insn->op2 > bb->start
&& op2_insn->op2 < ref
&& !ctx->rules[op2_insn->op2]) {
ir_match_fuse_load(ctx, op2_insn->op2, bb);
}
ctx->rules[insn->op2] = IR_SKIP;
if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) {
if (IR_IS_CONST_REF(op2_insn->op2)
&& op2_insn->op1 > bb->start
&& op2_insn->op1 < ref
&& !ctx->rules[op2_insn->op1]) {
ir_match_fuse_load(ctx, op2_insn->op1, bb);
}
return IR_CMP_AND_BRANCH_INT;
} else {
return IR_CMP_AND_BRANCH_FP;
}
} else if (op2_insn->op == IR_AND) { // TODO: OR, XOR. etc
ir_match_swap_commutative(ctx, op2_insn);
ir_match_fuse_load(ctx, op2_insn->op2, bb);
if (IR_IS_CONST_REF(op2_insn->op2)
&& op2_insn->op1 > bb->start
&& op2_insn->op1 < ref
&& !ctx->rules[op2_insn->op1]) {
ir_match_fuse_load(ctx, op2_insn->op1, bb);
}
ctx->rules[insn->op2] = IR_SKIP_TEST_INT;
return IR_TEST_AND_BRANCH_INT;
} else if (op2_insn->op == IR_OVERFLOW) {
ctx->rules[insn->op2] = IR_SKIP;
return IR_OVERFLOW_AND_BRANCH;
}
}
if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) {
if (insn->op2 == ref - 1 /* prevoius instruction */
&& insn->op2 > bb->start) {
op2_insn = &ctx->ir_base[insn->op2];
if (op2_insn->op == IR_ADD ||
op2_insn->op == IR_SUB ||
// op2_insn->op == IR_MUL ||
op2_insn->op == IR_OR ||
op2_insn->op == IR_AND ||
op2_insn->op == IR_XOR) {
ir_match_swap_commutative(ctx, op2_insn);
ir_match_fuse_load(ctx, op2_insn->op2, bb);
ctx->rules[insn->op2] = IR_BINOP_INT;
return IR_JCC_INT;
}
} else if ((ctx->flags & IR_OPT_CODEGEN)
&& insn->op1 == ref - 1 /* prevoius instruction */
&& insn->op2 == ref - 2 /* prevoius instruction */
&& insn->op2 > bb->start
&& ctx->use_lists[insn->op2].count == 2
&& IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) {
ir_insn *store_insn = &ctx->ir_base[insn->op1];
if (store_insn->op == IR_STORE && store_insn->op3 == insn->op2) {
ir_insn *op_insn = &ctx->ir_base[insn->op2];
if (op_insn->op == IR_ADD ||
op_insn->op == IR_SUB ||
// op_insn->op == IR_MUL ||
op_insn->op == IR_OR ||
op_insn->op == IR_AND ||
op_insn->op == IR_XOR) {
if (ctx->ir_base[op_insn->op1].op == IR_LOAD
&& ctx->ir_base[op_insn->op1].op2 == store_insn->op2) {
if (op_insn->op1 > bb->start
&& ctx->use_lists[op_insn->op1].count == 2
&& store_insn->op1 == op_insn->op1) {
ctx->rules[insn->op2] = IR_SKIP_MEM_BINOP_INT;
ctx->rules[op_insn->op1] = IR_SKIP;
ir_ref addr_ref = store_insn->op2;
if (!IR_IS_CONST_REF(addr_ref)) {
if (!ctx->rules[addr_ref]) {
ctx->rules[addr_ref] = ir_match_insn(ctx, addr_ref, bb);
}
if (ctx->rules[addr_ref] == IR_LEA_OB) {
ctx->rules[addr_ref] = IR_SKIP_MEM;
}
}
ctx->rules[insn->op1] = IR_MEM_BINOP_INT;
return IR_JCC_INT;
}
} else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE)
&& ctx->ir_base[op_insn->op2].op == IR_LOAD
&& ctx->ir_base[op_insn->op2].op2 == store_insn->op2) {
if (op_insn->op2 > bb->start
&& ctx->use_lists[op_insn->op2].count == 2
&& store_insn->op1 == op_insn->op2) {
ir_ref tmp = op_insn->op1;
op_insn->op1 = op_insn->op2;
op_insn->op2 = tmp;
ctx->rules[insn->op2] = IR_SKIP_MEM_BINOP_INT;
ctx->rules[op_insn->op1] = IR_SKIP;
ir_ref addr_ref = store_insn->op2;
if (!IR_IS_CONST_REF(addr_ref)) {
if (!ctx->rules[addr_ref]) {
ctx->rules[addr_ref] = ir_match_insn(ctx, addr_ref, bb);
}
if (ctx->rules[addr_ref] == IR_LEA_OB) {
ctx->rules[addr_ref] = IR_SKIP_MEM;
}
}
ctx->rules[insn->op1] = IR_MEM_BINOP_INT;
return IR_JCC_INT;
}
}
}
}
}
ir_match_fuse_load(ctx, insn->op2, bb);
return IR_IF_INT;
} else {
IR_ASSERT(0 && "NIY IR_IF_FP");
}
case IR_GUARD:
case IR_GUARD_NOT:
if (insn->op2 > bb->start && insn->op2 < ref && ctx->use_lists[insn->op2].count == 1) {
op2_insn = &ctx->ir_base[insn->op2];
if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT) {
ctx->rules[insn->op2] = IR_SKIP;
if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) {
if (op2_insn->op1 > bb->start
&& op2_insn->op1 < ref
&& !ctx->rules[op2_insn->op1]) {
ir_match_fuse_load(ctx, op2_insn->op1, bb);
}
return IR_GUARD_CMP_INT;
} else {
return IR_GUARD_CMP_FP;
}
} else if (op2_insn->op == IR_OVERFLOW) {
ctx->rules[insn->op2] = IR_SKIP;
return IR_GUARD_OVERFLOW;
}
}
ir_match_fuse_load(ctx, insn->op2, bb);
return insn->op;
case IR_IJMP:
if (insn->op2 > bb->start
&& insn->op2 < ref
&& !ctx->rules[insn->op2]) {
ir_match_fuse_load(ctx, insn->op2, bb);
}
return insn->op;
case IR_SEXT:
case IR_ZEXT:
case IR_BITCAST:
case IR_INT2FP:
case IR_FP2INT:
case IR_FP2FP:
ir_match_fuse_load(ctx, insn->op1, bb);
return insn->op;
default:
break;
}
return insn->op;
}
/*
 * Instruction selection driver: assigns a matching rule to every instruction
 * of every reachable basic block and stores it in ctx->rules.
 *
 * On first use it also builds ctx->prev_insn_len: for each instruction
 * position it records the length (in instruction slots) of the instruction
 * that precedes it, which is what allows a block to be walked backwards.
 *
 * Blocks are visited from the last to the first and instructions from the end
 * of a block to its start. A rule that is already set (non-zero) when its
 * instruction is reached is left untouched.
 *
 * Always returns 1.
 */
int ir_match(ir_ctx *ctx)
{
	int blk, len;
	ir_ref pos;
	ir_block *block;

	if (!ctx->prev_insn_len) {
		ctx->prev_insn_len = ir_mem_malloc(ctx->insns_count * sizeof(uint32_t));
		len = 1;
		for (blk = 1; blk <= ctx->cfg_blocks_count; blk++) {
			block = &ctx->cfg_blocks[blk];
			if (block->flags & IR_BB_UNREACHABLE) {
				continue;
			}
			pos = block->start;
			while (pos <= block->end) {
				ctx->prev_insn_len[pos] = len;
				len = ir_operands_count(ctx, &ctx->ir_base[pos]);
				len = 1 + (len >> 2); // support for multi-word instructions like MERGE and PHI
				pos += len;
			}
		}
	}

	ctx->rules = ir_mem_calloc(ctx->insns_count, sizeof(uint32_t));
	for (blk = ctx->cfg_blocks_count; blk > 0; blk--) {
		block = &ctx->cfg_blocks[blk];
		if (block->flags & IR_BB_UNREACHABLE) {
			continue;
		}
		pos = block->end;
		while (pos >= block->start) {
			if (!ctx->rules[pos]) {
				ctx->rules[pos] = ir_match_insn(ctx, pos, block);
			}
			/* step back by the length of the preceding instruction */
			pos -= ctx->prev_insn_len[pos];
		}
	}

	return 1;
}
/* code generation */
/*
 * Resolves the stack spill slot of the value defined by `ref`.
 *
 * Stores the base register of the slot into *reg and returns the displacement
 * to use with it:
 *  - intervals flagged IR_LIVE_INTERVAL_SPILL_SPECIAL are addressed through
 *    ctx->spill_base with the recorded position used as is;
 *  - all others are addressed through the frame or stack pointer (depending
 *    on IR_USE_FRAME_POINTER) with the position translated by
 *    IR_SPILL_POS_TO_OFFSET.
 */
static int32_t ir_ref_spill_slot(ir_ctx *ctx, ir_ref ref, ir_reg *reg)
{
	ir_backend_data *data = ctx->data; /* referenced by IR_SPILL_POS_TO_OFFSET */
	ir_live_interval *ival;
	int32_t pos;

	IR_ASSERT(ref >= 0 && ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]);
	ival = ctx->live_intervals[ctx->vregs[ref]];
	pos = ival->stack_spill_pos;
	IR_ASSERT(pos != -1);

	if (ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
		IR_ASSERT(ctx->spill_base != IR_REG_NONE);
		*reg = ctx->spill_base;
		return pos;
	}

	if (ctx->flags & IR_USE_FRAME_POINTER) {
		*reg = IR_REG_FRAME_POINTER;
	} else {
		*reg = IR_REG_STACK_POINTER;
	}
	return IR_SPILL_POS_TO_OFFSET(pos);
}
/*
 * Emits code that loads the integer constant `val` into register `reg`.
 *
 *  - zero is produced with `xor reg, reg`;
 *  - values of non-8-byte types, and 8-byte values that fit a signed 32-bit
 *    immediate, use a `mov` with a 32-bit immediate;
 *  - remaining 8-byte values (x64 only) use either a 32-bit `mov` when the
 *    value fits in unsigned 32 bits, or `mov64`.
 */
static void ir_emit_load_imm_int(ir_ctx *ctx, ir_type type, ir_reg reg, int64_t val)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	IR_ASSERT(IR_IS_TYPE_INT(type));

	if (val == 0) {
		|	ASM_REG_REG_OP xor, type, reg, reg
	} else if (ir_type_size[type] != 8 || IR_IS_SIGNED_32BIT(val)) {
		|	ASM_REG_IMM_OP mov, type, reg, (int32_t)val // sign extended load
	} else {
		/* 8-byte value that does not fit a signed 32-bit immediate */
		IR_ASSERT(sizeof(void*) == 8);
|.if X64
		if (IR_IS_UNSIGNED_32BIT(val)) {
			|	mov Rd(reg), (uint32_t)val // zero extended load
		} else {
			|	mov64 Ra(reg), val
		}
|.endif
	}
}
/*
 * Emits an integer `mov` from memory into register `reg`.
 *
 * The source is [base_reg + offset], or the absolute address [offset] when
 * base_reg is IR_REG_NONE.
 */
static void ir_emit_load_mem_int(ir_ctx *ctx, ir_type type, ir_reg reg, ir_reg base_reg, int32_t offset)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	if (base_reg == IR_REG_NONE) {
		/* no base register: absolute addressing */
		|	ASM_REG_MEM_OP mov, type, reg, [offset]
	} else {
		|	ASM_REG_MEM_OP mov, type, reg, [Ra(base_reg)+offset]
	}
}
/*
 * Emits code that loads the floating point constant referenced by `src`
 * into the FP register `reg`.
 *
 * A constant whose bit pattern is all zeroes is produced by xor-ing the
 * register with itself (AVX or SSE form depending on IR_AVX). Any other
 * constant is flagged IR_CONST_EMIT and loaded from the label numbered
 * `ctx->cfg_blocks_count - src`.
 */
static void ir_emit_load_imm_fp(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_insn *const_insn = &ctx->ir_base[src];
	int label;

	if (type == IR_FLOAT && const_insn->val.u32 == 0) {
		if (ctx->flags & IR_AVX) {
			|	vxorps xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST)
		} else {
			|	xorps xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST)
		}
		return;
	}

	if (type == IR_DOUBLE && const_insn->val.u64 == 0) {
		if (ctx->flags & IR_AVX) {
			|	vxorpd xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST)
		} else {
			|	xorpd xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST)
		}
		return;
	}

	/* non-zero constant: mark it for emission and load it through its label */
	const_insn->const_flags |= IR_CONST_EMIT;
	label = ctx->cfg_blocks_count - src;
	|	ASM_FP_REG_MEM_OP movss, movsd, vmovss, vmovsd, type, reg, [=>label]
}
/*
 * Emits a scalar floating point load from memory into the FP register `reg`.
 *
 * The source is [base_reg + offset], or the absolute address [offset] when
 * base_reg is IR_REG_NONE (mirroring ir_emit_load_mem_int). Previously both
 * branches used Ra(base_reg), so the IR_REG_NONE case encoded an invalid
 * base register.
 */
static void ir_emit_load_mem_fp(ir_ctx *ctx, ir_type type, ir_reg reg, ir_reg base_reg, int32_t offset)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	if (base_reg != IR_REG_NONE) {
		|	ASM_FP_REG_MEM_OP movss, movsd, vmovss, vmovsd, type, reg, [Ra(base_reg)+offset]
	} else {
		/* no base register: absolute addressing */
		|	ASM_FP_REG_MEM_OP movss, movsd, vmovss, vmovsd, type, reg, [offset]
	}
}
/*
 * Loads the value referenced by `src` into register `reg`.
 *
 * Constant refs are materialised as immediates (integer) or through
 * ir_emit_load_imm_fp (floating point); any other ref is read back from its
 * spill slot.
 */
static void ir_emit_load(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src)
{
	if (!IR_IS_CONST_REF(src)) {
		ir_reg base;
		int32_t disp = ir_ref_spill_slot(ctx, src, &base);

		if (IR_IS_TYPE_INT(type)) {
			ir_emit_load_mem_int(ctx, type, reg, base, disp);
		} else {
			ir_emit_load_mem_fp(ctx, type, reg, base, disp);
		}
		return;
	}

	if (!IR_IS_TYPE_INT(type)) {
		ir_emit_load_imm_fp(ctx, type, reg, src);
		return;
	}

	IR_ASSERT(ctx->ir_base[src].op != IR_STR);
	ir_emit_load_imm_int(ctx, type, reg, ctx->ir_base[src].val.i64);
}
/*
 * Emits an integer `mov` that stores register `reg` to memory at
 * [base_reg + offset].
 *
 * `type` is forwarded to the ASM_MEM_REG_OP macro (defined elsewhere in this
 * file; presumably it selects the operand width from the type size).
 * Unlike ir_emit_load_mem_int, base_reg is used unconditionally here: there
 * is no IR_REG_NONE handling.
 */
static void ir_emit_store_mem_int(ir_ctx *ctx, ir_type type, ir_reg base_reg, int32_t offset, ir_reg reg)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
| ASM_MEM_REG_OP mov, type, [Ra(base_reg)+offset], reg
}
/*
 * Emits a scalar floating point store of register `reg` to memory at
 * [base_reg + offset].
 *
 * The concrete instruction (movss/movsd/vmovss/vmovsd) is chosen by the
 * ASM_FP_MEM_REG_OP macro, defined elsewhere in this file (presumably from
 * `type` and the AVX flag).
 * base_reg is used unconditionally: there is no IR_REG_NONE handling.
 */
static void ir_emit_store_mem_fp(ir_ctx *ctx, ir_type type, ir_reg base_reg, int32_t offset, ir_reg reg)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
| ASM_FP_MEM_REG_OP movss, movsd, vmovss, vmovsd, type, [Ra(base_reg)+offset], reg
}
/*
 * Stores register `reg` into the spill slot of the value defined by `dst`.
 * `dst` must not be a constant ref.
 */
static void ir_emit_store(ir_ctx *ctx, ir_type type, ir_ref dst, ir_reg reg)
{
	ir_reg base;
	int32_t disp;

	IR_ASSERT(dst >= 0);
	disp = ir_ref_spill_slot(ctx, dst, &base);

	if (!IR_IS_TYPE_INT(type)) {
		ir_emit_store_mem_fp(ctx, type, base, disp, reg);
	} else {
		ir_emit_store_mem_int(ctx, type, base, disp, reg);
	}
}
/*
 * Tells whether two refs are assigned the same stack spill position.
 *
 * Returns 0 if either ref is a constant. Otherwise both refs must have a
 * virtual register, a live interval and a valid (not -1) spill position.
 */
static bool ir_is_same_mem(ir_ctx *ctx, ir_ref r1, ir_ref r2)
{
	ir_live_interval *first, *second;
	int32_t first_pos, second_pos;

	if (IR_IS_CONST_REF(r1) || IR_IS_CONST_REF(r2)) {
		return 0;
	}

	IR_ASSERT(ctx->vregs[r1] && ctx->vregs[r2]);
	first = ctx->live_intervals[ctx->vregs[r1]];
	second = ctx->live_intervals[ctx->vregs[r2]];
	IR_ASSERT(first && second);

	first_pos = first->stack_spill_pos;
	second_pos = second->stack_spill_pos;
	IR_ASSERT(first_pos != -1 && second_pos != -1);

	return first_pos == second_pos;
}
/*
 * Emits an integer register-to-register `mov` from `src` to `dst`.
 *
 * `type` is forwarded to the ASM_REG_REG_OP macro (defined elsewhere in this
 * file; presumably it selects the operand width from the type size).
 */
static void ir_emit_mov(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
| ASM_REG_REG_OP mov, type, dst, src
}
/*
 * Emits a floating point register-to-register move from `src` to `dst`.
 *
 * The concrete instruction (movaps/movapd/vmovaps/vmovapd) is chosen by the
 * ASM_FP_REG_REG_OP macro, defined elsewhere in this file (presumably from
 * `type` and the AVX flag).
 */
static void ir_emit_fp_mov(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
| ASM_FP_REG_REG_OP movaps, movapd, vmovaps, vmovapd, type, dst, src
}
/*
 * Computes the displacement of an address expression that is fused into a
 * memory operand.
 *
 *  - IR_C_ADDR that fits a signed 32-bit value (always, on 32-bit targets):
 *    returns the address itself;
 *  - IR_RLOAD: returns 0;
 *  - IR_ADD of a non-constant and a constant: returns the constant. If the
 *    register in *preg carries IR_REG_SPILL_LOAD, the flag is cleared in
 *    *preg and the non-constant operand is loaded into that register first.
 *
 * Anything else trips an assertion and yields 0.
 */
static int32_t ir_fuse_addr(ir_ctx *ctx, ir_ref ref, ir_reg *preg)
{
	ir_insn *addr = &ctx->ir_base[ref];

	switch (addr->op) {
		case IR_C_ADDR:
			if (sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr->val.i64)) {
				return addr->val.i32;
			}
			break;
		case IR_RLOAD:
			return 0;
		case IR_ADD: {
			ir_reg base;

			IR_ASSERT(!IR_IS_CONST_REF(addr->op1) && IR_IS_CONST_REF(addr->op2));
			base = *preg;
			IR_ASSERT(base != IR_REG_NONE);
			if (base & IR_REG_SPILL_LOAD) {
				base &= ~IR_REG_SPILL_LOAD;
				*preg = base;
				ir_emit_load(ctx, IR_ADDR, base, addr->op1);
			}
			return ctx->ir_base[addr->op2].val.i32;
		}
		default:
			break;
	}

	IR_ASSERT(0);
	return 0;
}
/*
 * Computes the displacement for a LOAD that is fused into a memory operand.
 *
 * When the LOAD address is a constant or was itself matched as IR_SKIP_MEM,
 * the displacement comes from ir_fuse_addr. Otherwise only the LOAD is fused:
 * the address is expected to be in *preg already and the displacement is 0.
 */
static int32_t ir_fuse_load(ir_ctx *ctx, ir_ref ref, ir_reg *preg)
{
	ir_insn *load = &ctx->ir_base[ref];
	ir_ref addr_ref;

	IR_ASSERT(load->op == IR_LOAD);
	addr_ref = load->op2;

	if (IR_IS_CONST_REF(addr_ref) || ctx->rules[addr_ref] == IR_SKIP_MEM) {
		return ir_fuse_addr(ctx, addr_ref, preg);
	}

	/* just fuse the LOAD itself */
	IR_ASSERT(*preg != IR_REG_NONE);
	IR_ASSERT(!((*preg) & IR_REG_SPILL_LOAD));
	return 0;
}
/*
 * Emits the function prologue (nothing when IR_SKIP_PROLOGUE is set):
 *  1. sets up the frame pointer when IR_USE_FRAME_POINTER is set;
 *  2. reserves stack_frame_size + call_stack_size bytes of stack;
 *  3. stores every general purpose register listed in
 *     data->used_preserved_regs into consecutive pointer-sized slots, going
 *     downwards from the frame pointer, or from the top of the reserved area
 *     when no frame pointer is used.
 *
 * Saving of FP registers is not implemented.
 */
static void ir_emit_prologue(ir_ctx *ctx)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg base;
	int offset;
	uint32_t regno;

	if (ctx->flags & IR_SKIP_PROLOGUE) {
		return;
	}

	if (ctx->flags & IR_USE_FRAME_POINTER) {
		|	push Ra(IR_REG_RBP)
		|	mov Ra(IR_REG_RBP), Ra(IR_REG_RSP)
	}
	if (data->ra_data.stack_frame_size + data->call_stack_size) {
		|	sub Ra(IR_REG_RSP), (data->ra_data.stack_frame_size + data->call_stack_size)
	}

	if (!data->used_preserved_regs) {
		return;
	}

	if (ctx->flags & IR_USE_FRAME_POINTER) {
		base = IR_REG_FRAME_POINTER;
		offset = 0;
	} else {
		base = IR_REG_STACK_POINTER;
		offset = data->ra_data.stack_frame_size + data->call_stack_size;
	}

	for (regno = 0; regno < IR_REG_NUM; regno++) {
		if (!IR_REGSET_IN(data->used_preserved_regs, regno)) {
			continue;
		}
		if (regno >= IR_REG_FP_FIRST) {
			IR_ASSERT(0 && "NIY FP register saing");
			continue;
		}
		offset -= sizeof(void*);
		|	mov aword [Ra(base)+offset], Ra(regno)
	}
}
/*
 * Emits the function epilogue:
 *  1. reloads every general purpose register listed in
 *     data->used_preserved_regs from the slots used by ir_emit_prologue
 *     (same order, same offsets);
 *  2. restores the stack pointer, either from the frame pointer (which is
 *     then popped) or by adding back stack_frame_size + call_stack_size.
 *
 * Restoring of FP registers is not implemented.
 */
static void ir_emit_epilogue(ir_ctx *ctx)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	if (data->used_preserved_regs) {
		ir_reg base;
		int offset;
		uint32_t regno;

		if (ctx->flags & IR_USE_FRAME_POINTER) {
			base = IR_REG_FRAME_POINTER;
			offset = 0;
		} else {
			base = IR_REG_STACK_POINTER;
			offset = data->ra_data.stack_frame_size + data->call_stack_size;
		}

		for (regno = 0; regno < IR_REG_NUM; regno++) {
			if (!IR_REGSET_IN(data->used_preserved_regs, regno)) {
				continue;
			}
			if (regno >= IR_REG_FP_FIRST) {
				IR_ASSERT(0 && "NIY FP register saing");
				continue;
			}
			offset -= sizeof(void*);
			|	mov Ra(regno), aword [Ra(base)+offset]
		}
	}

	if (ctx->flags & IR_USE_FRAME_POINTER) {
		|	mov Ra(IR_REG_RSP), Ra(IR_REG_RBP)
		|	pop Ra(IR_REG_RBP)
	} else if (data->ra_data.stack_frame_size + data->call_stack_size) {
		|	add Ra(IR_REG_RSP), (data->ra_data.stack_frame_size + data->call_stack_size)
	}
}
/*
 * Emits a two-operand integer instruction for ADD, SUB, MUL, OR, AND, XOR
 * (and the _OV variants of ADD/SUB/MUL).
 *
 * The first operand is brought into def_reg (moved from op1_reg or loaded),
 * then the operation is applied in place with the second operand taken from:
 *  - a register, when op2 has one and is not a fused memory operand;
 *  - a 32-bit immediate, when op2 is a constant without a register;
 *  - memory otherwise: either a fused LOAD (rule IR_SKIP_MEM) or the spill
 *    slot of op2.
 *
 * The result is written to the spill slot of `def` when IR_REG_SPILL_STORE
 * is set for it.
 */
static void ir_emit_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_ref op2 = insn->op2;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];
	ir_reg op2_reg = ctx->regs[def][2];

	IR_ASSERT(def_reg != IR_REG_NONE);

	if (op1_reg != IR_REG_NONE && (op1_reg & IR_REG_SPILL_LOAD)) {
		op1_reg &= ~IR_REG_SPILL_LOAD;
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	if (def_reg != op1_reg) {
		if (op1_reg != IR_REG_NONE) {
			ir_emit_mov(ctx, type, def_reg, op1_reg);
		} else {
			ir_emit_load(ctx, type, def_reg, op1);
		}
	}

	/*
	 * ctx->rules is indexed by instruction refs only. Constant refs are
	 * negative, so the rule must not be looked up for them: a constant that
	 * was given a register always takes the register path.
	 */
	if (op2_reg != IR_REG_NONE
	 && (IR_IS_CONST_REF(op2) || ctx->rules[op2] != IR_SKIP_MEM)) {
		if ((op2_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(op2)) {
			op2_reg &= ~IR_REG_SPILL_LOAD;
			if (op1 != op2) {
				ir_emit_load(ctx, type, op2_reg, op2);
			}
		}
		switch (insn->op) {
			case IR_ADD:
			case IR_ADD_OV:
				|	ASM_REG_REG_OP add, type, def_reg, op2_reg
				break;
			case IR_SUB:
			case IR_SUB_OV:
				|	ASM_REG_REG_OP sub, type, def_reg, op2_reg
				break;
			case IR_MUL:
			case IR_MUL_OV:
				|	ASM_REG_REG_IMUL type, def_reg, op2_reg
				break;
			case IR_OR:
				|	ASM_REG_REG_OP or, type, def_reg, op2_reg
				break;
			case IR_AND:
				|	ASM_REG_REG_OP and, type, def_reg, op2_reg
				break;
			case IR_XOR:
				|	ASM_REG_REG_OP xor, type, def_reg, op2_reg
				break;
			default:
				IR_ASSERT(0 && "NIY binary op");
				break;
		}
	} else if (IR_IS_CONST_REF(op2)) {
		/* constant without a register: must fit a 32-bit immediate */
		ir_insn *val_insn = &ctx->ir_base[op2];
		int32_t val;

		IR_ASSERT(IR_IS_32BIT(val_insn->type, val_insn->val));
		val = val_insn->val.i32;
		switch (insn->op) {
			case IR_ADD:
			case IR_ADD_OV:
				|	ASM_REG_IMM_OP add, type, def_reg, val
				break;
			case IR_SUB:
			case IR_SUB_OV:
				|	ASM_REG_IMM_OP sub, type, def_reg, val
				break;
			case IR_MUL:
			case IR_MUL_OV:
				|	ASM_REG_IMM_IMUL type, def_reg, val
				break;
			case IR_OR:
				|	ASM_REG_IMM_OP or, type, def_reg, val
				break;
			case IR_AND:
				|	ASM_REG_IMM_OP and, type, def_reg, val
				break;
			case IR_XOR:
				|	ASM_REG_IMM_OP xor, type, def_reg, val
				break;
			default:
				IR_ASSERT(0 && "NIY binary op");
				break;
		}
	} else {
		/* memory operand: fused LOAD or spill slot of op2 */
		int32_t offset = 0;

		if (ctx->rules[op2] == IR_SKIP_MEM) {
			IR_ASSERT(op2_reg != IR_REG_NONE);
			offset = ir_fuse_load(ctx, op2, &op2_reg);
		} else {
			offset = ir_ref_spill_slot(ctx, op2, &op2_reg);
		}
		switch (insn->op) {
			case IR_ADD:
			case IR_ADD_OV:
				|	ASM_REG_MEM_OP add, type, def_reg, [Ra(op2_reg)+offset]
				break;
			case IR_SUB:
			case IR_SUB_OV:
				|	ASM_REG_MEM_OP sub, type, def_reg, [Ra(op2_reg)+offset]
				break;
			case IR_MUL:
			case IR_MUL_OV:
				|	ASM_REG_MEM_IMUL type, def_reg, [Ra(op2_reg)+offset]
				break;
			case IR_OR:
				|	ASM_REG_MEM_OP or, type, def_reg, [Ra(op2_reg)+offset]
				break;
			case IR_AND:
				|	ASM_REG_MEM_OP and, type, def_reg, [Ra(op2_reg)+offset]
				break;
			case IR_XOR:
				|	ASM_REG_MEM_OP xor, type, def_reg, [Ra(op2_reg)+offset]
				break;
			default:
				IR_ASSERT(0 && "NIY binary op");
				break;
		}
	}

	if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}
/*
 * Emits the three-operand form `imul def, op1, imm32`.
 *
 * op2 must be a constant that fits in 32 bits. op1 is taken from its
 * register (reloaded first when flagged IR_REG_SPILL_LOAD) or, when it has no
 * register or is a fused memory operand (rule IR_SKIP_MEM), directly from
 * memory. Only 2-, 4- and (on x64) 8-byte types are handled.
 *
 * The result is written to the spill slot of `def` when IR_REG_SPILL_STORE
 * is set for it.
 */
static void ir_emit_imul3(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_ref op2 = insn->op2;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];
	ir_insn *imm_insn = &ctx->ir_base[op2];
	int32_t val;

	IR_ASSERT(def_reg != IR_REG_NONE);
	IR_ASSERT(IR_IS_CONST_REF(op2));
	IR_ASSERT(IR_IS_32BIT(imm_insn->type, imm_insn->val));
	val = imm_insn->val.i32;

	if (op1_reg == IR_REG_NONE || ctx->rules[op1] == IR_SKIP_MEM) {
		/* multiplicand comes from memory: fused LOAD or spill slot */
		int32_t offset = 0;

		if (ctx->rules[op1] == IR_SKIP_MEM) {
			IR_ASSERT(op1_reg != IR_REG_NONE);
			offset = ir_fuse_load(ctx, op1, &op1_reg);
		} else {
			offset = ir_ref_spill_slot(ctx, op1, &op1_reg);
		}
		switch (ir_type_size[type]) {
			case 2:
				|	imul Rw(def_reg), word [Ra(op1_reg)+offset], val
				break;
			case 4:
				|	imul Rd(def_reg), dword [Ra(op1_reg)+offset], val
				break;
|.if X64
||			case 8:
|				imul Rq(def_reg), qword [Ra(op1_reg)+offset], val
||				break;
|.endif
			default:
				IR_ASSERT(0);
		}
	} else {
		/* multiplicand comes from a register */
		if (op1_reg & IR_REG_SPILL_LOAD) {
			op1_reg &= ~IR_REG_SPILL_LOAD;
			ir_emit_load(ctx, type, op1_reg, op1);
		}
		switch (ir_type_size[type]) {
			case 2:
				|	imul Rw(def_reg), Rw(op1_reg), val
				break;
			case 4:
				|	imul Rd(def_reg), Rd(op1_reg), val
				break;
|.if X64
||			case 8:
|				imul Rq(def_reg), Rq(op1_reg), val
||				break;
|.endif
			default:
				IR_ASSERT(0);
		}
	}

	if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}
/*
 * Emits integer MIN/MAX as `cmp` followed by a conditional move.
 *
 * op1 is brought into def_reg, op2 into op2_reg, then def_reg is replaced by
 * op2_reg when op2 is the better candidate:
 *   MIN: replace when def_reg > op2_reg (cmovg signed, cmova unsigned)
 *   MAX: replace when def_reg < op2_reg (cmovl signed, cmovb unsigned)
 *
 * Two defects of the previous version are fixed here:
 *  - the condition codes were swapped, so IR_MIN produced the maximum and
 *    IR_MAX the minimum;
 *  - when op1 == op2 the function returned before the IR_REG_SPILL_STORE
 *    write-back, leaving the spill slot of `def` unwritten.
 */
static void ir_emit_min_max_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_ref op2 = insn->op2;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];
	ir_reg op2_reg = ctx->regs[def][2];

	IR_ASSERT(def_reg != IR_REG_NONE && op2_reg != IR_REG_NONE);

	if (op1_reg != IR_REG_NONE && (op1_reg & IR_REG_SPILL_LOAD)) {
		op1_reg &= ~IR_REG_SPILL_LOAD;
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	if (def_reg != op1_reg) {
		if (op1_reg != IR_REG_NONE) {
			ir_emit_mov(ctx, type, def_reg, op1_reg);
		} else {
			ir_emit_load(ctx, type, def_reg, op1);
		}
	}
	if ((op2_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(op2)) {
		op2_reg &= ~IR_REG_SPILL_LOAD;
		if (op1 != op2) {
			ir_emit_load(ctx, type, op2_reg, op2);
		}
	}

	/* min(x, x) == max(x, x) == x: def_reg already holds the result */
	if (op1 != op2) {
		|	ASM_REG_REG_OP cmp, type, def_reg, op2_reg
		if (insn->op == IR_MIN) {
			if (IR_IS_TYPE_SIGNED(type)) {
				|	ASM_REG_REG_OP2 cmovg, type, def_reg, op2_reg
			} else {
				|	ASM_REG_REG_OP2 cmova, type, def_reg, op2_reg
			}
		} else {
			IR_ASSERT(insn->op == IR_MAX);
			if (IR_IS_TYPE_SIGNED(type)) {
				|	ASM_REG_REG_OP2 cmovl, type, def_reg, op2_reg
			} else {
				|	ASM_REG_REG_OP2 cmovb, type, def_reg, op2_reg
			}
		}
	}

	if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}
/*
 * Materialises the overflow state of the operation referenced by insn->op1
 * as a byte in def_reg: `seto` when that operation has a signed type,
 * `setc` otherwise.
 *
 * The result is written to the spill slot of `def` when IR_REG_SPILL_STORE
 * is set for it.
 */
static void ir_emit_overflow(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg dst_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_type src_type = ctx->ir_base[insn->op1].type;

	IR_ASSERT(dst_reg != IR_REG_NONE);
	IR_ASSERT(IR_IS_TYPE_INT(src_type));

	if (!IR_IS_TYPE_SIGNED(src_type)) {
		|	setc Rb(dst_reg)
	} else {
		|	seto Rb(dst_reg)
	}

	if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
		ir_emit_store(ctx, insn->type, def, dst_reg);
	}
}
/*
 * Emits a conditional branch on the overflow state of the operation behind
 * the OVERFLOW instruction referenced by insn->op2 (overflow flag for signed
 * types, carry flag otherwise).
 *
 * Block `b` has a "true" and a "false" successor. If the true successor is
 * the next block, the condition is inverted and the branch targets the false
 * successor; if the false successor is the next block, only the conditional
 * branch is emitted. Otherwise the conditional branch is followed by an
 * unconditional jump to the false successor.
 */
static void ir_emit_overflow_and_branch(ir_ctx *ctx, int b, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_insn *ov_insn = &ctx->ir_base[insn->op2];
	ir_type type = ctx->ir_base[ov_insn->op1].type;
	int target, fallback, next;
	bool invert = 0;

	ir_get_true_false_blocks(ctx, b, &target, &fallback, &next);
	if (target == next) {
		/* fall through into the true block: branch on the inverse condition */
		invert = 1;
		target = fallback;
		fallback = 0;
	} else if (fallback == next) {
		fallback = 0;
	}

	if (invert) {
		if (IR_IS_TYPE_SIGNED(type)) {
			|	jno =>target
		} else {
			|	jnc =>target
		}
	} else {
		if (IR_IS_TYPE_SIGNED(type)) {
			|	jo =>target
		} else {
			|	jc =>target
		}
	}

	if (fallback) {
		|	jmp =>fallback
	}
}
/*
 * Emit an integer binary operation that works directly on memory
 * ("op [mem], reg" or "op [mem], imm32").
 *
 * "insn" is the IR_STORE/IR_VSTORE and insn->op3 is the binary operation
 * (ADD, ADD_OV, SUB, SUB_OV, OR, AND, XOR) being applied to the destination.
 * For IR_VSTORE the destination is the spill slot of insn->op2; for IR_STORE
 * it is addressed through the register assigned to the store's address
 * operand, optionally with a fused displacement.
 */
static void ir_emit_mem_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_insn *op_insn = &ctx->ir_base[insn->op3];
	ir_type type = op_insn->type;
	ir_ref op2 = op_insn->op2;
	ir_reg op2_reg = ctx->regs[insn->op3][2];
	ir_reg reg;
	int32_t offset = 0;

	/* Work out the destination address: [reg + offset]. */
	switch (insn->op) {
		case IR_VSTORE:
			offset = ir_ref_spill_slot(ctx, insn->op2, &reg);
			break;
		case IR_STORE:
			reg = ctx->regs[def][2];
			IR_ASSERT(reg != IR_REG_NONE);
			if (!IR_IS_CONST_REF(insn->op2)) {
				if (reg & IR_REG_SPILL_LOAD) {
					reg &= ~IR_REG_SPILL_LOAD;
					ir_emit_load(ctx, IR_ADDR, reg, insn->op2);
				}
				if (ctx->rules[insn->op2] == IR_SKIP_MEM) {
					offset = ir_fuse_addr(ctx, insn->op2, &reg);
				}
			} else {
				/* constant address: load it into the register first */
				ir_emit_load(ctx, IR_ADDR, reg, insn->op2);
			}
			break;
		default:
			IR_ASSERT(0);
			return;
	}

	if (op2_reg != IR_REG_NONE) {
		/* Register source operand. */
		if ((op2_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(op2)) {
			op2_reg &= ~IR_REG_SPILL_LOAD;
			ir_emit_load(ctx, type, op2_reg, op2);
		}
		switch (op_insn->op) {
			case IR_ADD:
			case IR_ADD_OV:
				|	ASM_MEM_REG_OP add, type, [Ra(reg)+offset], op2_reg
				break;
			case IR_SUB:
			case IR_SUB_OV:
				|	ASM_MEM_REG_OP sub, type, [Ra(reg)+offset], op2_reg
				break;
			case IR_OR:
				|	ASM_MEM_REG_OP or, type, [Ra(reg)+offset], op2_reg
				break;
			case IR_AND:
				|	ASM_MEM_REG_OP and, type, [Ra(reg)+offset], op2_reg
				break;
			case IR_XOR:
				|	ASM_MEM_REG_OP xor, type, [Ra(reg)+offset], op2_reg
				break;
			default:
				IR_ASSERT(0 && "NIY binary op");
				break;
		}
	} else {
		/* No register: the source must be a constant usable as imm32. */
		ir_val *imm = &ctx->ir_base[op2].val;

		IR_ASSERT(IR_IS_CONST_REF(op2) && (ir_type_size[type] != 8 || IR_IS_32BIT(type, ctx->ir_base[op2].val)));
		switch (op_insn->op) {
			case IR_ADD:
			case IR_ADD_OV:
				|	ASM_MEM_IMM_OP add, type, [Ra(reg)+offset], imm->i32
				break;
			case IR_SUB:
			case IR_SUB_OV:
				|	ASM_MEM_IMM_OP sub, type, [Ra(reg)+offset], imm->i32
				break;
			case IR_OR:
				|	ASM_MEM_IMM_OP or, type, [Ra(reg)+offset], imm->i32
				break;
			case IR_AND:
				|	ASM_MEM_IMM_OP and, type, [Ra(reg)+offset], imm->i32
				break;
			case IR_XOR:
				|	ASM_MEM_IMM_OP xor, type, [Ra(reg)+offset], imm->i32
				break;
			default:
				IR_ASSERT(0 && "NIY binary op");
				break;
		}
	}
}
/*
 * Emit an integer binary operation applied in place to a fixed register.
 *
 * "insn" must be an IR_RSTORE: insn->op3 holds the target register and
 * insn->op2 refers to the binary operation (ADD, SUB, OR, AND, XOR).
 * The second operand is either a register or a constant usable as imm32.
 */
static void ir_emit_reg_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_insn *op_insn = &ctx->ir_base[insn->op2];
	ir_type type = op_insn->type;
	ir_ref op2 = op_insn->op2;
	ir_reg op2_reg = ctx->regs[insn->op2][2];
	ir_reg reg;

	IR_ASSERT(insn->op == IR_RSTORE);
	reg = insn->op3;

	if (op2_reg != IR_REG_NONE) {
		/* Register source operand. */
		if ((op2_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(op2)) {
			op2_reg &= ~IR_REG_SPILL_LOAD;
			ir_emit_load(ctx, type, op2_reg, op2);
		}
		switch (op_insn->op) {
			case IR_ADD:
				|	ASM_REG_REG_OP add, type, reg, op2_reg
				break;
			case IR_SUB:
				|	ASM_REG_REG_OP sub, type, reg, op2_reg
				break;
			case IR_OR:
				|	ASM_REG_REG_OP or, type, reg, op2_reg
				break;
			case IR_AND:
				|	ASM_REG_REG_OP and, type, reg, op2_reg
				break;
			case IR_XOR:
				|	ASM_REG_REG_OP xor, type, reg, op2_reg
				break;
			default:
				IR_ASSERT(0 && "NIY binary op");
				break;
		}
	} else {
		/* No register: the source must be a constant usable as imm32. */
		ir_val *imm = &ctx->ir_base[op2].val;

		IR_ASSERT(IR_IS_CONST_REF(op2) && (ir_type_size[type] != 8 || IR_IS_32BIT(type, ctx->ir_base[op2].val)));
		switch (op_insn->op) {
			case IR_ADD:
				|	ASM_REG_IMM_OP add, type, reg, imm->i32
				break;
			case IR_SUB:
				|	ASM_REG_IMM_OP sub, type, reg, imm->i32
				break;
			case IR_OR:
				|	ASM_REG_IMM_OP or, type, reg, imm->i32
				break;
			case IR_AND:
				|	ASM_REG_IMM_OP and, type, reg, imm->i32
				break;
			case IR_XOR:
				|	ASM_REG_IMM_OP xor, type, reg, imm->i32
				break;
			default:
				IR_ASSERT(0 && "NIY binary op");
				break;
		}
	}
}
/*
 * Emit MUL/DIV/MOD by a constant taken from insn->op2 using shift/mask
 * instructions instead of mul/div:
 *   MUL -> "add reg, reg" when log2 is 1, otherwise "shl reg, log2"
 *   DIV -> "shr reg, log2"
 *   MOD -> "and reg, constant - 1"
 * The first operand is first brought into the result register; the result
 * is spilled afterwards if requested.
 */
static void ir_emit_mul_div_mod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];

	IR_ASSERT(def_reg != IR_REG_NONE);

	/* Bring op1 into def_reg. */
	if (op1_reg != IR_REG_NONE) {
		if (op1_reg & IR_REG_SPILL_LOAD) {
			op1_reg &= ~IR_REG_SPILL_LOAD;
			ir_emit_load(ctx, type, op1_reg, op1);
		}
		if (def_reg != op1_reg) {
			ir_emit_mov(ctx, type, def_reg, op1_reg);
		}
	} else if (def_reg != op1_reg) {
		ir_emit_load(ctx, type, def_reg, op1);
	}

	switch (insn->op) {
		case IR_MUL: {
			uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64);

			if (shift != 1) {
				|	ASM_REG_IMM_OP shl, insn->type, def_reg, shift
			} else {
				/* x * 2 is emitted as x + x */
				|	ASM_REG_REG_OP add, insn->type, def_reg, def_reg
			}
			break;
		}
		case IR_DIV: {
			uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64);

			|	ASM_REG_IMM_OP shr, insn->type, def_reg, shift
			break;
		}
		case IR_MOD: {
			uint64_t mask = ctx->ir_base[insn->op2].val.u64 - 1;

			IR_ASSERT(IR_IS_UNSIGNED_32BIT(mask));
			|	ASM_REG_IMM_OP and, insn->type, def_reg, mask
			break;
		}
		default:
			IR_ASSERT(0);
			break;
	}

	if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}
/*
 * Memory-destination variant of the shift/mask lowering of MUL/DIV/MOD by
 * a constant.
 *
 * "insn" is the IR_STORE/IR_VSTORE; insn->op3 is the MUL/DIV/MOD whose
 * second operand is the constant. The operation is applied directly to
 * [reg + offset] as shl (MUL), shr (DIV) or and with constant - 1 (MOD).
 */
static void ir_emit_mem_mul_div_mod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_insn *op_insn = &ctx->ir_base[insn->op3];
	ir_type type = op_insn->type;
	ir_reg reg;
	int32_t offset = 0;

	/* Work out the destination address: [reg + offset]. */
	switch (insn->op) {
		case IR_VSTORE:
			offset = ir_ref_spill_slot(ctx, insn->op2, &reg);
			break;
		case IR_STORE:
			reg = ctx->regs[def][2];
			IR_ASSERT(reg != IR_REG_NONE);
			if (reg & IR_REG_SPILL_LOAD) {
				reg &= ~IR_REG_SPILL_LOAD;
				ir_emit_load(ctx, IR_ADDR, reg, insn->op2);
			}
			if (ctx->rules[insn->op2] == IR_SKIP_MEM) {
				offset = ir_fuse_addr(ctx, insn->op2, &reg);
			}
			break;
		default:
			IR_ASSERT(0);
			return;
	}

	switch (op_insn->op) {
		case IR_MUL: {
			uint32_t shift = IR_LOG2(ctx->ir_base[op_insn->op2].val.u64);

			|	ASM_MEM_IMM_OP shl, type, [Ra(reg)+offset], shift
			break;
		}
		case IR_DIV: {
			uint32_t shift = IR_LOG2(ctx->ir_base[op_insn->op2].val.u64);

			|	ASM_MEM_IMM_OP shr, type, [Ra(reg)+offset], shift
			break;
		}
		case IR_MOD: {
			uint64_t mask = ctx->ir_base[op_insn->op2].val.u64 - 1;

			IR_ASSERT(IR_IS_UNSIGNED_32BIT(mask));
			|	ASM_MEM_IMM_OP and, type, [Ra(reg)+offset], mask
			break;
		}
		default:
			IR_ASSERT(0);
			break;
	}
}
/*
 * Emit a shift/rotate (SHL, SHR, SAR, ROL, ROR) whose count is not an
 * immediate. The count is passed to the instruction in CL, so op2 is moved
 * or loaded into RCX before the operation, and the result register must
 * not be RCX.
 */
static void ir_emit_shift(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
ir_type type = insn->type;
ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
ir_reg op1_reg = ctx->regs[def][1];
ir_reg op2_reg = ctx->regs[def][2];
IR_ASSERT(def_reg != IR_REG_NONE && def_reg != IR_REG_RCX);
/* Reload spilled operands into the registers assigned to them. */
if (op1_reg != IR_REG_NONE && (op1_reg & IR_REG_SPILL_LOAD)) {
op1_reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, type, op1_reg, insn->op1);
}
if (op2_reg != IR_REG_NONE && (op2_reg & IR_REG_SPILL_LOAD)) {
op2_reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, type, op2_reg, insn->op2);
}
if (op2_reg != IR_REG_RCX) {
/* op1 currently lives in RCX: save it to def_reg before RCX is overwritten with the count. */
if (op1_reg == IR_REG_RCX) {
ir_emit_mov(ctx, type, def_reg, op1_reg);
op1_reg = def_reg;
}
/* Place the shift count into RCX (from its register, or loaded from op2). */
if (op2_reg != IR_REG_NONE) {
ir_emit_mov(ctx, type, IR_REG_RCX, op2_reg);
} else {
ir_emit_load(ctx, type, IR_REG_RCX, insn->op2);
}
}
/* Bring the value to be shifted into def_reg unless it is already there. */
if (def_reg != op1_reg) {
if (op1_reg != IR_REG_NONE) {
ir_emit_mov(ctx, type, def_reg, op1_reg);
} else {
ir_emit_load(ctx, type, def_reg, insn->op1);
}
}
/* Shift/rotate def_reg in place by CL. */
switch (insn->op) {
case IR_SHL:
| ASM_REG_IMM_OP shl, insn->type, def_reg, cl
break;
case IR_SHR:
| ASM_REG_IMM_OP shr, insn->type, def_reg, cl
break;
case IR_SAR:
| ASM_REG_IMM_OP sar, insn->type, def_reg, cl
break;
case IR_ROL:
| ASM_REG_IMM_OP rol, insn->type, def_reg, cl
break;
case IR_ROR:
| ASM_REG_IMM_OP ror, insn->type, def_reg, cl
break;
default:
IR_ASSERT(0);
}
/* Write the result back to its spill slot if the allocator requested it. */
if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
ir_emit_store(ctx, type, def, def_reg);
}
}
/*
 * Emit a shift/rotate by a non-immediate count applied directly to memory.
 *
 * "insn" is the IR_STORE/IR_VSTORE; insn->op3 is the SHL/SHR/SAR/ROL/ROR
 * instruction. The count (second operand of the shift) is placed into RCX
 * and the instruction operates on [reg + offset] using CL.
 */
static void ir_emit_mem_shift(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_insn *op_insn = &ctx->ir_base[insn->op3];
	ir_type type = op_insn->type;
	ir_ref op2 = op_insn->op2;
	ir_reg op2_reg = ctx->regs[insn->op3][2];
	ir_reg reg;
	int32_t offset = 0;

	/* Work out the destination address: [reg + offset]. */
	switch (insn->op) {
		case IR_VSTORE:
			offset = ir_ref_spill_slot(ctx, insn->op2, &reg);
			break;
		case IR_STORE:
			reg = ctx->regs[def][2];
			IR_ASSERT(reg != IR_REG_NONE);
			if (reg & IR_REG_SPILL_LOAD) {
				reg &= ~IR_REG_SPILL_LOAD;
				ir_emit_load(ctx, IR_ADDR, reg, insn->op2);
			}
			if (ctx->rules[insn->op2] == IR_SKIP_MEM) {
				offset = ir_fuse_addr(ctx, insn->op2, &reg);
			}
			break;
		default:
			IR_ASSERT(0);
			return;
	}

	/* Get the shift count into RCX. */
	if (op2_reg != IR_REG_NONE) {
		if (op2_reg & IR_REG_SPILL_LOAD) {
			op2_reg &= ~IR_REG_SPILL_LOAD;
			ir_emit_load(ctx, type, op2_reg, op2);
		}
		if (op2_reg != IR_REG_RCX) {
			ir_emit_mov(ctx, type, IR_REG_RCX, op2_reg);
		}
	} else if (op2_reg != IR_REG_RCX) {
		ir_emit_load(ctx, type, IR_REG_RCX, op2);
	}

	switch (op_insn->op) {
		case IR_SHL:
			|	ASM_MEM_IMM_OP shl, type, [Ra(reg)+offset], cl
			break;
		case IR_SHR:
			|	ASM_MEM_IMM_OP shr, type, [Ra(reg)+offset], cl
			break;
		case IR_SAR:
			|	ASM_MEM_IMM_OP sar, type, [Ra(reg)+offset], cl
			break;
		case IR_ROL:
			|	ASM_MEM_IMM_OP rol, type, [Ra(reg)+offset], cl
			break;
		case IR_ROR:
			|	ASM_MEM_IMM_OP ror, type, [Ra(reg)+offset], cl
			break;
		default:
			IR_ASSERT(0);
	}
}
/*
 * Emit a shift/rotate (SHL, SHR, SAR, ROL, ROR) by a constant count.
 *
 * The count is read from the constant referenced by insn->op2 and encoded
 * as an immediate. The first operand is brought into the result register,
 * shifted in place, and spilled afterwards if requested.
 */
static void ir_emit_shift_const(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	uint32_t shift = ctx->ir_base[insn->op2].val.u64;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];

	IR_ASSERT(def_reg != IR_REG_NONE);

	/* Bring op1 into def_reg. */
	if (op1_reg != IR_REG_NONE) {
		if (op1_reg & IR_REG_SPILL_LOAD) {
			op1_reg &= ~IR_REG_SPILL_LOAD;
			ir_emit_load(ctx, type, op1_reg, op1);
		}
		if (def_reg != op1_reg) {
			ir_emit_mov(ctx, type, def_reg, op1_reg);
		}
	} else if (def_reg != op1_reg) {
		ir_emit_load(ctx, type, def_reg, op1);
	}

	switch (insn->op) {
		case IR_SHL:
			|	ASM_REG_IMM_OP shl, insn->type, def_reg, shift
			break;
		case IR_SHR:
			|	ASM_REG_IMM_OP shr, insn->type, def_reg, shift
			break;
		case IR_SAR:
			|	ASM_REG_IMM_OP sar, insn->type, def_reg, shift
			break;
		case IR_ROL:
			|	ASM_REG_IMM_OP rol, insn->type, def_reg, shift
			break;
		case IR_ROR:
			|	ASM_REG_IMM_OP ror, insn->type, def_reg, shift
			break;
		default:
			IR_ASSERT(0);
	}

	if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}
/*
 * Emit a shift/rotate by a constant count applied directly to memory.
 *
 * "insn" is the IR_STORE/IR_VSTORE; insn->op3 is the SHL/SHR/SAR/ROL/ROR
 * instruction whose second operand is the constant count. The instruction
 * operates on [reg + offset] with the count as an immediate.
 */
static void ir_emit_mem_shift_const(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_insn *op_insn = &ctx->ir_base[insn->op3];
	ir_type type = op_insn->type;
	uint32_t shift = ctx->ir_base[op_insn->op2].val.u64;
	ir_reg reg;
	int32_t offset = 0;

	/* Work out the destination address: [reg + offset]. */
	switch (insn->op) {
		case IR_VSTORE:
			offset = ir_ref_spill_slot(ctx, insn->op2, &reg);
			break;
		case IR_STORE:
			reg = ctx->regs[def][2];
			IR_ASSERT(reg != IR_REG_NONE);
			if (reg & IR_REG_SPILL_LOAD) {
				reg &= ~IR_REG_SPILL_LOAD;
				ir_emit_load(ctx, IR_ADDR, reg, insn->op2);
			}
			if (ctx->rules[insn->op2] == IR_SKIP_MEM) {
				offset = ir_fuse_addr(ctx, insn->op2, &reg);
			}
			break;
		default:
			IR_ASSERT(0);
			return;
	}

	switch (op_insn->op) {
		case IR_SHL:
			|	ASM_MEM_IMM_OP shl, type, [Ra(reg)+offset], shift
			break;
		case IR_SHR:
			|	ASM_MEM_IMM_OP shr, type, [Ra(reg)+offset], shift
			break;
		case IR_SAR:
			|	ASM_MEM_IMM_OP sar, type, [Ra(reg)+offset], shift
			break;
		case IR_ROL:
			|	ASM_MEM_IMM_OP rol, type, [Ra(reg)+offset], shift
			break;
		case IR_ROR:
			|	ASM_MEM_IMM_OP ror, type, [Ra(reg)+offset], shift
			break;
		default:
			IR_ASSERT(0);
	}
}
/*
 * Emit a single-operand integer instruction on the result register:
 *   IR_ADD -> inc, IR_SUB -> dec, IR_NOT -> not, IR_NEG -> neg,
 *   IR_BSWAP -> bswap (4-byte, or 8-byte on x64).
 * The first operand is brought into the result register first; the result
 * is spilled afterwards if requested.
 */
static void ir_emit_op_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];

	IR_ASSERT(def_reg != IR_REG_NONE);

	/* Bring op1 into def_reg. */
	if (op1_reg != IR_REG_NONE) {
		if (op1_reg & IR_REG_SPILL_LOAD) {
			op1_reg &= ~IR_REG_SPILL_LOAD;
			ir_emit_load(ctx, type, op1_reg, op1);
		}
		if (def_reg != op1_reg) {
			ir_emit_mov(ctx, type, def_reg, op1_reg);
		}
	} else if (def_reg != op1_reg) {
		ir_emit_load(ctx, type, def_reg, op1);
	}

	switch (insn->op) {
		case IR_ADD:
			|	ASM_REG_OP inc, insn->type, def_reg
			break;
		case IR_SUB:
			|	ASM_REG_OP dec, insn->type, def_reg
			break;
		case IR_NOT:
			|	ASM_REG_OP not, insn->type, def_reg
			break;
		case IR_NEG:
			|	ASM_REG_OP neg, insn->type, def_reg
			break;
		case IR_BSWAP:
			switch (ir_type_size[insn->type]) {
				case 4:
					|	bswap Rd(def_reg)
					break;
				case 8:
					IR_ASSERT(sizeof(void*) == 8);
					|.if X64
					|	bswap Rq(def_reg)
					|.endif
					break;
				default:
					IR_ASSERT(0);
			}
			break;
		default:
			IR_ASSERT(0);
			break;
	}

	if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}
/*
 * Emit a single-operand integer instruction applied directly to memory:
 *   IR_ADD -> inc, IR_SUB -> dec, IR_NOT -> not, IR_NEG -> neg.
 *
 * "insn" is the IR_STORE/IR_VSTORE and insn->op3 (op_insn) is the operation
 * to apply. For IR_VSTORE the destination is the spill slot of insn->op2;
 * for IR_STORE it is addressed through the register assigned to the store's
 * address operand, optionally with a fused displacement.
 */
static void ir_emit_mem_op_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_insn *op_insn = &ctx->ir_base[insn->op3];
	ir_type type = op_insn->type;
	ir_reg reg;
	int32_t offset = 0;

	if (insn->op == IR_VSTORE) {
		offset = ir_ref_spill_slot(ctx, insn->op2, &reg);
	} else if (insn->op == IR_STORE) {
		reg = ctx->regs[def][2];
		IR_ASSERT(reg != IR_REG_NONE);
		if (reg & IR_REG_SPILL_LOAD) {
			reg &= ~IR_REG_SPILL_LOAD;
			ir_emit_load(ctx, IR_ADDR, reg, insn->op2);
		}
		if (ctx->rules[insn->op2] == IR_SKIP_MEM) {
			offset = ir_fuse_addr(ctx, insn->op2, &reg);
		}
	} else {
		IR_ASSERT(0);
		return;
	}

	/*
	 * Dispatch on the fused operation (op_insn), not on the store itself:
	 * insn->op is always IR_STORE/IR_VSTORE at this point, so testing it
	 * against IR_SUB/IR_NOT/IR_NEG could never match.
	 */
	if (op_insn->op == IR_ADD) {
		|	ASM_MEM_OP inc, type, [Ra(reg)+offset]
	} else if (op_insn->op == IR_SUB) {
		|	ASM_MEM_OP dec, type, [Ra(reg)+offset]
	} else if (op_insn->op == IR_NOT) {
		|	ASM_MEM_OP not, type, [Ra(reg)+offset]
	} else if (op_insn->op == IR_NEG) {
		|	ASM_MEM_OP neg, type, [Ra(reg)+offset]
	} else {
		IR_ASSERT(0);
	}
}
/*
 * Emit integer ABS without a branch.
 *
 * The operand is copied into the result register and negated; if the
 * negated value has its sign bit set, the original value is moved back
 * with cmovs. Both the result and the operand must be in distinct
 * registers.
 */
static void ir_emit_abs_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];

	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);

	if (op1_reg != IR_REG_NONE && (op1_reg & IR_REG_SPILL_LOAD)) {
		op1_reg &= ~IR_REG_SPILL_LOAD;
		ir_emit_load(ctx, type, op1_reg, op1);
	}

	/* op1_reg must survive the negation so it can be selected by cmovs */
	IR_ASSERT(def_reg != op1_reg);

	ir_emit_mov(ctx, insn->type, def_reg, op1_reg);
	|	ASM_REG_OP neg, insn->type, def_reg
	/* macro name is followed by a space, like every other invocation */
	|	ASM_REG_REG_OP2 cmovs, type, def_reg, op1_reg

	if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}
/*
 * Emit a logical NOT of an integer operand: the result byte register is set
 * to 1 when the operand equals zero and to 0 otherwise.
 *
 * The operand is tested in its register ("test reg, reg") or, when it has
 * no register, compared against 0 in its spill slot. Only the low byte of
 * the result register is written (sete).
 */
static void ir_emit_bool_not_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	/* type of the tested operand, not of the result */
	ir_type type = ctx->ir_base[insn->op1].type;
	ir_ref op1 = insn->op1;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];

	IR_ASSERT(def_reg != IR_REG_NONE);

	if (op1_reg != IR_REG_NONE && (op1_reg & IR_REG_SPILL_LOAD)) {
		op1_reg &= ~IR_REG_SPILL_LOAD;
		ir_emit_load(ctx, type, op1_reg, op1);
	}

	if (op1_reg != IR_REG_NONE) {
		|	ASM_REG_REG_OP test, type, op1_reg, op1_reg
	} else {
		ir_reg fp;
		int32_t offset = ir_ref_spill_slot(ctx, op1, &fp);

		|	ASM_MEM_IMM_OP cmp, type, [Ra(fp)+offset], 0
	}
	|	sete Rb(def_reg)

	if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
		/*
		 * Spill with the result type (insn->type), as the other
		 * setcc-based emitters do: sete defines only the low byte, so
		 * storing with the (possibly wider) operand type would write
		 * undefined upper bytes into the result's spill slot.
		 */
		ir_emit_store(ctx, insn->type, def, def_reg);
	}
}
/*
 * Emit MUL/MUL_OV (asserted unsigned), DIV or MOD using the one-operand
 * x86 instructions mul / div / idiv.
 *
 * op1 is always placed in RAX. The second operand is used from a register,
 * from a fused load, or from its spill slot. The result is then taken from
 * RAX (MUL, MUL_OV, DIV) or from RDX (MOD; AH for 1-byte types) and moved
 * to the result register.
 */
static void ir_emit_mul_div_mod(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
ir_type type = insn->type;
ir_ref op1 = insn->op1;
ir_ref op2 = insn->op2;
ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
ir_reg op1_reg = ctx->regs[def][1];
ir_reg op2_reg = ctx->regs[def][2];
int32_t offset = 0;
IR_ASSERT(def_reg != IR_REG_NONE);
if (op1_reg != IR_REG_NONE && (op1_reg & IR_REG_SPILL_LOAD)) {
op1_reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, type, op1_reg, op1);
}
/* The first operand has to be in RAX. */
if (op1_reg != IR_REG_RAX) {
if (op1_reg != IR_REG_NONE) {
ir_emit_mov(ctx, type, IR_REG_RAX, op1_reg);
} else {
ir_emit_load(ctx, type, IR_REG_RAX, op1);
}
}
if (op2_reg == IR_REG_NONE && op1 == op2) {
/* Both operands are the same value, which is already in RAX. */
op2_reg = IR_REG_RAX;
} else if (IR_IS_CONST_REF(op2)) {
/* Constants are loaded into a register: RDX for multiplication, the assigned register otherwise. */
if (insn->op == IR_MUL || insn->op == IR_MUL_OV) {
op2_reg = IR_REG_RDX;
} else {
IR_ASSERT(op2_reg != IR_REG_NONE);
}
ir_emit_load(ctx, type, op2_reg, op2);
}
if (insn->op == IR_MUL || insn->op == IR_MUL_OV) {
/* Only unsigned multiplication is expected on this path. */
IR_ASSERT(!IR_IS_TYPE_SIGNED(insn->type));
if (op2_reg != IR_REG_NONE && ctx->rules[op2] != IR_SKIP_MEM) {
if (op2_reg & IR_REG_SPILL_LOAD) {
op2_reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, type, op2_reg, op2);
}
| ASM_REG_OP mul, type, op2_reg
} else {
/* Memory operand: either a fused load or the spill slot of op2. */
if (ctx->rules[op2] == IR_SKIP_MEM) {
IR_ASSERT(op2_reg != IR_REG_NONE);
offset = ir_fuse_load(ctx, op2, &op2_reg);
} else {
offset = ir_ref_spill_slot(ctx, op2, &op2_reg);
}
| ASM_MEM_OP mul, type, [Ra(op2_reg)+offset]
}
} else {
if (IR_IS_TYPE_SIGNED(type)) {
/* Sign-extend the dividend from RAX into RDX for 8/4/2-byte operands. */
/* NOTE(review): nothing is emitted here for 1-byte signed operands - confirm AH is prepared elsewhere. */
if (ir_type_size[type] == 8) {
| cqo
} else if (ir_type_size[type] == 4) {
| cdq
} else if (ir_type_size[type] == 2) {
| cwd
}
if (op2_reg != IR_REG_NONE && (IR_IS_CONST_REF(op2) || ctx->rules[op2] != IR_SKIP_MEM)) {
if (op2_reg & IR_REG_SPILL_LOAD) {
op2_reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, type, op2_reg, op2);
}
| ASM_REG_OP idiv, type, op2_reg
} else {
if (ctx->rules[op2] == IR_SKIP_MEM) {
IR_ASSERT(op2_reg != IR_REG_NONE);
offset = ir_fuse_load(ctx, op2, &op2_reg);
} else {
offset = ir_ref_spill_slot(ctx, op2, &op2_reg);
}
| ASM_MEM_OP idiv, type, [Ra(op2_reg)+offset]
}
} else {
/* Unsigned division: clear RDX (at the operand size) before div. */
/* NOTE(review): for 1-byte operands this clears DL, not AH - confirm the 8-bit dividend is set up correctly. */
| ASM_REG_REG_OP xor, type, IR_REG_RDX, IR_REG_RDX
if (op2_reg != IR_REG_NONE && (IR_IS_CONST_REF(op2) || ctx->rules[op2] != IR_SKIP_MEM)) {
if (op2_reg & IR_REG_SPILL_LOAD) {
op2_reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, type, op2_reg, op2);
}
| ASM_REG_OP div, type, op2_reg
} else {
if (ctx->rules[op2] == IR_SKIP_MEM) {
IR_ASSERT(op2_reg != IR_REG_NONE);
offset = ir_fuse_load(ctx, op2, &op2_reg);
} else {
offset = ir_ref_spill_slot(ctx, op2, &op2_reg);
}
| ASM_MEM_OP div, type, [Ra(op2_reg)+offset]
}
}
}
/* Move the result out of the implicit result register. */
if (insn->op == IR_MUL || insn->op == IR_MUL_OV || insn->op == IR_DIV) {
/* Product / quotient is taken from RAX. */
if (def_reg != IR_REG_RAX) {
if (def_reg != IR_REG_NONE) {
ir_emit_mov(ctx, type, def_reg, IR_REG_RAX);
} else {
ir_emit_store(ctx, type, def, IR_REG_RAX);
}
}
} else if (insn->op == IR_MOD) {
if (ir_type_size[type] == 1) {
/* 1-byte remainder is taken from AH; it is copied through AL into the result register. */
if (def_reg != IR_REG_NONE) {
| mov al, ah
| mov Rb(def_reg), al
} else {
ir_reg fp;
int32_t offset = ir_ref_spill_slot(ctx, def, &fp);
| mov byte [Ra(fp)+offset], ah
}
} else if (def_reg != IR_REG_RDX) {
/* Wider remainders are taken from RDX. */
if (def_reg != IR_REG_NONE) {
ir_emit_mov(ctx, type, def_reg, IR_REG_RDX);
} else {
ir_emit_store(ctx, type, def, IR_REG_RDX);
}
}
} else {
IR_ASSERT(0);
}
/* Write the result back to its spill slot if the allocator requested it. */
if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
ir_emit_store(ctx, type, def, def_reg);
}
}
/*
 * Switch the assembler output to the "rodata" section.
 *
 * On the first call (data->rodata_label is still zero) a label numbered
 * cfg_blocks_count + consts_count + 2 is recorded in data->rodata_label
 * and defined at the current position of the section.
 */
static void ir_rodata(ir_ctx *ctx)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	|.rodata
	if (data->rodata_label == 0) {
		int label;

		data->rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2;
		label = data->rodata_label;
		|=>label:
	}
}
/*
 * Emit floating-point NEG or ABS for IR_DOUBLE / IR_FLOAT.
 *
 * NEG is an XOR with a sign-bit mask, ABS is an AND with a mask that clears
 * the sign bit. Each mask is emitted into the rodata section the first time
 * it is needed (tracked by the data->*_const flags) and referenced through a
 * global label afterwards. AVX (three-operand) forms are used when
 * ctx->flags has IR_AVX set, SSE forms otherwise.
 */
static void ir_emit_op_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
ir_type type = insn->type;
ir_ref op1 = insn->op1;
ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
ir_reg op1_reg = ctx->regs[def][1];
IR_ASSERT(def_reg != IR_REG_NONE);
if (op1_reg != IR_REG_NONE && (op1_reg & IR_REG_SPILL_LOAD)) {
op1_reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, type, op1_reg, op1);
}
/* Bring the operand into the result register; the operation is done in place. */
if (def_reg != op1_reg) {
if (op1_reg != IR_REG_NONE) {
ir_emit_fp_mov(ctx, type, def_reg, op1_reg);
} else {
ir_emit_load(ctx, type, def_reg, op1);
}
}
if (insn->op == IR_NEG) {
if (insn->type == IR_DOUBLE) {
/* Emit the 16-byte aligned sign mask for doubles once, then return to the code section. */
if (!data->double_neg_const) {
data->double_neg_const = 1;
ir_rodata(ctx);
|.align 16
|->double_neg_const:
|.dword 0, 0x80000000, 0, 0
|.code
}
if (ctx->flags & IR_AVX) {
| vxorpd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), [->double_neg_const]
} else {
| xorpd xmm(def_reg-IR_REG_FP_FIRST), [->double_neg_const]
}
} else {
IR_ASSERT(insn->type == IR_FLOAT);
/* Emit the 16-byte aligned sign mask for floats once. */
if (!data->float_neg_const) {
data->float_neg_const = 1;
ir_rodata(ctx);
|.align 16
|->float_neg_const:
|.dword 0x80000000, 0, 0, 0
|.code
}
if (ctx->flags & IR_AVX) {
| vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), [->float_neg_const]
} else {
| xorps xmm(def_reg-IR_REG_FP_FIRST), [->float_neg_const]
}
}
} else if (insn->op == IR_ABS) {
if (insn->type == IR_DOUBLE) {
/* Emit the mask that keeps every bit of a double except the sign bit, once. */
if (!data->double_abs_const) {
data->double_abs_const = 1;
ir_rodata(ctx);
|.align 16
|->double_abs_const:
|.dword 0xffffffff, 0x7fffffff, 0, 0
|.code
}
if (ctx->flags & IR_AVX) {
| vandpd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), [->double_abs_const]
} else {
| andpd xmm(def_reg-IR_REG_FP_FIRST), [->double_abs_const]
}
} else {
IR_ASSERT(insn->type == IR_FLOAT);
/* Emit the mask that keeps every bit of a float except the sign bit, once. */
if (!data->float_abs_const) {
data->float_abs_const = 1;
ir_rodata(ctx);
|.align 16
|->float_abs_const:
|.dword 0x7fffffff, 0, 0, 0
|.code
}
if (ctx->flags & IR_AVX) {
| vandps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), [->float_abs_const]
} else {
| andps xmm(def_reg-IR_REG_FP_FIRST), [->float_abs_const]
}
}
} else {
IR_ASSERT(0);
}
/* Write the result back to its spill slot if the allocator requested it. */
if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
ir_emit_store(ctx, insn->type, def, def_reg);
}
}
/*
 * Emit a scalar floating-point binary operation (ADD, SUB, MUL, DIV, MIN,
 * MAX) using two-operand SSE instructions.
 *
 * The first operand is brought into the result register and the operation
 * is applied in place. The second operand is taken from a register, from a
 * constant referenced through a label, or from memory (fused load or spill
 * slot).
 */
static void ir_emit_binop_sse2(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
ir_type type = insn->type;
ir_ref op1 = insn->op1;
ir_ref op2 = insn->op2;
ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
ir_reg op1_reg = ctx->regs[def][1];
ir_reg op2_reg = ctx->regs[def][2];
IR_ASSERT(def_reg != IR_REG_NONE);
if (op1_reg != IR_REG_NONE && (op1_reg & IR_REG_SPILL_LOAD)) {
op1_reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, type, op1_reg, op1);
}
/* Two-operand form: the destination doubles as the first source. */
if (def_reg != op1_reg) {
if (op1_reg != IR_REG_NONE) {
ir_emit_fp_mov(ctx, type, def_reg, op1_reg);
} else {
ir_emit_load(ctx, type, def_reg, op1);
}
}
if (op2_reg != IR_REG_NONE && ctx->rules[op2] != IR_SKIP_MEM) {
/* Second operand in a register. */
if ((op2_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(op2)) {
op2_reg &= ~IR_REG_SPILL_LOAD;
/* When both operands are the same ref, the load is skipped. */
if (op1 != op2) {
ir_emit_load(ctx, type, op2_reg, op2);
}
}
switch (insn->op) {
case IR_ADD:
| ASM_SSE2_REG_REG_OP addss, addsd, type, def_reg, op2_reg
break;
case IR_SUB:
| ASM_SSE2_REG_REG_OP subss, subsd, type, def_reg, op2_reg
break;
case IR_MUL:
| ASM_SSE2_REG_REG_OP mulss, mulsd, type, def_reg, op2_reg
break;
case IR_DIV:
| ASM_SSE2_REG_REG_OP divss, divsd, type, def_reg, op2_reg
break;
case IR_MIN:
| ASM_SSE2_REG_REG_OP minss, minsd, type, def_reg, op2_reg
break;
case IR_MAX:
| ASM_SSE2_REG_REG_OP maxss, maxsd, type, def_reg, op2_reg
break;
default:
IR_ASSERT(0 && "NIY binary op");
break;
}
} else if (IR_IS_CONST_REF(op2)) {
/* Second operand is a constant without a register: reference it through a label derived from the ref. */
/* The constant is flagged with IR_CONST_EMIT - presumably so it gets emitted as data later; confirm. */
ir_insn *val_insn = &ctx->ir_base[op2];
int label = ctx->cfg_blocks_count - op2;
val_insn->const_flags |= IR_CONST_EMIT;
switch (insn->op) {
case IR_ADD:
| ASM_SSE2_REG_MEM_OP addss, addsd, type, def_reg, [=>label]
break;
case IR_SUB:
| ASM_SSE2_REG_MEM_OP subss, subsd, type, def_reg, [=>label]
break;
case IR_MUL:
| ASM_SSE2_REG_MEM_OP mulss, mulsd, type, def_reg, [=>label]
break;
case IR_DIV:
| ASM_SSE2_REG_MEM_OP divss, divsd, type, def_reg, [=>label]
break;
case IR_MIN:
| ASM_SSE2_REG_MEM_OP minss, minsd, type, def_reg, [=>label]
break;
case IR_MAX:
| ASM_SSE2_REG_MEM_OP maxss, maxsd, type, def_reg, [=>label]
break;
default:
IR_ASSERT(0 && "NIY binary op");
break;
}
} else {
/* Second operand in memory: a fused load or the spill slot of op2. */
int32_t offset = 0;
if (ctx->rules[op2] == IR_SKIP_MEM) {
IR_ASSERT(op2_reg != IR_REG_NONE);
offset = ir_fuse_load(ctx, op2, &op2_reg);
} else {
offset = ir_ref_spill_slot(ctx, op2, &op2_reg);
}
switch (insn->op) {
case IR_ADD:
| ASM_SSE2_REG_MEM_OP addss, addsd, type, def_reg, [Ra(op2_reg)+offset]
break;
case IR_SUB:
| ASM_SSE2_REG_MEM_OP subss, subsd, type, def_reg, [Ra(op2_reg)+offset]
break;
case IR_MUL:
| ASM_SSE2_REG_MEM_OP mulss, mulsd, type, def_reg, [Ra(op2_reg)+offset]
break;
case IR_DIV:
| ASM_SSE2_REG_MEM_OP divss, divsd, type, def_reg, [Ra(op2_reg)+offset]
break;
case IR_MIN:
| ASM_SSE2_REG_MEM_OP minss, minsd, type, def_reg, [Ra(op2_reg)+offset]
break;
case IR_MAX:
| ASM_SSE2_REG_MEM_OP maxss, maxsd, type, def_reg, [Ra(op2_reg)+offset]
break;
default:
IR_ASSERT(0 && "NIY binary op");
break;
}
}
/* Write the result back to its spill slot if the allocator requested it. */
if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
ir_emit_store(ctx, insn->type, def, def_reg);
}
}
/*
 * Emit a scalar floating-point binary operation (ADD, SUB, MUL, DIV, MIN,
 * MAX) using three-operand AVX instructions.
 *
 * Unlike the SSE2 variant, the first operand is not copied into the result
 * register: it must already have a register (asserted) and is passed as a
 * separate source. The second operand is taken from a register, from a
 * constant referenced through a label, or from memory (fused load or spill
 * slot).
 */
static void ir_emit_binop_avx(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
ir_type type = insn->type;
ir_ref op1 = insn->op1;
ir_ref op2 = insn->op2;
ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
ir_reg op1_reg = ctx->regs[def][1];
ir_reg op2_reg = ctx->regs[def][2];
IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);
/* Load the first operand when it is spilled or a constant. */
if ((op1_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(op1)) {
op1_reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, type, op1_reg, op1);
}
if (op2_reg != IR_REG_NONE && ctx->rules[op2] != IR_SKIP_MEM) {
/* Second operand in a register. */
if ((op2_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(op2)) {
op2_reg &= ~IR_REG_SPILL_LOAD;
/* When both operands are the same ref, the load is skipped. */
if (op1 != op2) {
ir_emit_load(ctx, type, op2_reg, op2);
}
}
switch (insn->op) {
case IR_ADD:
| ASM_AVX_REG_REG_REG_OP vaddss, vaddsd, type, def_reg, op1_reg, op2_reg
break;
case IR_SUB:
| ASM_AVX_REG_REG_REG_OP vsubss, vsubsd, type, def_reg, op1_reg, op2_reg
break;
case IR_MUL:
| ASM_AVX_REG_REG_REG_OP vmulss, vmulsd, type, def_reg, op1_reg, op2_reg
break;
case IR_DIV:
| ASM_AVX_REG_REG_REG_OP vdivss, vdivsd, type, def_reg, op1_reg, op2_reg
break;
case IR_MIN:
| ASM_AVX_REG_REG_REG_OP vminss, vminsd, type, def_reg, op1_reg, op2_reg
break;
case IR_MAX:
| ASM_AVX_REG_REG_REG_OP vmaxss, vmaxsd, type, def_reg, op1_reg, op2_reg
break;
default:
IR_ASSERT(0 && "NIY binary op");
break;
}
} else if (IR_IS_CONST_REF(op2)) {
/* Second operand is a constant without a register: reference it through a label derived from the ref. */
/* The constant is flagged with IR_CONST_EMIT - presumably so it gets emitted as data later; confirm. */
ir_insn *val_insn = &ctx->ir_base[op2];
int label = ctx->cfg_blocks_count - op2;
val_insn->const_flags |= IR_CONST_EMIT;
switch (insn->op) {
case IR_ADD:
| ASM_AVX_REG_REG_MEM_OP vaddss, vaddsd, type, def_reg, op1_reg, [=>label]
break;
case IR_SUB:
| ASM_AVX_REG_REG_MEM_OP vsubss, vsubsd, type, def_reg, op1_reg, [=>label]
break;
case IR_MUL:
| ASM_AVX_REG_REG_MEM_OP vmulss, vmulsd, type, def_reg, op1_reg, [=>label]
break;
case IR_DIV:
| ASM_AVX_REG_REG_MEM_OP vdivss, vdivsd, type, def_reg, op1_reg, [=>label]
break;
case IR_MIN:
| ASM_AVX_REG_REG_MEM_OP vminss, vminsd, type, def_reg, op1_reg, [=>label]
break;
case IR_MAX:
| ASM_AVX_REG_REG_MEM_OP vmaxss, vmaxsd, type, def_reg, op1_reg, [=>label]
break;
default:
IR_ASSERT(0 && "NIY binary op");
break;
}
} else {
/* Second operand in memory: a fused load or the spill slot of op2. */
int32_t offset = 0;
if (ctx->rules[op2] == IR_SKIP_MEM) {
IR_ASSERT(op2_reg != IR_REG_NONE);
offset = ir_fuse_load(ctx, op2, &op2_reg);
} else {
offset = ir_ref_spill_slot(ctx, op2, &op2_reg);
}
switch (insn->op) {
case IR_ADD:
| ASM_AVX_REG_REG_MEM_OP vaddss, vaddsd, type, def_reg, op1_reg, [Ra(op2_reg)+offset]
break;
case IR_SUB:
| ASM_AVX_REG_REG_MEM_OP vsubss, vsubsd, type, def_reg, op1_reg, [Ra(op2_reg)+offset]
break;
case IR_MUL:
| ASM_AVX_REG_REG_MEM_OP vmulss, vmulsd, type, def_reg, op1_reg, [Ra(op2_reg)+offset]
break;
case IR_DIV:
| ASM_AVX_REG_REG_MEM_OP vdivss, vdivsd, type, def_reg, op1_reg, [Ra(op2_reg)+offset]
break;
case IR_MIN:
| ASM_AVX_REG_REG_MEM_OP vminss, vminsd, type, def_reg, op1_reg, [Ra(op2_reg)+offset]
break;
case IR_MAX:
| ASM_AVX_REG_REG_MEM_OP vmaxss, vmaxsd, type, def_reg, op1_reg, [Ra(op2_reg)+offset]
break;
default:
IR_ASSERT(0 && "NIY binary op");
break;
}
}
/* Write the result back to its spill slot if the allocator requested it. */
if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
ir_emit_store(ctx, insn->type, def, def_reg);
}
}
/*
 * Emit the flag-setting instruction for an integer comparison of op1 with
 * op2; the caller consumes the flags afterwards.
 *
 * Operand forms handled:
 *   reg, reg     -> cmp
 *   reg, 0       -> test reg, reg
 *   reg, imm32   -> cmp
 *   reg, mem     -> cmp (fused load or spill slot of op2)
 *   mem, reg     -> cmp (fused load or spill slot of op1)
 *   mem, imm32   -> cmp
 * A constant first operand is not supported (asserts).
 * No spill loads are performed here; op1_reg/op2_reg are used as passed in.
 */
static void ir_emit_cmp_int_common(ir_ctx *ctx, ir_type type, ir_insn *insn, ir_reg op1_reg, ir_ref op1, ir_reg op2_reg, ir_ref op2)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
if (op1_reg != IR_REG_NONE && ctx->rules[insn->op1] != IR_SKIP_MEM) {
/* First operand in a register. */
if (op2_reg != IR_REG_NONE && ctx->rules[insn->op2] != IR_SKIP_MEM) {
| ASM_REG_REG_OP cmp, type, op1_reg, op2_reg
} else if (IR_IS_CONST_REF(op2) && ctx->ir_base[op2].val.u64 == 0) {
/* Comparison with zero is emitted as a test of the register with itself. */
| ASM_REG_REG_OP test, type, op1_reg, op1_reg
} else if (IR_IS_CONST_REF(op2)) {
ir_insn *val_insn = &ctx->ir_base[op2];
IR_ASSERT(IR_IS_32BIT(val_insn->type, val_insn->val));
| ASM_REG_IMM_OP cmp, type, op1_reg, val_insn->val.i32
} else {
/* Second operand in memory: a fused load or the spill slot of op2. */
int32_t offset = 0;
if (ctx->rules[op2] == IR_SKIP_MEM) {
IR_ASSERT(op2_reg != IR_REG_NONE);
offset = ir_fuse_load(ctx, op2, &op2_reg);
} else {
offset = ir_ref_spill_slot(ctx, op2, &op2_reg);
}
| ASM_REG_MEM_OP cmp, type, op1_reg, [Ra(op2_reg)+offset]
}
} else if (IR_IS_CONST_REF(insn->op1)) {
IR_ASSERT(0);
} else {
/* First operand in memory: a fused load or the spill slot of op1. */
int32_t offset = 0;
if (ctx->rules[insn->op1] == IR_SKIP_MEM) {
offset = ir_fuse_load(ctx, insn->op1, &op1_reg);
} else {
offset = ir_ref_spill_slot(ctx, insn->op1, &op1_reg);
}
if (op2_reg != IR_REG_NONE) {
/* Without a base register the address is the bare displacement. */
if (op1_reg == IR_REG_NONE) {
| ASM_MEM_REG_OP cmp, type, [offset], op2_reg
} else {
| ASM_MEM_REG_OP cmp, type, [Ra(op1_reg)+offset], op2_reg
}
} else {
/* Memory against constant: the constant must be usable as imm32. */
IR_ASSERT(!IR_IS_CONST_REF(op1));
IR_ASSERT(IR_IS_CONST_REF(op2));
IR_ASSERT(IR_IS_32BIT(ctx->ir_base[op2].type, ctx->ir_base[op2].val));
if (op1_reg == IR_REG_NONE) {
| ASM_MEM_IMM_OP cmp, type, [offset], ctx->ir_base[op2].val.i32
} else {
| ASM_MEM_IMM_OP cmp, type, [Ra(op1_reg)+offset], ctx->ir_base[op2].val.i32
}
}
}
}
/*
 * Emit the setcc instruction matching comparison "op" into the byte
 * register def_reg. Flags must already have been set by earlier code.
 *
 * Signed comparisons map to setl/setle/setg/setge, unsigned ones to
 * setb/setbe/seta/setae, and equality tests to sete/setne.
 */
static void _ir_emit_setcc_int(ir_ctx *ctx, uint8_t op, ir_reg def_reg)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	switch (op) {
		/* equality */
		case IR_EQ:
			|	sete Rb(def_reg)
			break;
		case IR_NE:
			|	setne Rb(def_reg)
			break;

		/* signed ordering */
		case IR_LT:
			|	setl Rb(def_reg)
			break;
		case IR_LE:
			|	setle Rb(def_reg)
			break;
		case IR_GT:
			|	setg Rb(def_reg)
			break;
		case IR_GE:
			|	setge Rb(def_reg)
			break;

		/* unsigned ordering */
		case IR_ULT:
			|	setb Rb(def_reg)
			break;
		case IR_ULE:
			|	setbe Rb(def_reg)
			break;
		case IR_UGT:
			|	seta Rb(def_reg)
			break;
		case IR_UGE:
			|	setae Rb(def_reg)
			break;

		default:
			IR_ASSERT(0 && "NIY binary op");
			break;
	}
}
/*
 * Emit an integer comparison that produces its result in a register.
 *
 * Operands that have registers are loaded first when they are spilled or
 * constant. Unsigned comparisons against the constant 0 are simplified:
 *   x <  0 -> result register is cleared (always false)
 *   x >= 0 -> result register is set to 1 (always true)
 *   x <= 0 -> treated as x == 0
 *   x >  0 -> treated as x != 0
 * Otherwise the flags are set by ir_emit_cmp_int_common() and turned into
 * a value by _ir_emit_setcc_int(). The result is spilled with insn->type
 * if requested.
 */
static void ir_emit_cmp_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = ctx->ir_base[insn->op1].type;
	ir_op op = insn->op;
	ir_ref op1 = insn->op1;
	ir_ref op2 = insn->op2;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];
	ir_reg op2_reg = ctx->regs[def][2];

	IR_ASSERT(def_reg != IR_REG_NONE);

	if (op1_reg != IR_REG_NONE) {
		if ((op1_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(op1)) {
			op1_reg &= ~IR_REG_SPILL_LOAD;
			ir_emit_load(ctx, type, op1_reg, op1);
		}
	}
	if (op2_reg != IR_REG_NONE) {
		if ((op2_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(op2)) {
			op2_reg &= ~IR_REG_SPILL_LOAD;
			/* same ref as op1: the load is skipped */
			if (op1 != op2) {
				ir_emit_load(ctx, type, op2_reg, op2);
			}
		}
	}

	if (IR_IS_CONST_REF(op2) && ctx->ir_base[op2].val.u64 == 0) {
		switch (op) {
			case IR_ULT:
				/* always false */
				|	xor Ra(def_reg), Ra(def_reg)
				if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
					ir_emit_store(ctx, insn->type, def, def_reg);
				}
				return;
			case IR_UGE:
				/* always true */
				|	ASM_REG_IMM_OP mov, insn->type, def_reg, 1
				if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
					ir_emit_store(ctx, insn->type, def, def_reg);
				}
				return;
			case IR_ULE:
				op = IR_EQ;
				break;
			case IR_UGT:
				op = IR_NE;
				break;
			default:
				break;
		}
	}

	ir_emit_cmp_int_common(ctx, type, insn, op1_reg, op1, op2_reg, op2);
	_ir_emit_setcc_int(ctx, op, def_reg);

	if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
		ir_emit_store(ctx, insn->type, def, def_reg);
	}
}
/*
 * Emit a TEST instruction for the IR_AND instruction at "ref", so that the
 * CPU flags reflect (op1 & op2) without producing a result value.
 *
 * "op" is the comparison the caller is going to apply to the flags. It is
 * consulted only when op1 is in a register and op2 is a constant: for
 * IR_EQ/IR_NE against certain all-ones masks a (sub)register is tested
 * against itself instead of against an immediate.
 *
 * Operand locations handled: reg/reg, reg/imm, reg/mem, mem/reg and mem/imm.
 */
static void ir_emit_test_int_common(ir_ctx *ctx, ir_ref ref, ir_op op)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
ir_insn *binop_insn = &ctx->ir_base[ref];
ir_type type = binop_insn->type;
ir_ref op1 = binop_insn->op1;
ir_ref op2 = binop_insn->op2;
ir_reg op1_reg = ctx->regs[ref][1];
ir_reg op2_reg = ctx->regs[ref][2];
IR_ASSERT(binop_insn->op == IR_AND);
/* Case 1: first operand lives in a register. */
if (op1_reg != IR_REG_NONE && ctx->rules[op1] != IR_SKIP_MEM) {
if (op1_reg & IR_REG_SPILL_LOAD) {
op1_reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, type, op1_reg, op1);
}
if (op2_reg != IR_REG_NONE && ctx->rules[op2] != IR_SKIP_MEM) {
/* reg, reg */
if ((op2_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(op2)) {
op2_reg &= ~IR_REG_SPILL_LOAD;
/* Identical operands were already loaded through op1. */
if (op1 != op2) {
ir_emit_load(ctx, type, op2_reg, op2);
}
}
| ASM_REG_REG_OP test, type, op1_reg, op2_reg
} else if (IR_IS_CONST_REF(op2)) {
/* reg, imm32 */
ir_insn *val_insn = &ctx->ir_base[op2];
int32_t val;
IR_ASSERT(IR_IS_32BIT(val_insn->type, val_insn->val));
val = val_insn->val.i32;
/*
 * For EQ/NE, a mask that covers exactly one sub-register is tested by
 * TESTing that sub-register against itself (no immediate needed).
 * The low-byte form is limited to the first four registers unless
 * building for a 64-bit target.
 */
if ((op == IR_EQ || op == IR_NE) && val == 0xff && (sizeof(void*) == 8 || op1_reg <= IR_REG_R3)) {
| test Rb(op1_reg), Rb(op1_reg)
} else if ((op == IR_EQ || op == IR_NE) && val == 0xff00 && op1_reg <= IR_REG_R3) {
/* Bits 8..15: use the high-byte register of the matching legacy register. */
if (op1_reg == IR_REG_RAX) {
| test ah, ah
} else if (op1_reg == IR_REG_RBX) {
| test bh, bh
} else if (op1_reg == IR_REG_RCX) {
| test ch, ch
} else if (op1_reg == IR_REG_RDX) {
| test dh, dh
} else {
IR_ASSERT(0);
}
} else if ((op == IR_EQ || op == IR_NE) && val == 0xffff) {
| test Rw(op1_reg), Rw(op1_reg)
/*
 * "val" is int32_t, so this comparison is done in unsigned arithmetic and
 * matches val == -1.
 * NOTE(review): this emits a 32-bit self-test regardless of "type";
 * confirm that an all-ones constant cannot reach here for other widths.
 */
} else if ((op == IR_EQ || op == IR_NE) && val == 0xffffffff) {
| test Rd(op1_reg), Rd(op1_reg)
} else {
| ASM_REG_IMM_OP test, type, op1_reg, val
}
} else {
/* reg, mem: op2 is either a fused load or a spill slot. */
int32_t offset = 0;
if (ctx->rules[op2] == IR_SKIP_MEM) {
IR_ASSERT(op2_reg != IR_REG_NONE);
offset = ir_fuse_load(ctx, op2, &op2_reg);
} else {
offset = ir_ref_spill_slot(ctx, op2, &op2_reg);
}
| ASM_REG_MEM_OP test, type, op1_reg, [Ra(op2_reg)+offset]
}
} else if (IR_IS_CONST_REF(op1)) {
/* A constant first operand without a register is not expected here. */
IR_ASSERT(0);
} else {
/* Case 2: first operand is in memory (fused load or spill slot). */
int32_t offset = 0;
if (ctx->rules[op1] == IR_SKIP_MEM) {
offset = ir_fuse_load(ctx, op1, &op1_reg);
} else {
offset = ir_ref_spill_slot(ctx, op1, &op1_reg);
}
if (op2_reg != IR_REG_NONE) {
/* mem, reg */
if ((op2_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(op2)) {
op2_reg &= ~IR_REG_SPILL_LOAD;
if (op1 != op2) {
ir_emit_load(ctx, type, op2_reg, op2);
}
}
/* No base register means "offset" is used as an absolute address. */
if (op1_reg == IR_REG_NONE) {
| ASM_MEM_REG_OP test, type, [offset], op2_reg
} else {
| ASM_MEM_REG_OP test, type, [Ra(op1_reg)+offset], op2_reg
}
} else {
/* mem, imm32 */
IR_ASSERT(!IR_IS_CONST_REF(op1));
IR_ASSERT(IR_IS_CONST_REF(op2));
IR_ASSERT(IR_IS_32BIT(ctx->ir_base[op2].type, ctx->ir_base[op2].val));
if (op1_reg == IR_REG_NONE) {
| ASM_MEM_IMM_OP test, type, [offset], ctx->ir_base[op2].val.i32
} else {
| ASM_MEM_IMM_OP test, type, [Ra(op1_reg)+offset], ctx->ir_base[op2].val.i32
}
}
}
}
/*
 * Emit a comparison of an AND result (insn->op1) as TEST + SETcc, leaving the
 * boolean in the register assigned to "def" and spilling it if requested.
 */
static void ir_emit_test_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_reg result_reg = IR_REG_NUM(ctx->regs[def][0]);

	IR_ASSERT(result_reg != IR_REG_NONE);

	/* Set the flags from the fused AND, then turn them into 0/1. */
	ir_emit_test_int_common(ctx, insn->op1, insn->op);
	_ir_emit_setcc_int(ctx, insn->op, result_reg);

	if (!(ctx->regs[def][0] & IR_REG_SPILL_STORE)) {
		return;
	}
	ir_emit_store(ctx, insn->type, def, result_reg);
}
/*
 * Emit only the SETcc for comparison "insn"; no compare instruction is
 * emitted here, so the flags must already hold the comparison result.
 * The boolean is spilled afterwards if the allocator requested it.
 */
static void ir_emit_setcc_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_reg result_reg = IR_REG_NUM(ctx->regs[def][0]);

	IR_ASSERT(result_reg != IR_REG_NONE);
	_ir_emit_setcc_int(ctx, insn->op, result_reg);

	if (!(ctx->regs[def][0] & IR_REG_SPILL_STORE)) {
		return;
	}
	ir_emit_store(ctx, insn->type, def, result_reg);
}
/*
 * Emit the UCOMISS/UCOMISD (or VEX-encoded) instruction for the floating
 * point comparison "cmp_insn", whose register assignment is stored under
 * "cmp_ref". The first operand must end up in a register; the second may be
 * a register, a constant emitted into the constant area, a fused load or a
 * spill slot.
 *
 * Returns the comparison opcode the caller should use to interpret the flags
 * (the original opcode of "cmp_insn").
 */
static ir_op ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref cmp_ref, ir_insn *cmp_insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = ctx->ir_base[cmp_insn->op1].type;
	ir_op op = cmp_insn->op;
	ir_ref lhs = cmp_insn->op1;
	ir_ref rhs = cmp_insn->op2;
	ir_reg lhs_reg = ctx->regs[cmp_ref][1];
	ir_reg rhs_reg = ctx->regs[cmp_ref][2];

	/*
	 * EQ/NE are symmetric: when only the second operand has a register,
	 * exchange the operands so the register one comes first.
	 */
	if ((op == IR_EQ || op == IR_NE) && lhs_reg == IR_REG_NONE && rhs_reg != IR_REG_NONE) {
		ir_ref saved_ref = lhs;

		lhs = rhs;
		rhs = saved_ref;
		lhs_reg = rhs_reg;
		rhs_reg = IR_REG_NONE; /* the old first operand had no register */
	}

	IR_ASSERT(lhs_reg != IR_REG_NONE);
	if ((lhs_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(lhs)) {
		lhs_reg &= ~IR_REG_SPILL_LOAD;
		ir_emit_load(ctx, type, lhs_reg, lhs);
	}

	if (rhs_reg != IR_REG_NONE && ctx->rules[rhs] != IR_SKIP_MEM) {
		/* reg, reg */
		if ((rhs_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(rhs)) {
			rhs_reg &= ~IR_REG_SPILL_LOAD;
			if (lhs != rhs) {
				ir_emit_load(ctx, type, rhs_reg, rhs);
			}
		}
		|	ASM_FP_REG_REG_OP ucomiss, ucomisd, vucomiss, vucomisd, type, lhs_reg, rhs_reg
	} else if (IR_IS_CONST_REF(rhs)) {
		/* reg, constant addressed through its label; mark it for emission */
		int label = ctx->cfg_blocks_count - rhs;
		ir_insn *const_insn = &ctx->ir_base[rhs];

		const_insn->const_flags |= IR_CONST_EMIT;
		|	ASM_FP_REG_MEM_OP ucomiss, ucomisd, vucomiss, vucomisd, type, lhs_reg, [=>label]
	} else {
		/* reg, mem */
		int32_t disp;

		if (ctx->rules[rhs] == IR_SKIP_MEM) {
			IR_ASSERT(rhs_reg != IR_REG_NONE);
			disp = ir_fuse_load(ctx, rhs, &rhs_reg);
		} else {
			disp = ir_ref_spill_slot(ctx, rhs, &rhs_reg);
		}
		|	ASM_FP_REG_MEM_OP ucomiss, ucomisd, vucomiss, vucomisd, type, lhs_reg, [Ra(rhs_reg)+disp]
	}

	return op;
}
/*
 * Emit a floating point comparison whose boolean result is materialized in
 * the register assigned to "def".
 *
 * GE and GT map directly onto SETAE/SETA. EQ, LT and LE start from SETNP and
 * then clear the result with a CMOVcc from a zeroed temporary; NE starts from
 * SETP and forces 1 with CMOVNE. The temporary is register slot 3 of "def".
 */
static void ir_emit_cmp_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_op op;
	ir_reg def_reg;
	ir_reg tmp_reg;

	op = ir_emit_cmp_fp_common(ctx, def, insn);
	def_reg = IR_REG_NUM(ctx->regs[def][0]);
	tmp_reg = ctx->regs[def][3];
	IR_ASSERT(def_reg != IR_REG_NONE);

	switch (op) {
		case IR_GE:
			|	setae Rb(def_reg)
			break;
		case IR_GT:
			|	seta Rb(def_reg)
			break;
		case IR_NE:
			|	setp Rb(def_reg)
			|	mov Rd(tmp_reg), 1
			|	cmovne Rd(def_reg), Rd(tmp_reg)
			break;
		case IR_EQ:
		case IR_LT:
		case IR_LE:
			/* Shared prefix: result = !PF, temporary = 0. */
			|	setnp Rb(def_reg)
			|	mov Rd(tmp_reg), 0
			if (op == IR_EQ) {
				|	cmovne Rd(def_reg), Rd(tmp_reg)
			} else if (op == IR_LT) {
				|	cmovae Rd(def_reg), Rd(tmp_reg)
			} else {
				|	cmova Rd(def_reg), Rd(tmp_reg)
			}
			break;
		default:
			IR_ASSERT(0 && "NIY binary op");
			break;
	}

	if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
		ir_emit_store(ctx, insn->type, def, def_reg);
	}
}
/*
 * Emit an unconditional jump to the "true" successor of block "b", unless
 * that successor is the block reported as next, in which case nothing is
 * emitted. "def" is unused.
 */
static void ir_emit_jmp_true(ir_ctx *ctx, int b, ir_ref def)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	int taken, not_taken, fallthrough;

	ir_get_true_false_blocks(ctx, b, &taken, &not_taken, &fallthrough);
	if (taken == fallthrough) {
		return;
	}
	|	jmp =>taken
}
/*
 * Emit an unconditional jump to the "false" successor of block "b", unless
 * that successor is the block reported as next, in which case nothing is
 * emitted. "def" is unused.
 */
static void ir_emit_jmp_false(ir_ctx *ctx, int b, ir_ref def)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	int taken, not_taken, fallthrough;

	ir_get_true_false_blocks(ctx, b, &taken, &not_taken, &fallthrough);
	if (not_taken == fallthrough) {
		return;
	}
	|	jmp =>not_taken
}
/*
 * Emit the conditional branch that terminates block "b".
 *
 * "op" is the comparison opcode describing the flags that the caller has
 * already produced. "int_cmp" selects between plain integer condition codes
 * and the floating point sequences, which additionally branch on the parity
 * flag (presumably PF marks an unordered/NaN result of the preceding
 * ucomis* - see the instruction reference).
 *
 * When one successor is the block reported as next, only one jump is needed:
 * either the condition is inverted and the old false block becomes the
 * target, or the trailing jump to the false block is dropped. A false_block
 * of 0 means "no explicit jump to the false block".
 *
 * "def" and "insn" are not used by this function.
 */
static void ir_emit_jcc(ir_ctx *ctx, uint8_t op, int b, ir_ref def, ir_insn *insn, bool int_cmp)
{
int true_block, false_block, next_block;
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
/* Set when the condition was inverted; the FP cases below depend on it. */
bool swap = 0;
ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block);
if (true_block == next_block) {
/* swap to avoid unconditional JMP */
/* NOTE(review): relies on each opcode and its negation differing only in bit 0 - confirm against the opcode enum. */
op ^= 1; // reverse
true_block = false_block;
false_block = 0;
swap = 1;
} else if (false_block == next_block) {
/* The false block is reached by falling through. */
false_block = 0;
}
if (int_cmp) {
switch (op) {
case IR_EQ:
| je =>true_block
break;
case IR_NE:
| jne =>true_block
break;
case IR_LT:
| jl =>true_block
break;
case IR_GE:
| jge =>true_block
break;
case IR_LE:
| jle =>true_block
break;
case IR_GT:
| jg =>true_block
break;
case IR_ULT:
| jb =>true_block
break;
case IR_UGE:
| jae =>true_block
break;
case IR_ULE:
| jbe =>true_block
break;
case IR_UGT:
| ja =>true_block
break;
default:
IR_ASSERT(0 && "NIY binary op");
break;
}
} else {
/*
 * Floating point: where the plain condition code would also fire with
 * PF set, a preceding "jp" routes that case to the false side (or over
 * the jump via local label 1 when the false side is the fall-through).
 */
switch (op) {
case IR_EQ:
if (!false_block) {
| jp >1
| je =>true_block
|1:
} else {
| jp =>false_block
| je =>true_block
}
break;
case IR_NE:
/* Taken when not equal or when PF is set. */
| jne =>true_block
| jp =>true_block
break;
case IR_LT:
if (swap) {
/* Inverted from GE: no parity check is emitted. */
| jb =>true_block
} else if (!false_block) {
| jp >1
| jb =>true_block
|1:
} else {
| jp =>false_block
| jb =>true_block
}
break;
case IR_GE:
if (swap) {
/* Inverted from LT: PF set must also take the branch. */
| jp =>true_block
}
| jae =>true_block
break;
case IR_LE:
if (swap) {
/* Inverted from GT: no parity check is emitted. */
| jbe =>true_block
} else if (!false_block) {
| jp >1
| jbe =>true_block
|1:
} else {
| jp =>false_block
| jbe =>true_block
}
break;
case IR_GT:
if (swap) {
/* Inverted from LE: PF set must also take the branch. */
| jp =>true_block
}
| ja =>true_block
break;
// case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break;
// case IR_UGE: fprintf(stderr, "\tjae .LL%d\n", true_block); break;
// case IR_ULE: fprintf(stderr, "\tjbe .LL%d\n", true_block); break;
// case IR_UGT: fprintf(stderr, "\tja .LL%d\n", true_block); break;
default:
IR_ASSERT(0 && "NIY binary op");
break;
}
}
/* Neither successor is the fall-through: jump explicitly to the false block. */
if (false_block) {
| jmp =>false_block
}
}
/*
 * Emit a fused integer compare-and-branch for the IF instruction "insn",
 * whose condition (insn->op2) is an integer comparison.
 *
 * Unsigned comparisons against the constant 0 are simplified: "< 0" and
 * ">= 0" become unconditional jumps, "<= 0" becomes "== 0" and "> 0" becomes
 * "!= 0". The compare instruction itself is omitted when the controlling
 * IF_TRUE/IF_FALSE belongs to a compare-and-branch on the same two operands.
 */
static void ir_emit_cmp_and_branch_int(ir_ctx *ctx, int b, ir_ref def, ir_insn *insn)
{
	ir_insn *cmp_insn = &ctx->ir_base[insn->op2];
	ir_ref lhs = cmp_insn->op1;
	ir_ref rhs = cmp_insn->op2;
	ir_op cond = cmp_insn->op;
	ir_type cmp_type = ctx->ir_base[cmp_insn->op1].type;
	ir_reg lhs_reg = ctx->regs[insn->op2][1];
	ir_reg rhs_reg = ctx->regs[insn->op2][2];

	/* Bring register operands in from their spill slots / constant pool. */
	if (lhs_reg != IR_REG_NONE) {
		if ((lhs_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(lhs)) {
			lhs_reg &= ~IR_REG_SPILL_LOAD;
			ir_emit_load(ctx, cmp_type, lhs_reg, lhs);
		}
	}
	if (rhs_reg != IR_REG_NONE) {
		if ((rhs_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(rhs)) {
			rhs_reg &= ~IR_REG_SPILL_LOAD;
			if (lhs != rhs) {
				ir_emit_load(ctx, cmp_type, rhs_reg, rhs);
			}
		}
	}

	if (IR_IS_CONST_REF(rhs) && ctx->ir_base[rhs].val.u64 == 0) {
		switch (cond) {
			case IR_ULT:
				/* always false */
				ir_emit_jmp_false(ctx, b, def);
				return;
			case IR_UGE:
				/* always true */
				ir_emit_jmp_true(ctx, b, def);
				return;
			case IR_ULE:
				cond = IR_EQ;
				break;
			case IR_UGT:
				cond = IR_NE;
				break;
			default:
				break;
		}
	}

	/* Detect a directly preceding compare-and-branch on the same operands. */
	bool reuse_flags = false;
	ir_insn *ctrl_insn = &ctx->ir_base[insn->op1];

	if ((ctrl_insn->op == IR_IF_TRUE || ctrl_insn->op == IR_IF_FALSE)
	 && ctx->rules[ctrl_insn->op1] == IR_CMP_AND_BRANCH_INT) {
		ir_insn *prev_if = &ctx->ir_base[ctrl_insn->op1];
		ir_insn *prev_cmp = &ctx->ir_base[prev_if->op2];

		if (prev_cmp->op1 == cmp_insn->op1 && prev_cmp->op2 == cmp_insn->op2) {
			reuse_flags = true;
		}
	}

	if (!reuse_flags) {
		ir_emit_cmp_int_common(ctx, cmp_type, cmp_insn, lhs_reg, lhs, rhs_reg, rhs);
	}
	ir_emit_jcc(ctx, cond, b, def, insn, 1);
}
/*
 * Emit a fused "if (a & b)" for the IF instruction "insn": a TEST for the
 * AND at insn->op2 followed by a branch on "not equal to zero".
 */
static void ir_emit_test_and_branch_int(ir_ctx *ctx, int b, ir_ref def, ir_insn *insn)
{
	ir_ref and_ref = insn->op2;

	ir_emit_test_int_common(ctx, and_ref, IR_NE);
	ir_emit_jcc(ctx, IR_NE, b, def, insn, 1);
}
/*
 * Emit a fused floating point compare-and-branch for the IF instruction
 * "insn": the comparison at insn->op2 followed by the FP branch sequence.
 */
static void ir_emit_cmp_and_branch_fp(ir_ctx *ctx, int b, ir_ref def, ir_insn *insn)
{
	ir_ref cmp_ref = insn->op2;
	ir_insn *cmp_insn = &ctx->ir_base[cmp_ref];
	ir_op cond = ir_emit_cmp_fp_common(ctx, cmp_ref, cmp_insn);

	ir_emit_jcc(ctx, cond, b, def, insn, 0);
}
/*
 * Emit a branch on a plain integer condition value (insn->op2): the value is
 * compared with zero (TEST reg,reg or CMP mem,0) and the block ends with a
 * "not equal" branch. A constant condition is not expected here.
 */
static void ir_emit_if_int(ir_ctx *ctx, int b, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_ref cond = insn->op2;
	ir_type type = ctx->ir_base[insn->op2].type;
	ir_reg cond_reg = ctx->regs[def][2];

	if (cond_reg != IR_REG_NONE && ctx->rules[cond] != IR_SKIP_MEM) {
		/* Condition is (or gets loaded) in a register. */
		if (cond_reg & IR_REG_SPILL_LOAD) {
			cond_reg &= ~IR_REG_SPILL_LOAD;
			ir_emit_load(ctx, type, cond_reg, cond);
		}
		|	ASM_REG_REG_OP test, type, cond_reg, cond_reg
	} else if (IR_IS_CONST_REF(cond)) {
		IR_ASSERT(0);
	} else {
		/* Condition is in memory: a fused load or a spill slot. */
		int32_t disp;

		if (ctx->rules[cond] == IR_SKIP_MEM) {
			disp = ir_fuse_load(ctx, cond, &cond_reg);
		} else {
			disp = ir_ref_spill_slot(ctx, cond, &cond_reg);
		}
		if (cond_reg != IR_REG_NONE) {
			|	ASM_MEM_IMM_OP cmp, type, [Ra(cond_reg)+disp], 0
		} else {
			|	ASM_MEM_IMM_OP cmp, type, [disp], 0
		}
	}

	ir_emit_jcc(ctx, IR_NE, b, def, insn, 1);
}
/*
 * Emit the function epilogue followed by RET. On 32-bit (non x86-64) builds
 * a fastcall function with stack parameters returns with "ret <size>" using
 * data->param_stack_size as the operand.
 */
static void ir_emit_return_void(ir_ctx *ctx)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	ir_emit_epilogue(ctx);

#if !defined(_WIN64) && !defined(__x86_64__)
	if (sizeof(void*) == 4) {
		if ((ctx->flags & IR_FASTCALL_FUNC) && data->param_stack_size) {
			|	ret data->param_stack_size
			return;
		}
	}
#endif

	|	ret
}
static void ir_emit_return_int(ir_ctx *ctx, ir_reg ref, ir_insn *insn)
{
ir_reg op2_reg = ctx->regs[ref][2];
if (op2_reg != IR_REG_INT_RET1) {
ir_type type = ctx->ir_base[insn->op2].type;
if (op2_reg != IR_REG_NONE && !(op2_reg & IR_REG_SPILL_LOAD)) {
ir_emit_mov(ctx, type, IR_REG_INT_RET1, op2_reg);
} else {
ir_emit_load(ctx, type, IR_REG_INT_RET1, insn->op2);
}
}
ir_emit_return_void(ctx);
}
static void ir_emit_return_fp(ir_ctx *ctx, ir_reg ref, ir_insn *insn)
{
ir_reg op2_reg = ctx->regs[ref][2];
ir_type type = ctx->ir_base[insn->op2].type;
#ifdef IR_REG_FP_RET1
if (op2_reg != IR_REG_FP_RET1) {
if (op2_reg != IR_REG_NONE && !(op2_reg & IR_REG_SPILL_LOAD)) {
ir_emit_fp_mov(ctx, type, IR_REG_FP_RET1, op2_reg);
} else {
ir_emit_load(ctx, type, IR_REG_FP_RET1, insn->op2);
}
}
#else
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
if (op2_reg == IR_REG_NONE || (op2_reg & IR_REG_SPILL_LOAD)) {
ir_reg fp;
int32_t offset = ir_ref_spill_slot(ctx, insn->op2, &fp);
if (type == IR_DOUBLE) {
| fld qword [Ra(fp)+offset]
} else {
IR_ASSERT(type == IR_FLOAT);
| fld dword [Ra(fp)+offset]
}
} else {
int32_t offset = (type == IR_FLOAT) ? data->float_ret_slot : data->double_ret_slot;
ir_reg fp;
IR_ASSERT(offset != -1);
offset = IR_SPILL_POS_TO_OFFSET(offset);
fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
ir_emit_store_mem_fp(ctx, type, fp, offset, op2_reg);
if (type == IR_DOUBLE) {
| fld qword [Ra(fp)+offset]
} else {
IR_ASSERT(type == IR_FLOAT);
| fld dword [Ra(fp)+offset]
}
}
#endif
ir_emit_return_void(ctx);
}
/*
 * Emit sign extension of the integer value insn->op1 to the wider integer
 * type of "insn", leaving the result in the register assigned to "def".
 *
 * The source may be a register (reloaded from its spill slot if flagged) or
 * memory (a fused load or a spill slot). MOVSX is used for 8/16-bit sources
 * and MOVSXD for 32 -> 64 bits; 64-bit destinations are only emitted for x64
 * builds. A constant source is not expected here.
 */
static void ir_emit_sext(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
ir_type dst_type = insn->type;
ir_type src_type = ctx->ir_base[insn->op1].type;
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
ir_reg op1_reg = ctx->regs[def][1];
IR_ASSERT(IR_IS_TYPE_INT(src_type));
IR_ASSERT(IR_IS_TYPE_INT(dst_type));
IR_ASSERT(ir_type_size[dst_type] > ir_type_size[src_type]);
IR_ASSERT(def_reg != IR_REG_NONE);
/* Source in a register. */
if (op1_reg != IR_REG_NONE && ctx->rules[insn->op1] != IR_SKIP_MEM) {
if (op1_reg & IR_REG_SPILL_LOAD) {
op1_reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, src_type, op1_reg, insn->op1);
}
if (ir_type_size[src_type] == 1) {
if (ir_type_size[dst_type] == 2) {
| movsx Rw(def_reg), Rb(op1_reg)
} else if (ir_type_size[dst_type] == 4) {
| movsx Rd(def_reg), Rb(op1_reg)
} else {
IR_ASSERT(ir_type_size[dst_type] == 8);
IR_ASSERT(sizeof(void*) == 8);
|.if X64
| movsx Rq(def_reg), Rb(op1_reg)
|.endif
}
} else if (ir_type_size[src_type] == 2) {
if (ir_type_size[dst_type] == 4) {
| movsx Rd(def_reg), Rw(op1_reg)
} else {
IR_ASSERT(ir_type_size[dst_type] == 8);
IR_ASSERT(sizeof(void*) == 8);
|.if X64
| movsx Rq(def_reg), Rw(op1_reg)
|.endif
}
} else {
IR_ASSERT(ir_type_size[src_type] == 4);
IR_ASSERT(ir_type_size[dst_type] == 8);
IR_ASSERT(sizeof(void*) == 8);
|.if X64
| movsxd Rq(def_reg), Rd(op1_reg)
|.endif
}
} else if (IR_IS_CONST_REF(insn->op1)) {
IR_ASSERT(0);
} else {
/* Source in memory: op1_reg becomes the base register of the address. */
int32_t offset = 0;
if (ctx->rules[insn->op1] == IR_SKIP_MEM) {
IR_ASSERT(op1_reg != IR_REG_NONE);
offset = ir_fuse_load(ctx, insn->op1, &op1_reg);
} else {
offset = ir_ref_spill_slot(ctx, insn->op1, &op1_reg);
}
if (ir_type_size[src_type] == 1) {
if (ir_type_size[dst_type] == 2) {
| movsx Rw(def_reg), byte [Ra(op1_reg)+offset]
} else if (ir_type_size[dst_type] == 4) {
| movsx Rd(def_reg), byte [Ra(op1_reg)+offset]
} else {
IR_ASSERT(ir_type_size[dst_type] == 8);
IR_ASSERT(sizeof(void*) == 8);
|.if X64
| movsx Rq(def_reg), byte [Ra(op1_reg)+offset]
|.endif
}
} else if (ir_type_size[src_type] == 2) {
if (ir_type_size[dst_type] == 4) {
| movsx Rd(def_reg), word [Ra(op1_reg)+offset]
} else {
IR_ASSERT(ir_type_size[dst_type] == 8);
IR_ASSERT(sizeof(void*) == 8);
|.if X64
| movsx Rq(def_reg), word [Ra(op1_reg)+offset]
|.endif
}
} else {
IR_ASSERT(ir_type_size[src_type] == 4);
IR_ASSERT(ir_type_size[dst_type] == 8);
IR_ASSERT(sizeof(void*) == 8);
|.if X64
| movsxd Rq(def_reg), dword [Ra(op1_reg)+offset]
|.endif
}
}
/* Write the result back to its spill slot if the allocator requested it. */
if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
ir_emit_store(ctx, dst_type, def, def_reg);
}
}
/*
 * Emit zero extension of the integer value insn->op1 to the wider integer
 * type of "insn", leaving the result in the register assigned to "def".
 *
 * The source may be a register (reloaded from its spill slot if flagged) or
 * memory (a fused load or a spill slot). MOVZX is used for 8/16-bit sources;
 * 32 -> 64 bits uses a plain 32-bit MOV. 64-bit destinations are only emitted
 * for x64 builds and are guarded by an assertion on the pointer size in every
 * path. A constant source is not expected here.
 */
static void ir_emit_zext(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_type dst_type = insn->type;
	ir_type src_type = ctx->ir_base[insn->op1].type;
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];

	IR_ASSERT(IR_IS_TYPE_INT(src_type));
	IR_ASSERT(IR_IS_TYPE_INT(dst_type));
	IR_ASSERT(ir_type_size[dst_type] > ir_type_size[src_type]);
	IR_ASSERT(def_reg != IR_REG_NONE);

	if (op1_reg != IR_REG_NONE && ctx->rules[insn->op1] != IR_SKIP_MEM) {
		/* Source in a register. */
		if (op1_reg & IR_REG_SPILL_LOAD) {
			op1_reg &= ~IR_REG_SPILL_LOAD;
			ir_emit_load(ctx, src_type, op1_reg, insn->op1);
		}
		if (ir_type_size[src_type] == 1) {
			if (ir_type_size[dst_type] == 2) {
				|	movzx Rw(def_reg), Rb(op1_reg)
			} else if (ir_type_size[dst_type] == 4) {
				|	movzx Rd(def_reg), Rb(op1_reg)
			} else {
				IR_ASSERT(ir_type_size[dst_type] == 8);
				IR_ASSERT(sizeof(void*) == 8);
|.if X64
				|	movzx Rq(def_reg), Rb(op1_reg)
|.endif
			}
		} else if (ir_type_size[src_type] == 2) {
			if (ir_type_size[dst_type] == 4) {
				|	movzx Rd(def_reg), Rw(op1_reg)
			} else {
				IR_ASSERT(ir_type_size[dst_type] == 8);
				IR_ASSERT(sizeof(void*) == 8);
|.if X64
				|	movzx Rq(def_reg), Rw(op1_reg)
|.endif
			}
		} else {
			IR_ASSERT(ir_type_size[src_type] == 4);
			IR_ASSERT(ir_type_size[dst_type] == 8);
			IR_ASSERT(sizeof(void*) == 8);
|.if X64
			/* 32 -> 64 bits is done with a plain 32-bit register move. */
			|	mov Rd(def_reg), Rd(op1_reg)
|.endif
		}
	} else if (IR_IS_CONST_REF(insn->op1)) {
		IR_ASSERT(0);
	} else {
		/* Source in memory: op1_reg becomes the base register of the address. */
		int32_t offset = 0;

		if (ctx->rules[insn->op1] == IR_SKIP_MEM) {
			IR_ASSERT(op1_reg != IR_REG_NONE);
			offset = ir_fuse_load(ctx, insn->op1, &op1_reg);
		} else {
			offset = ir_ref_spill_slot(ctx, insn->op1, &op1_reg);
		}

		if (ir_type_size[src_type] == 1) {
			if (ir_type_size[dst_type] == 2) {
				|	movzx Rw(def_reg), byte [Ra(op1_reg)+offset]
			} else if (ir_type_size[dst_type] == 4) {
				|	movzx Rd(def_reg), byte [Ra(op1_reg)+offset]
			} else {
				IR_ASSERT(ir_type_size[dst_type] == 8);
				IR_ASSERT(sizeof(void*) == 8);
|.if X64
				|	movzx Rq(def_reg), byte [Ra(op1_reg)+offset]
|.endif
			}
		} else if (ir_type_size[src_type] == 2) {
			if (ir_type_size[dst_type] == 4) {
				|	movzx Rd(def_reg), word [Ra(op1_reg)+offset]
			} else {
				IR_ASSERT(ir_type_size[dst_type] == 8);
				IR_ASSERT(sizeof(void*) == 8);
|.if X64
				|	movzx Rq(def_reg), word [Ra(op1_reg)+offset]
|.endif
			}
		} else {
			IR_ASSERT(ir_type_size[src_type] == 4);
			IR_ASSERT(ir_type_size[dst_type] == 8);
			/* Same guard as every other 64-bit path: nothing is emitted on x86. */
			IR_ASSERT(sizeof(void*) == 8);
|.if X64
			|	mov Rd(def_reg), dword [Ra(op1_reg)+offset]
|.endif
		}
	}

	/* Write the result back to its spill slot if the allocator requested it. */
	if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
		ir_emit_store(ctx, dst_type, def, def_reg);
	}
}
/*
 * Emit truncation of the integer value insn->op1 to the narrower integer
 * type of "insn". The value is simply moved or loaded into the destination
 * register using the destination type; nothing is emitted when source and
 * destination already share a register.
 */
static void ir_emit_trunc(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_type dst_type = insn->type;
	ir_type src_type = ctx->ir_base[insn->op1].type;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg src_reg = ctx->regs[def][1];

	IR_ASSERT(IR_IS_TYPE_INT(src_type));
	IR_ASSERT(IR_IS_TYPE_INT(dst_type));
	IR_ASSERT(ir_type_size[dst_type] < ir_type_size[src_type]);
	IR_ASSERT(def_reg != IR_REG_NONE);

	if (src_reg == IR_REG_NONE) {
		/* No source register: load directly with the destination type. */
		ir_emit_load(ctx, dst_type, def_reg, insn->op1);
	} else {
		if (src_reg & IR_REG_SPILL_LOAD) {
			src_reg &= ~IR_REG_SPILL_LOAD;
			ir_emit_load(ctx, src_type, src_reg, insn->op1);
		}
		if (src_reg != def_reg) {
			ir_emit_mov(ctx, dst_type, def_reg, src_reg);
		}
	}

	if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
		ir_emit_store(ctx, dst_type, def, def_reg);
	}
}
/*
 * Emit a bit-preserving reinterpretation of insn->op1 as the same-sized type
 * of "insn", leaving the result in the register assigned to "def".
 *
 * Four cases are distinguished by the register class of source/destination:
 *   INT -> INT, FP -> FP : plain move or load
 *   FP  -> INT           : MOVD/VMOVD from an XMM register, an integer
 *                          immediate for constants, or an integer load from
 *                          memory
 *   INT -> FP            : MOVD/VMOVD to an XMM register, or a scalar FP load
 *                          from the constant area / memory
 * 64-bit FP<->INT transfers are only emitted for x64 builds.
 *
 * In every register-source path the source is first reloaded from its spill
 * slot when the allocator flagged it with IR_REG_SPILL_LOAD, so the flag bit
 * never reaches the move emitters as part of a register number.
 */
static void ir_emit_bitcast(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_type dst_type = insn->type;
	ir_type src_type = ctx->ir_base[insn->op1].type;
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];
	int32_t offset;

	IR_ASSERT(ir_type_size[dst_type] == ir_type_size[src_type]);
	IR_ASSERT(def_reg != IR_REG_NONE);

	if (IR_IS_TYPE_INT(src_type) && IR_IS_TYPE_INT(dst_type)) {
		if (ctx->rules[insn->op1] == IR_SKIP_MEM) {
			IR_ASSERT(op1_reg != IR_REG_NONE);
			offset = ir_fuse_load(ctx, insn->op1, &op1_reg);
			ir_emit_load_mem_int(ctx, dst_type, def_reg, op1_reg, offset);
		} else if (op1_reg != IR_REG_NONE) {
			if (op1_reg & IR_REG_SPILL_LOAD) {
				op1_reg &= ~IR_REG_SPILL_LOAD;
				ir_emit_load(ctx, src_type, op1_reg, insn->op1);
			}
			if (op1_reg != def_reg) {
				ir_emit_mov(ctx, dst_type, def_reg, op1_reg);
			}
		} else {
			ir_emit_load(ctx, dst_type, def_reg, insn->op1);
		}
	} else if (IR_IS_TYPE_FP(src_type) && IR_IS_TYPE_FP(dst_type)) {
		if (ctx->rules[insn->op1] == IR_SKIP_MEM) {
			IR_ASSERT(op1_reg != IR_REG_NONE);
			offset = ir_fuse_load(ctx, insn->op1, &op1_reg);
			ir_emit_load_mem_fp(ctx, dst_type, def_reg, op1_reg, offset);
		} else if (op1_reg != IR_REG_NONE) {
			if (op1_reg & IR_REG_SPILL_LOAD) {
				op1_reg &= ~IR_REG_SPILL_LOAD;
				ir_emit_load(ctx, src_type, op1_reg, insn->op1);
			}
			if (op1_reg != def_reg) {
				ir_emit_fp_mov(ctx, dst_type, def_reg, op1_reg);
			}
		} else {
			ir_emit_load(ctx, dst_type, def_reg, insn->op1);
		}
	} else if (IR_IS_TYPE_FP(src_type)) {
		IR_ASSERT(IR_IS_TYPE_INT(dst_type));
		if (op1_reg != IR_REG_NONE && ctx->rules[insn->op1] != IR_SKIP_MEM) {
			/* XMM register -> general purpose register */
			if (op1_reg & IR_REG_SPILL_LOAD) {
				op1_reg &= ~IR_REG_SPILL_LOAD;
				ir_emit_load(ctx, src_type, op1_reg, insn->op1);
			}
			if (src_type == IR_DOUBLE) {
				IR_ASSERT(sizeof(void*) == 8);
|.if X64
				if (ctx->flags & IR_AVX) {
					|	vmovd Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST)
				} else {
					|	movd Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST)
				}
|.endif
			} else {
				IR_ASSERT(src_type == IR_FLOAT);
				if (ctx->flags & IR_AVX) {
					|	vmovd Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST)
				} else {
					|	movd Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST)
				}
			}
		} else if (IR_IS_CONST_REF(insn->op1)) {
			/* FP constant: materialize its bit pattern as an integer immediate. */
			ir_insn *_insn = &ctx->ir_base[insn->op1];

			if (src_type == IR_DOUBLE) {
				IR_ASSERT(sizeof(void*) == 8);
|.if X64
				|	mov64 Rq(def_reg), _insn->val.i64
|.endif
			} else {
				IR_ASSERT(src_type == IR_FLOAT);
				|	mov Rd(def_reg), _insn->val.i32
			}
		} else {
			/* FP value in memory: read it with an integer load. */
			if (ctx->rules[insn->op1] == IR_SKIP_MEM) {
				IR_ASSERT(op1_reg != IR_REG_NONE);
				offset = ir_fuse_load(ctx, insn->op1, &op1_reg);
			} else {
				offset = ir_ref_spill_slot(ctx, insn->op1, &op1_reg);
			}
			if (src_type == IR_DOUBLE) {
				IR_ASSERT(sizeof(void*) == 8);
|.if X64
				|	mov Rq(def_reg), qword [Ra(op1_reg)+offset]
|.endif
			} else {
				IR_ASSERT(src_type == IR_FLOAT);
				|	mov Rd(def_reg), dword [Ra(op1_reg)+offset]
			}
		}
	} else if (IR_IS_TYPE_FP(dst_type)) {
		IR_ASSERT(IR_IS_TYPE_INT(src_type));
		if (op1_reg != IR_REG_NONE && ctx->rules[insn->op1] != IR_SKIP_MEM) {
			/* general purpose register -> XMM register */
			if (op1_reg & IR_REG_SPILL_LOAD) {
				op1_reg &= ~IR_REG_SPILL_LOAD;
				ir_emit_load(ctx, src_type, op1_reg, insn->op1);
			}
			if (dst_type == IR_DOUBLE) {
				IR_ASSERT(sizeof(void*) == 8);
|.if X64
				if (ctx->flags & IR_AVX) {
					|	vmovd xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg)
				} else {
					|	movd xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg)
				}
|.endif
			} else {
				IR_ASSERT(dst_type == IR_FLOAT);
				if (ctx->flags & IR_AVX) {
					|	vmovd xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg)
				} else {
					|	movd xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg)
				}
			}
		} else if (IR_IS_CONST_REF(insn->op1)) {
			/* Integer constant: mark it for emission and load it through its label. */
			ir_insn *val_insn = &ctx->ir_base[insn->op1];
			int label = ctx->cfg_blocks_count - insn->op1;

			val_insn->const_flags |= IR_CONST_EMIT;
			|	ASM_FP_REG_MEM_OP movss, movsd, vmovss, vmovsd, dst_type, def_reg, [=>label]
		} else {
			/* Integer value in memory: read it with a scalar FP load. */
			if (ctx->rules[insn->op1] == IR_SKIP_MEM) {
				IR_ASSERT(op1_reg != IR_REG_NONE);
				offset = ir_fuse_load(ctx, insn->op1, &op1_reg);
			} else {
				offset = ir_ref_spill_slot(ctx, insn->op1, &op1_reg);
			}
			|	ASM_FP_REG_MEM_OP movss, movsd, vmovss, vmovsd, dst_type, def_reg, [Ra(op1_reg)+offset]
		}
	}

	/* Write the result back to its spill slot if the allocator requested it. */
	if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
		ir_emit_store(ctx, dst_type, def, def_reg);
	}
}
/*
 * Emit conversion of the integer value insn->op1 to the floating point type
 * of "insn" using CVTSI2SD/CVTSI2SS (or their VEX forms when IR_AVX is set),
 * leaving the result in the XMM register assigned to "def".
 *
 * The source may be a register (reloaded from its spill slot or constant if
 * needed) or memory (a fused load or a spill slot). The 64-bit source forms
 * are only emitted for x64 builds.
 */
static void ir_emit_int2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
ir_type dst_type = insn->type;
ir_type src_type = ctx->ir_base[insn->op1].type;
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
ir_reg op1_reg = ctx->regs[def][1];
/* Selects the 64-bit source operand form of the conversion. */
bool src64 = 0;
IR_ASSERT(IR_IS_TYPE_INT(src_type));
IR_ASSERT(IR_IS_TYPE_FP(dst_type));
IR_ASSERT(def_reg != IR_REG_NONE);
/*
 * Signed sources use the 64-bit form only when they are 8 bytes wide;
 * unsigned sources use it from 4 bytes up.
 * NOTE(review): for an unsigned 32-bit source this reads a full 64-bit
 * register / qword memory operand - confirm the upper half is guaranteed
 * to be zero (see the TODO below).
 */
if (IR_IS_TYPE_SIGNED(src_type) ? ir_type_size[src_type] == 8 : ir_type_size[src_type] >= 4) {
// TODO: we might need to perform sign/zero integer extension to 32/64 bit integer
src64 = 1;
}
/* Source in a register. */
if (op1_reg != IR_REG_NONE && ctx->rules[insn->op1] != IR_SKIP_MEM) {
if ((op1_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(insn->op1)) {
op1_reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, src_type, op1_reg, insn->op1);
}
if (!src64) {
if (dst_type == IR_DOUBLE) {
if (ctx->flags & IR_AVX) {
/* The VEX form takes the destination as its extra source operand. */
| vcvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg)
} else {
| cvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg)
}
} else {
IR_ASSERT(dst_type == IR_FLOAT);
if (ctx->flags & IR_AVX) {
| vcvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg)
} else {
| cvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg)
}
}
} else {
IR_ASSERT(sizeof(void*) == 8);
|.if X64
if (dst_type == IR_DOUBLE) {
if (ctx->flags & IR_AVX) {
| vcvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg)
} else {
| cvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg)
}
} else {
IR_ASSERT(dst_type == IR_FLOAT);
if (ctx->flags & IR_AVX) {
| vcvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg)
} else {
| cvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg)
}
}
|.endif
}
} else {
/* Source in memory: op1_reg becomes the base register of the address. */
int32_t offset = 0;
if (ctx->rules[insn->op1] == IR_SKIP_MEM) {
IR_ASSERT(op1_reg != IR_REG_NONE);
offset = ir_fuse_load(ctx, insn->op1, &op1_reg);
} else {
offset = ir_ref_spill_slot(ctx, insn->op1, &op1_reg);
}
if (!src64) {
if (dst_type == IR_DOUBLE) {
if (ctx->flags & IR_AVX) {
| vcvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(op1_reg)+offset]
} else {
| cvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(op1_reg)+offset]
}
} else {
IR_ASSERT(dst_type == IR_FLOAT);
if (ctx->flags & IR_AVX) {
| vcvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(op1_reg)+offset]
} else {
| cvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(op1_reg)+offset]
}
}
} else {
IR_ASSERT(sizeof(void*) == 8);
|.if X64
if (dst_type == IR_DOUBLE) {
if (ctx->flags & IR_AVX) {
| vcvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword [Ra(op1_reg)+offset]
} else {
| cvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), qword [Ra(op1_reg)+offset]
}
} else {
IR_ASSERT(dst_type == IR_FLOAT);
if (ctx->flags & IR_AVX) {
| vcvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword [Ra(op1_reg)+offset]
} else {
| cvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), qword [Ra(op1_reg)+offset]
}
}
|.endif
}
}
/* Write the result back to its spill slot if the allocator requested it. */
if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
ir_emit_store(ctx, dst_type, def, def_reg);
}
}
/*
 * Emit conversion of the floating point value insn->op1 to the integer type
 * of "insn", leaving the result in the general purpose register assigned to
 * "def".
 *
 * The truncating instruction forms (CVTTSD2SI/CVTTSS2SI and their VEX
 * equivalents when IR_AVX is set) are used, so the fractional part is dropped
 * and the result does not depend on the current MXCSR rounding mode.
 *
 * The source may be an XMM register (reloaded from its spill slot if
 * flagged), a constant emitted into the constant area, or memory (a fused
 * load or a spill slot). The 64-bit destination forms are only emitted for
 * x64 builds.
 */
static void ir_emit_fp2int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_type dst_type = insn->type;
	ir_type src_type = ctx->ir_base[insn->op1].type;
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];
	/* Selects the 64-bit destination operand form of the conversion. */
	bool dst64 = 0;

	IR_ASSERT(IR_IS_TYPE_FP(src_type));
	IR_ASSERT(IR_IS_TYPE_INT(dst_type));
	IR_ASSERT(def_reg != IR_REG_NONE);

	/*
	 * Signed destinations use the 64-bit form only when they are 8 bytes
	 * wide; unsigned destinations use it from 4 bytes up.
	 */
	if (IR_IS_TYPE_SIGNED(dst_type) ? ir_type_size[dst_type] == 8 : ir_type_size[dst_type] >= 4) {
		// TODO: we might need to perform truncation from 32/64 bit integer
		dst64 = 1;
	}

	if (op1_reg != IR_REG_NONE && ctx->rules[insn->op1] != IR_SKIP_MEM) {
		/* Source in an XMM register. */
		if (op1_reg & IR_REG_SPILL_LOAD) {
			op1_reg &= ~IR_REG_SPILL_LOAD;
			ir_emit_load(ctx, src_type, op1_reg, insn->op1);
		}
		if (!dst64) {
			if (src_type == IR_DOUBLE) {
				if (ctx->flags & IR_AVX) {
					|	vcvttsd2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST)
				} else {
					|	cvttsd2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST)
				}
			} else {
				IR_ASSERT(src_type == IR_FLOAT);
				if (ctx->flags & IR_AVX) {
					|	vcvttss2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST)
				} else {
					|	cvttss2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST)
				}
			}
		} else {
			IR_ASSERT(sizeof(void*) == 8);
|.if X64
			if (src_type == IR_DOUBLE) {
				if (ctx->flags & IR_AVX) {
					|	vcvttsd2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST)
				} else {
					|	cvttsd2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST)
				}
			} else {
				IR_ASSERT(src_type == IR_FLOAT);
				if (ctx->flags & IR_AVX) {
					|	vcvttss2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST)
				} else {
					|	cvttss2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST)
				}
			}
|.endif
		}
	} else if (IR_IS_CONST_REF(insn->op1)) {
		/* Constant source: mark it for emission and address it through its label. */
		ir_insn *_insn = &ctx->ir_base[insn->op1];
		int label = ctx->cfg_blocks_count - insn->op1;

		_insn->const_flags |= IR_CONST_EMIT;
		if (!dst64) {
			if (src_type == IR_DOUBLE) {
				if (ctx->flags & IR_AVX) {
					|	vcvttsd2si Rd(def_reg), qword [=>label]
				} else {
					|	cvttsd2si Rd(def_reg), qword [=>label]
				}
			} else {
				IR_ASSERT(src_type == IR_FLOAT);
				if (ctx->flags & IR_AVX) {
					|	vcvttss2si Rd(def_reg), dword [=>label]
				} else {
					|	cvttss2si Rd(def_reg), dword [=>label]
				}
			}
		} else {
			IR_ASSERT(sizeof(void*) == 8);
|.if X64
			if (src_type == IR_DOUBLE) {
				if (ctx->flags & IR_AVX) {
					|	vcvttsd2si Rq(def_reg), qword [=>label]
				} else {
					|	cvttsd2si Rq(def_reg), qword [=>label]
				}
			} else {
				IR_ASSERT(src_type == IR_FLOAT);
				if (ctx->flags & IR_AVX) {
					|	vcvttss2si Rq(def_reg), dword [=>label]
				} else {
					|	cvttss2si Rq(def_reg), dword [=>label]
				}
			}
|.endif
		}
	} else {
		/* Source in memory: op1_reg becomes the base register of the address. */
		int32_t offset = 0;

		if (ctx->rules[insn->op1] == IR_SKIP_MEM) {
			IR_ASSERT(op1_reg != IR_REG_NONE);
			offset = ir_fuse_load(ctx, insn->op1, &op1_reg);
		} else {
			offset = ir_ref_spill_slot(ctx, insn->op1, &op1_reg);
		}
		if (!dst64) {
			if (src_type == IR_DOUBLE) {
				if (ctx->flags & IR_AVX) {
					|	vcvttsd2si Rd(def_reg), qword [Ra(op1_reg)+offset]
				} else {
					|	cvttsd2si Rd(def_reg), qword [Ra(op1_reg)+offset]
				}
			} else {
				IR_ASSERT(src_type == IR_FLOAT);
				if (ctx->flags & IR_AVX) {
					|	vcvttss2si Rd(def_reg), dword [Ra(op1_reg)+offset]
				} else {
					|	cvttss2si Rd(def_reg), dword [Ra(op1_reg)+offset]
				}
			}
		} else {
			IR_ASSERT(sizeof(void*) == 8);
|.if X64
			if (src_type == IR_DOUBLE) {
				if (ctx->flags & IR_AVX) {
					|	vcvttsd2si Rq(def_reg), qword [Ra(op1_reg)+offset]
				} else {
					|	cvttsd2si Rq(def_reg), qword [Ra(op1_reg)+offset]
				}
			} else {
				IR_ASSERT(src_type == IR_FLOAT);
				if (ctx->flags & IR_AVX) {
					|	vcvttss2si Rq(def_reg), dword [Ra(op1_reg)+offset]
				} else {
					|	cvttss2si Rq(def_reg), dword [Ra(op1_reg)+offset]
				}
			}
|.endif
		}
	}

	/* Write the result back to its spill slot if the allocator requested it. */
	if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
		ir_emit_store(ctx, dst_type, def, def_reg);
	}
}
/*
 * Emit conversion between floating point types (double <-> float) of the
 * value insn->op1, leaving the result in the XMM register assigned to "def".
 * CVTSD2SS / CVTSS2SD (or their VEX forms when IR_AVX is set) are used.
 *
 * The source may be an XMM register (reloaded from its spill slot if
 * flagged), a constant emitted into the constant area, or memory (a fused
 * load or a spill slot).
 *
 * NOTE(review): identical source and destination types are handled (as a
 * plain register move) only in the register path; the constant and memory
 * paths always emit a conversion - confirm same-type conversions cannot reach
 * them.
 */
static void ir_emit_fp2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
ir_type dst_type = insn->type;
ir_type src_type = ctx->ir_base[insn->op1].type;
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
ir_reg op1_reg = ctx->regs[def][1];
IR_ASSERT(IR_IS_TYPE_FP(src_type));
IR_ASSERT(IR_IS_TYPE_FP(dst_type));
IR_ASSERT(def_reg != IR_REG_NONE);
/* Source in an XMM register. */
if (op1_reg != IR_REG_NONE && ctx->rules[insn->op1] != IR_SKIP_MEM) {
if (op1_reg & IR_REG_SPILL_LOAD) {
op1_reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, src_type, op1_reg, insn->op1);
}
if (src_type == dst_type) {
/* Nothing to convert: at most a register-to-register move. */
if (op1_reg != def_reg) {
ir_emit_fp_mov(ctx, dst_type, def_reg, op1_reg);
}
} else if (src_type == IR_DOUBLE) {
if (ctx->flags & IR_AVX) {
/* The VEX form takes the destination as its extra source operand. */
| vcvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(op1_reg-IR_REG_FP_FIRST)
} else {
| cvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(op1_reg-IR_REG_FP_FIRST)
}
} else {
IR_ASSERT(src_type == IR_FLOAT);
if (ctx->flags & IR_AVX) {
| vcvtss2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(op1_reg-IR_REG_FP_FIRST)
} else {
| cvtss2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(op1_reg-IR_REG_FP_FIRST)
}
}
} else if (IR_IS_CONST_REF(insn->op1)) {
/* Constant source: mark it for emission and address it through its label. */
ir_insn *_insn = &ctx->ir_base[insn->op1];
int label = ctx->cfg_blocks_count - insn->op1;
_insn->const_flags |= IR_CONST_EMIT;
if (src_type == IR_DOUBLE) {
if (ctx->flags & IR_AVX) {
| vcvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword [=>label]
} else {
| cvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), qword [=>label]
}
} else {
IR_ASSERT(src_type == IR_FLOAT);
if (ctx->flags & IR_AVX) {
| vcvtss2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword [=>label]
} else {
| cvtss2sd xmm(def_reg-IR_REG_FP_FIRST), dword [=>label]
}
}
} else {
/* Source in memory: op1_reg becomes the base register of the address. */
int32_t offset = 0;
if (ctx->rules[insn->op1] == IR_SKIP_MEM) {
IR_ASSERT(op1_reg != IR_REG_NONE);
offset = ir_fuse_load(ctx, insn->op1, &op1_reg);
} else {
offset = ir_ref_spill_slot(ctx, insn->op1, &op1_reg);
}
if (src_type == IR_DOUBLE) {
if (ctx->flags & IR_AVX) {
| vcvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword [Ra(op1_reg)+offset]
} else {
| cvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), qword [Ra(op1_reg)+offset]
}
} else {
IR_ASSERT(src_type == IR_FLOAT);
if (ctx->flags & IR_AVX) {
| vcvtss2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(op1_reg)+offset]
} else {
| cvtss2sd xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(op1_reg)+offset]
}
}
}
/* Write the result back to its spill slot if the allocator requested it. */
if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
ir_emit_store(ctx, dst_type, def, def_reg);
}
}
/*
 * Emit an integer COPY: make the value of insn->op1 available in the
 * location assigned to `def`.
 *
 * ctx->regs[def][0] is the destination register (possibly carrying the
 * IR_REG_SPILL_STORE flag) and ctx->regs[def][1] the source register
 * (possibly carrying IR_REG_SPILL_LOAD). At least one of the two must be
 * a real register.
 */
static void ir_emit_copy_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_ref type = insn->type;
	ir_reg dst = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg src = ctx->regs[def][1];

	IR_ASSERT(dst != IR_REG_NONE || src != IR_REG_NONE);

	/* A source flagged SPILL_LOAD must be loaded into its register first. */
	if (src != IR_REG_NONE && (src & IR_REG_SPILL_LOAD)) {
		src &= ~IR_REG_SPILL_LOAD;
		ir_emit_load(ctx, type, src, insn->op1);
	}

	/* Nothing to move when both sides already share one register. */
	if (dst != src) {
		if (dst == IR_REG_NONE) {
			/* Only the source has a register: store it to def's slot. */
			ir_emit_store(ctx, type, def, src);
		} else if (src == IR_REG_NONE) {
			/* Only the destination has a register: load op1 into it. */
			ir_emit_load(ctx, type, dst, insn->op1);
		} else {
			ir_emit_mov(ctx, type, dst, src);
		}
	}

	if (dst != IR_REG_NONE && (ctx->regs[def][0] & IR_REG_SPILL_STORE)) {
		ir_emit_store(ctx, type, def, dst);
	}
}
/*
 * Emit a floating point COPY: make the value of insn->op1 available in the
 * location assigned to `def`.
 *
 * Mirrors ir_emit_copy_int() but uses ir_emit_fp_mov() for the
 * register-to-register case.
 */
static void ir_emit_copy_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_type type = insn->type;
	ir_reg dst = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg src = ctx->regs[def][1];

	IR_ASSERT(dst != IR_REG_NONE || src != IR_REG_NONE);

	/* A source flagged SPILL_LOAD must be loaded into its register first. */
	if (src != IR_REG_NONE && (src & IR_REG_SPILL_LOAD)) {
		src &= ~IR_REG_SPILL_LOAD;
		ir_emit_load(ctx, type, src, insn->op1);
	}

	/* Nothing to move when both sides already share one register. */
	if (dst != src) {
		if (dst == IR_REG_NONE) {
			/* Only the source has a register: store it to def's slot. */
			ir_emit_store(ctx, type, def, src);
		} else if (src == IR_REG_NONE) {
			/* Only the destination has a register: load op1 into it. */
			ir_emit_load(ctx, type, dst, insn->op1);
		} else {
			ir_emit_fp_mov(ctx, type, dst, src);
		}
	}

	if (dst != IR_REG_NONE && (ctx->regs[def][0] & IR_REG_SPILL_STORE)) {
		ir_emit_store(ctx, type, def, dst);
	}
}
/*
 * Emit VADDR: compute the address of the spill slot of insn->op1 into the
 * register assigned to `def` (a register is required).
 */
static void ir_emit_vaddr(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg base;
	int32_t disp;

	IR_ASSERT(def_reg != IR_REG_NONE);

	/* ir_ref_spill_slot() yields the displacement and the base register. */
	disp = ir_ref_spill_slot(ctx, insn->op1, &base);
	|	lea Ra(def_reg), aword [Ra(base)+disp]

	if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
		ir_emit_store(ctx, insn->type, def, def_reg);
	}
}
/*
 * Emit VLOAD: load the variable insn->op2 into the register assigned to
 * `def`. When `def` has no register and lives in the same memory as the
 * variable, nothing is emitted.
 */
static void ir_emit_vload(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_reg dst = IR_REG_NUM(ctx->regs[def][0]);

	if (dst == IR_REG_NONE) {
		if (ir_is_same_mem(ctx, insn->op2, def)) {
			return; // fake load
		}
	}
	IR_ASSERT(dst != IR_REG_NONE);

	ir_emit_load(ctx, insn->type, dst, insn->op2);
	if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
		ir_emit_store(ctx, insn->type, def, dst);
	}
}
/*
 * Emit an integer VSTORE: write the value insn->op3 into the variable
 * insn->op2.
 *
 * Constants that fit into 32 bits are stored as immediates; any other value
 * goes through the register ctx->regs[ref][3].
 */
static void ir_emit_vstore_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_insn *val_insn = &ctx->ir_base[insn->op3];
	ir_ref type = val_insn->type;
	ir_reg val_reg = ctx->regs[ref][3];

	if (!IR_IS_CONST_REF(insn->op3)
	 && (val_reg == IR_REG_NONE || (val_reg & IR_REG_SPILL_LOAD))
	 && ir_is_same_mem(ctx, insn->op3, insn->op2)) {
		return; // fake store
	}

	if (IR_IS_CONST_REF(insn->op3) && IR_IS_32BIT(type, val_insn->val)) {
		ir_reg base;
		int32_t disp = ir_ref_spill_slot(ctx, insn->op2, &base);

		|	ASM_MEM_IMM_OP mov, type, [Ra(base)+disp], val_insn->val.i32
		return;
	}

	IR_ASSERT(val_reg != IR_REG_NONE);
	/* Spilled values and wide constants are loaded into the register first. */
	if ((val_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(insn->op3)) {
		val_reg &= ~IR_REG_SPILL_LOAD;
		ir_emit_load(ctx, type, val_reg, insn->op3);
	}
	ir_emit_store(ctx, type, insn->op2, val_reg);
}
/*
 * Emit a floating point VSTORE: write the value insn->op3 into the variable
 * insn->op2 through the register ctx->regs[ref][3].
 */
static void ir_emit_vstore_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
{
	ir_ref type = ctx->ir_base[insn->op3].type;
	ir_reg val_reg = ctx->regs[ref][3];

	if (!IR_IS_CONST_REF(insn->op3)
	 && (val_reg == IR_REG_NONE || (val_reg & IR_REG_SPILL_LOAD))
	 && ir_is_same_mem(ctx, insn->op3, insn->op2)) {
		return; // fake store
	}

	IR_ASSERT(val_reg != IR_REG_NONE);
	/* Spilled values and constants are loaded into the register first. */
	if ((val_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(insn->op3)) {
		val_reg &= ~IR_REG_SPILL_LOAD;
		ir_emit_load(ctx, type, val_reg, insn->op3);
	}
	ir_emit_store(ctx, type, insn->op2, val_reg);
}
/*
 * Emit an integer LOAD: def = *(type*)op2.
 *
 * - Nothing is emitted when `def` has a single entry in its use list
 *   (treated as a dead load).
 * - A constant address that fits into a signed 32-bit displacement (always
 *   the case on 32-bit targets) is used directly as an absolute operand.
 * - Otherwise the address is taken from ctx->regs[def][2] (def_reg is
 *   reused as address register when none was assigned), or from a fused
 *   address computation when the rule for op2 is IR_SKIP_MEM.
 * - The load is skipped when it would read def's own spill slot.
 */
static void ir_emit_load_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_ref type = insn->type;
	ir_reg op2_reg = ctx->regs[def][2];
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	int32_t offset = 0;

	if (ctx->use_lists[def].count == 1) {
		/* dead load */
		return;
	}
	IR_ASSERT(def_reg != IR_REG_NONE);
	if (IR_IS_CONST_REF(insn->op2)) {
		void *addr = (void*)ctx->ir_base[insn->op2].val.addr;

		if (sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr)) {
			int32_t addr32 = (intptr_t)addr;
			|	ASM_REG_MEM_OP mov, type, def_reg, [addr32]
			return;
		}
	}
	if (op2_reg == IR_REG_NONE) {
		/* No dedicated address register: build the address in def_reg. */
		op2_reg = def_reg;
	}
	if ((op2_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(insn->op2)) {
		op2_reg &= ~IR_REG_SPILL_LOAD;
		IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR);
		ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
	}
	/* Constant refs are negative and must not be used to index ctx->rules
	 * (same guard as in ir_emit_rstore()). */
	if (!IR_IS_CONST_REF(insn->op2) && ctx->rules[insn->op2] == IR_SKIP_MEM) {
		offset = ir_fuse_addr(ctx, insn->op2, &op2_reg);
	}
	if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
		ir_reg fp;

		/* Loading from def's own spill slot would be a no-op. */
		if (ir_ref_spill_slot(ctx, def, &fp) == offset && op2_reg == fp) {
			return;
		}
	}
	ir_emit_load_mem_int(ctx, type, def_reg, op2_reg, offset);
	if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}
/*
 * Emit a floating point LOAD: def = *(type*)op2.
 *
 * - Nothing is emitted when `def` has a single entry in its use list
 *   (treated as a dead load).
 * - A constant address that fits into a signed 32-bit displacement (always
 *   the case on 32-bit targets) is used directly as an absolute operand.
 * - Otherwise the address comes from ctx->regs[def][2] (required), or from
 *   a fused address computation when the rule for op2 is IR_SKIP_MEM.
 * - The load is skipped when it would read def's own spill slot.
 */
static void ir_emit_load_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_ref type = insn->type;
	ir_reg op2_reg = ctx->regs[def][2];
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	int32_t offset = 0;

	if (ctx->use_lists[def].count == 1) {
		/* dead load */
		return;
	}
	IR_ASSERT(def_reg != IR_REG_NONE);
	if (IR_IS_CONST_REF(insn->op2)) {
		void *addr = (void*)ctx->ir_base[insn->op2].val.addr;

		if (sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr)) {
			int32_t addr32 = (intptr_t)addr;
			|	ASM_FP_REG_MEM_OP movss, movsd, vmovss, vmovsd, type, def_reg, [addr32]
			return;
		}
	}
	IR_ASSERT(op2_reg != IR_REG_NONE);
	if ((op2_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(insn->op2)) {
		op2_reg &= ~IR_REG_SPILL_LOAD;
		IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR);
		ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
	}
	/* Constant refs are negative and must not be used to index ctx->rules
	 * (same guard as in ir_emit_rstore()). */
	if (!IR_IS_CONST_REF(insn->op2) && ctx->rules[insn->op2] == IR_SKIP_MEM) {
		offset = ir_fuse_addr(ctx, insn->op2, &op2_reg);
	}
	if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
		ir_reg fp;

		/* Loading from def's own spill slot would be a no-op. */
		if (ir_ref_spill_slot(ctx, def, &fp) == offset && op2_reg == fp) {
			return;
		}
	}
	ir_emit_load_mem_fp(ctx, type, def_reg, op2_reg, offset);
	if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}
static void ir_emit_store_int(ir_ctx *ctx, ir_reg ref, ir_insn *insn)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
ir_insn *val_insn = &ctx->ir_base[insn->op3];
ir_ref type = val_insn->type;
ir_reg op2_reg = ctx->regs[ref][2];
ir_reg op3_reg = ctx->regs[ref][3];
int32_t offset = 0;
if (IR_IS_CONST_REF(insn->op2)) {
void *addr = (void*)ctx->ir_base[insn->op2].val.addr;
if (sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr)) {
int32_t addr32 = (intptr_t)addr;
if (IR_IS_CONST_REF(insn->op3) && IR_IS_32BIT(type, val_insn->val)) {
| ASM_MEM_IMM_OP mov, type, [addr32], val_insn->val.i32
} else {
IR_ASSERT(op3_reg != IR_REG_NONE);
if ((op3_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(insn->op3)) {
op3_reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, type, op3_reg, insn->op3);
}
| ASM_MEM_REG_OP mov, type, [addr32], op3_reg
}
return;
}
}
IR_ASSERT(op2_reg != IR_REG_NONE);
if ((op2_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(insn->op2)) {
op2_reg &= ~IR_REG_SPILL_LOAD;
IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR);
ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
}
if (ctx->rules[insn->op2] == IR_SKIP_MEM) {
offset = ir_fuse_addr(ctx, insn->op2, &op2_reg);
}
if (IR_IS_CONST_REF(insn->op3) && IR_IS_32BIT(type, val_insn->val)) {
| ASM_MEM_IMM_OP mov, type, [Ra(op2_reg)+offset], val_insn->val.i32
} else {
IR_ASSERT(op3_reg != IR_REG_NONE);
if ((op3_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(insn->op3)) {
op3_reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, type, op3_reg, insn->op3);
}
ir_emit_store_mem_int(ctx, type, op2_reg, offset, op3_reg);
}
}
/*
 * Emit a floating point STORE: *(type*)op2 = op3.
 *
 * The value always goes through the FP register ctx->regs[ref][3]
 * (required); spilled values and constants are loaded into it first.
 *
 * - A constant address that fits into a signed 32-bit displacement (always
 *   the case on 32-bit targets) is used directly as an absolute operand.
 * - Otherwise the address comes from ctx->regs[ref][2] (required), or from
 *   a fused address computation when the rule for op2 is IR_SKIP_MEM.
 */
static void ir_emit_store_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_ref type = ctx->ir_base[insn->op3].type;
	ir_reg op2_reg = ctx->regs[ref][2];
	ir_reg op3_reg = ctx->regs[ref][3];
	int32_t offset = 0;

	IR_ASSERT(op3_reg != IR_REG_NONE);
	if ((op3_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(insn->op3)) {
		op3_reg &= ~IR_REG_SPILL_LOAD;
		ir_emit_load(ctx, type, op3_reg, insn->op3);
	}
	if (IR_IS_CONST_REF(insn->op2)) {
		void *addr = (void*)ctx->ir_base[insn->op2].val.addr;

		if (sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr)) {
			int32_t addr32 = (intptr_t)addr;
			/* Store the VALUE register (op3_reg); op2_reg is the address
			 * register and is not used on this absolute-address path. */
			|	ASM_FP_MEM_REG_OP movss, movsd, vmovss, vmovsd, type, [addr32], op3_reg
			return;
		}
	}
	IR_ASSERT(op2_reg != IR_REG_NONE);
	if ((op2_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(insn->op2)) {
		op2_reg &= ~IR_REG_SPILL_LOAD;
		IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR);
		ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
	}
	/* Constant refs are negative and must not be used to index ctx->rules
	 * (same guard as in ir_emit_rstore()). */
	if (!IR_IS_CONST_REF(insn->op2) && ctx->rules[insn->op2] == IR_SKIP_MEM) {
		offset = ir_fuse_addr(ctx, insn->op2, &op2_reg);
	}
	ir_emit_store_mem_fp(ctx, type, op2_reg, offset, op3_reg);
}
/*
 * Emit RSTORE: place the value insn->op2 into the fixed register given by
 * insn->op3.
 *
 * The value is taken from a fused memory operand, from its assigned
 * register, or loaded directly into the target register.
 */
static void ir_emit_rstore(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
{
	ir_ref type = ctx->ir_base[insn->op2].type;
	ir_reg src = ctx->regs[ref][2];
	ir_reg target = insn->op3;

	/* Fused memory operand: load straight from the fused address. */
	if (!IR_IS_CONST_REF(insn->op2) && ctx->rules[insn->op2] == IR_SKIP_MEM) {
		int32_t disp = ir_fuse_load(ctx, insn->op2, &src);

		if (IR_IS_TYPE_INT(type)) {
			ir_emit_load_mem_int(ctx, type, target, src, disp);
		} else {
			ir_emit_load_mem_fp(ctx, type, target, src, disp);
		}
		return;
	}

	/* No source register: load the value directly into the target. */
	if (src == IR_REG_NONE) {
		ir_emit_load(ctx, type, target, insn->op2);
		return;
	}

	if (src & IR_REG_SPILL_LOAD) {
		src &= ~IR_REG_SPILL_LOAD;
		ir_emit_load(ctx, type, src, insn->op2);
	}
	if (src == target) {
		return;
	}
	if (IR_IS_TYPE_INT(type)) {
		ir_emit_mov(ctx, type, target, src);
	} else {
		IR_ASSERT(IR_IS_TYPE_FP(type));
		ir_emit_fp_mov(ctx, type, target, src);
	}
}
/*
 * Emit ALLOCA: reserve insn->op2 bytes by lowering the stack pointer and
 * deliver the new stack pointer value as the result `def`.
 *
 * The size is rounded up to 16 bytes when the function has calls
 * (IR_HAS_CALLS) and to 8 bytes otherwise.
 */
static void ir_emit_alloca(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
if (IR_IS_CONST_REF(insn->op2)) {
/* Constant size: align at compile time and subtract an immediate. */
ir_insn *val = &ctx->ir_base[insn->op2];
int32_t size = val->val.i32;
IR_ASSERT(IR_IS_TYPE_INT(val->type));
IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 > 0);
IR_ASSERT(IR_IS_SIGNED_32BIT(val->val.i64));
if (ctx->flags & IR_HAS_CALLS) {
/* Stack must be 16 byte aligned */
size = IR_ALIGNED_SIZE(size, 16);
} else {
size = IR_ALIGNED_SIZE(size, 8);
}
| ASM_REG_IMM_OP sub, IR_ADDR, IR_REG_RSP, size
} else {
/* Dynamic size: def_reg is used as scratch for the aligned size,
 * so a result register is required on this path. */
int32_t alignment = (ctx->flags & IR_HAS_CALLS) ? 16 : 8;
ir_reg op2_reg = ctx->regs[def][2];
ir_type type = ctx->ir_base[insn->op2].type;
IR_ASSERT(def_reg != IR_REG_NONE);
if (op2_reg != IR_REG_NONE && (op2_reg & IR_REG_SPILL_LOAD)) {
op2_reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, type, op2_reg, insn->op2);
}
if (def_reg != op2_reg) {
if (op2_reg != IR_REG_NONE) {
ir_emit_mov(ctx, type, def_reg, op2_reg);
} else {
ir_emit_load(ctx, type, def_reg, insn->op2);
}
}
/* Round up: def_reg = (def_reg + alignment-1) & ~(alignment-1). */
| ASM_REG_IMM_OP add, IR_ADDR, def_reg, (alignment-1)
| ASM_REG_IMM_OP and, IR_ADDR, def_reg, ~(alignment-1)
| ASM_REG_REG_OP sub, IR_ADDR, IR_REG_RSP, def_reg
}
/* The result is the stack pointer value after the adjustment. */
if (def_reg != IR_REG_NONE) {
| mov Ra(def_reg), Ra(IR_REG_RSP)
if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
ir_emit_store(ctx, insn->type, def, def_reg);
}
} else {
/* No result register: store the stack pointer into def's slot. */
ir_emit_store(ctx, IR_ADDR, def, IR_REG_STACK_POINTER);
}
}
/*
 * Emit code for the SWITCH instruction that terminates block `b`.
 *
 * The successors of the block start with either CASE_VAL (one per case
 * value) or CASE_DEFAULT. The function collects the minimum and maximum
 * case values and then emits one of two forms:
 *
 *  - a jump table in the jmp_table section when the value range is dense
 *    enough ((max - min) < count * 8), guarded by range checks that branch
 *    to the default label;
 *  - otherwise a linear sequence of compare/je pairs followed by a jump to
 *    the default label (if any).
 *
 * ctx->regs[def][2] holds the switch value and ctx->regs[def][3] a
 * temporary register (required on 64-bit targets).
 */
static void ir_emit_switch(ir_ctx *ctx, int b, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type;
	ir_block *bb;
	ir_insn *use_insn, *val;
	uint32_t n, *p, use_block;
	int i;
	int label, default_label = 0;
	int count = 0;
	ir_val min, max;
	int64_t offset;
	ir_reg op2_reg, tmp_reg;

	type = ctx->ir_base[insn->op2].type;
	/* Start with an empty range: min at the type's maximum, max at its minimum. */
	if (IR_IS_TYPE_SIGNED(type)) {
		min.u64 = 0x7fffffffffffffff;
		max.u64 = 0x8000000000000000;
	} else {
		min.u64 = 0xffffffffffffffff;
		max.u64 = 0x0;
	}

	/* First pass over the successors: value range, case count, default label. */
	bb = &ctx->cfg_blocks[b];
	p = &ctx->cfg_edges[bb->successors];
	for (n = bb->successors_count; n != 0; p++, n--) {
		use_block = *p;
		use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start];
		if (use_insn->op == IR_CASE_VAL) {
			val = &ctx->ir_base[use_insn->op2];
			if (IR_IS_TYPE_SIGNED(type)) {
				IR_ASSERT(IR_IS_TYPE_SIGNED(val->type));
				min.i64 = IR_MIN(min.i64, val->val.i64);
				max.i64 = IR_MAX(max.i64, val->val.i64);
			} else {
				IR_ASSERT(!IR_IS_TYPE_SIGNED(val->type));
				min.u64 = (int64_t)IR_MIN(min.u64, val->val.u64);
				max.u64 = (int64_t)IR_MAX(max.u64, val->val.u64);
			}
			count++;
		} else if (use_insn->op == IR_CASE_DEFAULT) {
			default_label = ir_skip_empty_target_blocks(ctx, use_block);
		} else {
			IR_ASSERT(0);
		}
	}

	op2_reg = ctx->regs[def][2];
	tmp_reg = ctx->regs[def][3];

	IR_ASSERT(op2_reg != IR_REG_NONE);
	IR_ASSERT(tmp_reg != IR_REG_NONE || sizeof(void*) != 8);
	if (op2_reg & IR_REG_SPILL_LOAD) {
		op2_reg &= ~IR_REG_SPILL_LOAD;
		ir_emit_load(ctx, type, op2_reg, insn->op2);
	} else if (IR_IS_CONST_REF(insn->op2)) {
		ir_emit_load(ctx, type, op2_reg, insn->op2);
	}

	/* Generate a table jmp or a sequence of compares */
	if ((max.i64-min.i64) < count * 8) {
		int *labels = ir_mem_malloc(sizeof(int) * (max.i64 - min.i64 + 1));

		/* Every table slot defaults to the default label ... */
		for (i = 0; i <= (max.i64 - min.i64); i++) {
			labels[i] = default_label;
		}
		/* ... and the slots of the listed case values get their own label. */
		p = &ctx->cfg_edges[bb->successors];
		for (n = bb->successors_count; n != 0; p++, n--) {
			use_block = *p;
			use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start];
			if (use_insn->op == IR_CASE_VAL) {
				val = &ctx->ir_base[use_insn->op2];
				label = ir_skip_empty_target_blocks(ctx, use_block);
				labels[val->val.i64 - min.i64] = label;
			}
		}

		/* Upper bound check: value > max goes to the default label. */
		if (IR_IS_32BIT(type, max)) {
			|	ASM_REG_IMM_OP cmp, type, op2_reg, max.i32
		} else {
			IR_ASSERT(ir_type_size[type] == 8);
			IR_ASSERT(sizeof(void*) == 8);
			|.if X64
			|	mov64 Rq(tmp_reg), max.i64
			|	cmp Rq(op2_reg), Rq(tmp_reg)
			|.endif
		}
		if (IR_IS_TYPE_SIGNED(type)) {
			|	jg =>default_label
		} else {
			|	ja =>default_label
		}

		/* Lower bound check: value < min goes to the default label.
		 * Either min is folded into the table displacement (`offset`),
		 * or it is subtracted from the value so that the index becomes
		 * zero based and `offset` is 0. */
		if (IR_IS_32BIT(type, min)) {
			offset = -min.i64 * sizeof(void*);
			if (IR_IS_SIGNED_32BIT(offset)) {
				|	ASM_REG_IMM_OP cmp, type, op2_reg, min.i32
			} else {
				/* The displacement does not fit into 32 bits: subtract
				 * min as an IMMEDIATE (the operand is a value, not a
				 * register number). */
				|	ASM_REG_IMM_OP sub, type, op2_reg, min.i32 // TODO: reg clobbering
				offset = 0;
			}
		} else {
			IR_ASSERT(sizeof(void*) == 8);
			|.if X64
			|	mov64 Rq(tmp_reg), min.i64
			|	ASM_REG_REG_OP sub, type, op2_reg, tmp_reg // TODO: reg clobbering
			offset = 0;
			|.endif
		}
		if (IR_IS_TYPE_SIGNED(type)) {
			|	jl =>default_label
		} else {
			|	jb =>default_label
		}

		/* Widen the index to pointer width and jump through the table. */
		if (sizeof(void*) == 8) {
			|.if X64
			switch (ir_type_size[type]) {
				case 1:
					if (IR_IS_TYPE_SIGNED(type)) {
						|	movsx Ra(op2_reg), Rb(op2_reg)
					} else {
						|	movzx Ra(op2_reg), Rb(op2_reg)
					}
					break;
				case 2:
					if (IR_IS_TYPE_SIGNED(type)) {
						|	movsx Ra(op2_reg), Rw(op2_reg)
					} else {
						|	movzx Ra(op2_reg), Rw(op2_reg)
					}
					break;
				case 4:
					if (IR_IS_TYPE_SIGNED(type)) {
						|	movsxd Ra(op2_reg), Rd(op2_reg)
					} else {
						|	mov Rd(op2_reg), Rd(op2_reg)
					}
					break;
				case 8:
					break;
				default:
					IR_ASSERT(0);
			}
			|	lea Ra(tmp_reg), aword [>1]
			|	jmp aword [Ra(tmp_reg)+Ra(op2_reg)*8+(int32_t)offset]
			|.endif
		} else {
			|.if not X64
			switch (ir_type_size[type]) {
				case 1:
					if (IR_IS_TYPE_SIGNED(type)) {
						|	movsx Ra(op2_reg), Rb(op2_reg)
					} else {
						|	movzx Ra(op2_reg), Rb(op2_reg)
					}
					break;
				case 2:
					if (IR_IS_TYPE_SIGNED(type)) {
						|	movsx Ra(op2_reg), Rw(op2_reg)
					} else {
						|	movzx Ra(op2_reg), Rw(op2_reg)
					}
					break;
				case 4:
					break;
				default:
					IR_ASSERT(0 && "Unsupported type size");
			}
			|// jmp aword [Ra(op2_reg)*4+(int32_t)offset+>1]
			|	lea Ra(op2_reg), aword [Ra(op2_reg)*4+(int32_t)offset] // TODO: reg clobbering
			|	jmp aword [Ra(op2_reg)+>1]
			|.endif
		}

		/* Emit the table itself into the jmp_table section. */
		|.jmp_table
		if (!data->jmp_table_label) {
			data->jmp_table_label = ctx->cfg_blocks_count + ctx->consts_count + 3;
			|=>data->jmp_table_label:
		}
		|.align aword
		|1:
		for (i = 0; i <= (max.i64 - min.i64); i++) {
			|	.aword =>labels[i]
		}
		|.code
		ir_mem_free(labels);
	} else {
		/* Sparse values: one compare + conditional jump per case. */
		p = &ctx->cfg_edges[bb->successors];
		for (n = bb->successors_count; n != 0; p++, n--) {
			use_block = *p;
			use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start];
			if (use_insn->op == IR_CASE_VAL) {
				val = &ctx->ir_base[use_insn->op2];
				label = ir_skip_empty_target_blocks(ctx, use_block);
				if (IR_IS_32BIT(type, val->val)) {
					|	ASM_REG_IMM_OP cmp, type, op2_reg, val->val.i32
				} else {
					IR_ASSERT(sizeof(void*) == 8);
					|.if X64
					|	mov64 Ra(tmp_reg), val->val.i64
					|	ASM_REG_REG_OP cmp, type, op2_reg, tmp_reg
					|.endif
				}
				|	je =>label
			}
		}
		if (default_label) {
			|	jmp =>default_label
		}
	}
}
#include <dlfcn.h>
/*
 * Resolve a symbol name to its address with dlsym().
 *
 * The lookup uses RTLD_DEFAULT when the platform defines it and a NULL
 * handle otherwise. A failed lookup trips IR_ASSERT.
 */
static void *ir_resolve_sym_name(const char *name)
{
#ifdef RTLD_DEFAULT
	void *lookup_scope = RTLD_DEFAULT;
#else
	void *lookup_scope = NULL;
#endif
	void *sym = dlsym(lookup_scope, name);

	IR_ASSERT(sym != NULL);
	return sym;
}
/* One pending register-to-register move, as consumed by ir_parallel_copy(). */
typedef struct _ir_copy {
ir_type type; /* value type; selects an integer or a floating point move */
ir_reg from; /* source register */
ir_reg to; /* destination register */
} ir_copy;
/*
 * Emit the register-to-register moves described by `copies` as one
 * parallel copy, i.e. ordered so that a register is not overwritten while
 * its old value is still needed as a source.
 *
 * copies/count - the moves to perform (entries with from == to are ignored)
 * tmp_reg      - scratch GP register, used when an integer move cannot be
 *                ordered otherwise
 * tmp_fp_reg   - scratch FP register, used likewise for FP moves
 *
 * Always returns 1.
 */
static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_reg, ir_reg tmp_fp_reg)
{
int i;
int8_t *pred, *loc, *types;
ir_reg to, from_reg;
ir_type type;
ir_regset todo, ready;
/* One allocation holds three per-register tables:
 *   loc[r]   - register that currently holds the original value of r
 *   pred[r]  - source register of the copy whose destination is r
 *   types[r] - value type of the copy whose destination is r
 * Only loc and pred are reset to IR_REG_NONE; types[] is written for
 * every destination before it is read. */
loc = ir_mem_malloc(IR_REG_NUM * 3 * sizeof(int8_t));
pred = loc + IR_REG_NUM;
types = pred + IR_REG_NUM;
memset(loc, IR_REG_NONE, IR_REG_NUM * 2 * sizeof(int8_t));
todo = IR_REGSET_EMPTY;
ready = IR_REGSET_EMPTY;
/* Register every non-trivial copy. */
for (i = 0; i < count; i++) {
from_reg = copies[i].from;
to = copies[i].to;
if (from_reg != to) {
loc[from_reg] = from_reg;
pred[to] = from_reg;
types[to] = copies[i].type;
IR_REGSET_INCL(todo, to);
}
}
/* A destination that is not itself a source can be written right away. */
IR_REGSET_FOREACH(todo, i) {
if (loc[i] == IR_REG_NONE) {
IR_REGSET_INCL(ready, i);
}
} IR_REGSET_FOREACH_END();
while (todo != IR_REGSET_EMPTY) {
ir_ref /*a, b,*/ c;
/* Drain the ready set: copy from wherever the source value lives now. */
while (ready != IR_REGSET_EMPTY) {
to = IR_REGSET_FIRST(ready);
IR_REGSET_EXCL(ready, to);
from_reg = pred[to];
type = types[to];
c = loc[from_reg];
if (IR_IS_TYPE_INT(type)) {
ir_emit_mov(ctx, type, to, c);
} else {
ir_emit_fp_mov(ctx, type, to, c);
}
loc[from_reg] = to;
/* The source was read from its original register and is itself a
 * destination: it may be overwritten now. */
if (from_reg == c && pred[from_reg] != IR_REG_NONE) {
IR_REGSET_INCL(ready, from_reg);
}
}
/* Nothing is ready: take the next pending destination. If it does not
 * yet hold its value, save its current content in the scratch register
 * so that the destination itself becomes writable. */
to = IR_REGSET_FIRST(todo);
IR_REGSET_EXCL(todo, to);
from_reg = pred[to];
if (to != loc[from_reg]) {
type = types[to];
if (IR_IS_TYPE_INT(type)) {
IR_ASSERT(tmp_reg != IR_REG_NONE);
IR_ASSERT(tmp_reg >= IR_REG_GP_FIRST && tmp_reg <= IR_REG_GP_LAST);
ir_emit_mov(ctx, type, tmp_reg, to);
loc[to] = tmp_reg;
} else {
IR_ASSERT(tmp_fp_reg != IR_REG_NONE);
IR_ASSERT(tmp_fp_reg >= IR_REG_FP_FIRST && tmp_fp_reg <= IR_REG_FP_LAST);
ir_emit_fp_mov(ctx, type, tmp_fp_reg, to);
loc[to] = tmp_fp_reg;
}
IR_REGSET_INCL(ready, to);
}
}
/* pred and types live inside the same allocation as loc. */
ir_mem_free(loc);
return 1;
}
/*
 * Compute the number of stack bytes needed for the arguments of the call
 * `insn` that do not fit into argument registers.
 *
 * Arguments are the inputs 3..n of the instruction. Integer and floating
 * point arguments are counted separately against their register budgets;
 * every overflowing argument takes max(sizeof(void*), size of its type)
 * bytes.
 */
static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn)
{
	int max_int_regs = IR_REG_INT_ARGS;
	int max_fp_regs = IR_REG_FP_ARGS;
	int int_seen = 0;
	int fp_seen = 0;
	int32_t bytes = 0;
	int idx, last;

#if !defined(_WIN64) && !defined(__x86_64__)
	/* 32-bit fastcall uses its own register budgets. */
	if (sizeof(void*) == 4 && ir_is_fastcall(ctx, insn)) {
		max_int_regs = IR_REG_INT_FCARGS;
		max_fp_regs = IR_REG_FP_FCARGS;
	}
#endif

	last = ir_input_edges_count(ctx, insn);
	for (idx = 3; idx <= last; idx++) {
		ir_type type = ctx->ir_base[insn->ops[idx]].type;
		int on_stack;

		if (IR_IS_TYPE_INT(type)) {
			on_stack = (int_seen >= max_int_regs);
			int_seen++;
		} else if (IR_IS_TYPE_FP(type)) {
			on_stack = (fp_seen >= max_fp_regs);
			fp_seen++;
		} else {
			IR_ASSERT(0);
			continue;
		}
		if (on_stack) {
			bytes += IR_MAX(sizeof(void*), ir_type_size[type]);
		}
	}
	return bytes;
}
/*
 * Emit the code that places the arguments of the call `insn` (inputs 3..n)
 * into argument registers and onto the stack.
 *
 * def     - reference of the call instruction (indexes ctx->regs)
 * tmp_reg - scratch GP register for memory-to-memory moves, wide
 *           immediates and for ir_parallel_copy()
 *
 * Works in three passes: (1) stack arguments are stored and
 * register-to-register moves collected, (2) the collected moves are
 * performed as one parallel copy, (3) constants and memory operands are
 * loaded into their argument registers.
 *
 * Returns the number of stack bytes used by the arguments (0 when there are
 * no arguments, or for IR_CALL with IR_PREALLOCATED_STACK). The 16-byte
 * aligned amount is subtracted from the stack pointer here and added to
 * data->call_stack_size; this function does not undo either.
 */
static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg tmp_reg)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
int j, n;
ir_ref arg;
ir_insn *arg_insn;
uint8_t type;
ir_reg src_reg, dst_reg;
int int_param = 0;
int fp_param = 0;
int count = 0;
int int_reg_params_count = IR_REG_INT_ARGS;
int fp_reg_params_count = IR_REG_FP_ARGS;
const int8_t *int_reg_params = _ir_int_reg_params;
const int8_t *fp_reg_params = _ir_fp_reg_params;
int32_t used_stack, stack_offset = 0;
ir_copy *copies;
bool has_mem_const_args = 0;
ir_reg tmp_fp_reg = IR_REG_FP_LAST; /* Temporary register for FP loads and swap */
n = ir_input_edges_count(ctx, insn);
if (n < 3) {
/* no arguments */
return 0;
}
#if !defined(_WIN64) && !defined(__x86_64__)
/* 32-bit fastcall uses its own argument register tables. */
if (sizeof(void*) == 4 && ir_is_fastcall(ctx, insn)) {
int_reg_params_count = IR_REG_INT_FCARGS;
fp_reg_params_count = IR_REG_FP_FCARGS;
int_reg_params = _ir_int_fc_reg_params;
fp_reg_params = _ir_fp_fc_reg_params;
}
#endif
if (insn->op == IR_CALL && (ctx->flags & IR_PREALLOCATED_STACK)) {
// TODO: support for preallocated stack
used_stack = 0;
} else {
used_stack = ir_call_used_stack(ctx, insn);
/* Stack must be 16 byte aligned */
int32_t aligned_stack = IR_ALIGNED_SIZE(used_stack, 16);
data->call_stack_size += aligned_stack;
if (aligned_stack) {
| sub Ra(IR_REG_RSP), aligned_stack
}
}
/* 1. move all arguments that should be passed through stack
 * and collect arguments that should be passed through registers */
copies = ir_mem_malloc((n - 2) * sizeof(ir_copy));
for (j = 3; j <= n; j++) {
arg = insn->ops[j];
src_reg = ctx->regs[def][j];
arg_insn = &ctx->ir_base[arg];
type = arg_insn->type;
/* Pick the destination: the next free argument register of the
 * matching class, or IR_REG_NONE for a stack argument. */
if (IR_IS_TYPE_INT(type)) {
if (int_param < int_reg_params_count) {
dst_reg = int_reg_params[int_param];
} else {
dst_reg = IR_REG_NONE; /* pass argument through stack */
}
int_param++;
} else if (IR_IS_TYPE_FP(type)) {
if (fp_param < fp_reg_params_count) {
dst_reg = fp_reg_params[fp_param];
} else {
dst_reg = IR_REG_NONE; /* pass argument through stack */
}
fp_param++;
} else {
IR_ASSERT(0);
}
if (dst_reg != IR_REG_NONE && (IR_IS_CONST_REF(arg) || src_reg == IR_REG_NONE)) {
/* delay CONST->REG and MEM->REG moves to third pass */
has_mem_const_args = 1;
continue;
}
if (dst_reg != IR_REG_NONE) {
IR_ASSERT(src_reg != IR_REG_NONE);
if (src_reg & IR_REG_SPILL_LOAD) {
src_reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, type, src_reg, arg);
}
if (src_reg != dst_reg) {
/* delay REG->REG moves to second pass */
copies[count].type = type;
copies[count].from = src_reg;
copies[count].to = dst_reg;
count++;
}
} else {
/* Pass argument through stack (REG->MEM, IMM->MEM, MEM->MEM moves) */
if (IR_IS_TYPE_INT(type)) {
if (IR_IS_CONST_REF(arg)) {
ir_insn *val_insn = &ctx->ir_base[arg];
if (val_insn->op == IR_STR) {
/* String constant: mark it for emission and pass its address. */
int label = ctx->cfg_blocks_count - arg;
val_insn->const_flags |= IR_CONST_EMIT;
IR_ASSERT(tmp_reg != IR_REG_NONE);
| lea Ra(tmp_reg), aword [=>label]
| mov [Ra(IR_REG_RSP)+stack_offset], Ra(tmp_reg)
} else if (IR_IS_SIGNED_32BIT(val_insn->val.i64)) {
if (ir_type_size[type] <= 4) {
| mov dword [Ra(IR_REG_RSP)+stack_offset], val_insn->val.i32
} else {
IR_ASSERT(sizeof(void*) == 8);
|.if X64
| mov qword [rsp+stack_offset], val_insn->val.i32
|.endif
}
} else {
/* 64-bit immediate: goes through the scratch register. */
IR_ASSERT(sizeof(void*) == 8);
|.if X64
IR_ASSERT(tmp_reg != IR_REG_NONE);
| mov64 Ra(tmp_reg), val_insn->val.i64
| mov [rsp+stack_offset], Ra(tmp_reg)
|.endif
}
} else if (src_reg != IR_REG_NONE) {
if (src_reg & IR_REG_SPILL_LOAD) {
src_reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, type, src_reg, arg);
}
ir_emit_store_mem_int(ctx, type, IR_REG_STACK_POINTER, stack_offset, src_reg);
} else {
/* MEM->MEM: go through the scratch register. */
IR_ASSERT(tmp_reg != IR_REG_NONE);
ir_emit_load(ctx, type, tmp_reg, arg);
ir_emit_store_mem_int(ctx, type, IR_REG_STACK_POINTER, stack_offset, tmp_reg);
}
} else {
if (IR_IS_CONST_REF(arg)) {
/* FP constant: its bit pattern is stored with integer moves
 * where possible. */
ir_val *val = &ctx->ir_base[arg].val;
if (ir_type_size[type] == 4) {
| mov dword [Ra(IR_REG_RSP)+stack_offset], val->i32
} else if (sizeof(void*) == 8) {
|.if X64
if (val->i64 == 0) {
| mov qword [rsp+stack_offset], val->i32
} else {
IR_ASSERT(tmp_reg != IR_REG_NONE);
| mov64 Rq(tmp_reg), val->i64
| mov qword [rsp+stack_offset], Ra(tmp_reg)
}
|.endif
} else {
ir_emit_load(ctx, type, tmp_fp_reg, arg);
ir_emit_store_mem_fp(ctx, IR_DOUBLE, IR_REG_STACK_POINTER, stack_offset, tmp_fp_reg);
}
} else if (src_reg != IR_REG_NONE) {
if (src_reg & IR_REG_SPILL_LOAD) {
src_reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, type, src_reg, arg);
}
ir_emit_store_mem_fp(ctx, type, IR_REG_STACK_POINTER, stack_offset, src_reg);
} else {
/* MEM->MEM: go through the scratch FP register. */
/* NOTE(review): the value is loaded as `type` but stored as
 * IR_DOUBLE, unlike the register path above which stores
 * `type` - confirm this is intended for IR_FLOAT arguments. */
IR_ASSERT(tmp_fp_reg != IR_REG_NONE);
ir_emit_load(ctx, type, tmp_fp_reg, arg);
ir_emit_store_mem_fp(ctx, IR_DOUBLE, IR_REG_STACK_POINTER, stack_offset, tmp_fp_reg);
}
}
/* Advance by the same slot size that ir_call_used_stack() counts. */
stack_offset += IR_MAX(sizeof(void*), ir_type_size[type]);
}
}
/* 2. move all arguments that should be passed from one register to another (REG->REG movs) */
if (count) {
ir_parallel_copy(ctx, copies, count, tmp_reg, tmp_fp_reg);
}
ir_mem_free(copies);
/* 3. move the remaining memory and immediate values to registers */
if (has_mem_const_args) {
/* Recompute the register assignment exactly as in the first pass. */
int_param = 0;
fp_param = 0;
for (j = 3; j <= n; j++) {
arg = insn->ops[j];
src_reg = ctx->regs[def][j];
arg_insn = &ctx->ir_base[arg];
type = arg_insn->type;
if (IR_IS_TYPE_INT(type)) {
if (int_param < int_reg_params_count) {
dst_reg = int_reg_params[int_param];
} else {
dst_reg = IR_REG_NONE; /* argument already passed through stack */
}
int_param++;
} else if (IR_IS_TYPE_FP(type)) {
if (fp_param < fp_reg_params_count) {
dst_reg = fp_reg_params[fp_param];
} else {
dst_reg = IR_REG_NONE; /* argument already passed through stack */
}
fp_param++;
} else {
IR_ASSERT(0);
}
if (dst_reg != IR_REG_NONE && (IR_IS_CONST_REF(arg) || src_reg == IR_REG_NONE)) {
if (IR_IS_TYPE_INT(type)) {
if (IR_IS_CONST_REF(arg)) {
if (type == IR_ADDR) {
ir_insn *val_insn = &ctx->ir_base[arg];
if (val_insn->op == IR_STR) {
/* String constant: mark it for emission and pass its address. */
int label = ctx->cfg_blocks_count - arg;
val_insn->const_flags |= IR_CONST_EMIT;
| lea Ra(dst_reg), aword [=>label]
continue;
}
} else if (ir_type_size[type] == 1) {
/* 1-byte constants are loaded with type IR_ADDR instead. */
type = IR_ADDR;
}
}
ir_emit_load(ctx, type, dst_reg, arg);
} else {
ir_emit_load(ctx, type, dst_reg, arg);
}
}
}
}
return used_stack;
}
/*
 * Emit a CALL: set up the arguments, call the target insn->op2, release
 * the stack used by the arguments and move the return value into the
 * location assigned to `def`.
 */
static void ir_emit_call(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
ir_reg def_reg;
/* ctx->regs[def][1] is passed as the scratch register for argument moves. */
int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]);
if (IR_IS_CONST_REF(insn->op2)) {
/* Constant target: a symbol resolved by name (IR_FUNC) or a raw address. */
ir_insn *addr_insn = &ctx->ir_base[insn->op2];
void *addr;
IR_ASSERT(addr_insn->type == IR_ADDR);
if (addr_insn->op == IR_FUNC) {
addr = ir_resolve_sym_name(ir_get_str(ctx, addr_insn->val.addr));
} else {
addr = (void*)addr_insn->val.addr;
}
if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(addr)) {
| call aword &addr
} else {
/* Target not usable as a 32-bit address operand: call through rax. */
|.if X64
if (IR_IS_SIGNED_32BIT(addr)) {
| mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 <imm-32-bit>
} else {
| mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 <imm-64-bit>
}
| call rax
|.endif
}
} else {
ir_reg op2_reg = ctx->regs[def][2];
if (op2_reg != IR_REG_NONE && ctx->rules[insn->op2] != IR_SKIP_MEM) {
/* Target address in a register. */
if (op2_reg & IR_REG_SPILL_LOAD) {
op2_reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
}
| call Ra(op2_reg)
} else {
/* Target address in memory: a fused load or the spill slot of op2. */
int32_t offset;
if (ctx->rules[insn->op2] == IR_SKIP_MEM) {
offset = ir_fuse_load(ctx, insn->op2, &op2_reg);
} else {
offset = ir_ref_spill_slot(ctx, insn->op2, &op2_reg);
}
if (op2_reg != IR_REG_NONE) {
| call aword [Ra(op2_reg)+offset]
} else {
| call aword [offset]
}
}
}
if (used_stack) {
/* Undo the stack adjustment made by ir_emit_arguments(). */
int32_t aligned_stack = IR_ALIGNED_SIZE(used_stack, 16);
data->call_stack_size -= aligned_stack;
if (ir_is_fastcall(ctx, insn)) {
/* For fastcall only the alignment padding is released here. */
aligned_stack -= used_stack;
if (aligned_stack) {
| add Ra(IR_REG_RSP), aligned_stack
}
} else {
| add Ra(IR_REG_RSP), aligned_stack
}
}
if (insn->type != IR_VOID) {
/* Move the return value from the return register to def's location. */
if (IR_IS_TYPE_INT(insn->type)) {
def_reg = IR_REG_NUM(ctx->regs[def][0]);
if (def_reg != IR_REG_NONE) {
if (def_reg != IR_REG_INT_RET1) {
ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1);
}
if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
ir_emit_store(ctx, insn->type, def, def_reg);
}
} else if (ctx->use_lists[def].count > 1) {
/* No register assigned: store the return register to def's slot. */
ir_emit_store(ctx, insn->type, def, IR_REG_INT_RET1);
}
} else if (IR_IS_TYPE_FP(insn->type)) {
def_reg = IR_REG_NUM(ctx->regs[def][0]);
#ifdef IR_REG_FP_RET1
if (def_reg != IR_REG_NONE) {
if (def_reg != IR_REG_FP_RET1) {
ir_emit_fp_mov(ctx, insn->type, def_reg, IR_REG_FP_RET1);
}
if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
ir_emit_store(ctx, insn->type, def, def_reg);
}
} else if (ctx->use_lists[def].count > 1) {
/* No register assigned: store the return register to def's slot. */
ir_emit_store(ctx, insn->type, def, IR_REG_FP_RET1);
}
#else
IR_ASSERT(0); // TODO: float/double return value
#endif
} else {
IR_ASSERT(0);
}
}
}
/*
 * Emit a TAILCALL: set up the arguments, run the epilogue and jump to the
 * target insn->op2.
 *
 * When some arguments have to be passed on the stack the tail call is
 * emitted as a regular call followed by a return.
 */
static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	/* Decide BEFORE emitting anything: ir_emit_call() emits the arguments
	 * itself, so emitting them here as well would reserve the argument
	 * stack twice (and release it once) and would move argument registers
	 * a second time after they have already been overwritten. */
	if (ir_call_used_stack(ctx, insn) != 0) {
		ir_emit_call(ctx, def, insn);
		ir_emit_return_void(ctx);
		return;
	}

	/* Register-only arguments: nothing is pushed, so the frame can be torn
	 * down before the jump. */
	ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]);
	ir_emit_epilogue(ctx);

	if (IR_IS_CONST_REF(insn->op2)) {
		/* Constant target: a symbol resolved by name (IR_FUNC) or a raw address. */
		ir_insn *addr_insn = &ctx->ir_base[insn->op2];
		void *addr;

		IR_ASSERT(addr_insn->type == IR_ADDR);
		if (addr_insn->op == IR_FUNC) {
			addr = ir_resolve_sym_name(ir_get_str(ctx, addr_insn->val.addr));
		} else {
			addr = (void*)addr_insn->val.addr;
		}
		if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(addr)) {
			|	jmp aword &addr
		} else {
			/* Target not usable as a 32-bit address operand: jump through rax. */
			|.if X64
			if (IR_IS_SIGNED_32BIT(addr)) {
				|	mov rax, ((ptrdiff_t)addr)    // 0x48 0xc7 0xc0 <imm-32-bit>
			} else {
				|	mov64 rax, ((ptrdiff_t)addr)  // 0x48 0xb8 <imm-64-bit>
			}
			|	jmp rax
			|.endif
		}
	} else {
		ir_reg op2_reg = ctx->regs[def][2];

		if (op2_reg != IR_REG_NONE && ctx->rules[insn->op2] != IR_SKIP_MEM) {
			/* Target address in a register. */
			if (op2_reg & IR_REG_SPILL_LOAD) {
				op2_reg &= ~IR_REG_SPILL_LOAD;
				ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
			}
			|	jmp Ra(op2_reg)
		} else {
			/* Target address in memory: a fused load or the spill slot of op2. */
			int32_t offset;

			if (ctx->rules[insn->op2] == IR_SKIP_MEM) {
				offset = ir_fuse_load(ctx, insn->op2, &op2_reg);
			} else {
				offset = ir_ref_spill_slot(ctx, insn->op2, &op2_reg);
			}
			if (op2_reg != IR_REG_NONE) {
				|	jmp aword [Ra(op2_reg)+offset]
			} else {
				|	jmp aword [offset]
			}
		}
	}
}
#ifdef IR_SNAPSHOT_HANDLER_DCL
IR_SNAPSHOT_HANDLER_DCL();
#endif
/*
 * Return the jump target described by the constant `addr_insn`: the
 * resolved symbol for IR_FUNC, the stored address otherwise.
 *
 * When IR_SNAPSHOT_HANDLER is defined and insn->op1 is a SNAPSHOT, the
 * handler is given the chance to replace the address.
 */
static void *ir_jmp_addr(ir_ctx *ctx, ir_insn *insn, ir_insn *addr_insn)
{
	void *target;

	IR_ASSERT(addr_insn->type == IR_ADDR);
	if (addr_insn->op != IR_FUNC) {
		IR_ASSERT(addr_insn->op == IR_ADDR || addr_insn->op == IR_FUNC_ADDR);
		target = (void*)addr_insn->val.addr;
	} else {
		target = ir_resolve_sym_name(ir_get_str(ctx, addr_insn->val.addr));
	}

#ifdef IR_SNAPSHOT_HANDLER
	if (ctx->ir_base[insn->op1].op == IR_SNAPSHOT) {
		target = IR_SNAPSHOT_HANDLER(ctx, insn->op1, &ctx->ir_base[insn->op1], target);
	}
#endif

	return target;
}
/*
 * Emit IJMP: an indirect jump to the address insn->op2.
 *
 * The target may be a constant, a fused memory operand, a register or the
 * spill slot of op2.
 */
static void ir_emit_ijmp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
ir_reg op2_reg = ctx->regs[def][2];
if (IR_IS_CONST_REF(insn->op2)) {
/* Constant target (possibly replaced by the snapshot handler). */
void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]);
if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(addr)) {
| jmp aword &addr
} else {
/* Target not usable as a 32-bit address operand: jump through rax. */
|.if X64
if (IR_IS_SIGNED_32BIT(addr)) {
| mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 <imm-32-bit>
} else {
| mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 <imm-64-bit>
}
| jmp rax
|.endif
}
} else if (ctx->rules[insn->op2] == IR_SKIP_MEM) {
/* Target address is read from a fused memory operand. */
int32_t offset;
offset = ir_fuse_load(ctx, insn->op2, &op2_reg);
if (op2_reg == IR_REG_NONE) {
| jmp aword [offset]
} else {
| jmp aword [Ra(op2_reg)+offset]
}
} else if (op2_reg != IR_REG_NONE) {
/* Target address in a register. */
if (op2_reg & IR_REG_SPILL_LOAD) {
op2_reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
}
| jmp Ra(op2_reg)
} else {
/* Target address lives in the spill slot of op2. */
ir_reg fp;
int32_t offset = ir_ref_spill_slot(ctx, insn->op2, &fp);
| jmp aword [Ra(fp)+offset]
}
}
/*
 * Emit GUARD / GUARD_NOT: test the integer condition insn->op2 and leave
 * to the address given by insn->op3 when the guard fails.
 *
 * GUARD fails when the condition is zero, GUARD_NOT when it is non-zero.
 * A constant condition either produces an unconditional jump (guard always
 * fails) or no code at all.
 *
 * When the target cannot be encoded as a 32-bit address operand, the
 * address is materialized in rax and reached with `jmp rax`.
 * NOTE(review): this overwrites rax - confirm nothing live is kept there.
 */
static void ir_emit_guard(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg op2_reg = ctx->regs[def][2];
	ir_type type = ctx->ir_base[insn->op2].type;
	void *addr;

	IR_ASSERT(IR_IS_TYPE_INT(type));
	if (IR_IS_CONST_REF(insn->op2)) {
		/* Constant condition: only the always-failing case needs code. */
		if ((insn->op == IR_GUARD && insn->op2 == IR_FALSE) ||
		    (insn->op == IR_GUARD_NOT && insn->op2 == IR_TRUE)) {
			addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);
			if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(addr)) {
				|	jmp aword &addr
			} else {
				|.if X64
				if (IR_IS_SIGNED_32BIT(addr)) {
					|	mov rax, ((ptrdiff_t)addr)    // 0x48 0xc7 0xc0 <imm-32-bit>
				} else {
					|	mov64 rax, ((ptrdiff_t)addr)  // 0x48 0xb8 <imm-64-bit>
				}
				/* Jump TO addr (as the near form above and ir_emit_ijmp()
				 * do), not through the memory it points at. */
				|	jmp rax
				|.endif
			}
		}
		return;
	}

	/* Set the flags from the condition: test a register or compare memory with 0. */
	if (op2_reg != IR_REG_NONE && ctx->rules[insn->op2] != IR_SKIP_MEM) {
		if (op2_reg & IR_REG_SPILL_LOAD) {
			op2_reg &= ~IR_REG_SPILL_LOAD;
			ir_emit_load(ctx, type, op2_reg, insn->op2);
		}
		|	ASM_REG_REG_OP test, type, op2_reg, op2_reg
	} else {
		int32_t offset = 0;

		if (ctx->rules[insn->op2] == IR_SKIP_MEM) {
			offset = ir_fuse_load(ctx, insn->op2, &op2_reg);
		} else {
			offset = ir_ref_spill_slot(ctx, insn->op2, &op2_reg);
		}
		if (op2_reg == IR_REG_NONE) {
			|	ASM_MEM_IMM_OP cmp, type, [offset], 0
		} else {
			|	ASM_MEM_IMM_OP cmp, type, [Ra(op2_reg)+offset], 0
		}
	}

	addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);
	if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(addr)) {
		if (insn->op == IR_GUARD) {
			|	je aword &addr
		} else {
			|	jne aword &addr
		}
	} else {
		/* Far target: skip over an indirect jump when the guard holds. */
		|.if X64
		if (insn->op == IR_GUARD) {
			|	jne >1
		} else {
			|	je >1
		}
		if (IR_IS_SIGNED_32BIT(addr)) {
			|	mov rax, ((ptrdiff_t)addr)    // 0x48 0xc7 0xc0 <imm-32-bit>
		} else {
			|	mov64 rax, ((ptrdiff_t)addr)  // 0x48 0xb8 <imm-64-bit>
		}
		/* Jump TO addr, consistent with the near form above. */
		|	jmp rax
		|1:
		|.endif
	}
}
/*
 * Emit the conditional jump of a guard to the absolute address `addr`.
 *
 * ctx     - compilation context.
 * op      - comparison opcode (IR_EQ, IR_NE, ...) under which the jump to
 *           `addr` must be taken; callers pass the already reversed opcode
 *           where needed.
 * addr    - exit address.
 * int_cmp - non-zero when the flags come from an integer comparison, zero
 *           when they come from a floating point comparison.
 *
 * For floating point comparisons only EQ/NE/LT/GE/LE/GT are supported and
 * the parity flag is consulted as well.
 * NOTE(review): presumably PF flags an unordered (NaN) result of the
 * preceding FP compare - confirm against ir_emit_cmp_fp_common().
 */
static void ir_emit_guard_jcc(ir_ctx *ctx, uint8_t op, void *addr, bool int_cmp)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	if (!int_cmp) {
		switch (op) {
			case IR_GT:
				|	ja &addr
				break;
			case IR_GE:
				|	jae &addr
				break;
			case IR_NE:
				|	jne &addr
				|	jp &addr
				break;
			case IR_EQ:
				|	jp >1
				|	je &addr
				|1:
				break;
			case IR_LT:
				|	jp >1
				|	jb &addr
				|1:
				break;
			case IR_LE:
				|	jp >1
				|	jbe &addr
				|1:
				break;
			default:
				IR_ASSERT(0 && "NIY binary op");
				break;
		}
		return;
	}

	switch (op) {
		/* equality */
		case IR_EQ:
			|	je &addr
			break;
		case IR_NE:
			|	jne &addr
			break;
		/* signed ordering */
		case IR_LT:
			|	jl &addr
			break;
		case IR_LE:
			|	jle &addr
			break;
		case IR_GT:
			|	jg &addr
			break;
		case IR_GE:
			|	jge &addr
			break;
		/* unsigned ordering */
		case IR_ULT:
			|	jb &addr
			break;
		case IR_ULE:
			|	jbe &addr
			break;
		case IR_UGT:
			|	ja &addr
			break;
		case IR_UGE:
			|	jae &addr
			break;
		default:
			IR_ASSERT(0 && "NIY binary op");
			break;
	}
}
/*
 * Emit a GUARD / GUARD_NOT fused with the integer comparison that produces
 * its condition (insn->op2).
 *
 * GUARD exits to the address described by insn->op3 when the comparison is
 * false, GUARD_NOT when it is true.
 *
 * ctx  - compilation context.
 * b    - block number (unused here).
 * def  - reference of the guard instruction (unused here; registers are
 *        taken from the fused comparison, ctx->regs[insn->op2]).
 * insn - the guard instruction.
 *
 * Unsigned comparisons against the constant 0 are simplified:
 *   x <  0  is always false, x >= 0 is always true (no compare is emitted),
 *   x <= 0  becomes x == 0,  x >  0 becomes x != 0.
 * For the two constant outcomes the guard kind decides whether an
 * unconditional exit or nothing at all is emitted.
 */
static void ir_emit_guard_cmp_int(ir_ctx *ctx, int b, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_insn *cmp_insn = &ctx->ir_base[insn->op2];
	ir_op op = cmp_insn->op;
	ir_type type = ctx->ir_base[cmp_insn->op1].type;
	ir_ref op1 = cmp_insn->op1;
	ir_ref op2 = cmp_insn->op2;
	ir_reg op1_reg = ctx->regs[insn->op2][1];
	ir_reg op2_reg = ctx->regs[insn->op2][2];
	void *addr;

	if (op1_reg != IR_REG_NONE && ((op1_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(op1))) {
		op1_reg &= ~IR_REG_SPILL_LOAD;
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	if (op2_reg != IR_REG_NONE && ((op2_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(op2))) {
		op2_reg &= ~IR_REG_SPILL_LOAD;
		if (op1 != op2) {
			/* when both operands are the same value it was loaded above */
			ir_emit_load(ctx, type, op2_reg, op2);
		}
	}

	addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);

	if (IR_IS_CONST_REF(op2) && ctx->ir_base[op2].val.u64 == 0) {
		if (op == IR_ULT || op == IR_UGE) {
			/* "x < 0" is always false, "x >= 0" is always true */
			bool cond_is_true = (op == IR_UGE);
			/* GUARD exits on a false condition, GUARD_NOT on a true one */
			bool always_exit = (insn->op == IR_GUARD) ? !cond_is_true : cond_is_true;

			if (always_exit) {
				if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(addr)) {
					|	jmp aword &addr
				} else {
|.if X64
					if (IR_IS_SIGNED_32BIT(addr)) {
						|	mov rax, ((ptrdiff_t)addr)    // 0x48 0xc7 0xc0 <imm-32-bit>
					} else {
						|	mov64 rax, ((ptrdiff_t)addr)  // 0x48 0xb8 <imm-64-bit>
					}
					/* addr is the target itself, so jump to it directly */
					|	jmp rax
|.endif
				}
			}
			return;
		} else if (op == IR_ULE) {
			op = IR_EQ;
		} else if (op == IR_UGT) {
			op = IR_NE;
		}
	}

	ir_emit_cmp_int_common(ctx, type, cmp_insn, op1_reg, op1, op2_reg, op2);
	if (insn->op == IR_GUARD) {
		op ^= 1; // reverse
	}
	ir_emit_guard_jcc(ctx, op, addr, 1);
}
/*
 * Emit a GUARD / GUARD_NOT fused with the floating point comparison that
 * produces its condition (insn->op2).
 *
 * The comparison is emitted first, then the exit address (insn->op3) is
 * resolved, and finally the conditional jump is emitted. For GUARD the
 * opcode returned by ir_emit_cmp_fp_common() is reversed by flipping its
 * lowest bit before it is handed to ir_emit_guard_jcc().
 *
 * `b` and `def` are not used by this function.
 */
static void ir_emit_guard_cmp_fp(ir_ctx *ctx, int b, ir_ref def, ir_insn *insn)
{
	ir_insn *cmp_insn = &ctx->ir_base[insn->op2];
	ir_op op = ir_emit_cmp_fp_common(ctx, insn->op2, cmp_insn);
	void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);

	if (insn->op == IR_GUARD) {
		op ^= 1; /* exit when the comparison does NOT hold */
	}
	ir_emit_guard_jcc(ctx, op, addr, 0);
}
/*
 * Emit a GUARD / GUARD_NOT on an overflow check (insn->op2).
 *
 * The flag tested depends on the type of the first operand of the checked
 * operation: the overflow flag for signed types, the carry flag for unsigned
 * ones. GUARD jumps to the exit address (insn->op3) when the flag is clear,
 * GUARD_NOT when it is set.
 *
 * `def` is not used by this function.
 */
static void ir_emit_guard_overflow(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);
	ir_insn *ovf_insn = &ctx->ir_base[insn->op2];
	ir_type type = ctx->ir_base[ovf_insn->op1].type;
	bool is_guard = (insn->op == IR_GUARD);

	IR_ASSERT(IR_IS_TYPE_INT(type));
	if (!IR_IS_TYPE_SIGNED(type)) {
		if (is_guard) {
			|	jnc &addr
		} else {
			|	jc &addr
		}
	} else if (is_guard) {
		|	jno &addr
	} else {
		|	jo &addr
	}
}
/*
 * Emit "lea def_reg, [base + index*scale + offset]".
 *
 * ctx       - compilation context.
 * def       - reference of the defining instruction; its result register
 *             (ctx->regs[def][0]) must be assigned.
 * type      - result type; 4-byte types use 32-bit registers, everything
 *             else uses address-sized registers.
 * base_reg  - base register or IR_REG_NONE.
 * index_reg - index register or IR_REG_NONE.
 * scale     - index multiplier: 1, 2, 4 or 8 with a base register;
 *             2, 4 or 8 without one (other values assert).
 * offset    - constant displacement; a zero displacement selects the form
 *             without a displacement operand.
 *
 * At least one of base_reg / index_reg has to be set. The result is stored
 * to its spill slot afterwards when IR_REG_SPILL_STORE is set for it.
 */
static void ir_emit_lea(ir_ctx *ctx, ir_ref def, ir_type type, ir_reg base_reg, ir_reg index_reg, uint8_t scale, int32_t offset)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	const bool is_dword = (ir_type_size[type] == 4);

	IR_ASSERT(def_reg != IR_REG_NONE);

	if (index_reg == IR_REG_NONE) {
		/* [base] or [base+offset] */
		IR_ASSERT(base_reg != IR_REG_NONE);
		if (offset) {
			if (is_dword) {
				|	lea Rd(def_reg), dword [Rd(base_reg)+offset]
			} else {
				|	lea Ra(def_reg), aword [Ra(base_reg)+offset]
			}
		} else if (is_dword) {
			|	lea Rd(def_reg), dword [Rd(base_reg)]
		} else {
			|	lea Ra(def_reg), aword [Ra(base_reg)]
		}
	} else if (base_reg == IR_REG_NONE) {
		/* [index*scale] or [index*scale+offset] */
		switch (scale) {
			case 2:
				if (offset) {
					if (is_dword) {
						|	lea Rd(def_reg), dword [Rd(index_reg)*2+offset]
					} else {
						|	lea Ra(def_reg), aword [Ra(index_reg)*2+offset]
					}
				} else if (is_dword) {
					|	lea Rd(def_reg), dword [Rd(index_reg)*2]
				} else {
					|	lea Ra(def_reg), aword [Ra(index_reg)*2]
				}
				break;
			case 4:
				if (offset) {
					if (is_dword) {
						|	lea Rd(def_reg), dword [Rd(index_reg)*4+offset]
					} else {
						|	lea Ra(def_reg), aword [Ra(index_reg)*4+offset]
					}
				} else if (is_dword) {
					|	lea Rd(def_reg), dword [Rd(index_reg)*4]
				} else {
					|	lea Ra(def_reg), aword [Ra(index_reg)*4]
				}
				break;
			case 8:
				if (offset) {
					if (is_dword) {
						|	lea Rd(def_reg), dword [Rd(index_reg)*8+offset]
					} else {
						|	lea Ra(def_reg), aword [Ra(index_reg)*8+offset]
					}
				} else if (is_dword) {
					|	lea Rd(def_reg), dword [Rd(index_reg)*8]
				} else {
					|	lea Ra(def_reg), aword [Ra(index_reg)*8]
				}
				break;
			default:
				IR_ASSERT(0);
		}
	} else {
		/* [base+index*scale] or [base+index*scale+offset] */
		switch (scale) {
			case 1:
				if (offset) {
					if (is_dword) {
						|	lea Rd(def_reg), dword [Rd(base_reg)+Rd(index_reg)+offset]
					} else {
						|	lea Ra(def_reg), aword [Ra(base_reg)+Ra(index_reg)+offset]
					}
				} else if (is_dword) {
					|	lea Rd(def_reg), dword [Rd(base_reg)+Rd(index_reg)]
				} else {
					|	lea Ra(def_reg), aword [Ra(base_reg)+Ra(index_reg)]
				}
				break;
			case 2:
				if (offset) {
					if (is_dword) {
						|	lea Rd(def_reg), dword [Rd(base_reg)+Rd(index_reg)*2+offset]
					} else {
						|	lea Ra(def_reg), aword [Ra(base_reg)+Ra(index_reg)*2+offset]
					}
				} else if (is_dword) {
					|	lea Rd(def_reg), dword [Rd(base_reg)+Rd(index_reg)*2]
				} else {
					|	lea Ra(def_reg), aword [Ra(base_reg)+Ra(index_reg)*2]
				}
				break;
			case 4:
				if (offset) {
					if (is_dword) {
						|	lea Rd(def_reg), dword [Rd(base_reg)+Rd(index_reg)*4+offset]
					} else {
						|	lea Ra(def_reg), aword [Ra(base_reg)+Ra(index_reg)*4+offset]
					}
				} else if (is_dword) {
					|	lea Rd(def_reg), dword [Rd(base_reg)+Rd(index_reg)*4]
				} else {
					|	lea Ra(def_reg), aword [Ra(base_reg)+Ra(index_reg)*4]
				}
				break;
			case 8:
				if (offset) {
					if (is_dword) {
						|	lea Rd(def_reg), dword [Rd(base_reg)+Rd(index_reg)*8+offset]
					} else {
						|	lea Ra(def_reg), aword [Ra(base_reg)+Ra(index_reg)*8+offset]
					}
				} else if (is_dword) {
					|	lea Rd(def_reg), dword [Rd(base_reg)+Rd(index_reg)*8]
				} else {
					|	lea Ra(def_reg), aword [Ra(base_reg)+Ra(index_reg)*8]
				}
				break;
			default:
				IR_ASSERT(0);
		}
	}

	if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}
/*
 * Emit a load of a thread-local address into the result register of `def`.
 *
 * The first memory access is made with a segment override: "fs" on x86-64,
 * "gs" on 32-bit x86.
 *   - insn->op3 == 0: a single segment-relative load from [insn->op2].
 *   - insn->op3 != 0: a segment-relative load from [0x8] (x86-64) or [0x4]
 *     (x86), followed by two dependent loads at displacements insn->op2 and
 *     insn->op3.
 * NOTE(review): presumably op2/op3 are TLS offsets/indexes supplied by the
 * embedder - confirm against the producer of IR_TLS.
 *
 * Nothing is emitted when `def` has exactly one use. The result is written
 * to its spill slot when IR_REG_SPILL_STORE is set.
 */
static void ir_emit_tls(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
ir_reg reg = IR_REG_NUM(ctx->regs[def][0]);
if (ctx->use_lists[def].count == 1) {
/* dead load: a single entry in the use list is treated as "result unused" */
return;
}
/* The C conditionals below are "||" lines so that DynASM drops them together
 * with the assembler lines of the architecture that is not being built. */
|.if X64
| fs
|| if (!insn->op3) {
| mov Ra(reg), aword [insn->op2]
|| } else {
| mov Ra(reg), [0x8]
| mov Ra(reg), aword [Ra(reg)+insn->op2]
| mov Ra(reg), aword [Ra(reg)+insn->op3]
|| }
|.else
| gs
|| if (!insn->op3) {
| mov Ra(reg), aword [insn->op2]
|| } else {
| mov Ra(reg), [0x4]
| mov Ra(reg), aword [Ra(reg)+insn->op2]
| mov Ra(reg), aword [Ra(reg)+insn->op3]
|| }
| .endif
if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
ir_emit_store(ctx, IR_ADDR, def, reg);
}
}
/*
 * Emit the EXITCALL sequence: save the CPU state on the stack, call the
 * handler given by the constant insn->op2, and deliver its return value as
 * the result of `def`.
 *
 * Save area layout, relative to the stack pointer after the initial "sub":
 *   x86-64: 16 general purpose slots of 8 bytes (slot 4 receives the stack
 *           pointer value itself), then xmm0..xmm15 stored as 8 bytes each.
 *   x86:    8 general purpose slots of 4 bytes (slot 4 receives esp), then
 *           xmm0..xmm7 stored as 8 bytes each.
 * Only the low 64 bits of each xmm register are stored (movsd).
 *
 * The handler receives the word located just above the reserved area as its
 * first argument and a pointer to the save area as its second argument.
 * NOTE(review): presumably that word is the exit number pushed by the code
 * generated in ir_emit_exitgroup() - confirm. This would also explain why the
 * final "add" releases one machine word more than the "sub" reserved.
 *
 * Only a constant handler address is supported; anything else asserts.
 */
static void ir_emit_exitcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
IR_ASSERT(def_reg != IR_REG_NONE);
|.if X64
| sub rsp, 16*8+16*8+8 /* CPU regs + SSE regs */
| mov aword [rsp+0*8], rax
| mov aword [rsp+1*8], rcx
| mov aword [rsp+2*8], rdx
| mov aword [rsp+3*8], rbx
| mov aword [rsp+5*8], rbp
| mov aword [rsp+6*8], rsi
| mov aword [rsp+7*8], rdi
| mov aword [rsp+8*8], r8
| mov aword [rsp+9*8], r9
| mov aword [rsp+10*8], r10
| mov aword [rsp+11*8], r11
| mov aword [rsp+12*8], r12
| mov aword [rsp+13*8], r13
| mov aword [rsp+14*8], r14
| mov aword [rsp+15*8], r15
| movsd qword [rsp+16*8+0*8], xmm0
| movsd qword [rsp+16*8+1*8], xmm1
| movsd qword [rsp+16*8+2*8], xmm2
| movsd qword [rsp+16*8+3*8], xmm3
| movsd qword [rsp+16*8+4*8], xmm4
| movsd qword [rsp+16*8+5*8], xmm5
| movsd qword [rsp+16*8+6*8], xmm6
| movsd qword [rsp+16*8+7*8], xmm7
| movsd qword [rsp+16*8+8*8], xmm8
| movsd qword [rsp+16*8+9*8], xmm9
| movsd qword [rsp+16*8+10*8], xmm10
| movsd qword [rsp+16*8+11*8], xmm11
| movsd qword [rsp+16*8+12*8], xmm12
| movsd qword [rsp+16*8+13*8], xmm13
| movsd qword [rsp+16*8+14*8], xmm14
| movsd qword [rsp+16*8+15*8], xmm15
|
| mov Ra(IR_REG_INT_ARG2), rsp
| mov aword [rsp+4*8], Ra(IR_REG_INT_ARG2)
| mov Ra(IR_REG_INT_ARG1), [rsp+16*8+16*8+8]
|.else
| sub esp, 8*4+8*8+12 /* CPU regs + SSE regs */
| mov aword [esp+0*4], eax
| mov aword [esp+1*4], ecx
| mov aword [esp+2*4], edx
| mov aword [esp+3*4], ebx
| mov aword [esp+5*4], ebp
| mov aword [esp+6*4], esi
| mov aword [esp+7*4], edi
| movsd qword [esp+8*4+0*8], xmm0
| movsd qword [esp+8*4+1*8], xmm1
| movsd qword [esp+8*4+2*8], xmm2
| movsd qword [esp+8*4+3*8], xmm3
| movsd qword [esp+8*4+4*8], xmm4
| movsd qword [esp+8*4+5*8], xmm5
| movsd qword [esp+8*4+6*8], xmm6
| movsd qword [esp+8*4+7*8], xmm7
|
| mov Ra(IR_REG_INT_FCARG2), esp
| mov aword [esp+4*4], Ra(IR_REG_INT_FCARG2)
| mov Ra(IR_REG_INT_FCARG1), [esp+8*4+8*8+12]
|.endif
/* Call the handler: directly when reachable with a 32-bit displacement,
 * otherwise through rax (x86-64 only). */
if (IR_IS_CONST_REF(insn->op2)) {
void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]);
if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(addr)) {
| call aword &addr
} else {
|.if X64
if (IR_IS_SIGNED_32BIT(addr)) {
| mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 <imm-32-bit>
} else {
| mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 <imm-64-bit>
}
| call rax
|.endif
}
} else {
IR_ASSERT(0);
}
// restore SP (the save area plus one extra machine word)
|.if X64
| add rsp, 16*8+16*8+16 /* CPU regs + SSE regs */
|.else
| add esp, 8*4+8*8+16 /* CPU regs + SSE regs */
|.endif
/* Move the handler's return value into the result register of `def` */
if (def_reg != IR_REG_INT_RET1) {
ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1);
}
if (ctx->regs[def][0] & IR_REG_SPILL_STORE) {
ir_emit_store(ctx, insn->type, def, def_reg);
}
}
/*
 * Emit a group of exit point entries.
 *
 * insn->op2 is the number of the first exit point of the group and
 * insn->op3 the number of entries. Every entry except the last one is
 * "push byte <index>" followed by a hand-encoded short jump (opcode 0xeb) to
 * the common tail; the last entry is only the push and falls through. Each
 * full entry is 4 bytes long, which is what the displacement
 * 4*(count-index)-6 relies on. The tail adds the number of the first exit
 * point to the value that was just pushed.
 *
 * `def` is not used by this function.
 */
static void ir_emit_exitgroup(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	uint32_t first_exit_point = insn->op2;
	uint32_t exit_points_per_group = insn->op3;
	uint32_t last_entry = exit_points_per_group - 1;
	uint32_t i = 0;

	while (i < last_entry) {
		|	push byte i
		|	.byte 0xeb, (4*(exit_points_per_group-i)-6) // jmp >1
		i++;
	}
	/* the last entry needs no jump: the tail follows immediately */
	|	push byte i
	|// 1:
	|	add aword [r4], first_exit_point
}
/*
 * Emit one move of the de-SSA sequence for the control flow edge leaving
 * block data->dessa_from_block (which must have exactly one successor).
 *
 * ctx  - compilation context.
 * type - type of the value being moved.
 * from - source reference; 0 selects the temporary register reserved on the
 *        block's end instruction (see ir_fix_dessa_tmps).
 * to   - destination PHI reference; 0 selects that same temporary.
 *
 * Depending on where source and destination live, this emits a
 * register-to-register move, a load, a store, an immediate store (integer
 * constants that fit) or a memory-to-memory copy through a second temporary.
 *
 * Always returns 1.
 */
static int ir_emit_dessa_move(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
uint32_t from_block = data->dessa_from_block;
ir_block *from_bb = &ctx->cfg_blocks[from_block];
ir_ref ref = from_bb->end;
int8_t to_reg, from_reg;
ir_block *to_bb;
int j, k = 0;
ir_ref phi = 0;
bool spill_store = 0;
IR_ASSERT(from_bb->successors_count == 1);
to_bb = &ctx->cfg_blocks[ctx->cfg_edges[from_bb->successors]];
/* Find which PHI operand corresponds to this edge: predecessor j of the
 * target block is read as PHI operand j + 2. */
for (j = 0; j < to_bb->predecessors_count; j++) {
if (ctx->cfg_edges[to_bb->predecessors + j] == from_block) {
k = j + 2;
break;
}
}
IR_ASSERT(k != 0);
if (to) {
phi = to;
} else {
/* Moving into the temporary: locate the PHI of the target block that
 * reads `from` through operand k. */
ir_use_list *use_list = &ctx->use_lists[from];
for (j = 0; j < use_list->count; j++) {
ir_ref use = ctx->use_edges[use_list->refs + j];
ir_insn *insn = &ctx->ir_base[use];
if (insn->op == IR_PHI && insn->ops[k] == from && insn->op1 == to_bb->start) {
phi = use;
break;
}
}
}
IR_ASSERT(phi != 0);
if (IR_IS_TYPE_INT(type)) {
to_reg = to ? ctx->regs[phi][0] : ctx->regs[ref][0]; /* temporary register for integer swap (see ir_fix_dessa_tmps) */
from_reg = from ? ctx->regs[phi][k] : ctx->regs[ref][0]; /* temporary register for integer swap (see ir_fix_dessa_tmps) */
if (from_reg != IR_REG_NONE && (from_reg & IR_REG_SPILL_LOAD)) {
from_reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, type, from_reg, from);
}
if (to_reg != IR_REG_NONE && (to_reg & IR_REG_SPILL_STORE)) {
if (from_reg != IR_REG_NONE) {
/* store directly into spill slot (valid only when to_reg register is not reused) */
to_reg = IR_REG_NONE;
} else {
/* load into to_reg first and write it to the spill slot at the end */
to_reg &= ~IR_REG_SPILL_STORE;
spill_store = 1;
}
}
if (from_reg != IR_REG_NONE && to_reg != IR_REG_NONE) {
if (from_reg != to_reg) {
ir_emit_mov(ctx, type, to_reg, from_reg);
}
} else if (to_reg != IR_REG_NONE) {
ir_emit_load(ctx, type, to_reg, from);
} else if (from_reg != IR_REG_NONE) {
ir_emit_store(ctx, type, to, from_reg);
} else if (IR_IS_CONST_REF(from) && (ir_type_size[type] != 8 || IR_IS_SIGNED_32BIT(ctx->ir_base[from].val.i64))) {
/* constant that fits an immediate operand: store it straight to memory */
ir_reg fp;
int32_t offset = ir_ref_spill_slot(ctx, to, &fp);
| ASM_MEM_IMM_OP mov, type, [Ra(fp)+offset], ctx->ir_base[from].val.i32
} else if (IR_IS_CONST_REF(from) || !ir_is_same_mem(ctx, from, to)) {
from_reg = ctx->regs[ref][1]; /* temporary register for int mem->mem (see ir_fix_dessa_tmps) */
IR_ASSERT(from_reg != IR_REG_NONE);
ir_emit_load(ctx, type, from_reg, from);
ir_emit_store(ctx, type, to, from_reg);
}
if (spill_store) {
ir_emit_store(ctx, type, to, to_reg);
}
} else {
/* Same scheme for non-integer types, using the temporaries kept in slots 2
 * and 3 and without the immediate-store shortcut. */
to_reg = to ? ctx->regs[phi][0] : ctx->regs[ref][2]; /* temporary register for fp swap (see ir_fix_dessa_tmps) */
from_reg = from ? ctx->regs[phi][k] : ctx->regs[ref][2]; /* temporary register for fp swap (see ir_fix_dessa_tmps) */
if (from_reg != IR_REG_NONE && (from_reg & IR_REG_SPILL_LOAD)) {
from_reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, type, from_reg, from);
}
if (to_reg != IR_REG_NONE && (to_reg & IR_REG_SPILL_STORE)) {
if (from_reg != IR_REG_NONE) {
/* store directly into spill slot (valid only when to_reg register is not reused) */
to_reg = IR_REG_NONE;
} else {
to_reg &= ~IR_REG_SPILL_STORE;
spill_store = 1;
}
}
if (from_reg != IR_REG_NONE && to_reg != IR_REG_NONE) {
if (from_reg != to_reg) {
ir_emit_fp_mov(ctx, type, to_reg, from_reg);
}
} else if (to_reg != IR_REG_NONE) {
ir_emit_load(ctx, type, to_reg, from);
} else if (from_reg != IR_REG_NONE) {
ir_emit_store(ctx, type, to, from_reg);
} else if (IR_IS_CONST_REF(from) || !ir_is_same_mem(ctx, from, to)) {
from_reg = ctx->regs[ref][3]; /* temporary register for fp mem->mem (see ir_fix_dessa_tmps) */
IR_ASSERT(from_reg != IR_REG_NONE);
ir_emit_load(ctx, type, from_reg, from);
ir_emit_store(ctx, type, to, from_reg);
}
if (spill_store) {
ir_emit_store(ctx, type, to, to_reg);
}
}
return 1;
}
/*
 * Move one incoming parameter from where the calling convention delivers it
 * to where the register allocator wants it.
 *
 * ctx      - compilation context.
 * type     - parameter type (selects integer or floating point moves).
 * from_reg - register the parameter arrives in, or IR_REG_NONE when it
 *            arrives on the stack.
 * to_reg   - register assigned to the parameter, or IR_REG_NONE when it
 *            lives in its spill slot.
 * to       - reference of the PARAM instruction (store destination).
 * offset   - offset of the stack-passed parameter from the frame pointer
 *            (with IR_USE_FRAME_POINTER) or from the stack pointer.
 *
 * At least one of from_reg / to_reg must be a real register.
 */
static void ir_emit_param_move(ir_ctx *ctx, uint8_t type, ir_reg from_reg, ir_reg to_reg, ir_ref to, int32_t offset)
{
	ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;

	IR_ASSERT(from_reg != IR_REG_NONE || to_reg != IR_REG_NONE);

	if (from_reg == IR_REG_NONE) {
		/* stack -> register */
		if (IR_IS_TYPE_INT(type)) {
			ir_emit_load_mem_int(ctx, type, to_reg, fp, offset);
		} else {
			ir_emit_load_mem_fp(ctx, type, to_reg, fp, offset);
		}
	} else if (to_reg == IR_REG_NONE) {
		/* register -> spill slot */
		ir_emit_store(ctx, type, to, from_reg);
	} else if (IR_IS_TYPE_INT(type)) {
		/* register -> register */
		ir_emit_mov(ctx, type, to_reg, from_reg);
	} else {
		ir_emit_fp_mov(ctx, type, to_reg, from_reg);
	}
}
/*
 * Emit the moves that bring incoming parameters into their allocated
 * locations.
 *
 * Walks the PARAM instructions among the uses of instruction 1, assigns each
 * one the next integer or floating point argument register of the calling
 * convention (or IR_REG_NONE once those are exhausted), and emits a move when
 * source and destination differ. Stack-passed parameters advance a running
 * stack offset: one pointer-sized slot on 64-bit targets, otherwise the
 * larger of the pointer size and the parameter size.
 */
static void ir_emit_load_params(ir_ctx *ctx)
{
	ir_backend_data *data = ctx->data;
	ir_use_list *use_list = &ctx->use_lists[1];
	ir_ref *edges, idx, count;
	int int_param_num = 0;
	int fp_param_num = 0;
	// TODO: Calling convention specific
	int int_reg_params_count = IR_REG_INT_ARGS;
	int fp_reg_params_count = IR_REG_FP_ARGS;
	const int8_t *int_reg_params = _ir_int_reg_params;
	const int8_t *fp_reg_params = _ir_fp_reg_params;
	int32_t stack_offset = 0;

#if !defined(_WIN64) && !defined(__x86_64__)
	if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) {
		int_reg_params_count = IR_REG_INT_FCARGS;
		fp_reg_params_count = IR_REG_FP_FCARGS;
		int_reg_params = _ir_int_fc_reg_params;
		fp_reg_params = _ir_fp_fc_reg_params;
	}
#endif

	if (!(ctx->flags & IR_USE_FRAME_POINTER)) {
		/* skip return address */
		stack_offset = sizeof(void*) + data->ra_data.stack_frame_size + data->call_stack_size;
	} else {
		/* skip old frame pointer and return address */
		stack_offset = sizeof(void*) * 2;
	}

	count = use_list->count;
	edges = &ctx->use_edges[use_list->refs];
	for (idx = 0; idx < count; idx++) {
		ir_ref use = edges[idx];
		ir_insn *insn = &ctx->ir_base[use];
		ir_reg src_reg;

		if (insn->op != IR_PARAM) {
			continue;
		}

		/* Where does the calling convention deliver this parameter? */
		if (IR_IS_TYPE_INT(insn->type)) {
			src_reg = IR_REG_NONE;
			if (int_param_num < int_reg_params_count) {
				src_reg = int_reg_params[int_param_num];
			}
			int_param_num++;
		} else {
			src_reg = IR_REG_NONE;
			if (fp_param_num < fp_reg_params_count) {
				src_reg = fp_reg_params[fp_param_num];
			}
			fp_param_num++;
		}

		if (ctx->vregs[use]) {
			ir_reg dst_reg = IR_REG_NUM(ctx->regs[use][0]);

			IR_ASSERT(src_reg != IR_REG_NONE || dst_reg != IR_REG_NONE ||
				stack_offset == ctx->live_intervals[ctx->vregs[use]]->stack_spill_pos +
					((ctx->flags & IR_USE_FRAME_POINTER) ? -data->ra_data.stack_frame_size : data->call_stack_size));
			if (src_reg != dst_reg) {
				ir_emit_param_move(ctx, insn->type, src_reg, dst_reg, use, stack_offset);
			}
			if (dst_reg != IR_REG_NONE && (ctx->regs[use][0] & IR_REG_SPILL_STORE)) {
				ir_emit_store(ctx, insn->type, use, dst_reg);
			}
		}

		if (src_reg == IR_REG_NONE) {
			/* advance past this stack-passed parameter */
			if (sizeof(void*) != 8) {
				stack_offset += IR_MAX(sizeof(void*), ir_type_size[insn->type]);
			} else {
				stack_offset += sizeof(void*);
			}
		}
	}
}
/*
 * Pick a register of the class required by `type` out of `available`.
 *
 * Integer types select from the general purpose set, everything else must be
 * a floating point type and selects from the floating point set. Asserts
 * that at least one suitable register is available and returns the first
 * one of the restricted set.
 */
static ir_reg ir_get_free_reg(ir_type type, ir_regset available)
{
	ir_regset candidates;

	if (!IR_IS_TYPE_INT(type)) {
		IR_ASSERT(IR_IS_TYPE_FP(type));
		candidates = IR_REGSET_INTERSECTION(available, IR_REGSET_FP);
	} else {
		candidates = IR_REGSET_INTERSECTION(available, IR_REGSET_GP);
	}
	IR_ASSERT(!IR_REGSET_IS_EMPTY(candidates));
	return IR_REGSET_FIRST(candidates);
}
/*
 * De-SSA callback that reserves the temporary registers a move will need.
 *
 * The temporaries are recorded on the end instruction of block
 * data->dessa_from_block:
 *   slot 0 - integer swap temporary        (needed when to == 0)
 *   slot 1 - integer memory->memory copy   (needed when to != 0 && from != 0)
 *   slot 2 - floating point swap temporary (needed when to == 0)
 *   slot 3 - floating point memory->memory (needed when to != 0 && from != 0)
 * A slot that is still IR_REG_NONE gets RAX (or XMM0); if the other slot of
 * the same class already holds that register, RDX (or XMM1) is used instead.
 *
 * Returns 1 on success and 0 (after asserting) for a type that is neither
 * integer nor floating point. A move with to != 0 and from == 0 needs no
 * temporary and returns 1 without looking at the type.
 */
static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to)
{
	ir_backend_data *data = ctx->data;
	ir_ref ref = ctx->cfg_blocks[data->dessa_from_block].end;
	int is_swap = (to == 0);
	int slot, peer;
	ir_reg preferred, fallback;

	if (!is_swap && from == 0) {
		return 1;
	}

	if (IR_IS_TYPE_INT(type)) {
		slot = is_swap ? 0 : 1;
		peer = is_swap ? 1 : 0;
		preferred = IR_REG_RAX;
		fallback = IR_REG_RDX;
	} else if (IR_IS_TYPE_FP(type)) {
		slot = is_swap ? 2 : 3;
		peer = is_swap ? 3 : 2;
		preferred = IR_REG_XMM0;
		fallback = IR_REG_XMM1;
	} else {
		IR_ASSERT(0);
		return 0;
	}

	if (ctx->regs[ref][slot] == IR_REG_NONE) {
		ctx->regs[ref][slot] = (ctx->regs[ref][peer] != preferred) ? preferred : fallback;
	}
	return 1;
}
/*
 * Let stack-passed parameters use their incoming argument slot as spill
 * slot, and record the total size of the stack-passed parameters in
 * data->param_stack_size.
 *
 * Parameters are classified exactly as in ir_emit_load_params(). For every
 * parameter that arrives on the stack and whose live interval is flagged
 * IR_LIVE_INTERVAL_MEM_PARAM, has no spill position yet, and either has a
 * continuation (ival->next) or no register, the spill position is set to the
 * argument's location and the register of the PARAM definition is cleared.
 */
static void ir_fix_param_spills(ir_ctx *ctx)
{
	ir_backend_data *data = ctx->data;
	ir_use_list *use_list = &ctx->use_lists[1];
	ir_ref *edges, idx, count;
	int int_param_num = 0;
	int fp_param_num = 0;
	// TODO: Calling convention specific
	int int_reg_params_count = IR_REG_INT_ARGS;
	int fp_reg_params_count = IR_REG_FP_ARGS;
	const int8_t *int_reg_params = _ir_int_reg_params;
	const int8_t *fp_reg_params = _ir_fp_reg_params;
	int32_t stack_start = 0;
	int32_t stack_offset = 0;

#if !defined(_WIN64) && !defined(__x86_64__)
	if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) {
		int_reg_params_count = IR_REG_INT_FCARGS;
		fp_reg_params_count = IR_REG_FP_FCARGS;
		int_reg_params = _ir_int_fc_reg_params;
		fp_reg_params = _ir_fp_fc_reg_params;
	}
#endif

	if (!(ctx->flags & IR_USE_FRAME_POINTER)) {
		/* skip return address */
		stack_start = sizeof(void*) + data->ra_data.stack_frame_size;
	} else {
		/* skip old frame pointer and return address */
		stack_start = sizeof(void*) * 2 + (data->ra_data.stack_frame_size - data->stack_frame_alignment);
	}

	count = use_list->count;
	edges = &ctx->use_edges[use_list->refs];
	for (idx = 0; idx < count; idx++) {
		ir_ref use = edges[idx];
		ir_insn *insn = &ctx->ir_base[use];
		ir_reg src_reg = IR_REG_NONE;

		if (insn->op != IR_PARAM) {
			continue;
		}

		if (IR_IS_TYPE_INT(insn->type)) {
			if (int_param_num < int_reg_params_count) {
				src_reg = int_reg_params[int_param_num];
			}
			int_param_num++;
		} else {
			if (fp_param_num < fp_reg_params_count) {
				src_reg = fp_reg_params[fp_param_num];
			}
			fp_param_num++;
		}

		if (src_reg != IR_REG_NONE) {
			/* register parameters do not occupy an incoming stack slot */
			continue;
		}

		if (ctx->vregs[use]) {
			ir_live_interval *ival = ctx->live_intervals[ctx->vregs[use]];

			if ((ival->flags & IR_LIVE_INTERVAL_MEM_PARAM)
			 && ival->stack_spill_pos == -1
			 && (ival->next || ival->reg == IR_REG_NONE)) {
				ival->stack_spill_pos = stack_start + stack_offset;
				ctx->regs[use][0] = IR_REG_NONE;
			}
		}

		if (sizeof(void*) != 8) {
			stack_offset += IR_MAX(sizeof(void*), ir_type_size[insn->type]);
		} else {
			stack_offset += sizeof(void*);
		}
	}

	data->param_stack_size = stack_offset;
}
#ifndef IR_REG_FP_RET1
/*
 * Determine the type returned by the function being compiled.
 *
 * Walks the chain that starts at op1 of instruction 1 and is linked through
 * op3, looking at every RETURN node: a RETURN with a value contributes the
 * type of that value, one without a value contributes IR_VOID.
 *
 * Returns the common type, IR_VOID when there is no RETURN at all, or 0
 * (after asserting) when two RETURN nodes disagree.
 */
static uint8_t ir_get_return_type(ir_ctx *ctx)
{
	const uint8_t unknown = 255;
	uint8_t ret_type = unknown;
	ir_ref ref;

	/* Check all RETURN nodes */
	for (ref = ctx->ir_base[1].op1; ref; ref = ctx->ir_base[ref].op3) {
		ir_insn *insn = &ctx->ir_base[ref];
		uint8_t this_type;

		if (insn->op != IR_RETURN) {
			continue;
		}
		this_type = insn->op2 ? ctx->ir_base[insn->op2].type : IR_VOID;
		if (ret_type == unknown) {
			ret_type = this_type;
		} else if (ret_type != this_type) {
			IR_ASSERT(0 && "conflicting return types");
			return 0;
		}
	}

	return (ret_type == unknown) ? IR_VOID : ret_type;
}
#endif
/*
 * Trivial register assignment used when no live intervals were computed
 * (ir_emit_code() calls this when ctx->live_intervals is NULL).
 *
 * Every virtual register gets its own live interval and, apart from
 * stack-passed parameters, its own spill slot. Registers are only assigned
 * per instruction, out of the scratch set, for results, inputs and
 * temporaries that must be in a register; results are flagged
 * IR_REG_SPILL_STORE and inputs IR_REG_SPILL_LOAD so that every value is
 * reloaded from and written back to its slot around each instruction.
 *
 * VLOAD results and VSTORE-d values of a VAR share the spill slot of that
 * VAR.
 *
 * Afterwards the frame is extended for the fixed save register set, aligned
 * to 16 bytes when the function contains calls, and the spill positions of
 * stack-passed parameters are fixed up.
 */
static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
{
	int b;
	ir_block *bb;
	ir_insn *insn;
	ir_ref i, n, j, *p;
	uint32_t *rule, insn_flags;
	ir_backend_data *data = ctx->data;
	ir_regset available = 0;
	ir_tmp_reg tmp_regs[4];
	uint8_t def_flags;
	ir_reg reg;

#ifndef IR_REG_FP_RET1
	ir_type ret_type = ir_get_return_type(ctx);
	if (ret_type == IR_FLOAT) {
		if (data->float_ret_slot == -1) {
			data->float_ret_slot = ir_allocate_spill_slot(ctx, ret_type, &data->ra_data);
		}
	} else if (ret_type == IR_DOUBLE) {
		if (data->double_ret_slot == -1) {
			data->double_ret_slot = ir_allocate_spill_slot(ctx, ret_type, &data->ra_data);
		}
	}
#endif

	ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count);
	memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count);
	ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM, sizeof(ir_live_interval*));

	for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
		if (bb->flags & IR_BB_UNREACHABLE) {
			continue;
		}
		for (i = bb->start, insn = ctx->ir_base + i, rule = ctx->rules + i; i <= bb->end;) {
			switch (ctx->rules ? *rule : insn->op) {
				case IR_START:
				case IR_BEGIN:
				case IR_END:
				case IR_IF_TRUE:
				case IR_IF_FALSE:
				case IR_CASE_VAL:
				case IR_CASE_DEFAULT:
				case IR_MERGE:
				case IR_LOOP_BEGIN:
				case IR_LOOP_END:
					break;
				default:
					def_flags = ir_get_def_flags(ctx, i, &reg);
					/* Start from the full scratch set, except for rules fused with a
					 * preceding compare/test: those keep whatever is still available. */
					if (ctx->rules
					 && *rule != IR_CMP_AND_BRANCH_INT
					 && *rule != IR_CMP_AND_BRANCH_FP
					 && *rule != IR_TEST_AND_BRANCH_INT
					 && *rule != IR_GUARD_CMP_INT
					 && *rule != IR_GUARD_CMP_FP) {
						available = IR_REGSET_SCRATCH;
					}
					if (ctx->vregs[i]) {
						/* Result register */
						if (reg != IR_REG_NONE && IR_REGSET_IN(available, reg)) {
							IR_REGSET_EXCL(available, reg);
							ctx->regs[i][0] = reg | IR_REG_SPILL_STORE;
						} else if (def_flags & IR_USE_MUST_BE_IN_REG) {
							if (insn->op == IR_VLOAD
							 && ctx->live_intervals[ctx->vregs[i]]
							 && ctx->live_intervals[ctx->vregs[i]]->stack_spill_pos != -1) {
								/* pass */
							} else if (insn->op != IR_PARAM) {
								reg = ir_get_free_reg(insn->type, available);
								IR_REGSET_EXCL(available, reg);
								ctx->regs[i][0] = reg | IR_REG_SPILL_STORE;
							}
						}
						/* Live interval and spill slot */
						if (!ctx->live_intervals[ctx->vregs[i]]) {
							ir_live_interval *ival = ir_mem_calloc(1, sizeof(ir_live_interval));
							ctx->live_intervals[ctx->vregs[i]] = ival;
							ival->type = insn->type;
							ival->reg = IR_REG_NONE;
							ival->vreg = ctx->vregs[i];
							ival->stack_spill_pos = -1;
							if (insn->op == IR_PARAM && reg == IR_REG_NONE) {
								/* stack-passed parameter: slot is set by ir_fix_param_spills() */
								ival->flags |= IR_LIVE_INTERVAL_MEM_PARAM;
							} else {
								ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type, &data->ra_data);
							}
							ival->top = ival;
							if (insn->op == IR_VAR) {
								/* Values loaded from / stored to this VAR reuse its slot */
								ir_use_list *use_list = &ctx->use_lists[i];
								ir_ref u, uses_count, *use_p, use;
								ir_insn *use_insn;
								int32_t stack_spill_pos = ival->stack_spill_pos;

								uses_count = use_list->count;
								for (u = 0, use_p = &ctx->use_edges[use_list->refs]; u < uses_count; u++, use_p++) {
									use = *use_p;
									use_insn = &ctx->ir_base[use];
									if (use_insn->op == IR_VLOAD) {
										if (ctx->vregs[use]
										 && !ctx->live_intervals[ctx->vregs[use]]) {
											ir_live_interval *use_ival = ir_mem_calloc(1, sizeof(ir_live_interval));
											ctx->live_intervals[ctx->vregs[use]] = use_ival;
											use_ival->type = insn->type;
											use_ival->reg = IR_REG_NONE;
											use_ival->vreg = ctx->vregs[use];
											use_ival->stack_spill_pos = stack_spill_pos;
											use_ival->top = use_ival;
										}
									} else if (use_insn->op == IR_VSTORE) {
										if (!IR_IS_CONST_REF(use_insn->op3)
										 && ctx->vregs[use_insn->op3]
										 && !ctx->live_intervals[ctx->vregs[use_insn->op3]]) {
											ir_live_interval *use_ival = ir_mem_calloc(1, sizeof(ir_live_interval));
											ctx->live_intervals[ctx->vregs[use_insn->op3]] = use_ival;
											use_ival->type = insn->type;
											use_ival->reg = IR_REG_NONE;
											/* the interval belongs to the stored value (op3 of the
											 * VSTORE), not to op3 of the VAR instruction */
											use_ival->vreg = ctx->vregs[use_insn->op3];
											use_ival->stack_spill_pos = stack_spill_pos;
											use_ival->top = use_ival;
										}
									}
								}
							}
						} else if (insn->op == IR_PARAM) {
							IR_ASSERT(0 && "unexpected PARAM");
							return;
						}
					}

					insn_flags = ir_op_flags[insn->op];
					n = ir_input_edges_count(ctx, insn);
					if (n > 0) {
						/* Registers clobbered while inputs are loaded cannot hold inputs */
						ir_live_pos start, end;
						ir_regset scratch = ir_get_scratch_regset(ctx, i, &start, &end);

						if (!IR_REGSET_IS_EMPTY(scratch) && start == IR_LOAD_SUB_REF) {
							available = IR_REGSET_DIFFERENCE(available, scratch);
						}
					}

					/* Input registers */
					for (j = 1, p = insn->ops + 1; j <= n; j++, p++) {
						ir_ref input = *p;
						if (IR_OPND_KIND(insn_flags, j) == IR_OPND_DATA && input > 0 && ctx->vregs[input]) {
							if ((def_flags & IR_DEF_REUSES_OP1_REG) && j == 1) {
								ir_reg reg = ctx->regs[i][0] & ~IR_REG_SPILL_STORE;
								ctx->regs[i][1] = reg | IR_REG_SPILL_LOAD;
							} else {
								ir_reg reg;
								uint8_t use_flags = ir_get_use_flags(ctx, i, j, &reg);
								if (reg != IR_REG_NONE && IR_REGSET_IN(available, reg)) {
									IR_REGSET_EXCL(available, reg);
									ctx->regs[i][j] = reg | IR_REG_SPILL_LOAD;
								} else if (j > 1 && input == insn->op1 && ctx->regs[i][1] != IR_REG_NONE) {
									/* same value as op1: share its register */
									ctx->regs[i][j] = ctx->regs[i][1];
								} else if (use_flags & IR_USE_MUST_BE_IN_REG) {
									reg = ir_get_free_reg(ctx->ir_base[input].type, available);
									IR_REGSET_EXCL(available, reg);
									ctx->regs[i][j] = reg | IR_REG_SPILL_LOAD;
								}
							}
						}
					}

					/* Temporary registers */
					n = ir_get_temporary_regs(ctx, i, tmp_regs);
					if (n) {
						ir_reg reg;
						do {
							n--;
							reg = ir_get_free_reg(tmp_regs[n].type, available);
							IR_REGSET_EXCL(available, reg);
							ctx->regs[i][tmp_regs[n].num] = reg;
						} while (n);
					}
					break;
			}
			n = ir_operands_count(ctx, insn);
			n = 1 + (n >> 2); // support for multi-word instructions like MERGE and PHI
			i += n;
			insn += n;
			rule += n;
		}
		if (bb->flags & IR_BB_DESSA_MOVES) {
			data->dessa_from_block = b;
			ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps);
		}
	}

	if (ctx->fixed_save_regset) {
		ir_reg reg;
		(void) reg;

		data->used_preserved_regs = (ir_regset)ctx->fixed_save_regset;
		IR_REGSET_FOREACH(data->used_preserved_regs, reg) {
			data->ra_data.stack_frame_size += sizeof(void*);
		} IR_REGSET_FOREACH_END();
	}

	if (ctx->flags & IR_HAS_CALLS) {
		/* Stack must be 16 byte aligned */
		if (!(ctx->flags & IR_FUNCTION)) {
			while (IR_ALIGNED_SIZE(data->ra_data.stack_frame_size, 16) != data->ra_data.stack_frame_size) {
				data->ra_data.stack_frame_size += sizeof(void*);
				data->stack_frame_alignment += sizeof(void*);
			}
		} else if (ctx->flags & IR_USE_FRAME_POINTER) {
			while (IR_ALIGNED_SIZE(data->ra_data.stack_frame_size + sizeof(void*) * 2, 16) != data->ra_data.stack_frame_size + sizeof(void*) * 2) {
				data->ra_data.stack_frame_size += sizeof(void*);
				data->stack_frame_alignment += sizeof(void*);
			}
		} else {
			while (IR_ALIGNED_SIZE(data->ra_data.stack_frame_size + sizeof(void*), 16) != data->ra_data.stack_frame_size + sizeof(void*)) {
				data->ra_data.stack_frame_size += sizeof(void*);
				data->stack_frame_alignment += sizeof(void*);
			}
		}
	}

	ir_fix_param_spills(ctx);
}
/*
 * Compute the largest amount of stack any CALL in the function needs for its
 * arguments, store it in data->call_stack_size and mark the context with
 * IR_PREALLOCATED_STACK.
 */
static void ir_preallocate_call_stack(ir_ctx *ctx, ir_backend_data *data)
{
	int peak_call_stack_size = 0;
	ir_ref ref = 1;
	ir_insn *insn = ctx->ir_base + 1;

	while (ref < ctx->insns_count) {
		ir_ref len;

		if (insn->op == IR_CALL) {
			int call_stack_size = ir_call_used_stack(ctx, insn);

			if (call_stack_size > peak_call_stack_size) {
				peak_call_stack_size = call_stack_size;
			}
		}
		/* step over multi-word instructions like MERGE and PHI */
		len = ir_operands_count(ctx, insn);
		len = 1 + (len >> 2);
		ref += len;
		insn += len;
	}

	data->call_stack_size = peak_call_stack_size;
	ctx->flags |= IR_PREALLOCATED_STACK;
}
/*
 * Compute the final stack frame size from the result of register
 * allocation.
 *
 * The frame has to cover every spill slot in use (intervals flagged
 * IR_LIVE_INTERVAL_SPILL_SPECIAL are ignored), one pointer-sized save slot
 * per used preserved register, and padding that keeps the stack 16-byte
 * aligned when the function contains calls. The padding is accumulated in
 * data->stack_frame_alignment. Without a frame pointer the outgoing call
 * area is preallocated as well unless IR_NO_STACK_COMBINE is set.
 *
 * Finishes by fixing up the spill positions of stack-passed parameters.
 */
static void ir_calc_stack_frame_size(ir_ctx *ctx, ir_backend_data *data)
{
	int i;
	ir_live_interval *ival;
	uint32_t additional_size = 0;
	ir_regset fixed_regset = (ctx->flags & IR_FUNCTION) ? (ir_regset)ctx->fixed_regset : IR_REGSET_PRESERVED;

	if (ctx->fixed_save_regset) {
		ir_reg reg;
		(void) reg;

		data->used_preserved_regs = (ir_regset)ctx->fixed_save_regset;
		IR_REGSET_FOREACH(data->used_preserved_regs, reg) {
			additional_size += sizeof(void*);
		} IR_REGSET_FOREACH_END();
	}

	/* Intervals of virtual registers */
	for (i = 1; i <= ctx->vregs_count; i++) {
		ival = ctx->live_intervals[i];
		if (!ival) {
			continue;
		}
		if (ival->stack_spill_pos != -1 && !(ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL)) {
			/* grow the frame to cover this spill slot */
			if (ival->stack_spill_pos + ir_type_size[ival->type] > data->ra_data.stack_frame_size) {
				data->ra_data.stack_frame_size = ival->stack_spill_pos + ir_type_size[ival->type];
			}
		}
		if (ival->reg == IR_REG_NONE) {
			continue;
		}
		if (IR_REGSET_IN(data->used_preserved_regs, ival->reg)
		 || IR_REGSET_IN(fixed_regset, ival->reg)
		 || !IR_REGSET_IN(IR_REGSET_PRESERVED, ival->reg)) {
			continue;
		}
		/* first use of a preserved register: it needs a save slot */
		IR_REGSET_INCL(data->used_preserved_regs, ival->reg);
		additional_size += sizeof(void*);
	}

	/* Intervals chained from live_intervals[0] */
	for (ival = ctx->live_intervals[0]; ival; ival = ival->next) {
		if (ival->reg == IR_REG_NONE) {
			continue;
		}
		if (IR_REGSET_IN(data->used_preserved_regs, ival->reg)
		 || IR_REGSET_IN(fixed_regset, ival->reg)
		 || !IR_REGSET_IN(IR_REGSET_PRESERVED, ival->reg)) {
			continue;
		}
		IR_REGSET_INCL(data->used_preserved_regs, ival->reg);
		additional_size += sizeof(void*);
	}

#ifndef IR_REG_FP_RET1
	ir_type ret_type = ir_get_return_type(ctx);
	if (ret_type == IR_FLOAT) {
		if (data->float_ret_slot == -1) {
			data->float_ret_slot = ir_allocate_spill_slot(ctx, ret_type, &data->ra_data);
		}
	} else if (ret_type == IR_DOUBLE) {
		if (data->double_ret_slot == -1) {
			data->double_ret_slot = ir_allocate_spill_slot(ctx, ret_type, &data->ra_data);
		}
	}
#endif

	data->ra_data.stack_frame_size = IR_ALIGNED_SIZE(data->ra_data.stack_frame_size, sizeof(void*));
	data->ra_data.stack_frame_size += additional_size;

	if (!(ctx->flags & IR_HAS_CALLS)) {
		ir_fix_param_spills(ctx);
		return;
	}

	/* Stack must be 16 byte aligned */
	if (!(ctx->flags & IR_FUNCTION)) {
		while (IR_ALIGNED_SIZE(data->ra_data.stack_frame_size, 16) != data->ra_data.stack_frame_size) {
			data->ra_data.stack_frame_size += sizeof(void*);
			data->stack_frame_alignment += sizeof(void*);
		}
	} else if (ctx->flags & IR_USE_FRAME_POINTER) {
		while (IR_ALIGNED_SIZE(data->ra_data.stack_frame_size + sizeof(void*) * 2, 16) != data->ra_data.stack_frame_size + sizeof(void*) * 2) {
			data->ra_data.stack_frame_size += sizeof(void*);
			data->stack_frame_alignment += sizeof(void*);
		}
	} else {
		if (!(ctx->flags & IR_NO_STACK_COMBINE)) {
			ir_preallocate_call_stack(ctx, data);
		}
		while (IR_ALIGNED_SIZE(data->ra_data.stack_frame_size + data->call_stack_size + sizeof(void*), 16) !=
			data->ra_data.stack_frame_size + data->call_stack_size + sizeof(void*)) {
			data->ra_data.stack_frame_size += sizeof(void*);
			data->stack_frame_alignment += sizeof(void*);
		}
	}

	ir_fix_param_spills(ctx);
}
/*
 * Storage for DynASM global labels; ir_emit_code() registers it with
 * dasm_setupglobal() using ir_lb_MAX as its size.
 * NOTE(review): ir_lb_MAX presumably comes from the `|.globals ir_lb`
 * directive at the top of this file — confirm.
 */
static void* dasm_labels[ir_lb_MAX];
/*
 * Generate native code for the whole IR function held in `ctx`.
 *
 * Steps performed:
 *   1. Initialise the backend data (kept on this function's stack and
 *      published through ctx->data for the emit helpers).
 *   2. Lay out the stack frame: ir_allocate_unique_spill_slots() when no live
 *      intervals exist, ir_calc_stack_frame_size() otherwise; a fixed frame
 *      size requested through ctx->fixed_stack_frame_size overrides the result.
 *   3. Number all entries (chained through op2, starting at ir_base[1].op2).
 *   4. Emit the prologue, parameter loads and every reachable basic block,
 *      dispatching on the selected rule of each instruction.
 *   5. Emit FP and string constants flagged IR_CONST_EMIT into the rodata section.
 *   6. Link and encode the code either into ctx->code_buffer or into freshly
 *      mapped memory.
 *
 * PC label layout used here: block `b` -> b; constant `i` -> cfg_blocks_count + i;
 * rodata -> cfg_blocks_count + consts_count + 2;
 * entry `k` (1-based) -> cfg_blocks_count + consts_count + 3 + k.
 *
 * ctx      - compilation context.
 * size_ptr - receives the size of the generated code as reported by dasm_link().
 *
 * Returns the address of the generated code, or NULL when linking/encoding
 * fails, when the supplied code buffer is too small, or when memory for the
 * code cannot be obtained.  On success, op3 of every entry instruction holds
 * the code offset of that entry, and ctx->jmp_table_offset /
 * ctx->rodata_offset are updated.
 *
 * NOTE(review): ctx->data still points at the local `data` after return;
 * callers presumably never read it afterwards — confirm.
 */
void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
{
	int b, n, target;
	ir_block *bb;
	ir_ref i;
	ir_insn *insn;
	uint32_t *rule;
	ir_backend_data data;
	dasm_State **Dst;
	int ret;
	void *entry;
	size_t size;
	uint32_t entries_count;

	ctx->data = &data;
	data.ra_data.stack_frame_size = 0;
	data.ra_data.unused_slot_4 = 0;
	data.ra_data.unused_slot_2 = 0;
	data.ra_data.unused_slot_1 = 0;
	data.stack_frame_alignment = 0;
	data.call_stack_size = 0;
#ifndef IR_REG_FP_RET1
	data.float_ret_slot = -1;
	data.double_ret_slot = -1;
#endif
	data.used_preserved_regs = 0;
	data.rodata_label = 0;
	data.jmp_table_label = 0;
	data.double_neg_const = 0;
	data.float_neg_const = 0;
	data.double_abs_const = 0;
	data.float_abs_const = 0;

	if (!ctx->live_intervals) {
		ir_allocate_unique_spill_slots(ctx);
	} else {
		ir_calc_stack_frame_size(ctx, &data);
	}

	if (ctx->fixed_stack_frame_size != -1) {
		IR_ASSERT(data.ra_data.stack_frame_size <= ctx->fixed_stack_frame_size + ctx->fixed_stack_red_zone);
		data.ra_data.stack_frame_size = ctx->fixed_stack_frame_size;
		data.stack_frame_alignment = 0;
	}

	/* For all entries: temporarily store a 1-based entry index in op3 */
	entries_count = 0;
	i = ctx->ir_base[1].op2;
	while (i != IR_UNUSED) {
		entries_count++;
		ctx->ir_base[i].op3 = entries_count;
		i = ctx->ir_base[i].op2;
	}

	/* `Dst` is referenced by the code DynASM generates for the `|` lines below */
	Dst = &data.dasm_state;
	data.dasm_state = NULL;
	dasm_init(&data.dasm_state, DASM_MAXSECTION);
	dasm_setupglobal(&data.dasm_state, dasm_labels, ir_lb_MAX);
	dasm_setup(&data.dasm_state, dasm_actions);
	/* labels for each block + for each constant + rodata label + jmp_table label + for each entry */
	dasm_growpc(&data.dasm_state, ctx->cfg_blocks_count + 1 + ctx->consts_count + 1 + 1 + 1 + entries_count);

	ir_emit_prologue(ctx);
	if (ctx->flags & IR_FUNCTION) {
		ir_emit_load_params(ctx);
	}

	for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
		if (bb->flags & IR_BB_UNREACHABLE) {
			continue;
		}
		/* Skip blocks with a single successor that hold nothing between start and end */
		if (bb->end - ctx->prev_insn_len[bb->end] == bb->start
		 && bb->successors_count == 1
		 && !(bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_DESSA_MOVES))) {
			continue;
		}
|=>b:
		for (i = bb->start, insn = ctx->ir_base + i, rule = ctx->rules + i; i <= bb->end;) {
			switch (*rule) {
				case IR_SKIP:
				case IR_SKIP_REG:
				case IR_SKIP_MEM:
				case IR_SKIP_MEM_BINOP_INT:
				case IR_SKIP_REG_BINOP_INT:
				case IR_SKIP_TEST_INT:
				case IR_SKIP_SHIFT:
				case IR_VAR:
					break;
				case IR_ENTRY:
					{
						uint32_t label = ctx->cfg_blocks_count + ctx->consts_count + 3 + insn->op3;
|=>label:
						ir_emit_prologue(ctx);
					}
					break;
				case IR_LEA_OB:
					{
						ir_reg op1_reg = ctx->regs[i][1];
						int32_t offset = ctx->ir_base[insn->op2].val.i32;

						if (insn->op == IR_SUB) {
							offset = -offset;
						}
						if (op1_reg & IR_REG_SPILL_LOAD) {
							op1_reg &= ~IR_REG_SPILL_LOAD;
							ir_emit_load(ctx, insn->type, op1_reg, insn->op1);
						}
						ir_emit_lea(ctx, i, insn->type, op1_reg, IR_REG_NONE, 1, offset);
					}
					break;
				case IR_LEA_SI:
					{
						ir_reg op1_reg = ctx->regs[i][1];
						int32_t scale = ctx->ir_base[insn->op2].val.i32;

						IR_ASSERT(op1_reg != IR_REG_NONE);
						if (op1_reg & IR_REG_SPILL_LOAD) {
							op1_reg &= ~IR_REG_SPILL_LOAD;
							ir_emit_load(ctx, insn->type, op1_reg, insn->op1);
						}
						ir_emit_lea(ctx, i, insn->type, IR_REG_NONE, op1_reg, scale, 0);
					}
					break;
				case IR_LEA_SIB:
					{
						ir_reg op1_reg = ctx->regs[i][1];
						int32_t scale = ctx->ir_base[insn->op2].val.i32;

						IR_ASSERT(op1_reg != IR_REG_NONE);
						if (op1_reg & IR_REG_SPILL_LOAD) {
							op1_reg &= ~IR_REG_SPILL_LOAD;
							ir_emit_load(ctx, insn->type, op1_reg, insn->op1);
						}
						/* the same register is both base and index, hence scale - 1 */
						ir_emit_lea(ctx, i, insn->type, op1_reg, op1_reg, scale - 1, 0);
					}
					break;
				case IR_LEA_IB:
					{
						ir_reg op1_reg = ctx->regs[i][1];
						ir_reg op2_reg = ctx->regs[i][2];

						IR_ASSERT(op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE);
						if (op1_reg & IR_REG_SPILL_LOAD) {
							op1_reg &= ~IR_REG_SPILL_LOAD;
							ir_emit_load(ctx, insn->type, op1_reg, insn->op1);
						}
						if (op2_reg & IR_REG_SPILL_LOAD) {
							op2_reg &= ~IR_REG_SPILL_LOAD;
							ir_emit_load(ctx, insn->type, op2_reg, insn->op2);
						}
						ir_emit_lea(ctx, i, insn->type, op1_reg, op2_reg, 1, 0);
					}
					break;
				case IR_LEA_OB_I:
					{
						ir_insn *op1_insn = &ctx->ir_base[insn->op1];
						ir_reg op1_reg = ctx->regs[insn->op1][1];
						ir_reg op2_reg = ctx->regs[i][2];
						int32_t offset = ctx->ir_base[op1_insn->op2].val.i32;

						IR_ASSERT(op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE);
						if (op1_reg & IR_REG_SPILL_LOAD) {
							op1_reg &= ~IR_REG_SPILL_LOAD;
							ir_emit_load(ctx, insn->type, op1_reg, op1_insn->op1);
						}
						if (op2_reg & IR_REG_SPILL_LOAD) {
							op2_reg &= ~IR_REG_SPILL_LOAD;
							ir_emit_load(ctx, insn->type, op2_reg, insn->op2);
						}
						if (op1_insn->op == IR_SUB) {
							offset = -offset;
						}
						ir_emit_lea(ctx, i, insn->type, op1_reg, op2_reg, 1, offset);
					}
					break;
				case IR_LEA_I_OB:
					{
						ir_insn *op2_insn = &ctx->ir_base[insn->op2];
						ir_reg op1_reg = ctx->regs[i][1];
						ir_reg op2_reg = ctx->regs[insn->op2][1];
						int32_t offset = ctx->ir_base[op2_insn->op2].val.i32;

						IR_ASSERT(op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE);
						if (op1_reg & IR_REG_SPILL_LOAD) {
							op1_reg &= ~IR_REG_SPILL_LOAD;
							ir_emit_load(ctx, insn->type, op1_reg, insn->op1);
						}
						if (op2_reg & IR_REG_SPILL_LOAD) {
							op2_reg &= ~IR_REG_SPILL_LOAD;
							ir_emit_load(ctx, insn->type, op2_reg, op2_insn->op1);
						}
						if (op2_insn->op == IR_SUB) {
							offset = -offset;
						}
						ir_emit_lea(ctx, i, insn->type, op1_reg, op2_reg, 1, offset);
					}
					break;
				case IR_LEA_SI_O:
					{
						ir_insn *op1_insn = &ctx->ir_base[insn->op1];
						ir_reg op1_reg = ctx->regs[insn->op1][1];
						int32_t scale = ctx->ir_base[op1_insn->op2].val.i32;
						int32_t offset = ctx->ir_base[insn->op2].val.i32;

						IR_ASSERT(op1_reg != IR_REG_NONE);
						if (op1_reg & IR_REG_SPILL_LOAD) {
							op1_reg &= ~IR_REG_SPILL_LOAD;
							ir_emit_load(ctx, insn->type, op1_reg, op1_insn->op1);
						}
						if (insn->op == IR_SUB) {
							offset = -offset;
						}
						ir_emit_lea(ctx, i, insn->type, IR_REG_NONE, op1_reg, scale, offset);
					}
					break;
				case IR_LEA_SIB_O:
					{
						ir_insn *op1_insn = &ctx->ir_base[insn->op1];
						ir_reg op1_reg = ctx->regs[insn->op1][1];
						int32_t scale = ctx->ir_base[op1_insn->op2].val.i32;
						int32_t offset = ctx->ir_base[insn->op2].val.i32;

						IR_ASSERT(op1_reg != IR_REG_NONE);
						if (op1_reg & IR_REG_SPILL_LOAD) {
							op1_reg &= ~IR_REG_SPILL_LOAD;
							ir_emit_load(ctx, insn->type, op1_reg, op1_insn->op1);
						}
						if (insn->op == IR_SUB) {
							offset = -offset;
						}
						ir_emit_lea(ctx, i, insn->type, op1_reg, op1_reg, scale - 1, offset);
					}
					break;
				case IR_LEA_IB_O:
					{
						ir_insn *op1_insn = &ctx->ir_base[insn->op1];
						ir_reg op1_reg = ctx->regs[insn->op1][1];
						ir_reg op2_reg = ctx->regs[insn->op1][2];
						int32_t offset = ctx->ir_base[insn->op2].val.i32;

						IR_ASSERT(op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE);
						if (op1_reg & IR_REG_SPILL_LOAD) {
							op1_reg &= ~IR_REG_SPILL_LOAD;
							ir_emit_load(ctx, insn->type, op1_reg, op1_insn->op1);
						}
						if (op2_reg & IR_REG_SPILL_LOAD) {
							op2_reg &= ~IR_REG_SPILL_LOAD;
							ir_emit_load(ctx, insn->type, op2_reg, op1_insn->op2);
						}
						if (insn->op == IR_SUB) {
							offset = -offset;
						}
						ir_emit_lea(ctx, i, insn->type, op1_reg, op2_reg, 1, offset);
					}
					break;
				case IR_LEA_OB_SI:
					{
						ir_insn *op1_insn = &ctx->ir_base[insn->op1];
						ir_insn *op2_insn = &ctx->ir_base[insn->op2];
						ir_reg op1_reg = ctx->regs[insn->op1][1];
						ir_reg op2_reg = ctx->regs[insn->op2][1];
						int32_t offset = ctx->ir_base[op1_insn->op2].val.i32;
						int32_t scale = ctx->ir_base[op2_insn->op2].val.i32;

						IR_ASSERT(op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE);
						if (op1_reg & IR_REG_SPILL_LOAD) {
							op1_reg &= ~IR_REG_SPILL_LOAD;
							ir_emit_load(ctx, insn->type, op1_reg, op1_insn->op1);
						}
						if (op2_reg & IR_REG_SPILL_LOAD) {
							op2_reg &= ~IR_REG_SPILL_LOAD;
							ir_emit_load(ctx, insn->type, op2_reg, op2_insn->op1);
						}
						if (op1_insn->op == IR_SUB) {
							offset = -offset;
						}
						ir_emit_lea(ctx, i, insn->type, op1_reg, op2_reg, scale, offset);
					}
					break;
				case IR_LEA_SI_OB:
					{
						ir_insn *op1_insn = &ctx->ir_base[insn->op1];
						ir_insn *op2_insn = &ctx->ir_base[insn->op2];
						ir_reg op1_reg = ctx->regs[insn->op1][1];
						ir_reg op2_reg = ctx->regs[insn->op2][1];
						int32_t scale = ctx->ir_base[op1_insn->op2].val.i32;
						int32_t offset = ctx->ir_base[op2_insn->op2].val.i32;

						IR_ASSERT(op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE);
						if (op1_reg & IR_REG_SPILL_LOAD) {
							op1_reg &= ~IR_REG_SPILL_LOAD;
							ir_emit_load(ctx, insn->type, op1_reg, op1_insn->op1);
						}
						if (op2_reg & IR_REG_SPILL_LOAD) {
							op2_reg &= ~IR_REG_SPILL_LOAD;
							ir_emit_load(ctx, insn->type, op2_reg, op2_insn->op1);
						}
						/*
						 * The offset is taken from op2_insn, so the sign
						 * must be decided by op2_insn as well (op1_insn is
						 * the scaled index here).
						 */
						if (op2_insn->op == IR_SUB) {
							offset = -offset;
						}
						ir_emit_lea(ctx, i, insn->type, op2_reg, op1_reg, scale, offset);
					}
					break;
				case IR_LEA_B_SI:
					{
						ir_insn *op2_insn = &ctx->ir_base[insn->op2];
						ir_reg op1_reg = ctx->regs[i][1];
						ir_reg op2_reg = ctx->regs[insn->op2][1];
						int32_t scale = ctx->ir_base[op2_insn->op2].val.i32;

						IR_ASSERT(op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE);
						if (op1_reg & IR_REG_SPILL_LOAD) {
							op1_reg &= ~IR_REG_SPILL_LOAD;
							ir_emit_load(ctx, insn->type, op1_reg, insn->op1);
						}
						if (op2_reg & IR_REG_SPILL_LOAD) {
							op2_reg &= ~IR_REG_SPILL_LOAD;
							ir_emit_load(ctx, insn->type, op2_reg, op2_insn->op1);
						}
						ir_emit_lea(ctx, i, insn->type, op1_reg, op2_reg, scale, 0);
					}
					break;
				case IR_LEA_SI_B:
					{
						ir_insn *op1_insn = &ctx->ir_base[insn->op1];
						ir_reg op1_reg = ctx->regs[insn->op1][1];
						ir_reg op2_reg = ctx->regs[i][2];
						int32_t scale = ctx->ir_base[op1_insn->op2].val.i32;

						IR_ASSERT(op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE);
						if (op1_reg & IR_REG_SPILL_LOAD) {
							op1_reg &= ~IR_REG_SPILL_LOAD;
							ir_emit_load(ctx, insn->type, op1_reg, op1_insn->op1);
						}
						if (op2_reg & IR_REG_SPILL_LOAD) {
							op2_reg &= ~IR_REG_SPILL_LOAD;
							ir_emit_load(ctx, insn->type, op2_reg, insn->op2);
						}
						ir_emit_lea(ctx, i, insn->type, op2_reg, op1_reg, scale, 0);
					}
					break;
				case IR_MUL_PWR2:
				case IR_DIV_PWR2:
				case IR_MOD_PWR2:
					ir_emit_mul_div_mod_pwr2(ctx, i, insn);
					break;
				case IR_SHIFT:
					ir_emit_shift(ctx, i, insn);
					break;
				case IR_SHIFT_CONST:
					ir_emit_shift_const(ctx, i, insn);
					break;
				case IR_INC:
				case IR_DEC:
				case IR_OP_INT:
					ir_emit_op_int(ctx, i, insn);
					break;
				case IR_ABS_INT:
					ir_emit_abs_int(ctx, i, insn);
					break;
				case IR_BOOL_NOT_INT:
					ir_emit_bool_not_int(ctx, i, insn);
					break;
				case IR_OP_FP:
					ir_emit_op_fp(ctx, i, insn);
					break;
				case IR_IMUL3:
					ir_emit_imul3(ctx, i, insn);
					break;
				case IR_BINOP_INT:
					ir_emit_binop_int(ctx, i, insn);
					break;
				case IR_BINOP_SSE2:
					ir_emit_binop_sse2(ctx, i, insn);
					break;
				case IR_BINOP_AVX:
					ir_emit_binop_avx(ctx, i, insn);
					break;
				case IR_MUL_INT:
				case IR_DIV_INT:
				case IR_MOD_INT:
					ir_emit_mul_div_mod(ctx, i, insn);
					break;
				case IR_CMP_INT:
					ir_emit_cmp_int(ctx, i, insn);
					break;
				case IR_TEST_INT:
					ir_emit_test_int(ctx, i, insn);
					break;
				case IR_SETCC_INT:
					ir_emit_setcc_int(ctx, i, insn);
					break;
				case IR_CMP_FP:
					ir_emit_cmp_fp(ctx, i, insn);
					break;
				case IR_SEXT:
					ir_emit_sext(ctx, i, insn);
					break;
				case IR_ZEXT:
					ir_emit_zext(ctx, i, insn);
					break;
				case IR_TRUNC:
					ir_emit_trunc(ctx, i, insn);
					break;
				case IR_BITCAST:
					ir_emit_bitcast(ctx, i, insn);
					break;
				case IR_INT2FP:
					ir_emit_int2fp(ctx, i, insn);
					break;
				case IR_FP2INT:
					ir_emit_fp2int(ctx, i, insn);
					break;
				case IR_FP2FP:
					ir_emit_fp2fp(ctx, i, insn);
					break;
				case IR_COPY_INT:
					ir_emit_copy_int(ctx, i, insn);
					break;
				case IR_COPY_FP:
					ir_emit_copy_fp(ctx, i, insn);
					break;
				case IR_CMP_AND_BRANCH_INT:
					ir_emit_cmp_and_branch_int(ctx, b, i, insn);
					break;
				case IR_CMP_AND_BRANCH_FP:
					ir_emit_cmp_and_branch_fp(ctx, b, i, insn);
					break;
				case IR_TEST_AND_BRANCH_INT:
					ir_emit_test_and_branch_int(ctx, b, i, insn);
					break;
				case IR_JCC_INT:
					ir_emit_jcc(ctx, IR_NE, b, i, insn, 1);
					break;
				case IR_GUARD_CMP_INT:
					ir_emit_guard_cmp_int(ctx, b, i, insn);
					break;
				case IR_GUARD_CMP_FP:
					ir_emit_guard_cmp_fp(ctx, b, i, insn);
					break;
				case IR_IF_INT:
					ir_emit_if_int(ctx, b, i, insn);
					break;
				case IR_SWITCH:
					ir_emit_switch(ctx, b, i, insn);
					break;
				case IR_MIN_MAX_INT:
					ir_emit_min_max_int(ctx, i, insn);
					break;
				case IR_OVERFLOW:
					ir_emit_overflow(ctx, i, insn);
					break;
				case IR_OVERFLOW_AND_BRANCH:
					ir_emit_overflow_and_branch(ctx, b, i, insn);
					break;
				case IR_END:
				case IR_LOOP_END:
					if (bb->flags & IR_BB_DESSA_MOVES) {
						data.dessa_from_block = b;
						ir_gen_dessa_moves(ctx, b, ir_emit_dessa_move);
					}
					IR_ASSERT(bb->successors_count == 1);
					target = ir_skip_empty_target_blocks(ctx, ctx->cfg_edges[bb->successors]);
					/* No jump is emitted when the target is the next emitted block */
					if (b == ctx->cfg_blocks_count || target != ir_skip_empty_next_blocks(ctx, b + 1)) {
| jmp =>target
					}
					break;
				case IR_RETURN_VOID:
					ir_emit_return_void(ctx);
					break;
				case IR_RETURN_INT:
					ir_emit_return_int(ctx, i, insn);
					break;
				case IR_RETURN_FP:
					ir_emit_return_fp(ctx, i, insn);
					break;
				case IR_CALL:
					ir_emit_call(ctx, i, insn);
					break;
				case IR_TAILCALL:
					ir_emit_tailcall(ctx, i, insn);
					break;
				case IR_IJMP:
					ir_emit_ijmp(ctx, i, insn);
					break;
				case IR_MEM_OP_INT:
				case IR_MEM_INC:
				case IR_MEM_DEC:
					ir_emit_mem_op_int(ctx, i, insn);
					break;
				case IR_MEM_BINOP_INT:
					ir_emit_mem_binop_int(ctx, i, insn);
					break;
				case IR_MEM_MUL_PWR2:
				case IR_MEM_DIV_PWR2:
				case IR_MEM_MOD_PWR2:
					ir_emit_mem_mul_div_mod_pwr2(ctx, i, insn);
					break;
				case IR_MEM_SHIFT:
					ir_emit_mem_shift(ctx, i, insn);
					break;
				case IR_MEM_SHIFT_CONST:
					ir_emit_mem_shift_const(ctx, i, insn);
					break;
				case IR_REG_BINOP_INT:
					ir_emit_reg_binop_int(ctx, i, insn);
					break;
				case IR_VADDR:
					ir_emit_vaddr(ctx, i, insn);
					break;
				case IR_VLOAD:
					ir_emit_vload(ctx, i, insn);
					break;
				case IR_VSTORE_INT:
					ir_emit_vstore_int(ctx, i, insn);
					break;
				case IR_VSTORE_FP:
					ir_emit_vstore_fp(ctx, i, insn);
					break;
				case IR_RSTORE:
					ir_emit_rstore(ctx, i, insn);
					break;
				case IR_LOAD_INT:
					ir_emit_load_int(ctx, i, insn);
					break;
				case IR_LOAD_FP:
					ir_emit_load_fp(ctx, i, insn);
					break;
				case IR_STORE_INT:
					ir_emit_store_int(ctx, i, insn);
					break;
				case IR_STORE_FP:
					ir_emit_store_fp(ctx, i, insn);
					break;
				case IR_ALLOCA:
					ir_emit_alloca(ctx, i, insn);
					break;
				case IR_EXITCALL:
					ir_emit_exitcall(ctx, i, insn);
					break;
				case IR_EXITGROUP:
					ir_emit_exitgroup(ctx, i, insn);
					break;
				case IR_GUARD:
				case IR_GUARD_NOT:
					ir_emit_guard(ctx, i, insn);
					break;
				case IR_GUARD_OVERFLOW:
					ir_emit_guard_overflow(ctx, i, insn);
					break;
				case IR_TLS:
					ir_emit_tls(ctx, i, insn);
					break;
				case IR_TRAP:
| int3
					break;
				default:
					IR_ASSERT(0 && "NIY rule/insruction");
					break;
			}
			n = ir_operands_count(ctx, insn);
			n = 1 + (n >> 2); // support for multi-word instructions like MERGE and PHI
			i += n;
			insn += n;
			rule += n;
		}
	}

	/* Constants go to the rodata section; reuse it if an emitter already opened it */
	if (data.rodata_label) {
|.rodata
	}
	for (i = IR_UNUSED + 1, insn = ctx->ir_base - i; i < ctx->consts_count; i++, insn--) {
		if (insn->const_flags & IR_CONST_EMIT) {
			if (IR_IS_TYPE_FP(insn->type)) {
				int label = ctx->cfg_blocks_count + i;

				if (!data.rodata_label) {
					data.rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2;
|.rodata
|=>data.rodata_label:
				}
				if (insn->type == IR_DOUBLE) {
|.align 8
|=>label:
|.dword insn->val.u32, insn->val.u32_hi
				} else {
					IR_ASSERT(insn->type == IR_FLOAT);
|.align 4
|=>label:
|.dword insn->val.u32
				}
			} else if (insn->op == IR_STR) {
				int label = ctx->cfg_blocks_count + i;
				const char *str = ir_get_str(ctx, insn->val.addr);
				int pos = 0; /* distinct name: must not shadow the constant index `i` */

				if (!data.rodata_label) {
					data.rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2;
|.rodata
|=>data.rodata_label:
				}
|.align 8
|=>label:
				/* Emit the string byte by byte, decoding \\, \n, \r and \t escapes */
				while (str[pos]) {
					char c = str[pos];

					if (c == '\\') {
						if (str[pos+1] == '\\') {
							pos++;
							c = '\\';
						} else if (str[pos+1] == 'n') {
							pos++;
							c = '\n';
						} else if (str[pos+1] == 'r') {
							pos++;
							c = '\r';
						} else if (str[pos+1] == 't') {
							pos++;
							c = '\t';
						}
					}
|.byte c
					pos++;
				}
|.byte 0
			} else {
				IR_ASSERT(0);
			}
		}
	}
	if (data.rodata_label) {
|.code
	}

	ret = dasm_link(&data.dasm_state, size_ptr);
	if (ret != DASM_S_OK) {
		IR_ASSERT(0);
		dasm_free(&data.dasm_state); /* do not leak the assembler state on failure */
		return NULL;
	}
	size = *size_ptr;

	if (ctx->code_buffer != NULL) {
		if (IR_ALIGNED_SIZE(size, 16) > ctx->code_buffer_size) {
			dasm_free(&data.dasm_state);
			return NULL;
		}
		entry = ctx->code_buffer;
		IR_ASSERT((uintptr_t)entry % 16 == 0);
	} else {
		entry = ir_mem_mmap(size);
		if (!entry) {
			/* NOTE(review): assumes ir_mem_mmap() reports failure as NULL — confirm */
			dasm_free(&data.dasm_state);
			return NULL;
		}
		ir_mem_unprotect(entry, size);
	}

	ret = dasm_encode(&data.dasm_state, entry);
	if (ret != DASM_S_OK) {
		IR_ASSERT(0);
		if (ctx->code_buffer == NULL) {
			ir_mem_unmap(entry, size);
		}
		dasm_free(&data.dasm_state);
		return NULL;
	}

	if (data.jmp_table_label) {
		uint32_t offset = dasm_getpclabel(&data.dasm_state, data.jmp_table_label);
		ctx->jmp_table_offset = offset;
	} else {
		ctx->jmp_table_offset = 0;
	}
	if (data.rodata_label) {
		uint32_t offset = dasm_getpclabel(&data.dasm_state, data.rodata_label);
		ctx->rodata_offset = offset;
	} else {
		ctx->rodata_offset = 0;
	}

	/* For all entries: replace the temporary index in op3 by the entry's code offset */
	i = ctx->ir_base[1].op2;
	while (i != IR_UNUSED) {
		uint32_t offset = dasm_getpclabel(&data.dasm_state, ctx->cfg_blocks_count + ctx->consts_count + 3 + ctx->ir_base[i].op3);
		ctx->ir_base[i].op3 = offset;
		i = ctx->ir_base[i].op2;
	}

	dasm_free(&data.dasm_state);

	ir_mem_flush(entry, size);
	if (ctx->code_buffer == NULL) {
		ir_mem_protect(entry, size);
	}

	return entry;
}