Improve LOAD fusion

Dmitry Stogov 2022-08-25 18:16:17 +03:00
parent aa28e865da
commit 47083e0f9f
4 changed files with 304 additions and 383 deletions
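For context, a minimal C sketch (not part of this commit; hot_path() is a made-up name) of the pattern the improved matcher fuses: a single-use load feeding a comparison against a constant, which the x86 back end can then emit as one cmp with a memory operand instead of a mov+cmp pair.

/* Illustrative sketch only, assuming the commit's matching rules. */
int hot_path(const int *counters)
{
    /* counters[2] is a single-use LOAD from base+8; with a constant
     * right-hand side, the matcher can mark the LOAD IR_SKIP_MEM and
     * the back end emits something like `cmp dword [rdi+8], 0x3e8`. */
    return counters[2] > 1000;
}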

@@ -2459,15 +2459,15 @@ static void ir_emit_jcc(ir_ctx *ctx, uint8_t op, int b, ir_ref def, ir_insn *ins
int true_block, false_block, next_block;
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
bool swap = 0;
ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block);
if (true_block == next_block) {
if (int_cmp || (op != IR_GT && op != IR_GE)) {
/* swap to avoid unconditional JMP if this doesn't introduce additional JP instruction */
op ^= 1; // reverse
true_block = false_block;
false_block = 0;
}
/* swap to avoid unconditional JMP */
op ^= 1; // reverse
true_block = false_block;
false_block = 0;
swap = 1;
} else if (false_block == next_block) {
false_block = 0;
}
@@ -2525,7 +2525,9 @@ static void ir_emit_jcc(ir_ctx *ctx, uint8_t op, int b, ir_ref def, ir_insn *ins
| bvs =>true_block
break;
case IR_LT:
if (!false_block) {
if (swap) {
| blo =>true_block
} else if (!false_block) {
| bvs >1
| blo =>true_block
|1:
@@ -2535,10 +2537,15 @@ static void ir_emit_jcc(ir_ctx *ctx, uint8_t op, int b, ir_ref def, ir_insn *ins
}
break;
case IR_GE:
if (swap) {
| bvs =>true_block
}
| bhs =>true_block
break;
case IR_LE:
if (!false_block) {
if (swap) {
| bls =>true_block
} else if (!false_block) {
| bvs >1
| bls =>true_block
|1:
@@ -2548,6 +2555,9 @@ static void ir_emit_jcc(ir_ctx *ctx, uint8_t op, int b, ir_ref def, ir_insn *ins
}
break;
case IR_GT:
if (swap) {
| bvs =>true_block
}
| bhi =>true_block
break;
// case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break;

@@ -1180,6 +1180,83 @@ cmp_fp:
return 0;
}
static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref, ir_block *bb);
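/* Fold a single-use, non-constant address computation into the memory
 * operand of the enclosing LOAD/STORE: if it matches IR_LEA_OB (base
 * register + constant offset), mark it IR_SKIP_MEM so that no separate
 * LEA instruction is emitted for it. */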
static void ir_match_fuse_addr(ir_ctx *ctx, ir_ref addr_ref, ir_block *bb)
{
if (!IR_IS_CONST_REF(addr_ref) && ctx->use_lists[addr_ref].count == 1) {
if (!ctx->rules[addr_ref]) {
ctx->rules[addr_ref] = ir_match_insn(ctx, addr_ref, bb);
}
if (ctx->rules[addr_ref] == IR_LEA_OB) {
ctx->rules[addr_ref] = IR_SKIP_MEM;
}
}
}
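/* Fuse a single-consumer LOAD (use count 2) into the consuming
 * instruction's memory operand.  The address may be a register value
 * (RLOAD), a 32-bit constant address, or a non-constant computation
 * matched as IR_LEA_OB (base register + constant offset). */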
static void ir_match_fuse_load(ir_ctx *ctx, ir_ref ref, ir_block *bb)
{
if (ctx->ir_base[ref].op == IR_LOAD
&& ctx->use_lists[ref].count == 2) {
ir_ref addr_ref = ctx->ir_base[ref].op2;
ir_insn *addr_insn = &ctx->ir_base[addr_ref];
if (addr_insn->op == IR_RLOAD ||
(addr_insn->op == IR_C_ADDR &&
(sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr_insn->val.i64)))) {
ctx->rules[ref] = IR_SKIP_MEM;
} else if (!IR_IS_CONST_REF(addr_ref) /*&& ctx->use_lists[addr_ref].count == 1*/) {
if (!ctx->rules[addr_ref]) {
ctx->rules[addr_ref] = ir_match_insn(ctx, addr_ref, bb);
}
if (ctx->rules[addr_ref] == IR_LEA_OB || ctx->rules[addr_ref] == IR_SKIP_MEM) {
ctx->rules[ref] = IR_SKIP_MEM;
ctx->rules[addr_ref] = IR_SKIP_MEM;
}
}
}
}
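/* Canonicalize a comparison so that a (V)LOAD operand ends up in op2,
 * where it can be fused into the memory operand of CMP.  The predicate
 * is mirrored (LT<->GT, GE<->LE); EQ/NE are symmetric and stay as-is. */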
static void ir_match_swap_cmp(ir_ctx *ctx, ir_insn *insn)
{
if ((ctx->flags & IR_OPT_CODEGEN)
&& !IR_IS_CONST_REF(insn->op2)
&& !IR_IS_CONST_REF(insn->op1)) {
ir_insn *op1_insn = &ctx->ir_base[insn->op1];
ir_insn *op2_insn = &ctx->ir_base[insn->op2];
if ((op1_insn->op == IR_VLOAD || op1_insn->op == IR_LOAD)
&& (op2_insn->op != IR_VLOAD && op2_insn->op != IR_LOAD)) {
/* swap for better load fusion */
ir_ref tmp = insn->op1;
insn->op1 = insn->op2;
insn->op2 = tmp;
if (insn->op != IR_EQ && insn->op != IR_NE) {
insn->op ^= 3;
}
}
}
}
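/* The same canonicalization for commutative binary ops: move a (V)LOAD
 * operand into op2 so that it can be fused; being commutative, the
 * operation itself needs no fix-up. */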
static void ir_match_swap_commutative(ir_ctx *ctx, ir_insn *insn)
{
if ((ctx->flags & IR_OPT_CODEGEN)
&& (ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE)
&& !IR_IS_CONST_REF(insn->op2)
&& !IR_IS_CONST_REF(insn->op1)) {
ir_insn *op1_insn = &ctx->ir_base[insn->op1];
ir_insn *op2_insn = &ctx->ir_base[insn->op2];
if ((op1_insn->op == IR_VLOAD || op1_insn->op == IR_LOAD)
&& (op2_insn->op != IR_VLOAD && op2_insn->op != IR_LOAD)) {
/* swap for better load fusion */
ir_ref tmp = insn->op1;
insn->op1 = insn->op2;
insn->op2 = tmp;
}
}
}
static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref, ir_block *bb)
{
ir_insn *op2_insn;
@@ -1198,28 +1275,18 @@ static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref, ir_block *bb)
case IR_UGE:
case IR_ULE:
case IR_UGT:
ir_match_swap_cmp(ctx, insn);
if (insn->op2 > bb->start
&& insn->op2 < ref
&& !ctx->rules[insn->op2]) {
ir_match_fuse_load(ctx, insn->op2, bb);
}
if (IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) {
if (insn->op1 > bb->start
if (IR_IS_CONST_REF(insn->op2)
&& insn->op1 > bb->start
&& insn->op1 < ref
&& !ctx->rules[insn->op1]
&& ctx->ir_base[insn->op1].op == IR_LOAD
&& ctx->use_lists[insn->op1].count == 2) {
ir_ref addr_ref = ctx->ir_base[insn->op1].op2;
ir_insn *addr_insn = &ctx->ir_base[addr_ref];
if (addr_insn->op == IR_RLOAD ||
(addr_insn->op == IR_C_ADDR &&
(sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr_insn->val.i64)))) {
ctx->rules[insn->op1] = IR_SKIP_MEM;
} else {
if (!ctx->rules[addr_ref]) {
ctx->rules[addr_ref] = ir_match_insn(ctx, addr_ref, bb);
}
if (ctx->rules[addr_ref] == IR_LEA_OB || ctx->rules[addr_ref] == IR_SKIP_MEM) {
ctx->rules[insn->op1] = IR_SKIP_MEM;
ctx->rules[addr_ref] = IR_SKIP_MEM;
}
}
&& !ctx->rules[insn->op1]) {
ir_match_fuse_load(ctx, insn->op1, bb);
}
return IR_CMP_INT;
} else {
@@ -1298,65 +1365,13 @@ static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref, ir_block *bb)
return IR_LEA_IB; // lea ret, [op1.reg+op2.reg]
}
binop_int:
if ((ctx->flags & IR_OPT_CODEGEN)
&& !IR_IS_CONST_REF(insn->op2)
&& !IR_IS_CONST_REF(insn->op1)) {
if (ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE) {
ir_insn *op1_insn = &ctx->ir_base[insn->op1];
ir_insn *op2_insn = &ctx->ir_base[insn->op2];
if ((op1_insn->op == IR_VLOAD || op1_insn->op == IR_LOAD)
&& (op2_insn->op != IR_VLOAD && op2_insn->op != IR_LOAD)) {
/* swap for better load fusion */
ir_ref tmp = insn->op1;
insn->op1 = insn->op2;
insn->op2 = tmp;
}
}
if (ctx->ir_base[insn->op2].op == IR_LOAD
&& ctx->use_lists[insn->op2].count == 2) {
ir_ref addr_ref = ctx->ir_base[insn->op2].op2;
if (!ctx->rules[addr_ref]) {
ctx->rules[addr_ref] = ir_match_insn(ctx, addr_ref, bb);
}
if (ctx->rules[addr_ref] == IR_LEA_OB || ctx->rules[addr_ref] == IR_SKIP_MEM) {
ctx->rules[insn->op2] = IR_SKIP_MEM;
ctx->rules[addr_ref] = IR_SKIP_MEM;
}
}
}
ir_match_swap_commutative(ctx, insn);
ir_match_fuse_load(ctx, insn->op2, bb);
return IR_BINOP_INT;
} else {
binop_fp:
if ((ctx->flags & IR_OPT_CODEGEN)
&& !IR_IS_CONST_REF(insn->op2)
&& !IR_IS_CONST_REF(insn->op1)) {
if (ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE) {
ir_insn *op1_insn = &ctx->ir_base[insn->op1];
ir_insn *op2_insn = &ctx->ir_base[insn->op2];
if ((op1_insn->op == IR_VLOAD || op1_insn->op == IR_LOAD)
&& (op2_insn->op != IR_VLOAD && op2_insn->op != IR_LOAD)) {
/* swap for better load fusion */
ir_ref tmp = insn->op1;
insn->op1 = insn->op2;
insn->op2 = tmp;
}
}
if (ctx->ir_base[insn->op2].op == IR_LOAD
&& ctx->use_lists[insn->op2].count == 2) {
ir_ref addr_ref = ctx->ir_base[insn->op2].op2;
if (!ctx->rules[addr_ref]) {
ctx->rules[addr_ref] = ir_match_insn(ctx, addr_ref, bb);
}
if (ctx->rules[addr_ref] == IR_LEA_OB || ctx->rules[addr_ref] == IR_SKIP_MEM) {
ctx->rules[insn->op2] = IR_SKIP_MEM;
ctx->rules[addr_ref] = IR_SKIP_MEM;
}
}
}
ir_match_swap_commutative(ctx, insn);
ir_match_fuse_load(ctx, insn->op2, bb);
if (ctx->flags & IR_AVX) {
return IR_BINOP_AVX;
} else {
@@ -1709,16 +1724,7 @@ store_int:
}
break;
case IR_LOAD:
if (ctx->use_lists[insn->op2].count == 1) {
ir_ref addr_ref = insn->op2;
if (!ctx->rules[addr_ref]) {
ctx->rules[addr_ref] = ir_match_insn(ctx, addr_ref, bb);
}
if (ctx->rules[addr_ref] == IR_LEA_OB) {
ctx->rules[addr_ref] = IR_SKIP_MEM;
}
}
ir_match_fuse_addr(ctx, insn->op2, bb);
if (IR_IS_TYPE_INT(insn->type)) {
return IR_LOAD_INT;
} else {
@@ -1726,16 +1732,7 @@ store_int:
}
break;
case IR_STORE:
if (ctx->use_lists[insn->op2].count == 1) {
ir_ref addr_ref = insn->op2;
if (!ctx->rules[addr_ref]) {
ctx->rules[addr_ref] = ir_match_insn(ctx, addr_ref, bb);
}
if (ctx->rules[addr_ref] == IR_LEA_OB) {
ctx->rules[addr_ref] = IR_SKIP_MEM;
}
}
ir_match_fuse_addr(ctx, insn->op2, bb);
if (IR_IS_TYPE_INT(ctx->ir_base[insn->op3].type)) {
store_rule = IR_STORE_INT;
load_op = IR_LOAD;
@@ -1798,29 +1795,19 @@ store_int:
if (insn->op2 > bb->start && insn->op2 < ref && ctx->use_lists[insn->op2].count == 1) {
op2_insn = &ctx->ir_base[insn->op2];
if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT) {
ir_match_swap_cmp(ctx, op2_insn);
if (op2_insn->op2 > bb->start
&& op2_insn->op2 < ref
&& !ctx->rules[op2_insn->op2]) {
ir_match_fuse_load(ctx, op2_insn->op2, bb);
}
ctx->rules[insn->op2] = IR_SKIP;
if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) {
if (op2_insn->op1 > bb->start
if (IR_IS_CONST_REF(op2_insn->op2)
&& op2_insn->op1 > bb->start
&& op2_insn->op1 < ref
&& !ctx->rules[op2_insn->op1]
&& ctx->ir_base[op2_insn->op1].op == IR_LOAD
&& ctx->use_lists[op2_insn->op1].count == 2) {
ir_ref addr_ref = ctx->ir_base[op2_insn->op1].op2;
ir_insn *addr_insn = &ctx->ir_base[addr_ref];
if (addr_insn->op == IR_RLOAD ||
(addr_insn->op == IR_C_ADDR &&
(sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr_insn->val.i64)))) {
ctx->rules[op2_insn->op1] = IR_SKIP_MEM;
} else {
if (!ctx->rules[addr_ref]) {
ctx->rules[addr_ref] = ir_match_insn(ctx, addr_ref, bb);
}
if (ctx->rules[addr_ref] == IR_LEA_OB || ctx->rules[addr_ref] == IR_SKIP_MEM) {
ctx->rules[op2_insn->op1] = IR_SKIP_MEM;
ctx->rules[addr_ref] = IR_SKIP_MEM;
}
}
&& !ctx->rules[op2_insn->op1]) {
ir_match_fuse_load(ctx, op2_insn->op1, bb);
}
return IR_CMP_AND_BRANCH_INT;
} else {
@@ -1845,25 +1832,8 @@ store_int:
if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) {
if (insn->op1 > bb->start
&& insn->op1 < ref
&& !ctx->rules[insn->op1]
&& ctx->ir_base[insn->op1].op == IR_LOAD
&& ctx->use_lists[insn->op1].count == 2) {
ir_ref addr_ref = ctx->ir_base[insn->op1].op2;
ir_insn *addr_insn = &ctx->ir_base[addr_ref];
if (addr_insn->op == IR_RLOAD ||
(addr_insn->op == IR_C_ADDR &&
(sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr_insn->val.i64)))) {
ctx->rules[insn->op1] = IR_SKIP_MEM;
} else {
if (!ctx->rules[addr_ref]) {
ctx->rules[addr_ref] = ir_match_insn(ctx, addr_ref, bb);
}
if (ctx->rules[addr_ref] == IR_LEA_OB || ctx->rules[addr_ref] == IR_SKIP_MEM) {
ctx->rules[insn->op1] = IR_SKIP_MEM;
ctx->rules[addr_ref] = IR_SKIP_MEM;
}
}
&& !ctx->rules[insn->op1]) {
ir_match_fuse_load(ctx, insn->op1, bb);
}
return IR_GUARD_CMP_INT;
} else {
@@ -1878,27 +1848,13 @@ store_int:
case IR_IJMP:
if (insn->op2 > bb->start
&& insn->op2 < ref
&& !ctx->rules[insn->op2]
&& ctx->ir_base[insn->op2].op == IR_LOAD
&& ctx->use_lists[insn->op1].count == 2) {
ir_ref addr_ref = ctx->ir_base[insn->op2].op2;
ir_insn *addr_insn = &ctx->ir_base[addr_ref];
if (addr_insn->op == IR_RLOAD ||
(addr_insn->op == IR_C_ADDR &&
(sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr_insn->val.i64)))) {
ctx->rules[insn->op2] = IR_SKIP_MEM;
} else {
if (!ctx->rules[addr_ref]) {
ctx->rules[addr_ref] = ir_match_insn(ctx, addr_ref, bb);
}
if (ctx->rules[addr_ref] == IR_LEA_OB || ctx->rules[addr_ref] == IR_SKIP_MEM) {
ctx->rules[insn->op2] = IR_SKIP_MEM;
ctx->rules[addr_ref] = IR_SKIP_MEM;
}
}
&& !ctx->rules[insn->op2]) {
ir_match_fuse_load(ctx, insn->op2, bb);
}
return insn->op;
case IR_INT2FP:
ir_match_fuse_load(ctx, insn->op1, bb);
return insn->op;
default:
break;
}
@@ -2120,6 +2076,41 @@ static void ir_emit_fp_mov(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src)
| ASM_FP_REG_REG_OP movaps, movapd, vmovaps, vmovapd, type, dst, src
}
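/* Decode the address of a fused memory operand: a 32-bit constant
 * address (returns IR_REG_NONE, *offset holds the absolute address),
 * an RLOAD (register held in op2, zero offset), or a reg+const ADD
 * (base register, reloaded from its spill slot if needed, plus a
 * constant offset). */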
static ir_reg ir_fuse_addr(ir_ctx *ctx, ir_ref ref, int32_t *offset)
{
ir_insn *addr_insn = &ctx->ir_base[ref];
ir_reg reg = IR_REG_NONE;
if (addr_insn->op == IR_C_ADDR
&& (sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr_insn->val.i64))) {
*offset = addr_insn->val.i32;
} else if (addr_insn->op == IR_RLOAD) {
reg = addr_insn->op2;
IR_ASSERT(reg != IR_REG_NONE);
*offset = 0;
} else if (addr_insn->op == IR_ADD) {
IR_ASSERT(!IR_IS_CONST_REF(addr_insn->op1) && IR_IS_CONST_REF(addr_insn->op2));
reg = ctx->regs[ref][1];
IR_ASSERT(reg != IR_REG_NONE);
if (reg & IR_REG_SPILL_LOAD) {
reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, IR_ADDR, reg, addr_insn->op1);
}
*offset = ctx->ir_base[addr_insn->op2].val.i32;
} else {
IR_ASSERT(0);
}
return reg;
}
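/* Decode the memory operand of a fused LOAD: assert it really is a
 * LOAD and delegate the address to ir_fuse_addr() above. */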
static ir_reg ir_fuse_load(ir_ctx *ctx, ir_ref ref, int32_t *offset)
{
ir_insn *load_insn = &ctx->ir_base[ref];
IR_ASSERT(load_insn->op == IR_LOAD);
return ir_fuse_addr(ctx, load_insn->op2, offset);
}
static void ir_emit_prologue(ir_ctx *ctx)
{
ir_backend_data *data = ctx->data;
@@ -2286,52 +2277,36 @@ static void ir_emit_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
break;
}
} else {
ir_reg reg = IR_REG_NONE;
int32_t offset = 0;
if (ctx->rules[op2] == IR_SKIP_MEM) {
ir_insn *load_insn = &ctx->ir_base[op2];
ir_insn *addr_insn;
IR_ASSERT(load_insn->op == IR_LOAD);
addr_insn = &ctx->ir_base[load_insn->op2];
if (addr_insn->op == IR_ADD) {
reg = ctx->regs[load_insn->op2][1];
IR_ASSERT(reg != IR_REG_NONE);
if (reg != IR_REG_NONE && (reg & IR_REG_SPILL_LOAD)) {
reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, type, reg, addr_insn->op1);
}
offset = ctx->ir_base[addr_insn->op2].val.i32;
} else {
IR_ASSERT(0);
}
op2_reg = ir_fuse_load(ctx, op2, &offset);
IR_ASSERT(op2_reg != IR_REG_NONE);
} else {
reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
offset = ir_ref_spill_slot(ctx, op2);
}
switch (insn->op) {
case IR_ADD:
case IR_ADD_OV:
| ASM_REG_MEM_OP add, type, def_reg, [Ra(reg)+offset]
| ASM_REG_MEM_OP add, type, def_reg, [Ra(op2_reg)+offset]
break;
case IR_SUB:
case IR_SUB_OV:
| ASM_REG_MEM_OP sub, type, def_reg, [Ra(reg)+offset]
| ASM_REG_MEM_OP sub, type, def_reg, [Ra(op2_reg)+offset]
break;
case IR_MUL:
case IR_MUL_OV:
| ASM_REG_MEM_IMUL type, def_reg, [Ra(reg)+offset]
| ASM_REG_MEM_IMUL type, def_reg, [Ra(op2_reg)+offset]
break;
case IR_OR:
| ASM_REG_MEM_OP or, type, def_reg, [Ra(reg)+offset]
| ASM_REG_MEM_OP or, type, def_reg, [Ra(op2_reg)+offset]
break;
case IR_AND:
| ASM_REG_MEM_OP and, type, def_reg, [Ra(reg)+offset]
| ASM_REG_MEM_OP and, type, def_reg, [Ra(op2_reg)+offset]
break;
case IR_XOR:
| ASM_REG_MEM_OP xor, type, def_reg, [Ra(reg)+offset]
| ASM_REG_MEM_OP xor, type, def_reg, [Ra(op2_reg)+offset]
break;
default:
IR_ASSERT(0 && "NIY binary op");
@@ -2472,7 +2447,7 @@ static void ir_emit_mem_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
} else if (insn->op == IR_STORE) {
reg = ctx->regs[def][2];
if (reg != IR_REG_NONE) {
if (reg != IR_REG_NONE && (reg & IR_REG_SPILL_LOAD)) {
if (reg & IR_REG_SPILL_LOAD) {
reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, type, reg, insn->op2);
}
@@ -2482,7 +2457,7 @@ static void ir_emit_mem_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
if (addr_insn->op == IR_ADD) {
reg = ctx->regs[insn->op2][1];
IR_ASSERT(reg != IR_REG_NONE);
if (reg != IR_REG_NONE && (reg & IR_REG_SPILL_LOAD)) {
if (reg & IR_REG_SPILL_LOAD) {
reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, type, reg, addr_insn->op1);
}
@@ -2676,7 +2651,7 @@ static void ir_emit_mem_mul_div_mod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn)
} else if (insn->op == IR_STORE) {
reg = ctx->regs[def][2];
IR_ASSERT(reg != IR_REG_NONE);
if (reg != IR_REG_NONE && (reg & IR_REG_SPILL_LOAD)) {
if (reg & IR_REG_SPILL_LOAD) {
reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, type, reg, insn->op2);
}
@@ -2778,7 +2753,7 @@ static void ir_emit_mem_shift(ir_ctx *ctx, ir_ref def, ir_insn *insn)
} else if (insn->op == IR_STORE) {
reg = ctx->regs[def][2];
IR_ASSERT(reg != IR_REG_NONE);
if (reg != IR_REG_NONE && (reg & IR_REG_SPILL_LOAD)) {
if (reg & IR_REG_SPILL_LOAD) {
reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, type, reg, insn->op2);
}
@@ -2883,7 +2858,7 @@ static void ir_emit_mem_shift_const(ir_ctx *ctx, ir_ref def, ir_insn *insn)
} else if (insn->op == IR_STORE) {
reg = ctx->regs[def][2];
IR_ASSERT(reg != IR_REG_NONE);
if (reg != IR_REG_NONE && (reg & IR_REG_SPILL_LOAD)) {
if (reg & IR_REG_SPILL_LOAD) {
reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, type, reg, insn->op2);
}
@@ -2981,7 +2956,7 @@ static void ir_emit_mem_op_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
} else if (insn->op == IR_STORE) {
reg = ctx->regs[def][2];
IR_ASSERT(reg != IR_REG_NONE);
if (reg != IR_REG_NONE && (reg & IR_REG_SPILL_LOAD)) {
if (reg & IR_REG_SPILL_LOAD) {
reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, type, reg, insn->op2);
}
@@ -3354,49 +3329,33 @@ static void ir_emit_binop_sse2(ir_ctx *ctx, ir_ref def, ir_insn *insn)
break;
}
} else {
ir_reg reg = IR_REG_NONE;
int32_t offset = 0;
if (ctx->rules[op2] == IR_SKIP_MEM) {
ir_insn *load_insn = &ctx->ir_base[op2];
ir_insn *addr_insn;
IR_ASSERT(load_insn->op == IR_LOAD);
addr_insn = &ctx->ir_base[load_insn->op2];
if (addr_insn->op == IR_ADD) {
reg = ctx->regs[load_insn->op2][1];
IR_ASSERT(reg != IR_REG_NONE);
if (reg != IR_REG_NONE && (reg & IR_REG_SPILL_LOAD)) {
reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, type, reg, addr_insn->op1);
}
offset = ctx->ir_base[addr_insn->op2].val.i32;
} else {
IR_ASSERT(0);
}
op2_reg = ir_fuse_load(ctx, op2, &offset);
IR_ASSERT(op2_reg != IR_REG_NONE);
} else {
reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
offset = ir_ref_spill_slot(ctx, op2);
}
switch (insn->op) {
case IR_ADD:
| ASM_SSE2_REG_MEM_OP addss, addsd, type, def_reg, [Ra(reg)+offset]
| ASM_SSE2_REG_MEM_OP addss, addsd, type, def_reg, [Ra(op2_reg)+offset]
break;
case IR_SUB:
| ASM_SSE2_REG_MEM_OP subss, subsd, type, def_reg, [Ra(reg)+offset]
| ASM_SSE2_REG_MEM_OP subss, subsd, type, def_reg, [Ra(op2_reg)+offset]
break;
case IR_MUL:
| ASM_SSE2_REG_MEM_OP mulss, mulsd, type, def_reg, [Ra(reg)+offset]
| ASM_SSE2_REG_MEM_OP mulss, mulsd, type, def_reg, [Ra(op2_reg)+offset]
break;
case IR_DIV:
| ASM_SSE2_REG_MEM_OP divss, divsd, type, def_reg, [Ra(reg)+offset]
| ASM_SSE2_REG_MEM_OP divss, divsd, type, def_reg, [Ra(op2_reg)+offset]
break;
case IR_MIN:
| ASM_SSE2_REG_MEM_OP minss, minsd, type, def_reg, [Ra(reg)+offset]
| ASM_SSE2_REG_MEM_OP minss, minsd, type, def_reg, [Ra(op2_reg)+offset]
break;
case IR_MAX:
| ASM_SSE2_REG_MEM_OP maxss, maxsd, type, def_reg, [Ra(reg)+offset]
| ASM_SSE2_REG_MEM_OP maxss, maxsd, type, def_reg, [Ra(op2_reg)+offset]
break;
default:
IR_ASSERT(0 && "NIY binary op");
@@ -3484,49 +3443,33 @@ static void ir_emit_binop_avx(ir_ctx *ctx, ir_ref def, ir_insn *insn)
break;
}
} else {
ir_reg reg = IR_REG_NONE;
int32_t offset = 0;
if (ctx->rules[op2] == IR_SKIP_MEM) {
ir_insn *load_insn = &ctx->ir_base[op2];
ir_insn *addr_insn;
IR_ASSERT(load_insn->op == IR_LOAD);
addr_insn = &ctx->ir_base[load_insn->op2];
if (addr_insn->op == IR_ADD) {
reg = ctx->regs[load_insn->op2][1];
IR_ASSERT(reg != IR_REG_NONE);
if (reg != IR_REG_NONE && (reg & IR_REG_SPILL_LOAD)) {
reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, type, reg, addr_insn->op1);
}
offset = ctx->ir_base[addr_insn->op2].val.i32;
} else {
IR_ASSERT(0);
}
op2_reg = ir_fuse_load(ctx, op2, &offset);
IR_ASSERT(op2_reg != IR_REG_NONE);
} else {
reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
offset = ir_ref_spill_slot(ctx, op2);
}
switch (insn->op) {
case IR_ADD:
| ASM_AVX_REG_REG_MEM_OP vaddss, vaddsd, type, def_reg, op1_reg, [Ra(reg)+offset]
| ASM_AVX_REG_REG_MEM_OP vaddss, vaddsd, type, def_reg, op1_reg, [Ra(op2_reg)+offset]
break;
case IR_SUB:
| ASM_AVX_REG_REG_MEM_OP vsubss, vsubsd, type, def_reg, op1_reg, [Ra(reg)+offset]
| ASM_AVX_REG_REG_MEM_OP vsubss, vsubsd, type, def_reg, op1_reg, [Ra(op2_reg)+offset]
break;
case IR_MUL:
| ASM_AVX_REG_REG_MEM_OP vmulss, vmulsd, type, def_reg, op1_reg, [Ra(reg)+offset]
| ASM_AVX_REG_REG_MEM_OP vmulss, vmulsd, type, def_reg, op1_reg, [Ra(op2_reg)+offset]
break;
case IR_DIV:
| ASM_AVX_REG_REG_MEM_OP vdivss, vdivsd, type, def_reg, op1_reg, [Ra(reg)+offset]
| ASM_AVX_REG_REG_MEM_OP vdivss, vdivsd, type, def_reg, op1_reg, [Ra(op2_reg)+offset]
break;
case IR_MIN:
| ASM_AVX_REG_REG_MEM_OP vminss, vminsd, type, def_reg, op1_reg, [Ra(reg)+offset]
| ASM_AVX_REG_REG_MEM_OP vminss, vminsd, type, def_reg, op1_reg, [Ra(op2_reg)+offset]
break;
case IR_MAX:
| ASM_AVX_REG_REG_MEM_OP vmaxss, vmaxsd, type, def_reg, op1_reg, [Ra(reg)+offset]
| ASM_AVX_REG_REG_MEM_OP vmaxss, vmaxsd, type, def_reg, op1_reg, [Ra(op2_reg)+offset]
break;
default:
IR_ASSERT(0 && "NIY binary op");
@@ -3544,47 +3487,23 @@ static void ir_emit_cmp_int_common(ir_ctx *ctx, ir_type type, ir_insn *insn, ir_
dasm_State **Dst = &data->dasm_state;
if (ctx->rules[insn->op1] == IR_SKIP_MEM) {
ir_insn *load_insn = &ctx->ir_base[insn->op1];
ir_insn *addr_insn;
int32_t offset = 0;
IR_ASSERT(load_insn->op == IR_LOAD);
addr_insn = &ctx->ir_base[load_insn->op2];
op1_reg = ir_fuse_load(ctx, insn->op1, &offset);
if (op2_reg != IR_REG_NONE) {
if (addr_insn->op == IR_C_ADDR
&& (sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr_insn->val.i64))) {
| ASM_MEM_REG_OP cmp, type, [addr_insn->val.i32], op2_reg
} else if (addr_insn->op == IR_RLOAD) {
op1_reg = ctx->regs[insn->op1][2];
IR_ASSERT(op1_reg != IR_REG_NONE);
| ASM_MEM_REG_OP cmp, type, [Ra(op1_reg)], op2_reg
} else if (addr_insn->op == IR_ADD) {
op1_reg = ctx->regs[load_insn->op2][1];
IR_ASSERT(op1_reg != IR_REG_NONE && IR_IS_CONST_REF(addr_insn->op2));
| ASM_MEM_REG_OP cmp, type, [Ra(op1_reg)+ctx->ir_base[addr_insn->op2].val.i32], op2_reg
if (op1_reg == IR_REG_NONE) {
| ASM_MEM_REG_OP cmp, type, [offset], op2_reg
} else {
IR_ASSERT(0);
| ASM_MEM_REG_OP cmp, type, [Ra(op1_reg)+offset], op2_reg
}
} else {
IR_ASSERT(!IR_IS_CONST_REF(op1));
IR_ASSERT(IR_IS_CONST_REF(op2));
IR_ASSERT(IR_IS_32BIT(ctx->ir_base[op2].type, ctx->ir_base[op2].val));
if (addr_insn->op == IR_C_ADDR
&& (sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr_insn->val.i64))) {
| ASM_MEM_IMM_OP cmp, type, [addr_insn->val.i32], ctx->ir_base[op2].val.i32
} else if (addr_insn->op == IR_RLOAD) {
op1_reg = ctx->regs[insn->op1][2];
IR_ASSERT(op1_reg != IR_REG_NONE);
| ASM_MEM_IMM_OP cmp, type, [Ra(op1_reg)], ctx->ir_base[op2].val.i32
} else if (addr_insn->op == IR_ADD) {
op1_reg = ctx->regs[load_insn->op2][1];
if (op1_reg & IR_REG_SPILL_LOAD) {
op1_reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, type, op1_reg, addr_insn->op1);
}
IR_ASSERT(op1_reg != IR_REG_NONE && IR_IS_CONST_REF(addr_insn->op2));
| ASM_MEM_IMM_OP cmp, type, [Ra(op1_reg)+ctx->ir_base[addr_insn->op2].val.i32], ctx->ir_base[op2].val.i32
if (op1_reg == IR_REG_NONE) {
| ASM_MEM_IMM_OP cmp, type, [offset], ctx->ir_base[op2].val.i32
} else {
IR_ASSERT(0);
| ASM_MEM_IMM_OP cmp, type, [Ra(op1_reg)+offset], ctx->ir_base[op2].val.i32
}
}
} else if (op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE) {
@@ -3592,8 +3511,22 @@ static void ir_emit_cmp_int_common(ir_ctx *ctx, ir_type type, ir_insn *insn, ir_
} else if (op1_reg != IR_REG_NONE) {
if (IR_IS_CONST_REF(op2) && ctx->ir_base[op2].val.u64 == 0) {
| ASM_REG_REG_OP test, type, op1_reg, op1_reg
} else if (IR_IS_CONST_REF(op2)) {
ir_insn *val_insn = &ctx->ir_base[op2];
IR_ASSERT(IR_IS_32BIT(val_insn->type, val_insn->val));
| ASM_REG_IMM_OP cmp, type, op1_reg, val_insn->val.i32
} else {
| ASM_REG_MREF_OP cmp, type, op1_reg, op2
int32_t offset = 0;
if (ctx->rules[op2] == IR_SKIP_MEM) {
op2_reg = ir_fuse_load(ctx, op2, &offset);
IR_ASSERT(op2_reg != IR_REG_NONE);
} else {
op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
offset = ir_ref_spill_slot(ctx, op2);
}
| ASM_REG_MEM_OP cmp, type, op1_reg, [Ra(op2_reg)+offset]
}
} else if (op2_reg != IR_REG_NONE) {
| ASM_MREF_REG_OP cmp, type, op1, op2_reg
@@ -3699,7 +3632,7 @@ static ir_op ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref cmp_ref, ir_insn *cmp_ins
ir_ref op1, op2;
ir_reg op1_reg, op2_reg;
if (op == IR_LT || op == IR_LE) {
if ((ctx->regs[cmp_ref][2] != IR_REG_NONE) && (op == IR_LT || op == IR_LE)) {
/* swap operands to avoid P flag check */
op ^= 3;
op1 = cmp_insn->op2;
@@ -3725,6 +3658,7 @@ static ir_op ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref cmp_ref, ir_insn *cmp_ins
op2_reg = tmp_reg;
}
IR_ASSERT(op1_reg != IR_REG_NONE);
if ((op1_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(op1)) {
op1_reg &= ~IR_REG_SPILL_LOAD;
@@ -3738,8 +3672,23 @@ static ir_op ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref cmp_ref, ir_insn *cmp_ins
}
}
| ASM_FP_REG_REG_OP ucomiss, ucomisd, vucomiss, vucomisd, type, op1_reg, op2_reg
} else if (IR_IS_CONST_REF(op2)) {
ir_insn *val_insn = &ctx->ir_base[op2];
int label = ctx->cfg_blocks_count - op2;
val_insn->const_flags |= IR_CONST_EMIT;
| ASM_FP_REG_MEM_OP ucomiss, ucomisd, vucomiss, vucomisd, type, op1_reg, [=>label]
} else {
| ASM_FP_REG_MREF_OP ucomiss, ucomisd, vucomiss, vucomisd, type, op1_reg, op2
int32_t offset = 0;
if (ctx->rules[op2] == IR_SKIP_MEM) {
op2_reg = ir_fuse_load(ctx, op2, &offset);
IR_ASSERT(op2_reg != IR_REG_NONE);
} else {
op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
offset = ir_ref_spill_slot(ctx, op2);
}
| ASM_FP_REG_MEM_OP ucomiss, ucomisd, vucomiss, vucomisd, type, op1_reg, [Ra(op2_reg)+offset]
}
return op;
}
@@ -3818,15 +3767,15 @@ static void ir_emit_jcc(ir_ctx *ctx, uint8_t op, int b, ir_ref def, ir_insn *ins
int true_block, false_block, next_block;
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
bool swap = 0;
ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block);
if (true_block == next_block) {
if (int_cmp || (op != IR_GT && op != IR_GE)) {
/* swap to avoid unconditional JMP if this doesn't introduce additional JP instruction */
op ^= 1; // reverse
true_block = false_block;
false_block = 0;
}
/* swap to avoid unconditional JMP */
op ^= 1; // reverse
true_block = false_block;
false_block = 0;
swap = 1;
} else if (false_block == next_block) {
false_block = 0;
}
@@ -3884,7 +3833,9 @@ static void ir_emit_jcc(ir_ctx *ctx, uint8_t op, int b, ir_ref def, ir_insn *ins
| jp =>true_block
break;
case IR_LT:
if (!false_block) {
if (swap) {
| jb =>true_block
} else if (!false_block) {
| jp >1
| jb =>true_block
|1:
@@ -3894,10 +3845,15 @@ static void ir_emit_jcc(ir_ctx *ctx, uint8_t op, int b, ir_ref def, ir_insn *ins
}
break;
case IR_GE:
if (swap) {
| jp =>true_block
}
| jae =>true_block
break;
case IR_LE:
if (!false_block) {
if (swap) {
| jbe =>true_block
} else if (!false_block) {
| jp >1
| jbe =>true_block
|1:
@@ -3907,6 +3863,9 @@ static void ir_emit_jcc(ir_ctx *ctx, uint8_t op, int b, ir_ref def, ir_insn *ins
}
break;
case IR_GT:
if (swap) {
| jp =>true_block
}
| ja =>true_block
break;
// case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break;
@@ -4440,22 +4399,29 @@ static void ir_emit_int2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
|.endif
}
} else {
int32_t offset = ir_ref_spill_slot(ctx, insn->op1);
ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
int32_t offset = 0;
if (ctx->rules[insn->op1] == IR_SKIP_MEM) {
op1_reg = ir_fuse_load(ctx, insn->op1, &offset);
IR_ASSERT(op1_reg != IR_REG_NONE);
} else {
op1_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
offset = ir_ref_spill_slot(ctx, insn->op1);
}
if (!src64) {
if (dst_type == IR_DOUBLE) {
if (ctx->flags & IR_AVX) {
| vcvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(fp)+offset]
| vcvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(op1_reg)+offset]
} else {
| cvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(fp)+offset]
| cvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(op1_reg)+offset]
}
} else {
IR_ASSERT(dst_type == IR_FLOAT);
if (ctx->flags & IR_AVX) {
| vcvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(fp)+offset]
| vcvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(op1_reg)+offset]
} else {
| cvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(fp)+offset]
| cvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(op1_reg)+offset]
}
}
} else {
@@ -4463,16 +4429,16 @@ static void ir_emit_int2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
|.if X64
if (dst_type == IR_DOUBLE) {
if (ctx->flags & IR_AVX) {
| vcvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword [Ra(fp)+offset]
| vcvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword [Ra(op1_reg)+offset]
} else {
| cvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), qword [Ra(fp)+offset]
| cvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), qword [Ra(op1_reg)+offset]
}
} else {
IR_ASSERT(dst_type == IR_FLOAT);
if (ctx->flags & IR_AVX) {
| vcvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword [Ra(fp)+offset]
| vcvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword [Ra(op1_reg)+offset]
} else {
| cvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), qword [Ra(fp)+offset]
| cvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), qword [Ra(op1_reg)+offset]
}
}
|.endif
@@ -4856,21 +4822,9 @@ static void ir_emit_load_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
return;
}
} else if (ctx->rules[insn->op2] == IR_SKIP_MEM) {
ir_insn *addr_insn = &ctx->ir_base[insn->op2];
if (addr_insn->op == IR_ADD) {
op2_reg = ctx->regs[insn->op2][1];
IR_ASSERT(op2_reg != IR_REG_NONE);
if (op2_reg != IR_REG_NONE && (op2_reg & IR_REG_SPILL_LOAD)) {
op2_reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, IR_ADDR, op2_reg, addr_insn->op1);
}
offset = ctx->ir_base[addr_insn->op2].val.i32;
} else {
IR_ASSERT(0);
}
}
if (op2_reg == IR_REG_NONE) {
op2_reg = ir_fuse_addr(ctx, insn->op2, &offset);
IR_ASSERT(op2_reg != IR_REG_NONE);
} else if (op2_reg == IR_REG_NONE) {
op2_reg = def_reg;
}
if ((op2_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(insn->op2)) {
@@ -4907,19 +4861,8 @@ static void ir_emit_load_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
return;
}
} else if (ctx->rules[insn->op2] == IR_SKIP_MEM) {
ir_insn *addr_insn = &ctx->ir_base[insn->op2];
if (addr_insn->op == IR_ADD) {
op2_reg = ctx->regs[insn->op2][1];
IR_ASSERT(op2_reg != IR_REG_NONE);
if (op2_reg != IR_REG_NONE && (op2_reg & IR_REG_SPILL_LOAD)) {
op2_reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, IR_ADDR, op2_reg, addr_insn->op1);
}
offset = ctx->ir_base[addr_insn->op2].val.i32;
} else {
IR_ASSERT(0);
}
op2_reg = ir_fuse_addr(ctx, insn->op2, &offset);
IR_ASSERT(op2_reg != IR_REG_NONE);
} else {
IR_ASSERT(op2_reg != IR_REG_NONE);
if ((op2_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(insn->op2)) {
@@ -4962,19 +4905,8 @@ static void ir_emit_store_int(ir_ctx *ctx, ir_reg ref, ir_insn *insn)
return;
}
} else if (ctx->rules[insn->op2] == IR_SKIP_MEM) {
ir_insn *addr_insn = &ctx->ir_base[insn->op2];
if (addr_insn->op == IR_ADD) {
op2_reg = ctx->regs[insn->op2][1];
IR_ASSERT(op2_reg != IR_REG_NONE);
if (op2_reg != IR_REG_NONE && (op2_reg & IR_REG_SPILL_LOAD)) {
op2_reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, IR_ADDR, op2_reg, addr_insn->op1);
}
offset = ctx->ir_base[addr_insn->op2].val.i32;
} else {
IR_ASSERT(0);
}
op2_reg = ir_fuse_addr(ctx, insn->op2, &offset);
IR_ASSERT(op2_reg != IR_REG_NONE);
} else {
IR_ASSERT(op2_reg != IR_REG_NONE);
if ((op2_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(insn->op2)) {
@@ -5018,19 +4950,8 @@ static void ir_emit_store_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
return;
}
} else if (ctx->rules[insn->op2] == IR_SKIP_MEM) {
ir_insn *addr_insn = &ctx->ir_base[insn->op2];
if (addr_insn->op == IR_ADD) {
op2_reg = ctx->regs[insn->op2][1];
IR_ASSERT(op2_reg != IR_REG_NONE);
if (op2_reg != IR_REG_NONE && (op2_reg & IR_REG_SPILL_LOAD)) {
op2_reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, IR_ADDR, op2_reg, addr_insn->op1);
}
offset = ctx->ir_base[addr_insn->op2].val.i32;
} else {
IR_ASSERT(0);
}
op2_reg = ir_fuse_addr(ctx, insn->op2, &offset);
IR_ASSERT(op2_reg != IR_REG_NONE);
} else {
IR_ASSERT(op2_reg != IR_REG_NONE);
if ((op2_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(insn->op2)) {
@@ -5863,20 +5784,13 @@ static void ir_emit_ijmp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
|.endif
}
} else if (ctx->rules[insn->op2] == IR_SKIP_MEM) {
ir_insn *load_insn = &ctx->ir_base[insn->op2];
ir_insn *addr_insn;
int32_t offset;
IR_ASSERT(load_insn->op == IR_LOAD);
addr_insn = &ctx->ir_base[load_insn->op2];
if (addr_insn->op == IR_C_ADDR
&& (sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr_insn->val.i64))) {
| jmp aword [addr_insn->val.i32]
} else if (addr_insn->op == IR_RLOAD) {
op2_reg = ctx->regs[insn->op2][2];
IR_ASSERT(op2_reg != IR_REG_NONE);
| jmp aword [Ra(op2_reg)]
op2_reg = ir_fuse_load(ctx, insn->op2, &offset);
if (op2_reg == IR_REG_NONE) {
| jmp aword [offset]
} else {
IR_ASSERT(0);
| jmp aword [Ra(op2_reg)+offset]
}
} else if (op2_reg != IR_REG_NONE) {
if (op2_reg & IR_REG_SPILL_LOAD) {

@@ -59,7 +59,7 @@ test:
movsd %xmm0, (%rsp)
movsd %xmm1, 8(%rsp)
movsd 8(%rsp), %xmm0
subsd .L5(%rip), %xmm0
subsd .L4(%rip), %xmm0
movsd %xmm0, 0x10(%rsp)
xorpd %xmm0, %xmm0
movsd %xmm0, 0x18(%rsp)
@@ -95,20 +95,18 @@ test:
addsd 0x38(%rsp), %xmm0
movsd %xmm0, 0x68(%rsp)
movsd 0x68(%rsp), %xmm0
ucomisd .L6(%rip), %xmm0
ja .L2
jmp .L3
.L2:
ucomisd .L5(%rip), %xmm0
jbe .L2
movl 0x2c(%rsp), %eax
addq $0x70, %rsp
retq
.L3:
.L2:
cmpl $0x3e8, 0x2c(%rsp)
jle .L4
jle .L3
xorl %eax, %eax
addq $0x70, %rsp
retq
.L4:
.L3:
movsd 0x60(%rsp), %xmm0
movsd %xmm0, 0x18(%rsp)
movsd 0x50(%rsp), %xmm0
@@ -117,8 +115,8 @@ test:
movl %eax, 0x28(%rsp)
jmp .L1
.rodata
.db 0x90, 0x90, 0x90, 0x90
.L5:
.db 0x90, 0x90, 0x90, 0x90, 0x90, 0x90
.L4:
.db 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x3f
.L6:
.L5:
.db 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x40

@@ -79,7 +79,7 @@ test:
movsd %xmm0, (%rsp)
movsd %xmm1, 8(%rsp)
movsd 8(%rsp), %xmm0
subsd .L5(%rip), %xmm0
subsd .L4(%rip), %xmm0
movsd %xmm0, 0x10(%rsp)
movsd (%rsp), %xmm0
movsd %xmm0, 0x18(%rsp)
@@ -117,23 +117,22 @@ test:
addsd 0x40(%rsp), %xmm0
movsd %xmm0, 0x60(%rsp)
movsd 0x60(%rsp), %xmm0
ucomisd .L6(%rip), %xmm0
ja .L2
jmp .L3
.L2:
ucomisd .L5(%rip), %xmm0
jbe .L2
movl 0x30(%rsp), %eax
addq $0x68, %rsp
retq
.L3:
.L2:
cmpl $0x3e8, 0x30(%rsp)
jle .L4
jle .L3
xorl %eax, %eax
addq $0x68, %rsp
retq
.L4:
.L3:
jmp .L1
.rodata
.L5:
.db 0x90, 0x90
.L4:
.db 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x3f
.L6:
.L5:
.db 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x40