From 47083e0f9f2195cad7567d70b75ebd8e27a4839e Mon Sep 17 00:00:00 2001 From: Dmitry Stogov Date: Thu, 25 Aug 2022 18:16:17 +0300 Subject: [PATCH] Improve LOAD fusion --- ir_aarch64.dasc | 26 +- ir_x86.dasc | 622 ++++++++++++++++-------------------- tests/debug/test-O0.irt | 20 +- tests/debug/test_var-O0.irt | 19 +- 4 files changed, 304 insertions(+), 383 deletions(-) diff --git a/ir_aarch64.dasc b/ir_aarch64.dasc index d4cf918..389df2b 100644 --- a/ir_aarch64.dasc +++ b/ir_aarch64.dasc @@ -2459,15 +2459,15 @@ static void ir_emit_jcc(ir_ctx *ctx, uint8_t op, int b, ir_ref def, ir_insn *ins int true_block, false_block, next_block; ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; + bool swap = 0; ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block); if (true_block == next_block) { - if (int_cmp || (op != IR_GT && op != IR_GE)) { - /* swap to avoid unconditional JMP if this doesn't introduce additional JP instruction */ - op ^= 1; // reverse - true_block = false_block; - false_block = 0; - } + /* swap to avoid unconditional JMP */ + op ^= 1; // reverse + true_block = false_block; + false_block = 0; + swap = 1; } else if (false_block == next_block) { false_block = 0; } @@ -2525,7 +2525,9 @@ static void ir_emit_jcc(ir_ctx *ctx, uint8_t op, int b, ir_ref def, ir_insn *ins | bvs =>true_block break; case IR_LT: - if (!false_block) { + if (swap) { + | blo =>true_block + } else if (!false_block) { | bvs >1 | blo =>true_block |1: @@ -2535,10 +2537,15 @@ static void ir_emit_jcc(ir_ctx *ctx, uint8_t op, int b, ir_ref def, ir_insn *ins } break; case IR_GE: + if (swap) { + | bvs =>true_block + } | bhs =>true_block break; case IR_LE: - if (!false_block) { + if (swap) { + | bls =>true_block + } else if (!false_block) { | bvs >1 | bls =>true_block |1: @@ -2548,6 +2555,9 @@ static void ir_emit_jcc(ir_ctx *ctx, uint8_t op, int b, ir_ref def, ir_insn *ins } break; case IR_GT: + if (swap) { + | bvs =>true_block + } | bhi =>true_block break; // 
case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break; diff --git a/ir_x86.dasc b/ir_x86.dasc index ea140d1..52891c9 100644 --- a/ir_x86.dasc +++ b/ir_x86.dasc @@ -1180,6 +1180,83 @@ cmp_fp: return 0; } +static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref, ir_block *bb); + +static void ir_match_fuse_addr(ir_ctx *ctx, ir_ref addr_ref, ir_block *bb) +{ + if (!IR_IS_CONST_REF(addr_ref) && ctx->use_lists[addr_ref].count == 1) { + if (!ctx->rules[addr_ref]) { + ctx->rules[addr_ref] = ir_match_insn(ctx, addr_ref, bb); + } + if (ctx->rules[addr_ref] == IR_LEA_OB) { + ctx->rules[addr_ref] = IR_SKIP_MEM; + } + } +} + +static void ir_match_fuse_load(ir_ctx *ctx, ir_ref ref, ir_block *bb) +{ + if (ctx->ir_base[ref].op == IR_LOAD + && ctx->use_lists[ref].count == 2) { + ir_ref addr_ref = ctx->ir_base[ref].op2; + ir_insn *addr_insn = &ctx->ir_base[addr_ref]; + + if (addr_insn->op == IR_RLOAD || + (addr_insn->op == IR_C_ADDR && + (sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr_insn->val.i64)))) { + ctx->rules[ref] = IR_SKIP_MEM; + } else if (!IR_IS_CONST_REF(addr_ref) /*&& ctx->use_lists[addr_ref].count == 1*/) { + if (!ctx->rules[addr_ref]) { + ctx->rules[addr_ref] = ir_match_insn(ctx, addr_ref, bb); + } + if (ctx->rules[addr_ref] == IR_LEA_OB || ctx->rules[addr_ref] == IR_SKIP_MEM) { + ctx->rules[ref] = IR_SKIP_MEM; + ctx->rules[addr_ref] = IR_SKIP_MEM; + } + } + } +} + +static void ir_match_swap_cmp(ir_ctx *ctx, ir_insn *insn) +{ + if ((ctx->flags & IR_OPT_CODEGEN) + && !IR_IS_CONST_REF(insn->op2) + && !IR_IS_CONST_REF(insn->op1)) { + ir_insn *op1_insn = &ctx->ir_base[insn->op1]; + ir_insn *op2_insn = &ctx->ir_base[insn->op2]; + + if ((op1_insn->op == IR_VLOAD || op1_insn->op == IR_LOAD) + && (op2_insn->op != IR_VLOAD && op2_insn->op != IR_LOAD)) { + /* swap for better load fusion */ + ir_ref tmp = insn->op1; + insn->op1 = insn->op2; + insn->op2 = tmp; + if (insn->op != IR_EQ && insn->op != IR_NE) { + insn->op ^= 3; + } + } + } +} + +static void 
ir_match_swap_commutative(ir_ctx *ctx, ir_insn *insn) +{ + if ((ctx->flags & IR_OPT_CODEGEN) + && (ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE) + && !IR_IS_CONST_REF(insn->op2) + && !IR_IS_CONST_REF(insn->op1)) { + ir_insn *op1_insn = &ctx->ir_base[insn->op1]; + ir_insn *op2_insn = &ctx->ir_base[insn->op2]; + + if ((op1_insn->op == IR_VLOAD || op1_insn->op == IR_LOAD) + && (op2_insn->op != IR_VLOAD && op2_insn->op != IR_LOAD)) { + /* swap for better load fusion */ + ir_ref tmp = insn->op1; + insn->op1 = insn->op2; + insn->op2 = tmp; + } + } +} + static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref, ir_block *bb) { ir_insn *op2_insn; @@ -1198,28 +1275,18 @@ static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref, ir_block *bb) case IR_UGE: case IR_ULE: case IR_UGT: + ir_match_swap_cmp(ctx, insn); + if (insn->op2 > bb->start + && insn->op2 < ref + && !ctx->rules[insn->op2]) { + ir_match_fuse_load(ctx, insn->op2, bb); + } if (IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) { - if (insn->op1 > bb->start + if (IR_IS_CONST_REF(insn->op2) + && insn->op1 > bb->start && insn->op1 < ref - && !ctx->rules[insn->op1] - && ctx->ir_base[insn->op1].op == IR_LOAD - && ctx->use_lists[insn->op1].count == 2) { - ir_ref addr_ref = ctx->ir_base[insn->op1].op2; - ir_insn *addr_insn = &ctx->ir_base[addr_ref]; - - if (addr_insn->op == IR_RLOAD || - (addr_insn->op == IR_C_ADDR && - (sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr_insn->val.i64)))) { - ctx->rules[insn->op1] = IR_SKIP_MEM; - } else { - if (!ctx->rules[addr_ref]) { - ctx->rules[addr_ref] = ir_match_insn(ctx, addr_ref, bb); - } - if (ctx->rules[addr_ref] == IR_LEA_OB || ctx->rules[addr_ref] == IR_SKIP_MEM) { - ctx->rules[insn->op1] = IR_SKIP_MEM; - ctx->rules[addr_ref] = IR_SKIP_MEM; - } - } + && !ctx->rules[insn->op1]) { + ir_match_fuse_load(ctx, insn->op1, bb); } return IR_CMP_INT; } else { @@ -1298,65 +1365,13 @@ static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref, ir_block *bb) return IR_LEA_IB; // lea ret, 
[op1.reg+op2.reg] } binop_int: - if ((ctx->flags & IR_OPT_CODEGEN) - && !IR_IS_CONST_REF(insn->op2) - && !IR_IS_CONST_REF(insn->op1)) { - if (ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE) { - ir_insn *op1_insn = &ctx->ir_base[insn->op1]; - ir_insn *op2_insn = &ctx->ir_base[insn->op2]; - - if ((op1_insn->op == IR_VLOAD || op1_insn->op == IR_LOAD) - && (op2_insn->op != IR_VLOAD && op2_insn->op != IR_LOAD)) { - /* swap for better load fusion */ - ir_ref tmp = insn->op1; - insn->op1 = insn->op2; - insn->op2 = tmp; - } - } - if (ctx->ir_base[insn->op2].op == IR_LOAD - && ctx->use_lists[insn->op2].count == 2) { - ir_ref addr_ref = ctx->ir_base[insn->op2].op2; - - if (!ctx->rules[addr_ref]) { - ctx->rules[addr_ref] = ir_match_insn(ctx, addr_ref, bb); - } - if (ctx->rules[addr_ref] == IR_LEA_OB || ctx->rules[addr_ref] == IR_SKIP_MEM) { - ctx->rules[insn->op2] = IR_SKIP_MEM; - ctx->rules[addr_ref] = IR_SKIP_MEM; - } - } - } + ir_match_swap_commutative(ctx, insn); + ir_match_fuse_load(ctx, insn->op2, bb); return IR_BINOP_INT; } else { binop_fp: - if ((ctx->flags & IR_OPT_CODEGEN) - && !IR_IS_CONST_REF(insn->op2) - && !IR_IS_CONST_REF(insn->op1)) { - if (ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE) { - ir_insn *op1_insn = &ctx->ir_base[insn->op1]; - ir_insn *op2_insn = &ctx->ir_base[insn->op2]; - - if ((op1_insn->op == IR_VLOAD || op1_insn->op == IR_LOAD) - && (op2_insn->op != IR_VLOAD && op2_insn->op != IR_LOAD)) { - /* swap for better load fusion */ - ir_ref tmp = insn->op1; - insn->op1 = insn->op2; - insn->op2 = tmp; - } - } - if (ctx->ir_base[insn->op2].op == IR_LOAD - && ctx->use_lists[insn->op2].count == 2) { - ir_ref addr_ref = ctx->ir_base[insn->op2].op2; - - if (!ctx->rules[addr_ref]) { - ctx->rules[addr_ref] = ir_match_insn(ctx, addr_ref, bb); - } - if (ctx->rules[addr_ref] == IR_LEA_OB || ctx->rules[addr_ref] == IR_SKIP_MEM) { - ctx->rules[insn->op2] = IR_SKIP_MEM; - ctx->rules[addr_ref] = IR_SKIP_MEM; - } - } - } + ir_match_swap_commutative(ctx, insn); + 
ir_match_fuse_load(ctx, insn->op2, bb); if (ctx->flags & IR_AVX) { return IR_BINOP_AVX; } else { @@ -1709,16 +1724,7 @@ store_int: } break; case IR_LOAD: - if (ctx->use_lists[insn->op2].count == 1) { - ir_ref addr_ref = insn->op2; - - if (!ctx->rules[addr_ref]) { - ctx->rules[addr_ref] = ir_match_insn(ctx, addr_ref, bb); - } - if (ctx->rules[addr_ref] == IR_LEA_OB) { - ctx->rules[addr_ref] = IR_SKIP_MEM; - } - } + ir_match_fuse_addr(ctx, insn->op2, bb); if (IR_IS_TYPE_INT(insn->type)) { return IR_LOAD_INT; } else { @@ -1726,16 +1732,7 @@ store_int: } break; case IR_STORE: - if (ctx->use_lists[insn->op2].count == 1) { - ir_ref addr_ref = insn->op2; - - if (!ctx->rules[addr_ref]) { - ctx->rules[addr_ref] = ir_match_insn(ctx, addr_ref, bb); - } - if (ctx->rules[addr_ref] == IR_LEA_OB) { - ctx->rules[addr_ref] = IR_SKIP_MEM; - } - } + ir_match_fuse_addr(ctx, insn->op2, bb); if (IR_IS_TYPE_INT(ctx->ir_base[insn->op3].type)) { store_rule = IR_STORE_INT; load_op = IR_LOAD; @@ -1798,29 +1795,19 @@ store_int: if (insn->op2 > bb->start && insn->op2 < ref && ctx->use_lists[insn->op2].count == 1) { op2_insn = &ctx->ir_base[insn->op2]; if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT) { + ir_match_swap_cmp(ctx, op2_insn); + if (op2_insn->op2 > bb->start + && op2_insn->op2 < ref + && !ctx->rules[op2_insn->op2]) { + ir_match_fuse_load(ctx, op2_insn->op2, bb); + } ctx->rules[insn->op2] = IR_SKIP; if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) { - if (op2_insn->op1 > bb->start + if (IR_IS_CONST_REF(op2_insn->op2) + && op2_insn->op1 > bb->start && op2_insn->op1 < ref - && !ctx->rules[op2_insn->op1] - && ctx->ir_base[op2_insn->op1].op == IR_LOAD - && ctx->use_lists[op2_insn->op1].count == 2) { - ir_ref addr_ref = ctx->ir_base[op2_insn->op1].op2; - ir_insn *addr_insn = &ctx->ir_base[addr_ref]; - - if (addr_insn->op == IR_RLOAD || - (addr_insn->op == IR_C_ADDR && - (sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr_insn->val.i64)))) { - ctx->rules[op2_insn->op1] = IR_SKIP_MEM; - 
} else { - if (!ctx->rules[addr_ref]) { - ctx->rules[addr_ref] = ir_match_insn(ctx, addr_ref, bb); - } - if (ctx->rules[addr_ref] == IR_LEA_OB || ctx->rules[addr_ref] == IR_SKIP_MEM) { - ctx->rules[op2_insn->op1] = IR_SKIP_MEM; - ctx->rules[addr_ref] = IR_SKIP_MEM; - } - } + && !ctx->rules[op2_insn->op1]) { + ir_match_fuse_load(ctx, op2_insn->op1, bb); } return IR_CMP_AND_BRANCH_INT; } else { @@ -1845,25 +1832,8 @@ store_int: if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) { if (insn->op1 > bb->start && insn->op1 < ref - && !ctx->rules[insn->op1] - && ctx->ir_base[insn->op1].op == IR_LOAD - && ctx->use_lists[insn->op1].count == 2) { - ir_ref addr_ref = ctx->ir_base[insn->op1].op2; - ir_insn *addr_insn = &ctx->ir_base[addr_ref]; - - if (addr_insn->op == IR_RLOAD || - (addr_insn->op == IR_C_ADDR && - (sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr_insn->val.i64)))) { - ctx->rules[insn->op1] = IR_SKIP_MEM; - } else { - if (!ctx->rules[addr_ref]) { - ctx->rules[addr_ref] = ir_match_insn(ctx, addr_ref, bb); - } - if (ctx->rules[addr_ref] == IR_LEA_OB || ctx->rules[addr_ref] == IR_SKIP_MEM) { - ctx->rules[insn->op1] = IR_SKIP_MEM; - ctx->rules[addr_ref] = IR_SKIP_MEM; - } - } + && !ctx->rules[insn->op1]) { + ir_match_fuse_load(ctx, insn->op1, bb); } return IR_GUARD_CMP_INT; } else { @@ -1878,27 +1848,13 @@ store_int: case IR_IJMP: if (insn->op2 > bb->start && insn->op2 < ref - && !ctx->rules[insn->op2] - && ctx->ir_base[insn->op2].op == IR_LOAD - && ctx->use_lists[insn->op1].count == 2) { - ir_ref addr_ref = ctx->ir_base[insn->op2].op2; - ir_insn *addr_insn = &ctx->ir_base[addr_ref]; - - if (addr_insn->op == IR_RLOAD || - (addr_insn->op == IR_C_ADDR && - (sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr_insn->val.i64)))) { - ctx->rules[insn->op2] = IR_SKIP_MEM; - } else { - if (!ctx->rules[addr_ref]) { - ctx->rules[addr_ref] = ir_match_insn(ctx, addr_ref, bb); - } - if (ctx->rules[addr_ref] == IR_LEA_OB || ctx->rules[addr_ref] == IR_SKIP_MEM) { - ctx->rules[insn->op2] 
= IR_SKIP_MEM; - ctx->rules[addr_ref] = IR_SKIP_MEM; - } - } + && !ctx->rules[insn->op2]) { + ir_match_fuse_load(ctx, insn->op2, bb); } return insn->op; + case IR_INT2FP: + ir_match_fuse_load(ctx, insn->op1, bb); + return insn->op; default: break; } @@ -2120,6 +2076,41 @@ static void ir_emit_fp_mov(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src) | ASM_FP_REG_REG_OP movaps, movapd, vmovaps, vmovapd, type, dst, src } +static ir_reg ir_fuse_addr(ir_ctx *ctx, ir_ref ref, int32_t *offset) +{ + ir_insn *addr_insn = &ctx->ir_base[ref]; + ir_reg reg = IR_REG_NONE; + + if (addr_insn->op == IR_C_ADDR + && (sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr_insn->val.i64))) { + *offset = addr_insn->val.i32; + } else if (addr_insn->op == IR_RLOAD) { + reg = addr_insn->op2; + IR_ASSERT(reg != IR_REG_NONE); + *offset = 0; + } else if (addr_insn->op == IR_ADD) { + IR_ASSERT(!IR_IS_CONST_REF(addr_insn->op1) && IR_IS_CONST_REF(addr_insn->op2)); + reg = ctx->regs[ref][1]; + IR_ASSERT(reg != IR_REG_NONE); + if (reg & IR_REG_SPILL_LOAD) { + reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, IR_ADDR, reg, addr_insn->op1); + } + *offset = ctx->ir_base[addr_insn->op2].val.i32; + } else { + IR_ASSERT(0); + } + return reg; +} + +static ir_reg ir_fuse_load(ir_ctx *ctx, ir_ref ref, int32_t *offset) +{ + ir_insn *load_insn = &ctx->ir_base[ref]; + + IR_ASSERT(load_insn->op == IR_LOAD); + return ir_fuse_addr(ctx, load_insn->op2, offset); +} + static void ir_emit_prologue(ir_ctx *ctx) { ir_backend_data *data = ctx->data; @@ -2286,52 +2277,36 @@ static void ir_emit_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) break; } } else { - ir_reg reg = IR_REG_NONE; int32_t offset = 0; if (ctx->rules[op2] == IR_SKIP_MEM) { - ir_insn *load_insn = &ctx->ir_base[op2]; - ir_insn *addr_insn; - - IR_ASSERT(load_insn->op == IR_LOAD); - addr_insn = &ctx->ir_base[load_insn->op2]; - - if (addr_insn->op == IR_ADD) { - reg = ctx->regs[load_insn->op2][1]; - IR_ASSERT(reg != IR_REG_NONE); - if (reg != IR_REG_NONE && (reg & 
IR_REG_SPILL_LOAD)) { - reg &= ~IR_REG_SPILL_LOAD; - ir_emit_load(ctx, type, reg, addr_insn->op1); - } - offset = ctx->ir_base[addr_insn->op2].val.i32; - } else { - IR_ASSERT(0); - } + op2_reg = ir_fuse_load(ctx, op2, &offset); + IR_ASSERT(op2_reg != IR_REG_NONE); } else { - reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; offset = ir_ref_spill_slot(ctx, op2); } switch (insn->op) { case IR_ADD: case IR_ADD_OV: - | ASM_REG_MEM_OP add, type, def_reg, [Ra(reg)+offset] + | ASM_REG_MEM_OP add, type, def_reg, [Ra(op2_reg)+offset] break; case IR_SUB: case IR_SUB_OV: - | ASM_REG_MEM_OP sub, type, def_reg, [Ra(reg)+offset] + | ASM_REG_MEM_OP sub, type, def_reg, [Ra(op2_reg)+offset] break; case IR_MUL: case IR_MUL_OV: - | ASM_REG_MEM_IMUL type, def_reg, [Ra(reg)+offset] + | ASM_REG_MEM_IMUL type, def_reg, [Ra(op2_reg)+offset] break; case IR_OR: - | ASM_REG_MEM_OP or, type, def_reg, [Ra(reg)+offset] + | ASM_REG_MEM_OP or, type, def_reg, [Ra(op2_reg)+offset] break; case IR_AND: - | ASM_REG_MEM_OP and, type, def_reg, [Ra(reg)+offset] + | ASM_REG_MEM_OP and, type, def_reg, [Ra(op2_reg)+offset] break; case IR_XOR: - | ASM_REG_MEM_OP xor, type, def_reg, [Ra(reg)+offset] + | ASM_REG_MEM_OP xor, type, def_reg, [Ra(op2_reg)+offset] break; default: IR_ASSERT(0 && "NIY binary op"); @@ -2472,7 +2447,7 @@ static void ir_emit_mem_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) } else if (insn->op == IR_STORE) { reg = ctx->regs[def][2]; if (reg != IR_REG_NONE) { - if (reg != IR_REG_NONE && (reg & IR_REG_SPILL_LOAD)) { + if (reg & IR_REG_SPILL_LOAD) { reg &= ~IR_REG_SPILL_LOAD; ir_emit_load(ctx, type, reg, insn->op2); } @@ -2482,7 +2457,7 @@ static void ir_emit_mem_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) if (addr_insn->op == IR_ADD) { reg = ctx->regs[insn->op2][1]; IR_ASSERT(reg != IR_REG_NONE); - if (reg != IR_REG_NONE && (reg & 
IR_REG_SPILL_LOAD)) { + if (reg & IR_REG_SPILL_LOAD) { reg &= ~IR_REG_SPILL_LOAD; ir_emit_load(ctx, type, reg, addr_insn->op1); } @@ -2676,7 +2651,7 @@ static void ir_emit_mem_mul_div_mod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn) } else if (insn->op == IR_STORE) { reg = ctx->regs[def][2]; IR_ASSERT(reg != IR_REG_NONE); - if (reg != IR_REG_NONE && (reg & IR_REG_SPILL_LOAD)) { + if (reg & IR_REG_SPILL_LOAD) { reg &= ~IR_REG_SPILL_LOAD; ir_emit_load(ctx, type, reg, insn->op2); } @@ -2778,7 +2753,7 @@ static void ir_emit_mem_shift(ir_ctx *ctx, ir_ref def, ir_insn *insn) } else if (insn->op == IR_STORE) { reg = ctx->regs[def][2]; IR_ASSERT(reg != IR_REG_NONE); - if (reg != IR_REG_NONE && (reg & IR_REG_SPILL_LOAD)) { + if (reg & IR_REG_SPILL_LOAD) { reg &= ~IR_REG_SPILL_LOAD; ir_emit_load(ctx, type, reg, insn->op2); } @@ -2883,7 +2858,7 @@ static void ir_emit_mem_shift_const(ir_ctx *ctx, ir_ref def, ir_insn *insn) } else if (insn->op == IR_STORE) { reg = ctx->regs[def][2]; IR_ASSERT(reg != IR_REG_NONE); - if (reg != IR_REG_NONE && (reg & IR_REG_SPILL_LOAD)) { + if (reg & IR_REG_SPILL_LOAD) { reg &= ~IR_REG_SPILL_LOAD; ir_emit_load(ctx, type, reg, insn->op2); } @@ -2981,7 +2956,7 @@ static void ir_emit_mem_op_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) } else if (insn->op == IR_STORE) { reg = ctx->regs[def][2]; IR_ASSERT(reg != IR_REG_NONE); - if (reg != IR_REG_NONE && (reg & IR_REG_SPILL_LOAD)) { + if (reg & IR_REG_SPILL_LOAD) { reg &= ~IR_REG_SPILL_LOAD; ir_emit_load(ctx, type, reg, insn->op2); } @@ -3354,49 +3329,33 @@ static void ir_emit_binop_sse2(ir_ctx *ctx, ir_ref def, ir_insn *insn) break; } } else { - ir_reg reg = IR_REG_NONE; int32_t offset = 0; if (ctx->rules[op2] == IR_SKIP_MEM) { - ir_insn *load_insn = &ctx->ir_base[op2]; - ir_insn *addr_insn; - - IR_ASSERT(load_insn->op == IR_LOAD); - addr_insn = &ctx->ir_base[load_insn->op2]; - - if (addr_insn->op == IR_ADD) { - reg = ctx->regs[load_insn->op2][1]; - IR_ASSERT(reg != IR_REG_NONE); - if (reg != 
IR_REG_NONE && (reg & IR_REG_SPILL_LOAD)) { - reg &= ~IR_REG_SPILL_LOAD; - ir_emit_load(ctx, type, reg, addr_insn->op1); - } - offset = ctx->ir_base[addr_insn->op2].val.i32; - } else { - IR_ASSERT(0); - } + op2_reg = ir_fuse_load(ctx, op2, &offset); + IR_ASSERT(op2_reg != IR_REG_NONE); } else { - reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; offset = ir_ref_spill_slot(ctx, op2); } switch (insn->op) { case IR_ADD: - | ASM_SSE2_REG_MEM_OP addss, addsd, type, def_reg, [Ra(reg)+offset] + | ASM_SSE2_REG_MEM_OP addss, addsd, type, def_reg, [Ra(op2_reg)+offset] break; case IR_SUB: - | ASM_SSE2_REG_MEM_OP subss, subsd, type, def_reg, [Ra(reg)+offset] + | ASM_SSE2_REG_MEM_OP subss, subsd, type, def_reg, [Ra(op2_reg)+offset] break; case IR_MUL: - | ASM_SSE2_REG_MEM_OP mulss, mulsd, type, def_reg, [Ra(reg)+offset] + | ASM_SSE2_REG_MEM_OP mulss, mulsd, type, def_reg, [Ra(op2_reg)+offset] break; case IR_DIV: - | ASM_SSE2_REG_MEM_OP divss, divsd, type, def_reg, [Ra(reg)+offset] + | ASM_SSE2_REG_MEM_OP divss, divsd, type, def_reg, [Ra(op2_reg)+offset] break; case IR_MIN: - | ASM_SSE2_REG_MEM_OP minss, minsd, type, def_reg, [Ra(reg)+offset] + | ASM_SSE2_REG_MEM_OP minss, minsd, type, def_reg, [Ra(op2_reg)+offset] break; case IR_MAX: - | ASM_SSE2_REG_MEM_OP maxss, maxsd, type, def_reg, [Ra(reg)+offset] + | ASM_SSE2_REG_MEM_OP maxss, maxsd, type, def_reg, [Ra(op2_reg)+offset] break; default: IR_ASSERT(0 && "NIY binary op"); @@ -3484,49 +3443,33 @@ static void ir_emit_binop_avx(ir_ctx *ctx, ir_ref def, ir_insn *insn) break; } } else { - ir_reg reg = IR_REG_NONE; int32_t offset = 0; if (ctx->rules[op2] == IR_SKIP_MEM) { - ir_insn *load_insn = &ctx->ir_base[op2]; - ir_insn *addr_insn; - - IR_ASSERT(load_insn->op == IR_LOAD); - addr_insn = &ctx->ir_base[load_insn->op2]; - - if (addr_insn->op == IR_ADD) { - reg = ctx->regs[load_insn->op2][1]; - 
IR_ASSERT(reg != IR_REG_NONE); - if (reg != IR_REG_NONE && (reg & IR_REG_SPILL_LOAD)) { - reg &= ~IR_REG_SPILL_LOAD; - ir_emit_load(ctx, type, reg, addr_insn->op1); - } - offset = ctx->ir_base[addr_insn->op2].val.i32; - } else { - IR_ASSERT(0); - } + op2_reg = ir_fuse_load(ctx, op2, &offset); + IR_ASSERT(op2_reg != IR_REG_NONE); } else { - reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; offset = ir_ref_spill_slot(ctx, op2); } switch (insn->op) { case IR_ADD: - | ASM_AVX_REG_REG_MEM_OP vaddss, vaddsd, type, def_reg, op1_reg, [Ra(reg)+offset] + | ASM_AVX_REG_REG_MEM_OP vaddss, vaddsd, type, def_reg, op1_reg, [Ra(op2_reg)+offset] break; case IR_SUB: - | ASM_AVX_REG_REG_MEM_OP vsubss, vsubsd, type, def_reg, op1_reg, [Ra(reg)+offset] + | ASM_AVX_REG_REG_MEM_OP vsubss, vsubsd, type, def_reg, op1_reg, [Ra(op2_reg)+offset] break; case IR_MUL: - | ASM_AVX_REG_REG_MEM_OP vmulss, vmulsd, type, def_reg, op1_reg, [Ra(reg)+offset] + | ASM_AVX_REG_REG_MEM_OP vmulss, vmulsd, type, def_reg, op1_reg, [Ra(op2_reg)+offset] break; case IR_DIV: - | ASM_AVX_REG_REG_MEM_OP vdivss, vdivsd, type, def_reg, op1_reg, [Ra(reg)+offset] + | ASM_AVX_REG_REG_MEM_OP vdivss, vdivsd, type, def_reg, op1_reg, [Ra(op2_reg)+offset] break; case IR_MIN: - | ASM_AVX_REG_REG_MEM_OP vminss, vminsd, type, def_reg, op1_reg, [Ra(reg)+offset] + | ASM_AVX_REG_REG_MEM_OP vminss, vminsd, type, def_reg, op1_reg, [Ra(op2_reg)+offset] break; case IR_MAX: - | ASM_AVX_REG_REG_MEM_OP vmaxss, vmaxsd, type, def_reg, op1_reg, [Ra(reg)+offset] + | ASM_AVX_REG_REG_MEM_OP vmaxss, vmaxsd, type, def_reg, op1_reg, [Ra(op2_reg)+offset] break; default: IR_ASSERT(0 && "NIY binary op"); @@ -3544,47 +3487,23 @@ static void ir_emit_cmp_int_common(ir_ctx *ctx, ir_type type, ir_insn *insn, ir_ dasm_State **Dst = &data->dasm_state; if (ctx->rules[insn->op1] == IR_SKIP_MEM) { - ir_insn *load_insn = 
&ctx->ir_base[insn->op1]; - ir_insn *addr_insn; + int32_t offset = 0; - IR_ASSERT(load_insn->op == IR_LOAD); - addr_insn = &ctx->ir_base[load_insn->op2]; + op1_reg = ir_fuse_load(ctx, insn->op1, &offset); if (op2_reg != IR_REG_NONE) { - if (addr_insn->op == IR_C_ADDR - && (sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr_insn->val.i64))) { - | ASM_MEM_REG_OP cmp, type, [addr_insn->val.i32], op2_reg - } else if (addr_insn->op == IR_RLOAD) { - op1_reg = ctx->regs[insn->op1][2]; - IR_ASSERT(op1_reg != IR_REG_NONE); - | ASM_MEM_REG_OP cmp, type, [Ra(op1_reg)], op2_reg - } else if (addr_insn->op == IR_ADD) { - op1_reg = ctx->regs[load_insn->op2][1]; - IR_ASSERT(op1_reg != IR_REG_NONE && IR_IS_CONST_REF(addr_insn->op2)); - | ASM_MEM_REG_OP cmp, type, [Ra(op1_reg)+ctx->ir_base[addr_insn->op2].val.i32], op2_reg + if (op1_reg == IR_REG_NONE) { + | ASM_MEM_REG_OP cmp, type, [offset], op2_reg } else { - IR_ASSERT(0); + | ASM_MEM_REG_OP cmp, type, [Ra(op1_reg)+offset], op2_reg } } else { IR_ASSERT(!IR_IS_CONST_REF(op1)); IR_ASSERT(IR_IS_CONST_REF(op2)); IR_ASSERT(IR_IS_32BIT(ctx->ir_base[op2].type, ctx->ir_base[op2].val)); - if (addr_insn->op == IR_C_ADDR - && (sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr_insn->val.i64))) { - | ASM_MEM_IMM_OP cmp, type, [addr_insn->val.i32], ctx->ir_base[op2].val.i32 - } else if (addr_insn->op == IR_RLOAD) { - op1_reg = ctx->regs[insn->op1][2]; - IR_ASSERT(op1_reg != IR_REG_NONE); - | ASM_MEM_IMM_OP cmp, type, [Ra(op1_reg)], ctx->ir_base[op2].val.i32 - } else if (addr_insn->op == IR_ADD) { - op1_reg = ctx->regs[load_insn->op2][1]; - if (op1_reg & IR_REG_SPILL_LOAD) { - op1_reg &= ~IR_REG_SPILL_LOAD; - ir_emit_load(ctx, type, op1_reg, addr_insn->op1); - } - IR_ASSERT(op1_reg != IR_REG_NONE && IR_IS_CONST_REF(addr_insn->op2)); - | ASM_MEM_IMM_OP cmp, type, [Ra(op1_reg)+ctx->ir_base[addr_insn->op2].val.i32], ctx->ir_base[op2].val.i32 + if (op1_reg == IR_REG_NONE) { + | ASM_MEM_IMM_OP cmp, type, [offset], ctx->ir_base[op2].val.i32 } else { - 
IR_ASSERT(0); + | ASM_MEM_IMM_OP cmp, type, [Ra(op1_reg)+offset], ctx->ir_base[op2].val.i32 } } } else if (op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE) { @@ -3592,8 +3511,22 @@ static void ir_emit_cmp_int_common(ir_ctx *ctx, ir_type type, ir_insn *insn, ir_ } else if (op1_reg != IR_REG_NONE) { if (IR_IS_CONST_REF(op2) && ctx->ir_base[op2].val.u64 == 0) { | ASM_REG_REG_OP test, type, op1_reg, op1_reg + } else if (IR_IS_CONST_REF(op2)) { + ir_insn *val_insn = &ctx->ir_base[op2]; + + IR_ASSERT(IR_IS_32BIT(val_insn->type, val_insn->val)); + | ASM_REG_IMM_OP cmp, type, op1_reg, val_insn->val.i32 } else { - | ASM_REG_MREF_OP cmp, type, op1_reg, op2 + int32_t offset = 0; + + if (ctx->rules[op2] == IR_SKIP_MEM) { + op2_reg = ir_fuse_load(ctx, op2, &offset); + IR_ASSERT(op2_reg != IR_REG_NONE); + } else { + op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + offset = ir_ref_spill_slot(ctx, op2); + } + | ASM_REG_MEM_OP cmp, type, op1_reg, [Ra(op2_reg)+offset] } } else if (op2_reg != IR_REG_NONE) { | ASM_MREF_REG_OP cmp, type, op1, op2_reg @@ -3699,7 +3632,7 @@ static ir_op ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref cmp_ref, ir_insn *cmp_ins ir_ref op1, op2; ir_reg op1_reg, op2_reg; - if (op == IR_LT || op == IR_LE) { + if ((ctx->regs[cmp_ref][2] != IR_REG_NONE) && (op == IR_LT || op == IR_LE)) { /* swap operands to avoid P flag check */ op ^= 3; op1 = cmp_insn->op2; @@ -3725,6 +3658,7 @@ static ir_op ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref cmp_ref, ir_insn *cmp_ins op2_reg = tmp_reg; } + IR_ASSERT(op1_reg != IR_REG_NONE); if ((op1_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(op1)) { op1_reg &= ~IR_REG_SPILL_LOAD; @@ -3738,8 +3672,23 @@ static ir_op ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref cmp_ref, ir_insn *cmp_ins } } | ASM_FP_REG_REG_OP ucomiss, ucomisd, vucomiss, vucomisd, type, op1_reg, op2_reg + } else if (IR_IS_CONST_REF(op2)) { + ir_insn *val_insn = &ctx->ir_base[op2]; + int label = ctx->cfg_blocks_count - op2; + + 
val_insn->const_flags |= IR_CONST_EMIT; + | ASM_FP_REG_MEM_OP ucomiss, ucomisd, vucomiss, vucomisd, type, op1_reg, [=>label] } else { - | ASM_FP_REG_MREF_OP ucomiss, ucomisd, vucomiss, vucomisd, type, op1_reg, op2 + int32_t offset = 0; + + if (ctx->rules[op2] == IR_SKIP_MEM) { + op2_reg = ir_fuse_load(ctx, op2, &offset); + IR_ASSERT(op2_reg != IR_REG_NONE); + } else { + op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + offset = ir_ref_spill_slot(ctx, op2); + } + | ASM_FP_REG_MEM_OP ucomiss, ucomisd, vucomiss, vucomisd, type, op1_reg, [Ra(op2_reg)+offset] } return op; } @@ -3818,15 +3767,15 @@ static void ir_emit_jcc(ir_ctx *ctx, uint8_t op, int b, ir_ref def, ir_insn *ins int true_block, false_block, next_block; ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; + bool swap = 0; ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block); if (true_block == next_block) { - if (int_cmp || (op != IR_GT && op != IR_GE)) { - /* swap to avoid unconditional JMP if this doesn't introduce additional JP instruction */ - op ^= 1; // reverse - true_block = false_block; - false_block = 0; - } + /* swap to avoid unconditional JMP */ + op ^= 1; // reverse + true_block = false_block; + false_block = 0; + swap = 1; } else if (false_block == next_block) { false_block = 0; } @@ -3884,7 +3833,9 @@ static void ir_emit_jcc(ir_ctx *ctx, uint8_t op, int b, ir_ref def, ir_insn *ins | jp =>true_block break; case IR_LT: - if (!false_block) { + if (swap) { + | jb =>true_block + } else if (!false_block) { | jp >1 | jb =>true_block |1: @@ -3894,10 +3845,15 @@ } break; case IR_GE: + if (swap) { + | jp =>true_block + } | jae =>true_block break; case IR_LE: - if (!false_block) { + if (swap) { + | jbe =>true_block + } else if (!false_block) { | jp >1 | jbe =>true_block |1: @@ -3907,6 +3863,9 @@ static void ir_emit_jcc(ir_ctx *ctx, uint8_t op, 
int b, ir_ref def, ir_insn *ins } break; case IR_GT: + if (swap) { + | jp =>true_block + } | ja =>true_block break; // case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break; @@ -4440,22 +4399,29 @@ static void ir_emit_int2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) |.endif } } else { - int32_t offset = ir_ref_spill_slot(ctx, insn->op1); - ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + int32_t offset = 0; + + if (ctx->rules[insn->op1] == IR_SKIP_MEM) { + op1_reg = ir_fuse_load(ctx, insn->op1, &offset); + IR_ASSERT(op1_reg != IR_REG_NONE); + } else { + op1_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + offset = ir_ref_spill_slot(ctx, insn->op1); + } if (!src64) { if (dst_type == IR_DOUBLE) { if (ctx->flags & IR_AVX) { - | vcvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(fp)+offset] + | vcvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(op1_reg)+offset] } else { - | cvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(fp)+offset] + | cvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(op1_reg)+offset] } } else { IR_ASSERT(dst_type == IR_FLOAT); if (ctx->flags & IR_AVX) { - | vcvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(fp)+offset] + | vcvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(op1_reg)+offset] } else { - | cvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(fp)+offset] + | cvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(op1_reg)+offset] } } } else { @@ -4463,16 +4429,16 @@ static void ir_emit_int2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) |.if X64 if (dst_type == IR_DOUBLE) { if (ctx->flags & IR_AVX) { - | vcvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword [Ra(fp)+offset] + | vcvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword [Ra(op1_reg)+offset] } else { - | cvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), 
qword [Ra(fp)+offset] + | cvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), qword [Ra(op1_reg)+offset] } } else { IR_ASSERT(dst_type == IR_FLOAT); if (ctx->flags & IR_AVX) { - | vcvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword [Ra(fp)+offset] + | vcvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword [Ra(op1_reg)+offset] } else { - | cvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), qword [Ra(fp)+offset] + | cvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), qword [Ra(op1_reg)+offset] } } |.endif @@ -4856,21 +4822,9 @@ static void ir_emit_load_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) return; } } else if (ctx->rules[insn->op2] == IR_SKIP_MEM) { - ir_insn *addr_insn = &ctx->ir_base[insn->op2]; - - if (addr_insn->op == IR_ADD) { - op2_reg = ctx->regs[insn->op2][1]; - IR_ASSERT(op2_reg != IR_REG_NONE); - if (op2_reg != IR_REG_NONE && (op2_reg & IR_REG_SPILL_LOAD)) { - op2_reg &= ~IR_REG_SPILL_LOAD; - ir_emit_load(ctx, IR_ADDR, op2_reg, addr_insn->op1); - } - offset = ctx->ir_base[addr_insn->op2].val.i32; - } else { - IR_ASSERT(0); - } - } - if (op2_reg == IR_REG_NONE) { + op2_reg = ir_fuse_addr(ctx, insn->op2, &offset); + IR_ASSERT(op2_reg != IR_REG_NONE); + } else if (op2_reg == IR_REG_NONE) { op2_reg = def_reg; } if ((op2_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(insn->op2)) { @@ -4907,19 +4861,8 @@ static void ir_emit_load_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) return; } } else if (ctx->rules[insn->op2] == IR_SKIP_MEM) { - ir_insn *addr_insn = &ctx->ir_base[insn->op2]; - - if (addr_insn->op == IR_ADD) { - op2_reg = ctx->regs[insn->op2][1]; - IR_ASSERT(op2_reg != IR_REG_NONE); - if (op2_reg != IR_REG_NONE && (op2_reg & IR_REG_SPILL_LOAD)) { - op2_reg &= ~IR_REG_SPILL_LOAD; - ir_emit_load(ctx, IR_ADDR, op2_reg, addr_insn->op1); - } - offset = ctx->ir_base[addr_insn->op2].val.i32; - } else { - IR_ASSERT(0); - } + op2_reg = ir_fuse_addr(ctx, insn->op2, &offset); + IR_ASSERT(op2_reg != IR_REG_NONE); } else { IR_ASSERT(op2_reg != IR_REG_NONE); if 
((op2_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(insn->op2)) { @@ -4962,19 +4905,8 @@ static void ir_emit_store_int(ir_ctx *ctx, ir_reg ref, ir_insn *insn) return; } } else if (ctx->rules[insn->op2] == IR_SKIP_MEM) { - ir_insn *addr_insn = &ctx->ir_base[insn->op2]; - - if (addr_insn->op == IR_ADD) { - op2_reg = ctx->regs[insn->op2][1]; - IR_ASSERT(op2_reg != IR_REG_NONE); - if (op2_reg != IR_REG_NONE && (op2_reg & IR_REG_SPILL_LOAD)) { - op2_reg &= ~IR_REG_SPILL_LOAD; - ir_emit_load(ctx, IR_ADDR, op2_reg, addr_insn->op1); - } - offset = ctx->ir_base[addr_insn->op2].val.i32; - } else { - IR_ASSERT(0); - } + op2_reg = ir_fuse_addr(ctx, insn->op2, &offset); + IR_ASSERT(op2_reg != IR_REG_NONE); } else { IR_ASSERT(op2_reg != IR_REG_NONE); if ((op2_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(insn->op2)) { @@ -5018,19 +4950,8 @@ static void ir_emit_store_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn) return; } } else if (ctx->rules[insn->op2] == IR_SKIP_MEM) { - ir_insn *addr_insn = &ctx->ir_base[insn->op2]; - - if (addr_insn->op == IR_ADD) { - op2_reg = ctx->regs[insn->op2][1]; - IR_ASSERT(op2_reg != IR_REG_NONE); - if (op2_reg != IR_REG_NONE && (op2_reg & IR_REG_SPILL_LOAD)) { - op2_reg &= ~IR_REG_SPILL_LOAD; - ir_emit_load(ctx, IR_ADDR, op2_reg, addr_insn->op1); - } - offset = ctx->ir_base[addr_insn->op2].val.i32; - } else { - IR_ASSERT(0); - } + op2_reg = ir_fuse_addr(ctx, insn->op2, &offset); + IR_ASSERT(op2_reg != IR_REG_NONE); } else { IR_ASSERT(op2_reg != IR_REG_NONE); if ((op2_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(insn->op2)) { @@ -5863,20 +5784,13 @@ static void ir_emit_ijmp(ir_ctx *ctx, ir_ref def, ir_insn *insn) |.endif } } else if (ctx->rules[insn->op2] == IR_SKIP_MEM) { - ir_insn *load_insn = &ctx->ir_base[insn->op2]; - ir_insn *addr_insn; + int32_t offset; - IR_ASSERT(load_insn->op == IR_LOAD); - addr_insn = &ctx->ir_base[load_insn->op2]; - if (addr_insn->op == IR_C_ADDR - && (sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr_insn->val.i64))) { - | jmp aword 
[addr_insn->val.i32] - } else if (addr_insn->op == IR_RLOAD) { - op2_reg = ctx->regs[insn->op2][2]; - IR_ASSERT(op2_reg != IR_REG_NONE); - | jmp aword [Ra(op2_reg)] + op2_reg = ir_fuse_load(ctx, insn->op2, &offset); + if (op2_reg == IR_REG_NONE) { + | jmp aword [offset] } else { - IR_ASSERT(0); + | jmp aword [Ra(op2_reg)+offset] } } else if (op2_reg != IR_REG_NONE) { if (op2_reg & IR_REG_SPILL_LOAD) { diff --git a/tests/debug/test-O0.irt b/tests/debug/test-O0.irt index dd60353..a7c5d86 100644 --- a/tests/debug/test-O0.irt +++ b/tests/debug/test-O0.irt @@ -59,7 +59,7 @@ test: movsd %xmm0, (%rsp) movsd %xmm1, 8(%rsp) movsd 8(%rsp), %xmm0 - subsd .L5(%rip), %xmm0 + subsd .L4(%rip), %xmm0 movsd %xmm0, 0x10(%rsp) xorpd %xmm0, %xmm0 movsd %xmm0, 0x18(%rsp) @@ -95,20 +95,18 @@ test: addsd 0x38(%rsp), %xmm0 movsd %xmm0, 0x68(%rsp) movsd 0x68(%rsp), %xmm0 - ucomisd .L6(%rip), %xmm0 - ja .L2 - jmp .L3 -.L2: + ucomisd .L5(%rip), %xmm0 + jbe .L2 movl 0x2c(%rsp), %eax addq $0x70, %rsp retq -.L3: +.L2: cmpl $0x3e8, 0x2c(%rsp) - jle .L4 + jle .L3 xorl %eax, %eax addq $0x70, %rsp retq -.L4: +.L3: movsd 0x60(%rsp), %xmm0 movsd %xmm0, 0x18(%rsp) movsd 0x50(%rsp), %xmm0 @@ -117,8 +115,8 @@ test: movl %eax, 0x28(%rsp) jmp .L1 .rodata - .db 0x90, 0x90, 0x90, 0x90 -.L5: + .db 0x90, 0x90, 0x90, 0x90, 0x90, 0x90 +.L4: .db 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x3f -.L6: +.L5: .db 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x40 diff --git a/tests/debug/test_var-O0.irt b/tests/debug/test_var-O0.irt index 55a2f83..98c29ea 100644 --- a/tests/debug/test_var-O0.irt +++ b/tests/debug/test_var-O0.irt @@ -79,7 +79,7 @@ test: movsd %xmm0, (%rsp) movsd %xmm1, 8(%rsp) movsd 8(%rsp), %xmm0 - subsd .L5(%rip), %xmm0 + subsd .L4(%rip), %xmm0 movsd %xmm0, 0x10(%rsp) movsd (%rsp), %xmm0 movsd %xmm0, 0x18(%rsp) @@ -117,23 +117,22 @@ test: addsd 0x40(%rsp), %xmm0 movsd %xmm0, 0x60(%rsp) movsd 0x60(%rsp), %xmm0 - ucomisd .L6(%rip), %xmm0 - ja .L2 - jmp .L3 -.L2: + ucomisd .L5(%rip), %xmm0 + jbe .L2 movl 
0x30(%rsp), %eax addq $0x68, %rsp retq -.L3: +.L2: cmpl $0x3e8, 0x30(%rsp) - jle .L4 + jle .L3 xorl %eax, %eax addq $0x68, %rsp retq -.L4: +.L3: jmp .L1 .rodata -.L5: + .db 0x90, 0x90 +.L4: .db 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x3f -.L6: +.L5: .db 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x40