From 0596de22919f4dfb372dcf84a0fee959263021e1 Mon Sep 17 00:00:00 2001
From: Dmitry Stogov
Date: Tue, 30 Aug 2022 11:26:38 +0300
Subject: [PATCH] Fuse LOAD into IMULL/3

---
 ir_x86.dasc                 | 86 +++++++++++++++++++++++++++++++++++++
 tests/x86_64/mul_ov_001.irt |  2 +-
 2 files changed, 87 insertions(+), 1 deletion(-)

diff --git a/ir_x86.dasc b/ir_x86.dasc
index 18cdff7..bc3813f 100644
--- a/ir_x86.dasc
+++ b/ir_x86.dasc
@@ -519,6 +519,7 @@ typedef enum _ir_rule {
 	IR_ABS_INT,
 	IR_OP_INT,
 	IR_OP_FP,
+	IR_IMUL3,
 	IR_BINOP_INT,
 	IR_BINOP_SSE2,
 	IR_BINOP_AVX,
@@ -841,6 +842,7 @@ uint8_t ir_get_use_flags(ir_ctx *ctx, ir_ref ref, int op_num, ir_reg *reg)
 				return IR_USE_SHOULD_BE_IN_REG;
 			}
 			return IR_USE_SHOULD_BE_IN_REG;
+		case IR_IMUL3:
 		case IR_IJMP:
 		case IR_RSTORE:
 		case IR_SKIP_REG: /* PARAM PHI PI */
@@ -1392,6 +1394,11 @@ binop_fp:
 							return IR_LEA_SIB; // lea ret, [op1.reg+op1.reg*op2.scale]
 						} else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) {
 							return IR_MUL_PWR2; // shl op1, IR_LOG2(op2_insn->val.u64)
+						} else if (IR_IS_TYPE_SIGNED(insn->type)
+						 && ir_type_size[insn->type] != 1
+						 && IR_IS_SIGNED_32BIT(op2_insn->val.i64)) {
+							ir_match_fuse_load(ctx, insn->op1, bb);
+							return IR_IMUL3; // imul ret, op1, op2_insn->val.i32
 						}
 					}
 					if (IR_IS_TYPE_SIGNED(insn->type) && ir_type_size[insn->type] != 1) {
@@ -1410,6 +1417,13 @@ binop_fp:
 		case IR_MUL_OV:
 			IR_ASSERT(IR_IS_TYPE_INT(insn->type));
 			if (IR_IS_TYPE_SIGNED(insn->type) && ir_type_size[insn->type] != 1) {
+				if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
+					op2_insn = &ctx->ir_base[insn->op2];
+					if (IR_IS_SIGNED_32BIT(op2_insn->val.i64)) {
+						ir_match_fuse_load(ctx, insn->op1, bb);
+						return IR_IMUL3; // imul ret, op1, op2_insn->val.i32
+					}
+				}
 				goto binop_int;
 			}
 			ir_match_fuse_load(ctx, insn->op2, bb);
@@ -2412,6 +2426,75 @@ static void ir_emit_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 	}
 }
 
+static void ir_emit_imul3(ir_ctx *ctx, ir_ref def, ir_insn *insn) /* emit "imul def, op1, imm" with op1 in a register or in memory */
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+	ir_type type = insn->type;
+	ir_ref op1 = insn->op1;
+	ir_ref op2 = insn->op2;
+	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+	ir_reg op1_reg = ctx->regs[def][1]; /* may carry IR_REG_SPILL_LOAD; IR_REG_NONE selects the memory-operand form below */
+	ir_insn *val_insn = &ctx->ir_base[op2];
+	int32_t val; /* immediate multiplier, read from the constant op2 */
+
+	IR_ASSERT(def_reg != IR_REG_NONE); /* the destination is always a register */
+	IR_ASSERT(IR_IS_CONST_REF(op2));
+	IR_ASSERT(IR_IS_32BIT(val_insn->type, val_insn->val));
+	val = val_insn->val.i32;
+
+	if (op1_reg != IR_REG_NONE) { /* register source operand */
+		if (op1_reg & IR_REG_SPILL_LOAD) {
+			op1_reg &= ~IR_REG_SPILL_LOAD; /* drop the flag bit before using op1_reg as a register number */
+			ir_emit_load(ctx, type, op1_reg, op1);
+		}
+
+		switch (ir_type_size[type]) {
+			case 2:
+				|	imul Rw(def_reg), Rw(op1_reg), val
+				break;
+			case 4:
+				|	imul Rd(def_reg), Rd(op1_reg), val
+				break;
+|.if X64
+||			case 8:
+|				imul Rq(def_reg), Rq(op1_reg), val
+||				break;
+|.endif
+			default: /* only operand sizes 2, 4 and (on X64) 8 are handled */
+				IR_ASSERT(0);
+		}
+	} else { /* memory source operand: [op1_reg + offset] */
+		int32_t offset = 0;
+
+		if (ctx->rules[op1] == IR_SKIP_MEM) { /* NOTE(review): presumably op1 is a LOAD fused by ir_match_fuse_load() - confirm */
+			op1_reg = ir_fuse_load(ctx, op1, &offset);
+			IR_ASSERT(op1_reg != IR_REG_NONE);
+		} else { /* otherwise address op1's spill slot relative to the frame or stack pointer */
+			op1_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
+			offset = ir_ref_spill_slot(ctx, op1);
+		}
+		switch (ir_type_size[type]) {
+			case 2:
+				|	imul Rw(def_reg), word [Ra(op1_reg)+offset], val
+				break;
+			case 4:
+				|	imul Rd(def_reg), dword [Ra(op1_reg)+offset], val
+				break;
+|.if X64
+||			case 8:
+|				imul Rq(def_reg), qword [Ra(op1_reg)+offset], val
+||				break;
+|.endif
+			default: /* only operand sizes 2, 4 and (on X64) 8 are handled */
+				IR_ASSERT(0);
+		}
+	}
+	if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { /* a spill store was requested for the result */
+		ir_emit_store(ctx, type, def, def_reg);
+	}
+}
+
 static void ir_emit_min_max_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 {
 	ir_backend_data *data = ctx->data;
@@ -7621,6 +7704,9 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
 				case IR_OP_FP:
 					ir_emit_op_fp(ctx, i, insn);
 					break;
+				case IR_IMUL3:
+					ir_emit_imul3(ctx, i, insn);
+					break;
 				case IR_BINOP_INT:
 					ir_emit_binop_int(ctx, i, insn);
 					break;
diff --git a/tests/x86_64/mul_ov_001.irt b/tests/x86_64/mul_ov_001.irt
index 4625d44..6c93333 100644
--- a/tests/x86_64/mul_ov_001.irt
+++ b/tests/x86_64/mul_ov_001.irt
@@ -15,6 +15,6 @@ x86_64
 }
 --EXPECT--
 test:
-	imull $2, %edi, %edi
+	imull $2, %edi, %eax
 	seto %al
 	retq