From 85beed7901e1718b3cd42433da44947e5e3eca59 Mon Sep 17 00:00:00 2001 From: Dmitry Stogov Date: Thu, 22 Jun 2023 12:07:19 +0300 Subject: [PATCH] Fixed incorrect oredering of moves during de-SSA Temporary de-SSA registers may conflict with outpot registers, therefore these output resisters should be assigned last. --- ir_aarch64.dasc | 38 ++++++- ir_emit.c | 172 +++++++++++++++-------------- ir_x86.dasc | 57 +++++++++- tests/debug.Windows-x86_64/fig.irt | 2 +- tests/debug.aarch64/fig.irt | 2 +- tests/debug/fig.irt | 2 +- 6 files changed, 178 insertions(+), 95 deletions(-) diff --git a/ir_aarch64.dasc b/ir_aarch64.dasc index 2e8508f..41b6de0 100644 --- a/ir_aarch64.dasc +++ b/ir_aarch64.dasc @@ -3464,9 +3464,10 @@ static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_ { int i; int8_t *pred, *loc, *types; - ir_reg to, from_reg; + ir_reg to, from_reg, c; ir_type type; ir_regset todo, ready; + ir_reg last_reg = IR_REG_NONE, last_fp_reg = IR_REG_NONE; loc = ir_mem_malloc(IR_REG_NUM * 3 * sizeof(int8_t)); pred = loc + IR_REG_NUM; @@ -3482,7 +3483,16 @@ static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_ loc[from_reg] = from_reg; pred[to] = from_reg; types[from_reg] = copies[i].type; - IR_REGSET_INCL(todo, to); + if (to == tmp_reg) { + IR_ASSERT(last_reg == IR_REG_NONE); + last_reg = to; + } else if (to == tmp_fp_reg) { + IR_ASSERT(last_fp_reg == IR_REG_NONE); + last_fp_reg = to; + } else { + IR_ASSERT(!IR_REGSET_IN(todo, to)); + IR_REGSET_INCL(todo, to); + } } } @@ -3493,8 +3503,6 @@ static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_ } IR_REGSET_FOREACH_END(); while (1) { - ir_ref /*a, b,*/ c; - while (ready != IR_REGSET_EMPTY) { to = ir_regset_pop_first(&ready); from_reg = pred[to]; @@ -3533,6 +3541,28 @@ static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_ IR_REGSET_INCL(ready, to); } + if (last_reg != IR_REG_NONE) { + to = last_reg; + from_reg = pred[to]; + c = loc[from_reg]; + if (to != c) { + type = types[from_reg]; + IR_ASSERT(IR_IS_TYPE_INT(type)); + ir_emit_mov(ctx, type, to, c); + } + } + + if (last_fp_reg != IR_REG_NONE) { + to = last_fp_reg; + from_reg = pred[to]; + c = loc[from_reg]; + if (to != c) { + type = types[from_reg]; + IR_ASSERT(!IR_IS_TYPE_INT(type)); + ir_emit_fp_mov(ctx, type, to, c); + } + } + ir_mem_free(loc); return 1; diff --git a/ir_emit.c b/ir_emit.c index e29a994..c6c743d 100644 --- a/ir_emit.c +++ b/ir_emit.c @@ -47,6 +47,14 @@ typedef struct _ir_copy { ir_reg to; } ir_copy; +typedef struct _ir_delayed_copy { + ir_ref input; + ir_ref output; + ir_type type; + ir_reg from; + ir_reg to; +} ir_delayed_copy; + #if IR_REG_INT_ARGS static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS]; #else @@ -394,16 +402,14 @@ static IR_NEVER_INLINE void ir_emit_osr_entry_loads(ir_ctx *ctx, int b, ir_block static void ir_emit_dessa_moves(ir_ctx *ctx, int b, ir_block *bb) { - uint32_t succ, k, n = 0; + uint32_t succ, k, n = 0, n2 = 0; ir_block *succ_bb; ir_use_list *use_list; - ir_ref i, *p, ref, input; - ir_insn *insn; - bool do_third_pass = 0; + ir_ref i, *p; ir_copy *copies; + ir_delayed_copy *copies2; ir_reg tmp_reg = ctx->regs[bb->end][0]; ir_reg tmp_fp_reg = ctx->regs[bb->end][1]; - ir_reg src, dst; IR_ASSERT(bb->successors_count == 1); succ = ctx->cfg_edges[bb->successors]; @@ -412,57 +418,74 @@ static void ir_emit_dessa_moves(ir_ctx *ctx, int b, ir_block *bb) use_list = &ctx->use_lists[succ_bb->start]; k = ir_phi_input_number(ctx, succ_bb, b); - copies = ir_mem_malloc(use_list->count * sizeof(ir_copy)); + copies = ir_mem_malloc(use_list->count * sizeof(ir_copy) + use_list->count * sizeof(ir_delayed_copy)); + copies2 = (ir_delayed_copy*)(copies + use_list->count); for (i = 0, p = &ctx->use_edges[use_list->refs]; i < use_list->count; i++, p++) { - ref = *p; - insn = &ctx->ir_base[ref]; + ir_ref ref = *p; + ir_insn *insn = &ctx->ir_base[ref]; + if (insn->op == IR_PHI) { - input = ir_insn_op(insn, k); - if (IR_IS_CONST_REF(input)) { - do_third_pass = 1; - } else { - dst = IR_REG_NUM(ctx->regs[ref][0]); - src = ir_get_alocated_reg(ctx, ref, k); + ir_ref input = ir_insn_op(insn, k); + ir_reg src = ir_get_alocated_reg(ctx, ref, k); + ir_reg dst = ctx->regs[ref][0]; - if (dst == IR_REG_NONE) { - /* STORE to memory */ - if (src == IR_REG_NONE) { - if (!ir_is_same_mem(ctx, input, ref)) { - ir_reg tmp = IR_IS_TYPE_INT(insn->type) ? tmp_reg : tmp_fp_reg; + if (dst == IR_REG_NONE) { + /* STORE to memory cannot clobber any input register (do it right now) */ + if (IR_IS_CONST_REF(input)) { + IR_ASSERT(src == IR_REG_NONE); +#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) + if (IR_IS_TYPE_INT(insn->type) + && (ir_type_size[insn->type] != 8 || IR_IS_SIGNED_32BIT(ctx->ir_base[input].val.i64))) { + ir_emit_store_imm(ctx, insn->type, ref, ctx->ir_base[input].val.i32); + continue; + } +#endif + ir_reg tmp = IR_IS_TYPE_INT(insn->type) ? tmp_reg : tmp_fp_reg; - IR_ASSERT(tmp != IR_REG_NONE); - ir_emit_load(ctx, insn->type, tmp, input); - ir_emit_store(ctx, insn->type, ref, tmp); - } - } else { - if (IR_REG_SPILLED(src)) { - src = IR_REG_NUM(src); - ir_emit_load(ctx, insn->type, src, input); - if (ir_is_same_mem(ctx, input, ref)) { - continue; - } - } - ir_emit_store(ctx, insn->type, ref, src); + IR_ASSERT(tmp != IR_REG_NONE); + ir_emit_load(ctx, insn->type, tmp, input); + ir_emit_store(ctx, insn->type, ref, tmp); + } else if (src == IR_REG_NONE) { + if (!ir_is_same_mem(ctx, input, ref)) { + ir_reg tmp = IR_IS_TYPE_INT(insn->type) ? tmp_reg : tmp_fp_reg; + + IR_ASSERT(tmp != IR_REG_NONE); + ir_emit_load(ctx, insn->type, tmp, input); + ir_emit_store(ctx, insn->type, ref, tmp); } } else { - if (src == IR_REG_NONE) { - do_third_pass = 1; - } else { - if (IR_REG_SPILLED(src)) { - src = IR_REG_NUM(src); - ir_emit_load(ctx, insn->type, src, input); - } - if (src != dst) { - copies[n].type = insn->type; - copies[n].from = src; - copies[n].to = dst; - n++; + if (IR_REG_SPILLED(src)) { + src = IR_REG_NUM(src); + ir_emit_load(ctx, insn->type, src, input); + if (ir_is_same_mem(ctx, input, ref)) { + continue; } } + ir_emit_store(ctx, insn->type, ref, src); } - if (IR_REG_SPILLED(ctx->regs[ref][0])) { - do_third_pass = 1; + } else if (src == IR_REG_NONE) { + /* STORE of constant or memory can't be clobber by parallel reg->reg copies (delay it) */ + copies2[n2].input = input; + copies2[n2].output = ref; + copies2[n2].type = insn->type; + copies2[n2].from = src; + copies2[n2].to = dst; + n2++; + } else { + IR_ASSERT(!IR_IS_CONST_REF(input)); + if (IR_REG_SPILLED(src)) { + ir_emit_load(ctx, insn->type, IR_REG_NUM(src), input); + } + if (IR_REG_SPILLED(dst) && (!IR_REG_SPILLED(src) || !ir_is_same_mem(ctx, input, ref))) { + ir_emit_store(ctx, insn->type, ref, IR_REG_NUM(src)); + } + if (IR_REG_NUM(src) != IR_REG_NUM(dst)) { + /* Schedule parallel reg->reg copy */ + copies[n].type = insn->type; + copies[n].from = IR_REG_NUM(src); + copies[n].to = IR_REG_NUM(dst); + n++; } } } @@ -471,49 +494,30 @@ static void ir_emit_dessa_moves(ir_ctx *ctx, int b, ir_block *bb) if (n > 0) { ir_parallel_copy(ctx, copies, n, tmp_reg, tmp_fp_reg); } - ir_mem_free(copies); - if (do_third_pass) { - for (i = 0, p = &ctx->use_edges[use_list->refs]; i < use_list->count; i++, p++) { - ref = *p; - insn = &ctx->ir_base[ref]; - if (insn->op == IR_PHI) { - input = ir_insn_op(insn, k); - dst = IR_REG_NUM(ctx->regs[ref][0]); + for (n = 0; n < n2; n++) { + ir_ref input = copies2[n].input; + ir_ref ref = copies2[n].output; + ir_type type = copies2[n].type; + ir_reg dst = copies2[n].to; - if (IR_IS_CONST_REF(input)) { - if (dst == IR_REG_NONE) { -#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) - if (IR_IS_TYPE_INT(insn->type) && (ir_type_size[insn->type] != 8 || IR_IS_SIGNED_32BIT(ctx->ir_base[input].val.i64))) { - ir_emit_store_imm(ctx, insn->type, ref, ctx->ir_base[input].val.i32); - continue; - } -#endif - ir_reg tmp = IR_IS_TYPE_INT(insn->type) ? tmp_reg : tmp_fp_reg; - - IR_ASSERT(tmp != IR_REG_NONE); - ir_emit_load(ctx, insn->type, tmp, input); - ir_emit_store(ctx, insn->type, ref, tmp); - } else { - ir_emit_load(ctx, insn->type, dst, input); - } - } else if (dst != IR_REG_NONE) { - src = ir_get_alocated_reg(ctx, ref, k); - - if (src == IR_REG_NONE) { - if (IR_REG_SPILLED(ctx->regs[ref][0]) && ir_is_same_mem(ctx, input, ref)) { - /* avoid LOAD and SAVE to the same memory */ - continue; - } - ir_emit_load(ctx, insn->type, dst, input); - } - } - if (dst != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[ref][0])) { - ir_emit_store(ctx, insn->type, ref, dst); - } + IR_ASSERT(dst != IR_REG_NONE); + if (IR_IS_CONST_REF(input)) { + ir_emit_load(ctx, type, IR_REG_NUM(dst), input); + } else { + IR_ASSERT(copies2[n].from == IR_REG_NONE); + if (IR_REG_SPILLED(dst) && ir_is_same_mem(ctx, input, ref)) { + /* avoid LOAD and STORE to the same memory */ + continue; } + ir_emit_load(ctx, type, IR_REG_NUM(dst), input); + } + if (IR_REG_SPILLED(dst)) { + ir_emit_store(ctx, type, ref, IR_REG_NUM(dst)); } } + + ir_mem_free(copies); } int ir_match(ir_ctx *ctx) diff --git a/ir_x86.dasc b/ir_x86.dasc index 18aaa2d..b2fcede 100644 --- a/ir_x86.dasc +++ b/ir_x86.dasc @@ -5873,9 +5873,10 @@ static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_ dasm_State **Dst = &data->dasm_state; int i; int8_t *pred, *loc, *types; - ir_reg to, from_reg; + ir_reg to, from_reg, c; ir_type type; ir_regset todo, ready; + ir_reg last_reg = IR_REG_NONE, last_fp_reg = IR_REG_NONE; loc = ir_mem_malloc(IR_REG_NUM * 3 * sizeof(int8_t)); pred = loc + IR_REG_NUM; @@ -5891,7 +5892,17 @@ static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_ loc[from_reg] = from_reg; pred[to] = from_reg; types[from_reg] = copies[i].type; - IR_REGSET_INCL(todo, to); + /* temporary register may de the same as some of destionations */ + if (to == tmp_reg) { + IR_ASSERT(last_reg == IR_REG_NONE); + last_reg = to; + } else if (to == tmp_fp_reg) { + IR_ASSERT(last_fp_reg == IR_REG_NONE); + last_fp_reg = to; + } else { + IR_ASSERT(!IR_REGSET_IN(todo, to)); + IR_REGSET_INCL(todo, to); + } } } @@ -5902,8 +5913,6 @@ static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_ } IR_REGSET_FOREACH_END(); while (1) { - ir_ref /*a, b,*/ c; - while (ready != IR_REGSET_EMPTY) { to = ir_regset_pop_first(&ready); from_reg = pred[to]; @@ -5960,6 +5969,46 @@ static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_ IR_REGSET_INCL(ready, to); } + if (last_reg != IR_REG_NONE) { + to = last_reg; + from_reg = pred[to]; + c = loc[from_reg]; + if (to != c) { + type = types[from_reg]; + IR_ASSERT(IR_IS_TYPE_INT(type)); + if (ir_type_size[type] > 2) { + ir_emit_mov(ctx, type, to, c); + } else if (ir_type_size[type] == 2) { + if (IR_IS_TYPE_SIGNED(type)) { + | movsx Rd(to), Rw(c) + type = IR_I32; + } else { + | movzx Rd(to), Rw(c) + type = IR_U32; + } + } else /* if (ir_type_size[type] == 1) */ { + if (IR_IS_TYPE_SIGNED(type)) { + | movsx Rd(to), Rb(c) + type = IR_I32; + } else { + | movzx Rd(to), Rb(c) + type = IR_U32; + } + } + } + } + + if (last_fp_reg != IR_REG_NONE) { + to = last_fp_reg; + from_reg = pred[to]; + c = loc[from_reg]; + if (to != c) { + type = types[from_reg]; + IR_ASSERT(!IR_IS_TYPE_INT(type)); + ir_emit_fp_mov(ctx, type, to, c); + } + } + ir_mem_free(loc); return 1; diff --git a/tests/debug.Windows-x86_64/fig.irt b/tests/debug.Windows-x86_64/fig.irt index 9d04efd..47037ac 100755 --- a/tests/debug.Windows-x86_64/fig.irt +++ b/tests/debug.Windows-x86_64/fig.irt @@ -82,8 +82,8 @@ test: .L1: testl %ebx, %ebx je .L3 - movl %edi, %eax movl %r8d, %ecx + movl %edi, %eax .L2: testl %ebp, %ebp jne .L1 diff --git a/tests/debug.aarch64/fig.irt b/tests/debug.aarch64/fig.irt index eae25aa..b1027a5 100644 --- a/tests/debug.aarch64/fig.irt +++ b/tests/debug.aarch64/fig.irt @@ -73,8 +73,8 @@ test: .L1: cmp w4, #0 b.eq .L3 - mov w0, w2 mov w5, w10 + mov w0, w2 .L2: cmp w8, #0 b.ne .L1 diff --git a/tests/debug/fig.irt b/tests/debug/fig.irt index 5763cb0..d872855 100644 --- a/tests/debug/fig.irt +++ b/tests/debug/fig.irt @@ -79,8 +79,8 @@ test: .L1: testl %r10d, %r10d je .L3 - movl %edx, %edi movl %ebp, %r9d + movl %edx, %edi .L2: testl %r11d, %r11d jne .L1