Fixed incorrect oredering of moves during de-SSA

Temporary de-SSA registers may conflict with outpot registers, therefore these output resisters should be assigned last.
This commit is contained in:
Dmitry Stogov 2023-06-22 12:07:19 +03:00
parent 35f94d570f
commit 85beed7901
6 changed files with 178 additions and 95 deletions

View File

@ -3464,9 +3464,10 @@ static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_
{ {
int i; int i;
int8_t *pred, *loc, *types; int8_t *pred, *loc, *types;
ir_reg to, from_reg; ir_reg to, from_reg, c;
ir_type type; ir_type type;
ir_regset todo, ready; ir_regset todo, ready;
ir_reg last_reg = IR_REG_NONE, last_fp_reg = IR_REG_NONE;
loc = ir_mem_malloc(IR_REG_NUM * 3 * sizeof(int8_t)); loc = ir_mem_malloc(IR_REG_NUM * 3 * sizeof(int8_t));
pred = loc + IR_REG_NUM; pred = loc + IR_REG_NUM;
@ -3482,9 +3483,18 @@ static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_
loc[from_reg] = from_reg; loc[from_reg] = from_reg;
pred[to] = from_reg; pred[to] = from_reg;
types[from_reg] = copies[i].type; types[from_reg] = copies[i].type;
if (to == tmp_reg) {
IR_ASSERT(last_reg == IR_REG_NONE);
last_reg = to;
} else if (to == tmp_fp_reg) {
IR_ASSERT(last_fp_reg == IR_REG_NONE);
last_fp_reg = to;
} else {
IR_ASSERT(!IR_REGSET_IN(todo, to));
IR_REGSET_INCL(todo, to); IR_REGSET_INCL(todo, to);
} }
} }
}
IR_REGSET_FOREACH(todo, i) { IR_REGSET_FOREACH(todo, i) {
if (loc[i] == IR_REG_NONE) { if (loc[i] == IR_REG_NONE) {
@ -3493,8 +3503,6 @@ static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_
} IR_REGSET_FOREACH_END(); } IR_REGSET_FOREACH_END();
while (1) { while (1) {
ir_ref /*a, b,*/ c;
while (ready != IR_REGSET_EMPTY) { while (ready != IR_REGSET_EMPTY) {
to = ir_regset_pop_first(&ready); to = ir_regset_pop_first(&ready);
from_reg = pred[to]; from_reg = pred[to];
@ -3533,6 +3541,28 @@ static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_
IR_REGSET_INCL(ready, to); IR_REGSET_INCL(ready, to);
} }
if (last_reg != IR_REG_NONE) {
to = last_reg;
from_reg = pred[to];
c = loc[from_reg];
if (to != c) {
type = types[from_reg];
IR_ASSERT(IR_IS_TYPE_INT(type));
ir_emit_mov(ctx, type, to, c);
}
}
if (last_fp_reg != IR_REG_NONE) {
to = last_fp_reg;
from_reg = pred[to];
c = loc[from_reg];
if (to != c) {
type = types[from_reg];
IR_ASSERT(!IR_IS_TYPE_INT(type));
ir_emit_fp_mov(ctx, type, to, c);
}
}
ir_mem_free(loc); ir_mem_free(loc);
return 1; return 1;

132
ir_emit.c
View File

@ -47,6 +47,14 @@ typedef struct _ir_copy {
ir_reg to; ir_reg to;
} ir_copy; } ir_copy;
typedef struct _ir_delayed_copy {
ir_ref input;
ir_ref output;
ir_type type;
ir_reg from;
ir_reg to;
} ir_delayed_copy;
#if IR_REG_INT_ARGS #if IR_REG_INT_ARGS
static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS]; static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS];
#else #else
@ -394,16 +402,14 @@ static IR_NEVER_INLINE void ir_emit_osr_entry_loads(ir_ctx *ctx, int b, ir_block
static void ir_emit_dessa_moves(ir_ctx *ctx, int b, ir_block *bb) static void ir_emit_dessa_moves(ir_ctx *ctx, int b, ir_block *bb)
{ {
uint32_t succ, k, n = 0; uint32_t succ, k, n = 0, n2 = 0;
ir_block *succ_bb; ir_block *succ_bb;
ir_use_list *use_list; ir_use_list *use_list;
ir_ref i, *p, ref, input; ir_ref i, *p;
ir_insn *insn;
bool do_third_pass = 0;
ir_copy *copies; ir_copy *copies;
ir_delayed_copy *copies2;
ir_reg tmp_reg = ctx->regs[bb->end][0]; ir_reg tmp_reg = ctx->regs[bb->end][0];
ir_reg tmp_fp_reg = ctx->regs[bb->end][1]; ir_reg tmp_fp_reg = ctx->regs[bb->end][1];
ir_reg src, dst;
IR_ASSERT(bb->successors_count == 1); IR_ASSERT(bb->successors_count == 1);
succ = ctx->cfg_edges[bb->successors]; succ = ctx->cfg_edges[bb->successors];
@ -412,22 +418,35 @@ static void ir_emit_dessa_moves(ir_ctx *ctx, int b, ir_block *bb)
use_list = &ctx->use_lists[succ_bb->start]; use_list = &ctx->use_lists[succ_bb->start];
k = ir_phi_input_number(ctx, succ_bb, b); k = ir_phi_input_number(ctx, succ_bb, b);
copies = ir_mem_malloc(use_list->count * sizeof(ir_copy)); copies = ir_mem_malloc(use_list->count * sizeof(ir_copy) + use_list->count * sizeof(ir_delayed_copy));
copies2 = (ir_delayed_copy*)(copies + use_list->count);
for (i = 0, p = &ctx->use_edges[use_list->refs]; i < use_list->count; i++, p++) { for (i = 0, p = &ctx->use_edges[use_list->refs]; i < use_list->count; i++, p++) {
ref = *p; ir_ref ref = *p;
insn = &ctx->ir_base[ref]; ir_insn *insn = &ctx->ir_base[ref];
if (insn->op == IR_PHI) { if (insn->op == IR_PHI) {
input = ir_insn_op(insn, k); ir_ref input = ir_insn_op(insn, k);
if (IR_IS_CONST_REF(input)) { ir_reg src = ir_get_alocated_reg(ctx, ref, k);
do_third_pass = 1; ir_reg dst = ctx->regs[ref][0];
} else {
dst = IR_REG_NUM(ctx->regs[ref][0]);
src = ir_get_alocated_reg(ctx, ref, k);
if (dst == IR_REG_NONE) { if (dst == IR_REG_NONE) {
/* STORE to memory */ /* STORE to memory cannot clobber any input register (do it right now) */
if (src == IR_REG_NONE) { if (IR_IS_CONST_REF(input)) {
IR_ASSERT(src == IR_REG_NONE);
#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
if (IR_IS_TYPE_INT(insn->type)
&& (ir_type_size[insn->type] != 8 || IR_IS_SIGNED_32BIT(ctx->ir_base[input].val.i64))) {
ir_emit_store_imm(ctx, insn->type, ref, ctx->ir_base[input].val.i32);
continue;
}
#endif
ir_reg tmp = IR_IS_TYPE_INT(insn->type) ? tmp_reg : tmp_fp_reg;
IR_ASSERT(tmp != IR_REG_NONE);
ir_emit_load(ctx, insn->type, tmp, input);
ir_emit_store(ctx, insn->type, ref, tmp);
} else if (src == IR_REG_NONE) {
if (!ir_is_same_mem(ctx, input, ref)) { if (!ir_is_same_mem(ctx, input, ref)) {
ir_reg tmp = IR_IS_TYPE_INT(insn->type) ? tmp_reg : tmp_fp_reg; ir_reg tmp = IR_IS_TYPE_INT(insn->type) ? tmp_reg : tmp_fp_reg;
@ -445,75 +464,60 @@ static void ir_emit_dessa_moves(ir_ctx *ctx, int b, ir_block *bb)
} }
ir_emit_store(ctx, insn->type, ref, src); ir_emit_store(ctx, insn->type, ref, src);
} }
} else if (src == IR_REG_NONE) {
/* STORE of constant or memory can't be clobber by parallel reg->reg copies (delay it) */
copies2[n2].input = input;
copies2[n2].output = ref;
copies2[n2].type = insn->type;
copies2[n2].from = src;
copies2[n2].to = dst;
n2++;
} else { } else {
if (src == IR_REG_NONE) { IR_ASSERT(!IR_IS_CONST_REF(input));
do_third_pass = 1;
} else {
if (IR_REG_SPILLED(src)) { if (IR_REG_SPILLED(src)) {
src = IR_REG_NUM(src); ir_emit_load(ctx, insn->type, IR_REG_NUM(src), input);
ir_emit_load(ctx, insn->type, src, input);
} }
if (src != dst) { if (IR_REG_SPILLED(dst) && (!IR_REG_SPILLED(src) || !ir_is_same_mem(ctx, input, ref))) {
ir_emit_store(ctx, insn->type, ref, IR_REG_NUM(src));
}
if (IR_REG_NUM(src) != IR_REG_NUM(dst)) {
/* Schedule parallel reg->reg copy */
copies[n].type = insn->type; copies[n].type = insn->type;
copies[n].from = src; copies[n].from = IR_REG_NUM(src);
copies[n].to = dst; copies[n].to = IR_REG_NUM(dst);
n++; n++;
} }
} }
} }
if (IR_REG_SPILLED(ctx->regs[ref][0])) {
do_third_pass = 1;
}
}
}
} }
if (n > 0) { if (n > 0) {
ir_parallel_copy(ctx, copies, n, tmp_reg, tmp_fp_reg); ir_parallel_copy(ctx, copies, n, tmp_reg, tmp_fp_reg);
} }
ir_mem_free(copies);
if (do_third_pass) { for (n = 0; n < n2; n++) {
for (i = 0, p = &ctx->use_edges[use_list->refs]; i < use_list->count; i++, p++) { ir_ref input = copies2[n].input;
ref = *p; ir_ref ref = copies2[n].output;
insn = &ctx->ir_base[ref]; ir_type type = copies2[n].type;
if (insn->op == IR_PHI) { ir_reg dst = copies2[n].to;
input = ir_insn_op(insn, k);
dst = IR_REG_NUM(ctx->regs[ref][0]);
IR_ASSERT(dst != IR_REG_NONE);
if (IR_IS_CONST_REF(input)) { if (IR_IS_CONST_REF(input)) {
if (dst == IR_REG_NONE) { ir_emit_load(ctx, type, IR_REG_NUM(dst), input);
#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
if (IR_IS_TYPE_INT(insn->type) && (ir_type_size[insn->type] != 8 || IR_IS_SIGNED_32BIT(ctx->ir_base[input].val.i64))) {
ir_emit_store_imm(ctx, insn->type, ref, ctx->ir_base[input].val.i32);
continue;
}
#endif
ir_reg tmp = IR_IS_TYPE_INT(insn->type) ? tmp_reg : tmp_fp_reg;
IR_ASSERT(tmp != IR_REG_NONE);
ir_emit_load(ctx, insn->type, tmp, input);
ir_emit_store(ctx, insn->type, ref, tmp);
} else { } else {
ir_emit_load(ctx, insn->type, dst, input); IR_ASSERT(copies2[n].from == IR_REG_NONE);
} if (IR_REG_SPILLED(dst) && ir_is_same_mem(ctx, input, ref)) {
} else if (dst != IR_REG_NONE) { /* avoid LOAD and STORE to the same memory */
src = ir_get_alocated_reg(ctx, ref, k);
if (src == IR_REG_NONE) {
if (IR_REG_SPILLED(ctx->regs[ref][0]) && ir_is_same_mem(ctx, input, ref)) {
/* avoid LOAD and SAVE to the same memory */
continue; continue;
} }
ir_emit_load(ctx, insn->type, dst, input); ir_emit_load(ctx, type, IR_REG_NUM(dst), input);
} }
} if (IR_REG_SPILLED(dst)) {
if (dst != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[ref][0])) { ir_emit_store(ctx, type, ref, IR_REG_NUM(dst));
ir_emit_store(ctx, insn->type, ref, dst);
}
}
} }
} }
ir_mem_free(copies);
} }
int ir_match(ir_ctx *ctx) int ir_match(ir_ctx *ctx)

View File

@ -5873,9 +5873,10 @@ static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_
dasm_State **Dst = &data->dasm_state; dasm_State **Dst = &data->dasm_state;
int i; int i;
int8_t *pred, *loc, *types; int8_t *pred, *loc, *types;
ir_reg to, from_reg; ir_reg to, from_reg, c;
ir_type type; ir_type type;
ir_regset todo, ready; ir_regset todo, ready;
ir_reg last_reg = IR_REG_NONE, last_fp_reg = IR_REG_NONE;
loc = ir_mem_malloc(IR_REG_NUM * 3 * sizeof(int8_t)); loc = ir_mem_malloc(IR_REG_NUM * 3 * sizeof(int8_t));
pred = loc + IR_REG_NUM; pred = loc + IR_REG_NUM;
@ -5891,9 +5892,19 @@ static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_
loc[from_reg] = from_reg; loc[from_reg] = from_reg;
pred[to] = from_reg; pred[to] = from_reg;
types[from_reg] = copies[i].type; types[from_reg] = copies[i].type;
/* temporary register may de the same as some of destionations */
if (to == tmp_reg) {
IR_ASSERT(last_reg == IR_REG_NONE);
last_reg = to;
} else if (to == tmp_fp_reg) {
IR_ASSERT(last_fp_reg == IR_REG_NONE);
last_fp_reg = to;
} else {
IR_ASSERT(!IR_REGSET_IN(todo, to));
IR_REGSET_INCL(todo, to); IR_REGSET_INCL(todo, to);
} }
} }
}
IR_REGSET_FOREACH(todo, i) { IR_REGSET_FOREACH(todo, i) {
if (loc[i] == IR_REG_NONE) { if (loc[i] == IR_REG_NONE) {
@ -5902,8 +5913,6 @@ static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_
} IR_REGSET_FOREACH_END(); } IR_REGSET_FOREACH_END();
while (1) { while (1) {
ir_ref /*a, b,*/ c;
while (ready != IR_REGSET_EMPTY) { while (ready != IR_REGSET_EMPTY) {
to = ir_regset_pop_first(&ready); to = ir_regset_pop_first(&ready);
from_reg = pred[to]; from_reg = pred[to];
@ -5960,6 +5969,46 @@ static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_
IR_REGSET_INCL(ready, to); IR_REGSET_INCL(ready, to);
} }
if (last_reg != IR_REG_NONE) {
to = last_reg;
from_reg = pred[to];
c = loc[from_reg];
if (to != c) {
type = types[from_reg];
IR_ASSERT(IR_IS_TYPE_INT(type));
if (ir_type_size[type] > 2) {
ir_emit_mov(ctx, type, to, c);
} else if (ir_type_size[type] == 2) {
if (IR_IS_TYPE_SIGNED(type)) {
| movsx Rd(to), Rw(c)
type = IR_I32;
} else {
| movzx Rd(to), Rw(c)
type = IR_U32;
}
} else /* if (ir_type_size[type] == 1) */ {
if (IR_IS_TYPE_SIGNED(type)) {
| movsx Rd(to), Rb(c)
type = IR_I32;
} else {
| movzx Rd(to), Rb(c)
type = IR_U32;
}
}
}
}
if (last_fp_reg != IR_REG_NONE) {
to = last_fp_reg;
from_reg = pred[to];
c = loc[from_reg];
if (to != c) {
type = types[from_reg];
IR_ASSERT(!IR_IS_TYPE_INT(type));
ir_emit_fp_mov(ctx, type, to, c);
}
}
ir_mem_free(loc); ir_mem_free(loc);
return 1; return 1;

View File

@ -82,8 +82,8 @@ test:
.L1: .L1:
testl %ebx, %ebx testl %ebx, %ebx
je .L3 je .L3
movl %edi, %eax
movl %r8d, %ecx movl %r8d, %ecx
movl %edi, %eax
.L2: .L2:
testl %ebp, %ebp testl %ebp, %ebp
jne .L1 jne .L1

View File

@ -73,8 +73,8 @@ test:
.L1: .L1:
cmp w4, #0 cmp w4, #0
b.eq .L3 b.eq .L3
mov w0, w2
mov w5, w10 mov w5, w10
mov w0, w2
.L2: .L2:
cmp w8, #0 cmp w8, #0
b.ne .L1 b.ne .L1

View File

@ -79,8 +79,8 @@ test:
.L1: .L1:
testl %r10d, %r10d testl %r10d, %r10d
je .L3 je .L3
movl %edx, %edi
movl %ebp, %r9d movl %ebp, %r9d
movl %edx, %edi
.L2: .L2:
testl %r11d, %r11d testl %r11d, %r11d
jne .L1 jne .L1