Improve the parallel copy algorithm to support moving a single source into multiple destinations

This commit is contained in:
Dmitry Stogov 2023-04-26 10:56:55 +03:00
parent c2470bc684
commit 0de0c1d0fa
4 changed files with 85 additions and 47 deletions

View File

@ -3428,12 +3428,11 @@ static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_
} }
} IR_REGSET_FOREACH_END(); } IR_REGSET_FOREACH_END();
while (todo != IR_REGSET_EMPTY) { while (1) {
ir_ref /*a, b,*/ c; ir_ref /*a, b,*/ c;
while (ready != IR_REGSET_EMPTY) { while (ready != IR_REGSET_EMPTY) {
to = IR_REGSET_FIRST(ready); to = ir_regset_pop_first(&ready);
IR_REGSET_EXCL(ready, to);
from_reg = pred[to]; from_reg = pred[to];
type = types[to]; type = types[to];
c = loc[from_reg]; c = loc[from_reg];
@ -3442,29 +3441,32 @@ static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_
} else { } else {
ir_emit_fp_mov(ctx, type, to, c); ir_emit_fp_mov(ctx, type, to, c);
} }
IR_REGSET_EXCL(todo, to);
loc[from_reg] = to; loc[from_reg] = to;
if (from_reg == c && pred[from_reg] != IR_REG_NONE) { if (from_reg == c && pred[from_reg] != IR_REG_NONE) {
IR_REGSET_INCL(ready, from_reg); IR_REGSET_INCL(ready, from_reg);
} }
} }
to = IR_REGSET_FIRST(todo);
IR_REGSET_EXCL(todo, to); if (todo == IR_REGSET_EMPTY) {
from_reg = pred[to]; break;
if (to != loc[from_reg]) {
type = types[to];
if (IR_IS_TYPE_INT(type)) {
IR_ASSERT(tmp_reg != IR_REG_NONE);
IR_ASSERT(tmp_reg >= IR_REG_GP_FIRST && tmp_reg <= IR_REG_GP_LAST);
ir_emit_mov(ctx, type, tmp_reg, to);
loc[to] = tmp_reg;
} else {
IR_ASSERT(tmp_fp_reg != IR_REG_NONE);
IR_ASSERT(tmp_fp_reg >= IR_REG_FP_FIRST && tmp_fp_reg <= IR_REG_FP_LAST);
ir_emit_fp_mov(ctx, type, tmp_fp_reg, to);
loc[to] = tmp_fp_reg;
}
IR_REGSET_INCL(ready, to);
} }
to = ir_regset_pop_first(&todo);
from_reg = pred[to];
IR_ASSERT(to != loc[from_reg]);
type = types[to];
if (IR_IS_TYPE_INT(type)) {
IR_ASSERT(tmp_reg != IR_REG_NONE);
IR_ASSERT(tmp_reg >= IR_REG_GP_FIRST && tmp_reg <= IR_REG_GP_LAST);
ir_emit_mov(ctx, type, tmp_reg, to);
loc[to] = tmp_reg;
} else {
IR_ASSERT(tmp_fp_reg != IR_REG_NONE);
IR_ASSERT(tmp_fp_reg >= IR_REG_FP_FIRST && tmp_fp_reg <= IR_REG_FP_LAST);
ir_emit_fp_mov(ctx, type, tmp_fp_reg, to);
loc[to] = tmp_fp_reg;
}
IR_REGSET_INCL(ready, to);
} }
ir_mem_free(loc); ir_mem_free(loc);

15
ir_ra.c
View File

@ -1469,24 +1469,27 @@ int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy)
} }
} IR_BITSET_FOREACH_END(); } IR_BITSET_FOREACH_END();
while ((i = ir_bitset_pop_first(todo, len)) >= 0) { while (1) {
ir_ref a, b, c; ir_ref a, b, c;
while ((b = ir_bitset_pop_first(ready, len)) >= 0) { while ((b = ir_bitset_pop_first(ready, len)) >= 0) {
a = pred[b]; a = pred[b];
c = loc[a]; c = loc[a];
emit_copy(ctx, ctx->ir_base[b].type, c, b); emit_copy(ctx, ctx->ir_base[b].type, c, b);
ir_bitset_excl(todo, b);
loc[a] = b; loc[a] = b;
if (a == c && pred[a]) { if (a == c && pred[a]) {
ir_bitset_incl(ready, a); ir_bitset_incl(ready, a);
} }
} }
b = i; b = ir_bitset_pop_first(todo, len);
if (b != loc[pred[b]]) { if (b < 0) {
emit_copy(ctx, ctx->ir_base[b].type, b, 0); break;
loc[b] = 0;
ir_bitset_incl(ready, b);
} }
IR_ASSERT(b != loc[pred[b]]);
emit_copy(ctx, ctx->ir_base[b].type, b, 0);
loc[b] = 0;
ir_bitset_incl(ready, b);
} }
ir_mem_free(ready); ir_mem_free(ready);

View File

@ -5782,12 +5782,11 @@ static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_
} }
} IR_REGSET_FOREACH_END(); } IR_REGSET_FOREACH_END();
while (todo != IR_REGSET_EMPTY) { while (1) {
ir_ref /*a, b,*/ c; ir_ref /*a, b,*/ c;
while (ready != IR_REGSET_EMPTY) { while (ready != IR_REGSET_EMPTY) {
to = IR_REGSET_FIRST(ready); to = ir_regset_pop_first(&ready);
IR_REGSET_EXCL(ready, to);
from_reg = pred[to]; from_reg = pred[to];
c = loc[from_reg]; c = loc[from_reg];
type = types[c]; type = types[c];
@ -5814,31 +5813,34 @@ static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_
} else { } else {
ir_emit_fp_mov(ctx, type, to, c); ir_emit_fp_mov(ctx, type, to, c);
} }
IR_REGSET_EXCL(todo, to);
types[to] = type; types[to] = type;
loc[from_reg] = to; loc[from_reg] = to;
if (from_reg == c && pred[from_reg] != IR_REG_NONE) { if (from_reg == c && pred[from_reg] != IR_REG_NONE) {
IR_REGSET_INCL(ready, from_reg); IR_REGSET_INCL(ready, from_reg);
} }
} }
to = IR_REGSET_FIRST(todo);
IR_REGSET_EXCL(todo, to); if (todo == IR_REGSET_EMPTY) {
from_reg = pred[to]; break;
if (to != loc[from_reg]) {
type = types[from_reg];
if (IR_IS_TYPE_INT(type)) {
IR_ASSERT(tmp_reg != IR_REG_NONE);
IR_ASSERT(tmp_reg >= IR_REG_GP_FIRST && tmp_reg <= IR_REG_GP_LAST);
ir_emit_mov(ctx, type, tmp_reg, to);
types[tmp_reg] = type;
loc[to] = tmp_reg;
} else {
IR_ASSERT(tmp_fp_reg != IR_REG_NONE);
IR_ASSERT(tmp_fp_reg >= IR_REG_FP_FIRST && tmp_fp_reg <= IR_REG_FP_LAST);
ir_emit_fp_mov(ctx, type, tmp_fp_reg, to);
loc[to] = tmp_fp_reg;
}
IR_REGSET_INCL(ready, to);
} }
to = ir_regset_pop_first(&todo);
from_reg = pred[to];
IR_ASSERT(to != loc[from_reg]);
type = types[from_reg];
if (IR_IS_TYPE_INT(type)) {
IR_ASSERT(tmp_reg != IR_REG_NONE);
IR_ASSERT(tmp_reg >= IR_REG_GP_FIRST && tmp_reg <= IR_REG_GP_LAST);
ir_emit_mov(ctx, type, tmp_reg, to);
types[tmp_reg] = type;
loc[to] = tmp_reg;
} else {
IR_ASSERT(tmp_fp_reg != IR_REG_NONE);
IR_ASSERT(tmp_fp_reg >= IR_REG_FP_FIRST && tmp_fp_reg <= IR_REG_FP_LAST);
ir_emit_fp_mov(ctx, type, tmp_fp_reg, to);
loc[to] = tmp_fp_reg;
}
IR_REGSET_INCL(ready, to);
} }
ir_mem_free(loc); ir_mem_free(loc);

31
tests/debug/call_003.irt Normal file
View File

@ -0,0 +1,31 @@
--TEST--
Simple CALL (dup reg params)
--TARGET--
x86_64
--ARGS--
-S
--CODE--
{
uintptr_t c_1 = 0;
bool c_2 = 0;
bool c_3 = 1;
uintptr_t c_5 = func(printf);
uintptr_t c_6 = "hello %d %d!\n";
l_1 = START(l_4);
int32_t p_1 = PARAM(l_1, "x", 0);
int32_t d_2, l_2 = CALL/3(l_1, c_5, c_6, p_1, p_1);
l_4 = RETURN(l_2, d_2);
}
--EXPECT--
test:
subq $8, %rsp
movl %edi, %edx
movl %edx, %esi
leaq .L1(%rip), %rdi
movabsq $_IO_printf, %rax
callq *%rax
addq $8, %rsp
retq
.rodata
.L1:
.db 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x25, 0x64, 0x20, 0x25, 0x64, 0x21, 0x0a, 0x00