Reorder basic blocks to reduce number of jumps and improve code locality

This commit is contained in:
Dmitry Stogov 2022-05-24 00:43:35 +03:00
parent 596f03f263
commit d3c1e4a02f
19 changed files with 208 additions and 111 deletions

1
ir.h
View File

@ -731,6 +731,7 @@ void ir_build_def_use_lists(ir_ctx *ctx);
int ir_build_cfg(ir_ctx *ctx);
int ir_build_dominators_tree(ir_ctx *ctx);
int ir_find_loops(ir_ctx *ctx);
int ir_schedule_blocks(ir_ctx *ctx);
/* SCCP - Sparse Conditional Constant Propagation (implementation in ir_sccp.c) */
int ir_sccp(ir_ctx *ctx);

127
ir_cfg.c
View File

@ -422,3 +422,130 @@ next:
return 1;
}
int ir_schedule_blocks(ir_ctx *ctx)
{
uint32_t len = ir_bitset_len(ctx->cfg_blocks_count + 1);
ir_bitset blocks = ir_bitset_malloc(ctx->cfg_blocks_count + 1);
uint32_t b, *p, successor, best_successor, j;
ir_block *bb, *successor_bb, *best_successor_bb;
uint32_t *list, *map;
uint32_t count = 0;
bool reorder = 0;
list = ir_mem_malloc(sizeof(uint32_t) * (ctx->cfg_blocks_count + 1) * 2);
map = list + (ctx->cfg_blocks_count + 1);
for (b = 1; b <= ctx->cfg_blocks_count; b++) {
ir_bitset_incl(blocks, b);
}
while (!ir_bitset_empty(blocks, len)) {
b = ir_bitset_pop_first(blocks, len);
bb = &ctx->cfg_blocks[b];
do {
if (bb->predecessors_count == 2) {
uint32_t predecessor = ctx->cfg_edges[bb->predecessors];
if (!ir_bitset_in(blocks, predecessor)) {
predecessor = ctx->cfg_edges[bb->predecessors + 1];
}
if (ir_bitset_in(blocks, predecessor)) {
ir_block *predecessor_bb = &ctx->cfg_blocks[predecessor];
if (predecessor_bb->successors_count == 1
&& predecessor_bb->predecessors_count == 1
&& predecessor_bb->end == predecessor_bb->start + 1
&& !(predecessor_bb->flags & IR_BB_DESSA_MOVES)) {
ir_bitset_excl(blocks, predecessor);
count++;
list[count] = predecessor;
map[predecessor] = count;
if (predecessor != count) {
reorder = 1;
}
}
}
}
count++;
list[count] = b;
map[b] = count;
if (b != count) {
reorder = 1;
}
if (!bb->successors_count) {
break;
}
best_successor_bb = NULL;
for (b = 0, p = &ctx->cfg_edges[bb->successors]; b < bb->successors_count; b++, p++) {
successor = *p;
successor_bb = &ctx->cfg_blocks[successor];
if (ir_bitset_in(blocks, successor)) {
if (!best_successor_bb || successor_bb->loop_depth > best_successor_bb->loop_depth) {
// TODO: use block frequency
best_successor = successor;
best_successor_bb = successor_bb;
}
}
}
if (!best_successor_bb) {
if (bb->successors_count == 1
&& bb->predecessors_count == 1
&& bb->end == bb->start + 1
&& !(bb->flags & IR_BB_DESSA_MOVES)) {
uint32_t predecessor = ctx->cfg_edges[bb->predecessors];
ir_block *predecessor_bb = &ctx->cfg_blocks[predecessor];
if (predecessor_bb->successors_count == 2) {
b = ctx->cfg_edges[predecessor_bb->successors];
if (!ir_bitset_in(blocks, b)) {
b = ctx->cfg_edges[predecessor_bb->successors + 1];
}
if (ir_bitset_in(blocks, b)) {
bb = &ctx->cfg_blocks[b];
ir_bitset_excl(blocks, b);
continue;
}
}
}
break;
}
b = best_successor;
bb = best_successor_bb;
ir_bitset_excl(blocks, b);
} while (1);
}
if (reorder) {
ir_block *cfg_blocks = ir_mem_calloc(sizeof(ir_block), ctx->cfg_blocks_count + 1);
for (b = 1, bb = cfg_blocks + 1; b <= count; b++, bb++) {
*bb = ctx->cfg_blocks[list[b]];
if (bb->dom_parent > 0) {
bb->dom_parent = map[bb->dom_parent];
}
if (bb->dom_child > 0) {
bb->dom_child = map[bb->dom_child];
}
if (bb->dom_next_child > 0) {
bb->dom_next_child = map[bb->dom_next_child];
}
if (bb->loop_header > 0) {
bb->loop_header = map[bb->loop_header];
}
ctx->bb_num[bb->start] = b;
}
for (j = 0; j < ctx->cfg_edges_count; j++) {
if (ctx->cfg_edges[j] > 0) {
ctx->cfg_edges[j] = map[ctx->cfg_edges[j]];
}
}
ir_mem_free(ctx->cfg_blocks);
ctx->cfg_blocks = cfg_blocks;
}
ir_mem_free(list);
ir_mem_free(blocks);
return 1;
}

View File

@ -181,6 +181,8 @@ int ir_compile_func(ir_ctx *ctx, int opt_level, uint32_t dump, const char *dump_
if (ctx->flags & IR_GEN_NATIVE) {
ir_reg_alloc(ctx);
}
ir_schedule_blocks(ctx);
} else if (ctx->flags & (IR_GEN_NATIVE|IR_GEN_C)) {
ir_assign_virtual_registers(ctx);
ir_compute_dessa_moves(ctx);

View File

@ -199,6 +199,7 @@ int main(int argc, char **argv)
ir_compute_live_ranges(&ctx);
ir_coalesce(&ctx);
ir_reg_alloc(&ctx);
ir_schedule_blocks(&ctx);
} else {
ir_compute_dessa_moves(&ctx);
}

View File

@ -37,8 +37,6 @@ Combo 001
test:
.L1:
testb %dil, %dil
je .L2
jmp .L1
.L2:
jne .L1
movl $1, %eax
retq

View File

@ -49,8 +49,6 @@ Combo 002
test:
.L1:
testb %dil, %dil
je .L2
jmp .L1
.L2:
jne .L1
movl $1, %eax
retq

View File

@ -49,8 +49,6 @@ Combo 003
test:
.L1:
testb %dil, %dil
je .L2
jmp .L1
.L2:
jne .L1
movl %esi, %eax
retq

View File

@ -41,7 +41,5 @@ test:
imull %esi, %eax
.L1:
testb %dil, %dil
je .L2
jmp .L1
.L2:
jne .L1
retq

View File

@ -36,8 +36,6 @@ test:
.L1:
leal (%rax, %rcx), %ecx
cmpl $0xa, %ecx
jg .L2
jmp .L1
.L2:
jl .L1
leal (%rcx, %rcx), %eax
retq

View File

@ -78,20 +78,12 @@ test:
leal 4(%rbx), %ebp
.L1:
testl %r10d, %r10d
je .L2
je .L3
movl %edx, %edi
movl %ebp, %r9d
jmp .L3
.L2:
movl %edi, %eax
imull %esi, %eax
leal 1(%rax), %r8d
movl %edx, %ecx
.L3:
testl %r11d, %r11d
je .L4
jmp .L1
.L4:
jne .L1
movl %edi, %r10d
imull %esi, %r10d
leal (%rdi, %rsi), %esi
@ -106,3 +98,9 @@ test:
movq (%rsp), %rbp
addq $0x10, %rsp
retq
.L3:
movl %edi, %eax
imull %esi, %eax
leal 1(%rax), %r8d
movl %edx, %ecx
jmp .L2

View File

@ -35,8 +35,6 @@ test:
.L1:
leal (%rax, %rcx), %ecx
cmpl $0xa, %ecx
jg .L2
jmp .L1
.L2:
jl .L1
leal (%rcx, %rcx), %eax
retq

View File

@ -51,24 +51,22 @@ SCCP 002
--EXPECT--
test:
cmpl $3, %edi
jg .L1
jg .L2
cmpl $1, %edi
jl .L1
jl .L2
movslq %edi, %rdi
leaq 0x2c(%rip), %rax
leaq 0x24(%rip), %rax
jmpq *-8(%rax, %rdi, 8)
movl $1, %eax
jmp .L3
movl $2, %eax
jmp .L3
movl $3, %eax
jmp .L3
.L1:
testb %sil, %sil
je .L2
retq
movl $2, %eax
jmp .L1
movl $3, %eax
jmp .L1
.L2:
testb %sil, %sil
jne .L2
movl $1, %eax
retq
.L3:
retq

View File

@ -33,20 +33,19 @@ SWITCH 001
--EXPECT--
test:
cmpl $3, %edi
jg .L1
jg .L2
cmpl $1, %edi
jl .L1
jl .L2
movslq %edi, %rdi
leaq 0x24(%rip), %rax
jmpq *-8(%rax, %rdi, 8)
movl $1, %eax
jmp .L2
movl $2, %eax
jmp .L2
movl $3, %eax
jmp .L2
.L1:
movl $4, %eax
.L2:
retq
movl $2, %eax
jmp .L1
movl $3, %eax
jmp .L1
.L2:
movl $4, %eax
jmp .L1

View File

@ -37,22 +37,21 @@ test:
je .L1
movabsq $0x200000001, %rax
cmpq %rax, %rdi
je .L2
je .L3
movabsq $0x2000003ea, %rax
cmpq %rax, %rdi
je .L3
jmp .L4
je .L4
jmp .L5
.L1:
movabsq $0x200000000, %rax
jmp .L5
.L2:
movabsq $0x200000001, %rax
jmp .L5
.L3:
movabsq $0x2000003ea, %rax
jmp .L5
.L4:
movabsq $0x200000003, %rax
.L5:
retq
.L3:
movabsq $0x200000001, %rax
jmp .L2
.L4:
movabsq $0x2000003ea, %rax
jmp .L2
.L5:
movabsq $0x200000003, %rax
jmp .L2

View File

@ -64,19 +64,17 @@ test:
vaddsd %xmm4, %xmm5, %xmm6
vucomisd 0x2f(%rip), %xmm6
ja .L2
jmp .L3
.L2:
retq
.L3:
cmpl $0x3e8, %eax
jl .L4
xorl %eax, %eax
retq
.L4:
jg .L3
vmulsd %xmm3, %xmm2, %xmm3
vsubsd %xmm5, %xmm4, %xmm2
vaddsd %xmm1, %xmm2, %xmm2
vaddsd %xmm3, %xmm3, %xmm3
vaddsd %xmm0, %xmm3, %xmm3
jmp .L1
.L2:
retq
.L3:
xorl %eax, %eax
retq

View File

@ -67,15 +67,8 @@ test:
addsd %xmm4, %xmm6
ucomisd 0x33(%rip), %xmm6
ja .L2
jmp .L3
.L2:
retq
.L3:
cmpl $0x3e8, %eax
jl .L4
xorl %eax, %eax
retq
.L4:
jg .L3
mulsd %xmm2, %xmm3
subsd %xmm5, %xmm4
movapd %xmm4, %xmm2
@ -83,4 +76,8 @@ test:
addsd %xmm3, %xmm3
addsd %xmm0, %xmm3
jmp .L1
.L2:
retq
.L3:
xorl %eax, %eax
retq

View File

@ -67,16 +67,9 @@ test:
addsd %xmm4, %xmm6
ucomisd 0x42(%rip), %xmm6
ja .L2
jmp .L3
.L2:
retq
.L3:
movabsq $0x2000003e8, %rcx
cmpq %rcx, %rax
jl .L4
movabsq $0x200000000, %rax
retq
.L4:
jg .L3
mulsd %xmm2, %xmm3
subsd %xmm5, %xmm4
movapd %xmm4, %xmm2
@ -84,4 +77,8 @@ test:
addsd %xmm3, %xmm3
addsd %xmm0, %xmm3
jmp .L1
.L2:
retq
.L3:
movabsq $0x200000000, %rax
retq

View File

@ -81,7 +81,7 @@ test:
pushq %rbp
movq %rsp, %rbp
subq $0x38, %rsp
subsd 0xb8(%rip), %xmm1
subsd 0xb0(%rip), %xmm1
movsd %xmm1, -0x38(%rbp)
movsd %xmm0, -0x30(%rbp)
xorpd %xmm0, %xmm0
@ -114,21 +114,17 @@ test:
movsd %xmm0, -0x28(%rbp)
movsd -0x10(%rbp), %xmm0
addsd -8(%rbp), %xmm0
ucomisd 0x2e(%rip), %xmm0
ucomisd 0x26(%rip), %xmm0
ja .L2
jmp .L3
movl (%rax), %ecx
cmpl $0x3e8, %ecx
jl .L1
xorl %eax, %eax
movq %rbp, %rsp
popq %rbp
retq
.L2:
movl (%rax), %eax
movq %rbp, %rsp
popq %rbp
retq
.L3:
movl (%rax), %ecx
cmpl $0x3e8, %ecx
jl .L4
xorl %eax, %eax
movq %rbp, %rsp
popq %rbp
retq
.L4:
jmp .L1

View File

@ -107,16 +107,12 @@ test:
addsd 0x38(%rsp), %xmm0
ucomisd 0x2b(%rip), %xmm0
ja .L2
jmp .L3
cmpl $0x3e8, 0x20(%rsp)
jl .L1
xorl %eax, %eax
addq $0x40, %rsp
retq
.L2:
movl 0x20(%rsp), %eax
addq $0x40, %rsp
retq
.L3:
cmpl $0x3e8, 0x20(%rsp)
jl .L4
xorl %eax, %eax
addq $0x40, %rsp
retq
.L4:
jmp .L1