mirror of
https://github.com/danog/ir.git
synced 2025-01-22 05:31:32 +01:00
Reorder basic blocks to reduce number of jumps and improve code locality
This commit is contained in:
parent
596f03f263
commit
d3c1e4a02f
1
ir.h
1
ir.h
@ -731,6 +731,7 @@ void ir_build_def_use_lists(ir_ctx *ctx);
|
||||
int ir_build_cfg(ir_ctx *ctx);
|
||||
int ir_build_dominators_tree(ir_ctx *ctx);
|
||||
int ir_find_loops(ir_ctx *ctx);
|
||||
int ir_schedule_blocks(ir_ctx *ctx);
|
||||
|
||||
/* SCCP - Sparse Conditional Constant Propagation (implementation in ir_sccp.c) */
|
||||
int ir_sccp(ir_ctx *ctx);
|
||||
|
127
ir_cfg.c
127
ir_cfg.c
@ -422,3 +422,130 @@ next:
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int ir_schedule_blocks(ir_ctx *ctx)
|
||||
{
|
||||
uint32_t len = ir_bitset_len(ctx->cfg_blocks_count + 1);
|
||||
ir_bitset blocks = ir_bitset_malloc(ctx->cfg_blocks_count + 1);
|
||||
uint32_t b, *p, successor, best_successor, j;
|
||||
ir_block *bb, *successor_bb, *best_successor_bb;
|
||||
uint32_t *list, *map;
|
||||
uint32_t count = 0;
|
||||
bool reorder = 0;
|
||||
|
||||
list = ir_mem_malloc(sizeof(uint32_t) * (ctx->cfg_blocks_count + 1) * 2);
|
||||
map = list + (ctx->cfg_blocks_count + 1);
|
||||
for (b = 1; b <= ctx->cfg_blocks_count; b++) {
|
||||
ir_bitset_incl(blocks, b);
|
||||
}
|
||||
|
||||
while (!ir_bitset_empty(blocks, len)) {
|
||||
b = ir_bitset_pop_first(blocks, len);
|
||||
bb = &ctx->cfg_blocks[b];
|
||||
do {
|
||||
if (bb->predecessors_count == 2) {
|
||||
uint32_t predecessor = ctx->cfg_edges[bb->predecessors];
|
||||
|
||||
if (!ir_bitset_in(blocks, predecessor)) {
|
||||
predecessor = ctx->cfg_edges[bb->predecessors + 1];
|
||||
}
|
||||
if (ir_bitset_in(blocks, predecessor)) {
|
||||
ir_block *predecessor_bb = &ctx->cfg_blocks[predecessor];
|
||||
|
||||
if (predecessor_bb->successors_count == 1
|
||||
&& predecessor_bb->predecessors_count == 1
|
||||
&& predecessor_bb->end == predecessor_bb->start + 1
|
||||
&& !(predecessor_bb->flags & IR_BB_DESSA_MOVES)) {
|
||||
ir_bitset_excl(blocks, predecessor);
|
||||
count++;
|
||||
list[count] = predecessor;
|
||||
map[predecessor] = count;
|
||||
if (predecessor != count) {
|
||||
reorder = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
count++;
|
||||
list[count] = b;
|
||||
map[b] = count;
|
||||
if (b != count) {
|
||||
reorder = 1;
|
||||
}
|
||||
if (!bb->successors_count) {
|
||||
break;
|
||||
}
|
||||
best_successor_bb = NULL;
|
||||
for (b = 0, p = &ctx->cfg_edges[bb->successors]; b < bb->successors_count; b++, p++) {
|
||||
successor = *p;
|
||||
successor_bb = &ctx->cfg_blocks[successor];
|
||||
if (ir_bitset_in(blocks, successor)) {
|
||||
if (!best_successor_bb || successor_bb->loop_depth > best_successor_bb->loop_depth) {
|
||||
// TODO: use block frequency
|
||||
best_successor = successor;
|
||||
best_successor_bb = successor_bb;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!best_successor_bb) {
|
||||
if (bb->successors_count == 1
|
||||
&& bb->predecessors_count == 1
|
||||
&& bb->end == bb->start + 1
|
||||
&& !(bb->flags & IR_BB_DESSA_MOVES)) {
|
||||
uint32_t predecessor = ctx->cfg_edges[bb->predecessors];
|
||||
ir_block *predecessor_bb = &ctx->cfg_blocks[predecessor];
|
||||
|
||||
if (predecessor_bb->successors_count == 2) {
|
||||
b = ctx->cfg_edges[predecessor_bb->successors];
|
||||
|
||||
if (!ir_bitset_in(blocks, b)) {
|
||||
b = ctx->cfg_edges[predecessor_bb->successors + 1];
|
||||
}
|
||||
if (ir_bitset_in(blocks, b)) {
|
||||
bb = &ctx->cfg_blocks[b];
|
||||
ir_bitset_excl(blocks, b);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
b = best_successor;
|
||||
bb = best_successor_bb;
|
||||
ir_bitset_excl(blocks, b);
|
||||
} while (1);
|
||||
}
|
||||
|
||||
if (reorder) {
|
||||
ir_block *cfg_blocks = ir_mem_calloc(sizeof(ir_block), ctx->cfg_blocks_count + 1);
|
||||
|
||||
for (b = 1, bb = cfg_blocks + 1; b <= count; b++, bb++) {
|
||||
*bb = ctx->cfg_blocks[list[b]];
|
||||
if (bb->dom_parent > 0) {
|
||||
bb->dom_parent = map[bb->dom_parent];
|
||||
}
|
||||
if (bb->dom_child > 0) {
|
||||
bb->dom_child = map[bb->dom_child];
|
||||
}
|
||||
if (bb->dom_next_child > 0) {
|
||||
bb->dom_next_child = map[bb->dom_next_child];
|
||||
}
|
||||
if (bb->loop_header > 0) {
|
||||
bb->loop_header = map[bb->loop_header];
|
||||
}
|
||||
ctx->bb_num[bb->start] = b;
|
||||
}
|
||||
for (j = 0; j < ctx->cfg_edges_count; j++) {
|
||||
if (ctx->cfg_edges[j] > 0) {
|
||||
ctx->cfg_edges[j] = map[ctx->cfg_edges[j]];
|
||||
}
|
||||
}
|
||||
ir_mem_free(ctx->cfg_blocks);
|
||||
ctx->cfg_blocks = cfg_blocks;
|
||||
}
|
||||
|
||||
ir_mem_free(list);
|
||||
ir_mem_free(blocks);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
@ -181,6 +181,8 @@ int ir_compile_func(ir_ctx *ctx, int opt_level, uint32_t dump, const char *dump_
|
||||
if (ctx->flags & IR_GEN_NATIVE) {
|
||||
ir_reg_alloc(ctx);
|
||||
}
|
||||
|
||||
ir_schedule_blocks(ctx);
|
||||
} else if (ctx->flags & (IR_GEN_NATIVE|IR_GEN_C)) {
|
||||
ir_assign_virtual_registers(ctx);
|
||||
ir_compute_dessa_moves(ctx);
|
||||
|
@ -199,6 +199,7 @@ int main(int argc, char **argv)
|
||||
ir_compute_live_ranges(&ctx);
|
||||
ir_coalesce(&ctx);
|
||||
ir_reg_alloc(&ctx);
|
||||
ir_schedule_blocks(&ctx);
|
||||
} else {
|
||||
ir_compute_dessa_moves(&ctx);
|
||||
}
|
||||
|
@ -37,8 +37,6 @@ Combo 001
|
||||
test:
|
||||
.L1:
|
||||
testb %dil, %dil
|
||||
je .L2
|
||||
jmp .L1
|
||||
.L2:
|
||||
jne .L1
|
||||
movl $1, %eax
|
||||
retq
|
||||
|
@ -49,8 +49,6 @@ Combo 002
|
||||
test:
|
||||
.L1:
|
||||
testb %dil, %dil
|
||||
je .L2
|
||||
jmp .L1
|
||||
.L2:
|
||||
jne .L1
|
||||
movl $1, %eax
|
||||
retq
|
||||
|
@ -49,8 +49,6 @@ Combo 003
|
||||
test:
|
||||
.L1:
|
||||
testb %dil, %dil
|
||||
je .L2
|
||||
jmp .L1
|
||||
.L2:
|
||||
jne .L1
|
||||
movl %esi, %eax
|
||||
retq
|
||||
|
@ -41,7 +41,5 @@ test:
|
||||
imull %esi, %eax
|
||||
.L1:
|
||||
testb %dil, %dil
|
||||
je .L2
|
||||
jmp .L1
|
||||
.L2:
|
||||
jne .L1
|
||||
retq
|
||||
|
@ -36,8 +36,6 @@ test:
|
||||
.L1:
|
||||
leal (%rax, %rcx), %ecx
|
||||
cmpl $0xa, %ecx
|
||||
jg .L2
|
||||
jmp .L1
|
||||
.L2:
|
||||
jl .L1
|
||||
leal (%rcx, %rcx), %eax
|
||||
retq
|
||||
|
@ -78,20 +78,12 @@ test:
|
||||
leal 4(%rbx), %ebp
|
||||
.L1:
|
||||
testl %r10d, %r10d
|
||||
je .L2
|
||||
je .L3
|
||||
movl %edx, %edi
|
||||
movl %ebp, %r9d
|
||||
jmp .L3
|
||||
.L2:
|
||||
movl %edi, %eax
|
||||
imull %esi, %eax
|
||||
leal 1(%rax), %r8d
|
||||
movl %edx, %ecx
|
||||
.L3:
|
||||
testl %r11d, %r11d
|
||||
je .L4
|
||||
jmp .L1
|
||||
.L4:
|
||||
jne .L1
|
||||
movl %edi, %r10d
|
||||
imull %esi, %r10d
|
||||
leal (%rdi, %rsi), %esi
|
||||
@ -106,3 +98,9 @@ test:
|
||||
movq (%rsp), %rbp
|
||||
addq $0x10, %rsp
|
||||
retq
|
||||
.L3:
|
||||
movl %edi, %eax
|
||||
imull %esi, %eax
|
||||
leal 1(%rax), %r8d
|
||||
movl %edx, %ecx
|
||||
jmp .L2
|
||||
|
@ -35,8 +35,6 @@ test:
|
||||
.L1:
|
||||
leal (%rax, %rcx), %ecx
|
||||
cmpl $0xa, %ecx
|
||||
jg .L2
|
||||
jmp .L1
|
||||
.L2:
|
||||
jl .L1
|
||||
leal (%rcx, %rcx), %eax
|
||||
retq
|
||||
|
@ -51,24 +51,22 @@ SCCP 002
|
||||
--EXPECT--
|
||||
test:
|
||||
cmpl $3, %edi
|
||||
jg .L1
|
||||
jg .L2
|
||||
cmpl $1, %edi
|
||||
jl .L1
|
||||
jl .L2
|
||||
movslq %edi, %rdi
|
||||
leaq 0x2c(%rip), %rax
|
||||
leaq 0x24(%rip), %rax
|
||||
jmpq *-8(%rax, %rdi, 8)
|
||||
movl $1, %eax
|
||||
jmp .L3
|
||||
movl $2, %eax
|
||||
jmp .L3
|
||||
movl $3, %eax
|
||||
jmp .L3
|
||||
.L1:
|
||||
testb %sil, %sil
|
||||
je .L2
|
||||
retq
|
||||
movl $2, %eax
|
||||
jmp .L1
|
||||
movl $3, %eax
|
||||
jmp .L1
|
||||
.L2:
|
||||
testb %sil, %sil
|
||||
jne .L2
|
||||
movl $1, %eax
|
||||
retq
|
||||
.L3:
|
||||
retq
|
||||
|
||||
|
@ -33,20 +33,19 @@ SWITCH 001
|
||||
--EXPECT--
|
||||
test:
|
||||
cmpl $3, %edi
|
||||
jg .L1
|
||||
jg .L2
|
||||
cmpl $1, %edi
|
||||
jl .L1
|
||||
jl .L2
|
||||
movslq %edi, %rdi
|
||||
leaq 0x24(%rip), %rax
|
||||
jmpq *-8(%rax, %rdi, 8)
|
||||
movl $1, %eax
|
||||
jmp .L2
|
||||
movl $2, %eax
|
||||
jmp .L2
|
||||
movl $3, %eax
|
||||
jmp .L2
|
||||
.L1:
|
||||
movl $4, %eax
|
||||
.L2:
|
||||
retq
|
||||
|
||||
movl $2, %eax
|
||||
jmp .L1
|
||||
movl $3, %eax
|
||||
jmp .L1
|
||||
.L2:
|
||||
movl $4, %eax
|
||||
jmp .L1
|
||||
|
@ -37,22 +37,21 @@ test:
|
||||
je .L1
|
||||
movabsq $0x200000001, %rax
|
||||
cmpq %rax, %rdi
|
||||
je .L2
|
||||
je .L3
|
||||
movabsq $0x2000003ea, %rax
|
||||
cmpq %rax, %rdi
|
||||
je .L3
|
||||
jmp .L4
|
||||
je .L4
|
||||
jmp .L5
|
||||
.L1:
|
||||
movabsq $0x200000000, %rax
|
||||
jmp .L5
|
||||
.L2:
|
||||
movabsq $0x200000001, %rax
|
||||
jmp .L5
|
||||
.L3:
|
||||
movabsq $0x2000003ea, %rax
|
||||
jmp .L5
|
||||
.L4:
|
||||
movabsq $0x200000003, %rax
|
||||
.L5:
|
||||
retq
|
||||
|
||||
.L3:
|
||||
movabsq $0x200000001, %rax
|
||||
jmp .L2
|
||||
.L4:
|
||||
movabsq $0x2000003ea, %rax
|
||||
jmp .L2
|
||||
.L5:
|
||||
movabsq $0x200000003, %rax
|
||||
jmp .L2
|
||||
|
@ -64,19 +64,17 @@ test:
|
||||
vaddsd %xmm4, %xmm5, %xmm6
|
||||
vucomisd 0x2f(%rip), %xmm6
|
||||
ja .L2
|
||||
jmp .L3
|
||||
.L2:
|
||||
retq
|
||||
.L3:
|
||||
cmpl $0x3e8, %eax
|
||||
jl .L4
|
||||
xorl %eax, %eax
|
||||
retq
|
||||
.L4:
|
||||
jg .L3
|
||||
vmulsd %xmm3, %xmm2, %xmm3
|
||||
vsubsd %xmm5, %xmm4, %xmm2
|
||||
vaddsd %xmm1, %xmm2, %xmm2
|
||||
vaddsd %xmm3, %xmm3, %xmm3
|
||||
vaddsd %xmm0, %xmm3, %xmm3
|
||||
jmp .L1
|
||||
.L2:
|
||||
retq
|
||||
.L3:
|
||||
xorl %eax, %eax
|
||||
retq
|
||||
|
||||
|
@ -67,15 +67,8 @@ test:
|
||||
addsd %xmm4, %xmm6
|
||||
ucomisd 0x33(%rip), %xmm6
|
||||
ja .L2
|
||||
jmp .L3
|
||||
.L2:
|
||||
retq
|
||||
.L3:
|
||||
cmpl $0x3e8, %eax
|
||||
jl .L4
|
||||
xorl %eax, %eax
|
||||
retq
|
||||
.L4:
|
||||
jg .L3
|
||||
mulsd %xmm2, %xmm3
|
||||
subsd %xmm5, %xmm4
|
||||
movapd %xmm4, %xmm2
|
||||
@ -83,4 +76,8 @@ test:
|
||||
addsd %xmm3, %xmm3
|
||||
addsd %xmm0, %xmm3
|
||||
jmp .L1
|
||||
|
||||
.L2:
|
||||
retq
|
||||
.L3:
|
||||
xorl %eax, %eax
|
||||
retq
|
||||
|
@ -67,16 +67,9 @@ test:
|
||||
addsd %xmm4, %xmm6
|
||||
ucomisd 0x42(%rip), %xmm6
|
||||
ja .L2
|
||||
jmp .L3
|
||||
.L2:
|
||||
retq
|
||||
.L3:
|
||||
movabsq $0x2000003e8, %rcx
|
||||
cmpq %rcx, %rax
|
||||
jl .L4
|
||||
movabsq $0x200000000, %rax
|
||||
retq
|
||||
.L4:
|
||||
jg .L3
|
||||
mulsd %xmm2, %xmm3
|
||||
subsd %xmm5, %xmm4
|
||||
movapd %xmm4, %xmm2
|
||||
@ -84,4 +77,8 @@ test:
|
||||
addsd %xmm3, %xmm3
|
||||
addsd %xmm0, %xmm3
|
||||
jmp .L1
|
||||
|
||||
.L2:
|
||||
retq
|
||||
.L3:
|
||||
movabsq $0x200000000, %rax
|
||||
retq
|
||||
|
@ -81,7 +81,7 @@ test:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
subq $0x38, %rsp
|
||||
subsd 0xb8(%rip), %xmm1
|
||||
subsd 0xb0(%rip), %xmm1
|
||||
movsd %xmm1, -0x38(%rbp)
|
||||
movsd %xmm0, -0x30(%rbp)
|
||||
xorpd %xmm0, %xmm0
|
||||
@ -114,21 +114,17 @@ test:
|
||||
movsd %xmm0, -0x28(%rbp)
|
||||
movsd -0x10(%rbp), %xmm0
|
||||
addsd -8(%rbp), %xmm0
|
||||
ucomisd 0x2e(%rip), %xmm0
|
||||
ucomisd 0x26(%rip), %xmm0
|
||||
ja .L2
|
||||
jmp .L3
|
||||
movl (%rax), %ecx
|
||||
cmpl $0x3e8, %ecx
|
||||
jl .L1
|
||||
xorl %eax, %eax
|
||||
movq %rbp, %rsp
|
||||
popq %rbp
|
||||
retq
|
||||
.L2:
|
||||
movl (%rax), %eax
|
||||
movq %rbp, %rsp
|
||||
popq %rbp
|
||||
retq
|
||||
.L3:
|
||||
movl (%rax), %ecx
|
||||
cmpl $0x3e8, %ecx
|
||||
jl .L4
|
||||
xorl %eax, %eax
|
||||
movq %rbp, %rsp
|
||||
popq %rbp
|
||||
retq
|
||||
.L4:
|
||||
jmp .L1
|
||||
|
@ -107,16 +107,12 @@ test:
|
||||
addsd 0x38(%rsp), %xmm0
|
||||
ucomisd 0x2b(%rip), %xmm0
|
||||
ja .L2
|
||||
jmp .L3
|
||||
cmpl $0x3e8, 0x20(%rsp)
|
||||
jl .L1
|
||||
xorl %eax, %eax
|
||||
addq $0x40, %rsp
|
||||
retq
|
||||
.L2:
|
||||
movl 0x20(%rsp), %eax
|
||||
addq $0x40, %rsp
|
||||
retq
|
||||
.L3:
|
||||
cmpl $0x3e8, 0x20(%rsp)
|
||||
jl .L4
|
||||
xorl %eax, %eax
|
||||
addq $0x40, %rsp
|
||||
retq
|
||||
.L4:
|
||||
jmp .L1
|
||||
|
Loading…
x
Reference in New Issue
Block a user