Improve load fusion, register allocation and code selection for ADD

This commit is contained in:
Dmitry Stogov 2023-02-21 22:55:47 +03:00
parent 8b4678a9ae
commit 00d5e471ad
24 changed files with 75 additions and 74 deletions

View File

@ -1431,9 +1431,7 @@ store_int:
if (op_insn->op2 > bb->start
&& ctx->use_lists[op_insn->op2].count == 2
&& insn->op1 == op_insn->op2) {
ir_ref tmp = op_insn->op1;
op_insn->op1 = op_insn->op2;
op_insn->op2 = tmp;
ir_swap_ops(op_insn);
ctx->rules[insn->op3] = IR_SKIP_MEM_BINOP_INT;
ctx->rules[op_insn->op1] = IR_SKIP;
ir_match_fuse_addr(ctx, insn->op2, bb);
@ -1576,9 +1574,7 @@ store_int:
} else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE)
&& ctx->ir_base[op_insn->op2].op == IR_RLOAD
&& ctx->ir_base[op_insn->op2].op2 == insn->op3) {
ir_ref tmp = op_insn->op1;
op_insn->op1 = op_insn->op2;
op_insn->op2 = tmp;
ir_swap_ops(op_insn);
ctx->rules[insn->op2] = IR_SKIP_REG_BINOP_INT;
ctx->rules[op_insn->op1] = IR_SKIP;
return IR_REG_BINOP_INT;
@ -1727,9 +1723,7 @@ store_int:
if (op_insn->op2 > bb->start
&& ctx->use_lists[op_insn->op2].count == 2
&& store_insn->op1 == op_insn->op2) {
ir_ref tmp = op_insn->op1;
op_insn->op1 = op_insn->op2;
op_insn->op2 = tmp;
ir_swap_ops(op_insn);
ctx->rules[insn->op2] = IR_SKIP_MEM_BINOP_INT;
ctx->rules[op_insn->op1] = IR_SKIP;
ir_match_fuse_addr(ctx, store_insn->op2, bb);
@ -1807,15 +1801,11 @@ static void ir_match_insn2(ir_ctx *ctx, ir_ref ref, ir_block *bb)
if (ctx->rules[ref] == IR_LEA_IB) {
ir_insn *insn = &ctx->ir_base[ref];
if (ctx->ir_base[insn->op2].op == IR_LOAD) {
ir_match_fuse_load(ctx, insn->op2, bb);
if (ir_match_fuse_load(ctx, insn->op2, bb)) {
ctx->rules[ref] = IR_BINOP_INT;
} else if (ctx->ir_base[insn->op1].op == IR_LOAD) {
} else if (ir_match_fuse_load(ctx, insn->op1, bb)) {
/* swap for better load fusion */
ir_ref tmp = insn->op1;
insn->op1 = insn->op2;
insn->op2 = tmp;
ir_match_fuse_load(ctx, insn->op2, bb);
ir_swap_ops(insn);
ctx->rules[ref] = IR_BINOP_INT;
}
}
@ -6807,11 +6797,24 @@ static void ir_emit_lea(ir_ctx *ctx, ir_ref def, ir_type type, ir_reg base_reg,
if (!offset) {
switch (scale) {
case 1:
if (ir_type_size[type] == 4) {
| lea Rd(def_reg), dword [Rd(base_reg)+Rd(index_reg)]
if (ir_type_size[type] == sizeof(void*)) {
if (def_reg == base_reg) {
| add Ra(def_reg), Ra(index_reg)
} else if (def_reg == index_reg) {
| add Ra(def_reg), Ra(base_reg)
} else {
| lea Ra(def_reg), aword [Ra(base_reg)+Ra(index_reg)]
}
} else {
IR_ASSERT(sizeof(void*) == 8 && ir_type_size[type] == 4);
if (def_reg == base_reg) {
| add Rd(def_reg), Rd(index_reg)
} else if (def_reg == index_reg) {
| add Rd(def_reg), Rd(base_reg)
} else {
| lea Rd(def_reg), dword [Rd(base_reg)+Rd(index_reg)]
}
}
break;
case 2:
if (ir_type_size[type] == 4) {

View File

@ -35,11 +35,11 @@ test:
movl 0x24(%esp), %eax
movl %eax, 0xc(%esp)
calll printf
leal (%eax, %ebx), %eax
addl %ebx, %eax
movl 0x18(%esp), %ebx
addl $0x1c, %esp
retl
.rodata
.db 0x90, 0x90, 0x90, 0x90
.db 0x90, 0x90, 0x90, 0x90, 0x90
.L1:
.db 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x25, 0x64, 0x21, 0x0a, 0x00

View File

@ -33,11 +33,11 @@ test:
movl 0x24(%esp), %ecx
movl %ecx, 0xc(%esp)
calll *(%eax)
leal (%eax, %ebx), %eax
addl %ebx, %eax
movl 0x18(%esp), %ebx
addl $0x1c, %esp
retl
.rodata
.db 0x90, 0x90, 0x90
.db 0x90, 0x90, 0x90, 0x90
.L1:
.db 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x25, 0x64, 0x21, 0x0a, 0x00

View File

@ -40,7 +40,7 @@ x86
test:
movb 4(%esp), %al
movl 8(%esp), %ecx
leal (%ecx, %ecx), %ecx
addl %ecx, %ecx
.L1:
testb %al, %al
jne .L1

View File

@ -35,7 +35,7 @@ test:
leal 1(%eax), %eax
xorl %ecx, %ecx
.L1:
leal (%eax, %ecx), %ecx
addl %eax, %ecx
cmpl $0xa, %ecx
jl .L1
leal (%ecx, %ecx), %eax

View File

@ -91,14 +91,14 @@ test:
jne .L1
movl %eax, %esi
imull %ecx, %esi
leal (%eax, %ecx), %eax
leal (%eax, %edx), %eax
leal (%eax, %ebx), %eax
addl %ecx, %eax
addl %edx, %eax
addl %ebx, %eax
movl (%esp), %edi
leal (%eax, %edi), %eax
leal (%eax, %ebp), %eax
leal (%eax, %ecx), %eax
leal (%eax, %ecx), %eax
addl %edi, %eax
addl %ebp, %eax
addl %ecx, %eax
addl %ecx, %eax
leal 1(%eax, %esi), %eax
movl 0x14(%esp), %ebx
movl 0x10(%esp), %ebp

View File

@ -34,7 +34,7 @@ test:
leal 1(%eax), %eax
xorl %ecx, %ecx
.L1:
leal (%eax, %ecx), %ecx
addl %eax, %ecx
cmpl $0xa, %ecx
jl .L1
leal (%ecx, %ecx), %eax

View File

@ -22,7 +22,7 @@ test:
subl $4, %esp
movl 8(%esp), %eax
movl %eax, (%esp)
leal (%eax, %eax), %eax
addl %eax, %eax
andl %eax, (%esp)
addl $4, %esp
retl

View File

@ -42,14 +42,14 @@ test:
movl 0x24(%esp), %ebp
movl 0x28(%esp), %esi
movl 0x34(%esp), %edi
leal (%ecx, %eax), %eax
leal (%eax, %edx), %eax
leal (%eax, %ebx), %eax
leal (%eax, %ebp), %eax
leal (%eax, %esi), %eax
leal (%eax, %ecx), %eax
leal (%eax, %ecx), %eax
leal (%eax, %edi), %eax
addl %ecx, %eax
addl %edx, %eax
addl %ebx, %eax
addl %ebp, %eax
addl %esi, %eax
addl %ecx, %eax
addl %ecx, %eax
addl %edi, %eax
subl 0x38(%esp), %eax
movl 0xc(%esp), %ebx
movl 8(%esp), %ebp

View File

@ -73,7 +73,7 @@ test:
.L1:
cmpl $0x2710, %ebx
jge .L2
leal (%ebx, %eax), %ebx
addl %eax, %ebx
movl %ebx, %ebp
subl %eax, %ebp
movl %ebp, 4(%esp)
@ -89,7 +89,7 @@ test:
addl $0x1c, %esp
retl
.rodata
.db 0x90, 0x90, 0x90
.db 0x90, 0x90, 0x90, 0x90
.L3:
.db 0x25, 0x64, 0x0a, 0x00

View File

@ -24,7 +24,7 @@ test:
movl 0xc(%esp), %ecx
movl %ecx, (%esp)
andl (%esp), %eax
leal (%eax, %eax), %eax
addl %eax, %eax
movl %eax, (%esp)
addl $4, %esp
retl

View File

@ -33,12 +33,11 @@ test:
leaq .L1(%rip), %rdi
movabsq $_IO_printf, %rax
callq *%rax
leal (%rax, %rbx), %eax
addl %ebx, %eax
movq (%rsp), %rbx
addq $8, %rsp
retq
.rodata
.db 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90
.L1:
.db 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x25, 0x64, 0x21, 0x0a, 0x00

View File

@ -29,10 +29,11 @@ test:
movl %eax, %edx
leaq .L1(%rip), %rdi
callq *(%r8)
leal (%rax, %rbx), %eax
addl %ebx, %eax
movq (%rsp), %rbx
addq $8, %rsp
retq
.rodata
.db 0x90
.L1:
.db 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x25, 0x64, 0x21, 0x0a, 0x00

View File

@ -34,7 +34,7 @@ test:
leal 1(%rdi), %eax
xorl %ecx, %ecx
.L1:
leal (%rax, %rcx), %ecx
addl %eax, %ecx
cmpl $0xa, %ecx
jl .L1
leal (%rcx, %rcx), %eax

View File

@ -86,13 +86,13 @@ test:
jne .L1
movl %edi, %r10d
imull %esi, %r10d
leal (%rdi, %rsi), %esi
leal (%rsi, %rdx), %edx
leal (%rdx, %rcx), %ecx
leal (%rcx, %rbx), %ecx
leal (%rcx, %r9), %ecx
leal (%rcx, %rax), %eax
leal (%rax, %r8), %eax
addl %edi, %esi
addl %esi, %edx
addl %edx, %ecx
addl %ebx, %ecx
addl %r9d, %ecx
addl %ecx, %eax
addl %r8d, %eax
leal 1(%rax, %r10), %eax
movq 8(%rsp), %rbx
movq (%rsp), %rbp

View File

@ -33,7 +33,7 @@ test:
leal 1(%rdi), %eax
xorl %ecx, %ecx
.L1:
leal (%rax, %rcx), %ecx
addl %eax, %ecx
cmpl $0xa, %ecx
jl .L1
leal (%rcx, %rcx), %eax

View File

@ -33,13 +33,13 @@ test:
movl 8(%rsp), %eax
movl 0x10(%rsp), %r10d
movl 0x18(%rsp), %r11d
leal (%rsi, %rdi), %esi
leal (%rsi, %rdx), %edx
leal (%rdx, %rcx), %ecx
leal (%rcx, %r8), %ecx
leal (%rcx, %r9), %ecx
leal (%rcx, %rax), %eax
leal (%rax, %r10), %eax
leal (%rax, %r11), %eax
addl %edi, %esi
addl %esi, %edx
addl %edx, %ecx
addl %r8d, %ecx
addl %r9d, %ecx
addl %ecx, %eax
addl %r10d, %eax
addl %r11d, %eax
subl 0x20(%rsp), %eax
retq

View File

@ -2,8 +2,6 @@
001: register allocation
--TARGET--
x86_64
--XFAIL--
Not implemented register allocation optimization pattern
--ARGS--
-S
--CODE--

View File

@ -75,7 +75,7 @@ test:
.L1:
cmpl $0x2710, %ebx
jge .L2
leal (%rbx, %rax), %ebx
addl %eax, %ebx
movl %ebx, %ebp
subl %eax, %ebp
movl %ebp, %esi
@ -91,7 +91,7 @@ test:
addq $0x18, %rsp
retq
.rodata
.db 0x90, 0x90
.db 0x90, 0x90, 0x90
.L3:
.db 0x25, 0x64, 0x0a, 0x00

View File

@ -16,5 +16,5 @@ x86
test:
movl 4(%esp), %eax
movl 8(%esp), %ecx
leal (%ecx, %eax), %eax
addl %ecx, %eax
retl

View File

@ -16,5 +16,5 @@ x86
test:
movl 4(%esp), %eax
movl 8(%esp), %ecx
leal (%ecx, %eax), %eax
addl %ecx, %eax
retl

View File

@ -18,6 +18,6 @@ x86
test:
movl 4(%esp), %eax
movl 8(%esp), %ecx
leal (%ecx, %eax), %eax
addl %ecx, %eax
imull 0xc(%esp), %eax
retl

View File

@ -18,7 +18,7 @@ x86
test:
movl 4(%esp), %eax
movl 8(%esp), %ecx
leal (%ecx, %eax), %eax
addl %ecx, %eax
xorl %edx, %edx
divl 0xc(%esp)
retl

View File

@ -18,7 +18,7 @@ x86
test:
movl 4(%esp), %eax
movl 8(%esp), %ecx
leal (%ecx, %eax), %eax
addl %ecx, %eax
xorl %edx, %edx
divl 0xc(%esp)
movl %edx, %eax