Better 'jp' elimination for IR_CMP_AND_BRANCH_FP

This commit is contained in:
Dmitry Stogov 2023-02-07 01:57:07 +03:00
parent 6521c0b7e4
commit 2e31446e37
5 changed files with 82 additions and 98 deletions

View File

@ -18,6 +18,7 @@
/*
 * True when val, converted to intptr_t, lies within the signed 32-bit range
 * [-2147483648, 2147483647]. The argument is expanded twice.
 * NOTE(review): the conversion to intptr_t happens before the range test, so
 * on a target where intptr_t is narrower than val the value may be narrowed
 * first - confirm this is intended.
 */
#define IR_IS_SIGNED_32BIT(val) ((((intptr_t)(val)) <= 0x7fffffff) && (((intptr_t)(val)) >= (-2147483647 - 1)))
/* True when val, converted to uintptr_t, does not exceed 0xffffffff. */
#define IR_IS_UNSIGNED_32BIT(val) (((uintptr_t)(val)) <= 0xffffffff)
/*
 * Selects the check by type: when IR_IS_TYPE_SIGNED(type) holds, the signed
 * check is applied to (val).i64; otherwise the unsigned check is applied to
 * (val).u64.
 */
#define IR_IS_32BIT(type, val) (IR_IS_TYPE_SIGNED(type) ? IR_IS_SIGNED_32BIT((val).i64) : IR_IS_UNSIGNED_32BIT((val).u64))
/*
 * True when the constant payload of insn is all-zero bits: the 64-bit member
 * val.u64 is tested when insn.type is IR_DOUBLE, the 32-bit member val.u32
 * for every other type. Only the all-zero pattern matches; presumably val
 * holds the raw floating-point encoding, in which case negative zero (sign
 * bit set) is not reported as zero - verify against the constant layout.
 *
 * The argument is parenthesized at each use so that any lvalue expression
 * (for example a dereferenced pointer) expands correctly. It is evaluated
 * twice, so it must not have side effects.
 */
#define IR_IS_FP_ZERO(insn) (((insn).type == IR_DOUBLE) ? ((insn).val.u64 == 0) : ((insn).val.u32 == 0))
#define IR_MAY_USE_32BIT_ADDR(a) \
(ctx->code_buffer && \
IR_IS_SIGNED_32BIT((char*)addr - (char*)ctx->code_buffer) && \
@ -1002,7 +1003,7 @@ static void ir_match_fuse_load_cmp_fp(ir_ctx *ctx, ir_insn *insn, ir_block *bb,
}
if (IR_IS_CONST_REF(insn->op2)) {
} else if (ir_match_fuse_load(ctx, insn->op2, bb)) {
} else if (IR_IS_CONST_REF(insn->op1) || ir_match_fuse_load(ctx, insn->op1, bb)) {
} else if ((IR_IS_CONST_REF(insn->op1) && !IR_IS_FP_ZERO(ctx->ir_base[insn->op1])) || ir_match_fuse_load(ctx, insn->op1, bb)) {
ir_swap_ops(insn);
if (insn->op != IR_EQ && insn->op != IR_NE) {
insn->op ^= 3;
@ -1616,7 +1617,10 @@ store_int:
ctx->rules[insn->op2] = IR_SKIP_CMP_INT;
return IR_CMP_AND_BRANCH_INT;
} else {
ir_match_fuse_load_cmp_fp(ctx, op2_insn, bb, 1);
uint32_t true_block, false_block, next_block;
ir_get_true_false_blocks(ctx, bb - ctx->cfg_blocks, &true_block, &false_block, &next_block);
ir_match_fuse_load_cmp_fp(ctx, op2_insn, bb, true_block == next_block);
ctx->rules[insn->op2] = IR_SKIP_CMP_FP;
return IR_CMP_AND_BRANCH_FP;
}

View File

@ -47,7 +47,7 @@ x86
l_3 = LOOP_BEGIN(l_2, l_13);
double d_4 = PHI(l_3, c_5, d_10);
double d_5 = PHI(l_3, c_6, d_9);
bool d_6 = GT(c_7, d_5);
bool d_6 = LT(d_5, c_7);
l_7 = IF(l_3, d_6);
l_8 = IF_TRUE(l_7);
double d_9 = ADD(d_5, d_4);
@ -60,52 +60,47 @@ x86
{ # LIVE-RANGES (vregs_count=3)
TMP
[%xmm0]: [2.2-2.3)
[%xmm1]: [6.0-6.2)
[%eax]: [11.0-11.1)
[%xmm0]: [13.2-13.3)
R1 (d_4) [SPILL=0x0]
[%xmm1]: [3.0-6.0), DEF(4.2)
: [6.0-8.0)
[%xmm1]: [8.0-10.1), USE(9.1/2), USE(10.1/2)
R2 (d_5, d_9) [SPILL=0x8]
[%xmm0]: [3.0-10.0), DEF(5.2), USE(6.1/2), USE(9.0/1)!, DEF(9.0)!, DEF(9.2)!, USE(10.0/1, hint=R3)!
R1 (d_4) [%xmm1]: [3.0-10.1), DEF(4.2), USE(9.1/2), USE(10.1/2)
R2 (d_5, d_9) [SPILL=0x0]
[%xmm0]: [3.0-10.0), DEF(5.2), USE(6.1/1)!, USE(9.0/1)!, DEF(9.0)!, DEF(9.2)!, USE(10.0/1, hint=R3)!
: [10.0-14.0), PHI_USE(13.2, phi=d_5/3)
R3 (d_10) [SPILL=0x10]
R3 (d_10) [SPILL=0x8]
[%xmm0]: [10.0-11.1), DEF(10.0, hint=R2)!, DEF(10.2)!, USE(11.1/4)
: [11.1-14.0), PHI_USE(13.2, phi=d_4/3)
[%eax] : [15.0-15.1)
[%SCRATCH] : [11.1-11.2)
}
test:
subl $0x2c, %esp
subl $0x1c, %esp
xorpd %xmm1, %xmm1
movsd %xmm1, 0xc(%esp)
movsd .L3, %xmm0
movsd %xmm0, 0x14(%esp)
movsd %xmm0, 0xc(%esp)
.L1:
movsd .L4, %xmm1
ucomisd 0x14(%esp), %xmm1
jbe .L2
movsd 0x14(%esp), %xmm0
addsd 0xc(%esp), %xmm0
movsd 0xc(%esp), %xmm0
ucomisd .L4, %xmm0
jp .L2
jae .L2
movsd 0xc(%esp), %xmm0
addsd %xmm1, %xmm0
movsd %xmm0, 0xc(%esp)
movsd 0xc(%esp), %xmm0
subsd %xmm1, %xmm0
movsd %xmm0, 0x14(%esp)
movsd 0x14(%esp), %xmm0
subsd 0xc(%esp), %xmm0
movsd %xmm0, 0x1c(%esp)
leal .L5, %eax
movl %eax, (%esp)
movsd 0x1c(%esp), %xmm0
movsd 0x14(%esp), %xmm0
movsd %xmm0, 4(%esp)
calll printf
movsd 0x1c(%esp), %xmm1
movsd %xmm1, 0xc(%esp)
movsd 0x14(%esp), %xmm1
jmp .L1
.L2:
xorl %eax, %eax
addl $0x2c, %esp
addl $0x1c, %esp
retl
.rodata
.db 0x90, 0x90, 0x90
.db 0x90
.L3:
.db 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f
.L4:

View File

@ -46,7 +46,7 @@ x86
l_3 = LOOP_BEGIN(l_2, l_15);
double d_4 = PHI(l_3, c_5, d_10);
double d_5 = PHI(l_3, c_6, d_9);
bool d_6 = GT(c_7, d_5);
bool d_6 = LT(d_5, c_7);
l_7 = IF(l_3, d_6);
l_8 = IF_TRUE(l_7);
double d_9 = ADD(d_5, d_4);
@ -60,58 +60,53 @@ x86
{ # LIVE-RANGES (vregs_count=3)
TMP
[%xmm0]: [2.2-2.3)
[%xmm1]: [6.0-6.2)
[%eax]: [11.0-11.1)
[%eax]: [13.0-13.1)
[%xmm0]: [15.2-15.3)
R1 (d_4) [SPILL=0x0]
[%xmm1]: [3.0-6.0), DEF(4.2)
: [6.0-8.0)
[%xmm1]: [8.0-10.1), USE(9.1/2), USE(10.1/2)
R2 (d_5, d_9) [SPILL=0x8]
[%xmm0]: [3.0-10.0), DEF(5.2), USE(6.1/2), USE(9.0/1)!, DEF(9.0)!, DEF(9.2)!, USE(10.0/1, hint=R3)!
R1 (d_4) [%xmm1]: [3.0-10.1), DEF(4.2), USE(9.1/2), USE(10.1/2)
R2 (d_5, d_9) [SPILL=0x0]
[%xmm0]: [3.0-10.0), DEF(5.2), USE(6.1/1)!, USE(9.0/1)!, DEF(9.0)!, DEF(9.2)!, USE(10.0/1, hint=R3)!
: [10.0-16.0), PHI_USE(15.2, phi=d_5/3)
R3 (d_10) [SPILL=0x10]
R3 (d_10) [SPILL=0x8]
[%xmm0]: [10.0-11.1), DEF(10.0, hint=R2)!, DEF(10.2)!, USE(11.1/4)
: [11.1-16.0), USE(13.1/4), PHI_USE(15.2, phi=d_4/3)
[%eax] : [17.0-17.1)
[%SCRATCH] : [11.1-11.2), [13.1-13.2)
}
test:
subl $0x2c, %esp
subl $0x1c, %esp
xorpd %xmm1, %xmm1
movsd %xmm1, 0xc(%esp)
movsd .L3, %xmm0
movsd %xmm0, 0x14(%esp)
movsd %xmm0, 0xc(%esp)
.L1:
movsd .L4, %xmm1
ucomisd 0x14(%esp), %xmm1
jbe .L2
movsd 0x14(%esp), %xmm0
addsd 0xc(%esp), %xmm0
movsd 0xc(%esp), %xmm0
ucomisd .L4, %xmm0
jp .L2
jae .L2
movsd 0xc(%esp), %xmm0
addsd %xmm1, %xmm0
movsd %xmm0, 0xc(%esp)
movsd 0xc(%esp), %xmm0
subsd %xmm1, %xmm0
movsd %xmm0, 0x14(%esp)
movsd 0x14(%esp), %xmm0
subsd 0xc(%esp), %xmm0
movsd %xmm0, 0x1c(%esp)
leal .L5, %eax
movl %eax, (%esp)
movsd 0x1c(%esp), %xmm0
movsd 0x14(%esp), %xmm0
movsd %xmm0, 4(%esp)
calll printf
leal .L5, %eax
movl %eax, (%esp)
movsd 0x1c(%esp), %xmm7
movsd 0x14(%esp), %xmm7
movsd %xmm7, 4(%esp)
calll printf
movsd 0x1c(%esp), %xmm1
movsd %xmm1, 0xc(%esp)
movsd 0x14(%esp), %xmm1
jmp .L1
.L2:
xorl %eax, %eax
addl $0x2c, %esp
addl $0x1c, %esp
retl
.rodata
.db 0x90
.db 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90
.L3:
.db 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f
.L4:

View File

@ -47,7 +47,7 @@ x86_64
l_3 = LOOP_BEGIN(l_2, l_13);
double d_4 = PHI(l_3, c_5, d_10);
double d_5 = PHI(l_3, c_6, d_9);
bool d_6 = GT(c_7, d_5);
bool d_6 = LT(d_5, c_7);
l_7 = IF(l_3, d_6);
l_8 = IF_TRUE(l_7);
double d_9 = ADD(d_5, d_4);
@ -60,17 +60,13 @@ x86_64
{ # LIVE-RANGES (vregs_count=3)
TMP
[%xmm0]: [2.2-2.3)
[%xmm1]: [6.0-6.2)
[%rax]: [11.0-11.1)
[%xmm0]: [13.2-13.3)
R1 (d_4) [SPILL=0x0]
[%xmm1]: [3.0-6.0), DEF(4.2)
: [6.0-8.0)
[%xmm1]: [8.0-10.1), USE(9.1/2), USE(10.1/2)
R2 (d_5, d_9) [SPILL=0x8]
[%xmm0]: [3.0-10.0), DEF(5.2), USE(6.1/2), USE(9.0/1)!, DEF(9.0)!, DEF(9.2)!, USE(10.0/1, hint=%xmm0, hint=R3)!
R1 (d_4) [%xmm1]: [3.0-10.1), DEF(4.2), USE(9.1/2), USE(10.1/2)
R2 (d_5, d_9) [SPILL=0x0]
[%xmm0]: [3.0-10.0), DEF(5.2), USE(6.1/1)!, USE(9.0/1)!, DEF(9.0)!, DEF(9.2)!, USE(10.0/1, hint=%xmm0, hint=R3)!
: [10.0-14.0), PHI_USE(13.2, phi=d_5/3)
R3 (d_10) [SPILL=0x10]
R3 (d_10) [SPILL=0x8]
[%xmm0]: [10.0-11.0), DEF(10.0, hint=R2)!, DEF(10.2)!, USE(11.0/4, hint=%xmm0)
: [11.0-14.0), PHI_USE(13.2, phi=d_4/3)
[%rax] : [15.0-15.1)
@ -81,32 +77,31 @@ R3 (d_10) [SPILL=0x10]
test:
subq $0x18, %rsp
xorpd %xmm1, %xmm1
movsd %xmm1, (%rsp)
movsd .L3(%rip), %xmm0
movsd %xmm0, 8(%rsp)
movsd %xmm0, (%rsp)
.L1:
movsd .L4(%rip), %xmm1
ucomisd 8(%rsp), %xmm1
jbe .L2
movsd 8(%rsp), %xmm0
addsd (%rsp), %xmm0
movsd (%rsp), %xmm0
ucomisd .L4(%rip), %xmm0
jp .L2
jae .L2
movsd (%rsp), %xmm0
addsd %xmm1, %xmm0
movsd %xmm0, (%rsp)
movsd (%rsp), %xmm0
subsd %xmm1, %xmm0
movsd %xmm0, 8(%rsp)
movsd 8(%rsp), %xmm0
subsd (%rsp), %xmm0
movsd %xmm0, 0x10(%rsp)
movsd 0x10(%rsp), %xmm0
leaq .L5(%rip), %rdi
movabsq $_IO_printf, %rax
callq *%rax
movsd 0x10(%rsp), %xmm1
movsd %xmm1, (%rsp)
movsd 8(%rsp), %xmm1
jmp .L1
.L2:
xorl %eax, %eax
addq $0x18, %rsp
retq
.rodata
.db 0x90, 0x90, 0x90, 0x90, 0x90, 0x90
.db 0x90, 0x90, 0x90, 0x90, 0x90
.L3:
.db 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f
.L4:

View File

@ -46,7 +46,7 @@ x86_64
l_3 = LOOP_BEGIN(l_2, l_15);
double d_4 = PHI(l_3, c_5, d_10);
double d_5 = PHI(l_3, c_6, d_9);
bool d_6 = GT(c_7, d_5);
bool d_6 = LT(d_5, c_7);
l_7 = IF(l_3, d_6);
l_8 = IF_TRUE(l_7);
double d_9 = ADD(d_5, d_4);
@ -60,18 +60,14 @@ x86_64
{ # LIVE-RANGES (vregs_count=3)
TMP
[%xmm0]: [2.2-2.3)
[%xmm1]: [6.0-6.2)
[%rax]: [11.0-11.1)
[%rax]: [13.0-13.1)
[%xmm0]: [15.2-15.3)
R1 (d_4) [SPILL=0x0]
[%xmm1]: [3.0-6.0), DEF(4.2)
: [6.0-8.0)
[%xmm1]: [8.0-10.1), USE(9.1/2), USE(10.1/2)
R2 (d_5, d_9) [SPILL=0x8]
[%xmm0]: [3.0-10.0), DEF(5.2), USE(6.1/2), USE(9.0/1)!, DEF(9.0)!, DEF(9.2)!, USE(10.0/1, hint=%xmm0, hint=R3)!
R1 (d_4) [%xmm1]: [3.0-10.1), DEF(4.2), USE(9.1/2), USE(10.1/2)
R2 (d_5, d_9) [SPILL=0x0]
[%xmm0]: [3.0-10.0), DEF(5.2), USE(6.1/1)!, USE(9.0/1)!, DEF(9.0)!, DEF(9.2)!, USE(10.0/1, hint=%xmm0, hint=R3)!
: [10.0-16.0), PHI_USE(15.2, phi=d_5/3)
R3 (d_10) [SPILL=0x10]
R3 (d_10) [SPILL=0x8]
[%xmm0]: [10.0-11.0), DEF(10.0, hint=R2)!, DEF(10.2)!, USE(11.0/4, hint=%xmm0)
: [11.0-16.0), USE(13.0/4, hint=%xmm0), PHI_USE(15.2, phi=d_4/3)
[%rax] : [17.0-17.1)
@ -82,36 +78,35 @@ R3 (d_10) [SPILL=0x10]
test:
subq $0x18, %rsp
xorpd %xmm1, %xmm1
movsd %xmm1, (%rsp)
movsd .L3(%rip), %xmm0
movsd %xmm0, 8(%rsp)
movsd %xmm0, (%rsp)
.L1:
movsd .L4(%rip), %xmm1
ucomisd 8(%rsp), %xmm1
jbe .L2
movsd 8(%rsp), %xmm0
addsd (%rsp), %xmm0
movsd (%rsp), %xmm0
ucomisd .L4(%rip), %xmm0
jp .L2
jae .L2
movsd (%rsp), %xmm0
addsd %xmm1, %xmm0
movsd %xmm0, (%rsp)
movsd (%rsp), %xmm0
subsd %xmm1, %xmm0
movsd %xmm0, 8(%rsp)
movsd 8(%rsp), %xmm0
subsd (%rsp), %xmm0
movsd %xmm0, 0x10(%rsp)
movsd 0x10(%rsp), %xmm0
leaq .L5(%rip), %rdi
movabsq $_IO_printf, %rax
callq *%rax
leaq .L5(%rip), %rdi
movsd 0x10(%rsp), %xmm0
movsd 8(%rsp), %xmm0
movabsq $_IO_printf, %rax
callq *%rax
movsd 0x10(%rsp), %xmm1
movsd %xmm1, (%rsp)
movsd 8(%rsp), %xmm1
jmp .L1
.L2:
xorl %eax, %eax
addq $0x18, %rsp
retq
.rodata
.db 0x90, 0x90, 0x90, 0x90, 0x90
.db 0x90, 0x90, 0x90, 0x90
.L3:
.db 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f
.L4: