Swap operands for better load fusion

This commit is contained in:
Dmitry Stogov 2022-05-19 13:17:50 +03:00
parent 58063dd470
commit bf369d0eac
7 changed files with 124 additions and 28 deletions

1
TODO
View File

@@ -39,7 +39,6 @@
- OVERFLOW - OVERFLOW
- MIN, MAX, COND - MIN, MAX, COND
- CAST - CAST
? operands swap (binop_int, binop_sse, binop_avx, cmp_int, cmp_fp, cmp_br_int)
- return merge/split - return merge/split
? binary code emission without DynAsm ? binary code emission without DynAsm
- 32-bit x86 code - 32-bit x86 code

View File

@@ -1161,12 +1161,46 @@ static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref, ir_block *bb)
} }
return IR_LEA_IB; // lea ret, [op1.reg+op2.reg] return IR_LEA_IB; // lea ret, [op1.reg+op2.reg]
} }
binop_int:
if ((ctx->flags & IR_OPT_CODEGEN)
&& (ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE)
&& !IR_IS_CONST_REF(insn->op2)
&& !IR_IS_CONST_REF(insn->op1)) {
ir_insn *op1_insn = &ctx->ir_base[insn->op1];
ir_insn *op2_insn = &ctx->ir_base[insn->op2];
if ((op1_insn->op == IR_VLOAD || op1_insn->op == IR_LOAD)
&& (op2_insn->op != IR_VLOAD && op2_insn->op != IR_LOAD)) {
/* swap for better load fusion */
ir_ref tmp = insn->op1;
insn->op1 = insn->op2;
insn->op2 = tmp;
}
}
return IR_BINOP_INT; return IR_BINOP_INT;
} else if (ctx->flags & IR_AVX) { } else {
binop_fp:
if ((ctx->flags & IR_OPT_CODEGEN)
&& (ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE)
&& !IR_IS_CONST_REF(insn->op2)
&& !IR_IS_CONST_REF(insn->op1)) {
ir_insn *op1_insn = &ctx->ir_base[insn->op1];
ir_insn *op2_insn = &ctx->ir_base[insn->op2];
if ((op1_insn->op == IR_VLOAD || op1_insn->op == IR_LOAD)
&& (op2_insn->op != IR_VLOAD && op2_insn->op != IR_LOAD)) {
/* swap for better load fusion */
ir_ref tmp = insn->op1;
insn->op1 = insn->op2;
insn->op2 = tmp;
}
}
if (ctx->flags & IR_AVX) {
return IR_BINOP_AVX; return IR_BINOP_AVX;
} else { } else {
return IR_BINOP_SSE2; return IR_BINOP_SSE2;
} }
}
break; break;
case IR_MUL: case IR_MUL:
if (IR_IS_TYPE_INT(insn->type)) { if (IR_IS_TYPE_INT(insn->type)) {
@@ -1191,10 +1225,8 @@ static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref, ir_block *bb)
} }
} }
return (IR_IS_TYPE_SIGNED(insn->type) && ir_type_size[insn->type] != 1) ? IR_BINOP_INT : IR_MUL_INT; return (IR_IS_TYPE_SIGNED(insn->type) && ir_type_size[insn->type] != 1) ? IR_BINOP_INT : IR_MUL_INT;
} else if (ctx->flags & IR_AVX) {
return IR_BINOP_AVX;
} else { } else {
return IR_BINOP_SSE2; goto binop_fp;
} }
break; break;
case IR_DIV: case IR_DIV:
@@ -1210,10 +1242,8 @@ static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref, ir_block *bb)
} }
} }
return IR_DIV_INT; return IR_DIV_INT;
} else if (ctx->flags & IR_AVX) {
return IR_BINOP_AVX;
} else { } else {
return IR_BINOP_SSE2; goto binop_fp;
} }
break; break;
case IR_MOD: case IR_MOD:
@@ -1268,7 +1298,7 @@ static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref, ir_block *bb)
// -1 // -1
} }
} }
return IR_BINOP_INT; goto binop_int;
case IR_AND: case IR_AND:
if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
op2_insn = &ctx->ir_base[insn->op2]; op2_insn = &ctx->ir_base[insn->op2];
@@ -1280,7 +1310,7 @@ static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref, ir_block *bb)
return IR_COPY_INT; return IR_COPY_INT;
} }
} }
return IR_BINOP_INT; goto binop_int;
case IR_XOR: case IR_XOR:
if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
op2_insn = &ctx->ir_base[insn->op2]; op2_insn = &ctx->ir_base[insn->op2];
@@ -1288,7 +1318,7 @@ static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref, ir_block *bb)
// const // const
} }
} }
return IR_BINOP_INT; goto binop_int;
case IR_SHL: case IR_SHL:
if (IR_IS_CONST_REF(insn->op2)) { if (IR_IS_CONST_REF(insn->op2)) {
if (ctx->flags & IR_OPT_CODEGEN) { if (ctx->flags & IR_OPT_CODEGEN) {

View File

@@ -0,0 +1,45 @@
--TEST--
003: Parameter Loading and argument passing
--ARGS--
-S
--CODE--
{
uintptr_t f = func(printf);
uintptr_t fmt = "%d %d %d %d %d %d %d %d %d %d\n";
l_1 = START(l_3);
int32_t p_1 = PARAM(l_1, "p_1", 1);
int32_t p_2 = PARAM(l_1, "p_2", 2);
int32_t p_3 = PARAM(l_1, "p_3", 3);
int32_t p_4 = PARAM(l_1, "p_4", 4);
int32_t p_5 = PARAM(l_1, "p_5", 5);
int32_t p_6 = PARAM(l_1, "p_6", 6);
int32_t p_7 = PARAM(l_1, "p_7", 7);
int32_t p_8 = PARAM(l_1, "p_8", 8);
int32_t p_9 = PARAM(l_1, "p_9", 9);
int32_t p_10 = PARAM(l_1, "p_10", 10);
int32_t ret, l_2 = CALL/11(l_1, f, fmt, p_1, p_2, p_3, p_4, p_5, p_6, p_7, p_8, p_9, p_10);
l_3 = RETURN(l_2, ret);
}
--EXPECT--
test:
subq $0x28, %rsp
movl %r9d, %eax
movl %eax, (%rsp)
movl 0x30(%rsp), %r10d
movl %r10d, 8(%rsp)
movl 0x38(%rsp), %r10d
movl %r10d, 0x10(%rsp)
movl 0x40(%rsp), %r10d
movl %r10d, 0x18(%rsp)
movl 0x48(%rsp), %r10d
movl %r10d, 0x20(%rsp)
movl %r8d, %r9d
movl %ecx, %r8d
movl %edx, %ecx
movl %esi, %edx
movl %edi, %esi
leaq 0x13(%rip), %rdi
movabsq $_IO_printf, %rax
callq *%rax
addq $0x28, %rsp
retq

View File

@@ -1,5 +1,5 @@
--TEST-- --TEST--
Swap operands of commutative instruction 001: Swap operands of commutative instruction
--ARGS-- --ARGS--
-S -S
--CODE-- --CODE--

26
tests/debug/swap_002.irt Normal file
View File

@@ -0,0 +1,26 @@
--TEST--
002: Swap operands of commutative instruction
--ARGS--
-S
--CODE--
{
l_1 = START(l_5);
int32_t x = PARAM(l_1, "x", 1);
int32_t y = PARAM(l_1, "y", 2);
int32_t v = VAR(l_1, "_spill_");
l_2 = VSTORE(l_1, v, y);
int32_t z, l_3 = VLOAD(l_2, v);
int32_t ret = AND(z, x);
int32_t ret2 = ADD(ret, ret);
l_4 = VSTORE(l_3, v, ret2);
l_5 = RETURN(l_4);
}
--EXPECT--
test:
subq $8, %rsp
movl %esi, (%rsp)
andl (%rsp), %edi
leal (%rdi, %rdi), %eax
movl %eax, (%rsp)
addq $8, %rsp
retq

View File

@@ -81,7 +81,7 @@ test:
pushq %rbp pushq %rbp
movq %rsp, %rbp movq %rsp, %rbp
subq $0x38, %rsp subq $0x38, %rsp
subsd 0xc0(%rip), %xmm1 subsd 0xb8(%rip), %xmm1
movsd %xmm1, -0x38(%rbp) movsd %xmm1, -0x38(%rbp)
movsd %xmm0, -0x30(%rbp) movsd %xmm0, -0x30(%rbp)
xorpd %xmm0, %xmm0 xorpd %xmm0, %xmm0
@@ -106,14 +106,12 @@ test:
movsd %xmm1, -8(%rbp) movsd %xmm1, -8(%rbp)
movsd -0x10(%rbp), %xmm0 movsd -0x10(%rbp), %xmm0
subsd -8(%rbp), %xmm0 subsd -8(%rbp), %xmm0
movsd -0x38(%rbp), %xmm1 addsd -0x38(%rbp), %xmm0
addsd %xmm0, %xmm1 movsd %xmm0, -0x20(%rbp)
movsd %xmm1, -0x20(%rbp)
movsd -0x18(%rbp), %xmm0 movsd -0x18(%rbp), %xmm0
addsd %xmm0, %xmm0 addsd %xmm0, %xmm0
movsd -0x30(%rbp), %xmm1 addsd -0x30(%rbp), %xmm0
addsd %xmm0, %xmm1 movsd %xmm0, -0x28(%rbp)
movsd %xmm1, -0x28(%rbp)
movsd -0x10(%rbp), %xmm0 movsd -0x10(%rbp), %xmm0
addsd -8(%rbp), %xmm0 addsd -8(%rbp), %xmm0
ucomisd 0x2e(%rip), %xmm0 ucomisd 0x2e(%rip), %xmm0

View File

@@ -74,7 +74,7 @@ Mandelbrot Test (var)
--EXPECT-- --EXPECT--
test: test:
subq $0x40, %rsp subq $0x40, %rsp
subsd 0xcc(%rip), %xmm1 subsd 0xc4(%rip), %xmm1
movsd %xmm1, (%rsp) movsd %xmm1, (%rsp)
movsd %xmm0, 8(%rsp) movsd %xmm0, 8(%rsp)
xorpd %xmm0, %xmm0 xorpd %xmm0, %xmm0
@@ -97,14 +97,12 @@ test:
movsd %xmm1, 0x38(%rsp) movsd %xmm1, 0x38(%rsp)
movsd 0x30(%rsp), %xmm0 movsd 0x30(%rsp), %xmm0
subsd 0x38(%rsp), %xmm0 subsd 0x38(%rsp), %xmm0
movsd (%rsp), %xmm1 addsd (%rsp), %xmm0
addsd %xmm0, %xmm1 movsd %xmm0, 0x18(%rsp)
movsd %xmm1, 0x18(%rsp)
movsd 0x28(%rsp), %xmm0 movsd 0x28(%rsp), %xmm0
addsd %xmm0, %xmm0 addsd %xmm0, %xmm0
movsd 8(%rsp), %xmm1 addsd 8(%rsp), %xmm0
addsd %xmm0, %xmm1 movsd %xmm0, 0x10(%rsp)
movsd %xmm1, 0x10(%rsp)
movsd 0x30(%rsp), %xmm0 movsd 0x30(%rsp), %xmm0
addsd 0x38(%rsp), %xmm0 addsd 0x38(%rsp), %xmm0
ucomisd 0x2b(%rip), %xmm0 ucomisd 0x2b(%rip), %xmm0