Fuse loads without register allocation when this makes sense.

Make parameters passed through the stack reuse the same stack slot for spilling.
This commit is contained in:
Dmitry Stogov 2022-05-19 10:53:08 +03:00
parent 1af065058b
commit c9bb858e50
9 changed files with 293 additions and 180 deletions

1
TODO
View File

@ -46,7 +46,6 @@
- MIN, MAX, COND
- CAST
? operands swap (binop_int, binop_sse, binop_avx, cmp_int, cmp_fp, cmp_br_int)
- spill slots for parameters
- return merge/split
? binary code emission without DynAsm
- 32-bit x86 code

4
ir.h
View File

@ -553,11 +553,13 @@ struct _ir_live_range {
#define IR_LIVE_INTERVAL_VAR (1<<4)
#define IR_LIVE_INTERVAL_COALESCED (1<<5)
#define IR_LIVE_INTERVAL_HAS_HINTS (1<<6)
#define IR_LIVE_INTERVAL_MEM_PARAM (1<<7)
#define IR_LIVE_INTERVAL_MEM_LOAD (1<<8)
struct _ir_live_interval {
uint8_t type;
int8_t reg;
uint8_t flags;
uint16_t flags;
int32_t vreg;
int32_t stack_spill_pos;
ir_live_range range;

173
ir_ra.c
View File

@ -70,8 +70,8 @@ static void ir_add_local_var(ir_ctx *ctx, int v, uint8_t type)
ival->flags = IR_LIVE_INTERVAL_VAR;
ival->vreg = v;
ival->stack_spill_pos = -1; // not allocated
ival->range.start = 0;
ival->range.end = ctx->insns_count;
ival->range.start = IR_START_LIVE_POS_FROM_REF(1);
ival->range.end = IR_END_LIVE_POS_FROM_REF(ctx->insns_count - 1);
ival->range.next = NULL;
ival->use_pos = NULL;
@ -413,6 +413,13 @@ int ir_compute_live_ranges(ir_ctx *ctx)
def_pos = IR_LOAD_LIVE_POS_FROM_REF(i);
hint_ref = IR_IS_CONST_REF(insn->op1) ? 0 : insn->op1;
} else {
if (insn->op == IR_PARAM) {
/* We may reuse parameter stack slot for spilling */
ctx->live_intervals[ctx->vregs[i]]->flags |= IR_LIVE_INTERVAL_MEM_PARAM;
} else if (insn->op == IR_VLOAD) {
/* Load may be fused into the instruction that uses it */
ctx->live_intervals[ctx->vregs[i]]->flags |= IR_LIVE_INTERVAL_MEM_LOAD;
}
def_pos = IR_DEF_LIVE_POS_FROM_REF(i);
}
/* intervals[opd].setFrom(op.id) */
@ -1362,59 +1369,56 @@ static ir_live_interval *ir_split_interval_at(ir_ctx *ctx, ir_live_interval *iva
static void ir_allocate_spill_slot(ir_ctx *ctx, ir_live_interval *ival, ir_lsra_data *data)
{
ival = ival->top;
if (ival->stack_spill_pos == -1) {
uint8_t size = ir_type_size[ival->type];
uint8_t size = ir_type_size[ival->type];
if (size == 8) {
ival->stack_spill_pos = data->stack_frame_size;
data->stack_frame_size += 8;
} else if (size == 4) {
if (data->unused_slot_4) {
ival->stack_spill_pos = data->unused_slot_4;
data->unused_slot_4 = 0;
} else {
ival->stack_spill_pos = data->stack_frame_size;
data->unused_slot_4 = data->stack_frame_size + 4;
data->stack_frame_size += 8;
}
} else if (size == 2) {
if (data->unused_slot_2) {
ival->stack_spill_pos = data->unused_slot_2;
data->unused_slot_2 = 0;
} else if (data->unused_slot_4) {
ival->stack_spill_pos = data->unused_slot_4;
data->unused_slot_2 = data->unused_slot_4 + 2;
data->unused_slot_4 = 0;
} else {
ival->stack_spill_pos = data->stack_frame_size;
data->unused_slot_2 = data->stack_frame_size + 2;
data->unused_slot_4 = data->stack_frame_size + 4;
data->stack_frame_size += 8;
}
} else if (size == 1) {
if (data->unused_slot_1) {
ival->stack_spill_pos = data->unused_slot_1;
data->unused_slot_1 = 0;
} else if (data->unused_slot_2) {
ival->stack_spill_pos = data->unused_slot_2;
data->unused_slot_1 = data->unused_slot_2 + 1;
data->unused_slot_2 = 0;
} else if (data->unused_slot_4) {
ival->stack_spill_pos = data->unused_slot_4;
data->unused_slot_1 = data->unused_slot_4 + 1;
data->unused_slot_2 = data->unused_slot_4 + 2;
data->unused_slot_4 = 0;
} else {
ival->stack_spill_pos = data->stack_frame_size;
data->unused_slot_1 = data->stack_frame_size + 1;
data->unused_slot_2 = data->stack_frame_size + 2;
data->unused_slot_4 = data->stack_frame_size + 4;
data->stack_frame_size += 8;
}
if (size == 8) {
ival->stack_spill_pos = data->stack_frame_size;
data->stack_frame_size += 8;
} else if (size == 4) {
if (data->unused_slot_4) {
ival->stack_spill_pos = data->unused_slot_4;
data->unused_slot_4 = 0;
} else {
IR_ASSERT(0);
ival->stack_spill_pos = data->stack_frame_size;
data->unused_slot_4 = data->stack_frame_size + 4;
data->stack_frame_size += 8;
}
} else if (size == 2) {
if (data->unused_slot_2) {
ival->stack_spill_pos = data->unused_slot_2;
data->unused_slot_2 = 0;
} else if (data->unused_slot_4) {
ival->stack_spill_pos = data->unused_slot_4;
data->unused_slot_2 = data->unused_slot_4 + 2;
data->unused_slot_4 = 0;
} else {
ival->stack_spill_pos = data->stack_frame_size;
data->unused_slot_2 = data->stack_frame_size + 2;
data->unused_slot_4 = data->stack_frame_size + 4;
data->stack_frame_size += 8;
}
} else if (size == 1) {
if (data->unused_slot_1) {
ival->stack_spill_pos = data->unused_slot_1;
data->unused_slot_1 = 0;
} else if (data->unused_slot_2) {
ival->stack_spill_pos = data->unused_slot_2;
data->unused_slot_1 = data->unused_slot_2 + 1;
data->unused_slot_2 = 0;
} else if (data->unused_slot_4) {
ival->stack_spill_pos = data->unused_slot_4;
data->unused_slot_1 = data->unused_slot_4 + 1;
data->unused_slot_2 = data->unused_slot_4 + 2;
data->unused_slot_4 = 0;
} else {
ival->stack_spill_pos = data->stack_frame_size;
data->unused_slot_1 = data->stack_frame_size + 1;
data->unused_slot_2 = data->stack_frame_size + 2;
data->unused_slot_4 = data->stack_frame_size + 4;
data->stack_frame_size += 8;
}
} else {
IR_ASSERT(0);
}
}
@ -1912,6 +1916,58 @@ static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to)
return 1;
}
/*
 * Decide whether a live interval may stay in memory instead of being queued
 * for register allocation.
 *
 * Only intervals flagged IR_LIVE_INTERVAL_MEM_PARAM or
 * IR_LIVE_INTERVAL_MEM_LOAD are candidates; MEM_PARAM is checked first.
 * The first use position is asserted to belong to the defining instruction
 * (IR_PARAM or IR_VLOAD). The interval is kept in memory only when it has at
 * most one further use and that use is not flagged IR_USE_MUST_BE_IN_REG.
 *
 *  - MEM_PARAM: the extra use must not be in a block with a non-zero
 *    loop_depth.
 *  - MEM_LOAD:  the extra use may be in a loop block only if that is the same
 *    block as the load itself. On success the interval and the IR_VAR
 *    referenced by the load's op2 end up with the same stack_spill_pos
 *    (allocating a slot if the variable has none yet).
 *
 * Returns 1 if the interval is handled here, 0 if the caller has to process
 * it as a regular interval.
 */
static bool ir_ival_spill_for_fuse_load(ir_ctx *ctx, ir_live_interval *ival, ir_lsra_data *data)
{
	ir_use_pos *def_use = ival->use_pos;
	ir_use_pos *extra_use;
	ir_insn *def_insn;
	ir_live_interval *var_ival;

	if (!(ival->flags & (IR_LIVE_INTERVAL_MEM_PARAM | IR_LIVE_INTERVAL_MEM_LOAD))) {
		return 0;
	}

	if (ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) {
		IR_ASSERT(ival->top == ival && !ival->next && def_use && def_use->op_num == 0);
		def_insn = &ctx->ir_base[IR_LIVE_POS_TO_REF(def_use->pos)];
		IR_ASSERT(def_insn->op == IR_PARAM);

		extra_use = def_use->next;
		if (!extra_use) {
			/* the parameter is never used after its definition */
			return 1;
		}
		if (extra_use->next || (extra_use->flags & IR_USE_MUST_BE_IN_REG)) {
			return 0;
		}
		if (ir_block_from_live_pos(ctx, extra_use->pos)->loop_depth) {
			return 0;
		}
		return 1;
	}

	/* IR_LIVE_INTERVAL_MEM_LOAD */
	def_insn = &ctx->ir_base[IR_LIVE_POS_TO_REF(def_use->pos)];
	IR_ASSERT(def_insn->op == IR_VLOAD);

	extra_use = def_use->next;
	if (extra_use) {
		ir_block *use_bb;

		if (extra_use->next || (extra_use->flags & IR_USE_MUST_BE_IN_REG)) {
			return 0;
		}
		use_bb = ir_block_from_live_pos(ctx, extra_use->pos);
		if (use_bb->loop_depth && use_bb != ir_block_from_live_pos(ctx, def_use->pos)) {
			return 0;
		}
	}

	/* Make the loaded value and the variable it is loaded from share one slot */
	IR_ASSERT(ctx->ir_base[def_insn->op2].op == IR_VAR);
	var_ival = ctx->live_intervals[ctx->vregs[def_insn->op2]];
	if (var_ival->stack_spill_pos == -1) {
		ir_allocate_spill_slot(ctx, ival, data);
		var_ival->stack_spill_pos = ival->stack_spill_pos;
	} else {
		ival->stack_spill_pos = var_ival->stack_spill_pos;
	}
	return 1;
}
static int ir_linear_scan(ir_ctx *ctx)
{
int b;
@ -1945,8 +2001,15 @@ static int ir_linear_scan(ir_ctx *ctx)
for (j = 1; j <= ctx->vregs_count; j++) {
ival = ctx->live_intervals[j];
if (ival && !(ival->flags & IR_LIVE_INTERVAL_VAR)) {
ir_add_to_unhandled(&unhandled, ival);
if (ival) {
if (ival->flags & IR_LIVE_INTERVAL_VAR) {
if (ival->stack_spill_pos == -1) {
ir_allocate_spill_slot(ctx, ival, &data);
}
} else if (!(ival->flags & (IR_LIVE_INTERVAL_MEM_PARAM|IR_LIVE_INTERVAL_MEM_LOAD))
|| !ir_ival_spill_for_fuse_load(ctx, ival, &data)) {
ir_add_to_unhandled(&unhandled, ival);
}
}
}
@ -2053,7 +2116,7 @@ static int ir_linear_scan(ir_ctx *ctx)
for (j = 1; j <= ctx->vregs_count; j++) {
ival = ctx->live_intervals[j];
if (ival) {
if (ival && ival->stack_spill_pos == -1 && !(ival->flags & IR_LIVE_INTERVAL_MEM_PARAM)) {
if (ival->next || ival->reg == IR_REG_NONE) {
ir_allocate_spill_slot(ctx, ival, &data);
}

View File

@ -4381,7 +4381,9 @@ static void ir_emit_load_params(ir_ctx *ctx)
}
if (ctx->vregs[use]) {
dst_reg = IR_REG_NUM(ctx->regs[use][0]);
IR_ASSERT(src_reg != IR_REG_NONE || dst_reg != IR_REG_NONE);
IR_ASSERT(src_reg != IR_REG_NONE || dst_reg != IR_REG_NONE ||
stack_offset == ctx->live_intervals[ctx->vregs[use]]->stack_spill_pos +
((ctx->flags & IR_USE_FRAME_POINTER) ? -data->stack_frame_size : data->call_stack_size));
if (src_reg != dst_reg) {
ir_emit_param_move(ctx, insn->type, src_reg, dst_reg, use, stack_offset);
}
@ -4495,6 +4497,66 @@ static void ir_allocate_spill_slot(ir_ctx *ctx, ir_live_interval *ival, ir_backe
}
}
/*
 * Assign spill positions to parameters that are passed on the stack.
 *
 * Walks the uses of instruction 1 and, for every IR_PARAM, counts integer and
 * floating-point parameters separately to find out whether the parameter
 * arrives in a register (taken from _ir_int_reg_params / _ir_fp_reg_params)
 * or on the stack. Each stack-passed parameter advances stack_offset by
 * sizeof(void*).
 *
 * A stack-passed parameter whose interval is flagged
 * IR_LIVE_INTERVAL_MEM_PARAM, has no spill position yet, and either has a
 * following interval (ival->next) or no register, gets the current
 * stack_offset as its stack_spill_pos, and its ctx->regs[ref][0] entry is
 * reset to IR_REG_NONE.
 */
static void ir_fix_param_spills(ir_ctx *ctx)
{
	ir_backend_data *data = ctx->data;
	ir_use_list *start_uses = &ctx->use_lists[1];
	ir_ref *edges = &ctx->use_edges[start_uses->refs];
	ir_ref total = start_uses->count;
	ir_ref idx, ref;
	ir_insn *insn;
	ir_live_interval *ival;
	ir_reg src_reg;
	int on_stack;
	int int_seen = 0;
	int fp_seen = 0;
	// TODO: Calling convention specific
	int int_reg_limit = IR_REG_INT_ARGS;
	int fp_reg_limit = IR_REG_FP_ARGS;
	const int8_t *int_regs = _ir_int_reg_params;
	const int8_t *fp_regs = _ir_fp_reg_params;
	/* without a frame pointer: skip return address */
	int32_t stack_offset = data->stack_frame_size;

	if (ctx->flags & IR_USE_FRAME_POINTER) {
		/* skip old frame pointer and return address */
		stack_offset = sizeof(void*) + (data->stack_frame_size - data->stack_frame_alignment);
	}

	for (idx = 0; idx < total; idx++) {
		ref = edges[idx];
		insn = &ctx->ir_base[ref];
		if (insn->op != IR_PARAM) {
			continue;
		}

		/* Classify the parameter: register-passed or stack-passed */
		if (IR_IS_TYPE_INT(insn->type)) {
			on_stack = (int_seen >= int_reg_limit);
			src_reg = on_stack ? IR_REG_NONE : int_regs[int_seen];
			int_seen++;
		} else {
			on_stack = (fp_seen >= fp_reg_limit);
			src_reg = on_stack ? IR_REG_NONE : fp_regs[fp_seen];
			fp_seen++;
		}
		if (on_stack) {
			stack_offset += sizeof(void*);
		}

		if (src_reg != IR_REG_NONE || !ctx->vregs[ref]) {
			continue;
		}

		ival = ctx->live_intervals[ctx->vregs[ref]];
		if (!(ival->flags & IR_LIVE_INTERVAL_MEM_PARAM)) {
			continue;
		}
		if (ival->stack_spill_pos != -1) {
			continue;
		}
		if (!ival->next && ival->reg != IR_REG_NONE) {
			continue;
		}

		/* Reuse the incoming argument slot as the spill slot */
		ival->stack_spill_pos = stack_offset;
		ctx->regs[ref][0] = IR_REG_NONE;
	}
}
static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
{
int b;
@ -4552,7 +4614,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
&& ctx->live_intervals[ctx->vregs[i]]
&& ctx->live_intervals[ctx->vregs[i]]->stack_spill_pos != -1) {
/* pass */
} else {
} else if (insn->op != IR_PARAM) {
reg = ir_get_free_reg(insn->type, available);
IR_REGSET_EXCL(available, reg);
ctx->regs[i][0] = reg | IR_REG_SPILL_STORE;
@ -4564,7 +4626,12 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
ival->type = insn->type;
ival->reg = IR_REG_NONE;
ival->vreg = ctx->vregs[i];
ir_allocate_spill_slot(ctx, ival, data);
ival->stack_spill_pos = -1;
if (insn->op == IR_PARAM && reg == IR_REG_NONE) {
ival->flags |= IR_LIVE_INTERVAL_MEM_PARAM;
} else {
ir_allocate_spill_slot(ctx, ival, data);
}
ival->top = ival;
if (insn->op == IR_VAR) {
ir_use_list *use_list = &ctx->use_lists[i];
@ -4681,6 +4748,8 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
}
}
}
ir_fix_param_spills(ctx);
}
static void ir_mark_empty_blocks(ir_ctx *ctx)
@ -4777,6 +4846,8 @@ static void ir_calc_stack_frame_size(ir_ctx *ctx, ir_backend_data *data)
}
}
}
ir_fix_param_spills(ctx);
}
static void* dasm_labels[ir_lb_MAX];

View File

@ -66,123 +66,115 @@ Fig -O0
}
--EXPECT--
test:
subq $0x98, %rsp
subq $0x88, %rsp
movl %edi, (%rsp)
movl %esi, 4(%rsp)
movl %edx, 8(%rsp)
movl %ecx, 0xc(%rsp)
movl %r9d, 0x10(%rsp)
movl 0xa0(%rsp), %eax
movl %eax, 0x14(%rsp)
movl 0xa8(%rsp), %eax
movl %eax, 0x18(%rsp)
movl 0xb0(%rsp), %eax
movl %eax, 0x1c(%rsp)
movl 0xb8(%rsp), %eax
movl %eax, 0x20(%rsp)
movl (%rsp), %eax
movl %eax, 0x24(%rsp)
movl %eax, 0x14(%rsp)
movl 0xc(%rsp), %eax
movl %eax, 0x28(%rsp)
movl %eax, 0x18(%rsp)
movl 0x10(%rsp), %eax
movl %eax, 0x2c(%rsp)
movl 0x14(%rsp), %eax
movl %eax, 0x30(%rsp)
movl 0x18(%rsp), %eax
movl %eax, 0x34(%rsp)
movl %eax, 0x1c(%rsp)
movl 0x90(%rsp), %eax
movl %eax, 0x20(%rsp)
movl 0x98(%rsp), %eax
movl %eax, 0x24(%rsp)
.L1:
cmpl $0, 0x1c(%rsp)
cmpl $0, 0xa0(%rsp)
je .L2
movl 8(%rsp), %eax
imull 4(%rsp), %eax
movl %eax, 0x38(%rsp)
movl 0x38(%rsp), %eax
addl $4, %eax
movl %eax, 0x3c(%rsp)
movl 8(%rsp), %eax
movl %eax, 0x40(%rsp)
movl 0x40(%rsp), %eax
movl %eax, 0x54(%rsp)
movl %eax, 0x28(%rsp)
movl 0x28(%rsp), %eax
movl %eax, 0x58(%rsp)
movl 0x38(%rsp), %eax
movl %eax, 0x5c(%rsp)
movl 0x3c(%rsp), %eax
movl %eax, 0x60(%rsp)
addl $4, %eax
movl %eax, 0x2c(%rsp)
movl 8(%rsp), %eax
movl %eax, 0x30(%rsp)
movl 0x30(%rsp), %eax
movl %eax, 0x64(%rsp)
movl 0x34(%rsp), %eax
movl %eax, 0x68(%rsp)
movl %eax, 0x44(%rsp)
movl 0x18(%rsp), %eax
movl %eax, 0x48(%rsp)
movl 0x28(%rsp), %eax
movl %eax, 0x4c(%rsp)
movl 0x2c(%rsp), %eax
movl %eax, 0x50(%rsp)
movl 0x20(%rsp), %eax
movl %eax, 0x54(%rsp)
movl 0x24(%rsp), %eax
movl %eax, 0x58(%rsp)
jmp .L3
.L2:
movl 8(%rsp), %eax
movl %eax, 0x44(%rsp)
movl 0x44(%rsp), %eax
imull 4(%rsp), %eax
movl %eax, 0x48(%rsp)
movl 0x24(%rsp), %eax
imull 4(%rsp), %eax
movl %eax, 0x4c(%rsp)
movl 0x4c(%rsp), %eax
addl $1, %eax
movl %eax, 0x50(%rsp)
movl 0x24(%rsp), %eax
movl %eax, 0x54(%rsp)
movl 0x44(%rsp), %eax
movl %eax, 0x58(%rsp)
movl 0x48(%rsp), %eax
movl %eax, 0x5c(%rsp)
movl 0x2c(%rsp), %eax
movl %eax, 0x60(%rsp)
movl 0x4c(%rsp), %eax
movl %eax, 0x64(%rsp)
movl 0x50(%rsp), %eax
movl %eax, 0x68(%rsp)
.L3:
movl 0x54(%rsp), %eax
imull 4(%rsp), %eax
movl %eax, 0x6c(%rsp)
movl 0x6c(%rsp), %eax
addl $1, %eax
movl %eax, 0x70(%rsp)
cmpl $0, 0x20(%rsp)
je .L4
movl 0x54(%rsp), %eax
movl %eax, 0x24(%rsp)
movl 0x58(%rsp), %eax
movl %eax, 0x28(%rsp)
movl 0x60(%rsp), %eax
movl %eax, 0x2c(%rsp)
movl 0x64(%rsp), %eax
movl %eax, 0x30(%rsp)
movl 0x68(%rsp), %eax
movl %eax, 0x34(%rsp)
movl 0x34(%rsp), %eax
imull 4(%rsp), %eax
movl %eax, 0x38(%rsp)
movl 0x14(%rsp), %eax
imull 4(%rsp), %eax
movl %eax, 0x3c(%rsp)
movl 0x3c(%rsp), %eax
addl $1, %eax
movl %eax, 0x40(%rsp)
movl 0x14(%rsp), %eax
movl %eax, 0x44(%rsp)
movl 0x34(%rsp), %eax
movl %eax, 0x48(%rsp)
movl 0x38(%rsp), %eax
movl %eax, 0x4c(%rsp)
movl 0x1c(%rsp), %eax
movl %eax, 0x50(%rsp)
movl 0x3c(%rsp), %eax
movl %eax, 0x54(%rsp)
movl 0x40(%rsp), %eax
movl %eax, 0x58(%rsp)
.L3:
movl 0x44(%rsp), %eax
imull 4(%rsp), %eax
movl %eax, 0x5c(%rsp)
movl 0x5c(%rsp), %eax
addl $1, %eax
movl %eax, 0x60(%rsp)
cmpl $0, 0xa8(%rsp)
je .L4
movl 0x44(%rsp), %eax
movl %eax, 0x14(%rsp)
movl 0x48(%rsp), %eax
movl %eax, 0x18(%rsp)
movl 0x50(%rsp), %eax
movl %eax, 0x1c(%rsp)
movl 0x54(%rsp), %eax
movl %eax, 0x20(%rsp)
movl 0x58(%rsp), %eax
movl %eax, 0x24(%rsp)
jmp .L1
.L4:
movl 0x54(%rsp), %eax
movl 0x44(%rsp), %eax
addl 4(%rsp), %eax
movl %eax, 0x64(%rsp)
movl 0x64(%rsp), %eax
addl 8(%rsp), %eax
movl %eax, 0x68(%rsp)
movl 0x68(%rsp), %eax
addl 0x48(%rsp), %eax
movl %eax, 0x6c(%rsp)
movl 0x6c(%rsp), %eax
addl 0x4c(%rsp), %eax
movl %eax, 0x70(%rsp)
movl 0x70(%rsp), %eax
addl 0x50(%rsp), %eax
movl %eax, 0x74(%rsp)
movl 0x74(%rsp), %eax
addl 8(%rsp), %eax
addl 0x54(%rsp), %eax
movl %eax, 0x78(%rsp)
movl 0x78(%rsp), %eax
addl 0x58(%rsp), %eax
movl %eax, 0x7c(%rsp)
movl 0x7c(%rsp), %eax
addl 0x5c(%rsp), %eax
addl 0x60(%rsp), %eax
movl %eax, 0x80(%rsp)
movl 0x80(%rsp), %eax
addl 0x60(%rsp), %eax
movl %eax, 0x84(%rsp)
movl 0x84(%rsp), %eax
addl 0x64(%rsp), %eax
movl %eax, 0x88(%rsp)
movl 0x88(%rsp), %eax
addl 0x68(%rsp), %eax
movl %eax, 0x8c(%rsp)
movl 0x8c(%rsp), %eax
addl 0x70(%rsp), %eax
movl %eax, 0x90(%rsp)
movl 0x90(%rsp), %eax
addq $0x98, %rsp
addq $0x88, %rsp
retq

View File

@ -28,12 +28,9 @@
}
--EXPECT--
test:
subq $8, %rsp
movq %rbx, (%rsp)
movl 0x10(%rsp), %eax
movl 0x18(%rsp), %r10d
movl 0x20(%rsp), %r11d
movl 0x28(%rsp), %ebx
movl 8(%rsp), %eax
movl 0x10(%rsp), %r10d
movl 0x18(%rsp), %r11d
leal (%rsi, %rdi), %esi
leal (%rsi, %rdx), %edx
leal (%rdx, %rcx), %ecx
@ -42,7 +39,5 @@ test:
leal (%rcx, %rax), %eax
leal (%rax, %r10), %eax
leal (%rax, %r11), %eax
subl %ebx, %eax
movq (%rsp), %rbx
addq $8, %rsp
subl 0x20(%rsp), %eax
retq

View File

@ -28,8 +28,6 @@
}
--EXPECT--
test:
movsd 8(%rsp), %xmm8
movsd 0x10(%rsp), %xmm9
addsd %xmm0, %xmm1
addsd %xmm2, %xmm1
addsd %xmm3, %xmm1
@ -38,6 +36,6 @@ test:
addsd %xmm6, %xmm1
addsd %xmm7, %xmm1
movapd %xmm1, %xmm0
addsd %xmm8, %xmm0
subsd %xmm9, %xmm0
addsd 8(%rsp), %xmm0
subsd 0x10(%rsp), %xmm0
retq

View File

@ -81,7 +81,7 @@ test:
pushq %rbp
movq %rsp, %rbp
subq $0x38, %rsp
subsd 0xc8(%rip), %xmm1
subsd 0xc0(%rip), %xmm1
movsd %xmm1, -0x38(%rbp)
movsd %xmm0, -0x30(%rbp)
xorpd %xmm0, %xmm0
@ -105,8 +105,7 @@ test:
mulsd %xmm1, %xmm1
movsd %xmm1, -8(%rbp)
movsd -0x10(%rbp), %xmm0
movsd -8(%rbp), %xmm1
subsd %xmm1, %xmm0
subsd -8(%rbp), %xmm0
movsd -0x38(%rbp), %xmm1
addsd %xmm0, %xmm1
movsd %xmm1, -0x20(%rbp)
@ -115,10 +114,9 @@ test:
movsd -0x30(%rbp), %xmm1
addsd %xmm0, %xmm1
movsd %xmm1, -0x28(%rbp)
movsd -8(%rbp), %xmm0
movsd -0x10(%rbp), %xmm1
addsd %xmm0, %xmm1
ucomisd 0x2e(%rip), %xmm1
movsd -0x10(%rbp), %xmm0
addsd -8(%rbp), %xmm0
ucomisd 0x2e(%rip), %xmm0
ja .L2
jmp .L3
.L2:
@ -136,4 +134,3 @@ test:
retq
.L4:
jmp .L1

View File

@ -74,7 +74,7 @@ Mandelbrot Test (var)
--EXPECT--
test:
subq $0x40, %rsp
subsd 0xdc(%rip), %xmm1
subsd 0xcc(%rip), %xmm1
movsd %xmm1, (%rsp)
movsd %xmm0, 8(%rsp)
xorpd %xmm0, %xmm0
@ -96,8 +96,7 @@ test:
mulsd %xmm1, %xmm1
movsd %xmm1, 0x38(%rsp)
movsd 0x30(%rsp), %xmm0
movsd 0x38(%rsp), %xmm1
subsd %xmm1, %xmm0
subsd 0x38(%rsp), %xmm0
movsd (%rsp), %xmm1
addsd %xmm0, %xmm1
movsd %xmm1, 0x18(%rsp)
@ -106,10 +105,9 @@ test:
movsd 8(%rsp), %xmm1
addsd %xmm0, %xmm1
movsd %xmm1, 0x10(%rsp)
movsd 0x38(%rsp), %xmm0
movsd 0x30(%rsp), %xmm1
addsd %xmm0, %xmm1
ucomisd 0x33(%rip), %xmm1
movsd 0x30(%rsp), %xmm0
addsd 0x38(%rsp), %xmm0
ucomisd 0x2b(%rip), %xmm0
ja .L2
jmp .L3
.L2:
@ -117,12 +115,10 @@ test:
addq $0x40, %rsp
retq
.L3:
movl 0x20(%rsp), %eax
cmpl $0x3e8, %eax
cmpl $0x3e8, 0x20(%rsp)
jl .L4
xorl %eax, %eax
addq $0x40, %rsp
retq
.L4:
jmp .L1