From e7944514519c87a6102542bc5e772b7e664a6566 Mon Sep 17 00:00:00 2001 From: Dmitry Stogov Date: Tue, 17 May 2022 22:37:13 +0300 Subject: [PATCH] Preallocate call stack --- TODO | 1 - ir_main.c | 3 +++ ir_test.c | 2 ++ ir_x86.dasc | 52 ++++++++++++++++++++++++++++++---------- tests/debug/args_001.irt | 8 +++---- tests/debug/args_002.irt | 24 +++++++++---------- 6 files changed, 58 insertions(+), 32 deletions(-) diff --git a/TODO b/TODO index e47d0aa..2204a1e 100644 --- a/TODO +++ b/TODO @@ -47,7 +47,6 @@ - CAST - update memory (binop_int, mul_div_mod_pwr2, shift, shift_const, op_int, copy_int/fp?) ? operands swap (binop_int, binop_sse, binop_avx, cmp_int, cmp_fp, cmp_br_int) - - IR_PREALLOCATED_STACK - spill slots for parameters - improve register allocation speific to arguments passing - return merge/split diff --git a/ir_main.c b/ir_main.c index 555e7d5..9bce5e8 100644 --- a/ir_main.c +++ b/ir_main.c @@ -9,6 +9,7 @@ static void help(const char *cmd) " -O[012] - optimiztion level\n" " -S - dump final target assembler code\n" " -mavx - use AVX instruction set\n" + " -muse-fp - use base frame pointer register\n" " --emit-c [file-name] - convert to C source\n" " --save [file-name] - save IR\n" " --dot [file-name] - dump IR graph\n" @@ -308,6 +309,8 @@ int main(int argc, char **argv) run = 1; } else if (strcmp(argv[i], "-mavx") == 0) { mflags |= IR_AVX; + } else if (strcmp(argv[i], "-muse-fp") == 0) { + mflags |= IR_USE_FRAME_POINTER; #ifdef IR_DEBUG } else if (strcmp(argv[i], "--debug-sccp") == 0) { mflags |= IR_DEBUG_SCCP; diff --git a/ir_test.c b/ir_test.c index b7d27a3..b3e9d22 100644 --- a/ir_test.c +++ b/ir_test.c @@ -137,6 +137,8 @@ int main(int argc, char **argv) } } else if (strcmp(argv[i], "-mavx") == 0) { mflags |= IR_AVX; + } else if (strcmp(argv[i], "-muse-fp") == 0) { + mflags |= IR_USE_FRAME_POINTER; #ifdef IR_DEBUG } else if (strcmp(argv[i], "--debug-sccp") == 0) { mflags |= IR_DEBUG_SCCP; diff --git a/ir_x86.dasc b/ir_x86.dasc index f7c471c..3abab8e 100644 --- a/ir_x86.dasc +++ b/ir_x86.dasc @@ -408,6 +408,7 @@ typedef struct _ir_backend_data { int32_t unused_slot_2; int32_t unused_slot_1; int32_t stack_frame_alignment; + int32_t call_stack_size; ir_regset used_preserved_regs; uint32_t dessa_from_block; dasm_State *dasm_state; @@ -1471,9 +1472,9 @@ static ir_reg ir_ref_spill_slot(ir_ctx *ctx, ir_ref ref) offset = ctx->live_intervals[ctx->vregs[ref]]->stack_spill_pos; IR_ASSERT(offset != -1); if (ctx->flags & IR_USE_FRAME_POINTER) { - return offset - data->stack_frame_size - data->stack_frame_alignment; + return offset - data->stack_frame_size; } else { - return offset + data->stack_frame_alignment; + return offset + data->call_stack_size + data->stack_frame_alignment; } } @@ -1532,10 +1533,10 @@ static void ir_emit_load(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src) offset = ctx->live_intervals[ctx->vregs[src]]->stack_spill_pos; IR_ASSERT(offset != -1); if (ctx->flags & IR_USE_FRAME_POINTER) { - offset = offset - data->stack_frame_size - data->stack_frame_alignment; + offset = offset - data->stack_frame_size; fp = IR_REG_RBP; } else { - offset = offset + data->stack_frame_alignment; + offset = offset + data->call_stack_size + data->stack_frame_alignment; fp = IR_REG_RSP; } @@ -1558,10 +1559,10 @@ static void ir_emit_store(ir_ctx *ctx, ir_type type, ir_ref dst, ir_reg reg) offset = ctx->live_intervals[ctx->vregs[dst]]->stack_spill_pos; IR_ASSERT(offset != -1); if (ctx->flags & IR_USE_FRAME_POINTER) { - offset = offset - data->stack_frame_size - data->stack_frame_alignment; + offset = offset - data->stack_frame_size; fp = IR_REG_RBP; } else { - offset = offset + data->stack_frame_alignment; + offset = offset + data->call_stack_size + data->stack_frame_alignment; fp = IR_REG_RSP; } @@ -1611,7 +1612,7 @@ static void ir_emit_prologue(ir_ctx *ctx) | mov rbp, rsp } if (data->stack_frame_size) { - | sub rsp, data->stack_frame_size + | sub rsp, (data->stack_frame_size + data->call_stack_size) } if (data->used_preserved_regs) { int offset; @@ -1620,7 +1621,7 @@ static void ir_emit_prologue(ir_ctx *ctx) if (ctx->flags & IR_USE_FRAME_POINTER) { offset = -(int)sizeof(void*); } else { - offset = data->stack_frame_size - (int)sizeof(void*); + offset = data->stack_frame_size + data->call_stack_size - (int)sizeof(void*); } for (i = 0; i < IR_REG_NUM; i++) { if (IR_REGSET_IN(data->used_preserved_regs, i)) { @@ -1650,7 +1651,7 @@ static void ir_emit_epilogue(ir_ctx *ctx) if (ctx->flags & IR_USE_FRAME_POINTER) { offset = -(int)sizeof(void*); } else { - offset = data->stack_frame_size - (int)sizeof(void*); + offset = data->stack_frame_size + data->call_stack_size - (int)sizeof(void*); } for (i = 0; i < IR_REG_NUM; i++) { if (IR_REGSET_IN(data->used_preserved_regs, i)) { @@ -1671,7 +1672,7 @@ static void ir_emit_epilogue(ir_ctx *ctx) | mov rsp, rbp | pop rbp } else if (data->stack_frame_size) { - | add rsp, data->stack_frame_size + | add rsp, (data->stack_frame_size + data->call_stack_size) } } @@ -3626,6 +3627,7 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg used_stack = 0; } else { used_stack = ir_call_used_stack(ctx, insn); + data->call_stack_size += used_stack; if (used_stack) { | sub rsp, used_stack } @@ -3798,6 +3800,7 @@ static void ir_emit_call(ir_ctx *ctx, ir_ref def, ir_insn *insn) if (used_stack) { | add rsp, used_stack + data->call_stack_size -= used_stack; } if (insn->type != IR_VOID) { @@ -4019,9 +4022,9 @@ static void ir_emit_load_params(ir_ctx *ctx) int32_t stack_offset = 0; if (ctx->flags & IR_USE_FRAME_POINTER) { - stack_offset = sizeof(void*); /* skip old frame pointer */ + stack_offset = sizeof(void*); /* skip old frame pointer and return address */ } else { - stack_offset = data->stack_frame_size + data->stack_frame_alignment; + stack_offset = data->stack_frame_size + data->call_stack_size; /* skip return address */ } n = use_list->count; for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { @@ -4384,6 +4387,28 @@ static void ir_mark_empty_blocks(ir_ctx *ctx) } } +static void ir_preallocate_call_stack(ir_ctx *ctx, ir_backend_data *data) +{ + int call_stack_size, peak_call_stack_size = 0; + ir_ref i, n; + ir_insn *insn; + + for (i = 1, insn = ctx->ir_base + 1; i < ctx->insns_count;) { + if (insn->op == IR_CALL) { + call_stack_size = ir_call_used_stack(ctx, insn); + if (call_stack_size > peak_call_stack_size) { + peak_call_stack_size = call_stack_size; + } + } + n = ir_operands_count(ctx, insn); + n = 1 + (n >> 2); // support for multi-word instructions like MERGE and PHI + i += n; + insn += n; + } + data->call_stack_size = peak_call_stack_size; + ctx->flags |= IR_PREALLOCATED_STACK; +} + static void ir_calc_stack_frame_size(ir_ctx *ctx, ir_backend_data *data) { int i; @@ -4418,11 +4443,11 @@ static void ir_calc_stack_frame_size(ir_ctx *ctx, ir_backend_data *data) data->stack_frame_alignment += 8; } } else { - // TODO: Preallocate stack for calls (ctx->flags |= IR_PREALLOCATED_STACK) while (IR_ALIGNED_SIZE(data->stack_frame_size + sizeof(void*), 16) != data->stack_frame_size + sizeof(void*)) { data->stack_frame_size += 8; data->stack_frame_alignment += 8; } + ir_preallocate_call_stack(ctx, data); } } } @@ -4447,6 +4472,7 @@ void *ir_emit(ir_ctx *ctx, size_t *size) data.unused_slot_2 = 0; data.unused_slot_1 = 0; data.stack_frame_alignment = 0; + data.call_stack_size = 0; data.used_preserved_regs = 0; data.rodata_label = 0; data.jmp_table_label = 0; diff --git a/tests/debug/args_001.irt b/tests/debug/args_001.irt index 768559d..5806981 100644 --- a/tests/debug/args_001.irt +++ b/tests/debug/args_001.irt @@ -22,15 +22,14 @@ } --EXPECT-- test: - subq $8, %rsp - subq $0x30, %rsp + subq $0x38, %rsp movl $6, (%rsp) movl $7, 8(%rsp) movl $8, 0x10(%rsp) movq $9, 0x18(%rsp) movabsq $0x100000000, %rax movq %rax, 0x20(%rsp) - leaq 0x3a(%rip), %rdi + leaq 0x36(%rip), %rdi movq $1, %rsi movw $2, %dx movl $3, %ecx @@ -38,8 +37,7 @@ test: movabsq $0x100000000, %r9 movabsq $_IO_printf, %rax callq *%rax - addq $0x30, %rsp - addq $8, %rsp + addq $0x38, %rsp retq 1 2 3 4 0x100000000 6 7 8 9 0x100000000 diff --git a/tests/debug/args_002.irt b/tests/debug/args_002.irt index a673d1b..b6a7e17 100644 --- a/tests/debug/args_002.irt +++ b/tests/debug/args_002.irt @@ -22,24 +22,22 @@ } --EXPECT-- test: - subq $8, %rsp - subq $0x10, %rsp + subq $0x18, %rsp movabsq $0x3feccccccccccccd, %rax movq %rax, (%rsp) movq $0, 8(%rsp) - leaq 0x9a(%rip), %rdi - movsd 0x52(%rip), %xmm0 - movsd 0x52(%rip), %xmm1 - movsd 0x52(%rip), %xmm2 - movsd 0x52(%rip), %xmm3 - movsd 0x52(%rip), %xmm4 - movsd 0x52(%rip), %xmm5 - movsd 0x52(%rip), %xmm6 - movsd 0x52(%rip), %xmm7 + leaq 0x96(%rip), %rdi + movsd 0x4e(%rip), %xmm0 + movsd 0x4e(%rip), %xmm1 + movsd 0x4e(%rip), %xmm2 + movsd 0x4e(%rip), %xmm3 + movsd 0x4e(%rip), %xmm4 + movsd 0x4e(%rip), %xmm5 + movsd 0x4e(%rip), %xmm6 + movsd 0x4e(%rip), %xmm7 movabsq $_IO_printf, %rax callq *%rax - addq $0x10, %rsp - addq $8, %rsp + addq $0x18, %rsp retq 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 0