From 81047af575864c5dfd5fda62394ada46cc9cae27 Mon Sep 17 00:00:00 2001 From: Dmitry Stogov Date: Thu, 23 Nov 2023 22:52:08 +0300 Subject: [PATCH] Cleanup code-generation for VA_ARG --- ir.h | 2 +- ir_aarch64.dasc | 166 +++++++++++++++++++++--------------------------- ir_x86.dasc | 117 ++++------------------------------ 3 files changed, 85 insertions(+), 200 deletions(-) diff --git a/ir.h b/ir.h index fb90fde..669f94e 100644 --- a/ir.h +++ b/ir.h @@ -567,6 +567,7 @@ struct _ir_ctx { uint32_t locals_area_size; uint32_t gp_reg_params; uint32_t fp_reg_params; + int32_t param_stack_size; ir_live_interval **live_intervals; ir_arena *arena; ir_live_range *unused_ranges; @@ -584,7 +585,6 @@ struct _ir_ctx { int32_t call_stack_size; /* stack for parameter passing (used by register allocator and code generator) */ uint64_t used_preserved_regs; #ifdef IR_TARGET_X86 - int32_t param_stack_size; int32_t ret_slot; #endif uint32_t rodata_offset; diff --git a/ir_aarch64.dasc b/ir_aarch64.dasc index a51372d..709b7af 100644 --- a/ir_aarch64.dasc +++ b/ir_aarch64.dasc @@ -3644,7 +3644,36 @@ static void ir_emit_frame_addr(ir_ctx *ctx, ir_ref def) static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn) { -#ifndef __APPLE__ +#ifdef __APPLE__ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg fp; + int arg_area_offset; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg tmp_reg = ctx->regs[def][3]; + + IR_ASSERT(tmp_reg != IR_REG_NONE); + if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + + if (ctx->flags & IR_USE_FRAME_POINTER) { + fp = IR_REG_FRAME_POINTER; + arg_area_offset = ctx->stack_frame_size + sizeof(void*) * 2 + ctx->param_stack_size; + } else { + fp = IR_REG_STACK_POINTER; + arg_area_offset = ctx->call_stack_size + ctx->stack_frame_size + ctx->param_stack_size; + } + | add Rx(tmp_reg), Rx(fp), #arg_area_offset + if (op2_reg != IR_REG_NONE) { 
+ | str Rx(tmp_reg), [Rx(op2_reg)] + } else { + int32_t offset = ir_ref_spill_slot(ctx, insn->op2, &op2_reg); + + | str Rx(tmp_reg), [Rx(op2_reg), #offset] + } +#else ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg fp; @@ -3662,12 +3691,11 @@ static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn) if (ctx->flags & IR_USE_FRAME_POINTER) { fp = IR_REG_FRAME_POINTER; reg_save_area_offset = ctx->locals_area_size + sizeof(void*) * 2; - overflow_arg_area_offset = ctx->stack_frame_size + sizeof(void*) * 2; // TODO: ??? + overflow_arg_area_offset = ctx->stack_frame_size + sizeof(void*) * 2 + ctx->param_stack_size; } else { fp = IR_REG_STACK_POINTER; - reg_save_area_offset = ctx->stack_frame_size + ctx->call_stack_size + 4; // TODO: ??? - overflow_arg_area_offset = 0; - IR_ASSERT(0); // TODO: ??? + reg_save_area_offset = ctx->locals_area_size + ctx->call_stack_size; + overflow_arg_area_offset = ctx->call_stack_size + ctx->stack_frame_size + ctx->param_stack_size; } /* Set va_list.stack */ @@ -3701,36 +3729,6 @@ static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn) /* Set va_list.vr_offset */ | str wzr, [Rx(op2_reg), #offsetof(ir_va_list, vr_offset)] } -#else - ir_backend_data *data = ctx->data; - dasm_State **Dst = &data->dasm_state; - ir_reg fp; - int arg_area_offset; - ir_reg op2_reg = ctx->regs[def][2]; - ir_reg tmp_reg = ctx->regs[def][3]; - - IR_ASSERT(tmp_reg != IR_REG_NONE); - if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { - op2_reg = IR_REG_NUM(op2_reg); - ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); - } - - if (ctx->flags & IR_USE_FRAME_POINTER) { - fp = IR_REG_FRAME_POINTER; - arg_area_offset = 8 + 4; // TODO: ??? - } else { - fp = IR_REG_STACK_POINTER; - arg_area_offset = 0; - IR_ASSERT(0); // TODO: ??? 
- } - | add Rx(tmp_reg), Rx(fp), #arg_area_offset - if (op2_reg != IR_REG_NONE) { - | str Rx(tmp_reg), [Rx(op2_reg)] - } else { - int32_t offset = ir_ref_spill_slot(ctx, insn->op2, &op2_reg); - - | str Rx(tmp_reg), [Rx(op2_reg), #offset] -} #endif } @@ -3741,7 +3739,37 @@ static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn) static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) { -#ifndef __APPLE__ +#ifdef __APPLE__ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_reg def_reg = ctx->regs[def][0]; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg tmp_reg = ctx->regs[def][3]; + + IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); + if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + | ldr Rx(tmp_reg), [Rx(op2_reg)] + if (IR_IS_TYPE_INT(type)) { + ir_emit_load_mem_int(ctx, type, def_reg, tmp_reg, 0); + } else { + ir_emit_load_mem_fp(ctx, type, def_reg, tmp_reg, 0); + } + | add Rx(tmp_reg), Rx(tmp_reg), #IR_MAX(ir_type_size[type], sizeof(void*)) + if (op2_reg != IR_REG_NONE) { + | str Rx(tmp_reg), [Rx(op2_reg)] + } else { + int32_t offset = ir_ref_spill_slot(ctx, insn->op2, &op2_reg); + + | str Rx(tmp_reg), [Rx(op2_reg), #offset] + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +#else ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_type type = insn->type; @@ -3792,36 +3820,6 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } -#else - ir_backend_data *data = ctx->data; - dasm_State **Dst = &data->dasm_state; - ir_type type = insn->type; - ir_reg def_reg = ctx->regs[def][0]; - ir_reg op2_reg = ctx->regs[def][2]; - ir_reg tmp_reg = ctx->regs[def][3]; - - IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); 
- if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { - op2_reg = IR_REG_NUM(op2_reg); - ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); - } - | ldr Rx(tmp_reg), [Rx(op2_reg)] - if (IR_IS_TYPE_INT(type)) { - ir_emit_load_mem_int(ctx, type, def_reg, tmp_reg, 0); - } else { - ir_emit_load_mem_fp(ctx, type, def_reg, tmp_reg, 0); - } - | add Rx(tmp_reg), Rx(tmp_reg), #IR_MAX(ir_type_size[type], sizeof(void*)) - if (op2_reg != IR_REG_NONE) { - | str Rx(tmp_reg), [Rx(op2_reg)] - } else { - int32_t offset = ir_ref_spill_slot(ctx, insn->op2, &op2_reg); - - | str Rx(tmp_reg), [Rx(op2_reg), #offset] - } - if (IR_REG_SPILLED(ctx->regs[def][0])) { - ir_emit_store(ctx, type, def, def_reg); - } #endif } @@ -4996,6 +4994,7 @@ static void ir_fix_param_spills(ir_ctx *ctx) const int8_t *int_reg_params = _ir_int_reg_params; const int8_t *fp_reg_params = _ir_fp_reg_params; int32_t stack_offset = 0; + int32_t param_stack_size = 0; if (ctx->flags & IR_USE_FRAME_POINTER) { /* skip old frame pointer and return address */ @@ -5035,12 +5034,18 @@ static void ir_fix_param_spills(ir_ctx *ctx) } if (sizeof(void*) == 8) { stack_offset += sizeof(void*); + param_stack_size += sizeof(void*); } else { stack_offset += IR_MAX(sizeof(void*), ir_type_size[insn->type]); + param_stack_size += IR_MAX(sizeof(void*), ir_type_size[insn->type]); } } } } + + ctx->gp_reg_params = IR_MIN(int_param_num, int_reg_params_count); + ctx->fp_reg_params = IR_MIN(fp_param_num, fp_reg_params_count); + ctx->param_stack_size = param_stack_size; } static void ir_allocate_unique_spill_slots(ir_ctx *ctx) @@ -5244,40 +5249,11 @@ static void ir_preallocate_call_stack(ir_ctx *ctx) } } -static void ir_count_reg_params(ir_ctx *ctx) -{ - ir_use_list *use_list = &ctx->use_lists[1]; - ir_insn *insn; - ir_ref i, n, *p, use; - int gp_reg_params = 0; - int fp_reg_params = 0; - - n = use_list->count; - for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { - use = *p; - insn = &ctx->ir_base[use]; - if (insn->op == 
IR_PARAM) { - if (IR_IS_TYPE_INT(insn->type)) { - if (gp_reg_params <= IR_REG_INT_ARGS) { - gp_reg_params++; - } - } else { - if (fp_reg_params <= IR_REG_FP_ARGS) { - fp_reg_params++; - } - } - } - } - ctx->gp_reg_params = gp_reg_params; - ctx->fp_reg_params = fp_reg_params; -} - void ir_fix_stack_frame(ir_ctx *ctx) { uint32_t additional_size = 0; ctx->locals_area_size = ctx->stack_frame_size; - ir_count_reg_params(ctx); if (ctx->used_preserved_regs) { ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs; diff --git a/ir_x86.dasc b/ir_x86.dasc index fb614cc..7e7eeab 100644 --- a/ir_x86.dasc +++ b/ir_x86.dasc @@ -6356,7 +6356,7 @@ static void ir_emit_frame_addr(ir_ctx *ctx, ir_ref def) static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn) { -#if defined(_WIN64) +#if defined(_WIN64) || defined(IR_TARGET_X86) ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg fp; @@ -6372,10 +6372,10 @@ static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn) if (ctx->flags & IR_USE_FRAME_POINTER) { fp = IR_REG_FRAME_POINTER; - arg_area_offset = sizeof(void*) * 2 + sizeof(void*); // TODO: ??? + arg_area_offset = sizeof(void*) * 2 + ctx->param_stack_size; } else { fp = IR_REG_STACK_POINTER; - arg_area_offset = ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*); // TODO: ??? 
+ arg_area_offset = ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*) + ctx->param_stack_size; } | lea Ra(tmp_reg), aword [Ra(fp)+arg_area_offset] if (op2_reg != IR_REG_NONE) { @@ -6405,12 +6405,11 @@ static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn) if (ctx->flags & IR_USE_FRAME_POINTER) { fp = IR_REG_FRAME_POINTER; reg_save_area_offset = -(ctx->stack_frame_size - ctx->stack_frame_alignment - ctx->locals_area_size); - overflow_arg_area_offset = 16; + overflow_arg_area_offset = sizeof(void*) * 2 + ctx->param_stack_size; } else { fp = IR_REG_STACK_POINTER; - reg_save_area_offset = ctx->stack_frame_size + ctx->call_stack_size + 4; // TODO: ??? - overflow_arg_area_offset = 0; - IR_ASSERT(0); // TODO: ??? + reg_save_area_offset = ctx->locals_area_size + ctx->call_stack_size; + overflow_arg_area_offset = ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*) + ctx->param_stack_size; } if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) { @@ -6442,35 +6441,6 @@ static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn) /* Set va_list.overflow_arg_area */ | mov qword [Ra(op2_reg)+offsetof(ir_va_list, overflow_arg_area)], Ra(tmp_reg) |.endif -#elif defined(IR_TARGET_X86) - ir_backend_data *data = ctx->data; - dasm_State **Dst = &data->dasm_state; - ir_reg fp; - int arg_area_offset; - ir_reg op2_reg = ctx->regs[def][2]; - ir_reg tmp_reg = ctx->regs[def][3]; - - IR_ASSERT(tmp_reg != IR_REG_NONE); - if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { - op2_reg = IR_REG_NUM(op2_reg); - ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); - } - - if (ctx->flags & IR_USE_FRAME_POINTER) { - fp = IR_REG_FRAME_POINTER; - arg_area_offset = sizeof(void*) * 2 + sizeof(void*); // TODO: ??? - } else { - fp = IR_REG_STACK_POINTER; - arg_area_offset = ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*); // TODO: ??? 
- } - | lea Ra(tmp_reg), aword [Ra(fp)+arg_area_offset] - if (op2_reg != IR_REG_NONE) { - | mov aword [Ra(op2_reg)], Ra(tmp_reg) - } else { - int32_t offset = ir_ref_spill_slot(ctx, insn->op2, &op2_reg); - - | mov aword [Ra(op2_reg)+offset], Ra(tmp_reg) - } #else IR_ASSERT(0 && "NIY va_start"); #endif @@ -6483,7 +6453,7 @@ static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn) static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) { -#if defined(_WIN64) +#if defined(_WIN64) || defined(IR_TARGET_X86) ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_type type = insn->type; @@ -6561,36 +6531,6 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) ir_emit_store(ctx, type, def, def_reg); } |.endif -#elif defined(IR_TARGET_X86) - ir_backend_data *data = ctx->data; - dasm_State **Dst = &data->dasm_state; - ir_type type = insn->type; - ir_reg def_reg = ctx->regs[def][0]; - ir_reg op2_reg = ctx->regs[def][2]; - ir_reg tmp_reg = ctx->regs[def][3]; - - IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); - if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { - op2_reg = IR_REG_NUM(op2_reg); - ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); - } - | mov Ra(tmp_reg), aword [Ra(op2_reg)] - if (IR_IS_TYPE_INT(type)) { - ir_emit_load_mem_int(ctx, type, def_reg, tmp_reg, 0); - } else { - ir_emit_load_mem_fp(ctx, type, def_reg, tmp_reg, 0); - } - | add Ra(tmp_reg), IR_MAX(ir_type_size[type], sizeof(void*)) - if (op2_reg != IR_REG_NONE) { - | mov aword [Ra(op2_reg)], Ra(tmp_reg) - } else { - int32_t offset = ir_ref_spill_slot(ctx, insn->op2, &op2_reg); - - | mov aword [Ra(op2_reg)+offset], Ra(tmp_reg) - } - if (IR_REG_SPILLED(ctx->regs[def][0])) { - ir_emit_store(ctx, type, def, def_reg); - } #else IR_ASSERT(0 && "NIY va_arg"); #endif @@ -8648,11 +8588,13 @@ static void ir_fix_param_spills(ir_ctx *ctx) } } -#ifdef IR_TARGET_X86 - if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) { - 
ctx->param_stack_size = stack_offset; - } +#ifdef _WIN64 + /* WIN64 uses shadow area for registers */ + stack_offset += IR_MIN(int_param_num, int_reg_params_count) * sizeof(void*); #endif + ctx->gp_reg_params = IR_MIN(int_param_num, int_reg_params_count); + ctx->fp_reg_params = IR_MIN(fp_param_num, fp_reg_params_count); + ctx->param_stack_size = stack_offset; } static void ir_allocate_unique_spill_slots(ir_ctx *ctx) @@ -8878,44 +8820,11 @@ static void ir_preallocate_call_stack(ir_ctx *ctx) } } -#if defined(IR_TARGET_X64) && !defined(_WIN64) -static void ir_count_reg_params(ir_ctx *ctx) -{ - ir_use_list *use_list = &ctx->use_lists[1]; - ir_insn *insn; - ir_ref i, n, *p, use; - int gp_reg_params = 0; - int fp_reg_params = 0; - - n = use_list->count; - for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { - use = *p; - insn = &ctx->ir_base[use]; - if (insn->op == IR_PARAM) { - if (IR_IS_TYPE_INT(insn->type)) { - if (gp_reg_params <= IR_REG_INT_ARGS) { - gp_reg_params++; - } - } else { - if (fp_reg_params <= IR_REG_FP_ARGS) { - fp_reg_params++; - } - } - } - } - ctx->gp_reg_params = gp_reg_params; - ctx->fp_reg_params = fp_reg_params; -} -#endif - void ir_fix_stack_frame(ir_ctx *ctx) { uint32_t additional_size = 0; ctx->locals_area_size = ctx->stack_frame_size; -#if defined(IR_TARGET_X64) && !defined(_WIN64) - ir_count_reg_params(ctx); -#endif if (ctx->used_preserved_regs) { ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs;