From 5c2023fd7f4477e9a14060001d0fbd724c518881 Mon Sep 17 00:00:00 2001 From: Dmitry Stogov Date: Thu, 18 May 2023 21:00:57 +0300 Subject: [PATCH] Avoid live range constrction for VARs --- ir.h | 5 +- ir_aarch64.dasc | 132 +++++++------ ir_emit.c | 17 ++ ir_private.h | 17 +- ir_ra.c | 500 +++++++++++++++++++++--------------------------- ir_x86.dasc | 152 +++++++++------ 6 files changed, 412 insertions(+), 411 deletions(-) diff --git a/ir.h b/ir.h index b2119a8..bf88f63 100644 --- a/ir.h +++ b/ir.h @@ -493,8 +493,7 @@ void ir_strtab_free(ir_strtab *strtab); #define IR_NO_LOOPS (1<<25) /* Temporary: Live Ranges */ -#define IR_LR_HAVE_VARS (1<<25) -#define IR_LR_HAVE_DESSA_MOVES (1<<26) +#define IR_LR_HAVE_DESSA_MOVES (1<<25) /* Temporary: Register Allocator */ #define IR_RA_HAVE_SPLITS (1<<25) @@ -555,6 +554,8 @@ struct _ir_ctx { void *data; ir_ref control; /* used by IR construction API (see ir_builder.h) */ ir_ref bb_start; /* used by target CPU instruction matcher */ + ir_ref vars; /* list of VARs (used by register allocator) */ + int32_t stack_frame_size; /* spill stack frame size (used by register allocator) */ }; ir_snapshot_create_t snapshot_create; uint32_t rodata_offset; diff --git a/ir_aarch64.dasc b/ir_aarch64.dasc index 836e814..8ca1f87 100644 --- a/ir_aarch64.dasc +++ b/ir_aarch64.dasc @@ -712,12 +712,7 @@ binop_fp: ctx->flags |= IR_HAS_CALLS; return IR_CALL; case IR_VAR: - if (ctx->use_lists[ref].count > 0) { - return IR_VAR; - } else { - return IR_SKIPPED | IR_VAR; - } - break; + return IR_SKIPPED | IR_VAR; case IR_ALLOCA: if (ctx->flags & IR_FUNCTION) { ctx->flags |= IR_USE_FRAME_POINTER | IR_HAS_ALLOCA; @@ -863,6 +858,16 @@ static int32_t ir_ref_spill_slot(ir_ctx *ctx, ir_ref ref, ir_reg *reg) return IR_SPILL_POS_TO_OFFSET(offset); } +static int32_t ir_var_spill_slot(ir_ctx *ctx, ir_ref ref, ir_reg *reg) +{ + ir_backend_data *data = ctx->data; + ir_insn *var_insn = &ctx->ir_base[ref]; + + IR_ASSERT(var_insn->op == IR_VAR); + *reg = (ctx->flags & 
IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + return IR_SPILL_POS_TO_OFFSET(var_insn->op3); +} + static void ir_emit_load_imm_int(ir_ctx *ctx, ir_type type, ir_reg reg, int64_t val) { ir_backend_data *data = ctx->data; @@ -2814,7 +2819,7 @@ static void ir_emit_vaddr(ir_ctx *ctx, ir_ref def, ir_insn *insn) ir_reg fp; IR_ASSERT(def_reg != IR_REG_NONE); - offset = ir_ref_spill_slot(ctx, insn->op1, &fp); + offset = ir_var_spill_slot(ctx, insn->op1, &fp); | add Rx(def_reg), Rx(fp), #offset if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { ir_emit_store(ctx, type, def, def_reg); @@ -2823,14 +2828,25 @@ static void ir_emit_vaddr(ir_ctx *ctx, ir_ref def, ir_insn *insn) static void ir_emit_vload(ir_ctx *ctx, ir_ref def, ir_insn *insn) { + ir_backend_data *data = ctx->data; + ir_insn *var_insn = &ctx->ir_base[insn->op2]; ir_ref type = insn->type; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg fp; + int32_t offset; - if (def_reg == IR_REG_NONE && ir_is_same_mem(ctx, insn->op2, def)) { + IR_ASSERT(var_insn->op == IR_VAR); + fp = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3); + if (def_reg == IR_REG_NONE && ir_is_same_mem_var(ctx, def, var_insn->op3)) { return; // fake load } IR_ASSERT(def_reg != IR_REG_NONE); - ir_emit_load(ctx, type, def_reg, insn->op2); + if (IR_IS_TYPE_INT(type)) { + ir_emit_load_mem_int(ctx, type, def_reg, fp, offset); + } else { + ir_emit_load_mem_fp(ctx, type, def_reg, fp, offset); + } if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { ir_emit_store(ctx, type, def, def_reg); } @@ -2838,19 +2854,30 @@ static void ir_emit_vload(ir_ctx *ctx, ir_ref def, ir_insn *insn) static void ir_emit_vstore(ir_ctx *ctx, ir_ref ref, ir_insn *insn) { + ir_backend_data *data = ctx->data; + ir_insn *var_insn = &ctx->ir_base[insn->op2]; ir_insn *val_insn = &ctx->ir_base[insn->op3]; ir_ref type = val_insn->type; ir_reg op3_reg = ctx->regs[ref][3]; + ir_reg fp; + int32_t offset; + IR_ASSERT(var_insn->op == IR_VAR); + fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3); IR_ASSERT(op3_reg != IR_REG_NONE); - if ((op3_reg & IR_REG_SPILL_LOAD) && ir_is_same_mem(ctx, insn->op3, insn->op2)) { + if ((op3_reg & IR_REG_SPILL_LOAD) && ir_is_same_mem_var(ctx, insn->op3, var_insn->op3)) { return; // fake store } if ((op3_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(insn->op3)) { op3_reg &= ~IR_REG_SPILL_LOAD; ir_emit_load(ctx, type, op3_reg, insn->op3); } - ir_emit_store(ctx, type, insn->op2, op3_reg); + if (IR_IS_TYPE_INT(type)) { + ir_emit_store_mem_int(ctx, type, fp, offset, op3_reg); + } else { + ir_emit_store_mem_fp(ctx, type, fp, offset, op3_reg); + } } static int32_t ir_fuse_addr(ir_ctx *ctx, ir_ref ref, ir_reg *preg1, ir_reg *preg2) @@ -4464,46 +4491,48 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx) } else { ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type, &data->ra_data); } - if (insn->op == IR_VAR) { - ir_use_list *use_list = 
&ctx->use_lists[i]; - ir_ref i, n, *p, use; - ir_insn *use_insn; - int32_t stack_spill_pos = ival->stack_spill_pos; - - n = use_list->count; - for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { - use = *p; - use_insn = &ctx->ir_base[use]; - if (use_insn->op == IR_VLOAD) { - if (ctx->vregs[use] - && !ctx->live_intervals[ctx->vregs[use]]) { - ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); - memset(ival, 0, sizeof(ir_live_interval)); - ctx->live_intervals[ctx->vregs[use]] = ival; - ival->type = insn->type; - ival->reg = IR_REG_NONE; - ival->vreg = ctx->vregs[use]; - ival->stack_spill_pos = stack_spill_pos; - } - } else if (use_insn->op == IR_VSTORE) { - if (!IR_IS_CONST_REF(use_insn->op3) - && ctx->vregs[use_insn->op3] - && !ctx->live_intervals[ctx->vregs[use_insn->op3]]) { - ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); - memset(ival, 0, sizeof(ir_live_interval)); - ctx->live_intervals[ctx->vregs[use_insn->op3]] = ival; - ival->type = insn->type; - ival->reg = IR_REG_NONE; - ival->vreg = ctx->vregs[insn->op3]; - ival->stack_spill_pos = stack_spill_pos; - } - } - } - } } else if (insn->op == IR_PARAM) { IR_ASSERT(0 && "unexpected PARAM"); return; } + } else if (insn->op == IR_VAR) { + ir_use_list *use_list = &ctx->use_lists[i]; + ir_ref n = use_list->count; + + if (n > 0) { + int32_t stack_spill_pos = insn->op3 = ir_allocate_spill_slot(ctx, insn->type, &data->ra_data); + ir_ref i, *p, use; + ir_insn *use_insn; + + for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { + use = *p; + use_insn = &ctx->ir_base[use]; + if (use_insn->op == IR_VLOAD) { + if (ctx->vregs[use] + && !ctx->live_intervals[ctx->vregs[use]]) { + ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); + memset(ival, 0, sizeof(ir_live_interval)); + ctx->live_intervals[ctx->vregs[use]] = ival; + ival->type = insn->type; + ival->reg = IR_REG_NONE; + ival->vreg = ctx->vregs[use]; + 
ival->stack_spill_pos = stack_spill_pos; + } + } else if (use_insn->op == IR_VSTORE) { + if (!IR_IS_CONST_REF(use_insn->op3) + && ctx->vregs[use_insn->op3] + && !ctx->live_intervals[ctx->vregs[use_insn->op3]]) { + ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); + memset(ival, 0, sizeof(ir_live_interval)); + ctx->live_intervals[ctx->vregs[use_insn->op3]] = ival; + ival->type = insn->type; + ival->reg = IR_REG_NONE; + ival->vreg = ctx->vregs[insn->op3]; + ival->stack_spill_pos = stack_spill_pos; + } + } + } + } } insn_flags = ir_op_flags[insn->op]; @@ -4642,11 +4671,6 @@ static void ir_calc_stack_frame_size(ir_ctx *ctx, ir_backend_data *data) for (i = 1, p = ctx->live_intervals + i; i <= ctx->vregs_count; i++, p++) { ival = *p; if (ival) { - if (ival->stack_spill_pos != -1 && !(ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL)) { - if (ival->stack_spill_pos + ir_type_size[ival->type] > data->ra_data.stack_frame_size) { - data->ra_data.stack_frame_size = ival->stack_spill_pos + ir_type_size[ival->type]; - } - } if (ival->reg != IR_REG_NONE) { if (!IR_REGSET_IN(data->used_preserved_regs, ival->reg) && !IR_REGSET_IN(fixed_regset, ival->reg) @@ -4727,8 +4751,7 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) void *entry; size_t size; - ctx->data = &data; - data.ra_data.stack_frame_size = 0; + data.ra_data.stack_frame_size = (!ctx->live_intervals) ? 
0 : ctx->stack_frame_size; data.ra_data.unused_slot_4 = 0; data.ra_data.unused_slot_2 = 0; data.ra_data.unused_slot_1 = 0; @@ -4737,6 +4760,7 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) data.used_preserved_regs = 0; data.rodata_label = 0; data.jmp_table_label = 0; + ctx->data = &data; if (!ctx->live_intervals) { ir_allocate_unique_spill_slots(ctx); diff --git a/ir_emit.c b/ir_emit.c index 83ce0c0..63ea4c6 100644 --- a/ir_emit.c +++ b/ir_emit.c @@ -237,6 +237,23 @@ static bool ir_is_same_mem(const ir_ctx *ctx, ir_ref r1, ir_ref r2) return o1 == o2; } +static bool ir_is_same_mem_var(const ir_ctx *ctx, ir_ref r1, int32_t offset) +{ + ir_live_interval *ival1; + int32_t o1; + + if (IR_IS_CONST_REF(r1)) { + return 0; + } + + IR_ASSERT(ctx->vregs[r1]); + ival1 = ctx->live_intervals[ctx->vregs[r1]]; + IR_ASSERT(ival1); + o1 = ival1->stack_spill_pos; + IR_ASSERT(o1 != -1); + return o1 == offset; +} + static void *ir_resolve_sym_name(const char *name) { void *handle = NULL; diff --git a/ir_private.h b/ir_private.h index e4b27bb..a5c8f8b 100644 --- a/ir_private.h +++ b/ir_private.h @@ -1009,15 +1009,14 @@ struct _ir_live_range { /* ir_live_interval.flags bits (two low bits are reserved for temporary register number) */ #define IR_LIVE_INTERVAL_FIXED (1<<0) #define IR_LIVE_INTERVAL_TEMP (1<<1) -#define IR_LIVE_INTERVAL_VAR (1<<2) -#define IR_LIVE_INTERVAL_COALESCED (1<<3) -#define IR_LIVE_INTERVAL_HAS_HINT_REGS (1<<4) -#define IR_LIVE_INTERVAL_HAS_HINT_REFS (1<<5) -#define IR_LIVE_INTERVAL_MEM_PARAM (1<<6) -#define IR_LIVE_INTERVAL_MEM_LOAD (1<<7) -#define IR_LIVE_INTERVAL_SPILL_SPECIAL (1<<8) /* spill slot is pre-allocated in a special area (see ir_ctx.spill_reserved_base) */ -#define IR_LIVE_INTERVAL_SPILLED (1<<9) -#define IR_LIVE_INTERVAL_SPLIT_CHILD (1<<10) +#define IR_LIVE_INTERVAL_HAS_HINT_REGS (1<<2) +#define IR_LIVE_INTERVAL_HAS_HINT_REFS (1<<3) +#define IR_LIVE_INTERVAL_MEM_PARAM (1<<4) +#define IR_LIVE_INTERVAL_MEM_LOAD (1<<5) +#define 
IR_LIVE_INTERVAL_COALESCED (1<<6) +#define IR_LIVE_INTERVAL_SPILL_SPECIAL (1<<7) /* spill slot is pre-allocated in a special area (see ir_ctx.spill_reserved_base) */ +#define IR_LIVE_INTERVAL_SPILLED (1<<8) +#define IR_LIVE_INTERVAL_SPLIT_CHILD (1<<9) struct _ir_live_interval { uint8_t type; diff --git a/ir_ra.c b/ir_ra.c index 3134dab..b0dea3b 100644 --- a/ir_ra.c +++ b/ir_ra.c @@ -115,29 +115,6 @@ int ir_assign_virtual_registers(ir_ctx *ctx) /* Lifetime intervals construction */ -static void ir_add_local_var(ir_ctx *ctx, int v, uint8_t type) -{ - ir_live_interval *ival = ctx->live_intervals[v]; - - IR_ASSERT(!ival); - - ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); - IR_ASSERT(type != IR_VOID); - ival->type = type; - ival->reg = IR_REG_NONE; - ival->flags = IR_LIVE_INTERVAL_VAR; - ival->vreg = v; - ival->stack_spill_pos = -1; // not allocated - ival->range.start = IR_START_LIVE_POS_FROM_REF(1); - ival->range.end = ival->end = IR_END_LIVE_POS_FROM_REF(ctx->insns_count - 1); - ival->range.next = NULL; - ival->use_pos = NULL; - ival->next = NULL; - - ctx->live_intervals[v] = ival; - ctx->flags |= IR_LR_HAVE_VARS; -} - static ir_live_interval *ir_new_live_range(ir_ctx *ctx, int v, ir_live_pos start, ir_live_pos end) { ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); @@ -577,8 +554,11 @@ int ir_compute_live_ranges(ir_ctx *ctx) memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count); } + /* Root of the list of IR_VARs */ + ctx->vars = IR_UNUSED; + /* Compute Live Ranges */ - ctx->flags &= ~(IR_LR_HAVE_VARS|IR_LR_HAVE_DESSA_MOVES); + ctx->flags &= ~IR_LR_HAVE_DESSA_MOVES; len = ir_bitset_len(ctx->vregs_count + 1); bb_live = ir_mem_malloc((ctx->cfg_blocks_count + 1) * len * sizeof(ir_bitset_base_t)); @@ -690,6 +670,11 @@ int ir_compute_live_ranges(ir_ctx *ctx) int n; if (ctx->rules[ref] & (IR_FUSED|IR_SKIPPED)) { + if (ctx->rules[ref] == (IR_SKIPPED|IR_VAR) && ctx->use_lists[ref].count > 0) { + insn = 
&ctx->ir_base[ref]; + insn->op3 = ctx->vars; + ctx->vars = ref; + } continue; } @@ -715,89 +700,66 @@ int ir_compute_live_ranges(ir_ctx *ctx) insn = &ctx->ir_base[ref]; v = ctx->vregs[ref]; if (v) { - if (ir_bitset_in(live, v)) { - if (insn->op == IR_RLOAD) { - /* live.remove(opd) */ - ir_bitset_excl(live, v); - ival = ir_fix_live_range(ctx, v, - IR_START_LIVE_POS_FROM_REF(bb->start), IR_DEF_LIVE_POS_FROM_REF(ref)); - ival->type = insn->type; - /* Fixed RLOADs are handled without live-ranges */ - IR_ASSERT(!IR_REGSET_IN(IR_REGSET_UNION(ctx->fixed_regset, IR_REGSET_FIXED), insn->op2)); - ir_add_use(ctx, ival, 0, IR_DEF_LIVE_POS_FROM_REF(ref), insn->op2, IR_USE_SHOULD_BE_IN_REG, 0); - continue; - } else if (insn->op != IR_PHI) { - ir_live_pos def_pos; - ir_ref hint_ref = 0; - ir_reg reg = constraints.def_reg; + IR_ASSERT(ir_bitset_in(live, v)); - if (reg != IR_REG_NONE) { - def_pos = IR_SAVE_LIVE_POS_FROM_REF(ref); - if (insn->op == IR_PARAM) { - /* parameter register must be kept before it's copied */ - ir_add_fixed_live_range(ctx, reg, - IR_START_LIVE_POS_FROM_REF(bb->start), def_pos); - } else { - ir_add_fixed_live_range(ctx, reg, - IR_DEF_LIVE_POS_FROM_REF(ref), def_pos); - } - } else if (def_flags & IR_DEF_REUSES_OP1_REG) { - if (!IR_IS_CONST_REF(insn->op1) && ctx->vregs[insn->op1]) { - hint_ref = insn->op1; - } - def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); - } else if (def_flags & IR_DEF_CONFLICTS_WITH_INPUT_REGS) { - def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + if (insn->op == IR_RLOAD) { + /* live.remove(opd) */ + ir_bitset_excl(live, v); + ival = ir_fix_live_range(ctx, v, + IR_START_LIVE_POS_FROM_REF(bb->start), IR_DEF_LIVE_POS_FROM_REF(ref)); + ival->type = insn->type; + /* Fixed RLOADs are handled without live-ranges */ + IR_ASSERT(!IR_REGSET_IN(IR_REGSET_UNION(ctx->fixed_regset, IR_REGSET_FIXED), insn->op2)); + ir_add_use(ctx, ival, 0, IR_DEF_LIVE_POS_FROM_REF(ref), insn->op2, IR_USE_SHOULD_BE_IN_REG, 0); + continue; + } else if (insn->op != IR_PHI) { + 
ir_live_pos def_pos; + ir_ref hint_ref = 0; + ir_reg reg = constraints.def_reg; + + if (reg != IR_REG_NONE) { + def_pos = IR_SAVE_LIVE_POS_FROM_REF(ref); + if (insn->op == IR_PARAM) { + /* parameter register must be kept before it's copied */ + ir_add_fixed_live_range(ctx, reg, + IR_START_LIVE_POS_FROM_REF(bb->start), def_pos); } else { - if (insn->op == IR_PARAM) { - /* We may reuse parameter stack slot for spilling */ - ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_PARAM; - } else if (insn->op == IR_VLOAD) { - /* Load may be fused into the usage instruction */ - ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_LOAD; - } - def_pos = IR_DEF_LIVE_POS_FROM_REF(ref); + ir_add_fixed_live_range(ctx, reg, + IR_DEF_LIVE_POS_FROM_REF(ref), def_pos); } - /* live.remove(opd) */ - ir_bitset_excl(live, v); - /* intervals[opd].setFrom(op.id) */ - ival = ir_fix_live_range(ctx, v, - IR_START_LIVE_POS_FROM_REF(bb->start), def_pos); - ival->type = insn->type; - ir_add_use(ctx, ival, 0, def_pos, reg, def_flags, hint_ref); + } else if (def_flags & IR_DEF_REUSES_OP1_REG) { + if (!IR_IS_CONST_REF(insn->op1) && ctx->vregs[insn->op1]) { + hint_ref = insn->op1; + } + def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + } else if (def_flags & IR_DEF_CONFLICTS_WITH_INPUT_REGS) { + def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); } else { - /* live.remove(opd) */ - ir_bitset_excl(live, v); - /* PHIs inputs must not be processed */ - ival = ctx->live_intervals[v]; - ival->type = insn->type; - ir_add_use(ctx, ival, 0, IR_DEF_LIVE_POS_FROM_REF(ref), IR_REG_NONE, IR_USE_SHOULD_BE_IN_REG, 0); - continue; + if (insn->op == IR_PARAM) { + /* We may reuse parameter stack slot for spilling */ + ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_PARAM; + } else if (insn->op == IR_VLOAD) { + /* Load may be fused into the usage instruction */ + ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_LOAD; + } + def_pos = IR_DEF_LIVE_POS_FROM_REF(ref); } + /* live.remove(opd) */ + ir_bitset_excl(live, v); + 
/* intervals[opd].setFrom(op.id) */ + ival = ir_fix_live_range(ctx, v, + IR_START_LIVE_POS_FROM_REF(bb->start), def_pos); + ival->type = insn->type; + ir_add_use(ctx, ival, 0, def_pos, reg, def_flags, hint_ref); } else { - IR_ASSERT(insn->op == IR_VAR); - IR_ASSERT(ctx->use_lists[ref].count > 0); - ir_add_local_var(ctx, v, insn->type); + /* live.remove(opd) */ + ir_bitset_excl(live, v); + /* PHIs inputs must not be processed */ + ival = ctx->live_intervals[v]; + ival->type = insn->type; + ir_add_use(ctx, ival, 0, IR_DEF_LIVE_POS_FROM_REF(ref), IR_REG_NONE, IR_USE_SHOULD_BE_IN_REG, 0); continue; } -#if 0 - } else if (insn->op == IR_RSTORE - && insn->op2 == ref - 1 /* previous istruction */ - && ctx->vregs[insn->op2] - && (insn-1)->op != IR_RLOAD - && ctx->use_lists[insn->op2].count == 1 - && IR_REGSET_IN(IR_REGSET_UNION(ctx->fixed_regset, IR_REGSET_FIXED), insn->op3)) { - - ir_live_pos use_pos = IR_USE_LIVE_POS_FROM_REF(ref); - - ival = ir_add_live_range(ctx, ctx->vregs[insn->op2], - IR_START_LIVE_POS_FROM_REF(bb->start), use_pos); - ir_add_use(ctx, ival, 2, use_pos, IR_REG_NONE, 0, IR_UNUSED); - ir_bitset_incl(live, ctx->vregs[insn->op2]); - ival->flags = IR_LIVE_INTERVAL_REG_LOAD; - ival->reg = insn->op3; - continue; -#endif } IR_ASSERT(insn->op != IR_PHI && (!ctx->rules || !(ctx->rules[ref] & (IR_FUSED|IR_SKIPPED)))); @@ -810,60 +772,58 @@ int ir_compute_live_ranges(ir_ctx *ctx) p++; } for (; j <= n; j++, p++) { - if (IR_OPND_KIND(flags, j) == IR_OPND_DATA) { - ir_ref input = *p; - uint8_t use_flags = IR_USE_FLAGS(def_flags, j); - ir_reg reg = (j < constraints.hints_count) ? constraints.hints[j] : IR_REG_NONE; + ir_ref input = *p; + uint8_t use_flags = IR_USE_FLAGS(def_flags, j); + ir_reg reg = (j < constraints.hints_count) ? 
constraints.hints[j] : IR_REG_NONE; - if (input > 0 && ctx->vregs[input]) { - ir_live_pos use_pos; - ir_ref hint_ref = 0; + if (input > 0 && ctx->vregs[input]) { + ir_live_pos use_pos; + ir_ref hint_ref = 0; - if ((def_flags & IR_DEF_REUSES_OP1_REG) && j == 1) { - use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); - IR_ASSERT(ctx->vregs[ref]); - hint_ref = ref; - if (reg != IR_REG_NONE) { - ir_add_fixed_live_range(ctx, reg, - use_pos, IR_USE_LIVE_POS_FROM_REF(ref)); - } - } else { - if (reg != IR_REG_NONE) { - use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); - ir_add_fixed_live_range(ctx, reg, - use_pos, IR_USE_LIVE_POS_FROM_REF(ref)); - } else if ((def_flags & IR_DEF_REUSES_OP1_REG) && input == insn->op1) { - /* Input is the same as "op1" */ - use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); - } else { - use_pos = IR_USE_LIVE_POS_FROM_REF(ref); - } - } - - uint32_t v = ctx->vregs[input]; - if (!ir_bitset_in(live, v)) { - /* live.add(opd) */ - ir_bitset_incl(live, v); - /* intervals[opd].addRange(b.from, op.id) */ - ival = ir_add_live_range(ctx, v, - IR_START_LIVE_POS_FROM_REF(bb->start), use_pos); - } else { - ival = ctx->live_intervals[v]; - } - ir_add_use(ctx, ival, j, use_pos, reg, use_flags, hint_ref); - } else if (input > 0) { - IR_ASSERT(ctx->rules); - if (ctx->rules[input] & IR_FUSED) { - ir_add_fusion_ranges(ctx, ref, input, bb, live); - } else if (ctx->rules[input] == (IR_SKIPPED|IR_RLOAD)) { - ir_set_alocated_reg(ctx, ref, j, ctx->ir_base[input].op2); + if ((def_flags & IR_DEF_REUSES_OP1_REG) && j == 1) { + use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + IR_ASSERT(ctx->vregs[ref]); + hint_ref = ref; + if (reg != IR_REG_NONE) { + ir_add_fixed_live_range(ctx, reg, + use_pos, IR_USE_LIVE_POS_FROM_REF(ref)); } } else { if (reg != IR_REG_NONE) { + use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); ir_add_fixed_live_range(ctx, reg, - IR_LOAD_LIVE_POS_FROM_REF(ref), IR_USE_LIVE_POS_FROM_REF(ref)); + use_pos, IR_USE_LIVE_POS_FROM_REF(ref)); + } else if ((def_flags & IR_DEF_REUSES_OP1_REG) && input 
== insn->op1) { + /* Input is the same as "op1" */ + use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + } else { + use_pos = IR_USE_LIVE_POS_FROM_REF(ref); } } + + uint32_t v = ctx->vregs[input]; + if (!ir_bitset_in(live, v)) { + /* live.add(opd) */ + ir_bitset_incl(live, v); + /* intervals[opd].addRange(b.from, op.id) */ + ival = ir_add_live_range(ctx, v, + IR_START_LIVE_POS_FROM_REF(bb->start), use_pos); + } else { + ival = ctx->live_intervals[v]; + } + ir_add_use(ctx, ival, j, use_pos, reg, use_flags, hint_ref); + } else if (input > 0) { + IR_ASSERT(ctx->rules); + if (ctx->rules[input] & IR_FUSED) { + ir_add_fusion_ranges(ctx, ref, input, bb, live); + } else if (ctx->rules[input] == (IR_SKIPPED|IR_RLOAD)) { + ir_set_alocated_reg(ctx, ref, j, ctx->ir_base[input].op2); + } + } else { + if (reg != IR_REG_NONE) { + ir_add_fixed_live_range(ctx, reg, + IR_LOAD_LIVE_POS_FROM_REF(ref), IR_USE_LIVE_POS_FROM_REF(ref)); + } } } } @@ -994,7 +954,7 @@ static void ir_compute_live_sets(ir_ctx *ctx, uint32_t *live_outs, ir_list *live for (i = ctx->insns_count - 1; i > 0; i--) { uint32_t v = ctx->vregs[i]; - if (v && ctx->ir_base[i].op != IR_VAR) { + if (v) { uint32_t def_block = ctx->cfg_map[i]; ir_use_list *use_list = &ctx->use_lists[i]; ir_ref *p, n = use_list->count; @@ -1257,8 +1217,11 @@ int ir_compute_live_ranges(ir_ctx *ctx) memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count); } + /* Root of the list of IR_VARs */ + ctx->vars = IR_UNUSED; + /* Compute Live Ranges */ - ctx->flags &= ~(IR_LR_HAVE_VARS|IR_LR_HAVE_DESSA_MOVES); + ctx->flags &= ~IR_LR_HAVE_DESSA_MOVES; /* vregs + tmp + fixed + SRATCH + ALL */ ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*)); @@ -1332,6 +1295,11 @@ int ir_compute_live_ranges(ir_ctx *ctx) int n; if (ctx->rules[ref] & (IR_FUSED|IR_SKIPPED)) { + if (ctx->rules[ref] == (IR_SKIPPED|IR_VAR) && ctx->use_lists[ref].count > 0) { + insn = &ctx->ir_base[ref]; + insn->op3 = ctx->vars; + 
ctx->vars = ref; + } continue; } @@ -1357,84 +1325,59 @@ int ir_compute_live_ranges(ir_ctx *ctx) insn = &ctx->ir_base[ref]; v = ctx->vregs[ref]; if (v) { - if (EXPECTED(insn->op != IR_VAR)) { - if (insn->op == IR_RLOAD) { - ival = ir_fix_live_range(ctx, v, - IR_START_LIVE_POS_FROM_REF(bb->start), IR_DEF_LIVE_POS_FROM_REF(ref)); - ival->type = insn->type; - /* Fixed RLOADs are handled without live-ranges */ - IR_ASSERT(!IR_REGSET_IN(IR_REGSET_UNION(ctx->fixed_regset, IR_REGSET_FIXED), insn->op2)); - ir_add_use(ctx, ival, 0, IR_DEF_LIVE_POS_FROM_REF(ref), insn->op2, IR_USE_SHOULD_BE_IN_REG, 0); - continue; - } else if (insn->op != IR_PHI) { - ir_live_pos def_pos; - ir_ref hint_ref = 0; - ir_reg reg = constraints.def_reg; + if (insn->op == IR_RLOAD) { + ival = ir_fix_live_range(ctx, v, + IR_START_LIVE_POS_FROM_REF(bb->start), IR_DEF_LIVE_POS_FROM_REF(ref)); + ival->type = insn->type; + /* Fixed RLOADs are handled without live-ranges */ + IR_ASSERT(!IR_REGSET_IN(IR_REGSET_UNION(ctx->fixed_regset, IR_REGSET_FIXED), insn->op2)); + ir_add_use(ctx, ival, 0, IR_DEF_LIVE_POS_FROM_REF(ref), insn->op2, IR_USE_SHOULD_BE_IN_REG, 0); + continue; + } else if (insn->op != IR_PHI) { + ir_live_pos def_pos; + ir_ref hint_ref = 0; + ir_reg reg = constraints.def_reg; - if (reg != IR_REG_NONE) { - ir_live_pos start_pos; + if (reg != IR_REG_NONE) { + ir_live_pos start_pos; - def_pos = IR_SAVE_LIVE_POS_FROM_REF(ref); - if (insn->op == IR_PARAM) { - /* parameter register must be kept before it's copied */ - start_pos = IR_START_LIVE_POS_FROM_REF(bb->start); - } else { - start_pos = IR_DEF_LIVE_POS_FROM_REF(ref); - } - ir_add_fixed_live_range(ctx, reg, start_pos, def_pos); - } else if (def_flags & IR_DEF_REUSES_OP1_REG) { - if (!IR_IS_CONST_REF(insn->op1) && ctx->vregs[insn->op1]) { - hint_ref = insn->op1; - } - def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); - } else if (def_flags & IR_DEF_CONFLICTS_WITH_INPUT_REGS) { - def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + def_pos = 
IR_SAVE_LIVE_POS_FROM_REF(ref); + if (insn->op == IR_PARAM) { + /* parameter register must be kept before it's copied */ + start_pos = IR_START_LIVE_POS_FROM_REF(bb->start); } else { - if (insn->op == IR_PARAM) { - /* We may reuse parameter stack slot for spilling */ - ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_PARAM; - } else if (insn->op == IR_VLOAD) { - /* Load may be fused into the usage instruction */ - ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_LOAD; - } - def_pos = IR_DEF_LIVE_POS_FROM_REF(ref); + start_pos = IR_DEF_LIVE_POS_FROM_REF(ref); } - /* intervals[opd].setFrom(op.id) */ - ival = ir_fix_live_range(ctx, v, - IR_START_LIVE_POS_FROM_REF(bb->start), def_pos); - ival->type = insn->type; - ir_add_use(ctx, ival, 0, def_pos, reg, def_flags, hint_ref); + ir_add_fixed_live_range(ctx, reg, start_pos, def_pos); + } else if (def_flags & IR_DEF_REUSES_OP1_REG) { + if (!IR_IS_CONST_REF(insn->op1) && ctx->vregs[insn->op1]) { + hint_ref = insn->op1; + } + def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + } else if (def_flags & IR_DEF_CONFLICTS_WITH_INPUT_REGS) { + def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); } else { - /* PHIs inputs must not be processed */ - ival = ctx->live_intervals[v]; - ival->type = insn->type; - ir_add_use(ctx, ival, 0, IR_DEF_LIVE_POS_FROM_REF(ref), IR_REG_NONE, IR_USE_SHOULD_BE_IN_REG, 0); - continue; + if (insn->op == IR_PARAM) { + /* We may reuse parameter stack slot for spilling */ + ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_PARAM; + } else if (insn->op == IR_VLOAD) { + /* Load may be fused into the usage instruction */ + ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_LOAD; + } + def_pos = IR_DEF_LIVE_POS_FROM_REF(ref); } + /* intervals[opd].setFrom(op.id) */ + ival = ir_fix_live_range(ctx, v, + IR_START_LIVE_POS_FROM_REF(bb->start), def_pos); + ival->type = insn->type; + ir_add_use(ctx, ival, 0, def_pos, reg, def_flags, hint_ref); } else { - IR_ASSERT(insn->op == IR_VAR); - 
IR_ASSERT(ctx->use_lists[ref].count > 0); - ir_add_local_var(ctx, v, insn->type); + /* PHIs inputs must not be processed */ + ival = ctx->live_intervals[v]; + ival->type = insn->type; + ir_add_use(ctx, ival, 0, IR_DEF_LIVE_POS_FROM_REF(ref), IR_REG_NONE, IR_USE_SHOULD_BE_IN_REG, 0); continue; } -#if 0 - } else if (insn->op == IR_RSTORE - && insn->op2 == ref - 1 /* previous istruction */ - && ctx->vregs[insn->op2] - && (insn-1)->op != IR_RLOAD - && ctx->use_lists[insn->op2].count == 1 - && IR_REGSET_IN(IR_REGSET_UNION(ctx->fixed_regset, IR_REGSET_FIXED), insn->op3)) { - - ir_live_pos use_pos = IR_USE_LIVE_POS_FROM_REF(ref); - - ival = ir_add_live_range(ctx, ctx->vregs[insn->op2], - IR_START_LIVE_POS_FROM_REF(bb->start), use_pos); - ir_add_use(ctx, ival, 2, use_pos, IR_REG_NONE, 0, IR_UNUSED); - ir_bitset_incl(live, ctx->vregs[insn->op2]); - ival->flags = IR_LIVE_INTERVAL_REG_LOAD; - ival->reg = insn->op3; - continue; -#endif } IR_ASSERT(insn->op != IR_PHI && (!ctx->rules || !(ctx->rules[ref] & (IR_FUSED|IR_SKIPPED)))); @@ -1447,61 +1390,59 @@ int ir_compute_live_ranges(ir_ctx *ctx) p++; } for (; j <= n; j++, p++) { - if (IR_OPND_KIND(flags, j) == IR_OPND_DATA) { - ir_ref input = *p; - ir_reg reg = (j < constraints.hints_count) ? constraints.hints[j] : IR_REG_NONE; + ir_ref input = *p; + ir_reg reg = (j < constraints.hints_count) ? 
constraints.hints[j] : IR_REG_NONE; - if (input > 0 && ctx->vregs[input]) { - ir_live_pos use_pos; - ir_ref hint_ref = 0; + if (input > 0 && ctx->vregs[input]) { + ir_live_pos use_pos; + ir_ref hint_ref = 0; - if ((def_flags & IR_DEF_REUSES_OP1_REG) && j == 1) { - use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); - IR_ASSERT(ctx->vregs[ref]); - hint_ref = ref; - if (reg != IR_REG_NONE) { - ir_add_fixed_live_range(ctx, reg, - use_pos, IR_USE_LIVE_POS_FROM_REF(ref)); - } - } else { - if (reg != IR_REG_NONE) { - use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); - ir_add_fixed_live_range(ctx, reg, - use_pos, IR_USE_LIVE_POS_FROM_REF(ref)); - } else if ((def_flags & IR_DEF_REUSES_OP1_REG) && input == insn->op1) { - /* Input is the same as "op1" */ - use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); - } else { - use_pos = IR_USE_LIVE_POS_FROM_REF(ref); - } - } - - uint32_t v = ctx->vregs[input]; - if (!IS_LIVE_IN_BLOCK(v, b)) { - /* live.add(opd) */ - SET_LIVE_IN_BLOCK(v, b); - /* intervals[opd].addRange(b.from, op.id) */ - ival = ir_add_live_range(ctx, v, - IR_START_LIVE_POS_FROM_REF(bb->start), use_pos); - } else { - ival = ctx->live_intervals[v]; - } - - uint8_t use_flags = IR_USE_FLAGS(def_flags, j); - ir_add_use(ctx, ival, j, use_pos, reg, use_flags, hint_ref); - } else if (input > 0) { - IR_ASSERT(ctx->rules); - if (ctx->rules[input] & IR_FUSED) { - ir_add_fusion_ranges(ctx, ref, input, bb, live_in_block, b); - } else if (ctx->rules[input] == (IR_SKIPPED|IR_RLOAD)) { - ir_set_alocated_reg(ctx, ref, j, ctx->ir_base[input].op2); + if ((def_flags & IR_DEF_REUSES_OP1_REG) && j == 1) { + use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + IR_ASSERT(ctx->vregs[ref]); + hint_ref = ref; + if (reg != IR_REG_NONE) { + ir_add_fixed_live_range(ctx, reg, + use_pos, IR_USE_LIVE_POS_FROM_REF(ref)); } } else { if (reg != IR_REG_NONE) { + use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); ir_add_fixed_live_range(ctx, reg, - IR_LOAD_LIVE_POS_FROM_REF(ref), IR_USE_LIVE_POS_FROM_REF(ref)); + use_pos, 
IR_USE_LIVE_POS_FROM_REF(ref)); + } else if ((def_flags & IR_DEF_REUSES_OP1_REG) && input == insn->op1) { + /* Input is the same as "op1" */ + use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + } else { + use_pos = IR_USE_LIVE_POS_FROM_REF(ref); } } + + uint32_t v = ctx->vregs[input]; + if (!IS_LIVE_IN_BLOCK(v, b)) { + /* live.add(opd) */ + SET_LIVE_IN_BLOCK(v, b); + /* intervals[opd].addRange(b.from, op.id) */ + ival = ir_add_live_range(ctx, v, + IR_START_LIVE_POS_FROM_REF(bb->start), use_pos); + } else { + ival = ctx->live_intervals[v]; + } + + uint8_t use_flags = IR_USE_FLAGS(def_flags, j); + ir_add_use(ctx, ival, j, use_pos, reg, use_flags, hint_ref); + } else if (input > 0) { + IR_ASSERT(ctx->rules); + if (ctx->rules[input] & IR_FUSED) { + ir_add_fusion_ranges(ctx, ref, input, bb, live_in_block, b); + } else if (ctx->rules[input] == (IR_SKIPPED|IR_RLOAD)) { + ir_set_alocated_reg(ctx, ref, j, ctx->ir_base[input].op2); + } + } else { + if (reg != IR_REG_NONE) { + ir_add_fixed_live_range(ctx, reg, + IR_LOAD_LIVE_POS_FROM_REF(ref), IR_USE_LIVE_POS_FROM_REF(ref)); + } } } } @@ -3172,8 +3113,6 @@ static bool ir_ival_spill_for_fuse_load(ir_ctx *ctx, ir_live_interval *ival, ir_ return 1; } else if (ival->flags & IR_LIVE_INTERVAL_MEM_LOAD) { - ir_live_interval *var_ival; - insn = &ctx->ir_base[IR_LIVE_POS_TO_REF(use_pos->pos)]; IR_ASSERT(insn->op == IR_VLOAD); use_pos = use_pos->next; @@ -3189,11 +3128,7 @@ static bool ir_ival_spill_for_fuse_load(ir_ctx *ctx, ir_live_interval *ival, ir_ } IR_ASSERT(ctx->ir_base[insn->op2].op == IR_VAR); - var_ival = ctx->live_intervals[ctx->vregs[insn->op2]]; - if (var_ival->stack_spill_pos == -1) { - var_ival->stack_spill_pos = ir_allocate_spill_slot(ctx, var_ival->type, data); - } - ival->stack_spill_pos = var_ival->stack_spill_pos; + ival->stack_spill_pos = ctx->ir_base[insn->op2].op3; return 1; } @@ -3249,6 +3184,7 @@ static int ir_linear_scan(ir_ctx *ctx) ir_live_pos position; ir_reg reg; ir_reg_alloc_data data; + ir_ref vars = 
ctx->vars; if (!ctx->live_intervals) { return 0; @@ -3271,25 +3207,19 @@ static int ir_linear_scan(ir_ctx *ctx) data.unused_slot_2 = 0; data.unused_slot_1 = 0; - if (ctx->flags & IR_LR_HAVE_VARS) { - for (j = 1; j <= ctx->vregs_count; j++) { - ival = ctx->live_intervals[j]; - if (ival) { - if (ival->flags & IR_LIVE_INTERVAL_VAR) { - if (ival->stack_spill_pos == -1) { - ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type, &data); - } - } - } - } + while (vars) { + ir_insn *insn = &ctx->ir_base[vars]; + + IR_ASSERT(insn->op == IR_VAR); + vars = insn->op3; /* list next */ + + insn->op3 = ir_allocate_spill_slot(ctx, insn->type, &data); } for (j = ctx->vregs_count; j != 0; j--) { ival = ctx->live_intervals[j]; if (ival) { - if (ival->flags & IR_LIVE_INTERVAL_VAR) { - /* pass */ - } else if (!(ival->flags & (IR_LIVE_INTERVAL_MEM_PARAM|IR_LIVE_INTERVAL_MEM_LOAD)) + if (!(ival->flags & (IR_LIVE_INTERVAL_MEM_PARAM|IR_LIVE_INTERVAL_MEM_LOAD)) || !ir_ival_spill_for_fuse_load(ctx, ival, &data)) { ir_add_to_unhandled(&unhandled, ival); } @@ -3502,6 +3432,8 @@ static int ir_linear_scan(ir_ctx *ctx) } } + ctx->stack_frame_size = data.stack_frame_size; + #ifdef IR_DEBUG if (ctx->flags & IR_DEBUG_RA) { fprintf(stderr, "---- Finish LSRA\n"); diff --git a/ir_x86.dasc b/ir_x86.dasc index 64bda56..8f93bef 100644 --- a/ir_x86.dasc +++ b/ir_x86.dasc @@ -1366,12 +1366,7 @@ binop_fp: } return insn->op; case IR_VAR: - if (ctx->use_lists[ref].count > 0) { - return IR_VAR; - } else { - return IR_SKIPPED | IR_VAR; - } - break; + return IR_SKIPPED | IR_VAR; case IR_ALLOCA: /* alloca() may be use only in functions */ if (ctx->flags & IR_FUNCTION) { @@ -1880,6 +1875,16 @@ static int32_t ir_ref_spill_slot(ir_ctx *ctx, ir_ref ref, ir_reg *reg) return IR_SPILL_POS_TO_OFFSET(offset); } +static int32_t ir_var_spill_slot(ir_ctx *ctx, ir_ref ref, ir_reg *reg) +{ + ir_backend_data *data = ctx->data; + ir_insn *var_insn = &ctx->ir_base[ref]; + + IR_ASSERT(var_insn->op == IR_VAR); + *reg = 
(ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + return IR_SPILL_POS_TO_OFFSET(var_insn->op3); +} + static void ir_emit_load_imm_int(ir_ctx *ctx, ir_type type, ir_reg reg, int64_t val) { ir_backend_data *data = ctx->data; @@ -2542,7 +2547,7 @@ static void ir_emit_mem_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) ir_emit_load(ctx, IR_ADDR, reg, insn->op2); } } else if (insn->op == IR_VSTORE) { - offset = ir_ref_spill_slot(ctx, insn->op2, &reg); + offset = ir_var_spill_slot(ctx, insn->op2, &reg); } else { IR_ASSERT(0); return; @@ -2777,7 +2782,7 @@ static void ir_emit_mem_mul_div_mod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn) ir_emit_load(ctx, IR_ADDR, reg, insn->op2); } } else if (insn->op == IR_VSTORE) { - offset = ir_ref_spill_slot(ctx, insn->op2, &reg); + offset = ir_var_spill_slot(ctx, insn->op2, &reg); } else { IR_ASSERT(0); return; @@ -2884,7 +2889,7 @@ static void ir_emit_mem_shift(ir_ctx *ctx, ir_ref def, ir_insn *insn) ir_emit_load(ctx, IR_ADDR, reg, insn->op2); } } else if (insn->op == IR_VSTORE) { - offset = ir_ref_spill_slot(ctx, insn->op2, &reg); + offset = ir_var_spill_slot(ctx, insn->op2, &reg); } else { IR_ASSERT(0); return; @@ -2998,7 +3003,7 @@ static void ir_emit_mem_shift_const(ir_ctx *ctx, ir_ref def, ir_insn *insn) ir_emit_load(ctx, IR_ADDR, reg, insn->op2); } } else if (insn->op == IR_VSTORE) { - offset = ir_ref_spill_slot(ctx, insn->op2, &reg); + offset = ir_var_spill_slot(ctx, insn->op2, &reg); } else { IR_ASSERT(0); return; @@ -3101,7 +3106,7 @@ static void ir_emit_mem_op_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) ir_emit_load(ctx, IR_ADDR, reg, insn->op2); } } else if (insn->op == IR_VSTORE) { - offset = ir_ref_spill_slot(ctx, insn->op2, &reg); + offset = ir_var_spill_slot(ctx, insn->op2, &reg); } else { IR_ASSERT(0); return; @@ -5156,7 +5161,7 @@ static void ir_emit_vaddr(ir_ctx *ctx, ir_ref def, ir_insn *insn) ir_reg fp; IR_ASSERT(def_reg != IR_REG_NONE); - offset = ir_ref_spill_slot(ctx, insn->op1, &fp); + offset = 
ir_var_spill_slot(ctx, insn->op1, &fp); | lea Ra(def_reg), aword [Ra(fp)+offset] if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { ir_emit_store(ctx, type, def, def_reg); @@ -5165,15 +5170,26 @@ static void ir_emit_vaddr(ir_ctx *ctx, ir_ref def, ir_insn *insn) static void ir_emit_vload(ir_ctx *ctx, ir_ref def, ir_insn *insn) { + ir_backend_data *data = ctx->data; + ir_insn *var_insn = &ctx->ir_base[insn->op2]; ir_ref type = insn->type; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg fp; + int32_t offset; - if (def_reg == IR_REG_NONE && ir_is_same_mem(ctx, insn->op2, def)) { + IR_ASSERT(var_insn->op == IR_VAR); + fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3); + if (def_reg == IR_REG_NONE && ir_is_same_mem_var(ctx, def, var_insn->op3)) { return; // fake load } IR_ASSERT(def_reg != IR_REG_NONE); - ir_emit_load(ctx, type, def_reg, insn->op2); + if (IR_IS_TYPE_INT(type)) { + ir_emit_load_mem_int(ctx, type, def_reg, fp, offset); + } else { + ir_emit_load_mem_fp(ctx, type, def_reg, fp, offset); + } if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { ir_emit_store(ctx, type, def, def_reg); } @@ -5181,33 +5197,48 @@ static void ir_emit_vload(ir_ctx *ctx, ir_ref def, ir_insn *insn) static void ir_emit_vstore_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn) { + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *var_insn = &ctx->ir_base[insn->op2]; ir_insn *val_insn = &ctx->ir_base[insn->op3]; ir_ref type = val_insn->type; ir_reg op3_reg = ctx->regs[ref][3]; + ir_reg fp; + int32_t offset; + IR_ASSERT(var_insn->op == IR_VAR); + fp = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3); if ((op3_reg == IR_REG_NONE || (op3_reg & IR_REG_SPILL_LOAD)) - && !IR_IS_CONST_REF(insn->op3) && ir_is_same_mem(ctx, insn->op3, insn->op2)) { + && !IR_IS_CONST_REF(insn->op3) && ir_is_same_mem_var(ctx, insn->op3, var_insn->op3)) { return; // fake store } if (IR_IS_CONST_REF(insn->op3) && IR_IS_32BIT(type, val_insn->val)) { - ir_emit_store_imm(ctx, type, insn->op2, val_insn->val.i32); + | ASM_MEM_IMM_OP mov, type, [Ra(fp)+offset], val_insn->val.i32 } else { IR_ASSERT(op3_reg != IR_REG_NONE); if ((op3_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(insn->op3)) { op3_reg &= ~IR_REG_SPILL_LOAD; ir_emit_load(ctx, type, op3_reg, insn->op3); } - ir_emit_store(ctx, type, insn->op2, op3_reg); + ir_emit_store_mem_int(ctx, type, fp, offset, op3_reg); } } static void ir_emit_vstore_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn) { + ir_backend_data *data = ctx->data; + ir_insn *var_insn = &ctx->ir_base[insn->op2]; ir_ref type = ctx->ir_base[insn->op3].type; ir_reg op3_reg = ctx->regs[ref][3]; + ir_reg fp; + int32_t offset; + IR_ASSERT(var_insn->op == IR_VAR); + fp = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3); if ((op3_reg == IR_REG_NONE || (op3_reg & IR_REG_SPILL_LOAD)) - && !IR_IS_CONST_REF(insn->op3) && ir_is_same_mem(ctx, insn->op3, insn->op2)) { + && !IR_IS_CONST_REF(insn->op3) && ir_is_same_mem_var(ctx, insn->op3, var_insn->op3)) { return; // fake store } IR_ASSERT(op3_reg != IR_REG_NONE); @@ -5215,7 +5246,7 @@ static void ir_emit_vstore_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn) op3_reg &= ~IR_REG_SPILL_LOAD; ir_emit_load(ctx, type, op3_reg, insn->op3); } - ir_emit_store(ctx, type, insn->op2, op3_reg); + ir_emit_store_mem_fp(ctx, type, fp, offset, op3_reg); } static void ir_emit_load_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) @@ -7579,46 +7610,48 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx) } else { ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type, &data->ra_data); } - if (insn->op == IR_VAR) { - ir_use_list *use_list = &ctx->use_lists[i]; - ir_ref i, n, *p, use; - ir_insn *use_insn; - int32_t stack_spill_pos = ival->stack_spill_pos; - - n = use_list->count; - for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { - use = *p; - use_insn = &ctx->ir_base[use]; - if (use_insn->op == IR_VLOAD) { - if (ctx->vregs[use] - && !ctx->live_intervals[ctx->vregs[use]]) { - ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); - memset(ival, 0, sizeof(ir_live_interval)); - ctx->live_intervals[ctx->vregs[use]] = ival; - ival->type = insn->type; - ival->reg = IR_REG_NONE; - ival->vreg = ctx->vregs[use]; - ival->stack_spill_pos = stack_spill_pos; - } - } else if (use_insn->op == IR_VSTORE) { - if (!IR_IS_CONST_REF(use_insn->op3) - && ctx->vregs[use_insn->op3] - && !ctx->live_intervals[ctx->vregs[use_insn->op3]]) { - ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); - memset(ival, 0, sizeof(ir_live_interval)); - ctx->live_intervals[ctx->vregs[use_insn->op3]] = ival; - ival->type = insn->type; 
- ival->reg = IR_REG_NONE; - ival->vreg = ctx->vregs[insn->op3]; - ival->stack_spill_pos = stack_spill_pos; - } - } - } - } } else if (insn->op == IR_PARAM) { IR_ASSERT(0 && "unexpected PARAM"); return; } + } else if (insn->op == IR_VAR) { + ir_use_list *use_list = &ctx->use_lists[i]; + ir_ref n = use_list->count; + + if (n > 0) { + int32_t stack_spill_pos = insn->op3 = ir_allocate_spill_slot(ctx, insn->type, &data->ra_data); + ir_ref i, *p, use; + ir_insn *use_insn; + + for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { + use = *p; + use_insn = &ctx->ir_base[use]; + if (use_insn->op == IR_VLOAD) { + if (ctx->vregs[use] + && !ctx->live_intervals[ctx->vregs[use]]) { + ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); + memset(ival, 0, sizeof(ir_live_interval)); + ctx->live_intervals[ctx->vregs[use]] = ival; + ival->type = insn->type; + ival->reg = IR_REG_NONE; + ival->vreg = ctx->vregs[use]; + ival->stack_spill_pos = stack_spill_pos; + } + } else if (use_insn->op == IR_VSTORE) { + if (!IR_IS_CONST_REF(use_insn->op3) + && ctx->vregs[use_insn->op3] + && !ctx->live_intervals[ctx->vregs[use_insn->op3]]) { + ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); + memset(ival, 0, sizeof(ir_live_interval)); + ctx->live_intervals[ctx->vregs[use_insn->op3]] = ival; + ival->type = insn->type; + ival->reg = IR_REG_NONE; + ival->vreg = ctx->vregs[insn->op3]; + ival->stack_spill_pos = stack_spill_pos; + } + } + } + } } insn_flags = ir_op_flags[insn->op]; @@ -7754,11 +7787,6 @@ static void ir_calc_stack_frame_size(ir_ctx *ctx, ir_backend_data *data) for (i = 1, p = ctx->live_intervals + i; i <= ctx->vregs_count; i++, p++) { ival = *p; if (ival) { - if (ival->stack_spill_pos != -1 && !(ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL)) { - if (ival->stack_spill_pos + ir_type_size[ival->type] > data->ra_data.stack_frame_size) { - data->ra_data.stack_frame_size = ival->stack_spill_pos + ir_type_size[ival->type]; - 
} - } if (ival->reg != IR_REG_NONE) { if (!IR_REGSET_IN(data->used_preserved_regs, ival->reg) && !IR_REGSET_IN(fixed_regset, ival->reg) @@ -7846,8 +7874,7 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) void *entry; size_t size; - ctx->data = &data; - data.ra_data.stack_frame_size = 0; + data.ra_data.stack_frame_size = (!ctx->live_intervals) ? 0 : ctx->stack_frame_size; data.ra_data.unused_slot_4 = 0; data.ra_data.unused_slot_2 = 0; data.ra_data.unused_slot_1 = 0; @@ -7864,6 +7891,7 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) data.float_neg_const = 0; data.double_abs_const = 0; data.float_abs_const = 0; + ctx->data = &data; if (!ctx->live_intervals) { ir_allocate_unique_spill_slots(ctx);