#define _GNU_SOURCE #include "ir.h" #include "ir_private.h" #include #include "ir_x86.h" #ifdef IR_DEBUG uint32_t debug_regset = 0xffffffff; /* all 32 regisers */ #endif /* RA - Register Allocation, Liveness, Coalescing and SSA Resolution */ int ir_assign_virtual_registers(ir_ctx *ctx) { uint32_t *vregs; uint32_t vregs_count = 0; int b, i, n; ir_block *bb; ir_insn *insn; uint32_t flags; /* Assign unique virtual register to each data node */ if (!ctx->prev_insn_len) { ctx->prev_insn_len = ir_mem_malloc(ctx->insns_count * sizeof(uint32_t)); } vregs = ir_mem_calloc(ctx->insns_count, sizeof(ir_ref)); for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) { n = b; /* The first insn of BB keeps BB number in prev_insn_len[] */ for (i = bb->start, insn = ctx->ir_base + i; i <= bb->end;) { ctx->prev_insn_len[i] = n; /* The first insn of BB keeps BB number in prev_insn_len[] */ flags = ir_op_flags[insn->op]; if ((flags & IR_OP_FLAG_DATA) || ((flags & IR_OP_FLAG_MEM) && insn->type != IR_VOID)) { if ((insn->op == IR_PARAM || insn->op == IR_VAR) && ctx->use_lists[i].count == 0) { /* pass */ } else if (insn->op == IR_VAR && ctx->use_lists[i].count > 0) { vregs[i] = ++vregs_count; /* for spill slot */ } else if (!ctx->rules || ir_needs_vreg(ctx, i)) { vregs[i] = ++vregs_count; } } n = ir_operands_count(ctx, insn); n = 1 + (n >> 2); // support for multi-word instructions like MERGE and PHI i += n; insn += n; } } ctx->vregs_count = vregs_count; ctx->vregs = vregs; return 1; } /* Lifetime intervals construction * * See "Linear Scan Register Allocation on SSA Form", Christian Wimmer and * Michael Franz, CGO'10 (2010), Figure 4. 
*/ static void ir_add_local_var(ir_ctx *ctx, int v, uint8_t type) { ir_live_interval *ival = ctx->live_intervals[v]; IR_ASSERT(!ival); ival = ir_mem_malloc(sizeof(ir_live_interval)); IR_ASSERT(type != IR_VOID); ival->type = type; ival->reg = IR_REG_NONE; ival->flags = 0; ival->stack_spill_pos = 0; // not allocated ival->range.start = 0; ival->range.end = ctx->insns_count; ival->range.next = NULL; ival->use_pos = NULL; ival->top = ival; ival->next = NULL; ctx->live_intervals[v] = ival; } static void ir_add_live_range(ir_ctx *ctx, int v, uint8_t type, ir_live_pos start, ir_live_pos end) { ir_live_interval *ival = ctx->live_intervals[v]; ir_live_range *p, *q, *next, *prev; if (!ival) { ival = ir_mem_malloc(sizeof(ir_live_interval)); IR_ASSERT(type != IR_VOID); ival->type = type; ival->reg = IR_REG_NONE; ival->flags = 0; ival->stack_spill_pos = 0; // not allocated ival->range.start = start; ival->range.end = end; ival->range.next = NULL; ival->use_pos = NULL; ival->top = ival; ival->next = NULL; ctx->live_intervals[v] = ival; return; } IR_ASSERT(type == IR_VOID || type == ival->type); p = &ival->range; prev = NULL; while (p && end >= p->start) { if (p->end >= start) { if (start < p->start) { p->start = start; } if (end > p->end) { p->end = end; /* merge with next */ next = p->next; while (next && p->end >= next->start) { if (next->end > p->end) { p->end = next->end; } p->next = next->next; /* list of deleted structures is keapt at ctx->unused_live_ranges for reuse */ next->next = ctx->unused_live_ranges; ctx->unused_live_ranges = next; next = p->next; } } return; } prev = p; p = prev->next; } if (ctx->unused_live_ranges) { /* reuse */ q = ctx->unused_live_ranges; ctx->unused_live_ranges = q->next; } else { q = ir_mem_malloc(sizeof(ir_live_range)); } if (prev) { prev->next = q; } else { q->start = ival->range.start; q->end = ival->range.end; q->next = ival->range.next; p = q; q = &ival->range; } q->start = start; q->end = end; q->next = p; } static void 
ir_add_fixed_live_range(ir_ctx *ctx, ir_reg reg, ir_live_pos start, ir_live_pos end) { int v = ctx->vregs_count + 1 + reg; ir_live_interval *ival = ctx->live_intervals[v]; if (!ival) { ival = ir_mem_malloc(sizeof(ir_live_interval)); ival->type = IR_VOID; ival->reg = reg; ival->flags = 0; ival->stack_spill_pos = 0; // not allocated ival->range.start = start; ival->range.end = end; ival->range.next = NULL; ival->use_pos = NULL; ival->top = ival; ival->next = NULL; ctx->live_intervals[v] = ival; return; } ir_add_live_range(ctx, v, IR_VOID, start, end); } static void ir_add_tmp(ir_ctx *ctx, ir_ref ref, uint8_t num, ir_tmp_reg tmp_reg) { ir_live_interval *ival = ir_mem_malloc(sizeof(ir_live_interval)); ival->type = tmp_reg.type; ival->reg = IR_REG_NONE; ival->flags = IR_LIVE_INTERVAL_TEMP | num; ival->stack_spill_pos = 0; // not allocated ival->range.start = IR_START_LIVE_POS_FROM_REF(ref) + tmp_reg.start; ival->range.end = IR_START_LIVE_POS_FROM_REF(ref) + tmp_reg.end; ival->range.next = NULL; ival->use_pos = NULL; if (!ctx->live_intervals[0]) { ival->top = ival; ival->next = NULL; ctx->live_intervals[0] = ival; } else if (ival->range.start >= ctx->live_intervals[0]->top->range.start) { ir_live_interval *prev = ctx->live_intervals[0]->top; while (prev->next && ival->range.start >= prev->next->range.start) { prev = prev->next; } ival->top = prev->top; ival->next = prev->next; prev->next = ival; } else { ir_live_interval *next = ctx->live_intervals[0]; ival->top = ival; ival->next = next; ctx->live_intervals[0] = ival; while (next) { next->top = ival; next = next->next; } } return; } static void ir_fix_live_range(ir_ctx *ctx, int v, ir_live_pos old_start, ir_live_pos new_start) { ir_live_range *p = &ctx->live_intervals[v]->range; while (p && p->start < old_start) { p = p->next; } IR_ASSERT(p && p->start == old_start); p->start = new_start; } static void ir_add_use_pos(ir_ctx *ctx, int v, ir_use_pos *use_pos) { ir_live_interval *ival = ctx->live_intervals[v]; ir_use_pos 
*prev = NULL; ir_use_pos *p = ival->use_pos; while (p && (p->pos < use_pos->pos || (p->pos == use_pos->pos && (use_pos->op_num == 0 || p->op_num < use_pos->op_num)))) { prev = p; p = p->next; } if (prev) { use_pos->next = prev->next; prev->next = use_pos; } else { use_pos->next = ival->use_pos; ival->use_pos = use_pos; } } static void ir_add_use(ir_ctx *ctx, int v, int op_num, ir_live_pos pos, ir_reg hint, ir_ref hint_ref) { ir_use_pos *use_pos; use_pos = ir_mem_malloc(sizeof(ir_use_pos)); use_pos->op_num = op_num; use_pos->hint = hint; use_pos->hint_ref = hint_ref; use_pos->pos = pos; use_pos->flags = ctx->rules ? ir_get_use_flags(ctx, IR_LIVE_POS_TO_REF(pos), op_num) : 0; ir_add_use_pos(ctx, v, use_pos); } int ir_compute_live_ranges(ir_ctx *ctx) { int i, j, k, n; int b, succ; uint32_t flags, len; ir_insn *insn; ir_block *bb, *succ_bb; ir_bitset visited, live; ir_bitset loops = NULL; ir_bitset queue = NULL; ir_reg reg; if (!(ctx->flags & IR_LINEAR) || !ctx->vregs) { return 0; } /* Compute Live Ranges */ visited = ir_bitset_malloc(ctx->cfg_blocks_count + 1); len = ir_bitset_len(ctx->vregs_count + 1); live = ir_bitset_malloc((ctx->cfg_blocks_count + 1) * len * 8 * sizeof(*live)); ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM, sizeof(ir_live_interval*)); for (b = ctx->cfg_blocks_count; b > 0; b--) { bb = &ctx->cfg_blocks[b]; /* for each successor of b */ ir_bitset_incl(visited, b); ir_bitset_clear(live, len); for (i = 0; i < bb->successors_count; i++) { succ = ctx->cfg_edges[bb->successors + i]; /* blocks must be ordered where all dominators of a block are before this block */ IR_ASSERT(ir_bitset_in(visited, succ) || bb->loop_header == succ); /* live = union of successors.liveIn */ ir_bitset_union(live, live + (len * succ), len); /* for each phi function phi of successor */ succ_bb = &ctx->cfg_blocks[succ]; if (succ_bb->predecessors_count > 1) { ir_use_list *use_list = &ctx->use_lists[succ_bb->start]; k = 0; for (j = 0; j < 
succ_bb->predecessors_count; j++) { if (ctx->cfg_edges[succ_bb->predecessors + j] == b) { k = j + 2; break; } } IR_ASSERT(k != 0); for (j = 0; j < use_list->count; j++) { insn = &ctx->ir_base[ctx->use_edges[use_list->refs + j]]; if (insn->op == IR_PHI) { if (insn->ops[k] > 0) { /* live.add(phi.inputOf(b)) */ IR_ASSERT(ctx->vregs[insn->ops[k]]); ir_bitset_incl(live, ctx->vregs[insn->ops[k]]); // TODO: ir_add_live_range() is used just to set ival->type /* intervals[phi.inputOf(b)].addRange(b.from, b.to) */ ir_add_live_range(ctx, ctx->vregs[insn->ops[k]], insn->type, IR_START_LIVE_POS_FROM_REF(bb->start), IR_END_LIVE_POS_FROM_REF(bb->end)); } } } } } /* for each opd in live */ IR_BITSET_FOREACH(live, len, i) { /* intervals[opd].addRange(b.from, b.to) */ ir_add_live_range(ctx, i, IR_VOID, IR_START_LIVE_POS_FROM_REF(bb->start), IR_END_LIVE_POS_FROM_REF(bb->end)); } IR_BITSET_FOREACH_END(); /* for each operation op of b in reverse order */ for (i = bb->end; i > bb->start; i -= ctx->prev_insn_len[i]) { insn = &ctx->ir_base[i]; flags = ir_op_flags[insn->op]; if (ctx->rules) { ir_tmp_reg tmp_regs[4]; int n = ir_get_temporary_regs(ctx, i, tmp_regs); while (n > 0) { n--; ir_add_tmp(ctx, i, tmp_regs[n].num, tmp_regs[n]); } } if ((flags & IR_OP_FLAG_DATA) || ((flags & IR_OP_FLAG_MEM) && insn->type != IR_VOID)) { if (ctx->vregs[i]) { if (ir_bitset_in(live, ctx->vregs[i])) { if (insn->op != IR_PHI) { ir_live_pos def_pos; ir_ref hint_ref = 0; reg = ctx->rules ? 
ir_uses_fixed_reg(ctx, i, 0) : IR_REG_NONE; if (reg != IR_REG_NONE) { def_pos = IR_SAVE_LIVE_POS_FROM_REF(i); if (insn->op == IR_PARAM) { /* parameter register must be kept before it's copied */ ir_add_fixed_live_range(ctx, reg, IR_START_LIVE_POS_FROM_REF(bb->start), def_pos); } else { ir_add_fixed_live_range(ctx, reg, IR_DEF_LIVE_POS_FROM_REF(i), def_pos); } } else if (ctx->rules && ir_result_reuses_op1_reg(ctx, i)) { /* We add two uses to emulate move from op1 to res */ ir_add_use(ctx, ctx->vregs[i], 0, IR_DEF_LIVE_POS_FROM_REF(i), reg, hint_ref); def_pos = IR_LOAD_LIVE_POS_FROM_REF(i); hint_ref = IR_IS_CONST_REF(insn->op1) ? 0 : insn->op1; } else { def_pos = IR_DEF_LIVE_POS_FROM_REF(i); } /* intervals[opd].setFrom(op.id) */ ir_fix_live_range(ctx, ctx->vregs[i], IR_START_LIVE_POS_FROM_REF(bb->start), def_pos); ir_add_use(ctx, ctx->vregs[i], 0, def_pos, reg, hint_ref); } else { ir_add_use(ctx, ctx->vregs[i], 0, IR_DEF_LIVE_POS_FROM_REF(i), IR_REG_NONE, 0); } /* live.remove(opd) */ ir_bitset_excl(live, ctx->vregs[i]); } else if (insn->op == IR_VAR) { if (ctx->use_lists[i].count > 0) { ir_add_local_var(ctx, ctx->vregs[i], insn->type); } } } } if (insn->op != IR_PHI) { n = ir_input_edges_count(ctx, insn); for (j = 1; j <= n; j++) { if (IR_OPND_KIND(flags, j) == IR_OPND_DATA) { ir_ref input = insn->ops[j]; if (input > 0 && ctx->vregs[input]) { ir_live_pos use_pos; if (ctx->rules && j == 1 && ir_result_reuses_op1_reg(ctx, i)) { use_pos = IR_LOAD_LIVE_POS_FROM_REF(i); reg = ctx->rules ? ir_uses_fixed_reg(ctx, i, j) : IR_REG_NONE; if (reg != IR_REG_NONE) { ir_add_fixed_live_range(ctx, reg, use_pos, IR_USE_LIVE_POS_FROM_REF(i)); } } else { reg = ctx->rules ? 
ir_uses_fixed_reg(ctx, i, j) : IR_REG_NONE; if (reg != IR_REG_NONE) { use_pos = IR_LOAD_LIVE_POS_FROM_REF(i); ir_add_fixed_live_range(ctx, reg, use_pos, IR_USE_LIVE_POS_FROM_REF(i)); } else if (j > 1 && input == insn->op1 && ctx->rules && ir_result_reuses_op1_reg(ctx, i)) { /* Input is the same as "op1" */ use_pos = IR_LOAD_LIVE_POS_FROM_REF(i); } else { use_pos = IR_USE_LIVE_POS_FROM_REF(i); } } /* intervals[opd].addRange(b.from, op.id) */ ir_add_live_range(ctx, ctx->vregs[input], ctx->ir_base[input].type, IR_START_LIVE_POS_FROM_REF(bb->start), use_pos); ir_add_use(ctx, ctx->vregs[input], j, use_pos, reg, 0); /* live.add(opd) */ ir_bitset_incl(live, ctx->vregs[input]); } } } /* CPU specific constraints */ if (ctx->rules) { ir_regset regset = ir_get_scratch_regset(ctx, i); if (regset != IR_REGSET_EMPTY) { IR_REGSET_FOREACH(regset, reg) { ir_add_fixed_live_range(ctx, reg, IR_LOAD_LIVE_POS_FROM_REF(i), // TODO: LOAD instead of USE disables register usage for input // this is necessary for DIV and MOD, but not for MUL IR_DEF_LIVE_POS_FROM_REF(i)); } IR_REGSET_FOREACH_END(); } } } } /* if b is loop header */ if ((bb->flags & IR_BB_LOOP_HEADER) && !ir_bitset_empty(live, len)) { /* variables live at loop header are alive at the whole loop body */ uint32_t bb_set_len = ir_bitset_len(ctx->cfg_blocks_count + 1); int child; ir_block *child_bb; if (!loops) { loops = ir_bitset_malloc(ctx->cfg_blocks_count + 1); queue = ir_bitset_malloc(ctx->cfg_blocks_count + 1); } else { ir_bitset_clear(loops, bb_set_len); ir_bitset_clear(queue, bb_set_len); } ir_bitset_incl(loops, b); ir_bitset_incl(queue, b); do { child = ir_bitset_pop_first(queue, bb_set_len); child_bb = &ctx->cfg_blocks[child]; IR_BITSET_FOREACH(live, len, i) { ir_add_live_range(ctx, i, IR_VOID, IR_START_LIVE_POS_FROM_REF(child_bb->start), IR_END_LIVE_POS_FROM_REF(child_bb->end)); } IR_BITSET_FOREACH_END(); child = child_bb->dom_child; while (child) { child_bb = &ctx->cfg_blocks[child]; if (child_bb->loop_header && 
ir_bitset_in(loops, child_bb->loop_header)) { ir_bitset_incl(queue, child); if (child_bb->flags & IR_BB_LOOP_HEADER) { ir_bitset_incl(loops, child); } } child = child_bb->dom_next_child; } } while (!ir_bitset_empty(queue, bb_set_len)); } /* b.liveIn = live */ ir_bitset_copy(live + (len * b), live, len); } if (loops) { ir_mem_free(loops); ir_mem_free(queue); } ir_mem_free(live); ir_mem_free(visited); return 1; } void ir_free_live_ranges(ir_live_range *live_range) { ir_live_range *p; while (live_range) { p = live_range; live_range = live_range->next; ir_mem_free(p); } } void ir_free_live_intervals(ir_live_interval **live_intervals, int count) { uint32_t i; ir_live_interval *ival; for (i = 1; i <= count; i++) { ival = live_intervals[i]; if (ival) { if (ival->range.next) { ir_free_live_ranges(ival->range.next); } ir_mem_free(ival); } } ir_mem_free(live_intervals); } /* Live Ranges coalescing */ static ir_live_pos ir_vregs_overlap(ir_ctx *ctx, uint32_t r1, uint32_t r2) { ir_live_range *lrg1 = &ctx->live_intervals[r1]->range; ir_live_range *lrg2 = &ctx->live_intervals[r2]->range; while (1) { if (lrg1->start < lrg2->end) { if (lrg2->start < lrg1->end) { return IR_MAX(lrg1->start, lrg2->start); } else { lrg1 = lrg1->next; if (!lrg1) { return 0; } } } else { lrg2 = lrg2->next; if (!lrg2) { return 0; } } } } static void ir_vregs_join(ir_ctx *ctx, uint32_t r1, uint32_t r2) { ir_live_interval *ival = ctx->live_intervals[r2]; ir_live_range *live_range = &ival->range; ir_live_range *next; ir_use_pos *use_pos; #if 0 fprintf(stderr, "COALESCE %d -> %d\n", r2, r1); #endif ir_add_live_range(ctx, r1, ival->type, live_range->start, live_range->end); live_range = live_range->next; while (live_range) { ir_add_live_range(ctx, r1, ival->type, live_range->start, live_range->end); next = live_range->next; live_range->next = ctx->unused_live_ranges; ctx->unused_live_ranges = live_range; live_range = next; } while (live_range); use_pos = ival->use_pos; while (use_pos) { ir_use_pos 
*next_use_pos = use_pos->next; if (ctx->vregs[use_pos->hint_ref] == r1) { use_pos->hint_ref = 0; } ir_add_use_pos(ctx, r1, use_pos); use_pos = next_use_pos; } ir_mem_free(ival); ctx->live_intervals[r2] = NULL; ctx->live_intervals[r1]->flags |= IR_LIVE_INTERVAL_COALESCED; } static bool ir_try_coalesce(ir_ctx *ctx, ir_ref from, ir_ref to) { ir_ref i; int v1 = ctx->vregs[from]; int v2 = ctx->vregs[to]; if (v1 != v2 && !ir_vregs_overlap(ctx, v1, v2)) { uint8_t f1 = ctx->live_intervals[v1]->flags; uint8_t f2 = ctx->live_intervals[v2]->flags; if ((f1 & IR_LIVE_INTERVAL_COALESCED) && !(f2 & IR_LIVE_INTERVAL_COALESCED)) { ir_vregs_join(ctx, v1, v2); ctx->vregs[to] = v1; } else if ((f2 & IR_LIVE_INTERVAL_COALESCED) && !(f1 & IR_LIVE_INTERVAL_COALESCED)) { ir_vregs_join(ctx, v2, v1); ctx->vregs[from] = v2; } else if (v1 < v2) { ir_vregs_join(ctx, v1, v2); if (f2 & IR_LIVE_INTERVAL_COALESCED) { for (i = 0; i < ctx->insns_count; i++) { if (ctx->vregs[i] == v2) { ctx->vregs[i] = v1; } } } else { ctx->vregs[to] = v1; } } else { ir_vregs_join(ctx, v2, v1); if (f1 & IR_LIVE_INTERVAL_COALESCED) { for (i = 0; i < ctx->insns_count; i++) { if (ctx->vregs[i] == v1) { ctx->vregs[i] = v2; } } } else { ctx->vregs[from] = v2; } } return 1; } return 0; } static void ir_add_phi_move(ir_ctx *ctx, int b, ir_ref from, ir_ref to) { if (IR_IS_CONST_REF(from) || ctx->vregs[from] != ctx->vregs[to]) { ctx->cfg_blocks[b].flags |= IR_BB_DESSA_MOVES; #if 0 fprintf(stderr, "BB%d: MOV %d -> %d\n", b, from, to); #endif } } static int ir_block_cmp(const void *b1, const void *b2, void *data) { ir_ctx *ctx = data; int d1 = ctx->cfg_blocks[*(ir_ref*)b1].loop_depth; int d2 = ctx->cfg_blocks[*(ir_ref*)b2].loop_depth; if (d1 > d2) { return -1; } else if (d1 == d2) { return 0; } else { return 1; } } static void ir_swap_operands(ir_ctx *ctx, ir_ref i, ir_insn *insn) { ir_live_pos pos = IR_USE_LIVE_POS_FROM_REF(i); ir_live_pos load_pos = IR_LOAD_LIVE_POS_FROM_REF(i); ir_live_interval *ival; ir_live_range *r; 
ir_use_pos *p, *p1 = NULL, *p2 = NULL; ir_ref tmp; tmp = insn->op1; insn->op1 = insn->op2; insn->op2 = tmp; ival = ctx->live_intervals[ctx->vregs[insn->op1]]; p = ival->use_pos; while (p) { if (p->pos == pos) { p->pos = load_pos; p->op_num = 1; p1 = p; break; } p = p->next; } ival = ctx->live_intervals[ctx->vregs[i]]; p = ival->use_pos; while (p) { if (p->pos == load_pos) { p->hint_ref = insn->op1; break; } p = p->next; } if (insn->op2 > 0 && ctx->vregs[insn->op2]) { ival = ctx->live_intervals[ctx->vregs[insn->op2]]; r = &ival->range; while (r) { if (r->end == load_pos) { r->end = pos; break; } r = r->next; } p = ival->use_pos; while (p) { if (p->pos == load_pos) { p->pos = pos; p->op_num = 2; p2 = p; break; } p = p->next; } } if (p1 && p2) { uint8_t tmp = p1->flags; p1->flags = p2->flags; p2->flags = tmp; } } static int ir_hint_conflict(ir_ctx *ctx, ir_ref ref, int use, int def) { ir_use_pos *p; ir_reg r1 = IR_REG_NONE; ir_reg r2 = IR_REG_NONE; p = ctx->live_intervals[use]->use_pos; while (p) { if (IR_LIVE_POS_TO_REF(p->pos) == ref) { break; } if (p->hint != IR_REG_NONE) { r1 = p->hint; } p = p->next; } p = ctx->live_intervals[def]->use_pos; while (p) { if (IR_LIVE_POS_TO_REF(p->pos) > ref) { if (p->hint != IR_REG_NONE) { r2 = p->hint; break; } } p = p->next; } return r1 != r2 && r1 != IR_REG_NONE && r2 != IR_REG_NONE; } static int ir_try_swap_operands(ir_ctx *ctx, ir_ref i, ir_insn *insn) { if (insn->op1 > 0 && ctx->vregs[insn->op1] != ctx->vregs[i] && !ir_vregs_overlap(ctx, ctx->vregs[insn->op1], ctx->vregs[i]) && !ir_hint_conflict(ctx, i, ctx->vregs[insn->op1], ctx->vregs[i])) { /* pass */ } else if (insn->op2 > 0 && insn->op1 != insn->op2 && (ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE)) { if (ctx->vregs[insn->op2] != ctx->vregs[i]) { ir_live_pos pos = IR_USE_LIVE_POS_FROM_REF(i); ir_live_pos load_pos = IR_LOAD_LIVE_POS_FROM_REF(i); ir_live_interval *ival = ctx->live_intervals[ctx->vregs[insn->op2]]; ir_live_range *r = &ival->range; while (r) { if (r->end 
== pos) { r->end = load_pos; if (!ir_vregs_overlap(ctx, ctx->vregs[insn->op2], ctx->vregs[i]) && !ir_hint_conflict(ctx, i, ctx->vregs[insn->op2], ctx->vregs[i])) { ir_swap_operands(ctx, i, insn); return 1; } else { r->end = pos; } break; } r = r->next; } } } return 0; } int ir_coalesce(ir_ctx *ctx) { int b, i, n, succ; ir_ref *p, use, input, k, j; ir_block *bb, *succ_bb; ir_use_list *use_list; ir_insn *insn; uint32_t *offsets; ir_worklist blocks; bool compact = 0; /* Collect a list of blocks which are predecossors to block with phi finctions */ ir_worklist_init(&blocks, ctx->cfg_blocks_count + 1); for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) { if (bb->predecessors_count > 1) { use_list = &ctx->use_lists[bb->start]; n = use_list->count; for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { use = *p; insn = &ctx->ir_base[use]; if (insn->op == IR_PHI) { k = ir_input_edges_count(ctx, insn); for (j = 2; j <= k; j++) { ir_worklist_push(&blocks, ctx->cfg_edges[bb->predecessors + (j-2)]); } } } } } qsort_r(blocks.l.a.refs, ir_worklist_len(&blocks), sizeof(ir_ref), ir_block_cmp, ctx); while (ir_worklist_len(&blocks)) { b = ir_worklist_pop(&blocks); bb = &ctx->cfg_blocks[b]; IR_ASSERT(bb->successors_count == 1); succ = ctx->cfg_edges[bb->successors]; succ_bb = &ctx->cfg_blocks[succ]; IR_ASSERT(succ_bb->predecessors_count > 1); k = 0; for (j = 0; j < succ_bb->predecessors_count; j++) { if (ctx->cfg_edges[succ_bb->predecessors + j] == b) { k = j + 2; break; } } IR_ASSERT(k != 0); use_list = &ctx->use_lists[succ_bb->start]; n = use_list->count; for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { use = *p; insn = &ctx->ir_base[use]; if (insn->op == IR_PHI) { input = insn->ops[k]; if (input > 0) { if (!ir_try_coalesce(ctx, input, use)) { ir_add_phi_move(ctx, b, input, use); } else { compact = 1; } } else { /* Move for constant input */ ir_add_phi_move(ctx, b, input, use); } } } } ir_worklist_free(&blocks); #if 1 if 
(ctx->rules) { /* try to swap operands of commutative instructions for better register allocation */ for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) { for (i = bb->start, insn = ctx->ir_base + i; i <= bb->end;) { if (ir_result_reuses_op1_reg(ctx, i)) { if (insn->op2 > 0 && insn->op1 != insn->op2 && (ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE)) { ir_try_swap_operands(ctx, i, insn); } // if (insn->op1 > 0) { // ir_try_coalesce(ctx, insn->op1, i); // } // } else if (insn->op == IR_COPY) { // if (insn->op1 > 0) { // ir_try_coalesce(ctx, insn->op1, i); // } } n = ir_operands_count(ctx, insn); n = 1 + (n >> 2); // support for multi-word instructions like MERGE and PHI i += n; insn += n; } } } #endif if (compact) { #if 1 offsets = ir_mem_calloc(ctx->vregs_count + 1, sizeof(uint32_t)); for (i = 1, n = 1; i <= ctx->vregs_count; i++) { if (ctx->live_intervals[i]) { if (i != n) { ctx->live_intervals[n] = ctx->live_intervals[i]; offsets[i] = i - n; } n++; } } n--; if (n != ctx->vregs_count) { j = ctx->vregs_count - n; for (i = n + 1; i <= ctx->vregs_count + IR_REG_NUM; i++) { ctx->live_intervals[i] = ctx->live_intervals[i + j]; } for (j = 1; j < ctx->insns_count; j++) { if (ctx->vregs[j]) { ctx->vregs[j] -= offsets[ctx->vregs[j]]; } } ctx->vregs_count = n; } ir_mem_free(offsets); #endif } return 1; } /* SSA Deconstruction */ int ir_compute_dessa_moves(ir_ctx *ctx) { int b, i, n; ir_ref j, k, *p, use; ir_block *bb; ir_use_list *use_list; ir_insn *insn; for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) { if (bb->predecessors_count > 1) { use_list = &ctx->use_lists[bb->start]; n = use_list->count; for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { use = *p; insn = &ctx->ir_base[use]; if (insn->op == IR_PHI) { k = ir_input_edges_count(ctx, insn); for (j = 2; j <= k; j++) { if (IR_IS_CONST_REF(insn->ops[j]) || ctx->vregs[insn->ops[j]] != ctx->vregs[use]) { int pred = ctx->cfg_edges[bb->predecessors + 
(j-2)];
							/* the move has to be emitted at the end of the predecessor block */
							ctx->cfg_blocks[pred].flags |= IR_BB_DESSA_MOVES;
						}
					}
				}
			}
		}
	}
	return 1;
}

/*
 * Emit, through the "emit_copy" callback, the copies that resolve the PHI
 * functions of the single successor of block "b".
 *
 * Returns 0 without doing anything if the block is not flagged with
 * IR_BB_DESSA_MOVES, 1 otherwise.
 *
 * Copies from constants are emitted immediately. Register to register copies
 * are collected first and then emitted in an order that never overwrites a
 * source that is still needed:
 *   pred[d] - the register that has to be copied into register "d"
 *   loc[s]  - the register that currently holds the value of "s"
 *             (0 = "s" is not a source, or its value was moved aside)
 *   todo    - destinations that still wait for their copy
 *   ready   - destinations that can be written right now
 * A copy with destination 0 is requested from "emit_copy" to break a cycle.
 */
int ir_gen_dessa_moves(ir_ctx *ctx, int b, emit_copy_t emit_copy)
{
	int succ, j, k = 0, n = 0;
	ir_block *bb, *succ_bb;
	ir_use_list *use_list;
	uint8_t *type;
	uint32_t *loc, *pred;
	uint32_t len;
	ir_bitset todo, ready;

	bb = &ctx->cfg_blocks[b];
	if (!(bb->flags & IR_BB_DESSA_MOVES)) {
		return 0;
	}
	IR_ASSERT(bb->successors_count == 1);
	succ = ctx->cfg_edges[bb->successors];
	succ_bb = &ctx->cfg_blocks[succ];
	IR_ASSERT(succ_bb->predecessors_count > 1);
	use_list = &ctx->use_lists[succ_bb->start];

	/* k = operand number of the PHI input that comes from "b" */
	for (j = 0; j < succ_bb->predecessors_count; j++) {
		if (ctx->cfg_edges[succ_bb->predecessors + j] == b) {
			k = j + 2;
			break;
		}
	}
	IR_ASSERT(k != 0);

	type = ir_mem_calloc((ctx->vregs_count + 1), sizeof(uint8_t));
	/* "loc" and "pred" share one allocation: "pred" is its second half */
	loc = ir_mem_calloc((ctx->vregs_count + 1) * 2, sizeof(uint32_t));
	pred = loc + (ctx->vregs_count + 1);
	len = ir_bitset_len(ctx->vregs_count + 1);
	todo = ir_bitset_malloc(ctx->vregs_count + 1);
	ready = ir_bitset_malloc(ctx->vregs_count + 1);

	for (j = 0; j < use_list->count; j++) {
		ir_ref ref = ctx->use_edges[use_list->refs + j];
		ir_insn *insn = &ctx->ir_base[ref];
		if (insn->op == IR_PHI) {
			ir_ref input = insn->ops[k];
			if (IR_IS_CONST_REF(input)) {
				emit_copy(ctx, insn->type, input, ctx->vregs[ref]);
			} else if (ctx->vregs[input] != ctx->vregs[ref]) {
				loc[ctx->vregs[input]] = ctx->vregs[input];
				pred[ctx->vregs[ref]] = ctx->vregs[input];
				type[ctx->vregs[ref]] = insn->type;
				ir_bitset_incl(todo, ctx->vregs[ref]);
				n++;
			}
		}
	}

	/* a destination that is not a source of another copy may be written at once */
	IR_BITSET_FOREACH(todo, len, j) {
		if (!loc[j]) {
			ir_bitset_incl(ready, j);
		}
	} IR_BITSET_FOREACH_END();

	while (!ir_bitset_empty(todo, len)) {
		/* NOTE: this "b" shadows the block number parameter */
		uint32_t a, b, c;

		while (!ir_bitset_empty(ready, len)) {
			b = ir_bitset_pop_first(ready, len);
			a = pred[b];
			c = loc[a];
			emit_copy(ctx, type[b], c, b);
			loc[a] = b;
			/* "a" was still in its own register and is a destination itself: it is free now */
			if (a == c && pred[a]) {
				ir_bitset_incl(ready, a);
			}
		}
		b = ir_bitset_pop_first(todo, len);
		if (b != loc[pred[b]]) {
			/* "b" has not received its value yet: move its old value aside (destination 0) */
			emit_copy(ctx, type[b], b, 0);
			loc[b] = 0;
			ir_bitset_incl(ready, b);
		}
	}

	ir_mem_free(ready);
	ir_mem_free(todo);
	ir_mem_free(loc);
	ir_mem_free(type);

	return 1;
}

/* Linear Scan Register Allocation
 *
 * See "Optimized Interval Splitting in a Linear Scan Register Allocator",
 * Christian Wimmer VEE'05 (2005), Figure 2.
 */
typedef struct _ir_lsra_data {
	uint32_t stack_frame_size; /* running total of the spill area, see ir_allocate_spill_slot() */
} ir_lsra_data;

/*
 * Tracing helpers of the allocator. They print to stderr when IR_DEBUG_RA is
 * set in ctx->flags (a variable named "ctx" must be in scope) and expand to
 * nothing in builds without IR_DEBUG.
 */
#ifdef IR_DEBUG
# define IR_LOG_LSRA(action, vreg, ival, comment) do { \
		if (ctx->flags & IR_DEBUG_RA) { \
			int _vreg = (vreg); \
			ir_live_interval *_ival = (ival); \
			ir_live_pos _start = _ival->range.start; \
			ir_live_pos _end = ir_ival_end(_ival); \
			fprintf(stderr, action " R%d [%d.%d...%d.%d)" comment "\n", \
				_vreg, \
				IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \
				IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end)); \
		} \
	} while (0)
# define IR_LOG_LSRA_ASSIGN(action, vreg, ival, comment) do { \
		if (ctx->flags & IR_DEBUG_RA) { \
			int _vreg = (vreg); \
			ir_live_interval *_ival = (ival); \
			ir_live_pos _start = _ival->range.start; \
			ir_live_pos _end = ir_ival_end(_ival); \
			fprintf(stderr, action " R%d [%d.%d...%d.%d) to %s" comment "\n", \
				_vreg, \
				IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \
				IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end), \
				ir_reg_name(_ival->reg, _ival->type)); \
		} \
	} while (0)
# define IR_LOG_LSRA_SPLIT(vreg, ival, pos) do { \
		if (ctx->flags & IR_DEBUG_RA) { \
			int _vreg = (vreg); \
			ir_live_interval *_ival = (ival); \
			ir_live_pos _start = _ival->range.start; \
			ir_live_pos _end = ir_ival_end(_ival); \
			ir_live_pos _pos = (pos); \
			fprintf(stderr, " ---- Split R%d [%d.%d...%d.%d) at %d.%d\n", \
				_vreg, \
				IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \
				IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end), \
				IR_LIVE_POS_TO_REF(_pos), IR_LIVE_POS_TO_SUB_REF(_pos)); \
		} \
	} while (0)
# define IR_LOG_LSRA_CONFLICT(action, vreg, ival, pos) do { \
		if (ctx->flags & IR_DEBUG_RA) { \
			int _vreg = (vreg); \
			ir_live_interval *_ival = (ival); \
			ir_live_pos _start = _ival->range.start; \
ir_live_pos _end = ir_ival_end(_ival); \ ir_live_pos _pos = (pos); \ fprintf(stderr, action " R%d [%d.%d...%d.%d) assigned to %s at %d.%d\n", \ _vreg, \ IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \ IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end), \ ir_reg_name(_ival->reg, _ival->type), \ IR_LIVE_POS_TO_REF(_pos), IR_LIVE_POS_TO_SUB_REF(_pos)); \ } \ } while (0) #else # define IR_LOG_LSRA(action, vreg, ival, comment) # define IR_LOG_LSRA_ASSIGN(action, vreg, ival, comment) # define IR_LOG_LSRA_SPLIT(vreg, ival, pos) # define IR_LOG_LSRA_CONFLICT(action, vreg, ival, pos); #endif static ir_live_pos ir_ival_end(ir_live_interval *ival) { ir_live_range *live_range = &ival->range; while (live_range->next) { live_range = live_range->next; } return live_range->end; } static bool ir_ival_covers(ir_live_interval *ival, ir_live_pos position) { ir_live_range *live_range = &ival->range; do { if (position >= live_range->start && position < live_range->end) { return 1; } live_range = live_range->next; } while (live_range); return 0; } static bool ir_ival_has_hole_between(ir_live_interval *ival, ir_live_pos from, ir_live_pos to) { ir_live_range *r = &ival->range; while (r) { if (from < r->start) { return 1; } else if (to <= r->end) { return 0; } else if (from >= r->end) { return 1; } r = r->next; } return 0; } static ir_live_pos ir_last_use_pos_before(ir_live_interval *ival, ir_live_pos pos, uint8_t flags) { ir_live_pos ret = 0; ir_use_pos *p = ival->use_pos; while (p && p->pos <= pos) { if (p->flags & flags) { ret = p->pos; } p = p->next; } return ret; } static ir_live_pos ir_first_use_pos_after(ir_live_interval *ival, ir_live_pos pos, uint8_t flags) { ir_use_pos *p = ival->use_pos; while (p && p->pos <= pos) { p = p->next; } while (p && !(p->flags & flags)) { p = p->next; } return p ? 
p->pos : 0x7fffffff;
}

/*
 * Find the basic block that contains the instruction of live position "pos".
 * Every position is expected to belong to a block (asserted).
 */
static ir_block *ir_block_from_live_pos(ir_ctx *ctx, ir_live_pos pos)
{
	ir_ref ref = IR_LIVE_POS_TO_REF(pos);
	int b;

	// TODO: use binary search or map
	for (b = 1; b <= ctx->cfg_blocks_count; b++) {
		ir_block *bb = &ctx->cfg_blocks[b];

		if (ref < bb->start || ref > bb->end) {
			continue;
		}
		return bb;
	}
	IR_ASSERT(0);
	return NULL;
}

/*
 * Choose a split position for "ival" inside [min_pos, max_pos].
 *
 * "max_pos" is returned when both positions are equal, lie in the same block,
 * or the interval has a hole between them. If "max_pos" is nested deeper in
 * loops than "min_pos", the split is moved out to the end of the block that
 * enters the loop. Otherwise the start of the block of "max_pos" is used.
 */
static ir_live_pos ir_find_optimal_split_position(ir_ctx *ctx, int v, ir_live_interval *ival, ir_live_pos min_pos, ir_live_pos max_pos)
{
	ir_block *min_bb, *max_bb;

	if (min_pos == max_pos) {
		return max_pos;
	}

	IR_ASSERT(min_pos < max_pos);
	IR_ASSERT(min_pos >= ival->range.start);
	IR_ASSERT(max_pos < ir_ival_end(ival));

	min_bb = ir_block_from_live_pos(ctx, min_pos);
	max_bb = ir_block_from_live_pos(ctx, max_pos);

	if (min_bb == max_bb) {
		return max_pos;
	}
	if (ir_ival_has_hole_between(ival, min_pos, max_pos)) { // TODO: ???
		return max_pos;
	}

	if (min_bb->loop_depth >= max_bb->loop_depth) {
		IR_ASSERT(min_bb->loop_depth == max_bb->loop_depth); // TODO: Can "min_bb" be in a deeper loop than "max_bb" ???
		return IR_LOAD_LIVE_POS_FROM_REF(max_bb->start);
	}

	/* Split at the end of the loop entry */
	while (min_bb->loop_depth < max_bb->loop_depth) {
		if (max_bb->loop_header) {
			max_bb = &ctx->cfg_blocks[max_bb->loop_header];
		}
		/* step to the first predecessor of the loop header */
		max_bb = &ctx->cfg_blocks[ctx->cfg_edges[max_bb->predecessors]];
		IR_ASSERT(ir_ival_covers(ival, IR_DEF_LIVE_POS_FROM_REF(max_bb->end)));
	}

	return IR_DEF_LIVE_POS_FROM_REF(max_bb->end);
}

static ir_live_interval *ir_split_interval_at(ir_ctx *ctx, int v, ir_live_interval *ival, ir_live_pos pos)
{
	ir_live_interval *child;
	ir_live_range *p, *prev;
	ir_use_pos *use_pos, *prev_use_pos;

	IR_LOG_LSRA_SPLIT(v, ival, pos);
	IR_ASSERT(pos > ival->range.start);

	p = &ival->range;
	prev = NULL;
	while (p && pos >= p->end) {
		prev = p;
		p = prev->next;
	}
	IR_ASSERT(p);

	if (pos < p->start) {
		/* split between ranges */
		pos = p->start;
	}

	use_pos = ival->use_pos;
	prev_use_pos = NULL;

	if (p->start == pos) {
		while (use_pos && pos > use_pos->pos) {
			prev_use_pos = use_pos;
			use_pos = use_pos->next;
		}
	} else {
		while (use_pos && pos >= use_pos->pos) {
			prev_use_pos = use_pos;
			use_pos = use_pos->next;
		}
	}

	child = ir_mem_malloc(sizeof(ir_live_interval));
	child->type = ival->type;
	child->reg = IR_REG_NONE;
	child->flags = 0;
	child->stack_spill_pos = 0; // not allocated
	child->range.start = pos;
	child->range.end = p->end;
	child->range.next = p->next;
	child->use_pos = prev_use_pos ?
prev_use_pos->next : use_pos; child->top = ival->top; child->next = ival->next; ival->next = child; if (pos == p->start) { prev->next = NULL; } else { p->end = pos; p->next = NULL; } if (prev_use_pos) { prev_use_pos->next = NULL; } else { ival->use_pos = NULL; } return child; } static void ir_allocate_spill_slot(ir_ctx *ctx, int current, ir_lsra_data *data) { ir_live_interval *ival = ctx->live_intervals[current]->top; if (ival->stack_spill_pos == 0) { data->stack_frame_size += 8; // ir_type_size[insn->type]; // TODO: alignment ival->stack_spill_pos = data->stack_frame_size; } } static ir_reg ir_try_allocate_preferred_reg(ir_ctx *ctx, ir_live_interval *ival, ir_regset available, ir_live_pos *freeUntilPos) { ir_use_pos *use_pos; use_pos = ival->use_pos; while (use_pos) { if (use_pos->hint >= 0 && IR_REGSET_IN(available, use_pos->hint)) { if (ir_ival_end(ival) <= freeUntilPos[use_pos->hint]) { /* register available for the whole interval */ return use_pos->hint; } } use_pos = use_pos->next; } use_pos = ival->use_pos; while (use_pos) { if (use_pos->hint_ref) { ir_reg reg = ctx->live_intervals[ctx->vregs[use_pos->hint_ref]]->reg; if (reg >= 0) { if (ir_ival_end(ival) <= freeUntilPos[reg]) { /* register available for the whole interval */ return reg; } } } use_pos = use_pos->next; } return IR_REG_NONE; } static void ir_add_to_unhandled(ir_ctx *ctx, ir_list *unhandled, int current) { ir_live_pos pos = ctx->live_intervals[current]->range.start; if (ir_list_len(unhandled) == 0 || pos < ctx->live_intervals[ir_list_peek(unhandled)]->range.start) { ir_list_push(unhandled, current); } else { uint32_t i = ir_list_len(unhandled); while (i > 0) { i--; if (pos < ctx->live_intervals[ir_list_at(unhandled, i)]->range.start) { i++; break; } } ir_list_insert(unhandled, i, current); } } static ir_reg ir_try_allocate_free_reg(ir_ctx *ctx, int current, uint32_t len, ir_bitset active, ir_bitset inactive) { ir_live_pos freeUntilPos[IR_REG_NUM]; int i, reg; ir_live_pos pos, next; 
ir_live_interval *ival = ctx->live_intervals[current]; ir_regset available; if (IR_IS_TYPE_FP(ival->type)) { available = IR_REGSET_FP; /* set freeUntilPos of all physical registers to maxInt */ for (i = IR_REG_FP_FIRST; i <= IR_REG_FP_LAST; i++) { freeUntilPos[i] = 0x7fffffff; } } else { available = IR_REGSET_GP; if (ctx->flags & IR_USE_FRAME_POINTER) { IR_REGSET_EXCL(available, IR_REG_FRAME_POINTER); } /* set freeUntilPos of all physical registers to maxInt */ for (i = IR_REG_GP_FIRST; i <= IR_REG_GP_LAST; i++) { freeUntilPos[i] = 0x7fffffff; } } #ifdef IR_DEBUG available &= debug_regset; #endif /* for each interval it in active */ IR_BITSET_FOREACH(active, len, i) { /* freeUntilPos[it.reg] = 0 */ reg = ctx->live_intervals[i]->reg; IR_ASSERT(reg >= 0); if (IR_REGSET_IN(available, reg)) { freeUntilPos[reg] = 0; } } IR_BITSET_FOREACH_END(); /* for each interval it in inactive intersecting with current * * This loop is not necessary for program in SSA form (see LSRA on SSA fig. 6), * but it is still necessary after coalescing and splitting */ IR_BITSET_FOREACH(inactive, len, i) { /* freeUntilPos[it.reg] = next intersection of it with current */ reg = ctx->live_intervals[i]->reg; IR_ASSERT(reg >= 0); if (IR_REGSET_IN(available, reg)) { next = ir_vregs_overlap(ctx, current, i); if (next && next < freeUntilPos[reg]) { freeUntilPos[reg] = next; } } } IR_BITSET_FOREACH_END(); /* Try to use hint */ reg = ir_try_allocate_preferred_reg(ctx, ival, available, freeUntilPos); if (reg != IR_REG_NONE) { ival->reg = reg; IR_LOG_LSRA_ASSIGN(" ---- Assign", current, ival, " (hint available without spilling)"); return reg; } /* reg = register with highest freeUntilPos */ reg = IR_REGSET_FIRST(available); IR_REGSET_EXCL(available, reg); pos = freeUntilPos[reg]; IR_REGSET_FOREACH(available, i) { if (freeUntilPos[i] > pos) { pos = freeUntilPos[i]; reg = i; } else if (freeUntilPos[i] == pos && !IR_REGSET_IN(IR_REGSET_SCRATCH, reg) && IR_REGSET_IN(IR_REGSET_SCRATCH, i)) { /* prefer 
caller-saved registers to avoid save/restore in prologue/epilogue */ pos = freeUntilPos[i]; reg = i; } } IR_REGSET_FOREACH_END(); if (!pos) { /* no register available without spilling */ return IR_REG_NONE; } else if (ir_ival_end(ival) <= pos) { /* register available for the whole interval */ ival->reg = reg; IR_LOG_LSRA_ASSIGN(" ---- Assign", current, ival, " (available without spilling)"); return reg; } else if (pos > ival->range.start) { /* register available for the first part of the interval */ /* split current before freeUntilPos[reg] */ ir_live_pos split_pos = ir_last_use_pos_before(ival, pos, IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG); if (split_pos > ival->range.start) { split_pos = ir_find_optimal_split_position(ctx, current, ival, split_pos, pos); ir_split_interval_at(ctx, current, ival, split_pos); ival->reg = reg; IR_LOG_LSRA_ASSIGN(" ---- Assign", current, ival, " (available without spilling for the first part)"); return reg; } } return IR_REG_NONE; } static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, int current, uint32_t len, ir_bitset active, ir_bitset inactive, ir_list *unhandled) { ir_live_pos nextUsePos[IR_REG_NUM]; ir_live_pos blockPos[IR_REG_NUM]; int i, reg; ir_live_pos pos, next_use_pos; ir_live_interval *ival = ctx->live_intervals[current]; ir_use_pos *use_pos; ir_regset available; if (!(ival->flags & IR_LIVE_INTERVAL_TEMP)) { use_pos = ival->use_pos; while (use_pos && !(use_pos->flags & IR_USE_MUST_BE_IN_REG)) { use_pos = use_pos->next; } if (!use_pos) { /* spill */ IR_LOG_LSRA(" ---- Spill", current, ival, " (no use pos that must be in reg)"); return IR_REG_NONE; } next_use_pos = use_pos->pos; } else { next_use_pos = ival->range.end; } if (IR_IS_TYPE_FP(ival->type)) { available = IR_REGSET_FP; /* set nextUsePos of all physical registers to maxInt */ for (i = IR_REG_FP_FIRST; i <= IR_REG_FP_LAST; i++) { nextUsePos[i] = 0x7fffffff; blockPos[i] = 0x7fffffff; } } else { available = IR_REGSET_GP; if (ctx->flags & IR_USE_FRAME_POINTER) { 
IR_REGSET_EXCL(available, IR_REG_FRAME_POINTER); } /* set nextUsePos of all physical registers to maxInt */ for (i = IR_REG_GP_FIRST; i <= IR_REG_GP_LAST; i++) { nextUsePos[i] = 0x7fffffff; blockPos[i] = 0x7fffffff; } } #ifdef IR_DEBUG available &= debug_regset; #endif /* for each interval it in active */ IR_BITSET_FOREACH(active, len, i) { /* nextUsePos[it.reg] = next use of it after start of current */ reg = ctx->live_intervals[i]->reg; IR_ASSERT(reg >= 0); if (IR_REGSET_IN(available, reg)) { // TODO: intervals that can't be spilled should be handled as fixed if (ctx->live_intervals[i]->type == IR_VOID) { /* fixed intervals */ blockPos[reg] = nextUsePos[reg] = 0; } else { pos = ir_first_use_pos_after(ctx->live_intervals[i], ival->range.start, IR_USE_MUST_BE_IN_REG /* | IR_USE_SHOULD_BE_IN_REG */); if (pos < nextUsePos[reg]) { nextUsePos[reg] = pos; } } } } IR_BITSET_FOREACH_END(); /* for each interval it in inactive intersecting with current */ IR_BITSET_FOREACH(inactive, len, i) { /* freeUntilPos[it.reg] = next intersection of it with current */ reg = ctx->live_intervals[i]->reg; IR_ASSERT(reg >= 0); if (IR_REGSET_IN(available, reg)) { ir_live_pos overlap = ir_vregs_overlap(ctx, current, i); if (overlap) { if (ctx->live_intervals[i]->type == IR_VOID) { /* fixed intervals */ if (overlap < nextUsePos[reg]) { nextUsePos[reg] = overlap; } if (overlap < blockPos[reg]) { blockPos[reg] = overlap; } } else { pos = ir_first_use_pos_after(ctx->live_intervals[i], ival->range.start, IR_USE_MUST_BE_IN_REG /* | IR_USE_SHOULD_BE_IN_REG */); if (pos < nextUsePos[reg]) { nextUsePos[reg] = pos; } } } } } IR_BITSET_FOREACH_END(); // TODO: support for register hinting /* reg = register with highest nextUsePos */ reg = IR_REGSET_FIRST(available); IR_REGSET_EXCL(available, reg); pos = nextUsePos[reg]; IR_REGSET_FOREACH(available, i) { if (nextUsePos[i] > pos) { pos = nextUsePos[i]; reg = i; } } IR_REGSET_FOREACH_END(); /* if first usage of current is after nextUsePos[reg] then */ if 
(next_use_pos > pos && !(ival->flags & IR_LIVE_INTERVAL_TEMP)) { /* all other intervals are used before current, so it is best to spill current itself */ /* assign spill slot to current */ /* split current before its first use position that requires a register */ ir_live_pos split_pos; if (next_use_pos == ival->range.start) { IR_ASSERT(use_pos && use_pos->op_num == 0); /* split right after definition */ split_pos = next_use_pos + 1; } else { split_pos = ir_find_optimal_split_position(ctx, current, ival, ival->range.start, next_use_pos - 1); } if (split_pos > ival->range.start) { IR_LOG_LSRA(" ---- Conflict with others", current, ival, " (all others are used before)"); ir_live_interval *child = ir_split_interval_at(ctx, current, ival, split_pos); IR_LOG_LSRA(" ---- Spill", current, ival, ""); ctx->live_intervals[current] = child; ir_add_to_unhandled(ctx, unhandled, current); IR_LOG_LSRA(" ---- Queue", current, child, ""); return IR_REG_NONE; } } if (ir_ival_end(ival) > blockPos[reg]) { /* spilling make a register free only for the first part of current */ IR_LOG_LSRA(" ---- Conflict with others", current, ival, " (spilling make a register free only for the first part)"); /* split current at optimal position before block_pos[reg] */ ir_live_pos split_pos = ir_last_use_pos_before(ival, blockPos[reg] + 1, IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG); split_pos = ir_find_optimal_split_position(ctx, current, ival, split_pos, blockPos[reg]); ir_split_interval_at(ctx, current, ival, split_pos); } /* spill intervals that currently block reg */ IR_BITSET_FOREACH(active, len, i) { ir_live_interval *other = ctx->live_intervals[i]; ir_live_pos split_pos; if (reg == other->reg) { /* split active interval for reg at position */ ir_live_pos overlap = ir_vregs_overlap(ctx, current, i); if (overlap) { IR_ASSERT(other->type != IR_VOID); IR_LOG_LSRA_CONFLICT(" ---- Conflict with active", i, other, overlap); split_pos = ir_last_use_pos_before(other, ival->range.start, 
IR_USE_MUST_BE_IN_REG); if (split_pos < ival->range.start) { split_pos++; // TODO: ??? } if (split_pos > other->range.start) { split_pos = ir_find_optimal_split_position(ctx, i, other, split_pos, ival->range.start); ir_live_interval *child = ir_split_interval_at(ctx, i, other, split_pos); ir_bitset_excl(active, i); IR_LOG_LSRA(" ---- Finish", i, other, ""); other = child; } else { other->reg = IR_REG_NONE; ir_bitset_excl(active, i); IR_LOG_LSRA(" ---- Spill and Finish", i, other, " (it must not be in reg)"); } split_pos = ir_first_use_pos_after(other, ival->range.start, IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG) - 1; if (split_pos >= ir_ival_end(other)) { other = other->next; } else if (split_pos > other->range.start) { split_pos = ir_find_optimal_split_position(ctx, i, other, ival->range.start, split_pos); ir_live_interval *child = ir_split_interval_at(ctx, i, other, split_pos); IR_LOG_LSRA(" ---- Spill", i, other, ""); other = child; } else { // TODO: this may cause enless loop } if (other) { ctx->live_intervals[i] = other; ir_add_to_unhandled(ctx, unhandled, i); IR_LOG_LSRA(" ---- Queue", i, other, ""); } } break; } } IR_BITSET_FOREACH_END(); /* split any inactive interval for reg at the end of its lifetime hole */ IR_BITSET_FOREACH(inactive, len, i) { /* freeUntilPos[it.reg] = next intersection of it with current */ ir_live_interval *other = ctx->live_intervals[i]; if (reg == other->reg) { ir_live_pos overlap = ir_vregs_overlap(ctx, current, i); if (overlap) { IR_ASSERT(other->type != IR_VOID); IR_LOG_LSRA_CONFLICT(" ---- Conflict with inactive", i, other, overlap); // TODO: optimal split position (this case is not tested) ir_split_interval_at(ctx, i, other, overlap); } } } IR_BITSET_FOREACH_END(); /* current.reg = reg */ ival->reg = reg; IR_LOG_LSRA_ASSIGN(" ---- Assign", current, ival, " (after splitting others)"); return reg; } static int ir_live_range_cmp(const void *r1, const void *r2, void *data) { ir_ctx *ctx = data; ir_live_range *lrg1 = 
&ctx->live_intervals[*(ir_ref*)r1]->range; ir_live_range *lrg2 = &ctx->live_intervals[*(ir_ref*)r2]->range; return lrg2->start - lrg1->start; } static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, int from, int to) { if (to == 0) { ir_block *bb = ctx->data; ir_reg reg; if (IR_IS_TYPE_INT(type)) { reg = IR_REG_R0; // TODO: Temporary register } else if (IR_IS_TYPE_FP(type)) { reg = IR_REG_XMM0; // TODO: Temporary register } else { IR_ASSERT(0); return 0; } ir_add_fixed_live_range(ctx, reg, IR_START_LIVE_POS_FROM_REF(bb->end), IR_END_LIVE_POS_FROM_REF(bb->end)); } return 1; } static int ir_linear_scan(ir_ctx *ctx) { int b; ir_block *bb; ir_list unhandled; ir_bitset active, inactive; ir_live_interval *ival; int current, i; uint32_t len; ir_live_pos position; ir_reg reg; ir_lsra_data data; if (!ctx->live_intervals) { return 0; } /* Add fixed intervals for temporary registers used for DESSA moves */ for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) { if (bb->flags & IR_BB_DESSA_MOVES) { ctx->data = bb; ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps); } } ctx->data = &data; data.stack_frame_size = 0; ir_list_init(&unhandled, ctx->vregs_count + 1); len = ir_bitset_len(ctx->vregs_count + 1 + IR_REG_NUM); active = ir_bitset_malloc(ctx->vregs_count + 1 + IR_REG_NUM); inactive = ir_bitset_malloc(ctx->vregs_count + 1 + IR_REG_NUM); for (i = 0; i <= ctx->vregs_count; i++) { if (ctx->live_intervals[i] && ctx->live_intervals[i]->range.start > 0) { ir_list_push(&unhandled, i); } } for (i = ctx->vregs_count + 1; i <= ctx->vregs_count + IR_REG_NUM; i++) { if (ctx->live_intervals[i]) { ir_bitset_incl(inactive, i); } } qsort_r(unhandled.a.refs, ir_list_len(&unhandled), sizeof(ir_ref), ir_live_range_cmp, ctx); #ifdef IR_DEBUG if (ctx->flags & IR_DEBUG_RA) { fprintf(stderr, "----\n"); ir_dump_live_ranges(ctx, stderr); fprintf(stderr, "---- Start LSRA\n"); } #endif while (1) { if (ir_list_len(&unhandled) == 0) { position = 0x7fffffff; IR_BITSET_FOREACH(active, 
len, i) { ival = ctx->live_intervals[i]; if (ival->next) { if (ival->next->range.start < position) { position = ival->next->range.start; current = i; } } } IR_BITSET_FOREACH_END(); if (position < 0x7fffffff) { ir_bitset_excl(active, current); ival = ctx->live_intervals[current] = ctx->live_intervals[current]->next; } else { break; } } else { current = ir_list_pop(&unhandled); ival = ctx->live_intervals[current]; position = ival->range.start; } IR_LOG_LSRA(" ---- Processing", current, ival, "..."); /* for each interval i in active */ IR_BITSET_FOREACH(active, len, i) { ival = ctx->live_intervals[i]; if (ir_ival_end(ival) <= position) { /* move i from active to handled */ ir_bitset_excl(active, i); if (ival->next) { ctx->live_intervals[i] = ival->next; ir_add_to_unhandled(ctx, &unhandled, i); } } else if (!ir_ival_covers(ival, position)) { /* move i from active to inactive */ ir_bitset_excl(active, i); ir_bitset_incl(inactive, i); } } IR_BITSET_FOREACH_END(); /* for each interval i in inactive */ IR_BITSET_FOREACH(inactive, len, i) { ival = ctx->live_intervals[i]; if (ir_ival_end(ival) <= position) { /* move i from inactive to handled */ ir_bitset_excl(inactive, i); if (ival->next) { ctx->live_intervals[i] = ival->next; ir_add_to_unhandled(ctx, &unhandled, i); } } else if (ir_ival_covers(ival, position)) { /* move i from active to inactive */ ir_bitset_excl(inactive, i); ir_bitset_incl(active, i); } } IR_BITSET_FOREACH_END(); #if 1 && IR_DEBUG ival = ctx->live_intervals[current]; ir_insn *insn = &ctx->ir_base[IR_LIVE_POS_TO_REF(ival->range.start)]; if (insn->op == IR_VLOAD) { ir_insn *var = &ctx->ir_base[insn->op2]; IR_ASSERT(var->op == IR_VAR); if (strcmp(ir_get_str(ctx, var->op2), "_spill_") == 0) { if (ctx->live_intervals[ctx->vregs[insn->op2]]->stack_spill_pos) { ctx->live_intervals[current]->stack_spill_pos = ctx->live_intervals[ctx->vregs[insn->op2]]->stack_spill_pos; } else { ir_allocate_spill_slot(ctx, current, &data); 
ctx->live_intervals[ctx->vregs[insn->op2]]->stack_spill_pos = ctx->live_intervals[current]->stack_spill_pos; } continue; } } insn = &ctx->ir_base[IR_LIVE_POS_TO_REF(ival->range.end)]; if (insn->op == IR_VSTORE) { ir_insn *var = &ctx->ir_base[insn->op2]; IR_ASSERT(var->op == IR_VAR); if (strcmp(ir_get_str(ctx, var->op2), "_spill_") == 0) { if (ctx->live_intervals[ctx->vregs[insn->op2]]->stack_spill_pos) { ctx->live_intervals[current]->stack_spill_pos = ctx->live_intervals[ctx->vregs[insn->op2]]->stack_spill_pos; } else { ir_allocate_spill_slot(ctx, current, &data); ctx->live_intervals[ctx->vregs[insn->op2]]->stack_spill_pos = ctx->live_intervals[current]->stack_spill_pos; } continue; } } #endif reg = ir_try_allocate_free_reg(ctx, current, len, active, inactive); if (reg == IR_REG_NONE) { reg = ir_allocate_blocked_reg(ctx, current, len, active, inactive, &unhandled); } if (reg != IR_REG_NONE) { if (ctx->live_intervals[current]->reg != IR_REG_NONE) { ir_bitset_incl(active, current); } } } #ifdef IR_DEBUG /* all intervals must be processed */ IR_BITSET_FOREACH(active, len, i) { ival = ctx->live_intervals[i]; IR_ASSERT(!ival->next); } IR_BITSET_FOREACH_END(); IR_BITSET_FOREACH(inactive, len, i) { ival = ctx->live_intervals[i]; IR_ASSERT(!ival->next); } IR_BITSET_FOREACH_END(); #endif ir_mem_free(inactive); ir_mem_free(active); ir_list_free(&unhandled); if (ctx->live_intervals[0]) { ctx->live_intervals[0] = ctx->live_intervals[0]->top; } for (i = 1; i <= ctx->vregs_count; i++) { ival = ctx->live_intervals[i]; if (ival) { ival = ival->top; ctx->live_intervals[i] = ival; if (ival->next || ival->reg == IR_REG_NONE) { ir_allocate_spill_slot(ctx, i, &data); } } } #ifdef IR_DEBUG if (ctx->flags & IR_DEBUG_RA) { fprintf(stderr, "---- Finish LSRA\n"); ir_dump_live_ranges(ctx, stderr); fprintf(stderr, "----\n"); } #endif return 1; } static void assign_regs(ir_ctx *ctx) { uint32_t i; ir_live_interval *ival; ir_use_pos *use_pos; int8_t reg; ctx->regs = ir_mem_malloc(sizeof(ir_regs) 
* ctx->insns_count); memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count); for (i = 1; i <= ctx->vregs_count; i++) { ival = ctx->live_intervals[i]; if (ival) { ival = ival->top; do { if (ival->reg >= 0) { use_pos = ival->use_pos; while (use_pos) { reg = ival->reg; if (ival->top->stack_spill_pos){ // TODO: Insert spill loads and stotres in optimal positons (resolution) if (use_pos->op_num == 0) { reg |= IR_REG_SPILL_STORE; } else { reg |= IR_REG_SPILL_LOAD; } } ctx->regs[IR_LIVE_POS_TO_REF(use_pos->pos)][use_pos->op_num] = reg; use_pos = use_pos->next; } } ival = ival->next; } while (ival); } } /* Temporary registers */ ival = ctx->live_intervals[0]; if (ival) { ival = ival->top; do { IR_ASSERT(ival->reg != IR_REG_NONE); ctx->regs[IR_LIVE_POS_TO_REF(ival->range.start)][ival->flags & IR_LIVE_INTERVAL_REG_NUM_MASK] = ival->reg; ival = ival->next; } while (ival); } } int ir_reg_alloc(ir_ctx *ctx) { if (ir_linear_scan(ctx)) { assign_regs(ctx); return 1; } return 0; }