diff --git a/ir.c b/ir.c
index ef9ae6b..9f7cf3b 100644
--- a/ir.c
+++ b/ir.c
@@ -325,6 +325,7 @@ void ir_init(ir_ctx *ctx, ir_ref consts_limit, ir_ref insns_limit)
 	ctx->snapshot_create = NULL;
 	ctx->entries_count = 0;
 	ctx->entries = NULL;
+	ctx->osr_entry_loads = NULL;
 
 	ctx->code_buffer = NULL;
 	ctx->code_buffer_size = 0;
@@ -390,6 +391,10 @@ void ir_free(ir_ctx *ctx)
 	if (ctx->entries) {
 		ir_mem_free(ctx->entries);
 	}
+	if (ctx->osr_entry_loads) {
+		ir_list_free((ir_list*)ctx->osr_entry_loads);
+		ir_mem_free(ctx->osr_entry_loads);
+	}
 }
 
 ir_ref ir_unique_const_addr(ir_ctx *ctx, uintptr_t addr)
diff --git a/ir.h b/ir.h
index 3b5bf44..b098ef6 100644
--- a/ir.h
+++ b/ir.h
@@ -542,6 +542,7 @@ struct _ir_ctx {
 	uint32_t jmp_table_offset;
 	uint32_t entries_count;
 	ir_ref *entries;
+	void *osr_entry_loads;
 	void *code_buffer;
 	size_t code_buffer_size;
 	ir_strtab strtab;
diff --git a/ir_aarch64.dasc b/ir_aarch64.dasc
index 0589a4b..941f948 100644
--- a/ir_aarch64.dasc
+++ b/ir_aarch64.dasc
@@ -200,8 +200,13 @@ static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS] = {
 
 const char *ir_reg_name(int8_t reg, ir_type type)
 {
-	if (reg == IR_REG_NUM) {
-		return "SCRATCH";
+	if (reg >= IR_REG_NUM) {
+		if (reg == IR_REG_SCRATCH) {
+			return "SCRATCH";
+		} else {
+			IR_ASSERT(reg == IR_REG_ALL);
+			return "ALL";
+		}
 	}
 	IR_ASSERT(reg >= 0 && reg < IR_REG_NUM);
 	if (type == IR_VOID) {
@@ -473,7 +478,7 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co
 		case IR_CALL:
 			insn = &ctx->ir_base[ref];
 			def_reg = (IR_IS_TYPE_INT(insn->type)) ? IR_REG_INT_RET1 : IR_REG_FP_RET1;
-			constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_NUM, IR_USE_SUB_REF, IR_DEF_SUB_REF);
+			constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_SCRATCH, IR_USE_SUB_REF, IR_DEF_SUB_REF);
 			n = 1;
 			IR_FALLTHROUGH;
 		case IR_TAILCALL:
@@ -818,14 +823,12 @@ binop_fp:
 			return IR_RSTORE;
 		case IR_START:
 		case IR_BEGIN:
-//		case IR_END:
 		case IR_IF_TRUE:
 		case IR_IF_FALSE:
 		case IR_CASE_VAL:
 		case IR_CASE_DEFAULT:
 		case IR_MERGE:
 		case IR_LOOP_BEGIN:
-//		case IR_LOOP_END:
 		case IR_UNREACHABLE:
 			return IR_SKIP;
 		case IR_RETURN:
@@ -3419,7 +3422,7 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
 					void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]);
 
 					|	.addr &addr
-					bb->flags |= IR_BB_SKIP;
+					bb->flags |= IR_BB_EMPTY;
 					continue;
 				}
 			}
@@ -4578,7 +4581,8 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
 
 	ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count);
 	memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count);
-	ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 1, sizeof(ir_live_interval*));
+	/* vregs + tmp + fixed + SCRATCH + ALL */
+	ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*));
 	for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
 		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
 		for (i = bb->start, insn = ctx->ir_base + i, rule = ctx->rules + i; i <= bb->end;) {
@@ -4683,7 +4687,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
 						ir_reg reg = ir_get_free_reg(constraints.tmp_regs[n].type, available);
 						IR_REGSET_EXCL(available, reg);
 						ctx->regs[i][constraints.tmp_regs[n].num] = reg;
-					} else if (constraints.tmp_regs[n].reg == IR_REG_NUM) {
+					} else if (constraints.tmp_regs[n].reg == IR_REG_SCRATCH) {
 						available = IR_REGSET_DIFFERENCE(available, IR_REGSET_SCRATCH);
 					} else {
 						IR_REGSET_EXCL(available, constraints.tmp_regs[n].reg);
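
Note on the sizing change: the extra "+ 2" here (and in the matching allocations in ir_ra.c and ir_x86.dasc below) reserves interval slots for the two new pseudo-registers past the fixed ones. A minimal sketch of the index layout this implies, assuming the usual vregs_count + 1 + reg indexing for fixed intervals; the helper is illustrative, not part of the IR API:

	/* live_intervals slots: [0] temporary, [1..vregs_count] virtual
	 * registers, then one fixed interval per physical register, then
	 * IR_REG_SCRATCH (== IR_REG_NUM) and IR_REG_ALL (== IR_REG_NUM + 1),
	 * which is why vregs_count + 1 + IR_REG_NUM + 2 entries are needed. */
	static uint32_t ival_index(uint32_t vregs_count, int8_t reg)
	{
		IR_ASSERT(reg >= 0 && reg <= IR_REG_ALL);
		return vregs_count + 1 + reg;
	}
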
@@ -4924,11 +4928,6 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
 		data.stack_frame_alignment = 0;
 	}
 
-	/* For all entries */
-	if (ctx->entries_count) {
-		ctx->entries = ir_mem_malloc(ctx->entries_count * sizeof(ir_ref));
-	}
-
 	Dst = &data.dasm_state;
 	data.dasm_state = NULL;
 	dasm_init(&data.dasm_state, DASM_MAXSECTION);
@@ -4946,12 +4945,7 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
 	for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
 		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
-		if (bb->flags & IR_BB_SKIP) {
-			continue;
-		} else if (ctx->prev_ref[bb->end] == bb->start
-				&& bb->successors_count == 1
-				&& (ctx->ir_base[bb->end].op == IR_END || ctx->ir_base[bb->end].op == IR_LOOP_END)
-				&& !(bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_DESSA_MOVES))) {
+		if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) {
 			continue;
 		}
 		|=>b:
@@ -5071,6 +5065,9 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
 					break;
 				case IR_END:
 				case IR_LOOP_END:
+					if (bb->flags & IR_BB_OSR_ENTRY_LOADS) {
+						ir_emit_osr_entry_loads(ctx, b, bb);
+					}
 					if (bb->flags & IR_BB_DESSA_MOVES) {
 						data.dessa_from_block = b;
 						ir_gen_dessa_moves(ctx, b, ir_emit_dessa_move);
diff --git a/ir_aarch64.h b/ir_aarch64.h
index 4a6e713..4c36f7e 100644
--- a/ir_aarch64.h
+++ b/ir_aarch64.h
@@ -93,6 +93,8 @@ enum _ir_reg {
 #define IR_REG_FP_FIRST IR_REG_V0
 #define IR_REG_GP_LAST  (IR_REG_FP_FIRST - 1)
 #define IR_REG_FP_LAST  (IR_REG_NUM - 1)
+#define IR_REG_SCRATCH  (IR_REG_NUM)     /* special name for regset */
+#define IR_REG_ALL      (IR_REG_NUM + 1) /* special name for regset */
 
 #define IR_REGSET_64BIT 1
diff --git a/ir_cfg.c b/ir_cfg.c
index 2140568..23d25d8 100644
--- a/ir_cfg.c
+++ b/ir_cfg.c
@@ -866,18 +866,7 @@ int ir_schedule_blocks(ir_ctx *ctx)
 	blocks.pos = 0;
 	list = ir_mem_malloc(sizeof(uint32_t) * (ctx->cfg_blocks_count + 1) * 2);
 	map = list + (ctx->cfg_blocks_count + 1);
-	for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) {
-		if (ctx->prev_ref[bb->end] == bb->start
-		 && bb->successors_count == 1
-		 && (ctx->ir_base[bb->end].op == IR_END || ctx->ir_base[bb->end].op == IR_LOOP_END)
-		 && !(bb->flags & IR_BB_DESSA_MOVES)) {
-			bb->flags |= IR_BB_EMPTY;
-			if ((ctx->flags & IR_MERGE_EMPTY_ENTRIES) && (bb->flags & IR_BB_ENTRY)) {
-				if (ctx->cfg_edges[bb->successors] == b + 1) {
-					(bb + 1)->flags |= IR_BB_PREV_EMPTY_ENTRY;
-				}
-			}
-		}
+	for (b = 1; b <= ctx->cfg_blocks_count; b++) {
 		ir_bitset_incl(blocks.set, b);
 	}
@@ -1012,6 +1001,22 @@ int ir_schedule_blocks(ir_ctx *ctx)
 		}
 		ir_mem_free(ctx->cfg_blocks);
 		ctx->cfg_blocks = cfg_blocks;
+
+		if (ctx->osr_entry_loads) {
+			ir_list *list = (ir_list*)ctx->osr_entry_loads;
+			uint32_t pos = 0, count;
+
+			while (1) {
+				b = ir_list_at(list, pos);
+				if (b == 0) {
+					break;
+				}
+				ir_list_set(list, pos, map[b]);
+				pos++;
+				count = ir_list_at(list, pos);
+				pos += count + 1;
+			}
+		}
 	}
 
 	ir_mem_free(list);
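
The loop above renumbers blocks recorded in ctx->osr_entry_loads after scheduling. The list is a flat sequence of groups, each a block number followed by a count and that many refs, terminated by a 0 block number; the same layout is walked again in ir_dump.c and ir_emit.c below. A sketch of a generic walker, assuming only that layout (the function itself is illustrative):

	static void osr_entry_loads_walk(ir_ctx *ctx)
	{
		ir_list *list = (ir_list*)ctx->osr_entry_loads;
		uint32_t pos = 0, b, count, i;

		while ((b = ir_list_at(list, pos)) != 0) { /* 0 is the end marker */
			pos++;
			count = ir_list_at(list, pos);
			pos++;
			for (i = 0; i < count; i++, pos++) {
				ir_ref ref = ir_list_at(list, pos);
				/* block 'b' needs a load of 'ref' at its OSR entry */
				(void)ref;
			}
		}
	}
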
@@ -1028,10 +1033,7 @@ uint32_t ir_skip_empty_target_blocks(ir_ctx *ctx, uint32_t b)
 	while (1) {
 		bb = &ctx->cfg_blocks[b];
 
-		if (ctx->prev_ref[bb->end] == bb->start
-		 && bb->successors_count == 1
-		 && (ctx->ir_base[bb->end].op == IR_END || ctx->ir_base[bb->end].op == IR_LOOP_END)
-		 && !(bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_DESSA_MOVES))) {
+		if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) {
 			b = ctx->cfg_edges[bb->successors];
 		} else {
 			break;
@@ -1051,10 +1053,7 @@ uint32_t ir_skip_empty_next_blocks(ir_ctx *ctx, uint32_t b)
 
 		bb = &ctx->cfg_blocks[b];
 
-		if (ctx->prev_ref[bb->end] == bb->start
-		 && bb->successors_count == 1
-		 && (ctx->ir_base[bb->end].op == IR_END || ctx->ir_base[bb->end].op == IR_LOOP_END)
-		 && !(bb->flags & (IR_BB_START|/*IR_BB_ENTRY|*/IR_BB_DESSA_MOVES))) {
+		if ((bb->flags & (IR_BB_START|IR_BB_EMPTY)) == IR_BB_EMPTY) {
 			b++;
 		} else {
 			break;
diff --git a/ir_dump.c b/ir_dump.c
index c8db2d3..b084fd6 100644
--- a/ir_dump.c
+++ b/ir_dump.c
@@ -250,6 +250,30 @@ void ir_dump_cfg(ir_ctx *ctx, FILE *f)
 		if (bb->loop_depth != 0) {
 			fprintf(f, "\tloop_depth=%d\n", bb->loop_depth);
 		}
+		if (bb->flags & IR_BB_OSR_ENTRY_LOADS) {
+			ir_list *list = (ir_list*)ctx->osr_entry_loads;
+			uint32_t pos = 0, i, count;
+
+			IR_ASSERT(list);
+			while (1) {
+				i = ir_list_at(list, pos);
+				if (b == i) {
+					break;
+				}
+				IR_ASSERT(i != 0); /* end marker */
+				pos++;
+				count = ir_list_at(list, pos);
+				pos += count + 1;
+			}
+			pos++;
+			count = ir_list_at(list, pos);
+			pos++;
+
+			for (i = 0; i < count; i++, pos++) {
+				ir_ref ref = ir_list_at(list, pos);
+				fprintf(f, "\tOSR_ENTRY_LOAD=d_%d\n", ref);
+			}
+		}
 		if (bb->flags & IR_BB_DESSA_MOVES) {
 			ctx->data = f;
 			ir_gen_dessa_moves(ctx, b, ir_dump_dessa_move);
@@ -370,7 +394,7 @@ void ir_dump_live_ranges(ir_ctx *ctx, FILE *f)
 		}
 	}
 #if 1
-	n = ctx->vregs_count + ir_regs_number() + 1;
+	n = ctx->vregs_count + ir_regs_number() + 2;
 	for (i = ctx->vregs_count + 1; i <= n; i++) {
 		ir_live_interval *ival = ctx->live_intervals[i];
diff --git a/ir_emit.c b/ir_emit.c
index d011598..88553e8 100644
--- a/ir_emit.c
+++ b/ir_emit.c
@@ -311,6 +311,10 @@ static void *ir_jmp_addr(ir_ctx *ctx, ir_insn *insn, ir_insn *addr_insn)
 # pragma GCC diagnostic pop
 #endif
 
+
+/* Forward Declarations */
+static void ir_emit_osr_entry_loads(ir_ctx *ctx, int b, ir_block *bb);
+
 #if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
 # include "ir_emit_x86.h"
 #elif defined(IR_TARGET_AARCH64)
@@ -319,31 +323,107 @@ static void *ir_jmp_addr(ir_ctx *ctx, ir_insn *insn, ir_insn *addr_insn)
 # error "Unknown IR target"
 #endif
 
+static IR_NEVER_INLINE void ir_emit_osr_entry_loads(ir_ctx *ctx, int b, ir_block *bb)
+{
+	ir_list *list = (ir_list*)ctx->osr_entry_loads;
+	int pos = 0, count, i;
+	ir_ref ref;
+
+	IR_ASSERT(ctx->binding);
+	IR_ASSERT(list);
+	while (1) {
+		i = ir_list_at(list, pos);
+		if (b == i) {
+			break;
+		}
+		IR_ASSERT(i != 0); /* end marker */
+		pos++;
+		count = ir_list_at(list, pos);
+		pos += count + 1;
+	}
+	pos++;
+	count = ir_list_at(list, pos);
+	pos++;
+
+	for (i = 0; i < count; i++, pos++) {
+		ref = ir_list_at(list, pos);
+		IR_ASSERT(ref >= 0 && ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]);
+		if (ctx->live_intervals[ctx->vregs[ref]]->stack_spill_pos == -1) {
+			/* not spilled */
+			ir_reg reg = ctx->live_intervals[ctx->vregs[ref]]->reg;
+			ir_type type = ctx->ir_base[ref].type;
+			int32_t offset = -ir_binding_find(ctx, ref);
+
+			IR_ASSERT(offset > 0);
+			if (IR_IS_TYPE_INT(type)) {
+				ir_emit_load_mem_int(ctx, type, reg, ctx->spill_base, offset);
+			} else {
+				ir_emit_load_mem_fp(ctx, type, reg, ctx->spill_base, offset);
+			}
+		}
+	}
+}
+
 int ir_match(ir_ctx *ctx)
 {
 	uint32_t b;
-	ir_ref i;
+	ir_ref start, ref, *prev_ref;
 	ir_block *bb;
-
-	if (!ctx->prev_ref) {
-		ir_build_prev_refs(ctx);
-	}
+	ir_insn *insn;
 
 	ctx->rules = ir_mem_calloc(ctx->insns_count, sizeof(uint32_t));
+
+	prev_ref = ctx->prev_ref;
+	if (!prev_ref) {
+		ir_build_prev_refs(ctx);
+		prev_ref = ctx->prev_ref;
+	}
+
 	for (b = ctx->cfg_blocks_count, bb = ctx->cfg_blocks + b; b > 0; b--, bb--) {
 		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
+		start = bb->start;
 		if (bb->flags & IR_BB_ENTRY) {
-			ir_insn *insn = &ctx->ir_base[bb->start];
+			insn = &ctx->ir_base[start];
 			IR_ASSERT(insn->op == IR_ENTRY);
 			insn->op3 = ctx->entries_count++;
 		}
-		for (i = bb->end; i > bb->start; i = ctx->prev_ref[i]) {
-			if (!ctx->rules[i]) {
-				ctx->rules[i] = ir_match_insn(ctx, i, bb);
+		ctx->rules[start] = IR_SKIP;
+		ref = bb->end;
+		insn = &ctx->ir_base[ref];
+		if (insn->op == IR_END || insn->op == IR_LOOP_END) {
+			ctx->rules[ref] = insn->op;
+			ref = prev_ref[ref];
+			if (ref == bb->start && bb->successors_count == 1) {
+				if (EXPECTED(!(bb->flags & IR_BB_ENTRY))) {
+					bb->flags |= IR_BB_EMPTY;
+				} else if (ctx->flags & IR_MERGE_EMPTY_ENTRIES) {
+					bb->flags |= IR_BB_EMPTY;
+					if (ctx->cfg_edges[bb->successors] == b + 1) {
+						(bb + 1)->flags |= IR_BB_PREV_EMPTY_ENTRY;
+					}
+				}
+				continue;
+			}
+		}
+		while (ref > start) {
+			if (!ctx->rules[ref]) {
+				ctx->rules[ref] = ir_match_insn(ctx, ref, bb);
+			}
+			ir_match_insn2(ctx, ref, bb);
+			ref = prev_ref[ref];
+		}
+	}
+
+	if (ctx->entries_count) {
+		ctx->entries = ir_mem_malloc(ctx->entries_count * sizeof(ir_ref));
+
+		for (b = ctx->cfg_blocks_count, bb = ctx->cfg_blocks + b; b > 0; b--, bb--) {
+			if (bb->flags & IR_BB_ENTRY) {
+				ir_ref i = bb->start;
+				ir_insn *insn = ctx->ir_base + i;
+				ctx->entries[insn->op3] = b;
 			}
-			ir_match_insn2(ctx, i, bb);
 		}
-		ctx->rules[i] = IR_SKIP;
 	}
 
 	return 1;
diff --git a/ir_emit_c.c b/ir_emit_c.c
index 060fc92..fe20dca 100644
--- a/ir_emit_c.c
+++ b/ir_emit_c.c
@@ -712,6 +712,12 @@ static int ir_emit_func(ir_ctx *ctx, FILE *f)
 	vars = ir_bitset_malloc(ctx->vregs_count + 1);
 	for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
 		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
+		if (ctx->prev_ref[bb->end] == bb->start
+		 && bb->successors_count == 1
+		 && (ctx->ir_base[bb->end].op == IR_END || ctx->ir_base[bb->end].op == IR_LOOP_END)
+		 && !(bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_DESSA_MOVES))) {
+			bb->flags |= IR_BB_EMPTY;
+		}
 		for (i = bb->start, insn = ctx->ir_base + i; i <= bb->end;) {
 			if (ctx->vregs[i]) {
 				if (!ir_bitset_in(vars, ctx->vregs[i])) {
@@ -750,10 +756,7 @@ static int ir_emit_func(ir_ctx *ctx, FILE *f)
 
 	for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
 		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
-		if (ctx->prev_ref[bb->end] == bb->start
-		 && bb->successors_count == 1
-		 && (ctx->ir_base[bb->end].op == IR_END || ctx->ir_base[bb->end].op == IR_LOOP_END)
-		 && !(bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_DESSA_MOVES))) {
+		if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) {
 			continue;
 		}
 		if (bb->predecessors_count > 1 || (bb->predecessors_count == 1 && ctx->cfg_edges[bb->predecessors] != prev)) {
diff --git a/ir_private.h b/ir_private.h
index 26265df..237f765 100644
--- a/ir_private.h
+++ b/ir_private.h
@@ -587,6 +587,12 @@ IR_ALWAYS_INLINE ir_ref ir_list_at(ir_list *l, uint32_t i)
 	return ir_array_at(&l->a, i);
 }
 
+IR_ALWAYS_INLINE void ir_list_set(ir_list *l, uint32_t i, ir_ref val)
+{
+	IR_ASSERT(i < l->len);
+	ir_array_set_unchecked(&l->a, i, val);
+}
+
 /* Worklist (unique list) */
 typedef struct _ir_worklist {
 	ir_list l;
@@ -815,8 +821,8 @@ struct _ir_use_list {
 #define IR_BB_IRREDUCIBLE_LOOP (1<<4)
 #define IR_BB_DESSA_MOVES      (1<<5) /* translation out of SSA requires MOVEs */
 #define IR_BB_EMPTY            (1<<6)
-#define IR_BB_SKIP             (1<<7) /* skip during code generation */
-#define IR_BB_PREV_EMPTY_ENTRY (1<<8)
+#define IR_BB_PREV_EMPTY_ENTRY (1<<7)
+#define IR_BB_OSR_ENTRY_LOADS  (1<<8) /* OSR Entry-point with register LOADs */
 
 struct _ir_block {
 	uint32_t flags;
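
With IR_BB_SKIP gone, IR_BB_EMPTY becomes the single "nothing to emit" flag: it is computed once in ir_match() and must be dropped by any pass that later gives the block real work, which is exactly what ir_add_phi_move(), ir_compute_dessa_moves() and ir_add_osr_entry_loads() in ir_ra.c below do. A sketch of the resulting contract; the helper and macro are illustrative, not part of the patch:

	/* Mark a block as having work to emit; it can no longer be skipped. */
	static void ir_bb_add_work(ir_block *bb, uint32_t work_flag)
	{
		bb->flags &= ~IR_BB_EMPTY;
		bb->flags |= work_flag; /* e.g. IR_BB_DESSA_MOVES or IR_BB_OSR_ENTRY_LOADS */
	}

	/* Code generators then skip a block only when it is empty and is
	 * neither the start block nor an ENTRY block: */
	#define IR_BB_MAY_SKIP(bb) \
		(((bb)->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY)
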
diff --git a/ir_ra.c b/ir_ra.c
index 50b5767..b7b2372 100644
--- a/ir_ra.c
+++ b/ir_ra.c
@@ -338,6 +338,65 @@ static void ir_add_phi_use(ir_ctx *ctx, int v, int op_num, ir_live_pos pos, ir_r
 	ir_add_use_pos(ctx, v, use_pos);
 }
 
+static void ir_add_osr_entry_loads(ir_ctx *ctx, ir_block *bb, ir_bitset live, uint32_t len, uint32_t b)
+{
+	bool ok = 1;
+	int count = 0;
+	ir_list *list = (ir_list*)ctx->osr_entry_loads;
+	ir_ref i;
+
+	IR_BITSET_FOREACH(live, len, i) {
+		/* Skip live references from ENTRY to PARAM. TODO: duplicate PARAM in each ENTRY ??? */
+		ir_use_pos *use_pos = ctx->live_intervals[i]->use_pos;
+		ir_ref ref = (use_pos->flags & IR_PHI_USE) ? use_pos->hint_ref : IR_LIVE_POS_TO_REF(use_pos->pos);
+
+		if (use_pos->op_num) {
+			ir_ref *ops = ctx->ir_base[ref].ops;
+			ref = ops[use_pos->op_num];
+		}
+
+		if (ctx->ir_base[ref].op == IR_PARAM) {
+			continue;
+		}
+		if (ctx->binding) {
+			ir_ref var = ir_binding_find(ctx, ref);
+			if (var < 0) {
+				/* We may load the value at OSR entry-point */
+				if (!count) {
+					bb->flags &= ~IR_BB_EMPTY;
+					bb->flags |= IR_BB_OSR_ENTRY_LOADS;
+					if (!ctx->osr_entry_loads) {
+						list = ctx->osr_entry_loads = ir_mem_malloc(sizeof(ir_list));
+						ir_list_init(list, 16);
+					}
+					ir_list_push(list, b);
+					ir_list_push(list, 0);
+				}
+				ir_list_push(list, ref);
+				count++;
+				continue;
+			}
+		}
+		fprintf(stderr, "ENTRY %d (block %i) - live var %d\n", ctx->ir_base[bb->start].op2, b, ref);
+		ok = 0;
+	} IR_BITSET_FOREACH_END();
+
+	if (!ok) {
+		IR_ASSERT(0);
+	}
+	if (count) {
+		ir_list_set(list, ir_list_len(ctx->osr_entry_loads) - (count + 1), count);
+
+#if 0
+		/* ENTRY "clobbers" all registers */
+		ir_ref ref = ctx->ir_base[bb->start].op1;
+		ir_add_fixed_live_range(ctx, &unused, IR_REG_ALL,
+			IR_DEF_LIVE_POS_FROM_REF(ref),
+			IR_SAVE_LIVE_POS_FROM_REF(ref));
+#endif
+	}
+}
+
 int ir_compute_live_ranges(ir_ctx *ctx)
 {
 	uint32_t b, i, j, k, n, succ, *p;
@@ -364,7 +423,8 @@ int ir_compute_live_ranges(ir_ctx *ctx)
 #endif
 	len = ir_bitset_len(ctx->vregs_count + 1);
 	bb_live = ir_mem_malloc((ctx->cfg_blocks_count + 1) * len * sizeof(ir_bitset_base_t));
-	ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 1, sizeof(ir_live_interval*));
+	/* vregs + tmp + fixed + SCRATCH + ALL */
+	ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*));
 	for (b = ctx->cfg_blocks_count; b > 0; b--) {
 		bb = &ctx->cfg_blocks[b];
 		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
@@ -440,6 +500,7 @@ int ir_compute_live_ranges(ir_ctx *ctx)
 			}
 		}
 
+		/* for each opd in live */
 		IR_BITSET_FOREACH(live, len, i) {
 			/* intervals[opd].addRange(b.from, b.to) */
@@ -650,8 +711,9 @@ int ir_compute_live_ranges(ir_ctx *ctx)
 		 && !ir_bitset_empty(live, len)) {
 			/* variables live at loop header are alive at the whole loop body */
 			uint32_t bb_set_len = ir_bitset_len(ctx->cfg_blocks_count + 1);
-			int child;
+			uint32_t child;
 			ir_block *child_bb;
+			ir_bitset child_live_in;
 
 			if (!loops) {
 				loops = ir_bitset_malloc(ctx->cfg_blocks_count + 1);
@@ -664,8 +726,10 @@ int ir_compute_live_ranges(ir_ctx *ctx)
 			child = b;
 			do {
 				child_bb = &ctx->cfg_blocks[child];
+				child_live_in = bb_live + (len * child);
 
 				IR_BITSET_FOREACH(live, len, i) {
+					ir_bitset_incl(child_live_in, i);
 					ir_add_live_range(ctx, &unused, i, IR_VOID,
 						IR_START_LIVE_POS_FROM_REF(child_bb->start),
 						IR_END_LIVE_POS_FROM_REF(child_bb->end));
@@ -682,7 +746,19 @@ int ir_compute_live_ranges(ir_ctx *ctx)
 				}
 				child = child_bb->dom_next_child;
 			}
-		} while ((child = ir_bitqueue_pop(&queue)) >= 0);
+		} while ((child = ir_bitqueue_pop(&queue)) != (uint32_t)-1);
 		}
 	}
+
+	if (ctx->entries) {
+		for (i = 0; i < ctx->entries_count; i++) {
+			b = ctx->entries[i];
+			bb = &ctx->cfg_blocks[b];
+			live = bb_live + (len * b);
+			ir_add_osr_entry_loads(ctx, bb, live, len, b);
+		}
+		if (ctx->osr_entry_loads) {
+			ir_list_push((ir_list*)ctx->osr_entry_loads, 0);
+		}
+	}
 
@@ -720,7 +796,8 @@ void ir_free_live_intervals(ir_live_interval **live_intervals, int count)
 	ir_live_interval *ival, *next;
 	ir_use_pos *use_pos;
 
-	count += IR_REG_NUM + 1;
+	/* vregs + tmp + fixed + SCRATCH + ALL */
+	count += IR_REG_NUM + 2;
 	for (i = 0; i <= count; i++) {
 		ival = live_intervals[i];
 		while (ival) {
@@ -861,6 +938,7 @@ static bool ir_try_coalesce(ir_ctx *ctx, ir_live_range **unused, ir_ref from, ir
 static void ir_add_phi_move(ir_ctx *ctx, uint32_t b, ir_ref from, ir_ref to)
 {
 	if (IR_IS_CONST_REF(from) || ctx->vregs[from] != ctx->vregs[to]) {
+		ctx->cfg_blocks[b].flags &= ~IR_BB_EMPTY;
 		ctx->cfg_blocks[b].flags |= IR_BB_DESSA_MOVES;
 		ctx->flags |= IR_LR_HAVE_DESSA_MOVES;
 #if 0
@@ -1148,7 +1226,8 @@ int ir_coalesce(ir_ctx *ctx)
 	n--;
 	if (n != ctx->vregs_count) {
 		j = ctx->vregs_count - n;
-		for (i = n + 1; i <= n + IR_REG_NUM + 1; i++) {
+		/* vregs + tmp + fixed + SCRATCH + ALL */
+		for (i = n + 1; i <= n + IR_REG_NUM + 2; i++) {
 			ctx->live_intervals[i] = ctx->live_intervals[i + j];
 			if (ctx->live_intervals[i]) {
 				ctx->live_intervals[i]->vreg = i;
@@ -1191,6 +1270,7 @@ int ir_compute_dessa_moves(ir_ctx *ctx)
 			for (j = 2; j <= k; j++) {
 				if (IR_IS_CONST_REF(ir_insn_op(insn, j)) || ctx->vregs[ir_insn_op(insn, j)] != ctx->vregs[use]) {
 					int pred = ctx->cfg_edges[bb->predecessors + (j-2)];
+					ctx->cfg_blocks[pred].flags &= ~IR_BB_EMPTY;
 					ctx->cfg_blocks[pred].flags |= IR_BB_DESSA_MOVES;
 					ctx->flags |= IR_LR_HAVE_DESSA_MOVES;
 				}
@@ -1794,9 +1874,15 @@ static ir_reg ir_try_allocate_free_reg(ir_ctx *ctx, ir_live_interval *ival, ir_l
 		/* freeUntilPos[it.reg] = 0 */
 		reg = other->reg;
 		IR_ASSERT(reg >= 0);
-		if (reg == IR_REG_NUM) {
-			ir_regset regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
+		if (reg >= IR_REG_SCRATCH) {
+			ir_regset regset;
+			if (reg == IR_REG_SCRATCH) {
+				regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
+			} else {
+				IR_ASSERT(reg == IR_REG_ALL);
+				regset = available;
+			}
 			IR_REGSET_FOREACH(regset, reg) {
 				freeUntilPos[reg] = 0;
 			} IR_REGSET_FOREACH_END();
@@ -1816,11 +1902,17 @@ static ir_reg ir_try_allocate_free_reg(ir_ctx *ctx, ir_live_interval *ival, ir_l
 			/* freeUntilPos[it.reg] = next intersection of it with current */
 			reg = other->reg;
 			IR_ASSERT(reg >= 0);
-			if (reg == IR_REG_NUM) {
+			if (reg >= IR_REG_SCRATCH) {
 				next = ir_ivals_overlap(&ival->range, other->current_range);
 				if (next) {
-					ir_regset regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
+					ir_regset regset;
+					if (reg == IR_REG_SCRATCH) {
+						regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
+					} else {
+						IR_ASSERT(reg == IR_REG_ALL);
+						regset = available;
+					}
 					IR_REGSET_FOREACH(regset, reg) {
 						if (next < freeUntilPos[reg]) {
 							freeUntilPos[reg] = next;
@@ -1967,9 +2059,15 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li
 		/* nextUsePos[it.reg] = next use of it after start of current */
 		reg = other->reg;
 		IR_ASSERT(reg >= 0);
-		if (reg == IR_REG_NUM) {
-			ir_regset regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
+		if (reg >= IR_REG_SCRATCH) {
+			ir_regset regset;
+			if (reg == IR_REG_SCRATCH) {
+				regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
+			} else {
+				IR_ASSERT(reg == IR_REG_ALL);
+				regset = available;
+			}
 			IR_REGSET_FOREACH(regset, reg) {
 				blockPos[reg] = nextUsePos[reg] = 0;
 			} IR_REGSET_FOREACH_END();
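
All of these scan loops repeat one expansion: a fixed interval tagged with a pseudo-register stands for a whole set of physical registers, not a single one. Condensed into one illustrative helper (the patch open-codes this at each site):

	static ir_regset pseudo_reg_to_regset(int8_t reg, ir_regset available)
	{
		if (reg == IR_REG_SCRATCH) {
			/* the ABI scratch set, restricted to the registers under consideration */
			return IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
		}
		IR_ASSERT(reg == IR_REG_ALL);
		return available; /* an ENTRY conceptually clobbers every register */
	}
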
@@ -1993,12 +2091,18 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li
 			/* freeUntilPos[it.reg] = next intersection of it with current */
 			reg = other->reg;
 			IR_ASSERT(reg >= 0);
-			if (reg == IR_REG_NUM) {
+			if (reg >= IR_REG_SCRATCH) {
 				ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range);
 
 				if (overlap) {
-					ir_regset regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
+					ir_regset regset;
+					if (reg == IR_REG_SCRATCH) {
+						regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
+					} else {
+						IR_ASSERT(reg == IR_REG_ALL);
+						regset = available;
+					}
 					IR_REGSET_FOREACH(regset, reg) {
 						if (overlap < nextUsePos[reg]) {
 							nextUsePos[reg] = overlap;
@@ -2399,7 +2503,8 @@ static int ir_linear_scan(ir_ctx *ctx)
 			ival = ival->next;
 		}
 
-	for (j = ctx->vregs_count + 1; j <= ctx->vregs_count + IR_REG_NUM + 1; j++) {
+	/* vregs + tmp + fixed + SCRATCH + ALL */
+	for (j = ctx->vregs_count + 1; j <= ctx->vregs_count + IR_REG_NUM + 2; j++) {
 		ival = ctx->live_intervals[j];
 		if (ival) {
 			ival->current_range = &ival->range;
diff --git a/ir_x86.dasc b/ir_x86.dasc
index 1c47f16..3a213e8 100644
--- a/ir_x86.dasc
+++ b/ir_x86.dasc
@@ -418,8 +418,13 @@ static const int8_t *_ir_fp_fc_reg_params = NULL;
 
 const char *ir_reg_name(int8_t reg, ir_type type)
 {
-	if (reg == IR_REG_NUM) {
-		return "SCRATCH";
+	if (reg >= IR_REG_NUM) {
+		if (reg == IR_REG_SCRATCH) {
+			return "SCRATCH";
+		} else {
+			IR_ASSERT(reg == IR_REG_ALL);
+			return "ALL";
+		}
 	}
 	IR_ASSERT(reg >= 0 && reg < IR_REG_NUM);
 	if (type == IR_VOID) {
@@ -794,7 +799,7 @@ cmp_fp:
 			def_reg = IR_REG_FP_RET1;
 #endif
 		}
-		constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_NUM, IR_USE_SUB_REF, IR_DEF_SUB_REF);
+		constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_SCRATCH, IR_USE_SUB_REF, IR_DEF_SUB_REF);
 		n = 1;
 		IR_FALLTHROUGH;
 	case IR_TAILCALL:
@@ -1590,14 +1595,12 @@ store_int:
 			return IR_RSTORE;
 		case IR_START:
 		case IR_BEGIN:
-//		case IR_END:
 		case IR_IF_TRUE:
 		case IR_IF_FALSE:
 		case IR_CASE_VAL:
 		case IR_CASE_DEFAULT:
 		case IR_MERGE:
 		case IR_LOOP_BEGIN:
-//		case IR_LOOP_END:
 		case IR_UNREACHABLE:
 			return IR_SKIP;
 		case IR_RETURN:
@@ -5709,7 +5712,7 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
 					void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]);
 
 					|	.aword &addr
-					bb->flags |= IR_BB_SKIP;
+					bb->flags |= IR_BB_EMPTY;
 					continue;
 				}
 			}
@@ -7559,7 +7562,8 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
 
 	ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count);
 	memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count);
-	ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 1, sizeof(ir_live_interval*));
+	/* vregs + tmp + fixed + SCRATCH + ALL */
+	ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*));
 	for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
 		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
 		for (i = bb->start, insn = ctx->ir_base + i, rule = ctx->rules + i; i <= bb->end;) {
@@ -7665,7 +7669,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
 						ir_reg reg = ir_get_free_reg(constraints.tmp_regs[n].type, available);
 						IR_REGSET_EXCL(available, reg);
 						ctx->regs[i][constraints.tmp_regs[n].num] = reg;
-					} else if (constraints.tmp_regs[n].reg == IR_REG_NUM) {
+					} else if (constraints.tmp_regs[n].reg == IR_REG_SCRATCH) {
 						available = IR_REGSET_DIFFERENCE(available, IR_REGSET_SCRATCH);
 					} else {
 						IR_REGSET_EXCL(available, constraints.tmp_regs[n].reg);
@@ -7918,10 +7922,6 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
 		data.stack_frame_alignment = 0;
 	}
 
-	if (ctx->entries_count) {
-		ctx->entries = ir_mem_malloc(ctx->entries_count * sizeof(ir_ref));
-	}
-
 	Dst = &data.dasm_state;
 	data.dasm_state = NULL;
 	dasm_init(&data.dasm_state, DASM_MAXSECTION);
@@ -7947,12 +7947,7 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
 	for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
 		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
-		if (bb->flags & IR_BB_SKIP) {
-			continue;
-		} else if (ctx->prev_ref[bb->end] == bb->start
-				&& bb->successors_count == 1
-				&& (ctx->ir_base[bb->end].op == IR_END || ctx->ir_base[bb->end].op == IR_LOOP_END)
-				&& !(bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_DESSA_MOVES))) {
+		if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) {
 			continue;
 		}
 		|=>b:
@@ -8388,6 +8383,9 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
 					break;
 				case IR_END:
 				case IR_LOOP_END:
+					if (bb->flags & IR_BB_OSR_ENTRY_LOADS) {
+						ir_emit_osr_entry_loads(ctx, b, bb);
+					}
 					if (bb->flags & IR_BB_DESSA_MOVES) {
 						data.dessa_from_block = b;
 						ir_gen_dessa_moves(ctx, b, ir_emit_dessa_move);
diff --git a/ir_x86.h b/ir_x86.h
index b916bd4..ee642ce 100644
--- a/ir_x86.h
+++ b/ir_x86.h
@@ -86,6 +86,8 @@ enum _ir_reg {
 #define IR_REG_FP_FIRST IR_REG_XMM0
 #define IR_REG_GP_LAST  (IR_REG_FP_FIRST - 1)
 #define IR_REG_FP_LAST  (IR_REG_NUM - 1)
+#define IR_REG_SCRATCH  (IR_REG_NUM)     /* special name for regset */
+#define IR_REG_ALL      (IR_REG_NUM + 1) /* special name for regset */
 
 #define IR_REGSET_64BIT 0
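
One detail worth calling out: ir_add_osr_entry_loads() in ir_ra.c above pushes a group header with a zero count and backpatches it with the new ir_list_set() once the refs are known. A sketch of that pattern in isolation, under the list layout described earlier; the function name is illustrative:

	static void osr_group_push(ir_list *list, uint32_t b, const ir_ref *refs, uint32_t n)
	{
		uint32_t i, count_pos;

		ir_list_push(list, b);           /* block number              */
		count_pos = ir_list_len(list);
		ir_list_push(list, 0);           /* placeholder for the count */
		for (i = 0; i < n; i++) {
			ir_list_push(list, refs[i]);
		}
		ir_list_set(list, count_pos, n); /* backpatch the count       */
	}
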