From b37d4e04439dee3a22a4c5cef0b0db15c7596d49 Mon Sep 17 00:00:00 2001
From: Dmitry Stogov
Date: Fri, 16 Jun 2023 02:14:02 +0300
Subject: [PATCH] Allow usage of CPU stack slots for deoptimization

---
 ir.c            |  4 ++--
 ir.h            | 12 ++++++-----
 ir_aarch64.dasc |  8 ++++++--
 ir_dump.c       |  4 ++--
 ir_emit.c       | 14 ++++++++++++-
 ir_gcm.c        |  2 --
 ir_ra.c         | 53 ++++++++++++++++++++++++++++++-------------------
 ir_save.c       |  8 ++------
 ir_x86.dasc     |  8 ++++++--
 9 files changed, 71 insertions(+), 42 deletions(-)

diff --git a/ir.c b/ir.c
index f144173..1d4a59c 100644
--- a/ir.c
+++ b/ir.c
@@ -973,8 +973,8 @@ ir_ref ir_bind(ir_ctx *ctx, ir_ref var, ir_ref def)
 		ctx->binding = ir_mem_malloc(sizeof(ir_hashtab));
 		ir_hashtab_init(ctx->binding, 16);
 	}
-	/* Node may be bound to some VAR node or to some special spill slot (using negative "var") */
-	IR_ASSERT(var < 0 || (var < ctx->insns_count && ctx->ir_base[var].op == IR_VAR));
+	/* Node may be bound to some special spill slot (using negative "var") */
+	IR_ASSERT(var < 0);
 	if (!ir_hashtab_add(ctx->binding, def, var)) {
 		/* Add a copy with different binding */
 		def = ir_emit2(ctx, IR_OPT(IR_COPY, ctx->ir_base[def].type), def, 1);
diff --git a/ir.h b/ir.h
index a7cac0c..a469d93 100644
--- a/ir.h
+++ b/ir.h
@@ -685,13 +685,14 @@ int ir_gcm(ir_ctx *ctx);
 int ir_schedule(ir_ctx *ctx);
 
 /* Liveness & Register Allocation (implementation in ir_ra.c) */
-#define IR_REG_NONE          -1
-#define IR_REG_SPILL_LOAD    (1<<6)
-#define IR_REG_SPILL_STORE   (1<<6)
+#define IR_REG_NONE           -1
+#define IR_REG_SPILL_LOAD     (1<<6)
+#define IR_REG_SPILL_STORE    (1<<6)
+#define IR_REG_SPILL_SPECIAL  (1<<7)
 #define IR_REG_SPILLED(r) \
-	((r) & (IR_REG_SPILL_LOAD|IR_REG_SPILL_STORE))
+	((r) & (IR_REG_SPILL_LOAD|IR_REG_SPILL_STORE|IR_REG_SPILL_SPECIAL))
 #define IR_REG_NUM(r) \
-	((int8_t)((r) == IR_REG_NONE ? IR_REG_NONE : ((r) & ~(IR_REG_SPILL_LOAD|IR_REG_SPILL_STORE))))
+	((int8_t)((r) == IR_REG_NONE ? IR_REG_NONE : ((r) & ~(IR_REG_SPILL_LOAD|IR_REG_SPILL_STORE|IR_REG_SPILL_SPECIAL))))
 
 int ir_assign_virtual_registers(ir_ctx *ctx);
 int ir_compute_live_ranges(ir_ctx *ctx);
@@ -702,6 +703,7 @@ int ir_reg_alloc(ir_ctx *ctx);
 int ir_regs_number(void);
 bool ir_reg_is_int(int32_t reg);
 const char *ir_reg_name(int8_t reg, ir_type type);
+int32_t ir_get_spill_slot_offset(ir_ctx *ctx, ir_ref ref);
 
 /* Target CPU instruction selection and code generation (see ir_x86.c) */
 int ir_match(ir_ctx *ctx);
diff --git a/ir_aarch64.dasc b/ir_aarch64.dasc
index 9215264..3f74980 100644
--- a/ir_aarch64.dasc
+++ b/ir_aarch64.dasc
@@ -3139,6 +3139,7 @@ static void ir_emit_rload(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 {
 	ir_reg src_reg = insn->op2;
 	ir_type type = insn->type;
+	ir_reg fp;
 
 	if (IR_REGSET_IN(IR_REGSET_UNION((ir_regset)ctx->fixed_regset, IR_REGSET_FIXED), src_reg)) {
 		if (ctx->vregs[def]
@@ -3153,7 +3154,7 @@ static void ir_emit_rload(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 			/* op3 is used as a flag that the value is already stored in memory.
 			 * If op3 is set we don't have to store the value once again (in case of spilling) */
-			if (!insn->op3) {
+			if (!insn->op3 || insn->op3 != ir_ref_spill_slot(ctx, def, &fp) || fp != ctx->spill_base) {
 				ir_emit_store(ctx, type, def, src_reg);
 			}
 		} else {
@@ -3165,7 +3166,8 @@ static void ir_emit_rload(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 			ir_emit_fp_mov(ctx, type, def_reg, src_reg);
 		}
 	}
-	if (IR_REG_SPILLED(ctx->regs[def][0]) && !insn->op3) {
+	if (IR_REG_SPILLED(ctx->regs[def][0])
+	 && (!insn->op3 || insn->op3 != ir_ref_spill_slot(ctx, def, &fp) || fp != ctx->spill_base)) {
 		ir_emit_store(ctx, type, def, def_reg);
 	}
 }
@@ -4825,8 +4827,10 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
 	}
 	if (ctx->stack_frame_size > ctx->fixed_stack_frame_size) {
 		// TODO: report error to caller
+#ifdef IR_DEBUG_MESSAGES
 		fprintf(stderr, "IR Compilation Aborted: ctx->stack_frame_size > ctx->fixed_stack_frame_size at %s:%d\n",
 			__FILE__, __LINE__);
+#endif
 		return NULL;
 	}
 	ctx->stack_frame_size = ctx->fixed_stack_frame_size;
diff --git a/ir_dump.c b/ir_dump.c
index 0c2fbd5..c21a299 100644
--- a/ir_dump.c
+++ b/ir_dump.c
@@ -346,11 +346,11 @@ void ir_dump_live_ranges(const ir_ctx *ctx, FILE *f)
 		}
 		if (ival->next) {
 			fprintf(f, "\n\t");
-		} else if (ival->reg >= 0) {
+		} else if (ival->reg != IR_REG_NONE) {
 			fprintf(f, " ");
 		}
 		do {
-			if (ival->reg >= 0) {
+			if (ival->reg != IR_REG_NONE) {
 				fprintf(f, "[%%%s]", ir_reg_name(ival->reg, ival->type));
 			}
 			p = &ival->range;
diff --git a/ir_emit.c b/ir_emit.c
index 2ee9194..0cad097 100644
--- a/ir_emit.c
+++ b/ir_emit.c
@@ -374,7 +374,7 @@ static IR_NEVER_INLINE void ir_emit_osr_entry_loads(ir_ctx *ctx, int b, ir_block
 	for (i = 0; i < count; i++, pos++) {
 		ref = ir_list_at(list, pos);
 		IR_ASSERT(ref >= 0 && ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]);
-		if (ctx->live_intervals[ctx->vregs[ref]]->stack_spill_pos == -1) {
+		if (!(ctx->live_intervals[ctx->vregs[ref]]->flags & IR_LIVE_INTERVAL_SPILLED)) {
 			/* not spilled */
 			ir_reg reg = ctx->live_intervals[ctx->vregs[ref]]->reg;
 			ir_type type = ctx->ir_base[ref].type;
@@ -386,6 +386,8 @@ static IR_NEVER_INLINE void ir_emit_osr_entry_loads(ir_ctx *ctx, int b, ir_block
 			} else {
 				ir_emit_load_mem_fp(ctx, type, reg, ctx->spill_base, offset);
 			}
+		} else {
+			IR_ASSERT(ctx->live_intervals[ctx->vregs[ref]]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL);
 		}
 	}
 }
@@ -589,3 +591,13 @@ int ir_match(ir_ctx *ctx)
 
 	return 1;
 }
+
+int32_t ir_get_spill_slot_offset(ir_ctx *ctx, ir_ref ref)
+{
+	int32_t offset;
+
+	IR_ASSERT(ref >= 0 && ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]);
+	offset = ctx->live_intervals[ctx->vregs[ref]]->stack_spill_pos;
+	IR_ASSERT(offset != -1);
+	return IR_SPILL_POS_TO_OFFSET(offset);
+}
diff --git a/ir_gcm.c b/ir_gcm.c
index 24c94e4..8dc264b 100644
--- a/ir_gcm.c
+++ b/ir_gcm.c
@@ -127,8 +127,6 @@ static void ir_gcm_schedule_late(ir_ctx *ctx, uint32_t *_blocks, ir_bitset visit
 		uint32_t loop_depth = bb->loop_depth;
 
 		if (loop_depth) {
-// TODO: proper support for anti-dependencies between bound nodes ???
-//			&& !(ctx->binding && ir_binding_find(ctx, ref))) {
 			uint32_t flags;
 
 			use_list = &ctx->use_lists[ref];
diff --git a/ir_ra.c b/ir_ra.c
index a272c38..747813f 100644
--- a/ir_ra.c
+++ b/ir_ra.c
@@ -3223,22 +3223,10 @@ static void ir_assign_bound_spill_slots(ir_ctx *ctx)
 			if (ival
 			 && ival->stack_spill_pos == -1
 			 && (ival->next || ival->reg == IR_REG_NONE)) {
-				if (b->val < 0) {
-					/* special spill slot */
-					ival->stack_spill_pos = -b->val;
-					ival->flags |= IR_LIVE_INTERVAL_SPILLED | IR_LIVE_INTERVAL_SPILL_SPECIAL;
-				} else {
-					/* node is bound to VAR node */
-					ir_live_interval *var_ival;
-
-					IR_ASSERT(ctx->ir_base[b->val].op == IR_VAR);
-					var_ival = ctx->live_intervals[ctx->vregs[b->val]];
-					if (var_ival->stack_spill_pos == -1) {
-						var_ival->stack_spill_pos = ir_allocate_spill_slot(ctx, var_ival->type, ctx->data);
-					}
-					ival->stack_spill_pos = var_ival->stack_spill_pos;
-					ival->flags |= IR_LIVE_INTERVAL_SPILLED;
-				}
+				IR_ASSERT(b->val < 0);
+				/* special spill slot */
+				ival->stack_spill_pos = -b->val;
+				ival->flags |= IR_LIVE_INTERVAL_SPILLED | IR_LIVE_INTERVAL_SPILL_SPECIAL;
 			}
 		}
 		b++;
@@ -3541,7 +3529,7 @@ static void assign_regs(ir_ctx *ctx)
 		top_ival = ival = ctx->live_intervals[i];
 		if (ival) {
 			do {
-				if (ival->reg >= 0) {
+				if (ival->reg != IR_REG_NONE) {
 					use_pos = ival->use_pos;
 					while (use_pos) {
 						ref = IR_LIVE_POS_TO_REF(use_pos->pos);
@@ -3553,21 +3541,34 @@ static void assign_regs(ir_ctx *ctx)
 						 && (ctx->regs[ref][2] == IR_REG_NONE || IR_REG_NUM(ctx->regs[ref][2]) != reg)
 						 && (ctx->regs[ref][3] == IR_REG_NONE || IR_REG_NUM(ctx->regs[ref][3]) != reg)) {
 							/* load op1 directly into result (valid only when op1 register is not reused) */
-							ctx->regs[ref][1] = reg | IR_REG_SPILL_LOAD;
+							if (top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
+								ctx->regs[ref][1] = reg | IR_REG_SPILL_SPECIAL;
+							} else {
+								ctx->regs[ref][1] = reg | IR_REG_SPILL_LOAD;
+							}
 						}
 						if (top_ival->flags & IR_LIVE_INTERVAL_SPILLED) {
 							// TODO: Insert spill loads and stores in optimal positions (resolution)
 							if (use_pos->op_num == 0) {
-								reg |= IR_REG_SPILL_STORE;
+								if (top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
+									reg |= IR_REG_SPILL_SPECIAL;
+								} else {
+									reg |= IR_REG_SPILL_STORE;
+								}
 							} else {
 								if ((use_pos->flags & IR_USE_MUST_BE_IN_REG)
 								 || ctx->ir_base[ref].op == IR_CALL
 								 || ctx->ir_base[ref].op == IR_TAILCALL
+								 || ctx->ir_base[ref].op == IR_SNAPSHOT
 								 || (use_pos->op_num == 2
 								  && ctx->ir_base[ref].op1 == ctx->ir_base[ref].op2
 								  && IR_REG_NUM(ctx->regs[ref][1]) == reg)) {
-									reg |= IR_REG_SPILL_LOAD;
+									if (top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
+										reg |= IR_REG_SPILL_SPECIAL;
+									} else {
+										reg |= IR_REG_SPILL_LOAD;
+									}
 								} else {
 									/* fuse spill load (valid only when register is not reused) */
 									reg = IR_REG_NONE;
@@ -3579,6 +3580,18 @@ static void assign_regs(ir_ctx *ctx)
 						}
 						ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
 
+						use_pos = use_pos->next;
+					}
+				} else if ((top_ival->flags & IR_LIVE_INTERVAL_SPILLED)
+						&& !(top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL)) {
+					use_pos = ival->use_pos;
+					while (use_pos) {
+						ref = IR_LIVE_POS_TO_REF(use_pos->pos);
+						if (ctx->ir_base[ref].op == IR_SNAPSHOT) {
+							/* A reference to a CPU spill slot */
+							reg = IR_REG_SPILL_STORE | IR_REG_STACK_POINTER;
+							ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
+						}
 						use_pos = use_pos->next;
 					}
 				}
diff --git a/ir_save.c b/ir_save.c
index 88eae76..64e0019 100644
--- a/ir_save.c
+++ b/ir_save.c
@@ -113,12 +113,8 @@ void ir_save(const ir_ctx *ctx, FILE *f)
 		if (((flags & IR_OP_FLAG_DATA) || ((flags & IR_OP_FLAG_MEM) && insn->type != IR_VOID)) &&
 		    ctx->binding) {
 			ir_ref var = ir_binding_find(ctx, i);
 			if (var) {
-				if (var >= 0) {
-					fprintf(f, " # BIND(d_%d);", var);
-				} else {
-					// TODO: use callback ???
-					fprintf(f, " # BIND(%d);", var);
-				}
+				IR_ASSERT(var < 0);
+				fprintf(f, " # BIND(0x%x);", -var);
 			}
 		}
 		fprintf(f, "\n");
diff --git a/ir_x86.dasc b/ir_x86.dasc
index 2e2b753..f912efd 100644
--- a/ir_x86.dasc
+++ b/ir_x86.dasc
@@ -5452,6 +5452,7 @@ static void ir_emit_rload(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 {
 	ir_reg src_reg = insn->op2;
 	ir_type type = insn->type;
+	ir_reg fp;
 
 	if (IR_REGSET_IN(IR_REGSET_UNION((ir_regset)ctx->fixed_regset, IR_REGSET_FIXED), src_reg)) {
 		if (ctx->vregs[def]
@@ -5466,7 +5467,7 @@ static void ir_emit_rload(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 			/* op3 is used as a flag that the value is already stored in memory.
 			 * If op3 is set we don't have to store the value once again (in case of spilling) */
-			if (!insn->op3) {
+			if (!insn->op3 || insn->op3 != ir_ref_spill_slot(ctx, def, &fp) || fp != ctx->spill_base) {
 				ir_emit_store(ctx, type, def, src_reg);
 			}
 		} else {
@@ -5478,7 +5479,8 @@ static void ir_emit_rload(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 			ir_emit_fp_mov(ctx, type, def_reg, src_reg);
 		}
 	}
-	if (IR_REG_SPILLED(ctx->regs[def][0]) && !insn->op3) {
+	if (IR_REG_SPILLED(ctx->regs[def][0])
+	 && (!insn->op3 || insn->op3 != ir_ref_spill_slot(ctx, def, &fp) || fp != ctx->spill_base)) {
 		ir_emit_store(ctx, type, def, def_reg);
 	}
 }
@@ -7936,8 +7938,10 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
 	}
 	if (ctx->stack_frame_size > ctx->fixed_stack_frame_size) {
 		// TODO: report error to caller
+#ifdef IR_DEBUG_MESSAGES
 		fprintf(stderr, "IR Compilation Aborted: ctx->stack_frame_size > ctx->fixed_stack_frame_size at %s:%d\n",
 			__FILE__, __LINE__);
+#endif
 		return NULL;
 	}
 	ctx->stack_frame_size = ctx->fixed_stack_frame_size;
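
Usage note (not part of the patch): after this change ir_bind() accepts only
negative "var" references, which name caller-managed special spill slots
(the slot position becomes -var), and the new ir_get_spill_slot_offset()
lets the host map a spilled SSA value to a stack offset while walking a
SNAPSHOT during deoptimization. Below is a minimal host-side sketch; the
surrounding function and the -16 slot encoding are hypothetical, only
ir_bind() and ir_get_spill_slot_offset() come from this patch:

	#include "ir.h"

	/* Hypothetical helper: bind a value to a special spill slot, then
	 * recover its frame offset for a deoptimization record. */
	void host_example(ir_ctx *ctx, ir_ref val)
	{
		/* ir_bind() now asserts var < 0; the negative reference encodes
		 * a special spill slot (stack_spill_pos becomes -var, i.e. 16). */
		val = ir_bind(ctx, -16, val);

		/* ... ir_match(), ir_assign_virtual_registers(),
		 * ir_compute_live_ranges() and ir_reg_alloc() run here ... */

		/* For a SNAPSHOT input, ask the allocator where the spilled value
		 * lives. The result is a byte offset produced by
		 * IR_SPILL_POS_TO_OFFSET(); for intervals flagged
		 * IR_LIVE_INTERVAL_SPILL_SPECIAL it addresses the caller-managed
		 * CPU stack slot. */
		int32_t off = ir_get_spill_slot_offset(ctx, val);
		(void)off; /* record "off" in the host's deopt table */
	}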