Add necessary compensation loads for bounded nodes when entering a function through an OSR entry-point

This commit is contained in:
Dmitry Stogov 2023-03-21 13:45:37 +03:00
parent 7ddca6d7b6
commit 87dbdcea0d
12 changed files with 311 additions and 89 deletions

5
ir.c
View File

@ -325,6 +325,7 @@ void ir_init(ir_ctx *ctx, ir_ref consts_limit, ir_ref insns_limit)
ctx->snapshot_create = NULL; ctx->snapshot_create = NULL;
ctx->entries_count = 0; ctx->entries_count = 0;
ctx->entries = NULL; ctx->entries = NULL;
ctx->osr_entry_loads = NULL;
ctx->code_buffer = NULL; ctx->code_buffer = NULL;
ctx->code_buffer_size = 0; ctx->code_buffer_size = 0;
@ -390,6 +391,10 @@ void ir_free(ir_ctx *ctx)
if (ctx->entries) { if (ctx->entries) {
ir_mem_free(ctx->entries); ir_mem_free(ctx->entries);
} }
if (ctx->osr_entry_loads) {
ir_list_free((ir_list*)ctx->osr_entry_loads);
ir_mem_free(ctx->osr_entry_loads);
}
} }
ir_ref ir_unique_const_addr(ir_ctx *ctx, uintptr_t addr) ir_ref ir_unique_const_addr(ir_ctx *ctx, uintptr_t addr)

1
ir.h
View File

@ -542,6 +542,7 @@ struct _ir_ctx {
uint32_t jmp_table_offset; uint32_t jmp_table_offset;
uint32_t entries_count; uint32_t entries_count;
ir_ref *entries; ir_ref *entries;
void *osr_entry_loads;
void *code_buffer; void *code_buffer;
size_t code_buffer_size; size_t code_buffer_size;
ir_strtab strtab; ir_strtab strtab;

View File

@ -200,8 +200,13 @@ static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS] = {
const char *ir_reg_name(int8_t reg, ir_type type) const char *ir_reg_name(int8_t reg, ir_type type)
{ {
if (reg == IR_REG_NUM) { if (reg >= IR_REG_NUM) {
return "SCRATCH"; if (reg == IR_REG_SCRATCH) {
return "SCRATCH";
} else {
IR_ASSERT(reg == IR_REG_ALL);
return "ALL";
}
} }
IR_ASSERT(reg >= 0 && reg < IR_REG_NUM); IR_ASSERT(reg >= 0 && reg < IR_REG_NUM);
if (type == IR_VOID) { if (type == IR_VOID) {
@ -473,7 +478,7 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co
case IR_CALL: case IR_CALL:
insn = &ctx->ir_base[ref]; insn = &ctx->ir_base[ref];
def_reg = (IR_IS_TYPE_INT(insn->type)) ? IR_REG_INT_RET1 : IR_REG_FP_RET1; def_reg = (IR_IS_TYPE_INT(insn->type)) ? IR_REG_INT_RET1 : IR_REG_FP_RET1;
constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_NUM, IR_USE_SUB_REF, IR_DEF_SUB_REF); constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_SCRATCH, IR_USE_SUB_REF, IR_DEF_SUB_REF);
n = 1; n = 1;
IR_FALLTHROUGH; IR_FALLTHROUGH;
case IR_TAILCALL: case IR_TAILCALL:
@ -818,14 +823,12 @@ binop_fp:
return IR_RSTORE; return IR_RSTORE;
case IR_START: case IR_START:
case IR_BEGIN: case IR_BEGIN:
// case IR_END:
case IR_IF_TRUE: case IR_IF_TRUE:
case IR_IF_FALSE: case IR_IF_FALSE:
case IR_CASE_VAL: case IR_CASE_VAL:
case IR_CASE_DEFAULT: case IR_CASE_DEFAULT:
case IR_MERGE: case IR_MERGE:
case IR_LOOP_BEGIN: case IR_LOOP_BEGIN:
// case IR_LOOP_END:
case IR_UNREACHABLE: case IR_UNREACHABLE:
return IR_SKIP; return IR_SKIP;
case IR_RETURN: case IR_RETURN:
@ -3419,7 +3422,7 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]); void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]);
| .addr &addr | .addr &addr
bb->flags |= IR_BB_SKIP; bb->flags |= IR_BB_EMPTY;
continue; continue;
} }
} }
@ -4578,7 +4581,8 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count); ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count);
memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count); memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count);
ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 1, sizeof(ir_live_interval*)); /* vregs + tmp + fixed + SCRATCH + ALL */
ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*));
for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) { for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
for (i = bb->start, insn = ctx->ir_base + i, rule = ctx->rules + i; i <= bb->end;) { for (i = bb->start, insn = ctx->ir_base + i, rule = ctx->rules + i; i <= bb->end;) {
@ -4683,7 +4687,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
ir_reg reg = ir_get_free_reg(constraints.tmp_regs[n].type, available); ir_reg reg = ir_get_free_reg(constraints.tmp_regs[n].type, available);
IR_REGSET_EXCL(available, reg); IR_REGSET_EXCL(available, reg);
ctx->regs[i][constraints.tmp_regs[n].num] = reg; ctx->regs[i][constraints.tmp_regs[n].num] = reg;
} else if (constraints.tmp_regs[n].reg == IR_REG_NUM) { } else if (constraints.tmp_regs[n].reg == IR_REG_SCRATCH) {
available = IR_REGSET_DIFFERENCE(available, IR_REGSET_SCRATCH); available = IR_REGSET_DIFFERENCE(available, IR_REGSET_SCRATCH);
} else { } else {
IR_REGSET_EXCL(available, constraints.tmp_regs[n].reg); IR_REGSET_EXCL(available, constraints.tmp_regs[n].reg);
@ -4924,11 +4928,6 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
data.stack_frame_alignment = 0; data.stack_frame_alignment = 0;
} }
/* For all entries */
if (ctx->entries_count) {
ctx->entries = ir_mem_malloc(ctx->entries_count * sizeof(ir_ref));
}
Dst = &data.dasm_state; Dst = &data.dasm_state;
data.dasm_state = NULL; data.dasm_state = NULL;
dasm_init(&data.dasm_state, DASM_MAXSECTION); dasm_init(&data.dasm_state, DASM_MAXSECTION);
@ -4946,12 +4945,7 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) { for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
if (bb->flags & IR_BB_SKIP) { if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) {
continue;
} else if (ctx->prev_ref[bb->end] == bb->start
&& bb->successors_count == 1
&& (ctx->ir_base[bb->end].op == IR_END || ctx->ir_base[bb->end].op == IR_LOOP_END)
&& !(bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_DESSA_MOVES))) {
continue; continue;
} }
|=>b: |=>b:
@ -5071,6 +5065,9 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
break; break;
case IR_END: case IR_END:
case IR_LOOP_END: case IR_LOOP_END:
if (bb->flags & IR_BB_OSR_ENTRY_LOADS) {
ir_emit_osr_entry_loads(ctx, b, bb);
}
if (bb->flags & IR_BB_DESSA_MOVES) { if (bb->flags & IR_BB_DESSA_MOVES) {
data.dessa_from_block = b; data.dessa_from_block = b;
ir_gen_dessa_moves(ctx, b, ir_emit_dessa_move); ir_gen_dessa_moves(ctx, b, ir_emit_dessa_move);

View File

@ -93,6 +93,8 @@ enum _ir_reg {
#define IR_REG_FP_FIRST IR_REG_V0 #define IR_REG_FP_FIRST IR_REG_V0
#define IR_REG_GP_LAST (IR_REG_FP_FIRST - 1) #define IR_REG_GP_LAST (IR_REG_FP_FIRST - 1)
#define IR_REG_FP_LAST (IR_REG_NUM - 1) #define IR_REG_FP_LAST (IR_REG_NUM - 1)
#define IR_REG_SCRATCH (IR_REG_NUM) /* special name for regset */
#define IR_REG_ALL (IR_REG_NUM + 1) /* special name for regset */
#define IR_REGSET_64BIT 1 #define IR_REGSET_64BIT 1

View File

@ -866,18 +866,7 @@ int ir_schedule_blocks(ir_ctx *ctx)
blocks.pos = 0; blocks.pos = 0;
list = ir_mem_malloc(sizeof(uint32_t) * (ctx->cfg_blocks_count + 1) * 2); list = ir_mem_malloc(sizeof(uint32_t) * (ctx->cfg_blocks_count + 1) * 2);
map = list + (ctx->cfg_blocks_count + 1); map = list + (ctx->cfg_blocks_count + 1);
for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) { for (b = 1; b <= ctx->cfg_blocks_count; b++) {
if (ctx->prev_ref[bb->end] == bb->start
&& bb->successors_count == 1
&& (ctx->ir_base[bb->end].op == IR_END || ctx->ir_base[bb->end].op == IR_LOOP_END)
&& !(bb->flags & IR_BB_DESSA_MOVES)) {
bb->flags |= IR_BB_EMPTY;
if ((ctx->flags & IR_MERGE_EMPTY_ENTRIES) && (bb->flags & IR_BB_ENTRY)) {
if (ctx->cfg_edges[bb->successors] == b + 1) {
(bb + 1)->flags |= IR_BB_PREV_EMPTY_ENTRY;
}
}
}
ir_bitset_incl(blocks.set, b); ir_bitset_incl(blocks.set, b);
} }
@ -1012,6 +1001,22 @@ int ir_schedule_blocks(ir_ctx *ctx)
} }
ir_mem_free(ctx->cfg_blocks); ir_mem_free(ctx->cfg_blocks);
ctx->cfg_blocks = cfg_blocks; ctx->cfg_blocks = cfg_blocks;
if (ctx->osr_entry_loads) {
ir_list *list = (ir_list*)ctx->osr_entry_loads;
uint32_t pos = 0, count;
while (1) {
b = ir_list_at(list, pos);
if (b == 0) {
break;
}
ir_list_set(list, pos, map[b]);
pos++;
count = ir_list_at(list, pos);
pos += count + 1;
}
}
} }
ir_mem_free(list); ir_mem_free(list);
@ -1028,10 +1033,7 @@ uint32_t ir_skip_empty_target_blocks(ir_ctx *ctx, uint32_t b)
while (1) { while (1) {
bb = &ctx->cfg_blocks[b]; bb = &ctx->cfg_blocks[b];
if (ctx->prev_ref[bb->end] == bb->start if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) {
&& bb->successors_count == 1
&& (ctx->ir_base[bb->end].op == IR_END || ctx->ir_base[bb->end].op == IR_LOOP_END)
&& !(bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_DESSA_MOVES))) {
b = ctx->cfg_edges[bb->successors]; b = ctx->cfg_edges[bb->successors];
} else { } else {
break; break;
@ -1051,10 +1053,7 @@ uint32_t ir_skip_empty_next_blocks(ir_ctx *ctx, uint32_t b)
bb = &ctx->cfg_blocks[b]; bb = &ctx->cfg_blocks[b];
if (ctx->prev_ref[bb->end] == bb->start if ((bb->flags & (IR_BB_START|IR_BB_EMPTY)) == IR_BB_EMPTY) {
&& bb->successors_count == 1
&& (ctx->ir_base[bb->end].op == IR_END || ctx->ir_base[bb->end].op == IR_LOOP_END)
&& !(bb->flags & (IR_BB_START|/*IR_BB_ENTRY|*/IR_BB_DESSA_MOVES))) {
b++; b++;
} else { } else {
break; break;

View File

@ -250,6 +250,30 @@ void ir_dump_cfg(ir_ctx *ctx, FILE *f)
if (bb->loop_depth != 0) { if (bb->loop_depth != 0) {
fprintf(f, "\tloop_depth=%d\n", bb->loop_depth); fprintf(f, "\tloop_depth=%d\n", bb->loop_depth);
} }
if (bb->flags & IR_BB_OSR_ENTRY_LOADS) {
ir_list *list = (ir_list*)ctx->osr_entry_loads;
uint32_t pos = 0, i, count;
IR_ASSERT(list);
while (1) {
i = ir_list_at(list, pos);
if (b == i) {
break;
}
IR_ASSERT(i != 0); /* end marker */
pos++;
count = ir_list_at(list, pos);
pos += count + 1;
}
pos++;
count = ir_list_at(list, pos);
pos++;
for (i = 0; i < count; i++, pos++) {
ir_ref ref = ir_list_at(list, pos);
fprintf(f, "\tOSR_ENTRY_LOAD=d_%d\n", ref);
}
}
if (bb->flags & IR_BB_DESSA_MOVES) { if (bb->flags & IR_BB_DESSA_MOVES) {
ctx->data = f; ctx->data = f;
ir_gen_dessa_moves(ctx, b, ir_dump_dessa_move); ir_gen_dessa_moves(ctx, b, ir_dump_dessa_move);
@ -370,7 +394,7 @@ void ir_dump_live_ranges(ir_ctx *ctx, FILE *f)
} }
} }
#if 1 #if 1
n = ctx->vregs_count + ir_regs_number() + 1; n = ctx->vregs_count + ir_regs_number() + 2;
for (i = ctx->vregs_count + 1; i <= n; i++) { for (i = ctx->vregs_count + 1; i <= n; i++) {
ir_live_interval *ival = ctx->live_intervals[i]; ir_live_interval *ival = ctx->live_intervals[i];

102
ir_emit.c
View File

@ -311,6 +311,10 @@ static void *ir_jmp_addr(ir_ctx *ctx, ir_insn *insn, ir_insn *addr_insn)
# pragma GCC diagnostic pop # pragma GCC diagnostic pop
#endif #endif
/* Forward Declarations */
static void ir_emit_osr_entry_loads(ir_ctx *ctx, int b, ir_block *bb);
#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) #if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
# include "ir_emit_x86.h" # include "ir_emit_x86.h"
#elif defined(IR_TARGET_AARCH64) #elif defined(IR_TARGET_AARCH64)
@ -319,31 +323,107 @@ static void *ir_jmp_addr(ir_ctx *ctx, ir_insn *insn, ir_insn *addr_insn)
# error "Unknown IR target" # error "Unknown IR target"
#endif #endif
/*
 * Emit the register loads recorded for the OSR entry block "b".
 *
 * ctx->osr_entry_loads is read as a flat list of records of the form
 *     <block number>, <count>, <ref 1> ... <ref count>
 * with a block number of 0 acting as the end marker. The record whose block
 * number equals "b" must exist (reaching the end marker is asserted against).
 *
 * For every ref in that record whose live interval has no stack spill slot
 * (stack_spill_pos == -1), a load into the interval's register is emitted
 * from ctx->spill_base plus the negated result of ir_binding_find().
 * Refs whose interval has a spill slot get no load.
 *
 * "bb" is not used by this function.
 */
static IR_NEVER_INLINE void ir_emit_osr_entry_loads(ir_ctx *ctx, int b, ir_block *bb)
{
	ir_list *loads = (ir_list*)ctx->osr_entry_loads;
	int pos = 0, count, i;

	IR_ASSERT(ctx->binding);
	IR_ASSERT(loads);

	/* Step over whole records until the one that belongs to block "b" */
	while ((i = ir_list_at(loads, pos)) != b) {
		IR_ASSERT(i != 0); /* end marker */
		count = ir_list_at(loads, pos + 1);
		pos += count + 2;
	}

	/* pos is at the block number; the count follows, then the refs */
	count = ir_list_at(loads, pos + 1);
	pos += 2;

	for (i = 0; i < count; i++) {
		ir_ref ref = ir_list_at(loads, pos + i);
		ir_live_interval *ival;
		ir_type type;
		int32_t offset;

		IR_ASSERT(ref >= 0 && ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]);
		ival = ctx->live_intervals[ctx->vregs[ref]];
		if (ival->stack_spill_pos != -1) {
			/* spilled: nothing to load into a register */
			continue;
		}

		/* not spilled */
		type = ctx->ir_base[ref].type;
		offset = -ir_binding_find(ctx, ref);
		IR_ASSERT(offset > 0);
		if (IR_IS_TYPE_INT(type)) {
			ir_emit_load_mem_int(ctx, type, ival->reg, ctx->spill_base, offset);
		} else {
			ir_emit_load_mem_fp(ctx, type, ival->reg, ctx->spill_base, offset);
		}
	}
}
int ir_match(ir_ctx *ctx) int ir_match(ir_ctx *ctx)
{ {
uint32_t b; uint32_t b;
ir_ref i; ir_ref start, ref, *prev_ref;
ir_block *bb; ir_block *bb;
ir_insn *insn;
if (!ctx->prev_ref) {
ir_build_prev_refs(ctx);
}
ctx->rules = ir_mem_calloc(ctx->insns_count, sizeof(uint32_t)); ctx->rules = ir_mem_calloc(ctx->insns_count, sizeof(uint32_t));
prev_ref = ctx->prev_ref;
if (!prev_ref) {
ir_build_prev_refs(ctx);
prev_ref = ctx->prev_ref;
}
for (b = ctx->cfg_blocks_count, bb = ctx->cfg_blocks + b; b > 0; b--, bb--) { for (b = ctx->cfg_blocks_count, bb = ctx->cfg_blocks + b; b > 0; b--, bb--) {
IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
start = bb->start;
if (bb->flags & IR_BB_ENTRY) { if (bb->flags & IR_BB_ENTRY) {
ir_insn *insn = &ctx->ir_base[bb->start]; insn = &ctx->ir_base[start];
IR_ASSERT(insn->op == IR_ENTRY); IR_ASSERT(insn->op == IR_ENTRY);
insn->op3 = ctx->entries_count++; insn->op3 = ctx->entries_count++;
} }
for (i = bb->end; i > bb->start; i = ctx->prev_ref[i]) { ctx->rules[start] = IR_SKIP;
if (!ctx->rules[i]) { ref = bb->end;
ctx->rules[i] = ir_match_insn(ctx, i, bb); insn = &ctx->ir_base[ref];
if (insn->op == IR_END || insn->op == IR_LOOP_END) {
ctx->rules[ref] = insn->op;
ref = prev_ref[ref];
if (ref == bb->start && bb->successors_count == 1) {
if (EXPECTED(!(bb->flags & IR_BB_ENTRY))) {
bb->flags |= IR_BB_EMPTY;
} else if (ctx->flags & IR_MERGE_EMPTY_ENTRIES) {
bb->flags |= IR_BB_EMPTY;
if (ctx->cfg_edges[bb->successors] == b + 1) {
(bb + 1)->flags |= IR_BB_PREV_EMPTY_ENTRY;
}
}
continue;
}
}
while (ref > start) {
if (!ctx->rules[ref]) {
ctx->rules[ref] = ir_match_insn(ctx, ref, bb);
}
ir_match_insn2(ctx, ref, bb);
ref = prev_ref[ref];
}
}
if (ctx->entries_count) {
ctx->entries = ir_mem_malloc(ctx->entries_count * sizeof(ir_ref));
for (b = ctx->cfg_blocks_count, bb = ctx->cfg_blocks + b; b > 0; b--, bb--) {
if (bb->flags & IR_BB_ENTRY) {
ir_ref i = bb->start;
ir_insn *insn = ctx->ir_base + i;
ctx->entries[insn->op3] = b;
} }
ir_match_insn2(ctx, i, bb);
} }
ctx->rules[i] = IR_SKIP;
} }
return 1; return 1;

View File

@ -712,6 +712,12 @@ static int ir_emit_func(ir_ctx *ctx, FILE *f)
vars = ir_bitset_malloc(ctx->vregs_count + 1); vars = ir_bitset_malloc(ctx->vregs_count + 1);
for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) { for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
if (ctx->prev_ref[bb->end] == bb->start
&& bb->successors_count == 1
&& (ctx->ir_base[bb->end].op == IR_END || ctx->ir_base[bb->end].op == IR_LOOP_END)
&& !(bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_DESSA_MOVES))) {
bb->flags |= IR_BB_EMPTY;
}
for (i = bb->start, insn = ctx->ir_base + i; i <= bb->end;) { for (i = bb->start, insn = ctx->ir_base + i; i <= bb->end;) {
if (ctx->vregs[i]) { if (ctx->vregs[i]) {
if (!ir_bitset_in(vars, ctx->vregs[i])) { if (!ir_bitset_in(vars, ctx->vregs[i])) {
@ -750,10 +756,7 @@ static int ir_emit_func(ir_ctx *ctx, FILE *f)
for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) { for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
if (ctx->prev_ref[bb->end] == bb->start if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) {
&& bb->successors_count == 1
&& (ctx->ir_base[bb->end].op == IR_END || ctx->ir_base[bb->end].op == IR_LOOP_END)
&& !(bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_DESSA_MOVES))) {
continue; continue;
} }
if (bb->predecessors_count > 1 || (bb->predecessors_count == 1 && ctx->cfg_edges[bb->predecessors] != prev)) { if (bb->predecessors_count > 1 || (bb->predecessors_count == 1 && ctx->cfg_edges[bb->predecessors] != prev)) {

View File

@ -587,6 +587,12 @@ IR_ALWAYS_INLINE ir_ref ir_list_at(ir_list *l, uint32_t i)
return ir_array_at(&l->a, i); return ir_array_at(&l->a, i);
} }
/*
 * Overwrite the element at index "i" of list "l" with "val".
 *
 * The index must already be inside the list's current length; this is only
 * checked by the assertion below. The store itself is delegated to
 * ir_array_set_unchecked() on the list's backing array.
 */
IR_ALWAYS_INLINE void ir_list_set(ir_list *l, uint32_t i, ir_ref val)
{
IR_ASSERT(i < l->len);
ir_array_set_unchecked(&l->a, i, val);
}
/* Worklist (unique list) */ /* Worklist (unique list) */
typedef struct _ir_worklist { typedef struct _ir_worklist {
ir_list l; ir_list l;
@ -815,8 +821,8 @@ struct _ir_use_list {
#define IR_BB_IRREDUCIBLE_LOOP (1<<4) #define IR_BB_IRREDUCIBLE_LOOP (1<<4)
#define IR_BB_DESSA_MOVES (1<<5) /* translation out of SSA requires MOVEs */ #define IR_BB_DESSA_MOVES (1<<5) /* translation out of SSA requires MOVEs */
#define IR_BB_EMPTY (1<<6) #define IR_BB_EMPTY (1<<6)
#define IR_BB_SKIP (1<<7) /* skip during code generation */ #define IR_BB_PREV_EMPTY_ENTRY (1<<7)
#define IR_BB_PREV_EMPTY_ENTRY (1<<8) #define IR_BB_OSR_ENTRY_LOADS (1<<8) /* OSR Entry-point with register LOADs */
struct _ir_block { struct _ir_block {
uint32_t flags; uint32_t flags;

133
ir_ra.c
View File

@ -338,6 +338,65 @@ static void ir_add_phi_use(ir_ctx *ctx, int v, int op_num, ir_live_pos pos, ir_r
ir_add_use_pos(ctx, v, use_pos); ir_add_use_pos(ctx, v, use_pos);
} }
/*
 * Record which values have to be loaded when block "b" (described by "bb")
 * is entered through its ENTRY instruction.
 *
 * "live" is a bitset of "len" words holding the virtual registers that are
 * live at the block. For each of them the defining ref is derived from the
 * first use position of its live interval:
 *   - values defined by IR_PARAM are skipped;
 *   - when ctx->binding is set and ir_binding_find() returns a negative
 *     value for the ref, the ref is appended to ctx->osr_entry_loads;
 *   - anything else is reported on stderr and trips the assertion at the end.
 *
 * The list (allocated on first use) receives one record per block:
 *     <b>, <count>, <ref 1> ... <ref count>
 * The count slot is pushed as 0 and patched once all refs are known.
 * The first recorded ref also clears IR_BB_EMPTY and sets
 * IR_BB_OSR_ENTRY_LOADS on the block.
 */
static void ir_add_osr_entry_loads(ir_ctx *ctx, ir_block *bb, ir_bitset live, uint32_t len, uint32_t b)
{
	ir_list *list = (ir_list*)ctx->osr_entry_loads;
	int count = 0;
	bool ok = 1;
	ir_ref i;

	IR_BITSET_FOREACH(live, len, i) {
		/* Skip live references from ENTRY to PARAM. TODO: duplicate PARAM in each ENTRY ??? */
		ir_use_pos *first_use = ctx->live_intervals[i]->use_pos;
		ir_ref ref;

		if (first_use->flags & IR_PHI_USE) {
			ref = first_use->hint_ref;
		} else {
			ref = IR_LIVE_POS_TO_REF(first_use->pos);
		}
		if (first_use->op_num) {
			/* the first position is a use: follow the operand to the value itself */
			ref = ctx->ir_base[ref].ops[first_use->op_num];
		}
		if (ctx->ir_base[ref].op == IR_PARAM) {
			continue;
		}

		if (!ctx->binding || ir_binding_find(ctx, ref) >= 0) {
			/* no negative binding for this value: report it */
			fprintf(stderr, "ENTRY %d (block %i) - live var %d\n", ctx->ir_base[bb->start].op2, b, ref);
			ok = 0;
			continue;
		}

		/* We may load the value at OSR entry-point */
		if (count == 0) {
			bb->flags = (bb->flags & ~IR_BB_EMPTY) | IR_BB_OSR_ENTRY_LOADS;
			if (!ctx->osr_entry_loads) {
				list = ir_mem_malloc(sizeof(ir_list));
				ctx->osr_entry_loads = list;
				ir_list_init(list, 16);
			}
			/* record header: block number and a placeholder for the count */
			ir_list_push(list, b);
			ir_list_push(list, 0);
		}
		ir_list_push(list, ref);
		count++;
	} IR_BITSET_FOREACH_END();

	if (!ok) {
		IR_ASSERT(0);
	}

	if (count) {
		/* patch the placeholder that sits right before the "count" refs */
		ir_list_set(list, ir_list_len(list) - (count + 1), count);

#if 0
		/* ENTRY "clobbers" all registers */
		ir_ref ref = ctx->ir_base[bb->start].op1;
		ir_add_fixed_live_range(ctx, &unused, IR_REG_ALL,
			IR_DEF_LIVE_POS_FROM_REF(ref),
			IR_SAVE_LIVE_POS_FROM_REF(ref));
#endif
	}
}
int ir_compute_live_ranges(ir_ctx *ctx) int ir_compute_live_ranges(ir_ctx *ctx)
{ {
uint32_t b, i, j, k, n, succ, *p; uint32_t b, i, j, k, n, succ, *p;
@ -364,7 +423,8 @@ int ir_compute_live_ranges(ir_ctx *ctx)
#endif #endif
len = ir_bitset_len(ctx->vregs_count + 1); len = ir_bitset_len(ctx->vregs_count + 1);
bb_live = ir_mem_malloc((ctx->cfg_blocks_count + 1) * len * sizeof(ir_bitset_base_t)); bb_live = ir_mem_malloc((ctx->cfg_blocks_count + 1) * len * sizeof(ir_bitset_base_t));
ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 1, sizeof(ir_live_interval*)); /* vregs + tmp + fixed + SCRATCH + ALL */
ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*));
for (b = ctx->cfg_blocks_count; b > 0; b--) { for (b = ctx->cfg_blocks_count; b > 0; b--) {
bb = &ctx->cfg_blocks[b]; bb = &ctx->cfg_blocks[b];
IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
@ -440,6 +500,7 @@ int ir_compute_live_ranges(ir_ctx *ctx)
} }
} }
/* for each opd in live */ /* for each opd in live */
IR_BITSET_FOREACH(live, len, i) { IR_BITSET_FOREACH(live, len, i) {
/* intervals[opd].addRange(b.from, b.to) */ /* intervals[opd].addRange(b.from, b.to) */
@ -650,8 +711,9 @@ int ir_compute_live_ranges(ir_ctx *ctx)
&& !ir_bitset_empty(live, len)) { && !ir_bitset_empty(live, len)) {
/* variables live at loop header are alive at the whole loop body */ /* variables live at loop header are alive at the whole loop body */
uint32_t bb_set_len = ir_bitset_len(ctx->cfg_blocks_count + 1); uint32_t bb_set_len = ir_bitset_len(ctx->cfg_blocks_count + 1);
int child; uint32_t child;
ir_block *child_bb; ir_block *child_bb;
ir_bitset child_live_in;
if (!loops) { if (!loops) {
loops = ir_bitset_malloc(ctx->cfg_blocks_count + 1); loops = ir_bitset_malloc(ctx->cfg_blocks_count + 1);
@ -664,8 +726,10 @@ int ir_compute_live_ranges(ir_ctx *ctx)
child = b; child = b;
do { do {
child_bb = &ctx->cfg_blocks[child]; child_bb = &ctx->cfg_blocks[child];
child_live_in = bb_live + (len * child);
IR_BITSET_FOREACH(live, len, i) { IR_BITSET_FOREACH(live, len, i) {
ir_bitset_incl(child_live_in, i);
ir_add_live_range(ctx, &unused, i, IR_VOID, ir_add_live_range(ctx, &unused, i, IR_VOID,
IR_START_LIVE_POS_FROM_REF(child_bb->start), IR_START_LIVE_POS_FROM_REF(child_bb->start),
IR_END_LIVE_POS_FROM_REF(child_bb->end)); IR_END_LIVE_POS_FROM_REF(child_bb->end));
@ -682,7 +746,19 @@ int ir_compute_live_ranges(ir_ctx *ctx)
} }
child = child_bb->dom_next_child; child = child_bb->dom_next_child;
} }
} while ((child = ir_bitqueue_pop(&queue)) >= 0); } while ((child = ir_bitqueue_pop(&queue)) != (uint32_t)-1);
}
}
if (ctx->entries) {
for (i = 0; i < ctx->entries_count; i++) {
b = ctx->entries[i];
bb = &ctx->cfg_blocks[b];
live = bb_live + (len * b);
ir_add_osr_entry_loads(ctx, bb, live, len, b);
}
if (ctx->osr_entry_loads) {
ir_list_push((ir_list*)ctx->osr_entry_loads, 0);
} }
} }
@ -720,7 +796,8 @@ void ir_free_live_intervals(ir_live_interval **live_intervals, int count)
ir_live_interval *ival, *next; ir_live_interval *ival, *next;
ir_use_pos *use_pos; ir_use_pos *use_pos;
count += IR_REG_NUM + 1; /* vregs + tmp + fixed + SCRATCH + ALL */
count += IR_REG_NUM + 2;
for (i = 0; i <= count; i++) { for (i = 0; i <= count; i++) {
ival = live_intervals[i]; ival = live_intervals[i];
while (ival) { while (ival) {
@ -861,6 +938,7 @@ static bool ir_try_coalesce(ir_ctx *ctx, ir_live_range **unused, ir_ref from, ir
static void ir_add_phi_move(ir_ctx *ctx, uint32_t b, ir_ref from, ir_ref to) static void ir_add_phi_move(ir_ctx *ctx, uint32_t b, ir_ref from, ir_ref to)
{ {
if (IR_IS_CONST_REF(from) || ctx->vregs[from] != ctx->vregs[to]) { if (IR_IS_CONST_REF(from) || ctx->vregs[from] != ctx->vregs[to]) {
ctx->cfg_blocks[b].flags &= ~IR_BB_EMPTY;
ctx->cfg_blocks[b].flags |= IR_BB_DESSA_MOVES; ctx->cfg_blocks[b].flags |= IR_BB_DESSA_MOVES;
ctx->flags |= IR_LR_HAVE_DESSA_MOVES; ctx->flags |= IR_LR_HAVE_DESSA_MOVES;
#if 0 #if 0
@ -1148,7 +1226,8 @@ int ir_coalesce(ir_ctx *ctx)
n--; n--;
if (n != ctx->vregs_count) { if (n != ctx->vregs_count) {
j = ctx->vregs_count - n; j = ctx->vregs_count - n;
for (i = n + 1; i <= n + IR_REG_NUM + 1; i++) { /* vregs + tmp + fixed + SCRATCH + ALL */
for (i = n + 1; i <= n + IR_REG_NUM + 2; i++) {
ctx->live_intervals[i] = ctx->live_intervals[i + j]; ctx->live_intervals[i] = ctx->live_intervals[i + j];
if (ctx->live_intervals[i]) { if (ctx->live_intervals[i]) {
ctx->live_intervals[i]->vreg = i; ctx->live_intervals[i]->vreg = i;
@ -1191,6 +1270,7 @@ int ir_compute_dessa_moves(ir_ctx *ctx)
for (j = 2; j <= k; j++) { for (j = 2; j <= k; j++) {
if (IR_IS_CONST_REF(ir_insn_op(insn, j)) || ctx->vregs[ir_insn_op(insn, j)] != ctx->vregs[use]) { if (IR_IS_CONST_REF(ir_insn_op(insn, j)) || ctx->vregs[ir_insn_op(insn, j)] != ctx->vregs[use]) {
int pred = ctx->cfg_edges[bb->predecessors + (j-2)]; int pred = ctx->cfg_edges[bb->predecessors + (j-2)];
ctx->cfg_blocks[pred].flags &= ~IR_BB_EMPTY;
ctx->cfg_blocks[pred].flags |= IR_BB_DESSA_MOVES; ctx->cfg_blocks[pred].flags |= IR_BB_DESSA_MOVES;
ctx->flags |= IR_LR_HAVE_DESSA_MOVES; ctx->flags |= IR_LR_HAVE_DESSA_MOVES;
} }
@ -1794,9 +1874,15 @@ static ir_reg ir_try_allocate_free_reg(ir_ctx *ctx, ir_live_interval *ival, ir_l
/* freeUntilPos[it.reg] = 0 */ /* freeUntilPos[it.reg] = 0 */
reg = other->reg; reg = other->reg;
IR_ASSERT(reg >= 0); IR_ASSERT(reg >= 0);
if (reg == IR_REG_NUM) { if (reg >= IR_REG_SCRATCH) {
ir_regset regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH); ir_regset regset;
if (reg == IR_REG_SCRATCH) {
regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
} else {
IR_ASSERT(reg == IR_REG_ALL);
regset = available;
}
IR_REGSET_FOREACH(regset, reg) { IR_REGSET_FOREACH(regset, reg) {
freeUntilPos[reg] = 0; freeUntilPos[reg] = 0;
} IR_REGSET_FOREACH_END(); } IR_REGSET_FOREACH_END();
@ -1816,11 +1902,17 @@ static ir_reg ir_try_allocate_free_reg(ir_ctx *ctx, ir_live_interval *ival, ir_l
/* freeUntilPos[it.reg] = next intersection of it with current */ /* freeUntilPos[it.reg] = next intersection of it with current */
reg = other->reg; reg = other->reg;
IR_ASSERT(reg >= 0); IR_ASSERT(reg >= 0);
if (reg == IR_REG_NUM) { if (reg >= IR_REG_SCRATCH) {
next = ir_ivals_overlap(&ival->range, other->current_range); next = ir_ivals_overlap(&ival->range, other->current_range);
if (next) { if (next) {
ir_regset regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH); ir_regset regset;
if (reg == IR_REG_SCRATCH) {
regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
} else {
IR_ASSERT(reg == IR_REG_ALL);
regset = available;
}
IR_REGSET_FOREACH(regset, reg) { IR_REGSET_FOREACH(regset, reg) {
if (next < freeUntilPos[reg]) { if (next < freeUntilPos[reg]) {
freeUntilPos[reg] = next; freeUntilPos[reg] = next;
@ -1967,9 +2059,15 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li
/* nextUsePos[it.reg] = next use of it after start of current */ /* nextUsePos[it.reg] = next use of it after start of current */
reg = other->reg; reg = other->reg;
IR_ASSERT(reg >= 0); IR_ASSERT(reg >= 0);
if (reg == IR_REG_NUM) { if (reg >= IR_REG_SCRATCH) {
ir_regset regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH); ir_regset regset;
if (reg == IR_REG_SCRATCH) {
regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
} else {
IR_ASSERT(reg == IR_REG_ALL);
regset = available;
}
IR_REGSET_FOREACH(regset, reg) { IR_REGSET_FOREACH(regset, reg) {
blockPos[reg] = nextUsePos[reg] = 0; blockPos[reg] = nextUsePos[reg] = 0;
} IR_REGSET_FOREACH_END(); } IR_REGSET_FOREACH_END();
@ -1993,12 +2091,18 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li
/* freeUntilPos[it.reg] = next intersection of it with current */ /* freeUntilPos[it.reg] = next intersection of it with current */
reg = other->reg; reg = other->reg;
IR_ASSERT(reg >= 0); IR_ASSERT(reg >= 0);
if (reg == IR_REG_NUM) { if (reg >= IR_REG_SCRATCH) {
ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range); ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range);
if (overlap) { if (overlap) {
ir_regset regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH); ir_regset regset;
if (reg == IR_REG_SCRATCH) {
regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
} else {
IR_ASSERT(reg == IR_REG_ALL);
regset = available;
}
IR_REGSET_FOREACH(regset, reg) { IR_REGSET_FOREACH(regset, reg) {
if (overlap < nextUsePos[reg]) { if (overlap < nextUsePos[reg]) {
nextUsePos[reg] = overlap; nextUsePos[reg] = overlap;
@ -2399,7 +2503,8 @@ static int ir_linear_scan(ir_ctx *ctx)
ival = ival->next; ival = ival->next;
} }
for (j = ctx->vregs_count + 1; j <= ctx->vregs_count + IR_REG_NUM + 1; j++) { /* vregs + tmp + fixed + SCRATCH + ALL */
for (j = ctx->vregs_count + 1; j <= ctx->vregs_count + IR_REG_NUM + 2; j++) {
ival = ctx->live_intervals[j]; ival = ctx->live_intervals[j];
if (ival) { if (ival) {
ival->current_range = &ival->range; ival->current_range = &ival->range;

View File

@ -418,8 +418,13 @@ static const int8_t *_ir_fp_fc_reg_params = NULL;
const char *ir_reg_name(int8_t reg, ir_type type) const char *ir_reg_name(int8_t reg, ir_type type)
{ {
if (reg == IR_REG_NUM) { if (reg >= IR_REG_NUM) {
return "SCRATCH"; if (reg == IR_REG_SCRATCH) {
return "SCRATCH";
} else {
IR_ASSERT(reg == IR_REG_ALL);
return "ALL";
}
} }
IR_ASSERT(reg >= 0 && reg < IR_REG_NUM); IR_ASSERT(reg >= 0 && reg < IR_REG_NUM);
if (type == IR_VOID) { if (type == IR_VOID) {
@ -794,7 +799,7 @@ cmp_fp:
def_reg = IR_REG_FP_RET1; def_reg = IR_REG_FP_RET1;
#endif #endif
} }
constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_NUM, IR_USE_SUB_REF, IR_DEF_SUB_REF); constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_SCRATCH, IR_USE_SUB_REF, IR_DEF_SUB_REF);
n = 1; n = 1;
IR_FALLTHROUGH; IR_FALLTHROUGH;
case IR_TAILCALL: case IR_TAILCALL:
@ -1590,14 +1595,12 @@ store_int:
return IR_RSTORE; return IR_RSTORE;
case IR_START: case IR_START:
case IR_BEGIN: case IR_BEGIN:
// case IR_END:
case IR_IF_TRUE: case IR_IF_TRUE:
case IR_IF_FALSE: case IR_IF_FALSE:
case IR_CASE_VAL: case IR_CASE_VAL:
case IR_CASE_DEFAULT: case IR_CASE_DEFAULT:
case IR_MERGE: case IR_MERGE:
case IR_LOOP_BEGIN: case IR_LOOP_BEGIN:
// case IR_LOOP_END:
case IR_UNREACHABLE: case IR_UNREACHABLE:
return IR_SKIP; return IR_SKIP;
case IR_RETURN: case IR_RETURN:
@ -5709,7 +5712,7 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]); void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]);
| .aword &addr | .aword &addr
bb->flags |= IR_BB_SKIP; bb->flags |= IR_BB_EMPTY;
continue; continue;
} }
} }
@ -7559,7 +7562,8 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count); ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count);
memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count); memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count);
ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 1, sizeof(ir_live_interval*)); /* vregs + tmp + fixed + SCRATCH + ALL */
ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*));
for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) { for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
for (i = bb->start, insn = ctx->ir_base + i, rule = ctx->rules + i; i <= bb->end;) { for (i = bb->start, insn = ctx->ir_base + i, rule = ctx->rules + i; i <= bb->end;) {
@ -7665,7 +7669,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
ir_reg reg = ir_get_free_reg(constraints.tmp_regs[n].type, available); ir_reg reg = ir_get_free_reg(constraints.tmp_regs[n].type, available);
IR_REGSET_EXCL(available, reg); IR_REGSET_EXCL(available, reg);
ctx->regs[i][constraints.tmp_regs[n].num] = reg; ctx->regs[i][constraints.tmp_regs[n].num] = reg;
} else if (constraints.tmp_regs[n].reg == IR_REG_NUM) { } else if (constraints.tmp_regs[n].reg == IR_REG_SCRATCH) {
available = IR_REGSET_DIFFERENCE(available, IR_REGSET_SCRATCH); available = IR_REGSET_DIFFERENCE(available, IR_REGSET_SCRATCH);
} else { } else {
IR_REGSET_EXCL(available, constraints.tmp_regs[n].reg); IR_REGSET_EXCL(available, constraints.tmp_regs[n].reg);
@ -7918,10 +7922,6 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
data.stack_frame_alignment = 0; data.stack_frame_alignment = 0;
} }
if (ctx->entries_count) {
ctx->entries = ir_mem_malloc(ctx->entries_count * sizeof(ir_ref));
}
Dst = &data.dasm_state; Dst = &data.dasm_state;
data.dasm_state = NULL; data.dasm_state = NULL;
dasm_init(&data.dasm_state, DASM_MAXSECTION); dasm_init(&data.dasm_state, DASM_MAXSECTION);
@ -7947,12 +7947,7 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) { for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
if (bb->flags & IR_BB_SKIP) { if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) {
continue;
} else if (ctx->prev_ref[bb->end] == bb->start
&& bb->successors_count == 1
&& (ctx->ir_base[bb->end].op == IR_END || ctx->ir_base[bb->end].op == IR_LOOP_END)
&& !(bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_DESSA_MOVES))) {
continue; continue;
} }
|=>b: |=>b:
@ -8388,6 +8383,9 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
break; break;
case IR_END: case IR_END:
case IR_LOOP_END: case IR_LOOP_END:
if (bb->flags & IR_BB_OSR_ENTRY_LOADS) {
ir_emit_osr_entry_loads(ctx, b, bb);
}
if (bb->flags & IR_BB_DESSA_MOVES) { if (bb->flags & IR_BB_DESSA_MOVES) {
data.dessa_from_block = b; data.dessa_from_block = b;
ir_gen_dessa_moves(ctx, b, ir_emit_dessa_move); ir_gen_dessa_moves(ctx, b, ir_emit_dessa_move);

View File

@ -86,6 +86,8 @@ enum _ir_reg {
#define IR_REG_FP_FIRST IR_REG_XMM0 #define IR_REG_FP_FIRST IR_REG_XMM0
#define IR_REG_GP_LAST (IR_REG_FP_FIRST - 1) #define IR_REG_GP_LAST (IR_REG_FP_FIRST - 1)
#define IR_REG_FP_LAST (IR_REG_NUM - 1) #define IR_REG_FP_LAST (IR_REG_NUM - 1)
#define IR_REG_SCRATCH (IR_REG_NUM) /* special name for regset */
#define IR_REG_ALL (IR_REG_NUM + 1) /* special name for regset */
#define IR_REGSET_64BIT 0 #define IR_REGSET_64BIT 0