Refactor the ENTRY nodes

Now every ENTRY node has a "fake" input control edge.
Through this edge, all ENTRY nodes are dominated by the START node.
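The builder API changes to match: ir_ENTRY() now takes the source of the fake
edge explicitly. A minimal sketch of the new usage (hypothetical refs; the
entry number 0 and the name "end" are illustrative only):

    ir_START();
    /* ... build the main control path ... */
    ir_ref end = ir_END();   /* END, LOOP_END or any terminator may serve as src */
    ir_ENTRY(end, 0);        /* entry #0; its op1 is the fake edge to "end" */
    /* ... code reachable only through this extra entry point ... */

_ir_ENTRY() asserts that src is a terminator, an END or a LOOP_END, so every
ENTRY chains back to START and the dominator tree needs no special case for it.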
Dmitry Stogov 2023-03-17 09:02:37 +03:00
parent 09409898ea
commit f5b7065b10
12 changed files with 209 additions and 156 deletions

ir.c

@@ -163,7 +163,7 @@ void ir_print_const(ir_ctx *ctx, ir_insn *insn, FILE *f)
 #define ir_op_flag_c1X2 (ir_op_flag_c | 1 | (3 << IR_OP_FLAG_OPERANDS_SHIFT))
 #define ir_op_flag_c3 (ir_op_flag_c | 3 | (3 << IR_OP_FLAG_OPERANDS_SHIFT))
 #define ir_op_flag_S (IR_OP_FLAG_CONTROL|IR_OP_FLAG_BB_START)
-#define ir_op_flag_S0X2 (ir_op_flag_S | 0 | (2 << IR_OP_FLAG_OPERANDS_SHIFT))
+#define ir_op_flag_S0X1 (ir_op_flag_S | 0 | (1 << IR_OP_FLAG_OPERANDS_SHIFT))
 #define ir_op_flag_S1 (ir_op_flag_S | 1 | (1 << IR_OP_FLAG_OPERANDS_SHIFT))
 #define ir_op_flag_S1X1 (ir_op_flag_S | 1 | (2 << IR_OP_FLAG_OPERANDS_SHIFT))
 #define ir_op_flag_S2 (ir_op_flag_S | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT))
@@ -200,7 +200,6 @@ void ir_print_const(ir_ctx *ctx, ir_insn *insn, FILE *f)
 #define ir_op_kind_reg IR_OPND_CONTROL_DEP
 #define ir_op_kind_beg IR_OPND_CONTROL_REF
 #define ir_op_kind_ret IR_OPND_CONTROL_REF
-#define ir_op_kind_ent IR_OPND_CONTROL_REF
 #define ir_op_kind_str IR_OPND_STR
 #define ir_op_kind_num IR_OPND_NUM
 #define ir_op_kind_fld IR_OPND_STR
@@ -324,6 +323,8 @@ void ir_init(ir_ctx *ctx, ir_ref consts_limit, ir_ref insns_limit)
 ctx->prev_ref = NULL;
 ctx->data = NULL;
 ctx->snapshot_create = NULL;
+ctx->entries_count = 0;
+ctx->entries = NULL;
 ctx->code_buffer = NULL;
 ctx->code_buffer_size = 0;
@@ -386,6 +387,9 @@ void ir_free(ir_ctx *ctx)
 if (ctx->prev_ref) {
 ir_mem_free(ctx->prev_ref);
 }
+if (ctx->entries) {
+ir_mem_free(ctx->entries);
+}
 }
 ir_ref ir_unique_const_addr(ir_ctx *ctx, uintptr_t addr)
@@ -1506,11 +1510,14 @@ void _ir_START(ir_ctx *ctx)
 ctx->control = ir_emit0(ctx, IR_START);
 }
-void _ir_ENTRY(ir_ctx *ctx, ir_ref num)
+void _ir_ENTRY(ir_ctx *ctx, ir_ref src, ir_ref num)
 {
 IR_ASSERT(!ctx->control);
-ctx->control = ir_emit2(ctx, IR_ENTRY, num, ctx->ir_base[1].op2);
-ctx->ir_base[1].op2 = ctx->control;
+/* fake control edge */
+IR_ASSERT((ir_op_flags[ctx->ir_base[src].op] & IR_OP_FLAG_TERMINATOR)
+ || ctx->ir_base[src].op == IR_END
+ || ctx->ir_base[src].op == IR_LOOP_END); /* return from a recursive call */
+ctx->control = ir_emit2(ctx, IR_ENTRY, src, num);
 }
 void _ir_BEGIN(ir_ctx *ctx, ir_ref src)

ir.h

@@ -167,7 +167,6 @@ typedef enum _ir_type {
 * reg - data-control dependency on region (PHI, VAR, PARAM)
 * beg - reference to a LOOP_BEGIN region (LOOP_END)
 * ret - reference to a previous RETURN instruction (RETURN)
- * ent - reference to a previous ENTRY instruction (ENTRY)
 * str - string: variable/argument name (VAR, PARAM, CALL, TAILCALL)
 * num - number: argument number (PARAM)
 * prb - branch probability 1-99 (0 - unspecified): (IF_TRUE, IF_FALSE, CASE_VAL, CASE_DEFAULT)
@@ -295,8 +294,8 @@ typedef enum _ir_type {
 _(SNAPSHOT, xN, src, def, def) /* SNAPSHOT(src, args...) */ \
 \
 /* control-flow nodes */ \
-_(START, S0X2, ret, ent, ___) /* function start */ \
-_(ENTRY, S0X2, num, ent, ___) /* code entry (op3 keeps addr) */ \
+_(START, S0X1, ret, ___, ___) /* function start */ \
+_(ENTRY, S1X1, src, num, ___) /* entry with a fake src edge */ \
 _(BEGIN, S1, src, ___, ___) /* block start */ \
 _(IF_TRUE, S1X1, src, prb, ___) /* IF TRUE proj. */ \
 _(IF_FALSE, S1X1, src, prb, ___) /* IF FALSE proj. */ \
@@ -541,6 +540,8 @@ struct _ir_ctx {
 ir_snapshot_create_t snapshot_create;
 uint32_t rodata_offset;
 uint32_t jmp_table_offset;
+uint32_t entries_count;
+ir_ref *entries;
 void *code_buffer;
 size_t code_buffer_size;
 ir_strtab strtab;

ir_aarch64.dasc

@@ -4896,7 +4896,6 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
 int ret;
 void *entry;
 size_t size;
-uint32_t entries_count;
 ctx->data = &data;
 data.ra_data.stack_frame_size = 0;
@@ -4926,12 +4925,8 @@
 }
 /* For all entries */
-entries_count = 0;
-i = ctx->ir_base[1].op2;
-while (i != IR_UNUSED) {
-entries_count++;
-ctx->ir_base[i].op3 = entries_count;
-i = ctx->ir_base[i].op2;
+if (ctx->entries_count) {
+ctx->entries = ir_mem_malloc(ctx->entries_count * sizeof(ir_ref));
 }
 Dst = &data.dasm_state;
@@ -4940,7 +4935,7 @@
 dasm_setupglobal(&data.dasm_state, dasm_labels, ir_lb_MAX);
 dasm_setup(&data.dasm_state, dasm_actions);
 /* labels for each block + for each constant + rodata label + jmp_table label + for each entry */
-dasm_growpc(&data.dasm_state, ctx->cfg_blocks_count + 1 + ctx->consts_count + 1 + 1 + 1 + entries_count);
+dasm_growpc(&data.dasm_state, ctx->cfg_blocks_count + 1 + ctx->consts_count + 1 + 1 + 1 + ctx->entries_count);
 if (!(ctx->flags & IR_SKIP_PROLOGUE)) {
 ir_emit_prologue(ctx);
@@ -4964,10 +4959,11 @@
 i = bb->start;
 insn = ctx->ir_base + i;
 if (bb->flags & IR_BB_ENTRY) {
-uint32_t label = ctx->cfg_blocks_count + ctx->consts_count + 3 + insn->op3;
+uint32_t label = ctx->cfg_blocks_count + ctx->consts_count + 4 + insn->op3;
 |=>label:
 ir_emit_prologue(ctx);
+ctx->entries[insn->op3] = i;
 }
 /* skip first instruction */
@@ -5079,11 +5075,23 @@
 data.dessa_from_block = b;
 ir_gen_dessa_moves(ctx, b, ir_emit_dessa_move);
 }
-IR_ASSERT(bb->successors_count == 1);
-target = ir_skip_empty_target_blocks(ctx, ctx->cfg_edges[bb->successors]);
-if (b == ctx->cfg_blocks_count || target != ir_skip_empty_next_blocks(ctx, b + 1)) {
-| b =>target
-}
+do {
+ir_ref succ = ctx->cfg_edges[bb->successors];
+if (UNEXPECTED(bb->successors_count == 2)) {
+if (ctx->cfg_blocks[succ].flags & IR_BB_ENTRY) {
+succ = ctx->cfg_edges[bb->successors + 1];
+} else {
+IR_ASSERT(ctx->cfg_blocks[ctx->cfg_edges[bb->successors + 1]].flags & IR_BB_ENTRY);
+}
+} else {
+IR_ASSERT(bb->successors_count == 1);
+}
+target = ir_skip_empty_target_blocks(ctx, succ);
+if (b == ctx->cfg_blocks_count || target != ir_skip_empty_next_blocks(ctx, b + 1)) {
+| b =>target
+}
+} while (0);
 break;
 case IR_RETURN_VOID:
 ir_emit_return_void(ctx);
@@ -5289,12 +5297,14 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
 ctx->rodata_offset = 0;
 }
-/* For all entries */
-i = ctx->ir_base[1].op2;
-while (i != IR_UNUSED) {
-uint32_t offset = dasm_getpclabel(&data.dasm_state, ctx->cfg_blocks_count + ctx->consts_count + 3 + ctx->ir_base[i].op3);
-ctx->ir_base[i].op3 = offset;
-i = ctx->ir_base[i].op2;
+if (ctx->entries_count) {
+/* For all entries */
+i = ctx->entries_count;
+do {
+ir_insn *insn = &ctx->ir_base[ctx->entries[--i]];
+uint32_t offset = dasm_getpclabel(&data.dasm_state, ctx->cfg_blocks_count + ctx->consts_count + 4 + insn->op3);
+insn->op3 = offset;
+} while (i != 0);
 }
 dasm_free(&data.dasm_state);

ir_builder.h

@@ -499,7 +499,7 @@
 #define ir_TRAP() do {_ir_CTX->control = ir_emit1(_ir_CTX, IR_TRAP, _ir_CTX->control);} while (0)
 #define ir_START() _ir_START(_ir_CTX)
-#define ir_ENTRY(_num) _ir_ENTRY(_ir_CTX, (_num))
+#define ir_ENTRY(_src, _num) _ir_ENTRY(_ir_CTX, (_src), (_num))
 #define ir_BEGIN(_src) _ir_BEGIN(_ir_CTX, (_src))
 #define ir_IF(_condition) _ir_IF(_ir_CTX, (_condition))
 #define ir_IF_TRUE(_if) _ir_IF_TRUE(_ir_CTX, (_if))
@@ -561,7 +561,7 @@ void _ir_RSTORE(ir_ctx *ctx, ir_ref reg, ir_ref val);
 ir_ref _ir_LOAD(ir_ctx *ctx, ir_type type, ir_ref addr);
 void _ir_STORE(ir_ctx *ctx, ir_ref addr, ir_ref val);
 void _ir_START(ir_ctx *ctx);
-void _ir_ENTRY(ir_ctx *ctx, ir_ref num);
+void _ir_ENTRY(ir_ctx *ctx, ir_ref src, ir_ref num);
 void _ir_BEGIN(ir_ctx *ctx, ir_ref src);
 ir_ref _ir_END(ir_ctx *ctx);
 ir_ref _ir_END_LIST(ir_ctx *ctx, ir_ref list);

ir_cfg.c

@@ -87,7 +87,7 @@ IR_ALWAYS_INLINE void _ir_add_predecessors(ir_insn *insn, ir_worklist *worklist)
 IR_ASSERT(ref);
 ir_worklist_push(worklist, ref);
 }
-} else if (insn->op != IR_START && insn->op != IR_ENTRY) {
+} else if (insn->op != IR_START) {
 if (EXPECTED(insn->op1)) {
 ir_worklist_push(worklist, insn->op1);
 }
@@ -167,14 +167,7 @@ int ir_build_cfg(ir_ctx *ctx)
 /* START node may be inaccessible from "stop" nodes */
 ir_bitset_incl(bb_leaks, 1);
-/* ENTRY nodes may be inaccessible from "stop" nodes */
-ref = ctx->ir_base[1].op2;
-while (ref) {
-ir_bitset_incl(bb_leaks, ref);
-ref = ctx->ir_base[ref].op2;
-}
-/* Add all not processed START, ENTRY and successor of IF and SWITCH */
+/* Add not processed START and successor of IF and SWITCH */
 IR_BITSET_FOREACH_DIFFERENCE(bb_leaks, bb_starts, len, start) {
 ir_worklist_push(&worklist, start);
 } IR_BITSET_FOREACH_END();
@@ -259,10 +252,6 @@ next_successor:
 bb->flags = IR_BB_START;
 bb->predecessors_count = 0;
 ir_worklist_push(&worklist, b);
-} else if (insn->op == IR_ENTRY) {
-bb->flags = IR_BB_ENTRY;
-bb->predecessors_count = 0;
-ir_worklist_push(&worklist, b);
 } else {
 bb->flags = IR_BB_UNREACHABLE; /* all blocks are marked as UNREACHABLE first */
 if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) {
@@ -271,10 +260,14 @@ next_successor:
 edges_count += n;
 count += n;
 } else if (EXPECTED(insn->op1)) {
+if (insn->op == IR_ENTRY) {
+bb->flags |= IR_BB_ENTRY;
+}
 bb->predecessors_count = 1;
 edges_count++;
 count++;
 } else {
+IR_ASSERT(insn->op == IR_BEGIN); /* start of unreachable block */
 bb->predecessors_count = 0;
 }
 }
@@ -595,16 +588,6 @@ int ir_build_dominators_tree(ir_ctx *ctx)
 postnum = 1;
 compute_postnum(ctx, &postnum, 1);
-if (ctx->ir_base[1].op2) {
-for (b = 2, bb = &ctx->cfg_blocks[2]; b <= ctx->cfg_blocks_count; b++, bb++) {
-if (bb->flags & IR_BB_ENTRY) {
-compute_postnum(ctx, &postnum, b);
-bb->idom = 1;
-}
-}
-ctx->cfg_blocks[1].postnum = postnum;
-}
 /* Find immediate dominators */
 blocks = ctx->cfg_blocks;
 edges = ctx->cfg_edges;
@@ -635,7 +618,7 @@
 uint32_t pred_b = *p;
 ir_block *pred_bb = &blocks[pred_b];
-if (pred_bb->idom > 0 && !(pred_bb->flags & IR_BB_ENTRY)) {
+if (pred_bb->idom > 0) {
 if (idom == 0) {
 idom = pred_b;
 } else if (idom != pred_b) {
@@ -669,10 +652,7 @@
 /* Construct dominators tree */
 for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) {
 IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
-if (bb->flags & IR_BB_ENTRY) {
-bb->idom = 0;
-bb->dom_depth = 0;
-} else if (bb->idom > 0) {
+if (bb->idom > 0) {
 ir_block *idom_bb = &blocks[bb->idom];
 bb->dom_depth = idom_bb->dom_depth + 1;
@@ -875,11 +855,10 @@ next:
 int ir_schedule_blocks(ir_ctx *ctx)
 {
 ir_bitqueue blocks;
-uint32_t b, *p, successor, best_successor, j, last_non_empty = 0;
-ir_block *bb, *successor_bb, *best_successor_bb;
+uint32_t b, best_successor, j, last_non_empty;
+ir_block *bb, *best_successor_bb;
 ir_insn *insn;
 uint32_t *list, *map;
-uint32_t prob, best_successor_prob;
 uint32_t count = 0;
 bool reorder = 0;
@@ -893,6 +872,11 @@
 && (ctx->ir_base[bb->end].op == IR_END || ctx->ir_base[bb->end].op == IR_LOOP_END)
 && !(bb->flags & IR_BB_DESSA_MOVES)) {
 bb->flags |= IR_BB_EMPTY;
+if ((ctx->flags & IR_MERGE_EMPTY_ENTRIES) && (bb->flags & IR_BB_ENTRY)) {
+if (ctx->cfg_edges[bb->successors] == b + 1) {
+(bb + 1)->flags |= IR_BB_PREV_EMPTY_ENTRY;
+}
+}
 }
 ir_bitset_incl(blocks.set, b);
 }
@@ -900,27 +884,18 @@
 while ((b = ir_bitqueue_pop(&blocks)) != (uint32_t)-1) {
 bb = &ctx->cfg_blocks[b];
 /* Start trace */
+last_non_empty = 0;
 do {
-if (bb->predecessors_count > 1
- && (ctx->flags & IR_MERGE_EMPTY_ENTRIES)) {
-/* Insert empty ENTRY blocks */
-for (j = 0, p = &ctx->cfg_edges[bb->predecessors]; j < bb->predecessors_count; j++, p++) {
-uint32_t predecessor = *p;
+if (UNEXPECTED(bb->flags & IR_BB_PREV_EMPTY_ENTRY) && ir_bitqueue_in(&blocks, b - 1)) {
+/* Schedule the previous empty ENTRY block before this one */
+uint32_t predecessor = b - 1;
-if (ir_bitqueue_in(&blocks, predecessor)
- && (ctx->cfg_blocks[predecessor].flags & IR_BB_ENTRY)
- && ctx->cfg_blocks[predecessor].end == ctx->cfg_blocks[predecessor].start + 1) {
-ir_bitqueue_del(&blocks, predecessor);
-count++;
-list[count] = predecessor;
-map[predecessor] = count;
-if (predecessor != count) {
-reorder = 1;
-}
-if (!(bb->flags & IR_BB_EMPTY)) {
-last_non_empty = b;
-}
-}
+ir_bitqueue_del(&blocks, predecessor);
+count++;
+list[count] = predecessor;
+map[predecessor] = count;
+if (predecessor != count) {
+reorder = 1;
+}
 }
 count++;
@@ -933,36 +908,55 @@
 last_non_empty = b;
 }
 best_successor_bb = NULL;
-for (b = 0, p = &ctx->cfg_edges[bb->successors]; b < bb->successors_count; b++, p++) {
-successor = *p;
-if (ir_bitqueue_in(&blocks, successor)) {
-successor_bb = &ctx->cfg_blocks[successor];
-insn = &ctx->ir_base[successor_bb->start];
-if (insn->op == IR_IF_TRUE || insn->op == IR_IF_FALSE || insn->op == IR_CASE_DEFAULT) {
-prob = insn->op2;
-} else if (insn->op == IR_CASE_VAL) {
-prob = insn->op3;
-} else {
-prob = 0;
-}
-if (!best_successor_bb
- || successor_bb->loop_depth > best_successor_bb->loop_depth) {
-// TODO: use block frequency
-best_successor = successor;
-best_successor_bb = successor_bb;
-best_successor_prob = prob;
-} else if ((best_successor_prob && prob
- && prob > best_successor_prob)
- || (!best_successor_prob && prob
- && prob > 100 / bb->successors_count)
- || (best_successor_prob && !prob
- && best_successor_prob < 100 / bb->successors_count)
- || (!best_successor_prob && !prob
- && (best_successor_bb->flags & IR_BB_EMPTY)
- && !(successor_bb->flags & IR_BB_EMPTY))) {
-best_successor = successor;
-best_successor_bb = successor_bb;
-best_successor_prob = prob;
+if (bb->successors_count == 1) {
+best_successor = ctx->cfg_edges[bb->successors];
+if (ir_bitqueue_in(&blocks, best_successor)) {
+best_successor_bb = &ctx->cfg_blocks[best_successor];
+}
+} else if (bb->successors_count > 1) {
+uint32_t prob, best_successor_prob;
+uint32_t *p, successor;
+ir_block *successor_bb;
+for (b = 0, p = &ctx->cfg_edges[bb->successors]; b < bb->successors_count; b++, p++) {
+successor = *p;
+if (ir_bitqueue_in(&blocks, successor)) {
+successor_bb = &ctx->cfg_blocks[successor];
+insn = &ctx->ir_base[successor_bb->start];
+if (insn->op == IR_IF_TRUE || insn->op == IR_IF_FALSE) {
+prob = insn->op2;
+if (!prob) {
+prob = 100 / bb->successors_count;
+if (!(successor_bb->flags & IR_BB_EMPTY)) {
+prob++;
+}
+}
+} else if (insn->op == IR_CASE_DEFAULT) {
+prob = insn->op2;
+if (!prob) {
+prob = 100 / bb->successors_count;
+}
+} else if (insn->op == IR_CASE_VAL) {
+prob = insn->op3;
+if (!prob) {
+prob = 100 / bb->successors_count;
+}
+} else if (insn->op == IR_ENTRY) {
+if ((ctx->flags & IR_MERGE_EMPTY_ENTRIES) && (successor_bb->flags & IR_BB_EMPTY)) {
+prob = 99; /* prefer empty ENTRY block to go first */
+} else {
+prob = 1;
+}
+} else {
+prob = 100 / bb->successors_count;
+}
+if (!best_successor_bb
+ || successor_bb->loop_depth > best_successor_bb->loop_depth
+ || prob > best_successor_prob) {
+best_successor = successor;
+best_successor_bb = successor_bb;
+best_successor_prob = prob;
+}
+}
+}
 }
@@ -970,7 +964,7 @@
 /* Try to continue trace using the other successor of the last IF */
 if ((bb->flags & IR_BB_EMPTY) && last_non_empty) {
 bb = &ctx->cfg_blocks[last_non_empty];
-if (bb->successors_count == 2) {
+if (bb->successors_count == 2 && ctx->ir_base[bb->end].op == IR_IF) {
 b = ctx->cfg_edges[bb->successors];
 if (!ir_bitqueue_in(&blocks, b)) {

ir_check.c

@@ -283,12 +283,15 @@ bool ir_check(ir_ctx *ctx)
 }
 break;
 case IR_UNREACHABLE:
-case IR_IJMP:
 case IR_RETURN:
 if (use_list->count == 1) {
-/* UNREACHABLE, IJMP and RETURN may be used in MERGE with the following ENTRY */
-break;
+/* UNREACHABLE and RETURN may be linked with the following ENTRY by a fake edge */
+if (ctx->ir_base[ctx->use_edges[use_list->refs]].op == IR_ENTRY) {
+break;
+}
 }
+IR_FALLTHROUGH;
+case IR_IJMP:
 if (use_list->count != 0) {
 fprintf(stderr, "ir_base[%d].op (%s) must not have successors (%d)\n",
 i, ir_op_name[insn->op], use_list->count);
@@ -309,6 +312,18 @@
 /* result of CALL may be used as data in control instruction */
 break;
 }
+if ((insn->op == IR_LOOP_END || insn->op == IR_END) && count == 2) {
+/* LOOP_END/END may be linked with the following ENTRY by a fake edge */
+if (ctx->ir_base[ctx->use_edges[use_list->refs]].op == IR_ENTRY) {
+count--;
+}
+if (ctx->ir_base[ctx->use_edges[use_list->refs + 1]].op == IR_ENTRY) {
+count--;
+}
+if (count == 1) {
+break;
+}
+}
 fprintf(stderr, "ir_base[%d].op (%s) must have 1 successor (%d)\n",
 i, ir_op_name[insn->op], count);
 ok = 0;

ir_disasm.c

@@ -377,12 +377,12 @@ int ir_disasm(const char *name,
 ir_hashtab_init(&labels, 32);
 if (ctx) {
-ir_ref entry;
-entry = ctx->ir_base[1].op2;
-while (entry != IR_UNUSED) {
-ir_hashtab_add(&labels, ctx->ir_base[entry].op3, ctx->ir_base[entry].op1);
-entry = ctx->ir_base[entry].op2;
+if (ctx->entries_count) {
+int i = ctx->entries_count;
+do {
+ir_insn *insn = &ctx->ir_base[ctx->entries[--i]];
+ir_hashtab_add(&labels, insn->op3, insn->op2);
+} while (i != 0);
 }
 rodata_offset = ctx->rodata_offset;

ir_emit.c

@@ -332,6 +332,11 @@ int ir_match(ir_ctx *ctx)
 ctx->rules = ir_mem_calloc(ctx->insns_count, sizeof(uint32_t));
 for (b = ctx->cfg_blocks_count, bb = ctx->cfg_blocks + b; b > 0; b--, bb--) {
 IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
+if (bb->flags & IR_BB_ENTRY) {
+ir_insn *insn = &ctx->ir_base[bb->start];
+IR_ASSERT(insn->op == IR_ENTRY);
+insn->op3 = ctx->entries_count++;
+}
 for (i = bb->end; i > bb->start; i = ctx->prev_ref[i]) {
 if (!ctx->rules[i]) {
 ctx->rules[i] = ir_match_insn(ctx, i, bb);

ir_private.h

@@ -816,6 +816,7 @@ struct _ir_use_list {
 #define IR_BB_DESSA_MOVES (1<<5) /* translation out of SSA requires MOVEs */
 #define IR_BB_EMPTY (1<<6)
 #define IR_BB_SKIP (1<<7) /* skip during code generation */
+#define IR_BB_PREV_EMPTY_ENTRY (1<<8)
 struct _ir_block {
 uint32_t flags;

ir_ra.c

@@ -385,20 +385,27 @@ int ir_compute_live_ranges(ir_ctx *ctx)
 #endif
 if (n > 1) {
-IR_ASSERT(succ > b);
 /* live = union of successors.liveIn */
-ir_bitset_copy(live, bb_live + (len * succ), len);
+if (EXPECTED(succ > b) && EXPECTED((ctx->cfg_blocks[succ].flags & IR_BB_ENTRY) == 0)) {
+ir_bitset_copy(live, bb_live + (len * succ), len);
+} else {
+IR_ASSERT(succ > b || (ctx->cfg_blocks[succ].flags & IR_BB_LOOP_HEADER));
+ir_bitset_clear(live, len);
+}
 for (p++, n--; n > 0; p++, n--) {
 succ = *p;
-IR_ASSERT(succ > b);
-ir_bitset_union(live, bb_live + (len * succ), len);
+if (EXPECTED(succ > b) && EXPECTED((ctx->cfg_blocks[succ].flags & IR_BB_ENTRY) == 0)) {
+ir_bitset_union(live, bb_live + (len * succ), len);
+} else {
+IR_ASSERT(succ > b || (ctx->cfg_blocks[succ].flags & IR_BB_LOOP_HEADER));
+}
 }
 } else {
 /* live = successor.liveIn */
-if (EXPECTED(succ > b)) {
+if (EXPECTED(succ > b) && EXPECTED((ctx->cfg_blocks[succ].flags & IR_BB_ENTRY) == 0)) {
 ir_bitset_copy(live, bb_live + (len * succ), len);
 } else {
-IR_ASSERT(ctx->cfg_blocks[succ].flags & IR_BB_LOOP_HEADER);
+IR_ASSERT(succ > b || (ctx->cfg_blocks[succ].flags & IR_BB_LOOP_HEADER));
 ir_bitset_clear(live, len);
 }

ir_sccp.c

@@ -210,7 +210,7 @@ static void ir_sccp_make_nop(ir_ctx *ctx, ir_ref ref)
 insn = &ctx->ir_base[ref];
 n = ir_input_edges_count(ctx, insn);
-insn->optx = IR_NOP;
+insn->opt = IR_NOP; /* keep "inputs_count" */
 for (j = 1, p = insn->ops + j; j <= n; j++, p++) {
 *p = IR_UNUSED;
 }
@@ -525,11 +525,6 @@ int ir_sccp(ir_ctx *ctx)
 ir_bitqueue_init(&worklist, ctx->insns_count);
 worklist.pos = 0;
 ir_bitset_incl(worklist.set, 1);
-i = ctx->ir_base[1].op2;
-while (i) {
-ir_bitset_incl(worklist.set, i);
-i = ctx->ir_base[i].op2;
-}
 while ((i = ir_bitqueue_pop(&worklist)) >= 0) {
 insn = &ctx->ir_base[i];
 flags = ir_op_flags[insn->op];
@@ -625,7 +620,7 @@
 continue;
 }
 } else {
-IR_ASSERT(insn->op == IR_START || insn->op == IR_ENTRY || IR_IS_FEASIBLE(insn->op1));
+IR_ASSERT(insn->op == IR_START || IR_IS_FEASIBLE(insn->op1));
 IR_MAKE_BOTTOM(i);
 }
 } else {

ir_x86.dasc

@@ -6412,8 +6412,17 @@ static bool ir_emit_guard_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, uint8_t op, v
 uint32_t target;
 if (!(bb->flags & IR_BB_DESSA_MOVES)) {
-IR_ASSERT(bb->successors_count == 1);
-target = ir_skip_empty_target_blocks(ctx, ctx->cfg_edges[bb->successors]);
+target = ctx->cfg_edges[bb->successors];
+if (UNEXPECTED(bb->successors_count == 2)) {
+if (ctx->cfg_blocks[target].flags & IR_BB_ENTRY) {
+target = ctx->cfg_edges[bb->successors + 1];
+} else {
+IR_ASSERT(ctx->cfg_blocks[ctx->cfg_edges[bb->successors + 1]].flags & IR_BB_ENTRY);
+}
+} else {
+IR_ASSERT(bb->successors_count == 1);
+}
+target = ir_skip_empty_target_blocks(ctx, target);
 if (b == ctx->cfg_blocks_count || target != ir_skip_empty_next_blocks(ctx, b + 1)) {
 if (int_cmp) {
 switch (op) {
@@ -7873,7 +7882,6 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
 int ret;
 void *entry;
 size_t size;
-uint32_t entries_count;
 ctx->data = &data;
 data.ra_data.stack_frame_size = 0;
@@ -7910,13 +7918,8 @@
 data.stack_frame_alignment = 0;
 }
 /* For all entries */
-entries_count = 0;
-i = ctx->ir_base[1].op2;
-while (i != IR_UNUSED) {
-entries_count++;
-ctx->ir_base[i].op3 = entries_count;
-i = ctx->ir_base[i].op2;
+if (ctx->entries_count) {
+ctx->entries = ir_mem_malloc(ctx->entries_count * sizeof(ir_ref));
 }
 Dst = &data.dasm_state;
@@ -7925,7 +7928,7 @@
 dasm_setupglobal(&data.dasm_state, dasm_labels, ir_lb_MAX);
 dasm_setup(&data.dasm_state, dasm_actions);
 /* labels for each block + for each constant + rodata label + jmp_table label + for each entry */
-dasm_growpc(&data.dasm_state, ctx->cfg_blocks_count + 1 + ctx->consts_count + 1 + 1 + 1 + entries_count);
+dasm_growpc(&data.dasm_state, ctx->cfg_blocks_count + 1 + ctx->consts_count + 1 + 1 + 1 + ctx->entries_count);
 if ((ctx->flags & IR_GEN_ENDBR) && (ctx->flags & IR_START_BR_TARGET)) {
 |.if X64
@@ -7957,7 +7960,7 @@
 i = bb->start;
 insn = ctx->ir_base + i;
 if (bb->flags & IR_BB_ENTRY) {
-uint32_t label = ctx->cfg_blocks_count + ctx->consts_count + 3 + insn->op3;
+uint32_t label = ctx->cfg_blocks_count + ctx->consts_count + 4 + insn->op3;
 |=>label:
 if ((ctx->flags & IR_GEN_ENDBR) && (ctx->flags & IR_ENTRY_BR_TARGET)) {
@@ -7968,6 +7971,7 @@
 |.endif
 }
 ir_emit_prologue(ctx);
+ctx->entries[insn->op3] = i;
 }
 /* skip first instruction */
@@ -8388,11 +8392,23 @@
 data.dessa_from_block = b;
 ir_gen_dessa_moves(ctx, b, ir_emit_dessa_move);
 }
-IR_ASSERT(bb->successors_count == 1);
-target = ir_skip_empty_target_blocks(ctx, ctx->cfg_edges[bb->successors]);
-if (b == ctx->cfg_blocks_count || target != ir_skip_empty_next_blocks(ctx, b + 1)) {
-| jmp =>target
-}
+do {
+ir_ref succ = ctx->cfg_edges[bb->successors];
+if (UNEXPECTED(bb->successors_count == 2)) {
+if (ctx->cfg_blocks[succ].flags & IR_BB_ENTRY) {
+succ = ctx->cfg_edges[bb->successors + 1];
+} else {
+IR_ASSERT(ctx->cfg_blocks[ctx->cfg_edges[bb->successors + 1]].flags & IR_BB_ENTRY);
+}
+} else {
+IR_ASSERT(bb->successors_count == 1);
+}
+target = ir_skip_empty_target_blocks(ctx, succ);
+if (b == ctx->cfg_blocks_count || target != ir_skip_empty_next_blocks(ctx, b + 1)) {
+| jmp =>target
+}
+} while (0);
 break;
 case IR_RETURN_VOID:
 ir_emit_return_void(ctx);
@@ -8616,12 +8632,14 @@ next_block:;
 ctx->rodata_offset = 0;
 }
-/* For all entries */
-i = ctx->ir_base[1].op2;
-while (i != IR_UNUSED) {
-uint32_t offset = dasm_getpclabel(&data.dasm_state, ctx->cfg_blocks_count + ctx->consts_count + 3 + ctx->ir_base[i].op3);
-ctx->ir_base[i].op3 = offset;
-i = ctx->ir_base[i].op2;
+if (ctx->entries_count) {
+/* For all entries */
+i = ctx->entries_count;
+do {
+ir_insn *insn = &ctx->ir_base[ctx->entries[--i]];
+uint32_t offset = dasm_getpclabel(&data.dasm_state, ctx->cfg_blocks_count + ctx->consts_count + 4 + insn->op3);
+insn->op3 = offset;
+} while (i != 0);
 }
 dasm_free(&data.dasm_state);