Introduce ir_bitset_pop_first_ex() and ir_bitset_incl_ex() to avoid repeated checks of the leading bitset elements.

This commit is contained in:
Dmitry Stogov 2022-08-12 18:01:15 +03:00
parent 3a0fb71282
commit 9ff5d74778
4 changed files with 51 additions and 19 deletions

View File

@ -531,7 +531,7 @@ int ir_schedule_blocks(ir_ctx *ctx)
uint32_t b, *p, successor, best_successor, j;
ir_block *bb, *successor_bb, *best_successor_bb;
ir_insn *insn;
uint32_t *list, *map;
uint32_t *list, *map, pos;
uint32_t prob, best_successor_prob;
uint32_t count = 0;
bool reorder = 0;
@ -542,7 +542,8 @@ int ir_schedule_blocks(ir_ctx *ctx)
ir_bitset_incl(blocks, b);
}
while ((b = ir_bitset_pop_first(blocks, len)) != (uint32_t)-1) {
pos = 0;
while ((b = ir_bitset_pop_first_ex(blocks, len, &pos)) != (uint32_t)-1) {
bb = &ctx->cfg_blocks[b];
do {
if (bb->predecessors_count == 2) {

View File

@ -361,6 +361,33 @@ IR_ALWAYS_INLINE int ir_bitset_pop_first(ir_bitset set, uint32_t len)
return -1; /* empty set */
}
/*
 * Remove and return the lowest set bit of "set", scanning from word index
 * *pos rather than from word 0.
 *
 * set - bitset storage, indexed by word
 * len - number of words in "set"
 * pos - in/out scan cursor (a word index). Words below *pos are not
 *       examined, so any bits set there are not found by this call;
 *       ir_bitset_incl_ex() lowers the cursor when it adds a bit to an
 *       earlier word.
 *
 * Returns the index of the removed bit, or -1 when words [*pos, len) hold
 * no set bit. On success *pos is left on the word that held the bit (it may
 * still contain further bits). On an empty result *pos is parked on the
 * last word.
 *
 * A zero-length set, or a cursor at or beyond "len", yields -1 without
 * reading "set".
 */
IR_ALWAYS_INLINE int ir_bitset_pop_first_ex(ir_bitset set, uint32_t len, uint32_t *pos)
{
	uint32_t i;

	for (i = *pos; i < len; i++) {
		ir_bitset_base_t x = set[i];

		if (x) {
			int bit = IR_BITSET_BITS * i + ir_bitset_ntz(x);

			set[i] = x & (x - 1); /* clear the lowest set bit */
			*pos = i;
			return bit;
		}
	}
	/* Nothing left: park the cursor on the last word (0 for a zero-length set,
	 * so that "len - 1" cannot wrap around). */
	*pos = len ? len - 1 : 0;
	return -1; /* empty set */
}
/*
 * Set bit "n" in "set" and keep the scan cursor *pos consistent with it.
 *
 * set - bitset storage, indexed by word
 * n   - index of the bit to set
 * pos - in/out word index used as the starting point by
 *       ir_bitset_pop_first_ex(); it is moved back to the word containing
 *       "n" when that word precedes the current cursor, and is left
 *       untouched otherwise.
 */
IR_ALWAYS_INLINE void ir_bitset_incl_ex(ir_bitset set, uint32_t n, uint32_t *pos)
{
	const uint32_t word = n / IR_BITSET_BITS;
	const uint32_t shift = n % IR_BITSET_BITS;

	set[word] |= IR_BITSET_ONE << shift;
	if (*pos > word) {
		*pos = word;
	}
}
#define IR_BITSET_FOREACH(set, len, bit) do { \
ir_bitset _set = (set); \
uint32_t _i, _len = (len); \

View File

@ -552,6 +552,7 @@ int ir_compute_live_ranges(ir_ctx *ctx)
uint32_t bb_set_len = ir_bitset_len(ctx->cfg_blocks_count + 1);
int child;
ir_block *child_bb;
uint32_t pos;
if (!loops) {
loops = ir_bitset_malloc(ctx->cfg_blocks_count + 1);
@ -560,6 +561,7 @@ int ir_compute_live_ranges(ir_ctx *ctx)
ir_bitset_clear(loops, bb_set_len);
ir_bitset_clear(queue, bb_set_len);
}
pos = bb_set_len - 1;
ir_bitset_incl(loops, b);
child = b;
do {
@ -575,14 +577,14 @@ int ir_compute_live_ranges(ir_ctx *ctx)
while (child) {
child_bb = &ctx->cfg_blocks[child];
if (child_bb->loop_header && ir_bitset_in(loops, child_bb->loop_header)) {
ir_bitset_incl(queue, child);
ir_bitset_incl_ex(queue, child, &pos);
if (child_bb->flags & IR_BB_LOOP_HEADER) {
ir_bitset_incl(loops, child);
}
}
child = child_bb->dom_next_child;
}
} while ((child = ir_bitset_pop_first(queue, bb_set_len)) >= 0);
} while ((child = ir_bitset_pop_first_ex(queue, bb_set_len, &pos)) >= 0);
}
/* b.liveIn = live */

View File

@ -371,7 +371,7 @@ static void ir_sccp_remove_unreachable_merge_inputs(ir_ctx *ctx, ir_insn *_value
}
}
static void ir_sccp_mark_reachable_data(ir_ctx *ctx, ir_bitset worklist, ir_insn *_values, ir_insn *insn)
static uint32_t ir_sccp_mark_reachable_data(ir_ctx *ctx, ir_bitset worklist, ir_insn *_values, ir_insn *insn, uint32_t pos)
{
int j, n, use;
uint32_t flags = ir_op_flags[insn->op];
@ -381,11 +381,12 @@ static void ir_sccp_mark_reachable_data(ir_ctx *ctx, ir_bitset worklist, ir_insn
if (IR_OPND_KIND(flags, j) == IR_OPND_DATA || IR_OPND_KIND(flags, j) == IR_OPND_VAR) {
use = insn->ops[j];
if (use > 0 && IR_IS_TOP(use) && !ir_bitset_in(worklist, use)) {
ir_bitset_incl(worklist, use);
ir_sccp_mark_reachable_data(ctx, worklist, _values, &ctx->ir_base[use]);
ir_bitset_incl_ex(worklist, use, &pos);
pos = ir_sccp_mark_reachable_data(ctx, worklist, _values, &ctx->ir_base[use], pos);
}
}
}
return pos;
}
int ir_sccp(ir_ctx *ctx)
@ -393,7 +394,7 @@ int ir_sccp(ir_ctx *ctx)
ir_ref i, j, n, *p, use;
ir_use_list *use_list;
ir_insn *insn, *use_insn;
uint32_t flags;
uint32_t flags, pos;
uint32_t len = ir_bitset_len(ctx->insns_count);
ir_bitset worklist = ir_bitset_malloc(ctx->insns_count);
ir_insn *_values = ir_mem_calloc(ctx->insns_count, sizeof(ir_insn));
@ -407,7 +408,8 @@ int ir_sccp(ir_ctx *ctx)
ir_bitset_incl(worklist, i);
i = ctx->ir_base[i].op2;
}
while ((i = ir_bitset_pop_first(worklist, len)) >= 0) {
pos = 0;
while ((i = ir_bitset_pop_first_ex(worklist, len, &pos)) >= 0) {
insn = &ctx->ir_base[i];
flags = ir_op_flags[insn->op];
if (flags & IR_OP_FLAG_DATA) {
@ -474,7 +476,7 @@ int ir_sccp(ir_ctx *ctx)
IR_MAKE_BOTTOM(i);
}
if (!IR_IS_BOTTOM(use)) {
ir_bitset_incl(worklist, use);
ir_bitset_incl_ex(worklist, use, &pos);
}
break;
}
@ -504,7 +506,7 @@ int ir_sccp(ir_ctx *ctx)
IR_MAKE_BOTTOM(i);
}
if (!IR_IS_BOTTOM(use)) {
ir_bitset_incl(worklist, use);
ir_bitset_incl_ex(worklist, use, &pos);
}
default_case = IR_UNUSED;
break;
@ -522,7 +524,7 @@ int ir_sccp(ir_ctx *ctx)
IR_MAKE_BOTTOM(i);
}
if (!IR_IS_BOTTOM(default_case)) {
ir_bitset_incl(worklist, default_case);
ir_bitset_incl_ex(worklist, default_case, &pos);
}
}
if (!IR_IS_BOTTOM(i)) {
@ -570,12 +572,12 @@ int ir_sccp(ir_ctx *ctx)
if (IR_OPND_KIND(flags, j) == IR_OPND_DATA || IR_OPND_KIND(flags, j) == IR_OPND_VAR) {
use = insn->ops[j];
if (use > 0 && IR_IS_TOP(use) && !ir_bitset_in(worklist, use)) {
ir_bitset_incl(worklist, use);
ir_sccp_mark_reachable_data(ctx, worklist, _values, &ctx->ir_base[use]);
ir_bitset_incl_ex(worklist, use, &pos);
pos = ir_sccp_mark_reachable_data(ctx, worklist, _values, &ctx->ir_base[use], pos);
}
}
}
// ir_sccp_mark_reachable_data(ctx, worklist, _values, insn);
// pos = ir_sccp_mark_reachable_data(ctx, worklist, _values, insn, pos);
}
}
use_list = &ctx->use_lists[i];
@ -586,12 +588,12 @@ int ir_sccp(ir_ctx *ctx)
if ((ir_op_flags[insn->op] & IR_OP_FLAG_DATA)) {
if (insn->op != IR_PHI || IR_IS_REACHABLE(insn->op1)) {
if (!IR_IS_BOTTOM(use)) {
ir_bitset_incl(worklist, use);
ir_bitset_incl_ex(worklist, use, &pos);
}
}
} else if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN || IR_IS_REACHABLE(insn->op1)) {
if (!IR_IS_BOTTOM(use)) {
ir_bitset_incl(worklist, use);
ir_bitset_incl_ex(worklist, use, &pos);
}
}
}
@ -659,11 +661,11 @@ int ir_sccp(ir_ctx *ctx)
ir_sccp_remove_if(ctx, _values, i, _values[i].op1);
} else if (_values[i].op == IR_MERGE || _values[i].op == IR_LOOP_BEGIN) {
/* schedule merge to remove unreachable MERGE inputs */
ir_bitset_incl(worklist, i);
ir_bitset_incl_ex(worklist, i, &pos);
}
}
while ((i = ir_bitset_pop_first(worklist, len)) >= 0) {
while ((i = ir_bitset_pop_first_ex(worklist, len, &pos)) >= 0) {
/* remove unreachable MERGE inputs */
ir_sccp_remove_unreachable_merge_inputs(ctx, _values, i, _values[i].op1);
}