Introduce ir_bitqueue API

This commit is contained in:
Dmitry Stogov 2022-08-12 19:25:10 +03:00
parent 9ff5d74778
commit d55154d998
4 changed files with 106 additions and 74 deletions

View File

@ -526,40 +526,40 @@ next:
*/
int ir_schedule_blocks(ir_ctx *ctx)
{
uint32_t len = ir_bitset_len(ctx->cfg_blocks_count + 1);
ir_bitset blocks = ir_bitset_malloc(ctx->cfg_blocks_count + 1);
ir_bitqueue blocks;
uint32_t b, *p, successor, best_successor, j;
ir_block *bb, *successor_bb, *best_successor_bb;
ir_insn *insn;
uint32_t *list, *map, pos;
uint32_t *list, *map;
uint32_t prob, best_successor_prob;
uint32_t count = 0;
bool reorder = 0;
ir_bitqueue_init(&blocks, ctx->cfg_blocks_count + 1);
blocks.pos = 0;
list = ir_mem_malloc(sizeof(uint32_t) * (ctx->cfg_blocks_count + 1) * 2);
map = list + (ctx->cfg_blocks_count + 1);
for (b = 1; b <= ctx->cfg_blocks_count; b++) {
ir_bitset_incl(blocks, b);
ir_bitset_incl(blocks.set, b);
}
pos = 0;
while ((b = ir_bitset_pop_first_ex(blocks, len, &pos)) != (uint32_t)-1) {
while ((b = ir_bitqueue_pop(&blocks)) != (uint32_t)-1) {
bb = &ctx->cfg_blocks[b];
do {
if (bb->predecessors_count == 2) {
uint32_t predecessor = ctx->cfg_edges[bb->predecessors];
if (!ir_bitset_in(blocks, predecessor)) {
if (!ir_bitqueue_in(&blocks, predecessor)) {
predecessor = ctx->cfg_edges[bb->predecessors + 1];
}
if (ir_bitset_in(blocks, predecessor)) {
if (ir_bitqueue_in(&blocks, predecessor)) {
ir_block *predecessor_bb = &ctx->cfg_blocks[predecessor];
if (predecessor_bb->successors_count == 1
&& predecessor_bb->predecessors_count == 1
&& predecessor_bb->end == predecessor_bb->start + 1
&& !(predecessor_bb->flags & IR_BB_DESSA_MOVES)) {
ir_bitset_excl(blocks, predecessor);
ir_bitqueue_del(&blocks, predecessor);
count++;
list[count] = predecessor;
map[predecessor] = count;
@ -581,7 +581,7 @@ int ir_schedule_blocks(ir_ctx *ctx)
best_successor_bb = NULL;
for (b = 0, p = &ctx->cfg_edges[bb->successors]; b < bb->successors_count; b++, p++) {
successor = *p;
if (ir_bitset_in(blocks, successor)) {
if (ir_bitqueue_in(&blocks, successor)) {
successor_bb = &ctx->cfg_blocks[successor];
insn = &ctx->ir_base[successor_bb->start];
if (insn->op == IR_IF_TRUE || insn->op == IR_IF_FALSE || insn->op == IR_CASE_DEFAULT) {
@ -617,12 +617,12 @@ int ir_schedule_blocks(ir_ctx *ctx)
if (predecessor_bb->successors_count == 2) {
b = ctx->cfg_edges[predecessor_bb->successors];
if (!ir_bitset_in(blocks, b)) {
if (!ir_bitqueue_in(&blocks, b)) {
b = ctx->cfg_edges[predecessor_bb->successors + 1];
}
if (ir_bitset_in(blocks, b)) {
if (ir_bitqueue_in(&blocks, b)) {
bb = &ctx->cfg_blocks[b];
ir_bitset_excl(blocks, b);
ir_bitqueue_del(&blocks, b);
continue;
}
}
@ -631,7 +631,7 @@ int ir_schedule_blocks(ir_ctx *ctx)
}
b = best_successor;
bb = best_successor_bb;
ir_bitset_excl(blocks, b);
ir_bitqueue_del(&blocks, b);
} while (1);
}
@ -663,7 +663,7 @@ int ir_schedule_blocks(ir_ctx *ctx)
}
ir_mem_free(list);
ir_mem_free(blocks);
ir_bitqueue_free(&blocks);
return 1;
}

View File

@ -361,33 +361,6 @@ IR_ALWAYS_INLINE int ir_bitset_pop_first(ir_bitset set, uint32_t len)
return -1; /* empty set */
}
/*
 * Remove one bit from `set` and return its index, scanning word by word
 * from the cursor `*pos` up to word `len - 1`.
 *
 * On a hit the lowest set bit of the first non-zero word is cleared, `*pos`
 * is left on that word and the bit index (IR_BITSET_BITS * word index +
 * ir_bitset_ntz(word)) is returned. If every scanned word is zero, `*pos`
 * is parked on `len - 1` and -1 is returned.
 *
 * Words below `*pos` are never examined.
 *
 * The bound is tested before each read, so an empty set (`len == 0`) or a
 * cursor already at/after `len` yields -1 instead of reading past the end
 * of `set`.
 */
IR_ALWAYS_INLINE int ir_bitset_pop_first_ex(ir_bitset set, uint32_t len, uint32_t *pos)
{
	uint32_t i;

	for (i = *pos; i < len; i++) {
		ir_bitset_base_t x = set[i];

		if (x) {
			int bit = IR_BITSET_BITS * i + ir_bitset_ntz(x);

			set[i] = x & (x - 1); /* clears the lowest set bit of x */
			*pos = i;
			return bit;
		}
	}
	*pos = len - 1;
	return -1; /* empty set */
}
/*
 * Set bit `n` in `set` and, when that bit lives in a word before the
 * current cursor, pull `*pos` back to that word so a later
 * ir_bitset_pop_first_ex() scan starting at `*pos` will reach it.
 */
IR_ALWAYS_INLINE void ir_bitset_incl_ex(ir_bitset set, uint32_t n, uint32_t *pos)
{
	uint32_t word = n / IR_BITSET_BITS;

	set[word] |= IR_BITSET_ONE << (n % IR_BITSET_BITS);
	if (*pos > word) {
		*pos = word;
	}
}
#define IR_BITSET_FOREACH(set, len, bit) do { \
ir_bitset _set = (set); \
uint32_t _i, _len = (len); \
@ -402,6 +375,68 @@ IR_ALWAYS_INLINE void ir_bitset_incl_ex(ir_bitset set, uint32_t n, uint32_t *pos
} \
} while (0)
/* Bit Queue */
/* Queue of bit indices backed by an ir_bitset; ir_bitqueue_pop() scans it word by word starting at `pos`. */
typedef struct _ir_bitqueue {
/* number of words of `set` that ir_bitqueue_pop() may scan (ir_bitset_len(n) at init time) */
uint32_t len;
/* index of the word at which the next ir_bitqueue_pop() starts scanning */
uint32_t pos;
/* underlying bit storage, allocated by ir_bitqueue_init() and released by ir_bitqueue_free() */
ir_bitset set;
} ir_bitqueue;
/*
 * Set up `q` for bit indices handled by an ir_bitset sized for `n` bits.
 *
 * Storage is obtained from ir_bitset_malloc(n) (presumably zero-filled --
 * TODO confirm against ir_bitset_malloc). The scan cursor starts on the
 * last word; ir_bitqueue_add() lowers it whenever a bit is added to an
 * earlier word. Code that sets bits directly in q->set must adjust q->pos
 * itself.
 */
IR_ALWAYS_INLINE void ir_bitqueue_init(ir_bitqueue *q, uint32_t n)
{
	uint32_t words = ir_bitset_len(n);

	q->set = ir_bitset_malloc(n);
	q->len = words;
	q->pos = words - 1;
}
/* Release the bit storage of `q` through ir_mem_free(); `q` itself and its len/pos/set fields are not modified. */
IR_ALWAYS_INLINE void ir_bitqueue_free(ir_bitqueue *q)
{
ir_mem_free(q->set);
}
/*
 * Reset `q` for reuse: clear all q->len words of the set with
 * ir_bitset_clear() and put the scan cursor back on the last word, the
 * same position ir_bitqueue_init() leaves it in.
 */
IR_ALWAYS_INLINE void ir_bitqueue_clear(ir_bitqueue *q)
{
	ir_bitset_clear(q->set, q->len);
	q->pos = q->len - 1;
}
/*
 * Remove one bit from the queue and return its index.
 *
 * Scans q->set word by word from q->pos up to word q->len - 1. On a hit
 * the lowest set bit of the first non-zero word is cleared, q->pos is left
 * on that word and the bit index (IR_BITSET_BITS * word index +
 * ir_bitset_ntz(word)) is returned. If every scanned word is zero, q->pos
 * is parked on the last word and -1 is returned.
 *
 * Words below q->pos are never examined; ir_bitqueue_add() lowers q->pos
 * when it sets a bit in an earlier word.
 *
 * The bound is tested before each read, so a queue with q->len == 0 (or a
 * cursor at/after q->len) yields -1 instead of reading past the end of
 * q->set.
 */
IR_ALWAYS_INLINE int ir_bitqueue_pop(ir_bitqueue *q)
{
	uint32_t i;

	for (i = q->pos; i < q->len; i++) {
		ir_bitset_base_t x = q->set[i];

		if (x) {
			int bit = IR_BITSET_BITS * i + ir_bitset_ntz(x);

			q->set[i] = x & (x - 1); /* clears the lowest set bit of x */
			q->pos = i;
			return bit;
		}
	}
	q->pos = q->len - 1;
	return -1; /* empty set */
}
/*
 * Insert bit `n` into the queue. If the bit lands in a word before the
 * current scan cursor, q->pos is pulled back to that word so the next
 * ir_bitqueue_pop() will reach it.
 */
IR_ALWAYS_INLINE void ir_bitqueue_add(ir_bitqueue *q, uint32_t n)
{
	uint32_t word = n / IR_BITSET_BITS;

	q->set[word] |= IR_BITSET_ONE << (n % IR_BITSET_BITS);
	if (q->pos > word) {
		q->pos = word;
	}
}
/* Remove bit `n` from the queue's set (delegates to ir_bitset_excl); the scan cursor q->pos is left unchanged. */
IR_ALWAYS_INLINE void ir_bitqueue_del(ir_bitqueue *q, uint32_t n)
{
ir_bitset_excl(q->set, n);
}
/* Report whether bit `n` is currently present in the queue's set (delegates to ir_bitset_in); does not modify `q`. */
IR_ALWAYS_INLINE bool ir_bitqueue_in(ir_bitqueue *q, uint32_t n)
{
return ir_bitset_in(q->set, n);
}
/* Dynamic array of numeric references */
typedef struct _ir_array {
ir_ref *refs;

14
ir_ra.c
View File

@ -332,7 +332,7 @@ int ir_compute_live_ranges(ir_ctx *ctx)
ir_block *bb, *succ_bb;
ir_bitset visited, live;
ir_bitset loops = NULL;
ir_bitset queue = NULL;
ir_bitqueue queue;
ir_reg reg;
ir_live_range *unused = NULL;
@ -552,16 +552,14 @@ int ir_compute_live_ranges(ir_ctx *ctx)
uint32_t bb_set_len = ir_bitset_len(ctx->cfg_blocks_count + 1);
int child;
ir_block *child_bb;
uint32_t pos;
if (!loops) {
loops = ir_bitset_malloc(ctx->cfg_blocks_count + 1);
queue = ir_bitset_malloc(ctx->cfg_blocks_count + 1);
ir_bitqueue_init(&queue, ctx->cfg_blocks_count + 1);
} else {
ir_bitset_clear(loops, bb_set_len);
ir_bitset_clear(queue, bb_set_len);
ir_bitqueue_clear(&queue);
}
pos = bb_set_len - 1;
ir_bitset_incl(loops, b);
child = b;
do {
@ -577,14 +575,14 @@ int ir_compute_live_ranges(ir_ctx *ctx)
while (child) {
child_bb = &ctx->cfg_blocks[child];
if (child_bb->loop_header && ir_bitset_in(loops, child_bb->loop_header)) {
ir_bitset_incl_ex(queue, child, &pos);
ir_bitqueue_add(&queue, child);
if (child_bb->flags & IR_BB_LOOP_HEADER) {
ir_bitset_incl(loops, child);
}
}
child = child_bb->dom_next_child;
}
} while ((child = ir_bitset_pop_first_ex(queue, bb_set_len, &pos)) >= 0);
} while ((child = ir_bitqueue_pop(&queue)) >= 0);
}
/* b.liveIn = live */
@ -597,7 +595,7 @@ int ir_compute_live_ranges(ir_ctx *ctx)
if (loops) {
ir_mem_free(loops);
ir_mem_free(queue);
ir_bitqueue_free(&queue);
}
ir_mem_free(live);

View File

@ -371,7 +371,7 @@ static void ir_sccp_remove_unreachable_merge_inputs(ir_ctx *ctx, ir_insn *_value
}
}
static uint32_t ir_sccp_mark_reachable_data(ir_ctx *ctx, ir_bitset worklist, ir_insn *_values, ir_insn *insn, uint32_t pos)
static void ir_sccp_mark_reachable_data(ir_ctx *ctx, ir_bitqueue *worklist, ir_insn *_values, ir_insn *insn)
{
int j, n, use;
uint32_t flags = ir_op_flags[insn->op];
@ -380,13 +380,12 @@ static uint32_t ir_sccp_mark_reachable_data(ir_ctx *ctx, ir_bitset worklist, ir_
for (j = 1; j <= n; j++) {
if (IR_OPND_KIND(flags, j) == IR_OPND_DATA || IR_OPND_KIND(flags, j) == IR_OPND_VAR) {
use = insn->ops[j];
if (use > 0 && IR_IS_TOP(use) && !ir_bitset_in(worklist, use)) {
ir_bitset_incl_ex(worklist, use, &pos);
pos = ir_sccp_mark_reachable_data(ctx, worklist, _values, &ctx->ir_base[use], pos);
if (use > 0 && IR_IS_TOP(use) && !ir_bitqueue_in(worklist, use)) {
ir_bitqueue_add(worklist, use);
ir_sccp_mark_reachable_data(ctx, worklist, _values, &ctx->ir_base[use]);
}
}
}
return pos;
}
int ir_sccp(ir_ctx *ctx)
@ -394,22 +393,22 @@ int ir_sccp(ir_ctx *ctx)
ir_ref i, j, n, *p, use;
ir_use_list *use_list;
ir_insn *insn, *use_insn;
uint32_t flags, pos;
uint32_t len = ir_bitset_len(ctx->insns_count);
ir_bitset worklist = ir_bitset_malloc(ctx->insns_count);
uint32_t flags;
ir_bitqueue worklist;
ir_insn *_values = ir_mem_calloc(ctx->insns_count, sizeof(ir_insn));
ctx->flags |= IR_OPT_IN_SCCP;
/* A slightly modified version of the SCCP algorithm by M. N. Wegman and F. K. Zadeck */
ir_bitset_incl(worklist, 1);
ir_bitqueue_init(&worklist, ctx->insns_count);
worklist.pos = 0;
ir_bitset_incl(worklist.set, 1);
i = ctx->ir_base[1].op2;
while (i) {
ir_bitset_incl(worklist, i);
ir_bitset_incl(worklist.set, i);
i = ctx->ir_base[i].op2;
}
pos = 0;
while ((i = ir_bitset_pop_first_ex(worklist, len, &pos)) >= 0) {
while ((i = ir_bitqueue_pop(&worklist)) >= 0) {
insn = &ctx->ir_base[i];
flags = ir_op_flags[insn->op];
if (flags & IR_OP_FLAG_DATA) {
@ -476,7 +475,7 @@ int ir_sccp(ir_ctx *ctx)
IR_MAKE_BOTTOM(i);
}
if (!IR_IS_BOTTOM(use)) {
ir_bitset_incl_ex(worklist, use, &pos);
ir_bitqueue_add(&worklist, use);
}
break;
}
@ -506,7 +505,7 @@ int ir_sccp(ir_ctx *ctx)
IR_MAKE_BOTTOM(i);
}
if (!IR_IS_BOTTOM(use)) {
ir_bitset_incl_ex(worklist, use, &pos);
ir_bitqueue_add(&worklist, use);
}
default_case = IR_UNUSED;
break;
@ -524,7 +523,7 @@ int ir_sccp(ir_ctx *ctx)
IR_MAKE_BOTTOM(i);
}
if (!IR_IS_BOTTOM(default_case)) {
ir_bitset_incl_ex(worklist, default_case, &pos);
ir_bitqueue_add(&worklist, default_case);
}
}
if (!IR_IS_BOTTOM(i)) {
@ -571,13 +570,13 @@ int ir_sccp(ir_ctx *ctx)
for (j = 1; j <= n; j++) {
if (IR_OPND_KIND(flags, j) == IR_OPND_DATA || IR_OPND_KIND(flags, j) == IR_OPND_VAR) {
use = insn->ops[j];
if (use > 0 && IR_IS_TOP(use) && !ir_bitset_in(worklist, use)) {
ir_bitset_incl_ex(worklist, use, &pos);
pos = ir_sccp_mark_reachable_data(ctx, worklist, _values, &ctx->ir_base[use], pos);
if (use > 0 && IR_IS_TOP(use) && !ir_bitqueue_in(&worklist, use)) {
ir_bitqueue_add(&worklist, use);
ir_sccp_mark_reachable_data(ctx, &worklist, _values, &ctx->ir_base[use]);
}
}
}
// pos = ir_sccp_mark_reachable_data(ctx, worklist, _values, insn, pos);
// ir_sccp_mark_reachable_data(ctx, &worklist, _values, insn);
}
}
use_list = &ctx->use_lists[i];
@ -588,12 +587,12 @@ int ir_sccp(ir_ctx *ctx)
if ((ir_op_flags[insn->op] & IR_OP_FLAG_DATA)) {
if (insn->op != IR_PHI || IR_IS_REACHABLE(insn->op1)) {
if (!IR_IS_BOTTOM(use)) {
ir_bitset_incl_ex(worklist, use, &pos);
ir_bitqueue_add(&worklist, use);
}
}
} else if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN || IR_IS_REACHABLE(insn->op1)) {
if (!IR_IS_BOTTOM(use)) {
ir_bitset_incl_ex(worklist, use, &pos);
ir_bitqueue_add(&worklist, use);
}
}
}
@ -661,17 +660,17 @@ int ir_sccp(ir_ctx *ctx)
ir_sccp_remove_if(ctx, _values, i, _values[i].op1);
} else if (_values[i].op == IR_MERGE || _values[i].op == IR_LOOP_BEGIN) {
/* schedule merge to remove unreachable MERGE inputs */
ir_bitset_incl_ex(worklist, i, &pos);
ir_bitqueue_add(&worklist, i);
}
}
while ((i = ir_bitset_pop_first_ex(worklist, len, &pos)) >= 0) {
while ((i = ir_bitqueue_pop(&worklist)) >= 0) {
/* remove unreachable MERGE inputs */
ir_sccp_remove_unreachable_merge_inputs(ctx, _values, i, _values[i].op1);
}
ir_mem_free(_values);
ir_mem_free(worklist);
ir_bitqueue_free(&worklist);
ctx->flags &= ~IR_OPT_IN_SCCP;