diff --git a/ir_cfg.c b/ir_cfg.c index 755e8a7..5150a12 100644 --- a/ir_cfg.c +++ b/ir_cfg.c @@ -526,40 +526,40 @@ next: */ int ir_schedule_blocks(ir_ctx *ctx) { - uint32_t len = ir_bitset_len(ctx->cfg_blocks_count + 1); - ir_bitset blocks = ir_bitset_malloc(ctx->cfg_blocks_count + 1); + ir_bitqueue blocks; uint32_t b, *p, successor, best_successor, j; ir_block *bb, *successor_bb, *best_successor_bb; ir_insn *insn; - uint32_t *list, *map, pos; + uint32_t *list, *map; uint32_t prob, best_successor_prob; uint32_t count = 0; bool reorder = 0; + ir_bitqueue_init(&blocks, ctx->cfg_blocks_count + 1); + blocks.pos = 0; list = ir_mem_malloc(sizeof(uint32_t) * (ctx->cfg_blocks_count + 1) * 2); map = list + (ctx->cfg_blocks_count + 1); for (b = 1; b <= ctx->cfg_blocks_count; b++) { - ir_bitset_incl(blocks, b); + ir_bitset_incl(blocks.set, b); } - pos = 0; - while ((b = ir_bitset_pop_first_ex(blocks, len, &pos)) != (uint32_t)-1) { + while ((b = ir_bitqueue_pop(&blocks)) != (uint32_t)-1) { bb = &ctx->cfg_blocks[b]; do { if (bb->predecessors_count == 2) { uint32_t predecessor = ctx->cfg_edges[bb->predecessors]; - if (!ir_bitset_in(blocks, predecessor)) { + if (!ir_bitqueue_in(&blocks, predecessor)) { predecessor = ctx->cfg_edges[bb->predecessors + 1]; } - if (ir_bitset_in(blocks, predecessor)) { + if (ir_bitqueue_in(&blocks, predecessor)) { ir_block *predecessor_bb = &ctx->cfg_blocks[predecessor]; if (predecessor_bb->successors_count == 1 && predecessor_bb->predecessors_count == 1 && predecessor_bb->end == predecessor_bb->start + 1 && !(predecessor_bb->flags & IR_BB_DESSA_MOVES)) { - ir_bitset_excl(blocks, predecessor); + ir_bitqueue_del(&blocks, predecessor); count++; list[count] = predecessor; map[predecessor] = count; @@ -581,7 +581,7 @@ int ir_schedule_blocks(ir_ctx *ctx) best_successor_bb = NULL; for (b = 0, p = &ctx->cfg_edges[bb->successors]; b < bb->successors_count; b++, p++) { successor = *p; - if (ir_bitset_in(blocks, successor)) { + if (ir_bitqueue_in(&blocks, 
successor)) { successor_bb = &ctx->cfg_blocks[successor]; insn = &ctx->ir_base[successor_bb->start]; if (insn->op == IR_IF_TRUE || insn->op == IR_IF_FALSE || insn->op == IR_CASE_DEFAULT) { @@ -617,12 +617,12 @@ int ir_schedule_blocks(ir_ctx *ctx) if (predecessor_bb->successors_count == 2) { b = ctx->cfg_edges[predecessor_bb->successors]; - if (!ir_bitset_in(blocks, b)) { + if (!ir_bitqueue_in(&blocks, b)) { b = ctx->cfg_edges[predecessor_bb->successors + 1]; } - if (ir_bitset_in(blocks, b)) { + if (ir_bitqueue_in(&blocks, b)) { bb = &ctx->cfg_blocks[b]; - ir_bitset_excl(blocks, b); + ir_bitqueue_del(&blocks, b); continue; } } @@ -631,7 +631,7 @@ int ir_schedule_blocks(ir_ctx *ctx) } b = best_successor; bb = best_successor_bb; - ir_bitset_excl(blocks, b); + ir_bitqueue_del(&blocks, b); } while (1); } @@ -663,7 +663,7 @@ int ir_schedule_blocks(ir_ctx *ctx) } ir_mem_free(list); - ir_mem_free(blocks); + ir_bitqueue_free(&blocks); return 1; } diff --git a/ir_private.h b/ir_private.h index 2de70ad..e413c3d 100644 --- a/ir_private.h +++ b/ir_private.h @@ -361,33 +361,6 @@ IR_ALWAYS_INLINE int ir_bitset_pop_first(ir_bitset set, uint32_t len) return -1; /* empty set */ } -IR_ALWAYS_INLINE int ir_bitset_pop_first_ex(ir_bitset set, uint32_t len, uint32_t *pos) -{ - uint32_t i = *pos; - ir_bitset_base_t x; - do { - x = set[i]; - if (x) { - int bit = IR_BITSET_BITS * i + ir_bitset_ntz(x); - set[i] = x & (x - 1); - *pos = i; - return bit; - } - i++; - } while (i < len); - *pos = len - 1; - return -1; /* empty set */ -} - -IR_ALWAYS_INLINE void ir_bitset_incl_ex(ir_bitset set, uint32_t n, uint32_t *pos) -{ - uint32_t i = n / IR_BITSET_BITS; - set[i] |= IR_BITSET_ONE << (n % IR_BITSET_BITS); - if (i < *pos) { - *pos = i; - } -} - #define IR_BITSET_FOREACH(set, len, bit) do { \ ir_bitset _set = (set); \ uint32_t _i, _len = (len); \ @@ -402,6 +375,68 @@ IR_ALWAYS_INLINE void ir_bitset_incl_ex(ir_bitset set, uint32_t n, uint32_t *pos } \ } while (0) +/* Bit Queue */ +typedef struct 
_ir_bitqueue { /* bitset paired with a cached start position for ir_bitqueue_pop() */ + uint32_t len; /* set by ir_bitqueue_init() to ir_bitset_len(n); bounds the word index in ir_bitqueue_pop() */ + uint32_t pos; /* index into set[] at which ir_bitqueue_pop() begins scanning */ + ir_bitset set; /* bit storage obtained from ir_bitset_malloc() */ +} ir_bitqueue; + +IR_ALWAYS_INLINE void ir_bitqueue_init(ir_bitqueue *q, uint32_t n) /* allocate storage for n bits; pos starts at the last word */ +{ + q->len = ir_bitset_len(n); + q->pos = q->len - 1; /* NOTE(review): unsigned wrap if ir_bitset_len(n) is 0 - presumably n > 0 at every call site; confirm */ + q->set = ir_bitset_malloc(n); /* assumes ir_bitset_malloc() returns zeroed storage - TODO confirm */ +} + +IR_ALWAYS_INLINE void ir_bitqueue_free(ir_bitqueue *q) /* release q->set with ir_mem_free(); q itself and its other fields are left untouched */ +{ + ir_mem_free(q->set); +} + +IR_ALWAYS_INLINE void ir_bitqueue_clear(ir_bitqueue *q) /* reset pos to the last word and run ir_bitset_clear() over the storage (presumably zeroing every bit - confirm) */ +{ + q->pos = q->len - 1; + ir_bitset_clear(q->set, q->len); +} + +IR_ALWAYS_INLINE int ir_bitqueue_pop(ir_bitqueue *q) /* remove and return a set bit, scanning words from q->pos upward; returns -1 if none is found; words below q->pos are not examined */ +{ + uint32_t i = q->pos; + ir_bitset_base_t x; + do { + x = q->set[i]; /* the word at the starting position is read before i is compared with q->len */ + if (x) { + int bit = IR_BITSET_BITS * i + ir_bitset_ntz(x); /* presumably ir_bitset_ntz() counts trailing zeros, making this the lowest set bit of word i - confirm */ + q->set[i] = x & (x - 1); /* x & (x - 1) clears the lowest set bit of x */ + q->pos = i; /* the next pop resumes at this word */ + return bit; + } + i++; + } while (i < q->len); + q->pos = q->len - 1; /* nothing found: park pos at the last word, as ir_bitqueue_init() does */ + return -1; /* empty set */ +} + +IR_ALWAYS_INLINE void ir_bitqueue_add(ir_bitqueue *q, uint32_t n) /* set bit n and move pos down when n lies in an earlier word */ +{ + uint32_t i = n / IR_BITSET_BITS; /* index of the word that holds bit n */ + q->set[i] |= IR_BITSET_ONE << (n % IR_BITSET_BITS); + if (i < q->pos) { + q->pos = i; /* keeps the word holding n within the range ir_bitqueue_pop() scans */ + } +} + +IR_ALWAYS_INLINE void ir_bitqueue_del(ir_bitqueue *q, uint32_t n) /* remove bit n via ir_bitset_excl(); pos is not adjusted */ +{ + ir_bitset_excl(q->set, n); +} + +IR_ALWAYS_INLINE bool ir_bitqueue_in(ir_bitqueue *q, uint32_t n) /* membership test: forwards to ir_bitset_in() on the storage */ +{ + return ir_bitset_in(q->set, n); +} + /* Dynamic array of numeric references */ typedef struct _ir_array { ir_ref *refs; diff --git a/ir_ra.c b/ir_ra.c index 1059a90..add6246 100644 --- a/ir_ra.c +++ b/ir_ra.c @@ -332,7 +332,7 @@ int ir_compute_live_ranges(ir_ctx *ctx) ir_block *bb, *succ_bb; ir_bitset visited, live; ir_bitset loops = NULL; - ir_bitset queue = NULL; + ir_bitqueue queue; ir_reg reg; ir_live_range *unused = NULL; @@ -552,16 +552,14 @@ int ir_compute_live_ranges(ir_ctx *ctx) uint32_t bb_set_len = ir_bitset_len(ctx->cfg_blocks_count + 1); int child; ir_block *child_bb; - uint32_t pos; if (!loops) { loops = ir_bitset_malloc(ctx->cfg_blocks_count + 1); - queue = ir_bitset_malloc(ctx->cfg_blocks_count + 1); + ir_bitqueue_init(&queue, ctx->cfg_blocks_count + 1); } else { ir_bitset_clear(loops, bb_set_len); - 
ir_bitset_clear(queue, bb_set_len); + ir_bitqueue_clear(&queue); } - pos = bb_set_len - 1; ir_bitset_incl(loops, b); child = b; do { @@ -577,14 +575,14 @@ int ir_compute_live_ranges(ir_ctx *ctx) while (child) { child_bb = &ctx->cfg_blocks[child]; if (child_bb->loop_header && ir_bitset_in(loops, child_bb->loop_header)) { - ir_bitset_incl_ex(queue, child, &pos); + ir_bitqueue_add(&queue, child); if (child_bb->flags & IR_BB_LOOP_HEADER) { ir_bitset_incl(loops, child); } } child = child_bb->dom_next_child; } - } while ((child = ir_bitset_pop_first_ex(queue, bb_set_len, &pos)) >= 0); + } while ((child = ir_bitqueue_pop(&queue)) >= 0); } /* b.liveIn = live */ @@ -597,7 +595,7 @@ int ir_compute_live_ranges(ir_ctx *ctx) if (loops) { ir_mem_free(loops); - ir_mem_free(queue); + ir_bitqueue_free(&queue); } ir_mem_free(live); diff --git a/ir_sccp.c b/ir_sccp.c index 5c82d3d..bb86f8b 100644 --- a/ir_sccp.c +++ b/ir_sccp.c @@ -371,7 +371,7 @@ static void ir_sccp_remove_unreachable_merge_inputs(ir_ctx *ctx, ir_insn *_value } } -static uint32_t ir_sccp_mark_reachable_data(ir_ctx *ctx, ir_bitset worklist, ir_insn *_values, ir_insn *insn, uint32_t pos) +static void ir_sccp_mark_reachable_data(ir_ctx *ctx, ir_bitqueue *worklist, ir_insn *_values, ir_insn *insn) { int j, n, use; uint32_t flags = ir_op_flags[insn->op]; @@ -380,13 +380,12 @@ static uint32_t ir_sccp_mark_reachable_data(ir_ctx *ctx, ir_bitset worklist, ir_ for (j = 1; j <= n; j++) { if (IR_OPND_KIND(flags, j) == IR_OPND_DATA || IR_OPND_KIND(flags, j) == IR_OPND_VAR) { use = insn->ops[j]; - if (use > 0 && IR_IS_TOP(use) && !ir_bitset_in(worklist, use)) { - ir_bitset_incl_ex(worklist, use, &pos); - pos = ir_sccp_mark_reachable_data(ctx, worklist, _values, &ctx->ir_base[use], pos); + if (use > 0 && IR_IS_TOP(use) && !ir_bitqueue_in(worklist, use)) { + ir_bitqueue_add(worklist, use); + ir_sccp_mark_reachable_data(ctx, worklist, _values, &ctx->ir_base[use]); } } } - return pos; } int ir_sccp(ir_ctx *ctx) @@ -394,22 +393,22 @@ 
int ir_sccp(ir_ctx *ctx) ir_ref i, j, n, *p, use; ir_use_list *use_list; ir_insn *insn, *use_insn; - uint32_t flags, pos; - uint32_t len = ir_bitset_len(ctx->insns_count); - ir_bitset worklist = ir_bitset_malloc(ctx->insns_count); + uint32_t flags; + ir_bitqueue worklist; ir_insn *_values = ir_mem_calloc(ctx->insns_count, sizeof(ir_insn)); ctx->flags |= IR_OPT_IN_SCCP; /* A bit modified SCCP algorith of M. N. Wegman and F. K. Zadeck */ - ir_bitset_incl(worklist, 1); + ir_bitqueue_init(&worklist, ctx->insns_count); + worklist.pos = 0; + ir_bitset_incl(worklist.set, 1); i = ctx->ir_base[1].op2; while (i) { - ir_bitset_incl(worklist, i); + ir_bitset_incl(worklist.set, i); i = ctx->ir_base[i].op2; } - pos = 0; - while ((i = ir_bitset_pop_first_ex(worklist, len, &pos)) >= 0) { + while ((i = ir_bitqueue_pop(&worklist)) >= 0) { insn = &ctx->ir_base[i]; flags = ir_op_flags[insn->op]; if (flags & IR_OP_FLAG_DATA) { @@ -476,7 +475,7 @@ int ir_sccp(ir_ctx *ctx) IR_MAKE_BOTTOM(i); } if (!IR_IS_BOTTOM(use)) { - ir_bitset_incl_ex(worklist, use, &pos); + ir_bitqueue_add(&worklist, use); } break; } @@ -506,7 +505,7 @@ int ir_sccp(ir_ctx *ctx) IR_MAKE_BOTTOM(i); } if (!IR_IS_BOTTOM(use)) { - ir_bitset_incl_ex(worklist, use, &pos); + ir_bitqueue_add(&worklist, use); } default_case = IR_UNUSED; break; @@ -524,7 +523,7 @@ int ir_sccp(ir_ctx *ctx) IR_MAKE_BOTTOM(i); } if (!IR_IS_BOTTOM(default_case)) { - ir_bitset_incl_ex(worklist, default_case, &pos); + ir_bitqueue_add(&worklist, default_case); } } if (!IR_IS_BOTTOM(i)) { @@ -571,13 +570,13 @@ int ir_sccp(ir_ctx *ctx) for (j = 1; j <= n; j++) { if (IR_OPND_KIND(flags, j) == IR_OPND_DATA || IR_OPND_KIND(flags, j) == IR_OPND_VAR) { use = insn->ops[j]; - if (use > 0 && IR_IS_TOP(use) && !ir_bitset_in(worklist, use)) { - ir_bitset_incl_ex(worklist, use, &pos); - pos = ir_sccp_mark_reachable_data(ctx, worklist, _values, &ctx->ir_base[use], pos); + if (use > 0 && IR_IS_TOP(use) && !ir_bitqueue_in(&worklist, use)) { + 
ir_bitqueue_add(&worklist, use); + ir_sccp_mark_reachable_data(ctx, &worklist, _values, &ctx->ir_base[use]); } } } -// pos = ir_sccp_mark_reachable_data(ctx, worklist, _values, insn, pos); +// ir_sccp_mark_reachable_data(ctx, &worklist, _values, insn); } } use_list = &ctx->use_lists[i]; @@ -588,12 +587,12 @@ int ir_sccp(ir_ctx *ctx) if ((ir_op_flags[insn->op] & IR_OP_FLAG_DATA)) { if (insn->op != IR_PHI || IR_IS_REACHABLE(insn->op1)) { if (!IR_IS_BOTTOM(use)) { - ir_bitset_incl_ex(worklist, use, &pos); + ir_bitqueue_add(&worklist, use); } } } else if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN || IR_IS_REACHABLE(insn->op1)) { if (!IR_IS_BOTTOM(use)) { - ir_bitset_incl_ex(worklist, use, &pos); + ir_bitqueue_add(&worklist, use); } } } @@ -661,17 +660,17 @@ int ir_sccp(ir_ctx *ctx) ir_sccp_remove_if(ctx, _values, i, _values[i].op1); } else if (_values[i].op == IR_MERGE || _values[i].op == IR_LOOP_BEGIN) { /* schedule merge to remove unreachable MERGE inputs */ - ir_bitset_incl_ex(worklist, i, &pos); + ir_bitqueue_add(&worklist, i); } } - while ((i = ir_bitset_pop_first_ex(worklist, len, &pos)) >= 0) { + while ((i = ir_bitqueue_pop(&worklist)) >= 0) { /* remove unreachable MERGE inputs */ ir_sccp_remove_unreachable_merge_inputs(ctx, _values, i, _values[i].op1); } ir_mem_free(_values); - ir_mem_free(worklist); + ir_bitqueue_free(&worklist); ctx->flags &= ~IR_OPT_IN_SCCP;