ir/ir_gcm.c
2022-04-27 14:47:52 +03:00

411 lines
10 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "ir.h"
#include "ir_private.h"
/* GCM - Global Code Motion
*
* C. Click. "Global code motion, global value numbering" Submitted to PLDI 95.
*/
static void ir_gcm_schedule_early(ir_ctx *ctx, int *_blocks, ir_ref ref)
{
ir_ref j, n, *p;
ir_insn *insn;
uint32_t flags;
if (_blocks[ref] > 0) {
return;
}
_blocks[ref] = 1;
insn = &ctx->ir_base[ref];
flags = ir_op_flags[insn->op];
if (IR_OPND_KIND(flags, 1) == IR_OPND_CONTROL_DEP) {
IR_ASSERT(_blocks[insn->op1] > 0);
_blocks[ref] = _blocks[insn->op1];
}
n = ir_input_edges_count(ctx, insn);
for (j = 1, p = insn->ops + 1; j <= n; j++, p++) {
ir_ref input = *p;
if (input > 0) {
if (_blocks[input] == 0) {
ir_gcm_schedule_early(ctx, _blocks, input);
}
if (IR_OPND_KIND(flags, 1) != IR_OPND_CONTROL_DEP
&& ctx->cfg_blocks[_blocks[ref]].dom_depth < ctx->cfg_blocks[_blocks[input]].dom_depth) {
_blocks[ref] = _blocks[input];
}
}
}
}
/* Last Common Ancestor */
static int ir_gcm_find_lca(ir_ctx *ctx, int b1, int b2)
{
while (ctx->cfg_blocks[b1].dom_depth > ctx->cfg_blocks[b2].dom_depth) {
b1 = ctx->cfg_blocks[b1].dom_parent;
}
while (ctx->cfg_blocks[b2].dom_depth > ctx->cfg_blocks[b1].dom_depth) {
b2 = ctx->cfg_blocks[b2].dom_parent;
}
while (b1 != b2) {
b1 = ctx->cfg_blocks[b1].dom_parent;
b2 = ctx->cfg_blocks[b2].dom_parent;
}
return b2;
}
static void ir_gcm_schedule_late(ir_ctx *ctx, int *_blocks, ir_bitset visited, ir_ref ref)
{
ir_ref i, n, *p, use;
ir_insn *insn;
uint32_t flags;
ir_bitset_incl(visited, ref);
n = ctx->use_lists[ref].count;
if (n) {
int lca, b;
for (i = 0, p = &ctx->use_edges[ctx->use_lists[ref].refs]; i < n; i++, p++) {
use = *p;
if (!ir_bitset_in(visited, use) && _blocks[use]) {
ir_gcm_schedule_late(ctx, _blocks, visited, use);
}
}
insn = &ctx->ir_base[ref];
flags = ir_op_flags[insn->op];
if (IR_OPND_KIND(flags, 1) == IR_OPND_CONTROL_DEP) {
return;
}
lca = 0;
for (i = 0, p -= n; i < n; i++, p++) {
use = *p;
b = _blocks[use];
if (!b) {
continue;
}
insn = &ctx->ir_base[use];
if (insn->op == IR_PHI) {
if (insn->op2 == ref) {
b = _blocks[ctx->ir_base[insn->op1].op1];
} else if (insn->op3 == ref) {
b = _blocks[ctx->ir_base[insn->op1].op2];
} else {
IR_ASSERT(0);
}
}
lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b);
}
b = lca;
while (lca != ctx->cfg_blocks[_blocks[ref]].dom_parent) {
if (ctx->cfg_blocks[lca].loop_depth < ctx->cfg_blocks[b].loop_depth) {
b = lca;
}
lca = ctx->cfg_blocks[lca].dom_parent;
}
_blocks[ref] = b;
}
}
int ir_gcm(ir_ctx *ctx)
{
ir_ref i, j, k, n, *p, ref;
ir_bitset visited;
ir_block *bb;
ir_list queue;
int *_blocks;
ir_insn *insn;
uint32_t flags;
_blocks = ir_mem_malloc(ctx->insns_count * sizeof(int));
memset(_blocks, 0, ctx->insns_count * sizeof(int));
ir_list_init(&queue, ctx->insns_count);
/* pin control instructions and collect their direct inputs */
for (i = 1, bb = ctx->cfg_blocks + 1; i <= ctx->cfg_blocks_count; i++, bb++) {
j = bb->end;
while (1) {
insn = &ctx->ir_base[j];
_blocks[j] = i; /* pin to block */
flags = ir_op_flags[insn->op];
n = ir_input_edges_count(ctx, insn);
for (k = 1, p = insn->ops + 1; k <= n; k++, p++) {
ref = *p;
if (ref > 0) {
if (IR_OPND_KIND(flags, k) == IR_OPND_DATA || IR_OPND_KIND(flags, k) == IR_OPND_VAR) {
ir_list_push(&queue, ref);
}
}
}
if (j == bb->start) {
break;
}
j = insn->op1; /* control predecessor */
}
}
n = ir_list_len(&queue);
for (i = 0; i < n; i++) {
ref = ir_list_at(&queue, i);
if (_blocks[ref] == 0) {
ir_gcm_schedule_early(ctx, _blocks, ref);
}
}
#ifdef IR_DEBUG
if (ctx->flags & IR_DEBUG_GCM) {
fprintf(stderr, "GCM Schedule Early\n");
for (i = 1; i < ctx->insns_count; i++) {
fprintf(stderr, "%d -> %d\n", i, _blocks[i]);
}
}
#endif
/* collect uses of control instructions */
visited = ir_bitset_malloc(ctx->insns_count);
ir_list_clear(&queue);
for (i = 1, bb = ctx->cfg_blocks + 1; i <= ctx->cfg_blocks_count; i++, bb++) {
j = bb->end;
while (1) {
ir_bitset_incl(visited, j);
insn = &ctx->ir_base[j];
n = ctx->use_lists[j].count;
if (n > 0) {
for (k = 0, p = &ctx->use_edges[ctx->use_lists[j].refs]; k < n; k++, p++) {
ref = *p;
if (ctx->ir_base[ref].op == IR_PARAM || ctx->ir_base[ref].op == IR_VAR) {
_blocks[ref] = _blocks[j];
} else if (ir_op_flags[ctx->ir_base[ref].op] & IR_OP_FLAG_DATA) {
ir_list_push(&queue, ref);
}
}
}
if (j == bb->start) {
break;
}
j = insn->op1; /* control predecessor */
}
}
n = ir_list_len(&queue);
for (i = 0; i < n; i++) {
ref = ir_list_at(&queue, i);
if (!ir_bitset_in(visited, ref)) {
ir_gcm_schedule_late(ctx, _blocks, visited, ref);
}
}
ir_mem_free(visited);
ir_list_free(&queue);
#ifdef IR_DEBUG
if (ctx->flags & IR_DEBUG_GCM) {
fprintf(stderr, "GCM Schedule Late\n");
for (i = 1; i < ctx->insns_count; i++) {
fprintf(stderr, "%d -> %d\n", i, _blocks[i]);
}
}
#endif
ctx->gcm_blocks = _blocks;
return 1;
}
static int ir_copy(ir_ctx *new_ctx, ir_ctx *ctx, ir_ref *_next, bool preserve_constants_order)
{
ir_ref i, j, k, n, *p, ref, new_ref;
ir_ref *_xlat;
ir_insn *insn;
uint32_t flags;
if (preserve_constants_order) {
ir_bitset used = ir_bitset_malloc(ctx->consts_count + 1);
_xlat = ir_mem_calloc(ctx->consts_count + ctx->insns_count, sizeof(ir_ref));
_xlat += ctx->consts_count;
for (j = 1, i = 1; i != 0; i = _next[i]) {
_xlat[i] = j;
insn = &ctx->ir_base[i];
flags = ir_op_flags[insn->op];
n = ir_operands_count(ctx, insn);
for (k = 1, p = insn->ops + 1; k <= n; k++, p++) {
ref = *p;
if (IR_OPND_KIND(flags, k) == IR_OPND_DATA && IR_IS_CONST_REF(ref)) {
ir_bitset_incl(used, -ref);
}
}
n = 1 + (n >> 2); // support for multi-word instructions like MERGE and PHI
j += n;
}
IR_BITSET_FOREACH(used, ir_bitset_len(ctx->consts_count + 1), ref) {
if (ctx->ir_base[-ref].op == IR_FUNC) {
_xlat[-ref] = ir_const_func(new_ctx, ir_str(new_ctx, ir_get_str(ctx, ctx->ir_base[-ref].val.addr)));
} else if (ctx->ir_base[-ref].op == IR_STR) {
_xlat[-ref] = ir_const_str(new_ctx, ir_str(new_ctx, ir_get_str(ctx, ctx->ir_base[-ref].val.addr)));
} else {
_xlat[-ref] = ir_const(new_ctx, ctx->ir_base[-ref].val, ctx->ir_base[-ref].type);
}
} IR_BITSET_FOREACH_END();
ir_mem_free(used);
} else {
_xlat = ir_mem_calloc(ctx->insns_count, sizeof(ir_ref));
for (j = 1, i = 1; i != 0; i = _next[i]) {
_xlat[i] = j;
insn = &ctx->ir_base[i];
n = ir_operands_count(ctx, insn);
n = 1 + (n >> 2); // support for multi-word instructions like MERGE and PHI
j += n;
}
}
for (i = 1; i != 0; i = _next[i]) {
insn = &ctx->ir_base[i];
flags = ir_op_flags[insn->op];
n = ir_operands_count(ctx, insn);
if (insn->op == IR_MERGE && n != 2) {
new_ref = ir_emit_N(new_ctx, insn->opt, n);
} else if (n <= 3) {
new_ref = ir_emit0(new_ctx, insn->opt);
} else {
new_ref = ir_emit_N(new_ctx, insn->opt, n);
}
for (k = 1, p = insn->ops + 1; k <= n; k++, p++) {
ref = *p;
switch (IR_OPND_KIND(flags, k)) {
case IR_OPND_DATA:
case IR_OPND_VAR:
if (IR_IS_CONST_REF(ref) && !preserve_constants_order) {
if (ctx->ir_base[-ref].op == IR_FUNC) {
ref = ir_const_func(new_ctx, ir_str(new_ctx, ir_get_str(ctx, ctx->ir_base[ref].val.addr)));
} else if (ctx->ir_base[-ref].op == IR_STR) {
ref = ir_const_str(new_ctx, ir_str(new_ctx, ir_get_str(ctx, ctx->ir_base[ref].val.addr)));
} else {
ref = ir_const(new_ctx, ctx->ir_base[ref].val, ctx->ir_base[ref].type);
}
break;
}
case IR_OPND_CONTROL:
case IR_OPND_CONTROL_DEP:
case IR_OPND_CONTROL_REF:
ref = _xlat[ref];
break;
case IR_OPND_STR:
ref = ir_str(new_ctx, ir_get_str(ctx, ref));
break;
case IR_OPND_NUM:
break;
default:
IR_ASSERT(0);
break;
}
ir_set_op(new_ctx, new_ref, k, ref);
}
}
if (ctx->cfg_blocks) {
int b;
ir_block *bb;
new_ctx->cfg_blocks_count = ctx->cfg_blocks_count;
if (ctx->cfg_edges_count) {
new_ctx->cfg_edges_count = ctx->cfg_edges_count;
new_ctx->cfg_edges = ir_mem_malloc(ctx->cfg_edges_count * sizeof(uint32_t));
memcpy(new_ctx->cfg_edges, ctx->cfg_edges, ctx->cfg_edges_count * sizeof(uint32_t));
}
new_ctx->cfg_blocks = ir_mem_malloc((ctx->cfg_blocks_count + 1) * sizeof(ir_block));
memcpy(new_ctx->cfg_blocks, ctx->cfg_blocks, (ctx->cfg_blocks_count + 1) * sizeof(ir_block));
for (b = 1, bb = new_ctx->cfg_blocks + 1; b <= new_ctx->cfg_blocks_count; b++, bb++) {
bb->start = _xlat[bb->start];
bb->end = _xlat[bb->end];
}
}
if (preserve_constants_order) {
_xlat -= ctx->consts_count;
}
ir_mem_free(_xlat);
return 1;
}
int ir_schedule(ir_ctx *ctx)
{
ir_ctx new_ctx;
ir_ref i, j, k;
ir_ref b;
ir_ref *_blocks = ctx->gcm_blocks;
ir_ref *_next = ir_mem_calloc(ctx->insns_count, sizeof(ir_ref));
ir_ref *_prev = ir_mem_calloc(ctx->insns_count, sizeof(ir_ref));
ir_ref _rest = 0;
ir_block *bb;
ir_insn *insn;
/* Create double-linked list of nodes ordered by BB, respecting BB->start and BB->end */
IR_ASSERT(_blocks[1]);
_prev[1] = 0;
for (i = 2, j = 1; i < ctx->insns_count; i++) {
b = _blocks[i];
if (b) {
bb = &ctx->cfg_blocks[b];
if (_blocks[j] == b || i == bb->start) {
/* add to the end of list */
_next[j] = i;
_prev[i] = j;
j = i;
} else if (_prev[bb->end]) {
/* move up, insert before the end of alredy scheduled BB */
k = bb->end;
_prev[i] = _prev[k];
_next[i] = k;
_next[_prev[k]] = i;
_prev[k] = i;
} else {
/* move down late */
_next[i] = _rest;
_rest = i;
}
}
}
_next[j] = 0;
while (_rest) {
i = _rest;
_rest = _next[i];
b = _blocks[i];
bb = &ctx->cfg_blocks[b];
/* insert after start of the block and all PARAM, VAR, PI, PHI */
k = _next[bb->start];
insn = &ctx->ir_base[k];
while (insn->op == IR_PARAM || insn->op == IR_VAR || insn->op == IR_PI || insn->op == IR_PHI) {
k = _next[k];
insn = &ctx->ir_base[k];
}
/* insert before "k" */
_prev[i] = _prev[k];
_next[i] = k;
_next[_prev[k]] = i;
_prev[k] = i;
}
ir_mem_free(_prev);
/* Linearization */
ir_init(&new_ctx, ctx->consts_count, ctx->insns_count);
new_ctx.flags = ctx->flags;
/* TODO: linearize without reallocation and reconstruction ??? */
if (!ir_copy(&new_ctx, ctx, _next, 1)) {
ir_free(&new_ctx);
return 0;
}
ir_free(ctx);
ir_build_def_use_lists(&new_ctx);
ir_truncate(&new_ctx);
memcpy(ctx, &new_ctx, sizeof(ir_ctx));
ctx->flags |= IR_LINEAR;
ir_mem_free(_next);
return 1;
}