diff --git a/ir_cfg.c b/ir_cfg.c index 4513fdd..c7d4730 100644 --- a/ir_cfg.c +++ b/ir_cfg.c @@ -806,6 +806,7 @@ next: } } else if (ir_worklist_len(&work)) { bb->flags |= IR_BB_LOOP_HEADER; + bb->loop_depth = 1; while (ir_worklist_len(&work)) { j = ir_worklist_pop(&work); while (blocks[j].loop_header > 0) { @@ -836,10 +837,16 @@ next: i = sorted_blocks[n]; ir_block *bb = &blocks[i]; if (bb->loop_header > 0) { - bb->loop_depth = blocks[bb->loop_header].loop_depth; - } - if (bb->flags & IR_BB_LOOP_HEADER) { - bb->loop_depth++; + ir_block *loop = &blocks[bb->loop_header]; + uint32_t loop_depth = loop->loop_depth; + + if (bb->flags & IR_BB_LOOP_HEADER) { + loop_depth++; + } + bb->loop_depth = loop_depth; + if (bb->flags & (IR_BB_ENTRY|IR_BB_LOOP_WITH_ENTRY)) { + loop->flags |= IR_BB_LOOP_WITH_ENTRY; + } } } diff --git a/ir_dump.c b/ir_dump.c index 3013282..5ab56a1 100644 --- a/ir_dump.c +++ b/ir_dump.c @@ -243,7 +243,11 @@ void ir_dump_cfg(ir_ctx *ctx, FILE *f) fprintf(f, "\tUNREACHABLE\n"); } if (bb->flags & IR_BB_LOOP_HEADER) { - fprintf(f, "\tLOOP_HEADER\n"); + if (bb->flags & IR_BB_LOOP_WITH_ENTRY) { + fprintf(f, "\tLOOP_HEADER, LOOP_WITH_ENTRY\n"); + } else { + fprintf(f, "\tLOOP_HEADER\n"); + } } if (bb->flags & IR_BB_IRREDUCIBLE_LOOP) { - fprintf(stderr, "\tIRREDUCIBLE_LOOP\n"); + fprintf(f, "\tIRREDUCIBLE_LOOP\n"); diff --git a/ir_gcm.c b/ir_gcm.c index 6181af7..4fc3512 100644 --- a/ir_gcm.c +++ b/ir_gcm.c @@ -123,10 +123,9 @@ static void ir_gcm_schedule_late(ir_ctx *ctx, uint32_t *_blocks, ir_bitset visit uint32_t loop_depth = bb->loop_depth; if (loop_depth) { - insn = &ctx->ir_base[ref]; - if (insn->op >= IR_ADD_OV && insn->op <= IR_OVERFLOW) { - /* Don't move overflow checking math out of the loop */ - // TODO: this should be turned into a more general check to prohibit LICM ??? 
+ if ((ctx->cfg_blocks[bb->loop_header].flags & IR_BB_LOOP_WITH_ENTRY) + && !(ctx->binding && ir_binding_find(ctx, ref))) { + /* Don't move loop invariant code across an OSR ENTRY if we can't restore it */ } else { lca = bb->dom_parent; while (lca != _blocks[ref]) { diff --git a/ir_private.h b/ir_private.h index 0309b3b..8506105 100644 --- a/ir_private.h +++ b/ir_private.h @@ -823,6 +823,7 @@ struct _ir_use_list { #define IR_BB_EMPTY (1<<6) #define IR_BB_PREV_EMPTY_ENTRY (1<<7) #define IR_BB_OSR_ENTRY_LOADS (1<<8) /* OSR Entry-point with register LOADs */ +#define IR_BB_LOOP_WITH_ENTRY (1<<9) /* set together with LOOP_HEADER if there is an ENTRY in the loop */ struct _ir_block { uint32_t flags; diff --git a/ir_ra.c b/ir_ra.c index 49881e2..2fa83fd 100644 --- a/ir_ra.c +++ b/ir_ra.c @@ -371,7 +371,7 @@ static void ir_add_osr_entry_loads(ir_ctx *ctx, ir_block *bb, ir_bitset live, ui continue; } } - fprintf(stderr, "ENTRY %d (block %i) - live var %d\n", ctx->ir_base[bb->start].op2, b, ref); + fprintf(stderr, "ENTRY %d (block %d start %d) - live var %d\n", ctx->ir_base[bb->start].op2, b, bb->start, ref); ok = 0; } IR_BITSET_FOREACH_END();