JMP optimization for GUARDs (guard failure is unexpected)

TODO: this should be ported to ARM
This commit is contained in:
Dmitry Stogov 2023-02-01 14:51:36 +03:00
parent 743696fe03
commit dc728853a2

View File

@ -6197,86 +6197,163 @@ static void ir_emit_ijmp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
}
}
static void ir_emit_guard(ir_ctx *ctx, ir_ref def, ir_insn *insn)
static bool ir_emit_guard_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, uint8_t op, void *addr, bool int_cmp)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
ir_reg op2_reg = ctx->regs[def][2];
ir_type type = ctx->ir_base[insn->op2].type;
void *addr;
ir_insn *next_insn = &ctx->ir_base[def + 1];
IR_ASSERT(IR_IS_TYPE_INT(type));
if (IR_IS_CONST_REF(insn->op2)) {
if ((insn->op == IR_GUARD && insn->op2 == IR_FALSE) ||
(insn->op == IR_GUARD_NOT && insn->op2 == IR_TRUE)) {
addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);
if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(addr)) {
| jmp aword &addr
} else {
|.if X64
if (IR_IS_SIGNED_32BIT(addr)) {
| mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 <imm-32-bit>
if (next_insn->op == IR_END || next_insn->op == IR_LOOP_END) {
ir_block *bb = &ctx->cfg_blocks[b];
uint32_t target;
if (!(bb->flags & IR_BB_DESSA_MOVES)) {
IR_ASSERT(bb->successors_count == 1);
target = ir_skip_empty_target_blocks(ctx, ctx->cfg_edges[bb->successors]);
if (b == ctx->cfg_blocks_count || target != ir_skip_empty_next_blocks(ctx, b + 1)) {
if (int_cmp) {
switch (op) {
case IR_EQ:
| jne =>target
break;
case IR_NE:
| je =>target
break;
case IR_LT:
| jge =>target
break;
case IR_GE:
| jl =>target
break;
case IR_LE:
| jg =>target
break;
case IR_GT:
| jle =>target
break;
case IR_ULT:
| jae =>target
break;
case IR_UGE:
| jb =>target
break;
case IR_ULE:
| ja =>target
break;
case IR_UGT:
| jbe =>target
break;
default:
IR_ASSERT(0 && "NIY binary op");
break;
}
} else {
| mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 <imm-64-bit>
switch (op) {
case IR_EQ:
| jne =>target
| jp =>target
break;
case IR_NE:
| jp &addr
| je =>target
break;
case IR_LT:
| jae =>target
break;
case IR_GE:
| jp &addr
| jb =>target
break;
case IR_LE:
| ja =>target
break;
case IR_GT:
| jp &addr
| jbe =>target
break;
default:
IR_ASSERT(0 && "NIY binary op");
break;
}
}
| jmp aword [rax]
|.endif
| jmp &addr
return 1;
}
}
return;
}
} else if (next_insn->op == IR_IJMP && IR_IS_CONST_REF(next_insn->op2)) {
void *target_addr = ir_jmp_addr(ctx, next_insn, &ctx->ir_base[next_insn->op2]);
if (op2_reg != IR_REG_NONE && ir_rule(ctx, insn->op2) != IR_SKIP_MEM) {
if (op2_reg & IR_REG_SPILL_LOAD) {
op2_reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, type, op2_reg, insn->op2);
}
| ASM_REG_REG_OP test, type, op2_reg, op2_reg
} else {
int32_t offset = 0;
if (ir_rule(ctx, insn->op2) == IR_SKIP_MEM) {
offset = ir_fuse_load(ctx, insn->op2, &op2_reg);
} else {
offset = ir_ref_spill_slot(ctx, insn->op2, &op2_reg);
}
if (op2_reg == IR_REG_NONE) {
| ASM_MEM_IMM_OP cmp, type, [offset], 0
} else {
| ASM_MEM_IMM_OP cmp, type, [Ra(op2_reg)+offset], 0
if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(target_addr)) {
if (int_cmp) {
switch (op) {
case IR_EQ:
| jne &target_addr
break;
case IR_NE:
| je &target_addr
break;
case IR_LT:
| jge &target_addr
break;
case IR_GE:
| jl &target_addr
break;
case IR_LE:
| jg &target_addr
break;
case IR_GT:
| jle &target_addr
break;
case IR_ULT:
| jae &target_addr
break;
case IR_UGE:
| jb &target_addr
break;
case IR_ULE:
| ja &target_addr
break;
case IR_UGT:
| jbe &target_addr
break;
default:
IR_ASSERT(0 && "NIY binary op");
break;
}
} else {
switch (op) {
case IR_EQ:
| jne &target_addr
| jp &target_addr
break;
case IR_NE:
| jp &addr
| je &target_addr
break;
case IR_LT:
| jae &target_addr
break;
case IR_GE:
| jp &addr
| jb &target_addr
break;
case IR_LE:
| ja &target_addr
break;
case IR_GT:
| jp &addr
| jbe &target_addr
break;
default:
IR_ASSERT(0 && "NIY binary op");
break;
}
}
| jmp &addr
return 1;
}
}
addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);
if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(addr)) {
if (insn->op == IR_GUARD) {
| je aword &addr
} else {
| jne aword &addr
}
} else {
|.if X64
if (insn->op == IR_GUARD) {
| jne >1
} else {
| je >1
}
if (IR_IS_SIGNED_32BIT(addr)) {
| mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 <imm-32-bit>
} else {
| mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 <imm-64-bit>
}
| jmp aword [rax]
|1:
|.endif
}
}
static void ir_emit_guard_jcc(ir_ctx *ctx, uint8_t op, void *addr, bool int_cmp)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
if (int_cmp) {
switch (op) {
case IR_EQ:
@ -6349,9 +6426,89 @@ static void ir_emit_guard_jcc(ir_ctx *ctx, uint8_t op, void *addr, bool int_cmp)
break;
}
}
return 0;
}
static void ir_emit_guard_cmp_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
/* Emit x86/x86_64 machine code for an IR_GUARD / IR_GUARD_NOT instruction.
 *
 * The guarded condition is insn->op2 (an integer value); the side-exit
 * address for the unexpected guard failure comes from insn->op3.
 * IR_GUARD exits when the value is false, IR_GUARD_NOT when it is true.
 *
 * Returns non-zero when the emitted code already transferred control out
 * of basic block `b`, so the caller can skip the rest of the block
 * (NOTE(review): return semantics inferred from the `goto next_block`
 * call sites in ir_emit_code — confirm there). Returns 0 otherwise. */
static bool ir_emit_guard(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
{
ir_backend_data *data = ctx->data;
/* dynasm emitter state; the `|`-prefixed lines below assemble into it */
dasm_State **Dst = &data->dasm_state;
/* register assigned to the guarded value (operand slot 2), if any */
ir_reg op2_reg = ctx->regs[def][2];
ir_type type = ctx->ir_base[insn->op2].type;
void *addr;
IR_ASSERT(IR_IS_TYPE_INT(type));
if (IR_IS_CONST_REF(insn->op2)) {
/* Constant condition: emit code only when the guard always fails
 * (GUARD on FALSE, or GUARD_NOT on TRUE) — then it degenerates to an
 * unconditional jump to the side exit. A guard that always passes
 * emits nothing. */
if ((insn->op == IR_GUARD && insn->op2 == IR_FALSE) ||
(insn->op == IR_GUARD_NOT && insn->op2 == IR_TRUE)) {
addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);
if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(addr)) {
/* target reachable with a rel32 jmp */
| jmp aword &addr
} else {
|.if X64
/* 64-bit target out of rel32 range: load address into rax,
 * then jump indirect */
if (IR_IS_SIGNED_32BIT(addr)) {
| mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 <imm-32-bit>
} else {
| mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 <imm-64-bit>
}
| jmp aword [rax]
|.endif
}
}
return 0;
}
/* Non-constant condition: set flags by testing the value against zero. */
if (op2_reg != IR_REG_NONE && ir_rule(ctx, insn->op2) != IR_SKIP_MEM) {
/* value lives in (or is reloaded into) a register */
if (op2_reg & IR_REG_SPILL_LOAD) {
op2_reg &= ~IR_REG_SPILL_LOAD;
ir_emit_load(ctx, type, op2_reg, insn->op2);
}
| ASM_REG_REG_OP test, type, op2_reg, op2_reg
} else {
/* value is in memory: either a fused load or a spill slot */
int32_t offset = 0;
if (ir_rule(ctx, insn->op2) == IR_SKIP_MEM) {
offset = ir_fuse_load(ctx, insn->op2, &op2_reg);
} else {
offset = ir_ref_spill_slot(ctx, insn->op2, &op2_reg);
}
if (op2_reg == IR_REG_NONE) {
/* absolute address */
| ASM_MEM_IMM_OP cmp, type, [offset], 0
} else {
| ASM_MEM_IMM_OP cmp, type, [Ra(op2_reg)+offset], 0
}
}
addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);
if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(addr)) {
/* Side exit reachable with a rel32 jcc: delegate to the shared
 * guard-jcc emitter, which may also fall through / fold the
 * block-ending jump (hence the propagated return value). */
ir_op op;
if (insn->op == IR_GUARD) {
op = IR_EQ;
} else {
op = IR_NE;
}
return ir_emit_guard_jcc(ctx, b, def, op, addr, 1);
} else {
|.if X64
/* Out-of-range 64-bit exit: branch around an indirect jump.
 * Note the inverted condition — the jcc skips the exit sequence
 * when the guard passes (failure is the unexpected path). */
if (insn->op == IR_GUARD) {
| jne >1
} else {
| je >1
}
if (IR_IS_SIGNED_32BIT(addr)) {
| mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 <imm-32-bit>
} else {
| mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 <imm-64-bit>
}
| jmp aword [rax]
|1:
|.endif
return 0;
}
}
static bool ir_emit_guard_cmp_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
@ -6391,10 +6548,10 @@ static void ir_emit_guard_cmp_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *
| jmp aword [rax]
|.endif
}
return;
return 0;
} else if (op == IR_UGE) {
/* always true */
return;
return 0;
} else if (op == IR_ULE) {
op = IR_EQ;
} else if (op == IR_UGT) {
@ -6407,10 +6564,10 @@ static void ir_emit_guard_cmp_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *
op ^= 1; // reverse
}
ir_emit_guard_jcc(ctx, op, addr, 1);
return ir_emit_guard_jcc(ctx, b, def, op, addr, 1);
}
static void ir_emit_guard_cmp_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
static bool ir_emit_guard_cmp_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
{
ir_op op = ir_emit_cmp_fp_common(ctx, insn->op2, &ctx->ir_base[insn->op2]);
void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);
@ -6418,19 +6575,19 @@ static void ir_emit_guard_cmp_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *i
if (insn->op == IR_GUARD) {
op ^= 1; // reverse
}
ir_emit_guard_jcc(ctx, op, addr, 0);
return ir_emit_guard_jcc(ctx, b, def, op, addr, 0);
}
static void ir_emit_guard_test_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
static bool ir_emit_guard_test_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
{
void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);
ir_op op = (insn->op == IR_GUARD) ? IR_EQ : IR_NE;
ir_emit_test_int_common(ctx, insn->op2, op);
ir_emit_guard_jcc(ctx, op, addr, 1);
return ir_emit_guard_jcc(ctx, b, def, op, addr, 1);
}
static void ir_emit_guard_overflow(ir_ctx *ctx, ir_ref def, ir_insn *insn)
static bool ir_emit_guard_overflow(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
@ -6453,6 +6610,7 @@ static void ir_emit_guard_overflow(ir_ctx *ctx, ir_ref def, ir_insn *insn)
| jc &addr
}
}
return 0;
}
static void ir_emit_lea(ir_ctx *ctx, ir_ref def, ir_type type, ir_reg base_reg, ir_reg index_reg, uint8_t scale, int32_t offset)
@ -7923,13 +8081,19 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
ir_emit_jcc(ctx, IR_NE, b, i, insn, 1);
break;
case IR_GUARD_CMP_INT:
ir_emit_guard_cmp_int(ctx, b, i, insn);
if (ir_emit_guard_cmp_int(ctx, b, i, insn)) {
goto next_block;
}
break;
case IR_GUARD_CMP_FP:
ir_emit_guard_cmp_fp(ctx, b, i, insn);
if (ir_emit_guard_cmp_fp(ctx, b, i, insn)) {
goto next_block;
}
break;
case IR_GUARD_TEST_INT:
ir_emit_guard_test_int(ctx, b, i, insn);
if (ir_emit_guard_test_int(ctx, b, i, insn)) {
goto next_block;
}
break;
case IR_IF_INT:
ir_emit_if_int(ctx, b, i, insn);
@ -8039,10 +8203,14 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
break;
case IR_GUARD:
case IR_GUARD_NOT:
ir_emit_guard(ctx, i, insn);
if (ir_emit_guard(ctx, b, i, insn)) {
goto next_block;
}
break;
case IR_GUARD_OVERFLOW:
ir_emit_guard_overflow(ctx, i, insn);
if (ir_emit_guard_overflow(ctx, b, i, insn)) {
goto next_block;
}
break;
case IR_TLS:
ir_emit_tls(ctx, i, insn);
@ -8060,6 +8228,7 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
insn += n;
rule += n;
}
next_block:
}
if (data.rodata_label) {