From 47ce6941e159ef0bc94d21e677ac8ddaccfb67cb Mon Sep 17 00:00:00 2001 From: Daniil Gentili Date: Sat, 3 Jun 2023 22:28:12 +0200 Subject: [PATCH] Begin defining registers --- Makefile | 2 +- ir_riscv64.dasc | 5269 +++++++++++++++++++++++++++++++++++++++++++++++ ir_riscv64.h | 171 ++ 3 files changed, 5441 insertions(+), 1 deletion(-) create mode 100644 ir_riscv64.dasc create mode 100644 ir_riscv64.h diff --git a/Makefile b/Makefile index 769a6b6..1f8cc2b 100644 --- a/Makefile +++ b/Makefile @@ -41,7 +41,7 @@ ifeq (aarch64, $(TARGET)) endif ifeq (riscv64, $(TARGET)) - CC= riscv64-linux-gnu-gcc --sysroot=$(HOME)/php/RISCV64 + CC= riscv64-linux-gnu-gcc CFLAGS += -DIR_TARGET_RISCV64 DASM_ARCH = riscv64 DASM_FLAGS = diff --git a/ir_riscv64.dasc b/ir_riscv64.dasc new file mode 100644 index 0000000..cda7d69 --- /dev/null +++ b/ir_riscv64.dasc @@ -0,0 +1,5269 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (riscv64 native code generator based on DynAsm) + * Copyright (C) 2022 Zend by Perforce. + * Authors: Daniil Gentili + */ + +|.arch riscv64 + +|.actionlist dasm_actions +|.globals ir_lb +|.section code, cold_code, rodata, jmp_table + +#define IR_SPILL_POS_TO_OFFSET(offset) \ + ((ctx->flags & IR_USE_FRAME_POINTER) ? \ + ((offset) + (int32_t)sizeof(void*) * 2) : \ + ((offset) + data->call_stack_size)) + +#define B_IMM (1<<27) // signed imm26 * 4 +#define ADR_IMM (1<<20) // signed imm21 +#define ADRP_IMM (1LL<<32) // signed imm21 * 4096 + +static bool aarch64_may_use_b(ir_ctx *ctx, const void *addr) +{ + if (ctx->code_buffer) { + if (addr >= ctx->code_buffer && (char*)addr < (char*)ctx->code_buffer + ctx->code_buffer_size) { + return (ctx->code_buffer_size < B_IMM); + } else if ((char*)addr >= (char*)ctx->code_buffer + ctx->code_buffer_size) { + return (((char*)addr - (char*)ctx->code_buffer) < B_IMM); + } else if (addr < ctx->code_buffer) { + return (((char*)(ctx->code_buffer + ctx->code_buffer_size) - (char*)addr) < B_IMM); + } + } + return 1; //??? +} + +static bool aarch64_may_use_adr(ir_ctx *ctx, const void *addr) +{ + if (ctx->code_buffer) { + if (addr >= ctx->code_buffer && (char*)addr < (char*)ctx->code_buffer + ctx->code_buffer_size) { + return ( ctx->code_buffer_size < ADR_IMM); + } else if ((char*)addr >= (char*)ctx->code_buffer + ctx->code_buffer_size) { + return (((char*)addr - (char*)ctx->code_buffer) < ADR_IMM); + } else if (addr < ctx->code_buffer) { + return (((char*)(ctx->code_buffer + ctx->code_buffer_size) - (char*)addr) < ADR_IMM); + } + } + return 0; +} + +static bool aarch64_may_use_adrp(ir_ctx *ctx, const void *addr) +{ + if (ctx->code_buffer) { + if (addr >= ctx->code_buffer && (char*)addr < (char*)ctx->code_buffer + ctx->code_buffer_size) { + return ( ctx->code_buffer_size < ADRP_IMM); + } else if ((char*)addr >= (char*)ctx->code_buffer + ctx->code_buffer_size) { + return (((char*)addr - (char*)ctx->code_buffer) < ADRP_IMM); + } else if (addr < ctx->code_buffer) { + return (((char*)(ctx->code_buffer + ctx->code_buffer_size) - (char*)addr) < ADRP_IMM); + } + } + return 0; +} + +/* Determine whether "val" falls into two allowed ranges: + * Range 1: [0, 0xfff] + * Range 2: LSL #12 to Range 1 + * Used to guard the immediate encoding for add/adds/sub/subs/cmp/cmn instructions. */ +static bool aarch64_may_encode_imm12(const int64_t val) +{ + return (val >= 0 && (val <= 0xfff || !(val & 0xffffffffff000fff))); +} + +/* Determine whether an immediate value can be encoded as the immediate operand of logical instructions. 
*/ +static bool aarch64_may_encode_logical_imm(uint64_t value, uint32_t type_size) +{ + /* fast path: power of two */ + if (value > 0 && !(value & (value - 1))) { + return 1; + } + + if (type_size == 8) { + if (dasm_imm13((uint32_t)value, (uint32_t)(value >> 32)) != -1) { + return 1; + } + } else { + if (dasm_imm13((uint32_t)value, (uint32_t)value) != -1) { + return 1; + } + } + + return 0; +} + +static bool aarch64_may_encode_addr_offset(int64_t offset, uint32_t type_size) +{ + return (uintptr_t)(offset) % type_size == 0 && (uintptr_t)(offset) < 0xfff * type_size; +} + +|.macro ASM_REG_REG_OP, op, type, dst, src +|| if (ir_type_size[type] == 8) { +| op Rx(dst), Rx(src) +|| } else { +| op Rw(dst), Rw(src) +|| } +|.endmacro + +|.macro ASM_REG_REG_REG_OP, op, type, dst, src1, src2 +|| if (ir_type_size[type] == 8) { +| op Rx(dst), Rx(src1), Rx(src2) +|| } else { +| op Rw(dst), Rw(src1), Rw(src2) +|| } +|.endmacro + +|.macro ASM_REG_REG_REG_REG_OP, op, type, dst, src1, src2, src3 +|| if (ir_type_size[type] == 8) { +| op Rx(dst), Rx(src1), Rx(src2), Rx(src3) +|| } else { +| op Rw(dst), Rw(src1), Rw(src2), Rw(src3); +|| } +|.endmacro + +|.macro ASM_REG_REG_IMM_OP, op, type, dst, src1, val +|| if (ir_type_size[type] == 8) { +| op Rx(dst), Rx(src1), #val +|| } else { +| op Rw(dst), Rw(src1), #val +|| } +|.endmacro + +|.macro ASM_REG_IMM_OP, op, type, reg, val +|| if (ir_type_size[type] == 8) { +| op Rx(reg), #val +|| } else { +| op Rw(reg), #val +|| } +|.endmacro + +|.macro ASM_FP_REG_REG_REG_OP, op, type, dst, src1, src2 +|| if (type == IR_DOUBLE) { +| op Rd(dst-IR_REG_FP_FIRST), Rd(src1-IR_REG_FP_FIRST), Rd(src2-IR_REG_FP_FIRST) +|| } else { +|| IR_ASSERT(type == IR_FLOAT); +| op Rs(dst-IR_REG_FP_FIRST), Rs(src1-IR_REG_FP_FIRST), Rs(src2-IR_REG_FP_FIRST) +|| } +|.endmacro + +typedef struct _ir_backend_data { + ir_reg_alloc_data ra_data; + int32_t stack_frame_alignment; + int32_t call_stack_size; + ir_regset used_preserved_regs; + uint32_t dessa_from_block; + dasm_State *dasm_state; + int rodata_label, jmp_table_label; +} ir_backend_data; + +#define IR_GP_REG_NAME(code, name64, name32) \ + #name64, +#define IR_GP_REG_NAME32(code, name64, name32) \ + #name32, +#define IR_FP_REG_NAME(code, name64, name32, name16, name8) \ + #name64, +#define IR_FP_REG_NAME32(code, name64, name32, name16, name8) \ + #name32, + +static const char *_ir_reg_name[IR_REG_NUM] = { + IR_GP_REGS(IR_GP_REG_NAME) + IR_FP_REGS(IR_FP_REG_NAME) +}; + +static const char *_ir_reg_name32[IR_REG_NUM] = { + IR_GP_REGS(IR_GP_REG_NAME32) + IR_FP_REGS(IR_FP_REG_NAME32) +}; + +/* Calling Convention */ +static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS] = { + IR_REG_INT_ARG1, + IR_REG_INT_ARG2, + IR_REG_INT_ARG3, + IR_REG_INT_ARG4, + IR_REG_INT_ARG5, + IR_REG_INT_ARG6, + IR_REG_INT_ARG7, + IR_REG_INT_ARG8, +}; + +static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS] = { + IR_REG_FP_ARG1, + IR_REG_FP_ARG2, + IR_REG_FP_ARG3, + IR_REG_FP_ARG4, + IR_REG_FP_ARG5, + IR_REG_FP_ARG6, + IR_REG_FP_ARG7, + IR_REG_FP_ARG8, +}; + +const char *ir_reg_name(int8_t reg, ir_type type) +{ + if (reg >= IR_REG_NUM) { + if (reg == IR_REG_SCRATCH) { + return "SCRATCH"; + } else { + IR_ASSERT(reg == IR_REG_ALL); + return "ALL"; + } + } + IR_ASSERT(reg >= 0 && reg < IR_REG_NUM); + if (type == IR_VOID) { + type = (reg < IR_REG_FP_FIRST) ? 
IR_ADDR : IR_DOUBLE; + } + if (ir_type_size[type] == 8) { + return _ir_reg_name[reg]; + } else { + return _ir_reg_name32[reg]; + } +} + +enum _ir_rule { + IR_CMP_INT = IR_LAST_OP, + IR_CMP_FP, + IR_MUL_PWR2, + IR_DIV_PWR2, + IR_MOD_PWR2, + IR_OP_INT, + IR_OP_FP, + IR_BINOP_INT, + IR_BINOP_FP, + IR_SHIFT, + IR_SHIFT_CONST, + IR_COPY_INT, + IR_COPY_FP, + IR_CMP_AND_BRANCH_INT, + IR_CMP_AND_BRANCH_FP, + IR_GUARD_CMP_INT, + IR_GUARD_CMP_FP, + IR_GUARD_OVERFLOW, + IR_OVERFLOW_AND_BRANCH, + IR_MIN_MAX_INT, + IR_REG_BINOP_INT, + IR_LOAD_INT, + IR_LOAD_FP, + IR_STORE_INT, + IR_STORE_FP, + IR_IF_INT, + IR_RETURN_VOID, + IR_RETURN_INT, + IR_RETURN_FP, +}; + +/* register allocation */ +int ir_get_target_constraints(const ir_ctx *ctx, ir_ref ref, ir_target_constraints *constraints) +{ + uint32_t rule = ir_rule(ctx, ref); + const ir_insn *insn; + int n = 0; + int flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG; + + constraints->def_reg = IR_REG_NONE; + constraints->hints_count = 0; + switch (rule & IR_RULE_MASK) { + case IR_BINOP_INT: + insn = &ctx->ir_base[ref]; + n = 0; + if (IR_IS_CONST_REF(insn->op1)) { + constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) { + const ir_insn *val_insn = &ctx->ir_base[insn->op2]; + switch (insn->op) { + case IR_ADD: + case IR_ADD_OV: + case IR_SUB: + case IR_SUB_OV: + if (!aarch64_may_encode_imm12(val_insn->val.u64)) { + constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + break; + case IR_MUL_OV: + constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF); + n++; + break; + case IR_AND: + case IR_OR: + case IR_XOR: + if (!aarch64_may_encode_logical_imm(val_insn->val.u64, ir_type_size[insn->type])) { + constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + break; + case IR_MUL: + case IR_DIV: + case IR_MOD: + constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + break; + } + } + if (insn->op == IR_MOD) { + constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } else if (insn->op == IR_MUL_OV && (ir_type_size[insn->type] == 8 || IR_IS_TYPE_SIGNED(insn->type))) { + constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF); + n++; + } + break; + case IR_SEXT: + case IR_ZEXT: + flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; + IR_FALLTHROUGH; + case IR_MUL_PWR2: + case IR_DIV_PWR2: + case IR_MOD_PWR2: + case IR_SHIFT: + case IR_SHIFT_CONST: + case IR_OP_INT: + case IR_OP_FP: + case IR_INT2FP: + case IR_FP2INT: + case IR_FP2FP: + insn = &ctx->ir_base[ref]; + n = 0; + if (IR_IS_CONST_REF(insn->op1)) { + constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + if (rule == IR_SHIFT && insn->op == IR_ROL) { + constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + break; + case IR_BINOP_FP: + case IR_MIN_MAX_INT: + insn = &ctx->ir_base[ref]; + n = 0; + if (IR_IS_CONST_REF(insn->op1)) { + constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) { + constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + break; + case IR_CMP_INT: + insn = 
&ctx->ir_base[ref]; + n = 0; + if (IR_IS_CONST_REF(insn->op1)) { + constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) { + insn = &ctx->ir_base[insn->op2]; + if (!aarch64_may_encode_imm12(insn->val.u64)) { + constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + } + break; + case IR_CMP_FP: + insn = &ctx->ir_base[ref]; + n = 0; + if (IR_IS_CONST_REF(insn->op1)) { + const ir_insn *val_insn = &ctx->ir_base[insn->op1]; + constraints->tmp_regs[n] = IR_TMP_REG(1, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) { + const ir_insn *val_insn = &ctx->ir_base[insn->op2]; + constraints->tmp_regs[n] = IR_TMP_REG(2, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + break; + case IR_VSTORE: + insn = &ctx->ir_base[ref]; + if (IR_IS_CONST_REF(insn->op3)) { + insn = &ctx->ir_base[insn->op3]; + constraints->tmp_regs[0] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n = 1; + } + break; + case IR_LOAD_FP: + insn = &ctx->ir_base[ref]; + n = 0; + if (IR_IS_CONST_REF(insn->op2)) { + IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); + constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + break; + case IR_STORE_INT: + case IR_STORE_FP: + insn = &ctx->ir_base[ref]; + n = 0; + if (IR_IS_CONST_REF(insn->op2)) { + IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); + constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + if (IR_IS_CONST_REF(insn->op3)) { + insn = &ctx->ir_base[insn->op3]; + if (!IR_IS_TYPE_INT(insn->type) || insn->val.i64 != 0) { + constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + } + break; + case IR_SWITCH: + insn = &ctx->ir_base[ref]; + n = 0; + if (IR_IS_CONST_REF(insn->op2)) { + insn = &ctx->ir_base[insn->op2]; + constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } else { + insn = &ctx->ir_base[insn->op2]; + constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + constraints->tmp_regs[n] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + break; + case IR_CALL: + insn = &ctx->ir_base[ref]; + constraints->def_reg = (IR_IS_TYPE_INT(insn->type)) ? IR_REG_INT_RET1 : IR_REG_FP_RET1; + constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_SCRATCH, IR_USE_SUB_REF, IR_DEF_SUB_REF); + n = 1; + IR_FALLTHROUGH; + case IR_TAILCALL: + insn = &ctx->ir_base[ref]; + if (insn->inputs_count > 2) { + constraints->hints[2] = IR_REG_NONE; + constraints->hints_count = ir_get_args_regs(ctx, insn, constraints->hints); + if (!IR_IS_CONST_REF(insn->op2)) { + constraints->tmp_regs[n] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_USE_SUB_REF); + n++; + } + } + flags = IR_USE_MUST_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG | IR_OP3_SHOULD_BE_IN_REG; + break; + case IR_COPY_INT: + case IR_COPY_FP: + flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG; + break; + case IR_PARAM: + constraints->def_reg = ir_get_param_reg(ctx, ref); + flags = (constraints->def_reg == IR_REG_NONE) ? 
IR_USE_MUST_BE_IN_REG : IR_USE_SHOULD_BE_IN_REG; + break; + case IR_PI: + case IR_PHI: + flags = IR_USE_SHOULD_BE_IN_REG; + break; + case IR_RLOAD: + constraints->def_reg = ctx->ir_base[ref].op2; + flags = IR_USE_SHOULD_BE_IN_REG; + break; + case IR_EXITCALL: + constraints->def_reg = IR_REG_INT_RET1; + break; + case IR_TRUNC: + case IR_BITCAST: + flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; + break; + case IR_RSTORE: + flags = IR_OP3_SHOULD_BE_IN_REG; + break; + case IR_RETURN_INT: + flags = IR_OP2_SHOULD_BE_IN_REG; + constraints->hints[2] = IR_REG_INT_RET1; + constraints->hints_count = 3; + break; + case IR_RETURN_FP: + flags = IR_OP2_SHOULD_BE_IN_REG; + constraints->hints[2] = IR_REG_FP_RET1; + constraints->hints_count = 3; + break; + case IR_SNAPSHOT: + flags = 0; + break; + } + constraints->tmps_count = n; + + return flags; +} + +/* instruction selection */ +static void ir_match_fuse_addr(ir_ctx *ctx, ir_ref addr_ref, ir_type type) +{ + if (!IR_IS_CONST_REF(addr_ref)) { + ir_insn *addr_insn = &ctx->ir_base[addr_ref]; + + if (addr_insn->op == IR_ADD + && !IR_IS_CONST_REF(addr_insn->op1) + && IR_IS_CONST_REF(addr_insn->op2) // TODO: temporary workaround + && aarch64_may_encode_addr_offset(ctx->ir_base[addr_insn->op2].val.i64, ir_type_size[type])) { + ir_use_list *use_list = &ctx->use_lists[addr_ref]; + ir_ref j = use_list->count; + + if (j > 1) { + /* check if address is used only in LOAD and STORE */ + ir_ref *p = &ctx->use_edges[use_list->refs]; + + do { + ir_insn *insn = &ctx->ir_base[*p]; + if (insn->op != IR_LOAD && (insn->op != IR_STORE || insn->op3 == addr_ref)) { + return; + } + p++; + } while (--j); + } + ctx->rules[addr_ref] = IR_FUSED | IR_SIMPLE | addr_insn->op; + } + } +} + +static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref) +{ + ir_insn *op2_insn; + ir_insn *insn = &ctx->ir_base[ref]; + + switch (insn->op) { + case IR_EQ: + case IR_NE: + case IR_LT: + case IR_GE: + case IR_LE: + case IR_GT: + case IR_ULT: + case IR_UGE: + case IR_ULE: + case IR_UGT: + if (IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) { + return IR_CMP_INT; + } else { + return IR_CMP_FP; + } + break; + case IR_ADD: + case IR_SUB: + if (IR_IS_TYPE_INT(insn->type)) { + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.i64 == 0) { + return IR_COPY_INT; + } + } +binop_int: + return IR_BINOP_INT; + } else { +binop_fp: + return IR_BINOP_FP; + } + break; + case IR_MUL: + if (IR_IS_TYPE_INT(insn->type)) { + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.u64 == 0) { + // 0 + } else if (op2_insn->val.u64 == 1) { + return IR_COPY_INT; + } else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) { + return IR_MUL_PWR2; + } + } + return IR_BINOP_INT; + } else { + goto binop_fp; + } + break; + case IR_ADD_OV: + case IR_SUB_OV: + IR_ASSERT(IR_IS_TYPE_INT(insn->type)); + goto binop_int; + case IR_MUL_OV: + IR_ASSERT(IR_IS_TYPE_INT(insn->type)); + goto binop_int; + case IR_DIV: + if (IR_IS_TYPE_INT(insn->type)) { + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.u64 == 1) { + return IR_COPY_INT; + } else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) { + return IR_DIV_PWR2; + } + } + return IR_BINOP_INT; + } else { + goto binop_fp; + 
} + break; + case IR_MOD: + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (IR_IS_TYPE_UNSIGNED(insn->type) && IR_IS_POWER_OF_TWO(op2_insn->val.u64)) { + return IR_MOD_PWR2; + } + } + return IR_BINOP_INT; + case IR_BSWAP: + case IR_NOT: + if (IR_IS_TYPE_INT(insn->type)) { + return IR_OP_INT; + } else { + IR_ASSERT(0); + } + break; + case IR_NEG: + case IR_ABS: + if (IR_IS_TYPE_INT(insn->type)) { + return IR_OP_INT; + } else { + return IR_OP_FP; + } + case IR_OR: + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.i64 == 0) { + return IR_COPY_INT; + } else if (op2_insn->val.i64 == -1) { + // -1 + } + } + goto binop_int; + case IR_AND: + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.i64 == 0) { + // 0 + } else if (op2_insn->val.i64 == -1) { + return IR_COPY_INT; + } + } + goto binop_int; + case IR_XOR: + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } + } + goto binop_int; + case IR_SHL: + if (IR_IS_CONST_REF(insn->op2)) { + if (ctx->flags & IR_OPT_CODEGEN) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.u64 == 0) { + return IR_COPY_INT; + } else if (ir_type_size[insn->type] >= 4) { + if (op2_insn->val.u64 == 1) { + // lea [op1*2] + } else if (op2_insn->val.u64 == 2) { + // lea [op1*4] + } else if (op2_insn->val.u64 == 3) { + // lea [op1*8] + } + } + } + return IR_SHIFT_CONST; + } + return IR_SHIFT; + case IR_SHR: + case IR_SAR: + case IR_ROL: + case IR_ROR: + if (IR_IS_CONST_REF(insn->op2)) { + if (ctx->flags & IR_OPT_CODEGEN) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.u64 == 0) { + return IR_COPY_INT; + } + } + return IR_SHIFT_CONST; + } + return IR_SHIFT; + case IR_MIN: + case IR_MAX: + if (IR_IS_TYPE_INT(insn->type)) { + return IR_MIN_MAX_INT; + } else { + goto binop_fp; + } + break; +// case IR_COND: + case IR_COPY: + if (IR_IS_TYPE_INT(insn->type)) { + return IR_COPY_INT; + } else { + return IR_COPY_FP; + } + break; + case IR_CALL: + ctx->flags |= IR_HAS_CALLS; + return IR_CALL; + case IR_VAR: + return IR_SKIPPED | IR_VAR; + case IR_ALLOCA: + if (ctx->flags & IR_FUNCTION) { + ctx->flags |= IR_USE_FRAME_POINTER | IR_HAS_ALLOCA; + } + return IR_ALLOCA; + case IR_LOAD: + ir_match_fuse_addr(ctx, insn->op2, insn->type); + if (IR_IS_TYPE_INT(insn->type)) { + return IR_LOAD_INT; + } else { + return IR_LOAD_FP; + } + break; + case IR_STORE: + ir_match_fuse_addr(ctx, insn->op2, ctx->ir_base[insn->op3].type); + if (IR_IS_TYPE_INT(ctx->ir_base[insn->op3].type)) { + return IR_STORE_INT; + } else { + return IR_STORE_FP; + } + break; + case IR_RLOAD: + if (IR_REGSET_IN(IR_REGSET_UNION(ctx->fixed_regset, IR_REGSET_FIXED), insn->op2)) { + return IR_SKIPPED | IR_RLOAD; + } + return IR_RLOAD; + case IR_RSTORE: + if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { + if ((ctx->flags & IR_OPT_CODEGEN) && ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) { + ir_insn *op_insn = &ctx->ir_base[insn->op2]; + + if (!ctx->rules[insn->op2]) { + ctx->rules[insn->op2] = 
ir_match_insn(ctx, insn->op2); + } + if (ctx->rules[insn->op2] == IR_BINOP_INT) { + if (ctx->ir_base[op_insn->op1].op == IR_RLOAD + && ctx->ir_base[op_insn->op1].op2 == insn->op3) { + ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT; + ctx->rules[op_insn->op1] = IR_SKIPPED | IR_RLOAD; + return IR_REG_BINOP_INT; + } else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE) + && ctx->ir_base[op_insn->op2].op == IR_RLOAD + && ctx->ir_base[op_insn->op2].op2 == insn->op3) { + ir_ref tmp = op_insn->op1; + op_insn->op1 = op_insn->op2; + op_insn->op2 = tmp; + ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT; + ctx->rules[op_insn->op1] = IR_SKIPPED | IR_RLOAD; + return IR_REG_BINOP_INT; + } + } + } + } + return IR_RSTORE; + case IR_START: + case IR_BEGIN: + case IR_IF_TRUE: + case IR_IF_FALSE: + case IR_CASE_VAL: + case IR_CASE_DEFAULT: + case IR_MERGE: + case IR_LOOP_BEGIN: + case IR_UNREACHABLE: + return IR_SKIPPED | insn->op; + case IR_RETURN: + if (!insn->op2) { + return IR_RETURN_VOID; + } else if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { + return IR_RETURN_INT; + } else { + return IR_RETURN_FP; + } + case IR_IF: + if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) { + op2_insn = &ctx->ir_base[insn->op2]; + if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT) { + if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) { + ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT; + return IR_CMP_AND_BRANCH_INT; + } else { + ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP; + return IR_CMP_AND_BRANCH_FP; + } + } else if (op2_insn->op == IR_OVERFLOW) { + ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_OVERFLOW; + return IR_OVERFLOW_AND_BRANCH; + } + } + if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { + return IR_IF_INT; + } else { + IR_ASSERT(0 && "NIY IR_IF_FP"); + break; + } + case IR_GUARD: + case IR_GUARD_NOT: + if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) { + op2_insn = &ctx->ir_base[insn->op2]; + if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT + // TODO: register allocator may clobber operands of CMP before they are used in the GUARD_CMP + && (insn->op2 == ref - 1 || + (insn->op2 == ctx->prev_ref[ref] - 1 + && ctx->ir_base[ctx->prev_ref[ref]].op == IR_SNAPSHOT))) { + if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) { + ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT; + return IR_GUARD_CMP_INT; + } else { + ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP; + return IR_GUARD_CMP_FP; + } + } else if (op2_insn->op == IR_OVERFLOW) { + ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_OVERFLOW; + return IR_GUARD_OVERFLOW; + } + } + return insn->op; + default: + break; + } + + return insn->op; +} + +static void ir_match_insn2(ir_ctx *ctx, ir_ref ref, uint32_t rule) +{ +} + +/* code genertion */ +static int32_t ir_ref_spill_slot(ir_ctx *ctx, ir_ref ref, ir_reg *reg) +{ + ir_backend_data *data = ctx->data; + int32_t offset; + + IR_ASSERT(ref >= 0); + offset = ctx->live_intervals[ctx->vregs[ref]]->stack_spill_pos; + IR_ASSERT(offset != -1); + if (ctx->live_intervals[ctx->vregs[ref]]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) { + IR_ASSERT(ctx->spill_base != IR_REG_NONE); + *reg = ctx->spill_base; + return offset; + } + *reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + return IR_SPILL_POS_TO_OFFSET(offset); +} + +static int32_t ir_var_spill_slot(ir_ctx *ctx, ir_ref ref, ir_reg *reg) +{ + ir_backend_data *data = ctx->data; + ir_insn *var_insn = &ctx->ir_base[ref]; + + IR_ASSERT(var_insn->op == IR_VAR); + *reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + return IR_SPILL_POS_TO_OFFSET(var_insn->op3); +} + +static void ir_emit_load_imm_int(ir_ctx *ctx, ir_type type, ir_reg reg, int64_t val) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + IR_ASSERT(IR_IS_TYPE_INT(type)); + if (ir_type_size[type] == 8) { + if (val == 0) { + if (reg != IR_REG_ZR) { + | mov Rx(reg), xzr + } + } else if (((uint64_t)(val)) <= 0xffff) { + | movz Rx(reg), #((uint64_t)(val)) + } else if (~((uint64_t)(val)) <= 0xffff) { + | movn Rx(reg), #(~((uint64_t)(val))) + } else if ((uint64_t)(val) & 0xffff) { + | movz Rx(reg), #((uint64_t)(val) & 0xffff) + if (((uint64_t)(val) >> 16) & 0xffff) { + | movk Rx(reg), #(((uint64_t)(val) >> 16) & 0xffff), lsl #16 + } + if (((uint64_t)(val) >> 32) & 0xffff) { + | movk Rx(reg), #(((uint64_t)(val) >> 32) & 0xffff), lsl #32 + } + if ((((uint64_t)(val) >> 48) & 0xffff)) { + | movk Rx(reg), #(((uint64_t)(val) >> 48) & 0xffff), lsl #48 + } + } else if (((uint64_t)(val) >> 16) & 0xffff) { + | movz Rx(reg), #(((uint64_t)(val) >> 16) & 0xffff), lsl #16 + if (((uint64_t)(val) >> 32) & 0xffff) { + | movk Rx(reg), #(((uint64_t)(val) >> 32) & 0xffff), lsl #32 + } + if ((((uint64_t)(val) >> 48) & 0xffff)) { + | movk Rx(reg), #(((uint64_t)(val) >> 48) & 0xffff), lsl #48 + } + } else if (((uint64_t)(val) >> 32) & 0xffff) { + | movz Rx(reg), #(((uint64_t)(val) >> 32) & 0xffff), lsl #32 + if ((((uint64_t)(val) >> 48) & 0xffff)) { + | movk Rx(reg), #(((uint64_t)(val) >> 48) & 0xffff), lsl #48 + } + } else { + | movz Rx(reg), #(((uint64_t)(val) >> 48) & 0xffff), lsl #48 + } + } else { + if (val == 0) { + if (reg != IR_REG_ZR) { + | mov Rw(reg), wzr + } + } else if (((uint64_t)(val)) <= 0xffff) { + | movz Rw(reg), #((uint64_t)(val)) + } else if (~((uint64_t)(val)) <= 0xffff) { + | movn Rw(reg), #(~((uint64_t)(val))) + } else if ((uint64_t)(val) & 0xffff) { + | movz Rw(reg), #((uint64_t)(val) & 0xffff) + if (((uint64_t)(val) >> 16) & 0xffff) { + | movk Rw(reg), #(((uint64_t)(val) >> 16) & 0xffff), lsl #16 + } + } else if (((uint64_t)(val) >> 16) & 0xffff) { + | movz Rw(reg), #(((uint64_t)(val) >> 16) & 0xffff), lsl #16 + } + } +} + +static void ir_emit_load_mem_int(ir_ctx *ctx, ir_type type, ir_reg reg, ir_reg base_reg, int32_t offset) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (aarch64_may_encode_addr_offset(offset, ir_type_size[type])) { + switch (ir_type_size[type]) { + case 8: + | ldr Rx(reg), [Rx(base_reg), #offset] + break; + case 4: + | ldr Rw(reg), [Rx(base_reg), #offset] + break; + case 2: + if (IR_IS_TYPE_SIGNED(type)) { + | ldrsh Rw(reg), [Rx(base_reg), #offset] + } else { + | ldrh Rw(reg), [Rx(base_reg), #offset] + } + break; + case 1: + if (IR_IS_TYPE_SIGNED(type)) { + | ldrsb Rw(reg), [Rx(base_reg), #offset] + } else { + | ldrb Rw(reg), [Rx(base_reg), #offset] + } + break; + default: + IR_ASSERT(0); + } + } else { + ir_reg tmp_reg = IR_REG_INT_TMP; /* reserved temporary register */ + + ir_emit_load_imm_int(ctx, IR_ADDR, tmp_reg, offset); + switch (ir_type_size[type]) { + case 8: + | ldr Rx(reg), [Rx(base_reg), Rx(tmp_reg)] + break; + case 4: + | ldr Rw(reg), [Rx(base_reg), Rx(tmp_reg)] + 
break; + case 2: + if (IR_IS_TYPE_SIGNED(type)) { + | ldrsh Rw(reg), [Rx(base_reg), Rx(tmp_reg)] + } else { + | ldrh Rw(reg), [Rx(base_reg), Rx(tmp_reg)] + } + break; + case 1: + if (IR_IS_TYPE_SIGNED(type)) { + | ldrsb Rw(reg), [Rx(base_reg), Rx(tmp_reg)] + } else { + | ldrb Rw(reg), [Rx(base_reg), Rx(tmp_reg)] + } + break; + default: + IR_ASSERT(0); + } + } +} + +static void ir_emit_load_imm_fp(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *insn = &ctx->ir_base[src]; + int label; + + if (type == IR_FLOAT && insn->val.u32 == 0) { + | fmov Rs(reg-IR_REG_FP_FIRST), wzr + } else if (type == IR_DOUBLE && insn->val.u64 == 0) { + | fmov Rd(reg-IR_REG_FP_FIRST), xzr + } else { + label = ctx->cfg_blocks_count - src; + insn->const_flags |= IR_CONST_EMIT; + if (type == IR_DOUBLE) { + | ldr Rd(reg-IR_REG_FP_FIRST), =>label + } else { + IR_ASSERT(type == IR_FLOAT); + | ldr Rs(reg-IR_REG_FP_FIRST), =>label + } + } +} + +static void ir_emit_load_mem_fp(ir_ctx *ctx, ir_type type, ir_reg reg, ir_reg base_reg, int32_t offset) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (aarch64_may_encode_addr_offset(offset, ir_type_size[type])) { + if (type == IR_DOUBLE) { + | ldr Rd(reg-IR_REG_FP_FIRST), [Rx(base_reg), #offset] + } else { + IR_ASSERT(type == IR_FLOAT); + | ldr Rs(reg-IR_REG_FP_FIRST), [Rx(base_reg), #offset] + } + } else { + ir_reg tmp_reg = IR_REG_INT_TMP; /* reserved temporary register */ + + ir_emit_load_imm_int(ctx, IR_ADDR, tmp_reg, offset); + if (type == IR_DOUBLE) { + | ldr Rd(reg-IR_REG_FP_FIRST), [Rx(base_reg), Rx(tmp_reg)] + } else { + IR_ASSERT(type == IR_FLOAT); + | ldr Rs(reg-IR_REG_FP_FIRST), [Rx(base_reg), Rx(tmp_reg)] + } + } +} + +static void ir_emit_load(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src) +{ + int32_t offset; + ir_reg fp; + + if (IR_IS_CONST_REF(src)) { + if (IR_IS_TYPE_INT(type)) { + ir_insn *insn = &ctx->ir_base[src]; + + IR_ASSERT(insn->op != IR_STR); + ir_emit_load_imm_int(ctx, type, reg, insn->val.i64); + } else { + ir_emit_load_imm_fp(ctx, type, reg, src); + } + } else { + offset = ir_ref_spill_slot(ctx, src, &fp); + if (IR_IS_TYPE_INT(type)) { + ir_emit_load_mem_int(ctx, type, reg, fp, offset); + } else { + ir_emit_load_mem_fp(ctx, type, reg, fp, offset); + } + } +} + +static void ir_emit_store_mem_int(ir_ctx *ctx, ir_type type, ir_reg base_reg, int32_t offset, ir_reg reg) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (aarch64_may_encode_addr_offset(offset, ir_type_size[type])) { + switch (ir_type_size[type]) { + case 8: + | str Rx(reg), [Rx(base_reg), #offset] + break; + case 4: + | str Rw(reg), [Rx(base_reg), #offset] + break; + case 2: + | strh Rw(reg), [Rx(base_reg), #offset] + break; + case 1: + | strb Rw(reg), [Rx(base_reg), #offset] + break; + default: + IR_ASSERT(0); + } + } else { + ir_reg tmp_reg = IR_REG_INT_TMP; /* reserved temporary register */ + + ir_emit_load_imm_int(ctx, IR_ADDR, tmp_reg, offset); + switch (ir_type_size[type]) { + case 8: + | str Rx(reg), [Rx(base_reg), Rx(tmp_reg)] + break; + case 4: + | str Rw(reg), [Rx(base_reg), Rx(tmp_reg)] + break; + case 2: + | strh Rw(reg), [Rx(base_reg), Rx(tmp_reg)] + break; + case 1: + | strb Rw(reg), [Rx(base_reg), Rx(tmp_reg)] + break; + default: + IR_ASSERT(0); + } + } +} + +static void ir_emit_store_mem_fp(ir_ctx *ctx, ir_type type, ir_reg base_reg, int32_t offset, ir_reg reg) +{ + ir_backend_data *data = ctx->data; + 
dasm_State **Dst = &data->dasm_state; + + if (aarch64_may_encode_addr_offset(offset, ir_type_size[type])) { + if (type == IR_DOUBLE) { + | str Rd(reg-IR_REG_FP_FIRST), [Rx(base_reg), #offset] + } else { + IR_ASSERT(type == IR_FLOAT); + | str Rs(reg-IR_REG_FP_FIRST), [Rx(base_reg), #offset] + } + } else { + ir_reg tmp_reg = IR_REG_INT_TMP; /* reserved temporary register */ + + ir_emit_load_imm_int(ctx, IR_ADDR, tmp_reg, offset); + if (type == IR_DOUBLE) { + | str Rd(reg-IR_REG_FP_FIRST), [Rx(base_reg), Rx(tmp_reg)] + } else { + IR_ASSERT(type == IR_FLOAT); + | str Rs(reg-IR_REG_FP_FIRST), [Rx(base_reg), Rx(tmp_reg)] + } + } +} + +static void ir_emit_store(ir_ctx *ctx, ir_type type, ir_ref dst, ir_reg reg) +{ + int32_t offset; + ir_reg fp; + + IR_ASSERT(dst >= 0); + offset = ir_ref_spill_slot(ctx, dst, &fp); + if (IR_IS_TYPE_INT(type)) { + ir_emit_store_mem_int(ctx, type, fp, offset, reg); + } else { + ir_emit_store_mem_fp(ctx, type, fp, offset, reg); + } +} + +static void ir_emit_mov(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (ir_type_size[type] == 8) { + if (dst == IR_REG_STACK_POINTER) { + | mov sp, Rx(src) + } else if (src == IR_REG_STACK_POINTER) { + | mov Rx(dst), sp + } else { + | mov Rx(dst), Rx(src) + } + } else { + | mov Rw(dst), Rw(src) + } +} + +static void ir_emit_fp_mov(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (ir_type_size[type] == 8) { + | fmov Rd(dst-IR_REG_FP_FIRST), Rd(src-IR_REG_FP_FIRST) + } else { + | fmov Rs(dst-IR_REG_FP_FIRST), Rs(src-IR_REG_FP_FIRST) + } +} + +static void ir_emit_prologue(ir_ctx *ctx) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (ctx->flags & IR_USE_FRAME_POINTER) { + | stp x29, x30, [sp, # (-(data->ra_data.stack_frame_size+16))]! + | mov x29, sp + if (data->call_stack_size) { + | sub sp, sp, #(data->call_stack_size) + } + } else if (data->ra_data.stack_frame_size + data->call_stack_size) { + if (ctx->fixed_stack_red_zone) { + IR_ASSERT(data->ra_data.stack_frame_size + data->call_stack_size <= ctx->fixed_stack_red_zone); + } else { + | sub sp, sp, #(data->ra_data.stack_frame_size + data->call_stack_size) + } + } + if (data->used_preserved_regs) { + int offset; + uint32_t i; + ir_reg prev = IR_REG_NONE; + ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + + if (ctx->flags & IR_USE_FRAME_POINTER) { + offset = data->ra_data.stack_frame_size + sizeof(void*) * 2; + } else { + offset = data->ra_data.stack_frame_size + data->call_stack_size; + } + for (i = 0; i < IR_REG_NUM; i++) { + if (IR_REGSET_IN(data->used_preserved_regs, i)) { + if (prev == IR_REG_NONE) { + prev = i; + } else if (i < IR_REG_FP_FIRST) { + offset -= sizeof(void*) * 2; + | stp Rx(prev), Rx(i), [Rx(fp), #offset] + prev = IR_REG_NONE; + } else { + if (prev < IR_REG_FP_FIRST) { + offset -= sizeof(void*); + | str Rx(prev), [Rx(fp), #offset] + offset -= sizeof(void*); + | str Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset] + } else { + offset -= sizeof(void*) * 2; + | stp Rd(prev-IR_REG_FP_FIRST), Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset] + } + prev = IR_REG_NONE; + } + } + } + if (prev != IR_REG_NONE) { + if (prev < IR_REG_FP_FIRST) { + offset -= sizeof(void*); + | str Rx(prev), [Rx(fp), #offset] + } else { + offset -= sizeof(void*); + | str Rd(prev-IR_REG_FP_FIRST), [Rx(fp), #offset] + } + } + } +} + +static void ir_emit_epilogue(ir_ctx *ctx) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (data->used_preserved_regs) { + int offset; + uint32_t i; + ir_reg prev = IR_REG_NONE; + ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + + if (ctx->flags & IR_USE_FRAME_POINTER) { + offset = data->ra_data.stack_frame_size + sizeof(void*) * 2; + } else { + offset = data->ra_data.stack_frame_size + data->call_stack_size; + } + for (i = 0; i < IR_REG_NUM; i++) { + if (IR_REGSET_IN(data->used_preserved_regs, i)) { + if (prev == IR_REG_NONE) { + prev = i; + } else if (i < IR_REG_FP_FIRST) { + offset -= sizeof(void*) * 2; + | ldp Rx(prev), Rx(i), [Rx(fp), #offset] + prev = IR_REG_NONE; + } else { + if (prev < IR_REG_FP_FIRST) { + offset -= sizeof(void*); + | ldr Rx(prev), [Rx(fp), #offset] + offset -= sizeof(void*); + | ldr Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset] + } else { + offset -= sizeof(void*) * 2; + | ldp Rd(prev-IR_REG_FP_FIRST), Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset] + } + prev = IR_REG_NONE; + } + } + } + if (prev != IR_REG_NONE) { + if (prev < IR_REG_FP_FIRST) { + offset -= sizeof(void*); + | ldr Rx(prev), [Rx(fp), #offset] + } else { + offset -= sizeof(void*); + | ldr Rd(prev-IR_REG_FP_FIRST), [Rx(fp), #offset] + } + } + } + + if (ctx->flags & IR_USE_FRAME_POINTER) { + if (data->call_stack_size || (ctx->flags & IR_HAS_ALLOCA)) { + | mov sp, x29 + } + | ldp x29, x30, [sp], # (data->ra_data.stack_frame_size+16) + } else if (data->ra_data.stack_frame_size + data->call_stack_size) { + if (ctx->fixed_stack_red_zone) { + IR_ASSERT(data->ra_data.stack_frame_size + data->call_stack_size <= ctx->fixed_stack_red_zone); + } else { + | add sp, sp, #(data->ra_data.stack_frame_size + data->call_stack_size) + } + } +} + +static void ir_emit_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_ref op2 = insn->op2; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg tmp_reg; + + IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); + + if ((op1_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(op1)) { + op1_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, type, op1_reg, op1); + } + if (op2_reg != IR_REG_NONE) { + if ((op2_reg & IR_REG_SPILL_LOAD) || 
IR_IS_CONST_REF(op2)) { + op2_reg &= ~IR_REG_SPILL_LOAD; + if (op1 != op2) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + switch (insn->op) { + case IR_ADD: + | ASM_REG_REG_REG_OP add, type, def_reg, op1_reg, op2_reg + break; + case IR_ADD_OV: + | ASM_REG_REG_REG_OP adds, type, def_reg, op1_reg, op2_reg + break; + case IR_SUB: + | ASM_REG_REG_REG_OP sub, type, def_reg, op1_reg, op2_reg + break; + case IR_SUB_OV: + | ASM_REG_REG_REG_OP subs, type, def_reg, op1_reg, op2_reg + break; + case IR_MUL: + | ASM_REG_REG_REG_OP mul, type, def_reg, op1_reg, op2_reg + break; + case IR_MUL_OV: + if (ir_type_size[type] == 8) { + if (IR_IS_TYPE_SIGNED(type)) { + tmp_reg = ctx->regs[def][3]; + IR_ASSERT(tmp_reg != IR_REG_NONE); + | smulh Rx(tmp_reg), Rx(op1_reg), Rx(op2_reg) + | mul Rx(def_reg), Rx(op1_reg), Rx(op2_reg) + | cmp Rx(tmp_reg), Rx(def_reg), asr #63 + } else { + tmp_reg = ctx->regs[def][3]; + IR_ASSERT(tmp_reg != IR_REG_NONE); + | umulh Rx(tmp_reg), Rx(op1_reg), Rx(op2_reg) + | mul Rx(def_reg), Rx(op1_reg), Rx(op2_reg) + | cmp Rx(tmp_reg), xzr + } + } else { + if (IR_IS_TYPE_SIGNED(type)) { + tmp_reg = ctx->regs[def][3]; + IR_ASSERT(tmp_reg != IR_REG_NONE); + | smull Rx(def_reg), Rw(op1_reg), Rw(op2_reg) + | asr Rx(tmp_reg), Rx(def_reg), #32 + | cmp Rx(tmp_reg), Rx(def_reg), asr #31 + } else { + | umull Rx(def_reg), Rw(op1_reg), Rw(op2_reg) + | cmp xzr, Rx(def_reg), lsr #32 + } + } + break; + case IR_DIV: + if (IR_IS_TYPE_SIGNED(type)) { + | ASM_REG_REG_REG_OP sdiv, type, def_reg, op1_reg, op2_reg + } else { + | ASM_REG_REG_REG_OP udiv, type, def_reg, op1_reg, op2_reg + } + break; + case IR_MOD: + tmp_reg = ctx->regs[def][3]; + IR_ASSERT(tmp_reg != IR_REG_NONE); + if (IR_IS_TYPE_SIGNED(type)) { + | ASM_REG_REG_REG_OP sdiv, type, tmp_reg, op1_reg, op2_reg + | ASM_REG_REG_REG_REG_OP msub, type, def_reg, tmp_reg, op2_reg, op1_reg + } else { + | ASM_REG_REG_REG_OP udiv, type, tmp_reg, op1_reg, op2_reg + | ASM_REG_REG_REG_REG_OP msub, type, def_reg, tmp_reg, op2_reg, op1_reg + } + break; + case IR_OR: + | ASM_REG_REG_REG_OP orr, type, def_reg, op1_reg, op2_reg + break; + case IR_AND: + | ASM_REG_REG_REG_OP and, type, def_reg, op1_reg, op2_reg + break; + case IR_XOR: + | ASM_REG_REG_REG_OP eor, type, def_reg, op1_reg, op2_reg + break; + default: + IR_ASSERT(0 && "NIY binary op"); + break; + } + } else { + IR_ASSERT(IR_IS_CONST_REF(op2)); + int32_t val = ctx->ir_base[op2].val.i32; + switch (insn->op) { + case IR_ADD: + | ASM_REG_REG_IMM_OP add, type, def_reg, op1_reg, val + break; + case IR_ADD_OV: + | ASM_REG_REG_IMM_OP adds, type, def_reg, op1_reg, val + break; + case IR_SUB: + | ASM_REG_REG_IMM_OP sub, type, def_reg, op1_reg, val + break; + case IR_SUB_OV: + | ASM_REG_REG_IMM_OP subs, type, def_reg, op1_reg, val + break; + case IR_OR: + | ASM_REG_REG_IMM_OP orr, type, def_reg, op1_reg, val + break; + case IR_AND: + | ASM_REG_REG_IMM_OP and, type, def_reg, op1_reg, val + break; + case IR_XOR: + | ASM_REG_REG_IMM_OP eor, type, def_reg, op1_reg, val + break; + default: + IR_ASSERT(0 && "NIY binary op"); + break; + } + } + if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_min_max_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_ref op2 = insn->op2; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + ir_reg op2_reg = ctx->regs[def][2]; + + IR_ASSERT(def_reg != 
IR_REG_NONE && op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); + + if ((op1_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(op1)) { + op1_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, type, op1_reg, op1); + } + if ((op2_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(op2)) { + op2_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, type, op2_reg, op2); + } + + if (op1 == op2) { + return; + } + + if (ir_type_size[type] == 8) { + | cmp Rx(op1_reg), Rx(op2_reg) + if (insn->op == IR_MIN) { + if (IR_IS_TYPE_SIGNED(type)) { + | csel Rx(def_reg), Rx(op1_reg), Rx(op2_reg), le + } else { + | csel Rx(def_reg), Rx(op1_reg), Rx(op2_reg), ls + } + } else { + IR_ASSERT(insn->op == IR_MAX); + if (IR_IS_TYPE_SIGNED(type)) { + | csel Rx(def_reg), Rx(op1_reg), Rx(op2_reg), ge + } else { + | csel Rx(def_reg), Rx(op1_reg), Rx(op2_reg), hs + } + } + } else { + | cmp Rw(op1_reg), Rw(op2_reg) + if (insn->op == IR_MIN) { + if (IR_IS_TYPE_SIGNED(type)) { + | csel Rw(def_reg), Rw(op1_reg), Rw(op2_reg), le + } else { + | csel Rw(def_reg), Rw(op1_reg), Rw(op2_reg), ls + } + } else { + IR_ASSERT(insn->op == IR_MAX); + if (IR_IS_TYPE_SIGNED(type)) { + | csel Rw(def_reg), Rw(op1_reg), Rw(op2_reg), ge + } else { + | csel Rw(def_reg), Rw(op1_reg), Rw(op2_reg), hs + } + } + } + + if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_overflow(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_insn *math_insn = &ctx->ir_base[insn->op1]; + ir_type type = math_insn->type; + + IR_ASSERT(def_reg != IR_REG_NONE); + IR_ASSERT(IR_IS_TYPE_INT(type)); + if (math_insn->op == IR_MUL_OV) { + | cset Rw(def_reg), ne + } else if (IR_IS_TYPE_SIGNED(type)) { + | cset Rw(def_reg), vs + } else { + | cset Rw(def_reg), cs + } + if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { + ir_emit_store(ctx, insn->type, def, def_reg); + } +} + +static void ir_emit_overflow_and_branch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *overflow_insn = &ctx->ir_base[insn->op2]; + ir_insn *math_insn = &ctx->ir_base[overflow_insn->op1]; + ir_type type = math_insn->type; + uint32_t true_block, false_block, next_block; + bool reverse = 0; + + ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block); + if (true_block == next_block) { + reverse = 1; + true_block = false_block; + false_block = 0; + } else if (false_block == next_block) { + false_block = 0; + } + + if (math_insn->op == IR_MUL_OV) { + if (reverse) { + | beq =>true_block + } else { + | bne =>true_block + } + } else if (IR_IS_TYPE_SIGNED(type)) { + if (reverse) { + | bvc =>true_block + } else { + | bvs =>true_block + } + } else { + if (reverse) { + | bcc =>true_block + } else { + | bcs =>true_block + } + } + if (false_block) { + | b =>false_block + } +} + +static void ir_emit_reg_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *op_insn = &ctx->ir_base[insn->op2]; + ir_type type = op_insn->type; + ir_ref op2 = op_insn->op2; + ir_reg op2_reg = ctx->regs[insn->op2][2]; + ir_reg reg; + + IR_ASSERT(insn->op == IR_RSTORE); + reg = insn->op3; + + if (op2_reg == IR_REG_NONE) { + ir_val *val = &ctx->ir_base[op2].val; + + IR_ASSERT(IR_IS_CONST_REF(op2)); + switch (op_insn->op) { + case IR_ADD: + | ASM_REG_REG_IMM_OP add, type, reg, reg, val->i32 
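+			/* Note (editor sketch, grounded in the helpers above): this immediate
+			 * form assumes the constant fits the add/sub immediate encoding that
+			 * aarch64_may_encode_imm12() checks, i.e. 0..0xfff or such a value
+			 * shifted left by 12.  For example, 0x123 and 0x123000 encode
+			 * directly, while 0x123456 does not and is expected to have been
+			 * materialized into a register by the register allocator
+			 * (see ir_get_target_constraints()) before reaching this path. */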
+ break; + case IR_SUB: + | ASM_REG_REG_IMM_OP sub, type, reg, reg, val->i32 + break; + case IR_OR: + | ASM_REG_REG_IMM_OP orr, type, reg, reg, val->i32 + break; + case IR_AND: + | ASM_REG_REG_IMM_OP and, type, reg, reg, val->i32 + break; + case IR_XOR: + | ASM_REG_REG_IMM_OP eor, type, reg, reg, val->i32 + break; + default: + IR_ASSERT(0 && "NIY binary op"); + break; + } + } else { + if ((op2_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(op2)) { + op2_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, type, op2_reg, op2); + } + switch (op_insn->op) { + case IR_ADD: + | ASM_REG_REG_REG_OP add, type, reg, reg, op2_reg + break; + case IR_SUB: + | ASM_REG_REG_REG_OP sub, type, reg, reg, op2_reg + break; + case IR_MUL: + | ASM_REG_REG_REG_OP mul, type, reg, reg, op2_reg + break; + case IR_OR: + | ASM_REG_REG_REG_OP orr, type, reg, reg, op2_reg + break; + case IR_AND: + | ASM_REG_REG_REG_OP and, type, reg, reg, op2_reg + break; + case IR_XOR: + | ASM_REG_REG_REG_OP eor, type, reg, reg, op2_reg + break; + default: + IR_ASSERT(0 && "NIY binary op"); + break; + } + } +} + +static void ir_emit_mul_div_mod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); + + if ((op1_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(op1)) { + op1_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, type, op1_reg, op1); + } + if (insn->op == IR_MUL) { + uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64); + if (shift == 1) { + | ASM_REG_REG_REG_OP add, insn->type, def_reg, op1_reg, op1_reg + } else { + | ASM_REG_REG_IMM_OP lsl, insn->type, def_reg, op1_reg, shift + } + } else if (insn->op == IR_DIV) { + uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64); + | ASM_REG_REG_IMM_OP lsr, insn->type, def_reg, op1_reg, shift + } else if (insn->op == IR_MOD) { + uint64_t mask = ctx->ir_base[insn->op2].val.u64 - 1; + | ASM_REG_REG_IMM_OP and, insn->type, def_reg, op1_reg, mask + } else { + IR_ASSERT(0); + } + if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_shift(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg tmp_reg; + + IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); + if ((op1_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(insn->op1)) { + op1_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, type, op1_reg, insn->op1); + } + if (op2_reg & IR_REG_SPILL_LOAD) { + op2_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + switch (insn->op) { + case IR_SHL: + | ASM_REG_REG_REG_OP lsl, type, def_reg, op1_reg, op2_reg + break; + case IR_SHR: + | ASM_REG_REG_REG_OP lsr, type, def_reg, op1_reg, op2_reg + break; + case IR_SAR: + | ASM_REG_REG_REG_OP asr, type, def_reg, op1_reg, op2_reg + break; + case IR_ROL: + tmp_reg = ctx->regs[def][3]; + IR_ASSERT(tmp_reg != IR_REG_NONE); + if (ir_type_size[type] == 8) { + | neg Rx(tmp_reg), Rx(op2_reg) + | ror Rx(def_reg), Rx(op1_reg), Rx(tmp_reg) + } else { + | neg Rw(tmp_reg), Rw(op2_reg) + | ror Rw(def_reg), Rw(op1_reg), Rw(tmp_reg) + } + break; + case IR_ROR: + 
| ASM_REG_REG_REG_OP ror, type, def_reg, op1_reg, op2_reg + break; + default: + IR_ASSERT(0); + } + if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_shift_const(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + uint32_t shift = ctx->ir_base[insn->op2].val.u64; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); + + if ((op1_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(op1)) { + op1_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, type, op1_reg, op1); + } + switch (insn->op) { + case IR_SHL: + | ASM_REG_REG_IMM_OP lsl, type, def_reg, op1_reg, shift + break; + case IR_SHR: + | ASM_REG_REG_IMM_OP lsr, type, def_reg, op1_reg, shift + break; + case IR_SAR: + | ASM_REG_REG_IMM_OP asr, type, def_reg, op1_reg, shift + break; + case IR_ROL: + if (ir_type_size[type] == 8) { + shift = (64 - shift) % 64; + | ror Rx(def_reg), Rx(op1_reg), #shift + } else { + shift = (32 - shift) % 32; + | ror Rw(def_reg), Rw(op1_reg), #shift + } + break; + case IR_ROR: + | ASM_REG_REG_IMM_OP ror, type, def_reg, op1_reg, shift + break; + default: + IR_ASSERT(0); + } + if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_op_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); + + if ((op1_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(op1)) { + op1_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, type, op1_reg, op1); + } + if (insn->op == IR_NOT) { + if (insn->type == IR_BOOL) { + | ASM_REG_IMM_OP cmp, type, op1, 0 + | cset Rw(def_reg), eq + } else { + | ASM_REG_REG_OP mvn, insn->type, def_reg, op1_reg + } + } else if (insn->op == IR_NEG) { + | ASM_REG_REG_OP neg, insn->type, def_reg, op1_reg + } else if (insn->op == IR_ABS) { + if (ir_type_size[type] == 8) { + | cmp Rx(op1_reg), #0 + | cneg Rx(def_reg), Rx(op1_reg), lt + } else { + | cmp Rw(op1_reg), #0 + | cneg Rw(def_reg), Rw(op1_reg), lt + } + } else if (insn->op == IR_BSWAP) { + | ASM_REG_REG_OP rev, insn->type, def_reg, op1_reg + } else { + IR_ASSERT(0); + } + if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_op_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); + + if ((op1_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(op1)) { + op1_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, type, op1_reg, op1); + } + if (insn->op == IR_NEG) { + if (type == IR_DOUBLE) { + | fneg Rd(def_reg-IR_REG_FP_FIRST), Rd(op1_reg-IR_REG_FP_FIRST) + } else { + IR_ASSERT(type == IR_FLOAT); + | fneg Rs(def_reg-IR_REG_FP_FIRST), Rs(op1_reg-IR_REG_FP_FIRST) + } + } else if (insn->op == IR_ABS) { + if (type == IR_DOUBLE) { + | fabs Rd(def_reg-IR_REG_FP_FIRST), Rd(op1_reg-IR_REG_FP_FIRST) + } else { + IR_ASSERT(type == IR_FLOAT); + | 
fabs Rs(def_reg-IR_REG_FP_FIRST), Rs(op1_reg-IR_REG_FP_FIRST) + } + } else { + IR_ASSERT(0); + } + if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { + ir_emit_store(ctx, insn->type, def, def_reg); + } +} + +static void ir_emit_binop_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_ref op2 = insn->op2; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + ir_reg op2_reg = ctx->regs[def][2]; + + IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); + if ((op1_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(op1)) { + op1_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, type, op1_reg, op1); + } + if ((op2_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(op2)) { + op2_reg &= ~IR_REG_SPILL_LOAD; + if (op1 != op2) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + switch (insn->op) { + case IR_ADD: + | ASM_FP_REG_REG_REG_OP fadd, type, def_reg, op1_reg, op2_reg + break; + case IR_SUB: + | ASM_FP_REG_REG_REG_OP fsub, type, def_reg, op1_reg, op2_reg + break; + case IR_MUL: + | ASM_FP_REG_REG_REG_OP fmul, type, def_reg, op1_reg, op2_reg + break; + case IR_DIV: + | ASM_FP_REG_REG_REG_OP fdiv, type, def_reg, op1_reg, op2_reg + break; + case IR_MIN: + | ASM_FP_REG_REG_REG_OP fmin, type, def_reg, op1_reg, op2_reg + break; + case IR_MAX: + | ASM_FP_REG_REG_REG_OP fmax, type, def_reg, op1_reg, op2_reg + break; + default: + IR_ASSERT(0 && "NIY binary op"); + break; + } + if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { + ir_emit_store(ctx, insn->type, def, def_reg); + } +} + +static void ir_emit_cmp_int_common(ir_ctx *ctx, ir_type type, ir_reg op1_reg, ir_ref op1, ir_reg op2_reg, ir_ref op2) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + IR_ASSERT(op1_reg != IR_REG_NONE); + if (op2_reg != IR_REG_NONE) { + if (ir_type_size[type] == 8) { + | cmp Rx(op1_reg), Rx(op2_reg) + } else { + | cmp Rw(op1_reg), Rw(op2_reg) + } + } else { + IR_ASSERT(IR_IS_CONST_REF(op2)); + int32_t val = ctx->ir_base[op2].val.i32; + + if (ir_type_size[type] == 8) { + | cmp Rx(op1_reg), #val + } else { + | cmp Rw(op1_reg), #val + } + } +} + +static void ir_emit_cmp_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = ctx->ir_base[insn->op1].type; + ir_op op = insn->op; + ir_ref op1 = insn->op1; + ir_ref op2 = insn->op2; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + ir_reg op2_reg = ctx->regs[def][2]; + + IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); + if ((op1_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(op1)) { + op1_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, type, op1_reg, op1); + } + if (op2_reg != IR_REG_NONE) { + if (op2_reg & IR_REG_SPILL_LOAD) { + op2_reg &= ~IR_REG_SPILL_LOAD; + if (op1 != op2) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + if (IR_IS_CONST_REF(op2)) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + if (IR_IS_CONST_REF(insn->op2) && ctx->ir_base[insn->op2].val.u64 == 0) { + if (op == IR_ULT) { + /* always false */ + ir_emit_load_imm_int(ctx, IR_BOOL, def_reg, 0); + if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { + ir_emit_store(ctx, insn->type, def, def_reg); + } + return; + } else if (op == IR_UGE) { + /* always true */ + ir_emit_load_imm_int(ctx, IR_BOOL, def_reg, 1); + if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { + ir_emit_store(ctx, 
insn->type, def, def_reg); + } + return; + } else if (op == IR_ULE) { + op = IR_EQ; + } else if (op == IR_UGT) { + op = IR_NE; + } + } + ir_emit_cmp_int_common(ctx, type, op1_reg, op1, op2_reg, op2); + switch (op) { + case IR_EQ: + | cset Rw(def_reg), eq + break; + case IR_NE: + | cset Rw(def_reg), ne + break; + case IR_LT: + | cset Rw(def_reg), lt + break; + case IR_GE: + | cset Rw(def_reg), ge + break; + case IR_LE: + | cset Rw(def_reg), le + break; + case IR_GT: + | cset Rw(def_reg), gt + break; + case IR_ULT: + | cset Rw(def_reg), lo + break; + case IR_UGE: + | cset Rw(def_reg), hs + break; + case IR_ULE: + | cset Rw(def_reg), ls + break; + case IR_UGT: + | cset Rw(def_reg), hi + break; + default: + IR_ASSERT(0 && "NIY binary op"); + break; + } + if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { + ir_emit_store(ctx, insn->type, def, def_reg); + } +} + +static ir_op ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref cmp_ref, ir_insn *cmp_insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = ctx->ir_base[cmp_insn->op1].type; + ir_op op = cmp_insn->op; + ir_ref op1, op2; + ir_reg op1_reg, op2_reg; + + if (op == IR_LT || op == IR_LE) { + /* swap operands to avoid P flag check */ + op ^= 3; + op1 = cmp_insn->op2; + op2 = cmp_insn->op1; + op1_reg = ctx->regs[cmp_ref][2]; + op2_reg = ctx->regs[cmp_ref][1]; + } else { + op1 = cmp_insn->op1; + op2 = cmp_insn->op2; + op1_reg = ctx->regs[cmp_ref][1]; + op2_reg = ctx->regs[cmp_ref][2]; + } + + IR_ASSERT(op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); + if ((op1_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(op1)) { + op1_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, type, op1_reg, op1); + } + if ((op2_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(op2)) { + op2_reg &= ~IR_REG_SPILL_LOAD; + if (op1 != op2) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + if (type == IR_DOUBLE) { + | fcmp Rd(op1_reg-IR_REG_FP_FIRST), Rd(op2_reg-IR_REG_FP_FIRST) + } else { + IR_ASSERT(type == IR_FLOAT); + | fcmp Rs(op1_reg-IR_REG_FP_FIRST), Rs(op2_reg-IR_REG_FP_FIRST) + } + return op; +} + +static void ir_emit_cmp_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_op op = ir_emit_cmp_fp_common(ctx, def, insn); + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); +//??? 
ir_reg tmp_reg = ctx->regs[def][3]; // TODO: take into accouny vs flag + + IR_ASSERT(def_reg != IR_REG_NONE); + switch (op) { + case IR_EQ: + | cset Rw(def_reg), eq + break; + case IR_NE: + | cset Rw(def_reg), ne + break; + case IR_LT: + | cset Rw(def_reg), mi + break; + case IR_GE: + | cset Rw(def_reg), ge + break; + case IR_LE: + | cset Rw(def_reg), ls + break; + case IR_GT: + | cset Rw(def_reg), gt + break; + default: + IR_ASSERT(0 && "NIY binary op"); + break; + } + if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { + ir_emit_store(ctx, insn->type, def, def_reg); + } +} + +static void ir_emit_jmp_true(ir_ctx *ctx, uint32_t b, ir_ref def) +{ + uint32_t true_block, false_block, next_block; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block); + if (true_block != next_block) { + | b =>true_block + } +} + +static void ir_emit_jmp_false(ir_ctx *ctx, uint32_t b, ir_ref def) +{ + uint32_t true_block, false_block, next_block; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block); + if (false_block != next_block) { + | b =>false_block + } +} + +static void ir_emit_jz(ir_ctx *ctx, uint8_t op, uint32_t b, ir_type type, ir_reg reg) +{ + uint32_t true_block, false_block, next_block; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block); + if (true_block == next_block) { + IR_ASSERT(op < IR_LT); + op ^= 1; // reverse + true_block = false_block; + false_block = 0; + } else if (false_block == next_block) { + false_block = 0; + } + + if (op == IR_EQ) { + if (ir_type_size[type] == 8) { + | cbz Rx(reg), =>true_block + } else { + | cbz Rw(reg), =>true_block + } + } else { + IR_ASSERT(op == IR_NE); + if (ir_type_size[type] == 8) { + | cbnz Rx(reg), =>true_block + } else { + | cbnz Rw(reg), =>true_block + } + } + if (false_block) { + | b =>false_block + } +} + +static void ir_emit_jcc(ir_ctx *ctx, uint8_t op, uint32_t b, ir_ref def, ir_insn *insn, bool int_cmp) +{ + uint32_t true_block, false_block, next_block; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block); + if (true_block == next_block) { + /* swap to avoid unconditional JMP */ + op ^= 1; // reverse + true_block = false_block; + false_block = 0; + } else if (false_block == next_block) { + false_block = 0; + } + + if (int_cmp) { + switch (op) { + case IR_EQ: + | beq =>true_block + break; + case IR_NE: + | bne =>true_block + break; + case IR_LT: + | blt =>true_block + break; + case IR_GE: + | bge =>true_block + break; + case IR_LE: + | ble =>true_block + break; + case IR_GT: + | bgt =>true_block + break; + case IR_ULT: + | blo =>true_block + break; + case IR_UGE: + | bhs =>true_block + break; + case IR_ULE: + | bls =>true_block + break; + case IR_UGT: + | bhi =>true_block + break; + default: + IR_ASSERT(0 && "NIY binary op"); + break; + } + } else { + switch (op) { + case IR_EQ: + | beq =>true_block + break; + case IR_NE: + | bne =>true_block + break; + case IR_LT: + | bmi =>true_block + break; + case IR_GE: + | bge =>true_block + break; + case IR_LE: + | bls =>true_block + break; + case IR_GT: + | bgt =>true_block + break; +// case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break; +// case IR_UGE: fprintf(stderr, "\tjae .LL%d\n", true_block); break; 
+// case IR_ULE: fprintf(stderr, "\tjbe .LL%d\n", true_block); break; +// case IR_UGT: fprintf(stderr, "\tja .LL%d\n", true_block); break; + default: + IR_ASSERT(0 && "NIY binary op"); + break; + } + } + if (false_block) { + | b =>false_block + } +} + +static void ir_emit_cmp_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_insn *cmp_insn = &ctx->ir_base[insn->op2]; + ir_op op = cmp_insn->op; + ir_type type = ctx->ir_base[cmp_insn->op1].type; + ir_ref op1 = cmp_insn->op1; + ir_ref op2 = cmp_insn->op2; + ir_reg op1_reg = ctx->regs[insn->op2][1]; + ir_reg op2_reg = ctx->regs[insn->op2][2]; + + if (op1_reg != IR_REG_NONE && (op1_reg & IR_REG_SPILL_LOAD)) { + op1_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, type, op1_reg, op1); + } + if (op1_reg != IR_REG_NONE && IR_IS_CONST_REF(op1)) { + ir_emit_load(ctx, type, op1_reg, op1); + } + if (op2_reg != IR_REG_NONE) { + if (op2_reg & IR_REG_SPILL_LOAD) { + op2_reg &= ~IR_REG_SPILL_LOAD; + if (op1 != op2) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + if (IR_IS_CONST_REF(op2)) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + if (IR_IS_CONST_REF(op2) && ctx->ir_base[op2].val.u64 == 0) { + if (op == IR_ULT) { + /* always false */ + ir_emit_jmp_false(ctx, b, def); + return; + } else if (op == IR_UGE) { + /* always true */ + ir_emit_jmp_true(ctx, b, def); + return; + } else if (op == IR_ULE) { + op = IR_EQ; + } else if (op == IR_UGT) { + op = IR_NE; + } + if (op1_reg != IR_REG_NONE && (op == IR_EQ || op == IR_NE)) { + ir_emit_jz(ctx, op, b, type, op1_reg); + return; + } + } + ir_emit_cmp_int_common(ctx, type, op1_reg, op1, op2_reg, op2); + ir_emit_jcc(ctx, op, b, def, insn, 1); +} + +static void ir_emit_cmp_and_branch_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_op op = ir_emit_cmp_fp_common(ctx, insn->op2, &ctx->ir_base[insn->op2]); + ir_emit_jcc(ctx, op, b, def, insn, 0); +} + +static void ir_emit_if_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_type type = ctx->ir_base[insn->op2].type; + ir_reg op2_reg = ctx->regs[def][2]; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (IR_IS_CONST_REF(insn->op2)) { + uint32_t true_block, false_block, next_block; + + ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block); + if (ir_const_is_true(&ctx->ir_base[insn->op2])) { + if (true_block != next_block) { + | b =>true_block + } + } else { + if (false_block != next_block) { + | b =>false_block + } + } + return; + } + IR_ASSERT(op2_reg != IR_REG_NONE); + if ((op2_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(insn->op2)) { + op2_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + | ASM_REG_IMM_OP cmp, type, op2_reg, 0 + ir_emit_jcc(ctx, IR_NE, b, def, insn, 1); +} + +static void ir_emit_return_void(ir_ctx *ctx) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + ir_emit_epilogue(ctx); + | ret +} + +static void ir_emit_return_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + ir_reg op2_reg = ctx->regs[ref][2]; + + if (op2_reg != IR_REG_INT_RET1) { + ir_type type = ctx->ir_base[insn->op2].type; + + if (op2_reg != IR_REG_NONE && !(op2_reg & IR_REG_SPILL_LOAD)) { + ir_emit_mov(ctx, type, IR_REG_INT_RET1, op2_reg); + } else { + ir_emit_load(ctx, type, IR_REG_INT_RET1, insn->op2); + } + } + ir_emit_return_void(ctx); +} + +static void ir_emit_return_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + ir_reg op2_reg = ctx->regs[ref][2]; + ir_type type = ctx->ir_base[insn->op2].type; + + if (op2_reg 
!= IR_REG_FP_RET1) { + if (op2_reg != IR_REG_NONE && !(op2_reg & IR_REG_SPILL_LOAD)) { + ir_emit_fp_mov(ctx, type, IR_REG_FP_RET1, op2_reg); + } else { + ir_emit_load(ctx, type, IR_REG_FP_RET1, insn->op2); + } + } + ir_emit_return_void(ctx); +} + +static void ir_emit_sext(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_type dst_type = insn->type; + ir_type src_type = ctx->ir_base[insn->op1].type; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(IR_IS_TYPE_INT(src_type)); + IR_ASSERT(IR_IS_TYPE_INT(dst_type)); + IR_ASSERT(ir_type_size[dst_type] > ir_type_size[src_type]); + IR_ASSERT(def_reg != IR_REG_NONE); + if ((op1_reg != IR_REG_NONE) && ((op1_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(insn->op1))) { + op1_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + + if (op1_reg != IR_REG_NONE) { + if (ir_type_size[src_type] == 1) { + if (ir_type_size[dst_type] == 2) { + | sxtb Rw(def_reg), Rw(op1_reg) + } else if (ir_type_size[dst_type] == 4) { + | sxtb Rw(def_reg), Rw(op1_reg) + } else { + IR_ASSERT(ir_type_size[dst_type] == 8); + | sxtb Rx(def_reg), Rx(op1_reg) + } + } else if (ir_type_size[src_type] == 2) { + if (ir_type_size[dst_type] == 4) { + | sxth Rw(def_reg), Rw(op1_reg) + } else { + IR_ASSERT(ir_type_size[dst_type] == 8); + | sxth Rx(def_reg), Rx(op1_reg) + } + } else { + IR_ASSERT(ir_type_size[src_type] == 4); + IR_ASSERT(ir_type_size[dst_type] == 8); + | sxtw Rx(def_reg), Rw(op1_reg) + } + } else if (IR_IS_CONST_REF(insn->op1)) { + IR_ASSERT(0); + } else { + ir_reg fp; + int32_t offset = ir_ref_spill_slot(ctx, insn->op1, &fp); + + if (ir_type_size[src_type] == 1) { + if (ir_type_size[dst_type] == 2) { + | ldrsb Rw(def_reg), [Rx(fp), #offset] + } else if (ir_type_size[dst_type] == 4) { + | ldrsb Rw(def_reg), [Rx(fp), #offset] + } else { + IR_ASSERT(ir_type_size[dst_type] == 8); + | ldrsb Rx(def_reg), [Rx(fp), #offset] + } + } else if (ir_type_size[src_type] == 2) { + if (ir_type_size[dst_type] == 4) { + | ldrsh Rw(def_reg), [Rx(fp), #offset] + } else { + IR_ASSERT(ir_type_size[dst_type] == 8); + | ldrsh Rx(def_reg), [Rx(fp), #offset] + } + } else { + IR_ASSERT(ir_type_size[src_type] == 4); + IR_ASSERT(ir_type_size[dst_type] == 8); + | ldrsw Rx(def_reg), [Rx(fp), #offset] + } + } + if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { + ir_emit_store(ctx, dst_type, def, def_reg); + } +} + +static void ir_emit_zext(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_type dst_type = insn->type; + ir_type src_type = ctx->ir_base[insn->op1].type; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(IR_IS_TYPE_INT(src_type)); + IR_ASSERT(IR_IS_TYPE_INT(dst_type)); + IR_ASSERT(ir_type_size[dst_type] > ir_type_size[src_type]); + IR_ASSERT(def_reg != IR_REG_NONE); + if ((op1_reg != IR_REG_NONE) && ((op1_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(insn->op1))) { + op1_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + + if (op1_reg != IR_REG_NONE) { + if (ir_type_size[src_type] == 1) { + | uxtb Rw(def_reg), Rw(op1_reg) + } else if (ir_type_size[src_type] == 2) { + | uxth Rw(def_reg), Rw(op1_reg) + } else { + | mov Rw(def_reg), Rw(op1_reg) + } + } else if (IR_IS_CONST_REF(insn->op1)) { + IR_ASSERT(0); + } else { + ir_reg fp; + int32_t offset = ir_ref_spill_slot(ctx, insn->op1, &fp); + + if 
(ir_type_size[src_type] == 1) { + | ldrb Rw(def_reg), [Rx(fp), #offset] + } else if (ir_type_size[src_type] == 2) { + | ldrh Rw(def_reg), [Rx(fp), #offset] + } else { + IR_ASSERT(ir_type_size[src_type] == 4); + IR_ASSERT(ir_type_size[dst_type] == 8); + | ldr Rw(def_reg), [Rx(fp), #offset] + } + } + if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { + ir_emit_store(ctx, dst_type, def, def_reg); + } +} + +static void ir_emit_trunc(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type dst_type = insn->type; + ir_type src_type = ctx->ir_base[insn->op1].type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(IR_IS_TYPE_INT(src_type)); + IR_ASSERT(IR_IS_TYPE_INT(dst_type)); + IR_ASSERT(ir_type_size[dst_type] < ir_type_size[src_type]); + IR_ASSERT(def_reg != IR_REG_NONE); + if (op1_reg != IR_REG_NONE && (op1_reg & IR_REG_SPILL_LOAD)) { + op1_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (op1_reg != IR_REG_NONE) { + if (ir_type_size[dst_type] == 1) { + | and Rw(def_reg), Rw(op1_reg), #0xff + } else if (ir_type_size[dst_type] == 2) { + | and Rw(def_reg), Rw(op1_reg), #0xffff + } else if (op1_reg != def_reg) { + ir_emit_mov(ctx, dst_type, def_reg, op1_reg); + } + } else { + ir_emit_load(ctx, dst_type, def_reg, insn->op1); + } + if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { + ir_emit_store(ctx, dst_type, def, def_reg); + } +} + +static void ir_emit_bitcast(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_type dst_type = insn->type; + ir_type src_type = ctx->ir_base[insn->op1].type; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(ir_type_size[dst_type] == ir_type_size[src_type]); + IR_ASSERT(def_reg != IR_REG_NONE); + if (op1_reg != IR_REG_NONE && (op1_reg & IR_REG_SPILL_LOAD)) { + op1_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (IR_IS_TYPE_INT(src_type) && IR_IS_TYPE_INT(dst_type)) { + if (op1_reg != IR_REG_NONE) { + if (op1_reg & IR_REG_SPILL_LOAD) { + op1_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (op1_reg != def_reg) { + ir_emit_mov(ctx, dst_type, def_reg, op1_reg); + } + } else { + ir_emit_load(ctx, dst_type, def_reg, insn->op1); + } + } else if (IR_IS_TYPE_FP(src_type) && IR_IS_TYPE_FP(dst_type)) { + if (op1_reg != IR_REG_NONE) { + if (op1_reg & IR_REG_SPILL_LOAD) { + op1_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (op1_reg != def_reg) { + ir_emit_fp_mov(ctx, dst_type, def_reg, op1_reg); + } + } else { + ir_emit_load(ctx, dst_type, def_reg, insn->op1); + } + } else if (IR_IS_TYPE_FP(src_type)) { + IR_ASSERT(IR_IS_TYPE_INT(dst_type)); + if (op1_reg != IR_REG_NONE) { + if (op1_reg & IR_REG_SPILL_LOAD) { + op1_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (src_type == IR_DOUBLE) { + | fmov Rx(def_reg), Rd(op1_reg-IR_REG_FP_FIRST) + } else { + IR_ASSERT(src_type == IR_FLOAT); + | fmov Rw(def_reg), Rs(op1_reg-IR_REG_FP_FIRST) + } + } else if (IR_IS_CONST_REF(insn->op1)) { + IR_ASSERT(0); //??? 
+ } else { + ir_reg fp; + int32_t offset = ir_ref_spill_slot(ctx, insn->op1, &fp); + + if (src_type == IR_DOUBLE) { + | ldr Rx(def_reg), [Rx(fp), #offset] + } else { + IR_ASSERT(src_type == IR_FLOAT); + | ldr Rw(def_reg), [Rx(fp), #offset] + } + } + } else if (IR_IS_TYPE_FP(dst_type)) { + IR_ASSERT(IR_IS_TYPE_INT(src_type)); + if (op1_reg != IR_REG_NONE) { + if (op1_reg & IR_REG_SPILL_LOAD) { + op1_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (dst_type == IR_DOUBLE) { + | fmov Rd(def_reg-IR_REG_FP_FIRST), Rx(op1_reg) + } else { + IR_ASSERT(dst_type == IR_FLOAT); + | fmov Rs(def_reg-IR_REG_FP_FIRST), Rw(op1_reg) + } + } else if (IR_IS_CONST_REF(insn->op1)) { + IR_ASSERT(0); //??? + } else { + ir_reg fp; + int32_t offset = ir_ref_spill_slot(ctx, insn->op1, &fp); + + if (dst_type == IR_DOUBLE) { + | ldr Rd(def_reg), [Rx(fp), #offset] + } else { + IR_ASSERT(src_type == IR_FLOAT); + | ldr Rs(def_reg), [Rx(fp), #offset] + } + } + } + if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { + ir_emit_store(ctx, dst_type, def, def_reg); + } +} + +static void ir_emit_int2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_type dst_type = insn->type; + ir_type src_type = ctx->ir_base[insn->op1].type; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(IR_IS_TYPE_INT(src_type)); + IR_ASSERT(IR_IS_TYPE_FP(dst_type)); + IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); + if ((op1_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(insn->op1)) { + op1_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + + if (ir_type_size[src_type] == 8) { + if (IR_IS_TYPE_SIGNED(src_type)) { + if (dst_type == IR_DOUBLE) { + | scvtf Rd(def_reg-IR_REG_FP_FIRST), Rx(op1_reg) + } else { + IR_ASSERT(dst_type == IR_FLOAT); + | scvtf Rs(def_reg-IR_REG_FP_FIRST), Rx(op1_reg) + } + } else { + if (dst_type == IR_DOUBLE) { + | ucvtf Rd(def_reg-IR_REG_FP_FIRST), Rx(op1_reg) + } else { + IR_ASSERT(dst_type == IR_FLOAT); + | ucvtf Rs(def_reg-IR_REG_FP_FIRST), Rx(op1_reg) + } + } + } else { + if (IR_IS_TYPE_SIGNED(src_type)) { + if (dst_type == IR_DOUBLE) { + | scvtf Rd(def_reg-IR_REG_FP_FIRST), Rw(op1_reg) + } else { + IR_ASSERT(dst_type == IR_FLOAT); + | scvtf Rs(def_reg-IR_REG_FP_FIRST), Rw(op1_reg) + } + } else { + if (dst_type == IR_DOUBLE) { + | ucvtf Rd(def_reg-IR_REG_FP_FIRST), Rw(op1_reg) + } else { + IR_ASSERT(dst_type == IR_FLOAT); + | ucvtf Rs(def_reg-IR_REG_FP_FIRST), Rw(op1_reg) + } + } + } + if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { + ir_emit_store(ctx, dst_type, def, def_reg); + } +} + +static void ir_emit_fp2int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_type dst_type = insn->type; + ir_type src_type = ctx->ir_base[insn->op1].type; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(IR_IS_TYPE_FP(src_type)); + IR_ASSERT(IR_IS_TYPE_INT(dst_type)); + IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); + if ((op1_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(insn->op1)) { + op1_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (ir_type_size[dst_type] == 8) { + if (IR_IS_TYPE_SIGNED(dst_type)) { + if (src_type == IR_DOUBLE) { + | fcvtzs Rx(def_reg), Rd(op1_reg-IR_REG_FP_FIRST) + } else { + IR_ASSERT(src_type == IR_FLOAT); + | fcvtzs Rx(def_reg), 
Rs(op1_reg-IR_REG_FP_FIRST) + } + } else { + if (src_type == IR_DOUBLE) { + | fcvtzu Rx(def_reg), Rd(op1_reg-IR_REG_FP_FIRST) + } else { + IR_ASSERT(src_type == IR_FLOAT); + | fcvtzu Rx(def_reg), Rs(op1_reg-IR_REG_FP_FIRST) + } + } + } else { + if (IR_IS_TYPE_SIGNED(dst_type)) { + if (src_type == IR_DOUBLE) { + | fcvtzs Rw(def_reg), Rd(op1_reg-IR_REG_FP_FIRST) + } else { + IR_ASSERT(src_type == IR_FLOAT); + | fcvtzs Rw(def_reg), Rs(op1_reg-IR_REG_FP_FIRST) + } + } else { + if (src_type == IR_DOUBLE) { + | fcvtzu Rw(def_reg), Rd(op1_reg-IR_REG_FP_FIRST) + } else { + IR_ASSERT(src_type == IR_FLOAT); + | fcvtzu Rw(def_reg), Rs(op1_reg-IR_REG_FP_FIRST) + } + } + } + if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { + ir_emit_store(ctx, dst_type, def, def_reg); + } +} + +static void ir_emit_fp2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_type dst_type = insn->type; + ir_type src_type = ctx->ir_base[insn->op1].type; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(IR_IS_TYPE_FP(src_type)); + IR_ASSERT(IR_IS_TYPE_FP(dst_type)); + IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); + if ((op1_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(insn->op1)) { + op1_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (src_type == dst_type) { + if (op1_reg != def_reg) { + ir_emit_fp_mov(ctx, dst_type, def_reg, op1_reg); + } + } else if (src_type == IR_DOUBLE) { + | fcvt Rs(def_reg-IR_REG_FP_FIRST), Rd(op1_reg-IR_REG_FP_FIRST) + } else { + IR_ASSERT(src_type == IR_FLOAT); + | fcvt Rd(def_reg-IR_REG_FP_FIRST), Rs(op1_reg-IR_REG_FP_FIRST) + } + if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { + ir_emit_store(ctx, dst_type, def, def_reg); + } +} + +static void ir_emit_copy_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_ref type = insn->type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(def_reg != IR_REG_NONE || op1_reg != IR_REG_NONE); + if (op1_reg != IR_REG_NONE && (op1_reg & IR_REG_SPILL_LOAD)) { + op1_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, type, op1_reg, insn->op1); + } + if (def_reg == op1_reg) { + /* same reg */ + } else if (def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE) { + ir_emit_mov(ctx, type, def_reg, op1_reg); + } else if (def_reg != IR_REG_NONE) { + ir_emit_load(ctx, type, def_reg, insn->op1); + } else if (op1_reg != IR_REG_NONE) { + ir_emit_store(ctx, type, def, op1_reg); + } else { + IR_ASSERT(0); + } + if (def_reg != IR_REG_NONE && (ctx->regs[def][0] & IR_REG_SPILL_STORE)) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_copy_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_type type = insn->type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(def_reg != IR_REG_NONE || op1_reg != IR_REG_NONE); + if (op1_reg != IR_REG_NONE && (op1_reg & IR_REG_SPILL_LOAD)) { + op1_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, type, op1_reg, insn->op1); + } + if (def_reg == op1_reg) { + /* same reg */ + } else if (def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE) { + ir_emit_fp_mov(ctx, type, def_reg, op1_reg); + } else if (def_reg != IR_REG_NONE) { + ir_emit_load(ctx, type, def_reg, insn->op1); + } else if (op1_reg != IR_REG_NONE) { + ir_emit_store(ctx, type, def, op1_reg); + } else { + IR_ASSERT(0); + } + if (def_reg != IR_REG_NONE && (ctx->regs[def][0] & IR_REG_SPILL_STORE)) { + 
ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_vaddr(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_ref type = insn->type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + int32_t offset; + ir_reg fp; + + IR_ASSERT(def_reg != IR_REG_NONE); + offset = ir_var_spill_slot(ctx, insn->op1, &fp); + | add Rx(def_reg), Rx(fp), #offset + if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_vload(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + ir_insn *var_insn = &ctx->ir_base[insn->op2]; + ir_ref type = insn->type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg fp; + int32_t offset; + + IR_ASSERT(var_insn->op == IR_VAR); + fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3); + if (def_reg == IR_REG_NONE && ir_is_same_mem_var(ctx, def, var_insn->op3)) { + return; // fake load + } + IR_ASSERT(def_reg != IR_REG_NONE); + if (IR_IS_TYPE_INT(type)) { + ir_emit_load_mem_int(ctx, type, def_reg, fp, offset); + } else { + ir_emit_load_mem_fp(ctx, type, def_reg, fp, offset); + } + if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_vstore(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + ir_insn *var_insn = &ctx->ir_base[insn->op2]; + ir_insn *val_insn = &ctx->ir_base[insn->op3]; + ir_ref type = val_insn->type; + ir_reg op3_reg = ctx->regs[ref][3]; + ir_reg fp; + int32_t offset; + + IR_ASSERT(var_insn->op == IR_VAR); + fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3); + IR_ASSERT(op3_reg != IR_REG_NONE); + if ((op3_reg & IR_REG_SPILL_LOAD) && ir_is_same_mem_var(ctx, insn->op3, var_insn->op3)) { + return; // fake store + } + if ((op3_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(insn->op3)) { + op3_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + if (IR_IS_TYPE_INT(type)) { + ir_emit_store_mem_int(ctx, type, fp, offset, op3_reg); + } else { + ir_emit_store_mem_fp(ctx, type, fp, offset, op3_reg); + } +} + +static int32_t ir_fuse_addr(ir_ctx *ctx, ir_ref ref, ir_reg *preg1, ir_reg *preg2) +{ + ir_insn *addr_insn = &ctx->ir_base[ref]; + ir_reg reg; + + IR_ASSERT(addr_insn->op == IR_ADD); + IR_ASSERT(!IR_IS_CONST_REF(addr_insn->op1) && IR_IS_CONST_REF(addr_insn->op2)); + reg = ctx->regs[ref][1]; + if (reg & IR_REG_SPILL_LOAD) { + reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, IR_ADDR, reg, addr_insn->op1); + } + *preg1 = reg; + *preg2 = IR_REG_NONE; // TODO: ??? 
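+ /* ir_fuse_addr() handles a fused address of the form ADD(base, const): the base
+  * register is passed back through *preg1 and the constant displacement is the
+  * return value below (e.g. 16 for a LOAD(ADD(ptr, 16)) pattern). */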
+ return ctx->ir_base[addr_insn->op2].val.i32; +} + +static void ir_emit_load_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_ref type = insn->type; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + + if (ctx->use_lists[def].count == 1) { + /* dead load */ + return; + } + IR_ASSERT(def_reg != IR_REG_NONE); + if (!IR_IS_CONST_REF(insn->op2) && (ir_rule(ctx, insn->op2) & IR_FUSED)) { + ir_reg op1_reg; + int32_t offset = ir_fuse_addr(ctx, insn->op2, &op1_reg, &op2_reg); + + if (op2_reg == IR_REG_NONE) { + ir_emit_load_mem_int(ctx, type, def_reg, op1_reg, offset); + } else { + switch (ir_type_size[type]) { + case 8: + | ldr Rx(def_reg), [Rx(op1_reg), Rx(op2_reg)] + break; + case 4: + | ldr Rw(def_reg), [Rx(op1_reg), Rx(op2_reg)] + break; + case 2: + if (IR_IS_TYPE_SIGNED(type)) { + | ldrsh Rw(def_reg), [Rx(op1_reg), Rx(op2_reg)] + } else { + | ldrh Rw(def_reg), [Rx(op1_reg), Rx(op2_reg)] + } + break; + case 1: + if (IR_IS_TYPE_SIGNED(type)) { + | ldrsb Rw(def_reg), [Rx(op1_reg), Rx(op2_reg)] + } else { + | ldrb Rw(def_reg), [Rx(op1_reg), Rx(op2_reg)] + } + break; + default: + IR_ASSERT(0); + } + } + } else { + if (op2_reg == IR_REG_NONE) { + op2_reg = def_reg; + } + if ((op2_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(insn->op2)) { + op2_reg &= ~IR_REG_SPILL_LOAD; + IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + ir_emit_load_mem_int(ctx, type, def_reg, op2_reg, 0); + } + if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_load_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_ref type = insn->type; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + + if (ctx->use_lists[def].count == 1) { + /* dead load */ + return; + } + IR_ASSERT(def_reg != IR_REG_NONE); + if (!IR_IS_CONST_REF(insn->op2) && (ir_rule(ctx, insn->op2) & IR_FUSED)) { + ir_reg op1_reg; + int32_t offset = ir_fuse_addr(ctx, insn->op2, &op1_reg, &op2_reg); + + if (op2_reg == IR_REG_NONE) { + ir_emit_load_mem_fp(ctx, type, def_reg, op1_reg, offset); + } else { + if (type == IR_DOUBLE) { + | ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(op1_reg), Rx(op2_reg)] + } else { + IR_ASSERT(type == IR_FLOAT); + | ldr Rs(def_reg-IR_REG_FP_FIRST), [Rx(op1_reg), Rx(op2_reg)] + } + } + } else { + if (op2_reg != IR_REG_NONE && (op2_reg & IR_REG_SPILL_LOAD)) { + op2_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + if (op2_reg == IR_REG_NONE) { + op2_reg = def_reg; + IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + ir_emit_load_mem_fp(ctx, type, def_reg, op2_reg, 0); + } + if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_store_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *val_insn = &ctx->ir_base[insn->op3]; + ir_ref type = val_insn->type; + ir_reg op2_reg = ctx->regs[ref][2]; + ir_reg op3_reg = ctx->regs[ref][3]; + + if (op3_reg == IR_REG_NONE) { + IR_ASSERT(IR_IS_CONST_REF(insn->op3) && ctx->ir_base[insn->op3].val.i64 == 0); + op3_reg = IR_REG_ZR; + } else if ((op3_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(insn->op3)) { + op3_reg &= ~IR_REG_SPILL_LOAD; + 
ir_emit_load(ctx, type, op3_reg, insn->op3); + } + if (!IR_IS_CONST_REF(insn->op2) && (ir_rule(ctx, insn->op2) & IR_FUSED)) { + ir_reg op1_reg; + int32_t offset = ir_fuse_addr(ctx, insn->op2, &op1_reg, &op2_reg); + + if (op2_reg == IR_REG_NONE) { + ir_emit_store_mem_int(ctx, type, op1_reg, offset, op3_reg); + } else { + switch (ir_type_size[type]) { + case 8: + | str Rx(op3_reg), [Rx(op1_reg), Rx(op2_reg)] + break; + case 4: + | str Rw(op3_reg), [Rx(op1_reg), Rx(op2_reg)] + break; + case 2: + | strh Rw(op3_reg), [Rx(op1_reg), Rx(op2_reg)] + break; + case 1: + | strb Rw(op3_reg), [Rx(op1_reg), Rx(op2_reg)] + break; + default: + IR_ASSERT(0); + } + } + } else { + IR_ASSERT(op2_reg != IR_REG_NONE); + if ((op2_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(insn->op2)) { + op2_reg &= ~IR_REG_SPILL_LOAD; + IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + ir_emit_store_mem_int(ctx, type, op2_reg, 0, op3_reg); + } +} + +static void ir_emit_store_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_ref type = ctx->ir_base[insn->op3].type; + ir_reg op2_reg = ctx->regs[ref][2]; + ir_reg op3_reg = ctx->regs[ref][3]; + + IR_ASSERT(op3_reg != IR_REG_NONE); + if ((op3_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(insn->op3)) { + op3_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + if (!IR_IS_CONST_REF(insn->op2) && (ir_rule(ctx, insn->op2) & IR_FUSED)) { + ir_reg op1_reg; + int32_t offset = ir_fuse_addr(ctx, insn->op2, &op1_reg, &op2_reg); + + if (op2_reg == IR_REG_NONE) { + ir_emit_store_mem_fp(ctx, type, op1_reg, offset, op3_reg); + } else { + if (type == IR_DOUBLE) { + | str Rd(op3_reg-IR_REG_FP_FIRST), [Rx(op1_reg), Rx(op2_reg)] + } else { + IR_ASSERT(type == IR_FLOAT); + | str Rs(op3_reg-IR_REG_FP_FIRST), [Rx(op1_reg), Rx(op2_reg)] + } + } + } else { + if ((op2_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(insn->op2)) { + op2_reg &= ~IR_REG_SPILL_LOAD; + IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + ir_emit_store_mem_fp(ctx, type, op2_reg, 0, op3_reg); + } +} + +static void ir_emit_rload(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_reg src_reg = insn->op2; + ir_type type = insn->type; + + if (IR_REGSET_IN(IR_REGSET_UNION(ctx->fixed_regset, IR_REGSET_FIXED), src_reg)) { + if (ctx->vregs[def] + && ctx->live_intervals[ctx->vregs[def]] + && ctx->live_intervals[ctx->vregs[def]]->stack_spill_pos != -1) { + ir_emit_store(ctx, type, def, src_reg); + } + } else { + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + + if (def_reg == IR_REG_NONE) { + /* op3 is used as a flag that the value is already stored in memory. 
+ * If op3 is set we don't have to store the value once again (in case of spilling) + */ + if (!insn->op3) { + ir_emit_store(ctx, type, def, src_reg); + } + } else { + if (src_reg != def_reg) { + if (IR_IS_TYPE_INT(type)) { + ir_emit_mov(ctx, type, def_reg, src_reg); + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + ir_emit_fp_mov(ctx, type, def_reg, src_reg); + } + } + if ((ctx->regs[def][0] & IR_REG_SPILL_STORE) && !insn->op3) { + ir_emit_store(ctx, type, def, def_reg); + } + } + } +} + +static void ir_emit_rstore(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + ir_ref type = ctx->ir_base[insn->op2].type; + ir_reg op2_reg = ctx->regs[ref][2]; + ir_reg dst_reg = insn->op3; + + if (op2_reg != IR_REG_NONE) { + if (op2_reg & IR_REG_SPILL_LOAD) { + op2_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + if (op2_reg != dst_reg) { + if (IR_IS_TYPE_INT(type)) { + ir_emit_mov(ctx, type, dst_reg, op2_reg); + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + ir_emit_fp_mov(ctx, type, dst_reg, op2_reg); + } + } + } else { + ir_emit_load(ctx, type, dst_reg, insn->op2); + } +} + +static void ir_emit_alloca(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + + if (IR_IS_CONST_REF(insn->op2)) { + ir_insn *val = &ctx->ir_base[insn->op2]; + int32_t size = val->val.i32; + + IR_ASSERT(IR_IS_TYPE_INT(val->type)); + IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 > 0); + + if (ctx->flags & IR_HAS_CALLS) { + /* Stack must be 16 byte aligned */ + size = IR_ALIGNED_SIZE(size, 16); + } else { + size = IR_ALIGNED_SIZE(size, 8); + } + | sub sp, sp, #size + if (!(ctx->flags & IR_USE_FRAME_POINTER)) { + data->call_stack_size += size; + } + } else { + int32_t alignment = (ctx->flags & IR_HAS_CALLS) ? 16 : 8; + ir_reg op2_reg = ctx->regs[def][2]; + ir_type type = ctx->ir_base[insn->op2].type; + + IR_ASSERT(ctx->flags & IR_FUNCTION); + IR_ASSERT(def_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); + if (op2_reg & IR_REG_SPILL_LOAD) { + op2_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + | add Rx(def_reg), Rx(op2_reg), #(alignment-1) + | and Rx(def_reg), Rx(def_reg), #(~(alignment-1)) + | sub sp, sp, Rx(def_reg); + } + if (def_reg != IR_REG_NONE) { + | mov Rx(def_reg), sp + if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { + ir_emit_store(ctx, insn->type, def, def_reg); + } + } else { + ir_emit_store(ctx, IR_ADDR, def, IR_REG_STACK_POINTER); + } +} + +static void ir_emit_afree(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (IR_IS_CONST_REF(insn->op2)) { + ir_insn *val = &ctx->ir_base[insn->op2]; + int32_t size = val->val.i32; + + IR_ASSERT(IR_IS_TYPE_INT(val->type)); + IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 > 0); + + if (ctx->flags & IR_HAS_CALLS) { + /* Stack must be 16 byte aligned */ + size = IR_ALIGNED_SIZE(size, 16); + } else { + size = IR_ALIGNED_SIZE(size, 8); + } + | add sp, sp, #size + if (!(ctx->flags & IR_USE_FRAME_POINTER)) { + data->call_stack_size -= size; + } + } else { +// int32_t alignment = (ctx->flags & IR_HAS_CALLS) ? 
16 : 8; + ir_reg op2_reg = ctx->regs[def][2]; + ir_type type = ctx->ir_base[insn->op2].type; + + IR_ASSERT(ctx->flags & IR_FUNCTION); + IR_ASSERT(op2_reg != IR_REG_NONE); + if (op2_reg & IR_REG_SPILL_LOAD) { + op2_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + + // TODO: alignment + + | add sp, sp, Rx(op2_reg); + } +} + +static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type; + ir_block *bb; + ir_insn *use_insn, *val; + uint32_t n, *p, use_block; + int i; + int label, default_label = 0; + int count = 0; + ir_val min, max; + ir_reg op1_reg, op2_reg, tmp_reg; + + type = ctx->ir_base[insn->op2].type; + if (IR_IS_TYPE_SIGNED(type)) { + min.u64 = 0x7fffffffffffffff; + max.u64 = 0x8000000000000000; + } else { + min.u64 = 0xffffffffffffffff; + max.u64 = 0x0; + } + + bb = &ctx->cfg_blocks[b]; + p = &ctx->cfg_edges[bb->successors]; + for (n = bb->successors_count; n != 0; p++, n--) { + use_block = *p; + use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start]; + if (use_insn->op == IR_CASE_VAL) { + val = &ctx->ir_base[use_insn->op2]; + if (IR_IS_TYPE_SIGNED(type)) { + IR_ASSERT(IR_IS_TYPE_SIGNED(val->type)); + min.i64 = IR_MIN(min.i64, val->val.i64); + max.i64 = IR_MAX(max.i64, val->val.i64); + } else { + IR_ASSERT(!IR_IS_TYPE_SIGNED(val->type)); + min.u64 = (int64_t)IR_MIN(min.u64, val->val.u64); + max.u64 = (int64_t)IR_MAX(max.u64, val->val.u64); + } + count++; + } else if (use_insn->op == IR_CASE_DEFAULT) { + default_label = ir_skip_empty_target_blocks(ctx, use_block); + } else { + IR_ASSERT(0); + } + } + + op1_reg = ctx->regs[def][1]; + op2_reg = ctx->regs[def][2]; + tmp_reg = ctx->regs[def][3]; + + IR_ASSERT(op2_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); + if (op2_reg & IR_REG_SPILL_LOAD) { + op2_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, type, op2_reg, insn->op2); + } else if (IR_IS_CONST_REF(insn->op2)) { + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + + /* Generate a table jmp or a seqence of calls */ + if ((max.i64-min.i64) < count * 8) { + int *labels = ir_mem_malloc(sizeof(int) * (max.i64 - min.i64 + 1)); + + for (i = 0; i <= (max.i64 - min.i64); i++) { + labels[i] = default_label; + } + p = &ctx->cfg_edges[bb->successors]; + for (n = bb->successors_count; n != 0; p++, n--) { + use_block = *p; + use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start]; + if (use_insn->op == IR_CASE_VAL) { + val = &ctx->ir_base[use_insn->op2]; + label = ir_skip_empty_target_blocks(ctx, use_block); + labels[val->val.i64 - min.i64] = label; + } + } + + if (aarch64_may_encode_imm12(max.i64)) { + | ASM_REG_IMM_OP cmp, type, op2_reg, max.i64 + } else { + ir_emit_load_imm_int(ctx, type, tmp_reg, max.i64); + | ASM_REG_REG_OP cmp, type, op2_reg, tmp_reg + } + if (IR_IS_TYPE_SIGNED(type)) { + | bgt =>default_label + } else { + | bhi =>default_label + } + + if (op1_reg == IR_REG_NONE) { + op1_reg = op2_reg; + } + if (aarch64_may_encode_imm12(min.i64)) { + | ASM_REG_REG_IMM_OP subs, type, op1_reg, op2_reg, min.i64 + } else { + ir_emit_load_imm_int(ctx, type, tmp_reg, min.i64); + | ASM_REG_REG_REG_OP subs, type, op1_reg, op2_reg, tmp_reg + } + if (IR_IS_TYPE_SIGNED(type)) { + | blt =>default_label + } else { + | blo =>default_label + } + | adr Rx(tmp_reg), >1 + | ldr Rx(tmp_reg), [Rx(tmp_reg), Rx(op1_reg), lsl #3] + | br Rx(tmp_reg) + |.jmp_table + if (!data->jmp_table_label) { + data->jmp_table_label = ctx->cfg_blocks_count + ctx->consts_count + 3; + 
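+ /* The table that follows holds one 8-byte target address per value in [min, max];
+  * values without an explicit case point at default_label.  For example, cases
+  * {10, 12, 15} produce a 6-entry table whose slots 1, 3 and 4 hold the default
+  * block, and dispatch is "ldr tmp, [table, (val-min)*8]; br tmp". */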
|=>data->jmp_table_label: + } + |.align 8 + |1: + for (i = 0; i <= (max.i64 - min.i64); i++) { + int b = labels[i]; + ir_block *bb = &ctx->cfg_blocks[b]; + ir_insn *insn = &ctx->ir_base[bb->end]; + + if (insn->op == IR_IJMP && IR_IS_CONST_REF(insn->op2)) { + ir_ref prev = ctx->prev_ref[bb->end]; + if (prev != bb->start && ctx->ir_base[prev].op == IR_SNAPSHOT) { + prev = ctx->prev_ref[prev]; + } + if (prev == bb->start) { + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]); + + | .addr &addr + if (ctx->ir_base[bb->start].op != IR_CASE_DEFAULT) { + bb->flags |= IR_BB_EMPTY; + } + continue; + } + } + | .addr =>b + } + |.code + ir_mem_free(labels); + } else { + p = &ctx->cfg_edges[bb->successors]; + for (n = bb->successors_count; n != 0; p++, n--) { + use_block = *p; + use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start]; + if (use_insn->op == IR_CASE_VAL) { + val = &ctx->ir_base[use_insn->op2]; + label = ir_skip_empty_target_blocks(ctx, use_block); + if (aarch64_may_encode_imm12(val->val.i64)) { + | ASM_REG_IMM_OP cmp, type, op2_reg, val->val.i64 + } else { + ir_emit_load_imm_int(ctx, type, tmp_reg, val->val.i64); + | ASM_REG_REG_OP cmp, type, op2_reg, tmp_reg + + } + | beq =>label + } + } + if (default_label) { + | b =>default_label + } + } +} + +static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_reg, ir_reg tmp_fp_reg) +{ + int i; + int8_t *pred, *loc, *types; + ir_reg to, from_reg; + ir_type type; + ir_regset todo, ready; + + loc = ir_mem_malloc(IR_REG_NUM * 3 * sizeof(int8_t)); + pred = loc + IR_REG_NUM; + types = pred + IR_REG_NUM; + memset(loc, IR_REG_NONE, IR_REG_NUM * 2 * sizeof(int8_t)); + todo = IR_REGSET_EMPTY; + ready = IR_REGSET_EMPTY; + + for (i = 0; i < count; i++) { + from_reg = copies[i].from; + to = copies[i].to; + if (from_reg != to) { + loc[from_reg] = from_reg; + pred[to] = from_reg; + types[from_reg] = copies[i].type; + IR_REGSET_INCL(todo, to); + } + } + + IR_REGSET_FOREACH(todo, i) { + if (loc[i] == IR_REG_NONE) { + IR_REGSET_INCL(ready, i); + } + } IR_REGSET_FOREACH_END(); + + while (1) { + ir_ref /*a, b,*/ c; + + while (ready != IR_REGSET_EMPTY) { + to = ir_regset_pop_first(&ready); + from_reg = pred[to]; + c = loc[from_reg]; + type = types[from_reg]; + if (IR_IS_TYPE_INT(type)) { + ir_emit_mov(ctx, type, to, c); + } else { + ir_emit_fp_mov(ctx, type, to, c); + } + IR_REGSET_EXCL(todo, to); + loc[from_reg] = to; + if (from_reg == c && pred[from_reg] != IR_REG_NONE) { + IR_REGSET_INCL(ready, from_reg); + } + } + + if (todo == IR_REGSET_EMPTY) { + break; + } + to = ir_regset_pop_first(&todo); + from_reg = pred[to]; + IR_ASSERT(to != loc[from_reg]); + type = types[from_reg]; + if (IR_IS_TYPE_INT(type)) { + IR_ASSERT(tmp_reg != IR_REG_NONE); + IR_ASSERT(tmp_reg >= IR_REG_GP_FIRST && tmp_reg <= IR_REG_GP_LAST); + ir_emit_mov(ctx, type, tmp_reg, to); + loc[to] = tmp_reg; + } else { + IR_ASSERT(tmp_fp_reg != IR_REG_NONE); + IR_ASSERT(tmp_fp_reg >= IR_REG_FP_FIRST && tmp_fp_reg <= IR_REG_FP_LAST); + ir_emit_fp_mov(ctx, type, tmp_fp_reg, to); + loc[to] = tmp_fp_reg; + } + IR_REGSET_INCL(ready, to); + } + + ir_mem_free(loc); + + return 1; +} + +static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn) +{ + int j, n; + ir_type type; + int int_param = 0; + int fp_param = 0; + int int_reg_params_count = IR_REG_INT_ARGS; + int fp_reg_params_count = IR_REG_FP_ARGS; + int32_t used_stack = 0; + + n = ir_input_edges_count(ctx, insn); + for (j = 3; j <= n; j++) { + type = ctx->ir_base[ir_insn_op(insn, j)].type; + if 
(IR_IS_TYPE_INT(type)) { + if (int_param >= int_reg_params_count) { + used_stack += IR_MAX(sizeof(void*), ir_type_size[type]); + } + int_param++; + } else if (IR_IS_TYPE_FP(type)) { + if (fp_param >= fp_reg_params_count) { + used_stack += IR_MAX(sizeof(void*), ir_type_size[type]); + } + fp_param++; + } else { + IR_ASSERT(0); + } + } + + return used_stack; +} + +static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg tmp_reg) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + int j, n; + ir_ref arg; + ir_insn *arg_insn; + uint8_t type; + ir_reg src_reg, dst_reg; + int int_param = 0; + int fp_param = 0; + int count = 0; + int int_reg_params_count = IR_REG_INT_ARGS; + int fp_reg_params_count = IR_REG_FP_ARGS; + const int8_t *int_reg_params = _ir_int_reg_params; + const int8_t *fp_reg_params = _ir_fp_reg_params; + int32_t used_stack, stack_offset = 0; + ir_copy *copies; + bool do_pass3 = 0; + /* For temporaries we may use any scratch registers except for registers used for parameters */ + ir_reg tmp_fp_reg = IR_REG_FP_LAST; /* Temporary register for FP loads and swap */ + + n = ir_input_edges_count(ctx, insn); + if (n < 3) { + return 0; + } + + if (tmp_reg == IR_REG_NONE) { + tmp_reg = IR_REG_IP0; + } + + if (insn->op == IR_CALL && (ctx->flags & IR_PREALLOCATED_STACK)) { + // TODO: support for preallocated stack + used_stack = 0; + } else { + used_stack = ir_call_used_stack(ctx, insn); + /* Stack must be 16 byte aligned */ + used_stack = IR_ALIGNED_SIZE(used_stack, 16); + if (ctx->fixed_call_stack_size && used_stack <= ctx->fixed_call_stack_size) { + used_stack = 0; + } else { + data->call_stack_size += used_stack; + if (used_stack) { + | sub sp, sp, #used_stack + } + } + } + + /* 1. move all register arguments that should be passed through stack + * and collect arguments that should be passed through registers */ + copies = ir_mem_malloc((n - 2) * sizeof(ir_copy)); + for (j = 3; j <= n; j++) { + arg = ir_insn_op(insn, j); + src_reg = ir_get_alocated_reg(ctx, def, j); + arg_insn = &ctx->ir_base[arg]; + type = arg_insn->type; + if (IR_IS_TYPE_INT(type)) { + if (int_param < int_reg_params_count) { + dst_reg = int_reg_params[int_param]; + } else { + dst_reg = IR_REG_NONE; /* pass argument through stack */ + } + int_param++; + } else if (IR_IS_TYPE_FP(type)) { + if (fp_param < fp_reg_params_count) { + dst_reg = fp_reg_params[fp_param]; + } else { + dst_reg = IR_REG_NONE; /* pass argument through stack */ + } + fp_param++; + } else { + IR_ASSERT(0); + } + if (dst_reg != IR_REG_NONE) { + if (IR_IS_CONST_REF(arg) || src_reg == IR_REG_NONE) { + /* delay CONST->REG and MEM->REG moves to third pass */ + do_pass3 = 1; + } else { + IR_ASSERT(src_reg != IR_REG_NONE); + if (src_reg & IR_REG_SPILL_LOAD) { + src_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, type, src_reg, arg); + } + if (src_reg != dst_reg) { + /* delay REG->REG moves to second pass */ + copies[count].type = type; + copies[count].from = src_reg; + copies[count].to = dst_reg; + count++; + } + } + } else { + /* Pass register arguments to stack (REG->MEM moves) */ + if (!IR_IS_CONST_REF(arg) && src_reg != IR_REG_NONE && !(src_reg & IR_REG_SPILL_LOAD)) { + if (IR_IS_TYPE_INT(type)) { + ir_emit_store_mem_int(ctx, type, IR_REG_STACK_POINTER, stack_offset, src_reg); + } else { + ir_emit_store_mem_fp(ctx, type, IR_REG_STACK_POINTER, stack_offset, src_reg); + } + } else { + do_pass3 = 1; + } + stack_offset += IR_MAX(sizeof(void*), ir_type_size[type]); + } + } + + /* 2. 
move all arguments that should be passed from one register to anouther (REG->REG movs) */ + if (count) { + ir_parallel_copy(ctx, copies, count, tmp_reg, tmp_fp_reg); + } + ir_mem_free(copies); + + /* 3. move the remaining memory and immediate values */ + if (do_pass3) { + stack_offset = 0; + int_param = 0; + fp_param = 0; + for (j = 3; j <= n; j++) { + arg = ir_insn_op(insn, j); + src_reg = ir_get_alocated_reg(ctx, def, j); + arg_insn = &ctx->ir_base[arg]; + type = arg_insn->type; + if (IR_IS_TYPE_INT(type)) { + if (int_param < int_reg_params_count) { + dst_reg = int_reg_params[int_param]; + } else { + dst_reg = IR_REG_NONE; /* argument already passed through stack */ + } + int_param++; + } else if (IR_IS_TYPE_FP(type)) { + if (fp_param < fp_reg_params_count) { + dst_reg = fp_reg_params[fp_param]; + } else { + dst_reg = IR_REG_NONE; /* argument already passed through stack */ + } + fp_param++; + } else { + IR_ASSERT(0); + } + if (dst_reg != IR_REG_NONE) { + if (IR_IS_CONST_REF(arg) || src_reg == IR_REG_NONE) { + if (IR_IS_TYPE_INT(type)) { + if (IR_IS_CONST_REF(arg)) { + if (type == IR_ADDR) { + ir_insn *val_insn = &ctx->ir_base[arg]; + + if (val_insn->op == IR_STR) { + int label = ctx->cfg_blocks_count - arg; + + val_insn->const_flags |= IR_CONST_EMIT; + | adr Rx(dst_reg), =>label + continue; + } + } else if (ir_type_size[type] == 1) { + type = IR_ADDR; + } + } + ir_emit_load(ctx, type, dst_reg, arg); + } else { + ir_emit_load(ctx, type, dst_reg, arg); + } + } + } else { + if (IR_IS_TYPE_INT(type)) { + if (IR_IS_CONST_REF(arg)) { + ir_insn *val_insn = &ctx->ir_base[arg]; + + if (val_insn->op == IR_STR) { + int label = ctx->cfg_blocks_count - arg; + + val_insn->const_flags |= IR_CONST_EMIT; + IR_ASSERT(tmp_reg != IR_REG_NONE); + | adr Rx(tmp_reg), =>label + | str Rx(tmp_reg), [sp, #stack_offset] + } else { + IR_ASSERT(tmp_reg != IR_REG_NONE); + ir_emit_load_imm_int(ctx, type, tmp_reg, val_insn->val.i64); + | str Rx(tmp_reg), [sp, #stack_offset] + } + } else if (src_reg == IR_REG_NONE) { + IR_ASSERT(tmp_reg != IR_REG_NONE); + ir_emit_load(ctx, type, tmp_reg, arg); + ir_emit_store_mem_int(ctx, type, IR_REG_STACK_POINTER, stack_offset, tmp_reg); + } else if (src_reg & IR_REG_SPILL_LOAD) { + src_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, type, src_reg, arg); + ir_emit_store_mem_int(ctx, type, IR_REG_STACK_POINTER, stack_offset, src_reg); + } + } else { + if (IR_IS_CONST_REF(arg)) { + ir_emit_load(ctx, type, tmp_fp_reg, arg); + ir_emit_store_mem_fp(ctx, IR_DOUBLE, IR_REG_STACK_POINTER, stack_offset, tmp_fp_reg); + } else if (src_reg == IR_REG_NONE) { + IR_ASSERT(tmp_fp_reg != IR_REG_NONE); + ir_emit_load(ctx, type, tmp_fp_reg, arg); + ir_emit_store_mem_fp(ctx, IR_DOUBLE, IR_REG_STACK_POINTER, stack_offset, tmp_fp_reg); + } else if (src_reg & IR_REG_SPILL_LOAD) { + src_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, type, src_reg, arg); + ir_emit_store_mem_fp(ctx, type, IR_REG_STACK_POINTER, stack_offset, src_reg); + } + } + stack_offset += IR_MAX(sizeof(void*), ir_type_size[type]); + } + } + } + return used_stack; +} + +static void ir_emit_call(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg; + int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]); + + if (IR_IS_CONST_REF(insn->op2)) { + ir_insn *addr_insn = &ctx->ir_base[insn->op2]; + void *addr; + + IR_ASSERT(addr_insn->type == IR_ADDR); + if (addr_insn->op == IR_FUNC) { + addr = ir_resolve_sym_name(ir_get_str(ctx, 
addr_insn->val.i32)); + } else { + addr = (void*)addr_insn->val.addr; + } + if (aarch64_may_use_b(ctx, addr)) { + | bl &addr + } else { + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr); + | blr Rx(IR_REG_INT_TMP) + } + } else { + ir_reg op2_reg = ctx->regs[def][2]; + + IR_ASSERT(op2_reg != IR_REG_NONE); + if (op2_reg & IR_REG_SPILL_LOAD) { + op2_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + | blr Rx(op2_reg) + } + + if (used_stack) { + | add sp, sp, #used_stack + data->call_stack_size -= used_stack; + } + + if (insn->type != IR_VOID) { + if (IR_IS_TYPE_INT(insn->type)) { + def_reg = IR_REG_NUM(ctx->regs[def][0]); + if (def_reg != IR_REG_NONE) { + if (def_reg != IR_REG_INT_RET1) { + ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1); + } + if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { + ir_emit_store(ctx, insn->type, def, def_reg); + } + } else if (ctx->use_lists[def].count > 1) { + ir_emit_store(ctx, insn->type, def, IR_REG_INT_RET1); + } + } else if (IR_IS_TYPE_FP(insn->type)) { + def_reg = IR_REG_NUM(ctx->regs[def][0]); + if (def_reg != IR_REG_NONE) { + if (def_reg != IR_REG_FP_RET1) { + ir_emit_fp_mov(ctx, insn->type, def_reg, IR_REG_FP_RET1); + } + if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { + ir_emit_store(ctx, insn->type, def, def_reg); + } + } else if (ctx->use_lists[def].count > 1) { + ir_emit_store(ctx, insn->type, def, IR_REG_FP_RET1); + } + } else { + IR_ASSERT(0); + } + } +} + +static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]); + + if (used_stack != 0) { + ir_emit_call(ctx, def, insn); + ir_emit_return_void(ctx); + return; + } + + ir_emit_epilogue(ctx); + + if (IR_IS_CONST_REF(insn->op2)) { + ir_insn *addr_insn = &ctx->ir_base[insn->op2]; + void *addr; + + IR_ASSERT(addr_insn->type == IR_ADDR); + if (addr_insn->op == IR_FUNC) { + addr = ir_resolve_sym_name(ir_get_str(ctx, addr_insn->val.i32)); + } else { + addr = (void*)addr_insn->val.addr; + } + + if (aarch64_may_use_b(ctx, addr)) { + | b &addr + } else { + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr); + | br Rx(IR_REG_INT_TMP) + } + } else { + ir_reg op2_reg = ctx->regs[def][2]; + + IR_ASSERT(op2_reg != IR_REG_NONE); + if (op2_reg & IR_REG_SPILL_LOAD) { + op2_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + | br Rx(op2_reg) + } +} + +static void ir_emit_ijmp(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg op2_reg = ctx->regs[def][2]; + + if (op2_reg != IR_REG_NONE) { + if (op2_reg & IR_REG_SPILL_LOAD) { + op2_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + | br Rx(op2_reg) + } else if (IR_IS_CONST_REF(insn->op2)) { + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]); + + if (aarch64_may_use_b(ctx, addr)) { + | b &addr + } else { + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr); + | br Rx(IR_REG_INT_TMP) + } + } else { + IR_ASSERT(0); + } +} + +static void ir_emit_guard(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg op2_reg = ctx->regs[def][2]; + ir_type type = ctx->ir_base[insn->op2].type; + + IR_ASSERT(IR_IS_TYPE_INT(type)); + if (IR_IS_CONST_REF(insn->op2)) { + if ((insn->op == IR_GUARD && insn->op2 == 
IR_FALSE) || + (insn->op == IR_GUARD_NOT && insn->op2 == IR_TRUE)) { + if (IR_IS_CONST_REF(insn->op3)) { + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); + + if (aarch64_may_use_b(ctx, addr)) { + | b &addr + } else { + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr); + | br Rx(IR_REG_INT_TMP) + } + } else { + IR_ASSERT(0); + } + } + return; + } + + IR_ASSERT(op2_reg != IR_REG_NONE); + if (op2_reg & IR_REG_SPILL_LOAD) { + op2_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + + if (IR_IS_CONST_REF(insn->op3)) { + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); + + if (insn->op == IR_GUARD) { + if (ir_type_size[type] == 8) { + | cbz Rx(op2_reg), &addr + } else { + | cbz Rw(op2_reg), &addr + } + } else { + if (ir_type_size[type] == 8) { + | cbnz Rx(op2_reg), &addr + } else { + | cbnz Rw(op2_reg), &addr + } + } + } else { + IR_ASSERT(0); + } +} + +static void ir_emit_guard_jz(ir_ctx *ctx, uint8_t op, void *addr, ir_type type, ir_reg reg) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (op == IR_EQ) { + if (ir_type_size[type] == 8) { + | cbnz Rx(reg), &addr + } else { + | cbnz Rw(reg), &addr + } + } else { + IR_ASSERT(op == IR_NE); + if (ir_type_size[type] == 8) { + | cbz Rx(reg), &addr + } else { + | cbz Rw(reg), &addr + } + } +} + +static void ir_emit_guard_jcc(ir_ctx *ctx, uint8_t op, void *addr, bool int_cmp) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (int_cmp) { + switch (op) { + case IR_EQ: + | beq &addr + break; + case IR_NE: + | bne &addr + break; + case IR_LT: + | blt &addr + break; + case IR_GE: + | bge &addr + break; + case IR_LE: + | ble &addr + break; + case IR_GT: + | bgt &addr + break; + case IR_ULT: + | blo &addr + break; + case IR_UGE: + | bhs &addr + break; + case IR_ULE: + | bls &addr + break; + case IR_UGT: + | bhi &addr + break; + default: + IR_ASSERT(0 && "NIY binary op"); + break; + } + } else { + switch (op) { + case IR_EQ: + | beq &addr + break; + case IR_NE: + | bne &addr + break; + case IR_LT: + | bmi &addr + break; + case IR_GE: + | bge &addr + break; + case IR_LE: + | bls &addr + break; + case IR_GT: + | bgt &addr + break; +// case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break; +// case IR_UGE: fprintf(stderr, "\tjae .LL%d\n", true_block); break; +// case IR_ULE: fprintf(stderr, "\tjbe .LL%d\n", true_block); break; +// case IR_UGT: fprintf(stderr, "\tja .LL%d\n", true_block); break; + default: + IR_ASSERT(0 && "NIY binary op"); + break; + } + } +} + +static void ir_emit_guard_cmp_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *cmp_insn = &ctx->ir_base[insn->op2]; + ir_op op = cmp_insn->op; + ir_type type = ctx->ir_base[cmp_insn->op1].type; + ir_ref op1 = cmp_insn->op1; + ir_ref op2 = cmp_insn->op2; + ir_reg op1_reg = ctx->regs[insn->op2][1]; + ir_reg op2_reg = ctx->regs[insn->op2][2]; + void *addr; + + if (op1_reg != IR_REG_NONE && ((op1_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(op1))) { + op1_reg &= ~IR_REG_SPILL_LOAD; + ir_emit_load(ctx, type, op1_reg, op1); + } + if (op2_reg != IR_REG_NONE && ((op2_reg & IR_REG_SPILL_LOAD) || IR_IS_CONST_REF(op2))) { + op2_reg &= ~IR_REG_SPILL_LOAD; + if (op1 != op2) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + + addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); + + if (IR_IS_CONST_REF(op2) && ctx->ir_base[op2].val.u64 == 0) { + if (op == 
IR_ULT) { + /* always false */ + if (aarch64_may_use_b(ctx, addr)) { + | b &addr + } else { + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr); + | br Rx(IR_REG_INT_TMP) + } + return; + } else if (op == IR_UGE) { + /* always true */ + return; + } else if (op == IR_ULE) { + op = IR_EQ; + } else if (op == IR_UGT) { + op = IR_NE; + } + if (op1_reg != IR_REG_NONE && (op == IR_EQ || op == IR_NE)) { + if (insn->op == IR_GUARD_NOT) { + op ^= 1; // reverse + } + ir_emit_guard_jz(ctx, op, addr, type, op1_reg); + return; + } + } + ir_emit_cmp_int_common(ctx, type, op1_reg, op1, op2_reg, op2); + + if (insn->op == IR_GUARD) { + op ^= 1; // reverse + } + + ir_emit_guard_jcc(ctx, op, addr, 1); +} + +static void ir_emit_guard_cmp_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_op op = ir_emit_cmp_fp_common(ctx, insn->op2, &ctx->ir_base[insn->op2]); + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); + + if (insn->op == IR_GUARD) { + op ^= 1; // reverse + } + ir_emit_guard_jcc(ctx, op, addr, 0); +} + +static void ir_emit_guard_overflow(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *overflow_insn = &ctx->ir_base[insn->op2]; + ir_insn *math_insn = &ctx->ir_base[overflow_insn->op1]; + ir_type type = math_insn->type; + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); + + IR_ASSERT(IR_IS_TYPE_INT(type)); + if (math_insn->op == IR_MUL_OV) { + if (insn->op == IR_GUARD) { + | beq &addr + } else { + | bne &addr + } + } else if (IR_IS_TYPE_SIGNED(type)) { + if (insn->op == IR_GUARD) { + | bvc &addr + } else { + | bvs &addr + } + } else { + if (insn->op == IR_GUARD) { + | bcc &addr + } else { + | bcs &addr + } + } +} + +static void ir_emit_tls(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + uint32_t code; + ir_reg reg = IR_REG_NUM(ctx->regs[def][0]); + + if (ctx->use_lists[def].count == 1) { + /* dead load */ + return; + } + +||#ifdef __APPLE__ +|| code = 0xd53bd060 | reg; // TODO: hard-coded: mrs reg, tpidrro_el0 +| .long code +| and Rx(reg), Rx(reg), #0xfffffffffffffff8 +|//??? MEM_ACCESS_64_WITH_UOFFSET_64 ldr, Rx(reg), Rx(reg), #insn->op2, TMP1 +|//??? MEM_ACCESS_64_WITH_UOFFSET_64 ldr, Rx(reg), Rx(reg), #insn->op3, TMP1 +||#else +|| code = 0xd53bd040 | reg; // TODO: hard-coded: mrs reg, tpidr_el0 +| .long code +||//??? ZEND_ASSERT(insn->op2 <= LDR_STR_PIMM64); +| ldr Rx(reg), [Rx(reg), #insn->op2] +||#endif + if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { + ir_emit_store(ctx, IR_ADDR, def, reg); + } +} + +static void ir_emit_exitcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + + IR_ASSERT(def_reg != IR_REG_NONE); + + | stp d30, d31, [sp, #-16]! + | stp d28, d29, [sp, #-16]! + | stp d26, d27, [sp, #-16]! + | stp d24, d25, [sp, #-16]! + | stp d22, d23, [sp, #-16]! + | stp d20, d21, [sp, #-16]! + | stp d18, d19, [sp, #-16]! + | stp d16, d17, [sp, #-16]! + | stp d14, d15, [sp, #-16]! + | stp d12, d13, [sp, #-16]! + | stp d10, d11, [sp, #-16]! + | stp d8, d9, [sp, #-16]! + | stp d6, d7, [sp, #-16]! + | stp d4, d5, [sp, #-16]! + | stp d2, d3, [sp, #-16]! + | stp d0, d1, [sp, #-16]! + + | str x30, [sp, #-16]! + | stp x28, x29, [sp, #-16]! + | stp x26, x27, [sp, #-16]! + | stp x24, x25, [sp, #-16]! + | stp x22, x23, [sp, #-16]! + | stp x20, x21, [sp, #-16]! 
+ | stp x18, x19, [sp, #-16]! + | stp x16, x17, [sp, #-16]! + | stp x14, x15, [sp, #-16]! + | stp x12, x13, [sp, #-16]! + | stp x10, x11, [sp, #-16]! + | stp x8, x9, [sp, #-16]! + | stp x6, x7, [sp, #-16]! + | stp x4, x5, [sp, #-16]! + | stp x2, x3, [sp, #-16]! + | stp x0, x1, [sp, #-16]! + + | mov Rx(IR_REG_INT_ARG2), sp + | str Rx(IR_REG_INT_ARG2), [sp, #(31*8)] + | mov Rx(IR_REG_INT_ARG1), Rx(IR_REG_INT_TMP) + + if (IR_IS_CONST_REF(insn->op2)) { + ir_insn *addr_insn = &ctx->ir_base[insn->op2]; + void *addr; + + IR_ASSERT(addr_insn->type == IR_ADDR); + if (addr_insn->op == IR_FUNC) { + addr = ir_resolve_sym_name(ir_get_str(ctx, addr_insn->val.i32)); + } else { + addr = (void*)addr_insn->val.addr; + } + + if (aarch64_may_use_b(ctx, addr)) { + | bl &addr + } else { + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr); + | blr Rx(IR_REG_INT_TMP) + } + } else { + IR_ASSERT(0); + } + + | add sp, sp, #(32*8+32*8) + + if (def_reg != IR_REG_INT_RET1) { + ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1); + } + if (ctx->regs[def][0] & IR_REG_SPILL_STORE) { + ir_emit_store(ctx, insn->type, def, def_reg); + } +} + +static void ir_emit_param_move(ir_ctx *ctx, uint8_t type, ir_reg from_reg, ir_reg to_reg, ir_ref to, int32_t offset) +{ + ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + + IR_ASSERT(from_reg != IR_REG_NONE || to_reg != IR_REG_NONE); + + if (IR_IS_TYPE_INT(type)) { + if (from_reg != IR_REG_NONE) { + if (to_reg != IR_REG_NONE) { + ir_emit_mov(ctx, type, to_reg, from_reg); + } else { + ir_emit_store(ctx, type, to, from_reg); + } + } else { + ir_emit_load_mem_int(ctx, type, to_reg, fp, offset); + } + } else { + if (from_reg != IR_REG_NONE) { + if (to_reg != IR_REG_NONE) { + ir_emit_fp_mov(ctx, type, to_reg, from_reg); + } else { + ir_emit_store(ctx, type, to, from_reg); + } + } else { + ir_emit_load_mem_fp(ctx, type, to_reg, fp, offset); + } + } +} + +static void ir_emit_load_params(ir_ctx *ctx) +{ + ir_backend_data *data = ctx->data; + ir_use_list *use_list = &ctx->use_lists[1]; + ir_insn *insn; + ir_ref i, n, *p, use; + int int_param_num = 0; + int fp_param_num = 0; + ir_reg src_reg; + ir_reg dst_reg; + // TODO: Calling convention specific + int int_reg_params_count = IR_REG_INT_ARGS; + int fp_reg_params_count = IR_REG_FP_ARGS; + const int8_t *int_reg_params = _ir_int_reg_params; + const int8_t *fp_reg_params = _ir_fp_reg_params; + int32_t stack_offset = 0; + + if (ctx->flags & IR_USE_FRAME_POINTER) { + stack_offset = sizeof(void*) * 2; /* skip old frame pointer and return address */ + } else { + stack_offset = sizeof(void*) + data->ra_data.stack_frame_size + data->call_stack_size; /* skip return address */ + } + n = use_list->count; + for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { + use = *p; + insn = &ctx->ir_base[use]; + if (insn->op == IR_PARAM) { + if (IR_IS_TYPE_INT(insn->type)) { + if (int_param_num < int_reg_params_count) { + src_reg = int_reg_params[int_param_num]; + } else { + src_reg = IR_REG_NONE; + } + int_param_num++; + } else { + if (fp_param_num < fp_reg_params_count) { + src_reg = fp_reg_params[fp_param_num]; + } else { + src_reg = IR_REG_NONE; + } + fp_param_num++; + } + if (ctx->vregs[use]) { + dst_reg = IR_REG_NUM(ctx->regs[use][0]); + IR_ASSERT(src_reg != IR_REG_NONE || dst_reg != IR_REG_NONE || + stack_offset == ctx->live_intervals[ctx->vregs[use]]->stack_spill_pos + + ((ctx->flags & IR_USE_FRAME_POINTER) ? 
-data->ra_data.stack_frame_size : data->call_stack_size)); + if (src_reg != dst_reg) { + ir_emit_param_move(ctx, insn->type, src_reg, dst_reg, use, stack_offset); + } + if (dst_reg != IR_REG_NONE && (ctx->regs[use][0] & IR_REG_SPILL_STORE)) { + ir_emit_store(ctx, insn->type, use, dst_reg); + } + } + if (src_reg == IR_REG_NONE) { + if (sizeof(void*) == 8) { + stack_offset += sizeof(void*); + } else { + stack_offset += IR_MAX(sizeof(void*), ir_type_size[insn->type]); + } + } + } + } +} + +static ir_reg ir_get_free_reg(ir_type type, ir_regset available) +{ + if (IR_IS_TYPE_INT(type)) { + available = IR_REGSET_INTERSECTION(available, IR_REGSET_GP); + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + available = IR_REGSET_INTERSECTION(available, IR_REGSET_FP); + } + IR_ASSERT(!IR_REGSET_IS_EMPTY(available)); + return IR_REGSET_FIRST(available); +} + +static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to) +{ + ir_backend_data *data = ctx->data; + ir_ref ref = ctx->cfg_blocks[data->dessa_from_block].end; + + if (to == 0) { + if (IR_IS_TYPE_INT(type)) { + if (ctx->regs[ref][0] == IR_REG_NONE) { + ctx->regs[ref][0] = IR_REG_X0; + } + } else if (IR_IS_TYPE_FP(type)) { + if (ctx->regs[ref][1] == IR_REG_NONE) { + ctx->regs[ref][1] = IR_REG_V0; + } + } else { + IR_ASSERT(0); + return 0; + } + } else if (from != 0) { + if (IR_IS_TYPE_INT(type)) { + if (ctx->regs[ref][0] == IR_REG_NONE) { + ctx->regs[ref][0] = IR_REG_X0; + } + } else if (IR_IS_TYPE_FP(type)) { + if (ctx->regs[ref][1] == IR_REG_NONE) { + ctx->regs[ref][1] = IR_REG_V0; + } + } else { + IR_ASSERT(0); + return 0; + } + } + return 1; +} + +static void ir_fix_param_spills(ir_ctx *ctx) +{ + ir_backend_data *data = ctx->data; + ir_use_list *use_list = &ctx->use_lists[1]; + ir_insn *insn; + ir_ref i, n, *p, use; + int int_param_num = 0; + int fp_param_num = 0; + ir_reg src_reg; + // TODO: Calling convention specific + int int_reg_params_count = IR_REG_INT_ARGS; + int fp_reg_params_count = IR_REG_FP_ARGS; + const int8_t *int_reg_params = _ir_int_reg_params; + const int8_t *fp_reg_params = _ir_fp_reg_params; + int32_t stack_offset = 0; + + if (ctx->flags & IR_USE_FRAME_POINTER) { + /* skip old frame pointer and return address */ + stack_offset = sizeof(void*) * 2 + (data->ra_data.stack_frame_size - data->stack_frame_alignment); + } else { + /* skip return address */ + stack_offset = sizeof(void*) + data->ra_data.stack_frame_size; + } + n = use_list->count; + for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { + use = *p; + insn = &ctx->ir_base[use]; + if (insn->op == IR_PARAM) { + if (IR_IS_TYPE_INT(insn->type)) { + if (int_param_num < int_reg_params_count) { + src_reg = int_reg_params[int_param_num]; + } else { + src_reg = IR_REG_NONE; + } + int_param_num++; + } else { + if (fp_param_num < fp_reg_params_count) { + src_reg = fp_reg_params[fp_param_num]; + } else { + src_reg = IR_REG_NONE; + } + fp_param_num++; + } + if (src_reg == IR_REG_NONE) { + if (ctx->vregs[use]) { + ir_live_interval *ival = ctx->live_intervals[ctx->vregs[use]]; + if ((ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) + && ival->stack_spill_pos == -1 + && (ival->next || ival->reg == IR_REG_NONE)) { + ival->stack_spill_pos = stack_offset; + ctx->regs[use][0] = IR_REG_NONE; + } + } + if (sizeof(void*) == 8) { + stack_offset += sizeof(void*); + } else { + stack_offset += IR_MAX(sizeof(void*), ir_type_size[insn->type]); + } + } + } + } +} + +static void ir_allocate_unique_spill_slots(ir_ctx *ctx) +{ + uint32_t b; + ir_block *bb; + ir_insn *insn; + 
ir_ref i, n, j, *p; + uint32_t *rule, insn_flags; + ir_backend_data *data = ctx->data; + ir_regset available = 0; + ir_target_constraints constraints; + uint32_t def_flags; + ir_reg reg; + + ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count); + memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count); + + /* vregs + tmp + fixed + SRATCH + ALL */ + ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*)); + + if (!ctx->arena) { + ctx->arena = ir_arena_create(16 * 1024); + } + + for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) { + IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); + for (i = bb->start, insn = ctx->ir_base + i, rule = ctx->rules + i; i <= bb->end;) { + switch (ctx->rules ? *rule : insn->op) { + case IR_START: + case IR_BEGIN: + case IR_END: + case IR_IF_TRUE: + case IR_IF_FALSE: + case IR_CASE_VAL: + case IR_CASE_DEFAULT: + case IR_MERGE: + case IR_LOOP_BEGIN: + case IR_LOOP_END: + break; + default: + def_flags = ir_get_target_constraints(ctx, i, &constraints); + if (ctx->rules + && *rule != IR_CMP_AND_BRANCH_INT + && *rule != IR_CMP_AND_BRANCH_FP + && *rule != IR_GUARD_CMP_INT + && *rule != IR_GUARD_CMP_FP) { + available = IR_REGSET_SCRATCH; + } + if (ctx->vregs[i]) { + reg = constraints.def_reg; + if (reg != IR_REG_NONE && IR_REGSET_IN(available, reg)) { + IR_REGSET_EXCL(available, reg); + ctx->regs[i][0] = reg | IR_REG_SPILL_STORE; + } else if (def_flags & IR_USE_MUST_BE_IN_REG) { + if (insn->op == IR_VLOAD + && ctx->live_intervals[ctx->vregs[i]] + && ctx->live_intervals[ctx->vregs[i]]->stack_spill_pos != -1) { + /* pass */ + } else if (insn->op != IR_PARAM) { + reg = ir_get_free_reg(insn->type, available); + IR_REGSET_EXCL(available, reg); + ctx->regs[i][0] = reg | IR_REG_SPILL_STORE; + } + } + if (!ctx->live_intervals[ctx->vregs[i]]) { + ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); + memset(ival, 0, sizeof(ir_live_interval)); + ctx->live_intervals[ctx->vregs[i]] = ival; + ival->type = insn->type; + ival->reg = IR_REG_NONE; + ival->vreg = ctx->vregs[i]; + ival->stack_spill_pos = -1; + if (insn->op == IR_PARAM && reg == IR_REG_NONE) { + ival->flags |= IR_LIVE_INTERVAL_MEM_PARAM; + } else { + ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type, &data->ra_data); + } + } else if (insn->op == IR_PARAM) { + IR_ASSERT(0 && "unexpected PARAM"); + return; + } + } else if (insn->op == IR_VAR) { + ir_use_list *use_list = &ctx->use_lists[i]; + ir_ref n = use_list->count; + + if (n > 0) { + int32_t stack_spill_pos = insn->op3 = ir_allocate_spill_slot(ctx, insn->type, &data->ra_data); + ir_ref i, *p, use; + ir_insn *use_insn; + + for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { + use = *p; + use_insn = &ctx->ir_base[use]; + if (use_insn->op == IR_VLOAD) { + if (ctx->vregs[use] + && !ctx->live_intervals[ctx->vregs[use]]) { + ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); + memset(ival, 0, sizeof(ir_live_interval)); + ctx->live_intervals[ctx->vregs[use]] = ival; + ival->type = insn->type; + ival->reg = IR_REG_NONE; + ival->vreg = ctx->vregs[use]; + ival->stack_spill_pos = stack_spill_pos; + } + } else if (use_insn->op == IR_VSTORE) { + if (!IR_IS_CONST_REF(use_insn->op3) + && ctx->vregs[use_insn->op3] + && !ctx->live_intervals[ctx->vregs[use_insn->op3]]) { + ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); + memset(ival, 0, sizeof(ir_live_interval)); + 
ctx->live_intervals[ctx->vregs[use_insn->op3]] = ival; + ival->type = insn->type; + ival->reg = IR_REG_NONE; + ival->vreg = ctx->vregs[use_insn->op3]; + ival->stack_spill_pos = stack_spill_pos; + } + } + } + } + } + + insn_flags = ir_op_flags[insn->op]; + n = constraints.tmps_count; + if (n) { + do { + n--; + if (constraints.tmp_regs[n].type) { + ir_reg reg = ir_get_free_reg(constraints.tmp_regs[n].type, available); + IR_REGSET_EXCL(available, reg); + ctx->regs[i][constraints.tmp_regs[n].num] = reg; + } else if (constraints.tmp_regs[n].reg == IR_REG_SCRATCH) { + available = IR_REGSET_DIFFERENCE(available, IR_REGSET_SCRATCH); + } else { + IR_REGSET_EXCL(available, constraints.tmp_regs[n].reg); + } + } while (n); + } + n = ir_input_edges_count(ctx, insn); + for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { + ir_ref input = *p; + if (IR_OPND_KIND(insn_flags, j) == IR_OPND_DATA && input > 0 && ctx->vregs[input]) { + if ((def_flags & IR_DEF_REUSES_OP1_REG) && j == 1) { + ir_reg reg = ctx->regs[i][0] & ~IR_REG_SPILL_STORE; + ctx->regs[i][1] = reg | IR_REG_SPILL_LOAD; + } else { + uint8_t use_flags = IR_USE_FLAGS(def_flags, j); + ir_reg reg = (j < constraints.hints_count) ? constraints.hints[j] : IR_REG_NONE; + + if (reg != IR_REG_NONE && IR_REGSET_IN(available, reg)) { + IR_REGSET_EXCL(available, reg); + ctx->regs[i][j] = reg | IR_REG_SPILL_LOAD; + } else if (j > 1 && input == insn->op1 && ctx->regs[i][1] != IR_REG_NONE) { + ctx->regs[i][j] = ctx->regs[i][1]; + } else if (use_flags & IR_USE_MUST_BE_IN_REG) { + reg = ir_get_free_reg(ctx->ir_base[input].type, available); + IR_REGSET_EXCL(available, reg); + ctx->regs[i][j] = reg | IR_REG_SPILL_LOAD; + } + } + } + } + break; + } + n = ir_insn_len(insn); + i += n; + insn += n; + rule += n; + } + if (bb->flags & IR_BB_DESSA_MOVES) { + data->dessa_from_block = b; + ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps); + } + } + + if (ctx->fixed_save_regset) { + ir_reg reg; + (void) reg; + + data->used_preserved_regs = (ir_regset)ctx->fixed_save_regset; + IR_REGSET_FOREACH(data->used_preserved_regs, reg) { + data->ra_data.stack_frame_size += sizeof(void*); + } IR_REGSET_FOREACH_END(); + } + + if ((ctx->flags & IR_HAS_CALLS) && !(ctx->flags & IR_FUNCTION)) { + while (IR_ALIGNED_SIZE(data->ra_data.stack_frame_size, 16) != data->ra_data.stack_frame_size) { + data->ra_data.stack_frame_size += sizeof(void*); + data->stack_frame_alignment += sizeof(void*); + } + } else if (ctx->flags & IR_HAS_CALLS) { + ctx->flags |= IR_USE_FRAME_POINTER; + /* Stack must be 16 byte aligned */ + /* Stack must be 16 byte aligned */ + if (!(ctx->flags & IR_FUNCTION)) { + while (IR_ALIGNED_SIZE(data->ra_data.stack_frame_size, 16) != data->ra_data.stack_frame_size) { + data->ra_data.stack_frame_size += sizeof(void*); + data->stack_frame_alignment += sizeof(void*); + } + } else if (ctx->flags & IR_USE_FRAME_POINTER) { + while (IR_ALIGNED_SIZE(data->ra_data.stack_frame_size + sizeof(void*) * 2, 16) != data->ra_data.stack_frame_size + sizeof(void*) * 2) { + data->ra_data.stack_frame_size += sizeof(void*); + data->stack_frame_alignment += sizeof(void*); + } + } else { + while (IR_ALIGNED_SIZE(data->ra_data.stack_frame_size, 16) != data->ra_data.stack_frame_size) { + data->ra_data.stack_frame_size += sizeof(void*); + data->stack_frame_alignment += sizeof(void*); + } + } + } + + ir_fix_param_spills(ctx); +} + +static void ir_preallocate_call_stack(ir_ctx *ctx, ir_backend_data *data) +{ + int call_stack_size, peak_call_stack_size = 0; + ir_ref i, n; + ir_insn *insn; + + for (i = 1, insn = 
ctx->ir_base + 1; i < ctx->insns_count;) { + if (insn->op == IR_CALL) { + call_stack_size = ir_call_used_stack(ctx, insn); + if (call_stack_size > peak_call_stack_size) { + peak_call_stack_size = call_stack_size; + } + } + n = ir_insn_len(insn); + i += n; + insn += n; + } + if (peak_call_stack_size) { + data->call_stack_size = peak_call_stack_size; + ctx->flags |= IR_PREALLOCATED_STACK; + } +} + +static void ir_calc_stack_frame_size(ir_ctx *ctx, ir_backend_data *data) +{ + int i; + ir_live_interval **p, *ival; + uint32_t additional_size = 0; + ir_regset fixed_regset = (ctx->flags & IR_FUNCTION) ? (ir_regset)ctx->fixed_regset : IR_REGSET_PRESERVED; + + if (ctx->fixed_save_regset) { + ir_reg reg; + (void) reg; + + data->used_preserved_regs = (ir_regset)ctx->fixed_save_regset; + IR_REGSET_FOREACH(data->used_preserved_regs, reg) { + additional_size += sizeof(void*); + } IR_REGSET_FOREACH_END(); + } + + for (i = 1, p = ctx->live_intervals + i; i <= ctx->vregs_count; i++, p++) { + ival = *p; + if (ival) { + if (ival->reg != IR_REG_NONE) { + if (!IR_REGSET_IN(data->used_preserved_regs, ival->reg) + && !IR_REGSET_IN(fixed_regset, ival->reg) + && IR_REGSET_IN(IR_REGSET_PRESERVED, ival->reg)) { + if (!ctx->fixed_save_regset) { + IR_REGSET_INCL(data->used_preserved_regs, ival->reg); + additional_size += sizeof(void*); + } else { + // TODO: Preserved reg and fixed frame conflict ??? + // IR_ASSERT(!ctx->fixed_save_regset && "NIY"); + } + } + } + } + } + + ival = ctx->live_intervals[0]; + while (ival) { + if (ival->reg != IR_REG_NONE) { + if (!IR_REGSET_IN(data->used_preserved_regs, ival->reg) + && !IR_REGSET_IN(fixed_regset, ival->reg) + && IR_REGSET_IN(IR_REGSET_PRESERVED, ival->reg)) { + IR_REGSET_INCL(data->used_preserved_regs, ival->reg); + additional_size += sizeof(void*); + } + } + ival = ival->next; + } + + data->ra_data.stack_frame_size = IR_ALIGNED_SIZE(data->ra_data.stack_frame_size, sizeof(void*)); + data->ra_data.stack_frame_size += additional_size; + + if ((ctx->flags & IR_HAS_CALLS) && !(ctx->flags & IR_FUNCTION)) { + while (IR_ALIGNED_SIZE(data->ra_data.stack_frame_size, 16) != data->ra_data.stack_frame_size) { + data->ra_data.stack_frame_size += sizeof(void*); + data->stack_frame_alignment += sizeof(void*); + } + } else if (ctx->flags & IR_HAS_CALLS) { + ctx->flags |= IR_USE_FRAME_POINTER; + /* Stack must be 16 byte aligned */ + if (!(ctx->flags & IR_FUNCTION)) { + while (IR_ALIGNED_SIZE(data->ra_data.stack_frame_size, 16) != data->ra_data.stack_frame_size) { + data->ra_data.stack_frame_size += sizeof(void*); + data->stack_frame_alignment += sizeof(void*); + } + } else if (ctx->flags & IR_USE_FRAME_POINTER) { + while (IR_ALIGNED_SIZE(data->ra_data.stack_frame_size + sizeof(void*) * 2, 16) != data->ra_data.stack_frame_size + sizeof(void*) * 2) { + data->ra_data.stack_frame_size += sizeof(void*); + data->stack_frame_alignment += sizeof(void*); + } + } else { + if (!(ctx->flags & IR_NO_STACK_COMBINE)) { + ir_preallocate_call_stack(ctx, data); + } + while (IR_ALIGNED_SIZE(data->ra_data.stack_frame_size + data->call_stack_size, 16) != + data->ra_data.stack_frame_size + data->call_stack_size) { + data->ra_data.stack_frame_size += sizeof(void*); + data->stack_frame_alignment += sizeof(void*); + } + } + } + + ir_fix_param_spills(ctx); +} + +static void* dasm_labels[ir_lb_MAX]; + +void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) +{ + uint32_t b, n, target; + ir_block *bb; + ir_ref i; + ir_insn *insn; + uint32_t *rule; + ir_backend_data data; + dasm_State **Dst; + int ret; + void *entry; + 
size_t size; + + data.ra_data.stack_frame_size = (!ctx->live_intervals) ? 0 : ctx->stack_frame_size; + data.ra_data.unused_slot_4 = 0; + data.ra_data.unused_slot_2 = 0; + data.ra_data.unused_slot_1 = 0; + data.stack_frame_alignment = 0; + data.call_stack_size = 0; + data.used_preserved_regs = 0; + data.rodata_label = 0; + data.jmp_table_label = 0; + ctx->data = &data; + + if (!ctx->live_intervals) { + ir_allocate_unique_spill_slots(ctx); + } else { + ir_calc_stack_frame_size(ctx, &data); + } + + if (ctx->fixed_stack_frame_size != -1) { + if (ctx->fixed_stack_red_zone) { + IR_ASSERT(ctx->fixed_stack_red_zone == ctx->fixed_stack_frame_size + ctx->fixed_call_stack_size); + } + IR_ASSERT(data.ra_data.stack_frame_size <= ctx->fixed_stack_frame_size); + data.ra_data.stack_frame_size = ctx->fixed_stack_frame_size; + data.call_stack_size = ctx->fixed_call_stack_size; + data.stack_frame_alignment = 0; + } + + Dst = &data.dasm_state; + data.dasm_state = NULL; + dasm_init(&data.dasm_state, DASM_MAXSECTION); + dasm_setupglobal(&data.dasm_state, dasm_labels, ir_lb_MAX); + dasm_setup(&data.dasm_state, dasm_actions); + /* labels for each block + for each constant + rodata label + jmp_table label + for each entry */ + dasm_growpc(&data.dasm_state, ctx->cfg_blocks_count + 1 + ctx->consts_count + 1 + 1 + 1 + ctx->entries_count); + + if (!(ctx->flags & IR_SKIP_PROLOGUE)) { + ir_emit_prologue(ctx); + } + if (ctx->flags & IR_FUNCTION) { + ir_emit_load_params(ctx); + } + + for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) { + IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); + if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) { + continue; + } + |=>b: + + i = bb->start; + insn = ctx->ir_base + i; + if (bb->flags & IR_BB_ENTRY) { + uint32_t label = ctx->cfg_blocks_count + ctx->consts_count + 4 + insn->op3; + + |=>label: + ir_emit_prologue(ctx); + ctx->entries[insn->op3] = i; + } + + /* skip first instruction */ + n = ir_insn_len(insn); + i += n; + insn += n; + rule = ctx->rules + i; + + while (i <= bb->end) { + if (!((*rule) & (IR_FUSED|IR_SKIPPED))) + switch (*rule) { + case IR_VAR: + case IR_PARAM: + case IR_PI: + case IR_PHI: + case IR_SNAPSHOT: + break; + case IR_MUL_PWR2: + case IR_DIV_PWR2: + case IR_MOD_PWR2: + ir_emit_mul_div_mod_pwr2(ctx, i, insn); + break; + case IR_SHIFT: + ir_emit_shift(ctx, i, insn); + break; + case IR_SHIFT_CONST: + ir_emit_shift_const(ctx, i, insn); + break; + case IR_OP_INT: + ir_emit_op_int(ctx, i, insn); + break; + case IR_OP_FP: + ir_emit_op_fp(ctx, i, insn); + break; + case IR_BINOP_INT: + ir_emit_binop_int(ctx, i, insn); + break; + case IR_BINOP_FP: + ir_emit_binop_fp(ctx, i, insn); + break; + case IR_CMP_INT: + ir_emit_cmp_int(ctx, i, insn); + break; + case IR_CMP_FP: + ir_emit_cmp_fp(ctx, i, insn); + break; + case IR_SEXT: + ir_emit_sext(ctx, i, insn); + break; + case IR_ZEXT: + ir_emit_zext(ctx, i, insn); + break; + case IR_TRUNC: + ir_emit_trunc(ctx, i, insn); + break; + case IR_BITCAST: + ir_emit_bitcast(ctx, i, insn); + break; + case IR_INT2FP: + ir_emit_int2fp(ctx, i, insn); + break; + case IR_FP2INT: + ir_emit_fp2int(ctx, i, insn); + break; + case IR_FP2FP: + ir_emit_fp2fp(ctx, i, insn); + break; + case IR_COPY_INT: + ir_emit_copy_int(ctx, i, insn); + break; + case IR_COPY_FP: + ir_emit_copy_fp(ctx, i, insn); + break; + case IR_CMP_AND_BRANCH_INT: + ir_emit_cmp_and_branch_int(ctx, b, i, insn); + break; + case IR_CMP_AND_BRANCH_FP: + ir_emit_cmp_and_branch_fp(ctx, b, i, insn); + break; + case IR_GUARD_CMP_INT: + 
ir_emit_guard_cmp_int(ctx, b, i, insn); + break; + case IR_GUARD_CMP_FP: + ir_emit_guard_cmp_fp(ctx, b, i, insn); + break; + case IR_IF_INT: + ir_emit_if_int(ctx, b, i, insn); + break; + case IR_SWITCH: + ir_emit_switch(ctx, b, i, insn); + break; + case IR_MIN_MAX_INT: + ir_emit_min_max_int(ctx, i, insn); + break; + case IR_OVERFLOW: + ir_emit_overflow(ctx, i, insn); + break; + case IR_OVERFLOW_AND_BRANCH: + ir_emit_overflow_and_branch(ctx, b, i, insn); + break; + case IR_END: + case IR_LOOP_END: + if (bb->flags & IR_BB_OSR_ENTRY_LOADS) { + ir_emit_osr_entry_loads(ctx, b, bb); + } + if (bb->flags & IR_BB_DESSA_MOVES) { + ir_emit_dessa_moves(ctx, b, bb); + } + do { + ir_ref succ = ctx->cfg_edges[bb->successors]; + + if (UNEXPECTED(bb->successors_count == 2)) { + if (ctx->cfg_blocks[succ].flags & IR_BB_ENTRY) { + succ = ctx->cfg_edges[bb->successors + 1]; + } else { + IR_ASSERT(ctx->cfg_blocks[ctx->cfg_edges[bb->successors + 1]].flags & IR_BB_ENTRY); + } + } else { + IR_ASSERT(bb->successors_count == 1); + } + target = ir_skip_empty_target_blocks(ctx, succ); + if (b == ctx->cfg_blocks_count || target != ir_skip_empty_next_blocks(ctx, b + 1)) { + | b =>target + } + } while (0); + break; + case IR_RETURN_VOID: + ir_emit_return_void(ctx); + break; + case IR_RETURN_INT: + ir_emit_return_int(ctx, i, insn); + break; + case IR_RETURN_FP: + ir_emit_return_fp(ctx, i, insn); + break; + case IR_CALL: + ir_emit_call(ctx, i, insn); + break; + case IR_TAILCALL: + ir_emit_tailcall(ctx, i, insn); + break; + case IR_IJMP: + ir_emit_ijmp(ctx, i, insn); + break; + case IR_REG_BINOP_INT: + ir_emit_reg_binop_int(ctx, i, insn); + break; + case IR_VADDR: + ir_emit_vaddr(ctx, i, insn); + break; + case IR_VLOAD: + ir_emit_vload(ctx, i, insn); + break; + case IR_VSTORE: + ir_emit_vstore(ctx, i, insn); + break; + case IR_RLOAD: + ir_emit_rload(ctx, i, insn); + break; + case IR_RSTORE: + ir_emit_rstore(ctx, i, insn); + break; + case IR_LOAD_INT: + ir_emit_load_int(ctx, i, insn); + break; + case IR_LOAD_FP: + ir_emit_load_fp(ctx, i, insn); + break; + case IR_STORE_INT: + ir_emit_store_int(ctx, i, insn); + break; + case IR_STORE_FP: + ir_emit_store_fp(ctx, i, insn); + break; + case IR_ALLOCA: + ir_emit_alloca(ctx, i, insn); + break; + case IR_AFREE: + ir_emit_afree(ctx, i, insn); + break; + case IR_EXITCALL: + ir_emit_exitcall(ctx, i, insn); + break; + case IR_GUARD: + case IR_GUARD_NOT: + ir_emit_guard(ctx, i, insn); + break; + case IR_GUARD_OVERFLOW: + ir_emit_guard_overflow(ctx, i, insn); + break; + case IR_TLS: + ir_emit_tls(ctx, i, insn); + break; + default: + IR_ASSERT(0 && "NIY rule/insruction"); + break; + } + n = ir_insn_len(insn); + i += n; + insn += n; + rule += n; + } + } + + if (data.rodata_label) { + |.rodata + } + for (i = IR_UNUSED + 1, insn = ctx->ir_base - i; i < ctx->consts_count; i++, insn--) { + if (insn->const_flags & IR_CONST_EMIT) { + if (IR_IS_TYPE_FP(insn->type)) { + int label = ctx->cfg_blocks_count + i; + + if (!data.rodata_label) { + data.rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2; + + |.rodata + |=>data.rodata_label: + } + if (insn->type == IR_DOUBLE) { + |.align 8 + |=>label: + |.long insn->val.u32, insn->val.u32_hi + } else { + IR_ASSERT(insn->type == IR_FLOAT); + |.align 4 + |=>label: + |.long insn->val.u32 + } + } else if (insn->op == IR_STR) { + int label = ctx->cfg_blocks_count + i; + const char *str = ir_get_str(ctx, insn->val.i32); + int i = 0; + + if (!data.rodata_label) { + data.rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2; + + |.rodata + 
|=>data.rodata_label: + } + |.align 8 + |=>label: + while (1) { + char c; + uint32_t w = 0; + int j; + + for (j = 0; j < 4; j++) { + c = str[i]; + if (!c) { + break; + } else if (c == '\\') { + if (str[i+1] == '\\') { + i++; + c = '\\'; + } else if (str[i+1] == '\'') { + i++; + c = '\''; + } else if (str[i+1] == '"') { + i++; + c = '"'; + } else if (str[i+1] == 'a') { + i++; + c = '\a'; + } else if (str[i+1] == 'b') { + i++; + c = '\b'; + } else if (str[i+1] == 'e') { + i++; + c = '\e'; + } else if (str[i+1] == 'f') { + i++; + c = '\f'; + } else if (str[i+1] == 'n') { + i++; + c = '\n'; + } else if (str[i+1] == 'r') { + i++; + c = '\r'; + } else if (str[i+1] == 't') { + i++; + c = '\t'; + } else if (str[i+1] == 'v') { + i++; + c = '\v'; + } else if (str[i+1] == '?') { + i++; + c = 0x3f; + } + } + w |= c << (8 * j); + i++; + } + | .long w + if (!c) { + break; + } + } + + } else { + IR_ASSERT(0); + } + } + } + if (data.rodata_label) { + |.code + } + + ret = dasm_link(&data.dasm_state, size_ptr); + if (ret != DASM_S_OK) { + IR_ASSERT(0); + dasm_free(&data.dasm_state); + if (ctx->code_buffer == NULL) { + ir_mem_unmap(entry, size); + } + return NULL; + } + size = *size_ptr; + + if (ctx->code_buffer != NULL) { + if (IR_ALIGNED_SIZE(size, 16) > ctx->code_buffer_size) { + dasm_free(&data.dasm_state); + return NULL; + } + entry = ctx->code_buffer; + IR_ASSERT((uintptr_t)entry % 16 == 0); + } else { + entry = ir_mem_mmap(size); + ir_mem_unprotect(entry, size); + } + + ret = dasm_encode(&data.dasm_state, entry); + if (ret != DASM_S_OK) { + IR_ASSERT(0); + dasm_free(&data.dasm_state); + if (ctx->code_buffer == NULL) { + ir_mem_unmap(entry, size); + } + return NULL; + } + + if (data.jmp_table_label) { + uint32_t offset = dasm_getpclabel(&data.dasm_state, data.jmp_table_label); + ctx->jmp_table_offset = offset; + } else { + ctx->jmp_table_offset = 0; + } + if (data.rodata_label) { + uint32_t offset = dasm_getpclabel(&data.dasm_state, data.rodata_label); + ctx->rodata_offset = offset; + } else { + ctx->rodata_offset = 0; + } + + if (ctx->entries_count) { + /* For all entries */ + i = ctx->entries_count; + do { + ir_insn *insn = &ctx->ir_base[ctx->entries[--i]]; + uint32_t offset = dasm_getpclabel(&data.dasm_state, ctx->cfg_blocks_count + ctx->consts_count + 4 + insn->op3); + insn->op3 = offset; + } while (i != 0); + } + + dasm_free(&data.dasm_state); + + ir_mem_flush(entry, size); + + if (ctx->code_buffer == NULL) { + ir_mem_protect(entry, size); + } + + return entry; +} + +const void *ir_emit_exitgroup(uint32_t first_exit_point, uint32_t exit_points_per_group, const void *exit_addr, void *code_buffer, size_t code_buffer_size, size_t *size_ptr) +{ + void *entry; + size_t size; + uint32_t i; + dasm_State **Dst, *dasm_state; + int ret; + + /* IR_ASSERT(aarch64_may_use_b(ctx, exit_addr)) */ + IR_ASSERT(code_buffer); + if ((char*)exit_addr >= (char*)code_buffer && (char*)exit_addr < (char*)code_buffer + code_buffer_size) { + IR_ASSERT(code_buffer_size < B_IMM); + } else if ((char*)exit_addr >= (char*)code_buffer + code_buffer_size) { + IR_ASSERT(((char*)exit_addr - (char*)code_buffer) < B_IMM); + } else if ((char*)exit_addr < (char*)code_buffer) { + IR_ASSERT(((((char*)(code_buffer)) + code_buffer_size) - (char*)exit_addr) < B_IMM); + } else { + IR_ASSERT(0); + } + + Dst = &dasm_state; + dasm_state = NULL; + dasm_init(&dasm_state, DASM_MAXSECTION); + dasm_setupglobal(&dasm_state, dasm_labels, ir_lb_MAX); + dasm_setup(&dasm_state, dasm_actions); + + | bl >2 + |1: + for (i = 1; i < exit_points_per_group; i++) 
{ + | bl >2 + } + |2: + | adr Rx(IR_REG_INT_TMP), <1 + | sub Rx(IR_REG_INT_TMP), lr, Rx(IR_REG_INT_TMP) + | lsr Rx(IR_REG_INT_TMP), Rx(IR_REG_INT_TMP), #2 + if (first_exit_point) { + | add Rx(IR_REG_INT_TMP), Rx(IR_REG_INT_TMP), #first_exit_point + } + | b &exit_addr + + ret = dasm_link(&dasm_state, &size); + if (ret != DASM_S_OK) { + IR_ASSERT(0); + dasm_free(&dasm_state); + return NULL; + } + + if (code_buffer != NULL) { + if (IR_ALIGNED_SIZE(size, 16) > code_buffer_size) { + dasm_free(&dasm_state); + return NULL; + } + entry = code_buffer; + IR_ASSERT((uintptr_t)entry % 16 == 0); + } else { + entry = ir_mem_mmap(size); + ir_mem_unprotect(entry, size); + } + + ret = dasm_encode(&dasm_state, entry); + if (ret != DASM_S_OK) { + IR_ASSERT(0); + dasm_free(&dasm_state); + if (code_buffer == NULL) { + ir_mem_unmap(entry, size); + } + return NULL; + } + + dasm_free(&dasm_state); + + ir_mem_flush(entry, size); + + if (code_buffer == NULL) { + ir_mem_protect(entry, size); + } + + *size_ptr = size; + return entry; +} diff --git a/ir_riscv64.h b/ir_riscv64.h new file mode 100644 index 0000000..229b97e --- /dev/null +++ b/ir_riscv64.h @@ -0,0 +1,171 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (Aarch64 CPU specific definitions) + * Copyright (C) 2022 Zend by Perforce. + * Authors: Dmitry Stogov + */ + +#ifndef IR_AARCH64_H +#define IR_AARCH64_H + +#define IR_GP_REGS(_) \ + _(X0, x0) \ + _(X1, x1) \ + _(X2, x2) \ + _(X3, x3) \ + _(X4, x4) \ + _(X5, x5) \ + _(X6, x6) \ + _(X7, x7) \ + _(X8, x8) \ + _(X9, x9) \ + _(X10, x10) \ + _(X11, x11) \ + _(X12, x12) \ + _(X13, x13) \ + _(X14, x14) \ + _(X15, x15) \ + _(X16, x16) \ + _(X17, x17) \ + _(X18, x18) \ + _(X19, x19) \ + _(X20, x20) \ + _(X21, x21) \ + _(X22, x22) \ + _(X23, x23) \ + _(X24, x24) \ + _(X25, x25) \ + _(X26, x26) \ + _(X27, x27) \ + _(X28, x28) \ + _(X29, x29) \ + _(X30, x30) \ + _(X31, x31) \ + _(PC, pc) \ + +# define IR_FP_REGS(_) \ + _(F0, f0) \ + _(F1, f1) \ + _(F2, f2) \ + _(F3, f3) \ + _(F4, f4) \ + _(F5, f5) \ + _(F6, f6) \ + _(F7, f7) \ + _(F8, f8) \ + _(F9, f9) \ + _(F10, f10) \ + _(F11, f11) \ + _(F12, f12) \ + _(F13, f13) \ + _(F14, f14) \ + _(F15, f15) \ + _(F16, f16) \ + _(F17, f17) \ + _(F18, f18) \ + _(F19, f19) \ + _(F20, f20) \ + _(F21, f21) \ + _(F22, f22) \ + _(F23, f23) \ + _(F24, f24) \ + _(F25, f25) \ + _(F26, f26) \ + _(F27, f27) \ + _(F28, f28) \ + _(F29, f29) \ + _(F30, f30) \ + _(F31, f31) \ + +#define IR_GP_REG_ENUM(code, name) \ + IR_REG_ ## code, + +#define IR_FP_REG_ENUM(code, name) \ + IR_REG_ ## code, + +enum _ir_reg { + _IR_REG_NONE = -1, + IR_GP_REGS(IR_GP_REG_ENUM) + IR_FP_REGS(IR_FP_REG_ENUM) + IR_REG_NUM, +}; + +#define IR_REG_GP_FIRST IR_REG_X0 +#define IR_REG_FP_FIRST IR_REG_F0 +#define IR_REG_GP_LAST (IR_REG_FP_FIRST - 1) +#define IR_REG_FP_LAST (IR_REG_NUM - 1) +#define IR_REG_SCRATCH (IR_REG_NUM) /* special name for regset */ +#define IR_REG_ALL (IR_REG_NUM + 1) /* special name for regset */ + +#define IR_REGSET_64BIT 1 + +#define IR_REG_STACK_POINTER \ + IR_REG_X2 +#define IR_REG_FRAME_POINTER \ + IR_REG_X8 + +#define IR_REG_LR IR_REG_X1 +#define IR_REG_ZR IR_REG_X0 + +#define IR_REGSET_FIXED \ + ( IR_REGSET(IR_REG_ZR) \ + | IR_REGSET(IR_REG_LR) \ + | IR_REGSET(IR_REG_STACK_POINTER) \ + | IR_REGSET(IR_REG_FRAME_POINTER)) +#define IR_REGSET_GP \ + IR_REGSET_DIFFERENCE(IR_REGSET_INTERVAL(IR_REG_GP_FIRST, IR_REG_GP_LAST), IR_REGSET_FIXED) +#define IR_REGSET_FP \ + IR_REGSET_DIFFERENCE(IR_REGSET_INTERVAL(IR_REG_FP_FIRST, IR_REG_FP_LAST), IR_REGSET_FIXED) + +/* Calling Convention */ 
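+/* The definitions below are intended to follow the standard RISC-V LP64D
+ * calling convention: integer arguments and return values use a0-a7
+ * (x10-x17), floating-point arguments and return values use fa0-fa7
+ * (f10-f17). The concrete values are a first sketch for this port and may
+ * still need review against the psABI (e.g. IR_MAX_REG_ARGS, IR_SHADOW_ARGS). */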
+#define IR_REG_INT_RET1 IR_REG_X10
+#define IR_REG_FP_RET1 IR_REG_F10
+#define IR_REG_INT_ARGS 8
+#define IR_REG_FP_ARGS 8
+#define IR_REG_INT_ARG1 IR_REG_X10
+#define IR_REG_INT_ARG2 IR_REG_X11
+#define IR_REG_INT_ARG3 IR_REG_X12
+#define IR_REG_INT_ARG4 IR_REG_X13
+#define IR_REG_INT_ARG5 IR_REG_X14
+#define IR_REG_INT_ARG6 IR_REG_X15
+#define IR_REG_INT_ARG7 IR_REG_X16
+#define IR_REG_INT_ARG8 IR_REG_X17
+#define IR_REG_FP_ARG1 IR_REG_F10
+#define IR_REG_FP_ARG2 IR_REG_F11
+#define IR_REG_FP_ARG3 IR_REG_F12
+#define IR_REG_FP_ARG4 IR_REG_F13
+#define IR_REG_FP_ARG5 IR_REG_F14
+#define IR_REG_FP_ARG6 IR_REG_F15
+#define IR_REG_FP_ARG7 IR_REG_F16
+#define IR_REG_FP_ARG8 IR_REG_F17
+#define IR_MAX_REG_ARGS 16
+#define IR_SHADOW_ARGS 0
+
+/* Caller-saved (scratch) and callee-saved (preserved) sets per the RISC-V
+ * LP64D ABI: t0-t6/a0-a7 and ft0-ft11/fa0-fa7 are caller-saved, s0-s11 and
+ * fs0-fs11 are callee-saved. The AArch64 V-register intervals copied from
+ * ir_aarch64.h do not exist in this register file. */
+# define IR_REGSET_SCRATCH \
+	(IR_REGSET_INTERVAL(IR_REG_X5, IR_REG_X7) \
+	| IR_REGSET_INTERVAL(IR_REG_X10, IR_REG_X17) \
+	| IR_REGSET_INTERVAL(IR_REG_X28, IR_REG_X31) \
+	| IR_REGSET_INTERVAL(IR_REG_F0, IR_REG_F7) \
+	| IR_REGSET_INTERVAL(IR_REG_F10, IR_REG_F17) \
+	| IR_REGSET_INTERVAL(IR_REG_F28, IR_REG_F31))
+
+# define IR_REGSET_PRESERVED \
+	(IR_REGSET_INTERVAL(IR_REG_X8, IR_REG_X9) \
+	| IR_REGSET_INTERVAL(IR_REG_X18, IR_REG_X27) \
+	| IR_REGSET_INTERVAL(IR_REG_F8, IR_REG_F9) \
+	| IR_REGSET_INTERVAL(IR_REG_F18, IR_REG_F27))
+
+typedef struct _ir_tmp_reg {
+	union {
+		uint8_t num;
+		int8_t reg;
+	};
+	uint8_t type;
+	uint8_t start;
+	uint8_t end;
+} ir_tmp_reg;
+
+struct _ir_target_constraints {
+	int8_t def_reg;
+	uint8_t tmps_count;
+	uint8_t hints_count;
+	ir_tmp_reg tmp_regs[3];
+	int8_t hints[IR_MAX_REG_ARGS + 3];
+};
+
+#endif /* IR_AARCH64_H */