#define IR_X64 1 #include "ir.h" #include "ir_private.h" #include "ir_x86.h" #ifdef _WIN32 # define IR_SET_ALIGNED(alignment, decl) __declspec(align(alignment)) decl #elif defined(HAVE_ATTRIBUTE_ALIGNED) # define IR_SET_ALIGNED(alignment, decl) decl __attribute__ ((__aligned__ (alignment))) #else # define IR_SET_ALIGNED(alignment, decl) decl #endif #include "dynasm/dasm_proto.h" #include "dynasm/dasm_x86.h" |.if X64 |.arch x64 |.else |.arch x86 |.endif |.actionlist dasm_actions |.globals ir_lb |.section code, cold_code, rodata, jmp_table |.macro ASM_REG_OP, op, type, reg || switch (ir_type_size[type]) { || case 1: | op Rb(reg) || break; || case 2: | op Rw(reg) || break; || case 4: | op Rd(reg) || break; || case 8: | op Rq(reg) || break; || default: || IR_ASSERT(0); || } |.endmacro |.macro ASM_MEM_OP, op, type, mem || switch (ir_type_size[type]) { || case 1: | op byte mem || break; || case 2: | op word mem || break; || case 4: | op dword mem || break; || case 8: | op qword mem || break; || default: || IR_ASSERT(0); || } |.endmacro |.macro ASM_REF_OP, op, type, ref || do { || ir_reg _reg = ir_ref_reg(ctx, ref); || if (_reg >= 0) { | ASM_REG_OP op, type, _reg || } else { || int32_t offset = ir_ref_spill_slot(ctx, ref); || if (ctx->flags & IR_USE_FRAME_POINTER) { | ASM_MEM_OP op, type, [rbp+offset] || } else { | ASM_MEM_OP op, type, [rsp+offset] || } || } || } while (0); |.endmacro |.macro ASM_REG_REG_OP, op, type, dst, src || switch (ir_type_size[type]) { || case 1: | op Rb(dst), Rb(src) || break; || case 2: | op Rw(dst), Rw(src) || break; || case 4: | op Rd(dst), Rd(src) || break; || case 8: | op Rq(dst), Rq(src) || break; || default: || IR_ASSERT(0); || } |.endmacro |.macro ASM_REG_IMM_OP, op, type, dst, src || switch (ir_type_size[type]) { || case 1: | op Rb(dst), src || break; || case 2: | op Rw(dst), src || break; || case 4: | op Rd(dst), src || break; || case 8: | op Rq(dst), src || break; || default: || IR_ASSERT(0); || } |.endmacro |.macro ASM_MEM_REG_OP, op, 
type, dst, src || switch (ir_type_size[type]) { || case 1: | op byte dst, Rb(src) || break; || case 2: | op word dst, Rw(src) || break; || case 4: | op dword dst, Rd(src) || break; || case 8: | op qword dst, Rq(src) || break; || default: || IR_ASSERT(0); || } |.endmacro |.macro ASM_MEM_IMM_OP, op, type, dst, src || switch (ir_type_size[type]) { || case 1: | op byte dst, src || break; || case 2: | op word dst, src || break; || case 4: | op dword dst, src || break; || case 8: | op qword dst, src || break; || default: || IR_ASSERT(0); || } |.endmacro |.macro ASM_REG_MEM_OP, op, type, dst, src || switch (ir_type_size[type]) { || case 1: | op Rb(dst), byte src || break; || case 2: | op Rw(dst), word src || break; || case 4: | op Rd(dst), dword src || break; || case 8: | op Rq(dst), qword src || break; || default: || IR_ASSERT(0); || } |.endmacro |.macro ASM_VREG_OP, op, type, vreg || do { || ir_reg _reg = ir_vreg_reg(ctx, vreg); || if (_reg >= 0) { | op Ra(_reg) || } else { || int32_t offset = ir_vreg_spill_slot(ctx, vreg); || if (ctx->flags & IR_USE_FRAME_POINTER) { | op aword [rbp+offset] || } else { | op aword [rsp+offset] || } || } || } while (0); |.endmacro |.macro ASM_VREGX_OP, op, type, vreg || if (IR_IS_CONST_REF(vreg)) { || ir_insn *_insn = &ctx->ir_base[vreg]; || int32_t val = _insn->val.i32; | op dword val // TODO: || } else { | ASM_VREG_OP op, type, vreg || } |.endmacro |.macro ASM_REG_REF_OP, _op, type, dst, src || if (IR_IS_CONST_REF(src)) { || ir_insn *_insn = &ctx->ir_base[src]; || if (_insn->op == IR_STR) { || int label = ctx->cfg_blocks_count - src; | //ASM_REG_MEM_OP _op, type, dst, [=>label] | lea Ra(dst), aword [=>label] // TODO: mov -> lea || _insn->emit_const = 1; || } else { | ASM_REG_IMM_OP _op, type, dst, _insn->val.u32 // TODO: || } || } else { || ir_reg _reg = ir_ref_reg(ctx, src); || if (_reg >= 0) { | ASM_REG_REG_OP _op, type, dst, _reg || } else { || int32_t offset = ir_ref_spill_slot(ctx, src); || if (ctx->flags & IR_USE_FRAME_POINTER) { 
| ASM_REG_MEM_OP _op, type, dst, [rbp+offset] || } else { | ASM_REG_MEM_OP _op, type, dst, [rsp+offset] || } || } || } |.endmacro |.macro ASM_VREG_IMM_OP, op, type, dst, src || do { || ir_insn *_insn = &ctx->ir_base[src]; || ir_reg _reg = ir_vreg_reg(ctx, dst); || if (_reg >= 0) { | ASM_REG_IMM_OP op, type, _reg, _insn->val.u32 || } else { || int32_t offset = ir_vreg_spill_slot(ctx, dst); || if (ctx->flags & IR_USE_FRAME_POINTER) { | ASM_MEM_IMM_OP op, type, [rbp+offset], _insn->val.u32 || } else { | ASM_MEM_IMM_OP op, type, [rsp+offset], _insn->val.u32 || } || } || } while (0); |.endmacro |.macro ASM_VREG_REG_OP, op, type, dst, src || do { || ir_reg _reg = ir_vreg_reg(ctx, dst); || if (_reg >= 0) { | ASM_REG_REG_OP op, type, _reg, src || } else { || int32_t offset = ir_vreg_spill_slot(ctx, dst); || if (ctx->flags & IR_USE_FRAME_POINTER) { | ASM_MEM_REG_OP op, type, [rbp+offset], src || } else { | ASM_MEM_REG_OP op, type, [rsp+offset], src || } || } || } while (0); |.endmacro |.macro ASM_REG_VREG_OP, op, type, dst, src || do { || ir_reg _reg = ir_vreg_reg(ctx, src); || if (_reg >= 0) { | ASM_REG_REG_OP op, type, dst, _reg || } else { || int32_t offset = ir_vreg_spill_slot(ctx, src); || if (ctx->flags & IR_USE_FRAME_POINTER) { | ASM_REG_MEM_OP op, type, dst, [rbp+offset] || } else { | ASM_REG_MEM_OP op, type, dst, [rsp+offset] || } || } || } while (0); |.endmacro |.macro ASM_REF_REG_OP, op, type, dst, src || do { || ir_reg _reg = ir_ref_reg(ctx, dst); || if (_reg >= 0) { | ASM_REG_REG_OP op, type, _reg, src || } else { || int32_t offset = ir_ref_spill_slot(ctx, dst); || if (ctx->flags & IR_USE_FRAME_POINTER) { | ASM_MEM_REG_OP op, type, [rbp+offset], src || } else { | ASM_MEM_REG_OP op, type, [rsp+offset], src || } || } || } while (0); |.endmacro |.macro ASM_REF_IMM_OP, op, type, dst, src || do { || ir_reg _reg = ir_ref_reg(ctx, dst); || if (_reg >= 0) { | ASM_REG_IMM_OP op, type, _reg, src || } else { || int32_t offset = ir_ref_spill_slot(ctx, dst); || if 
(ctx->flags & IR_USE_FRAME_POINTER) { | ASM_MEM_IMM_OP op, type, [rbp+offset], src || } else { | ASM_MEM_IMM_OP op, type, [rsp+offset], src || } || } || } while (0); |.endmacro |.macro ASM_REG_REG_IMUL, type, dst, src || switch (ir_type_size[type]) { || case 2: | imul Rw(dst), Rw(src) || break; || case 4: | imul Rd(dst), Rd(src) || break; || case 8: | imul Rq(dst), Rq(src) || break; || default: || IR_ASSERT(0); || } |.endmacro |.macro ASM_REG_IMM_IMUL, type, dst, src || switch (ir_type_size[type]) { || case 2: | imul Rw(dst), src || break; || case 4: | imul Rd(dst), src || break; || case 8: | imul Rq(dst), src || break; || default: || IR_ASSERT(0); || } |.endmacro |.macro ASM_REG_MEM_IMUL, type, dst, src || switch (ir_type_size[type]) { || case 2: | imul Rw(dst), word src || break; || case 4: | imul Rd(dst), dword src || break; || case 8: | imul Rq(dst), qword src || break; || default: || IR_ASSERT(0); || } |.endmacro |.macro ASM_REG_REF_IMUL, type, dst, src || if (IR_IS_CONST_REF(src)) { || ir_insn *_insn = &ctx->ir_base[src]; | ASM_REG_IMM_IMUL type, dst, _insn->val.u32 // TODO: || } else { || ir_reg _reg = ir_ref_reg(ctx, src); || if (_reg >= 0) { | ASM_REG_REG_IMUL type, dst, _reg || } else { || int32_t offset = ir_ref_spill_slot(ctx, src); || if (ctx->flags & IR_USE_FRAME_POINTER) { | ASM_REG_MEM_IMUL type, dst, [rbp+offset] || } else { | ASM_REG_MEM_IMUL type, dst, [rsp+offset] || } || } || } |.endmacro |.macro ASM_SSE2_REG_REG_OP, fop, dop, type, dst, src || if (type == IR_FLOAT) { | fop xmm(dst-IR_REG_XMM0), xmm(src-IR_REG_XMM0) || } else if (type == IR_DOUBLE) { | dop xmm(dst-IR_REG_XMM0), xmm(src-IR_REG_XMM0) || } else { || IR_ASSERT(0); || } |.endmacro |.macro ASM_SSE2_MEM_REG_OP, fop, dop, type, dst, src || if (type == IR_FLOAT) { | fop dword dst, xmm(src-IR_REG_XMM0) || } else if (type == IR_DOUBLE) { | dop qword dst, xmm(src-IR_REG_XMM0) || } else { || IR_ASSERT(0); || } |.endmacro |.macro ASM_SSE2_REG_MEM_OP, fop, dop, type, dst, src || if (type == 
IR_FLOAT) { | fop xmm(dst-IR_REG_XMM0), dword src || } else if (type == IR_DOUBLE) { | dop xmm(dst-IR_REG_XMM0), qword src || } else { || IR_ASSERT(0); || } |.endmacro |.macro ASM_SSE2_REG_REF_OP, fop, dop, type, dst, src || if (IR_IS_CONST_REF(src)) { || ir_insn *_insn = &ctx->ir_base[src]; || int label = ctx->cfg_blocks_count - src; | ASM_SSE2_REG_MEM_OP fop, dop, type, dst, [=>label] || _insn->emit_const = 1; || } else { || ir_reg _reg = ir_ref_reg(ctx, src); || if (_reg >= 0) { | ASM_SSE2_REG_REG_OP fop, dop, type, dst, _reg || } else { || int32_t offset = ir_ref_spill_slot(ctx, src); || if (ctx->flags & IR_USE_FRAME_POINTER) { | ASM_SSE2_REG_MEM_OP fop, dop, type, dst, [rbp+offset] || } else { | ASM_SSE2_REG_MEM_OP fop, dop, type, dst, [rsp+offset] || } || } || } |.endmacro |.macro ASM_SSE2_REF_REG_OP, fop, dop, type, dst, src || do { || ir_reg _reg = ir_ref_reg(ctx, dst); || if (_reg >= 0) { | ASM_SSE2_REG_REG_OP fop, dop, type, _reg, src || } else if (ival->stack_spill_pos) { || int32_t offset = ir_ref_spill_slot(ctx, dst); || if (ctx->flags & IR_USE_FRAME_POINTER) { | ASM_SSE2_MEM_REG_OP fop, dop, type, [rbp+offset], src || } else { | ASM_SSE2_MEM_REG_OP fop, dop, type, [rsp+offset], src || } || } || } while (0); |.endmacro |.macro ASM_AVX_REG_REG_REG_OP, fop, dop, type, dst, op1, op2 || if (type == IR_FLOAT) { | fop xmm(dst-IR_REG_XMM0), xmm(op1-IR_REG_XMM0), xmm(op2-IR_REG_XMM0) || } else if (type == IR_DOUBLE) { | dop xmm(dst-IR_REG_XMM0), xmm(op1-IR_REG_XMM0), xmm(op2-IR_REG_XMM0) || } else { || IR_ASSERT(0); || } |.endmacro |.macro ASM_AVX_REG_REG_MEM_OP, fop, dop, type, dst, op1, op2 || if (type == IR_FLOAT) { | fop xmm(dst-IR_REG_XMM0), xmm(op1-IR_REG_XMM0), dword op2 || } else if (type == IR_DOUBLE) { | dop xmm(dst-IR_REG_XMM0), xmm(op1-IR_REG_XMM0), qword op2 || } else { || IR_ASSERT(0); || } |.endmacro |.macro ASM_AVX_REG_REG_REF_OP, fop, dop, type, dst, op1, op2 || if (IR_IS_CONST_REF(op2)) { || ir_insn *_insn = &ctx->ir_base[op2]; || int label = 
ctx->cfg_blocks_count - op2; | ASM_AVX_REG_REG_MEM_OP fop, dop, type, dst, op1, [=>label] || _insn->emit_const = 1; || } else { || ir_reg _reg = ir_ref_reg(ctx, op2); || if (_reg >= 0) { | ASM_AVX_REG_REG_REG_OP fop, dop, type, dst, op1, _reg || } else { || int32_t offset = ir_ref_spill_slot(ctx, op2); || if (ctx->flags & IR_USE_FRAME_POINTER) { | ASM_AVX_REG_REG_MEM_OP fop, dop, type, dst, op1, [rbp+offset] || } else { | ASM_AVX_REG_REG_MEM_OP fop, dop, type, dst, op1, [rsp+offset] || } || } || } |.endmacro |.macro ASM_FP_REG_REG_OP, fop, dop, avx_fop, avx_dop, type, dst, src || if (type == IR_FLOAT) { || if (ctx->flags & IR_AVX) { | avx_fop xmm(dst-IR_REG_XMM0), xmm(src-IR_REG_XMM0) || } else { | fop xmm(dst-IR_REG_XMM0), xmm(src-IR_REG_XMM0) || } || } else if (type == IR_DOUBLE) { || if (ctx->flags & IR_AVX) { | avx_dop xmm(dst-IR_REG_XMM0), xmm(src-IR_REG_XMM0) || } else { | dop xmm(dst-IR_REG_XMM0), xmm(src-IR_REG_XMM0) || } || } else { || IR_ASSERT(0); || } |.endmacro |.macro ASM_FP_MEM_REG_OP, fop, dop, avx_fop, avx_dop, type, dst, src || if (type == IR_FLOAT) { || if (ctx->flags & IR_AVX) { | avx_fop dword dst, xmm(src-IR_REG_XMM0) || } else { | fop dword dst, xmm(src-IR_REG_XMM0) || } || } else if (type == IR_DOUBLE) { || if (ctx->flags & IR_AVX) { | avx_dop qword dst, xmm(src-IR_REG_XMM0) || } else { | dop qword dst, xmm(src-IR_REG_XMM0) || } || } else { || IR_ASSERT(0); || } |.endmacro |.macro ASM_FP_REG_MEM_OP, fop, dop, avx_fop, avx_dop, type, dst, src || if (type == IR_FLOAT) { || if (ctx->flags & IR_AVX) { | avx_fop xmm(dst-IR_REG_XMM0), dword src || } else { | fop xmm(dst-IR_REG_XMM0), dword src || } || } else if (type == IR_DOUBLE) { || if (ctx->flags & IR_AVX) { | avx_dop xmm(dst-IR_REG_XMM0), qword src || } else { | dop xmm(dst-IR_REG_XMM0), qword src || } || } else { || IR_ASSERT(0); || } |.endmacro |.macro ASM_FP_REG_REF_OP, fop, dop, avx_fop, avx_dop, type, dst, src || if (IR_IS_CONST_REF(src)) { || ir_insn *_insn = &ctx->ir_base[src]; || int 
label = ctx->cfg_blocks_count - src; | ASM_FP_REG_MEM_OP fop, dop, avx_fop, avx_dop, type, dst, [=>label] || _insn->emit_const = 1; || } else { || ir_reg _reg = ir_ref_reg(ctx, src); || if (_reg >= 0) { | ASM_FP_REG_REG_OP fop, dop, avx_fop, avx_dop, type, dst, _reg || } else { || int32_t offset = ir_ref_spill_slot(ctx, src); || if (ctx->flags & IR_USE_FRAME_POINTER) { | ASM_FP_REG_MEM_OP fop, dop, avx_fop, avx_dop, type, dst, [rbp+offset] || } else { | ASM_FP_REG_MEM_OP fop, dop, avx_fop, avx_dop, type, dst, [rsp+offset] || } || } || } |.endmacro |.macro ASM_FP_REF_REG_OP, fop, dop, avx_fop, avx_dop, type, dst, src || do { || ir_reg _reg = ir_cef_reg(ctx, dst); || if (_reg >= 0) { | ASM_FP_REG_REG_OP fop, dop, avx_fop, avx_dop, type, _reg, src || } else { || int32_t offset = ir_ref_spill_slot(ctx, dst); || if (ctx->flags & IR_USE_FRAME_POINTER) { | ASM_FP_MEM_REG_OP fop, dop, avx_fop, avx_dop, type, [rbp+offset], src || } else { | ASM_FP_MEM_REG_OP fop, dop, avx_fop, avx_dop, type, [rsp+offset], src || } || } || } while (0); |.endmacro |.macro ASM_FP_MOV_REG_REF_OP, type, dst, src || if (IR_IS_CONST_REF(src)) { || ir_insn *_insn = &ctx->ir_base[src]; || int label = ctx->cfg_blocks_count - src; | ASM_FP_REG_MEM_OP movss, movsd, vmovss, vmovsd, type, dst, [=>label] || _insn->emit_const = 1; || } else { || ir_reg _reg = ir_ref_reg(ctx, src); || if (_reg >= 0) { | ASM_FP_REG_REG_OP movaps, movapd, vmovaps, vmovapd, type, dst, _reg || } else { || int32_t offset = ir_ref_spill_slot(ctx, src); || if (ctx->flags & IR_USE_FRAME_POINTER) { | ASM_FP_REG_MEM_OP movss, movsd, vmovss, vmovsd, type, dst, [rbp+offset] || } else { | ASM_FP_REG_MEM_OP movss, movsd, vmovss, vmovsd, type, dst, [rsp+offset] || } || } || } |.endmacro |.macro ASM_FP_MOV_REF_REG_OP, type, dst, src || do { || ir_reg _reg = ir_ref_reg(ctx, dst); || if (_reg >= 0) { | ASM_FP_REG_REG_OP movaps, movapd, vmovaps, vmovapd, type, _reg, src || } else { || int32_t offset = ir_ref_spill_slot(ctx, dst); || if 
(ctx->flags & IR_USE_FRAME_POINTER) { | ASM_FP_MEM_REG_OP movss, movsd, vmovss, vmovsd, type, [rbp+offset], src || } else { | ASM_FP_MEM_REG_OP movss, movsd, vmovss, vmovsd, type, [rsp+offset], src || } || } || } while (0); |.endmacro |.macro ASM_FP_MOV_REG_VREG_OP, type, dst, src || if (IR_IS_CONST_REF(src)) { || ir_insn *_insn = &ctx->ir_base[src]; || int label = ctx->cfg_blocks_count - src; | ASM_FP_REG_MEM_OP movss, movsd, vmovss, vmovsd, type, dst, [=>label] || _insn->emit_const = 1; || } else { || ir_reg _reg = ir_vreg_reg(ctx, src); || if (_reg >= 0) { | ASM_FP_REG_REG_OP movaps, movapd, vmovaps, vmovapd, type, dst, _reg || } else { || int32_t offset = ir_vreg_spill_slot(ctx, src); || if (ctx->flags & IR_USE_FRAME_POINTER) { | ASM_FP_REG_MEM_OP movss, movsd, vmovss, vmovsd, type, dst, [rbp+offset] || } else { | ASM_FP_REG_MEM_OP movss, movsd, vmovss, vmovsd, type, dst, [rsp+offset] || } || } || } |.endmacro |.macro ASM_FP_MOV_VREG_REG_OP, type, dst, src || do { || ir_reg _reg = ir_vreg_reg(ctx, dst); || if (_reg >= 0) { | ASM_FP_REG_REG_OP movaps, movapd, vmovaps, vmovapd, type, _reg, src || } else { || int32_t offset = ir_vreg_spill_slot(ctx, dst); || if (ctx->flags & IR_USE_FRAME_POINTER) { | ASM_FP_MEM_REG_OP movss, movsd, vmovss, vmovsd, type, [rbp+offset], src || } else { | ASM_FP_MEM_REG_OP movss, movsd, vmovss, vmovsd, type, [rsp+offset], src || } || } || } while (0); |.endmacro typedef struct _ir_backend_data { uint32_t stack_frame_size; ir_regset used_preserved_regs; dasm_State *dasm_state; int rodata_label, jmp_table_label; } ir_backend_data; #define IR_GP_REG_NAME(code, name64, name32, name16, name8, name8h) \ #name64, #define IR_GP_REG_NAME32(code, name64, name32, name16, name8, name8h) \ #name32, #define IR_GP_REG_NAME16(code, name64, name32, name16, name8, name8h) \ #name16, #define IR_GP_REG_NAME8(code, name64, name32, name16, name8, name8h) \ #name8, #define IR_FP_REG_NAME(code, name) \ #name, static const char *_ir_reg_name[IR_REG_NUM] = { 
IR_GP_REGS(IR_GP_REG_NAME) IR_FP_REGS(IR_FP_REG_NAME) }; static const char *_ir_reg_name32[IR_REG_NUM] = { IR_GP_REGS(IR_GP_REG_NAME32) }; static const char *_ir_reg_name16[IR_REG_NUM] = { IR_GP_REGS(IR_GP_REG_NAME16) }; static const char *_ir_reg_name8[IR_REG_NUM] = { IR_GP_REGS(IR_GP_REG_NAME8) }; /* Calling Convention */ #ifdef _WIN64 static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS] = { IR_REG_INT_ARG1, IR_REG_INT_ARG2, IR_REG_INT_ARG3, IR_REG_INT_ARG4, }; static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS] = { IR_REG_FP_ARG1, IR_REG_FP_ARG2, IR_REG_FP_ARG3, IR_REG_FP_ARG4, }; #elif defined(__x86_64__) static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS] = { IR_REG_INT_ARG1, IR_REG_INT_ARG2, IR_REG_INT_ARG3, IR_REG_INT_ARG4, IR_REG_INT_ARG5, IR_REG_INT_ARG6, }; static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS] = { IR_REG_FP_ARG1, IR_REG_FP_ARG2, IR_REG_FP_ARG3, IR_REG_FP_ARG4, IR_REG_FP_ARG5, IR_REG_FP_ARG6, IR_REG_FP_ARG7, IR_REG_FP_ARG8, }; #else static const int8_t *_ir_int_reg_params = NULL; static const int8_t *_ir_fp_reg_params = NULL; #endif const char *ir_reg_name(int8_t reg, ir_type type) { IR_ASSERT(reg >= 0 && reg < IR_REG_NUM); if (IR_IS_TYPE_FP(type) || ir_type_size[type] == 8) { return _ir_reg_name[reg]; } else if (ir_type_size[type] == 4) { return _ir_reg_name32[reg]; } else if (ir_type_size[type] == 2) { return _ir_reg_name16[reg]; } else if (ir_type_size[type] == 1) { return _ir_reg_name8[reg]; } IR_ASSERT(0); return NULL; } typedef enum _ir_rule { IR_SKIP = IR_LAST_OP, IR_SKIP_REG, IR_CMP_INT, // res=reg({%rax,%rbx,%rcx,%rdx}), op1=reg(GP), op2=reg(GP)|mem|imm // res=reg({%rax,%rbx,%rcx,%rdx}), op1=mem, op2=reg(GP)|imm IR_CMP_FP, // res=reg({%rax,%rbx,%rcx,%rdx}), op1=reg(FP), op2=reg(FP)|mem IR_MUL_INT, // res=%rax op1=%rax, op2=reg(GP)|mem, change=%rdx IR_DIV_INT, // res=%rax op1=%rax, op2=reg(GP)|mem, change=%rdx IR_MOD_INT, // res=%rdx op1=%rax, op2=reg(GP)|mem, change=%rax IR_LEA_OB, // res=reg(GP), op1=reg(GP) IR_LEA_SI, 
// res=reg(GP), op1=reg(GP) IR_LEA_SIB, // res=reg(GP), op1=reg(GP) IR_LEA_IB, // res=reg(GP), op1=reg(GP), op2=reg(GP) IR_LEA_SI_O, IR_LEA_SIB_O, IR_LEA_IB_O, IR_LEA_I_OB, IR_LEA_OB_I, IR_LEA_OB_SI, IR_LEA_SI_OB, IR_LEA_B_SI, IR_LEA_SI_B, IR_INC, IR_DEC, IR_MUL_2, IR_MUL_PWR2, IR_DIV_PWR2, IR_MOD_PWR2, IR_BOOL_NOT_INT, IR_OP_INT, IR_BINOP_INT, // res=reg(GP), op1.reg(GP, hint=res), op2=any IR_BINOP_SSE2, // res=reg(FP), op1.reg(FP, hint=res), op2=reg(FP)|mem IR_BINOP_AVX, // res=reg(FP), op1.reg(FP), op2=reg(FP)|mem IR_SHIFT, // res=reg(GP), op1.reg(GP, hint=res), op2=%rcx IR_SHIFT_CONST, // res=reg(GP), op1.reg(GP, hint=res), imm IR_COPY_INT, // res=reg(GP), op1=reg(GP)|mem|imm // res=mem, op1=reg(GP)|imm IR_COPY_FP, // res=reg(FP), op1=reg(FP)|mem // res=mem, op2=reg(GP) IR_CMP_AND_BRANCH_INT, // op1=reg(GP), op2=reg(GP)|mem|imm // op1=mem, op2=reg(GP)|imm IR_CMP_AND_BRANCH_FP, // op1=reg(FP), op2=reg(FP)|mem IR_IF_INT, // op1=reg(GP)|mem IR_RETURN_VOID, // IR_RETURN_INT, // op1=reg(GP, hint=%rax)|mem|imm IR_RETURN_FP, // op1=reg(FP, hint=%xmm0)|mem } ir_rule; /* instruction selection */ bool ir_needs_def_reg(ir_ctx *ctx, ir_ref ref) { IR_ASSERT(ctx->rules); return ctx->rules[ref] != IR_SKIP; } ir_regset ir_get_fixed_regset(ir_ctx *ctx, ir_ref ref) { ir_ref rule; rule = ctx->rules[ref]; // if (rule == IR_SHIFT) { // return IR_REGSET(IR_REG_RCX); // if (rule == IR_MUL_INT || rule == IR_DIV_INT || rule == IR_MOD_INT) { // return IR_REGSET(IR_REG_RAX) | IR_REGSET(IR_REG_RDX); // } if (rule == IR_MOD_INT && IR_IS_CONST_REF(ctx->ir_base[ref].op1)) { return IR_REGSET(IR_REG_RAX); } return IR_REGSET_EMPTY; } static ir_reg ir_get_param_reg(ir_ctx *ctx, ir_ref ref) { ir_use_list *use_list = &ctx->use_lists[1]; int i; ir_ref use, *p; ir_insn *insn; int int_param = 0; int fp_param = 0; int int_reg_params_count = IR_REG_INT_ARGS; int fp_reg_params_count = IR_REG_FP_ARGS; const int8_t *int_reg_params = _ir_int_reg_params; const int8_t *fp_reg_params = _ir_fp_reg_params; for 
(i = 0, p = &ctx->use_edges[use_list->refs]; i < use_list->count; i++, p++) { use = *p; insn = &ctx->ir_base[use]; if (insn->op == IR_PARAM) { if (IR_IS_TYPE_INT(insn->type)) { if (use == ref) { if (int_param < int_reg_params_count) { return int_reg_params[int_param]; } else { return IR_REG_NONE; } } int_param++; } else if (IR_IS_TYPE_FP(insn->type)) { if (use == ref) { if (fp_param < fp_reg_params_count) { return fp_reg_params[fp_param]; } else { return IR_REG_NONE; } } fp_param++; } else { IR_ASSERT(0); } } } return IR_REG_NONE; } ir_reg ir_uses_fixed_reg(ir_ctx *ctx, ir_ref ref, int op_num) { ir_ref rule; rule = ctx->rules[ref]; if (rule == IR_SHIFT) { if (op_num == 2) { return IR_REG_RCX; } } else if (rule == IR_MUL_INT || rule == IR_DIV_INT) { if (op_num == 0 || op_num == 1) { return IR_REG_RAX; } } else if (rule == IR_MOD_INT) { if (op_num == 0) { return IR_REG_RDX; } else if (op_num == 1) { return IR_REG_RAX; } } else if (rule == IR_RETURN_INT) { if (op_num == 2) { return IR_REG_RAX; } } else if (rule == IR_RETURN_FP) { if (op_num == 2) { return IR_REG_XMM0; } } else if (rule == IR_SKIP_REG) { if (ctx->ir_base[ref].op == IR_PARAM && op_num == 0) { return ir_get_param_reg(ctx, ref); } } return IR_REG_NONE; } bool ir_result_reuses_op1(ir_ctx *ctx, ir_ref ref) { ir_ref rule; rule = ctx->rules[ref]; switch (rule) { case IR_BINOP_INT: case IR_BINOP_SSE2: case IR_SHIFT: case IR_SHIFT_CONST: case IR_MUL_INT: case IR_DIV_INT: case IR_MOD_INT: return 1; } return 0; } static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref, ir_block *bb) { ir_insn *op1_insn, *op2_insn; ir_insn *insn = &ctx->ir_base[ref]; switch (insn->op) { case IR_EQ: case IR_NE: case IR_LT: case IR_GE: case IR_LE: case IR_GT: case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT: if (IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) { return IR_CMP_INT; } else { return IR_CMP_FP; } break; case IR_ADD: case IR_SUB: if (IR_IS_TYPE_INT(insn->type)) { if (IR_IS_CONST_REF(insn->op2)) { op2_insn = 
&ctx->ir_base[insn->op2]; if (IR_IS_CONST_REF(insn->op1)) { // const } else if (op2_insn->val.i64 == 0) { return IR_COPY_INT; } else if (ir_type_size[insn->type] == sizeof(void*)) { if (insn->op1 > bb->start && insn->op1 < ref && ctx->use_lists[insn->op1].count == 1) { if (!ctx->rules[insn->op1]) { ctx->rules[insn->op1] = ir_match_insn(ctx, insn->op1, bb); } } if (ctx->rules[insn->op1] == IR_LEA_SI) { ctx->rules[insn->op1] = IR_SKIP; return IR_LEA_SI_O; // lea ret, [op1.op1.reg*op1.op2.scale+op2.offset] } else if (ctx->rules[insn->op1] == IR_LEA_SIB) { ctx->rules[insn->op1] = IR_SKIP; return IR_LEA_SIB_O; // lea ret, [op1.op1.reg+op1.op1.reg*op1.op2.scale+op2.offset] } else if (ctx->rules[insn->op1] == IR_LEA_IB) { ctx->rules[insn->op1] = IR_SKIP; return IR_LEA_IB_O; // lea ret, [op1.op1.reg+op1.op2.reg+op2.offset] } return IR_LEA_OB; // lea ret, [op1.reg+op2.offset] } else if (op2_insn->val.i64 == 1 || op2_insn->val.i64 == -1) { if (insn->op == IR_ADD) { if (op2_insn->val.i64 == 1) { return IR_INC; // inc op1 } else { return IR_DEC; // dec op1 } } else { if (op2_insn->val.i64 == 1) { return IR_DEC; // dec op1 } else { return IR_INC; // inc op1 } } } } else if (insn->op == IR_ADD && ir_type_size[insn->type] == sizeof(void*)) { if (insn->op1 > bb->start && insn->op1 < ref && ctx->use_lists[insn->op1].count == 1) { if (!ctx->rules[insn->op1]) { ctx->rules[insn->op1] = ir_match_insn(ctx, insn->op1, bb); } } if (insn->op2 > bb->start && insn->op2 < ref && ctx->use_lists[insn->op2].count == 1) { if (!ctx->rules[insn->op2]) { ctx->rules[insn->op2] = ir_match_insn(ctx, insn->op2, bb); } } if (ctx->rules[insn->op1] == IR_LEA_OB) { ctx->rules[insn->op1] = IR_SKIP; if (ctx->rules[insn->op2] == IR_LEA_SI) { ctx->rules[insn->op2] = IR_SKIP; return IR_LEA_OB_SI; // lea ret, [op1.op1.reg+op1.op2.offset+op2.op1.reg*op2.op2.scale] } return IR_LEA_OB_I; // lea ret, [op1.op1.reg+op1.op2.offset+op2.reg] } if (ctx->rules[insn->op2] == IR_LEA_OB) { ctx->rules[insn->op2] = IR_SKIP; if 
(ctx->rules[insn->op1] == IR_LEA_SI) { ctx->rules[insn->op1] = IR_SKIP; return IR_LEA_SI_OB; // lea ret, [op1.op1.reg*op1.op2.scale+op2.op1.reg+op2.op2.offset] } return IR_LEA_I_OB; // lea ret, [op1.reg+op2.op1.reg+op2.op2.offset] } return IR_LEA_IB; // lea ret, [op1.reg+op2.reg] } return IR_BINOP_INT; } else if (ctx->flags & IR_AVX) { return IR_BINOP_AVX; } else { return IR_BINOP_SSE2; } break; case IR_MUL: if (IR_IS_TYPE_INT(insn->type)) { if (IR_IS_CONST_REF(insn->op2)) { op2_insn = &ctx->ir_base[insn->op2]; if (IR_IS_CONST_REF(insn->op1)) { // const } else if (op2_insn->val.u64 == 0) { // 0 } else if (op2_insn->val.u64 == 1) { return IR_COPY_INT; } else if (ir_type_size[insn->type] == sizeof(void*)) { if (op2_insn->val.u64 == 2 || op2_insn->val.u64 == 4 || op2_insn->val.u64 == 8) { return IR_LEA_SI; // lea ret, [op1.reg*op2.scale] } else if (op2_insn->val.u64 == 3 || op2_insn->val.u64 == 5 || op2_insn->val.u64 == 9) { return IR_LEA_SIB; // lea ret, [op1.reg+op1.reg*op2.scale] } } else if (op2_insn->val.u64 == 2) { return IR_MUL_2; // add op1, op1 } else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) { return IR_MUL_PWR2; // shl op1, IR_LOG2(op2_insn->val.u64) } } return (IR_IS_TYPE_SIGNED(insn->type) && ir_type_size[insn->type] != 1) ? 
IR_BINOP_INT : IR_MUL_INT; } else if (ctx->flags & IR_AVX) { return IR_BINOP_AVX; } else { return IR_BINOP_SSE2; } break; case IR_DIV: if (IR_IS_TYPE_INT(insn->type)) { if (IR_IS_CONST_REF(insn->op2)) { op2_insn = &ctx->ir_base[insn->op2]; if (IR_IS_CONST_REF(insn->op1)) { // const } else if (op2_insn->val.u64 == 1) { return IR_COPY_INT; } else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) { return IR_DIV_PWR2; // shr op1, IR_LOG2(op2_insn->val.u64) } } return IR_DIV_INT; } else if (ctx->flags & IR_AVX) { return IR_BINOP_AVX; } else { return IR_BINOP_SSE2; } break; case IR_MOD: if (IR_IS_CONST_REF(insn->op2)) { op2_insn = &ctx->ir_base[insn->op2]; if (IR_IS_CONST_REF(insn->op1)) { // const } else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) { return IR_MOD_PWR2; // and op1, IR_LOG2(op2_insn->val.u64)-1 } } return IR_MOD_INT; // case IR_POW: // case IR_CAST: // case IR_ADD_OV: // case IR_SUB_OV: // case IR_MUL_OV: // case IR_OVERFLOW: case IR_BSWAP: case IR_NOT: if (insn->type == IR_BOOL) { if (IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) { return IR_BOOL_NOT_INT; } else { IR_ASSERT(0); // TODO: IR_BOOL_NOT_FP } } else if (IR_IS_TYPE_INT(insn->type)) { return IR_OP_INT; } else { IR_ASSERT(0); } break; case IR_NEG: if (IR_IS_TYPE_INT(insn->type)) { return IR_OP_INT; } else { IR_ASSERT(0); return IR_SKIP; // xorpd .LC0(%rip), %xmm0; 0 0x80000000 0 0 // vxorpd .LC0(%rip), %xmm0, %xmm0 // xorps .LC0(%rip), %xmm0; 0x80000000 0 0 0 // vxorps .LC0(%rip), %xmm0, %xmm0 } case IR_ABS: if (IR_IS_TYPE_INT(insn->type)) { IR_ASSERT(0); return IR_SKIP; // movl %edi, %eax; negl %eax; cmovs %edi, %eax } else { IR_ASSERT(0); return IR_SKIP; // andpd .LC0(%rip), %xmm0; 0xffffffff 0x7fffffff 0 0 // vandpd .LC0(%rip), %xmm0, %xmm0 // andps .LC0(%rip), %xmm0; 0x7fffffff 0 0 0 // vandps .LC0(%rip), %xmm0, %xmm0 } case IR_OR: if (IR_IS_CONST_REF(insn->op2)) { op2_insn = &ctx->ir_base[insn->op2]; if (IR_IS_CONST_REF(insn->op1)) { // const } else if (op2_insn->val.i64 == 0) { return IR_COPY_INT; 
} else if (op2_insn->val.i64 == -1) { // TODO: type len // -1 } } else if (IR_IS_CONST_REF(insn->op1)) { op1_insn = &ctx->ir_base[insn->op1]; if (op1_insn->val.i64 == 0) { // op2 } else if (op1_insn->val.i64 == -1) { // TODO: type len // -1 } } return IR_BINOP_INT; case IR_AND: if (IR_IS_CONST_REF(insn->op2)) { op2_insn = &ctx->ir_base[insn->op2]; if (IR_IS_CONST_REF(insn->op1)) { // const } else if (op2_insn->val.i64 == 0) { // 0 } else if (op2_insn->val.i64 == -1) { // TODO: type len return IR_COPY_INT; } } else if (IR_IS_CONST_REF(insn->op1)) { op1_insn = &ctx->ir_base[insn->op1]; if (op1_insn->val.i64 == 0) { // 0 } else if (op1_insn->val.i64 == -1) { // TODO: type len // op2 } } return IR_BINOP_INT; case IR_XOR: if (IR_IS_CONST_REF(insn->op2)) { op2_insn = &ctx->ir_base[insn->op2]; if (IR_IS_CONST_REF(insn->op1)) { // const } } return IR_BINOP_INT; case IR_SHL: if (IR_IS_CONST_REF(insn->op2)) { op2_insn = &ctx->ir_base[insn->op2]; if (IR_IS_CONST_REF(insn->op1)) { // const } else if (op2_insn->val.u64 == 0) { return IR_COPY_INT; } else if (ir_type_size[insn->type] == sizeof(void*)) { if (op2_insn->val.u64 == 1) { // lea [op1*2] } else if (op2_insn->val.u64 == 2) { // lea [op1*4] } else if (op2_insn->val.u64 == 3) { // lea [op1*8] } } return IR_SHIFT_CONST; } return IR_SHIFT; case IR_SHR: case IR_SAR: case IR_ROL: case IR_ROR: if (IR_IS_CONST_REF(insn->op2)) { op2_insn = &ctx->ir_base[insn->op2]; if (IR_IS_CONST_REF(insn->op1)) { // const } else if (op2_insn->val.u64 == 0) { return IR_COPY_INT; } return IR_SHIFT_CONST; } return IR_SHIFT; // case IR_MIN: // case IR_MAX: // case IR_COND: case IR_COPY: if (IR_IS_TYPE_INT(insn->type)) { return IR_COPY_INT; } else { return IR_COPY_FP; } break; case IR_PHI: case IR_PI: case IR_PARAM: return IR_SKIP_REG; case IR_CALL: ctx->flags |= IR_HAS_CALLS; return IR_CALL; // case IR_TAILCALL: // case IR_ALLOCA: // case IR_VLOAD: // case IR_VSTORE: // case IR_LOAD: // case IR_STORE: case IR_VAR: case IR_START: case IR_BEGIN: // 
case IR_END: case IR_IF_TRUE: case IR_IF_FALSE: case IR_CASE_VAL: case IR_CASE_DEFAULT: case IR_MERGE: case IR_LOOP_BEGIN: // case IR_LOOP_END: case IR_LOOP_EXIT: return IR_SKIP; case IR_RETURN: if (!insn->op2) { return IR_RETURN_VOID; } else if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { return IR_RETURN_INT; } else { return IR_RETURN_FP; } case IR_IF: if (insn->op2 > bb->start && insn->op2 < ref && ctx->use_lists[insn->op2].count == 1) { op2_insn = &ctx->ir_base[insn->op2]; if (op2_insn && op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT) { ctx->rules[insn->op2] = IR_SKIP; if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) { return IR_CMP_AND_BRANCH_INT; } else { return IR_CMP_AND_BRANCH_FP; } } } if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { return IR_IF_INT; } else { IR_ASSERT(0 && "NIY IR_IF_FP"); } default: break; } return insn->op; } int ir_match(ir_ctx *ctx) { int b, n; ir_ref i; ir_block *bb; ir_insn *insn; if (!ctx->prev_insn_len) { ctx->prev_insn_len = ir_mem_malloc(ctx->insns_count * sizeof(uint32_t)); for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) { n = b; /* The first insn of BB keeps BB number in prev_insn_len[] */ for (i = bb->start, insn = ctx->ir_base + i; i <= bb->end;) { ctx->prev_insn_len[i] = n; /* The first insn of BB keeps BB number in prev_insn_len[] */ n = ir_operands_count(ctx, insn); n = 1 + (n >> 2); // support for multi-word instructions like MERGE and PHI i += n; insn += n; } } } ctx->rules = ir_mem_calloc(ctx->insns_count, sizeof(uint32_t)); for (b = ctx->cfg_blocks_count, bb = ctx->cfg_blocks + b; b > 0; b--, bb--) { for (i = bb->end; i >= bb->start; i -= ctx->prev_insn_len[i]) { insn = &ctx->ir_base[i]; if (!ctx->rules[i]) { // if (ctx->rules[i] != IR_SKIP) { ctx->rules[i] = ir_match_insn(ctx, i, bb); } } } return 1; } /* code genertion */ static int ir_skip_empty_blocks(ir_ctx *ctx, int b) { while (ctx->cfg_blocks[b].flags & IR_BB_MAY_SKIP) { b++; } return b; } static ir_reg 
ir_vreg_reg(ir_ctx *ctx, ir_ref v) { return v < 0 ? IR_REG_NONE : ctx->live_intervals[v]->reg; } static ir_reg ir_vreg_spill_slot(ir_ctx *ctx, ir_ref v) { int32_t offset; IR_ASSERT(v >= 0); offset = ctx->live_intervals[v]->stack_spill_pos; IR_ASSERT(offset != 0); if (ctx->flags & IR_USE_FRAME_POINTER) { return -offset; } else { ir_backend_data *data = ctx->data; return data->stack_frame_size - offset; } } static ir_reg ir_ref_reg(ir_ctx *ctx, ir_ref ref) { return ref < 0 ? IR_REG_NONE : ctx->live_intervals[ctx->vregs[ref]]->reg; } static ir_reg ir_ref_spill_slot(ir_ctx *ctx, ir_ref ref) { int32_t offset; IR_ASSERT(ref >= 0); offset = ctx->live_intervals[ctx->vregs[ref]]->stack_spill_pos; IR_ASSERT(offset != 0); if (ctx->flags & IR_USE_FRAME_POINTER) { return -offset; } else { ir_backend_data *data = ctx->data; return data->stack_frame_size - offset; } } static bool ir_last_use(ir_ctx *ctx, ir_ref ref) { // TODO: return 0; } static void ir_emit_load(ir_ctx *ctx, ir_type type, ir_ref src, ir_reg reg) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; | ASM_REG_REF_OP mov, type, reg, src } static void ir_emit_store(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref dst) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; | ASM_REF_REG_OP mov, type, dst, reg } static void ir_emit_fp_load(ir_ctx *ctx, ir_type type, ir_ref src, ir_reg reg) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; if (IR_IS_CONST_REF(src)) { ir_insn *insn = &ctx->ir_base[src]; if (insn->type == IR_FLOAT && insn->val.f == 0) { if (ctx->flags & IR_AVX) { | vxorps xmm(reg-IR_REG_XMM0), xmm(reg-IR_REG_XMM0), xmm(reg-IR_REG_XMM0) } else { | xorps xmm(reg-IR_REG_XMM0), xmm(reg-IR_REG_XMM0) } return; } else if (insn->type == IR_DOUBLE && insn->val.d == 0) { if (ctx->flags & IR_AVX) { | vxorpd xmm(reg-IR_REG_XMM0), xmm(reg-IR_REG_XMM0), xmm(reg-IR_REG_XMM0) } else { | xorpd xmm(reg-IR_REG_XMM0), xmm(reg-IR_REG_XMM0) } return; } } | 
ASM_FP_MOV_REG_REF_OP type, reg, src
}

/* Emits a floating point move of the XMM register `reg` into `dst`. */
static void ir_emit_fp_store(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref dst)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	| ASM_FP_MOV_REF_REG_OP type, dst, reg
}

/*
 * Emits the function prologue: optional frame pointer setup, stack frame
 * allocation, and saving of the used preserved general purpose registers.
 * Saving preserved FP registers is not implemented.
 */
static void ir_emit_prologue(ir_ctx *ctx)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	if (ctx->flags & IR_USE_FRAME_POINTER) {
		| push rbp
		| mov rbp, rsp
	}
	if (data->stack_frame_size) {
		| sub rsp, data->stack_frame_size
	}
	if (data->used_preserved_regs) {
		ir_reg base;
		int offset;
		uint32_t r;

		/* NOTE(review): without a frame pointer the first slot is
		 * [rsp+stack_frame_size]; confirm this does not alias the return address. */
		if (ctx->flags & IR_USE_FRAME_POINTER) {
			base = IR_REG_RBP;
			offset = -(int)sizeof(void*);
		} else {
			base = IR_REG_RSP;
			offset = data->stack_frame_size;
		}
		for (r = 0; r < IR_REG_NUM; r++) {
			if (!IR_REGSET_IN(data->used_preserved_regs, r)) {
				continue;
			}
			if (r >= IR_REG_FP_FIRST) {
				IR_ASSERT(0 && "NIY FP register saing");
				continue;
			}
			| mov aword [Ra(base)+offset], Ra(r)
			offset -= sizeof(void*);
		}
	}
}

/*
 * Emits the function epilogue: restores the preserved registers saved by
 * the prologue (same slot order), then releases the stack frame.
 */
static void ir_emit_epilogue(ir_ctx *ctx)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	if (data->used_preserved_regs) {
		int offset;
		uint32_t i;

		if (ctx->flags & IR_USE_FRAME_POINTER) {
			offset = -(int)sizeof(void*);
		} else {
			offset = data->stack_frame_size;
		}
		for (i = 0; i < IR_REG_NUM; i++) {
			if (IR_REGSET_IN(data->used_preserved_regs, i)) {
				if (i < IR_REG_FP_FIRST) {
					ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ?
IR_REG_RBP : IR_REG_RSP; | mov Ra(i), aword [Ra(fp)+offset] offset -= sizeof(void*); } else { IR_ASSERT(0 && "NIY FP register saing"); } } } } if (ctx->flags & IR_USE_FRAME_POINTER) { | mov rsp, rbp | pop rbp } else if (data->stack_frame_size) { | add rsp, data->stack_frame_size } } void ir_emit_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_type type = insn->type; ir_reg def_reg = ir_ref_reg(ctx, def); ir_reg op1_reg = ir_ref_reg(ctx, insn->op1); ir_reg op2_reg = ir_ref_reg(ctx, insn->op2); ir_reg reg; ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; if (def_reg >= 0) { reg = def_reg; } else if (op1_reg >= 0 && ir_last_use(ctx, insn->op1)) { reg = op1_reg; } else { // TODO: add $imm, mem // TODO: commutative insns reg = IR_REG_RAX; // TODO: temporary register } if (op1_reg != reg) { ir_emit_load(ctx, type, insn->op1, reg); } if (op2_reg < 0 && insn->op1 == insn->op2) { op2_reg = reg; } if (op2_reg >= 0) { switch (insn->op) { case IR_ADD: | ASM_REG_REG_OP add, type, reg, op2_reg break; case IR_SUB: | ASM_REG_REG_OP sub, type, reg, op2_reg break; case IR_MUL: | ASM_REG_REG_IMUL type, reg, op2_reg break; case IR_OR: | ASM_REG_REG_OP or, type, reg, op2_reg break; case IR_AND: | ASM_REG_REG_OP and, type, reg, op2_reg break; case IR_XOR: | ASM_REG_REG_OP xor, type, reg, op2_reg break; default: IR_ASSERT(0 && "NIY binary op"); break; } } else { switch (insn->op) { case IR_ADD: | ASM_REG_REF_OP add, type, reg, insn->op2 break; case IR_SUB: | ASM_REG_REF_OP sub, type, reg, insn->op2 break; case IR_MUL: | ASM_REG_REF_IMUL type, reg, insn->op2 break; case IR_OR: | ASM_REG_REF_OP or, type, reg, insn->op2 break; case IR_AND: | ASM_REG_REF_OP and, type, reg, insn->op2 break; case IR_XOR: | ASM_REG_REF_OP xor, type, reg, insn->op2 break; default: IR_ASSERT(0 && "NIY binary op"); break; } } if (def_reg != reg) { ir_emit_store(ctx, type, reg, def); } } void ir_emit_incdec(ir_ctx *ctx, uint32_t rule, ir_ref def, ir_insn *insn) { ir_backend_data *data 
= ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg def_reg = ir_ref_reg(ctx, def); ir_reg op1_reg = ir_ref_reg(ctx, insn->op1); ir_reg reg; if (def_reg >= 0) { reg = def_reg; } else if (op1_reg >= 0 && ir_last_use(ctx, insn->op1)) { reg = op1_reg; } else { reg = IR_REG_RAX; // TODO: temporary register } if (op1_reg != reg) { ir_emit_load(ctx, insn->type, insn->op1, reg); } if (rule == IR_INC) { | ASM_REG_OP inc, insn->type, reg } else { | ASM_REG_OP dec, insn->type, reg } if (def_reg != reg) { ir_emit_store(ctx, insn->type, reg, def); } } void ir_emit_mul2(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg def_reg = ir_ref_reg(ctx, def); ir_reg op1_reg = ir_ref_reg(ctx, insn->op1); ir_reg reg; if (def_reg >= 0) { reg = def_reg; } else if (op1_reg >= 0 && ir_last_use(ctx, insn->op1)) { reg = op1_reg; } else { reg = IR_REG_RAX; // TODO: temporary register } if (op1_reg != reg) { ir_emit_load(ctx, insn->type, insn->op1, reg); } | ASM_REG_REG_OP add, insn->type, reg, reg if (def_reg != reg) { ir_emit_store(ctx, insn->type, reg, def); } } void ir_emit_mul_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg def_reg = ir_ref_reg(ctx, def); ir_reg op1_reg = ir_ref_reg(ctx, insn->op1); ir_reg reg; uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64); if (def_reg >= 0) { reg = def_reg; } else if (op1_reg >= 0 && ir_last_use(ctx, insn->op1)) { reg = op1_reg; } else { reg = IR_REG_RAX; // TODO: temporary register } if (op1_reg != reg) { ir_emit_load(ctx, insn->type, insn->op1, reg); } | ASM_REG_IMM_OP shl, insn->type, reg, shift if (def_reg != reg) { ir_emit_store(ctx, insn->type, reg, def); } } void ir_emit_div_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg def_reg = ir_ref_reg(ctx, def); ir_reg op1_reg = ir_ref_reg(ctx, insn->op1); uint32_t 
shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64); ir_reg reg; if (def_reg >= 0) { reg = def_reg; } else if (op1_reg >= 0 && ir_last_use(ctx, insn->op1)) { reg = op1_reg; } else { reg = IR_REG_RAX; // TODO: temporary register } if (op1_reg != reg) { ir_emit_load(ctx, insn->type, insn->op1, reg); } | ASM_REG_IMM_OP shr, insn->type, reg, shift if (def_reg != reg) { ir_emit_store(ctx, insn->type, reg, def); } } void ir_emit_mod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg def_reg = ir_ref_reg(ctx, def); ir_reg op1_reg = ir_ref_reg(ctx, insn->op1); uint32_t mask = IR_LOG2(ctx->ir_base[insn->op2].val.u64) - 1; ir_reg reg; if (def_reg >= 0) { reg = def_reg; } else if (op1_reg >= 0 && ir_last_use(ctx, insn->op1)) { reg = op1_reg; } else { reg = IR_REG_RAX; // TODO: temporary register } if (op1_reg != reg) { ir_emit_load(ctx, insn->type, insn->op1, reg); } | ASM_REG_IMM_OP and, insn->type, reg, mask if (def_reg != reg) { ir_emit_store(ctx, insn->type, reg, def); } } void ir_emit_shift(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg def_reg = ir_ref_reg(ctx, def); ir_reg op1_reg = ir_ref_reg(ctx, insn->op1); ir_reg op2_reg = ir_ref_reg(ctx, insn->op2); ir_reg reg; if (def_reg >= 0) { reg = def_reg; } else if (op1_reg >= 0 && ir_last_use(ctx, insn->op1)) { reg = op1_reg; } else { reg = IR_REG_RAX; // TODO: temporary register } if (op1_reg != reg) { ir_emit_load(ctx, insn->type, insn->op1, reg); } if (op2_reg != IR_REG_RCX) { ir_emit_load(ctx, insn->type, insn->op2, IR_REG_RCX); } switch (insn->op) { case IR_SHL: | ASM_REG_IMM_OP shl, insn->type, reg, cl break; case IR_SHR: | ASM_REG_IMM_OP shr, insn->type, reg, cl break; case IR_SAR: | ASM_REG_IMM_OP sar, insn->type, reg, cl break; case IR_ROL: | ASM_REG_IMM_OP rol, insn->type, reg, cl break; case IR_ROR: | ASM_REG_IMM_OP ror, insn->type, reg, cl break; default: 
IR_ASSERT(0); } if (def_reg != reg) { ir_emit_store(ctx, insn->type, reg, def); } } void ir_emit_shift_const(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg def_reg = ir_ref_reg(ctx, def); ir_reg op1_reg = ir_ref_reg(ctx, insn->op1); uint32_t shift = ctx->ir_base[insn->op2].val.u64; ir_reg reg; if (def_reg >= 0) { reg = def_reg; } else if (op1_reg >= 0 && ir_last_use(ctx, insn->op1)) { reg = op1_reg; } else { reg = IR_REG_RAX; // TODO: temporary register } if (op1_reg != reg) { ir_emit_load(ctx, insn->type, insn->op1, reg); } switch (insn->op) { case IR_SHL: | ASM_REG_IMM_OP shl, insn->type, reg, shift break; case IR_SHR: | ASM_REG_IMM_OP shr, insn->type, reg, shift break; case IR_SAR: | ASM_REG_IMM_OP sar, insn->type, reg, shift break; case IR_ROL: | ASM_REG_IMM_OP rol, insn->type, reg, shift break; case IR_ROR: | ASM_REG_IMM_OP ror, insn->type, reg, shift break; default: IR_ASSERT(0); } if (def_reg != reg) { ir_emit_store(ctx, insn->type, reg, def); } } void ir_emit_op_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg def_reg = ir_ref_reg(ctx, def); ir_reg op1_reg = ir_ref_reg(ctx, insn->op1); ir_reg reg; if (def_reg >= 0) { reg = def_reg; } else if (op1_reg >= 0 && ir_last_use(ctx, insn->op1)) { reg = op1_reg; } else { reg = IR_REG_RAX; // TODO: temporary register } if (op1_reg != reg) { ir_emit_load(ctx, insn->type, insn->op1, reg); } if (insn->op == IR_NOT) { | ASM_REG_OP not, insn->type, reg } else if (insn->op == IR_NEG) { | ASM_REG_OP neg, insn->type, reg } else if (insn->op == IR_BSWAP) { switch (ir_type_size[insn->type]) { case 4: | bswap Rd(reg) break; case 8: | bswap Rq(reg) break; default: IR_ASSERT(0); } } else { IR_ASSERT(0); } if (def_reg != reg) { ir_emit_store(ctx, insn->type, reg, def); } } void ir_emit_bool_not_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; 
dasm_State **Dst = &data->dasm_state; ir_reg def_reg = ir_ref_reg(ctx, def); ir_reg op1_reg = ir_ref_reg(ctx, insn->op1); ir_reg reg; ir_type type = ctx->ir_base[insn->op1].type; if (op1_reg >= 0) { | ASM_REG_REG_OP test, type, op1_reg, op1_reg } else { | ASM_REF_IMM_OP cmp, type, insn->op1, 0 } if (def_reg >= 0) { reg = def_reg; } else if (op1_reg >= 0 && ir_last_use(ctx, insn->op1)) { reg = op1_reg; } else { reg = IR_REG_RAX; // TODO: temporary register } | setne Rb(reg) if (def_reg != reg) { ir_emit_store(ctx, insn->type, reg, def); } } void ir_emit_mul_div_mod(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_type type = insn->type; ir_reg op1_reg = ir_ref_reg(ctx, insn->op1); ir_reg op2_reg = ir_ref_reg(ctx, insn->op2); ir_reg def_reg = ir_ref_reg(ctx, def); ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; if (op1_reg != IR_REG_RAX) { ir_emit_load(ctx, type, insn->op1, IR_REG_RAX); } if (op2_reg < 0) { if (insn->op1 == insn->op2) { op2_reg = IR_REG_RAX; } else if (IR_IS_CONST_REF(insn->op2)) { if (insn->op == IR_MUL) { op2_reg = IR_REG_RDX; ir_emit_load(ctx, type, insn->op2, IR_REG_RDX); // TODO: temporary register 2 } else { op2_reg = IR_REG_RCX; ir_emit_load(ctx, type, insn->op2, IR_REG_RCX); // TODO: temporary register 2 } } } if (insn->op == IR_MUL) { if (IR_IS_TYPE_SIGNED(insn->type)) { if (op2_reg >= 0) { | ASM_REG_OP imul, type, op2_reg } else { | ASM_REF_OP imul, type, insn->op2 } } else { if (op2_reg >= 0) { | ASM_REG_OP mul, type, op2_reg } else { | ASM_REF_OP mul, type, insn->op2 } } } else { if (IR_IS_TYPE_SIGNED(type)) { | cdq if (op2_reg >= 0) { | ASM_REG_OP idiv, type, op2_reg } else { | ASM_REF_OP idiv, type, insn->op2 } } else { | ASM_REG_REG_OP xor, type, IR_REG_RDX, IR_REG_RDX if (op2_reg >= 0) { | ASM_REG_OP div, type, op2_reg } else { | ASM_REF_OP div, type, insn->op2 } } } if (insn->op == IR_MUL || insn->op == IR_DIV) { if (def_reg != IR_REG_RAX) { ir_emit_store(ctx, type, IR_REG_RAX, def); } } else if (insn->op == 
IR_MOD) { if (ir_type_size[type] == 1) { ir_live_interval *ival = ctx->live_intervals[ctx->vregs[def]]; ir_reg reg = ir_ref_reg(ctx, def); if (reg >= 0) { | mov al, ah | mov Rb(reg), al } else if (ival->stack_spill_pos) { if (ctx->flags & IR_USE_FRAME_POINTER) { int32_t offset = -ival->stack_spill_pos; | mov byte [rbp+offset], ah } else { int32_t offset = data->stack_frame_size - ival->stack_spill_pos; | mov byte [rsp+offset], ah } } } else if (def_reg != IR_REG_RDX) { ir_emit_store(ctx, type, IR_REG_RDX, def); } } else { IR_ASSERT(0); } } void ir_emit_binop_sse2(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_type type = insn->type; ir_reg def_reg = ir_ref_reg(ctx, def); ir_reg op1_reg = ir_ref_reg(ctx, insn->op1); ir_reg op2_reg = ir_ref_reg(ctx, insn->op2); ir_reg reg; ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; if (def_reg >= 0) { reg = def_reg; } else if (op1_reg >= 0 && ir_last_use(ctx, insn->op1)) { reg = op1_reg; } else { // TODO: commutative insns reg = IR_REG_XMM7; // TODO: temporary register } if (op1_reg != reg) { ir_emit_fp_load(ctx, type, insn->op1, reg); } if (op2_reg < 0 && insn->op1 == insn->op2) { op2_reg = reg; } if (op2_reg >= 0) { switch (insn->op) { case IR_ADD: | ASM_SSE2_REG_REG_OP addss, addsd, type, reg, op2_reg break; case IR_SUB: | ASM_SSE2_REG_REG_OP subss, subsd, type, reg, op2_reg break; case IR_MUL: | ASM_SSE2_REG_REG_OP mulss, mulsd, type, reg, op2_reg break; case IR_DIV: | ASM_SSE2_REG_REG_OP divss, divsd, type, reg, op2_reg break; default: IR_ASSERT(0 && "NIY binary op"); break; } } else { switch (insn->op) { case IR_ADD: | ASM_SSE2_REG_REF_OP addss, addsd, type, reg, insn->op2 break; case IR_SUB: | ASM_SSE2_REG_REF_OP subss, subsd, type, reg, insn->op2 break; case IR_MUL: | ASM_SSE2_REG_REF_OP mulss, mulsd, type, reg, insn->op2 break; case IR_DIV: | ASM_SSE2_REG_REF_OP divss, divsd, type, reg, insn->op2 break; default: IR_ASSERT(0 && "NIY binary op"); break; } } if (def_reg != reg) { 
ir_emit_fp_store(ctx, type, reg, def); } } void ir_emit_binop_avx(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_type type = insn->type; ir_reg def_reg = ir_ref_reg(ctx, def); ir_reg op1_reg = ir_ref_reg(ctx, insn->op1); ir_reg op2_reg = ir_ref_reg(ctx, insn->op2); ir_reg reg; ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; if (def_reg >= 0) { reg = def_reg; } else if (op1_reg >= 0 && ir_last_use(ctx, insn->op1)) { reg = op1_reg; } else { // TODO: commutative insns reg = IR_REG_XMM7; // TODO: temporary register } if (op1_reg < 0) { ir_emit_fp_load(ctx, type, insn->op1, reg); op1_reg = reg; } if (op2_reg < 0 && insn->op1 == insn->op2) { op2_reg = reg; } if (op2_reg >= 0) { switch (insn->op) { case IR_ADD: | ASM_AVX_REG_REG_REG_OP vaddss, vaddsd, type, reg, op1_reg, op2_reg break; case IR_SUB: | ASM_AVX_REG_REG_REG_OP vsubss, vsubsd, type, reg, op1_reg, op2_reg break; case IR_MUL: | ASM_AVX_REG_REG_REG_OP vmulss, vmulsd, type, reg, op1_reg, op2_reg break; case IR_DIV: | ASM_AVX_REG_REG_REG_OP vdivss, vdivsd, type, reg, op1_reg, op2_reg break; default: IR_ASSERT(0 && "NIY binary op"); break; } } else { switch (insn->op) { case IR_ADD: | ASM_AVX_REG_REG_REF_OP vaddss, vaddsd, type, reg, op1_reg, insn->op2 break; case IR_SUB: | ASM_AVX_REG_REG_REF_OP vsubss, vsubsd, type, reg, op1_reg, insn->op2 break; case IR_MUL: | ASM_AVX_REG_REG_REF_OP vmulss, vmulsd, type, reg, op1_reg, insn->op2 break; case IR_DIV: | ASM_AVX_REG_REG_REF_OP vdivss, vdivsd, type, reg, op1_reg, insn->op2 break; default: IR_ASSERT(0 && "NIY binary op"); break; } } if (def_reg != reg) { ir_emit_fp_store(ctx, type, reg, def); } } void ir_emit_cmp_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_type type = ctx->ir_base[insn->op1].type; ir_reg op1_reg = ir_ref_reg(ctx, insn->op1); ir_reg op2_reg = ir_ref_reg(ctx, insn->op2); ir_reg def_reg = ir_ref_reg(ctx, def); ir_reg reg; ir_op op; ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; if (def_reg >= 0) { reg = 
def_reg; } else { reg = IR_REG_RAX; // TODO: temporary register } if (op1_reg < 0 && op2_reg < 0) { if (IR_IS_CONST_REF(insn->op2)) { /* cmp $imm, mem */ } else { // TODO: commutative insns op1_reg = IR_REG_RAX; // TODO: temporary register ir_emit_load(ctx, type, insn->op1, op1_reg); } } op = insn->op; if (op2_reg >= 0) { | ASM_REF_REG_OP cmp, type, insn->op1, op2_reg } else if (op1_reg >= 0) { if (IR_IS_CONST_REF(insn->op2) && ctx->ir_base[insn->op2].val.u64 == 0) { if (op == IR_ULT) { /* always false */ if (def_reg >= 0) { | xor Ra(def_reg), Ra(def_reg) } else { | ASM_REF_IMM_OP mov, insn->type, def, 0 } return; } else if (op == IR_UGE) { /* always true */ | ASM_REF_IMM_OP mov, insn->type, def, 1 return; } else if (op == IR_ULE) { op = IR_EQ; } else if (op == IR_UGT) { op = IR_NE; } | ASM_REG_REG_OP test, type, op1_reg, op1_reg } else { | ASM_REG_REF_OP cmp, type, op1_reg, insn->op2 } } switch (op) { case IR_EQ: | sete Rb(reg) break; case IR_NE: | setne Rb(reg) break; case IR_LT: | setl Rb(reg) break; case IR_GE: | setge Rb(reg) break; case IR_LE: | setle Rb(reg) break; case IR_GT: | setg Rb(reg) break; case IR_ULT: | setb Rb(reg) break; case IR_UGE: | setae Rb(reg) break; case IR_ULE: | setbe Rb(reg) break; case IR_UGT: | seta Rb(reg) break; default: IR_ASSERT(0 && "NIY binary op"); break; } if (reg != def_reg) { ir_emit_store(ctx, insn->type, reg, def); } } void ir_emit_cmp_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_type type = ctx->ir_base[insn->op1].type; ir_reg op1_reg, op2_reg, def_reg, reg; ir_ref op1, op2; ir_op op = insn->op; ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; if (op == IR_LT || op == IR_LE) { /* swap operands to avoid P flag check */ op ^= 3; op1 = insn->op2; op2 = insn->op1; } else { op1 = insn->op1; op2 = insn->op2; } def_reg = ir_ref_reg(ctx, def); op1_reg = ir_ref_reg(ctx, op1); op2_reg = ir_ref_reg(ctx, op2); if (def_reg >= 0) { reg = def_reg; } else { reg = IR_REG_RAX; // TODO: temporary register } if 
(op1_reg < 0) { if (op2_reg >= 0 && (op == IR_EQ || op == IR_NE)) { ir_ref tmp; ir_reg tmp_reg; tmp = op1; op1 = op2; op2 = tmp; tmp_reg = op1_reg; op1_reg = op2_reg; op2_reg = tmp_reg; } else { op1_reg = IR_REG_XMM7; // TODO: temporary register ir_emit_fp_load(ctx, type, op1, op1_reg); } } | ASM_FP_REG_REF_OP ucomiss, ucomisd, vucomiss, vucomisd, type, op1_reg, op2 switch (op) { case IR_EQ: //| sete Rb(reg) | setnp al | mov edx, 0 // TODO: Temporary register | cmovne eax, edx break; case IR_NE: //| setne Rb(reg) | setp al | mov edx, 1 // TODO: Temporary register | cmovne eax, edx break; case IR_LT: //| setb Rb(reg) | setnp al | mov edx, 0 // TODO: Temporary register | cmovae eax, edx break; case IR_GE: | setae Rb(reg) break; case IR_LE: //| setbe Rb(reg) | setnp al | mov edx, 0 // TODO: Temporary register | cmova eax, edx break; case IR_GT: | seta Rb(reg) break; // case IR_ULT: fprintf(stderr, "\tsetb "); break; // case IR_UGE: fprintf(stderr, "\tsetae "); break; // case IR_ULE: fprintf(stderr, "\tsetbe "); break; // case IR_UGT: fprintf(stderr, "\tseta "); break; default: IR_ASSERT(0 && "NIY binary op"); break; } if (reg != def_reg) { ir_emit_store(ctx, insn->type, reg, def); } } static void ir_emit_jmp_true(ir_ctx *ctx, int b, ir_ref def) { ir_use_list *use_list; ir_insn *use_insn; ir_ref i, *p, use, n; int true_block = 0, false_block = 0, next_block; ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; use_list = &ctx->use_lists[def]; n = use_list->count; for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { use = *p; use_insn = &ctx->ir_base[use]; if (use_insn->op == IR_IF_TRUE) { true_block = ir_skip_empty_blocks(ctx, ctx->bb_num[use]); } else if (use_insn->op == IR_IF_FALSE) { false_block = ir_skip_empty_blocks(ctx, ctx->bb_num[use]); } else { IR_ASSERT(0); } } IR_ASSERT(true_block && false_block); next_block = ir_skip_empty_blocks(ctx, b + 1); if (true_block != next_block) { | jmp =>true_block } } static void 
ir_emit_jmp_false(ir_ctx *ctx, int b, ir_ref def) { ir_use_list *use_list; ir_insn *use_insn; ir_ref i, *p, use, n; int true_block = 0, false_block = 0, next_block; ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; use_list = &ctx->use_lists[def]; n = use_list->count; for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { use = *p; use_insn = &ctx->ir_base[use]; if (use_insn->op == IR_IF_TRUE) { true_block = ir_skip_empty_blocks(ctx, ctx->bb_num[use]); } else if (use_insn->op == IR_IF_FALSE) { false_block = ir_skip_empty_blocks(ctx, ctx->bb_num[use]); } else { IR_ASSERT(0); } } IR_ASSERT(true_block && false_block); next_block = ir_skip_empty_blocks(ctx, b + 1); if (false_block != next_block) { | jmp =>false_block } } static void ir_emit_jcc(ir_ctx *ctx, uint8_t op, int b, ir_ref def, ir_insn *insn, bool int_cmp) { ir_use_list *use_list; ir_insn *use_insn; ir_ref i, *p, use, n; int true_block = 0, false_block = 0, next_block; ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; use_list = &ctx->use_lists[def]; n = use_list->count; for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { use = *p; use_insn = &ctx->ir_base[use]; if (use_insn->op == IR_IF_TRUE) { true_block = ir_skip_empty_blocks(ctx, ctx->bb_num[use]); } else if (use_insn->op == IR_IF_FALSE) { false_block = ir_skip_empty_blocks(ctx, ctx->bb_num[use]); } else { IR_ASSERT(0); } } IR_ASSERT(true_block && false_block); next_block = ir_skip_empty_blocks(ctx, b + 1); if (true_block == next_block) { if (int_cmp || (op != IR_GT && op != IR_GE)) { /* swap to avoid unconditional JMP if this doesn't introduce additional JP instruction */ if (op < IR_LT) { op ^= 1; // reverse } else { op ^= 3; // reverse } true_block = false_block; false_block = 0; } } else if (false_block != next_block) { false_block = 0; } if (int_cmp) { switch (op) { case IR_EQ: | je =>true_block break; case IR_NE: | jne =>true_block break; case IR_LT: | jl =>true_block break; 
case IR_GE: | jge =>true_block break; case IR_LE: | jle =>true_block break; case IR_GT: | jg =>true_block break; case IR_ULT: | jb =>true_block break; case IR_UGE: | jae =>true_block break; case IR_ULE: | jbe =>true_block break; case IR_UGT: | ja =>true_block break; default: IR_ASSERT(0 && "NIY binary op"); break; } } else { switch (op) { case IR_EQ: if (!false_block) { | jp >1 | je =>true_block |1: } else { | jp =>false_block | je =>true_block } break; case IR_NE: | jne =>true_block | jp =>true_block break; case IR_LT: if (!false_block) { | jp >1 | jb =>true_block |1: } else { | jp =>false_block | jb =>true_block } break; case IR_GE: | jae =>true_block break; case IR_LE: if (!false_block) { | jp >1 | jbe =>true_block |1: } else { | jp =>false_block | jbe =>true_block } break; case IR_GT: | ja =>true_block break; // case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break; // case IR_UGE: fprintf(stderr, "\tjae .LL%d\n", true_block); break; // case IR_ULE: fprintf(stderr, "\tjbe .LL%d\n", true_block); break; // case IR_UGT: fprintf(stderr, "\tja .LL%d\n", true_block); break; default: IR_ASSERT(0 && "NIY binary op"); break; } } if (false_block) { | jmp =>false_block } } void ir_emit_cmp_and_branch_int(ir_ctx *ctx, int b, ir_ref def, ir_insn *insn) { ir_insn *cmp_insn = &ctx->ir_base[insn->op2]; ir_type type = ctx->ir_base[cmp_insn->op1].type; ir_reg op1_reg = ir_ref_reg(ctx, cmp_insn->op1); ir_reg op2_reg = ir_ref_reg(ctx, cmp_insn->op2); ir_op op; if (op1_reg < 0 && op2_reg < 0) { if (IR_IS_CONST_REF(cmp_insn->op2)) { /* cmp $imm, mem */ } else { // TODO: commutative insns op1_reg = IR_REG_RAX; // TODO: temporary register ir_emit_load(ctx, type, cmp_insn->op1, op1_reg); } } ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; op = cmp_insn->op; if (op2_reg >= 0) { | ASM_REF_REG_OP cmp, type, cmp_insn->op1, op2_reg } else if (op1_reg >= 0) { if (IR_IS_CONST_REF(cmp_insn->op2) && ctx->ir_base[cmp_insn->op2].val.u64 == 0) { if (op == IR_ULT) 
{ /* always false */ ir_emit_jmp_false(ctx, b, def); return; } else if (op == IR_UGE) { /* always true */ ir_emit_jmp_true(ctx, b, def); return; } else if (op == IR_ULE) { op = IR_EQ; } else if (op == IR_UGT) { op = IR_NE; } | ASM_REG_REG_OP test, type, op1_reg, op1_reg } else { | ASM_REG_REF_OP cmp, type, op1_reg, cmp_insn->op2 } } ir_emit_jcc(ctx, op, b, def, insn, 1); } void ir_emit_cmp_and_branch_fp(ir_ctx *ctx, int b, ir_ref def, ir_insn *insn) { ir_insn *cmp_insn = &ctx->ir_base[insn->op2]; ir_type type = ctx->ir_base[cmp_insn->op1].type; ir_op op = cmp_insn->op; ir_ref op1, op2; ir_reg op1_reg, op2_reg; ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; if (op == IR_LT || op == IR_LE) { /* swap operands to avoid P flag check */ op ^= 3; op1 = cmp_insn->op2; op2 = cmp_insn->op1; } else { op1 = cmp_insn->op1; op2 = cmp_insn->op2; } op1_reg = ir_ref_reg(ctx, op1); op2_reg = ir_ref_reg(ctx, op2); if (op1_reg < 0) { if (op2_reg >= 0 && (op == IR_EQ || op == IR_NE)) { ir_ref tmp; ir_reg tmp_reg; tmp = op1; op1 = op2; op2 = tmp; tmp_reg = op1_reg; op1_reg = op2_reg; op2_reg = tmp_reg; } else { op1_reg = IR_REG_XMM7; // TODO: temporary register ir_emit_fp_load(ctx, type, op1, op1_reg); } } | ASM_FP_REG_REF_OP ucomiss, ucomisd, vucomiss, vucomisd, type, op1_reg, op2 ir_emit_jcc(ctx, op, b, def, insn, 0); } void ir_emit_if_int(ir_ctx *ctx, int b, ir_ref def, ir_insn *insn) { ir_type type = ctx->ir_base[insn->op2].type; ir_reg op2_reg = ir_ref_reg(ctx, insn->op2); ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; if (op2_reg >= 0) { | ASM_REG_REG_OP test, type, op2_reg, op2_reg } else { | ASM_REF_IMM_OP cmp, type, insn->op2, 0 } ir_emit_jcc(ctx, IR_NE, b, def, insn, 1); } void ir_emit_return_void(ir_ctx *ctx) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_emit_epilogue(ctx); | ret } void ir_emit_return_int(ir_ctx *ctx, ir_insn *insn) { ir_reg op2_reg = ir_ref_reg(ctx, insn->op2); if (op2_reg != 
IR_REG_INT_RET1) { ir_emit_load(ctx, ctx->ir_base[insn->op2].type, insn->op2, IR_REG_INT_RET1); } ir_emit_return_void(ctx); } void ir_emit_return_fp(ir_ctx *ctx, ir_insn *insn) { ir_reg op2_reg = ir_ref_reg(ctx, insn->op2); if (op2_reg != IR_REG_FP_RET1) { ir_emit_fp_load(ctx, ctx->ir_base[insn->op2].type, insn->op2, IR_REG_FP_RET1); } ir_emit_return_void(ctx); } static void ir_emit_copy_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_ref type = insn->type; ir_reg def_reg = ir_ref_reg(ctx, def); ir_reg op1_reg = ir_ref_reg(ctx, insn->op1); ir_reg reg; if (def_reg >= 0 && def_reg == op1_reg) { /* same reg */ } else if (def_reg >= 0) { ir_emit_load(ctx, type, insn->op1, def_reg); } else if (op1_reg >= 0) { ir_emit_store(ctx, type, op1_reg, def); } else if (IR_IS_CONST_REF(insn->op1)) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; | ASM_REF_IMM_OP mov, type, def, insn->op1 } else { reg = IR_REG_RAX; // TODO: temporary register ir_emit_load(ctx, type, insn->op1, reg); ir_emit_store(ctx, type, reg, def); } } static void ir_emit_copy_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_type type = insn->type; ir_reg def_reg = ir_ref_reg(ctx, def); ir_reg op1_reg = ir_ref_reg(ctx, insn->op1); ir_reg reg; if (def_reg > 0 && def_reg == op1_reg) { /* same reg */ } else if (def_reg >= 0) { ir_emit_fp_load(ctx, type, insn->op1, def_reg); } else if (op1_reg >= 0) { ir_emit_fp_store(ctx, type, op1_reg, def); } else { reg = IR_REG_XMM7; // TODO: temporary register ir_emit_fp_load(ctx, type, insn->op1, reg); ir_emit_fp_store(ctx, type, reg, def); } } static void ir_emit_switch(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_type type; ir_use_list *use_list; ir_insn *use_insn, *val; ir_ref i, *p, use, n; int label, default_label = 0; int count; int64_t min, max;; int32_t offset; type = ctx->ir_base[insn->op2].type; if (IR_IS_TYPE_SIGNED(type)) { min = 0x7fffffffffffffff; max = 
0x8000000000000000; } else { min = 0x0; max = 0xffffffffffffffff; } use_list = &ctx->use_lists[def]; n = use_list->count; for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { use = *p; use_insn = &ctx->ir_base[use]; if (use_insn->op == IR_CASE_VAL) { val = &ctx->ir_base[use_insn->op2]; if (IR_IS_TYPE_SIGNED(type)) { IR_ASSERT(IR_IS_TYPE_SIGNED(val->type)); min = IR_MIN(min, val->val.i64); max = IR_MAX(max, val->val.i64); } else { IR_ASSERT(!IR_IS_TYPE_SIGNED(val->type)); min = (int64_t)IR_MIN((uint64_t)min, val->val.u64); max = (int64_t)IR_MAX((uint64_t)max, val->val.u64); } count++; } else if (use_insn->op == IR_CASE_DEFAULT) { default_label = ir_skip_empty_blocks(ctx, ctx->bb_num[use]); } else { IR_ASSERT(0); } } /* Generate a table jmp or a seqence of calls */ if ((max-min) < count * 8) { int *labels = ir_mem_malloc(sizeof(int) * (max - min + 1)); ir_reg op2_reg = ir_ref_reg(ctx, insn->op2); ir_reg reg; if (op2_reg > 0) { reg = op2_reg; } else { reg = IR_REG_RAX; // TODO: remporary register ir_emit_load(ctx, type, insn->op2, reg); } offset = -min * 8; for (i = 0; i <= (max-min+1); i++) { labels[i] = default_label; } for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { use = *p; use_insn = &ctx->ir_base[use]; if (use_insn->op == IR_CASE_VAL) { val = &ctx->ir_base[use_insn->op2]; label = ir_skip_empty_blocks(ctx, ctx->bb_num[use]); labels[val->val.i64 - min] = label; } } | ASM_REG_IMM_OP cmp, type, reg, min if (IR_IS_TYPE_SIGNED(type)) { | jl =>default_label } else { | jb =>default_label } | ASM_REG_IMM_OP cmp, type, reg, max if (IR_IS_TYPE_SIGNED(type)) { | jg =>default_label } else { | ja =>default_label } |.if X64 switch (ir_type_size[type]) { case 1: if (IR_IS_TYPE_SIGNED(type)) { | movsx Ra(reg), Rb(reg) } else { | movzx Ra(reg), Rb(reg) } break; case 2: if (IR_IS_TYPE_SIGNED(type)) { | movsx Ra(reg), Rw(reg) } else { | movzx Ra(reg), Rw(reg) } break; case 4: if (IR_IS_TYPE_SIGNED(type)) { | movsxd Ra(reg), Rd(reg) } else { | mov 
Rd(reg), Rd(reg) } break; case 8: break; default: IR_ASSERT(0); } | lea r0, aword [>1] // TODO: r0 temporary register | jmp aword [r0 + Ra(reg)*8 + offset] |.else switch (ir_type_size[type]) { case 1: if (IR_IS_TYPE_SIGNED(type)) { | movsx Ra(reg), Rb(reg) } else { | movzx Ra(reg), Rb(reg) } break; case 2: if (IR_IS_TYPE_SIGNED(type)) { | movsx Ra(reg), Rw(reg) } else { | movzx Ra(reg), Rw(reg) } break; case 4: break; default: IR_ASSERT(0); } | jmp aword [offset + Ra(reg) * 4 + >1] |.endif |.jmp_table if (!data->jmp_table_label) { data->jmp_table_label = ctx->cfg_blocks_count + ctx->consts_count + 3; |=>data->jmp_table_label: } |.align aword |1: for (i = 0; i <= (max-min+1); i++) { | .aword =>labels[i] } |.code ir_mem_free(labels); } else { for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { use = *p; use_insn = &ctx->ir_base[use]; if (use_insn->op == IR_CASE_VAL) { val = &ctx->ir_base[use_insn->op2]; label = ir_skip_empty_blocks(ctx, ctx->bb_num[use]); | ASM_REF_IMM_OP cmp, type, insn->op2, val->val.i32 // TODO: 64-bit constant support | je =>label } } if (default_label) { | jmp =>default_label } } } #define IS_32BIT(addr) (((uintptr_t)(addr)) <= 0x7fffffff) #define IS_SIGNED_32BIT(val) ((((intptr_t)(val)) <= 0x7fffffff) && (((intptr_t)(val)) >= (-2147483647 - 1))) #include static void *ir_resolve_sym_name(const char *name) { void *handle = NULL; void *addr; #ifdef RTLD_DEFAULT handle = RTLD_DEFAULT; #endif addr = dlsym(handle, name); IR_ASSERT(addr != NULL); return addr; } typedef struct _ir_copy { ir_ref from; ir_reg to; } ir_copy; int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; int i; ir_ref from; ir_reg to, from_reg; ir_type type; ir_ref *loc, *pred; ir_regset todo, ready; loc = ir_mem_calloc(IR_REG_NUM * 2, sizeof(ir_ref)); pred = loc + IR_REG_NUM; memset(loc, IR_REG_NONE, IR_REG_NUM * sizeof(ir_ref)); todo = IR_REGSET_EMPTY; ready = IR_REGSET_EMPTY; for 
(i = 0; i < count; i++) { from = copies[i].from; from_reg = ir_ref_reg(ctx, from); to = copies[i].to; if (from_reg == IR_REG_NONE) { pred[to] = from; IR_REGSET_INCL(todo, to); } else if (from_reg != to) { loc[from_reg] = from_reg; pred[to] = from; IR_REGSET_INCL(todo, to); } } IR_REGSET_FOREACH(todo, i) { if (loc[i] == IR_REG_NONE) { IR_REGSET_INCL(ready, i); } } IR_REGSET_FOREACH_END(); while (todo != IR_REGSET_EMPTY) { ir_ref /*a, b,*/ c; while (ready != IR_REGSET_EMPTY) { to = IR_REGSET_FIRST(ready); IR_REGSET_EXCL(ready, to); from = pred[to]; type = ctx->ir_base[from].type; from_reg = ir_ref_reg(ctx, from); if (from_reg == IR_REG_NONE) { if (IR_IS_TYPE_INT(type)) { ir_emit_load(ctx, type, from, to); } else { ir_emit_fp_load(ctx, type, from, to); } } else { c = loc[from_reg]; if (IR_IS_TYPE_INT(type)) { | ASM_REG_REG_OP mov, type, to, c } else { | ASM_FP_REG_REG_OP movaps, movapd, vmovaps, vmovapd, type, to, c } loc[from_reg] = to; if (from_reg == c && pred[from_reg]) { IR_REGSET_INCL(ready, from_reg); } } } to = IR_REGSET_FIRST(todo); IR_REGSET_EXCL(todo, to); from = pred[to]; from_reg = ir_ref_reg(ctx, from); if (from_reg != IR_REG_NONE && to != loc[from_reg]) { type = ctx->ir_base[from].type; if (IR_IS_TYPE_INT(type)) { | ASM_REG_REG_OP mov, type, IR_REG_R0, to // TODO: Temporary register } else { | ASM_FP_REG_REG_OP movaps, movapd, vmovaps, vmovapd, type, IR_REG_XMM7, to // TODO: Temporary register } loc[to] = 0; IR_REGSET_INCL(ready, to); } } ir_mem_free(loc); return 1; } static void ir_emit_call(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; const char *name; void *addr; int j, n; ir_ref arg; ir_insn *arg_insn; uint8_t type; ir_reg src, dst; int int_param = 0; int fp_param = 0; int count = 0; int int_reg_params_count = IR_REG_INT_ARGS; int fp_reg_params_count = IR_REG_FP_ARGS; const int8_t *int_reg_params = _ir_int_reg_params; const int8_t *fp_reg_params = _ir_fp_reg_params; ir_copy 
*copies; name = ir_get_str(ctx, ctx->ir_base[insn->op2].val.addr); addr = ir_resolve_sym_name(name); n = ir_input_edges_count(ctx, insn); copies = ir_mem_malloc((n - 2) * sizeof(ir_copy)); for (j = 3; j <= n; j++) { arg = insn->ops[j]; src = ir_ref_reg(ctx, arg); arg_insn = &ctx->ir_base[arg]; type = arg_insn->type; if (IR_IS_TYPE_INT(type)) { if (int_param < int_reg_params_count) { dst = int_reg_params[int_param]; if (src != dst) { copies[count].from = arg; copies[count].to = dst; count++; } } else { // TODO: pass arg throgh stack } int_param++; } else if (IR_IS_TYPE_FP(type)) { if (fp_param < fp_reg_params_count) { dst = fp_reg_params[fp_param]; if (src != dst) { copies[count].from = arg; copies[count].to = dst; count++; } } else { // TODO: pass arg throgh stack } fp_param++; } else { IR_ASSERT(0); } } ir_parallel_copy(ctx, copies, count); ir_mem_free(copies); // if (IS_SIGNED_32BIT(addr)) { // TODO: 32-bit IP relative or 64-bit absolute address // | call qword &addr // } else { | mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 | call rax // } if (insn->type != IR_VOID) { if (IR_IS_TYPE_INT(insn->type)) { dst = ir_ref_reg(ctx, def); if (dst != IR_REG_INT_RET1) { ir_emit_store(ctx, insn->type, IR_REG_INT_RET1, def); } } else if (IR_IS_TYPE_FP(insn->type)) { dst = ir_ref_reg(ctx, def); if (dst != IR_REG_FP_RET1) { ir_emit_fp_store(ctx, insn->type, IR_REG_FP_RET1, def); } } else { IR_ASSERT(0); } } } static int ir_emit_dessa_move(ir_ctx *ctx, uint8_t type, int from, int to) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_insn *from_insn; int8_t to_reg, from_reg, dst_reg; from_insn =&ctx->ir_base[from]; if (IR_IS_TYPE_INT(type)) { to_reg = to ? ir_vreg_reg(ctx, to) : IR_REG_RAX; // %rax is a temporary register from_reg = from ? 
ir_vreg_reg(ctx, from) : IR_REG_RAX; // %rax is a temporary register if (IR_IS_CONST_REF(from) && to_reg >= 0 && IR_IS_TYPE_INT(from_insn->type) && from_insn->val.u64 == 0) { | ASM_REG_REG_OP xor, type, to_reg, to_reg } else if (from > 0 && from_reg < 0 && to_reg < 0) { | ASM_VREGX_OP push, IR_ADDR, from | ASM_VREG_OP pop, IR_ADDR, to } else { if (from_reg >= 0 && to_reg >= 0) { | ASM_REG_REG_OP mov, type, to_reg, from_reg } else if (to_reg >= 0 && IR_IS_CONST_REF(from)) { ir_insn *_insn = &ctx->ir_base[from]; | ASM_REG_IMM_OP mov, type, to_reg, _insn->val.i32 // TODO: } else if (IR_IS_CONST_REF(from)) { | ASM_VREG_IMM_OP mov, type, to, from } else if (from_reg >= 0) { | ASM_VREG_REG_OP mov, type, to, from_reg } else if (to_reg >= 0) { | ASM_REG_VREG_OP mov, type, to_reg, from } } } else { to_reg = to ? ir_vreg_reg(ctx, to) : IR_REG_XMM0; // %xmm0 is a temporary register from_reg = from ? ir_vreg_reg(ctx, from) : IR_REG_XMM0; // %xmm0 is a temporary register dst_reg = to_reg; if (to_reg < 0 && from_reg < 0) { to_reg = IR_REG_XMM0; // TODO: temporary register } if (IR_IS_CONST_REF(from) && to_reg >= 0) { ir_emit_fp_load(ctx, from_insn->type, from, to_reg); } else { if (to_reg >= 0 && from_reg >= 0) { | ASM_FP_REG_REG_OP movaps, movapd, vmovaps, vmovapd, type, to_reg, from_reg } else if (from_reg >= 0) { | ASM_FP_MOV_VREG_REG_OP type, to, from_reg } else if (to_reg >= 0) { | ASM_FP_MOV_REG_VREG_OP type, to_reg, from } } if (to_reg != dst_reg) { | ASM_FP_MOV_VREG_REG_OP type, to, to_reg } } return 1; } static void ir_emit_param_move(ir_ctx *ctx, uint8_t type, int from_reg, int to, ir_ref name) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_live_interval *ival = ctx->live_intervals[to]; ir_reg to_reg = ir_vreg_reg(ctx, to); ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_RBP : IR_REG_RSP; int32_t offset; if (IR_IS_TYPE_INT(type)) { if (from_reg >= 0) { if (to_reg >= 0) { | ASM_REG_REG_OP mov, type, to_reg, from_reg } else { IR_ASSERT(ctx->live_intervals[to]->stack_spill_pos); offset = (ctx->flags & IR_USE_FRAME_POINTER) ? -ival->stack_spill_pos : data->stack_frame_size - ival->stack_spill_pos; | ASM_MEM_REG_OP mov, type, [Ra(fp)+offset], from_reg } } else { offset = (ctx->flags & IR_USE_FRAME_POINTER) ? -8 : -8; // TODO: if (to_reg >= 0) { | ASM_REG_MEM_OP mov, type, to_reg, [Ra(fp)+offset] } else { | ASM_REG_MEM_OP mov, type, IR_REG_R0, [Ra(fp)+offset] // TODO: tmp offset = (ctx->flags & IR_USE_FRAME_POINTER) ? -ival->stack_spill_pos : data->stack_frame_size - ival->stack_spill_pos; | ASM_MEM_REG_OP mov, type, [Ra(fp)+offset], IR_REG_R0 // TODO: tmp } } } else { if (from_reg >= 0) { if (to_reg >= 0) { | ASM_FP_REG_REG_OP movaps, movapd, vmovaps, vmovapd, type, from_reg, to_reg } else { IR_ASSERT(ctx->live_intervals[to]->stack_spill_pos); offset = (ctx->flags & IR_USE_FRAME_POINTER) ? -ival->stack_spill_pos : data->stack_frame_size - ival->stack_spill_pos; | ASM_FP_MEM_REG_OP movss, movsd, vmovss, vmovsd, type, [Ra(fp)+offset], from_reg } } else { offset = (ctx->flags & IR_USE_FRAME_POINTER) ? -8 : -8; // TODO: if (to_reg >= 0) { | ASM_FP_REG_MEM_OP movss, movsd, vmovss, vmovsd, type, to_reg, [Ra(fp)+offset] } else { | ASM_FP_REG_MEM_OP movss, movsd, vmovss, vmovsd, type, IR_REG_R0, [Ra(fp)+offset] // TODO: tmp offset = (ctx->flags & IR_USE_FRAME_POINTER) ? 
-ival->stack_spill_pos : data->stack_frame_size - ival->stack_spill_pos; | ASM_FP_MEM_REG_OP movss, movsd, vmovss, vmovsd, type, [Ra(fp)+offset], IR_REG_R0 // TODO: tmp } } } } static void ir_emit_load_params(ir_ctx *ctx) { ir_use_list *use_list = &ctx->use_lists[1]; ir_insn *insn; ir_ref i, n, *p, use; int int_param_num = 0; int fp_param_num = 0; int src_reg; /* negative values represent arguments on CPU stack */ int dst_reg; // TODO: Calling convention specific int int_reg_params_count = IR_REG_INT_ARGS; int fp_reg_params_count = IR_REG_FP_ARGS; const int8_t *int_reg_params = _ir_int_reg_params; const int8_t *fp_reg_params = _ir_fp_reg_params; n = use_list->count; for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { use = *p; insn = &ctx->ir_base[use]; if (insn->op == IR_PARAM) { if (ctx->vregs[use]) { if (IR_IS_TYPE_INT(insn->type)) { if (int_param_num < int_reg_params_count) { src_reg = int_reg_params[int_param_num]; } else { // TODO: replace IR_REG_NONE by stack slot src_reg = IR_REG_NONE; } int_param_num++; } else { if (fp_param_num < fp_reg_params_count) { src_reg = fp_reg_params[fp_param_num]; } else { // TODO: replace IR_REG_NONE by stack slot src_reg = IR_REG_NONE; } fp_param_num++; } dst_reg = ir_ref_reg(ctx, use); if (src_reg != dst_reg) { // TODO: DO parallel move ir_emit_param_move(ctx, insn->type, src_reg, ctx->vregs[use], insn->op2); } } else { if (IR_IS_TYPE_INT(insn->type)) { int_param_num++; } else { fp_param_num++; } } } } } static void ir_allocate_unique_spill_slots(ir_ctx *ctx) { int b; ir_block *bb; ir_insn *insn; ir_ref i, n; uint32_t flags, *rule; ir_backend_data *data = ctx->data; ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1, sizeof(ir_live_interval*)); for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) { bb->flags &= ~IR_BB_MAY_SKIP; flags = IR_BB_MAY_SKIP; if (bb->successors_count != 1 || ctx->cfg_edges[bb->successors] != b + 1 || (bb->flags & IR_BB_DESSA_MOVES)) { flags = 0; } for (i 
= bb->start, insn = ctx->ir_base + i, rule = ctx->rules + i; i <= bb->end;) { switch (ctx->rules ? *rule : insn->op) { case IR_START: case IR_BEGIN: case IR_END: case IR_IF_TRUE: case IR_IF_FALSE: case IR_CASE_VAL: case IR_CASE_DEFAULT: case IR_MERGE: case IR_LOOP_BEGIN: case IR_LOOP_END: case IR_LOOP_EXIT: case IR_SKIP: break; default: flags = 0; case IR_PARAM: case IR_VAR: case IR_PHI: case IR_PI: case IR_SKIP_REG: /* skip */ if (ctx->vregs[i]) { if (!ctx->live_intervals[ctx->vregs[i]]) { ir_live_interval *ival = ir_mem_malloc(sizeof(ir_live_interval)); ctx->live_intervals[ctx->vregs[i]] = ival; ival->type = insn->type; ival->reg = IR_REG_NONE; ival->range.start = 0; ival->range.end = 0; ival->range.next = NULL; // if (insn->op == IR_PARAM) { // fprintf(f, "\t%s d_%d = %s;\n", ir_type_cname[insn->type], ctx->vregs[i], ir_get_str(ctx, insn->op2)); // } else { data->stack_frame_size += 8; // ir_type_size[insn->type]; // TODO: alignment ival->stack_spill_pos = data->stack_frame_size; // fprintf(f, "\t%s d_%d;\n", ir_type_cname[insn->type], ctx->vregs[i]); // } } else if (insn->op == IR_PARAM) { IR_ASSERT(0 && "unexpected PARAM"); return; } } break; } n = ir_operands_count(ctx, insn); n = 1 + (n >> 2); // support for multi-word instructions like MERGE and PHI i += n; insn += n; rule += n; } bb->flags |= flags; } } static void ir_mark_empty_blocks(ir_ctx *ctx) { int b; ir_block *bb; ir_insn *insn; ir_ref i, n; uint32_t flags, *rule; for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) { bb->flags &= ~IR_BB_MAY_SKIP; if (bb->successors_count == 1 && ctx->cfg_edges[bb->successors] == b + 1 && !(bb->flags & IR_BB_DESSA_MOVES)) { flags = IR_BB_MAY_SKIP; for (i = bb->start, insn = ctx->ir_base + i, rule = ctx->rules + i; i < bb->end;) { if (*rule != IR_SKIP && *rule != IR_SKIP_REG) { flags = 0; break; } n = ir_operands_count(ctx, insn); n = 1 + (n >> 2); // support for multi-word instructions like MERGE and PHI i += n; insn += n; rule += n; } 
bb->flags |= flags; } } } static void ir_cals_stack_frame_size(ir_ctx *ctx, ir_backend_data *data) { int i; ir_live_interval **p, *ival; uint32_t additional_size = 0; for (i = 1, p = ctx->live_intervals + i; i <= ctx->vregs_count; i++, p++) { ival = *p; if (ival) { if (ival->stack_spill_pos) { if (ival->stack_spill_pos > data->stack_frame_size) { data->stack_frame_size = ival->stack_spill_pos; } } if (ival->reg >= 0) { if (!IR_REGSET_IN(data->used_preserved_regs, ival->reg) && IR_REGSET_IN(IR_REGSET_PRESERVED, ival->reg)) { IR_REGSET_INCL(data->used_preserved_regs, ival->reg); additional_size += 8; } } } } data->stack_frame_size += additional_size; if (ctx->flags & IR_HAS_CALLS) { /* Stack must be 16 byte aligned */ if (ctx->flags & IR_USE_FRAME_POINTER) { while (IR_ALIGNED_SIZE(data->stack_frame_size + sizeof(void*) * 2, 16) != data->stack_frame_size + sizeof(void*) * 2) { data->stack_frame_size += 8; } } else { while (IR_ALIGNED_SIZE(data->stack_frame_size + sizeof(void*), 16) != data->stack_frame_size + sizeof(void*)) { data->stack_frame_size += 8; } } } } static void* dasm_labels[ir_lb_MAX]; void *ir_emit(ir_ctx *ctx, size_t *size) { int b, n, target; ir_block *bb; ir_ref i; ir_insn *insn; uint32_t *rule; ir_backend_data data; dasm_State **Dst; int ret; void *entry; ctx->data = &data; data.stack_frame_size = 0; data.used_preserved_regs = 0; data.rodata_label = 0; data.jmp_table_label = 0; if (!ctx->live_intervals) { ir_allocate_unique_spill_slots(ctx); } else { ir_mark_empty_blocks(ctx); ir_cals_stack_frame_size(ctx, &data); } Dst = &data.dasm_state; data.dasm_state = NULL; dasm_init(&data.dasm_state, DASM_MAXSECTION); dasm_setupglobal(&data.dasm_state, dasm_labels, ir_lb_MAX); dasm_setup(&data.dasm_state, dasm_actions); dasm_growpc(&data.dasm_state, ctx->cfg_blocks_count + 1 + ctx->consts_count + 1 + 1 + 1); if (ctx->flags & IR_FUNCTION) { ir_emit_prologue(ctx); ir_emit_load_params(ctx); } for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, 
bb++) { // if (bb->flags & IR_BB_MAY_SKIP) { // continue; // } // if (ir_needs_block_label(ctx, b)) { |=>b: // } for (i = bb->start, insn = ctx->ir_base + i, rule = ctx->rules + i; i <= bb->end;) { switch (*rule) { case IR_SKIP: case IR_SKIP_REG: break; case IR_LEA_OB: { ir_reg def_reg = ir_ref_reg(ctx, i); ir_reg op1_reg = ir_ref_reg(ctx, insn->op1); int32_t offset = ctx->ir_base[insn->op2].val.i32; IR_ASSERT(def_reg >= 0 && op1_reg >= 0); if (insn->op == IR_SUB) { offset = -offset; } | lea Ra(def_reg), aword [Ra(op1_reg)+offset] } break; case IR_LEA_SI: { ir_reg def_reg = ir_ref_reg(ctx, i); ir_reg op1_reg = ir_ref_reg(ctx, insn->op1); int32_t scale = ctx->ir_base[insn->op2].val.i32; IR_ASSERT(def_reg >= 0 && op1_reg >= 0); if (scale == 2) { | lea Ra(def_reg), aword [Ra(op1_reg)*2] } else if (scale == 4) { | lea Ra(def_reg), aword [Ra(op1_reg)*4] } else if (scale == 8) { | lea Ra(def_reg), aword [Ra(op1_reg)*8] } else { IR_ASSERT(0); } } break; case IR_LEA_SIB: { ir_reg def_reg = ir_ref_reg(ctx, i); ir_reg op1_reg = ir_ref_reg(ctx, insn->op1); int32_t scale = ctx->ir_base[insn->op2].val.i32; IR_ASSERT(def_reg >= 0 && op1_reg >= 0); if (scale == 3) { | lea Ra(def_reg), aword [Ra(op1_reg)+Ra(op1_reg)*2] } else if (scale == 5) { | lea Ra(def_reg), aword [Ra(op1_reg)+Ra(op1_reg)*4] } else if (scale == 9) { | lea Ra(def_reg), aword [Ra(op1_reg)+Ra(op1_reg)*8] } else { IR_ASSERT(0); } } break; case IR_LEA_IB: { ir_reg def_reg = ir_ref_reg(ctx, i); ir_reg op1_reg = ir_ref_reg(ctx, insn->op1); ir_reg op2_reg = ir_ref_reg(ctx, insn->op2); IR_ASSERT(def_reg >= 0 && op1_reg >= 0 && op2_reg >= 0); | lea Ra(def_reg), aword [Ra(op1_reg)+Ra(op2_reg)] } break; case IR_LEA_OB_I: { ir_reg def_reg = ir_ref_reg(ctx, i); ir_insn *op1_insn = &ctx->ir_base[insn->op1]; ir_reg op1_reg = ir_ref_reg(ctx, op1_insn->op1); ir_reg op2_reg = ir_ref_reg(ctx, insn->op2); int32_t offset = ctx->ir_base[op1_insn->op2].val.i32; IR_ASSERT(def_reg >= 0 && op1_reg >= 0 && op2_reg >= 0); if (op1_insn->op 
== IR_SUB) { offset = -offset; } | lea Ra(def_reg), aword [Ra(op1_reg)+Ra(op2_reg)+offset] } break; case IR_LEA_I_OB: { ir_reg def_reg = ir_ref_reg(ctx, i); ir_reg op1_reg = ir_ref_reg(ctx, insn->op1); ir_insn *op2_insn = &ctx->ir_base[insn->op2]; ir_reg op2_reg = ir_ref_reg(ctx, op2_insn->op2); int32_t offset = ctx->ir_base[op2_insn->op2].val.i32; IR_ASSERT(def_reg >= 0 && op1_reg >= 0 && op2_reg >= 0); if (op2_insn->op == IR_SUB) { offset = -offset; } | lea Ra(def_reg), aword [Ra(op1_reg)+Ra(op2_reg)+offset] } break; case IR_LEA_SI_O: { ir_reg def_reg = ir_ref_reg(ctx, i); ir_insn *op1_insn = &ctx->ir_base[insn->op1]; ir_reg op1_reg = ir_ref_reg(ctx, op1_insn->op1); int32_t scale = ctx->ir_base[op1_insn->op2].val.i32; int32_t offset = ctx->ir_base[insn->op2].val.i32; IR_ASSERT(def_reg >= 0 && op1_reg >= 0); if (insn->op == IR_SUB) { offset = -offset; } if (scale == 2) { | lea Ra(def_reg), aword [Ra(op1_reg)*2+offset] } else if (scale == 4) { | lea Ra(def_reg), aword [Ra(op1_reg)*4+offset] } else if (scale == 8) { | lea Ra(def_reg), aword [Ra(op1_reg)*8+offset] } else { IR_ASSERT(0); } } break; case IR_LEA_SIB_O: { ir_reg def_reg = ir_ref_reg(ctx, i); ir_insn *op1_insn = &ctx->ir_base[insn->op1]; ir_reg op1_reg = ir_ref_reg(ctx, op1_insn->op1); int32_t scale = ctx->ir_base[op1_insn->op2].val.i32; int32_t offset = ctx->ir_base[insn->op2].val.i32; IR_ASSERT(def_reg >= 0 && op1_reg >= 0); if (insn->op == IR_SUB) { offset = -offset; } if (scale == 3) { | lea Ra(def_reg), aword [Ra(op1_reg)+Ra(op1_reg)*2+offset] } else if (scale == 5) { | lea Ra(def_reg), aword [Ra(op1_reg)+Ra(op1_reg)*4+offset] } else if (scale == 6) { | lea Ra(def_reg), aword [Ra(op1_reg)+Ra(op1_reg)*8+offset] } else { IR_ASSERT(0); } } break; case IR_LEA_IB_O: { ir_reg def_reg = ir_ref_reg(ctx, i); ir_insn *op1_insn = &ctx->ir_base[insn->op1]; ir_reg op1_reg = ir_ref_reg(ctx, op1_insn->op1); ir_reg op2_reg = ir_ref_reg(ctx, op1_insn->op2); int32_t offset = ctx->ir_base[insn->op2].val.i32; 
IR_ASSERT(def_reg >= 0 && op1_reg >= 0 && op2_reg >= 0); if (insn->op == IR_SUB) { offset = -offset; } | lea Ra(def_reg), aword [Ra(op1_reg)+Ra(op2_reg)+offset] } break; case IR_LEA_OB_SI: { ir_reg def_reg = ir_ref_reg(ctx, i); ir_insn *op1_insn = &ctx->ir_base[insn->op1]; ir_insn *op2_insn = &ctx->ir_base[insn->op2]; ir_reg op1_reg = ir_ref_reg(ctx, op1_insn->op1); int32_t offset = ctx->ir_base[op1_insn->op2].val.i32; ir_reg op2_reg = ir_ref_reg(ctx, op2_insn->op1); int32_t scale = ctx->ir_base[op2_insn->op2].val.i32; IR_ASSERT(def_reg >= 0 && op1_reg >= 0 && op2_reg >= 0); if (op1_insn->op == IR_SUB) { offset = -offset; } if (scale == 2) { | lea Ra(def_reg), aword [Ra(op1_reg)+Ra(op2_reg)*2+offset] } else if (scale == 4) { | lea Ra(def_reg), aword [Ra(op1_reg)+Ra(op2_reg)*4+offset] } else if (scale == 8) { | lea Ra(def_reg), aword [Ra(op1_reg)+Ra(op2_reg)*8+offset] } else { IR_ASSERT(0); } } break; case IR_LEA_SI_OB: { ir_reg def_reg = ir_ref_reg(ctx, i); ir_insn *op1_insn = &ctx->ir_base[insn->op1]; ir_insn *op2_insn = &ctx->ir_base[insn->op2]; ir_reg op1_reg = ir_ref_reg(ctx, op1_insn->op1); int32_t scale = ctx->ir_base[op1_insn->op2].val.i32; ir_reg op2_reg = ir_ref_reg(ctx, op2_insn->op1); int32_t offset = ctx->ir_base[op2_insn->op2].val.i32; IR_ASSERT(def_reg >= 0 && op1_reg >= 0 && op2_reg >= 0); if (op1_insn->op == IR_SUB) { offset = -offset; } if (scale == 2) { | lea Ra(def_reg), aword [Ra(op2_reg)+Ra(op1_reg)*2+offset] } else if (scale == 4) { | lea Ra(def_reg), aword [Ra(op2_reg)+Ra(op1_reg)*4+offset] } else if (scale == 8) { | lea Ra(def_reg), aword [Ra(op2_reg)+Ra(op1_reg)*8+offset] } else { IR_ASSERT(0); } } break; case IR_LEA_B_SI: { ir_reg def_reg = ir_ref_reg(ctx, i); ir_insn *op2_insn = &ctx->ir_base[insn->op2]; ir_reg op1_reg = ir_ref_reg(ctx, insn->op1); ir_reg op2_reg = ir_ref_reg(ctx, op2_insn->op1); int32_t scale = ctx->ir_base[op2_insn->op2].val.i32; IR_ASSERT(def_reg >= 0 && op1_reg >= 0 && op2_reg >= 0); if (scale == 2) { | lea 
Ra(def_reg), aword [Ra(op1_reg)+Ra(op2_reg)*2] } else if (scale == 4) { | lea Ra(def_reg), aword [Ra(op1_reg)+Ra(op2_reg)*4] } else if (scale == 8) { | lea Ra(def_reg), aword [Ra(op1_reg)+Ra(op2_reg)*8] } else { IR_ASSERT(0); } } break; case IR_LEA_SI_B: { ir_reg def_reg = ir_ref_reg(ctx, i); ir_insn *op1_insn = &ctx->ir_base[insn->op1]; ir_reg op1_reg = ir_ref_reg(ctx, op1_insn->op1); int32_t scale = ctx->ir_base[op1_insn->op2].val.i32; ir_reg op2_reg = ir_ref_reg(ctx, insn->op2); IR_ASSERT(def_reg >= 0 && op1_reg >= 0 && op2_reg >= 0); if (scale == 2) { | lea Ra(def_reg), aword [Ra(op2_reg)+Ra(op1_reg)*2] } else if (scale == 4) { | lea Ra(def_reg), aword [Ra(op2_reg)+Ra(op1_reg)*4] } else if (scale == 8) { | lea Ra(def_reg), aword [Ra(op2_reg)+Ra(op1_reg)*8] } else { IR_ASSERT(0); } } break; case IR_INC: case IR_DEC: ir_emit_incdec(ctx, *rule, i, insn); break; case IR_MUL_2: ir_emit_mul2(ctx, i, insn); break; case IR_MUL_PWR2: ir_emit_mul_pwr2(ctx, i, insn); break; case IR_DIV_PWR2: ir_emit_div_pwr2(ctx, i, insn); break; case IR_MOD_PWR2: ir_emit_mod_pwr2(ctx, i, insn); break; case IR_SHIFT: ir_emit_shift(ctx, i, insn); break; case IR_SHIFT_CONST: ir_emit_shift_const(ctx, i, insn); break; case IR_OP_INT: ir_emit_op_int(ctx, i, insn); break; case IR_BOOL_NOT_INT: ir_emit_bool_not_int(ctx, i, insn); break; case IR_BINOP_INT: ir_emit_binop_int(ctx, i, insn); break; case IR_BINOP_SSE2: ir_emit_binop_sse2(ctx, i, insn); break; case IR_BINOP_AVX: ir_emit_binop_avx(ctx, i, insn); break; case IR_MUL_INT: case IR_DIV_INT: case IR_MOD_INT: ir_emit_mul_div_mod(ctx, i, insn); break; case IR_CMP_INT: ir_emit_cmp_int(ctx, i, insn); break; case IR_CMP_FP: ir_emit_cmp_fp(ctx, i, insn); break; case IR_COPY_INT: ir_emit_copy_int(ctx, i, insn); break; case IR_COPY_FP: ir_emit_copy_fp(ctx, i, insn); break; case IR_CMP_AND_BRANCH_INT: ir_emit_cmp_and_branch_int(ctx, b, i, insn); break; case IR_CMP_AND_BRANCH_FP: ir_emit_cmp_and_branch_fp(ctx, b, i, insn); break; case IR_IF_INT: 
ir_emit_if_int(ctx, b, i, insn); break; case IR_SWITCH: ir_emit_switch(ctx, i, insn); break; case IR_END: case IR_LOOP_END: if (bb->flags & IR_BB_DESSA_MOVES) { ir_gen_dessa_moves(ctx, b, ir_emit_dessa_move); } IR_ASSERT(bb->successors_count == 1); target = ir_skip_empty_blocks(ctx, ctx->cfg_edges[bb->successors]); if (target != ir_skip_empty_blocks(ctx, b + 1)) { | jmp =>target } break; case IR_RETURN_VOID: ir_emit_return_void(ctx); break; case IR_RETURN_INT: ir_emit_return_int(ctx, insn); break; case IR_RETURN_FP: ir_emit_return_fp(ctx, insn); break; case IR_CALL: ir_emit_call(ctx, i, insn); break; default: IR_ASSERT(0 && "NIY rule/insruction"); break; } n = ir_operands_count(ctx, insn); n = 1 + (n >> 2); // support for multi-word instructions like MERGE and PHI i += n; insn += n; rule += n; } } for (i = IR_UNUSED + 1, insn = ctx->ir_base - i; i < ctx->consts_count; i++, insn--) { if (insn->emit_const) { if (IR_IS_TYPE_FP(insn->type)) { int label = ctx->cfg_blocks_count + i; if (!data.rodata_label) { data.rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2; |.rodata |=>data.rodata_label: } if (insn->type == IR_DOUBLE) { |.align 8 |=>label: |.dword insn->val.u32, insn->val.u32_hi } else if (insn->type == IR_FLOAT) { |.align 4 |=>label: |.dword insn->val.u32 } else { IR_ASSERT(0); } } else if (insn->op == IR_STR) { int label = ctx->cfg_blocks_count + i; const char *str = ir_get_str(ctx, insn->val.addr); int i = 0; if (!data.rodata_label) { data.rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2; |.rodata |=>data.rodata_label: } |.align 8 |=>label: while (str[i]) { char c = str[i]; if (c == '\\') { if (str[i+1] == '\\') { i++; c = '\\'; } else if (str[i+1] == 'n') { i++; c = '\n'; } else if (str[i+1] == 'r') { i++; c = '\r'; } else if (str[i+1] == 't') { i++; c = '\t'; } } |.byte c i++; } |.byte 0 } else { IR_ASSERT(0); } } } if (data.rodata_label) { |.code } ret = dasm_link(&data.dasm_state, size); if (ret != DASM_S_OK) { return 0; } entry = 
ir_mem_mmap(4096); ir_mem_unprotect(entry, 4096); ret = dasm_encode(&data.dasm_state, entry); if (data.rodata_label) { int rodata = dasm_getpclabel(&data.dasm_state, data.rodata_label); *size = rodata; } else if (data.jmp_table_label) { int rodata = dasm_getpclabel(&data.dasm_state, data.jmp_table_label); *size = rodata; } if (ret != DASM_S_OK) { IR_ASSERT(0); ir_mem_unmap(entry, 4096); return NULL; } dasm_free(&data.dasm_state); ir_mem_flush(entry, 4096); ir_mem_protect(entry, 4096); return entry; }