Code generation for VA_ARG nodes (Windows and macOS are not supported yet)

Dmitry Stogov 2023-11-23 19:38:33 +03:00
parent 1671b3de78
commit 25bd3024da
8 changed files with 725 additions and 11 deletions
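
For orientation, the new VA_START/VA_ARG/VA_END/VA_COPY nodes lower the usual C varargs protocol. A toy function like the following (illustrative only, not part of the commit) is the kind of code whose IR now compiles on System V x86-64 and AArch64:

#include <stdarg.h>

/* Sums `count` int arguments; in IR terms this uses one VA_START,
   a VA_ARG per iteration and a final VA_END. */
static int sum(int count, ...)
{
	va_list ap;
	int i, s = 0;

	va_start(ap, count);
	for (i = 0; i < count; i++) {
		s += va_arg(ap, int);
	}
	va_end(ap);
	return s;
}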

ir.h

@@ -564,6 +564,9 @@ struct _ir_ctx {
int32_t fixed_stack_frame_size; /* fixed stack allocated by generated code for spills and registers save/restore */
int32_t fixed_call_stack_size; /* fixed preallocated stack for parameter passing (default 0) */
uint64_t fixed_save_regset; /* registers that always saved/restored in prologue/epilogue */
uint32_t locals_area_size; /* spill/local variables area, before saved registers and the VA register save area are appended */
uint32_t gp_reg_params; /* number of named arguments passed in GP registers */
uint32_t fp_reg_params; /* number of named arguments passed in FP registers */
ir_live_interval **live_intervals;
ir_arena *arena;
ir_live_range *unused_ranges;

ir_aarch64.dasc

@@ -524,6 +524,16 @@ int ir_get_target_constraints(const ir_ctx *ctx, ir_ref ref, ir_target_constrain
case IR_SNAPSHOT:
flags = 0;
break;
case IR_VA_START:
flags = IR_OP1_MUST_BE_IN_REG;
constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
n = 1;
break;
case IR_VA_ARG:
flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG;
constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF);
n = 1;
break;
}
constraints->tmps_count = n;
@@ -875,6 +885,48 @@ binop_fp:
}
}
return insn->op;
case IR_VA_START:
ctx->flags2 |= IR_HAS_VA_START;
if (ctx->ir_base[insn->op2].op == IR_ALLOCA) {
ir_use_list *use_list = &ctx->use_lists[insn->op2];
ir_ref *p, n = use_list->count;
for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
ir_insn *use_insn = &ctx->ir_base[*p];
if (use_insn->op == IR_VA_START || use_insn->op == IR_VA_END) {
/* nothing to do */
} else if (use_insn->op == IR_VA_COPY) {
if (use_insn->op3 == insn->op2) {
ctx->flags2 |= IR_HAS_VA_COPY;
}
} else if (use_insn->op == IR_VA_ARG) {
if (use_insn->op2 == insn->op2) {
if (IR_IS_TYPE_INT(use_insn->type)) {
ctx->flags2 |= IR_HAS_VA_ARG_GP;
} else {
IR_ASSERT(IR_IS_TYPE_FP(use_insn->type));
ctx->flags2 |= IR_HAS_VA_ARG_FP;
}
}
} else if (*p > ref) {
/* direct va_list access */
ctx->flags2 |= IR_HAS_VA_ARG_GP|IR_HAS_VA_ARG_FP;
}
}
}
return IR_VA_START;
case IR_VA_END:
return IR_SKIPPED | IR_NOP;
case IR_VADDR:
if (ctx->use_lists[ref].count > 0) {
ir_use_list *use_list = &ctx->use_lists[ref];
ir_ref *p, n = use_list->count;
for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
if (ctx->ir_base[*p].op != IR_VA_END) {
return IR_VADDR;
}
}
}
return IR_SKIPPED | IR_NOP;
default:
break;
}
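
The use-list scan above decides which parts of the register save area the prologue must spill: a VA_ARG of integer type sets IR_HAS_VA_ARG_GP, one of floating-point type sets IR_HAS_VA_ARG_FP, VA_COPY requires both save areas (its flag is OR-ed into both checks below), and any other use of the va_list after VA_START counts as direct access and conservatively sets both flags.
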
@@ -1266,15 +1318,17 @@ static void ir_emit_prologue(ir_ctx *ctx)
}
}
if (ctx->used_preserved_regs) {
ir_reg fp;
int offset;
uint32_t i;
ir_reg prev = IR_REG_NONE;
ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs;
if (ctx->flags & IR_USE_FRAME_POINTER) {
fp = IR_REG_FRAME_POINTER;
offset = ctx->stack_frame_size + sizeof(void*) * 2;
} else {
fp = IR_REG_STACK_POINTER;
offset = ctx->stack_frame_size + ctx->call_stack_size;
}
for (i = 0; i < IR_REG_NUM; i++) {
@@ -1309,6 +1363,53 @@ static void ir_emit_prologue(ir_ctx *ctx)
}
}
}
if ((ctx->flags & IR_VARARG_FUNC) && (ctx->flags2 & IR_HAS_VA_START)) {
#ifndef __APPLE__
const int8_t *int_reg_params = _ir_int_reg_params;
const int8_t *fp_reg_params = _ir_fp_reg_params;
ir_reg fp;
int offset;
int i;
if (ctx->flags & IR_USE_FRAME_POINTER) {
fp = IR_REG_FRAME_POINTER;
offset = ctx->locals_area_size + sizeof(void*) * 2;
} else {
fp = IR_REG_STACK_POINTER;
offset = ctx->locals_area_size + ctx->call_stack_size;
}
if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) {
ir_reg prev = IR_REG_NONE;
/* skip named args */
offset += sizeof(void*) * ctx->gp_reg_params;
for (i = ctx->gp_reg_params; i < IR_REG_INT_ARGS; i++) {
if (prev != IR_REG_NONE) {
| stp Rx(prev), Rx(int_reg_params[i]), [Rx(fp), #offset]
prev = IR_REG_NONE;
offset += sizeof(void*) * 2;
} else {
prev = int_reg_params[i];
}
}
if (prev != IR_REG_NONE) {
| str Rx(prev), [Rx(fp), #offset]
offset += sizeof(void*);
}
}
if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) {
/* skip named args */
offset += 16 * ctx->fp_reg_params;
for (i = ctx->fp_reg_params; i < IR_REG_FP_ARGS; i++) {
// TODO: Rd->Rq stur->str ???
| str Rd(fp_reg_params[i]-IR_REG_FP_FIRST), [Rx(fp), #offset]
offset += 16;
}
}
#endif
}
}

static void ir_emit_epilogue(ir_ctx *ctx)
@@ -3543,14 +3644,185 @@ static void ir_emit_frame_addr(ir_ctx *ctx, ir_ref def)
static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
#ifndef __APPLE__
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
ir_reg fp;
int reg_save_area_offset;
int overflow_arg_area_offset;
ir_reg op2_reg = ctx->regs[def][2];
ir_reg tmp_reg = ctx->regs[def][3];
IR_ASSERT(op2_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
if (IR_REG_SPILLED(op2_reg)) {
op2_reg = IR_REG_NUM(op2_reg);
ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
}
if (ctx->flags & IR_USE_FRAME_POINTER) {
fp = IR_REG_FRAME_POINTER;
reg_save_area_offset = ctx->locals_area_size + sizeof(void*) * 2;
overflow_arg_area_offset = ctx->stack_frame_size + sizeof(void*) * 2; // TODO: ???
} else {
fp = IR_REG_STACK_POINTER;
reg_save_area_offset = ctx->stack_frame_size + ctx->call_stack_size + 4; // TODO: ???
overflow_arg_area_offset = 0;
IR_ASSERT(0); // TODO: ???
}
/* Set va_list.stack */
| add Rx(tmp_reg), Rx(fp), #overflow_arg_area_offset
| str Rx(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, stack)]
if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) {
reg_save_area_offset += sizeof(void*) * IR_REG_INT_ARGS;
/* Set va_list.gr_top */
if (overflow_arg_area_offset != reg_save_area_offset) {
| add Rx(tmp_reg), Rx(fp), #reg_save_area_offset
}
| str Rx(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, gr_top)]
/* Set va_list.gr_offset */
| movn Rw(tmp_reg), #~(0 - (sizeof(void*) * (IR_REG_INT_ARGS - ctx->gp_reg_params)))
| str Rw(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, gr_offset)]
} else {
/* Set va_list.gr_offset */
| str wzr, [Rx(op2_reg), #offsetof(ir_va_list, gr_offset)]
}
if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) {
reg_save_area_offset += 16 * IR_REG_FP_ARGS;
/* Set va_list.vr_top */
if (overflow_arg_area_offset != reg_save_area_offset) {
| add Rx(tmp_reg), Rx(fp), #reg_save_area_offset
}
| str Rx(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, vr_top)]
/* Set va_list.vr_offset */
| movn Rw(tmp_reg), #~(0 - (16 * (IR_REG_FP_ARGS - ctx->fp_reg_params)))
| str Rw(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, vr_offset)]
} else {
/* Set va_list.vr_offset */
| str wzr, [Rx(op2_reg), #offsetof(ir_va_list, vr_offset)]
}
#else
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
ir_reg fp;
int arg_area_offset;
ir_reg op2_reg = ctx->regs[def][2];
ir_reg tmp_reg = ctx->regs[def][3];
IR_ASSERT(tmp_reg != IR_REG_NONE);
if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) {
op2_reg = IR_REG_NUM(op2_reg);
ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
}
if (ctx->flags & IR_USE_FRAME_POINTER) {
fp = IR_REG_FRAME_POINTER;
arg_area_offset = 8 + 4; // TODO: ???
} else {
fp = IR_REG_STACK_POINTER;
arg_area_offset = 0;
IR_ASSERT(0); // TODO: ???
}
| add Rx(tmp_reg), Rx(fp), #arg_area_offset
if (op2_reg != IR_REG_NONE) {
| str Rx(tmp_reg), [Rx(op2_reg)]
} else {
int32_t offset = ir_ref_spill_slot(ctx, insn->op2, &op2_reg);
| str Rx(tmp_reg), [Rx(op2_reg), #offset]
}
#endif
}
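
For reference, the non-Apple branch above amounts to this plain-C model of AAPCS64 va_start (a sketch, not part of the commit; save_area/stack_args stand for the frame offsets computed above, and both save areas are assumed to have been spilled by the prologue):

static void model_va_start_aapcs64(ir_va_list *ap, char *frame,
                                   int save_area, int stack_args,
                                   int named_gp, int named_fp)
{
	ap->stack = frame + stack_args;           /* first stack-passed vararg */
	ap->gr_top = frame + save_area + 8 * 8;   /* end of the 8-slot GP save area */
	ap->vr_top = (char*)ap->gr_top + 16 * 8;  /* end of the 8-slot SIMD save area */
	ap->gr_offset = -8 * (8 - named_gp);      /* negative: GP register bytes left */
	ap->vr_offset = -16 * (8 - named_fp);     /* negative: SIMD register bytes left */
}
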
static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
IR_ASSERT(0 && "NIY va_copy");
}

static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
#ifndef __APPLE__
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
ir_type type = insn->type;
ir_reg def_reg = ctx->regs[def][0];
ir_reg op2_reg = ctx->regs[def][2];
ir_reg tmp_reg = ctx->regs[def][3];
IR_ASSERT(def_reg != IR_REG_NONE && op2_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
if (IR_REG_SPILLED(op2_reg)) {
op2_reg = IR_REG_NUM(op2_reg);
ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
}
if (IR_IS_TYPE_INT(type)) {
| ldr Rw(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, gr_offset)]
| cmp Rw(tmp_reg), wzr
| bge >1
| ldr Rx(IR_REG_INT_TMP), [Rx(op2_reg), #offsetof(ir_va_list, gr_top)]
| sxtw Rx(tmp_reg), Rw(tmp_reg)
| add Rx(IR_REG_INT_TMP), Rx(tmp_reg), Rx(IR_REG_INT_TMP)
| ldr Rx(def_reg), [Rx(IR_REG_INT_TMP)]
| add Rw(tmp_reg), Rw(tmp_reg), #sizeof(void*)
| str Rw(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, gr_offset)]
| b >2
|1:
| ldr Rx(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, stack)]
| ldr Rx(def_reg), [Rx(tmp_reg)]
| add Rx(tmp_reg), Rx(tmp_reg), #sizeof(void*)
| str Rx(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, stack)]
|2:
} else {
| ldr Rw(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, vr_offset)]
| cmp Rw(tmp_reg), wzr
| bge >1
| ldr Rx(IR_REG_INT_TMP), [Rx(op2_reg), #offsetof(ir_va_list, vr_top)]
| sxtw Rx(tmp_reg), Rw(tmp_reg)
| add Rx(IR_REG_INT_TMP), Rx(tmp_reg), Rx(IR_REG_INT_TMP)
| ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(IR_REG_INT_TMP)]
| add Rw(tmp_reg), Rw(tmp_reg), #16
| str Rw(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, vr_offset)]
| b >2
|1:
| ldr Rx(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, stack)]
| ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(tmp_reg)]
| add Rx(tmp_reg), Rx(tmp_reg), #sizeof(void*)
| str Rx(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, stack)]
|2:
}
if (IR_REG_SPILLED(ctx->regs[def][0])) {
ir_emit_store(ctx, type, def, def_reg);
}
#else
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
ir_type type = insn->type;
ir_reg def_reg = ctx->regs[def][0];
ir_reg op2_reg = ctx->regs[def][2];
ir_reg tmp_reg = ctx->regs[def][3];
IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) {
op2_reg = IR_REG_NUM(op2_reg);
ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
}
| ldr Rx(tmp_reg), [Rx(op2_reg)]
if (IR_IS_TYPE_INT(type)) {
ir_emit_load_mem_int(ctx, type, def_reg, tmp_reg, 0);
} else {
ir_emit_load_mem_fp(ctx, type, def_reg, tmp_reg, 0);
}
| add Rx(tmp_reg), Rx(tmp_reg), #IR_MAX(ir_type_size[type], sizeof(void*))
if (op2_reg != IR_REG_NONE) {
| str Rx(tmp_reg), [Rx(op2_reg)]
} else {
int32_t offset = ir_ref_spill_slot(ctx, insn->op2, &op2_reg);
| str Rx(tmp_reg), [Rx(op2_reg), #offset]
}
if (IR_REG_SPILLED(ctx->regs[def][0])) {
ir_emit_store(ctx, type, def, def_reg);
}
#endif
}

static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
@@ -4759,7 +5031,6 @@ static void ir_fix_param_spills(ir_ctx *ctx)
&& ival->stack_spill_pos == -1
&& (ival->next || ival->reg == IR_REG_NONE)) {
ival->stack_spill_pos = stack_offset;
ctx->regs[use][0] = IR_REG_NONE;
}
}
if (sizeof(void*) == 8) {
@@ -4973,10 +5244,41 @@ static void ir_preallocate_call_stack(ir_ctx *ctx)
}
}

static void ir_count_reg_params(ir_ctx *ctx)
{
ir_use_list *use_list = &ctx->use_lists[1];
ir_insn *insn;
ir_ref i, n, *p, use;
int gp_reg_params = 0;
int fp_reg_params = 0;
n = use_list->count;
for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) {
use = *p;
insn = &ctx->ir_base[use];
if (insn->op == IR_PARAM) {
if (IR_IS_TYPE_INT(insn->type)) {
if (gp_reg_params < IR_REG_INT_ARGS) {
gp_reg_params++;
}
} else {
if (fp_reg_params < IR_REG_FP_ARGS) {
fp_reg_params++;
}
}
}
}
ctx->gp_reg_params = gp_reg_params;
ctx->fp_reg_params = fp_reg_params;
}
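
ir_count_reg_params() walks the uses of ref 1 (the function entry node) and counts how many named PARAMs were assigned to GP and FP argument registers; the prologue and ir_emit_va_start() use these counts to skip the named slots of the register save area.
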
void ir_fix_stack_frame(ir_ctx *ctx)
{
uint32_t additional_size = 0;
ctx->locals_area_size = ctx->stack_frame_size;
ir_count_reg_params(ctx);
if (ctx->used_preserved_regs) {
ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs;
ir_reg reg;
@@ -4987,6 +5289,15 @@ void ir_fix_stack_frame(ir_ctx *ctx)
} IR_REGSET_FOREACH_END();
}
if ((ctx->flags & IR_VARARG_FUNC) && (ctx->flags2 & IR_HAS_VA_START)) {
if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) {
additional_size += sizeof(void*) * IR_REG_INT_ARGS;
}
if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) {
additional_size += 16 * IR_REG_FP_ARGS;
}
}
ctx->stack_frame_size = IR_ALIGNED_SIZE(ctx->stack_frame_size, sizeof(void*));
ctx->stack_frame_size += additional_size;
ctx->stack_frame_alignment = 0;
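
For concrete numbers: AArch64 has 8 GP argument registers and 8 SIMD argument registers, with 16 bytes reserved per SIMD slot, so a fully variadic frame grows by 8*8 + 16*8 = 192 bytes here; the x86-64 SysV counterpart later in this commit reserves 8*6 + 16*8 = 176 bytes.
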
@@ -5321,7 +5632,7 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
ir_emit_tls(ctx, i, insn);
break;
case IR_TRAP:
| brk;
| brk
break;
default:
IR_ASSERT(0 && "NIY rule/instruction");

View File

@@ -152,6 +152,16 @@ enum _ir_reg {
(IR_REGSET_INTERVAL(IR_REG_X19, IR_REG_X30) \
| IR_REGSET_INTERVAL(IR_REG_V8, IR_REG_V15))
#ifndef __APPLE__
typedef struct _ir_va_list {
void *stack;
void *gr_top;
void *vr_top;
int32_t gr_offset;
int32_t vr_offset;
} ir_va_list;
#endif
typedef struct _ir_tmp_reg {
union {
uint8_t num;

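gr_offset and vr_offset hold the negated number of save-area bytes still unconsumed, which is why the va_arg code above branches on `cmp ..., wzr` / `bge`. A plain-C sketch of the integer path (for reference, not part of the commit):

#include <stdint.h>

static void *model_va_arg_gp_aapcs64(ir_va_list *ap)
{
	int32_t off = ap->gr_offset;

	if (off >= 0) {                       /* GP save area exhausted */
		void *p = ap->stack;
		ap->stack = (char*)ap->stack + 8; /* sizeof(void*) */
		return p;
	}
	ap->gr_offset = off + 8;              /* consume one 8-byte slot */
	return (char*)ap->gr_top + off;       /* off is negative */
}
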
ir_private.h

@@ -883,6 +883,11 @@ IR_ALWAYS_INLINE uint32_t ir_insn_len(const ir_insn *insn)
#define IR_HAS_CALLS (1<<3)
#define IR_OPT_IN_SCCP (1<<4)
#define IR_LINEAR (1<<5)
#define IR_HAS_VA_START (1<<6)
#define IR_HAS_VA_COPY (1<<7)
#define IR_HAS_VA_ARG_GP (1<<8)
#define IR_HAS_VA_ARG_FP (1<<9)
#define IR_HAS_FP_RET_SLOT (1<<10)
/* Temporary: SCCP -> CFG */
#define IR_SCCP_DONE (1<<25)

ir_ra.c

@@ -3598,7 +3598,9 @@ static int ir_linear_scan(ir_ctx *ctx)
}
#ifdef IR_TARGET_X86
if (ctx->ret_type == IR_FLOAT || ctx->ret_type == IR_DOUBLE) {
if (ctx->flags2 & IR_HAS_FP_RET_SLOT) {
ctx->ret_slot = ir_allocate_spill_slot(ctx, IR_DOUBLE, &data);
} else if (ctx->ret_type == IR_FLOAT || ctx->ret_type == IR_DOUBLE) {
ctx->ret_slot = ir_allocate_spill_slot(ctx, ctx->ret_type, &data);
} else {
ctx->ret_slot = -1;
@@ -3733,6 +3735,10 @@ static void assign_regs(ir_ctx *ctx)
if (ctx->ir_base[ref].op == IR_PHI) {
/* Spilled PHI var is passed through memory */
reg = IR_REG_NONE;
} else if (ctx->ir_base[ref].op == IR_PARAM
&& (ival->flags & IR_LIVE_INTERVAL_MEM_PARAM)) {
/* Stack PARAM var is passed through memory */
reg = IR_REG_NONE;
} else {
uint32_t use_b = ctx->cfg_map[ref];

ir_x86.dasc

@@ -877,6 +877,16 @@ op2_const:
case IR_SNAPSHOT:
flags = 0;
break;
case IR_VA_START:
flags = IR_OP1_MUST_BE_IN_REG;
constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
n = 1;
break;
case IR_VA_ARG:
flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG;
constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF);
n = 1;
break;
}
constraints->tmps_count = n;
@@ -1424,6 +1434,11 @@ binop_fp:
break;
case IR_CALL:
ctx->flags2 |= IR_HAS_CALLS;
#ifndef IR_REG_FP_RET1
if (IR_IS_TYPE_FP(insn->type)) {
ctx->flags2 |= IR_HAS_FP_RET_SLOT;
}
#endif
IR_FALLTHROUGH;
case IR_TAILCALL:
if (ir_in_same_block(ctx, insn->op2)) {
@@ -1921,6 +1936,48 @@ store_int:
case IR_CTPOP:
ir_match_fuse_load(ctx, insn->op1, ref);
return (ctx->mflags & IR_X86_BMI1) ? IR_BIT_COUNT : IR_CTPOP;
case IR_VA_START:
ctx->flags2 |= IR_HAS_VA_START;
if ((ctx->ir_base[insn->op2].op == IR_ALLOCA) || (ctx->ir_base[insn->op2].op == IR_VADDR)) {
ir_use_list *use_list = &ctx->use_lists[insn->op2];
ir_ref *p, n = use_list->count;
for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
ir_insn *use_insn = &ctx->ir_base[*p];
if (use_insn->op == IR_VA_START || use_insn->op == IR_VA_END) {
/* nothing to do */
} else if (use_insn->op == IR_VA_COPY) {
if (use_insn->op3 == insn->op2) {
ctx->flags2 |= IR_HAS_VA_COPY;
}
} else if (use_insn->op == IR_VA_ARG) {
if (use_insn->op2 == insn->op2) {
if (IR_IS_TYPE_INT(use_insn->type)) {
ctx->flags2 |= IR_HAS_VA_ARG_GP;
} else {
IR_ASSERT(IR_IS_TYPE_FP(use_insn->type));
ctx->flags2 |= IR_HAS_VA_ARG_FP;
}
}
} else if (*p > ref) {
/* direct va_list access */
ctx->flags2 |= IR_HAS_VA_ARG_GP|IR_HAS_VA_ARG_FP;
}
}
}
return IR_VA_START;
case IR_VA_END:
return IR_SKIPPED | IR_NOP;
case IR_VADDR:
if (ctx->use_lists[ref].count > 0) {
ir_use_list *use_list = &ctx->use_lists[ref];
ir_ref *p, n = use_list->count;
for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
if (ctx->ir_base[*p].op != IR_VA_END) {
return IR_VADDR;
}
}
}
return IR_SKIPPED | IR_NOP;
default:
break;
}
@@ -2224,25 +2281,24 @@ static void ir_emit_prologue(ir_ctx *ctx)
}
}
if (ctx->used_preserved_regs) {
ir_reg fp;
int offset;
uint32_t i;
ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs;
if (ctx->flags & IR_USE_FRAME_POINTER) {
fp = IR_REG_FRAME_POINTER;
offset = 0;
} else {
fp = IR_REG_STACK_POINTER;
offset = ctx->stack_frame_size + ctx->call_stack_size;
}
for (i = 0; i < IR_REG_NUM; i++) {
if (IR_REGSET_IN(used_preserved_regs, i)) {
if (i < IR_REG_FP_FIRST) {
ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
offset -= sizeof(void*);
| mov aword [Ra(fp)+offset], Ra(i)
} else {
ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
offset -= sizeof(void*);
if (ctx->mflags & IR_X86_AVX) {
| vmovsd qword [Ra(fp)+offset], xmm(i-IR_REG_FP_FIRST)
@@ -2253,6 +2309,46 @@ static void ir_emit_prologue(ir_ctx *ctx)
}
}
}
if ((ctx->flags & IR_VARARG_FUNC) && (ctx->flags2 & IR_HAS_VA_START)) {
#if defined(IR_TARGET_X64) && !defined(_WIN64)
|.if X64
const int8_t *int_reg_params = _ir_int_reg_params;
const int8_t *fp_reg_params = _ir_fp_reg_params;
ir_reg fp;
int offset;
uint32_t i;
if (ctx->flags & IR_USE_FRAME_POINTER) {
fp = IR_REG_FRAME_POINTER;
offset = -(ctx->stack_frame_size - ctx->stack_frame_alignment - ctx->locals_area_size);
} else {
fp = IR_REG_STACK_POINTER;
offset = ctx->locals_area_size + ctx->call_stack_size;
}
if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) {
/* skip named args */
offset += sizeof(void*) * ctx->gp_reg_params;
for (i = ctx->gp_reg_params; i < IR_REG_INT_ARGS; i++) {
| mov qword [Ra(fp)+offset], Rq(int_reg_params[i])
offset += sizeof(void*);
}
}
if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) {
| test al, al
| je >1
/* skip named args */
offset += 16 * ctx->fp_reg_params;
for (i = ctx->fp_reg_params; i < IR_REG_FP_ARGS; i++) {
| movaps [Ra(fp)+offset], xmm(fp_reg_params[i]-IR_REG_FP_FIRST)
offset += 16;
}
|1:
}
|.endif
#endif
}
}

static void ir_emit_epilogue(ir_ctx *ctx)
@@ -6245,6 +6341,189 @@ static void ir_emit_frame_addr(ir_ctx *ctx, ir_ref def)
}
}

static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
#if defined(IR_TARGET_X64) && !defined(_WIN64)
|.if X64
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
ir_reg fp;
int reg_save_area_offset;
int overflow_arg_area_offset;
ir_reg op2_reg = ctx->regs[def][2];
ir_reg tmp_reg = ctx->regs[def][3];
bool have_reg_save_area = 0;
IR_ASSERT(op2_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
if (IR_REG_SPILLED(op2_reg)) {
op2_reg = IR_REG_NUM(op2_reg);
ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
}
if (ctx->flags & IR_USE_FRAME_POINTER) {
fp = IR_REG_FRAME_POINTER;
reg_save_area_offset = -(ctx->stack_frame_size - ctx->stack_frame_alignment - ctx->locals_area_size);
overflow_arg_area_offset = 16;
} else {
fp = IR_REG_STACK_POINTER;
reg_save_area_offset = ctx->stack_frame_size + ctx->call_stack_size + 4; // TODO: ???
overflow_arg_area_offset = 0;
IR_ASSERT(0); // TODO: ???
}
if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) {
| lea Ra(tmp_reg), aword [Ra(fp)+reg_save_area_offset]
have_reg_save_area = 1;
/* Set va_list.gp_offset */
| mov dword [Ra(op2_reg)+offsetof(ir_va_list, gp_offset)], sizeof(void*) * ctx->gp_reg_params
} else {
reg_save_area_offset -= sizeof(void*) * IR_REG_INT_ARGS;
/* Set va_list.gp_offset */
| mov dword [Ra(op2_reg)+offsetof(ir_va_list, gp_offset)], sizeof(void*) * IR_REG_INT_ARGS
}
if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) {
if (!have_reg_save_area) {
| lea Ra(tmp_reg), aword [Ra(fp)+reg_save_area_offset]
have_reg_save_area = 1;
}
/* Set va_list.fp_offset */
| mov dword [Ra(op2_reg)+offsetof(ir_va_list, fp_offset)], sizeof(void*) * IR_REG_INT_ARGS + 16 * ctx->fp_reg_params
} else {
/* Set va_list.fp_offset */
| mov dword [Ra(op2_reg)+offsetof(ir_va_list, fp_offset)], sizeof(void*) * IR_REG_INT_ARGS + 16 * IR_REG_FP_ARGS
}
if (have_reg_save_area) {
/* Set va_list.reg_save_area */
| mov qword [Ra(op2_reg)+offsetof(ir_va_list, reg_save_area)], Ra(tmp_reg)
}
| lea Ra(tmp_reg), aword [Ra(fp)+overflow_arg_area_offset]
/* Set va_list.overflow_arg_area */
| mov qword [Ra(op2_reg)+offsetof(ir_va_list, overflow_arg_area)], Ra(tmp_reg)
|.endif
#elif defined(IR_TARGET_X86)
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
ir_reg fp;
int arg_area_offset;
ir_reg op2_reg = ctx->regs[def][2];
ir_reg tmp_reg = ctx->regs[def][3];
IR_ASSERT(tmp_reg != IR_REG_NONE);
if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) {
op2_reg = IR_REG_NUM(op2_reg);
ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
}
if (ctx->flags & IR_USE_FRAME_POINTER) {
fp = IR_REG_FRAME_POINTER;
arg_area_offset = 8 + 4; // TODO: ???
} else {
fp = IR_REG_STACK_POINTER;
arg_area_offset = ctx->stack_frame_size + ctx->call_stack_size + 4; // TODO: ???
}
| lea Ra(tmp_reg), aword [Ra(fp)+arg_area_offset]
if (op2_reg != IR_REG_NONE) {
| mov aword [Ra(op2_reg)], Ra(tmp_reg)
} else {
int32_t offset = ir_ref_spill_slot(ctx, insn->op2, &op2_reg);
| mov aword [Ra(op2_reg)+offset], Ra(tmp_reg)
}
#else
IR_ASSERT(0 && "NIY va_start");
#endif
}
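
The stores above implement the System V x86-64 va_start; as a plain-C model (a sketch, not part of the commit; named_gp/named_fp mirror ctx->gp_reg_params/ctx->fp_reg_params):

static void model_va_start_sysv_x64(ir_va_list *ap, char *reg_save_area,
                                    char *first_stack_arg,
                                    int named_gp, int named_fp)
{
	ap->gp_offset = 8 * named_gp;            /* GP slots used by named args */
	ap->fp_offset = 8 * 6 + 16 * named_fp;   /* FP area follows the 6 GP slots */
	ap->reg_save_area = reg_save_area;
	ap->overflow_arg_area = first_stack_arg; /* first stack-passed vararg */
}
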
static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
IR_ASSERT(0 && "NIY va_copy");
}

static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
#if defined(IR_TARGET_X64) && !defined(_WIN64)
|.if X64
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
ir_type type = insn->type;
ir_reg def_reg = ctx->regs[def][0];
ir_reg op2_reg = ctx->regs[def][2];
ir_reg tmp_reg = ctx->regs[def][3];
IR_ASSERT(def_reg != IR_REG_NONE && op2_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
if (IR_REG_SPILLED(op2_reg)) {
op2_reg = IR_REG_NUM(op2_reg);
ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
}
if (IR_IS_TYPE_INT(type)) {
| mov Rd(tmp_reg), dword [Ra(op2_reg)+offsetof(ir_va_list, gp_offset)]
| cmp Rd(tmp_reg), sizeof(void*)*IR_REG_INT_ARGS
| jge >1
| add Rd(tmp_reg), sizeof(void*)
| mov dword [Ra(op2_reg)+offsetof(ir_va_list, gp_offset)], Rd(tmp_reg)
| add Ra(tmp_reg), aword [Ra(op2_reg)+offsetof(ir_va_list, reg_save_area)]
| jmp >2
|1:
| mov Ra(tmp_reg), aword [Ra(op2_reg)+offsetof(ir_va_list, overflow_arg_area)]
| add Ra(tmp_reg), sizeof(void*)
| mov aword [Ra(op2_reg)+offsetof(ir_va_list, overflow_arg_area)], Ra(tmp_reg)
|2:
| mov Ra(def_reg), aword [Ra(tmp_reg)-sizeof(void*)]
} else {
| mov Rd(tmp_reg), dword [Ra(op2_reg)+offsetof(ir_va_list, fp_offset)]
| cmp Rd(tmp_reg), sizeof(void*) * IR_REG_INT_ARGS + 16 * IR_REG_FP_ARGS
| jge >1
| add Rd(tmp_reg), 16
| mov dword [Ra(op2_reg)+offsetof(ir_va_list, fp_offset)], Rd(tmp_reg)
| add Ra(tmp_reg), aword [Ra(op2_reg)+offsetof(ir_va_list, reg_save_area)]
ir_emit_load_mem_fp(ctx, type, def_reg, tmp_reg, -16);
| jmp >2
|1:
| mov Ra(tmp_reg), aword [Ra(op2_reg)+offsetof(ir_va_list, overflow_arg_area)]
ir_emit_load_mem_fp(ctx, type, def_reg, tmp_reg, 0);
| add Ra(tmp_reg), 8
| mov aword [Ra(op2_reg)+offsetof(ir_va_list, overflow_arg_area)], Ra(tmp_reg)
|2:
}
if (IR_REG_SPILLED(ctx->regs[def][0])) {
ir_emit_store(ctx, type, def, def_reg);
}
|.endif
#elif defined(IR_TARGET_X86)
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
ir_type type = insn->type;
ir_reg def_reg = ctx->regs[def][0];
ir_reg op2_reg = ctx->regs[def][2];
ir_reg tmp_reg = ctx->regs[def][3];
IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) {
op2_reg = IR_REG_NUM(op2_reg);
ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
}
| mov Ra(tmp_reg), aword [Ra(op2_reg)]
if (IR_IS_TYPE_INT(type)) {
ir_emit_load_mem_int(ctx, type, def_reg, tmp_reg, 0);
} else {
ir_emit_load_mem_fp(ctx, type, def_reg, tmp_reg, 0);
}
| add Ra(tmp_reg), IR_MAX(ir_type_size[type], sizeof(void*))
if (op2_reg != IR_REG_NONE) {
| mov aword [Ra(op2_reg)], Ra(tmp_reg)
} else {
int32_t offset = ir_ref_spill_slot(ctx, insn->op2, &op2_reg);
| mov aword [Ra(op2_reg)+offset], Ra(tmp_reg)
}
if (IR_REG_SPILLED(ctx->regs[def][0])) {
ir_emit_store(ctx, type, def, def_reg);
}
#else
IR_ASSERT(0 && "NIY va_arg");
#endif
}

static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
{
ir_backend_data *data = ctx->data;
@@ -7154,7 +7433,35 @@ static void ir_emit_call(ir_ctx *ctx, ir_ref def, ir_insn *insn)
ir_emit_store(ctx, insn->type, def, IR_REG_FP_RET1);
}
#else
IR_ASSERT(0); // TODO: float/double return value
if (ctx->use_lists[def].count > 1) {
int32_t offset;
ir_reg fp;
if (def_reg == IR_REG_NONE) {
offset = ir_ref_spill_slot(ctx, def, &fp);
if (insn->type == IR_DOUBLE) {
| fstp qword [Ra(fp)+offset]
} else {
IR_ASSERT(insn->type == IR_FLOAT);
| fstp dword [Ra(fp)+offset]
}
} else {
offset = ctx->ret_slot;
IR_ASSERT(offset != -1);
offset = IR_SPILL_POS_TO_OFFSET(offset);
fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
if (insn->type == IR_DOUBLE) {
| fstp qword [Ra(fp)+offset]
} else {
IR_ASSERT(insn->type == IR_FLOAT);
| fstp dword [Ra(fp)+offset]
}
ir_emit_load_mem_fp(ctx, insn->type, def_reg, fp, offset);
if (IR_REG_SPILLED(ctx->regs[def][0])) {
ir_emit_store(ctx, insn->type, def, def_reg);
}
}
}
#endif
}
}
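
Background for the hunk above: on x86 targets without an SSE return register (IR_REG_FP_RET1 undefined), a float/double call result is returned on the x87 stack in st(0). The added code pops it with fstp, either straight into the result's spill slot or into the shared ret_slot scratch slot reserved by the register allocator (see the IR_HAS_FP_RET_SLOT handling elsewhere in this commit), from which it is then reloaded into the destination register.
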
@@ -8258,7 +8565,6 @@ static void ir_fix_param_spills(ir_ctx *ctx)
&& ival->stack_spill_pos == -1
&& (ival->next || ival->reg == IR_REG_NONE)) {
ival->stack_spill_pos = stack_start + stack_offset;
ctx->regs[use][0] = IR_REG_NONE;
}
}
if (sizeof(void*) == 8) {
@@ -8291,7 +8597,9 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
ir_reg reg;
#ifndef IR_REG_FP_RET1
if (ctx->ret_type == IR_FLOAT || ctx->ret_type == IR_DOUBLE) {
if (ctx->flags2 & IR_HAS_FP_RET_SLOT) {
ctx->ret_slot = ir_allocate_spill_slot(ctx, IR_DOUBLE, &data->ra_data);
} else if (ctx->ret_type == IR_FLOAT || ctx->ret_type == IR_DOUBLE) {
ctx->ret_slot = ir_allocate_spill_slot(ctx, ctx->ret_type, &data->ra_data);
} else {
ctx->ret_slot = -1;
@@ -8323,6 +8631,13 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
case IR_LOOP_BEGIN:
case IR_LOOP_END:
break;
#ifndef IR_REG_FP_RET1
case IR_CALL:
if (ctx->ret_slot == -1 && (insn->type == IR_FLOAT || insn->type == IR_DOUBLE)) {
ctx->ret_slot = ir_allocate_spill_slot(ctx, IR_DOUBLE, &data->ra_data);
}
#endif
IR_FALLTHROUGH;
default:
def_flags = ir_get_target_constraints(ctx, i, &constraints);
if (ctx->rules
@@ -8491,10 +8806,45 @@ static void ir_preallocate_call_stack(ir_ctx *ctx)
}
}

#if defined(IR_TARGET_X64) && !defined(_WIN64)
static void ir_count_reg_params(ir_ctx *ctx)
{
ir_use_list *use_list = &ctx->use_lists[1];
ir_insn *insn;
ir_ref i, n, *p, use;
int gp_reg_params = 0;
int fp_reg_params = 0;
n = use_list->count;
for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) {
use = *p;
insn = &ctx->ir_base[use];
if (insn->op == IR_PARAM) {
if (IR_IS_TYPE_INT(insn->type)) {
if (gp_reg_params < IR_REG_INT_ARGS) {
gp_reg_params++;
}
} else {
if (fp_reg_params < IR_REG_FP_ARGS) {
fp_reg_params++;
}
}
}
}
ctx->gp_reg_params = gp_reg_params;
ctx->fp_reg_params = fp_reg_params;
}
#endif

void ir_fix_stack_frame(ir_ctx *ctx)
{
uint32_t additional_size = 0;
ctx->locals_area_size = ctx->stack_frame_size;
#if defined(IR_TARGET_X64) && !defined(_WIN64)
ir_count_reg_params(ctx);
#endif
if (ctx->used_preserved_regs) {
ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs;
ir_reg reg;
@@ -8505,6 +8855,17 @@ void ir_fix_stack_frame(ir_ctx *ctx)
} IR_REGSET_FOREACH_END();
}
#if defined(IR_TARGET_X64) && !defined(_WIN64)
if ((ctx->flags & IR_VARARG_FUNC) && (ctx->flags2 & IR_HAS_VA_START)) {
if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) {
additional_size += sizeof(void*) * IR_REG_INT_ARGS;
}
if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) {
additional_size += 16 * IR_REG_FP_ARGS;
}
}
#endif
ctx->stack_frame_size = IR_ALIGNED_SIZE(ctx->stack_frame_size, sizeof(void*));
ctx->stack_frame_size += additional_size;
ctx->stack_frame_alignment = 0;
@@ -8653,6 +9014,7 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
case IR_PI:
case IR_PHI:
case IR_SNAPSHOT:
case IR_VA_END:
break;
case IR_LEA_OB:
{
@@ -9156,6 +9518,15 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
case IR_ALLOCA:
ir_emit_alloca(ctx, i, insn);
break;
case IR_VA_START:
ir_emit_va_start(ctx, i, insn);
break;
case IR_VA_COPY:
ir_emit_va_copy(ctx, i, insn);
break;
case IR_VA_ARG:
ir_emit_va_arg(ctx, i, insn);
break;
case IR_AFREE:
ir_emit_afree(ctx, i, insn);
break;

ir_x86.h

@@ -178,6 +178,13 @@ enum _ir_reg {
| IR_REGSET(IR_REG_RBP) \
| IR_REGSET_INTERVAL(IR_REG_R12, IR_REG_R15))
typedef struct _ir_va_list {
uint32_t gp_offset;
uint32_t fp_offset;
void *overflow_arg_area;
void *reg_save_area;
} ir_va_list;
#elif defined(IR_TARGET_X86)
# define IR_REG_INT_RET1 IR_REG_RAX

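Given the ir_va_list layout above, the integer va_arg path emitted earlier behaves like this sketch (illustrative, not part of the commit):

static void *model_va_arg_gp_sysv_x64(ir_va_list *ap)
{
	if (ap->gp_offset < 8 * 6) {   /* a GP save slot is still free */
		void *p = (char*)ap->reg_save_area + ap->gp_offset;
		ap->gp_offset += 8;
		return p;
	}
	void *p = ap->overflow_arg_area; /* fall back to the stack */
	ap->overflow_arg_area = (char*)ap->overflow_arg_area + 8;
	return p;
}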

@@ -75,6 +75,7 @@ test:
movl 0x1c(%esp), %ecx
movl 0x24(%esp), %ebx
movl 0x2c(%esp), %ebp
movl 0x20(%esp), %edx
imull %ecx, %edx
movl %edx, (%esp)
leal 4(%edx), %edx