From ac4ea33c242391e93f11dda036d9f7bf6c41f673 Mon Sep 17 00:00:00 2001
From: Dmitry Stogov
Date: Wed, 20 Dec 2023 21:06:53 +0300
Subject: [PATCH] Link with undefined functions through "thunks"

---
Reviewer notes (this section sits after the three-dash line and is not
part of the commit message):

* Formatting: this copy was rebuilt from a version whose line breaks had
  been collapsed.  Line boundaries and blank lines were recovered from the
  hunk headers and the diffstat.  Leading indentation and in-line alignment
  could not be recovered and are best-effort, so apply with a
  whitespace-tolerant option (e.g. "git apply --ignore-whitespace").
* What the patch does: ir.h declares ir_fix_thunk() and adds a
  "bool add_thunk" argument to the loader's resolve_sym_name callback.
  The emitters pass "op == IR_FUNC" (ir_call_addr passes 1).  In ir_main.c,
  ir_loader_resolve_sym_name() emits a thunk with a NULL target when the
  symbol has no address yet and add_thunk is set; ir_loader_add_sym() later
  rewrites that thunk through ir_fix_thunk(), unprotecting and re-protecting
  the code buffer around the write.
* x86 ir_fix_thunk(): both branches store through a "void **" obtained by
  casting "code + 1" / "code + 6 + *offset_ptr".  Nothing shown in this
  patch guarantees those addresses are pointer-aligned; a memcpy() of the
  value would avoid relying on unaligned stores.
* ir_loader_resolve_sym_name(): the "size" result of ir_emit_thunk() is
  unused and its return value is stored without a check; whether
  ir_emit_thunk() can return NULL is not visible in this patch.
* The two "TODO: ... thunk or relocation ???" comments stay in place as
  context lines although this patch adds the thunk handling they ask about.

 ir.h            |  3 ++-
 ir_aarch64.dasc | 24 +++++++++++++++++++++---
 ir_emit.c       |  5 +++--
 ir_main.c       | 23 ++++++++++++++++-------
 ir_x86.dasc     | 33 +++++++++++++++++++++++++++++----
 5 files changed, 71 insertions(+), 17 deletions(-)

diff --git a/ir.h b/ir.h
index 6391b1b..537daeb 100644
--- a/ir.h
+++ b/ir.h
@@ -759,6 +759,7 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size);
 
 bool ir_needs_thunk(ir_code_buffer *code_buffer, void *addr);
 void *ir_emit_thunk(ir_code_buffer *code_buffer, void *addr, size_t *size_ptr);
+void ir_fix_thunk(void *thunk_entry, void *addr);
 
 /* Target address resolution (implementation in ir_emit.c) */
 void *ir_resolve_sym_name(const char *name);
@@ -806,7 +807,7 @@ struct _ir_loader {
 	bool (*sym_data_end) (ir_loader *loader);
 	bool (*func_init) (ir_loader *loader, ir_ctx *ctx, const char *name);
 	bool (*func_process) (ir_loader *loader, ir_ctx *ctx, const char *name);
-	void*(*resolve_sym_name) (ir_loader *loader, const char *name);
+	void*(*resolve_sym_name) (ir_loader *loader, const char *name, bool add_thunk);
 	bool (*has_sym) (ir_loader *loader, const char *name);
 	bool (*add_sym) (ir_loader *loader, const char *name, void *addr);
 };
diff --git a/ir_aarch64.dasc b/ir_aarch64.dasc
index 700f857..5e4ce90 100644
--- a/ir_aarch64.dasc
+++ b/ir_aarch64.dasc
@@ -1176,9 +1176,10 @@ static void ir_emit_load(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src)
 			ir_insn *insn = &ctx->ir_base[src];
 
 			if (insn->op == IR_SYM || insn->op == IR_FUNC) {
+				const char *name = ir_get_str(ctx, insn->val.name);
 				void *addr = (ctx->loader && ctx->loader->resolve_sym_name) ?
-					ctx->loader->resolve_sym_name(ctx->loader, ir_get_str(ctx, insn->val.name)) :
-					ir_resolve_sym_name(ir_get_str(ctx, insn->val.name));
+					ctx->loader->resolve_sym_name(ctx->loader, name, insn->op == IR_FUNC) :
+					ir_resolve_sym_name(name);
 				IR_ASSERT(addr);
 				ir_emit_load_imm_int(ctx, type, reg, (intptr_t)addr);
 			} else if (insn->op == IR_STR) {
@@ -6053,4 +6054,21 @@ void *ir_emit_thunk(ir_code_buffer *code_buffer, void *addr, size_t *size_ptr)
 	ir_mem_flush(entry, size);
 
 	return entry;
-}
\ No newline at end of file
+}
+
+void ir_fix_thunk(void *thunk_entry, void *addr)
+{
+	uint32_t *code = thunk_entry;
+	IR_ASSERT((code[0] & 0xffe00000) == 0xd2800000
+		&& (code[1] & 0xffe00000) == 0xf2a00000
+		&& (code[2] & 0xffe00000) == 0xf2c00000
+		&& (code[3] & 0xffe00000) == 0xf2e00000
+		&& (code[4] & 0xfffffc1f) == 0xd61f0000);
+
+	code[0] = (code[0] & 0xffe0001f) | (uint32_t)((uint64_t)(addr) & 0xffff) << 5;
+	code[1] = (code[1] & 0xffe0001f) | (uint32_t)(((uint64_t)(addr) >> 16) & 0xffff) << 5;
+	code[2] = (code[2] & 0xffe0001f) | (uint32_t)(((uint64_t)(addr) >> 32) & 0xffff) << 5;
+	code[3] = (code[3] & 0xffe0001f) | (uint32_t)(((uint64_t)(addr) >> 48) & 0xffff) << 5;
+
+	ir_mem_flush(code, sizeof(uint32_t) * 4);
+}
diff --git a/ir_emit.c b/ir_emit.c
index 1b6f8e0..caf2f54 100644
--- a/ir_emit.c
+++ b/ir_emit.c
@@ -327,9 +327,10 @@ static void *ir_call_addr(ir_ctx *ctx, ir_insn *insn, ir_insn *addr_insn)
 
 	IR_ASSERT(addr_insn->type == IR_ADDR);
 	if (addr_insn->op == IR_FUNC) {
+		const char* name = ir_get_str(ctx, addr_insn->val.name);
 		addr = (ctx->loader && ctx->loader->resolve_sym_name) ?
-			ctx->loader->resolve_sym_name(ctx->loader, ir_get_str(ctx, addr_insn->val.name)) :
-			ir_resolve_sym_name(ir_get_str(ctx, addr_insn->val.name));
+			ctx->loader->resolve_sym_name(ctx->loader, name, 1) :
+			ir_resolve_sym_name(name);
 		IR_ASSERT(addr);
 	} else {
 		IR_ASSERT(addr_insn->op == IR_ADDR || addr_insn->op == IR_FUNC_ADDR);
diff --git a/ir_main.c b/ir_main.c
index acfd9f6..f4fcd40 100644
--- a/ir_main.c
+++ b/ir_main.c
@@ -332,6 +332,13 @@ static bool ir_loader_add_sym(ir_loader *loader, const char *name, void *addr)
 		l->sym[old_val].addr = addr;
 		if (l->sym[old_val].thunk_addr) {
 			// TODO: Fix thunk or relocation ???
+			if (l->code_buffer.start) {
+				ir_mem_unprotect(l->code_buffer.start, (char*)l->code_buffer.end - (char*)l->code_buffer.start);
+			}
+			ir_fix_thunk(l->sym[old_val].thunk_addr, addr);
+			if (l->code_buffer.start) {
+				ir_mem_protect(l->code_buffer.start, (char*)l->code_buffer.end - (char*)l->code_buffer.start);
+			}
 		}
 		return 1;
 	}
@@ -354,7 +361,7 @@ static bool ir_loader_has_sym(ir_loader *loader, const char *name)
 	return val != 0;
 }
 
-static void* ir_loader_resolve_sym_name(ir_loader *loader, const char *name)
+static void* ir_loader_resolve_sym_name(ir_loader *loader, const char *name, bool add_thunk)
 {
 	ir_main_loader *l = (ir_main_loader*)loader;
 	uint32_t len = (uint32_t)strlen(name);
@@ -365,10 +372,12 @@ static void* ir_loader_resolve_sym_name(ir_loader *loader, const char *name)
 		if (l->sym[val].addr) {
 			return l->sym[val].addr;
 		}
-		if (!l->sym[val].thunk_addr) {
+		if (!l->sym[val].thunk_addr && add_thunk) {
 			/* Undefined declaration */
 			// TODO: Add thunk or relocation ???
-			l->sym[val].thunk_addr = (void*)(intptr_t)sizeof(void*);
+			size_t size;
+
+			l->sym[val].thunk_addr = ir_emit_thunk(&l->code_buffer, NULL, &size);
 		}
 		return l->sym[val].thunk_addr;
 	}
@@ -397,7 +406,7 @@ static bool ir_loader_external_sym_dcl(ir_loader *loader, const char *name, uint
 		ir_emit_llvm_sym_decl(name, flags | IR_EXTERN, 0, l->llvm_file);
 	}
 	if (l->dump_asm || l->dump_size || l->run) {
-		void *addr = ir_loader_resolve_sym_name(loader, name);
+		void *addr = ir_loader_resolve_sym_name(loader, name, 0);
 
 		if (!addr) {
 			return 0;
@@ -462,7 +471,7 @@ static bool ir_loader_external_func_dcl(ir_loader *loader, const char *name, uin
 		ir_emit_llvm_func_decl(name, flags | IR_EXTERN, ret_type, params_count, param_types, l->llvm_file);
 	}
 	if (l->dump_asm || l->dump_size || l->run) {
-		void *addr = ir_loader_resolve_sym_name(loader, name);
+		void *addr = ir_loader_resolve_sym_name(loader, name, 0);
 
 		if (!addr) {
 			return 0;
@@ -646,7 +655,7 @@ static bool ir_loader_sym_data_ref(ir_loader *loader, ir_op op, const char *ref,
 	}
 	if (l->dump_asm || l->dump_size || l->run) {
 		void *data = (char*)l->data_start + l->data_pos;
-		void *addr = ir_loader_resolve_sym_name(loader, ref);
+		void *addr = ir_loader_resolve_sym_name(loader, ref, 0);
 
 		if (!addr) {
 			ir_loader_add_reloc(l, ref, data);
@@ -801,7 +810,7 @@ static bool ir_loader_func_process(ir_loader *loader, ir_ctx *ctx, const char *n
 				for (i = IR_UNUSED + 1, insn = ctx->ir_base - i; i < ctx->consts_count; i++, insn--) {
 					if (insn->op == IR_FUNC) {
 						const char *name = ir_get_str(ctx, insn->val.name);
-						void *addr = ir_loader_resolve_sym_name(loader, name);
+						void *addr = ir_loader_resolve_sym_name(loader, name, 0);
 
 						IR_ASSERT(addr);
 						ir_disasm_add_symbol(name, (uintptr_t)addr, sizeof(void*));
diff --git a/ir_x86.dasc b/ir_x86.dasc
index 53bcd58..8e3dd55 100644
--- a/ir_x86.dasc
+++ b/ir_x86.dasc
@@ -2162,9 +2162,10 @@ static void ir_emit_load(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src)
 			ir_insn *insn = &ctx->ir_base[src];
 
 			if (insn->op == IR_SYM || insn->op == IR_FUNC) {
+				const char *name = ir_get_str(ctx, insn->val.name);
 				void *addr = (ctx->loader && ctx->loader->resolve_sym_name) ?
-					ctx->loader->resolve_sym_name(ctx->loader, ir_get_str(ctx, insn->val.name)) :
-					ir_resolve_sym_name(ir_get_str(ctx, insn->val.name));
+					ctx->loader->resolve_sym_name(ctx->loader, name, insn->op == IR_FUNC) :
+					ir_resolve_sym_name(name);
 				IR_ASSERT(addr);
 				ir_emit_load_imm_int(ctx, type, reg, (intptr_t)addr);
 			} else if (insn->op == IR_STR) {
@@ -2250,9 +2251,10 @@ static void ir_emit_store_mem_int_const(ir_ctx *ctx, ir_type type, ir_reg base_r
 	int64_t val = val_insn->val.i64;
 
 	if (val_insn->op == IR_FUNC || val_insn->op == IR_SYM) {
+		const char *name = ir_get_str(ctx, val_insn->val.name);
 		void *addr = (ctx->loader && ctx->loader->resolve_sym_name) ?
-			ctx->loader->resolve_sym_name(ctx->loader, ir_get_str(ctx, val_insn->val.name)) :
-			ir_resolve_sym_name(ir_get_str(ctx, val_insn->val.name));
+			ctx->loader->resolve_sym_name(ctx->loader, name, val_insn->op == IR_FUNC) :
+			ir_resolve_sym_name(name);
 		IR_ASSERT(addr);
 		val = (int64_t)(intptr_t)addr;
 	}
@@ -9871,9 +9873,13 @@ void *ir_emit_thunk(ir_code_buffer *code_buffer, void *addr, size_t *size_ptr)
 	dasm_setup(&dasm_state, dasm_actions);
 
 	|.code
+	|.if X64
 	|	jmp aword [>1]
 	|1:
 	|	.aword &addr
+	|.else
+	|	jmp &addr
+	|.endif
 
 	ret = dasm_link(&dasm_state, &size);
 	if (ret != DASM_S_OK) {
@@ -9902,3 +9908,22 @@ void *ir_emit_thunk(ir_code_buffer *code_buffer, void *addr, size_t *size_ptr)
 
 	return entry;
 }
+
+void ir_fix_thunk(void *thunk_entry, void *addr)
+{
+	unsigned char *code = thunk_entry;
+	void **addr_ptr;
+
+	if (sizeof(void*) == 8) {
+		int32_t *offset_ptr;
+
+		IR_ASSERT(code[0] == 0xff && code[1] == 0x25);
+		offset_ptr = (int32_t*)(code + 2);
+		addr_ptr = (void**)(code + 6 + *offset_ptr);
+		*addr_ptr = addr;
+	} else {
+		IR_ASSERT(code[0] == 0xe9);
+		addr_ptr = (void**)(code + 1);
+		*addr_ptr = (void*)((unsigned char*)addr - (code + 5));
+	}
+}