diff --git a/.gitignore b/.gitignore index 939253c..05e8e59 100644 --- a/.gitignore +++ b/.gitignore @@ -3,7 +3,6 @@ *.dot *.pdf ir_fold_hash.h -ir_load.c ir_x86.c ir_aarch64.c minilua diff --git a/Makefile b/Makefile index 9c54e0b..04f0442 100644 --- a/Makefile +++ b/Makefile @@ -1,84 +1,118 @@ +# TARGET may be "x86_64" or "x86" or "aarch64" +TARGET = x86_64 +# BUILD can be "debug" or "release" +BUILD = debug +BUILD_DIR = . +SRC_DIR = . + CC = gcc -#CFLAGS = -O2 -g -Wall -DIR_TARGET_X64 -CFLAGS = -O0 -g -Wall -DIR_DEBUG -DIR_TARGET_X64 +BUILD_CC = gcc +CFLAGS = -Wall LDFLAGS = -lm PHP = php LLK = /home/dmitry/php/llk/llk.php -DASM_ARCH = x86 -DASM_FLAGS = -D X64=1 -all: ir ir_test +ifeq (debug, $(BUILD)) + CFLAGS += -O0 -g -DIR_DEBUG=1 +endif +ifeq (release, $(BUILD)) + CFLAGS += -O2 -g +endif -ir: ir_main.o ir.o ir_strtab.o ir_cfg.o ir_sccp.o ir_gcm.o ir_ra.o ir_$(DASM_ARCH).o \ - ir_load.o ir_save.o ir_emit_c.o ir_dump.o ir_disasm.o ir_gdb.o ir_perf.o ir_check.o +ifeq (x86_64, $(TARGET)) + CFLAGS += -DIR_TARGET_X64 + DASM_ARCH = x86 + DASM_FLAGS = -D X64=1 +endif +ifeq (x86, $(TARGET)) + CC= gcc -m32 + CFLAGS += -DIR_TARGET_X86 + DASM_ARCH = x86 + DASM_FLAGS = +endif +ifeq (aarch64, $(TARGET)) + CC= aarch64-linux-gnu-gcc --sysroot=/home/dmitry/php/ARM64 + CFLAGS += -DIR_TARGET_AARCH64 + DASM_ARCH = aarch64 + DASM_FLAGS = +endif + +all: $(BUILD_DIR)/ir $(BUILD_DIR)/ir_test + +$(BUILD_DIR)/ir: $(BUILD_DIR)/ir_main.o $(BUILD_DIR)/ir.o $(BUILD_DIR)/ir_strtab.o $(BUILD_DIR)/ir_cfg.o \ + $(BUILD_DIR)/ir_sccp.o $(BUILD_DIR)/ir_gcm.o $(BUILD_DIR)/ir_ra.o $(BUILD_DIR)/ir_$(DASM_ARCH).o \ + $(BUILD_DIR)/ir_load.o $(BUILD_DIR)/ir_save.o $(BUILD_DIR)/ir_emit_c.o $(BUILD_DIR)/ir_dump.o \ + $(BUILD_DIR)/ir_disasm.o $(BUILD_DIR)/ir_gdb.o $(BUILD_DIR)/ir_perf.o $(BUILD_DIR)/ir_check.o $(CC) $(CFLAGS) $(LDFLAGS) -o $@ -lcapstone $^ -ir_test: ir_test.o ir.o ir_strtab.o ir_cfg.o ir_sccp.o ir_gcm.o ir_ra.o ir_$(DASM_ARCH).o \ - ir_save.o ir_dump.o ir_disasm.o ir_gdb.o ir_perf.o 
ir_check.o +$(BUILD_DIR)/ir_test: $(BUILD_DIR)/ir_test.o $(BUILD_DIR)/ir.o $(BUILD_DIR)/ir_strtab.o $(BUILD_DIR)/ir_cfg.o \ + $(BUILD_DIR)/ir_sccp.o $(BUILD_DIR)/ir_gcm.o $(BUILD_DIR)/ir_ra.o $(BUILD_DIR)/ir_$(DASM_ARCH).o \ + $(BUILD_DIR)/ir_save.o $(BUILD_DIR)/ir_dump.o $(BUILD_DIR)/ir_disasm.o $(BUILD_DIR)/ir_gdb.o \ + $(BUILD_DIR)/ir_perf.o $(BUILD_DIR)/ir_check.o $(CC) $(CFLAGS) $(LDFLAGS) -o $@ -lcapstone $^ -ir.o: ir.c ir.h ir_private.h ir_fold.h ir_fold_hash.h +$(BUILD_DIR)/ir.o: $(SRC_DIR)/ir.c $(SRC_DIR)/ir.h $(SRC_DIR)/ir_private.h $(SRC_DIR)/ir_fold.h \ + $(BUILD_DIR)/ir_fold_hash.h + $(CC) $(CFLAGS) -I$(BUILD_DIR) -o $@ -c $< +$(BUILD_DIR)/ir_cfg.o: $(SRC_DIR)/ir_cfg.c $(SRC_DIR)/ir.h $(SRC_DIR)/ir_private.h $(CC) $(CFLAGS) -o $@ -c $< -ir_cfg.o: ir_cfg.c ir.h ir_private.h +$(BUILD_DIR)/ir_sccp.o: $(SRC_DIR)/ir_sccp.c $(SRC_DIR)/ir.h $(SRC_DIR)/ir_private.h $(CC) $(CFLAGS) -o $@ -c $< -ir_sccp.o: ir_sccp.c ir.h ir_private.h +$(BUILD_DIR)/ir_gcm.o: $(SRC_DIR)/ir_gcm.c $(SRC_DIR)/ir.h $(SRC_DIR)/ir_private.h $(CC) $(CFLAGS) -o $@ -c $< -ir_gcm.o: ir_gcm.c ir.h ir_private.h +$(BUILD_DIR)/ir_ra.o: $(SRC_DIR)/ir_ra.c $(SRC_DIR)/ir.h $(SRC_DIR)/ir_private.h $(SRC_DIR)/ir_$(DASM_ARCH).h $(CC) $(CFLAGS) -o $@ -c $< -ir_ra.o: ir_ra.c ir.h ir_private.h ir_$(DASM_ARCH).h +$(BUILD_DIR)/ir_strtab.o: $(SRC_DIR)/ir_strtab.c $(SRC_DIR)/ir.h $(CC) $(CFLAGS) -o $@ -c $< -ir_strtab.o: ir_strtab.c ir.h +$(BUILD_DIR)/ir_save.o: $(SRC_DIR)/ir_save.c $(SRC_DIR)/ir.h $(CC) $(CFLAGS) -o $@ -c $< -ir_save.o: ir_save.c ir.h +$(BUILD_DIR)/ir_load.o: $(SRC_DIR)/ir_load.c $(SRC_DIR)/ir.h $(CC) $(CFLAGS) -o $@ -c $< -ir_load.o: ir_load.c ir.h +$(BUILD_DIR)/ir_emit_c.o: $(SRC_DIR)/ir_emit_c.c $(SRC_DIR)/ir.h $(CC) $(CFLAGS) -o $@ -c $< -ir_emit_c.o: ir_emit_c.c ir.h +$(BUILD_DIR)/ir_dump.o: $(SRC_DIR)/ir_dump.c $(SRC_DIR)/ir.h $(CC) $(CFLAGS) -o $@ -c $< -ir_dump.o: ir_dump.c ir.h +$(BUILD_DIR)/ir_disasm.o: $(SRC_DIR)/ir_disasm.c $(SRC_DIR)/ir.h $(SRC_DIR)/ir_private.h $(CC) 
$(CFLAGS) -o $@ -c $< -ir_disasm.o: ir_disasm.c ir.h ir_private.h +$(BUILD_DIR)/ir_gdb.o: $(SRC_DIR)/ir_gdb.c $(SRC_DIR)/ir.h $(CC) $(CFLAGS) -o $@ -c $< -ir_gdb.o: ir_gdb.c ir.h +$(BUILD_DIR)/ir_perf.o: $(SRC_DIR)/ir_perf.c $(SRC_DIR)/ir.h $(CC) $(CFLAGS) -o $@ -c $< -ir_perf.o: ir_perf.c ir.h +$(BUILD_DIR)/ir_check.o: $(SRC_DIR)/ir_check.c $(SRC_DIR)/ir.h $(CC) $(CFLAGS) -o $@ -c $< -ir_check.o: ir_check.c ir.h +$(BUILD_DIR)/ir_main.o: $(SRC_DIR)/ir_main.c $(SRC_DIR)/ir.h $(CC) $(CFLAGS) -o $@ -c $< -ir_main.o: ir_main.c ir.h - $(CC) $(CFLAGS) -o $@ -c $< -ir_test.o: ir_test.c ir.h +$(BUILD_DIR)/ir_test.o: $(SRC_DIR)/ir_test.c $(SRC_DIR)/ir.h $(CC) $(CFLAGS) -o $@ -c $< -ir_load.c: ir.g +$(SRC_DIR)/ir_load.c: $(SRC_DIR)/ir.g $(PHP) $(LLK) ir.g -ir_fold_hash.h: gen_ir_fold_hash ir_fold.h ir.h - ./gen_ir_fold_hash < ir_fold.h > ir_fold_hash.h -gen_ir_fold_hash: gen_ir_fold_hash.c ir_strtab.c - $(CC) $(CFLAGS) $(LDFALGS) -o $@ $^ +$(BUILD_DIR)/ir_fold_hash.h: $(BUILD_DIR)/gen_ir_fold_hash $(SRC_DIR)/ir_fold.h $(SRC_DIR)/ir.h + $(BUILD_DIR)/gen_ir_fold_hash < $(SRC_DIR)/ir_fold.h > $(BUILD_DIR)/ir_fold_hash.h +$(BUILD_DIR)/gen_ir_fold_hash: $(SRC_DIR)/gen_ir_fold_hash.c $(SRC_DIR)/ir_strtab.c + $(BUILD_CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ -minilua: dynasm/minilua.c - $(CC) dynasm/minilua.c -lm -o $@ -ir_$(DASM_ARCH).c: ir_$(DASM_ARCH).dasc minilua dynasm/*.lua - ./minilua dynasm/dynasm.lua $(DASM_FLAGS) -o $@ ir_$(DASM_ARCH).dasc -ir_$(DASM_ARCH).o: ir_$(DASM_ARCH).c ir.h ir_private.h ir_$(DASM_ARCH).h - $(CC) $(CFLAGS) -o $@ -c $< +$(BUILD_DIR)/minilua: $(SRC_DIR)/dynasm/minilua.c + $(BUILD_CC) $(SRC_DIR)/dynasm/minilua.c -lm -o $@ +$(BUILD_DIR)/ir_$(DASM_ARCH).c: $(SRC_DIR)/ir_$(DASM_ARCH).dasc $(SRC_DIR)/dynasm/*.lua $(BUILD_DIR)/minilua + $(BUILD_DIR)/minilua $(SRC_DIR)/dynasm/dynasm.lua $(DASM_FLAGS) -o $@ $(SRC_DIR)/ir_$(DASM_ARCH).dasc +$(BUILD_DIR)/ir_$(DASM_ARCH).o: $(BUILD_DIR)/ir_$(DASM_ARCH).c $(SRC_DIR)/ir.h $(SRC_DIR)/ir_private.h \ + 
$(SRC_DIR)/ir_$(DASM_ARCH).h + $(CC) $(CFLAGS) -I$(SRC_DIR) -o $@ -c $< -test: ir - ./ir test.ir --dump --save 2>2.log - ./ir test.ir --dot ir.dot - dot -Tpdf ir.dot -o ir.pdf - php ir-test.php +test: $(BUILD_DIR)/ir + $(BUILD_DIR)/ir $(SRC_DIR)/test.ir --dump --save 2>$(BUILD_DIR)/test.log + $(BUILD_DIR)/ir $(SRC_DIR)/test.ir --dot $(BUILD_DIR)/ir.dot + dot -Tpdf $(BUILD_DIR)/ir.dot -o $(BUILD_DIR)/ir.pdf + BUILD_DIR=$(BUILD_DIR) SRC_DIR=$(SRC_DIR) $(PHP) $(SRC_DIR)/ir-test.php clean: - rm -rf ir ir_test *.o \ - ir_load.c \ - minilua ir_$(DASM_ARCH).c \ - ir_fold_hash.h gen_ir_fold_hash \ - ir.dot ir.pdf 2.log \ - b perf.data perf.data.old perf.data.jitted \ - tests/*.diff tests/*.out tests/*.exp tests/*.ir \ - tests/x86_64/*.diff tests/x86_64/*.out tests/x86_64/*.exp tests/x86_64/*.ir \ - tests/c/*.diff tests/c/*.out tests/c/*.exp tests/c/*.ir \ - tests/debug/*.diff tests/debug/*.out tests/debug/*.exp tests/debug/*.ir + rm -rf $(BUILD_DIR)/ir $(BUILD_DIR)/ir_test $(BUILD_DIR)/*.o \ + $(BUILD_DIR)/minilua $(BUILD_DIR)/ir_$(DASM_ARCH).c \ + $(BUILD_DIR)/ir_fold_hash.h $(BUILD_DIR)/gen_ir_fold_hash \ + $(BUILD_DIR)/ir.dot $(BUILD_DIR)/ir.pdf $(BUILD_DIR)/test.log + find $(SRC_DIR)/tests -type f -name '*.diff' -delete + find $(SRC_DIR)/tests -type f -name '*.out' -delete + find $(SRC_DIR)/tests -type f -name '*.exp' -delete + find $(SRC_DIR)/tests -type f -name '*.ir' -delete diff --git a/ir-test.php b/ir-test.php index 73a3a42..6569215 100644 --- a/ir-test.php +++ b/ir-test.php @@ -48,7 +48,7 @@ function parse_test($test, &$name, &$code, &$expect, &$args, &$target) { return true; } -function run_test($test, $name, $code, $expect, $args) { +function run_test($build_dir, $test, $name, $code, $expect, $args) { $base = substr($test, 0, -4); $input = $base . ".ir"; $output = $base . 
".out"; @@ -59,8 +59,8 @@ function run_test($test, $name, $code, $expect, $args) { if (!@file_put_contents($input, $code)) { return false; } - @system("./ir $input $args >$output 2>&1"); -// if (@system("./ir $input $args 2>&1 >$output") != 0) { + @system("$build_dir/ir $input $args >$output 2>&1"); +// if (@system("$build_dir/ir $input $args 2>&1 >$output") != 0) { // return false; // } $out = @file_get_contents($output); @@ -107,9 +107,11 @@ function find_tests($dir) { } function run_tests() { + $build_dir = getenv("BUILD_DIR") ?: "."; // getenv() returns false (not null) when the variable is missing, so "??" would never fall back + $src_dir = getenv("SRC_DIR") ?: "."; // same fallback rule as BUILD_DIR $skiped = 0; - $target = @system("./ir --target"); - $tests = find_tests("tests"); + $target = @system("$build_dir/ir --target"); + $tests = find_tests("$src_dir/tests"); $bad = array(); $failed = array(); $total = count($tests); @@ -126,7 +128,7 @@ function run_tests() { $len = strlen($str); echo $str; flush(); - $ret = run_test($test, $name, $code, $expect, $opt); + $ret = run_test($build_dir, $test, $name, $code, $expect, $opt); echo str_repeat(" ", $len); if ($ret) { echo "\r\e[1;32mPASS\e[0m: $name [$test]\n"; diff --git a/ir_aarch64.dasc b/ir_aarch64.dasc index 57dd8fe..ef44b6a 100644 --- a/ir_aarch64.dasc +++ b/ir_aarch64.dasc @@ -2,14 +2,6 @@ #include "ir_aarch64.h" #include "ir_private.h" -#ifdef _WIN32 -# define IR_SET_ALIGNED(alignment, decl) __declspec(align(alignment)) decl -#elif defined(HAVE_ATTRIBUTE_ALIGNED) -# define IR_SET_ALIGNED(alignment, decl) decl __attribute__ ((__aligned__ (alignment))) -#else -# define IR_SET_ALIGNED(alignment, decl) decl -#endif - #define DASM_M_GROW(ctx, t, p, sz, need) \ do { \ size_t _sz = (sz), _need = (need); \ diff --git a/ir_load.c b/ir_load.c new file mode 100644 index 0000000..1d0407a --- /dev/null +++ b/ir_load.c @@ -0,0 +1,1053 @@ +/* This file is generated from "ir.g". Do not edit! 
*/ + +#include "ir.h" +#include "ir_private.h" + +#include +#include +#include + +const unsigned char *yy_buf; +const unsigned char *yy_end; +const unsigned char *yy_pos; +const unsigned char *yy_text; +uint32_t yy_line; + +typedef struct _ir_parser_ctx { + ir_ctx *ctx; + uint32_t undef_count; + ir_strtab var_tab; +} ir_parser_ctx; + +static ir_strtab type_tab; +static ir_strtab op_tab; + +#define IR_IS_UNRESOLVED(ref) \ + ((ref) < (ir_ref)0xc0000000) +#define IR_ENCODE_UNRESOLVED_REF(ref, op) \ + ((ir_ref)0xc0000000 - ((ref) * sizeof(ir_ref) + (op))) +#define IR_DECODE_UNRESOLVED_REF(ref) \ + ((ir_ref)0xc0000000 - (ref)) + +static ir_ref ir_use_var(ir_parser_ctx *p, uint32_t n, const char *str, size_t len) { + ir_ref ref = ir_strtab_find(&p->var_tab, str, len); + if (!ref) { + p->undef_count++; + /* create a linked list of unresolved references with header in "var_tab" */ + ref = IR_UNUSED; /* list terminator */ + ir_strtab_lookup(&p->var_tab, str, len, IR_ENCODE_UNRESOLVED_REF(p->ctx->insns_count, n)); + } else if (IR_IS_UNRESOLVED(ref)) { + /* keep the linked list of unresolved references with header in "var_tab" */ + /* "ref" keeps the tail of the list */ + ir_strtab_update(&p->var_tab, str, len, IR_ENCODE_UNRESOLVED_REF(p->ctx->insns_count, n)); + } + return ref; +} + +static void ir_define_var(ir_parser_ctx *p, const char *str, size_t len, ir_ref ref) { + ir_ref old_ref = ir_strtab_lookup(&p->var_tab, str, len, ref); + if (ref != old_ref) { + if (IR_IS_UNRESOLVED(old_ref)) { + p->undef_count--; + /* update the linked list of unresolved references */ + do { + ir_ref *ptr = ((ir_ref*)(p->ctx->ir_base)) + IR_DECODE_UNRESOLVED_REF(old_ref); + old_ref = *ptr; + *ptr = ref; + } while (old_ref != IR_UNUSED); + ir_strtab_update(&p->var_tab, str, len, ref); + } else { + fprintf(stderr, "ERROR: Redefined variable `%.*s` on line %d\n", (int)len, str, yy_line); /* "str" is a length-delimited slice of the input buffer, so limit output with a precision */ + exit(2); + } + } +} + +static void report_undefined_var(const char *str, uint32_t len, ir_ref val) +{ + if 
(IR_IS_UNRESOLVED(val)) { + fprintf(stderr, "ERROR: Undefined variable `%.*s`\n", (int)len, str); /* print at most "len" bytes of the name */ + } +} + +void ir_check_indefined_vars(ir_parser_ctx *p) +{ + ir_strtab_apply(&p->var_tab, report_undefined_var); + exit(2); +} + +/* forward declarations */ +static void yy_error(const char *msg); +static void yy_error_sym(const char *msg, int sym); + +#define YYPOS cpos +#define YYEND cend + +#define YY_EOF 0 +#define YY__LBRACE 1 +#define YY__SEMICOLON 2 +#define YY__RBRACE 3 +#define YY_FUNC 4 +#define YY__LPAREN 5 +#define YY__COMMA 6 +#define YY__RPAREN 7 +#define YY__COLON 8 +#define YY__EQUAL 9 +#define YY_FUNC_ADDR 10 +#define YY__SLASH 11 +#define YY_NULL 12 +#define YY_ID 13 +#define YY_DECNUMBER 14 +#define YY_HEXNUMBER 15 +#define YY_FLOATNUMBER 16 +#define YY_CHARACTER 17 +#define YY_STRING 18 +#define YY_EOL 19 +#define YY_WS 20 +#define YY_ONE_LINE_COMMENT 21 +#define YY_COMMENT 22 + +static const char * sym_name[] = { + "", + "{", + ";", + "}", + "func", + "(", + ",", + ")", + ":", + "=", + "func_addr", + "/", + "null", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + NULL +}; + +#define YY_IN_SET(sym, set, bitset) \ + (bitset[sym>>3] & (1 << (sym & 0x7))) + +static int skip_EOL(int sym); +static int skip_WS(int sym); +static int skip_ONE_LINE_COMMENT(int sym); +static int skip_COMMENT(int sym); +static int get_sym(void); +static int parse_ir(int sym); +static int parse_ir_func(int sym, ir_parser_ctx *p); +static int parse_ir_func_prototype(int sym, ir_parser_ctx *p); +static int parse_ir_insn(int sym, ir_parser_ctx *p); +static int parse_type(int sym, uint8_t *t); +static int parse_func(int sym, uint8_t *op); +static int parse_val(int sym, ir_parser_ctx *p, uint8_t op, uint32_t n, ir_ref *ref); +static int parse_const(int sym, uint8_t t, ir_val *val); +static int parse_ID(int sym, const char **str, size_t *len); +static int parse_DECNUMBER(int sym, uint32_t t, ir_val *val); +static int parse_HEXNUMBER(int sym, uint32_t t, ir_val *val); 
+static int parse_FLOATNUMBER(int sym, uint32_t t, ir_val *val); +static int parse_CHARACTER(int sym, ir_val *val); +static int parse_STRING(int sym, const char **str, size_t *len); + +static int get_skip_sym(void) { + int ch; + int ret; + int accept = -1; + const unsigned char *accept_pos; + const unsigned char *cpos = yy_pos; + const unsigned char *cend = yy_end; + +_yy_state_start: + yy_text = YYPOS; + ch = *YYPOS; + switch (ch) { + case 'f': + ch = *++YYPOS; + if (ch != 'u') goto _yy_tunnel_2; + ch = *++YYPOS; + if (ch != 'n') goto _yy_tunnel_2; + ch = *++YYPOS; + if (ch != 'c') goto _yy_tunnel_2; + ch = *++YYPOS; + if (ch != '_') {ret = YY_FUNC; goto _yy_tunnel_68;} + ch = *++YYPOS; + if (ch != 'a') goto _yy_tunnel_2; + ch = *++YYPOS; + if (ch != 'd') goto _yy_tunnel_2; + ch = *++YYPOS; + if (ch != 'd') goto _yy_tunnel_2; + ch = *++YYPOS; + if (ch != 'r') goto _yy_tunnel_2; + ret = YY_FUNC_ADDR; + goto _yy_state_68; + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + case '_': + goto _yy_state_2; + case 'n': + ch = *++YYPOS; + if (ch != 'u') goto _yy_tunnel_2; + ch = *++YYPOS; + if (ch != 'l') goto _yy_tunnel_2; + ch = *++YYPOS; + if (ch != 'l') goto _yy_tunnel_2; + ret = YY_NULL; + goto _yy_state_68; + case '(': + YYPOS++; + ret = YY__LPAREN; + goto _yy_fin; + case ',': + YYPOS++; + ret = YY__COMMA; + goto _yy_fin; + case ')': + YYPOS++; + ret = YY__RPAREN; + goto _yy_fin; + case ':': + YYPOS++; + ret = YY__COLON; + 
goto _yy_fin; + case '{': + YYPOS++; + ret = YY__LBRACE; + goto _yy_fin; + case '=': + YYPOS++; + ret = YY__EQUAL; + goto _yy_fin; + case '-': + ch = *++YYPOS; + if ((ch >= '0' && ch <= '9')) { + goto _yy_state_12; + } else if (ch == '.') { + goto _yy_state_13; + } else { + goto _yy_state_error; + } + case '0': + ch = *++YYPOS; + if (ch != 'x') goto _yy_tunnel_12; + ch = *++YYPOS; + if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f')) { + goto _yy_state_45; + } else { + goto _yy_state_error; + } + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + goto _yy_state_12; + case '.': + goto _yy_state_13; + case '\'': + goto _yy_state_14; + case '"': + goto _yy_state_15; + case '/': + ch = *++YYPOS; + accept = YY__SLASH; + accept_pos = yy_pos; + if (ch == '*') { + goto _yy_state_38; + } else if (ch == '/') { + goto _yy_state_22; + } else { + ret = YY__SLASH; + goto _yy_fin; + } + case ';': + YYPOS++; + ret = YY__SEMICOLON; + goto _yy_fin; + case '}': + YYPOS++; + ret = YY__RBRACE; + goto _yy_fin; + case '\r': + ch = *++YYPOS; + if (ch == '\n') { + yy_line++; + YYPOS++; + ret = YY_EOL; + goto _yy_fin; + } else { + ret = YY_EOL; + goto _yy_fin; + } + case '\n': + yy_line++; + YYPOS++; + ret = YY_EOL; + goto _yy_fin; + case ' ': + case '\t': + case '\f': + case '\v': + goto _yy_state_21; + case '#': + goto _yy_state_22; + case '\0': + if (ch == 0 && YYPOS < YYEND) goto _yy_state_error; + YYPOS++; + ret = YY_EOF; + goto _yy_fin; + default: + goto _yy_state_error; + } +_yy_state_2: + ch = *++YYPOS; +_yy_tunnel_2: + if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || ch == '_' || (ch >= 'a' && ch <= 'z')) { + goto _yy_state_2; + } else { + ret = YY_ID; + goto _yy_fin; + } +_yy_state_12: + ch = *++YYPOS; +_yy_tunnel_12: + accept = YY_DECNUMBER; + accept_pos = yy_pos; + if ((ch >= '0' && ch <= '9')) { + goto _yy_state_12; + } else if (ch == '.') { + goto _yy_state_28; + } else if (ch 
== 'E' || ch == 'e') { + goto _yy_state_29; + } else { + ret = YY_DECNUMBER; + goto _yy_fin; + } +_yy_state_13: + ch = *++YYPOS; + if ((ch >= '0' && ch <= '9')) { + goto _yy_state_28; + } else { + goto _yy_state_error; + } +_yy_state_14: + ch = *++YYPOS; + if (ch == '\\') { + ch = *++YYPOS; + if (YYPOS < YYEND) { + if (ch == '\n') { + yy_line++; + } + goto _yy_state_14; + } else { + goto _yy_state_error; + } + } else if (ch == '\'') { + YYPOS++; + ret = YY_CHARACTER; + goto _yy_fin; + } else if (YYPOS < YYEND && (ch <= '&' || (ch >= '(' && ch <= '[') || ch >= ']')) { + if (ch == '\n') { + yy_line++; + } + goto _yy_state_14; + } else { + goto _yy_state_error; + } +_yy_state_15: + ch = *++YYPOS; + if (ch == '\\') { + ch = *++YYPOS; + if (YYPOS < YYEND) { + if (ch == '\n') { + yy_line++; + } + goto _yy_state_15; + } else { + goto _yy_state_error; + } + } else if (ch == '"') { + YYPOS++; + ret = YY_STRING; + goto _yy_fin; + } else if (YYPOS < YYEND && (ch <= '!' || (ch >= '#' && ch <= '[') || ch >= ']')) { + if (ch == '\n') { + yy_line++; + } + goto _yy_state_15; + } else { + goto _yy_state_error; + } +_yy_state_21: + ch = *++YYPOS; + if (ch == '\t' || ch == '\v' || ch == '\f' || ch == ' ') { + goto _yy_state_21; + } else { + ret = YY_WS; + goto _yy_fin; + } +_yy_state_22: + ch = *++YYPOS; + if (ch == '\r') { + ch = *++YYPOS; + if (ch == '\n') { + yy_line++; + YYPOS++; + ret = YY_ONE_LINE_COMMENT; + goto _yy_fin; + } else { + ret = YY_ONE_LINE_COMMENT; + goto _yy_fin; + } + } else if (ch == '\n') { + yy_line++; + YYPOS++; + ret = YY_ONE_LINE_COMMENT; + goto _yy_fin; + } else if (YYPOS < YYEND && (ch <= '\t' || ch == '\v' || ch == '\f' || ch >= '\016')) { + goto _yy_state_22; + } else { + goto _yy_state_error; + } +_yy_state_28: + ch = *++YYPOS; + accept = YY_FLOATNUMBER; + accept_pos = yy_pos; + if (ch == 'E' || ch == 'e') { + goto _yy_state_29; + } else if ((ch >= '0' && ch <= '9')) { + goto _yy_state_28; + } else { + ret = YY_FLOATNUMBER; + goto _yy_fin; + } 
+_yy_state_29: + ch = *++YYPOS; + if (ch == '+' || ch == '-') { + ch = *++YYPOS; + if ((ch >= '0' && ch <= '9')) { + goto _yy_state_48; + } else { + goto _yy_state_error; + } + } else if ((ch >= '0' && ch <= '9')) { + goto _yy_state_48; + } else { + goto _yy_state_error; + } +_yy_state_38: + ch = *++YYPOS; +_yy_tunnel_38: + if (ch == '*') { + ch = *++YYPOS; + if (ch != '/') goto _yy_tunnel_38; + YYPOS++; + ret = YY_COMMENT; + goto _yy_fin; + } else if (YYPOS < YYEND && (ch <= ')' || ch >= '+')) { + if (ch == '\n') { + yy_line++; + } + goto _yy_state_38; + } else { + goto _yy_state_error; + } +_yy_state_45: + ch = *++YYPOS; + if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f')) { + goto _yy_state_45; + } else { + ret = YY_HEXNUMBER; + goto _yy_fin; + } +_yy_state_48: + ch = *++YYPOS; + if ((ch >= '0' && ch <= '9')) { + goto _yy_state_48; + } else { + ret = YY_FLOATNUMBER; + goto _yy_fin; + } +_yy_state_68: + ch = *++YYPOS; +_yy_tunnel_68: + if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || ch == '_' || (ch >= 'a' && ch <= 'z')) { + goto _yy_state_2; + } else { + goto _yy_fin; + } +_yy_state_error: + if (accept >= 0) { + yy_pos = accept_pos; + return accept; + } + if (YYPOS >= YYEND) { + yy_error("unexpected "); + } else if (YYPOS == yy_text) { + yy_error("unexpected character 'escape_char(ch)'"); + } else { + yy_error("unexpected sequence 'escape_string(yy_text, 1 + YYPOS - yy_text))'"); + } + YYPOS++; + goto _yy_state_start; +_yy_fin: + yy_pos = YYPOS; + return ret; +} + +static int skip_EOL(int sym) { + if (sym != YY_EOL) { + yy_error_sym(" expected, got", sym); + } + sym = get_skip_sym(); + return sym; +} + +static int skip_WS(int sym) { + if (sym != YY_WS) { + yy_error_sym(" expected, got", sym); + } + sym = get_skip_sym(); + return sym; +} + +static int skip_ONE_LINE_COMMENT(int sym) { + if (sym != YY_ONE_LINE_COMMENT) { + yy_error_sym(" expected, got", sym); + } + sym = get_skip_sym(); + return sym; +} + +static int 
skip_COMMENT(int sym) { + if (sym != YY_COMMENT) { + yy_error_sym(" expected, got", sym); + } + sym = get_skip_sym(); + return sym; +} + +static int get_sym(void) { + int sym; + sym = get_skip_sym(); + while (sym == YY_EOL || sym == YY_WS || sym == YY_ONE_LINE_COMMENT || sym == YY_COMMENT) { + if (sym == YY_EOL) { + sym = skip_EOL(sym); + } else if (sym == YY_WS) { + sym = skip_WS(sym); + } else if (sym == YY_ONE_LINE_COMMENT) { + sym = skip_ONE_LINE_COMMENT(sym); + } else { + sym = skip_COMMENT(sym); + } + } + return sym; +} + +static int parse_ir(int sym) { + ir_parser_ctx p; + do { + sym = parse_ir_func_prototype(sym, &p); + sym = parse_ir_func(sym, &p); + } while (sym == YY_FUNC); + return sym; +} + +static int parse_ir_func(int sym, ir_parser_ctx *p) { + if (sym != YY__LBRACE) { + yy_error_sym("'{' expected, got", sym); + } + sym = get_sym(); + while (sym == YY_ID) { + sym = parse_ir_insn(sym, p); + if (sym != YY__SEMICOLON) { + yy_error_sym("';' expected, got", sym); + } + sym = get_sym(); + } + if (sym != YY__RBRACE) { + yy_error_sym("'}' expected, got", sym); + } + sym = get_sym(); + return sym; +} + +static int parse_ir_func_prototype(int sym, ir_parser_ctx *p) { + const char *name; + size_t len; + uint8_t t = 0; + if (sym != YY_FUNC) { + yy_error_sym("'func' expected, got", sym); + } + sym = get_sym(); + sym = parse_ID(sym, &name, &len); + if (sym != YY__LPAREN) { + yy_error_sym("'(' expected, got", sym); + } + sym = get_sym(); + if (sym == YY_ID) { + sym = parse_type(sym, &t); + while (sym == YY__COMMA) { + sym = get_sym(); + sym = parse_type(sym, &t); + } + } + if (sym != YY__RPAREN) { + yy_error_sym("')' expected, got", sym); + } + sym = get_sym(); + if (sym != YY__COLON) { + yy_error_sym("':' expected, got", sym); + } + sym = get_sym(); + sym = parse_type(sym, &t); + return sym; +} + +static int parse_ir_insn(int sym, ir_parser_ctx *p) { + int sym2; + const unsigned char *save_pos; + const unsigned char *save_text; + int save_line; + int alt0; + const 
char *str, *str2 = NULL, *func; + size_t len, len2 = 0, func_len; + uint8_t op; + uint8_t t = 0; + ir_ref op1 = IR_UNUSED; + ir_ref op2 = IR_UNUSED; + ir_ref op3 = IR_UNUSED; + ir_ref ref; + ir_val val; + ir_val count; + ir_val flags; + uint32_t n; + save_pos = yy_pos; + save_text = yy_text; + save_line = yy_line; + alt0 = -2; + sym2 = sym; + if (sym2 == YY_ID) { + sym2 = get_sym(); + goto _yy_state_0_1; + } else { + yy_error_sym("unexpected", sym2); + } +_yy_state_0_1: + if (sym2 == YY_ID) { + alt0 = 1; + goto _yy_state_0; + } else if (sym2 == YY__EQUAL) { + alt0 = 5; + goto _yy_state_0; + } else { + yy_error_sym("unexpected", sym2); + } +_yy_state_0: + yy_pos = save_pos; + yy_text = save_text; + yy_line = save_line; + if (alt0 == 1) { + sym = parse_type(sym, &t); + sym = parse_ID(sym, &str, &len); + if (sym == YY__COMMA) { + sym = get_sym(); + sym = parse_ID(sym, &str2, &len2); + } + } else if (alt0 == 5) { + sym = parse_ID(sym, &str, &len); + } else { + yy_error_sym("unexpected", sym); + } + if (sym != YY__EQUAL) { + yy_error_sym("'=' expected, got", sym); + } + sym = get_sym(); + switch (sym) { + case YY_DECNUMBER: + case YY_HEXNUMBER: + case YY_FLOATNUMBER: + case YY_CHARACTER: + val.u64 = 0; + sym = parse_const(sym, t, &val); + ref = ir_const(p->ctx, val, t); + break; + case YY_FUNC: + sym = get_sym(); + if (sym != YY__LPAREN) { + yy_error_sym("'(' expected, got", sym); + } + sym = get_sym(); + sym = parse_ID(sym, &func, &func_len); + flags.u64 = 0; + if (sym == YY__COMMA) { + sym = get_sym(); + sym = parse_DECNUMBER(sym, IR_U16, &flags); + } + if (sym != YY__RPAREN) { + yy_error_sym("')' expected, got", sym); + } + sym = get_sym(); + ref = ir_const_func(p->ctx, ir_strl(p->ctx, func, func_len), flags.u16); + break; + case YY_FUNC_ADDR: + sym = get_sym(); + if (sym != YY__LPAREN) { + yy_error_sym("'(' expected, got", sym); + } + sym = get_sym(); + if (sym == YY_DECNUMBER) { + sym = parse_DECNUMBER(sym, IR_ADDR, &val); + } else if (sym == YY_HEXNUMBER) { + sym 
= parse_HEXNUMBER(sym, IR_ADDR, &val); + } else { + yy_error_sym("unexpected", sym); + } + flags.u64 = 0; + if (sym == YY__COMMA) { + sym = get_sym(); + sym = parse_DECNUMBER(sym, IR_U16, &flags); + } + if (sym != YY__RPAREN) { + yy_error_sym("')' expected, got", sym); + } + sym = get_sym(); + ref = ir_const_func_addr(p->ctx, val.addr, flags.u16); + break; + case YY_STRING: + sym = parse_STRING(sym, &func, &func_len); + ref = ir_const_str(p->ctx, ir_strl(p->ctx, func, func_len)); + break; + case YY_ID: + sym = parse_func(sym, &op); + if (sym == YY__SLASH) { + sym = get_sym(); + sym = parse_DECNUMBER(sym, IR_I32, &count); + if (op == IR_PHI || op == IR_SNAPSHOT) count.i32++; + if (op == IR_CALL || op == IR_TAILCALL) count.i32+=2; + if (count.i32 < 0 || count.i32 > 255) yy_error("bad bumber of operands"); + ref = ir_emit_N(p->ctx, IR_OPT(op, t), count.i32); + if (sym == YY__LPAREN) { + sym = get_sym(); + if (sym == YY_ID || sym == YY_STRING || sym == YY_DECNUMBER || sym == YY_NULL) { + sym = parse_val(sym, p, op, 1, &op1); + n = 1; + if (n > count.i32) yy_error("too many operands"); + ir_set_op(p->ctx, ref, n, op1); + while (sym == YY__COMMA) { + sym = get_sym(); + sym = parse_val(sym, p, op, n, &op1); + n++; + if (n > count.i32) yy_error("too many operands"); + ir_set_op(p->ctx, ref, n, op1); + } + } + if (sym != YY__RPAREN) { + yy_error_sym("')' expected, got", sym); + } + sym = get_sym(); + } + } else if (sym == YY__LPAREN || sym == YY__SEMICOLON) { + if (sym == YY__LPAREN) { + sym = get_sym(); + if (sym == YY_ID || sym == YY_STRING || sym == YY_DECNUMBER || sym == YY_NULL) { + sym = parse_val(sym, p, op, 1, &op1); + if (sym == YY__COMMA) { + sym = get_sym(); + sym = parse_val(sym, p, op, 2, &op2); + if (sym == YY__COMMA) { + sym = get_sym(); + sym = parse_val(sym, p, op, 3, &op3); + } + } + } + if (sym != YY__RPAREN) { + yy_error_sym("')' expected, got", sym); + } + sym = get_sym(); + } + if (IR_IS_FOLDABLE_OP(op) + && !IR_IS_UNRESOLVED(op1) + && 
!IR_IS_UNRESOLVED(op2) + && !IR_IS_UNRESOLVED(op3)) { + ref = ir_fold(p->ctx, IR_OPT(op, t), op1, op2, op3); + } else { + ref = ir_emit(p->ctx, IR_OPT(op, t), op1, op2, op3); + } + } else { + yy_error_sym("unexpected", sym); + } + break; + default: + yy_error_sym("unexpected", sym); + } + ir_define_var(p, str, len, ref); + if (str2) ir_define_var(p, str2, len2, ref); + return sym; +} + +static int parse_type(int sym, uint8_t *t) { + const char *str; + size_t len; + ir_ref ref; + sym = parse_ID(sym, &str, &len); + ref = ir_strtab_find(&type_tab, str, len); + if (!ref) yy_error("invalid type"); + *t = ref; + return sym; +} + +static int parse_func(int sym, uint8_t *op) { + const char *str; + size_t len; + ir_ref ref; + sym = parse_ID(sym, &str, &len); + ref = ir_strtab_find(&op_tab, str, len); + if (!ref) yy_error("invalid op"); + *op = ref - 1; + return sym; +} + +static int parse_val(int sym, ir_parser_ctx *p, uint8_t op, uint32_t n, ir_ref *ref) { + const char *str; + size_t len; + ir_val val; + uint32_t kind = IR_OPND_KIND(ir_op_flags[op], n); + if (sym == YY_ID) { + sym = parse_ID(sym, &str, &len); + if (!IR_IS_REF_OPND_KIND(kind)) yy_error("unexpected reference"); + *ref = ir_use_var(p, n, str, len); + } else if (sym == YY_STRING) { + sym = parse_STRING(sym, &str, &len); + if (kind != IR_OPND_STR) yy_error("unexpected string"); + *ref = ir_strl(p->ctx, str, len); + } else if (sym == YY_DECNUMBER) { + sym = parse_DECNUMBER(sym, IR_I32, &val); + if (kind != IR_OPND_NUM && kind != IR_OPND_PROB) yy_error("unexpected number"); + if (val.u64 < 0 && val.u64 >= 0x7ffffff) yy_error("number out of range"); + *ref = val.u64; + } else if (sym == YY_NULL) { + sym = get_sym(); + *ref = IR_UNUSED; + } else { + yy_error_sym("unexpected", sym); + } + return sym; +} + +static int parse_const(int sym, uint8_t t, ir_val *val) { + if (sym == YY_DECNUMBER) { + sym = parse_DECNUMBER(sym, t, val); + } else if (sym == YY_HEXNUMBER) { + sym = parse_HEXNUMBER(sym, t, val); + } else if 
(sym == YY_FLOATNUMBER) { + sym = parse_FLOATNUMBER(sym, t, val); + } else if (sym == YY_CHARACTER) { + sym = parse_CHARACTER(sym, val); + } else { + yy_error_sym("unexpected", sym); + } + return sym; +} + +static int parse_ID(int sym, const char **str, size_t *len) { + if (sym != YY_ID) { + yy_error_sym(" expected, got", sym); + } + *str = (const char*)yy_text; *len = yy_pos - yy_text; + sym = get_sym(); + return sym; +} + +static int parse_DECNUMBER(int sym, uint32_t t, ir_val *val) { + if (sym != YY_DECNUMBER) { + yy_error_sym(" expected, got", sym); + } + if (t >= IR_DOUBLE) val->d = atof((const char*)yy_text); else val->i64 = atoll((const char*)yy_text); + sym = get_sym(); + return sym; +} + +static int parse_HEXNUMBER(int sym, uint32_t t, ir_val *val) { + if (sym != YY_HEXNUMBER) { + yy_error_sym(" expected, got", sym); + } + val->u64 = strtoull((const char*)yy_text + 2, NULL, 16); + sym = get_sym(); + return sym; +} + +static int parse_FLOATNUMBER(int sym, uint32_t t, ir_val *val) { + if (sym != YY_FLOATNUMBER) { + yy_error_sym(" expected, got", sym); + } + val->d = atof((const char*)yy_text); + sym = get_sym(); + return sym; +} + +static int parse_CHARACTER(int sym, ir_val *val) { + if (sym != YY_CHARACTER) { + yy_error_sym(" expected, got", sym); + } + if ((char)yy_text[1] != '\\') { + val->i64 = (char)yy_text[1]; + } else if ((char)yy_text[2] == '\\') { + val->i64 = '\\'; + } else if ((char)yy_text[2] == 'r') { + val->i64 = '\r'; + } else if ((char)yy_text[2] == 'n') { + val->i64 = '\n'; + } else if ((char)yy_text[2] == 't') { + val->i64 = '\t'; + } else if ((char)yy_text[2] == '0') { + val->i64 = '\0'; + } else { + IR_ASSERT(0); + } + sym = get_sym(); + return sym; +} + +static int parse_STRING(int sym, const char **str, size_t *len) { + if (sym != YY_STRING) { + yy_error_sym(" expected, got", sym); + } + *str = (const char*)yy_text + 1; *len = yy_pos - yy_text - 2; + sym = get_sym(); + return sym; +} + +static void parse(void) { + int sym; + + yy_pos = 
yy_text = yy_buf; + yy_line = 1; + sym = parse_ir(get_sym()); + if (sym != YY_EOF) { + yy_error_sym(" expected, got", sym); + } +} + +static void yy_error(const char *msg) { + fprintf(stderr, "ERROR: %s at line %d\n", msg, yy_line); + exit(2); +} + +static void yy_error_sym(const char *msg, int sym) { + fprintf(stderr, "ERROR: %s '%s' at line %d\n", msg, sym_name[sym], yy_line); + exit(2); +} + +int ir_load(ir_ctx *ctx, FILE *f) { + ir_parser_ctx p; + int sym; + long pos, end; + + p.ctx = ctx; + p.undef_count = 0; + ir_strtab_init(&p.var_tab, 256, 4096); + + pos = ftell(f); + fseek(f, 0, SEEK_END); + end = ftell(f); + fseek(f, pos, SEEK_SET); + yy_buf = alloca(end - pos + 1); + yy_end = yy_buf + (end - pos); + fread((void*)yy_buf, (end - pos), 1, f); + *(unsigned char*)yy_end = 0; + + yy_pos = yy_text = yy_buf; + yy_line = 1; + sym = parse_ir_func(get_sym(), &p); + if (sym != YY_EOF) { + yy_error_sym(" expected, got", sym); + } + if (p.undef_count) { + ir_check_indefined_vars(&p); + } + + ir_strtab_free(&p.var_tab); + + return 1; +} + +void ir_loader_init(void) +{ + ir_ref i; + + ir_strtab_init(&type_tab, IR_LAST_OP, 0); + for (i = 1; i < IR_LAST_TYPE; i++) { + ir_strtab_lookup(&type_tab, ir_type_cname[i], strlen(ir_type_cname[i]), i); + } + + ir_strtab_init(&op_tab, IR_LAST_OP, 0); + for (i = 0; i < IR_LAST_OP; i++) { + ir_strtab_lookup(&op_tab, ir_op_name[i], strlen(ir_op_name[i]), i + 1); + } +} + +void ir_loader_free(void) +{ + ir_strtab_free(&type_tab); + ir_strtab_free(&op_tab); +}