diff --git a/ir.c b/ir.c
index f641bab..8be380d 100644
--- a/ir.c
+++ b/ir.c
@@ -72,16 +72,63 @@ const char *ir_op_name[IR_LAST_OP] = {
 #endif
 };
 
+/*
+ * Write the len bytes at s to f in escaped form: backslash, double quote,
+ * '?' and the control characters that have a C letter escape become
+ * backslash sequences, any other byte below 32 becomes a three-digit octal
+ * escape, and everything else (including a single quote) is copied as is.
+ */
+static void ir_print_escaped_str(const char *s, size_t len, FILE *f)
+{
+	unsigned char ch; /* unsigned, so bytes >= 0x80 never satisfy "ch < 32" */
+
+	while (len > 0) {
+		ch = (unsigned char)*s;
+		switch (ch) {
+			case '\\': fputs("\\\\", f); break;
+			case '\'': fputs("'", f); break;
+			case '\"': fputs("\\\"", f); break;
+			case '\a': fputs("\\a", f); break;
+			case '\b': fputs("\\b", f); break;
+			case 27:   fputs("\\e", f); break; /* '\e' */
+			case '\f': fputs("\\f", f); break;
+			case '\n': fputs("\\n", f); break;
+			case '\r': fputs("\\r", f); break;
+			case '\t': fputs("\\t", f); break;
+			case '\v': fputs("\\v", f); break;
+			case '\?': fputs("\\?", f); break;
+			default:
+				if (ch < 32) {
+					/* octal digits, most significant first */
+					fprintf(f, "\\%c%c%c",
+						'0' + ((ch >> 6) % 8),
+						'0' + ((ch >> 3) % 8),
+						'0' + (ch % 8));
+					break;
+				} else {
+					fputc(ch, f);
+				}
+		}
+		s++;
+		len--;
+	}
+}
+
 void ir_print_const(const ir_ctx *ctx, const ir_insn *insn, FILE *f, bool quoted)
 {
 	if (insn->op == IR_FUNC || insn->op == IR_SYM) {
 		fprintf(f, "%s", ir_get_str(ctx, insn->val.name));
 		return;
 	} else if (insn->op == IR_STR) {
+		size_t len;
+		const char *str = ir_get_strl(ctx, insn->val.str, &len);
+
 		if (quoted) {
-			fprintf(f, "\"%s\"", ir_get_str(ctx, insn->val.str));
+			fprintf(f, "\"");
+			ir_print_escaped_str(str, len, f);
+			fprintf(f, "\"");
 		} else {
-			fprintf(f, "%s", ir_get_str(ctx, insn->val.str));
+			ir_print_escaped_str(str, len, f);
 		}
 		return;
 	}
diff --git a/ir.g b/ir.g
index e7cee24..20976a4 100644
--- a/ir.g
+++ b/ir.g
@@ -122,6 +122,50 @@ static void yy_error(const char *msg);
 static void yy_error_sym(const char *msg, int sym);
 static void yy_error_str(const char *msg, const char *str);
 
+/*
+ * Decode the backslash escapes in the len bytes at str and return the ir_ref
+ * that ir_const_str() yields for the decoded text. The character after a
+ * backslash may be a backslash, a single or double quote, a, b, e, f, n, r,
+ * t, v or ?; any other one is reported through yy_error(). The three-digit
+ * octal escapes written by ir_print_escaped_str() are not decoded here.
+ */
+static ir_ref ir_make_const_str(ir_ctx *ctx, const char *str, size_t len)
+{
+	char *buf = alloca(len + 1);
+	char *p = buf;
+
+	while (len > 0) {
+		if (*str != '\\') {
+			*p = *str;
+		} else {
+			str++;
+			len--;
+			IR_ASSERT(len != 0);
+			switch (*str) {
+				case '\\': *p = '\\'; break;
+				case '\'': *p = '\''; break;
+				case '"': *p = '"'; break;
+				case 'a': *p = '\a'; break;
+				case 'b': *p = '\b'; break;
+				case 'e': *p = 27; break; /* '\e'; */
+				case 'f': *p = '\f'; break;
+				case 'n': *p = '\n'; break;
+				case 'r': *p = '\r'; break;
+				case 't': *p = '\t'; break;
+				case 'v': *p = '\v'; break;
+				case '?': *p = 0x3f; break;
+				default:
+					yy_error("unsupported escape sequence");
+			}
+		}
+		str++;
+		p++;
+		len--;
+	}
+	*p = 0;
+	return ir_const_str(ctx, ir_strl(ctx, buf, p - buf));
+}
+
 %}
 
 ir(ir_loader *loader):
@@ -404,7 +448,7 @@ ir_insn(ir_parser_ctx *p):
 	|	"sym" "(" ID(&func, &func_len) ")"
 		{ref = ir_const_sym(p->ctx, ir_strl(p->ctx, func, func_len));}
 	|	STRING(&func, &func_len)
-		{ref = ir_const_str(p->ctx, ir_strl(p->ctx, func, func_len));}
+		{ref = ir_make_const_str(p->ctx, func, func_len);}
 	|	func(&op)
 		(
 			"/"
diff --git a/ir_aarch64.dasc b/ir_aarch64.dasc
index e58d25d..f5158d3 100644
--- a/ir_aarch64.dasc
+++ b/ir_aarch64.dasc
@@ -5767,44 +5767,6 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
 						c = str[i];
 						if (!c) {
 							break;
-						} else if (c == '\\') {
-							if (str[i+1] == '\\') {
-								i++;
-								c = '\\';
-							} else if (str[i+1] == '\'') {
-								i++;
-								c = '\'';
-							} else if (str[i+1] == '"') {
-								i++;
-								c = '"';
-							} else if (str[i+1] == 'a') {
-								i++;
-								c = '\a';
-							} else if (str[i+1] == 'b') {
-								i++;
-								c = '\b';
-							} else if (str[i+1] == 'e') {
-								i++;
-								c = 27; /* '\e'; */
-							} else if (str[i+1] == 'f') {
-								i++;
-								c = '\f';
-							} else if (str[i+1] == 'n') {
-								i++;
-								c = '\n';
-							} else if (str[i+1] == 'r') {
-								i++;
-								c = '\r';
-							} else if (str[i+1] == 't') {
-								i++;
-								c = '\t';
-							} else if (str[i+1] == 'v') {
-								i++;
-								c = '\v';
-							} else if (str[i+1] == '?') {
-								i++;
-								c = 0x3f;
-							}
 						}
 						w |= c << (8 * j);
 						i++;
diff --git a/ir_emit_llvm.c b/ir_emit_llvm.c
index f1d03c3..594df52 100644
--- a/ir_emit_llvm.c
+++ b/ir_emit_llvm.c
@@ -1267,45 +1267,6 @@ static int ir_emit_func(ir_ctx *ctx, const char *name, FILE *f)
 				for (j = 0; j < len; j++) {
 					char c = str[j];
 
-					if (c == '\\') {
-						if (str[j+1] == '\\') {
-							j++;
-							c = '\\';
-						} else if (str[j+1] == '\'') {
-							j++;
-							c = '\'';
-						} else if (str[j+1] == '"') {
-							j++;
-							c = '"';
-						} else if (str[j+1] == 'a') {
-							j++;
-							c = '\a';
-						} else if (str[j+1] == 'b') {
-							j++;
-							c = '\b';
-						} else if (str[j+1] == 'e') {
-							j++;
-							c = 27; /* '\e'; */
-						} else if (str[j+1] == 'f') {
-							j++;
-							c = '\f';
-						} else if (str[j+1] == 'n') {
-							j++;
-							c = '\n';
-						} else if (str[j+1] == 'r') {
-							j++;
-							c = '\r';
-						} else if (str[j+1] == 't') {
-							j++;
-							c = '\t';
-						} else if (str[j+1] == 'v') {
-							j++;
-							c = '\v';
-						} else if (str[j+1] == '?') {
-							j++;
-							c = 0x3f;
-						}
-					}
 					if (c < ' ' || c >= 127) {
 						char c1 = c >> 8;
 						char c2 = c & 15;
diff --git a/ir_load.c b/ir_load.c
index 42637e8..377abeb 100644
--- a/ir_load.c
+++ b/ir_load.c
@@ -104,6 +104,50 @@ static void yy_error(const char *msg);
 static void yy_error_sym(const char *msg, int sym);
 static void yy_error_str(const char *msg, const char *str);
 
+/*
+ * Decode the backslash escapes in the len bytes at str and return the ir_ref
+ * that ir_const_str() yields for the decoded text. The character after a
+ * backslash may be a backslash, a single or double quote, a, b, e, f, n, r,
+ * t, v or ?; any other one is reported through yy_error(). The three-digit
+ * octal escapes written by ir_print_escaped_str() are not decoded here.
+ */
+static ir_ref ir_make_const_str(ir_ctx *ctx, const char *str, size_t len)
+{
+	char *buf = alloca(len + 1);
+	char *p = buf;
+
+	while (len > 0) {
+		if (*str != '\\') {
+			*p = *str;
+		} else {
+			str++;
+			len--;
+			IR_ASSERT(len != 0);
+			switch (*str) {
+				case '\\': *p = '\\'; break;
+				case '\'': *p = '\''; break;
+				case '"': *p = '"'; break;
+				case 'a': *p = '\a'; break;
+				case 'b': *p = '\b'; break;
+				case 'e': *p = 27; break; /* '\e'; */
+				case 'f': *p = '\f'; break;
+				case 'n': *p = '\n'; break;
+				case 'r': *p = '\r'; break;
+				case 't': *p = '\t'; break;
+				case 'v': *p = '\v'; break;
+				case '?': *p = 0x3f; break;
+				default:
+					yy_error("unsupported escape sequence");
+			}
+		}
+		str++;
+		p++;
+		len--;
+	}
+	*p = 0;
+	return ir_const_str(ctx, ir_strl(ctx, buf, p - buf));
+}
+
 #define YYPOS cpos
 #define YYEND cend
 
@@ -1363,7 +1407,7 @@ _yy_state_76:
 			break;
 		case YY_STRING:
 			sym = parse_STRING(sym, &func, &func_len);
-			ref = ir_const_str(p->ctx, ir_strl(p->ctx, func, func_len));
+			ref = ir_make_const_str(p->ctx, func, func_len);
 			break;
 		case YY_ID:
 			sym = parse_func(sym, &op);
diff --git a/ir_x86.dasc b/ir_x86.dasc
index 60b6cca..e3fa95a 100644
--- a/ir_x86.dasc
+++ b/ir_x86.dasc
@@ -9673,45 +9673,6 @@ next_block:;
 				while (str[i]) {
 					char c = str[i];
 
-					if (c == '\\') {
-						if (str[i+1] == '\\') {
-							i++;
-							c = '\\';
-						} else if (str[i+1] == '\'') {
-							i++;
-							c = '\'';
-						} else if (str[i+1] == '"') {
-							i++;
-							c = '"';
-						} else if (str[i+1] == 'a') {
-							i++;
-							c = '\a';
-						} else if (str[i+1] == 'b') {
-							i++;
-							c = '\b';
-						} else if (str[i+1] == 'e') {
-							i++;
-							c = 27; /* '\e'; */
-						} else if (str[i+1] == 'f') {
-							i++;
-							c = '\f';
-						} else if (str[i+1] == 'n') {
-							i++;
-							c = '\n';
-						} else if (str[i+1] == 'r') {
-							i++;
-							c = '\r';
-						} else if (str[i+1] == 't') {
-							i++;
-							c = '\t';
-						} else if (str[i+1] == 'v') {
-							i++;
-							c = '\v';
-						} else if (str[i+1] == '?') {
-							i++;
-							c = 0x3f;
-						}
-					}
 					|.byte c
 					i++;
 				}
diff --git a/tests/llvm/call_001.irt b/tests/llvm/call_001.irt
index c4b5f01..d4206c5 100644
--- a/tests/llvm/call_001.irt
+++ b/tests/llvm/call_001.irt
@@ -21,4 +21,4 @@ define i32 @test()
 	ret i32 %d2
 }
 declare i32 @printf(ptr, ...)
-@.str6 = private unnamed_addr constant [12 x i8] c"hello %d!\0A\00"
+@.str6 = private unnamed_addr constant [11 x i8] c"hello %d!\0A\00"
diff --git a/tests/llvm/call_002.irt b/tests/llvm/call_002.irt
index b6e9e44..da1a052 100644
--- a/tests/llvm/call_002.irt
+++ b/tests/llvm/call_002.irt
@@ -17,4 +17,4 @@ define i32 @test(ptr %d2, i32 %d3)
 	%d4 = call i32 %d2(ptr @.str4, i32 %d3)
 	ret i32 %d4
 }
-@.str4 = private unnamed_addr constant [12 x i8] c"hello %d!\0A\00"
+@.str4 = private unnamed_addr constant [11 x i8] c"hello %d!\0A\00"
diff --git a/tests/llvm/tailcall_001.irt b/tests/llvm/tailcall_001.irt
index aab2351..5a3802a 100644
--- a/tests/llvm/tailcall_001.irt
+++ b/tests/llvm/tailcall_001.irt
@@ -21,4 +21,4 @@ define i8 @test()
 	ret i8 %d2
 }
 declare i32 @printf(ptr, ...)
-@.str6 = private unnamed_addr constant [12 x i8] c"hello %d!\0A\00"
+@.str6 = private unnamed_addr constant [11 x i8] c"hello %d!\0A\00"
diff --git a/tests/llvm/tailcall_002.irt b/tests/llvm/tailcall_002.irt
index f734cab..cffdcc8 100644
--- a/tests/llvm/tailcall_002.irt
+++ b/tests/llvm/tailcall_002.irt
@@ -17,4 +17,4 @@ define i32 @test(ptr %d2, i32 %d3)
 	%d4 = tail call i32 %d2(ptr @.str4, i32 %d3)
 	ret i32 %d4
 }
-@.str4 = private unnamed_addr constant [12 x i8] c"hello %d!\0A\00"
+@.str4 = private unnamed_addr constant [11 x i8] c"hello %d!\0A\00"