Escape/unescape strings in proper places

This commit is contained in:
Dmitry Stogov 2023-12-29 14:37:28 +03:00
parent 35bfa1f596
commit bf856b5e7d
10 changed files with 120 additions and 124 deletions

44
ir.c
View File

@ -72,16 +72,56 @@ const char *ir_op_name[IR_LAST_OP] = {
#endif #endif
}; };
/*
 * Write the `len` bytes starting at `s` to `f`, escaping them so the result
 * can be placed inside a double-quoted string literal.
 *
 *  - backslash and double quote are prefixed with a backslash;
 *  - a single quote is written as-is;
 *  - \a \b ESC \f \n \r \t \v and '?' use their mnemonic escapes
 *    (ESC is written as "\e");
 *  - any other byte below 32 is written as a three-digit octal escape,
 *    most significant digit first (e.g. byte 0x1f -> "\037");
 *  - every other byte, including bytes >= 0x80, is copied unchanged.
 *
 * The length is explicit, so embedded NUL bytes are handled (as "\000").
 */
static void ir_print_escaped_str(const char *s, size_t len, FILE *f)
{
	const char *end = s + len;

	for (; s != end; s++) {
		/*
		 * Work on the unsigned value: with a signed plain `char`, bytes
		 * >= 0x80 are negative and would otherwise be mistaken for
		 * control characters.
		 */
		unsigned char ch = (unsigned char)*s;

		switch (ch) {
			case '\\': fputs("\\\\", f); break;
			case '\'': fputs("'", f);    break;
			case '\"': fputs("\\\"", f); break;
			case '\a': fputs("\\a", f);  break;
			case '\b': fputs("\\b", f);  break;
			case '\033': fputs("\\e", f); break; /* ESC; '\e' is not ISO C */
			case '\f': fputs("\\f", f);  break;
			case '\n': fputs("\\n", f);  break;
			case '\r': fputs("\\r", f);  break;
			case '\t': fputs("\\t", f);  break;
			case '\v': fputs("\\v", f);  break;
			case '\?': fputs("\\?", f);  break;
			default:
				if (ch < 32) {
					/* remaining control bytes: \ooo, high digit first */
					fprintf(f, "\\%c%c%c",
						'0' + ((ch >> 6) & 7),
						'0' + ((ch >> 3) & 7),
						'0' + (ch & 7));
				} else {
					fputc(ch, f);
				}
				break;
		}
	}
}
void ir_print_const(const ir_ctx *ctx, const ir_insn *insn, FILE *f, bool quoted) void ir_print_const(const ir_ctx *ctx, const ir_insn *insn, FILE *f, bool quoted)
{ {
if (insn->op == IR_FUNC || insn->op == IR_SYM) { if (insn->op == IR_FUNC || insn->op == IR_SYM) {
fprintf(f, "%s", ir_get_str(ctx, insn->val.name)); fprintf(f, "%s", ir_get_str(ctx, insn->val.name));
return; return;
} else if (insn->op == IR_STR) { } else if (insn->op == IR_STR) {
size_t len;
const char *str = ir_get_strl(ctx, insn->val.str, &len);
if (quoted) { if (quoted) {
fprintf(f, "\"%s\"", ir_get_str(ctx, insn->val.str)); fprintf(f, "\"");
ir_print_escaped_str(str, len, f);
fprintf(f, "\"");
} else { } else {
fprintf(f, "%s", ir_get_str(ctx, insn->val.str)); ir_print_escaped_str(str, len, f);
} }
return; return;
} }

38
ir.g
View File

@ -122,6 +122,42 @@ static void yy_error(const char *msg);
static void yy_error_sym(const char *msg, int sym); static void yy_error_sym(const char *msg, int sym);
static void yy_error_str(const char *msg, const char *str); static void yy_error_str(const char *msg, const char *str);
/*
 * Decode the backslash escapes in the `len` bytes at `str` and return the
 * result of ir_const_str() for the decoded text.
 *
 * Recognised escapes: \\ \' \" \a \b \e \f \n \r \t \v \? and octal escapes
 * of one to three digits (\o, \oo, \ooo) whose value fits in one byte.
 * Any other escape is reported through yy_error("unsupported escape sequence").
 *
 * The decoded text is never longer than the input, so a scratch buffer of
 * len + 1 bytes is sufficient. NOTE(review): the buffer comes from alloca(),
 * so very long literals consume a matching amount of stack.
 */
static ir_ref ir_make_const_str(ir_ctx *ctx, const char *str, size_t len)
{
	char *buf = alloca(len + 1);
	char *p = buf;

	while (len > 0) {
		if (*str != '\\') {
			*p = *str;
		} else {
			/* skip the backslash; an escape character must follow it */
			str++;
			len--;
			IR_ASSERT(len != 0);
			switch (*str) {
				case '\\': *p = '\\'; break;
				case '\'': *p = '\''; break;
				case '"':  *p = '"';  break;
				case 'a':  *p = '\a'; break;
				case 'b':  *p = '\b'; break;
				case 'e':  *p = 27;   break; /* '\e'; */
				case 'f':  *p = '\f'; break;
				case 'n':  *p = '\n'; break;
				case 'r':  *p = '\r'; break;
				case 't':  *p = '\t'; break;
				case 'v':  *p = '\v'; break;
				case '?':  *p = 0x3f; break;
				case '0': case '1': case '2': case '3':
				case '4': case '5': case '6': case '7': {
					unsigned int value = (unsigned int)(*str - '0');
					int digits = 1;

					/*
					 * Take up to two more octal digits. `str` is left on the
					 * last digit consumed; the common tail below steps past it.
					 */
					while (digits < 3 && len > 1 && str[1] >= '0' && str[1] <= '7') {
						str++;
						len--;
						value = value * 8 + (unsigned int)(*str - '0');
						digits++;
					}
					if (value > 0xff) {
						yy_error("unsupported escape sequence");
					}
					*p = (char)value;
					break;
				}
				default:
					/* NOTE(review): presumably yy_error() does not return; otherwise *p stays unset here - confirm */
					yy_error("unsupported escape sequence");
			}
		}
		str++;
		p++;
		len--;
	}
	*p = 0;
	return ir_const_str(ctx, ir_strl(ctx, buf, p - buf));
}
%} %}
ir(ir_loader *loader): ir(ir_loader *loader):
@ -404,7 +440,7 @@ ir_insn(ir_parser_ctx *p):
| "sym" "(" ID(&func, &func_len) ")" | "sym" "(" ID(&func, &func_len) ")"
{ref = ir_const_sym(p->ctx, ir_strl(p->ctx, func, func_len));} {ref = ir_const_sym(p->ctx, ir_strl(p->ctx, func, func_len));}
| STRING(&func, &func_len) | STRING(&func, &func_len)
{ref = ir_const_str(p->ctx, ir_strl(p->ctx, func, func_len));} {ref = ir_make_const_str(p->ctx, func, func_len);}
| func(&op) | func(&op)
( (
"/" "/"

View File

@ -5767,44 +5767,6 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
c = str[i]; c = str[i];
if (!c) { if (!c) {
break; break;
} else if (c == '\\') {
if (str[i+1] == '\\') {
i++;
c = '\\';
} else if (str[i+1] == '\'') {
i++;
c = '\'';
} else if (str[i+1] == '"') {
i++;
c = '"';
} else if (str[i+1] == 'a') {
i++;
c = '\a';
} else if (str[i+1] == 'b') {
i++;
c = '\b';
} else if (str[i+1] == 'e') {
i++;
c = 27; /* '\e'; */
} else if (str[i+1] == 'f') {
i++;
c = '\f';
} else if (str[i+1] == 'n') {
i++;
c = '\n';
} else if (str[i+1] == 'r') {
i++;
c = '\r';
} else if (str[i+1] == 't') {
i++;
c = '\t';
} else if (str[i+1] == 'v') {
i++;
c = '\v';
} else if (str[i+1] == '?') {
i++;
c = 0x3f;
}
} }
w |= c << (8 * j); w |= c << (8 * j);
i++; i++;

View File

@ -1267,45 +1267,6 @@ static int ir_emit_func(ir_ctx *ctx, const char *name, FILE *f)
for (j = 0; j < len; j++) { for (j = 0; j < len; j++) {
char c = str[j]; char c = str[j];
if (c == '\\') {
if (str[j+1] == '\\') {
j++;
c = '\\';
} else if (str[j+1] == '\'') {
j++;
c = '\'';
} else if (str[j+1] == '"') {
j++;
c = '"';
} else if (str[j+1] == 'a') {
j++;
c = '\a';
} else if (str[j+1] == 'b') {
j++;
c = '\b';
} else if (str[j+1] == 'e') {
j++;
c = 27; /* '\e'; */
} else if (str[j+1] == 'f') {
j++;
c = '\f';
} else if (str[j+1] == 'n') {
j++;
c = '\n';
} else if (str[j+1] == 'r') {
j++;
c = '\r';
} else if (str[j+1] == 't') {
j++;
c = '\t';
} else if (str[j+1] == 'v') {
j++;
c = '\v';
} else if (str[j+1] == '?') {
j++;
c = 0x3f;
}
}
if (c < ' ' || c >= 127) { if (c < ' ' || c >= 127) {
char c1 = c >> 8; char c1 = c >> 8;
char c2 = c & 15; char c2 = c & 15;

View File

@ -104,6 +104,42 @@ static void yy_error(const char *msg);
static void yy_error_sym(const char *msg, int sym); static void yy_error_sym(const char *msg, int sym);
static void yy_error_str(const char *msg, const char *str); static void yy_error_str(const char *msg, const char *str);
/*
 * Decode the backslash escapes in the `len` bytes at `str` and return the
 * result of ir_const_str() for the decoded text.
 *
 * Recognised escapes: \\ \' \" \a \b \e \f \n \r \t \v \? and octal escapes
 * of one to three digits (\o, \oo, \ooo) whose value fits in one byte.
 * Any other escape is reported through yy_error("unsupported escape sequence").
 *
 * The decoded text is never longer than the input, so a scratch buffer of
 * len + 1 bytes is sufficient. NOTE(review): the buffer comes from alloca(),
 * so very long literals consume a matching amount of stack.
 */
static ir_ref ir_make_const_str(ir_ctx *ctx, const char *str, size_t len)
{
	char *buf = alloca(len + 1);
	char *p = buf;

	while (len > 0) {
		if (*str != '\\') {
			*p = *str;
		} else {
			/* skip the backslash; an escape character must follow it */
			str++;
			len--;
			IR_ASSERT(len != 0);
			switch (*str) {
				case '\\': *p = '\\'; break;
				case '\'': *p = '\''; break;
				case '"':  *p = '"';  break;
				case 'a':  *p = '\a'; break;
				case 'b':  *p = '\b'; break;
				case 'e':  *p = 27;   break; /* '\e'; */
				case 'f':  *p = '\f'; break;
				case 'n':  *p = '\n'; break;
				case 'r':  *p = '\r'; break;
				case 't':  *p = '\t'; break;
				case 'v':  *p = '\v'; break;
				case '?':  *p = 0x3f; break;
				case '0': case '1': case '2': case '3':
				case '4': case '5': case '6': case '7': {
					unsigned int value = (unsigned int)(*str - '0');
					int digits = 1;

					/*
					 * Take up to two more octal digits. `str` is left on the
					 * last digit consumed; the common tail below steps past it.
					 */
					while (digits < 3 && len > 1 && str[1] >= '0' && str[1] <= '7') {
						str++;
						len--;
						value = value * 8 + (unsigned int)(*str - '0');
						digits++;
					}
					if (value > 0xff) {
						yy_error("unsupported escape sequence");
					}
					*p = (char)value;
					break;
				}
				default:
					/* NOTE(review): presumably yy_error() does not return; otherwise *p stays unset here - confirm */
					yy_error("unsupported escape sequence");
			}
		}
		str++;
		p++;
		len--;
	}
	*p = 0;
	return ir_const_str(ctx, ir_strl(ctx, buf, p - buf));
}
#define YYPOS cpos #define YYPOS cpos
#define YYEND cend #define YYEND cend
@ -1363,7 +1399,7 @@ _yy_state_76:
break; break;
case YY_STRING: case YY_STRING:
sym = parse_STRING(sym, &func, &func_len); sym = parse_STRING(sym, &func, &func_len);
ref = ir_const_str(p->ctx, ir_strl(p->ctx, func, func_len)); ref = ir_make_const_str(p->ctx, func, func_len);
break; break;
case YY_ID: case YY_ID:
sym = parse_func(sym, &op); sym = parse_func(sym, &op);

View File

@ -9673,45 +9673,6 @@ next_block:;
while (str[i]) { while (str[i]) {
char c = str[i]; char c = str[i];
if (c == '\\') {
if (str[i+1] == '\\') {
i++;
c = '\\';
} else if (str[i+1] == '\'') {
i++;
c = '\'';
} else if (str[i+1] == '"') {
i++;
c = '"';
} else if (str[i+1] == 'a') {
i++;
c = '\a';
} else if (str[i+1] == 'b') {
i++;
c = '\b';
} else if (str[i+1] == 'e') {
i++;
c = 27; /* '\e'; */
} else if (str[i+1] == 'f') {
i++;
c = '\f';
} else if (str[i+1] == 'n') {
i++;
c = '\n';
} else if (str[i+1] == 'r') {
i++;
c = '\r';
} else if (str[i+1] == 't') {
i++;
c = '\t';
} else if (str[i+1] == 'v') {
i++;
c = '\v';
} else if (str[i+1] == '?') {
i++;
c = 0x3f;
}
}
|.byte c |.byte c
i++; i++;
} }

View File

@ -21,4 +21,4 @@ define i32 @test()
ret i32 %d2 ret i32 %d2
} }
declare i32 @printf(ptr, ...) declare i32 @printf(ptr, ...)
@.str6 = private unnamed_addr constant [12 x i8] c"hello %d!\0A\00" @.str6 = private unnamed_addr constant [11 x i8] c"hello %d!\0A\00"

View File

@ -17,4 +17,4 @@ define i32 @test(ptr %d2, i32 %d3)
%d4 = call i32 %d2(ptr @.str4, i32 %d3) %d4 = call i32 %d2(ptr @.str4, i32 %d3)
ret i32 %d4 ret i32 %d4
} }
@.str4 = private unnamed_addr constant [12 x i8] c"hello %d!\0A\00" @.str4 = private unnamed_addr constant [11 x i8] c"hello %d!\0A\00"

View File

@ -21,4 +21,4 @@ define i8 @test()
ret i8 %d2 ret i8 %d2
} }
declare i32 @printf(ptr, ...) declare i32 @printf(ptr, ...)
@.str6 = private unnamed_addr constant [12 x i8] c"hello %d!\0A\00" @.str6 = private unnamed_addr constant [11 x i8] c"hello %d!\0A\00"

View File

@ -17,4 +17,4 @@ define i32 @test(ptr %d2, i32 %d3)
%d4 = tail call i32 %d2(ptr @.str4, i32 %d3) %d4 = tail call i32 %d2(ptr @.str4, i32 %d3)
ret i32 %d4 ret i32 %d4
} }
@.str4 = private unnamed_addr constant [12 x i8] c"hello %d!\0A\00" @.str4 = private unnamed_addr constant [11 x i8] c"hello %d!\0A\00"