Parse strings more correctly, keep information on whether it was a single or double quoted string

This commit is contained in:
nikic 2011-05-28 00:21:12 +02:00
parent acab6f2826
commit 355ddd8d75
7 changed files with 86 additions and 49 deletions

View File

@ -71,7 +71,7 @@ function resolveNodes($code) {
function resolveMacros($code) {
return preg_replace_callback(
'~(?<name>init|push|pushNormalizing|toArray|parse(?:Var|Encapsed|LNumber|DNumber|String))' . ARGS . '~',
'~(?<name>init|push|pushNormalizing|toArray|parse(?:Var|Encapsed|LNumber|DNumber))' . ARGS . '~',
function($matches) {
// recurse
$matches['args'] = resolveMacros($matches['args']);
@ -127,12 +127,6 @@ function resolveMacros($code) {
return '(double) ' . $args[0];
}
if ('parseString' == $name) {
assertArgs(1, $args, $name);
return 'str_replace(array(\'\\\\\\\'\', \'\\\\\\\\\'), array(\'\\\'\', \'\\\\\'), substr(' . $args[0] . ', 1, -1))';
}
},
$code
);

View File

@ -933,8 +933,8 @@ state 9
. error
state 10
(24) inner_statement_list : inner_statement_list . inner_statement
(235) expr_without_variable : T_FUNCTION optional_ref '(' parameter_list ')' lexical_vars '{' inner_statement_list . '}'
(24) inner_statement_list : inner_statement_list . inner_statement
T_INCLUDE shift 57
T_INCLUDE_ONCE shift 58
@ -1239,8 +1239,8 @@ state 12
. error
state 13
(24) inner_statement_list : inner_statement_list . inner_statement
(139) method_body : '{' inner_statement_list . '}'
(24) inner_statement_list : inner_statement_list . inner_statement
T_INCLUDE shift 57
T_INCLUDE_ONCE shift 58
@ -10458,7 +10458,6 @@ state 135
. error
state 136
(93) case_list : case_list T_CASE expr . case_separator inner_statement_list
(183) expr_without_variable : expr . T_BOOLEAN_OR expr
(184) expr_without_variable : expr . T_BOOLEAN_AND expr
(185) expr_without_variable : expr . T_LOGICAL_OR expr
@ -10486,6 +10485,7 @@ state 136
(211) expr_without_variable : expr . T_INSTANCEOF class_name_reference
(213) expr_without_variable : expr . '?' expr ':' expr
(214) expr_without_variable : expr . '?' ':' expr
(93) case_list : case_list T_CASE expr . case_separator inner_statement_list
T_LOGICAL_OR shift 84
T_LOGICAL_XOR shift 85
@ -10519,7 +10519,6 @@ state 136
. error
state 137
(48) statement : expr . ';'
(183) expr_without_variable : expr . T_BOOLEAN_OR expr
(184) expr_without_variable : expr . T_BOOLEAN_AND expr
(185) expr_without_variable : expr . T_LOGICAL_OR expr
@ -10547,6 +10546,7 @@ state 137
(211) expr_without_variable : expr . T_INSTANCEOF class_name_reference
(213) expr_without_variable : expr . '?' expr ':' expr
(214) expr_without_variable : expr . '?' ':' expr
(48) statement : expr . ';'
T_LOGICAL_OR shift 84
T_LOGICAL_XOR shift 85
@ -10578,7 +10578,6 @@ state 137
. error
state 138
(38) statement : T_BREAK expr . ';'
(183) expr_without_variable : expr . T_BOOLEAN_OR expr
(184) expr_without_variable : expr . T_BOOLEAN_AND expr
(185) expr_without_variable : expr . T_LOGICAL_OR expr
@ -10606,6 +10605,7 @@ state 138
(211) expr_without_variable : expr . T_INSTANCEOF class_name_reference
(213) expr_without_variable : expr . '?' expr ':' expr
(214) expr_without_variable : expr . '?' ':' expr
(38) statement : T_BREAK expr . ';'
T_LOGICAL_OR shift 84
T_LOGICAL_XOR shift 85
@ -10637,7 +10637,6 @@ state 138
. error
state 139
(40) statement : T_CONTINUE expr . ';'
(183) expr_without_variable : expr . T_BOOLEAN_OR expr
(184) expr_without_variable : expr . T_BOOLEAN_AND expr
(185) expr_without_variable : expr . T_LOGICAL_OR expr
@ -10665,6 +10664,7 @@ state 139
(211) expr_without_variable : expr . T_INSTANCEOF class_name_reference
(213) expr_without_variable : expr . '?' expr ':' expr
(214) expr_without_variable : expr . '?' ':' expr
(40) statement : T_CONTINUE expr . ';'
T_LOGICAL_OR shift 84
T_LOGICAL_XOR shift 85
@ -10696,7 +10696,6 @@ state 139
. error
state 140
(56) statement : T_THROW expr . ';'
(183) expr_without_variable : expr . T_BOOLEAN_OR expr
(184) expr_without_variable : expr . T_BOOLEAN_AND expr
(185) expr_without_variable : expr . T_LOGICAL_OR expr
@ -10724,6 +10723,7 @@ state 140
(211) expr_without_variable : expr . T_INSTANCEOF class_name_reference
(213) expr_without_variable : expr . '?' expr ':' expr
(214) expr_without_variable : expr . '?' ':' expr
(56) statement : T_THROW expr . ';'
T_LOGICAL_OR shift 84
T_LOGICAL_XOR shift 85
@ -10932,8 +10932,6 @@ state 143
. error
state 144
(31) statement : T_IF '(' expr . ')' statement elseif_list else_single
(32) statement : T_IF '(' expr . ')' ':' inner_statement_list new_elseif_list new_else_single T_ENDIF ';'
(183) expr_without_variable : expr . T_BOOLEAN_OR expr
(184) expr_without_variable : expr . T_BOOLEAN_AND expr
(185) expr_without_variable : expr . T_LOGICAL_OR expr
@ -10961,6 +10959,8 @@ state 144
(211) expr_without_variable : expr . T_INSTANCEOF class_name_reference
(213) expr_without_variable : expr . '?' expr ':' expr
(214) expr_without_variable : expr . '?' ':' expr
(31) statement : T_IF '(' expr . ')' statement elseif_list else_single
(32) statement : T_IF '(' expr . ')' ':' inner_statement_list new_elseif_list new_else_single T_ENDIF ';'
T_LOGICAL_OR shift 84
T_LOGICAL_XOR shift 85
@ -10992,7 +10992,6 @@ state 144
. error
state 145
(33) statement : T_WHILE '(' expr . ')' while_statement
(183) expr_without_variable : expr . T_BOOLEAN_OR expr
(184) expr_without_variable : expr . T_BOOLEAN_AND expr
(185) expr_without_variable : expr . T_LOGICAL_OR expr
@ -11020,6 +11019,7 @@ state 145
(211) expr_without_variable : expr . T_INSTANCEOF class_name_reference
(213) expr_without_variable : expr . '?' expr ':' expr
(214) expr_without_variable : expr . '?' ':' expr
(33) statement : T_WHILE '(' expr . ')' while_statement
T_LOGICAL_OR shift 84
T_LOGICAL_XOR shift 85
@ -11051,9 +11051,6 @@ state 145
. error
state 146
(50) statement : T_FOREACH '(' expr . T_AS variable ')' foreach_statement
(51) statement : T_FOREACH '(' expr . T_AS '&' variable ')' foreach_statement
(52) statement : T_FOREACH '(' expr . T_AS variable T_DOUBLE_ARROW optional_ref variable ')' foreach_statement
(183) expr_without_variable : expr . T_BOOLEAN_OR expr
(184) expr_without_variable : expr . T_BOOLEAN_AND expr
(185) expr_without_variable : expr . T_LOGICAL_OR expr
@ -11081,6 +11078,9 @@ state 146
(211) expr_without_variable : expr . T_INSTANCEOF class_name_reference
(213) expr_without_variable : expr . '?' expr ':' expr
(214) expr_without_variable : expr . '?' ':' expr
(50) statement : T_FOREACH '(' expr . T_AS variable ')' foreach_statement
(51) statement : T_FOREACH '(' expr . T_AS '&' variable ')' foreach_statement
(52) statement : T_FOREACH '(' expr . T_AS variable T_DOUBLE_ARROW optional_ref variable ')' foreach_statement
T_LOGICAL_OR shift 84
T_LOGICAL_XOR shift 85
@ -11112,7 +11112,6 @@ state 146
. error
state 147
(36) statement : T_SWITCH '(' expr . ')' switch_case_list
(183) expr_without_variable : expr . T_BOOLEAN_OR expr
(184) expr_without_variable : expr . T_BOOLEAN_AND expr
(185) expr_without_variable : expr . T_LOGICAL_OR expr
@ -11140,6 +11139,7 @@ state 147
(211) expr_without_variable : expr . T_INSTANCEOF class_name_reference
(213) expr_without_variable : expr . '?' expr ':' expr
(214) expr_without_variable : expr . '?' ':' expr
(36) statement : T_SWITCH '(' expr . ')' switch_case_list
T_LOGICAL_OR shift 84
T_LOGICAL_XOR shift 85
@ -11763,7 +11763,6 @@ state 157
. error
state 158
(34) statement : T_DO statement T_WHILE '(' expr . ')' ';'
(183) expr_without_variable : expr . T_BOOLEAN_OR expr
(184) expr_without_variable : expr . T_BOOLEAN_AND expr
(185) expr_without_variable : expr . T_LOGICAL_OR expr
@ -11791,6 +11790,7 @@ state 158
(211) expr_without_variable : expr . T_INSTANCEOF class_name_reference
(213) expr_without_variable : expr . '?' expr ':' expr
(214) expr_without_variable : expr . '?' ':' expr
(34) statement : T_DO statement T_WHILE '(' expr . ')' ';'
T_LOGICAL_OR shift 84
T_LOGICAL_XOR shift 85
@ -12061,7 +12061,6 @@ state 162
. error
state 163
(100) elseif_list : elseif_list T_ELSEIF '(' expr . ')' statement
(183) expr_without_variable : expr . T_BOOLEAN_OR expr
(184) expr_without_variable : expr . T_BOOLEAN_AND expr
(185) expr_without_variable : expr . T_LOGICAL_OR expr
@ -12089,6 +12088,7 @@ state 163
(211) expr_without_variable : expr . T_INSTANCEOF class_name_reference
(213) expr_without_variable : expr . '?' expr ':' expr
(214) expr_without_variable : expr . '?' ':' expr
(100) elseif_list : elseif_list T_ELSEIF '(' expr . ')' statement
T_LOGICAL_OR shift 84
T_LOGICAL_XOR shift 85
@ -12120,7 +12120,6 @@ state 163
. error
state 164
(102) new_elseif_list : new_elseif_list T_ELSEIF '(' expr . ')' ':' inner_statement_list
(183) expr_without_variable : expr . T_BOOLEAN_OR expr
(184) expr_without_variable : expr . T_BOOLEAN_AND expr
(185) expr_without_variable : expr . T_LOGICAL_OR expr
@ -12148,6 +12147,7 @@ state 164
(211) expr_without_variable : expr . T_INSTANCEOF class_name_reference
(213) expr_without_variable : expr . '?' expr ':' expr
(214) expr_without_variable : expr . '?' ':' expr
(102) new_elseif_list : new_elseif_list T_ELSEIF '(' expr . ')' ':' inner_statement_list
T_LOGICAL_OR shift 84
T_LOGICAL_XOR shift 85
@ -14864,7 +14864,6 @@ state 222
. reduce (300)
state 223
(43) statement : T_RETURN variable . ';'
(163) expr_without_variable : variable . '=' expr
(164) expr_without_variable : variable . '=' '&' variable
(165) expr_without_variable : variable . '=' '&' T_NEW class_name_reference ctor_arguments
@ -14884,6 +14883,7 @@ state 223
(300) expr : variable .
(307) object_access : variable . T_OBJECT_OPERATOR object_property '(' function_call_argument_list ')'
(308) object_access_arrayable : variable . T_OBJECT_OPERATOR object_property
(43) statement : T_RETURN variable . ';'
'=' shift 44
T_PLUS_EQUAL shift 108
@ -15122,8 +15122,8 @@ state 228
. reduce (210)
state 229
(68) class_declaration_statement : T_INTERFACE T_STRING interface_extends_list '{' class_statement_list . '}'
(133) class_statement_list : class_statement_list . class_statement
(68) class_declaration_statement : T_INTERFACE T_STRING interface_extends_list '{' class_statement_list . '}'
(142) method_modifiers : .
T_CONST shift 495
@ -15144,8 +15144,8 @@ state 229
. reduce (142)
state 230
(67) class_declaration_statement : class_entry_type T_STRING extends_from implements_list '{' class_statement_list . '}'
(133) class_statement_list : class_statement_list . class_statement
(67) class_declaration_statement : class_entry_type T_STRING extends_from implements_list '{' class_statement_list . '}'
(142) method_modifiers : .
T_CONST shift 495
@ -16189,10 +16189,10 @@ state 279
. error
state 280
(249) name : T_NAMESPACE . T_NS_SEPARATOR namespace_name
(11) top_statement : T_NAMESPACE . namespace_name ';'
(12) top_statement : T_NAMESPACE . namespace_name '{' top_statement_list '}'
(13) top_statement : T_NAMESPACE . '{' top_statement_list '}'
(249) name : T_NAMESPACE . T_NS_SEPARATOR namespace_name
T_STRING shift 582 and reduce (5)
T_NS_SEPARATOR shift 346
@ -16412,10 +16412,10 @@ state 298
. reduce (315)
state 299
(50) statement : T_FOREACH '(' expr T_AS variable . ')' foreach_statement
(52) statement : T_FOREACH '(' expr T_AS variable . T_DOUBLE_ARROW optional_ref variable ')' foreach_statement
(307) object_access : variable . T_OBJECT_OPERATOR object_property '(' function_call_argument_list ')'
(308) object_access_arrayable : variable . T_OBJECT_OPERATOR object_property
(50) statement : T_FOREACH '(' expr T_AS variable . ')' foreach_statement
(52) statement : T_FOREACH '(' expr T_AS variable . T_DOUBLE_ARROW optional_ref variable ')' foreach_statement
T_OBJECT_OPERATOR shift 266
T_DOUBLE_ARROW shift 365
@ -16581,16 +16581,16 @@ state 316
. reduce (316)
state 317
(46) statement : T_ECHO expr_list . ';'
(158) expr_list : expr_list . ',' expr
(46) statement : T_ECHO expr_list . ';'
',' shift 123
';' shift 619 and reduce (46)
. error
state 318
(66) function_declaration_statement : T_FUNCTION optional_ref . T_STRING '(' parameter_list ')' '{' inner_statement_list '}'
(235) expr_without_variable : T_FUNCTION optional_ref . '(' parameter_list ')' lexical_vars '{' inner_statement_list '}'
(66) function_declaration_statement : T_FUNCTION optional_ref . T_STRING '(' parameter_list ')' '{' inner_statement_list '}'
T_STRING shift 421
'(' shift 258
@ -16605,17 +16605,17 @@ state 319
. error
state 320
(44) statement : T_GLOBAL global_var_list . ';'
(124) global_var_list : global_var_list . ',' global_var
(44) statement : T_GLOBAL global_var_list . ';'
',' shift 307
';' shift 627 and reduce (44)
. error
state 321
(45) statement : T_STATIC static_var_list . ';'
(129) static_var_list : static_var_list . ',' T_VARIABLE
(130) static_var_list : static_var_list . ',' T_VARIABLE '=' static_scalar
(45) statement : T_STATIC static_var_list . ';'
',' shift 425
';' shift 628 and reduce (45)
@ -16674,8 +16674,8 @@ state 327
. error
state 328
(63) variables_list : variables_list . ',' variable
(215) expr_without_variable : T_ISSET '(' variables_list . ')'
(63) variables_list : variables_list . ',' variable
',' shift 248
')' shift 662 and reduce (215)
@ -16769,9 +16769,9 @@ state 338
. reduce (301)
state 339
(51) statement : T_FOREACH '(' expr T_AS '&' variable . ')' foreach_statement
(307) object_access : variable . T_OBJECT_OPERATOR object_property '(' function_call_argument_list ')'
(308) object_access_arrayable : variable . T_OBJECT_OPERATOR object_property
(51) statement : T_FOREACH '(' expr T_AS '&' variable . ')' foreach_statement
T_OBJECT_OPERATOR shift 266
')' shift 22
@ -16795,9 +16795,9 @@ state 341
. error
state 342
(52) statement : T_FOREACH '(' expr T_AS variable T_DOUBLE_ARROW optional_ref variable . ')' foreach_statement
(307) object_access : variable . T_OBJECT_OPERATOR object_property '(' function_call_argument_list ')'
(308) object_access_arrayable : variable . T_OBJECT_OPERATOR object_property
(52) statement : T_FOREACH '(' expr T_AS variable T_DOUBLE_ARROW optional_ref variable . ')' foreach_statement
T_OBJECT_OPERATOR shift 266
')' shift 24
@ -16862,8 +16862,8 @@ state 349
. reduce (260)
state 350
(66) function_declaration_statement : T_FUNCTION . optional_ref T_STRING '(' parameter_list ')' '{' inner_statement_list '}'
(235) expr_without_variable : T_FUNCTION . optional_ref '(' parameter_list ')' lexical_vars '{' inner_statement_list '}'
(66) function_declaration_statement : T_FUNCTION . optional_ref T_STRING '(' parameter_list ')' '{' inner_statement_list '}'
(64) optional_ref : .
'&' shift 603 and reduce (65)
@ -16871,8 +16871,8 @@ state 350
. reduce (64)
state 351
(45) statement : T_STATIC . static_var_list ';'
(246) class_name : T_STATIC .
(45) statement : T_STATIC . static_var_list ';'
T_VARIABLE shift 409
static_var_list goto 321
@ -17303,8 +17303,8 @@ state 406
. error
state 407
(42) statement : T_RETURN expr_without_variable . ';'
(301) expr : expr_without_variable .
(42) statement : T_RETURN expr_without_variable . ';'
';' shift 623 and reduce (42)
. reduce (301)
@ -17426,9 +17426,9 @@ state 425
. error
state 426
(62) variables_list : variable .
(307) object_access : variable . T_OBJECT_OPERATOR object_property '(' function_call_argument_list ')'
(308) object_access_arrayable : variable . T_OBJECT_OPERATOR object_property
(62) variables_list : variable .
T_OBJECT_OPERATOR shift 266
. reduce (62)
@ -17714,9 +17714,9 @@ state 457
. error
state 458
(107) parameter_list : non_empty_parameter_list .
(111) non_empty_parameter_list : non_empty_parameter_list . ',' optional_class_type optional_ref T_VARIABLE
(112) non_empty_parameter_list : non_empty_parameter_list . ',' optional_class_type optional_ref T_VARIABLE '=' static_scalar
(107) parameter_list : non_empty_parameter_list .
',' shift 264
. reduce (107)
@ -17858,9 +17858,9 @@ state 479
. error
state 480
(63) variables_list : variables_list ',' variable .
(307) object_access : variable . T_OBJECT_OPERATOR object_property '(' function_call_argument_list ')'
(308) object_access_arrayable : variable . T_OBJECT_OPERATOR object_property
(63) variables_list : variables_list ',' variable .
T_OBJECT_OPERATOR shift 266
. reduce (63)
@ -19785,4 +19785,4 @@ Statistics for zend_language_parser.phpy:
3852 items
1167 lookahead sets used
13663+826=14489 action entries
232288 bytes used
232216 bytes used

View File

@ -604,7 +604,7 @@ ctor_arguments:
common_scalar:
T_LNUMBER { $$ = new Node_Scalar_LNumber(array('value' => (int) $1)); }
| T_DNUMBER { $$ = new Node_Scalar_DNumber(array('value' => (double) $1)); }
| T_CONSTANT_ENCAPSED_STRING { $$ = new Node_Scalar_String(array('value' => str_replace(array('\\\'', '\\\\'), array('\'', '\\'), substr($1, 1, -1)))); }
| T_CONSTANT_ENCAPSED_STRING { $$ = Node_Scalar_String::create($1); }
| T_LINE { $$ = new Node_Scalar_LineConst(array()); }
| T_FILE { $$ = new Node_Scalar_FileConst(array()); }
| T_DIR { $$ = new Node_Scalar_DirConst(array()); }

View File

@ -604,7 +604,7 @@ ctor_arguments:
common_scalar:
T_LNUMBER { $$ = Scalar_LNumber[value: parseLNumber($1)]; }
| T_DNUMBER { $$ = Scalar_DNumber[value: parseDNumber($1)]; }
| T_CONSTANT_ENCAPSED_STRING { $$ = Scalar_String[value: parseString($1)]; }
| T_CONSTANT_ENCAPSED_STRING { $$ = Scalar_String::create($1); }
| T_LINE { $$ = Scalar_LineConst[]; }
| T_FILE { $$ = Scalar_FileConst[]; }
| T_DIR { $$ = Scalar_DirConst[]; }

View File

@ -1,8 +1,47 @@
<?php
/**
 * @property string $value    String value
 * @property bool   $isBinary Whether the string is binary (b'')
 * @property int    $type     Whether SINGLE_QUOTED or DOUBLE_QUOTED
 */
class Node_Scalar_String extends Node_Scalar
{
    const SINGLE_QUOTED = 0;
    const DOUBLE_QUOTED = 1;

    /**
     * Creates a String node from a string token (parses escape sequences).
     *
     * The token is expected to be a complete string literal: an optional
     * binary prefix (b or B), an opening quote, the contents and a closing
     * quote.
     *
     * @param string $s String token including its quotes (and optional prefix)
     *
     * @return Node_Scalar_String String Node
     */
    public static function create($s) {
        // The optional binary prefix shifts the position of the opening quote,
        // so the quote type must be read *after* the prefix, not at offset 0.
        $isBinary = 'b' === $s[0] || 'B' === $s[0];
        $quotePos = $isBinary ? 1 : 0;

        // Strip the prefix (if any) and both quotes. The cast guards against
        // substr() returning false for an empty literal on older PHP versions.
        $contents = (string) substr($s, $quotePos + 1, -1);

        if ('\'' === $s[$quotePos]) {
            $type    = self::SINGLE_QUOTED;
            $escapes = array(
                '\\\\' => '\\',
                '\\\'' => '\'',
            );
        } else {
            $type    = self::DOUBLE_QUOTED;
            $escapes = array(
                '\\\\' => '\\',
                '\"'   => '"',
                '\$'   => '$',
                '\n'   => "\n",
                '\r'   => "\r",
                '\t'   => "\t",
                '\f'   => "\f",
                '\v'   => "\v",
            );
            // TODO: parse hex and oct escape sequences
        }

        // strtr() with a map scans the input once and never re-examines text
        // it has already replaced. A sequential str_replace() would first turn
        // an escaped backslash followed by "n" into "\n" and then wrongly turn
        // that into a newline.
        $value = strtr($contents, $escapes);

        return new self(array('value' => $value, 'isBinary' => $isBinary, 'type' => $type));
    }
}

View File

@ -2545,7 +2545,7 @@ class Parser
}
/**
 * Generated semantic action: builds the string node for the token at the
 * current position of the value stack.
 *
 * Unescaping and quote handling are delegated to
 * Node_Scalar_String::create(), so no inline unescaping happens here.
 * NOTE(review): presumably corresponds to the T_CONSTANT_ENCAPSED_STRING
 * alternative of common_scalar in the grammar -- confirm after regenerating.
 */
private function yyn270() {
    $this->yyval = Node_Scalar_String::create($this->yyastk[$this->yysp-(1-1)]);
}
private function yyn271() {

View File

@ -21,7 +21,11 @@ class PrettyPrinter_Zend extends PrettyPrinterAbstract
}
/**
 * Prints a string scalar node as a PHP string literal.
 *
 * The literal keeps the quoting style recorded on the node and is prefixed
 * with "b" when the node is flagged as binary.
 *
 * @param Node_Scalar_String $node String node to print
 *
 * @return string PHP source for the string literal
 */
public function pScalar_String(Node_Scalar_String $node) {
    $prefix = $node->isBinary ? 'b' : '';

    if (Node_Scalar_String::SINGLE_QUOTED === $node->type) {
        // Only the quote character and the backslash need escaping here.
        return $prefix . '\'' . addcslashes($node->value, '\'\\') . '\'';
    }

    // Escape the quote, backslash, "$" and the listed control characters so
    // the printed double-quoted literal does not contain them raw.
    return $prefix . '"' . addcslashes($node->value, "\n\r\t\f\v$\"\\") . '"';
}
public function pExpr_Assign(Node_Expr_Assign $node) {