Properly parse escape sequences:

* Add support for octal and hexadecimal escape sequences
* Take the enclosing quote type into account when parsing encapsed strings
This commit is contained in:
nikic 2011-08-20 10:40:27 +02:00
parent 05c514f9c5
commit 17a81b5c8f
6 changed files with 122 additions and 40 deletions

View File

@ -89,7 +89,6 @@ class #(-p)
protected $yyval; protected $yyval;
protected $yyastk; protected $yyastk;
protected $yysp; protected $yysp;
protected $yyaccept;
protected $lexer; protected $lexer;
#endif #endif
#if -t #if -t

View File

@ -99,7 +99,7 @@ function resolveNodes($code) {
function resolveMacros($code) { function resolveMacros($code) {
return preg_replace_callback( return preg_replace_callback(
'~(?<name>error|init|push|pushNormalizing|toArray|parse(?:Var|Encapsed|LNumber|DNumber))' . ARGS . '~', '~(?<name>error|init|push|pushNormalizing|toArray|parse(?:Var|LNumber|DNumber|Encapsed))' . ARGS . '~',
function($matches) { function($matches) {
// recurse // recurse
$matches['args'] = resolveMacros($matches['args']); $matches['args'] = resolveMacros($matches['args']);
@ -144,12 +144,6 @@ function resolveMacros($code) {
return 'substr(' . $args[0] . ', 1)'; return 'substr(' . $args[0] . ', 1)';
} }
if ('parseEncapsed' == $name) {
assertArgs(1, $args, $name);
return 'stripcslashes(' . $args[0] . ')';
}
if ('parseLNumber' == $name) { if ('parseLNumber' == $name) {
assertArgs(1, $args, $name); assertArgs(1, $args, $name);
@ -161,6 +155,12 @@ function resolveMacros($code) {
return '(double) ' . $args[0]; return '(double) ' . $args[0];
} }
if ('parseEncapsed' == $name) {
assertArgs(2, $args, $name);
return 'foreach (' . $args[0] . ' as &$s) { if (is_string($s)) { $s = PHPParser_Node_Scalar_String::parseEscapeSequences($s, ' . $args[1] . '); } }';
}
}, },
$code $code
); );

View File

@ -601,8 +601,8 @@ exit_expr:
backticks_expr: backticks_expr:
/* empty */ { $$ = array(); } /* empty */ { $$ = array(); }
| T_ENCAPSED_AND_WHITESPACE { $$ = array(Scalar_String::parseEscapeSequences($1)); } | T_ENCAPSED_AND_WHITESPACE { $$ = array(Scalar_String::parseEscapeSequences($1, '`')); }
| encaps_list { $$ = $1; } | encaps_list { parseEncapsed($1, '`'); $$ = $1; }
; ;
ctor_arguments: ctor_arguments:
@ -622,7 +622,7 @@ common_scalar:
| T_FUNC_C { $$ = Scalar_FuncConst[]; } | T_FUNC_C { $$ = Scalar_FuncConst[]; }
| T_NS_C { $$ = Scalar_NSConst[]; } | T_NS_C { $$ = Scalar_NSConst[]; }
| T_START_HEREDOC T_ENCAPSED_AND_WHITESPACE T_END_HEREDOC | T_START_HEREDOC T_ENCAPSED_AND_WHITESPACE T_END_HEREDOC
{ $$ = Scalar_String[Scalar_String::parseEscapeSequences($2)]; } { $$ = Scalar_String[Scalar_String::parseEscapeSequences($2, null)]; }
| T_START_HEREDOC T_END_HEREDOC | T_START_HEREDOC T_END_HEREDOC
{ $$ = Scalar_String['']; } { $$ = Scalar_String['']; }
; ;
@ -641,8 +641,10 @@ scalar:
| class_constant { $$ = $1; } | class_constant { $$ = $1; }
| name { $$ = Expr_ConstFetch[$1]; } | name { $$ = Expr_ConstFetch[$1]; }
| common_scalar { $$ = $1; } | common_scalar { $$ = $1; }
| '"' encaps_list '"' { $$ = Scalar_Encapsed[$2]; } | '"' encaps_list '"'
| T_START_HEREDOC encaps_list T_END_HEREDOC { $$ = Scalar_Encapsed[$2]; } { parseEncapsed($2, '"'); $$ = Scalar_Encapsed[$2]; }
| T_START_HEREDOC encaps_list T_END_HEREDOC
{ parseEncapsed($2, null); $$ = Scalar_Encapsed[$2]; }
; ;
static_array_pair_list: static_array_pair_list:
@ -760,9 +762,9 @@ array_pair:
encaps_list: encaps_list:
encaps_list encaps_var { push($1, $2); } encaps_list encaps_var { push($1, $2); }
| encaps_list T_ENCAPSED_AND_WHITESPACE { push($1, Scalar_String::parseEscapeSequences($2)); } | encaps_list T_ENCAPSED_AND_WHITESPACE { push($1, $2); }
| encaps_var { init($1); } | encaps_var { init($1); }
| T_ENCAPSED_AND_WHITESPACE encaps_var { init(Scalar_String::parseEscapeSequences($1), $2); } | T_ENCAPSED_AND_WHITESPACE encaps_var { init($1, $2); }
; ;
encaps_var: encaps_var:

View File

@ -24,46 +24,70 @@ class PHPParser_Node_Scalar_String extends PHPParser_Node_Scalar
/** /**
* Creates a String node from a string token (parses escape sequences). * Creates a String node from a string token (parses escape sequences).
* *
* @param string $s String * @param string $str String
* @param int $line Line * @param int $line Line
* @param null|string $docComment Nearest doc comment * @param null|string $docComment Nearest doc comment
* *
* @return PHPParser_Node_Scalar_String String Node * @return PHPParser_Node_Scalar_String String Node
*/ */
public static function create($s, $line, $docComment) { public static function create($str, $line = -1, $docComment = null) {
$bLength = 0; $bLength = 0;
if ('b' === $s[0]) { if ('b' === $str[0]) {
$bLength = 1; $bLength = 1;
} }
if ('\'' === $s[$bLength]) { if ('\'' === $str[$bLength]) {
$s = str_replace( $str = str_replace(
array('\\\\', '\\\''), array('\\\\', '\\\''),
array( '\\', '\''), array( '\\', '\''),
substr($s, $bLength + 1, -1) substr($str, $bLength + 1, -1)
); );
} else { } else {
$s = self::parseEscapeSequences(substr($s, $bLength + 1, -1)); $str = self::parseEscapeSequences(substr($str, $bLength + 1, -1), '"');
} }
return new self($s, $line, $docComment); return new self($str, $line, $docComment);
} }
/** /**
* Parses escape sequences in the content of a doubly quoted string * Parses escape sequences in strings (all string types apart from single quoted).
* or heredoc string.
* *
* @param string $s String without quotes * @param string $str String without quotes
* @param null|string $quote Quote type
* *
* @return string String with escape sequences parsed * @return string String with escape sequences parsed
*/ */
public static function parseEscapeSequences($s) { public static function parseEscapeSequences($str, $quote) {
// TODO: parse hex and oct escape sequences if (null !== $quote) {
$str = str_replace('\\' . $quote, $quote, $str);
}
return str_replace( return preg_replace_callback(
array('\\\\', '\"', '\$', '\n', '\r', '\t', '\f', '\v'), '~\\\\([\\\\$nrtfv]|[xX][0-9a-fA-F]{1,2}|[0-7]{1,3})~',
array( '\\', '"', '$', "\n", "\r", "\t", "\f", "\v"), array(__CLASS__, 'parseCallback'),
$s $str
); );
} }
/**
 * Single-character escape sequences (the character following the backslash)
 * mapped to the character each one denotes.
 *
 * @var array
 */
protected static $replacements = array(
    '\\' => '\\',
    '$'  =>  '$',
    'n'  => "\n",
    'r'  => "\r",
    't'  => "\t",
    'f'  => "\f",
    'v'  => "\v",
);

/**
 * Converts one matched escape sequence into the character it denotes.
 *
 * Used as the preg_replace_callback() handler in parseEscapeSequences().
 *
 * @param array $matches Regex match; index 1 holds the escape sequence
 *                       without its leading backslash (e.g. "n", "xFF", "377")
 *
 * @return string Replacement character
 */
public static function parseCallback($matches) {
    $str = $matches[1];

    if (isset(self::$replacements[$str])) {
        return self::$replacements[$str];
    }

    if ('x' === $str[0] || 'X' === $str[0]) {
        // Strip the x/X marker: hexdec() only understands hex digits, and
        // relying on it to skip invalid characters triggers a deprecation
        // notice on newer PHP versions.
        return chr(hexdec(substr($str, 1)));
    }

    // Octal sequences can exceed one byte (e.g. \400 is 256); keep only the
    // low byte explicitly instead of depending on how chr() treats
    // out-of-range values.
    return chr(octdec($str) & 0xFF);
}
} }

View File

@ -858,7 +858,6 @@ class PHPParser_Parser
protected $yyval; protected $yyval;
protected $yyastk; protected $yyastk;
protected $yysp; protected $yysp;
protected $yyaccept;
protected $lexer; protected $lexer;
/** /**
@ -2039,11 +2038,11 @@ class PHPParser_Parser
} }
protected function yyn261($line, $docComment) { protected function yyn261($line, $docComment) {
$this->yyval = array(PHPParser_Node_Scalar_String::parseEscapeSequences($this->yyastk[$this->yysp-(1-1)])); $this->yyval = array(PHPParser_Node_Scalar_String::parseEscapeSequences($this->yyastk[$this->yysp-(1-1)], '`'));
} }
protected function yyn262($line, $docComment) { protected function yyn262($line, $docComment) {
$this->yyval = $this->yyastk[$this->yysp-(1-1)]; foreach ($this->yyastk[$this->yysp-(1-1)] as &$s) { if (is_string($s)) { $s = PHPParser_Node_Scalar_String::parseEscapeSequences($s, '`'); } }; $this->yyval = $this->yyastk[$this->yysp-(1-1)];
} }
protected function yyn263($line, $docComment) { protected function yyn263($line, $docComment) {
@ -2095,7 +2094,7 @@ class PHPParser_Parser
} }
protected function yyn275($line, $docComment) { protected function yyn275($line, $docComment) {
$this->yyval = new PHPParser_Node_Scalar_String(PHPParser_Node_Scalar_String::parseEscapeSequences($this->yyastk[$this->yysp-(3-2)]), $line, $docComment); $this->yyval = new PHPParser_Node_Scalar_String(PHPParser_Node_Scalar_String::parseEscapeSequences($this->yyastk[$this->yysp-(3-2)], null), $line, $docComment);
} }
protected function yyn276($line, $docComment) { protected function yyn276($line, $docComment) {
@ -2143,11 +2142,11 @@ class PHPParser_Parser
} }
protected function yyn287($line, $docComment) { protected function yyn287($line, $docComment) {
$this->yyval = new PHPParser_Node_Scalar_Encapsed($this->yyastk[$this->yysp-(3-2)], $line, $docComment); foreach ($this->yyastk[$this->yysp-(3-2)] as &$s) { if (is_string($s)) { $s = PHPParser_Node_Scalar_String::parseEscapeSequences($s, '"'); } }; $this->yyval = new PHPParser_Node_Scalar_Encapsed($this->yyastk[$this->yysp-(3-2)], $line, $docComment);
} }
protected function yyn288($line, $docComment) { protected function yyn288($line, $docComment) {
$this->yyval = new PHPParser_Node_Scalar_Encapsed($this->yyastk[$this->yysp-(3-2)], $line, $docComment); foreach ($this->yyastk[$this->yysp-(3-2)] as &$s) { if (is_string($s)) { $s = PHPParser_Node_Scalar_String::parseEscapeSequences($s, null); } }; $this->yyval = new PHPParser_Node_Scalar_Encapsed($this->yyastk[$this->yysp-(3-2)], $line, $docComment);
} }
protected function yyn289($line, $docComment) { protected function yyn289($line, $docComment) {
@ -2359,7 +2358,7 @@ class PHPParser_Parser
} }
protected function yyn341($line, $docComment) { protected function yyn341($line, $docComment) {
$this->yyastk[$this->yysp-(2-1)][] = PHPParser_Node_Scalar_String::parseEscapeSequences($this->yyastk[$this->yysp-(2-2)]); $this->yyval = $this->yyastk[$this->yysp-(2-1)]; $this->yyastk[$this->yysp-(2-1)][] = $this->yyastk[$this->yysp-(2-2)]; $this->yyval = $this->yyastk[$this->yysp-(2-1)];
} }
protected function yyn342($line, $docComment) { protected function yyn342($line, $docComment) {
@ -2367,7 +2366,7 @@ class PHPParser_Parser
} }
protected function yyn343($line, $docComment) { protected function yyn343($line, $docComment) {
$this->yyval = array(PHPParser_Node_Scalar_String::parseEscapeSequences($this->yyastk[$this->yysp-(2-1)]), $this->yyastk[$this->yysp-(2-2)]); $this->yyval = array($this->yyastk[$this->yysp-(2-1)], $this->yyastk[$this->yysp-(2-2)]);
} }
protected function yyn344($line, $docComment) { protected function yyn344($line, $docComment) {

View File

@ -0,0 +1,58 @@
<?php
/**
 * Tests escape sequence parsing and string token handling of
 * PHPParser_Node_Scalar_String.
 */
class PHPParser_Tests_Node_Scalar_StringTest extends PHPUnit_Framework_TestCase
{
    /**
     * Checks parseEscapeSequences() for a given string content and quote type.
     *
     * @dataProvider provideTestParseEscapeSequences
     */
    public function testParseEscapeSequences($expected, $string, $quote) {
        // assertSame compares strictly: a loose comparison could treat
        // different but numerically equal strings as matching.
        $this->assertSame(
            $expected,
            PHPParser_Node_Scalar_String::parseEscapeSequences($string, $quote)
        );
    }

    /**
     * Checks the value of the node that create() builds from a full string
     * token (including quotes and an optional "b" prefix).
     *
     * @dataProvider provideTestCreate
     */
    public function testCreate($expected, $string) {
        $this->assertSame(
            $expected,
            PHPParser_Node_Scalar_String::create($string)->value
        );
    }

    /**
     * Data sets of the form array(expected result, string content, quote type).
     *
     * provideTestCreate() skips the entries at index 1 and 2 by position, so
     * new cases must be appended and must be valid for double quoted strings.
     *
     * @return array
     */
    public function provideTestParseEscapeSequences() {
        return array(
            array('"',              '\\"',              '"'),
            array('\\"',            '\\"',              '`'),
            array('\\"\\`',         '\\"\\`',           null),
            array("\\\$\n\r\t\f\v", '\\\\\$\n\r\t\f\v', null),
            array(chr(255),         '\xFF',             null),
            array(chr(255),         '\377',             null),
            array(chr(0),           '\400',             null),
            array("\0",             '\0',               null),
            array('\xFF',           '\\\\xFF',          null),
            // uppercase hex marker and single digit hex sequence
            array('A',              '\X41',             null),
            array("\t",             '\x9',              null),
        );
    }

    /**
     * Data sets of the form array(expected value, complete string token).
     *
     * @return array
     */
    public function provideTestCreate() {
        $tests = array(
            array('A',  '\'A\''),
            array('A',  'b\'A\''),
            array('A',  '"A"'),
            array('A',  'b"A"'),
            array('\\', '\'\\\\\''),
            array('\'', '\'\\\'\''),
        );

        foreach ($this->provideTestParseEscapeSequences() as $i => $test) {
            // skip second and third tests, they aren't for double quotes
            if (1 === $i || 2 === $i) {
                continue;
            }

            $tests[] = array($test[0], '"' . $test[1] . '"');
        }

        return $tests;
    }
}