From 17a81b5c8f1e7fbbcfeaba1282c62feaa37be64b Mon Sep 17 00:00:00 2001 From: nikic Date: Sat, 20 Aug 2011 10:40:27 +0200 Subject: [PATCH] Properly parse escape sequences: * Add support for oct and hex escape sequences * Take used quote type into account when parsing encapsed strings --- grammar/kmyacc.php.parser | 1 - grammar/rebuildParser.php | 14 ++--- grammar/zend_language_parser.phpy | 16 ++--- lib/PHPParser/Node/Scalar/String.php | 58 +++++++++++++------ lib/PHPParser/Parser.php | 15 +++-- .../Tests/Node/Scalar/StringTest.php | 58 +++++++++++++++++++ 6 files changed, 122 insertions(+), 40 deletions(-) create mode 100644 test/PHPParser/Tests/Node/Scalar/StringTest.php diff --git a/grammar/kmyacc.php.parser b/grammar/kmyacc.php.parser index e6d9bd8..3f02d73 100644 --- a/grammar/kmyacc.php.parser +++ b/grammar/kmyacc.php.parser @@ -89,7 +89,6 @@ class #(-p) protected $yyval; protected $yyastk; protected $yysp; - protected $yyaccept; protected $lexer; #endif #if -t diff --git a/grammar/rebuildParser.php b/grammar/rebuildParser.php index 707b712..38b8457 100644 --- a/grammar/rebuildParser.php +++ b/grammar/rebuildParser.php @@ -99,7 +99,7 @@ function resolveNodes($code) { function resolveMacros($code) { return preg_replace_callback( - '~(?<name>error|init|push|pushNormalizing|toArray|parse(?:Var|Encapsed|LNumber|DNumber))' . ARGS . '~', + '~(?<name>error|init|push|pushNormalizing|toArray|parse(?:Var|LNumber|DNumber|Encapsed))' . ARGS . '~', function($matches) { // recurse $matches['args'] = resolveMacros($matches['args']); @@ -144,12 +144,6 @@ function resolveMacros($code) { return 'substr(' . $args[0] . ', 1)'; } - if ('parseEncapsed' == $name) { - assertArgs(1, $args, $name); - - return 'stripcslashes(' . $args[0] . ')'; - } - if ('parseLNumber' == $name) { assertArgs(1, $args, $name); @@ -161,6 +155,12 @@ function resolveMacros($code) { return '(double) ' . $args[0]; } + + if ('parseEncapsed' == $name) { + assertArgs(2, $args, $name); + + return 'foreach (' . $args[0] . 
' as &$s) { if (is_string($s)) { $s = PHPParser_Node_Scalar_String::parseEscapeSequences($s, ' . $args[1] . '); } }'; + } }, $code ); diff --git a/grammar/zend_language_parser.phpy b/grammar/zend_language_parser.phpy index 01080b1..25f23c6 100644 --- a/grammar/zend_language_parser.phpy +++ b/grammar/zend_language_parser.phpy @@ -601,8 +601,8 @@ exit_expr: backticks_expr: /* empty */ { $$ = array(); } - | T_ENCAPSED_AND_WHITESPACE { $$ = array(Scalar_String::parseEscapeSequences($1)); } - | encaps_list { $$ = $1; } + | T_ENCAPSED_AND_WHITESPACE { $$ = array(Scalar_String::parseEscapeSequences($1, '`')); } + | encaps_list { parseEncapsed($1, '`'); $$ = $1; } ; ctor_arguments: @@ -622,7 +622,7 @@ common_scalar: | T_FUNC_C { $$ = Scalar_FuncConst[]; } | T_NS_C { $$ = Scalar_NSConst[]; } | T_START_HEREDOC T_ENCAPSED_AND_WHITESPACE T_END_HEREDOC - { $$ = Scalar_String[Scalar_String::parseEscapeSequences($2)]; } + { $$ = Scalar_String[Scalar_String::parseEscapeSequences($2, null)]; } | T_START_HEREDOC T_END_HEREDOC { $$ = Scalar_String['']; } ; @@ -641,8 +641,10 @@ scalar: | class_constant { $$ = $1; } | name { $$ = Expr_ConstFetch[$1]; } | common_scalar { $$ = $1; } - | '"' encaps_list '"' { $$ = Scalar_Encapsed[$2]; } - | T_START_HEREDOC encaps_list T_END_HEREDOC { $$ = Scalar_Encapsed[$2]; } + | '"' encaps_list '"' + { parseEncapsed($2, '"'); $$ = Scalar_Encapsed[$2]; } + | T_START_HEREDOC encaps_list T_END_HEREDOC + { parseEncapsed($2, null); $$ = Scalar_Encapsed[$2]; } ; static_array_pair_list: @@ -760,9 +762,9 @@ array_pair: encaps_list: encaps_list encaps_var { push($1, $2); } - | encaps_list T_ENCAPSED_AND_WHITESPACE { push($1, Scalar_String::parseEscapeSequences($2)); } + | encaps_list T_ENCAPSED_AND_WHITESPACE { push($1, $2); } | encaps_var { init($1); } - | T_ENCAPSED_AND_WHITESPACE encaps_var { init(Scalar_String::parseEscapeSequences($1), $2); } + | T_ENCAPSED_AND_WHITESPACE encaps_var { init($1, $2); } ; encaps_var: diff --git 
a/lib/PHPParser/Node/Scalar/String.php b/lib/PHPParser/Node/Scalar/String.php index 678c840..6f66d1a 100644 --- a/lib/PHPParser/Node/Scalar/String.php +++ b/lib/PHPParser/Node/Scalar/String.php @@ -24,46 +24,70 @@ class PHPParser_Node_Scalar_String extends PHPParser_Node_Scalar /** * Creates a String node from a string token (parses escape sequences). * - * @param string $s String + * @param string $str String * @param int $line Line * @param null|string $docComment Nearest doc comment * * @return PHPParser_Node_Scalar_String String Node */ - public static function create($s, $line, $docComment) { + public static function create($str, $line = -1, $docComment = null) { $bLength = 0; - if ('b' === $s[0]) { + if ('b' === $str[0]) { $bLength = 1; } - if ('\'' === $s[$bLength]) { - $s = str_replace( + if ('\'' === $str[$bLength]) { + $str = str_replace( array('\\\\', '\\\''), array( '\\', '\''), - substr($s, $bLength + 1, -1) + substr($str, $bLength + 1, -1) ); } else { - $s = self::parseEscapeSequences(substr($s, $bLength + 1, -1)); + $str = self::parseEscapeSequences(substr($str, $bLength + 1, -1), '"'); } - return new self($s, $line, $docComment); + return new self($str, $line, $docComment); } /** - * Parses escape sequences in the content of a doubly quoted string - * or heredoc string. + * Parses escape sequences in strings (all string types apart from single quoted). * - * @param string $s String without quotes + * @param string $str String without quotes + * @param null|string $quote Quote type * * @return string String with escape sequences parsed */ - public static function parseEscapeSequences($s) { - // TODO: parse hex and oct escape sequences + public static function parseEscapeSequences($str, $quote) { + if (null !== $quote) { + $str = str_replace('\\' . 
$quote, $quote, $str); + } - return str_replace( - array('\\\\', '\"', '\$', '\n', '\r', '\t', '\f', '\v'), - array( '\\', '"', '$', "\n", "\r", "\t", "\f", "\v"), - $s + return preg_replace_callback( + '~\\\\([\\\\$nrtfv]|[xX][0-9a-fA-F]{1,2}|[0-7]{1,3})~', + array(__CLASS__, 'parseCallback'), + $str ); } + + protected static $replacements = array( + '\\' => '\\', + '$' => '$', + 'n' => "\n", + 'r' => "\r", + 't' => "\t", + 'f' => "\f", + 'v' => "\v", + ); + + public static function parseCallback($matches) { + $str = $matches[1]; + + if (isset(self::$replacements[$str])) { + return self::$replacements[$str]; + } elseif ('x' === $str[0] || 'X' === $str[0]) { + return chr(hexdec($str)); + } else { + return chr(octdec($str)); + } + } } \ No newline at end of file diff --git a/lib/PHPParser/Parser.php b/lib/PHPParser/Parser.php index 76264b5..2f4e569 100644 --- a/lib/PHPParser/Parser.php +++ b/lib/PHPParser/Parser.php @@ -858,7 +858,6 @@ class PHPParser_Parser protected $yyval; protected $yyastk; protected $yysp; - protected $yyaccept; protected $lexer; /** @@ -2039,11 +2038,11 @@ class PHPParser_Parser } protected function yyn261($line, $docComment) { - $this->yyval = array(PHPParser_Node_Scalar_String::parseEscapeSequences($this->yyastk[$this->yysp-(1-1)])); + $this->yyval = array(PHPParser_Node_Scalar_String::parseEscapeSequences($this->yyastk[$this->yysp-(1-1)], '`')); } protected function yyn262($line, $docComment) { - $this->yyval = $this->yyastk[$this->yysp-(1-1)]; + foreach ($this->yyastk[$this->yysp-(1-1)] as &$s) { if (is_string($s)) { $s = PHPParser_Node_Scalar_String::parseEscapeSequences($s, '`'); } }; $this->yyval = $this->yyastk[$this->yysp-(1-1)]; } protected function yyn263($line, $docComment) { @@ -2095,7 +2094,7 @@ class PHPParser_Parser } protected function yyn275($line, $docComment) { - $this->yyval = new PHPParser_Node_Scalar_String(PHPParser_Node_Scalar_String::parseEscapeSequences($this->yyastk[$this->yysp-(3-2)]), $line, $docComment); + 
$this->yyval = new PHPParser_Node_Scalar_String(PHPParser_Node_Scalar_String::parseEscapeSequences($this->yyastk[$this->yysp-(3-2)], null), $line, $docComment); } protected function yyn276($line, $docComment) { @@ -2143,11 +2142,11 @@ class PHPParser_Parser } protected function yyn287($line, $docComment) { - $this->yyval = new PHPParser_Node_Scalar_Encapsed($this->yyastk[$this->yysp-(3-2)], $line, $docComment); + foreach ($this->yyastk[$this->yysp-(3-2)] as &$s) { if (is_string($s)) { $s = PHPParser_Node_Scalar_String::parseEscapeSequences($s, '"'); } }; $this->yyval = new PHPParser_Node_Scalar_Encapsed($this->yyastk[$this->yysp-(3-2)], $line, $docComment); } protected function yyn288($line, $docComment) { - $this->yyval = new PHPParser_Node_Scalar_Encapsed($this->yyastk[$this->yysp-(3-2)], $line, $docComment); + foreach ($this->yyastk[$this->yysp-(3-2)] as &$s) { if (is_string($s)) { $s = PHPParser_Node_Scalar_String::parseEscapeSequences($s, null); } }; $this->yyval = new PHPParser_Node_Scalar_Encapsed($this->yyastk[$this->yysp-(3-2)], $line, $docComment); } protected function yyn289($line, $docComment) { @@ -2359,7 +2358,7 @@ class PHPParser_Parser } protected function yyn341($line, $docComment) { - $this->yyastk[$this->yysp-(2-1)][] = PHPParser_Node_Scalar_String::parseEscapeSequences($this->yyastk[$this->yysp-(2-2)]); $this->yyval = $this->yyastk[$this->yysp-(2-1)]; + $this->yyastk[$this->yysp-(2-1)][] = $this->yyastk[$this->yysp-(2-2)]; $this->yyval = $this->yyastk[$this->yysp-(2-1)]; } protected function yyn342($line, $docComment) { @@ -2367,7 +2366,7 @@ class PHPParser_Parser } protected function yyn343($line, $docComment) { - $this->yyval = array(PHPParser_Node_Scalar_String::parseEscapeSequences($this->yyastk[$this->yysp-(2-1)]), $this->yyastk[$this->yysp-(2-2)]); + $this->yyval = array($this->yyastk[$this->yysp-(2-1)], $this->yyastk[$this->yysp-(2-2)]); } protected function yyn344($line, $docComment) { diff --git 
a/test/PHPParser/Tests/Node/Scalar/StringTest.php b/test/PHPParser/Tests/Node/Scalar/StringTest.php new file mode 100644 index 0000000..bae3d90 --- /dev/null +++ b/test/PHPParser/Tests/Node/Scalar/StringTest.php @@ -0,0 +1,58 @@ +<?php + +class PHPParser_Tests_Node_Scalar_StringTest extends PHPUnit_Framework_TestCase +{ + /** + * @dataProvider provideTestParseEscapeSequences + */ + public function testParseEscapeSequences($expected, $string, $quote) { + $this->assertEquals( + $expected, + PHPParser_Node_Scalar_String::parseEscapeSequences($string, $quote) + ); + } + + /** + * @dataProvider provideTestCreate + */ + public function testCreate($expected, $string) { + $this->assertEquals( + $expected, + PHPParser_Node_Scalar_String::create($string)->value + ); + } + + public function provideTestParseEscapeSequences() { + return array( + array('"', '\\"', '"'), + array('\\"', '\\"', '`'), + array('\\"\\`', '\\"\\`', null), + array("\\\$\n\r\t\f\v", '\\\\\$\n\r\t\f\v', null), + array(chr(255), '\xFF', null), + array(chr(255), '\377', null), + array(chr(0), '\400', null), + array("\0", '\0', null), + array('\xFF', '\\\\xFF', null), + ); + } + + public function provideTestCreate() { + $tests = array( + array('A', '\'A\''), + array('A', 'b\'A\''), + array('A', '"A"'), + array('A', 'b"A"'), + array('\\', '\'\\\\\''), + array('\'', '\'\\\'\''), + ); + + foreach ($this->provideTestParseEscapeSequences() as $i => $test) { + // skip second and third tests, they aren't for double quotes + if ($i != 1 && $i != 2) { + $tests[] = array($test[0], '"' . $test[1] . '"'); + } + } + + return $tests; + } +} \ No newline at end of file