1
0
mirror of https://github.com/danog/PHP-Parser.git synced 2024-11-26 20:04:48 +01:00

Flexible doc: Validate end label indentation

Move doc string parsing logic from rebuildParsers.php and
String_::parseDocString() into ParserAbstract. This stuff is
going to get complicated now.

For now only implement the validation of the indentation on the
end label.
This commit is contained in:
Nikita Popov 2018-09-21 15:49:09 +02:00
parent 146411bb86
commit 5f73c4de80
8 changed files with 101 additions and 65 deletions

View File

@ -791,11 +791,9 @@ common_scalar:
| T_FUNC_C { $$ = Scalar\MagicConst\Function_[]; }
| T_NS_C { $$ = Scalar\MagicConst\Namespace_[]; }
| T_START_HEREDOC T_ENCAPSED_AND_WHITESPACE T_END_HEREDOC
{ $attrs = attributes(); setDocStringAttrs($attrs, $1);
$$ = new Scalar\String_(Scalar\String_::parseDocString($1, $2, false), $attrs); }
{ $$ = $this->parseDocString($1, $2, $3, attributes(), stackAttributes(#3), false); }
| T_START_HEREDOC T_END_HEREDOC
{ $attrs = attributes(); setDocStringAttrs($attrs, $1);
$$ = new Scalar\String_('', $attrs); }
{ $$ = $this->parseDocString($1, '', $2, attributes(), stackAttributes(#2), false); }
;
static_scalar:
@ -856,8 +854,7 @@ scalar:
{ $attrs = attributes(); $attrs['kind'] = Scalar\String_::KIND_DOUBLE_QUOTED;
parseEncapsed($2, '"', true); $$ = new Scalar\Encapsed($2, $attrs); }
| T_START_HEREDOC encaps_list T_END_HEREDOC
{ $attrs = attributes(); setDocStringAttrs($attrs, $1);
parseEncapsedDoc($2, true); $$ = new Scalar\Encapsed($2, $attrs); }
{ $$ = $this->parseDocString($1, $2, $3, attributes(), stackAttributes(#3), true); }
;
static_array_pair_list:

View File

@ -847,17 +847,14 @@ scalar:
| dereferencable_scalar { $$ = $1; }
| constant { $$ = $1; }
| T_START_HEREDOC T_ENCAPSED_AND_WHITESPACE T_END_HEREDOC
{ $attrs = attributes(); setDocStringAttrs($attrs, $1);
$$ = new Scalar\String_(Scalar\String_::parseDocString($1, $2), $attrs); }
{ $$ = $this->parseDocString($1, $2, $3, attributes(), stackAttributes(#3), true); }
| T_START_HEREDOC T_END_HEREDOC
{ $attrs = attributes(); setDocStringAttrs($attrs, $1);
$$ = new Scalar\String_('', $attrs); }
{ $$ = $this->parseDocString($1, '', $2, attributes(), stackAttributes(#2), true); }
| '"' encaps_list '"'
{ $attrs = attributes(); $attrs['kind'] = Scalar\String_::KIND_DOUBLE_QUOTED;
parseEncapsed($2, '"', true); $$ = new Scalar\Encapsed($2, $attrs); }
| T_START_HEREDOC encaps_list T_END_HEREDOC
{ $attrs = attributes(); setDocStringAttrs($attrs, $1);
parseEncapsedDoc($2, true); $$ = new Scalar\Encapsed($2, $attrs); }
{ $$ = $this->parseDocString($1, $2, $3, attributes(), stackAttributes(#3), true); }
;
optional_expr:

View File

@ -166,15 +166,6 @@ function resolveMacros($code) {
. ' $s->value = Node\Scalar\String_::parseEscapeSequences($s->value, ' . $args[1] . ', ' . $args[2] . '); } }';
}
if ('parseEncapsedDoc' == $name) {
assertArgs(2, $args, $name);
return 'foreach (' . $args[0] . ' as $s) { if ($s instanceof Node\Scalar\EncapsedStringPart) {'
. ' $s->value = Node\Scalar\String_::parseEscapeSequences($s->value, null, ' . $args[1] . '); } }'
. ' $s->value = preg_replace(\'~(\r\n|\n|\r)\z~\', \'\', $s->value);'
. ' if (\'\' === $s->value) array_pop(' . $args[0] . ');';
}
if ('makeNop' == $name) {
assertArgs(3, $args, $name);
@ -192,15 +183,6 @@ function resolveMacros($code) {
. '? Scalar\String_::KIND_SINGLE_QUOTED : Scalar\String_::KIND_DOUBLE_QUOTED)';
}
if ('setDocStringAttrs' == $name) {
assertArgs(2, $args, $name);
return $args[0] . '[\'kind\'] = strpos(' . $args[1] . ', "\'") === false '
. '? Scalar\String_::KIND_HEREDOC : Scalar\String_::KIND_NOWDOC; '
. 'preg_match(\'/\A[bB]?<<<[ \t]*[\\\'"]?([a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)[\\\'"]?(?:\r\n|\n|\r)\z/\', ' . $args[1] . ', $matches); '
. $args[0] . '[\'docLabel\'] = $matches[1];';
}
if ('prependLeadingComments' == $name) {
assertArgs(1, $args, $name);

View File

@ -134,29 +134,6 @@ class String_ extends Scalar
}
throw new Error('Invalid UTF-8 codepoint escape sequence: Codepoint too large');
}
/**
* @internal
*
* Parses a constant doc string.
*
* @param string $startToken Doc string start token content (<<<SMTHG)
* @param string $str String token content
* @param bool $parseUnicodeEscape Whether to parse PHP 7 \u escapes
*
* @return string Parsed string
*/
public static function parseDocString(string $startToken, string $str, bool $parseUnicodeEscape = true) : string {
// strip last newline (thanks tokenizer for sticking it into the string!)
$str = preg_replace('~(\r\n|\n|\r)\z~', '', $str);
// nowdoc string
if (false !== strpos($startToken, '\'')) {
return $str;
}
return self::parseEscapeSequences($str, null, $parseUnicodeEscape);
}
public function getType() : string {
return 'Scalar_String';

View File

@ -2264,12 +2264,10 @@ class Php5 extends \PhpParser\ParserAbstract
$this->semValue = new Scalar\MagicConst\Namespace_($this->startAttributeStack[$stackPos-(1-1)] + $this->endAttributes);
},
436 => function ($stackPos) {
$attrs = $this->startAttributeStack[$stackPos-(3-1)] + $this->endAttributes; $attrs['kind'] = strpos($this->semStack[$stackPos-(3-1)], "'") === false ? Scalar\String_::KIND_HEREDOC : Scalar\String_::KIND_NOWDOC; preg_match('/\A[bB]?<<<[ \t]*[\'"]?([a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)[\'"]?(?:\r\n|\n|\r)\z/', $this->semStack[$stackPos-(3-1)], $matches); $attrs['docLabel'] = $matches[1];;
$this->semValue = new Scalar\String_(Scalar\String_::parseDocString($this->semStack[$stackPos-(3-1)], $this->semStack[$stackPos-(3-2)], false), $attrs);
$this->semValue = $this->parseDocString($this->semStack[$stackPos-(3-1)], $this->semStack[$stackPos-(3-2)], $this->semStack[$stackPos-(3-3)], $this->startAttributeStack[$stackPos-(3-1)] + $this->endAttributes, $this->startAttributeStack[$stackPos-(3-3)] + $this->endAttributeStack[$stackPos-(3-3)], false);
},
437 => function ($stackPos) {
$attrs = $this->startAttributeStack[$stackPos-(2-1)] + $this->endAttributes; $attrs['kind'] = strpos($this->semStack[$stackPos-(2-1)], "'") === false ? Scalar\String_::KIND_HEREDOC : Scalar\String_::KIND_NOWDOC; preg_match('/\A[bB]?<<<[ \t]*[\'"]?([a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)[\'"]?(?:\r\n|\n|\r)\z/', $this->semStack[$stackPos-(2-1)], $matches); $attrs['docLabel'] = $matches[1];;
$this->semValue = new Scalar\String_('', $attrs);
$this->semValue = $this->parseDocString($this->semStack[$stackPos-(2-1)], '', $this->semStack[$stackPos-(2-2)], $this->startAttributeStack[$stackPos-(2-1)] + $this->endAttributes, $this->startAttributeStack[$stackPos-(2-2)] + $this->endAttributeStack[$stackPos-(2-2)], false);
},
438 => function ($stackPos) {
$this->semValue = $this->semStack[$stackPos-(1-1)];
@ -2405,8 +2403,7 @@ class Php5 extends \PhpParser\ParserAbstract
foreach ($this->semStack[$stackPos-(3-2)] as $s) { if ($s instanceof Node\Scalar\EncapsedStringPart) { $s->value = Node\Scalar\String_::parseEscapeSequences($s->value, '"', true); } }; $this->semValue = new Scalar\Encapsed($this->semStack[$stackPos-(3-2)], $attrs);
},
482 => function ($stackPos) {
$attrs = $this->startAttributeStack[$stackPos-(3-1)] + $this->endAttributes; $attrs['kind'] = strpos($this->semStack[$stackPos-(3-1)], "'") === false ? Scalar\String_::KIND_HEREDOC : Scalar\String_::KIND_NOWDOC; preg_match('/\A[bB]?<<<[ \t]*[\'"]?([a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)[\'"]?(?:\r\n|\n|\r)\z/', $this->semStack[$stackPos-(3-1)], $matches); $attrs['docLabel'] = $matches[1];;
foreach ($this->semStack[$stackPos-(3-2)] as $s) { if ($s instanceof Node\Scalar\EncapsedStringPart) { $s->value = Node\Scalar\String_::parseEscapeSequences($s->value, null, true); } } $s->value = preg_replace('~(\r\n|\n|\r)\z~', '', $s->value); if ('' === $s->value) array_pop($this->semStack[$stackPos-(3-2)]);; $this->semValue = new Scalar\Encapsed($this->semStack[$stackPos-(3-2)], $attrs);
$this->semValue = $this->parseDocString($this->semStack[$stackPos-(3-1)], $this->semStack[$stackPos-(3-2)], $this->semStack[$stackPos-(3-3)], $this->startAttributeStack[$stackPos-(3-1)] + $this->endAttributes, $this->startAttributeStack[$stackPos-(3-3)] + $this->endAttributeStack[$stackPos-(3-3)], true);
},
483 => function ($stackPos) {
$this->semValue = array();

View File

@ -2201,20 +2201,17 @@ class Php7 extends \PhpParser\ParserAbstract
$this->semValue = $this->semStack[$stackPos-(1-1)];
},
445 => function ($stackPos) {
$attrs = $this->startAttributeStack[$stackPos-(3-1)] + $this->endAttributes; $attrs['kind'] = strpos($this->semStack[$stackPos-(3-1)], "'") === false ? Scalar\String_::KIND_HEREDOC : Scalar\String_::KIND_NOWDOC; preg_match('/\A[bB]?<<<[ \t]*[\'"]?([a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)[\'"]?(?:\r\n|\n|\r)\z/', $this->semStack[$stackPos-(3-1)], $matches); $attrs['docLabel'] = $matches[1];;
$this->semValue = new Scalar\String_(Scalar\String_::parseDocString($this->semStack[$stackPos-(3-1)], $this->semStack[$stackPos-(3-2)]), $attrs);
$this->semValue = $this->parseDocString($this->semStack[$stackPos-(3-1)], $this->semStack[$stackPos-(3-2)], $this->semStack[$stackPos-(3-3)], $this->startAttributeStack[$stackPos-(3-1)] + $this->endAttributes, $this->startAttributeStack[$stackPos-(3-3)] + $this->endAttributeStack[$stackPos-(3-3)], true);
},
446 => function ($stackPos) {
$attrs = $this->startAttributeStack[$stackPos-(2-1)] + $this->endAttributes; $attrs['kind'] = strpos($this->semStack[$stackPos-(2-1)], "'") === false ? Scalar\String_::KIND_HEREDOC : Scalar\String_::KIND_NOWDOC; preg_match('/\A[bB]?<<<[ \t]*[\'"]?([a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)[\'"]?(?:\r\n|\n|\r)\z/', $this->semStack[$stackPos-(2-1)], $matches); $attrs['docLabel'] = $matches[1];;
$this->semValue = new Scalar\String_('', $attrs);
$this->semValue = $this->parseDocString($this->semStack[$stackPos-(2-1)], '', $this->semStack[$stackPos-(2-2)], $this->startAttributeStack[$stackPos-(2-1)] + $this->endAttributes, $this->startAttributeStack[$stackPos-(2-2)] + $this->endAttributeStack[$stackPos-(2-2)], true);
},
447 => function ($stackPos) {
$attrs = $this->startAttributeStack[$stackPos-(3-1)] + $this->endAttributes; $attrs['kind'] = Scalar\String_::KIND_DOUBLE_QUOTED;
foreach ($this->semStack[$stackPos-(3-2)] as $s) { if ($s instanceof Node\Scalar\EncapsedStringPart) { $s->value = Node\Scalar\String_::parseEscapeSequences($s->value, '"', true); } }; $this->semValue = new Scalar\Encapsed($this->semStack[$stackPos-(3-2)], $attrs);
},
448 => function ($stackPos) {
$attrs = $this->startAttributeStack[$stackPos-(3-1)] + $this->endAttributes; $attrs['kind'] = strpos($this->semStack[$stackPos-(3-1)], "'") === false ? Scalar\String_::KIND_HEREDOC : Scalar\String_::KIND_NOWDOC; preg_match('/\A[bB]?<<<[ \t]*[\'"]?([a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)[\'"]?(?:\r\n|\n|\r)\z/', $this->semStack[$stackPos-(3-1)], $matches); $attrs['docLabel'] = $matches[1];;
foreach ($this->semStack[$stackPos-(3-2)] as $s) { if ($s instanceof Node\Scalar\EncapsedStringPart) { $s->value = Node\Scalar\String_::parseEscapeSequences($s->value, null, true); } } $s->value = preg_replace('~(\r\n|\n|\r)\z~', '', $s->value); if ('' === $s->value) array_pop($this->semStack[$stackPos-(3-2)]);; $this->semValue = new Scalar\Encapsed($this->semStack[$stackPos-(3-2)], $attrs);
$this->semValue = $this->parseDocString($this->semStack[$stackPos-(3-1)], $this->semStack[$stackPos-(3-2)], $this->semStack[$stackPos-(3-3)], $this->startAttributeStack[$stackPos-(3-1)] + $this->endAttributes, $this->startAttributeStack[$stackPos-(3-3)] + $this->endAttributeStack[$stackPos-(3-3)], true);
},
449 => function ($stackPos) {
$this->semValue = null;

View File

@ -9,6 +9,7 @@ namespace PhpParser;
use PhpParser\Node\Expr;
use PhpParser\Node\Name;
use PhpParser\Node\Param;
use PhpParser\Node\Scalar\Encapsed;
use PhpParser\Node\Scalar\LNumber;
use PhpParser\Node\Scalar\String_;
use PhpParser\Node\Stmt\Class_;
@ -710,6 +711,69 @@ abstract class ParserAbstract implements Parser
return new LNumber($num, $attributes);
}
protected function stripIndentation(string $string, string $indentation, bool $newlineBefore) {
// TODO
}
protected function parseDocString(
string $startToken, $contents, string $endToken,
array $attributes, array $endTokenAttributes, bool $parseUnicodeEscape
) {
$kind = strpos($startToken, "'") === false
? String_::KIND_HEREDOC : String_::KIND_NOWDOC;
$regex = '/\A[bB]?<<<[ \t]*[\'"]?([a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)[\'"]?(?:\r\n|\n|\r)\z/';
$result = preg_match($regex, $startToken, $matches);
assert($result === 1);
$label = $matches[1];
$result = preg_match('/\A[ \t]*/', $endToken, $matches);
assert($result === 1);
$indentation = $matches[0];
$attributes['kind'] = $kind;
$attributes['docLabel'] = $label;
$attributes['docIndentation'] = $indentation;
$indentHasSpaces = false !== strpos($indentation, " ");
$indentHasTabs = false !== strpos($indentation, "\t");
if ($indentHasSpaces && $indentHasTabs) {
$this->emitError(new Error(
'Invalid indentation - tabs and spaces cannot be mixed',
$endTokenAttributes
));
// Proceed processing as if this doc string is not indented
$indentation = '';
}
if (\is_string($contents)) {
if ($contents === '') {
return new String_('', $attributes);
}
// strip last newline (thanks tokenizer for sticking it into the string!)
$string = preg_replace('~(\r\n|\n|\r)\z~', '', $contents);
if ($kind === String_::KIND_HEREDOC) {
$string = String_::parseEscapeSequences($string, null, $parseUnicodeEscape);
}
return new String_($string, $attributes);
} else {
foreach ($contents as $s) {
if ($s instanceof Node\Scalar\EncapsedStringPart) {
$s->value = String_::parseEscapeSequences($s->value, null, $parseUnicodeEscape);
}
}
$s->value = preg_replace('~(\r\n|\n|\r)\z~', '', $s->value);
if ('' === $s->value) {
array_pop($contents);
}
return new Encapsed($contents, $attributes);
}
}
protected function checkModifier($a, $b, $modifierPos) {
// Jumping through some hoops here because verifyModifier() is also used elsewhere
try {

View File

@ -0,0 +1,25 @@
Error conditions for flexible doc strings
-----
<?php
<<<A
@@{ "\t" }@@A;
<<<A
FooBar
@@{ "\t" }@@A;
-----
Invalid indentation - tabs and spaces cannot be mixed from 4:1 to 4:3
Invalid indentation - tabs and spaces cannot be mixed from 8:1 to 8:3
array(
0: Stmt_Expression(
expr: Scalar_String(
value:
)
)
1: Stmt_Expression(
expr: Scalar_String(
value: FooBar
)
)
)