Flexible doc: Validate and strip indentation

If indentation is invalid, we strip on a best-effort basis.

The error position information is not great, but I don't want to
introduce sub-token error positioning at this point in time.
This commit is contained in:
Nikita Popov 2018-09-21 17:26:47 +02:00
parent 5f73c4de80
commit 0ed9065b4c
3 changed files with 283 additions and 15 deletions

View File

@ -711,8 +711,36 @@ abstract class ParserAbstract implements Parser
return new LNumber($num, $attributes);
}
protected function stripIndentation(string $string, string $indentation, bool $newlineBefore) {
// TODO
/**
 * Strips up to $indentLen leading whitespace characters from each line of a
 * doc string fragment, reporting invalid indentation via emitError().
 *
 * Stripping is best-effort: when a line's indentation is invalid an error is
 * emitted, but the part of the prefix that was matched is still removed.
 *
 * @param string $string         Fragment to strip indentation from
 * @param int    $indentLen      Number of indentation characters to remove per line;
 *                               0 returns $string unchanged
 * @param string $indentChar     Expected indentation character; the caller visible
 *                               in this diff passes either " " or "\t"
 * @param bool   $newlineAtStart Whether the start of $string is treated as the start of a line
 * @param bool   $newlineAtEnd   Whether the end of $string is treated as the end of a line
 * @param array  $attributes     Attributes attached to any Error that is emitted
 *
 * @return string Result of the replacement (the fragment with indentation removed)
 */
protected function stripIndentation(
string $string, int $indentLen, string $indentChar,
bool $newlineAtStart, bool $newlineAtEnd, array $attributes
) {
if ($indentLen === 0) {
return $string;
}
// A line starts after a "\n"; the very start of the fragment only counts
// as a line start when $newlineAtStart is set.
$start = $newlineAtStart ? '(?:(?<=\n)|\A)' : '(?<=\n)';
// A line ends before "\r" or "\n"; the very end of the fragment only counts
// as a line end when $newlineAtEnd is set.
$end = $newlineAtEnd ? '(?:(?=[\r\n])|\z)' : '(?=[\r\n])';
// Group 1 captures the leading run of spaces/tabs. Group 2 is an optional
// zero-width line-end check, used below to recognise whitespace-only lines.
$regex = '/' . $start . '([ \t]*)(' . $end . ')?/';
return preg_replace_callback(
$regex,
function ($matches) use ($indentLen, $indentChar, $attributes) {
// Only the first $indentLen whitespace characters are candidates for removal.
$prefix = substr($matches[1], 0, $indentLen);
// The prefix must not contain the "other" whitespace character.
if (false !== strpos($prefix, $indentChar === " " ? "\t" : " ")) {
$this->emitError(new Error(
'Invalid indentation - tabs and spaces cannot be mixed', $attributes
));
// Too little indentation is an error, unless group 2 is present, i.e. the
// whitespace run is directly followed by a line end (whitespace-only line).
} elseif (strlen($prefix) < $indentLen && !isset($matches[2])) {
$this->emitError(new Error(
'Invalid body indentation level ' .
'(expecting an indentation level of at least ' . $indentLen . ')',
$attributes
));
}
// Keep whatever whitespace follows the stripped prefix.
return substr($matches[0], strlen($prefix));
},
$string
);
}
protected function parseDocString(
@ -747,22 +775,39 @@ abstract class ParserAbstract implements Parser
$indentation = '';
}
$indentLen = \strlen($indentation);
$indentChar = $indentHasSpaces ? " " : "\t";
if (\is_string($contents)) {
if ($contents === '') {
return new String_('', $attributes);
}
// strip last newline (thanks tokenizer for sticking it into the string!)
$string = preg_replace('~(\r\n|\n|\r)\z~', '', $contents);
$contents = $this->stripIndentation(
$contents, $indentLen, $indentChar, true, true, $attributes
);
$contents = preg_replace('~(\r\n|\n|\r)\z~', '', $contents);
if ($kind === String_::KIND_HEREDOC) {
$string = String_::parseEscapeSequences($string, null, $parseUnicodeEscape);
$contents = String_::parseEscapeSequences($contents, null, $parseUnicodeEscape);
}
return new String_($string, $attributes);
return new String_($contents, $attributes);
} else {
foreach ($contents as $s) {
assert(count($contents) > 0);
if (!$contents[0] instanceof Node\Scalar\EncapsedStringPart) {
// If there is no leading encapsed string part, pretend there is an empty one
$this->stripIndentation(
'', $indentLen, $indentChar, true, false, $contents[0]->getAttributes()
);
}
foreach ($contents as $i => $s) {
if ($s instanceof Node\Scalar\EncapsedStringPart) {
$s->value = $this->stripIndentation(
$s->value, $indentLen, $indentChar,
$i === 0, $i === \count($contents) - 1, $s->getAttributes()
);
$s->value = String_::parseEscapeSequences($s->value, null, $parseUnicodeEscape);
}
}

View File

@ -6,11 +6,62 @@ $ary = [
<<<FOO
Test
FOO,
// Value here is wrong
<<<'BAR'
Test
BAR,
];
<<<'END'
END;
<<<END
END;
<<<END
@@{ " " }@@
END;
<<<'END'
a
b
c
d
e
END;
<<<END
a
b
$test
d
e
END;
<<<'END'
a
b
c
d
e
END;
<<<END
a\r\n
\ta\n
b\r\n
$test\n
d\r\n
e\n
END;
-----
array(
0: Stmt_Expression(
@ -30,18 +81,98 @@ array(
1: Expr_ArrayItem(
key: null
value: Scalar_String(
value: Test
comments: array(
0: // Value here is wrong
)
value: Test
)
byRef: false
comments: array(
0: // Value here is wrong
)
)
)
)
)
)
1: Stmt_Expression(
expr: Scalar_String(
value:
)
)
2: Stmt_Expression(
expr: Scalar_String(
value:
)
)
3: Stmt_Expression(
expr: Scalar_String(
value:
)
)
4: Stmt_Expression(
expr: Scalar_String(
value: a
b
c
d
e
)
)
5: Stmt_Expression(
expr: Scalar_Encapsed(
parts: array(
0: Scalar_EncapsedStringPart(
value: a
b
)
1: Expr_Variable(
name: test
)
2: Scalar_EncapsedStringPart(
value:
d
e
)
)
)
)
6: Stmt_Expression(
expr: Scalar_String(
value:
a
b
c
d
e
)
)
7: Stmt_Expression(
expr: Scalar_Encapsed(
parts: array(
0: Scalar_EncapsedStringPart(
value: a
@@{ "\t" }@@a
b
)
1: Expr_Variable(
name: test
)
2: Scalar_EncapsedStringPart(
value:
d
e
)
)
)
)
)

View File

@ -8,9 +8,43 @@ Error conditions for flexible doc strings
<<<A
FooBar
@@{ "\t" }@@A;
echo <<<END
@@{ "\t" }@@ X
@@{ "\t\t" }@@END;
echo <<<END
a
b
c
END;
<<<END
\ta
@@{ "\t" }@@END;
<<<TEST
Foo
$var
TEST;
<<<TEST
$var
TEST;
echo <<<END
a
$a
END;
-----
Invalid indentation - tabs and spaces cannot be mixed from 4:1 to 4:3
Invalid indentation - tabs and spaces cannot be mixed from 8:1 to 8:3
Invalid indentation - tabs and spaces cannot be mixed from 10:6 to 12:5
Invalid body indentation level (expecting an indentation level of at least 5) from 14:6 to 18:8
Invalid body indentation level (expecting an indentation level of at least 1) from 20:1 to 22:4
Invalid body indentation level (expecting an indentation level of at least 2) from 25:1 to 26:0
Invalid body indentation level (expecting an indentation level of at least 1) from 30:1 to 30:4
Invalid body indentation level (expecting an indentation level of at least 1) from 34:1 to 35:0
array(
0: Stmt_Expression(
expr: Scalar_String(
@ -22,4 +56,62 @@ array(
value: FooBar
)
)
2: Stmt_Echo(
exprs: array(
0: Scalar_String(
value: X
)
)
)
3: Stmt_Echo(
exprs: array(
0: Scalar_String(
value: a
b
c
)
)
)
4: Stmt_Expression(
expr: Scalar_String(
value: a
)
)
5: Stmt_Expression(
expr: Scalar_Encapsed(
parts: array(
0: Scalar_EncapsedStringPart(
value: Foo
)
1: Expr_Variable(
name: var
)
)
)
)
6: Stmt_Expression(
expr: Scalar_Encapsed(
parts: array(
0: Expr_Variable(
name: var
)
)
)
)
7: Stmt_Echo(
exprs: array(
0: Scalar_Encapsed(
parts: array(
0: Scalar_EncapsedStringPart(
value: a
)
1: Expr_Variable(
name: a
)
)
)
)
)
)