Make sure match is one character long

This commit is contained in:
Nikita Popov 2021-04-25 21:47:07 +02:00
parent 33d7c8d3d8
commit ce91d139b5
2 changed files with 6 additions and 3 deletions

View File

@ -999,13 +999,13 @@ class Standard extends PrettyPrinterAbstract
}
// Escape control characters and non-UTF-8 characters.
// Regex taken from https://stackoverflow.com/a/11709412/385378.
// Regex based on https://stackoverflow.com/a/11709412/385378.
$regex = '/(
[\x00-\x08\x0E-\x1F] # Control characters
| [\xC0-\xC1] # Invalid UTF-8 Bytes
| [\xF5-\xFF] # Invalid UTF-8 Bytes
| \xE0[\x80-\x9F] # Overlong encoding of prior code point
| \xF0[\x80-\x8F] # Overlong encoding of prior code point
| \xE0(?=[\x80-\x9F]) # Overlong encoding of prior code point
| \xF0(?=[\x80-\x8F]) # Overlong encoding of prior code point
| [\xC2-\xDF](?![\x80-\xBF]) # Invalid UTF-8 Sequence Start
| [\xE0-\xEF](?![\x80-\xBF]{2}) # Invalid UTF-8 Sequence Start
| [\xF0-\xF4](?![\x80-\xBF]{3}) # Invalid UTF-8 Sequence Start
@ -1016,6 +1016,7 @@ class Standard extends PrettyPrinterAbstract
| (?<=[\xF0-\xF4][\x80-\xBF])[\x80-\xBF](?![\x80-\xBF]) # Short 4 byte sequence (2)
)/x';
return preg_replace_callback($regex, function ($matches) {
assert(strlen($matches[0]) === 1);
$hex = dechex(ord($matches[0]));;
return '\\x' . str_pad($hex, 2, '0', \STR_PAD_LEFT);
}, $escaped);

View File

@ -7,6 +7,7 @@ Escape sequences in double-quoted strings
"äöü";
"\xc0\x80";
"\xd0\x01";
"\xf0\x80\x80";
<<<DOC
\n\r\t\f\v\$\"\\
@ -22,6 +23,7 @@ DOC;
"äöü";
"\xc0\x80";
"\xd0\x01";
"\xf0\x80\x80";
<<<DOC
@@{ "\n\r" }@@\t\f\v\$\\"\\
\x00\x01\x02\x03\x04\x05\x06\x07\x08\t@@{ "\n" }@@\v\f@@{ "\r" }@@\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f