/*
 * Preprocessing helpers for the "phpy" pseudo-language used in the parser grammar files.
 *
 * The grammar build script (grammar/rebuildParsers.php) loads the grammar source,
 * substitutes the %tokens placeholder and then passes the result through
 * preprocessGrammar() before writing the temporary grammar file.
 */

///////////////////////////////
/// Utility regex constants ///
///////////////////////////////

// Library of named sub-patterns, only ever invoked through (?&name) subroutine calls.
// Used to skip over string literals, block comments and {...} code blocks while splitting.
const LIB = '(?(DEFINE)
    (?<singleQuotedString>\'[^\\\\\']*+(?:\\\\.[^\\\\\']*+)*+\')
    (?<doubleQuotedString>"[^\\\\"]*+(?:\\\\.[^\\\\"]*+)*+")
    (?<string>(?&singleQuotedString)|(?&doubleQuotedString))
    (?<comment>/\*[^*]*+(?:\*(?!/)[^*]*+)*+\*/)
    (?<code>\{[^\'"/{}]*+(?:(?:(?&string)|(?&comment)|(?&code)|/)[^\'"/{}]*+)*+})
)';

// Balanced [...] list; the content between the outer brackets is captured as "params".
const PARAMS = '\[(?<params>[^[\]]*+(?:\[(?&params)\][^[\]]*+)*+)\]';
// Balanced (...) list; the content between the outer parentheses is captured as "args".
const ARGS   = '\((?<args>[^()]*+(?:\((?&args)\)[^()]*+)*+)\)';

///////////////////////////////
/// Preprocessing functions ///
///////////////////////////////

/**
 * Runs all preprocessing passes over grammar code.
 *
 * The passes run in a fixed order: node shorthand, then macros, then stack access.
 * The order matters because resolveNodes() emits an attributes() macro call and
 * resolveMacros() emits #N placeholders that resolveStackAccess() rewrites last.
 *
 * @param string $code Grammar source code
 *
 * @return string Preprocessed grammar source code
 *
 * @throws RuntimeException         If a PCRE operation fails
 * @throws InvalidArgumentException If a macro is used with a wrong argument count
 */
function preprocessGrammar($code) {
    $code = resolveNodes($code);
    $code = resolveMacros($code);
    $code = resolveStackAccess($code);

    return $code;
}

/**
 * Rewrites the node shorthand "Name[a, b]" to "new Name(a, b, attributes())".
 *
 * A name starts with an uppercase letter and may contain letters, underscores and
 * namespace separators. Shorthands nested inside the parameter list are rewritten too.
 *
 * @param string $code Grammar source code
 *
 * @return string Code with node shorthands expanded
 *
 * @throws RuntimeException If a PCRE operation fails
 */
function resolveNodes($code) {
    $result = preg_replace_callback(
        '~\b(?<name>[A-Z][a-zA-Z_\\\\]++)\s*' . PARAMS . '~',
        function($matches) {
            // recurse
            $matches['params'] = resolveNodes($matches['params']);

            // Split on top-level commas only: commas inside nested [...] / (...) are skipped.
            $params = magicSplit(
                '(?:' . PARAMS . '|' . ARGS . ')(*SKIP)(*FAIL)|,',
                $matches['params']
            );

            $paramCode = '';
            foreach ($params as $param) {
                $paramCode .= $param . ', ';
            }

            return 'new ' . $matches['name'] . '(' . $paramCode . 'attributes())';
        },
        $code
    );

    return ensurePregSucceeded($result, 'preg_replace_callback');
}

/**
 * Expands the grammar macros (attributes(), init(), push(), ...) into PHP code.
 *
 * Calls whose name is not a known macro are returned verbatim; in that case the
 * recursively processed argument list is discarded, so the call text is unchanged.
 *
 * @param string $code Grammar source code
 *
 * @return string Code with macros expanded
 *
 * @throws RuntimeException         If a PCRE operation fails
 * @throws InvalidArgumentException If a macro is used with a wrong argument count
 */
function resolveMacros($code) {
    // NOTE(review): the lookbehind was lost in the mangled source this file was restored from;
    // it is reconstructed as (?<!::|->) so that method and static calls are never treated as
    // macros - confirm against the upstream grammar tooling.
    $result = preg_replace_callback(
        '~\b(?<!::|->)(?!array\()(?<name>[a-z][A-Za-z]++)' . ARGS . '~',
        function($matches) {
            // recurse
            $matches['args'] = resolveMacros($matches['args']);

            $name = $matches['name'];
            $args = magicSplit(
                '(?:' . PARAMS . '|' . ARGS . ')(*SKIP)(*FAIL)|,',
                $matches['args']
            );

            if ('attributes' === $name) {
                assertArgs(0, $args, $name);
                return '$this->startAttributeStack[#1] + $this->endAttributes';
            }

            if ('stackAttributes' === $name) {
                assertArgs(1, $args, $name);
                return '$this->startAttributeStack[' . $args[0] . ']'
                     . ' + $this->endAttributeStack[' . $args[0] . ']';
            }

            if ('init' === $name) {
                // Variadic: any number of arguments is accepted.
                return '$$ = array(' . implode(', ', $args) . ')';
            }

            if ('push' === $name) {
                assertArgs(2, $args, $name);

                return $args[0] . '[] = ' . $args[1] . '; $$ = ' . $args[0];
            }

            if ('pushNormalizing' === $name) {
                assertArgs(2, $args, $name);

                return 'if (is_array(' . $args[1] . ')) { $$ = array_merge(' . $args[0] . ', ' . $args[1] . '); }'
                     . ' else { ' . $args[0] . '[] = ' . $args[1] . '; $$ = ' . $args[0] . '; }';
            }

            if ('toArray' === $name) {
                assertArgs(1, $args, $name);

                return 'is_array(' . $args[0] . ') ? ' . $args[0] . ' : array(' . $args[0] . ')';
            }

            if ('parseVar' === $name) {
                assertArgs(1, $args, $name);

                return 'substr(' . $args[0] . ', 1)';
            }

            if ('parseEncapsed' === $name) {
                assertArgs(3, $args, $name);

                return 'foreach (' . $args[0] . ' as $s) { if ($s instanceof Node\Scalar\EncapsedStringPart) {'
                     . ' $s->value = Node\Scalar\String_::parseEscapeSequences($s->value, ' . $args[1] . ', ' . $args[2] . '); } }';
            }

            if ('makeNop' === $name) {
                assertArgs(3, $args, $name);

                return '$startAttributes = ' . $args[1] . ';'
                     . ' if (isset($startAttributes[\'comments\']))'
                     . ' { ' . $args[0] . ' = new Stmt\Nop($startAttributes + ' . $args[2] . '); }'
                     . ' else { ' . $args[0] . ' = null; }';
            }

            if ('makeZeroLengthNop' === $name) {
                assertArgs(2, $args, $name);

                return '$startAttributes = ' . $args[1] . ';'
                     . ' if (isset($startAttributes[\'comments\']))'
                     . ' { ' . $args[0] . ' = new Stmt\Nop($this->createCommentNopAttributes($startAttributes[\'comments\'])); }'
                     . ' else { ' . $args[0] . ' = null; }';
            }

            if ('strKind' === $name) {
                assertArgs(1, $args, $name);

                return '(' . $args[0] . '[0] === "\'" || (' . $args[0] . '[1] === "\'" && '
                     . '(' . $args[0] . '[0] === \'b\' || ' . $args[0] . '[0] === \'B\')) '
                     . '? Scalar\String_::KIND_SINGLE_QUOTED : Scalar\String_::KIND_DOUBLE_QUOTED)';
            }

            if ('prependLeadingComments' === $name) {
                assertArgs(1, $args, $name);

                return '$attrs = $this->startAttributeStack[#1]; $stmts = ' . $args[0] . '; '
                     . 'if (!empty($attrs[\'comments\'])) {'
                     . '$stmts[0]->setAttribute(\'comments\', '
                     . 'array_merge($attrs[\'comments\'], $stmts[0]->getAttribute(\'comments\', []))); }';
            }

            // Not a macro: leave the call exactly as written.
            return $matches[0];
        },
        $code
    );

    return ensurePregSucceeded($result, 'preg_replace_callback');
}

/**
 * Verifies that a macro received the expected number of arguments.
 *
 * Throws instead of calling die(): die() with a string message terminates with exit
 * status 0, which would let a broken grammar pass as a successful build.
 *
 * @param int      $num  Expected argument count
 * @param string[] $args Arguments the macro was called with
 * @param string   $name Macro name, used in the error message
 *
 * @throws InvalidArgumentException If the argument count does not match
 */
function assertArgs($num, $args, $name) {
    if ($num !== count($args)) {
        throw new InvalidArgumentException('Wrong argument count for ' . $name . '().');
    }
}

/**
 * Rewrites semantic stack references.
 *
 * "$N" becomes "$this->semStack[$N]" and, afterwards, "#N" becomes "$N". Running the
 * "#N" pass second keeps the freshly produced "$N" from being wrapped again.
 *
 * @param string $code Grammar source code
 *
 * @return string Code with stack references rewritten
 *
 * @throws RuntimeException If a PCRE operation fails
 */
function resolveStackAccess($code) {
    // In the replacement, "$0" is the whole match; "$this" is not a backreference and stays literal.
    $code = ensurePregSucceeded(preg_replace('/\$\d+/', '$this->semStack[$0]', $code), 'preg_replace');
    // In the replacement, "$$1" is a literal "$" followed by capture group 1.
    $code = ensurePregSucceeded(preg_replace('/#(\d+)/', '$$1', $code), 'preg_replace');
    return $code;
}

/**
 * Strips trailing whitespace from every line of the given code.
 *
 * @param string $code Code using "\n" line endings
 *
 * @return string Code with each line passed through rtrim()
 */
function removeTrailingWhitespace($code) {
    $lines = explode("\n", $code);
    $lines = array_map('rtrim', $lines);
    return implode("\n", $lines);
}

//////////////////////////////
/// Regex helper functions ///
//////////////////////////////

/**
 * Wraps a pattern fragment into a full "~"-delimited regex with the LIB definitions prepended.
 *
 * @param string $regex Pattern fragment without delimiters; may use the LIB sub-patterns
 *
 * @return string Complete regex
 */
function regex($regex) {
    return '~' . LIB . '(?:' . str_replace('~', '\~', $regex) . ')~';
}

/**
 * Splits a string on a pattern while ignoring matches inside strings, comments and code blocks.
 *
 * Every piece is trimmed. An input that yields a single empty piece (empty or
 * whitespace-only string) produces an empty array.
 *
 * @param string $regex  Separator pattern fragment (without delimiters)
 * @param string $string String to split
 *
 * @return string[] Trimmed pieces
 *
 * @throws RuntimeException If preg_split() fails
 */
function magicSplit($regex, $string) {
    $pieces = preg_split(regex('(?:(?&string)|(?&comment)|(?&code))(*SKIP)(*FAIL)|' . $regex), $string);
    $pieces = array_map('trim', ensurePregSucceeded($pieces, 'preg_split'));

    if ($pieces === ['']) {
        return [];
    }

    return $pieces;
}

/**
 * Returns the result of a preg_* call, or throws if the call reported a failure.
 *
 * preg_replace()/preg_replace_callback() signal failure with null and preg_split() with
 * false; passing such a value on would silently produce an empty or broken grammar.
 *
 * @param mixed  $result   Return value of the preg_* call
 * @param string $function Name of the preg_* function, used in the error message
 *
 * @return mixed The unchanged $result
 *
 * @throws RuntimeException If $result is null or false
 */
function ensurePregSucceeded($result, $function) {
    if (null === $result || false === $result) {
        throw new RuntimeException($function . '() failed with PCRE error code ' . preg_last_error() . '.');
    }

    return $result;
}