lexer = $lexer;
        $this->errors = array();
        $this->throwOnError = isset($options['throwOnError']) ? $options['throwOnError'] : true;
    }

    /**
     * Get array of errors that occurred during the last parse.
     *
     * This method may only return multiple errors if the 'throwOnError' option is disabled.
     *
     * @return Error[]
     */
    public function getErrors() {
        return $this->errors;
    }

    /**
     * Parses PHP code into a node tree.
     *
     * The method drives a table-based shift/reduce loop: tokens are pulled from the lexer, mapped to internal
     * symbols, and then either shifted onto the state/semantic stacks or used to pick a rule to reduce. Reductions
     * are dispatched to the 'reduceRule<N>' methods of this object.
     *
     * @param string $code The source code to parse
     *
     * @return Node[]|null Array of statements (or null if the 'throwOnError' option is disabled and the parser was
     *                     unable to recover from an error).
     *
     * @throws Error             On a syntax error, or an error raised by a semantic action, if the 'throwOnError'
     *                           option is enabled
     * @throws \RangeException   If the lexer returns a token id that cannot be mapped to a symbol
     * @throws \RuntimeException Safeguard after the main loop; the loop itself only exits via return or throw
     */
    public function parse($code) {
        $this->lexer->startLexing($code);
        $this->errors = array();

        // We start off with no lookahead-token
        $symbol = self::SYMBOL_NONE;

        // The attributes for a node are taken from the first and last token of the node.
        // From the first token only the startAttributes are taken and from the last only
        // the endAttributes. Both are merged using the array union operator (+).
        // The placeholder strings are overwritten by-ref as soon as the first token is fetched from the lexer.
        $startAttributes = '*POISON';
        $endAttributes = '*POISON';
        $this->endAttributes = $endAttributes;

        // In order to figure out the attributes for the starting token, we have to keep
        // them in a stack
        $this->startAttributeStack = array();

        // Start off in the initial state and keep a stack of previous states
        $state = 0;
        $stateStack = array($state);

        // Semantic value stack (contains values of tokens and semantic action results)
        $this->semStack = array();

        // Current position in the stack(s)
        $this->stackPos = 0;

        // Error recovery counter: 0 means "not recovering". It is set to 3 when recovery starts and is
        // decremented on every successful shift, so a new error is only reported once it is back at 0.
        $errorState = 0;

        for (;;) {
            //$this->traceNewState($state, $symbol);

            if ($this->actionBase[$state] == 0) {
                // No lookahead-dependent actions for this state: use its default rule without reading a token
                $rule = $this->actionDefault[$state];
            } else {
                if ($symbol === self::SYMBOL_NONE) {
                    // Fetch the next token id from the lexer and fetch additional info by-ref.
                    // The end attributes are fetched into a temporary variable and only set once the token is really
                    // shifted (not during read). Otherwise you would sometimes get off-by-one errors, when a rule is
                    // reduced after a token was read but not yet shifted.
                    $tokenId = $this->lexer->getNextToken($tokenValue, $startAttributes, $endAttributes);

                    // map the lexer token id to the internally used symbols
                    $symbol = $tokenId >= 0 && $tokenId < $this->tokenToSymbolMapSize
                        ? $this->tokenToSymbol[$tokenId]
                        : $this->invalidSymbol;

                    if ($symbol === $this->invalidSymbol) {
                        throw new \RangeException(sprintf(
                            'The lexer returned an invalid token (id=%d, value=%s)',
                            $tokenId, $tokenValue
                        ));
                    }

                    // This is necessary to assign some meaningful attributes to /* empty */ productions. They'll get
                    // the attributes of the next token, even though they don't contain it themselves.
                    $this->startAttributeStack[$this->stackPos+1] = $startAttributes;

                    //$this->traceRead($symbol);
                }

                // Look up the action for (state, symbol). The second alternative retries the lookup with the
                // base entry at offset YYNLSTATES for states below YY2TBLSTATE. Note that $idx and $action are
                // assigned inside the condition and are used afterwards.
                $idx = $this->actionBase[$state] + $symbol;
                if ((($idx >= 0 && $idx < $this->actionTableSize && $this->actionCheck[$idx] == $symbol)
                     || ($state < $this->YY2TBLSTATE
                         && ($idx = $this->actionBase[$state + $this->YYNLSTATES] + $symbol) >= 0
                         && $idx < $this->actionTableSize && $this->actionCheck[$idx] == $symbol))
                    && ($action = $this->action[$idx]) != $this->defaultAction) {
                    /*
                     * >= YYNLSTATES: shift and reduce
                     * > 0: shift
                     * = 0: accept
                     * < 0: reduce
                     * = -YYUNEXPECTED: error
                     */
                    if ($action > 0) {
                        /* shift */
                        //$this->traceShift($symbol);

                        ++$this->stackPos;
                        $stateStack[$this->stackPos] = $state = $action;
                        $this->semStack[$this->stackPos] = $tokenValue;
                        $this->startAttributeStack[$this->stackPos] = $startAttributes;
                        $this->endAttributes = $endAttributes;
                        // The lookahead has been consumed
                        $symbol = self::SYMBOL_NONE;

                        if ($errorState) {
                            --$errorState;
                        }

                        if ($action < $this->YYNLSTATES) {
                            continue;
                        }

                        /* $yyn >= YYNLSTATES means shift-and-reduce */
                        $rule = $action - $this->YYNLSTATES;
                    } else {
                        $rule = -$action;
                    }
                } else {
                    $rule = $this->actionDefault[$state];
                }
            }

            // Apply $rule, and keep applying follow-up rules for as long as the goto target is a
            // shift-and-reduce state.
            for (;;) {
                if ($rule === 0) {
                    /* accept */
                    //$this->traceAccept();
                    return $this->semValue;
                } elseif ($rule !== $this->unexpectedTokenRule) {
                    /* reduce */
                    //$this->traceReduce($rule);

                    try {
                        $this->{'reduceRule' . $rule}();
                    } catch (Error $e) {
                        // Errors created without a line (-1) get the start line of the most recently read token
                        if (-1 === $e->getStartLine() && isset($startAttributes['startLine'])) {
                            $e->setStartLine($startAttributes['startLine']);
                        }

                        $this->errors[] = $e;
                        if ($this->throwOnError) {
                            throw $e;
                        } else {
                            // Currently can't recover from "special" errors
                            return null;
                        }
                    }

                    /* Goto - shift nonterminal */
                    $this->stackPos -= $this->ruleToLength[$rule];
                    $nonTerminal = $this->ruleToNonTerminal[$rule];
                    $idx = $this->gotoBase[$nonTerminal] + $stateStack[$this->stackPos];
                    if ($idx >= 0 && $idx < $this->gotoTableSize && $this->gotoCheck[$idx] == $nonTerminal) {
                        $state = $this->goto[$idx];
                    } else {
                        $state = $this->gotoDefault[$nonTerminal];
                    }

                    ++$this->stackPos;
                    $stateStack[$this->stackPos]     = $state;
                    $this->semStack[$this->stackPos] = $this->semValue;
                } else {
                    /* error */
                    switch ($errorState) {
                        case 0:
                            // Only report an error when we are not already recovering from a previous one
                            $msg = $this->getErrorMessage($symbol, $state);
                            $error = new Error($msg, $startAttributes + $endAttributes);
                            $this->errors[] = $error;
                            if ($this->throwOnError) {
                                throw $error;
                            }
                            // Break missing intentionally
                        case 1:
                        case 2:
                            $errorState = 3;

                            // Pop until error-expecting state uncovered
                            while (!(
                                (($idx = $this->actionBase[$state] + $this->errorSymbol) >= 0
                                    && $idx < $this->actionTableSize && $this->actionCheck[$idx] == $this->errorSymbol)
                                || ($state < $this->YY2TBLSTATE
                                    && ($idx = $this->actionBase[$state + $this->YYNLSTATES] + $this->errorSymbol) >= 0
                                    && $idx < $this->actionTableSize && $this->actionCheck[$idx] == $this->errorSymbol)
                                ) || ($action = $this->action[$idx]) == $this->defaultAction) { // Not totally sure about this
                                if ($this->stackPos <= 0) {
                                    // Could not recover from error
                                    return null;
                                }
                                $state = $stateStack[--$this->stackPos];
                                //$this->tracePop($state);
                            }

                            //$this->traceShift($this->errorSymbol);
                            $stateStack[++$this->stackPos] = $state = $action;
                            break;

                        case 3:
                            if ($symbol === 0) {
                                // Reached EOF without recovering from error
                                return null;
                            }

                            // Discard the offending lookahead. "break 2" leaves both the switch and the inner
                            // loop, so the outer loop fetches the next token.
                            //$this->traceDiscard($symbol);
                            $symbol = self::SYMBOL_NONE;
                            break 2;
                    }
                }

                if ($state < $this->YYNLSTATES) {
                    break;
                }

                /* >= YYNLSTATES means shift-and-reduce */
                $rule = $state - $this->YYNLSTATES;
            }
        }

        throw new \RuntimeException('Reached end of parser loop');
    }

    /**
     * Builds the message for a syntax error.
     *
     * The message has the form "Syntax error, unexpected X", followed by ", expecting A or B ..." if
     * getExpectedTokens() returns any names for the state.
     *
     * @param int $symbol Symbol that was not expected
     * @param int $state  State in which the symbol was encountered
     *
     * @return string Error message
     */
    protected function getErrorMessage($symbol, $state) {
        $expectedString = '';
        if ($expected = $this->getExpectedTokens($state)) {
            $expectedString = ', expecting ' . implode(' or ', $expected);
        }

        return 'Syntax error, unexpected ' . $this->symbolToName[$symbol] . $expectedString;
    }

    /**
     * Collects the names of the symbols that have an explicit action in the given state.
     *
     * The table lookup mirrors the one performed in parse(). If more than four symbols qualify, an empty
     * array is returned instead, so that the error message does not list an overly long set of alternatives.
     *
     * @param int $state State to inspect
     *
     * @return string[] Names of the expected symbols (at most four; empty if there are none or too many)
     */
    protected function getExpectedTokens($state) {
        $expected = array();

        $base = $this->actionBase[$state];
        foreach ($this->symbolToName as $symbol => $name) {
            $idx = $base + $symbol;
            if ($idx >= 0 && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $symbol
                || $state < $this->YY2TBLSTATE
                && ($idx = $this->actionBase[$state + $this->YYNLSTATES] + $symbol) >= 0
                && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $symbol
            ) {
                // Only report symbols parse() would actually act on:
                //  - not the unexpected-token rule,
                //  - not the default-action placeholder (parse() falls back to actionDefault for those),
                //  - not the error symbol, which parse() only consults during error recovery and which
                //    therefore should not show up in a user-facing "expecting ..." list.
                if ($this->action[$idx] != $this->unexpectedTokenRule
                    && $this->action[$idx] != $this->defaultAction
                    && $symbol != $this->errorSymbol
                ) {
                    if (count($expected) == 4) {
                        /* Too many expected tokens */
                        return array();
                    }

                    $expected[] = $name;
                }
            }
        }

        return $expected;
    }

    /*
     * Tracing functions used for debugging the parser.
     *
     * Each of them echoes a single line prefixed with '% '. The corresponding calls in parse() are
     * commented out and have to be enabled by hand.
     */

    protected function traceNewState($state, $symbol) {
        echo '% State ' . $state
            . ', Lookahead ' . ($symbol == self::SYMBOL_NONE ? '--none--' : $this->symbolToName[$symbol]) . "\n";
    }

    protected function traceRead($symbol) {
        echo '% Reading ' . $this->symbolToName[$symbol] . "\n";
    }

    protected function traceShift($symbol) {
        echo '% Shift ' . $this->symbolToName[$symbol] . "\n";
    }

    protected function traceAccept() {
        echo "% Accepted.\n";
    }

    protected function traceReduce($n) {
        echo '% Reduce by (' . $n . ') ' . $this->productions[$n] . "\n";
    }

    protected function tracePop($state) {
        echo '% Recovering, uncovered state ' . $state . "\n";
    }

    protected function traceDiscard($symbol) {
        echo '% Discard ' . $this->symbolToName[$symbol] . "\n";
    }

    /*
     * Helper functions invoked by semantic actions
     */

    /**
     * Moves statements of semicolon-style namespaces into $ns->stmts and checks various error conditions.
     *
     * Statements are returned unchanged if no namespace declaration is present. For brace-style namespaces
     * the statements are only validated. __halt_compiler() statements are always left at the top level.
     *
     * @param Node[] $stmts
     * @return Node[]
     *
     * @throws Error If code other than __halt_compiler() follows a braced namespace at the top level, or if
     *               getNamespacingStyle() rejects the statements
     */
    protected function handleNamespaces(array $stmts) {
        $style = $this->getNamespacingStyle($stmts);
        if (null === $style) {
            // not namespaced, nothing to do
            return $stmts;
        } elseif ('brace' === $style) {
            // For braced namespaces we only have to check that there are no invalid statements between the namespaces
            $afterFirstNamespace = false;
            foreach ($stmts as $stmt) {
                if ($stmt instanceof Node\Stmt\Namespace_) {
                    $afterFirstNamespace = true;
                } elseif (!$stmt instanceof Node\Stmt\HaltCompiler && $afterFirstNamespace) {
                    throw new Error('No code may exist outside of namespace {}', $stmt->getLine());
                }
            }
            return $stmts;
        } else {
            // For semicolon namespaces we have to move the statements after a namespace declaration into ->stmts
            $resultStmts = array();
            // Reference to the list that currently receives statements: the top level until the first
            // namespace declaration is seen, the ->stmts of the most recent namespace afterwards
            $targetStmts =& $resultStmts;
            foreach ($stmts as $stmt) {
                if ($stmt instanceof Node\Stmt\Namespace_) {
                    $stmt->stmts = array();
                    $targetStmts =& $stmt->stmts;
                    $resultStmts[] = $stmt;
                } elseif ($stmt instanceof Node\Stmt\HaltCompiler) {
                    // __halt_compiler() is not moved into the namespace
                    $resultStmts[] = $stmt;
                } else {
                    $targetStmts[] = $stmt;
                }
            }
            return $resultStmts;
        }
    }

    /**
     * Determines which namespace declaration style the given top-level statements use.
     *
     * A namespace statement whose ->stmts is null counts as 'semicolon' style, any other as 'brace' style.
     *
     * @param Node[] $stmts
     * @return null|string null if there is no namespace declaration, otherwise 'semicolon' or 'brace'
     *
     * @throws Error If the first namespace declaration is preceded by a statement other than declare or
     *               __halt_compiler(), or if both styles are mixed
     */
    private function getNamespacingStyle(array $stmts) {
        $style = null;
        $hasNotAllowedStmts = false;
        foreach ($stmts as $stmt) {
            if ($stmt instanceof Node\Stmt\Namespace_) {
                $currentStyle = null === $stmt->stmts ? 'semicolon' : 'brace';
                if (null === $style) {
                    $style = $currentStyle;
                    if ($hasNotAllowedStmts) {
                        throw new Error('Namespace declaration statement has to be the very first statement in the script', $stmt->getLine());
                    }
                } elseif ($style !== $currentStyle) {
                    throw new Error('Cannot mix bracketed namespace declarations with unbracketed namespace declarations', $stmt->getLine());
                }
            } elseif (!$stmt instanceof Node\Stmt\Declare_ && !$stmt instanceof Node\Stmt\HaltCompiler) {
                $hasNotAllowedStmts = true;
            }
        }
        return $style;
    }
}