2014-04-20 00:08:59 +02:00
|
|
|
<?php
|
|
|
|
|
|
|
|
namespace PhpParser;
|
|
|
|
|
|
|
|
/*
 * This parser is based on a skeleton written by Moriyoshi Koizumi, which in
 * turn is based on work by Masato Bito.
 */
|
|
|
|
/**
 * Table-driven LALR parser driver.
 *
 * This class contains only the generic shift/reduce loop. The parser tables,
 * the table-size constants and the reduce callbacks (methods named
 * "yyn<rule number>") have to be supplied by the extending class.
 */
abstract class ParserAbstract
{
    /* The following dummy data must be provided by the extending class: */

    /** Internal token id that marks a lexer token which has no mapping */
    const TOKEN_INVALID = 0;
    /** Number of entries in the $translate map (exclusive upper bound for lexer token ids) */
    const TOKEN_MAP_SIZE = 0;

    /** Exclusive upper bound for indices into $yyaction / $yycheck */
    const YYLAST = 0;
    /** States below this value have a second $yybase entry at offset YYNLSTATES */
    const YY2TBLSTATE = 0;
    /** Exclusive upper bound for indices into $yygoto / $yygcheck */
    const YYGLAST = 0;
    /** Actions/states greater or equal to this value encode "shift and reduce" */
    const YYNLSTATES = 0;
    /** Rule number that signals a syntax error */
    const YYUNEXPECTED = 0;
    /** Action value that signals "use the default action of the state" */
    const YYDEFAULT = 0;

    /** @var array Map of token ids to their respective names */
    protected $terminals;
    /** @var array Map which translates lexer tokens to internal tokens */
    protected $translate;

    /** @var array Action table, indexed by (base of state + token id) */
    protected $yyaction;
    /** @var array Check table: an action table slot is only valid if it holds the looked-up token id */
    protected $yycheck;
    /** @var array Map of states to their offset into the action table (0 = state only has a default action) */
    protected $yybase;
    /** @var array Map of states to their default action */
    protected $yydefault;

    /** @var array Goto table, indexed by (base of nonterminal + state) */
    protected $yygoto;
    /** @var array Check table: a goto table slot is only valid if it holds the looked-up nonterminal */
    protected $yygcheck;
    /** @var array Map of nonterminals to their offset into the goto table */
    protected $yygbase;
    /** @var array Map of nonterminals to their default goto state */
    protected $yygdefault;

    /** @var array Map of rules to the nonterminal on their left-hand side */
    protected $yylhs;
    /** @var array Map of rules to the number of stack entries they consume */
    protected $yylen;

    /* This is optional data only necessary when debugging */

    /** @var array Map of rules to a printable form of the production (only read by traceReduce()) */
    protected $yyproduction;

    /* End of dummy data */

    /** Marker for "no lookahead token has been read yet" */
    const TOKEN_NONE = -1;

    /** @var mixed Value pushed after a reduction and returned on accept; presumably written by the yynN methods */
    protected $yyval;
    /** @var array Semantic value stack (holds token values and the values produced by reductions) */
    protected $yyastk;
    /** @var int Current position in the state, value and attribute stacks */
    protected $stackPos;
    /** @var Lexer Lexer that is used when parsing */
    protected $lexer;

    /**
     * Creates a parser instance.
     *
     * @param Lexer $lexer A lexer
     */
    public function __construct(Lexer $lexer) {
        $this->lexer = $lexer;
    }

    /**
     * Parses PHP code into a node tree.
     *
     * @param string $code The source code to parse
     *
     * @return Node[] Array of statements
     *
     * @throws Error           On a syntax error, or when a reduce callback throws an Error
     * @throws \RangeException If the lexer returns a token id that cannot be translated
     */
    public function parse($code) {
        $this->lexer->startLexing($code);

        // We start off with no lookahead-token
        $tokenId = self::TOKEN_NONE;

        // The attributes for a node are taken from the first and last token of the node.
        // From the first token only the startAttributes are taken and from the last only
        // the endAttributes. Both are merged using the array union operator (+).
        $startAttributes = array('startLine' => 1);
        $endAttributes   = array();

        // In order to figure out the attributes for the starting token, we have to keep
        // them in a stack
        $attributeStack = array($startAttributes);

        // Start off in the initial state and keep a stack of previous states
        $state = 0;
        $stateStack = array($state);

        // Semantic value stack: receives the token value on every shift and
        // $this->yyval after every reduction
        $this->yyastk = array();

        // Current position in the stack(s)
        $this->stackPos = 0;

        for (;;) {
            //$this->traceNewState($state, $tokenId);

            if ($this->yybase[$state] == 0) {
                // No table offset for this state: only the default action applies,
                // so no lookahead token has to be read
                $yyn = $this->yydefault[$state];
            } else {
                if ($tokenId === self::TOKEN_NONE) {
                    // Fetch the next token id from the lexer and fetch additional info by-ref.
                    // The end attributes are fetched into a temporary variable and only set once the token is really
                    // shifted (not during read). Otherwise you would sometimes get off-by-one errors, when a rule is
                    // reduced after a token was read but not yet shifted.
                    $origTokenId = $this->lexer->getNextToken($tokenValue, $startAttributes, $nextEndAttributes);

                    // map the lexer token id to the internally used token id's
                    $tokenId = $origTokenId >= 0 && $origTokenId < static::TOKEN_MAP_SIZE
                        ? $this->translate[$origTokenId]
                        : static::TOKEN_INVALID;

                    if ($tokenId === static::TOKEN_INVALID) {
                        throw new \RangeException(sprintf(
                            'The lexer returned an invalid token (id=%d, value=%s)',
                            $origTokenId, $tokenValue
                        ));
                    }

                    $attributeStack[$this->stackPos] = $startAttributes;

                    //$this->traceRead($tokenId);
                }

                // Look up the action for ($state, $tokenId). States below YY2TBLSTATE
                // have a second base entry that is tried if the first lookup misses.
                if ((($yyn = $this->yybase[$state] + $tokenId) >= 0
                     && $yyn < static::YYLAST && $this->yycheck[$yyn] == $tokenId
                     || ($state < static::YY2TBLSTATE
                        && ($yyn = $this->yybase[$state + static::YYNLSTATES] + $tokenId) >= 0
                        && $yyn < static::YYLAST
                        && $this->yycheck[$yyn] == $tokenId))
                    && ($yyn = $this->yyaction[$yyn]) != static::YYDEFAULT) {
                    /*
                     * >= YYNLSTATES: shift and reduce
                     * > 0: shift
                     * = 0: accept
                     * < 0: reduce
                     * = -YYUNEXPECTED: error
                     */
                    if ($yyn > 0) {
                        /* shift */
                        //$this->traceShift($tokenId);

                        ++$this->stackPos;
                        $stateStack[$this->stackPos]     = $state = $yyn;
                        $this->yyastk[$this->stackPos]   = $tokenValue;
                        $attributeStack[$this->stackPos] = $startAttributes;
                        $endAttributes = $nextEndAttributes;
                        $tokenId = self::TOKEN_NONE;

                        if ($yyn < static::YYNLSTATES) {
                            continue;
                        }

                        /* $yyn >= YYNLSTATES means shift-and-reduce */
                        $yyn -= static::YYNLSTATES;
                    } else {
                        $yyn = -$yyn;
                    }
                } else {
                    $yyn = $this->yydefault[$state];
                }
            }

            for (;;) {
                /* reduce/error */
                if ($yyn == 0) {
                    /* accept */
                    // $this->traceAccept();
                    return $this->yyval;
                } elseif ($yyn != static::YYUNEXPECTED) {
                    /* reduce */
                    // $this->traceReduce($yyn);

                    try {
                        // Invoke the reduce callback of rule $yyn. It receives the start
                        // attributes of the first consumed stack entry merged with the
                        // end attributes of the last shifted token.
                        $this->{'yyn' . $yyn}(
                            $attributeStack[$this->stackPos - $this->yylen[$yyn]]
                            + $endAttributes
                        );
                    } catch (Error $e) {
                        // Errors without a line get the line of the most recently read
                        // token. $startAttributes is filled by the lexer, so the key is
                        // not guaranteed to exist.
                        if (-1 === $e->getRawLine() && isset($startAttributes['startLine'])) {
                            $e->setRawLine($startAttributes['startLine']);
                        }

                        throw $e;
                    }

                    /* Goto - shift nonterminal */
                    $this->stackPos -= $this->yylen[$yyn];
                    $yyn = $this->yylhs[$yyn];
                    if (($yyp = $this->yygbase[$yyn] + $stateStack[$this->stackPos]) >= 0
                         && $yyp < static::YYGLAST
                         && $this->yygcheck[$yyp] == $yyn) {
                        $state = $this->yygoto[$yyp];
                    } else {
                        $state = $this->yygdefault[$yyn];
                    }

                    ++$this->stackPos;

                    $stateStack[$this->stackPos]     = $state;
                    $this->yyastk[$this->stackPos]   = $this->yyval;
                    $attributeStack[$this->stackPos] = $startAttributes;
                } else {
                    /* error */
                    $expected = $this->getExpectedTokens($state);

                    $expectedString = '';
                    if ($expected) {
                        $expectedString = ', expecting ' . implode(' or ', $expected);
                    }

                    // -1 is the value getRawLine() reports for "no line known" (see the
                    // catch block above); NOTE(review): confirm Error accepts it as line.
                    $errorLine = isset($startAttributes['startLine'])
                        ? $startAttributes['startLine']
                        : -1;

                    throw new Error(
                        'Syntax error, unexpected ' . $this->terminals[$tokenId] . $expectedString,
                        $errorLine
                    );
                }

                if ($state < static::YYNLSTATES) {
                    break;
                }

                /* >= YYNLSTATES means shift-and-reduce */
                $yyn = $state - static::YYNLSTATES;
            }
        }
    }

    /**
     * Collects the names of the tokens that have a non-error action in a state.
     *
     * Used to build the "expecting ..." part of syntax error messages.
     *
     * @param int $state State to inspect
     *
     * @return string[] Names of the expected tokens, or an empty array if more
     *                  than four tokens would be acceptable
     */
    private function getExpectedTokens($state) {
        $expected = array();

        $base = $this->yybase[$state];
        for ($i = 0; $i < static::TOKEN_MAP_SIZE; ++$i) {
            $n = $base + $i;
            // Same two-step table lookup as in parse(), applied to every token id
            if ($n >= 0 && $n < static::YYLAST && $this->yycheck[$n] == $i
                || $state < static::YY2TBLSTATE
                && ($n = $this->yybase[$state + static::YYNLSTATES] + $i) >= 0
                && $n < static::YYLAST && $this->yycheck[$n] == $i
            ) {
                if ($this->yyaction[$n] != static::YYUNEXPECTED) {
                    if (count($expected) === 4) {
                        /* Too many expected tokens */
                        return array();
                    }

                    $expected[] = $this->terminals[$i];
                }
            }
        }

        return $expected;
    }

    /**
     * Debug helper: prints the current state and lookahead token.
     *
     * @param int $state   Current parser state
     * @param int $tokenId Internal id of the lookahead token or TOKEN_NONE
     */
    protected function traceNewState($state, $tokenId) {
        echo '% State ' . $state
            . ', Lookahead ' . ($tokenId === self::TOKEN_NONE ? '--none--' : $this->terminals[$tokenId]) . "\n";
    }

    /**
     * Debug helper: prints the name of a token that was just read.
     *
     * @param int $tokenId Internal token id
     */
    protected function traceRead($tokenId) {
        echo '% Reading ' . $this->terminals[$tokenId] . "\n";
    }

    /**
     * Debug helper: prints the name of a token that is being shifted.
     *
     * @param int $tokenId Internal token id
     */
    protected function traceShift($tokenId) {
        echo '% Shift ' . $this->terminals[$tokenId] . "\n";
    }

    /**
     * Debug helper: prints that the input was accepted.
     */
    protected function traceAccept() {
        echo "% Accepted.\n";
    }

    /**
     * Debug helper: prints the rule that is being reduced.
     *
     * @param int $n Rule number (index into $yyproduction)
     */
    protected function traceReduce($n) {
        echo '% Reduce by (' . $n . ') ' . $this->yyproduction[$n] . "\n";
    }
}
|