Generalize the attribute generation for nodes

The lexer now returns two arrays for every token: $startAttributes and $endAttributes. The attributes of a node are built by merging (with the array union operator +) the $startAttributes of the node's first token with the $endAttributes of its last token. As a result, the former line attribute is now called startLine, and the end line of a node is additionally saved in the endLine attribute.
2024-11-26 20:04:48 +01:00 · 2012-05-05 17:34:27 +02:00 · 2012-05-05 17:34:27 +02:00 · dd711f2a04
commit dd711f2a04
parent 5438cc0d69
9 changed files with 503 additions and 443 deletions
--- a/grammar/kmyacc.php.parser
+++ b/grammar/kmyacc.php.parser
@ -2,8 +2,8 @@
 $meta #
 #semval($) $this->yyval
 #semval($,%t) $this->yyval
-#semval(%n) $this->yyastk[$this->yysp-(%l-%n)]
-#semval(%n,%t) $this->yyastk[$this->yysp-(%l-%n)]
+#semval(%n) $this->yyastk[$this->stackPos-(%l-%n)]
+#semval(%n,%t) $this->yyastk[$this->stackPos-(%l-%n)]
 #include;

 /* This is an automatically GENERATED file, which should not be manually edited.
@ -94,7 +94,7 @@ class #(-p)

    protected $yyval;
    protected $yyastk;
-    protected $yysp;
+    protected $stackPos;
    protected $lexer;

    /**
@ -162,14 +162,29 @@ class #(-p)
    public function parse($code) {
        $this->lexer->startLexing($code);

-        $this->yysp   = 0;                   // Stack pos
-        $stateStack   = array($state = 0);   // State stack
-        $this->yyastk = array();             // AST   stack (?)
-        $yylstk       = array($yyline  = 1); // Line  stack
-        $yydstk       = array($yyDC = null); // Doc comment stack
-
+        // We start off with no lookahead-token
        $tokenId = self::TOKEN_NONE;

+        // The attributes for a node are taken from the first and last token of the node.
+        // From the first token only the startAttributes are taken and from the last only
+        // the endAttributes. Both are merged using the array union operator (+).
+        $startAttributes = array('startLine' => 1);
+        $endAttributes   = array();
+
+        // In order to figure out the attributes for the starting token, we have to keep
+        // them in a stack
+        $attributeStack = array($startAttributes);
+
+        // Start off in the initial state and keep a stack of previous states
+        $state = 0;
+        $stateStack = array($state);
+
+        // AST stack (?)
+        $this->yyastk = array();
+
+        // Current position in the stack(s)
+        $this->stackPos = 0;
+
        for (;;) {
 #if -t
            $this->YYTRACE_NEWSTATE($state, $tokenId);
@ -180,7 +195,7 @@ class #(-p)
            } else {
                if ($tokenId === self::TOKEN_NONE) {
                    // fetch the next token id from the lexer and fetch additional info by-ref
-                    $origTokenId = $this->lexer->getNextToken($tokenValue, $yyline, $yyDC);
+                    $origTokenId = $this->lexer->getNextToken($tokenValue, $startAttributes, $endAttributes);

                    // map the lexer token id to the internally used token id's
                    $tokenId = $origTokenId >= 0 && $origTokenId < self::TOKEN_MAP_SIZE
@ -194,8 +209,7 @@ class #(-p)
                        ));
                    }

-                    $yylstk[$this->yysp] = $yyline;
-                    $yydstk[$this->yysp] = $yyDC;
+                    $attributeStack[$this->stackPos] = $startAttributes;
 #if -t

                    $this->YYTRACE_READ($tokenId);
@ -222,12 +236,11 @@ class #(-p)
                        $this->YYTRACE_SHIFT($tokenId);

 #endif
-                        ++$this->yysp;
+                        ++$this->stackPos;

-                        $stateStack[$this->yysp]   = $state = $yyn;
-                        $this->yyastk[$this->yysp] = $tokenValue;
-                        $yylstk[$this->yysp]       = $yyline;
-                        $yydstk[$this->yysp]       = $yyDC;
+                        $stateStack[$this->stackPos]     = $state = $yyn;
+                        $this->yyastk[$this->stackPos]   = $tokenValue;
+                        $attributeStack[$this->stackPos] = $startAttributes;
                        $tokenId = self::TOKEN_NONE;

                        if ($yyn < self::YYNLSTATES)
@ -258,23 +271,21 @@ class #(-p)
 #endif
                    try {
                        $this->{'yyn' . $yyn}(
-                            array(
-                                'line'       => $yylstk[$this->yysp - self::$yylen[$yyn]],
-                                'docComment' => $yydstk[$this->yysp - self::$yylen[$yyn]]
-                            )
+                            $attributeStack[$this->stackPos - self::$yylen[$yyn]]
+                            + $endAttributes
                        );
                    } catch (PHPParser_Error $e) {
                        if (-1 === $e->getRawLine()) {
-                            $e->setRawLine($yyline);
+                            $e->setRawLine($startAttributes['startLine']);
                        }

                        throw $e;
                    }

                    /* Goto - shift nonterminal */
-                    $this->yysp -= self::$yylen[$yyn];
+                    $this->stackPos -= self::$yylen[$yyn];
                    $yyn = self::$yylhs[$yyn];
-                    if (($yyp = self::$yygbase[$yyn] + $stateStack[$this->yysp]) >= 0
+                    if (($yyp = self::$yygbase[$yyn] + $stateStack[$this->stackPos]) >= 0
                         && $yyp < self::YYGLAST
                         && self::$yygcheck[$yyp] == $yyn) {
                        $state = self::$yygoto[$yyp];
@ -282,17 +293,16 @@ class #(-p)
                        $state = self::$yygdefault[$yyn];
                    }

-                    ++$this->yysp;
+                    ++$this->stackPos;

-                    $stateStack[$this->yysp]   = $state;
-                    $this->yyastk[$this->yysp] = $this->yyval;
-                    $yylstk[$this->yysp]       = $yyline;
-                    $yydstk[$this->yysp]       = $yyDC;
+                    $stateStack[$this->stackPos]     = $state;
+                    $this->yyastk[$this->stackPos]   = $this->yyval;
+                    $attributeStack[$this->stackPos] = $startAttributes;
                } else {
                    /* error */
                    throw new PHPParser_Error(
                        'Unexpected token ' . self::$terminals[$tokenId],
-                        $yyline
+                        $startAttributes['startLine']
                    );
                }

@ -312,7 +322,7 @@ class #(-p)
 #noact

    protected function yyn%n() {
-        $this->yyval = $this->yyastk[$this->yysp];
+        $this->yyval = $this->yyastk[$this->stackPos];
    }
 #endreduce
 #endif
--- a/lib/PHPParser/Lexer.php
+++ b/lib/PHPParser/Lexer.php
@ -71,22 +71,24 @@ class PHPParser_Lexer
    }

    /**
-     * Returns the next token id.
+     * Fetches the next token.
     *
-     * @param mixed $value      Variable to store token content in
-     * @param mixed $line       Variable to store line in
-     * @param mixed $docComment Variable to store doc comment in
+     * @param mixed $value           Variable to store token content in
+     * @param mixed $startAttributes Variable to store start attributes in
+     * @param mixed $endAttributes   Variable to store end attributes in
     *
     * @return int Token id
     */
-    public function getNextToken(&$value = null, &$line = null, &$docComment = null) {
-        $docComment = null;
+    public function getNextToken(&$value = null, &$startAttributes = null, &$endAttributes = null) {
+        $startAttributes = array();
+        $endAttributes   = array();

        while (isset($this->tokens[++$this->pos])) {
            $token = $this->tokens[$this->pos];

            if (is_string($token)) {
-                $line = $this->line;
+                $startAttributes['startLine'] = $this->line;
+                $endAttributes['endLine']     = $this->line;

                // bug in token_get_all
                if ('b"' === $token) {
@ -100,15 +102,19 @@ class PHPParser_Lexer
                $this->line += substr_count($token[1], "\n");

                if (T_DOC_COMMENT === $token[0]) {
-                    $docComment = $token[1];
+                    $startAttributes['docComment'] = $token[1];
                } elseif (!isset($this->dropTokens[$token[0]])) {
                    $value = $token[1];
-                    $line  = $token[2];
+                    $startAttributes['startLine'] = $token[2];
+                    $endAttributes['endLine']     = $this->line;
+
                    return $this->tokenMap[$token[0]];
                }
            }
        }

+        $startAttributes['startLine'] = $this->line;
+
        // 0 is the EOF token
        return 0;
    }
--- a/lib/PHPParser/Lexer/Emulative.php
+++ b/lib/PHPParser/Lexer/Emulative.php
@ -173,8 +173,8 @@ class PHPParser_Lexer_Emulative extends PHPParser_Lexer
        }
    }

-    public function getNextToken(&$value = null, &$line = null, &$docComment = null) {
-        $token = parent::getNextToken($value, $line, $docComment);
+    public function getNextToken(&$value = null, &$startAttributes = null, &$endAttributes = null) {
+        $token = parent::getNextToken($value, $startAttributes, $endAttributes);

        // replace new keywords by their respective tokens. This is not done
        // if we currently are in an object access (e.g. in $obj->namespace
--- a/lib/PHPParser/NodeAbstract.php
+++ b/lib/PHPParser/NodeAbstract.php
@ -40,7 +40,7 @@ abstract class PHPParser_NodeAbstract implements PHPParser_Node, IteratorAggrega
     * @return int Line
     */
    public function getLine() {
-        return $this->getAttribute('line', -1);
+        return $this->getAttribute('startLine', -1);
    }

    /**
@ -49,7 +49,7 @@ abstract class PHPParser_NodeAbstract implements PHPParser_Node, IteratorAggrega
     * @param int $line Line
     */
    public function setLine($line) {
-        $this->setAttribute('line', (int) $line);
+        $this->setAttribute('startLine', (int) $line);
    }

    /**
--- a/lib/PHPParser/Parser.php
+++ b/lib/PHPParser/Parser.php
--- a/test/PHPParser/Tests/LexerTest.php
+++ b/test/PHPParser/Tests/LexerTest.php
@ -37,13 +37,13 @@ class PHPParser_Tests_LexerTest extends PHPUnit_Framework_TestCase
     */
    public function testLex($code, $tokens) {
        $this->lexer->startLexing($code);
-        while ($id = $this->lexer->getNextToken($value, $line, $docComment)) {
+        while ($id = $this->lexer->getNextToken($value, $startAttributes, $endAttributes)) {
            $token = array_shift($tokens);

            $this->assertEquals($token[0], $id);
            $this->assertEquals($token[1], $value);
-            $this->assertEquals($token[2], $line);
-            $this->assertEquals($token[3], $docComment);
+            $this->assertEquals($token[2], $startAttributes);
+            $this->assertEquals($token[3], $endAttributes);
        }
    }

@ -53,25 +53,56 @@ class PHPParser_Tests_LexerTest extends PHPUnit_Framework_TestCase
            array(
                '<?php tokens // ?>plaintext',
                array(
-                    array(PHPParser_Parser::T_STRING,      'tokens',    1, null),
-                    array(ord(';'),                        '?>',        1, null),
-                    array(PHPParser_Parser::T_INLINE_HTML, 'plaintext', 1, null),
+                    array(
+                        PHPParser_Parser::T_STRING, 'tokens',
+                        array('startLine' => 1), array('endLine' => 1)
+                    ),
+                    array(
+                        ord(';'), '?>',
+                        array('startLine' => 1), array('endLine' => 1)
+                    ),
+                    array(
+                        PHPParser_Parser::T_INLINE_HTML, 'plaintext',
+                        array('startLine' => 1), array('endLine' => 1)
+                    ),
                )
            ),
            // tests line numbers
            array(
                '<?php' . "\n" . '$ token /** doc' . "\n" . 'comment */ $',
                array(
-                    array(ord('$'),                   '$',     2, null),
-                    array(PHPParser_Parser::T_STRING, 'token', 2, null),
-                    array(ord('$'),                   '$',     3, '/** doc' . "\n" . 'comment */')
+                    array(
+                        ord('$'), '$',
+                        array('startLine' => 2), array('endLine' => 2)
+                    ),
+                    array(
+                        PHPParser_Parser::T_STRING, 'token',
+                        array('startLine' => 2), array('endLine' => 2)
+                    ),
+                    array(
+                        ord('$'), '$',
+                        array('startLine' => 3, 'docComment' => '/** doc' . "\n" . 'comment */'), array('endLine' => 3)
+                    ),
                )
            ),
            // tests doccomment extraction
            array(
                '<?php /** docComment 1 *//** docComment 2 */ token',
                array(
-                    array(PHPParser_Parser::T_STRING, 'token', 1, '/** docComment 2 */'),
+                    array(
+                        PHPParser_Parser::T_STRING, 'token',
+                        array('startLine' => 1, 'docComment' => '/** docComment 2 */'), array('endLine' => 1)
+                    ),
+                )
+            ),
+            // tests differing start and end line
+            array(
+                '<?php "foo' . "\n" . 'bar"',
+                array(
+                    array(
+                        PHPParser_Parser::T_CONSTANT_ENCAPSED_STRING, '"foo' . "\n" . 'bar"',
+                        array('startLine' => 1), array('endLine' => 2)
+                    ),
                )
            ),
        );
--- a/test/PHPParser/Tests/NodeAbstractTest.php
+++ b/test/PHPParser/Tests/NodeAbstractTest.php
@ -4,7 +4,7 @@ class PHPParser_Tests_NodeAbstractTest extends PHPUnit_Framework_TestCase
 {
    public function testConstruct() {
        $attributes = array(
-            'line'       => 10,
+            'startLine'  => 10,
            'docComment' => '/** doc comment */',
        );

--- a/test/PHPParser/Tests/NodeVisitor/NameResolverTest.php
+++ b/test/PHPParser/Tests/NodeVisitor/NameResolverTest.php
@ -202,8 +202,8 @@ EOC;
    public function testAlreadyInUseError() {
        $stmts = array(
            new PHPParser_Node_Stmt_Use(array(
-                new PHPParser_Node_Stmt_UseUse(new PHPParser_Node_Name('A\B'), 'B', array('line' => 1)),
-                new PHPParser_Node_Stmt_UseUse(new PHPParser_Node_Name('C'),   'B', array('line' => 2)),
+                new PHPParser_Node_Stmt_UseUse(new PHPParser_Node_Name('A\B'), 'B', array('startLine' => 1)),
+                new PHPParser_Node_Stmt_UseUse(new PHPParser_Node_Name('C'),   'B', array('startLine' => 2)),
            ))
        );

--- a/test/PHPParser/Tests/Serializer/XMLTest.php
+++ b/test/PHPParser/Tests/Serializer/XMLTest.php
@ -18,35 +18,38 @@ CODE;
 <AST xmlns:node="http://nikic.github.com/PHPParser/XML/node" xmlns:subNode="http://nikic.github.com/PHPParser/XML/subNode" xmlns:attribute="http://nikic.github.com/PHPParser/XML/attribute" xmlns:scalar="http://nikic.github.com/PHPParser/XML/scalar">
 <scalar:array>
  <node:Stmt_Function>
-   <attribute:line>
-    <scalar:int>3</scalar:int>
-   </attribute:line>
   <attribute:docComment>
    <scalar:string>/** doc comment */</scalar:string>
   </attribute:docComment>
+   <attribute:startLine>
+    <scalar:int>3</scalar:int>
+   </attribute:startLine>
+   <attribute:endLine>
+    <scalar:int>5</scalar:int>
+   </attribute:endLine>
   <subNode:byRef>
    <scalar:false/>
   </subNode:byRef>
   <subNode:params>
    <scalar:array>
     <node:Param>
-      <attribute:line>
+      <attribute:startLine>
       <scalar:int>3</scalar:int>
-      </attribute:line>
-      <attribute:docComment>
-       <scalar:null/>
-      </attribute:docComment>
+      </attribute:startLine>
+      <attribute:endLine>
+       <scalar:int>3</scalar:int>
+      </attribute:endLine>
      <subNode:name>
       <scalar:string>a</scalar:string>
      </subNode:name>
      <subNode:default>
       <node:Scalar_LNumber>
-        <attribute:line>
+        <attribute:startLine>
         <scalar:int>3</scalar:int>
-        </attribute:line>
-        <attribute:docComment>
-         <scalar:null/>
-        </attribute:docComment>
+        </attribute:startLine>
+        <attribute:endLine>
+         <scalar:int>3</scalar:int>
+        </attribute:endLine>
        <subNode:value>
         <scalar:int>0</scalar:int>
        </subNode:value>
@ -60,23 +63,23 @@ CODE;
      </subNode:byRef>
     </node:Param>
     <node:Param>
-      <attribute:line>
+      <attribute:startLine>
       <scalar:int>3</scalar:int>
-      </attribute:line>
-      <attribute:docComment>
-       <scalar:null/>
-      </attribute:docComment>
+      </attribute:startLine>
+      <attribute:endLine>
+       <scalar:int>3</scalar:int>
+      </attribute:endLine>
      <subNode:name>
       <scalar:string>b</scalar:string>
      </subNode:name>
      <subNode:default>
       <node:Scalar_DNumber>
-        <attribute:line>
+        <attribute:startLine>
         <scalar:int>3</scalar:int>
-        </attribute:line>
-        <attribute:docComment>
-         <scalar:null/>
-        </attribute:docComment>
+        </attribute:startLine>
+        <attribute:endLine>
+         <scalar:int>3</scalar:int>
+        </attribute:endLine>
        <subNode:value>
         <scalar:float>1</scalar:float>
        </subNode:value>
@ -94,21 +97,21 @@ CODE;
   <subNode:stmts>
    <scalar:array>
     <node:Stmt_Echo>
-      <attribute:line>
+      <attribute:startLine>
       <scalar:int>4</scalar:int>
-      </attribute:line>
-      <attribute:docComment>
-       <scalar:null/>
-      </attribute:docComment>
+      </attribute:startLine>
+      <attribute:endLine>
+       <scalar:int>4</scalar:int>
+      </attribute:endLine>
      <subNode:exprs>
       <scalar:array>
        <node:Scalar_String>
-         <attribute:line>
+         <attribute:startLine>
          <scalar:int>4</scalar:int>
-         </attribute:line>
-         <attribute:docComment>
-          <scalar:null/>
-         </attribute:docComment>
+         </attribute:startLine>
+         <attribute:endLine>
+          <scalar:int>4</scalar:int>
+         </attribute:endLine>
         <subNode:value>
          <scalar:string>Foo</scalar:string>
         </subNode:value>