mirror of
https://github.com/phabelio/PHP-Parser.git
synced 2024-12-11 16:49:43 +01:00
fb3d89e463
* Document new `DONT_TRAVERSER_CURRENT_AND_CHILDREN` constant * Rewording sentences, added 1 space indentation to make bullet list correct
517 lines
18 KiB
Markdown
517 lines
18 KiB
Markdown
Usage of basic components
|
|
=========================
|
|
|
|
This document explains how to use the parser, the pretty printer and the node traverser.
|
|
|
|
Bootstrapping
|
|
-------------
|
|
|
|
To bootstrap the library, include the autoloader generated by composer:
|
|
|
|
```php
|
|
require 'path/to/vendor/autoload.php';
|
|
```
|
|
|
|
Additionally you may want to set the `xdebug.max_nesting_level` ini option to a higher value:
|
|
|
|
```php
|
|
ini_set('xdebug.max_nesting_level', 3000);
|
|
```
|
|
|
|
This ensures that there will be no errors when traversing highly nested node trees. However, it is
|
|
preferable to disable XDebug completely, as it can easily make this library more than five times
|
|
slower.
|
|
|
|
Parsing
|
|
-------
|
|
|
|
In order to parse code, you first have to create a parser instance:
|
|
|
|
```php
|
|
use PhpParser\ParserFactory;
|
|
$parser = (new ParserFactory)->create(ParserFactory::PREFER_PHP7);
|
|
```
|
|
|
|
The factory accepts a kind argument that determines how different PHP versions are treated:
|
|
|
|
Kind | Behavior
|
|
-----|---------
|
|
`ParserFactory::PREFER_PHP7` | Try to parse code as PHP 7. If this fails, try to parse it as PHP 5.
|
|
`ParserFactory::PREFER_PHP5` | Try to parse code as PHP 5. If this fails, try to parse it as PHP 7.
|
|
`ParserFactory::ONLY_PHP7` | Parse code as PHP 7.
|
|
`ParserFactory::ONLY_PHP5` | Parse code as PHP 5.
|
|
|
|
Unless you have a strong reason to use something else, `PREFER_PHP7` is a reasonable default.
|
|
|
|
The `create()` method optionally accepts a `Lexer` instance as the second argument. Some use cases
|
|
that require customized lexers are discussed in the [lexer documentation](component/Lexer.markdown).
|
|
|
|
Subsequently you can pass PHP code (including the opening `<?php` tag) to the `parse` method in order to
|
|
create a syntax tree. If a syntax error is encountered, a `PhpParser\Error` exception will be thrown:
|
|
|
|
```php
|
|
<?php
|
|
use PhpParser\Error;
|
|
use PhpParser\ParserFactory;
|
|
|
|
$code = <<<'CODE'
|
|
<?php
|
|
function printLine($msg) {
|
|
echo $msg, "\n";
|
|
}
|
|
printLine('Hello World!!!');
|
|
CODE;
|
|
|
|
$parser = (new ParserFactory)->create(ParserFactory::PREFER_PHP7);
|
|
|
|
try {
|
|
$stmts = $parser->parse($code);
|
|
// $stmts is an array of statement nodes
|
|
} catch (Error $e) {
|
|
echo 'Parse Error: ', $e->getMessage();
|
|
}
|
|
```
|
|
|
|
A parser instance can be reused to parse multiple files.
|
|
|
|
Node dumping
|
|
------------
|
|
|
|
To dump the abstract syntax tree in human readable form, a `NodeDumper` can be used:
|
|
|
|
```php
|
|
<?php
|
|
use PhpParser\NodeDumper;
|
|
|
|
$nodeDumper = new NodeDumper;
|
|
echo $nodeDumper->dump($stmts), "\n";
|
|
```
|
|
|
|
For the sample code from the previous section, this will produce the following output:
|
|
|
|
```
|
|
array(
|
|
0: Stmt_Function(
|
|
byRef: false
|
|
name: Identifier(
|
|
name: printLine
|
|
)
|
|
params: array(
|
|
0: Param(
|
|
type: null
|
|
byRef: false
|
|
variadic: false
|
|
var: Expr_Variable(
|
|
name: msg
|
|
)
|
|
default: null
|
|
)
|
|
)
|
|
returnType: null
|
|
stmts: array(
|
|
0: Stmt_Echo(
|
|
exprs: array(
|
|
0: Expr_Variable(
|
|
name: msg
|
|
)
|
|
1: Scalar_String(
|
|
value:
|
|
|
|
)
|
|
)
|
|
)
|
|
)
|
|
)
|
|
1: Stmt_Expression(
|
|
expr: Expr_FuncCall(
|
|
name: Name(
|
|
parts: array(
|
|
0: printLine
|
|
)
|
|
)
|
|
args: array(
|
|
0: Arg(
|
|
value: Scalar_String(
|
|
value: Hello World!!!
|
|
)
|
|
byRef: false
|
|
unpack: false
|
|
)
|
|
)
|
|
)
|
|
)
|
|
)
|
|
```
|
|
|
|
You can also use the `php-parse` script to obtain such a node dump by calling it either with a file
|
|
name or code string:
|
|
|
|
```sh
|
|
vendor/bin/php-parse file.php
|
|
vendor/bin/php-parse "<?php foo();"
|
|
```
|
|
|
|
This can be very helpful if you want to quickly check how certain syntax is represented in the AST.
|
|
|
|
Node tree structure
|
|
-------------------
|
|
|
|
Looking at the node dump above, you can see that `$stmts` for this example code is an array of two
|
|
nodes, a `Stmt_Function` and a `Stmt_Expression`. The corresponding class names are:
|
|
|
|
* `Stmt_Function -> PhpParser\Node\Stmt\Function_`
|
|
* `Stmt_Expression -> PhpParser\Node\Stmt\Expression`
|
|
|
|
The additional `_` at the end of the first class name is necessary, because `Function` is a
|
|
reserved keyword. Many node class names in this library have a trailing `_` to avoid clashing with
|
|
a keyword.
|
|
|
|
As PHP is a large language there are approximately 140 different nodes. In order to make working
|
|
with them easier they are grouped into three categories:
|
|
|
|
* `PhpParser\Node\Stmt`s are statement nodes, i.e. language constructs that do not return
|
|
a value and can not occur in an expression. For example a class definition is a statement.
|
|
It doesn't return a value and you can't write something like `func(class A {});`.
|
|
* `PhpParser\Node\Expr`s are expression nodes, i.e. language constructs that return a value
|
|
and thus can occur in other expressions. Examples of expressions are `$var`
|
|
(`PhpParser\Node\Expr\Variable`) and `func()` (`PhpParser\Node\Expr\FuncCall`).
|
|
* `PhpParser\Node\Scalar`s are nodes representing scalar values, like `'string'`
|
|
(`PhpParser\Node\Scalar\String_`), `0` (`PhpParser\Node\Scalar\LNumber`) or magic constants
|
|
like `__FILE__` (`PhpParser\Node\Scalar\MagicConst\File`). All `PhpParser\Node\Scalar`s extend
|
|
`PhpParser\Node\Expr`, as scalars are expressions, too.
|
|
* There are some nodes not in any of these groups, for example names (`PhpParser\Node\Name`)
|
|
and call arguments (`PhpParser\Node\Arg`).
|
|
|
|
The `Node\Stmt\Expression` node is somewhat confusing in that it contains both the terms "statement"
|
|
and "expression". This node distinguishes `expr`, which is a `Node\Expr`, from `expr;`, which is
|
|
an "expression statement" represented by `Node\Stmt\Expression` and containing `expr` as a sub-node.
|
|
|
|
Every node has a (possibly zero) number of subnodes. You can access subnodes by writing
|
|
`$node->subNodeName`. The `Stmt\Echo_` node has only one subnode `exprs`. So in order to access it
|
|
in the above example you would write `$stmts[0]->stmts[0]->exprs`. If you wanted to access the name of the function
|
|
call, you would write `$stmts[1]->expr->name`.
|
|
|
|
All nodes also define a `getType()` method that returns the node type. The type is the class name
|
|
without the `PhpParser\Node\` prefix and `\` replaced with `_`. It also does not contain a trailing
|
|
`_` for reserved-keyword class names.
|
|
|
|
It is possible to associate custom metadata with a node using the `setAttribute()` method. This data
|
|
can then be retrieved using `hasAttribute()`, `getAttribute()` and `getAttributes()`.
|
|
|
|
By default the lexer adds the `startLine`, `endLine` and `comments` attributes. `comments` is an array
|
|
of `PhpParser\Comment[\Doc]` instances.
|
|
|
|
The start line can also be accessed using `getLine()`/`setLine()` (instead of `getAttribute('startLine')`).
|
|
The last doc comment from the `comments` attribute can be obtained using `getDocComment()`.
|
|
|
|
Pretty printer
|
|
--------------
|
|
|
|
The pretty printer component compiles the AST back to PHP code. As the parser does not retain formatting
|
|
information the formatting is done using a specified scheme. Currently there is only one scheme available,
|
|
namely `PhpParser\PrettyPrinter\Standard`.
|
|
|
|
```php
|
|
use PhpParser\Error;
|
|
use PhpParser\ParserFactory;
|
|
use PhpParser\PrettyPrinter;
|
|
|
|
$code = "<?php echo 'Hi ', hi\\getTarget();";
|
|
|
|
$parser = (new ParserFactory)->create(ParserFactory::PREFER_PHP7);
|
|
$prettyPrinter = new PrettyPrinter\Standard;
|
|
|
|
try {
|
|
// parse
|
|
$stmts = $parser->parse($code);
|
|
|
|
// change
|
|
$stmts[0] // the echo statement
|
|
->exprs // sub expressions
|
|
[0] // the first of them (the string node)
|
|
->value // its value, i.e. 'Hi '
|
|
= 'Hello '; // change to 'Hello '
|
|
|
|
// pretty print
|
|
$code = $prettyPrinter->prettyPrint($stmts);
|
|
|
|
echo $code;
|
|
} catch (Error $e) {
|
|
echo 'Parse Error: ', $e->getMessage();
|
|
}
|
|
```
|
|
|
|
The above code will output:
|
|
|
|
echo 'Hello ', hi\getTarget();
|
|
|
|
As you can see the source code was first parsed using `PhpParser\Parser->parse()`, then changed and then
|
|
again converted to code using `PhpParser\PrettyPrinter\Standard->prettyPrint()`.
|
|
|
|
The `prettyPrint()` method pretty prints a statements array. It is also possible to pretty print only a
|
|
single expression using `prettyPrintExpr()`.
|
|
|
|
The `prettyPrintFile()` method can be used to print an entire file. This will include the opening `<?php` tag
|
|
and handle inline HTML as the first/last statement more gracefully.
|
|
|
|
> Read more: [Pretty printing documentation](component/Pretty_printing.markdown)
|
|
|
|
Node traversation
|
|
-----------------
|
|
|
|
The above pretty printing example used the fact that the source code was known and thus it was easy to
|
|
write code that accesses a certain part of a node tree and changes it. Normally this is not the case.
|
|
Usually you want to change / analyze code in a generic way, where you don't know what the node tree is
|
|
going to look like.
|
|
|
|
For this purpose the parser provides a component for traversing and visiting the node tree. The basic
|
|
structure of a program using this `PhpParser\NodeTraverser` looks like this:
|
|
|
|
```php
|
|
use PhpParser\NodeTraverser;
|
|
use PhpParser\ParserFactory;
|
|
use PhpParser\PrettyPrinter;
|
|
|
|
$parser = (new ParserFactory)->create(ParserFactory::PREFER_PHP7);
|
|
$traverser = new NodeTraverser;
|
|
$prettyPrinter = new PrettyPrinter\Standard;
|
|
|
|
// add your visitor
|
|
$traverser->addVisitor(new MyNodeVisitor);
|
|
|
|
try {
|
|
$code = file_get_contents($fileName);
|
|
|
|
// parse
|
|
$stmts = $parser->parse($code);
|
|
|
|
// traverse
|
|
$stmts = $traverser->traverse($stmts);
|
|
|
|
// pretty print
|
|
$code = $prettyPrinter->prettyPrintFile($stmts);
|
|
|
|
echo $code;
|
|
} catch (PhpParser\Error $e) {
|
|
echo 'Parse Error: ', $e->getMessage();
|
|
}
|
|
```
|
|
|
|
The corresponding node visitor might look like this:
|
|
|
|
```php
|
|
use PhpParser\Node;
|
|
use PhpParser\NodeVisitorAbstract;
|
|
|
|
class MyNodeVisitor extends NodeVisitorAbstract
|
|
{
|
|
public function leaveNode(Node $node) {
|
|
if ($node instanceof Node\Scalar\String_) {
|
|
$node->value = 'foo';
|
|
}
|
|
}
|
|
}
|
|
```
|
|
|
|
The above node visitor would change all string literals in the program to `'foo'`.
|
|
|
|
All visitors must implement the `PhpParser\NodeVisitor` interface, which defines the following four
|
|
methods:
|
|
|
|
```php
|
|
public function beforeTraverse(array $nodes);
|
|
public function enterNode(\PhpParser\Node $node);
|
|
public function leaveNode(\PhpParser\Node $node);
|
|
public function afterTraverse(array $nodes);
|
|
```
|
|
|
|
The `beforeTraverse()` method is called once before the traversal begins and is passed the nodes the
|
|
traverser was called with. This method can be used for resetting values before traversal or
|
|
preparing the tree for traversal.
|
|
|
|
The `afterTraverse()` method is similar to the `beforeTraverse()` method, with the only difference that
|
|
it is called once after the traversal.
|
|
|
|
The `enterNode()` and `leaveNode()` methods are called on every node, the former when it is entered,
|
|
i.e. before its subnodes are traversed, the latter when it is left.
|
|
|
|
All four methods can either return the changed node or not return at all (i.e. `null`) in which
|
|
case the current node is not changed.
|
|
|
|
The `enterNode()` method can additionally return the value `NodeTraverser::DONT_TRAVERSE_CHILDREN`,
|
|
which instructs the traverser to skip all children of the current node. To furthermore prevent subsequent
|
|
visitors from visiting the current node, `NodeTraverser::DONT_TRAVERSE_CURRENT_AND_CHILDREN` can be used instead.
|
|
|
|
The `leaveNode()` method can additionally return the value `NodeTraverser::REMOVE_NODE`, in which
|
|
case the current node will be removed from the parent array. Furthermore it is possible to return
|
|
an array of nodes, which will be merged into the parent array at the offset of the current node.
|
|
I.e. if in `array(A, B, C)` the node `B` should be replaced with `array(X, Y, Z)` the result will
|
|
be `array(A, X, Y, Z, C)`.
|
|
|
|
Instead of manually implementing the `NodeVisitor` interface you can also extend the `NodeVisitorAbstract`
|
|
class, which will define empty default implementations for all the above methods.
|
|
|
|
> Read more: [Walking the AST](component/Walking_the_AST.markdown)
|
|
|
|
The NameResolver node visitor
|
|
-----------------------------
|
|
|
|
One visitor that is already bundled with the package is `PhpParser\NodeVisitor\NameResolver`. This visitor
|
|
helps you work with namespaced code by trying to resolve most names to fully qualified ones.
|
|
|
|
For example, consider the following code:
|
|
|
|
use A as B;
|
|
new B\C();
|
|
|
|
In order to know that `B\C` really is `A\C` you would need to track aliases and namespaces yourself.
|
|
The `NameResolver` takes care of that and resolves names as far as possible.
|
|
|
|
After running it, most names will be fully qualified. The only names that will stay unqualified are
|
|
unqualified function and constant names. These are resolved at runtime and thus the visitor can't
|
|
know which function they are referring to. In most cases this is a non-issue as the global functions
|
|
are meant.
|
|
|
|
Also the `NameResolver` adds a `namespacedName` subnode to class, function and constant declarations
|
|
that contains the namespaced name instead of only the shortname that is available via `name`.
|
|
|
|
> Read more: [Name resolution documentation](component/Name_resolution.markdown)
|
|
|
|
Example: Converting namespaced code to pseudo namespaces
|
|
--------------------------------------------------------
|
|
|
|
A small example to understand the concept: We want to convert namespaced code to pseudo namespaces
|
|
so it works on PHP 5.2, i.e. names like `A\B` should be converted to `A_B`. Note that such conversions
|
|
are fairly complicated if you take PHP's dynamic features into account, so our conversion will
|
|
assume that no dynamic features are used.
|
|
|
|
We start off with the following base code:
|
|
|
|
```php
|
|
use PhpParser\ParserFactory;
|
|
use PhpParser\PrettyPrinter;
|
|
use PhpParser\NodeTraverser;
|
|
use PhpParser\NodeVisitor\NameResolver;
|
|
|
|
$inDir = '/some/path';
|
|
$outDir = '/some/other/path';
|
|
|
|
$parser = (new ParserFactory)->create(ParserFactory::PREFER_PHP7);
|
|
$traverser = new NodeTraverser;
|
|
$prettyPrinter = new PrettyPrinter\Standard;
|
|
|
|
$traverser->addVisitor(new NameResolver); // we will need resolved names
|
|
$traverser->addVisitor(new NamespaceConverter); // our own node visitor
|
|
|
|
// iterate over all .php files in the directory
|
|
$files = new \RecursiveIteratorIterator(new \RecursiveDirectoryIterator($inDir));
|
|
$files = new \RegexIterator($files, '/\.php$/');
|
|
|
|
foreach ($files as $file) {
|
|
try {
|
|
// read the file that should be converted
|
|
$code = file_get_contents($file->getPathname());
|
|
|
|
// parse
|
|
$stmts = $parser->parse($code);
|
|
|
|
// traverse
|
|
$stmts = $traverser->traverse($stmts);
|
|
|
|
// pretty print
|
|
$code = $prettyPrinter->prettyPrintFile($stmts);
|
|
|
|
// write the converted file to the target directory
|
|
file_put_contents(
|
|
substr_replace($file->getPathname(), $outDir, 0, strlen($inDir)),
|
|
$code
|
|
);
|
|
} catch (PhpParser\Error $e) {
|
|
echo 'Parse Error: ', $e->getMessage();
|
|
}
|
|
}
|
|
```
|
|
|
|
Now let's start with the main code, the `NamespaceConverter` visitor. One thing it needs to do
|
|
is convert `A\B` style names to `A_B` style ones.
|
|
|
|
```php
|
|
use PhpParser\Node;
|
|
|
|
class NamespaceConverter extends \PhpParser\NodeVisitorAbstract
|
|
{
|
|
public function leaveNode(Node $node) {
|
|
if ($node instanceof Node\Name) {
|
|
return new Node\Name(str_replace('\\', '_', $node->toString()));
|
|
}
|
|
}
|
|
}
|
|
```
|
|
|
|
The above code profits from the fact that the `NameResolver` already resolved all names as far as
|
|
possible, so we don't need to do that. We only need to create a string with the name parts separated
|
|
by underscores instead of backslashes. This is what `str_replace('\\', '_', $node->toString())` does. (If you want to
|
|
create a name with backslashes either write `$node->toString()` or `(string) $node`.) Then we create
|
|
a new name from the string and return it. Returning a new node replaces the old node.
|
|
|
|
Another thing we need to do is change the class/function/const declarations. Currently they contain
|
|
only the shortname (i.e. the last part of the name), but they need to contain the complete name including
|
|
the namespace prefix:
|
|
|
|
```php
|
|
use PhpParser\Node;
|
|
use PhpParser\Node\Stmt;
|
|
|
|
class NamespaceConverter extends \PhpParser\NodeVisitorAbstract
|
|
{
|
|
public function leaveNode(Node $node) {
|
|
if ($node instanceof Node\Name) {
|
|
return new Node\Name(str_replace('\\', '_', $node->toString()));
|
|
} elseif ($node instanceof Stmt\Class_
|
|
|| $node instanceof Stmt\Interface_
|
|
|| $node instanceof Stmt\Function_) {
|
|
$node->name = str_replace('\\', '_', $node->namespacedName->toString());
|
|
} elseif ($node instanceof Stmt\Const_) {
|
|
foreach ($node->consts as $const) {
|
|
$const->name = str_replace('\\', '_', $const->namespacedName->toString());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
```
|
|
|
|
There is not much more to it than converting the namespaced name to string with `_` as separator.
|
|
|
|
The last thing we need to do is remove the `namespace` and `use` statements:
|
|
|
|
```php
|
|
use PhpParser\Node;
|
|
use PhpParser\Node\Stmt;
|
|
use PhpParser\NodeTraverser;
|
|
|
|
class NamespaceConverter extends \PhpParser\NodeVisitorAbstract
|
|
{
|
|
public function leaveNode(Node $node) {
|
|
if ($node instanceof Node\Name) {
|
|
return new Node\Name(str_replace('\\', '_', $node->toString()));
|
|
} elseif ($node instanceof Stmt\Class_
|
|
|| $node instanceof Stmt\Interface_
|
|
|| $node instanceof Stmt\Function_) {
|
|
$node->name = str_replace('\\', '_', $node->namespacedName->toString());
|
|
} elseif ($node instanceof Stmt\Const_) {
|
|
foreach ($node->consts as $const) {
|
|
$const->name = str_replace('\\', '_', $const->namespacedName->toString());
|
|
}
|
|
} elseif ($node instanceof Stmt\Namespace_) {
|
|
// returning an array merges it into the parent array
|
|
return $node->stmts;
|
|
} elseif ($node instanceof Stmt\Use_) {
|
|
// remove use nodes altogether
|
|
return NodeTraverser::REMOVE_NODE;
|
|
}
|
|
}
|
|
}
|
|
```
|
|
|
|
That's all.
|