Merge pull request #88 from adrienlucas/twig-analyzer

Refactor twig analyzer
This commit is contained in:
Matthew Brown 2020-09-24 14:12:24 -04:00 committed by GitHub
commit fcef6b8f59
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 406 additions and 210 deletions

166
src/Twig/Context.php Normal file
View File

@ -0,0 +1,166 @@
<?php
declare(strict_types=1);
namespace Psalm\SymfonyPsalmPlugin\Twig;
use Psalm\CodeLocation;
use Psalm\Internal\Codebase\Taint;
use Psalm\Internal\Taint\Sink;
use Psalm\Internal\Taint\Taintable;
use Psalm\Internal\Taint\TaintNode;
use Psalm\Type\TaintKind;
use Twig\Node\Expression\FilterExpression;
use Twig\Node\Expression\NameExpression;
use Twig\Node\Node;
use Twig\Node\PrintNode;
use Twig\Source;
class Context
{
/** @var array<string, Taintable> */
private $unassignedVariables = [];
/** @var array<string, Taintable> */
private $localVariables = [];
/** @var Source */
private $sourceContext;
/** @var Taint */
private $taint;
public function __construct(Source $sourceContext, Taint $taint)
{
$this->sourceContext = $sourceContext;
$this->taint = $taint;
}
public function addSink(Node $node, Taintable $source): void
{
$codeLocation = $this->getNodeLocation($node);
$sinkName = 'twig_unknown';
if ($node instanceof PrintNode) {
$sinkName = 'twig_print';
}
$sink = Sink::getForMethodArgument(
$sinkName, $sinkName, 0, null, $codeLocation
);
$sink->taints = [
TaintKind::INPUT_HTML,
TaintKind::USER_SECRET,
TaintKind::SYSTEM_SECRET,
];
$this->taint->addSink($sink);
$this->taint->addPath($source, $sink, 'arg');
}
public function taintVariable(NameExpression $expression): Taintable
{
/** @var string $variableName */
$variableName = $expression->getAttribute('name');
$sinkNode = TaintNode::getForAssignment($variableName, $this->getNodeLocation($expression));
$this->taint->addTaintNode($sinkNode);
$sinkNode = $this->addVariableTaintNode($expression);
return $this->addVariableUsage($variableName, $sinkNode);
}
public function getTaintDestination(Taintable $taintSource, FilterExpression $expression): TaintNode
{
/** @var string $filterName */
$filterName = $expression->getNode('filter')->getAttribute('value');
$returnLocation = $this->getNodeLocation($expression);
$taintDestination = TaintNode::getForMethodReturn('filter_'.$filterName, 'filter_'.$filterName, $returnLocation, $returnLocation);
$this->taint->addTaintNode($taintDestination);
$this->taint->addPath($taintSource, $taintDestination, 'arg');
return $taintDestination;
}
public function taintAssignment(NameExpression $destinationVariable, NameExpression $sourceVariable): void
{
/** @var string $destinationName */
$destinationName = $destinationVariable->getAttribute('name');
$taintDestination = $this->addVariableTaintNode($destinationVariable);
/** @var string $sourceName */
$sourceName = $sourceVariable->getAttribute('name');
$taintSource = $this->addVariableTaintNode($sourceVariable);
$this->localVariables[$destinationName] = $taintDestination;
$previousTaint = $this->addVariableUsage($sourceName, $taintSource);
$this->taint->addPath($taintSource, $taintDestination, 'arg');
if ($previousTaint !== $taintSource) {
$this->taint->addPath($previousTaint, $taintSource, 'arg');
}
}
public function taintUnassignedVariables(string $templateName): void
{
foreach ($this->unassignedVariables as $variableName => $taintable) {
$label = strtolower($templateName).'#'.strtolower($variableName);
$taintSource = new TaintNode($label, $label, null, null);
$this->taint->addTaintNode($taintSource);
$this->taint->addPath($taintSource, $taintable, 'arg');
}
}
private function addVariableTaintNode(NameExpression $variableNode): TaintNode
{
/** @var string $variableName */
$variableName = $variableNode->getAttribute('name');
$taintNode = TaintNode::getForAssignment($variableName, $this->getNodeLocation($variableNode));
$this->taint->addTaintNode($taintNode);
return $taintNode;
}
private function addVariableUsage(string $variableName, Taintable $variableTaint): Taintable
{
if (!isset($this->localVariables[$variableName])) {
return $this->unassignedVariables[$variableName] = $variableTaint;
}
return $this->localVariables[$variableName];
}
private function getNodeLocation(Node $node): CodeLocation
{
$fileName = $this->sourceContext->getName();
$filePath = $this->sourceContext->getPath();
$snippet = $this->sourceContext->getCode(); // warning : the getCode method returns the whole template, not only the statement
$fileCode = file_get_contents($filePath);
$lineNumber = $node->getTemplateLine();
$lines = explode("\n", $fileCode);
$file_start = 0;
for ($i = 0; $i < $lineNumber - 1; ++$i) {
$file_start += strlen($lines[$i]) + 1;
}
$file_start += (int) strpos($lines[$lineNumber - 1], $snippet);
$file_end = $file_start + strlen($snippet);
return new CodeLocation\Raw(
$fileCode,
$filePath,
$fileName,
$file_start,
max($file_end, strlen($fileCode))
);
}
}

View File

@ -0,0 +1,69 @@
<?php
declare(strict_types=1);
namespace Psalm\SymfonyPsalmPlugin\Twig;
use Psalm\Internal\Taint\Taintable;
use RuntimeException;
use Twig\Node\Expression\AbstractExpression;
use Twig\Node\Expression\FilterExpression;
use Twig\Node\Expression\NameExpression;
use Twig\Node\PrintNode;
class PrintNodeAnalyzer
{
/** @var Context */
private $context;
public function __construct(Context $context)
{
$this->context = $context;
}
public function analyzePrintNode(PrintNode $node): void
{
$expression = $node->getNode('expr');
if (!$expression instanceof AbstractExpression) {
throw new RuntimeException('The expr node has an expected type.');
}
if ($this->expressionIsEscaped($expression)) {
return;
}
$source = $this->getTaintSource($expression);
if (null !== $source) {
$this->context->addSink($node, $source);
}
}
private function expressionIsEscaped(AbstractExpression $expression): bool
{
if ($expression instanceof FilterExpression && 'escape' === $expression->getNode('filter')->getAttribute('value')) {
return true;
}
return false;
}
private function getTaintSource(AbstractExpression $expression): ?Taintable
{
if ($expression instanceof FilterExpression) {
/** @var AbstractExpression $filteredExpression */
$filteredExpression = $expression->getNode('node');
$taintSource = $this->getTaintSource($filteredExpression);
if (null === $taintSource) {
return null;
}
return $this->context->getTaintDestination($taintSource, $expression);
}
if ($expression instanceof NameExpression) {
return $this->context->taintVariable($expression);
}
return null;
}
}

View File

@ -0,0 +1,59 @@
<?php
declare(strict_types=1);
namespace Psalm\SymfonyPsalmPlugin\Twig;
use Twig\Environment;
use Twig\Node\Expression\AbstractExpression;
use Twig\Node\Expression\NameExpression;
use Twig\Node\Node;
use Twig\Node\PrintNode;
use Twig\Node\SetNode;
use Twig\NodeVisitor\NodeVisitorInterface;
class TaintAnalysisVisitor implements NodeVisitorInterface
{
/** @var Context */
private $context;
public function __construct(Context $context)
{
$this->context = $context;
}
public function enterNode(Node $node, Environment $env): Node
{
if ($node instanceof PrintNode) {
$analyzer = new PrintNodeAnalyzer($this->context);
$analyzer->analyzePrintNode($node);
}
if ($node instanceof SetNode) {
/** @var array<NameExpression> $names */
$names = $node->getNode('names');
/** @var array<AbstractExpression> $values */
$values = $node->getNode('values')->getIterator();
foreach ($names as $i => $name) {
if (!isset($values[$i]) || !$values[$i] instanceof NameExpression) {
continue;
}
$this->context->taintAssignment($name, $values[$i]);
}
}
return $node;
}
public function leaveNode(Node $node, Environment $env): ?Node
{
return $node;
}
public function getPriority()
{
return 0;
}
}

View File

@ -4,24 +4,19 @@ declare(strict_types=1);
namespace Psalm\SymfonyPsalmPlugin\Twig; namespace Psalm\SymfonyPsalmPlugin\Twig;
use Psalm\Context; use Psalm\Context as PsalmContext;
use Psalm\Internal\Analyzer\FileAnalyzer; use Psalm\Internal\Analyzer\FileAnalyzer;
use Psalm\Internal\Codebase\Taint;
use Psalm\Internal\Taint\TaintNode; use Psalm\Internal\Taint\TaintNode;
use Twig\Environment; use Twig\Environment;
use Twig\Loader\FilesystemLoader; use Twig\Loader\FilesystemLoader;
use Twig\NodeTraverser;
class TemplateFileAnalyzer extends FileAnalyzer class TemplateFileAnalyzer extends FileAnalyzer
{ {
/**
* @var Taint|null
*/
private $taint;
public function analyze( public function analyze(
Context $file_context = null, PsalmContext $file_context = null,
bool $preserve_analyzers = false, bool $preserve_analyzers = false,
Context $global_context = null PsalmContext $global_context = null
) { ) {
$codebase = $this->project_analyzer->getCodebase(); $codebase = $this->project_analyzer->getCodebase();
@ -29,7 +24,7 @@ class TemplateFileAnalyzer extends FileAnalyzer
return; return;
} }
$this->taint = $codebase->taint; $taint = $codebase->taint;
$loader = new FilesystemLoader('templates', $codebase->config->base_dir); $loader = new FilesystemLoader('templates', $codebase->config->base_dir);
$twig = new Environment($loader, [ $twig = new Environment($loader, [
@ -44,23 +39,15 @@ class TemplateFileAnalyzer extends FileAnalyzer
$twig_source = $loader->getSourceContext($local_file_name); $twig_source = $loader->getSourceContext($local_file_name);
$tree = $twig->parse($twig->tokenize($twig_source)); $tree = $twig->parse($twig->tokenize($twig_source));
$context = new Context(); $twigContext = new Context($twig_source, $taint);
$moduleAnalyzer = new TwigModuleAnalyzer($context, $twig_source, $codebase->taint); $traverser = new NodeTraverser($twig, [
$moduleAnalyzer->analyzeModule($tree); new TaintAnalysisVisitor($twigContext),
]);
foreach ($context->vars_in_scope as $var_name => $var_type) { $traverser->traverse($tree);
$taint_source = self::getTaintNodeForTwigNamedVariable($local_file_name, $var_name);
$this->taint->addTaintNode($taint_source);
if (null === $var_type->parent_nodes) { $twigContext->taintUnassignedVariables($local_file_name);
continue;
}
foreach ($var_type->parent_nodes as $taint_sink) {
$this->taint->addPath($taint_source, $taint_sink, 'arg');
}
}
} }
public static function getTaintNodeForTwigNamedVariable( public static function getTaintNodeForTwigNamedVariable(

View File

@ -1,185 +0,0 @@
<?php
declare(strict_types=1);
namespace Psalm\SymfonyPsalmPlugin\Twig;
use Psalm\CodeLocation;
use Psalm\Context;
use Psalm\Internal\Codebase\Taint;
use Psalm\Internal\Taint\Sink;
use Psalm\Internal\Taint\Taintable;
use Psalm\Internal\Taint\TaintNode;
use Psalm\Type;
use Psalm\Type\TaintKind;
use RuntimeException;
use Twig\Node\BodyNode;
use Twig\Node\Expression\AbstractExpression;
use Twig\Node\Expression\FilterExpression;
use Twig\Node\Expression\NameExpression;
use Twig\Node\ModuleNode;
use Twig\Node\Node;
use Twig\Node\PrintNode;
use Twig\Source;
/**
* @internal
* This class is not meant to be used outside of the `TemplateFileAnalyzer`
*/
class TwigModuleAnalyzer
{
/**
* @var Context
*/
private $context;
/**
* @var Source
*/
private $twig_source;
/**
* @var Taint
*/
private $taint;
public function __construct(Context $context, Source $twig_source, Taint $taint)
{
$this->context = $context;
$this->twig_source = $twig_source;
$this->taint = $taint;
}
public function analyzeModule(ModuleNode $node): void
{
$bodyNode = $node->getNode('body');
if (!$bodyNode instanceof BodyNode) {
throw new RuntimeException('The body node has an expected type.');
}
$this->analyzeBody($bodyNode);
}
private function analyzeBody(BodyNode $node): void
{
/** @var Node $innerNode */
foreach ($node->getIterator() as $innerNode) {
/** @var Node $sub_node */
foreach ($innerNode->getIterator() as $sub_node) {
if (!$sub_node instanceof PrintNode) {
continue;
}
$this->analyzePrintNode($sub_node);
}
}
}
private function analyzePrintNode(PrintNode $node): Taintable
{
$codeLocation = self::getLocation($node->getSourceContext() ?? $this->twig_source, $node->getTemplateLine());
$sink = Sink::getForMethodArgument(
'twig_print', 'twig_print', 0, null, $codeLocation
);
$sink->taints = [
TaintKind::INPUT_HTML,
TaintKind::USER_SECRET,
TaintKind::SYSTEM_SECRET,
];
$this->taint->addSink($sink);
$expression = $node->getNode('expr');
if (!$expression instanceof AbstractExpression) {
throw new RuntimeException('The expr node has an expected type.');
}
if ($expression_taint = $this->analyzeExpression($expression)) {
$this->taint->addPath($expression_taint, $sink, 'arg');
}
return $sink;
}
private function analyzeExpression(AbstractExpression $expression): ?Taintable
{
if ($expression instanceof FilterExpression) {
return $this->analyzeFilter($expression);
}
return null;
}
private function analyzeFilter(FilterExpression $expression): ?Taintable
{
if ('raw' !== $expression->getNode('filter')->getAttribute('value')) {
return null;
}
$function_id = 'filter_raw';
$return_location = self::getLocation($expression->getSourceContext() ?? $this->twig_source, $expression->getTemplateLine());
$return_taint = TaintNode::getForMethodReturn($function_id, $function_id, $return_location, $return_location);
$this->taint->addTaintNode($return_taint);
$argument_taint = TaintNode::getForMethodArgument(
$function_id, $function_id, 0, $return_location, $return_location // should be $argument_location instead of $return_location
);
$this->taint->addTaintNode($argument_taint);
$this->taint->addPath($argument_taint, $return_taint, 'arg');
$sub_node = $expression->getNode('node');
if ($sub_node instanceof NameExpression) {
$sub_node_taint = $this->analyzeName($sub_node);
$this->taint->addPath($sub_node_taint, $argument_taint, 'arg');
}
return $return_taint;
}
private function analyzeName(NameExpression $expression): Taintable
{
/** @var string $var_id */
$var_id = $expression->getAttribute('name');
$var_type = Type::getMixed();
$this->context->vars_in_scope[$var_id] = $var_type;
$var_location = self::getLocation($expression->getSourceContext() ?? $this->twig_source, $expression->getTemplateLine());
$variable_taint = TaintNode::getForAssignment($var_id, $var_location);
$this->taint->addTaintNode($variable_taint);
$var_type->parent_nodes = [$variable_taint];
return $variable_taint;
}
private static function getLocation(Source $sourceContext, int $lineNumber): CodeLocation
{
$fileName = $sourceContext->getName();
$filePath = $sourceContext->getPath();
$snippet = (string) $sourceContext->getCode(); // warning : the getCode method returns the whole template, not only the statement
$fileCode = file_get_contents($filePath);
$lines = explode("\n", $snippet);
$file_start = 0;
for ($i = 0; $i < $lineNumber - 1; ++$i) {
$file_start += strlen($lines[$i]) + 1;
}
$lineNumber = $lineNumber ?: 1;
$file_start += intval(strpos($lines[$lineNumber - 1], $snippet));
$file_end = $file_start + strlen($snippet);
return new CodeLocation\Raw(
$fileCode,
$filePath,
$fileName,
$file_start,
max($file_end, strlen($fileCode))
);
}
}

View File

@ -51,7 +51,7 @@ Feature: Twig tainting with analyzer
When I run Psalm with taint analysis When I run Psalm with taint analysis
And I see no errors And I see no errors
Scenario: One parameter of the twig rendering is tainted Scenario: One tainted parameter of the twig template is displayed with only the raw filter
Given I have the following code Given I have the following code
""" """
$untrusted = $_GET['untrusted']; $untrusted = $_GET['untrusted'];
@ -68,3 +68,103 @@ Feature: Twig tainting with analyzer
| Type | Message | | Type | Message |
| TaintedInput | Detected tainted html | | TaintedInput | Detected tainted html |
And I see no other errors And I see no other errors
Scenario: One tainted parameter of the twig rendering is displayed with some filter followed by the raw filter
Given I have the following code
"""
$untrusted = $_GET['untrusted'];
twig()->render('index.html.twig', ['untrusted' => $untrusted]);
"""
And I have the following "index.html.twig" template
"""
<h1>
{{ untrusted|upper|raw }}
</h1>
"""
When I run Psalm with taint analysis
Then I see these errors
| Type | Message |
| TaintedInput | Detected tainted html |
And I see no other errors
Scenario: One tainted parameter of the twig rendering is displayed with the raw filter followed by some other filter
Given I have the following code
"""
$untrusted = $_GET['untrusted'];
twig()->render('index.html.twig', ['untrusted' => $untrusted]);
"""
And I have the following "index.html.twig" template
"""
<h1>
{{ untrusted|raw|upper }}
</h1>
"""
When I run Psalm with taint analysis
And I see no errors
Scenario: One parameter of the twig rendering is tainted with inheritance
Given I have the following code
"""
$untrusted = $_GET['untrusted'];
twig()->render('index.html.twig', ['untrusted' => $untrusted]);
"""
And I have the following "base.html.twig" template
"""
Base
{% block body %}{% endblock %}
"""
And I have the following "index.html.twig" template
"""
{% extends 'base.html.twig' %}
{% block body %}
<h1>
{{ untrusted|raw }}
</h1>
{% endblock %}
"""
When I run Psalm with taint analysis
Then I see these errors
| Type | Message |
| TaintedInput | Detected tainted html |
And I see no other errors
Scenario: One tainted parameter of the twig template is displayed with autoescaping deactivated
Given I have the following code
"""
$untrusted = $_GET['untrusted'];
twig()->render('index.html.twig', ['untrusted' => $untrusted]);
"""
And I have the following "index.html.twig" template
"""
{% autoescape false %}
<h1>
{{ untrusted }}
</h1>
{% endautoescape %}
"""
When I run Psalm with taint analysis
Then I see these errors
| Type | Message |
| TaintedInput | Detected tainted html |
And I see no other errors
Scenario: One tainted parameter of the twig template is assigned to a variable and this variable is displayed
Given I have the following code
"""
$untrusted = $_GET['untrusted'];
twig()->render('index.html.twig', ['untrusted' => $untrusted]);
"""
And I have the following "index.html.twig" template
"""
{% set some_local_var = untrusted %}
{% set displayed_var = some_local_var %}
<h1>
{{ displayed_var|raw }}
</h1>
"""
When I run Psalm with taint analysis
Then I see these errors
| Type | Message |
| TaintedInput | Detected tainted html |
And I see no other errors