# Mirror of https://github.com/danog/flowfusion.git (synced 2025-01-18 03:22:24 +01:00).
# Original file: 127 lines, 4.9 KiB, Python, executable.
import re

"""
dataflow.py is a module for conducting lightweight source code analysis on PHP code.
It performs a very coarse-grained dataflow extraction, which is different from traditional
taint analysis. The focus is on tolerating false positives (FP) and false negatives (FN)
in the context of fuzz testing.
"""


class PHPFastDataflow:
    """
    Fast, coarse-grained dataflow analysis for PHP source code.

    Two variables are treated as related when they appear on the same source
    line. Related variables are grouped transitively, so after ``analyze()``
    every variable found in the code belongs to exactly one group. The
    analysis is purely regex- and line-based (no parsing), trading precision
    for speed.
    """

    # PHP variable-name pattern, compiled once and shared by all instances.
    # NOTE(review): on a decoded ``str`` the ``\x80-\xff`` range only covers
    # code points U+0080..U+00FF; confirm how callers decode the PHP source
    # if names using other non-ASCII characters must be recognised.
    _VARIABLE_PATTERN = re.compile(
        r"\$[a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*"
    )

    def __init__(self):
        """
        Initializes the PHPFastDataflow object with empty state.
        """
        # Source text under analysis. Initialised here so the extraction
        # methods can be called safely before analyze().
        self.phpcode = ""
        # Variable names first, groups of related variables after merging.
        self.variables = []
        # Only reset by clean() in this class; kept for compatibility.
        self.dataflows = []

    def clean(self):
        """
        Resets the variables and dataflows to empty lists.
        """
        self.variables = []
        self.dataflows = []

    def _find_variables(self, text):
        """Returns the distinct PHP variables in ``text``, in first-seen order."""
        # dict.fromkeys() de-duplicates while keeping insertion order, so the
        # result is reproducible from run to run (unlike list(set(...))).
        return list(dict.fromkeys(self._VARIABLE_PATTERN.findall(text)))

    def extract_variables(self):
        """
        Extracts all distinct PHP variables from ``self.phpcode``.

        The result is stored in ``self.variables`` as a list of names, in
        order of first appearance.
        """
        self.variables = self._find_variables(self.phpcode)

    def analyze_php_line(self, php_line):
        """
        Analyzes a single line of PHP code to find variables and check
        whether several distinct variables interact on that line.

        Args:
        - php_line: One line of PHP source code.

        Returns:
        - A tuple (True, [variables]) if more than one distinct variable is found.
        - A tuple (False, None) otherwise.
        """
        variables = self._find_variables(php_line)
        if len(variables) > 1:
            return (True, variables)
        return (False, None)

    def merge_dataflows(self):
        """
        Merges the entries of ``self.variables`` into disjoint groups.

        Each entry is either a single variable name or a list of names that
        interact. Entries sharing at least one variable are merged, and the
        merge is transitive: an entry that bridges two existing groups joins
        them into one. Afterwards ``self.variables`` is a list of lists in
        which no variable appears twice.
        """
        merged_groups = []
        for entry in self.variables:
            # Normalise bare names to one-element groups; copy lists so the
            # caller's objects are never mutated.
            members = list(entry) if isinstance(entry, list) else [entry]
            member_set = set(members)

            target = None   # first existing group overlapping this entry
            survivors = []  # groups that remain after this entry is folded in
            for existing in merged_groups:
                if member_set.isdisjoint(existing):
                    survivors.append(existing)
                elif target is None:
                    target = existing
                    survivors.append(existing)
                else:
                    # A second overlapping group is connected to the first
                    # through this entry, so fold it in instead of keeping
                    # it as a separate group.
                    for var in existing:
                        if var not in target:
                            target.append(var)

            if target is None:
                target = []
                survivors.append(target)
            for var in members:
                if var not in target:
                    target.append(var)

            merged_groups = survivors

        self.variables = merged_groups

    def extract_dataflow(self):
        """
        Extracts dataflows from ``self.phpcode`` by analyzing each line.

        Every line on which several distinct variables appear contributes one
        group. The groups are then merged with the entries already present in
        ``self.variables``.
        """
        line_groups = []
        for eachline in self.phpcode.split('\n'):
            result, variables = self.analyze_php_line(eachline)
            if result:
                line_groups.append(variables)

        # Merging is transitive, so a single pass over everything gives the
        # same grouping as merging after each line, without the repeated work.
        self.variables = list(self.variables) + line_groups
        self.merge_dataflows()

    def analyze(self, phpcode):
        """
        The main function to analyze a given PHP code for dataflows.

        Args:
        - phpcode: The PHP source code to analyze (a string).

        Returns:
        - A list of lists; each inner list is one group of interacting
          variables. Variables that interact with nothing form their own
          single-element group.
        """
        self.phpcode = phpcode
        self.clean()              # Reset state left over from a previous run
        self.extract_variables()  # Collect every variable as a candidate group
        self.extract_dataflow()   # Join variables that share a line
        return self.variables