chore: refactor expression parser to avoid stack overflow issue (#177)

Signed-off-by: azjezz <azjezz@protonmail.com>
This commit is contained in:
Saif Eddin Gmati 2022-12-08 15:49:54 +01:00 committed by GitHub
parent ede98233c5
commit e74d0ec18e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 1324 additions and 1106 deletions

View File

@ -424,14 +424,28 @@ impl Lexer {
state.source.next();
DocStringKind::Nowdoc
}
_ => DocStringKind::Heredoc,
[b'"'] => {
state.source.next();
DocStringKind::Heredoc
}
[_, ..] => DocStringKind::Heredoc,
[] => {
return Err(SyntaxError::UnexpectedEndOfFile(state.source.span()));
}
};
// FIXME: Add support for nowdocs too by checking if a `'`
// character is present before and after the identifier.
let label: ByteString = match self.peek_identifier(state) {
Some(_) => self.consume_identifier(state).into(),
None => unreachable!(),
None => match state.source.current() {
Some(c) => {
return Err(SyntaxError::UnexpectedCharacter(*c, state.source.span()))
}
None => {
return Err(SyntaxError::UnexpectedEndOfFile(state.source.span()));
}
},
};
if doc_string_kind == DocStringKind::Nowdoc {
@ -445,6 +459,8 @@ impl Lexer {
));
}
};
} else if let Some(b'"') = state.source.current() {
state.source.next();
}
if !matches!(state.source.current(), Some(b'\n')) {

1153
src/parser/expressions.rs Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,11 +1,10 @@
use crate::lexer::token::TokenKind;
use crate::parser;
use crate::parser::ast::ArrayItem;
use crate::parser::ast::Expression;
use crate::parser::ast::ListItem;
use crate::parser::error::ParseError;
use crate::parser::error::ParseResult;
use crate::parser::internal::precedences::Precedence;
use crate::parser::expressions;
use crate::parser::internal::utils;
use crate::parser::state::State;
@ -38,7 +37,7 @@ pub fn list_expression(state: &mut State) -> ParseResult<Expression> {
));
}
let mut value = parser::expression(state, Precedence::Lowest)?;
let mut value = expressions::lowest_precedence(state)?;
if state.current.kind == TokenKind::DoubleArrow {
if !has_atleast_one_key && !items.is_empty() {
@ -62,7 +61,7 @@ pub fn list_expression(state: &mut State) -> ParseResult<Expression> {
}
has_atleast_one_key = true;
value = parser::expression(state, Precedence::Lowest)?;
value = expressions::lowest_precedence(state)?;
} else if has_atleast_one_key {
return Err(ParseError::CannotMixKeyedAndUnkeyedEntries(
state.current.span,
@ -148,7 +147,7 @@ pub fn legacy_array_expression(state: &mut State) -> ParseResult<Expression> {
(false, (0, 0))
};
let mut value = parser::expression(state, Precedence::Lowest)?;
let mut value = expressions::lowest_precedence(state)?;
// TODO: return error for `[...$a => $b]`.
if state.current.kind == TokenKind::DoubleArrow {
@ -170,7 +169,7 @@ pub fn legacy_array_expression(state: &mut State) -> ParseResult<Expression> {
false
};
value = parser::expression(state, Precedence::Lowest)?;
value = expressions::lowest_precedence(state)?;
}
items.push(ArrayItem {
@ -211,7 +210,7 @@ fn array_pair(state: &mut State) -> ParseResult<ArrayItem> {
(false, (0, 0))
};
let mut value = parser::expression(state, Precedence::Lowest)?;
let mut value = expressions::lowest_precedence(state)?;
if state.current.kind == TokenKind::DoubleArrow {
state.next();
@ -229,7 +228,7 @@ fn array_pair(state: &mut State) -> ParseResult<ArrayItem> {
} else {
false
};
value = parser::expression(state, Precedence::Lowest)?;
value = expressions::lowest_precedence(state)?;
}
Ok(ArrayItem {

View File

@ -1,9 +1,8 @@
use crate::lexer::token::TokenKind;
use crate::parser;
use crate::parser::ast::attributes::Attribute;
use crate::parser::ast::attributes::AttributeGroup;
use crate::parser::error::ParseResult;
use crate::parser::internal::precedences::Precedence;
use crate::parser::expressions;
use crate::parser::internal::utils;
use crate::parser::state::State;
@ -21,7 +20,7 @@ pub fn gather_attributes(state: &mut State) -> ParseResult<bool> {
while state.current.kind != TokenKind::RightBracket {
let start = state.current.span;
let expression = parser::expression(state, Precedence::Lowest)?;
let expression = expressions::lowest_precedence(state)?;
let end = state.current.span;
members.push(Attribute {

View File

@ -2,7 +2,6 @@ use crate::expect_token;
use crate::expected_scope;
use crate::lexer::token::Span;
use crate::lexer::token::TokenKind;
use crate::parser;
use crate::parser::ast::classish::ClassishConstant;
use crate::parser::ast::enums::BackedEnumCase;
use crate::parser::ast::enums::BackedEnumMember;
@ -15,12 +14,12 @@ use crate::parser::ast::Statement;
use crate::parser::ast::TraitAdaptation;
use crate::parser::error::ParseError;
use crate::parser::error::ParseResult;
use crate::parser::expressions;
use crate::parser::internal::attributes;
use crate::parser::internal::data_type;
use crate::parser::internal::functions;
use crate::parser::internal::identifiers;
use crate::parser::internal::modifiers;
use crate::parser::internal::precedences::Precedence;
use crate::parser::internal::utils;
use crate::parser::state::Scope;
use crate::parser::state::State;
@ -115,7 +114,7 @@ pub fn backed_enum_member(state: &mut State) -> ParseResult<BackedEnumMember> {
utils::skip(state, TokenKind::Equals)?;
let value = parser::expression(state, Precedence::Lowest)?;
let value = expressions::lowest_precedence(state)?;
let end = utils::skip_semicolon(state)?;
@ -152,18 +151,16 @@ pub fn class_like_statement(state: &mut State) -> ParseResult<Statement> {
let start = state.current.span;
let modifiers = modifiers::collect(state)?;
if !has_attributes {
if state.current.kind == TokenKind::Use {
return parse_classish_uses(state);
}
if !has_attributes && state.current.kind == TokenKind::Use {
return parse_classish_uses(state);
}
if state.current.kind == TokenKind::Const {
return Ok(Statement::ClassishConstant(constant(
state,
modifiers::constant_group(modifiers)?,
start,
)?));
}
if state.current.kind == TokenKind::Const {
return Ok(Statement::ClassishConstant(constant(
state,
modifiers::constant_group(modifiers)?,
start,
)?));
}
if state.current.kind == TokenKind::Function {
@ -185,7 +182,7 @@ pub fn class_like_statement(state: &mut State) -> ParseResult<Statement> {
// e.g: = "foo";
if state.current.kind == TokenKind::Equals {
state.next();
value = Some(parser::expression(state, Precedence::Lowest)?);
value = Some(expressions::lowest_precedence(state)?);
}
let class_name: String = expected_scope!([
@ -395,7 +392,7 @@ fn constant(
utils::skip(state, TokenKind::Equals)?;
let value = parser::expression(state, Precedence::Lowest)?;
let value = expressions::lowest_precedence(state)?;
let end = utils::skip_semicolon(state)?;

View File

@ -10,8 +10,8 @@ use crate::parser::ast::MatchArm;
use crate::parser::ast::Statement;
use crate::parser::error::ParseError;
use crate::parser::error::ParseResult;
use crate::parser::expressions;
use crate::parser::internal::blocks;
use crate::parser::internal::precedences::Precedence;
use crate::parser::internal::utils;
use crate::parser::state::State;
@ -20,7 +20,7 @@ pub fn match_expression(state: &mut State) -> ParseResult<Expression> {
utils::skip_left_parenthesis(state)?;
let condition = Box::new(parser::expression(state, Precedence::Lowest)?);
let condition = Box::new(expressions::lowest_precedence(state)?);
utils::skip_right_parenthesis(state)?;
utils::skip_left_brace(state)?;
@ -46,13 +46,13 @@ pub fn match_expression(state: &mut State) -> ParseResult<Expression> {
utils::skip_double_arrow(state)?;
let body = parser::expression(state, Precedence::Lowest)?;
let body = expressions::lowest_precedence(state)?;
default = Some(Box::new(DefaultMatchArm { body }));
} else {
let mut conditions = Vec::new();
while state.current.kind != TokenKind::DoubleArrow {
conditions.push(parser::expression(state, Precedence::Lowest)?);
conditions.push(expressions::lowest_precedence(state)?);
if state.current.kind == TokenKind::Comma {
state.next();
@ -67,7 +67,7 @@ pub fn match_expression(state: &mut State) -> ParseResult<Expression> {
break;
}
let body = parser::expression(state, Precedence::Lowest)?;
let body = expressions::lowest_precedence(state)?;
arms.push(MatchArm { conditions, body });
}
@ -93,7 +93,7 @@ pub fn switch_statement(state: &mut State) -> ParseResult<Statement> {
utils::skip_left_parenthesis(state)?;
let condition = parser::expression(state, Precedence::Lowest)?;
let condition = expressions::lowest_precedence(state)?;
utils::skip_right_parenthesis(state)?;
@ -111,7 +111,7 @@ pub fn switch_statement(state: &mut State) -> ParseResult<Statement> {
TokenKind::Case => {
state.next();
let condition = parser::expression(state, Precedence::Lowest)?;
let condition = expressions::lowest_precedence(state)?;
utils::skip_any_of(state, &[TokenKind::Colon, TokenKind::SemiColon])?;
@ -170,7 +170,7 @@ pub fn if_statement(state: &mut State) -> ParseResult<Statement> {
utils::skip_left_parenthesis(state)?;
let condition = parser::expression(state, Precedence::Lowest)?;
let condition = expressions::lowest_precedence(state)?;
utils::skip_right_parenthesis(state)?;
@ -201,7 +201,7 @@ pub fn if_statement(state: &mut State) -> ParseResult<Statement> {
state.next();
utils::skip_left_parenthesis(state)?;
let condition = parser::expression(state, Precedence::Lowest)?;
let condition = expressions::lowest_precedence(state)?;
utils::skip_right_parenthesis(state)?;
utils::skip_colon(state)?;
@ -260,7 +260,7 @@ pub fn if_statement(state: &mut State) -> ParseResult<Statement> {
utils::skip_left_parenthesis(state)?;
let condition = parser::expression(state, Precedence::Lowest)?;
let condition = expressions::lowest_precedence(state)?;
utils::skip_right_parenthesis(state)?;

View File

@ -1,7 +1,6 @@
use crate::expected_scope;
use crate::lexer::token::Span;
use crate::lexer::token::TokenKind;
use crate::parser;
use crate::parser::ast::functions::ArrowFunction;
use crate::parser::ast::functions::Closure;
use crate::parser::ast::functions::ClosureUse;
@ -13,11 +12,11 @@ use crate::parser::ast::Expression;
use crate::parser::ast::Statement;
use crate::parser::error::ParseError;
use crate::parser::error::ParseResult;
use crate::parser::expressions;
use crate::parser::internal::blocks;
use crate::parser::internal::data_type;
use crate::parser::internal::identifiers;
use crate::parser::internal::parameters;
use crate::parser::internal::precedences::Precedence;
use crate::parser::internal::utils;
use crate::parser::state::Scope;
use crate::parser::state::State;
@ -62,7 +61,7 @@ pub fn anonymous_function(state: &mut State) -> ParseResult<Expression> {
// TODO(azjezz): this shouldn't call expr, we should have a function
// just for variables, so we don't have to go through the whole `match` in `expression(...)`
let var = match parser::expression(state, Precedence::Lowest)? {
let var = match expressions::lowest_precedence(state)? {
s @ Expression::Variable { .. } => ClosureUse { var: s, by_ref },
_ => {
return Err(ParseError::UnexpectedToken(
@ -146,7 +145,7 @@ pub fn arrow_function(state: &mut State) -> ParseResult<Expression> {
utils::skip(state, TokenKind::DoubleArrow)?;
let body = scoped!(state, Scope::ArrowFunction(is_static), {
Box::new(parser::expression(state, Precedence::Lowest)?)
Box::new(expressions::lowest_precedence(state)?)
});
let end = state.current.span;

View File

@ -1,9 +1,8 @@
use crate::lexer::token::TokenKind;
use crate::parser;
use crate::parser::ast::Statement;
use crate::parser::error::ParseResult;
use crate::parser::expressions;
use crate::parser::internal::blocks;
use crate::parser::internal::precedences::Precedence;
use crate::parser::internal::utils;
use crate::parser::state::State;
@ -12,7 +11,7 @@ pub fn foreach_loop(state: &mut State) -> ParseResult<Statement> {
utils::skip_left_parenthesis(state)?;
let expr = parser::expression(state, Precedence::Lowest)?;
let expr = expressions::lowest_precedence(state)?;
utils::skip(state, TokenKind::As)?;
@ -22,7 +21,7 @@ pub fn foreach_loop(state: &mut State) -> ParseResult<Statement> {
}
let mut key_var = None;
let mut value_var = parser::expression(state, Precedence::Lowest)?;
let mut value_var = expressions::lowest_precedence(state)?;
if state.current.kind == TokenKind::DoubleArrow {
state.next();
@ -34,7 +33,7 @@ pub fn foreach_loop(state: &mut State) -> ParseResult<Statement> {
state.next();
}
value_var = parser::expression(state, Precedence::Lowest)?;
value_var = expressions::lowest_precedence(state)?;
}
utils::skip_right_parenthesis(state)?;
@ -76,7 +75,7 @@ pub fn for_loop(state: &mut State) -> ParseResult<Statement> {
break;
}
init.push(parser::expression(state, Precedence::Lowest)?);
init.push(expressions::lowest_precedence(state)?);
if state.current.kind == TokenKind::Comma {
state.next();
@ -93,7 +92,7 @@ pub fn for_loop(state: &mut State) -> ParseResult<Statement> {
break;
}
condition.push(parser::expression(state, Precedence::Lowest)?);
condition.push(expressions::lowest_precedence(state)?);
if state.current.kind == TokenKind::Comma {
state.next();
@ -109,7 +108,7 @@ pub fn for_loop(state: &mut State) -> ParseResult<Statement> {
break;
}
r#loop.push(parser::expression(state, Precedence::Lowest)?);
r#loop.push(expressions::lowest_precedence(state)?);
if state.current.kind == TokenKind::Comma {
state.next();
@ -155,7 +154,7 @@ pub fn do_loop(state: &mut State) -> ParseResult<Statement> {
utils::skip(state, TokenKind::While)?;
utils::skip_left_parenthesis(state)?;
let condition = parser::expression(state, Precedence::Lowest)?;
let condition = expressions::lowest_precedence(state)?;
utils::skip_right_parenthesis(state)?;
utils::skip_semicolon(state)?;
@ -167,7 +166,7 @@ pub fn while_loop(state: &mut State) -> ParseResult<Statement> {
utils::skip_left_parenthesis(state)?;
let condition = parser::expression(state, Precedence::Lowest)?;
let condition = expressions::lowest_precedence(state)?;
utils::skip_right_parenthesis(state)?;
@ -203,7 +202,7 @@ pub fn continue_statement(state: &mut State) -> ParseResult<Statement> {
let mut num = None;
if state.current.kind != TokenKind::SemiColon {
num = Some(parser::expression(state, Precedence::Lowest)?);
num = Some(expressions::lowest_precedence(state)?);
}
utils::skip_semicolon(state)?;
@ -216,7 +215,7 @@ pub fn break_statement(state: &mut State) -> ParseResult<Statement> {
let mut num = None;
if state.current.kind != TokenKind::SemiColon {
num = Some(parser::expression(state, Precedence::Lowest)?);
num = Some(expressions::lowest_precedence(state)?);
}
utils::skip_semicolon(state)?;

View File

@ -1,6 +1,5 @@
use super::identifiers;
use crate::lexer::token::TokenKind;
use crate::parser;
use crate::parser::ast::functions::FunctionParameter;
use crate::parser::ast::functions::FunctionParameterList;
use crate::parser::ast::functions::MethodParameter;
@ -9,10 +8,10 @@ use crate::parser::ast::Arg;
use crate::parser::ast::Expression;
use crate::parser::error::ParseError;
use crate::parser::error::ParseResult;
use crate::parser::expressions;
use crate::parser::internal::attributes;
use crate::parser::internal::data_type;
use crate::parser::internal::modifiers;
use crate::parser::internal::precedences::Precedence;
use crate::parser::internal::utils;
use crate::parser::state::Scope;
use crate::parser::state::State;
@ -52,7 +51,7 @@ pub fn function_parameter_list(state: &mut State) -> Result<FunctionParameterLis
let mut default = None;
if state.current.kind == TokenKind::Equals {
state.next();
default = Some(parser::expression(state, Precedence::Lowest)?);
default = Some(expressions::lowest_precedence(state)?);
}
let end = state.current.span;
@ -202,7 +201,7 @@ pub fn method_parameter_list(state: &mut State) -> Result<MethodParameterList, P
let mut default = None;
if state.current.kind == TokenKind::Equals {
state.next();
default = Some(parser::expression(state, Precedence::Lowest)?);
default = Some(expressions::lowest_precedence(state)?);
}
let end = state.current.span;
@ -277,7 +276,7 @@ pub fn args_list(state: &mut State) -> ParseResult<Vec<Arg>> {
break;
}
let value = parser::expression(state, Precedence::Lowest)?;
let value = expressions::lowest_precedence(state)?;
args.push(Arg {
name,

View File

@ -7,7 +7,7 @@ pub enum Associativity {
}
#[allow(dead_code)]
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub enum Precedence {
Lowest,
IncDec,

View File

@ -1,5 +1,4 @@
use crate::lexer::token::TokenKind;
use crate::parser;
use crate::parser::ast::try_block::CatchBlock;
use crate::parser::ast::try_block::CatchType;
use crate::parser::ast::try_block::FinallyBlock;
@ -7,9 +6,9 @@ use crate::parser::ast::try_block::TryBlock;
use crate::parser::ast::Statement;
use crate::parser::error::ParseError;
use crate::parser::error::ParseResult;
use crate::parser::expressions;
use crate::parser::internal::blocks;
use crate::parser::internal::identifiers;
use crate::parser::internal::precedences::Precedence;
use crate::parser::internal::utils;
use crate::parser::state::State;
@ -39,7 +38,7 @@ pub fn try_block(state: &mut State) -> ParseResult<Statement> {
None
} else {
// TODO(azjezz): this is a variable, not an expression?
Some(parser::expression(state, Precedence::Lowest)?)
Some(expressions::lowest_precedence(state)?)
};
utils::skip_right_parenthesis(state)?;

View File

@ -1,9 +1,8 @@
use crate::lexer::token::TokenKind;
use crate::parser;
use crate::parser::ast::Expression;
use crate::parser::error::ParseResult;
use crate::parser::expressions;
use crate::parser::internal::identifiers;
use crate::parser::internal::precedences::Precedence;
use crate::parser::internal::utils;
use crate::parser::state::State;
use crate::peek_token;
@ -16,7 +15,7 @@ pub fn dynamic_variable(state: &mut State) -> ParseResult<Expression> {
state.next();
// TODO(azjezz): this is not an expression! it's a constant expression
let name = parser::expression(state, Precedence::Lowest)?;
let name = expressions::lowest_precedence(state)?;
utils::skip_right_brace(state)?;

File diff suppressed because it is too large Load Diff

View File

@ -2,9 +2,15 @@ use std::env;
use std::fs;
use std::path::PathBuf;
use std::process::Command;
use std::thread;
use php_parser_rs::lexer::Lexer;
/// Outcome of tokenizing and parsing a single file inside a worker thread.
///
/// Each worker collects one `TestResult` per file; the results are merged
/// after the threads are joined and the test panics if any of them is an
/// `Error`.
enum TestResult {
/// The file was tokenized and parsed without error.
Success,
/// Tokenizing or parsing failed; holds the already-formatted failure
/// message (thread name, file name and the debug-printed error) that is
/// printed once all threads have finished.
Error(String),
}
#[test]
fn third_party_1_php_standard_library() {
test_repository(
@ -33,9 +39,11 @@ fn third_party_3_symfony_framework() {
"symfony-framework",
"https://github.com/symfony/symfony",
"6.3",
&["src/Symfony"],
&["src/Symfony/"],
&[
// stub
"src/Symfony/Bridge/ProxyManager/Tests/LazyProxy/PhpDumper/Fixtures/proxy-implem.php",
// file contains syntax error used for testing.
"src/Symfony/Component/Config/Tests/Fixtures/ParseError.php",
// FIXME: Remove this one once I've found the energy to sort out heredocs / nowdocs.
"src/Symfony/Component/DependencyInjection/LazyProxy/PhpDumper/LazyServiceDumper.php",
@ -98,12 +106,86 @@ fn test_repository(
}
}
let mut entries = vec![];
for dir in directories {
test_directory(out_path.clone(), out_path.join(dir), ignore);
entries.append(&mut read_directory(
out_path.clone(),
out_path.join(dir),
ignore,
));
}
let mut threads = vec![];
for (index, chunk) in entries.chunks(entries.len() / 4).enumerate() {
let chunk = chunk.to_vec();
let thread = thread::Builder::new()
.stack_size(16 * 1024 * 1024)
.name(format!("{name}:{index}"))
.spawn(move || {
let thread = thread::current();
let thread_name = thread.name().unwrap();
let mut results = vec![];
for (name, filename) in chunk {
let code = std::fs::read(&filename).unwrap();
match Lexer::new().tokenize(&code) {
Ok(tokens) => match php_parser_rs::parse(tokens) {
Ok(ast) => {
println!("✅ [{thread_name}][{name}]: {} statement(s).", ast.len());
results.push(TestResult::Success);
}
Err(error) => {
results.push(TestResult::Error(format!(
"❌ [{thread_name}][{name}]: {error:?}"
)));
}
},
Err(error) => {
results.push(TestResult::Error(format!(
"❌ [{thread_name}][{name}]: {error:?}"
)));
}
}
}
results
});
threads.push(thread);
}
let mut results = vec![];
for thread in threads {
let mut result = thread
.unwrap_or_else(|e| panic!("failed to spawn thread: {:#?}", e))
.join()
.unwrap_or_else(|e| panic!("failed to join thread: {:#?}", e));
results.append(&mut result);
}
let mut fail = false;
results
.iter()
.map(|result| match result {
TestResult::Error(message) => {
fail = true;
println!("{}", message);
}
TestResult::Success => {}
})
.for_each(drop);
if fail {
panic!();
}
}
fn test_directory(root: PathBuf, directory: PathBuf, ignore: &[&str]) {
fn read_directory(root: PathBuf, directory: PathBuf, ignore: &[&str]) -> Vec<(String, PathBuf)> {
let mut results = vec![];
let mut entries = fs::read_dir(&directory)
.unwrap()
.flatten()
@ -114,7 +196,7 @@ fn test_directory(root: PathBuf, directory: PathBuf, ignore: &[&str]) {
for entry in entries {
if entry.is_dir() {
test_directory(root.clone(), entry, ignore);
results.append(&mut read_directory(root.clone(), entry, ignore));
continue;
}
@ -136,26 +218,9 @@ fn test_directory(root: PathBuf, directory: PathBuf, ignore: &[&str]) {
.strip_prefix(root.to_str().unwrap())
.unwrap();
test_file(name, entry);
results.push((name.to_string(), entry));
}
}
}
fn test_file(name: &str, filename: PathBuf) {
let code = std::fs::read(&filename).unwrap();
Lexer::new()
.tokenize(&code)
.map(|tokens| {
php_parser_rs::parse(tokens)
.map(|_| {
println!("✅ successfully parsed file: `\"{}\"`.", name);
})
.unwrap_or_else(|error| {
panic!("❌ failed to parse file: `\"{name}\"`, error: {error:?}")
})
})
.unwrap_or_else(|error| {
panic!("❌ failed to tokenize file: `\"{name}\"`, error: {error:?}")
});
results
}