diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index a8ae0db..2d0c03d 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs @@ -424,14 +424,28 @@ impl Lexer { state.source.next(); DocStringKind::Nowdoc } - _ => DocStringKind::Heredoc, + [b'"'] => { + state.source.next(); + DocStringKind::Heredoc + } + [_, ..] => DocStringKind::Heredoc, + [] => { + return Err(SyntaxError::UnexpectedEndOfFile(state.source.span())); + } }; // FIXME: Add support for nowdocs too by checking if a `'` // character is present before and after the identifier. let label: ByteString = match self.peek_identifier(state) { Some(_) => self.consume_identifier(state).into(), - None => unreachable!(), + None => match state.source.current() { + Some(c) => { + return Err(SyntaxError::UnexpectedCharacter(*c, state.source.span())) + } + None => { + return Err(SyntaxError::UnexpectedEndOfFile(state.source.span())); + } + }, }; if doc_string_kind == DocStringKind::Nowdoc { @@ -445,6 +459,8 @@ impl Lexer { )); } }; + } else if let Some(b'"') = state.source.current() { + state.source.next(); } if !matches!(state.source.current(), Some(b'\n')) { diff --git a/src/parser/expressions.rs b/src/parser/expressions.rs new file mode 100644 index 0000000..c3f12c8 --- /dev/null +++ b/src/parser/expressions.rs @@ -0,0 +1,1153 @@ +use crate::expect_token; +use crate::expected_token_err; +use crate::lexer::token::TokenKind; +use crate::lexer::DocStringKind; +use crate::parser::ast; +use crate::parser::ast::identifiers::Identifier; +use crate::parser::ast::variables::Variable; +use crate::parser::ast::StringPart; +use crate::parser::ast::{Expression, IncludeKind, MagicConst}; +use crate::parser::error::ParseError; +use crate::parser::error::ParseResult; +use crate::parser::internal::arrays; +use crate::parser::internal::attributes; +use crate::parser::internal::classish; +use crate::parser::internal::control_flow; +use crate::parser::internal::functions; +use crate::parser::internal::identifiers; +use crate::parser::internal::parameters; +use crate::parser::internal::precedences::Associativity; +use crate::parser::internal::precedences::Precedence; +use crate::parser::internal::utils; +use crate::parser::internal::variables; +use crate::parser::state::State; + +pub fn lowest_precedence(state: &mut State) -> ParseResult { + for_precedence(state, Precedence::Lowest) +} + +pub fn null_coalesce_precedence(state: &mut State) -> ParseResult { + for_precedence(state, Precedence::NullCoalesce) +} + +pub fn clone_or_new_precedence(state: &mut State) -> ParseResult { + for_precedence(state, Precedence::CloneOrNew) +} + +fn for_precedence(state: &mut State, precedence: Precedence) -> ParseResult { + let mut left = create(state)?; + + if state.current.kind == TokenKind::SemiColon { + return Ok(left); + } + + state.skip_comments(); + + loop { + state.skip_comments(); + + if matches!(state.current.kind, TokenKind::SemiColon | TokenKind::Eof) { + break; + } + + let span = state.current.span; + let kind = state.current.kind.clone(); + + if is_postfix(&kind) { + let lpred = Precedence::postfix(&kind); + + if lpred < precedence { + break; + } + + left = postfix(state, left, &kind)?; + continue; + } + + if is_infix(&kind) { + let rpred = Precedence::infix(&kind); + + if rpred < precedence { + break; + } + + if rpred == precedence && matches!(rpred.associativity(), Some(Associativity::Left)) { + break; + } + + if rpred == precedence && matches!(rpred.associativity(), Some(Associativity::Non)) { + return Err(ParseError::UnexpectedToken(kind.to_string(), span)); + } + + state.next(); + + match kind { + TokenKind::Question => { + let then = lowest_precedence(state)?; + utils::skip_colon(state)?; + let otherwise = for_precedence(state, rpred)?; + left = Expression::Ternary { + condition: Box::new(left), + then: Some(Box::new(then)), + r#else: Box::new(otherwise), + } + } + TokenKind::QuestionColon => { + let r#else = lowest_precedence(state)?; + left = Expression::Ternary { + condition: Box::new(left), + then: None, + r#else: Box::new(r#else), + } + } + _ => { + // FIXME: Hacky, should probably be refactored. + left = match kind { + TokenKind::Equals if state.current.kind == TokenKind::Ampersand => { + state.next(); + Expression::Infix { + lhs: Box::new(left), + op: ast::InfixOp::AssignRef, + rhs: Box::new(for_precedence(state, rpred)?), + } + } + TokenKind::Instanceof if state.current.kind == TokenKind::Self_ => { + if !state.has_class_scope { + return Err(ParseError::CannotFindTypeInCurrentScope( + state.current.kind.to_string(), + state.current.span, + )); + } + + state.next(); + + Expression::Infix { + lhs: Box::new(left), + op: ast::InfixOp::Instanceof, + rhs: Box::new(Expression::Self_), + } + } + TokenKind::Instanceof if state.current.kind == TokenKind::Parent => { + if !state.has_class_scope { + return Err(ParseError::CannotFindTypeInCurrentScope( + state.current.kind.to_string(), + state.current.span, + )); + } + + state.next(); + + Expression::Infix { + lhs: Box::new(left), + op: ast::InfixOp::Instanceof, + rhs: Box::new(Expression::Parent), + } + } + TokenKind::Instanceof if state.current.kind == TokenKind::Static => { + if !state.has_class_scope { + return Err(ParseError::CannotFindTypeInCurrentScope( + state.current.kind.to_string(), + state.current.span, + )); + } + + state.next(); + + Expression::Infix { + lhs: Box::new(left), + op: ast::InfixOp::Instanceof, + rhs: Box::new(Expression::Static), + } + } + _ => Expression::Infix { + lhs: Box::new(left), + op: kind.into(), + rhs: Box::new(for_precedence(state, rpred)?), + }, + }; + } + } + + continue; + } + + break; + } + + state.skip_comments(); + + Ok(left) +} + +fn create(state: &mut State) -> ParseResult { + if state.is_eof() { + return Err(ParseError::UnexpectedEndOfFile); + } + + attributes(state) +} + +macro_rules! expressions { + ($(#[before($else:ident), current($( $current:pat_param )|+) $(, peek($( $peek:pat_param )|+))?] $expr:ident($out:expr))+) => { + $( + #[inline(never)] + fn $expr(state: &mut State) -> ParseResult { + state.skip_comments(); + + match &state.current.kind { + $( $current )|+ $( if matches!(&state.peek.kind, $( $peek )|+ ))? => $out(state), + _ => $else(state), + } + } + )+ + }; +} + +expressions! { + #[before(static_arrow_function), current(TokenKind::Attribute)] + attributes(|state: &mut State| { + attributes::gather_attributes(state)?; + + match &state.current.kind { + TokenKind::Static if state.peek.kind == TokenKind::Function => { + functions::anonymous_function(state) + } + TokenKind::Static if state.peek.kind == TokenKind::Fn => { + functions::arrow_function(state) + } + TokenKind::Function => functions::anonymous_function(state), + TokenKind::Fn => functions::arrow_function(state), + _ => { + // Note, we can get attributes and know their span, maybe use that in the + // error in the future? + Err(ParseError::ExpectedItemDefinitionAfterAttributes( + state.current.span, + )) + } + } + }) + + #[before(static_anonymous_function), current(TokenKind::Static), peek(TokenKind::Fn)] + static_arrow_function(|state: &mut State| { + functions::arrow_function(state) + }) + + #[before(arrow_function), current(TokenKind::Static), peek(TokenKind::Function)] + static_anonymous_function(|state: &mut State| { + functions::anonymous_function(state) + }) + + #[before(anonymous_function), current(TokenKind::Fn)] + arrow_function(|state: &mut State| { + functions::arrow_function(state) + }) + + #[before(list), current(TokenKind::Function)] + anonymous_function(|state: &mut State| { + functions::anonymous_function(state) + }) + + #[before(anonymous_class), current(TokenKind::List)] + list(|state: &mut State| { + arrays::list_expression(state) + }) + + #[before(throw), current(TokenKind::New), peek(TokenKind::Class | TokenKind::Attribute)] + anonymous_class(|state: &mut State| { + classish::anonymous_class_definition(state) + }) + + #[before(r#yield), current(TokenKind::Throw)] + throw(|state: &mut State| { + + state.next(); + + // TODO(azjezz): we start parsing from anynomous class here, because we know that + // the right expression can't be an anonymous function, or a list. + // however, there's many other things that it can't be. + let value = anonymous_class(state)?; + + Ok(Expression::Throw{ + value: Box::new(value) + }) + }) + + #[before(clone), current(TokenKind::Yield)] + r#yield(|state: &mut State| { + state.next(); + + if state.current.kind == TokenKind::SemiColon { + Ok(Expression::Yield { + key: None, + value: None, + }) + } else { + let mut from = false; + + if state.current.kind == TokenKind::From { + state.next(); + from = true; + } + + let mut key = None; + let mut value = Box::new(for_precedence( + state, + if from { + Precedence::YieldFrom + } else { + Precedence::Yield + }, + )?); + + if state.current.kind == TokenKind::DoubleArrow && !from { + state.next(); + key = Some(value.clone()); + value = Box::new(for_precedence(state, Precedence::Yield)?); + } + + if from { + Ok(Expression::YieldFrom { value }) + } else { + Ok(Expression::Yield { + key, + value: Some(value), + }) + } + } + }) + + #[before(variable), current(TokenKind::Clone)] + clone(|state: &mut State| { + state.next(); + + let target = for_precedence(state, Precedence::CloneOrNew)?; + + Ok(Expression::Clone { + target: Box::new(target), + }) + }) + + #[before(r#true), current(TokenKind::Variable(_))] + variable(|state: &mut State| { + Ok(Expression::Variable( + identifiers::var(state)? + )) + }) + + #[before(r#false), current(TokenKind::True)] + r#true(|state: &mut State| { + state.next(); + + Ok(Expression::Bool { value: true }) + }) + + #[before(null), current(TokenKind::False)] + r#false(|state: &mut State| { + state.next(); + + Ok(Expression::Bool { value: false }) + }) + + #[before(literal_integer), current(TokenKind::Null)] + null(|state: &mut State| { + state.next(); + + Ok(Expression::Null) + }) + + #[before(literal_float), current(TokenKind::LiteralInteger(_))] + literal_integer(|state: &mut State| { + if let TokenKind::LiteralInteger(i) = &state.current.kind { + let e = Expression::LiteralInteger { i: i.clone() }; + state.next(); + + Ok(e) + } else { + unreachable!("{}:{}", file!(), line!()); + } + }) + + #[before(literal_string), current(TokenKind::LiteralFloat(_))] + literal_float(|state: &mut State| { + if let TokenKind::LiteralFloat(f) = &state.current.kind { + let e = Expression::LiteralFloat { f: f.clone() }; + + state.next(); + + Ok(e) + } else { + unreachable!("{}:{}", file!(), line!()); + } + }) + + #[before(string_part), current(TokenKind::LiteralString(_))] + literal_string(|state: &mut State| { + if let TokenKind::LiteralString(value) = &state.current.kind { + let e = Expression::LiteralString { value: value.clone() }; + state.next(); + + Ok(e) + } else { + unreachable!("{}:{}", file!(), line!()); + } + }) + + #[before(start_doc_string), current(TokenKind::StringPart(_))] + string_part(|state: &mut State| { + interpolated_string(state) + }) + + #[before(backtick), current(TokenKind::StartDocString(_, _))] + start_doc_string(|state: &mut State| { + if let TokenKind::StartDocString(_, kind) = &state.current.kind { + let kind = *kind; + + doc_string(state, kind) + } else { + unreachable!("{}:{}", file!(), line!()); + } + }) + + #[before(identifier), current(TokenKind::Backtick)] + backtick(|state: &mut State| { + shell_exec(state) + }) + + #[before(self_postfix), current(TokenKind::Identifier(_) | TokenKind::QualifiedIdentifier(_) | TokenKind::FullyQualifiedIdentifier(_))] + identifier(|state: &mut State| { + Ok(Expression::Identifier(identifiers::full_name(state)?)) + }) + + #[before(static_postfix), current(TokenKind::Self_)] + self_postfix(|state: &mut State| { + if !state.has_class_scope { + return Err(ParseError::CannotFindTypeInCurrentScope( + state.current.kind.to_string(), + state.current.span, + )); + } + + state.next(); + + postfix(state, Expression::Self_, &TokenKind::DoubleColon) + }) + + #[before(parent_postfix), current(TokenKind::Static)] + static_postfix(|state: &mut State| { + if !state.has_class_scope { + return Err(ParseError::CannotFindTypeInCurrentScope( + state.current.kind.to_string(), + state.current.span, + )); + } + + state.next(); + + postfix(state, Expression::Static, &TokenKind::DoubleColon) + }) + + #[before(left_parenthesis), current(TokenKind::Parent)] + parent_postfix(|state: &mut State| { + if !state.has_class_scope { + return Err(ParseError::CannotFindTypeInCurrentScope( + state.current.kind.to_string(), + state.current.span, + )); + } + + state.next(); + + postfix(state, Expression::Parent, &TokenKind::DoubleColon) + }) + + #[before(r#match), current(TokenKind::LeftParen)] + left_parenthesis(|state: &mut State| { + state.next(); + + let e = lowest_precedence(state)?; + + utils::skip_right_parenthesis(state)?; + + Ok(e) + }) + + #[before(array), current(TokenKind::Match)] + r#match(|state: &mut State| { + control_flow::match_expression(state) + }) + + #[before(left_bracket), current(TokenKind::Array)] + array(|state: &mut State| { + arrays::legacy_array_expression(state) + }) + + #[before(fallback), current(TokenKind::LeftBracket)] + left_bracket(|state: &mut State| { + arrays::array_expression(state) + }) +} + +fn fallback(state: &mut State) -> ParseResult { + let expr = match &state.current.kind { + TokenKind::New => { + utils::skip(state, TokenKind::New)?; + + let target = match state.current.kind { + TokenKind::Self_ => { + if !state.has_class_scope { + return Err(ParseError::CannotFindTypeInCurrentScope( + state.current.kind.to_string(), + state.current.span, + )); + } + + state.next(); + + Expression::Self_ + } + TokenKind::Static => { + if !state.has_class_scope { + return Err(ParseError::CannotFindTypeInCurrentScope( + state.current.kind.to_string(), + state.current.span, + )); + } + + state.next(); + + Expression::Static + } + TokenKind::Parent => { + if !state.has_class_scope { + return Err(ParseError::CannotFindTypeInCurrentScope( + state.current.kind.to_string(), + state.current.span, + )); + } + + state.next(); + + Expression::Parent + } + _ => clone_or_new_precedence(state)?, + }; + + let mut args = vec![]; + if state.current.kind == TokenKind::LeftParen { + args = parameters::args_list(state)?; + } + + Expression::New { + target: Box::new(target), + args, + } + } + TokenKind::DirConstant => { + state.next(); + Expression::MagicConst { + constant: MagicConst::Dir, + } + } + TokenKind::Include + | TokenKind::IncludeOnce + | TokenKind::Require + | TokenKind::RequireOnce => { + let kind: IncludeKind = (&state.current.kind).into(); + state.next(); + + let path = lowest_precedence(state)?; + + Expression::Include { + kind, + path: Box::new(path), + } + } + _ if is_prefix(&state.current.kind) => { + let op = state.current.kind.clone(); + + state.next(); + + let rpred = Precedence::prefix(&op); + let rhs = for_precedence(state, rpred)?; + + prefix(&op, rhs) + } + TokenKind::Dollar => variables::dynamic_variable(state)?, + _ => { + return Err(ParseError::UnexpectedToken( + state.current.kind.to_string(), + state.current.span, + )) + } + }; + + Ok(expr) +} + +fn postfix(state: &mut State, lhs: Expression, op: &TokenKind) -> Result { + Ok(match op { + TokenKind::Coalesce => { + state.next(); + + let rhs = null_coalesce_precedence(state)?; + + Expression::Coalesce { + lhs: Box::new(lhs), + rhs: Box::new(rhs), + } + } + TokenKind::LeftParen => { + let args = parameters::args_list(state)?; + + Expression::Call { + target: Box::new(lhs), + args, + } + } + TokenKind::LeftBracket => { + utils::skip_left_bracket(state)?; + + if state.current.kind == TokenKind::RightBracket { + state.next(); + + Expression::ArrayIndex { + array: Box::new(lhs), + index: None, + } + } else { + let index = lowest_precedence(state)?; + + utils::skip_right_bracket(state)?; + + Expression::ArrayIndex { + array: Box::new(lhs), + index: Some(Box::new(index)), + } + } + } + TokenKind::DoubleColon => { + utils::skip_double_colon(state)?; + + let mut must_be_method_call = false; + + let property = match state.current.kind.clone() { + TokenKind::Dollar => variables::dynamic_variable(state)?, + TokenKind::Variable(_) => Expression::Variable(identifiers::var(state)?), + TokenKind::Identifier(_) => Expression::Identifier(identifiers::ident(state)?), + TokenKind::LeftBrace => { + must_be_method_call = true; + state.next(); + + let name = lowest_precedence(state)?; + + utils::skip_right_brace(state)?; + + Expression::DynamicVariable { + name: Box::new(name), + } + } + TokenKind::Class => { + let start = state.current.span; + state.next(); + let end = state.current.span; + + Expression::Identifier(Identifier { + start, + name: "class".into(), + end, + }) + } + _ if identifiers::is_reserved_ident(&state.current.kind) => { + Expression::Identifier(identifiers::ident_maybe_reserved(state)?) + } + _ => { + return expected_token_err!(["`{`", "`$`", "an identifier"], state); + } + }; + + let lhs = Box::new(lhs); + + match property { + // 1. If we have an identifier and the current token is not a left paren, + // the resulting expression must be a constant fetch. + Expression::Identifier(identifier) + if state.current.kind != TokenKind::LeftParen => + { + Expression::ConstFetch { + target: lhs, + constant: identifier, + } + } + // 2. If the current token is a left paren, or if we know the property expression + // is only valid a method call context, we can assume we're parsing a static + // method call. + _ if state.current.kind == TokenKind::LeftParen || must_be_method_call => { + let args = parameters::args_list(state)?; + + Expression::StaticMethodCall { + target: lhs, + method: Box::new(property), + args, + } + } + // 3. If we haven't met any of the previous conditions, we can assume + // that we're parsing a static property fetch. + _ => Expression::StaticPropertyFetch { + target: lhs, + property: Box::new(property), + }, + } + } + TokenKind::Arrow | TokenKind::NullsafeArrow => { + state.next(); + + let property = match state.current.kind { + TokenKind::LeftBrace => { + utils::skip_left_brace(state)?; + let expr = lowest_precedence(state)?; + utils::skip_right_brace(state)?; + expr + } + TokenKind::Variable(_) => Expression::Variable(identifiers::var(state)?), + TokenKind::Dollar => variables::dynamic_variable(state)?, + _ => Expression::Identifier(identifiers::ident_maybe_reserved(state)?), + }; + + if state.current.kind == TokenKind::LeftParen { + let args = parameters::args_list(state)?; + + if op == &TokenKind::NullsafeArrow { + Expression::NullsafeMethodCall { + target: Box::new(lhs), + method: Box::new(property), + args, + } + } else { + Expression::MethodCall { + target: Box::new(lhs), + method: Box::new(property), + args, + } + } + } else if op == &TokenKind::NullsafeArrow { + Expression::NullsafePropertyFetch { + target: Box::new(lhs), + property: Box::new(property), + } + } else { + Expression::PropertyFetch { + target: Box::new(lhs), + property: Box::new(property), + } + } + } + TokenKind::Increment => { + state.next(); + Expression::Increment { + value: Box::new(lhs), + } + } + TokenKind::Decrement => { + state.next(); + + Expression::Decrement { + value: Box::new(lhs), + } + } + _ => todo!("postfix: {:?}", op), + }) +} + +#[inline(always)] +fn interpolated_string(state: &mut State) -> ParseResult { + let mut parts = Vec::new(); + + while state.current.kind != TokenKind::DoubleQuote { + if let Some(part) = interpolated_string_part(state)? { + parts.push(part); + } + } + + state.next(); + + Ok(Expression::InterpolatedString { parts }) +} + +#[inline(always)] +fn shell_exec(state: &mut State) -> ParseResult { + state.next(); + + let mut parts = Vec::new(); + + while state.current.kind != TokenKind::Backtick { + if let Some(part) = interpolated_string_part(state)? { + parts.push(part); + } + } + + state.next(); + + Ok(Expression::ShellExec { parts }) +} + +#[inline(always)] +fn doc_string(state: &mut State, kind: DocStringKind) -> ParseResult { + state.next(); + + Ok(match kind { + DocStringKind::Heredoc => { + let mut parts = Vec::new(); + + while !matches!(state.current.kind, TokenKind::EndDocString(_, _, _)) { + if let Some(part) = interpolated_string_part(state)? { + parts.push(part); + } + } + + let (indentation_type, indentation_amount) = match state.current.kind { + TokenKind::EndDocString(_, indentation_type, indentation_amount) => { + (indentation_type, indentation_amount) + } + _ => unreachable!(), + }; + + state.next(); + + // FIXME: Can we move this logic above into the loop, by peeking ahead in + // the token stream for the EndHeredoc? Might be more performant. + if let Some(indentation_type) = indentation_type { + let search_char: u8 = indentation_type.into(); + + for part in parts.iter_mut() { + match part { + StringPart::Const(bytes) => { + for _ in 0..indentation_amount { + if bytes.starts_with(&[search_char]) { + bytes.remove(0); + } + } + } + _ => continue, + } + } + } + + Expression::Heredoc { parts } + } + DocStringKind::Nowdoc => { + // FIXME: This feels hacky. We should probably produce different tokens from the lexer + // but since I already had the logic in place for parsing heredocs, this was + // the fastest way to get nowdocs working too. + let mut s = expect_token!([ + TokenKind::StringPart(s) => s + ], state, "constant string"); + + let (indentation_type, indentation_amount) = expect_token!([ + TokenKind::EndDocString(_, indentation_type, indentation_amount) => (indentation_type, indentation_amount) + ], state, "label"); + + // FIXME: Hacky code, but it's late and I want to get this done. + if let Some(indentation_type) = indentation_type { + let search_char: u8 = indentation_type.into(); + let mut lines = s + .split(|b| *b == b'\n') + .map(|s| s.to_vec()) + .collect::>>(); + for line in lines.iter_mut() { + for _ in 0..indentation_amount { + if line.starts_with(&[search_char]) { + line.remove(0); + } + } + } + let mut bytes = Vec::new(); + for (i, line) in lines.iter().enumerate() { + bytes.extend(line); + if i < lines.len() - 1 { + bytes.push(b'\n'); + } + } + s = bytes.into(); + } + + Expression::Nowdoc { value: s } + } + }) +} + +fn interpolated_string_part(state: &mut State) -> ParseResult> { + Ok(match &state.current.kind { + TokenKind::StringPart(s) => { + let part = if s.len() > 0 { + Some(StringPart::Const(s.clone())) + } else { + None + }; + + state.next(); + part + } + TokenKind::DollarLeftBrace => { + state.next(); + let e = match (state.current.kind.clone(), state.peek.kind.clone()) { + (TokenKind::Identifier(name), TokenKind::RightBrace) => { + let start = state.current.span; + let end = state.peek.span; + + state.next(); + state.next(); + // "${var}" + // TODO: we should use a different node for this. + Expression::Variable(Variable { start, name, end }) + } + (TokenKind::Identifier(name), TokenKind::LeftBracket) => { + let start = state.current.span; + let end = state.peek.span; + state.next(); + state.next(); + let var = Expression::Variable(Variable { start, name, end }); + + let e = lowest_precedence(state)?; + utils::skip_right_bracket(state)?; + utils::skip_right_brace(state)?; + + // TODO: we should use a different node for this. + Expression::ArrayIndex { + array: Box::new(var), + index: Some(Box::new(e)), + } + } + _ => { + // Arbitrary expressions are allowed, but are treated as variable variables. + let e = lowest_precedence(state)?; + utils::skip_right_brace(state)?; + + Expression::DynamicVariable { name: Box::new(e) } + } + }; + Some(StringPart::Expr(Box::new(e))) + } + TokenKind::LeftBrace => { + // "{$expr}" + state.next(); + let e = lowest_precedence(state)?; + utils::skip_right_brace(state)?; + Some(StringPart::Expr(Box::new(e))) + } + TokenKind::Variable(_) => { + // "$expr", "$expr[0]", "$expr[name]", "$expr->a" + let var = Expression::Variable(identifiers::var(state)?); + let e = match state.current.kind { + TokenKind::LeftBracket => { + state.next(); + // Full expression syntax is not allowed here, + // so we can't call expression. + let index = match &state.current.kind { + TokenKind::LiteralInteger(i) => { + let e = Expression::LiteralInteger { i: i.clone() }; + state.next(); + e + } + TokenKind::Minus => { + state.next(); + if let TokenKind::LiteralInteger(i) = &state.current.kind { + let e = Expression::Negate { + value: Box::new(Expression::LiteralInteger { i: i.clone() }), + }; + state.next(); + e + } else { + return expected_token_err!("an integer", state); + } + } + TokenKind::Identifier(ident) => { + let e = Expression::LiteralString { + value: ident.clone(), + }; + state.next(); + e + } + TokenKind::Variable(_) => { + let v = identifiers::var(state)?; + Expression::Variable(v) + } + _ => { + return expected_token_err!( + ["`-`", "an integer", "an identifier", "a variable"], + state + ); + } + }; + + utils::skip_right_bracket(state)?; + + Expression::ArrayIndex { + array: Box::new(var), + index: Some(Box::new(index)), + } + } + TokenKind::Arrow => { + state.next(); + Expression::PropertyFetch { + target: Box::new(var), + property: Box::new(Expression::Identifier( + identifiers::ident_maybe_reserved(state)?, + )), + } + } + TokenKind::NullsafeArrow => { + state.next(); + Expression::NullsafePropertyFetch { + target: Box::new(var), + property: Box::new(Expression::Identifier( + identifiers::ident_maybe_reserved(state)?, + )), + } + } + _ => var, + }; + Some(StringPart::Expr(Box::new(e))) + } + _ => { + return expected_token_err!(["`${`", "`{$", "`\"`", "a variable"], state); + } + }) +} + +#[inline(always)] +fn is_prefix(op: &TokenKind) -> bool { + matches!( + op, + TokenKind::Bang + | TokenKind::Print + | TokenKind::BitwiseNot + | TokenKind::Decrement + | TokenKind::Increment + | TokenKind::Minus + | TokenKind::Plus + | TokenKind::StringCast + | TokenKind::BinaryCast + | TokenKind::ObjectCast + | TokenKind::BoolCast + | TokenKind::BooleanCast + | TokenKind::IntCast + | TokenKind::IntegerCast + | TokenKind::FloatCast + | TokenKind::DoubleCast + | TokenKind::RealCast + | TokenKind::UnsetCast + | TokenKind::ArrayCast + | TokenKind::At + ) +} + +#[inline(always)] +fn prefix(op: &TokenKind, rhs: Expression) -> Expression { + match op { + TokenKind::Print => Expression::Print { + value: Box::new(rhs), + }, + TokenKind::Bang => Expression::BooleanNot { + value: Box::new(rhs), + }, + TokenKind::Minus => Expression::Negate { + value: Box::new(rhs), + }, + TokenKind::Plus => Expression::UnaryPlus { + value: Box::new(rhs), + }, + TokenKind::BitwiseNot => Expression::BitwiseNot { + value: Box::new(rhs), + }, + TokenKind::Decrement => Expression::PreDecrement { + value: Box::new(rhs), + }, + TokenKind::Increment => Expression::PreIncrement { + value: Box::new(rhs), + }, + TokenKind::StringCast + | TokenKind::BinaryCast + | TokenKind::ObjectCast + | TokenKind::BoolCast + | TokenKind::BooleanCast + | TokenKind::IntCast + | TokenKind::IntegerCast + | TokenKind::FloatCast + | TokenKind::DoubleCast + | TokenKind::RealCast + | TokenKind::UnsetCast + | TokenKind::ArrayCast => Expression::Cast { + kind: op.into(), + value: Box::new(rhs), + }, + TokenKind::At => Expression::ErrorSuppress { + expr: Box::new(rhs), + }, + _ => unreachable!(), + } +} + +fn is_infix(t: &TokenKind) -> bool { + matches!( + t, + TokenKind::Pow + | TokenKind::RightShiftEquals + | TokenKind::LeftShiftEquals + | TokenKind::CaretEquals + | TokenKind::AmpersandEquals + | TokenKind::PipeEquals + | TokenKind::PercentEquals + | TokenKind::PowEquals + | TokenKind::LogicalAnd + | TokenKind::LogicalOr + | TokenKind::LogicalXor + | TokenKind::Spaceship + | TokenKind::LeftShift + | TokenKind::RightShift + | TokenKind::Ampersand + | TokenKind::Pipe + | TokenKind::Caret + | TokenKind::Percent + | TokenKind::Instanceof + | TokenKind::Asterisk + | TokenKind::Slash + | TokenKind::Plus + | TokenKind::Minus + | TokenKind::Dot + | TokenKind::LessThan + | TokenKind::GreaterThan + | TokenKind::LessThanEquals + | TokenKind::GreaterThanEquals + | TokenKind::DoubleEquals + | TokenKind::TripleEquals + | TokenKind::BangEquals + | TokenKind::BangDoubleEquals + | TokenKind::AngledLeftRight + | TokenKind::Question + | TokenKind::QuestionColon + | TokenKind::BooleanAnd + | TokenKind::BooleanOr + | TokenKind::Equals + | TokenKind::PlusEquals + | TokenKind::MinusEquals + | TokenKind::DotEquals + | TokenKind::CoalesceEqual + | TokenKind::AsteriskEqual + | TokenKind::SlashEquals + ) +} + +#[inline(always)] +fn is_postfix(t: &TokenKind) -> bool { + matches!( + t, + TokenKind::Increment + | TokenKind::Decrement + | TokenKind::LeftParen + | TokenKind::LeftBracket + | TokenKind::Arrow + | TokenKind::NullsafeArrow + | TokenKind::DoubleColon + | TokenKind::Coalesce + ) +} diff --git a/src/parser/internal/arrays.rs b/src/parser/internal/arrays.rs index b9da679..6c71ee9 100644 --- a/src/parser/internal/arrays.rs +++ b/src/parser/internal/arrays.rs @@ -1,11 +1,10 @@ use crate::lexer::token::TokenKind; -use crate::parser; use crate::parser::ast::ArrayItem; use crate::parser::ast::Expression; use crate::parser::ast::ListItem; use crate::parser::error::ParseError; use crate::parser::error::ParseResult; -use crate::parser::internal::precedences::Precedence; +use crate::parser::expressions; use crate::parser::internal::utils; use crate::parser::state::State; @@ -38,7 +37,7 @@ pub fn list_expression(state: &mut State) -> ParseResult { )); } - let mut value = parser::expression(state, Precedence::Lowest)?; + let mut value = expressions::lowest_precedence(state)?; if state.current.kind == TokenKind::DoubleArrow { if !has_atleast_one_key && !items.is_empty() { @@ -62,7 +61,7 @@ pub fn list_expression(state: &mut State) -> ParseResult { } has_atleast_one_key = true; - value = parser::expression(state, Precedence::Lowest)?; + value = expressions::lowest_precedence(state)?; } else if has_atleast_one_key { return Err(ParseError::CannotMixKeyedAndUnkeyedEntries( state.current.span, @@ -148,7 +147,7 @@ pub fn legacy_array_expression(state: &mut State) -> ParseResult { (false, (0, 0)) }; - let mut value = parser::expression(state, Precedence::Lowest)?; + let mut value = expressions::lowest_precedence(state)?; // TODO: return error for `[...$a => $b]`. if state.current.kind == TokenKind::DoubleArrow { @@ -170,7 +169,7 @@ pub fn legacy_array_expression(state: &mut State) -> ParseResult { false }; - value = parser::expression(state, Precedence::Lowest)?; + value = expressions::lowest_precedence(state)?; } items.push(ArrayItem { @@ -211,7 +210,7 @@ fn array_pair(state: &mut State) -> ParseResult { (false, (0, 0)) }; - let mut value = parser::expression(state, Precedence::Lowest)?; + let mut value = expressions::lowest_precedence(state)?; if state.current.kind == TokenKind::DoubleArrow { state.next(); @@ -229,7 +228,7 @@ fn array_pair(state: &mut State) -> ParseResult { } else { false }; - value = parser::expression(state, Precedence::Lowest)?; + value = expressions::lowest_precedence(state)?; } Ok(ArrayItem { diff --git a/src/parser/internal/attributes.rs b/src/parser/internal/attributes.rs index db5a5f6..5e6b973 100644 --- a/src/parser/internal/attributes.rs +++ b/src/parser/internal/attributes.rs @@ -1,9 +1,8 @@ use crate::lexer::token::TokenKind; -use crate::parser; use crate::parser::ast::attributes::Attribute; use crate::parser::ast::attributes::AttributeGroup; use crate::parser::error::ParseResult; -use crate::parser::internal::precedences::Precedence; +use crate::parser::expressions; use crate::parser::internal::utils; use crate::parser::state::State; @@ -21,7 +20,7 @@ pub fn gather_attributes(state: &mut State) -> ParseResult { while state.current.kind != TokenKind::RightBracket { let start = state.current.span; - let expression = parser::expression(state, Precedence::Lowest)?; + let expression = expressions::lowest_precedence(state)?; let end = state.current.span; members.push(Attribute { diff --git a/src/parser/internal/classish_statements.rs b/src/parser/internal/classish_statements.rs index 507ca28..8af345d 100644 --- a/src/parser/internal/classish_statements.rs +++ b/src/parser/internal/classish_statements.rs @@ -2,7 +2,6 @@ use crate::expect_token; use crate::expected_scope; use crate::lexer::token::Span; use crate::lexer::token::TokenKind; -use crate::parser; use crate::parser::ast::classish::ClassishConstant; use crate::parser::ast::enums::BackedEnumCase; use crate::parser::ast::enums::BackedEnumMember; @@ -15,12 +14,12 @@ use crate::parser::ast::Statement; use crate::parser::ast::TraitAdaptation; use crate::parser::error::ParseError; use crate::parser::error::ParseResult; +use crate::parser::expressions; use crate::parser::internal::attributes; use crate::parser::internal::data_type; use crate::parser::internal::functions; use crate::parser::internal::identifiers; use crate::parser::internal::modifiers; -use crate::parser::internal::precedences::Precedence; use crate::parser::internal::utils; use crate::parser::state::Scope; use crate::parser::state::State; @@ -115,7 +114,7 @@ pub fn backed_enum_member(state: &mut State) -> ParseResult { utils::skip(state, TokenKind::Equals)?; - let value = parser::expression(state, Precedence::Lowest)?; + let value = expressions::lowest_precedence(state)?; let end = utils::skip_semicolon(state)?; @@ -152,18 +151,16 @@ pub fn class_like_statement(state: &mut State) -> ParseResult { let start = state.current.span; let modifiers = modifiers::collect(state)?; - if !has_attributes { - if state.current.kind == TokenKind::Use { - return parse_classish_uses(state); - } + if !has_attributes && state.current.kind == TokenKind::Use { + return parse_classish_uses(state); + } - if state.current.kind == TokenKind::Const { - return Ok(Statement::ClassishConstant(constant( - state, - modifiers::constant_group(modifiers)?, - start, - )?)); - } + if state.current.kind == TokenKind::Const { + return Ok(Statement::ClassishConstant(constant( + state, + modifiers::constant_group(modifiers)?, + start, + )?)); } if state.current.kind == TokenKind::Function { @@ -185,7 +182,7 @@ pub fn class_like_statement(state: &mut State) -> ParseResult { // e.g: = "foo"; if state.current.kind == TokenKind::Equals { state.next(); - value = Some(parser::expression(state, Precedence::Lowest)?); + value = Some(expressions::lowest_precedence(state)?); } let class_name: String = expected_scope!([ @@ -395,7 +392,7 @@ fn constant( utils::skip(state, TokenKind::Equals)?; - let value = parser::expression(state, Precedence::Lowest)?; + let value = expressions::lowest_precedence(state)?; let end = utils::skip_semicolon(state)?; diff --git a/src/parser/internal/control_flow.rs b/src/parser/internal/control_flow.rs index a8a3509..f9dcfbd 100644 --- a/src/parser/internal/control_flow.rs +++ b/src/parser/internal/control_flow.rs @@ -10,8 +10,8 @@ use crate::parser::ast::MatchArm; use crate::parser::ast::Statement; use crate::parser::error::ParseError; use crate::parser::error::ParseResult; +use crate::parser::expressions; use crate::parser::internal::blocks; -use crate::parser::internal::precedences::Precedence; use crate::parser::internal::utils; use crate::parser::state::State; @@ -20,7 +20,7 @@ pub fn match_expression(state: &mut State) -> ParseResult { utils::skip_left_parenthesis(state)?; - let condition = Box::new(parser::expression(state, Precedence::Lowest)?); + let condition = Box::new(expressions::lowest_precedence(state)?); utils::skip_right_parenthesis(state)?; utils::skip_left_brace(state)?; @@ -46,13 +46,13 @@ pub fn match_expression(state: &mut State) -> ParseResult { utils::skip_double_arrow(state)?; - let body = parser::expression(state, Precedence::Lowest)?; + let body = expressions::lowest_precedence(state)?; default = Some(Box::new(DefaultMatchArm { body })); } else { let mut conditions = Vec::new(); while state.current.kind != TokenKind::DoubleArrow { - conditions.push(parser::expression(state, Precedence::Lowest)?); + conditions.push(expressions::lowest_precedence(state)?); if state.current.kind == TokenKind::Comma { state.next(); @@ -67,7 +67,7 @@ pub fn match_expression(state: &mut State) -> ParseResult { break; } - let body = parser::expression(state, Precedence::Lowest)?; + let body = expressions::lowest_precedence(state)?; arms.push(MatchArm { conditions, body }); } @@ -93,7 +93,7 @@ pub fn switch_statement(state: &mut State) -> ParseResult { utils::skip_left_parenthesis(state)?; - let condition = parser::expression(state, Precedence::Lowest)?; + let condition = expressions::lowest_precedence(state)?; utils::skip_right_parenthesis(state)?; @@ -111,7 +111,7 @@ pub fn switch_statement(state: &mut State) -> ParseResult { TokenKind::Case => { state.next(); - let condition = parser::expression(state, Precedence::Lowest)?; + let condition = expressions::lowest_precedence(state)?; utils::skip_any_of(state, &[TokenKind::Colon, TokenKind::SemiColon])?; @@ -170,7 +170,7 @@ pub fn if_statement(state: &mut State) -> ParseResult { utils::skip_left_parenthesis(state)?; - let condition = parser::expression(state, Precedence::Lowest)?; + let condition = expressions::lowest_precedence(state)?; utils::skip_right_parenthesis(state)?; @@ -201,7 +201,7 @@ pub fn if_statement(state: &mut State) -> ParseResult { state.next(); utils::skip_left_parenthesis(state)?; - let condition = parser::expression(state, Precedence::Lowest)?; + let condition = expressions::lowest_precedence(state)?; utils::skip_right_parenthesis(state)?; utils::skip_colon(state)?; @@ -260,7 +260,7 @@ pub fn if_statement(state: &mut State) -> ParseResult { utils::skip_left_parenthesis(state)?; - let condition = parser::expression(state, Precedence::Lowest)?; + let condition = expressions::lowest_precedence(state)?; utils::skip_right_parenthesis(state)?; diff --git a/src/parser/internal/functions.rs b/src/parser/internal/functions.rs index 2aa2b40..7ec7c16 100644 --- a/src/parser/internal/functions.rs +++ b/src/parser/internal/functions.rs @@ -1,7 +1,6 @@ use crate::expected_scope; use crate::lexer::token::Span; use crate::lexer::token::TokenKind; -use crate::parser; use crate::parser::ast::functions::ArrowFunction; use crate::parser::ast::functions::Closure; use crate::parser::ast::functions::ClosureUse; @@ -13,11 +12,11 @@ use crate::parser::ast::Expression; use crate::parser::ast::Statement; use crate::parser::error::ParseError; use crate::parser::error::ParseResult; +use crate::parser::expressions; use crate::parser::internal::blocks; use crate::parser::internal::data_type; use crate::parser::internal::identifiers; use crate::parser::internal::parameters; -use crate::parser::internal::precedences::Precedence; use crate::parser::internal::utils; use crate::parser::state::Scope; use crate::parser::state::State; @@ -62,7 +61,7 @@ pub fn anonymous_function(state: &mut State) -> ParseResult { // TODO(azjezz): this shouldn't call expr, we should have a function // just for variables, so we don't have to go through the whole `match` in `expression(...)` - let var = match parser::expression(state, Precedence::Lowest)? { + let var = match expressions::lowest_precedence(state)? { s @ Expression::Variable { .. } => ClosureUse { var: s, by_ref }, _ => { return Err(ParseError::UnexpectedToken( @@ -146,7 +145,7 @@ pub fn arrow_function(state: &mut State) -> ParseResult { utils::skip(state, TokenKind::DoubleArrow)?; let body = scoped!(state, Scope::ArrowFunction(is_static), { - Box::new(parser::expression(state, Precedence::Lowest)?) + Box::new(expressions::lowest_precedence(state)?) }); let end = state.current.span; diff --git a/src/parser/internal/loops.rs b/src/parser/internal/loops.rs index 492c07e..7b4cd43 100644 --- a/src/parser/internal/loops.rs +++ b/src/parser/internal/loops.rs @@ -1,9 +1,8 @@ use crate::lexer::token::TokenKind; -use crate::parser; use crate::parser::ast::Statement; use crate::parser::error::ParseResult; +use crate::parser::expressions; use crate::parser::internal::blocks; -use crate::parser::internal::precedences::Precedence; use crate::parser::internal::utils; use crate::parser::state::State; @@ -12,7 +11,7 @@ pub fn foreach_loop(state: &mut State) -> ParseResult { utils::skip_left_parenthesis(state)?; - let expr = parser::expression(state, Precedence::Lowest)?; + let expr = expressions::lowest_precedence(state)?; utils::skip(state, TokenKind::As)?; @@ -22,7 +21,7 @@ pub fn foreach_loop(state: &mut State) -> ParseResult { } let mut key_var = None; - let mut value_var = parser::expression(state, Precedence::Lowest)?; + let mut value_var = expressions::lowest_precedence(state)?; if state.current.kind == TokenKind::DoubleArrow { state.next(); @@ -34,7 +33,7 @@ pub fn foreach_loop(state: &mut State) -> ParseResult { state.next(); } - value_var = parser::expression(state, Precedence::Lowest)?; + value_var = expressions::lowest_precedence(state)?; } utils::skip_right_parenthesis(state)?; @@ -76,7 +75,7 @@ pub fn for_loop(state: &mut State) -> ParseResult { break; } - init.push(parser::expression(state, Precedence::Lowest)?); + init.push(expressions::lowest_precedence(state)?); if state.current.kind == TokenKind::Comma { state.next(); @@ -93,7 +92,7 @@ pub fn for_loop(state: &mut State) -> ParseResult { break; } - condition.push(parser::expression(state, Precedence::Lowest)?); + condition.push(expressions::lowest_precedence(state)?); if state.current.kind == TokenKind::Comma { state.next(); @@ -109,7 +108,7 @@ pub fn for_loop(state: &mut State) -> ParseResult { break; } - r#loop.push(parser::expression(state, Precedence::Lowest)?); + r#loop.push(expressions::lowest_precedence(state)?); if state.current.kind == TokenKind::Comma { state.next(); @@ -155,7 +154,7 @@ pub fn do_loop(state: &mut State) -> ParseResult { utils::skip(state, TokenKind::While)?; utils::skip_left_parenthesis(state)?; - let condition = parser::expression(state, Precedence::Lowest)?; + let condition = expressions::lowest_precedence(state)?; utils::skip_right_parenthesis(state)?; utils::skip_semicolon(state)?; @@ -167,7 +166,7 @@ pub fn while_loop(state: &mut State) -> ParseResult { utils::skip_left_parenthesis(state)?; - let condition = parser::expression(state, Precedence::Lowest)?; + let condition = expressions::lowest_precedence(state)?; utils::skip_right_parenthesis(state)?; @@ -203,7 +202,7 @@ pub fn continue_statement(state: &mut State) -> ParseResult { let mut num = None; if state.current.kind != TokenKind::SemiColon { - num = Some(parser::expression(state, Precedence::Lowest)?); + num = Some(expressions::lowest_precedence(state)?); } utils::skip_semicolon(state)?; @@ -216,7 +215,7 @@ pub fn break_statement(state: &mut State) -> ParseResult { let mut num = None; if state.current.kind != TokenKind::SemiColon { - num = Some(parser::expression(state, Precedence::Lowest)?); + num = Some(expressions::lowest_precedence(state)?); } utils::skip_semicolon(state)?; diff --git a/src/parser/internal/parameters.rs b/src/parser/internal/parameters.rs index f16facd..9da637c 100644 --- a/src/parser/internal/parameters.rs +++ b/src/parser/internal/parameters.rs @@ -1,6 +1,5 @@ use super::identifiers; use crate::lexer::token::TokenKind; -use crate::parser; use crate::parser::ast::functions::FunctionParameter; use crate::parser::ast::functions::FunctionParameterList; use crate::parser::ast::functions::MethodParameter; @@ -9,10 +8,10 @@ use crate::parser::ast::Arg; use crate::parser::ast::Expression; use crate::parser::error::ParseError; use crate::parser::error::ParseResult; +use crate::parser::expressions; use crate::parser::internal::attributes; use crate::parser::internal::data_type; use crate::parser::internal::modifiers; -use crate::parser::internal::precedences::Precedence; use crate::parser::internal::utils; use crate::parser::state::Scope; use crate::parser::state::State; @@ -52,7 +51,7 @@ pub fn function_parameter_list(state: &mut State) -> Result Result ParseResult> { break; } - let value = parser::expression(state, Precedence::Lowest)?; + let value = expressions::lowest_precedence(state)?; args.push(Arg { name, diff --git a/src/parser/internal/precedences.rs b/src/parser/internal/precedences.rs index 007ec5e..6605f0d 100644 --- a/src/parser/internal/precedences.rs +++ b/src/parser/internal/precedences.rs @@ -7,7 +7,7 @@ pub enum Associativity { } #[allow(dead_code)] -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] pub enum Precedence { Lowest, IncDec, diff --git a/src/parser/internal/try_block.rs b/src/parser/internal/try_block.rs index 712e445..74e40af 100644 --- a/src/parser/internal/try_block.rs +++ b/src/parser/internal/try_block.rs @@ -1,5 +1,4 @@ use crate::lexer::token::TokenKind; -use crate::parser; use crate::parser::ast::try_block::CatchBlock; use crate::parser::ast::try_block::CatchType; use crate::parser::ast::try_block::FinallyBlock; @@ -7,9 +6,9 @@ use crate::parser::ast::try_block::TryBlock; use crate::parser::ast::Statement; use crate::parser::error::ParseError; use crate::parser::error::ParseResult; +use crate::parser::expressions; use crate::parser::internal::blocks; use crate::parser::internal::identifiers; -use crate::parser::internal::precedences::Precedence; use crate::parser::internal::utils; use crate::parser::state::State; @@ -39,7 +38,7 @@ pub fn try_block(state: &mut State) -> ParseResult { None } else { // TODO(azjezz): this is a variable, no an expression? - Some(parser::expression(state, Precedence::Lowest)?) + Some(expressions::lowest_precedence(state)?) }; utils::skip_right_parenthesis(state)?; diff --git a/src/parser/internal/variables.rs b/src/parser/internal/variables.rs index e8efdf3..68084e0 100644 --- a/src/parser/internal/variables.rs +++ b/src/parser/internal/variables.rs @@ -1,9 +1,8 @@ use crate::lexer::token::TokenKind; -use crate::parser; use crate::parser::ast::Expression; use crate::parser::error::ParseResult; +use crate::parser::expressions; use crate::parser::internal::identifiers; -use crate::parser::internal::precedences::Precedence; use crate::parser::internal::utils; use crate::parser::state::State; use crate::peek_token; @@ -16,7 +15,7 @@ pub fn dynamic_variable(state: &mut State) -> ParseResult { state.next(); // TODO(azjezz): this is not an expression! it's a constant expression - let name = parser::expression(state, Precedence::Lowest)?; + let name = expressions::lowest_precedence(state)?; utils::skip_right_brace(state)?; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 72fae65..712497f 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,20 +1,11 @@ use crate::expect_literal; -use crate::expect_token; -use crate::expected_token_err; use crate::lexer::token::Token; use crate::lexer::token::TokenKind; -use crate::lexer::DocStringKind; use crate::parser::ast::comments::Comment; use crate::parser::ast::comments::CommentFormat; -use crate::parser::ast::identifiers::Identifier; -use crate::parser::ast::variables::Variable; -use crate::parser::ast::{ - Constant, DeclareItem, Expression, IncludeKind, MagicConst, Program, Statement, StaticVar, - StringPart, -}; +use crate::parser::ast::{Constant, DeclareItem, Expression, Program, Statement, StaticVar}; use crate::parser::error::ParseError; use crate::parser::error::ParseResult; -use crate::parser::internal::arrays; use crate::parser::internal::attributes; use crate::parser::internal::blocks; use crate::parser::internal::classish; @@ -24,18 +15,16 @@ use crate::parser::internal::goto; use crate::parser::internal::identifiers; use crate::parser::internal::loops; use crate::parser::internal::namespaces; -use crate::parser::internal::parameters; -use crate::parser::internal::precedences::Associativity; -use crate::parser::internal::precedences::Precedence; + use crate::parser::internal::try_block; use crate::parser::internal::uses; use crate::parser::internal::utils; -use crate::parser::internal::variables; use crate::parser::state::State; pub mod ast; pub mod error; +mod expressions; mod internal; mod macros; mod state; @@ -84,7 +73,7 @@ fn top_level_statement(state: &mut State) -> ParseResult { utils::skip(state, TokenKind::Equals)?; - let value = expression(state, Precedence::Lowest)?; + let value = expressions::lowest_precedence(state)?; constants.push(Constant { name, value }); @@ -155,7 +144,7 @@ fn statement(state: &mut State) -> ParseResult { .. }) ) { - let expr = expression(state, Precedence::Lowest)?; + let expr = expressions::lowest_precedence(state)?; utils::skip_semicolon(state)?; @@ -203,7 +192,7 @@ fn statement(state: &mut State) -> ParseResult { .. } ) { - let expr = expression(state, Precedence::Lowest)?; + let expr = expressions::lowest_precedence(state)?; utils::skip_semicolon(state)?; @@ -292,7 +281,7 @@ fn statement(state: &mut State) -> ParseResult { if state.current.kind == TokenKind::Equals { state.next(); - default = Some(expression(state, Precedence::Lowest)?); + default = Some(expressions::lowest_precedence(state)?); } // TODO: group static vars. @@ -383,7 +372,7 @@ fn statement(state: &mut State) -> ParseResult { let mut values = Vec::new(); loop { - values.push(expression(state, Precedence::Lowest)?); + values.push(expressions::lowest_precedence(state)?); if state.current.kind == TokenKind::Comma { state.next(); @@ -404,7 +393,7 @@ fn statement(state: &mut State) -> ParseResult { ret } else { let ret = Statement::Return { - value: Some(expression(state, Precedence::Lowest)?), + value: Some(expressions::lowest_precedence(state)?), }; utils::skip_semicolon(state)?; ret @@ -420,7 +409,7 @@ fn statement(state: &mut State) -> ParseResult { TokenKind::Try => try_block::try_block(state)?, TokenKind::LeftBrace => blocks::block_statement(state)?, _ => { - let expr = expression(state, Precedence::Lowest)?; + let expr = expressions::lowest_precedence(state)?; utils::skip_semicolon(state)?; @@ -438,998 +427,3 @@ fn statement(state: &mut State) -> ParseResult { Ok(statement) } - -fn expression(state: &mut State, precedence: Precedence) -> ParseResult { - if state.is_eof() { - return Err(ParseError::UnexpectedEndOfFile); - } - - let has_attributes = attributes::gather_attributes(state)?; - - let mut left = if has_attributes { - match &state.current.kind { - TokenKind::Static if state.peek.kind == TokenKind::Function => { - functions::anonymous_function(state)? - } - TokenKind::Static if state.peek.kind == TokenKind::Fn => { - functions::arrow_function(state)? - } - TokenKind::Function => functions::anonymous_function(state)?, - TokenKind::Fn => functions::arrow_function(state)?, - _ => { - // Note, we can get attributes and know their span, maybe use that in the - // error in the future? - return Err(ParseError::ExpectedItemDefinitionAfterAttributes( - state.current.span, - )); - } - } - } else { - match &state.current.kind { - TokenKind::List => arrays::list_expression(state)?, - TokenKind::Static if state.peek.kind == TokenKind::Function => { - functions::anonymous_function(state)? - } - TokenKind::Static if state.peek.kind == TokenKind::Fn => { - functions::arrow_function(state)? - } - TokenKind::Function => functions::anonymous_function(state)?, - TokenKind::Fn => functions::arrow_function(state)?, - TokenKind::New - if state.peek.kind == TokenKind::Class - || state.peek.kind == TokenKind::Attribute => - { - classish::anonymous_class_definition(state)? - } - TokenKind::Throw => { - state.next(); - - let value = expression(state, Precedence::Lowest)?; - - Expression::Throw { - value: Box::new(value), - } - } - TokenKind::Yield => { - state.next(); - - if state.current.kind == TokenKind::SemiColon { - Expression::Yield { - key: None, - value: None, - } - } else { - let mut from = false; - - if state.current.kind == TokenKind::From { - state.next(); - from = true; - } - - let mut key = None; - let mut value = Box::new(expression( - state, - if from { - Precedence::YieldFrom - } else { - Precedence::Yield - }, - )?); - - if state.current.kind == TokenKind::DoubleArrow && !from { - state.next(); - key = Some(value.clone()); - value = Box::new(expression(state, Precedence::Yield)?); - } - - if from { - Expression::YieldFrom { value } - } else { - Expression::Yield { - key, - value: Some(value), - } - } - } - } - TokenKind::Clone => { - state.next(); - - let target = expression(state, Precedence::CloneOrNew)?; - - Expression::Clone { - target: Box::new(target), - } - } - TokenKind::Variable(_) => Expression::Variable(identifiers::var(state)?), - TokenKind::LiteralInteger(i) => { - let e = Expression::LiteralInteger { i: i.clone() }; - state.next(); - e - } - TokenKind::LiteralFloat(f) => { - let f = Expression::LiteralFloat { f: f.clone() }; - state.next(); - f - } - TokenKind::Identifier(_) - | TokenKind::QualifiedIdentifier(_) - | TokenKind::FullyQualifiedIdentifier(_) => { - Expression::Identifier(identifiers::full_name(state)?) - } - TokenKind::Self_ => { - if !state.has_class_scope { - return Err(ParseError::CannotFindTypeInCurrentScope( - state.current.kind.to_string(), - state.current.span, - )); - } - - state.next(); - - postfix(state, Expression::Self_, &TokenKind::DoubleColon)? - } - TokenKind::Static => { - if !state.has_class_scope { - return Err(ParseError::CannotFindTypeInCurrentScope( - state.current.kind.to_string(), - state.current.span, - )); - } - - state.next(); - - postfix(state, Expression::Static, &TokenKind::DoubleColon)? - } - TokenKind::Parent => { - if !state.has_class_scope { - return Err(ParseError::CannotFindTypeInCurrentScope( - state.current.kind.to_string(), - state.current.span, - )); - } - - state.next(); - - postfix(state, Expression::Parent, &TokenKind::DoubleColon)? - } - TokenKind::LiteralString(s) => { - let e = Expression::LiteralString { value: s.clone() }; - state.next(); - e - } - TokenKind::StringPart(_) => interpolated_string(state)?, - TokenKind::StartDocString(_, kind) => { - let kind = *kind; - - doc_string(state, kind)? - } - TokenKind::Backtick => shell_exec(state)?, - TokenKind::True => { - let e = Expression::Bool { value: true }; - state.next(); - e - } - TokenKind::False => { - let e = Expression::Bool { value: false }; - state.next(); - e - } - TokenKind::Null => { - state.next(); - Expression::Null - } - TokenKind::LeftParen => { - state.next(); - - let e = expression(state, Precedence::Lowest)?; - - utils::skip_right_parenthesis(state)?; - - e - } - TokenKind::Match => control_flow::match_expression(state)?, - TokenKind::Array => arrays::legacy_array_expression(state)?, - TokenKind::LeftBracket => arrays::array_expression(state)?, - TokenKind::New => { - utils::skip(state, TokenKind::New)?; - - let target = match state.current.kind { - TokenKind::Self_ => { - if !state.has_class_scope { - return Err(ParseError::CannotFindTypeInCurrentScope( - state.current.kind.to_string(), - state.current.span, - )); - } - - state.next(); - - Expression::Self_ - } - TokenKind::Static => { - if !state.has_class_scope { - return Err(ParseError::CannotFindTypeInCurrentScope( - state.current.kind.to_string(), - state.current.span, - )); - } - - state.next(); - - Expression::Static - } - TokenKind::Parent => { - if !state.has_class_scope { - return Err(ParseError::CannotFindTypeInCurrentScope( - state.current.kind.to_string(), - state.current.span, - )); - } - - state.next(); - - Expression::Parent - } - _ => expression(state, Precedence::CloneOrNew)?, - }; - - let mut args = vec![]; - if state.current.kind == TokenKind::LeftParen { - args = parameters::args_list(state)?; - } - - Expression::New { - target: Box::new(target), - args, - } - } - TokenKind::DirConstant => { - state.next(); - Expression::MagicConst { - constant: MagicConst::Dir, - } - } - TokenKind::Include - | TokenKind::IncludeOnce - | TokenKind::Require - | TokenKind::RequireOnce => { - let kind: IncludeKind = (&state.current.kind).into(); - state.next(); - - let path = expression(state, Precedence::Lowest)?; - - Expression::Include { - kind, - path: Box::new(path), - } - } - _ if is_prefix(&state.current.kind) => { - let op = state.current.kind.clone(); - - state.next(); - - let rpred = Precedence::prefix(&op); - let rhs = expression(state, rpred)?; - - prefix(&op, rhs) - } - TokenKind::Dollar => variables::dynamic_variable(state)?, - _ => { - return Err(ParseError::UnexpectedToken( - state.current.kind.to_string(), - state.current.span, - )) - } - } - }; - - if state.current.kind == TokenKind::SemiColon { - return Ok(left); - } - - state.skip_comments(); - - loop { - state.skip_comments(); - - if matches!(state.current.kind, TokenKind::SemiColon | TokenKind::Eof) { - break; - } - - let span = state.current.span; - let kind = state.current.kind.clone(); - - if is_postfix(&kind) { - let lpred = Precedence::postfix(&kind); - - if lpred < precedence { - break; - } - - left = postfix(state, left, &kind)?; - continue; - } - - if is_infix(&kind) { - let rpred = Precedence::infix(&kind); - - if rpred < precedence { - break; - } - - if rpred == precedence && matches!(rpred.associativity(), Some(Associativity::Left)) { - break; - } - - if rpred == precedence && matches!(rpred.associativity(), Some(Associativity::Non)) { - return Err(ParseError::UnexpectedToken(kind.to_string(), span)); - } - - state.next(); - - match kind { - TokenKind::Question => { - let then = expression(state, Precedence::Lowest)?; - utils::skip_colon(state)?; - let otherwise = expression(state, rpred)?; - left = Expression::Ternary { - condition: Box::new(left), - then: Some(Box::new(then)), - r#else: Box::new(otherwise), - } - } - TokenKind::QuestionColon => { - let r#else = expression(state, Precedence::Lowest)?; - left = Expression::Ternary { - condition: Box::new(left), - then: None, - r#else: Box::new(r#else), - } - } - _ => { - // FIXME: Hacky, should probably be refactored. - left = match kind { - TokenKind::Equals if state.current.kind == TokenKind::Ampersand => { - state.next(); - Expression::Infix { - lhs: Box::new(left), - op: ast::InfixOp::AssignRef, - rhs: Box::new(expression(state, rpred)?), - } - } - TokenKind::Instanceof if state.current.kind == TokenKind::Self_ => { - if !state.has_class_scope { - return Err(ParseError::CannotFindTypeInCurrentScope( - state.current.kind.to_string(), - state.current.span, - )); - } - - state.next(); - - Expression::Infix { - lhs: Box::new(left), - op: ast::InfixOp::Instanceof, - rhs: Box::new(Expression::Self_), - } - } - TokenKind::Instanceof if state.current.kind == TokenKind::Parent => { - if !state.has_class_scope { - return Err(ParseError::CannotFindTypeInCurrentScope( - state.current.kind.to_string(), - state.current.span, - )); - } - - state.next(); - - Expression::Infix { - lhs: Box::new(left), - op: ast::InfixOp::Instanceof, - rhs: Box::new(Expression::Parent), - } - } - TokenKind::Instanceof if state.current.kind == TokenKind::Static => { - if !state.has_class_scope { - return Err(ParseError::CannotFindTypeInCurrentScope( - state.current.kind.to_string(), - state.current.span, - )); - } - - state.next(); - - Expression::Infix { - lhs: Box::new(left), - op: ast::InfixOp::Instanceof, - rhs: Box::new(Expression::Static), - } - } - _ => Expression::Infix { - lhs: Box::new(left), - op: kind.into(), - rhs: Box::new(expression(state, rpred)?), - }, - }; - } - } - - continue; - } - - break; - } - - state.skip_comments(); - - Ok(left) -} - -fn postfix(state: &mut State, lhs: Expression, op: &TokenKind) -> Result { - Ok(match op { - TokenKind::Coalesce => { - state.next(); - - let rhs = expression(state, Precedence::NullCoalesce)?; - - Expression::Coalesce { - lhs: Box::new(lhs), - rhs: Box::new(rhs), - } - } - TokenKind::LeftParen => { - let args = parameters::args_list(state)?; - - Expression::Call { - target: Box::new(lhs), - args, - } - } - TokenKind::LeftBracket => { - utils::skip_left_bracket(state)?; - - if state.current.kind == TokenKind::RightBracket { - state.next(); - - Expression::ArrayIndex { - array: Box::new(lhs), - index: None, - } - } else { - let index = expression(state, Precedence::Lowest)?; - - utils::skip_right_bracket(state)?; - - Expression::ArrayIndex { - array: Box::new(lhs), - index: Some(Box::new(index)), - } - } - } - TokenKind::DoubleColon => { - utils::skip_double_colon(state)?; - - let mut must_be_method_call = false; - - let property = match state.current.kind.clone() { - TokenKind::Dollar => variables::dynamic_variable(state)?, - TokenKind::Variable(_) => Expression::Variable(identifiers::var(state)?), - TokenKind::Identifier(_) => Expression::Identifier(identifiers::ident(state)?), - TokenKind::LeftBrace => { - must_be_method_call = true; - state.next(); - - let name = expression(state, Precedence::Lowest)?; - - utils::skip_right_brace(state)?; - - Expression::DynamicVariable { - name: Box::new(name), - } - } - TokenKind::Class => { - let start = state.current.span; - state.next(); - let end = state.current.span; - - Expression::Identifier(Identifier { - start, - name: "class".into(), - end, - }) - } - _ if identifiers::is_reserved_ident(&state.current.kind) => { - Expression::Identifier(identifiers::ident_maybe_reserved(state)?) - } - _ => { - return expected_token_err!(["`{`", "`$`", "an identifier"], state); - } - }; - - let lhs = Box::new(lhs); - - match property { - // 1. If we have an identifier and the current token is not a left paren, - // the resulting expression must be a constant fetch. - Expression::Identifier(identifier) - if state.current.kind != TokenKind::LeftParen => - { - Expression::ConstFetch { - target: lhs, - constant: identifier, - } - } - // 2. If the current token is a left paren, or if we know the property expression - // is only valid a method call context, we can assume we're parsing a static - // method call. - _ if state.current.kind == TokenKind::LeftParen || must_be_method_call => { - let args = parameters::args_list(state)?; - - Expression::StaticMethodCall { - target: lhs, - method: Box::new(property), - args, - } - } - // 3. If we haven't met any of the previous conditions, we can assume - // that we're parsing a static property fetch. - _ => Expression::StaticPropertyFetch { - target: lhs, - property: Box::new(property), - }, - } - } - TokenKind::Arrow | TokenKind::NullsafeArrow => { - state.next(); - - let property = match state.current.kind { - TokenKind::LeftBrace => { - utils::skip_left_brace(state)?; - let expr = expression(state, Precedence::Lowest)?; - utils::skip_right_brace(state)?; - expr - } - TokenKind::Variable(_) => Expression::Variable(identifiers::var(state)?), - TokenKind::Dollar => variables::dynamic_variable(state)?, - _ => Expression::Identifier(identifiers::ident_maybe_reserved(state)?), - }; - - if state.current.kind == TokenKind::LeftParen { - let args = parameters::args_list(state)?; - - if op == &TokenKind::NullsafeArrow { - Expression::NullsafeMethodCall { - target: Box::new(lhs), - method: Box::new(property), - args, - } - } else { - Expression::MethodCall { - target: Box::new(lhs), - method: Box::new(property), - args, - } - } - } else if op == &TokenKind::NullsafeArrow { - Expression::NullsafePropertyFetch { - target: Box::new(lhs), - property: Box::new(property), - } - } else { - Expression::PropertyFetch { - target: Box::new(lhs), - property: Box::new(property), - } - } - } - TokenKind::Increment => { - state.next(); - Expression::Increment { - value: Box::new(lhs), - } - } - TokenKind::Decrement => { - state.next(); - - Expression::Decrement { - value: Box::new(lhs), - } - } - _ => todo!("postfix: {:?}", op), - }) -} - -#[inline(always)] -fn interpolated_string(state: &mut State) -> ParseResult { - let mut parts = Vec::new(); - - while state.current.kind != TokenKind::DoubleQuote { - if let Some(part) = interpolated_string_part(state)? { - parts.push(part); - } - } - - state.next(); - - Ok(Expression::InterpolatedString { parts }) -} - -#[inline(always)] -fn shell_exec(state: &mut State) -> ParseResult { - state.next(); - - let mut parts = Vec::new(); - - while state.current.kind != TokenKind::Backtick { - if let Some(part) = interpolated_string_part(state)? { - parts.push(part); - } - } - - state.next(); - - Ok(Expression::ShellExec { parts }) -} - -#[inline(always)] -fn doc_string(state: &mut State, kind: DocStringKind) -> ParseResult { - state.next(); - - Ok(match kind { - DocStringKind::Heredoc => { - let mut parts = Vec::new(); - - while !matches!(state.current.kind, TokenKind::EndDocString(_, _, _)) { - if let Some(part) = interpolated_string_part(state)? { - parts.push(part); - } - } - - let (indentation_type, indentation_amount) = match state.current.kind { - TokenKind::EndDocString(_, indentation_type, indentation_amount) => { - (indentation_type, indentation_amount) - } - _ => unreachable!(), - }; - - state.next(); - - // FIXME: Can we move this logic above into the loop, by peeking ahead in - // the token stream for the EndHeredoc? Might be more performant. - if let Some(indentation_type) = indentation_type { - let search_char: u8 = indentation_type.into(); - - for part in parts.iter_mut() { - match part { - StringPart::Const(bytes) => { - for _ in 0..indentation_amount { - if bytes.starts_with(&[search_char]) { - bytes.remove(0); - } - } - } - _ => continue, - } - } - } - - Expression::Heredoc { parts } - } - DocStringKind::Nowdoc => { - // FIXME: This feels hacky. We should probably produce different tokens from the lexer - // but since I already had the logic in place for parsing heredocs, this was - // the fastest way to get nowdocs working too. - let mut s = expect_token!([ - TokenKind::StringPart(s) => s - ], state, "constant string"); - - let (indentation_type, indentation_amount) = expect_token!([ - TokenKind::EndDocString(_, indentation_type, indentation_amount) => (indentation_type, indentation_amount) - ], state, "label"); - - // FIXME: Hacky code, but it's late and I want to get this done. - if let Some(indentation_type) = indentation_type { - let search_char: u8 = indentation_type.into(); - let mut lines = s - .split(|b| *b == b'\n') - .map(|s| s.to_vec()) - .collect::>>(); - for line in lines.iter_mut() { - for _ in 0..indentation_amount { - if line.starts_with(&[search_char]) { - line.remove(0); - } - } - } - let mut bytes = Vec::new(); - for (i, line) in lines.iter().enumerate() { - bytes.extend(line); - if i < lines.len() - 1 { - bytes.push(b'\n'); - } - } - s = bytes.into(); - } - - Expression::Nowdoc { value: s } - } - }) -} - -fn interpolated_string_part(state: &mut State) -> ParseResult> { - Ok(match &state.current.kind { - TokenKind::StringPart(s) => { - let part = if s.len() > 0 { - Some(StringPart::Const(s.clone())) - } else { - None - }; - - state.next(); - part - } - TokenKind::DollarLeftBrace => { - state.next(); - let e = match (state.current.kind.clone(), state.peek.kind.clone()) { - (TokenKind::Identifier(name), TokenKind::RightBrace) => { - let start = state.current.span; - let end = state.peek.span; - - state.next(); - state.next(); - // "${var}" - // TODO: we should use a different node for this. - Expression::Variable(Variable { start, name, end }) - } - (TokenKind::Identifier(name), TokenKind::LeftBracket) => { - let start = state.current.span; - let end = state.peek.span; - state.next(); - state.next(); - let var = Expression::Variable(Variable { start, name, end }); - - let e = expression(state, Precedence::Lowest)?; - utils::skip_right_bracket(state)?; - utils::skip_right_brace(state)?; - - // TODO: we should use a different node for this. - Expression::ArrayIndex { - array: Box::new(var), - index: Some(Box::new(e)), - } - } - _ => { - // Arbitrary expressions are allowed, but are treated as variable variables. - let e = expression(state, Precedence::Lowest)?; - utils::skip_right_brace(state)?; - - Expression::DynamicVariable { name: Box::new(e) } - } - }; - Some(StringPart::Expr(Box::new(e))) - } - TokenKind::LeftBrace => { - // "{$expr}" - state.next(); - let e = expression(state, Precedence::Lowest)?; - utils::skip_right_brace(state)?; - Some(StringPart::Expr(Box::new(e))) - } - TokenKind::Variable(_) => { - // "$expr", "$expr[0]", "$expr[name]", "$expr->a" - let var = Expression::Variable(identifiers::var(state)?); - let e = match state.current.kind { - TokenKind::LeftBracket => { - state.next(); - // Full expression syntax is not allowed here, - // so we can't call expression. - let index = match &state.current.kind { - TokenKind::LiteralInteger(i) => { - let e = Expression::LiteralInteger { i: i.clone() }; - state.next(); - e - } - TokenKind::Minus => { - state.next(); - if let TokenKind::LiteralInteger(i) = &state.current.kind { - let e = Expression::Negate { - value: Box::new(Expression::LiteralInteger { i: i.clone() }), - }; - state.next(); - e - } else { - return expected_token_err!("an integer", state); - } - } - TokenKind::Identifier(ident) => { - let e = Expression::LiteralString { - value: ident.clone(), - }; - state.next(); - e - } - TokenKind::Variable(_) => { - let v = identifiers::var(state)?; - Expression::Variable(v) - } - _ => { - return expected_token_err!( - ["`-`", "an integer", "an identifier", "a variable"], - state - ); - } - }; - - utils::skip_right_bracket(state)?; - - Expression::ArrayIndex { - array: Box::new(var), - index: Some(Box::new(index)), - } - } - TokenKind::Arrow => { - state.next(); - Expression::PropertyFetch { - target: Box::new(var), - property: Box::new(Expression::Identifier( - identifiers::ident_maybe_reserved(state)?, - )), - } - } - TokenKind::NullsafeArrow => { - state.next(); - Expression::NullsafePropertyFetch { - target: Box::new(var), - property: Box::new(Expression::Identifier( - identifiers::ident_maybe_reserved(state)?, - )), - } - } - _ => var, - }; - Some(StringPart::Expr(Box::new(e))) - } - _ => { - return expected_token_err!(["`${`", "`{$", "`\"`", "a variable"], state); - } - }) -} - -#[inline(always)] -fn is_prefix(op: &TokenKind) -> bool { - matches!( - op, - TokenKind::Bang - | TokenKind::Print - | TokenKind::BitwiseNot - | TokenKind::Decrement - | TokenKind::Increment - | TokenKind::Minus - | TokenKind::Plus - | TokenKind::StringCast - | TokenKind::BinaryCast - | TokenKind::ObjectCast - | TokenKind::BoolCast - | TokenKind::BooleanCast - | TokenKind::IntCast - | TokenKind::IntegerCast - | TokenKind::FloatCast - | TokenKind::DoubleCast - | TokenKind::RealCast - | TokenKind::UnsetCast - | TokenKind::ArrayCast - | TokenKind::At - ) -} - -#[inline(always)] -fn prefix(op: &TokenKind, rhs: Expression) -> Expression { - match op { - TokenKind::Print => Expression::Print { - value: Box::new(rhs), - }, - TokenKind::Bang => Expression::BooleanNot { - value: Box::new(rhs), - }, - TokenKind::Minus => Expression::Negate { - value: Box::new(rhs), - }, - TokenKind::Plus => Expression::UnaryPlus { - value: Box::new(rhs), - }, - TokenKind::BitwiseNot => Expression::BitwiseNot { - value: Box::new(rhs), - }, - TokenKind::Decrement => Expression::PreDecrement { - value: Box::new(rhs), - }, - TokenKind::Increment => Expression::PreIncrement { - value: Box::new(rhs), - }, - TokenKind::StringCast - | TokenKind::BinaryCast - | TokenKind::ObjectCast - | TokenKind::BoolCast - | TokenKind::BooleanCast - | TokenKind::IntCast - | TokenKind::IntegerCast - | TokenKind::FloatCast - | TokenKind::DoubleCast - | TokenKind::RealCast - | TokenKind::UnsetCast - | TokenKind::ArrayCast => Expression::Cast { - kind: op.into(), - value: Box::new(rhs), - }, - TokenKind::At => Expression::ErrorSuppress { - expr: Box::new(rhs), - }, - _ => unreachable!(), - } -} - -fn is_infix(t: &TokenKind) -> bool { - matches!( - t, - TokenKind::Pow - | TokenKind::RightShiftEquals - | TokenKind::LeftShiftEquals - | TokenKind::CaretEquals - | TokenKind::AmpersandEquals - | TokenKind::PipeEquals - | TokenKind::PercentEquals - | TokenKind::PowEquals - | TokenKind::LogicalAnd - | TokenKind::LogicalOr - | TokenKind::LogicalXor - | TokenKind::Spaceship - | TokenKind::LeftShift - | TokenKind::RightShift - | TokenKind::Ampersand - | TokenKind::Pipe - | TokenKind::Caret - | TokenKind::Percent - | TokenKind::Instanceof - | TokenKind::Asterisk - | TokenKind::Slash - | TokenKind::Plus - | TokenKind::Minus - | TokenKind::Dot - | TokenKind::LessThan - | TokenKind::GreaterThan - | TokenKind::LessThanEquals - | TokenKind::GreaterThanEquals - | TokenKind::DoubleEquals - | TokenKind::TripleEquals - | TokenKind::BangEquals - | TokenKind::BangDoubleEquals - | TokenKind::AngledLeftRight - | TokenKind::Question - | TokenKind::QuestionColon - | TokenKind::BooleanAnd - | TokenKind::BooleanOr - | TokenKind::Equals - | TokenKind::PlusEquals - | TokenKind::MinusEquals - | TokenKind::DotEquals - | TokenKind::CoalesceEqual - | TokenKind::AsteriskEqual - | TokenKind::SlashEquals - ) -} - -#[inline(always)] -fn is_postfix(t: &TokenKind) -> bool { - matches!( - t, - TokenKind::Increment - | TokenKind::Decrement - | TokenKind::LeftParen - | TokenKind::LeftBracket - | TokenKind::Arrow - | TokenKind::NullsafeArrow - | TokenKind::DoubleColon - | TokenKind::Coalesce - ) -} diff --git a/tests/third_party_tests.rs b/tests/third_party_tests.rs index 8d4eb3d..b335a2b 100644 --- a/tests/third_party_tests.rs +++ b/tests/third_party_tests.rs @@ -2,9 +2,15 @@ use std::env; use std::fs; use std::path::PathBuf; use std::process::Command; +use std::thread; use php_parser_rs::lexer::Lexer; +enum TestResult { + Success, + Error(String), +} + #[test] fn third_party_1_php_standard_library() { test_repository( @@ -33,9 +39,11 @@ fn third_party_3_symfony_framework() { "symfony-framework", "https://github.com/symfony/symfony", "6.3", - &["src/Symfony"], + &["src/Symfony/"], &[ + // stub "src/Symfony/Bridge/ProxyManager/Tests/LazyProxy/PhpDumper/Fixtures/proxy-implem.php", + // file contains syntax error used for testing. "src/Symfony/Component/Config/Tests/Fixtures/ParseError.php", // FIXME: Remove this one once I've found the energy to sort out heredocs / nowdocs. "src/Symfony/Component/DependencyInjection/LazyProxy/PhpDumper/LazyServiceDumper.php", @@ -98,12 +106,86 @@ fn test_repository( } } + let mut entries = vec![]; for dir in directories { - test_directory(out_path.clone(), out_path.join(dir), ignore); + entries.append(&mut read_directory( + out_path.clone(), + out_path.join(dir), + ignore, + )); + } + + let mut threads = vec![]; + for (index, chunk) in entries.chunks(entries.len() / 4).enumerate() { + let chunk = chunk.to_vec(); + let thread = thread::Builder::new() + .stack_size(16 * 1024 * 1024) + .name(format!("{name}:{index}")) + .spawn(move || { + let thread = thread::current(); + let thread_name = thread.name().unwrap(); + + let mut results = vec![]; + for (name, filename) in chunk { + let code = std::fs::read(&filename).unwrap(); + + match Lexer::new().tokenize(&code) { + Ok(tokens) => match php_parser_rs::parse(tokens) { + Ok(ast) => { + println!("✅ [{thread_name}][{name}]: {} statement(s).", ast.len()); + + results.push(TestResult::Success); + } + Err(error) => { + results.push(TestResult::Error(format!( + "❌ [{thread_name}][{name}]: {error:?}" + ))); + } + }, + Err(error) => { + results.push(TestResult::Error(format!( + "❌ [{thread_name}][{name}]: {error:?}" + ))); + } + } + } + + results + }); + + threads.push(thread); + } + + let mut results = vec![]; + for thread in threads { + let mut result = thread + .unwrap_or_else(|e| panic!("failed to spawn thread: {:#?}", e)) + .join() + .unwrap_or_else(|e| panic!("failed to join thread: {:#?}", e)); + + results.append(&mut result); + } + + let mut fail = false; + results + .iter() + .map(|result| match result { + TestResult::Error(message) => { + fail = true; + + println!("{}", message); + } + TestResult::Success => {} + }) + .for_each(drop); + + if fail { + panic!(); } } -fn test_directory(root: PathBuf, directory: PathBuf, ignore: &[&str]) { +fn read_directory(root: PathBuf, directory: PathBuf, ignore: &[&str]) -> Vec<(String, PathBuf)> { + let mut results = vec![]; let mut entries = fs::read_dir(&directory) .unwrap() .flatten() @@ -114,7 +196,7 @@ fn test_directory(root: PathBuf, directory: PathBuf, ignore: &[&str]) { for entry in entries { if entry.is_dir() { - test_directory(root.clone(), entry, ignore); + results.append(&mut read_directory(root.clone(), entry, ignore)); continue; } @@ -136,26 +218,9 @@ fn test_directory(root: PathBuf, directory: PathBuf, ignore: &[&str]) { .strip_prefix(root.to_str().unwrap()) .unwrap(); - test_file(name, entry); + results.push((name.to_string(), entry)); } } -} -fn test_file(name: &str, filename: PathBuf) { - let code = std::fs::read(&filename).unwrap(); - - Lexer::new() - .tokenize(&code) - .map(|tokens| { - php_parser_rs::parse(tokens) - .map(|_| { - println!("✅ successfully parsed file: `\"{}\"`.", name); - }) - .unwrap_or_else(|error| { - panic!("❌ failed to parse file: `\"{name}\"`, error: {error:?}") - }) - }) - .unwrap_or_else(|error| { - panic!("❌ failed to tokenize file: `\"{name}\"`, error: {error:?}") - }); + results }