diff --git a/README.md b/README.md index c23f089..03cd319 100644 --- a/README.md +++ b/README.md @@ -26,12 +26,11 @@ cargo add php-parser-rs ### Example ```rust -use php_parser_rs::parser::Parser; +use php_parser_rs::parse; use php_parser_rs::lexer::Lexer; fn main() -> ParseResult<()> { let lexer = Lexer::new(); - let parser = Parser::new(); let code = " { std::fs::write(ast_filename, format!("{:#?}\n", ast)).unwrap(); diff --git a/src/lib.rs b/src/lib.rs index 6054fb1..4c420bd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,2 +1,4 @@ pub mod lexer; pub mod parser; + +pub use parser::parse; diff --git a/src/main.rs b/src/main.rs index 0ffe465..b43232d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,5 @@ use php_parser_rs::lexer::Lexer; use php_parser_rs::parser::error::ParseResult; -use php_parser_rs::parser::Parser; fn main() -> ParseResult<()> { let file = match std::env::args().nth(1) { @@ -22,12 +21,11 @@ fn main() -> ParseResult<()> { }; let lexer = Lexer::new(); - let parser = Parser::new(); let tokens = lexer.tokenize(&contents)?; // dbg!(&tokens); - let ast = parser.parse(tokens)?; + let ast = php_parser_rs::parse(tokens)?; dbg!(ast); diff --git a/src/parser/ast/mod.rs b/src/parser/ast/mod.rs index ec6c58b..c939d53 100644 --- a/src/parser/ast/mod.rs +++ b/src/parser/ast/mod.rs @@ -11,6 +11,7 @@ pub mod variables; use std::fmt::Display; use crate::lexer::byte_string::ByteString; +use crate::lexer::token::Span; use crate::lexer::token::TokenKind; use crate::parser::ast::attributes::AttributeGroup; use crate::parser::ast::classish::ClassishConstant; @@ -316,7 +317,7 @@ pub enum Statement { declares: Vec, body: Block, }, - Noop, + Noop(Span), } #[derive(Debug, Clone, PartialEq)] diff --git a/src/parser/internal/arrays.rs b/src/parser/internal/arrays.rs index 83b24f7..b9da679 100644 --- a/src/parser/internal/arrays.rs +++ b/src/parser/internal/arrays.rs @@ -1,4 +1,5 @@ use crate::lexer::token::TokenKind; +use crate::parser; use crate::parser::ast::ArrayItem; use crate::parser::ast::Expression; use crate::parser::ast::ListItem; @@ -7,27 +8,48 @@ use crate::parser::error::ParseResult; use crate::parser::internal::precedences::Precedence; use crate::parser::internal::utils; use crate::parser::state::State; -use crate::parser::Parser; -impl Parser { - pub(in crate::parser) fn list_expression(&self, state: &mut State) -> ParseResult { - utils::skip(state, TokenKind::List)?; - utils::skip_left_parenthesis(state)?; +pub fn list_expression(state: &mut State) -> ParseResult { + utils::skip(state, TokenKind::List)?; + utils::skip_left_parenthesis(state)?; - let mut items = Vec::new(); - let mut has_atleast_one_key = false; + let mut items = Vec::new(); + let mut has_atleast_one_key = false; - while state.current.kind != TokenKind::RightParen { - if state.current.kind == TokenKind::Comma { - items.push(ListItem { - key: None, - value: Expression::Empty, - }); - state.next(); - continue; + while state.current.kind != TokenKind::RightParen { + if state.current.kind == TokenKind::Comma { + items.push(ListItem { + key: None, + value: Expression::Empty, + }); + state.next(); + continue; + } + + let mut key = None; + + if state.current.kind == TokenKind::Ellipsis { + return Err(ParseError::IllegalSpreadOperator(state.current.span)); + } + + if state.current.kind == TokenKind::Ampersand { + return Err(ParseError::CannotAssignReferenceToNonReferencableValue( + state.current.span, + )); + } + + let mut value = parser::expression(state, Precedence::Lowest)?; + + if state.current.kind == TokenKind::DoubleArrow { + if !has_atleast_one_key && !items.is_empty() { + return Err(ParseError::CannotMixKeyedAndUnkeyedEntries( + state.current.span, + )); } - let mut key = None; + state.next(); + + key = Some(value); if state.current.kind == TokenKind::Ellipsis { return Err(ParseError::IllegalSpreadOperator(state.current.span)); @@ -39,94 +61,77 @@ impl Parser { )); } - let mut value = self.expression(state, Precedence::Lowest)?; - - if state.current.kind == TokenKind::DoubleArrow { - if !has_atleast_one_key && !items.is_empty() { - return Err(ParseError::CannotMixKeyedAndUnkeyedEntries( - state.current.span, - )); - } - - state.next(); - - key = Some(value); - - if state.current.kind == TokenKind::Ellipsis { - return Err(ParseError::IllegalSpreadOperator(state.current.span)); - } - - if state.current.kind == TokenKind::Ampersand { - return Err(ParseError::CannotAssignReferenceToNonReferencableValue( - state.current.span, - )); - } - - has_atleast_one_key = true; - value = self.expression(state, Precedence::Lowest)?; - } else if has_atleast_one_key { - return Err(ParseError::CannotMixKeyedAndUnkeyedEntries( - state.current.span, - )); - } - - items.push(ListItem { key, value }); - - state.skip_comments(); - if state.current.kind == TokenKind::Comma { - state.next(); - state.skip_comments(); - } else { - break; - } + has_atleast_one_key = true; + value = parser::expression(state, Precedence::Lowest)?; + } else if has_atleast_one_key { + return Err(ParseError::CannotMixKeyedAndUnkeyedEntries( + state.current.span, + )); } - utils::skip_right_parenthesis(state)?; + items.push(ListItem { key, value }); - Ok(Expression::List { items }) - } - - pub(in crate::parser) fn array_expression(&self, state: &mut State) -> ParseResult { - utils::skip(state, TokenKind::LeftBracket)?; - - let mut items = Vec::new(); state.skip_comments(); - - while state.current.kind != TokenKind::RightBracket { - // TODO: return an error here instead of - // an empty array element - // see: https://3v4l.org/uLTVA - if state.current.kind == TokenKind::Comma { - items.push(ArrayItem { - key: None, - value: Expression::Empty, - unpack: false, - by_ref: false, - }); - state.next(); - continue; - } - - items.push(self.array_pair(state)?); - - state.skip_comments(); - - if state.current.kind != TokenKind::Comma { - break; - } - + if state.current.kind == TokenKind::Comma { state.next(); state.skip_comments(); + } else { + break; } + } + + utils::skip_right_parenthesis(state)?; + + Ok(Expression::List { items }) +} + +pub fn array_expression(state: &mut State) -> ParseResult { + utils::skip(state, TokenKind::LeftBracket)?; + + let mut items = Vec::new(); + state.skip_comments(); + + while state.current.kind != TokenKind::RightBracket { + // TODO: return an error here instead of + // an empty array element + // see: https://3v4l.org/uLTVA + if state.current.kind == TokenKind::Comma { + items.push(ArrayItem { + key: None, + value: Expression::Empty, + unpack: false, + by_ref: false, + }); + state.next(); + continue; + } + + items.push(array_pair(state)?); state.skip_comments(); - utils::skip_right_bracket(state)?; + if state.current.kind != TokenKind::Comma { + break; + } - Ok(Expression::Array { items }) + state.next(); + state.skip_comments(); } - pub(in crate::parser) fn array_pair(&self, state: &mut State) -> ParseResult { + state.skip_comments(); + + utils::skip_right_bracket(state)?; + + Ok(Expression::Array { items }) +} + +pub fn legacy_array_expression(state: &mut State) -> ParseResult { + utils::skip(state, TokenKind::Array)?; + utils::skip_left_parenthesis(state)?; + + let mut items = vec![]; + + while state.current.kind != TokenKind::RightParen { let mut key = None; let unpack = if state.current.kind == TokenKind::Ellipsis { state.next(); @@ -143,7 +148,9 @@ impl Parser { (false, (0, 0)) }; - let mut value = self.expression(state, Precedence::Lowest)?; + let mut value = parser::expression(state, Precedence::Lowest)?; + + // TODO: return error for `[...$a => $b]`. if state.current.kind == TokenKind::DoubleArrow { state.next(); @@ -155,20 +162,80 @@ impl Parser { } key = Some(value); + by_ref = if state.current.kind == TokenKind::Ampersand { state.next(); true } else { false }; - value = self.expression(state, Precedence::Lowest)?; + + value = parser::expression(state, Precedence::Lowest)?; } - Ok(ArrayItem { + items.push(ArrayItem { key, value, unpack, by_ref, - }) + }); + + if state.current.kind == TokenKind::Comma { + state.next(); + } else { + break; + } + + state.skip_comments(); } + + utils::skip_right_parenthesis(state)?; + + Ok(Expression::Array { items }) +} + +fn array_pair(state: &mut State) -> ParseResult { + let mut key = None; + let unpack = if state.current.kind == TokenKind::Ellipsis { + state.next(); + true + } else { + false + }; + + let (mut by_ref, amper_span) = if state.current.kind == TokenKind::Ampersand { + let span = state.current.span; + state.next(); + (true, span) + } else { + (false, (0, 0)) + }; + + let mut value = parser::expression(state, Precedence::Lowest)?; + if state.current.kind == TokenKind::DoubleArrow { + state.next(); + + if by_ref { + return Err(ParseError::UnexpectedToken( + TokenKind::Ampersand.to_string(), + amper_span, + )); + } + + key = Some(value); + by_ref = if state.current.kind == TokenKind::Ampersand { + state.next(); + true + } else { + false + }; + value = parser::expression(state, Precedence::Lowest)?; + } + + Ok(ArrayItem { + key, + value, + unpack, + by_ref, + }) } diff --git a/src/parser/internal/attributes.rs b/src/parser/internal/attributes.rs index c9cb3ad..db5a5f6 100644 --- a/src/parser/internal/attributes.rs +++ b/src/parser/internal/attributes.rs @@ -1,52 +1,50 @@ use crate::lexer::token::TokenKind; +use crate::parser; use crate::parser::ast::attributes::Attribute; use crate::parser::ast::attributes::AttributeGroup; use crate::parser::error::ParseResult; use crate::parser::internal::precedences::Precedence; use crate::parser::internal::utils; use crate::parser::state::State; -use crate::parser::Parser; -impl Parser { - pub(in crate::parser) fn gather_attributes(&self, state: &mut State) -> ParseResult { - state.gather_comments(); +pub fn gather_attributes(state: &mut State) -> ParseResult { + state.gather_comments(); - if state.current.kind != TokenKind::Attribute { - return Ok(false); - } + if state.current.kind != TokenKind::Attribute { + return Ok(false); + } + let start = state.current.span; + let mut members = vec![]; + + state.next(); + + while state.current.kind != TokenKind::RightBracket { let start = state.current.span; - let mut members = vec![]; + let expression = parser::expression(state, Precedence::Lowest)?; + let end = state.current.span; - state.next(); - - while state.current.kind != TokenKind::RightBracket { - let start = state.current.span; - let expression = self.expression(state, Precedence::Lowest)?; - let end = state.current.span; - - members.push(Attribute { - start, - expression, - end, - }); - - if state.current.kind == TokenKind::Comma { - state.next(); - } else { - break; - } - } - - let end = utils::skip_right_bracket(state)?; - - state.attribute(AttributeGroup { + members.push(Attribute { start, - members, + expression, end, }); - // recursive, looking for multiple attribute brackets after each other. - self.gather_attributes(state).map(|_| true) + if state.current.kind == TokenKind::Comma { + state.next(); + } else { + break; + } } + + let end = utils::skip_right_bracket(state)?; + + state.attribute(AttributeGroup { + start, + members, + end, + }); + + // recursive, looking for multiple attribute brackets after each other. + gather_attributes(state).map(|_| true) } diff --git a/src/parser/internal/blocks.rs b/src/parser/internal/blocks.rs index abf0158..cb3a76b 100644 --- a/src/parser/internal/blocks.rs +++ b/src/parser/internal/blocks.rs @@ -1,41 +1,35 @@ use crate::lexer::token::TokenKind; +use crate::parser; use crate::parser::ast::Block; use crate::parser::ast::Statement; use crate::parser::error::ParseResult; use crate::parser::internal::utils; use crate::parser::state::State; -use crate::parser::Parser; -impl Parser { - pub(in crate::parser) fn block_statement(&self, state: &mut State) -> ParseResult { - utils::skip_left_brace(state)?; +pub fn block_statement(state: &mut State) -> ParseResult { + utils::skip_left_brace(state)?; - let body = self.body(state, &TokenKind::RightBrace)?; + let body = body(state, &TokenKind::RightBrace)?; - utils::skip_right_brace(state)?; + utils::skip_right_brace(state)?; - Ok(Statement::Block { body }) - } + Ok(Statement::Block { body }) +} - pub(in crate::parser) fn body( - &self, - state: &mut State, - until: &TokenKind, - ) -> ParseResult { - state.skip_comments(); +pub fn body(state: &mut State, until: &TokenKind) -> ParseResult { + state.skip_comments(); - let mut block = Block::new(); + let mut block = Block::new(); - while !state.is_eof() && &state.current.kind != until { - if let TokenKind::OpenTag(_) = state.current.kind { - state.next(); - continue; - } - - block.push(self.statement(state)?); - state.skip_comments(); + while !state.is_eof() && &state.current.kind != until { + if let TokenKind::OpenTag(_) = state.current.kind { + state.next(); + continue; } - Ok(block) + block.push(parser::statement(state)?); + state.skip_comments(); } + + Ok(block) } diff --git a/src/parser/internal/classish.rs b/src/parser/internal/classish.rs index 42c5e01..ad547f5 100644 --- a/src/parser/internal/classish.rs +++ b/src/parser/internal/classish.rs @@ -7,239 +7,171 @@ use crate::parser::ast::identifiers::Identifier; use crate::parser::ast::Expression; use crate::parser::ast::Statement; use crate::parser::error::ParseResult; +use crate::parser::internal::attributes; +use crate::parser::internal::classish_statements; use crate::parser::internal::identifiers; use crate::parser::internal::modifiers; +use crate::parser::internal::parameters; use crate::parser::internal::utils; use crate::parser::state::Scope; use crate::parser::state::State; -use crate::parser::Parser; use crate::scoped; -impl Parser { - pub(in crate::parser) fn class_definition(&self, state: &mut State) -> ParseResult { - let modifiers = modifiers::class_group(modifiers::collect(state)?)?; +pub fn class_definition(state: &mut State) -> ParseResult { + let modifiers = modifiers::class_group(modifiers::collect(state)?)?; - utils::skip(state, TokenKind::Class)?; + utils::skip(state, TokenKind::Class)?; - let name = identifiers::ident(state)?; + let name = identifiers::ident(state)?; - let mut has_parent = false; - let mut extends: Option = None; + let mut has_parent = false; + let mut extends: Option = None; - if state.current.kind == TokenKind::Extends { - state.next(); - extends = Some(identifiers::full_name(state)?); - has_parent = true; + if state.current.kind == TokenKind::Extends { + state.next(); + extends = Some(identifiers::full_name(state)?); + has_parent = true; + } + + let implements = if state.current.kind == TokenKind::Implements { + state.next(); + + at_least_one_comma_separated::(state, &identifiers::full_name)? + } else { + Vec::new() + }; + + let attributes = state.get_attributes(); + utils::skip_left_brace(state)?; + + let body = scoped!( + state, + Scope::Class(name.clone(), modifiers.clone(), has_parent), + { + let mut body = Vec::new(); + while state.current.kind != TokenKind::RightBrace { + state.gather_comments(); + + if state.current.kind == TokenKind::RightBrace { + state.clear_comments(); + break; + } + + body.push(classish_statements::class_like_statement(state)?); + } + + body } + ); - let implements = if state.current.kind == TokenKind::Implements { + utils::skip_right_brace(state)?; + + Ok(Statement::Class { + name, + attributes, + extends, + implements, + body, + modifiers, + }) +} + +pub fn interface_definition(state: &mut State) -> ParseResult { + utils::skip(state, TokenKind::Interface)?; + + let name = identifiers::ident(state)?; + + scoped!(state, Scope::Interface(name.clone()), { + let extends = if state.current.kind == TokenKind::Extends { state.next(); - self.at_least_one_comma_separated::(state, &|_, state| { + at_least_one_comma_separated::(state, &|state| { identifiers::full_name(state) })? } else { Vec::new() }; - let attributes = state.get_attributes(); utils::skip_left_brace(state)?; - let body = scoped!( - state, - Scope::Class(name.clone(), modifiers.clone(), has_parent), - { - let mut body = Vec::new(); - while state.current.kind != TokenKind::RightBrace { - state.gather_comments(); + let attributes = state.get_attributes(); - if state.current.kind == TokenKind::RightBrace { - state.clear_comments(); - break; - } + let mut body = Vec::new(); + while state.current.kind != TokenKind::RightBrace && !state.is_eof() { + state.gather_comments(); - body.push(self.class_like_statement(state)?); - } - - body + if state.current.kind == TokenKind::RightBrace { + state.clear_comments(); + break; } - ); + body.push(classish_statements::interface_statement(state)?); + } utils::skip_right_brace(state)?; - Ok(Statement::Class { + Ok(Statement::Interface { name, attributes, extends, - implements, body, - modifiers, }) - } + }) +} - pub(in crate::parser) fn interface_definition( - &self, - state: &mut State, - ) -> ParseResult { - utils::skip(state, TokenKind::Interface)?; +pub fn trait_definition(state: &mut State) -> ParseResult { + utils::skip(state, TokenKind::Trait)?; - let name = identifiers::ident(state)?; + let name = identifiers::ident(state)?; - scoped!(state, Scope::Interface(name.clone()), { - let extends = if state.current.kind == TokenKind::Extends { - state.next(); + scoped!(state, Scope::Trait(name.clone()), { + utils::skip_left_brace(state)?; - self.at_least_one_comma_separated::(state, &|_, state| { - identifiers::full_name(state) - })? - } else { - Vec::new() - }; + let attributes = state.get_attributes(); - utils::skip_left_brace(state)?; + let mut body = Vec::new(); + while state.current.kind != TokenKind::RightBrace && !state.is_eof() { + state.gather_comments(); - let attributes = state.get_attributes(); - - let mut body = Vec::new(); - while state.current.kind != TokenKind::RightBrace && !state.is_eof() { - state.gather_comments(); - - if state.current.kind == TokenKind::RightBrace { - state.clear_comments(); - break; - } - - body.push(self.interface_statement(state)?); + if state.current.kind == TokenKind::RightBrace { + state.clear_comments(); + break; } - utils::skip_right_brace(state)?; - Ok(Statement::Interface { - name, - attributes, - extends, - body, - }) - }) - } - - pub(in crate::parser) fn trait_definition(&self, state: &mut State) -> ParseResult { - utils::skip(state, TokenKind::Trait)?; - - let name = identifiers::ident(state)?; - - scoped!(state, Scope::Trait(name.clone()), { - utils::skip_left_brace(state)?; - - let attributes = state.get_attributes(); - - let mut body = Vec::new(); - while state.current.kind != TokenKind::RightBrace && !state.is_eof() { - state.gather_comments(); - - if state.current.kind == TokenKind::RightBrace { - state.clear_comments(); - break; - } - - body.push(self.class_like_statement(state)?); - } - utils::skip_right_brace(state)?; - - Ok(Statement::Trait { - name, - attributes, - body, - }) - }) - } - - pub(in crate::parser) fn anonymous_class_definition( - &self, - state: &mut State, - ) -> ParseResult { - utils::skip(state, TokenKind::New)?; - - self.gather_attributes(state)?; - - utils::skip(state, TokenKind::Class)?; - - let mut args = vec![]; - - if state.current.kind == TokenKind::LeftParen { - args = self.args_list(state)?; + body.push(classish_statements::class_like_statement(state)?); } + utils::skip_right_brace(state)?; - let mut has_parent = false; - let mut extends: Option = None; - - if state.current.kind == TokenKind::Extends { - state.next(); - extends = Some(identifiers::full_name(state)?); - has_parent = true; - } - - scoped!(state, Scope::AnonymousClass(has_parent), { - let mut implements = Vec::new(); - if state.current.kind == TokenKind::Implements { - state.next(); - - while state.current.kind != TokenKind::LeftBrace { - implements.push(identifiers::full_name(state)?); - - if state.current.kind == TokenKind::Comma { - state.next(); - } else { - break; - } - } - } - - utils::skip_left_brace(state)?; - - let attributes = state.get_attributes(); - - let mut body = Vec::new(); - while state.current.kind != TokenKind::RightBrace && !state.is_eof() { - body.push(self.class_like_statement(state)?); - } - - utils::skip_right_brace(state)?; - - Ok(Expression::New { - target: Box::new(Expression::AnonymousClass { - attributes, - extends, - implements, - body, - }), - args, - }) + Ok(Statement::Trait { + name, + attributes, + body, }) + }) +} + +pub fn anonymous_class_definition(state: &mut State) -> ParseResult { + utils::skip(state, TokenKind::New)?; + + attributes::gather_attributes(state)?; + + utils::skip(state, TokenKind::Class)?; + + let mut args = vec![]; + + if state.current.kind == TokenKind::LeftParen { + args = parameters::args_list(state)?; } - pub(in crate::parser) fn enum_definition(&self, state: &mut State) -> ParseResult { - let start = state.current.span; + let mut has_parent = false; + let mut extends: Option = None; - utils::skip(state, TokenKind::Enum)?; - - let name = identifiers::ident(state)?; - - let backed_type: Option = if state.current.kind == TokenKind::Colon { - utils::colon(state)?; - - expect_token!([ - TokenKind::Identifier(s) if s == b"string" || s == b"int" => { - Some(match &s[..] { - b"string" => BackedEnumType::String, - b"int" => BackedEnumType::Int, - _ => unreachable!(), - }) - }, - ], state, ["`string`", "`int`",]) - } else { - None - }; + if state.current.kind == TokenKind::Extends { + state.next(); + extends = Some(identifiers::full_name(state)?); + has_parent = true; + } + scoped!(state, Scope::AnonymousClass(has_parent), { let mut implements = Vec::new(); if state.current.kind == TokenKind::Implements { state.next(); @@ -255,72 +187,131 @@ impl Parser { } } + utils::skip_left_brace(state)?; + let attributes = state.get_attributes(); - if let Some(backed_type) = backed_type { - let (members, end) = scoped!(state, Scope::Enum(name.clone(), true), { - utils::skip_left_brace(state)?; - // TODO(azjezz): we know members might have corrupted start span, we could updated it here? - // as we know the correct start span is `state.current.span`. - let mut members = Vec::new(); - while state.current.kind != TokenKind::RightBrace { - state.skip_comments(); - members.push(self.backed_enum_member(state)?); - } - - let end = utils::skip_right_brace(state)?; - - (members, end) - }); - - Ok(Statement::BackedEnum(BackedEnum { - start, - end, - name, - backed_type, - attributes, - implements, - members, - })) - } else { - let (members, end) = scoped!(state, Scope::Enum(name.clone(), false), { - utils::skip_left_brace(state)?; - - let mut members = Vec::new(); - while state.current.kind != TokenKind::RightBrace { - state.skip_comments(); - members.push(self.unit_enum_member(state)?); - } - - (members, utils::skip_right_brace(state)?) - }); - - Ok(Statement::UnitEnum(UnitEnum { - start, - end, - name, - attributes, - implements, - members, - })) + let mut body = Vec::new(); + while state.current.kind != TokenKind::RightBrace && !state.is_eof() { + body.push(classish_statements::class_like_statement(state)?); } - } - fn at_least_one_comma_separated( - &self, - state: &mut State, - func: &(dyn Fn(&Parser, &mut State) -> ParseResult), - ) -> ParseResult> { - let mut result: Vec = vec![]; - loop { - result.push(func(self, state)?); - if state.current.kind != TokenKind::Comma { + utils::skip_right_brace(state)?; + + Ok(Expression::New { + target: Box::new(Expression::AnonymousClass { + attributes, + extends, + implements, + body, + }), + args, + }) + }) +} + +pub fn enum_definition(state: &mut State) -> ParseResult { + let start = state.current.span; + + utils::skip(state, TokenKind::Enum)?; + + let name = identifiers::ident(state)?; + + let backed_type: Option = if state.current.kind == TokenKind::Colon { + utils::skip_colon(state)?; + + expect_token!([ + TokenKind::Identifier(s) if s == b"string" || s == b"int" => { + Some(match &s[..] { + b"string" => BackedEnumType::String, + b"int" => BackedEnumType::Int, + _ => unreachable!(), + }) + }, + ], state, ["`string`", "`int`",]) + } else { + None + }; + + let mut implements = Vec::new(); + if state.current.kind == TokenKind::Implements { + state.next(); + + while state.current.kind != TokenKind::LeftBrace { + implements.push(identifiers::full_name(state)?); + + if state.current.kind == TokenKind::Comma { + state.next(); + } else { break; } - - state.next(); } + } - Ok(result) + let attributes = state.get_attributes(); + if let Some(backed_type) = backed_type { + let (members, end) = scoped!(state, Scope::Enum(name.clone(), true), { + utils::skip_left_brace(state)?; + + // TODO(azjezz): we know members might have corrupted start span, we could updated it here? + // as we know the correct start span is `state.current.span`. + let mut members = Vec::new(); + while state.current.kind != TokenKind::RightBrace { + state.skip_comments(); + members.push(classish_statements::backed_enum_member(state)?); + } + + let end = utils::skip_right_brace(state)?; + + (members, end) + }); + + Ok(Statement::BackedEnum(BackedEnum { + start, + end, + name, + backed_type, + attributes, + implements, + members, + })) + } else { + let (members, end) = scoped!(state, Scope::Enum(name.clone(), false), { + utils::skip_left_brace(state)?; + + let mut members = Vec::new(); + while state.current.kind != TokenKind::RightBrace { + state.skip_comments(); + members.push(classish_statements::unit_enum_member(state)?); + } + + (members, utils::skip_right_brace(state)?) + }); + + Ok(Statement::UnitEnum(UnitEnum { + start, + end, + name, + attributes, + implements, + members, + })) } } + +fn at_least_one_comma_separated( + state: &mut State, + func: &(dyn Fn(&mut State) -> ParseResult), +) -> ParseResult> { + let mut result: Vec = vec![]; + loop { + result.push(func(state)?); + if state.current.kind != TokenKind::Comma { + break; + } + + state.next(); + } + + Ok(result) +} diff --git a/src/parser/internal/classish_statements.rs b/src/parser/internal/classish_statements.rs index 58eb173..507ca28 100644 --- a/src/parser/internal/classish_statements.rs +++ b/src/parser/internal/classish_statements.rs @@ -2,6 +2,7 @@ use crate::expect_token; use crate::expected_scope; use crate::lexer::token::Span; use crate::lexer::token::TokenKind; +use crate::parser; use crate::parser::ast::classish::ClassishConstant; use crate::parser::ast::enums::BackedEnumCase; use crate::parser::ast::enums::BackedEnumMember; @@ -14,291 +15,277 @@ use crate::parser::ast::Statement; use crate::parser::ast::TraitAdaptation; use crate::parser::error::ParseError; use crate::parser::error::ParseResult; +use crate::parser::internal::attributes; use crate::parser::internal::data_type; +use crate::parser::internal::functions; use crate::parser::internal::identifiers; use crate::parser::internal::modifiers; use crate::parser::internal::precedences::Precedence; use crate::parser::internal::utils; use crate::parser::state::Scope; use crate::parser::state::State; -use crate::parser::Parser; use crate::peek_token; -impl Parser { - pub(in crate::parser) fn interface_statement( - &self, - state: &mut State, - ) -> ParseResult { - let has_attributes = self.gather_attributes(state)?; - let start = state.current.span; - let modifiers = modifiers::collect(state)?; +pub fn interface_statement(state: &mut State) -> ParseResult { + let has_attributes = attributes::gather_attributes(state)?; + let start = state.current.span; + let modifiers = modifiers::collect(state)?; - // if we have attributes, don't check const, we need a method. - if has_attributes || state.current.kind == TokenKind::Function { - Ok(Statement::Method(self.method( - state, - modifiers::interface_method_group(modifiers)?, - start, - )?)) - } else { - Ok(Statement::ClassishConstant(self.constant( - state, - modifiers::interface_constant_group(modifiers)?, - start, - )?)) - } + // if we have attributes, don't check const, we need a method. + if has_attributes || state.current.kind == TokenKind::Function { + Ok(Statement::Method(functions::method( + state, + modifiers::interface_method_group(modifiers)?, + start, + )?)) + } else { + Ok(Statement::ClassishConstant(constant( + state, + modifiers::interface_constant_group(modifiers)?, + start, + )?)) } +} - pub(in crate::parser) fn unit_enum_member( - &self, - state: &mut State, - ) -> ParseResult { - let enum_name = expected_scope!([ +pub fn unit_enum_member(state: &mut State) -> ParseResult { + let enum_name = expected_scope!([ Scope::Enum(enum_name, _) => enum_name, ], state); - let has_attributes = self.gather_attributes(state)?; - - if !has_attributes && state.current.kind == TokenKind::Case { - let start = state.current.span; - state.next(); - - let name = identifiers::ident(state)?; - - if state.current.kind == TokenKind::Equals { - return Err(ParseError::CaseValueForUnitEnum( - name.to_string(), - state.named(&enum_name), - state.current.span, - )); - } - - let end = utils::skip_semicolon(state)?; - - return Ok(UnitEnumMember::Case(UnitEnumCase { start, end, name })); - } + let has_attributes = attributes::gather_attributes(state)?; + if !has_attributes && state.current.kind == TokenKind::Case { let start = state.current.span; - let modifiers = modifiers::collect(state)?; + state.next(); - // if we have attributes, don't check const, we need a method. - if has_attributes || state.current.kind == TokenKind::Function { - Ok(UnitEnumMember::Method(self.method( - state, - modifiers::enum_method_group(modifiers)?, - start, - )?)) - } else { - Ok(UnitEnumMember::Constant(self.constant( - state, - modifiers::constant_group(modifiers)?, - start, - )?)) + let name = identifiers::ident(state)?; + + if state.current.kind == TokenKind::Equals { + return Err(ParseError::CaseValueForUnitEnum( + name.to_string(), + state.named(&enum_name), + state.current.span, + )); } + + let end = utils::skip_semicolon(state)?; + + return Ok(UnitEnumMember::Case(UnitEnumCase { start, end, name })); } - pub(in crate::parser) fn backed_enum_member( - &self, - state: &mut State, - ) -> ParseResult { - let enum_name = expected_scope!([ + let start = state.current.span; + let modifiers = modifiers::collect(state)?; + + // if we have attributes, don't check const, we need a method. + if has_attributes || state.current.kind == TokenKind::Function { + Ok(UnitEnumMember::Method(functions::method( + state, + modifiers::enum_method_group(modifiers)?, + start, + )?)) + } else { + Ok(UnitEnumMember::Constant(constant( + state, + modifiers::constant_group(modifiers)?, + start, + )?)) + } +} + +pub fn backed_enum_member(state: &mut State) -> ParseResult { + let enum_name = expected_scope!([ Scope::Enum(enum_name, _) => enum_name, ], state); - let has_attributes = self.gather_attributes(state)?; - - if !has_attributes && state.current.kind == TokenKind::Case { - let start = state.current.span; - state.next(); - - let name = identifiers::ident(state)?; - - if state.current.kind == TokenKind::SemiColon { - return Err(ParseError::MissingCaseValueForBackedEnum( - name.to_string(), - state.named(&enum_name), - state.current.span, - )); - } - - utils::skip(state, TokenKind::Equals)?; - - let value = self.expression(state, Precedence::Lowest)?; - - let end = utils::skip_semicolon(state)?; - - return Ok(BackedEnumMember::Case(BackedEnumCase { - start, - end, - name, - value, - })); - } + let has_attributes = attributes::gather_attributes(state)?; + if !has_attributes && state.current.kind == TokenKind::Case { let start = state.current.span; - let modifiers = modifiers::collect(state)?; + state.next(); - // if we have attributes, don't check const, we need a method. - if has_attributes || state.current.kind == TokenKind::Function { - Ok(BackedEnumMember::Method(self.method( - state, - modifiers::enum_method_group(modifiers)?, - start, - )?)) - } else { - Ok(BackedEnumMember::Constant(self.constant( - state, - modifiers::constant_group(modifiers)?, - start, - )?)) + let name = identifiers::ident(state)?; + + if state.current.kind == TokenKind::SemiColon { + return Err(ParseError::MissingCaseValueForBackedEnum( + name.to_string(), + state.named(&enum_name), + state.current.span, + )); } + + utils::skip(state, TokenKind::Equals)?; + + let value = parser::expression(state, Precedence::Lowest)?; + + let end = utils::skip_semicolon(state)?; + + return Ok(BackedEnumMember::Case(BackedEnumCase { + start, + end, + name, + value, + })); } - pub(in crate::parser) fn class_like_statement( - &self, - state: &mut State, - ) -> ParseResult { - let has_attributes = self.gather_attributes(state)?; + let start = state.current.span; + let modifiers = modifiers::collect(state)?; - let start = state.current.span; - let modifiers = modifiers::collect(state)?; + // if we have attributes, don't check const, we need a method. + if has_attributes || state.current.kind == TokenKind::Function { + Ok(BackedEnumMember::Method(functions::method( + state, + modifiers::enum_method_group(modifiers)?, + start, + )?)) + } else { + Ok(BackedEnumMember::Constant(constant( + state, + modifiers::constant_group(modifiers)?, + start, + )?)) + } +} - if !has_attributes { - if state.current.kind == TokenKind::Use { - return self.parse_classish_uses(state); - } +pub fn class_like_statement(state: &mut State) -> ParseResult { + let has_attributes = attributes::gather_attributes(state)?; - if state.current.kind == TokenKind::Const { - return Ok(Statement::ClassishConstant(self.constant( - state, - modifiers::constant_group(modifiers)?, - start, - )?)); - } + let start = state.current.span; + let modifiers = modifiers::collect(state)?; + + if !has_attributes { + if state.current.kind == TokenKind::Use { + return parse_classish_uses(state); } - if state.current.kind == TokenKind::Function { - return Ok(Statement::Method(self.method( + if state.current.kind == TokenKind::Const { + return Ok(Statement::ClassishConstant(constant( state, - modifiers::method_group(modifiers)?, + modifiers::constant_group(modifiers)?, start, )?)); } + } - // e.g: public static - let modifiers = modifiers::property_group(modifiers)?; - // e.g: string - let ty = data_type::optional_data_type(state)?; - // e.g: $name - let var = identifiers::var(state)?; + if state.current.kind == TokenKind::Function { + return Ok(Statement::Method(functions::method( + state, + modifiers::method_group(modifiers)?, + start, + )?)); + } - let mut value = None; - // e.g: = "foo"; - if state.current.kind == TokenKind::Equals { - state.next(); - value = Some(self.expression(state, Precedence::Lowest)?); - } + // e.g: public static + let modifiers = modifiers::property_group(modifiers)?; + // e.g: string + let ty = data_type::optional_data_type(state)?; + // e.g: $name + let var = identifiers::var(state)?; - let class_name: String = expected_scope!([ + let mut value = None; + // e.g: = "foo"; + if state.current.kind == TokenKind::Equals { + state.next(); + value = Some(parser::expression(state, Precedence::Lowest)?); + } + + let class_name: String = expected_scope!([ Scope::Trait(name) | Scope::Class(name, _, _) => state.named(&name), Scope::AnonymousClass(_) => state.named("class@anonymous"), ], state); - if modifiers.has_readonly() { - if modifiers.has_static() { - return Err(ParseError::StaticPropertyUsingReadonlyModifier( - class_name, - var.to_string(), - state.current.span, - )); - } - - if value.is_some() { - return Err(ParseError::ReadonlyPropertyHasDefaultValue( - class_name, - var.to_string(), - state.current.span, - )); - } + if modifiers.has_readonly() { + if modifiers.has_static() { + return Err(ParseError::StaticPropertyUsingReadonlyModifier( + class_name, + var.to_string(), + state.current.span, + )); } - match &ty { - Some(ty) => { - if ty.includes_callable() || ty.is_bottom() { - return Err(ParseError::ForbiddenTypeUsedInProperty( - class_name, - var.to_string(), - ty.clone(), - state.current.span, - )); - } - } - None => { - if modifiers.has_readonly() { - return Err(ParseError::MissingTypeForReadonlyProperty( - class_name, - var.to_string(), - state.current.span, - )); - } - } + if value.is_some() { + return Err(ParseError::ReadonlyPropertyHasDefaultValue( + class_name, + var.to_string(), + state.current.span, + )); } - - utils::skip_semicolon(state)?; - - Ok(Statement::Property { - var, - value, - r#type: ty, - modifiers, - attributes: state.get_attributes(), - }) } - fn parse_classish_uses(&self, state: &mut State) -> ParseResult { - state.next(); - - let mut traits = Vec::new(); - - while state.current.kind != TokenKind::SemiColon - && state.current.kind != TokenKind::LeftBrace - { - let t = identifiers::full_name(state)?; - traits.push(t); - - if state.current.kind == TokenKind::Comma { - if state.peek.kind == TokenKind::SemiColon { - // will fail with unexpected token `,` - // as `use` doesn't allow for trailing commas. - utils::skip_semicolon(state)?; - } else if state.peek.kind == TokenKind::LeftBrace { - // will fail with unexpected token `{` - // as `use` doesn't allow for trailing commas. - utils::skip_left_brace(state)?; - } else { - state.next(); - } - } else { - break; + match &ty { + Some(ty) => { + if ty.includes_callable() || ty.is_bottom() { + return Err(ParseError::ForbiddenTypeUsedInProperty( + class_name, + var.to_string(), + ty.clone(), + state.current.span, + )); } } + None => { + if modifiers.has_readonly() { + return Err(ParseError::MissingTypeForReadonlyProperty( + class_name, + var.to_string(), + state.current.span, + )); + } + } + } - let mut adaptations = Vec::new(); - if state.current.kind == TokenKind::LeftBrace { - utils::skip_left_brace(state)?; + utils::skip_semicolon(state)?; - while state.current.kind != TokenKind::RightBrace { - let (r#trait, method): (Option, Identifier) = match state.peek.kind { - TokenKind::DoubleColon => { - let r#trait = identifiers::full_name(state)?; - state.next(); - let method = identifiers::ident(state)?; - (Some(r#trait), method) - } - _ => (None, identifiers::ident(state)?), - }; + Ok(Statement::Property { + var, + value, + r#type: ty, + modifiers, + attributes: state.get_attributes(), + }) +} - expect_token!([ +fn parse_classish_uses(state: &mut State) -> ParseResult { + state.next(); + + let mut traits = Vec::new(); + + while state.current.kind != TokenKind::SemiColon && state.current.kind != TokenKind::LeftBrace { + let t = identifiers::full_name(state)?; + traits.push(t); + + if state.current.kind == TokenKind::Comma { + if state.peek.kind == TokenKind::SemiColon { + // will fail with unexpected token `,` + // as `use` doesn't allow for trailing commas. + utils::skip_semicolon(state)?; + } else if state.peek.kind == TokenKind::LeftBrace { + // will fail with unexpected token `{` + // as `use` doesn't allow for trailing commas. + utils::skip_left_brace(state)?; + } else { + state.next(); + } + } else { + break; + } + } + + let mut adaptations = Vec::new(); + if state.current.kind == TokenKind::LeftBrace { + utils::skip_left_brace(state)?; + + while state.current.kind != TokenKind::RightBrace { + let (r#trait, method): (Option, Identifier) = match state.peek.kind { + TokenKind::DoubleColon => { + let r#trait = identifiers::full_name(state)?; + state.next(); + let method = identifiers::ident(state)?; + (Some(r#trait), method) + } + _ => (None, identifiers::ident(state)?), + }; + + expect_token!([ TokenKind::As => { match state.current.kind { TokenKind::Public | TokenKind::Protected | TokenKind::Private => { @@ -383,42 +370,40 @@ impl Parser { } ], state, ["`as`", "`insteadof`"]); - utils::skip_semicolon(state)?; - } - - utils::skip_right_brace(state)?; - } else { utils::skip_semicolon(state)?; } - Ok(Statement::TraitUse { - traits, - adaptations, - }) + utils::skip_right_brace(state)?; + } else { + utils::skip_semicolon(state)?; } - fn constant( - &self, - state: &mut State, - modifiers: ConstantModifierGroup, - start: Span, - ) -> ParseResult { - state.next(); - - let name = identifiers::ident(state)?; - - utils::skip(state, TokenKind::Equals)?; - - let value = self.expression(state, Precedence::Lowest)?; - - let end = utils::skip_semicolon(state)?; - - Ok(ClassishConstant { - start, - end, - name, - value, - modifiers, - }) - } + Ok(Statement::TraitUse { + traits, + adaptations, + }) +} + +fn constant( + state: &mut State, + modifiers: ConstantModifierGroup, + start: Span, +) -> ParseResult { + state.next(); + + let name = identifiers::ident(state)?; + + utils::skip(state, TokenKind::Equals)?; + + let value = parser::expression(state, Precedence::Lowest)?; + + let end = utils::skip_semicolon(state)?; + + Ok(ClassishConstant { + start, + end, + name, + value, + modifiers, + }) } diff --git a/src/parser/internal/control_flow.rs b/src/parser/internal/control_flow.rs index f8e0396..a8a3509 100644 --- a/src/parser/internal/control_flow.rs +++ b/src/parser/internal/control_flow.rs @@ -1,108 +1,212 @@ use crate::expected_token_err; use crate::lexer::token::TokenKind; +use crate::parser; use crate::parser::ast::Block; use crate::parser::ast::Case; +use crate::parser::ast::DefaultMatchArm; use crate::parser::ast::ElseIf; +use crate::parser::ast::Expression; +use crate::parser::ast::MatchArm; use crate::parser::ast::Statement; +use crate::parser::error::ParseError; use crate::parser::error::ParseResult; +use crate::parser::internal::blocks; use crate::parser::internal::precedences::Precedence; use crate::parser::internal::utils; use crate::parser::state::State; -use crate::parser::Parser; -impl Parser { - pub(in crate::parser) fn switch_statement(&self, state: &mut State) -> ParseResult { - utils::skip(state, TokenKind::Switch)?; +pub fn match_expression(state: &mut State) -> ParseResult { + utils::skip(state, TokenKind::Match)?; - utils::skip_left_parenthesis(state)?; + utils::skip_left_parenthesis(state)?; - let condition = self.expression(state, Precedence::Lowest)?; + let condition = Box::new(parser::expression(state, Precedence::Lowest)?); - utils::skip_right_parenthesis(state)?; + utils::skip_right_parenthesis(state)?; + utils::skip_left_brace(state)?; - let end_token = if state.current.kind == TokenKind::Colon { - utils::colon(state)?; - TokenKind::EndSwitch + let mut default = None; + let mut arms = Vec::new(); + while state.current.kind != TokenKind::RightBrace { + state.skip_comments(); + + if state.current.kind == TokenKind::Default { + if default.is_some() { + return Err(ParseError::MatchExpressionWithMultipleDefaultArms( + state.current.span, + )); + } + + state.next(); + + // match conditions can have an extra comma at the end, including `default`. + if state.current.kind == TokenKind::Comma { + state.next(); + } + + utils::skip_double_arrow(state)?; + + let body = parser::expression(state, Precedence::Lowest)?; + + default = Some(Box::new(DefaultMatchArm { body })); } else { - utils::skip_left_brace(state)?; - TokenKind::RightBrace - }; + let mut conditions = Vec::new(); + while state.current.kind != TokenKind::DoubleArrow { + conditions.push(parser::expression(state, Precedence::Lowest)?); - let mut cases = Vec::new(); - while state.current.kind != end_token { - match state.current.kind { - TokenKind::Case => { + if state.current.kind == TokenKind::Comma { state.next(); - - let condition = self.expression(state, Precedence::Lowest)?; - - utils::skip_any_of(state, &[TokenKind::Colon, TokenKind::SemiColon])?; - - let mut body = Block::new(); - - while state.current.kind != TokenKind::Case - && state.current.kind != TokenKind::Default - && state.current.kind != TokenKind::RightBrace - { - body.push(self.statement(state)?); - state.skip_comments(); - } - - cases.push(Case { - condition: Some(condition), - body, - }); - } - TokenKind::Default => { - state.next(); - - utils::skip_any_of(state, &[TokenKind::Colon, TokenKind::SemiColon])?; - - let mut body = Block::new(); - - while state.current.kind != TokenKind::Case - && state.current.kind != TokenKind::Default - && state.current.kind != TokenKind::RightBrace - { - body.push(self.statement(state)?); - } - - cases.push(Case { - condition: None, - body, - }); - } - _ => { - return expected_token_err!(["`case`", "`default`"], state); + } else { + break; } } + + if !conditions.is_empty() { + utils::skip_double_arrow(state)?; + } else { + break; + } + + let body = parser::expression(state, Precedence::Lowest)?; + + arms.push(MatchArm { conditions, body }); } - if end_token == TokenKind::EndSwitch { - utils::skip(state, TokenKind::EndSwitch)?; - utils::skip_semicolon(state)?; + if state.current.kind == TokenKind::Comma { + state.next(); } else { - utils::skip_right_brace(state)?; + break; } - - Ok(Statement::Switch { condition, cases }) } - pub(in crate::parser) fn if_statement(&self, state: &mut State) -> ParseResult { - utils::skip(state, TokenKind::If)?; + utils::skip_right_brace(state)?; - utils::skip_left_parenthesis(state)?; + Ok(Expression::Match { + condition, + default, + arms, + }) +} - let condition = self.expression(state, Precedence::Lowest)?; +pub fn switch_statement(state: &mut State) -> ParseResult { + utils::skip(state, TokenKind::Switch)?; - utils::skip_right_parenthesis(state)?; + utils::skip_left_parenthesis(state)?; - // FIXME: Tidy up duplication and make the intent a bit clearer. + let condition = parser::expression(state, Precedence::Lowest)?; + + utils::skip_right_parenthesis(state)?; + + let end_token = if state.current.kind == TokenKind::Colon { + utils::skip_colon(state)?; + TokenKind::EndSwitch + } else { + utils::skip_left_brace(state)?; + TokenKind::RightBrace + }; + + let mut cases = Vec::new(); + while state.current.kind != end_token { match state.current.kind { - TokenKind::Colon => { - utils::colon(state)?; + TokenKind::Case => { + state.next(); - let mut then = vec![]; + let condition = parser::expression(state, Precedence::Lowest)?; + + utils::skip_any_of(state, &[TokenKind::Colon, TokenKind::SemiColon])?; + + let mut body = Block::new(); + + while state.current.kind != TokenKind::Case + && state.current.kind != TokenKind::Default + && state.current.kind != TokenKind::RightBrace + { + body.push(parser::statement(state)?); + state.skip_comments(); + } + + cases.push(Case { + condition: Some(condition), + body, + }); + } + TokenKind::Default => { + state.next(); + + utils::skip_any_of(state, &[TokenKind::Colon, TokenKind::SemiColon])?; + + let mut body = Block::new(); + + while state.current.kind != TokenKind::Case + && state.current.kind != TokenKind::Default + && state.current.kind != TokenKind::RightBrace + { + body.push(parser::statement(state)?); + } + + cases.push(Case { + condition: None, + body, + }); + } + _ => { + return expected_token_err!(["`case`", "`default`"], state); + } + } + } + + if end_token == TokenKind::EndSwitch { + utils::skip(state, TokenKind::EndSwitch)?; + utils::skip_semicolon(state)?; + } else { + utils::skip_right_brace(state)?; + } + + Ok(Statement::Switch { condition, cases }) +} + +pub fn if_statement(state: &mut State) -> ParseResult { + utils::skip(state, TokenKind::If)?; + + utils::skip_left_parenthesis(state)?; + + let condition = parser::expression(state, Precedence::Lowest)?; + + utils::skip_right_parenthesis(state)?; + + // FIXME: Tidy up duplication and make the intent a bit clearer. + match state.current.kind { + TokenKind::Colon => { + utils::skip_colon(state)?; + + let mut then = vec![]; + while !matches!( + state.current.kind, + TokenKind::ElseIf | TokenKind::Else | TokenKind::EndIf + ) { + if let TokenKind::OpenTag(_) = state.current.kind { + state.next(); + continue; + } + + then.push(parser::statement(state)?); + } + + let mut else_ifs = vec![]; + loop { + if state.current.kind != TokenKind::ElseIf { + break; + } + + state.next(); + + utils::skip_left_parenthesis(state)?; + let condition = parser::expression(state, Precedence::Lowest)?; + utils::skip_right_parenthesis(state)?; + + utils::skip_colon(state)?; + + let mut body = vec![]; while !matches!( state.current.kind, TokenKind::ElseIf | TokenKind::Else | TokenKind::EndIf @@ -112,122 +216,94 @@ impl Parser { continue; } - then.push(self.statement(state)?); + body.push(parser::statement(state)?); } - let mut else_ifs = vec![]; - loop { - if state.current.kind != TokenKind::ElseIf { - break; - } + else_ifs.push(ElseIf { condition, body }); + } + let mut r#else = None; + if state.current.kind == TokenKind::Else { + state.next(); + utils::skip_colon(state)?; + + let body = blocks::body(state, &TokenKind::EndIf)?; + + r#else = Some(body); + } + + utils::skip(state, TokenKind::EndIf)?; + + utils::skip_semicolon(state)?; + + Ok(Statement::If { + condition, + then, + else_ifs, + r#else, + }) + } + _ => { + let then = if state.current.kind == TokenKind::LeftBrace { + utils::skip_left_brace(state)?; + let then = blocks::body(state, &TokenKind::RightBrace)?; + utils::skip_right_brace(state)?; + then + } else { + vec![parser::statement(state)?] + }; + + let mut else_ifs: Vec = Vec::new(); + loop { + if state.current.kind == TokenKind::ElseIf { state.next(); utils::skip_left_parenthesis(state)?; - let condition = self.expression(state, Precedence::Lowest)?; + + let condition = parser::expression(state, Precedence::Lowest)?; + utils::skip_right_parenthesis(state)?; - utils::colon(state)?; + utils::skip_left_brace(state)?; - let mut body = vec![]; - while !matches!( - state.current.kind, - TokenKind::ElseIf | TokenKind::Else | TokenKind::EndIf - ) { - if let TokenKind::OpenTag(_) = state.current.kind { - state.next(); - continue; - } + let body = blocks::body(state, &TokenKind::RightBrace)?; - body.push(self.statement(state)?); - } + utils::skip_right_brace(state)?; else_ifs.push(ElseIf { condition, body }); + } else { + break; } + } - let mut r#else = None; - if state.current.kind == TokenKind::Else { - state.next(); - utils::colon(state)?; - - let body = self.body(state, &TokenKind::EndIf)?; - - r#else = Some(body); - } - - utils::skip(state, TokenKind::EndIf)?; - - utils::skip_semicolon(state)?; - - Ok(Statement::If { + if state.current.kind != TokenKind::Else { + return Ok(Statement::If { condition, then, else_ifs, - r#else, - }) + r#else: None, + }); } - _ => { - let then = if state.current.kind == TokenKind::LeftBrace { - utils::skip_left_brace(state)?; - let then = self.body(state, &TokenKind::RightBrace)?; - utils::skip_right_brace(state)?; - then - } else { - vec![self.statement(state)?] - }; - let mut else_ifs: Vec = Vec::new(); - loop { - if state.current.kind == TokenKind::ElseIf { - state.next(); + utils::skip(state, TokenKind::Else)?; - utils::skip_left_parenthesis(state)?; + let r#else; + if state.current.kind == TokenKind::LeftBrace { + utils::skip_left_brace(state)?; - let condition = self.expression(state, Precedence::Lowest)?; + r#else = blocks::body(state, &TokenKind::RightBrace)?; - utils::skip_right_parenthesis(state)?; - - utils::skip_left_brace(state)?; - - let body = self.body(state, &TokenKind::RightBrace)?; - - utils::skip_right_brace(state)?; - - else_ifs.push(ElseIf { condition, body }); - } else { - break; - } - } - - if state.current.kind != TokenKind::Else { - return Ok(Statement::If { - condition, - then, - else_ifs, - r#else: None, - }); - } - - utils::skip(state, TokenKind::Else)?; - - let r#else; - if state.current.kind == TokenKind::LeftBrace { - utils::skip_left_brace(state)?; - - r#else = self.body(state, &TokenKind::RightBrace)?; - - utils::skip_right_brace(state)?; - } else { - r#else = vec![self.statement(state)?]; - } - - Ok(Statement::If { - condition, - then, - else_ifs, - r#else: Some(r#else), - }) + utils::skip_right_brace(state)?; + } else { + r#else = vec![parser::statement(state)?]; } + + Ok(Statement::If { + condition, + then, + else_ifs, + r#else: Some(r#else), + }) } } } diff --git a/src/parser/internal/functions.rs b/src/parser/internal/functions.rs index fdfd1a2..2aa2b40 100644 --- a/src/parser/internal/functions.rs +++ b/src/parser/internal/functions.rs @@ -1,6 +1,7 @@ use crate::expected_scope; use crate::lexer::token::Span; use crate::lexer::token::TokenKind; +use crate::parser; use crate::parser::ast::functions::ArrowFunction; use crate::parser::ast::functions::Closure; use crate::parser::ast::functions::ClosureUse; @@ -12,239 +13,235 @@ use crate::parser::ast::Expression; use crate::parser::ast::Statement; use crate::parser::error::ParseError; use crate::parser::error::ParseResult; +use crate::parser::internal::blocks; use crate::parser::internal::data_type; use crate::parser::internal::identifiers; +use crate::parser::internal::parameters; use crate::parser::internal::precedences::Precedence; use crate::parser::internal::utils; use crate::parser::state::Scope; use crate::parser::state::State; -use crate::parser::Parser; use crate::scoped; -impl Parser { - pub(in crate::parser) fn anonymous_function( - &self, - state: &mut State, - ) -> ParseResult { - let start = state.current.span; +pub fn anonymous_function(state: &mut State) -> ParseResult { + let start = state.current.span; - let is_static = if state.current.kind == TokenKind::Static { - state.next(); + let is_static = if state.current.kind == TokenKind::Static { + state.next(); - true - } else { - false - }; + true + } else { + false + }; - utils::skip(state, TokenKind::Function)?; + utils::skip(state, TokenKind::Function)?; - let by_ref = if state.current.kind == TokenKind::Ampersand { - state.next(); - true - } else { - false - }; + let by_ref = if state.current.kind == TokenKind::Ampersand { + state.next(); + true + } else { + false + }; - let attributes = state.get_attributes(); - let parameters = self.function_parameter_list(state)?; + let attributes = state.get_attributes(); + let parameters = parameters::function_parameter_list(state)?; - let mut uses = vec![]; - if state.current.kind == TokenKind::Use { - state.next(); + let mut uses = vec![]; + if state.current.kind == TokenKind::Use { + state.next(); - utils::skip_left_parenthesis(state)?; + utils::skip_left_parenthesis(state)?; - while state.current.kind != TokenKind::RightParen { - let mut by_ref = false; - if state.current.kind == TokenKind::Ampersand { - state.next(); + while state.current.kind != TokenKind::RightParen { + let mut by_ref = false; + if state.current.kind == TokenKind::Ampersand { + state.next(); - by_ref = true; - } - - // TODO(azjezz): this shouldn't call expr, we should have a function - // just for variables, so we don't have to go through the whole `match` in `expression(...)` - let var = match self.expression(state, Precedence::Lowest)? { - s @ Expression::Variable { .. } => ClosureUse { var: s, by_ref }, - _ => { - return Err(ParseError::UnexpectedToken( - "expected variable".into(), - state.current.span, - )) - } - }; - - uses.push(var); - - if state.current.kind == TokenKind::Comma { - state.next(); - } else { - break; - } + by_ref = true; } - utils::skip_right_parenthesis(state)?; - } + // TODO(azjezz): this shouldn't call expr, we should have a function + // just for variables, so we don't have to go through the whole `match` in `expression(...)` + let var = match parser::expression(state, Precedence::Lowest)? { + s @ Expression::Variable { .. } => ClosureUse { var: s, by_ref }, + _ => { + return Err(ParseError::UnexpectedToken( + "expected variable".into(), + state.current.span, + )) + } + }; - let mut return_ty = None; - if state.current.kind == TokenKind::Colon { - utils::colon(state)?; + uses.push(var); - return_ty = Some(data_type::data_type(state)?); - } - - let (body, end) = scoped!(state, Scope::AnonymousFunction(is_static), { - utils::skip_left_brace(state)?; - - let body = self.body(state, &TokenKind::RightBrace)?; - let end = utils::skip_right_brace(state)?; - - (body, end) - }); - - Ok(Expression::Closure(Closure { - start, - end, - attributes, - parameters, - uses, - return_ty, - body, - r#static: is_static, - by_ref, - })) - } - - pub(in crate::parser) fn arrow_function(&self, state: &mut State) -> ParseResult { - let start = state.current.span; - - let is_static = if state.current.kind == TokenKind::Static { - state.next(); - - true - } else { - false - }; - - utils::skip(state, TokenKind::Fn)?; - - let by_ref = if state.current.kind == TokenKind::Ampersand { - state.next(); - true - } else { - false - }; - - let attributes = state.get_attributes(); - let parameters = self.function_parameter_list(state)?; - - let mut return_type = None; - if state.current.kind == TokenKind::Colon { - utils::colon(state)?; - - return_type = Some(data_type::data_type(state)?); - } - - utils::skip(state, TokenKind::DoubleArrow)?; - - let body = scoped!(state, Scope::ArrowFunction(is_static), { - Box::new(self.expression(state, Precedence::Lowest)?) - }); - - let end = state.current.span; - - Ok(Expression::ArrowFunction(ArrowFunction { - start, - end, - attributes, - parameters, - return_type, - body, - by_ref, - r#static: is_static, - })) - } - - pub(in crate::parser) fn function(&self, state: &mut State) -> ParseResult { - let start = state.current.span; - - utils::skip(state, TokenKind::Function)?; - - let by_ref = if state.current.kind == TokenKind::Ampersand { - state.next(); - true - } else { - false - }; - - let name = if state.current.kind == TokenKind::Null { - let start = state.current.span; - let end = (start.0, start.1 + 4); - - state.next(); - - Identifier { - start, - name: "null".into(), - end, + if state.current.kind == TokenKind::Comma { + state.next(); + } else { + break; } - } else { - identifiers::ident(state)? - }; - - // get attributes before processing parameters, otherwise - // parameters will steal attributes of this function. - let attributes = state.get_attributes(); - - let parameters = self.function_parameter_list(state)?; - - let mut return_type = None; - - if state.current.kind == TokenKind::Colon { - utils::colon(state)?; - - return_type = Some(data_type::data_type(state)?); } - let (body, end) = scoped!(state, Scope::Function(name.clone()), { - utils::skip_left_brace(state)?; - - let body = self.body(state, &TokenKind::RightBrace)?; - let end = utils::skip_right_brace(state)?; - - (body, end) - }); - - Ok(Statement::Function(Function { - start, - end, - name, - attributes, - parameters, - return_type, - body, - by_ref, - })) + utils::skip_right_parenthesis(state)?; } - pub(in crate::parser) fn method( - &self, - state: &mut State, - modifiers: MethodModifierGroup, - start: Span, - ) -> ParseResult { - utils::skip(state, TokenKind::Function)?; + let mut return_ty = None; + if state.current.kind == TokenKind::Colon { + utils::skip_colon(state)?; - let by_ref = if state.current.kind == TokenKind::Ampersand { - state.next(); - true - } else { - false - }; + return_ty = Some(data_type::data_type(state)?); + } - let name = identifiers::ident_maybe_reserved(state)?; + let (body, end) = scoped!(state, Scope::AnonymousFunction(is_static), { + utils::skip_left_brace(state)?; - let has_body = expected_scope!([ + let body = blocks::body(state, &TokenKind::RightBrace)?; + let end = utils::skip_right_brace(state)?; + + (body, end) + }); + + Ok(Expression::Closure(Closure { + start, + end, + attributes, + parameters, + uses, + return_ty, + body, + r#static: is_static, + by_ref, + })) +} + +pub fn arrow_function(state: &mut State) -> ParseResult { + let start = state.current.span; + + let is_static = if state.current.kind == TokenKind::Static { + state.next(); + + true + } else { + false + }; + + utils::skip(state, TokenKind::Fn)?; + + let by_ref = if state.current.kind == TokenKind::Ampersand { + state.next(); + true + } else { + false + }; + + let attributes = state.get_attributes(); + let parameters = parameters::function_parameter_list(state)?; + + let mut return_type = None; + if state.current.kind == TokenKind::Colon { + utils::skip_colon(state)?; + + return_type = Some(data_type::data_type(state)?); + } + + utils::skip(state, TokenKind::DoubleArrow)?; + + let body = scoped!(state, Scope::ArrowFunction(is_static), { + Box::new(parser::expression(state, Precedence::Lowest)?) + }); + + let end = state.current.span; + + Ok(Expression::ArrowFunction(ArrowFunction { + start, + end, + attributes, + parameters, + return_type, + body, + by_ref, + r#static: is_static, + })) +} + +pub fn function(state: &mut State) -> ParseResult { + let start = state.current.span; + + utils::skip(state, TokenKind::Function)?; + + let by_ref = if state.current.kind == TokenKind::Ampersand { + state.next(); + true + } else { + false + }; + + let name = if state.current.kind == TokenKind::Null { + let start = state.current.span; + let end = (start.0, start.1 + 4); + + state.next(); + + Identifier { + start, + name: "null".into(), + end, + } + } else { + identifiers::ident(state)? + }; + + // get attributes before processing parameters, otherwise + // parameters will steal attributes of this function. + let attributes = state.get_attributes(); + + let parameters = parameters::function_parameter_list(state)?; + + let mut return_type = None; + + if state.current.kind == TokenKind::Colon { + utils::skip_colon(state)?; + + return_type = Some(data_type::data_type(state)?); + } + + let (body, end) = scoped!(state, Scope::Function(name.clone()), { + utils::skip_left_brace(state)?; + + let body = blocks::body(state, &TokenKind::RightBrace)?; + let end = utils::skip_right_brace(state)?; + + (body, end) + }); + + Ok(Statement::Function(Function { + start, + end, + name, + attributes, + parameters, + return_type, + body, + by_ref, + })) +} + +pub fn method( + state: &mut State, + modifiers: MethodModifierGroup, + start: Span, +) -> ParseResult { + utils::skip(state, TokenKind::Function)?; + + let by_ref = if state.current.kind == TokenKind::Ampersand { + state.next(); + true + } else { + false + }; + + let name = identifiers::ident_maybe_reserved(state)?; + + let has_body = expected_scope!([ Scope::Class(_, class_modifiers, _) => { if !class_modifiers.has_abstract() && modifiers.has_abstract() { return Err(ParseError::AbstractModifierOnNonAbstractClassMethod( @@ -269,47 +266,46 @@ impl Parser { Scope::AnonymousClass(_) => true, ], state); - // get attributes before processing parameters, otherwise - // parameters will steal attributes of this method. - let attributes = state.get_attributes(); + // get attributes before processing parameters, otherwise + // parameters will steal attributes of this method. + let attributes = state.get_attributes(); - let (parameters, body, return_type, end) = - scoped!(state, Scope::Method(name.clone(), modifiers.clone()), { - let parameters = self.method_parameter_list(state)?; + let (parameters, body, return_type, end) = + scoped!(state, Scope::Method(name.clone(), modifiers.clone()), { + let parameters = parameters::method_parameter_list(state)?; - let mut return_type = None; + let mut return_type = None; - if state.current.kind == TokenKind::Colon { - utils::colon(state)?; + if state.current.kind == TokenKind::Colon { + utils::skip_colon(state)?; - return_type = Some(data_type::data_type(state)?); - } + return_type = Some(data_type::data_type(state)?); + } - if !has_body { - let end = utils::skip_semicolon(state)?; + if !has_body { + let end = utils::skip_semicolon(state)?; - (parameters, None, return_type, end) - } else { - utils::skip_left_brace(state)?; + (parameters, None, return_type, end) + } else { + utils::skip_left_brace(state)?; - let body = self.body(state, &TokenKind::RightBrace)?; + let body = blocks::body(state, &TokenKind::RightBrace)?; - let end = utils::skip_right_brace(state)?; + let end = utils::skip_right_brace(state)?; - (parameters, Some(body), return_type, end) - } - }); + (parameters, Some(body), return_type, end) + } + }); - Ok(Method { - start, - end, - attributes, - name, - parameters, - body, - return_type, - by_ref, - modifiers, - }) - } + Ok(Method { + start, + end, + attributes, + name, + parameters, + body, + return_type, + by_ref, + modifiers, + }) } diff --git a/src/parser/internal/goto.rs b/src/parser/internal/goto.rs new file mode 100644 index 0000000..60eb058 --- /dev/null +++ b/src/parser/internal/goto.rs @@ -0,0 +1,24 @@ +use crate::lexer::token::TokenKind; +use crate::parser::ast::Statement; +use crate::parser::error::ParseResult; +use crate::parser::internal::identifiers; +use crate::parser::internal::utils; +use crate::parser::state::State; + +pub fn label_statement(state: &mut State) -> ParseResult { + let label = identifiers::ident(state)?; + + utils::skip_colon(state)?; + + Ok(Statement::Label { label }) +} + +pub fn goto_statement(state: &mut State) -> ParseResult { + utils::skip(state, TokenKind::Goto)?; + + let label = identifiers::ident(state)?; + + utils::skip_semicolon(state)?; + + Ok(Statement::Goto { label }) +} diff --git a/src/parser/internal/loops.rs b/src/parser/internal/loops.rs index e8b4322..492c07e 100644 --- a/src/parser/internal/loops.rs +++ b/src/parser/internal/loops.rs @@ -1,229 +1,225 @@ use crate::lexer::token::TokenKind; +use crate::parser; use crate::parser::ast::Statement; use crate::parser::error::ParseResult; +use crate::parser::internal::blocks; use crate::parser::internal::precedences::Precedence; use crate::parser::internal::utils; use crate::parser::state::State; -use crate::parser::Parser; -impl Parser { - pub(in crate::parser) fn foreach_loop(&self, state: &mut State) -> ParseResult { - utils::skip(state, TokenKind::Foreach)?; +pub fn foreach_loop(state: &mut State) -> ParseResult { + utils::skip(state, TokenKind::Foreach)?; - utils::skip_left_parenthesis(state)?; + utils::skip_left_parenthesis(state)?; - let expr = self.expression(state, Precedence::Lowest)?; + let expr = parser::expression(state, Precedence::Lowest)?; - utils::skip(state, TokenKind::As)?; + utils::skip(state, TokenKind::As)?; - let mut by_ref = state.current.kind == TokenKind::Ampersand; + let mut by_ref = state.current.kind == TokenKind::Ampersand; + if by_ref { + state.next(); + } + + let mut key_var = None; + let mut value_var = parser::expression(state, Precedence::Lowest)?; + + if state.current.kind == TokenKind::DoubleArrow { + state.next(); + + key_var = Some(value_var.clone()); + + by_ref = state.current.kind == TokenKind::Ampersand; if by_ref { state.next(); } - let mut key_var = None; - let mut value_var = self.expression(state, Precedence::Lowest)?; - - if state.current.kind == TokenKind::DoubleArrow { - state.next(); - - key_var = Some(value_var.clone()); - - by_ref = state.current.kind == TokenKind::Ampersand; - if by_ref { - state.next(); - } - - value_var = self.expression(state, Precedence::Lowest)?; - } - - utils::skip_right_parenthesis(state)?; - - let end_token = if state.current.kind == TokenKind::Colon { - utils::colon(state)?; - TokenKind::EndForeach - } else { - utils::skip_left_brace(state)?; - TokenKind::RightBrace - }; - - let body = self.body(state, &end_token)?; - - if end_token == TokenKind::EndForeach { - utils::skip(state, TokenKind::EndForeach)?; - utils::skip_semicolon(state)?; - } else { - utils::skip_right_brace(state)?; - } - - Ok(Statement::Foreach { - expr, - by_ref, - key_var, - value_var, - body, - }) + value_var = parser::expression(state, Precedence::Lowest)?; } - pub(in crate::parser) fn for_loop(&self, state: &mut State) -> ParseResult { - utils::skip(state, TokenKind::For)?; - - utils::skip_left_parenthesis(state)?; - - let mut init = Vec::new(); - loop { - if state.current.kind == TokenKind::SemiColon { - break; - } - - init.push(self.expression(state, Precedence::Lowest)?); - - if state.current.kind == TokenKind::Comma { - state.next(); - } else { - break; - } - } - - utils::skip_semicolon(state)?; - - let mut condition = Vec::new(); - loop { - if state.current.kind == TokenKind::SemiColon { - break; - } - - condition.push(self.expression(state, Precedence::Lowest)?); - - if state.current.kind == TokenKind::Comma { - state.next(); - } else { - break; - } - } - utils::skip_semicolon(state)?; - - let mut r#loop = Vec::new(); - loop { - if state.current.kind == TokenKind::RightParen { - break; - } - - r#loop.push(self.expression(state, Precedence::Lowest)?); - - if state.current.kind == TokenKind::Comma { - state.next(); - } else { - break; - } - } - - utils::skip_right_parenthesis(state)?; - - let end_token = if state.current.kind == TokenKind::Colon { - utils::colon(state)?; - TokenKind::EndFor - } else { - utils::skip_left_brace(state)?; - TokenKind::RightBrace - }; - - let then = self.body(state, &end_token)?; - - if end_token == TokenKind::EndFor { - utils::skip(state, TokenKind::EndFor)?; - utils::skip_semicolon(state)?; - } else { - utils::skip_right_brace(state)?; - }; - - Ok(Statement::For { - init, - condition, - r#loop, - then, - }) - } - - pub(in crate::parser) fn do_loop(&self, state: &mut State) -> ParseResult { - utils::skip(state, TokenKind::Do)?; + utils::skip_right_parenthesis(state)?; + let end_token = if state.current.kind == TokenKind::Colon { + utils::skip_colon(state)?; + TokenKind::EndForeach + } else { utils::skip_left_brace(state)?; - let body = self.body(state, &TokenKind::RightBrace)?; - utils::skip_right_brace(state)?; + TokenKind::RightBrace + }; - utils::skip(state, TokenKind::While)?; + let body = blocks::body(state, &end_token)?; - utils::skip_left_parenthesis(state)?; - let condition = self.expression(state, Precedence::Lowest)?; - utils::skip_right_parenthesis(state)?; + if end_token == TokenKind::EndForeach { + utils::skip(state, TokenKind::EndForeach)?; utils::skip_semicolon(state)?; - - Ok(Statement::DoWhile { condition, body }) + } else { + utils::skip_right_brace(state)?; } - pub(in crate::parser) fn while_loop(&self, state: &mut State) -> ParseResult { - utils::skip(state, TokenKind::While)?; + Ok(Statement::Foreach { + expr, + by_ref, + key_var, + value_var, + body, + }) +} - utils::skip_left_parenthesis(state)?; +pub fn for_loop(state: &mut State) -> ParseResult { + utils::skip(state, TokenKind::For)?; - let condition = self.expression(state, Precedence::Lowest)?; + utils::skip_left_parenthesis(state)?; - utils::skip_right_parenthesis(state)?; + let mut init = Vec::new(); + loop { + if state.current.kind == TokenKind::SemiColon { + break; + } - let body = if state.current.kind == TokenKind::SemiColon { - utils::skip_semicolon(state)?; - vec![] + init.push(parser::expression(state, Precedence::Lowest)?); + + if state.current.kind == TokenKind::Comma { + state.next(); } else { - let end_token = if state.current.kind == TokenKind::Colon { - utils::colon(state)?; - TokenKind::EndWhile - } else { - utils::skip_left_brace(state)?; - TokenKind::RightBrace - }; + break; + } + } - let body = self.body(state, &end_token)?; + utils::skip_semicolon(state)?; - if end_token == TokenKind::RightBrace { - utils::skip_right_brace(state)?; - } else { - utils::skip(state, TokenKind::EndWhile)?; - utils::skip_semicolon(state)?; - } + let mut condition = Vec::new(); + loop { + if state.current.kind == TokenKind::SemiColon { + break; + } - body + condition.push(parser::expression(state, Precedence::Lowest)?); + + if state.current.kind == TokenKind::Comma { + state.next(); + } else { + break; + } + } + utils::skip_semicolon(state)?; + + let mut r#loop = Vec::new(); + loop { + if state.current.kind == TokenKind::RightParen { + break; + } + + r#loop.push(parser::expression(state, Precedence::Lowest)?); + + if state.current.kind == TokenKind::Comma { + state.next(); + } else { + break; + } + } + + utils::skip_right_parenthesis(state)?; + + let end_token = if state.current.kind == TokenKind::Colon { + utils::skip_colon(state)?; + TokenKind::EndFor + } else { + utils::skip_left_brace(state)?; + TokenKind::RightBrace + }; + + let then = blocks::body(state, &end_token)?; + + if end_token == TokenKind::EndFor { + utils::skip(state, TokenKind::EndFor)?; + utils::skip_semicolon(state)?; + } else { + utils::skip_right_brace(state)?; + }; + + Ok(Statement::For { + init, + condition, + r#loop, + then, + }) +} + +pub fn do_loop(state: &mut State) -> ParseResult { + utils::skip(state, TokenKind::Do)?; + + utils::skip_left_brace(state)?; + let body = blocks::body(state, &TokenKind::RightBrace)?; + utils::skip_right_brace(state)?; + + utils::skip(state, TokenKind::While)?; + + utils::skip_left_parenthesis(state)?; + let condition = parser::expression(state, Precedence::Lowest)?; + utils::skip_right_parenthesis(state)?; + utils::skip_semicolon(state)?; + + Ok(Statement::DoWhile { condition, body }) +} + +pub fn while_loop(state: &mut State) -> ParseResult { + utils::skip(state, TokenKind::While)?; + + utils::skip_left_parenthesis(state)?; + + let condition = parser::expression(state, Precedence::Lowest)?; + + utils::skip_right_parenthesis(state)?; + + let body = if state.current.kind == TokenKind::SemiColon { + utils::skip_semicolon(state)?; + vec![] + } else { + let end_token = if state.current.kind == TokenKind::Colon { + utils::skip_colon(state)?; + TokenKind::EndWhile + } else { + utils::skip_left_brace(state)?; + TokenKind::RightBrace }; - Ok(Statement::While { condition, body }) - } + let body = blocks::body(state, &end_token)?; - pub(in crate::parser) fn continue_statement( - &self, - state: &mut State, - ) -> ParseResult { - utils::skip(state, TokenKind::Continue)?; - - let mut num = None; - if state.current.kind != TokenKind::SemiColon { - num = Some(self.expression(state, Precedence::Lowest)?); + if end_token == TokenKind::RightBrace { + utils::skip_right_brace(state)?; + } else { + utils::skip(state, TokenKind::EndWhile)?; + utils::skip_semicolon(state)?; } - utils::skip_semicolon(state)?; + body + }; - Ok(Statement::Continue { num }) - } - - pub(in crate::parser) fn break_statement(&self, state: &mut State) -> ParseResult { - utils::skip(state, TokenKind::Break)?; - - let mut num = None; - if state.current.kind != TokenKind::SemiColon { - num = Some(self.expression(state, Precedence::Lowest)?); - } - - utils::skip_semicolon(state)?; - - Ok(Statement::Break { num }) - } + Ok(Statement::While { condition, body }) +} + +pub fn continue_statement(state: &mut State) -> ParseResult { + utils::skip(state, TokenKind::Continue)?; + + let mut num = None; + if state.current.kind != TokenKind::SemiColon { + num = Some(parser::expression(state, Precedence::Lowest)?); + } + + utils::skip_semicolon(state)?; + + Ok(Statement::Continue { num }) +} + +pub fn break_statement(state: &mut State) -> ParseResult { + utils::skip(state, TokenKind::Break)?; + + let mut num = None; + if state.current.kind != TokenKind::SemiColon { + num = Some(parser::expression(state, Precedence::Lowest)?); + } + + utils::skip_semicolon(state)?; + + Ok(Statement::Break { num }) } diff --git a/src/parser/internal/mod.rs b/src/parser/internal/mod.rs index f25d05f..d8f2ffe 100644 --- a/src/parser/internal/mod.rs +++ b/src/parser/internal/mod.rs @@ -6,6 +6,7 @@ pub(in crate::parser) mod classish_statements; pub(in crate::parser) mod control_flow; pub(in crate::parser) mod data_type; pub(in crate::parser) mod functions; +pub(in crate::parser) mod goto; pub(in crate::parser) mod identifiers; pub(in crate::parser) mod loops; pub(in crate::parser) mod modifiers; @@ -13,5 +14,6 @@ pub(in crate::parser) mod namespaces; pub(in crate::parser) mod parameters; pub(in crate::parser) mod precedences; pub(in crate::parser) mod try_block; +pub(in crate::parser) mod uses; pub(in crate::parser) mod utils; pub(in crate::parser) mod variables; diff --git a/src/parser/internal/namespaces.rs b/src/parser/internal/namespaces.rs index b1ccccc..3912013 100644 --- a/src/parser/internal/namespaces.rs +++ b/src/parser/internal/namespaces.rs @@ -1,4 +1,5 @@ use crate::lexer::token::TokenKind; +use crate::parser; use crate::parser::ast::identifiers::Identifier; use crate::parser::ast::Block; use crate::parser::ast::Statement; @@ -9,72 +10,65 @@ use crate::parser::internal::utils; use crate::parser::state::NamespaceType; use crate::parser::state::Scope; use crate::parser::state::State; -use crate::parser::Parser; use crate::scoped; -impl Parser { - pub(in crate::parser) fn namespace(&self, state: &mut State) -> ParseResult { - state.next(); +pub fn namespace(state: &mut State) -> ParseResult { + state.next(); - let name = identifiers::optional_name(state); + let name = identifiers::optional_name(state); - if let Some(name) = &name { - if state.current.kind != TokenKind::LeftBrace { - if let Some(NamespaceType::Braced) = state.namespace_type() { - return Err(ParseError::MixingBracedAndUnBracedNamespaceDeclarations( - state.current.span, - )); - } - - return self.unbraced_namespace(state, name.clone()); + if let Some(name) = &name { + if state.current.kind != TokenKind::LeftBrace { + if let Some(NamespaceType::Braced) = state.namespace_type() { + return Err(ParseError::MixingBracedAndUnBracedNamespaceDeclarations( + state.current.span, + )); } - } - match state.namespace_type() { - Some(NamespaceType::Unbraced) => Err( - ParseError::MixingBracedAndUnBracedNamespaceDeclarations(state.current.span), - ), - Some(NamespaceType::Braced) if state.namespace().is_some() => { - Err(ParseError::NestedNamespaceDeclarations(state.current.span)) - } - _ => self.braced_namespace(state, name), + return unbraced_namespace(state, name.clone()); } } - fn unbraced_namespace(&self, state: &mut State, name: Identifier) -> ParseResult { - let body = scoped!(state, Scope::Namespace(name.clone()), { - let mut body = Block::new(); - // since this is an unbraced namespace, as soon as we encouter another - // `namespace` token as a top level statement, this namespace scope ends. - // otherwise we will end up with nested namespace statements. - while state.current.kind != TokenKind::Namespace && !state.is_eof() { - body.push(self.top_level_statement(state)?); - } - - body - }); - - Ok(Statement::Namespace { name, body }) - } - - fn braced_namespace( - &self, - state: &mut State, - name: Option, - ) -> ParseResult { - utils::skip_left_brace(state)?; - - let body = scoped!(state, Scope::BracedNamespace(name.clone()), { - let mut body = Block::new(); - while state.current.kind != TokenKind::RightBrace && !state.is_eof() { - body.push(self.top_level_statement(state)?); - } - - body - }); - - utils::skip_right_brace(state)?; - - Ok(Statement::BracedNamespace { name, body }) + match state.namespace_type() { + Some(NamespaceType::Unbraced) => Err( + ParseError::MixingBracedAndUnBracedNamespaceDeclarations(state.current.span), + ), + Some(NamespaceType::Braced) if state.namespace().is_some() => { + Err(ParseError::NestedNamespaceDeclarations(state.current.span)) + } + _ => braced_namespace(state, name), } } + +fn unbraced_namespace(state: &mut State, name: Identifier) -> ParseResult { + let body = scoped!(state, Scope::Namespace(name.clone()), { + let mut body = Block::new(); + // since this is an unbraced namespace, as soon as we encouter another + // `namespace` token as a top level statement, this namespace scope ends. + // otherwise we will end up with nested namespace statements. + while state.current.kind != TokenKind::Namespace && !state.is_eof() { + body.push(parser::top_level_statement(state)?); + } + + body + }); + + Ok(Statement::Namespace { name, body }) +} + +fn braced_namespace(state: &mut State, name: Option) -> ParseResult { + utils::skip_left_brace(state)?; + + let body = scoped!(state, Scope::BracedNamespace(name.clone()), { + let mut body = Block::new(); + while state.current.kind != TokenKind::RightBrace && !state.is_eof() { + body.push(parser::top_level_statement(state)?); + } + + body + }); + + utils::skip_right_brace(state)?; + + Ok(Statement::BracedNamespace { name, body }) +} diff --git a/src/parser/internal/parameters.rs b/src/parser/internal/parameters.rs index a827826..f16facd 100644 --- a/src/parser/internal/parameters.rs +++ b/src/parser/internal/parameters.rs @@ -1,5 +1,6 @@ use super::identifiers; use crate::lexer::token::TokenKind; +use crate::parser; use crate::parser::ast::functions::FunctionParameter; use crate::parser::ast::functions::FunctionParameterList; use crate::parser::ast::functions::MethodParameter; @@ -8,298 +9,290 @@ use crate::parser::ast::Arg; use crate::parser::ast::Expression; use crate::parser::error::ParseError; use crate::parser::error::ParseResult; +use crate::parser::internal::attributes; use crate::parser::internal::data_type; use crate::parser::internal::modifiers; use crate::parser::internal::precedences::Precedence; use crate::parser::internal::utils; use crate::parser::state::Scope; use crate::parser::state::State; -use crate::parser::Parser; -impl Parser { - pub(in crate::parser) fn function_parameter_list( - &self, - state: &mut State, - ) -> Result { - let mut members = Vec::new(); +pub fn function_parameter_list(state: &mut State) -> Result { + let mut members = Vec::new(); - let list_start = state.current.span; - utils::skip_left_parenthesis(state)?; + let list_start = state.current.span; + utils::skip_left_parenthesis(state)?; + + state.skip_comments(); + + while !state.is_eof() && state.current.kind != TokenKind::RightParen { + let start = state.current.span; + + attributes::gather_attributes(state)?; + + let ty = data_type::optional_data_type(state)?; + + let mut variadic = false; + let mut by_ref = false; + + if state.current.kind == TokenKind::Ampersand { + state.next(); + by_ref = true; + } + + if state.current.kind == TokenKind::Ellipsis { + state.next(); + + variadic = true; + } + + // 2. Then expect a variable. + let var = identifiers::var(state)?; + + let mut default = None; + if state.current.kind == TokenKind::Equals { + state.next(); + default = Some(parser::expression(state, Precedence::Lowest)?); + } + + let end = state.current.span; + + members.push(FunctionParameter { + start, + end, + name: var, + attributes: state.get_attributes(), + r#type: ty, + variadic, + default, + by_ref, + }); state.skip_comments(); - while !state.is_eof() && state.current.kind != TokenKind::RightParen { - let start = state.current.span; - - self.gather_attributes(state)?; - - let ty = data_type::optional_data_type(state)?; - - let mut variadic = false; - let mut by_ref = false; - - if state.current.kind == TokenKind::Ampersand { - state.next(); - by_ref = true; - } - - if state.current.kind == TokenKind::Ellipsis { - state.next(); - - variadic = true; - } - - // 2. Then expect a variable. - let var = identifiers::var(state)?; - - let mut default = None; - if state.current.kind == TokenKind::Equals { - state.next(); - default = Some(self.expression(state, Precedence::Lowest)?); - } - - let end = state.current.span; - - members.push(FunctionParameter { - start, - end, - name: var, - attributes: state.get_attributes(), - r#type: ty, - variadic, - default, - by_ref, - }); - - state.skip_comments(); - - if state.current.kind == TokenKind::Comma { - state.next(); - } else { - break; - } + if state.current.kind == TokenKind::Comma { + state.next(); + } else { + break; } - - utils::skip_right_parenthesis(state)?; - - let list_end = state.current.span; - - Ok(FunctionParameterList { - start: list_start, - end: list_end, - members, - }) } - /// TODO(azjezz): split this into `method_parameter_list` and `abstract_method_parameter_list`? - /// abstract method parameter list won't have a promoted property, so some of the logic - /// here can be avoided for performance. - pub(in crate::parser) fn method_parameter_list( - &self, - state: &mut State, - ) -> Result { - let mut class_name = String::new(); - let construct: i8 = match state.scope()? { - Scope::Method(name, modifiers) => { - if name.to_string() != "__construct" { - 0 - } else { - match state.parent()? { - // can only have abstract ctor - Scope::Interface(_) => 1, - // can only have concret ctor - Scope::AnonymousClass(_) => { - class_name = state.named("class@anonymous"); + utils::skip_right_parenthesis(state)?; + + let list_end = state.current.span; + + Ok(FunctionParameterList { + start: list_start, + end: list_end, + members, + }) +} + +/// TODO(azjezz): split this into `method_parameter_list` and `abstract_method_parameter_list`? +/// abstract method parameter list won't have a promoted property, so some of the logic +/// here can be avoided for performance. +pub fn method_parameter_list(state: &mut State) -> Result { + let mut class_name = String::new(); + let construct: i8 = match state.scope()? { + Scope::Method(name, modifiers) => { + if name.to_string() != "__construct" { + 0 + } else { + match state.parent()? { + // can only have abstract ctor + Scope::Interface(_) => 1, + // can only have concret ctor + Scope::AnonymousClass(_) => { + class_name = state.named("class@anonymous"); + + 2 + } + // can have either abstract or concret ctor, + // depens on method modifiers. + Scope::Class(name, _, _) | Scope::Trait(name) => { + if modifiers.has_abstract() { + 1 + } else { + class_name = state.named(name); 2 } - // can have either abstract or concret ctor, - // depens on method modifiers. - Scope::Class(name, _, _) | Scope::Trait(name) => { - if modifiers.has_abstract() { - 1 - } else { - class_name = state.named(name); - - 2 - } - } - _ => unreachable!(), } + _ => unreachable!(), } } - scope => unreachable!("shouldn't reach scope `{:?}`", scope), - }; + } + scope => unreachable!("shouldn't reach scope `{:?}`", scope), + }; - let mut members = Vec::new(); + let mut members = Vec::new(); - let list_start = state.current.span; - utils::skip_left_parenthesis(state)?; + let list_start = state.current.span; + utils::skip_left_parenthesis(state)?; - state.skip_comments(); + state.skip_comments(); - while !state.is_eof() && state.current.kind != TokenKind::RightParen { - let start = state.current.span; + while !state.is_eof() && state.current.kind != TokenKind::RightParen { + let start = state.current.span; - self.gather_attributes(state)?; + attributes::gather_attributes(state)?; - let modifiers = modifiers::promoted_property_group(modifiers::collect(state)?)?; + let modifiers = modifiers::promoted_property_group(modifiers::collect(state)?)?; - let ty = data_type::optional_data_type(state)?; + let ty = data_type::optional_data_type(state)?; - let mut variadic = false; - let mut by_ref = false; + let mut variadic = false; + let mut by_ref = false; - if matches!(state.current.kind, TokenKind::Ampersand) { - state.next(); - by_ref = true; - } - - if matches!(state.current.kind, TokenKind::Ellipsis) { - state.next(); - if !modifiers.is_empty() { - return Err(ParseError::VariadicPromotedProperty(state.current.span)); - } - - variadic = true; - } - - // 2. Then expect a variable. - let var = identifiers::var(state)?; + if matches!(state.current.kind, TokenKind::Ampersand) { + state.next(); + by_ref = true; + } + if matches!(state.current.kind, TokenKind::Ellipsis) { + state.next(); if !modifiers.is_empty() { - match construct { - 0 => { - return Err(ParseError::PromotedPropertyOutsideConstructor( + return Err(ParseError::VariadicPromotedProperty(state.current.span)); + } + + variadic = true; + } + + // 2. Then expect a variable. + let var = identifiers::var(state)?; + + if !modifiers.is_empty() { + match construct { + 0 => { + return Err(ParseError::PromotedPropertyOutsideConstructor( + state.current.span, + )); + } + 1 => { + return Err(ParseError::PromotedPropertyOnAbstractConstructor( + state.current.span, + )); + } + _ => {} + } + + match &ty { + Some(ty) => { + if ty.includes_callable() || ty.is_bottom() { + return Err(ParseError::ForbiddenTypeUsedInProperty( + class_name, + var.to_string(), + ty.clone(), state.current.span, )); } - 1 => { - return Err(ParseError::PromotedPropertyOnAbstractConstructor( + } + None => { + if modifiers.has_readonly() { + return Err(ParseError::MissingTypeForReadonlyProperty( + class_name, + var.to_string(), state.current.span, )); } - _ => {} } - - match &ty { - Some(ty) => { - if ty.includes_callable() || ty.is_bottom() { - return Err(ParseError::ForbiddenTypeUsedInProperty( - class_name, - var.to_string(), - ty.clone(), - state.current.span, - )); - } - } - None => { - if modifiers.has_readonly() { - return Err(ParseError::MissingTypeForReadonlyProperty( - class_name, - var.to_string(), - state.current.span, - )); - } - } - } - } - - let mut default = None; - if state.current.kind == TokenKind::Equals { - state.next(); - default = Some(self.expression(state, Precedence::Lowest)?); - } - - let end = state.current.span; - - members.push(MethodParameter { - start, - end, - name: var, - attributes: state.get_attributes(), - r#type: ty, - variadic, - default, - modifiers, - by_ref, - }); - - state.skip_comments(); - - if state.current.kind == TokenKind::Comma { - state.next(); - } else { - break; } } - utils::skip_right_parenthesis(state)?; + let mut default = None; + if state.current.kind == TokenKind::Equals { + state.next(); + default = Some(parser::expression(state, Precedence::Lowest)?); + } - let list_end = state.current.span; + let end = state.current.span; - Ok(MethodParameterList { - start: list_start, - end: list_end, - members, - }) - } + members.push(MethodParameter { + start, + end, + name: var, + attributes: state.get_attributes(), + r#type: ty, + variadic, + default, + modifiers, + by_ref, + }); - pub(in crate::parser) fn args_list(&self, state: &mut State) -> ParseResult> { - utils::skip_left_parenthesis(state)?; state.skip_comments(); - let mut args = Vec::new(); - let mut has_used_named_arguments = false; + if state.current.kind == TokenKind::Comma { + state.next(); + } else { + break; + } + } - while !state.is_eof() && state.current.kind != TokenKind::RightParen { - let mut name = None; - let mut unpack = false; - if (matches!(state.current.kind, TokenKind::Identifier(_)) - || identifiers::is_reserved_ident(&state.current.kind)) - && state.peek.kind == TokenKind::Colon - { - name = Some(identifiers::ident_maybe_reserved(state)?); - has_used_named_arguments = true; - state.next(); - } else if state.current.kind == TokenKind::Ellipsis { - state.next(); - unpack = true; - } + utils::skip_right_parenthesis(state)?; - if name.is_none() && has_used_named_arguments { - return Err(ParseError::CannotUsePositionalArgumentAfterNamedArgument( - state.current.span, - )); - } + let list_end = state.current.span; - if unpack && state.current.kind == TokenKind::RightParen { - args.push(Arg { - name: None, - unpack: false, - value: Expression::VariadicPlaceholder, - }); + Ok(MethodParameterList { + start: list_start, + end: list_end, + members, + }) +} - break; - } +pub fn args_list(state: &mut State) -> ParseResult> { + utils::skip_left_parenthesis(state)?; + state.skip_comments(); - let value = self.expression(state, Precedence::Lowest)?; + let mut args = Vec::new(); + let mut has_used_named_arguments = false; - args.push(Arg { - name, - unpack, - value, - }); - - if state.current.kind == TokenKind::Comma { - state.next(); - } else { - break; - } + while !state.is_eof() && state.current.kind != TokenKind::RightParen { + let mut name = None; + let mut unpack = false; + if (matches!(state.current.kind, TokenKind::Identifier(_)) + || identifiers::is_reserved_ident(&state.current.kind)) + && state.peek.kind == TokenKind::Colon + { + name = Some(identifiers::ident_maybe_reserved(state)?); + has_used_named_arguments = true; + state.next(); + } else if state.current.kind == TokenKind::Ellipsis { + state.next(); + unpack = true; } - utils::skip_right_parenthesis(state)?; + if name.is_none() && has_used_named_arguments { + return Err(ParseError::CannotUsePositionalArgumentAfterNamedArgument( + state.current.span, + )); + } - Ok(args) + if unpack && state.current.kind == TokenKind::RightParen { + args.push(Arg { + name: None, + unpack: false, + value: Expression::VariadicPlaceholder, + }); + + break; + } + + let value = parser::expression(state, Precedence::Lowest)?; + + args.push(Arg { + name, + unpack, + value, + }); + + if state.current.kind == TokenKind::Comma { + state.next(); + } else { + break; + } } + + utils::skip_right_parenthesis(state)?; + + Ok(args) } diff --git a/src/parser/internal/try_block.rs b/src/parser/internal/try_block.rs index ee7a992..712e445 100644 --- a/src/parser/internal/try_block.rs +++ b/src/parser/internal/try_block.rs @@ -1,4 +1,5 @@ use crate::lexer::token::TokenKind; +use crate::parser; use crate::parser::ast::try_block::CatchBlock; use crate::parser::ast::try_block::CatchType; use crate::parser::ast::try_block::FinallyBlock; @@ -6,92 +7,90 @@ use crate::parser::ast::try_block::TryBlock; use crate::parser::ast::Statement; use crate::parser::error::ParseError; use crate::parser::error::ParseResult; +use crate::parser::internal::blocks; use crate::parser::internal::identifiers; use crate::parser::internal::precedences::Precedence; use crate::parser::internal::utils; use crate::parser::state::State; -use crate::parser::Parser; -impl Parser { - pub(in crate::parser) fn try_block(&self, state: &mut State) -> ParseResult { - let start = state.current.span; +pub fn try_block(state: &mut State) -> ParseResult { + let start = state.current.span; + + state.next(); + utils::skip_left_brace(state)?; + + let body = blocks::body(state, &TokenKind::RightBrace)?; + + utils::skip_right_brace(state)?; + + let mut catches = Vec::new(); + loop { + if state.current.kind != TokenKind::Catch { + break; + } + + let catch_start = state.current.span; state.next(); + utils::skip_left_parenthesis(state)?; + + let types = catch_type(state)?; + let var = if state.current.kind == TokenKind::RightParen { + None + } else { + // TODO(azjezz): this is a variable, no an expression? + Some(parser::expression(state, Precedence::Lowest)?) + }; + + utils::skip_right_parenthesis(state)?; utils::skip_left_brace(state)?; - let body = self.body(state, &TokenKind::RightBrace)?; + let catch_body = blocks::body(state, &TokenKind::RightBrace)?; utils::skip_right_brace(state)?; - let mut catches = Vec::new(); - loop { - if state.current.kind != TokenKind::Catch { - break; - } + let catch_end = state.current.span; - let catch_start = state.current.span; - - state.next(); - utils::skip_left_parenthesis(state)?; - - let types = catch_type(state)?; - let var = if state.current.kind == TokenKind::RightParen { - None - } else { - // TODO(azjezz): this is a variable, no an expression? - Some(self.expression(state, Precedence::Lowest)?) - }; - - utils::skip_right_parenthesis(state)?; - utils::skip_left_brace(state)?; - - let catch_body = self.body(state, &TokenKind::RightBrace)?; - - utils::skip_right_brace(state)?; - - let catch_end = state.current.span; - - catches.push(CatchBlock { - start: catch_start, - end: catch_end, - types, - var, - body: catch_body, - }) - } - - let mut finally = None; - if state.current.kind == TokenKind::Finally { - let finally_start = state.current.span; - state.next(); - utils::skip_left_brace(state)?; - - let finally_body = self.body(state, &TokenKind::RightBrace)?; - - utils::skip_right_brace(state)?; - let finally_end = state.current.span; - - finally = Some(FinallyBlock { - start: finally_start, - end: finally_end, - body: finally_body, - }); - } - - if catches.is_empty() && finally.is_none() { - return Err(ParseError::TryWithoutCatchOrFinally(start)); - } - - let end = state.current.span; - - Ok(Statement::Try(TryBlock { - start, - end, - body, - catches, - finally, - })) + catches.push(CatchBlock { + start: catch_start, + end: catch_end, + types, + var, + body: catch_body, + }) } + + let mut finally = None; + if state.current.kind == TokenKind::Finally { + let finally_start = state.current.span; + state.next(); + utils::skip_left_brace(state)?; + + let finally_body = blocks::body(state, &TokenKind::RightBrace)?; + + utils::skip_right_brace(state)?; + let finally_end = state.current.span; + + finally = Some(FinallyBlock { + start: finally_start, + end: finally_end, + body: finally_body, + }); + } + + if catches.is_empty() && finally.is_none() { + return Err(ParseError::TryWithoutCatchOrFinally(start)); + } + + let end = state.current.span; + + Ok(Statement::Try(TryBlock { + start, + end, + body, + catches, + finally, + })) } #[inline(always)] diff --git a/src/parser/internal/uses.rs b/src/parser/internal/uses.rs new file mode 100644 index 0000000..e0cbeff --- /dev/null +++ b/src/parser/internal/uses.rs @@ -0,0 +1,75 @@ +use crate::lexer::token::TokenKind; +use crate::parser::ast::Statement; +use crate::parser::ast::Use; +use crate::parser::ast::UseKind; +use crate::parser::error::ParseResult; +use crate::parser::internal::identifiers; +use crate::parser::internal::utils; +use crate::parser::state::State; + +pub fn use_statement(state: &mut State) -> ParseResult { + state.next(); + + let kind = match state.current.kind { + TokenKind::Function => { + state.next(); + UseKind::Function + } + TokenKind::Const => { + state.next(); + UseKind::Const + } + _ => UseKind::Normal, + }; + + if state.peek.kind == TokenKind::LeftBrace { + let prefix = identifiers::full_name(state)?; + state.next(); + + let mut uses = Vec::new(); + while state.current.kind != TokenKind::RightBrace { + let name = identifiers::full_name(state)?; + let mut alias = None; + + if state.current.kind == TokenKind::As { + state.next(); + alias = Some(identifiers::ident(state)?); + } + + uses.push(Use { name, alias }); + + if state.current.kind == TokenKind::Comma { + state.next(); + continue; + } + } + + utils::skip_right_brace(state)?; + utils::skip_semicolon(state)?; + + Ok(Statement::GroupUse { prefix, kind, uses }) + } else { + let mut uses = Vec::new(); + while !state.is_eof() { + let name = identifiers::full_name(state)?; + let mut alias = None; + + if state.current.kind == TokenKind::As { + state.next(); + alias = Some(identifiers::ident(state)?); + } + + uses.push(Use { name, alias }); + + if state.current.kind == TokenKind::Comma { + state.next(); + continue; + } + + utils::skip_semicolon(state)?; + break; + } + + Ok(Statement::Use { uses, kind }) + } +} diff --git a/src/parser/internal/utils.rs b/src/parser/internal/utils.rs index 22a94c7..8d3a0d8 100644 --- a/src/parser/internal/utils.rs +++ b/src/parser/internal/utils.rs @@ -70,7 +70,7 @@ pub fn skip_double_colon(state: &mut State) -> ParseResult { skip(state, TokenKind::DoubleColon) } -pub fn colon(state: &mut State) -> ParseResult { +pub fn skip_colon(state: &mut State) -> ParseResult { let span = skip(state, TokenKind::Colon)?; // A closing PHP tag is valid after a colon, since // that typically indicates the start of a block (control structures). @@ -81,8 +81,6 @@ pub fn colon(state: &mut State) -> ParseResult { } pub fn skip(state: &mut State, kind: TokenKind) -> ParseResult { - state.skip_comments(); - if state.current.kind == kind { let end = state.current.span; diff --git a/src/parser/internal/variables.rs b/src/parser/internal/variables.rs index a582c9a..e8efdf3 100644 --- a/src/parser/internal/variables.rs +++ b/src/parser/internal/variables.rs @@ -1,36 +1,35 @@ use crate::lexer::token::TokenKind; +use crate::parser; use crate::parser::ast::Expression; use crate::parser::error::ParseResult; use crate::parser::internal::identifiers; use crate::parser::internal::precedences::Precedence; use crate::parser::internal::utils; use crate::parser::state::State; -use crate::parser::Parser; use crate::peek_token; -impl Parser { - pub(in crate::parser) fn dynamic_variable(&self, state: &mut State) -> ParseResult { - state.next(); +pub fn dynamic_variable(state: &mut State) -> ParseResult { + state.next(); - let expr = peek_token!([ - TokenKind::LeftBrace => { - state.next(); + let expr = peek_token!([ + TokenKind::LeftBrace => { + state.next(); - let name = self.expression(state, Precedence::Lowest)?; + // TODO(azjezz): this is not an expression! it's a constant expression + let name = parser::expression(state, Precedence::Lowest)?; - utils::skip_right_brace(state)?; + utils::skip_right_brace(state)?; - Expression::DynamicVariable { - name: Box::new(name), - } - }, - TokenKind::Variable(_) => { - Expression::DynamicVariable { - name: Box::new(Expression::Variable(identifiers::var(state)?)), - } + Expression::DynamicVariable { + name: Box::new(name), } - ], state, ["`{`", "a variable"]); + }, + TokenKind::Variable(_) => { + Expression::DynamicVariable { + name: Box::new(Expression::Variable(identifiers::var(state)?)), + } + } + ], state, ["`{`", "a variable"]); - Ok(expr) - } + Ok(expr) } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 81f4a93..72fae65 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -8,17 +8,29 @@ use crate::parser::ast::comments::Comment; use crate::parser::ast::comments::CommentFormat; use crate::parser::ast::identifiers::Identifier; use crate::parser::ast::variables::Variable; -use crate::parser::ast::DefaultMatchArm; use crate::parser::ast::{ - ArrayItem, Constant, DeclareItem, Expression, IncludeKind, MagicConst, MatchArm, Program, - Statement, StaticVar, StringPart, Use, UseKind, + Constant, DeclareItem, Expression, IncludeKind, MagicConst, Program, Statement, StaticVar, + StringPart, }; use crate::parser::error::ParseError; use crate::parser::error::ParseResult; +use crate::parser::internal::arrays; +use crate::parser::internal::attributes; +use crate::parser::internal::blocks; +use crate::parser::internal::classish; +use crate::parser::internal::control_flow; +use crate::parser::internal::functions; +use crate::parser::internal::goto; use crate::parser::internal::identifiers; -use crate::parser::internal::identifiers::is_reserved_ident; -use crate::parser::internal::precedences::{Associativity, Precedence}; +use crate::parser::internal::loops; +use crate::parser::internal::namespaces; +use crate::parser::internal::parameters; +use crate::parser::internal::precedences::Associativity; +use crate::parser::internal::precedences::Precedence; +use crate::parser::internal::try_block; +use crate::parser::internal::uses; use crate::parser::internal::utils; +use crate::parser::internal::variables; use crate::parser::state::State; pub mod ast; @@ -28,126 +40,263 @@ mod internal; mod macros; mod state; -#[derive(Debug, PartialEq, Eq, Clone, Copy, Default)] -pub struct Parser; +pub fn parse(tokens: Vec) -> ParseResult { + let mut state = State::new(tokens); -impl Parser { - pub const fn new() -> Self { - Self {} - } + let mut ast = Program::new(); - pub fn parse(&self, tokens: Vec) -> ParseResult { - let mut state = State::new(tokens); - - let mut ast = Program::new(); - - while state.current.kind != TokenKind::Eof { - if matches!( - state.current.kind, - TokenKind::OpenTag(_) | TokenKind::CloseTag - ) { - state.next(); - continue; - } - - state.gather_comments(); - - if state.is_eof() { - break; - } - - ast.push(self.top_level_statement(&mut state)?); - - state.clear_comments(); + while state.current.kind != TokenKind::Eof { + if matches!( + state.current.kind, + TokenKind::OpenTag(_) | TokenKind::CloseTag + ) { + state.next(); + continue; } - Ok(ast.to_vec()) + state.gather_comments(); + + if state.is_eof() { + break; + } + + ast.push(top_level_statement(&mut state)?); + + state.clear_comments(); } - fn top_level_statement(&self, state: &mut State) -> ParseResult { - state.skip_comments(); + Ok(ast.to_vec()) +} - let statement = match &state.current.kind { - TokenKind::Namespace => self.namespace(state)?, - TokenKind::Use => { - state.next(); +fn top_level_statement(state: &mut State) -> ParseResult { + state.skip_comments(); - let kind = match state.current.kind { - TokenKind::Function => { - state.next(); - UseKind::Function - } - TokenKind::Const => { - state.next(); - UseKind::Const - } - _ => UseKind::Normal, - }; + let statement = match &state.current.kind { + TokenKind::Namespace => namespaces::namespace(state)?, + TokenKind::Use => uses::use_statement(state)?, + TokenKind::Const => { + state.next(); - if state.peek.kind == TokenKind::LeftBrace { - let prefix = identifiers::full_name(state)?; + let mut constants = vec![]; + + loop { + let name = identifiers::ident(state)?; + + utils::skip(state, TokenKind::Equals)?; + + let value = expression(state, Precedence::Lowest)?; + + constants.push(Constant { name, value }); + + if state.current.kind == TokenKind::Comma { state.next(); - - let mut uses = Vec::new(); - while state.current.kind != TokenKind::RightBrace { - let name = identifiers::full_name(state)?; - let mut alias = None; - - if state.current.kind == TokenKind::As { - state.next(); - alias = Some(identifiers::ident(state)?); - } - - uses.push(Use { name, alias }); - - if state.current.kind == TokenKind::Comma { - state.next(); - continue; - } - } - - utils::skip_right_brace(state)?; - utils::skip_semicolon(state)?; - - Statement::GroupUse { prefix, kind, uses } } else { - let mut uses = Vec::new(); - while !state.is_eof() { - let name = identifiers::full_name(state)?; - let mut alias = None; - - if state.current.kind == TokenKind::As { - state.next(); - alias = Some(identifiers::ident(state)?); - } - - uses.push(Use { name, alias }); - - if state.current.kind == TokenKind::Comma { - state.next(); - continue; - } - - utils::skip_semicolon(state)?; - break; - } - - Statement::Use { uses, kind } + break; } } - TokenKind::Const => { + + utils::skip_semicolon(state)?; + + Statement::Constant { constants } + } + TokenKind::HaltCompiler => { + state.next(); + + let content = if let TokenKind::InlineHtml(content) = state.current.kind.clone() { state.next(); + Some(content) + } else { + None + }; - let mut constants = vec![]; + Statement::HaltCompiler { content } + } + _ => statement(state)?, + }; + state.clear_comments(); + + // A closing PHP tag is valid after the end of any top-level statement. + if state.current.kind == TokenKind::CloseTag { + state.next(); + } + + Ok(statement) +} + +fn statement(state: &mut State) -> ParseResult { + let has_attributes = attributes::gather_attributes(state)?; + + let statement = if has_attributes { + match &state.current.kind { + TokenKind::Abstract => classish::class_definition(state)?, + TokenKind::Readonly => classish::class_definition(state)?, + TokenKind::Final => classish::class_definition(state)?, + TokenKind::Class => classish::class_definition(state)?, + TokenKind::Interface => classish::interface_definition(state)?, + TokenKind::Trait => classish::trait_definition(state)?, + TokenKind::Enum => classish::enum_definition(state)?, + TokenKind::Function + if matches!( + state.peek.kind, + TokenKind::Identifier(_) | TokenKind::Null | TokenKind::Ampersand + ) => + { + // FIXME: This is incredibly hacky but we don't have a way to look at + // the next N tokens right now. We could probably do with a `peek_buf()` + // method like the Lexer has. + if state.peek.kind == TokenKind::Ampersand { + let mut cloned = state.iter.clone(); + if let Some((index, _)) = state.iter.clone().enumerate().next() { + if !matches!( + cloned.nth(index), + Some(Token { + kind: TokenKind::Identifier(_), + .. + }) + ) { + let expr = expression(state, Precedence::Lowest)?; + + utils::skip_semicolon(state)?; + + return Ok(Statement::Expression { expr }); + } + } + + functions::function(state)? + } else { + functions::function(state)? + } + } + _ => { + // Note, we can get attributes and know their span, maybe use that in the + // error in the future? + return Err(ParseError::ExpectedItemDefinitionAfterAttributes( + state.current.span, + )); + } + } + } else { + match &state.current.kind { + TokenKind::Abstract => classish::class_definition(state)?, + TokenKind::Readonly => classish::class_definition(state)?, + TokenKind::Final => classish::class_definition(state)?, + TokenKind::Class => classish::class_definition(state)?, + TokenKind::Interface => classish::interface_definition(state)?, + TokenKind::Trait => classish::trait_definition(state)?, + TokenKind::Enum => classish::enum_definition(state)?, + TokenKind::Function + if matches!( + state.peek.kind, + TokenKind::Identifier(_) | TokenKind::Null | TokenKind::Ampersand + ) => + { + // FIXME: This is incredibly hacky but we don't have a way to look at + // the next N tokens right now. We could probably do with a `peek_buf()` + // method like the Lexer has. + if state.peek.kind == TokenKind::Ampersand { + if let Some((_, token)) = state.iter.clone().enumerate().next() { + if !matches!( + token, + Token { + kind: TokenKind::Identifier(_), + .. + } + ) { + let expr = expression(state, Precedence::Lowest)?; + + utils::skip_semicolon(state)?; + + return Ok(Statement::Expression { expr }); + } + } + + functions::function(state)? + } else { + functions::function(state)? + } + } + TokenKind::Goto => goto::goto_statement(state)?, + TokenKind::Identifier(_) if state.peek.kind == TokenKind::Colon => { + goto::label_statement(state)? + } + TokenKind::Declare => { + state.next(); + utils::skip_left_parenthesis(state)?; + + let mut declares = Vec::new(); loop { - let name = identifiers::ident(state)?; + let key = identifiers::ident(state)?; utils::skip(state, TokenKind::Equals)?; - let value = self.expression(state, Precedence::Lowest)?; + let value = expect_literal!(state); - constants.push(Constant { name, value }); + declares.push(DeclareItem { key, value }); + + if state.current.kind == TokenKind::Comma { + state.next(); + } else { + break; + } + } + + utils::skip_right_parenthesis(state)?; + + let body = if state.current.kind == TokenKind::LeftBrace { + state.next(); + let b = blocks::body(state, &TokenKind::RightBrace)?; + utils::skip_right_brace(state)?; + b + } else if state.current.kind == TokenKind::Colon { + utils::skip_colon(state)?; + let b = blocks::body(state, &TokenKind::EndDeclare)?; + utils::skip(state, TokenKind::EndDeclare)?; + utils::skip_semicolon(state)?; + b + } else { + utils::skip_semicolon(state)?; + vec![] + }; + + Statement::Declare { declares, body } + } + TokenKind::Global => { + state.next(); + + let mut vars = vec![]; + // `loop` instead of `while` as we don't allow for extra commas. + loop { + vars.push(identifiers::var(state)?); + + if state.current.kind == TokenKind::Comma { + state.next(); + } else { + break; + } + } + + utils::skip_semicolon(state)?; + Statement::Global { vars } + } + TokenKind::Static if matches!(state.peek.kind, TokenKind::Variable(_)) => { + state.next(); + + let mut vars = vec![]; + + // `loop` instead of `while` as we don't allow for extra commas. + loop { + let var = identifiers::var(state)?; + let mut default = None; + + if state.current.kind == TokenKind::Equals { + state.next(); + + default = Some(expression(state, Precedence::Lowest)?); + } + + // TODO: group static vars. + vars.push(StaticVar { var, default }); if state.current.kind == TokenKind::Comma { state.next(); @@ -158,1274 +307,993 @@ impl Parser { utils::skip_semicolon(state)?; - Statement::Constant { constants } + Statement::Static { vars } } - TokenKind::HaltCompiler => { + TokenKind::InlineHtml(html) => { + let s = Statement::InlineHtml(html.clone()); + state.next(); + s + } + TokenKind::SingleLineComment(comment) => { + let start = state.current.span; + let content = comment.clone(); + state.next(); + let end = state.current.span; + let format = CommentFormat::SingleLine; + + Statement::Comment(Comment { + start, + end, + format, + content, + }) + } + TokenKind::MultiLineComment(comment) => { + let start = state.current.span; + let content = comment.clone(); + state.next(); + let end = state.current.span; + let format = CommentFormat::MultiLine; + + Statement::Comment(Comment { + start, + end, + format, + content, + }) + } + TokenKind::HashMarkComment(comment) => { + let start = state.current.span; + let content = comment.clone(); + state.next(); + let end = state.current.span; + let format = CommentFormat::HashMark; + + Statement::Comment(Comment { + start, + end, + format, + content, + }) + } + TokenKind::DocumentComment(comment) => { + let start = state.current.span; + let content = comment.clone(); + state.next(); + let end = state.current.span; + let format = CommentFormat::Document; + + Statement::Comment(Comment { + start, + end, + format, + content, + }) + } + TokenKind::Do => loops::do_loop(state)?, + TokenKind::While => loops::while_loop(state)?, + TokenKind::For => loops::for_loop(state)?, + TokenKind::Foreach => loops::foreach_loop(state)?, + TokenKind::Continue => loops::continue_statement(state)?, + TokenKind::Break => loops::break_statement(state)?, + TokenKind::Switch => control_flow::switch_statement(state)?, + TokenKind::If => control_flow::if_statement(state)?, + TokenKind::Echo => { state.next(); - let content = if let TokenKind::InlineHtml(content) = state.current.kind.clone() { - state.next(); - Some(content) + let mut values = Vec::new(); + loop { + values.push(expression(state, Precedence::Lowest)?); + + if state.current.kind == TokenKind::Comma { + state.next(); + } else { + break; + } + } + + utils::skip_semicolon(state)?; + Statement::Echo { values } + } + TokenKind::Return => { + state.next(); + + if TokenKind::SemiColon == state.current.kind { + let ret = Statement::Return { value: None }; + utils::skip_semicolon(state)?; + ret } else { - None + let ret = Statement::Return { + value: Some(expression(state, Precedence::Lowest)?), + }; + utils::skip_semicolon(state)?; + ret + } + } + TokenKind::SemiColon => { + let start = state.current.span; + + state.next(); + + Statement::Noop(start) + } + TokenKind::Try => try_block::try_block(state)?, + TokenKind::LeftBrace => blocks::block_statement(state)?, + _ => { + let expr = expression(state, Precedence::Lowest)?; + + utils::skip_semicolon(state)?; + + Statement::Expression { expr } + } + } + }; + + state.skip_comments(); + + // A closing PHP tag is valid after the end of any top-level statement. + if state.current.kind == TokenKind::CloseTag { + state.next(); + } + + Ok(statement) +} + +fn expression(state: &mut State, precedence: Precedence) -> ParseResult { + if state.is_eof() { + return Err(ParseError::UnexpectedEndOfFile); + } + + let has_attributes = attributes::gather_attributes(state)?; + + let mut left = if has_attributes { + match &state.current.kind { + TokenKind::Static if state.peek.kind == TokenKind::Function => { + functions::anonymous_function(state)? + } + TokenKind::Static if state.peek.kind == TokenKind::Fn => { + functions::arrow_function(state)? + } + TokenKind::Function => functions::anonymous_function(state)?, + TokenKind::Fn => functions::arrow_function(state)?, + _ => { + // Note, we can get attributes and know their span, maybe use that in the + // error in the future? + return Err(ParseError::ExpectedItemDefinitionAfterAttributes( + state.current.span, + )); + } + } + } else { + match &state.current.kind { + TokenKind::List => arrays::list_expression(state)?, + TokenKind::Static if state.peek.kind == TokenKind::Function => { + functions::anonymous_function(state)? + } + TokenKind::Static if state.peek.kind == TokenKind::Fn => { + functions::arrow_function(state)? + } + TokenKind::Function => functions::anonymous_function(state)?, + TokenKind::Fn => functions::arrow_function(state)?, + TokenKind::New + if state.peek.kind == TokenKind::Class + || state.peek.kind == TokenKind::Attribute => + { + classish::anonymous_class_definition(state)? + } + TokenKind::Throw => { + state.next(); + + let value = expression(state, Precedence::Lowest)?; + + Expression::Throw { + value: Box::new(value), + } + } + TokenKind::Yield => { + state.next(); + + if state.current.kind == TokenKind::SemiColon { + Expression::Yield { + key: None, + value: None, + } + } else { + let mut from = false; + + if state.current.kind == TokenKind::From { + state.next(); + from = true; + } + + let mut key = None; + let mut value = Box::new(expression( + state, + if from { + Precedence::YieldFrom + } else { + Precedence::Yield + }, + )?); + + if state.current.kind == TokenKind::DoubleArrow && !from { + state.next(); + key = Some(value.clone()); + value = Box::new(expression(state, Precedence::Yield)?); + } + + if from { + Expression::YieldFrom { value } + } else { + Expression::Yield { + key, + value: Some(value), + } + } + } + } + TokenKind::Clone => { + state.next(); + + let target = expression(state, Precedence::CloneOrNew)?; + + Expression::Clone { + target: Box::new(target), + } + } + TokenKind::Variable(_) => Expression::Variable(identifiers::var(state)?), + TokenKind::LiteralInteger(i) => { + let e = Expression::LiteralInteger { i: i.clone() }; + state.next(); + e + } + TokenKind::LiteralFloat(f) => { + let f = Expression::LiteralFloat { f: f.clone() }; + state.next(); + f + } + TokenKind::Identifier(_) + | TokenKind::QualifiedIdentifier(_) + | TokenKind::FullyQualifiedIdentifier(_) => { + Expression::Identifier(identifiers::full_name(state)?) + } + TokenKind::Self_ => { + if !state.has_class_scope { + return Err(ParseError::CannotFindTypeInCurrentScope( + state.current.kind.to_string(), + state.current.span, + )); + } + + state.next(); + + postfix(state, Expression::Self_, &TokenKind::DoubleColon)? + } + TokenKind::Static => { + if !state.has_class_scope { + return Err(ParseError::CannotFindTypeInCurrentScope( + state.current.kind.to_string(), + state.current.span, + )); + } + + state.next(); + + postfix(state, Expression::Static, &TokenKind::DoubleColon)? + } + TokenKind::Parent => { + if !state.has_class_scope { + return Err(ParseError::CannotFindTypeInCurrentScope( + state.current.kind.to_string(), + state.current.span, + )); + } + + state.next(); + + postfix(state, Expression::Parent, &TokenKind::DoubleColon)? + } + TokenKind::LiteralString(s) => { + let e = Expression::LiteralString { value: s.clone() }; + state.next(); + e + } + TokenKind::StringPart(_) => interpolated_string(state)?, + TokenKind::StartDocString(_, kind) => { + let kind = *kind; + + doc_string(state, kind)? + } + TokenKind::Backtick => shell_exec(state)?, + TokenKind::True => { + let e = Expression::Bool { value: true }; + state.next(); + e + } + TokenKind::False => { + let e = Expression::Bool { value: false }; + state.next(); + e + } + TokenKind::Null => { + state.next(); + Expression::Null + } + TokenKind::LeftParen => { + state.next(); + + let e = expression(state, Precedence::Lowest)?; + + utils::skip_right_parenthesis(state)?; + + e + } + TokenKind::Match => control_flow::match_expression(state)?, + TokenKind::Array => arrays::legacy_array_expression(state)?, + TokenKind::LeftBracket => arrays::array_expression(state)?, + TokenKind::New => { + utils::skip(state, TokenKind::New)?; + + let target = match state.current.kind { + TokenKind::Self_ => { + if !state.has_class_scope { + return Err(ParseError::CannotFindTypeInCurrentScope( + state.current.kind.to_string(), + state.current.span, + )); + } + + state.next(); + + Expression::Self_ + } + TokenKind::Static => { + if !state.has_class_scope { + return Err(ParseError::CannotFindTypeInCurrentScope( + state.current.kind.to_string(), + state.current.span, + )); + } + + state.next(); + + Expression::Static + } + TokenKind::Parent => { + if !state.has_class_scope { + return Err(ParseError::CannotFindTypeInCurrentScope( + state.current.kind.to_string(), + state.current.span, + )); + } + + state.next(); + + Expression::Parent + } + _ => expression(state, Precedence::CloneOrNew)?, }; - Statement::HaltCompiler { content } + let mut args = vec![]; + if state.current.kind == TokenKind::LeftParen { + args = parameters::args_list(state)?; + } + + Expression::New { + target: Box::new(target), + args, + } } - _ => self.statement(state)?, - }; + TokenKind::DirConstant => { + state.next(); + Expression::MagicConst { + constant: MagicConst::Dir, + } + } + TokenKind::Include + | TokenKind::IncludeOnce + | TokenKind::Require + | TokenKind::RequireOnce => { + let kind: IncludeKind = (&state.current.kind).into(); + state.next(); - state.clear_comments(); + let path = expression(state, Precedence::Lowest)?; - // A closing PHP tag is valid after the end of any top-level statement. - if state.current.kind == TokenKind::CloseTag { - state.next(); + Expression::Include { + kind, + path: Box::new(path), + } + } + _ if is_prefix(&state.current.kind) => { + let op = state.current.kind.clone(); + + state.next(); + + let rpred = Precedence::prefix(&op); + let rhs = expression(state, rpred)?; + + prefix(&op, rhs) + } + TokenKind::Dollar => variables::dynamic_variable(state)?, + _ => { + return Err(ParseError::UnexpectedToken( + state.current.kind.to_string(), + state.current.span, + )) + } } + }; - Ok(statement) + if state.current.kind == TokenKind::SemiColon { + return Ok(left); } - fn statement(&self, state: &mut State) -> ParseResult { - let has_attributes = self.gather_attributes(state)?; - - let statement = if has_attributes { - match &state.current.kind { - TokenKind::Abstract => self.class_definition(state)?, - TokenKind::Readonly => self.class_definition(state)?, - TokenKind::Final => self.class_definition(state)?, - TokenKind::Class => self.class_definition(state)?, - TokenKind::Interface => self.interface_definition(state)?, - TokenKind::Trait => self.trait_definition(state)?, - TokenKind::Enum => self.enum_definition(state)?, - TokenKind::Function - if matches!( - state.peek.kind, - TokenKind::Identifier(_) | TokenKind::Null | TokenKind::Ampersand - ) => - { - // FIXME: This is incredibly hacky but we don't have a way to look at - // the next N tokens right now. We could probably do with a `peek_buf()` - // method like the Lexer has. - if state.peek.kind == TokenKind::Ampersand { - let mut cloned = state.iter.clone(); - if let Some((index, _)) = state.iter.clone().enumerate().next() { - if !matches!( - cloned.nth(index), - Some(Token { - kind: TokenKind::Identifier(_), - .. - }) - ) { - let expr = self.expression(state, Precedence::Lowest)?; - - utils::skip_semicolon(state)?; - - return Ok(Statement::Expression { expr }); - } - } - - self.function(state)? - } else { - self.function(state)? - } - } - _ => { - // Note, we can get attributes and know their span, maybe use that in the - // error in the future? - return Err(ParseError::ExpectedItemDefinitionAfterAttributes( - state.current.span, - )); - } - } - } else { - match &state.current.kind { - TokenKind::Abstract => self.class_definition(state)?, - TokenKind::Readonly => self.class_definition(state)?, - TokenKind::Final => self.class_definition(state)?, - TokenKind::Class => self.class_definition(state)?, - TokenKind::Interface => self.interface_definition(state)?, - TokenKind::Trait => self.trait_definition(state)?, - TokenKind::Enum => self.enum_definition(state)?, - TokenKind::Function - if matches!( - state.peek.kind, - TokenKind::Identifier(_) | TokenKind::Null | TokenKind::Ampersand - ) => - { - // FIXME: This is incredibly hacky but we don't have a way to look at - // the next N tokens right now. We could probably do with a `peek_buf()` - // method like the Lexer has. - if state.peek.kind == TokenKind::Ampersand { - if let Some((_, token)) = state.iter.clone().enumerate().next() { - if !matches!( - token, - Token { - kind: TokenKind::Identifier(_), - .. - } - ) { - let expr = self.expression(state, Precedence::Lowest)?; - - utils::skip_semicolon(state)?; - - return Ok(Statement::Expression { expr }); - } - } - - self.function(state)? - } else { - self.function(state)? - } - } - TokenKind::Goto => { - state.next(); - - let label = identifiers::ident(state)?; - - utils::skip_semicolon(state)?; - - Statement::Goto { label } - } - TokenKind::Identifier(_) if state.peek.kind == TokenKind::Colon => { - let label = identifiers::ident(state)?; - - utils::colon(state)?; - - Statement::Label { label } - } - TokenKind::Declare => { - state.next(); - utils::skip_left_parenthesis(state)?; - - let mut declares = Vec::new(); - loop { - let key = identifiers::ident(state)?; - - utils::skip(state, TokenKind::Equals)?; - - let value = expect_literal!(state); - - declares.push(DeclareItem { key, value }); - - if state.current.kind == TokenKind::Comma { - state.next(); - } else { - break; - } - } - - utils::skip_right_parenthesis(state)?; - - let body = if state.current.kind == TokenKind::LeftBrace { - state.next(); - let b = self.body(state, &TokenKind::RightBrace)?; - utils::skip_right_brace(state)?; - b - } else if state.current.kind == TokenKind::Colon { - utils::colon(state)?; - let b = self.body(state, &TokenKind::EndDeclare)?; - utils::skip(state, TokenKind::EndDeclare)?; - utils::skip_semicolon(state)?; - b - } else { - utils::skip_semicolon(state)?; - vec![] - }; - - Statement::Declare { declares, body } - } - TokenKind::Global => { - state.next(); - - let mut vars = vec![]; - // `loop` instead of `while` as we don't allow for extra commas. - loop { - vars.push(identifiers::var(state)?); - - if state.current.kind == TokenKind::Comma { - state.next(); - } else { - break; - } - } - - utils::skip_semicolon(state)?; - Statement::Global { vars } - } - TokenKind::Static if matches!(state.peek.kind, TokenKind::Variable(_)) => { - state.next(); - - let mut vars = vec![]; - - // `loop` instead of `while` as we don't allow for extra commas. - loop { - let var = identifiers::var(state)?; - let mut default = None; - - if state.current.kind == TokenKind::Equals { - state.next(); - - default = Some(self.expression(state, Precedence::Lowest)?); - } - - // TODO: group static vars. - vars.push(StaticVar { var, default }); - - if state.current.kind == TokenKind::Comma { - state.next(); - } else { - break; - } - } - - utils::skip_semicolon(state)?; - - Statement::Static { vars } - } - TokenKind::InlineHtml(html) => { - let s = Statement::InlineHtml(html.clone()); - state.next(); - s - } - TokenKind::SingleLineComment(comment) => { - let start = state.current.span; - let content = comment.clone(); - state.next(); - let end = state.current.span; - let format = CommentFormat::SingleLine; - - Statement::Comment(Comment { - start, - end, - format, - content, - }) - } - TokenKind::MultiLineComment(comment) => { - let start = state.current.span; - let content = comment.clone(); - state.next(); - let end = state.current.span; - let format = CommentFormat::MultiLine; - - Statement::Comment(Comment { - start, - end, - format, - content, - }) - } - TokenKind::HashMarkComment(comment) => { - let start = state.current.span; - let content = comment.clone(); - state.next(); - let end = state.current.span; - let format = CommentFormat::HashMark; - - Statement::Comment(Comment { - start, - end, - format, - content, - }) - } - TokenKind::DocumentComment(comment) => { - let start = state.current.span; - let content = comment.clone(); - state.next(); - let end = state.current.span; - let format = CommentFormat::Document; - - Statement::Comment(Comment { - start, - end, - format, - content, - }) - } - TokenKind::Do => self.do_loop(state)?, - TokenKind::While => self.while_loop(state)?, - TokenKind::For => self.for_loop(state)?, - TokenKind::Foreach => self.foreach_loop(state)?, - TokenKind::Switch => self.switch_statement(state)?, - TokenKind::Continue => self.continue_statement(state)?, - TokenKind::Break => self.break_statement(state)?, - TokenKind::If => self.if_statement(state)?, - TokenKind::Echo => { - state.next(); - - let mut values = Vec::new(); - loop { - values.push(self.expression(state, Precedence::Lowest)?); - - if state.current.kind == TokenKind::Comma { - state.next(); - } else { - break; - } - } - - utils::skip_semicolon(state)?; - Statement::Echo { values } - } - TokenKind::Return => { - state.next(); - - if let Token { - kind: TokenKind::SemiColon, - .. - } = state.current - { - let ret = Statement::Return { value: None }; - utils::skip_semicolon(state)?; - ret - } else { - let ret = Statement::Return { - value: self.expression(state, Precedence::Lowest).ok(), - }; - utils::skip_semicolon(state)?; - ret - } - } - TokenKind::SemiColon => { - state.next(); - - Statement::Noop - } - TokenKind::Try => self.try_block(state)?, - TokenKind::LeftBrace => self.block_statement(state)?, - _ => { - let expr = self.expression(state, Precedence::Lowest)?; - - utils::skip_semicolon(state)?; - - Statement::Expression { expr } - } - } - }; + state.skip_comments(); + loop { state.skip_comments(); - // A closing PHP tag is valid after the end of any top-level statement. - if state.current.kind == TokenKind::CloseTag { - state.next(); + if matches!(state.current.kind, TokenKind::SemiColon | TokenKind::Eof) { + break; } - Ok(statement) - } + let span = state.current.span; + let kind = state.current.kind.clone(); - fn expression(&self, state: &mut State, precedence: Precedence) -> ParseResult { - if state.is_eof() { - return Err(ParseError::UnexpectedEndOfFile); - } + if is_postfix(&kind) { + let lpred = Precedence::postfix(&kind); - let has_attributes = self.gather_attributes(state)?; - - let mut left = if has_attributes { - match &state.current.kind { - TokenKind::Static if state.peek.kind == TokenKind::Function => { - self.anonymous_function(state)? - } - TokenKind::Static if state.peek.kind == TokenKind::Fn => { - self.arrow_function(state)? - } - TokenKind::Function => self.anonymous_function(state)?, - TokenKind::Fn => self.arrow_function(state)?, - _ => { - // Note, we can get attributes and know their span, maybe use that in the - // error in the future? - return Err(ParseError::ExpectedItemDefinitionAfterAttributes( - state.current.span, - )); - } + if lpred < precedence { + break; } - } else { - match &state.current.kind { - TokenKind::List => self.list_expression(state)?, - TokenKind::Static if state.peek.kind == TokenKind::Function => { - self.anonymous_function(state)? - } - TokenKind::Static if state.peek.kind == TokenKind::Fn => { - self.arrow_function(state)? - } - TokenKind::Function => self.anonymous_function(state)?, - TokenKind::Fn => self.arrow_function(state)?, - TokenKind::New - if state.peek.kind == TokenKind::Class - || state.peek.kind == TokenKind::Attribute => - { - self.anonymous_class_definition(state)? - } - TokenKind::Throw => { - state.next(); - let value = self.expression(state, Precedence::Lowest)?; + left = postfix(state, left, &kind)?; + continue; + } - Expression::Throw { - value: Box::new(value), + if is_infix(&kind) { + let rpred = Precedence::infix(&kind); + + if rpred < precedence { + break; + } + + if rpred == precedence && matches!(rpred.associativity(), Some(Associativity::Left)) { + break; + } + + if rpred == precedence && matches!(rpred.associativity(), Some(Associativity::Non)) { + return Err(ParseError::UnexpectedToken(kind.to_string(), span)); + } + + state.next(); + + match kind { + TokenKind::Question => { + let then = expression(state, Precedence::Lowest)?; + utils::skip_colon(state)?; + let otherwise = expression(state, rpred)?; + left = Expression::Ternary { + condition: Box::new(left), + then: Some(Box::new(then)), + r#else: Box::new(otherwise), } } - TokenKind::Yield => { - state.next(); - - if state.current.kind == TokenKind::SemiColon { - Expression::Yield { - key: None, - value: None, - } - } else { - let mut from = false; - - if state.current.kind == TokenKind::From { + TokenKind::QuestionColon => { + let r#else = expression(state, Precedence::Lowest)?; + left = Expression::Ternary { + condition: Box::new(left), + then: None, + r#else: Box::new(r#else), + } + } + _ => { + // FIXME: Hacky, should probably be refactored. + left = match kind { + TokenKind::Equals if state.current.kind == TokenKind::Ampersand => { state.next(); - from = true; - } - - let mut key = None; - let mut value = Box::new(self.expression( - state, - if from { - Precedence::YieldFrom - } else { - Precedence::Yield - }, - )?); - - if state.current.kind == TokenKind::DoubleArrow && !from { - state.next(); - key = Some(value.clone()); - value = Box::new(self.expression(state, Precedence::Yield)?); - } - - if from { - Expression::YieldFrom { value } - } else { - Expression::Yield { - key, - value: Some(value), + Expression::Infix { + lhs: Box::new(left), + op: ast::InfixOp::AssignRef, + rhs: Box::new(expression(state, rpred)?), } } - } - } - TokenKind::Clone => { - state.next(); - - let target = self.expression(state, Precedence::CloneOrNew)?; - - Expression::Clone { - target: Box::new(target), - } - } - TokenKind::Variable(_) => Expression::Variable(identifiers::var(state)?), - TokenKind::LiteralInteger(i) => { - let e = Expression::LiteralInteger { i: i.clone() }; - state.next(); - e - } - TokenKind::LiteralFloat(f) => { - let f = Expression::LiteralFloat { f: f.clone() }; - state.next(); - f - } - TokenKind::Identifier(_) - | TokenKind::QualifiedIdentifier(_) - | TokenKind::FullyQualifiedIdentifier(_) => { - Expression::Identifier(identifiers::full_name(state)?) - } - TokenKind::Self_ => { - if !state.has_class_scope { - return Err(ParseError::CannotFindTypeInCurrentScope( - state.current.kind.to_string(), - state.current.span, - )); - } - - state.next(); - - self.postfix(state, Expression::Self_, &TokenKind::DoubleColon)? - } - TokenKind::Static => { - if !state.has_class_scope { - return Err(ParseError::CannotFindTypeInCurrentScope( - state.current.kind.to_string(), - state.current.span, - )); - } - - state.next(); - - self.postfix(state, Expression::Static, &TokenKind::DoubleColon)? - } - TokenKind::Parent => { - if !state.has_class_parent_scope { - return Err(ParseError::CannotFindTypeInCurrentScope( - state.current.kind.to_string(), - state.current.span, - )); - } - - state.next(); - - self.postfix(state, Expression::Parent, &TokenKind::DoubleColon)? - } - TokenKind::LiteralString(s) => { - let e = Expression::LiteralString { value: s.clone() }; - state.next(); - e - } - TokenKind::StringPart(_) => self.interpolated_string(state)?, - TokenKind::StartDocString(_, kind) => { - let kind = *kind; - - self.doc_string(state, kind)? - } - TokenKind::Backtick => self.shell_exec(state)?, - TokenKind::True => { - let e = Expression::Bool { value: true }; - state.next(); - e - } - TokenKind::False => { - let e = Expression::Bool { value: false }; - state.next(); - e - } - TokenKind::Null => { - state.next(); - Expression::Null - } - TokenKind::LeftParen => { - state.next(); - - let e = self.expression(state, Precedence::Lowest)?; - - utils::skip_right_parenthesis(state)?; - - e - } - TokenKind::Match => { - state.next(); - utils::skip_left_parenthesis(state)?; - - let condition = Box::new(self.expression(state, Precedence::Lowest)?); - - utils::skip_right_parenthesis(state)?; - utils::skip_left_brace(state)?; - - let mut default = None; - let mut arms = Vec::new(); - while state.current.kind != TokenKind::RightBrace { - state.skip_comments(); - - if state.current.kind == TokenKind::Default { - if default.is_some() { - return Err(ParseError::MatchExpressionWithMultipleDefaultArms( + TokenKind::Instanceof if state.current.kind == TokenKind::Self_ => { + if !state.has_class_scope { + return Err(ParseError::CannotFindTypeInCurrentScope( + state.current.kind.to_string(), state.current.span, )); } state.next(); - // match conditions can have an extra comma at the end, including `default`. - if state.current.kind == TokenKind::Comma { - state.next(); + Expression::Infix { + lhs: Box::new(left), + op: ast::InfixOp::Instanceof, + rhs: Box::new(Expression::Self_), } - - utils::skip_double_arrow(state)?; - - let body = self.expression(state, Precedence::Lowest)?; - - default = Some(Box::new(DefaultMatchArm { body })); - } else { - let mut conditions = Vec::new(); - while state.current.kind != TokenKind::DoubleArrow { - conditions.push(self.expression(state, Precedence::Lowest)?); - - if state.current.kind == TokenKind::Comma { - state.next(); - } else { - break; - } - } - - if !conditions.is_empty() { - utils::skip_double_arrow(state)?; - } else { - break; - } - - let body = self.expression(state, Precedence::Lowest)?; - - arms.push(MatchArm { conditions, body }); } - - if state.current.kind == TokenKind::Comma { - state.next(); - } else { - break; - } - } - - utils::skip_right_brace(state)?; - - Expression::Match { - condition, - default, - arms, - } - } - TokenKind::Array => { - let mut items = vec![]; - - state.next(); - - utils::skip_left_parenthesis(state)?; - - while state.current.kind != TokenKind::RightParen { - let mut key = None; - let unpack = if state.current.kind == TokenKind::Ellipsis { - state.next(); - true - } else { - false - }; - - let (mut by_ref, amper_span) = if state.current.kind == TokenKind::Ampersand - { - let span = state.current.span; - state.next(); - (true, span) - } else { - (false, (0, 0)) - }; - - let mut value = self.expression(state, Precedence::Lowest)?; - - // TODO: return error for `[...$a => $b]`. - if state.current.kind == TokenKind::DoubleArrow { - state.next(); - - if by_ref { - return Err(ParseError::UnexpectedToken( - TokenKind::Ampersand.to_string(), - amper_span, + TokenKind::Instanceof if state.current.kind == TokenKind::Parent => { + if !state.has_class_scope { + return Err(ParseError::CannotFindTypeInCurrentScope( + state.current.kind.to_string(), + state.current.span, )); } - key = Some(value); - - by_ref = if state.current.kind == TokenKind::Ampersand { - state.next(); - true - } else { - false - }; - - value = self.expression(state, Precedence::Lowest)?; - } - - items.push(ArrayItem { - key, - value, - unpack, - by_ref, - }); - - if state.current.kind == TokenKind::Comma { state.next(); - } else { - break; + + Expression::Infix { + lhs: Box::new(left), + op: ast::InfixOp::Instanceof, + rhs: Box::new(Expression::Parent), + } } + TokenKind::Instanceof if state.current.kind == TokenKind::Static => { + if !state.has_class_scope { + return Err(ParseError::CannotFindTypeInCurrentScope( + state.current.kind.to_string(), + state.current.span, + )); + } - state.skip_comments(); - } + state.next(); - utils::skip_right_parenthesis(state)?; - - Expression::Array { items } + Expression::Infix { + lhs: Box::new(left), + op: ast::InfixOp::Instanceof, + rhs: Box::new(Expression::Static), + } + } + _ => Expression::Infix { + lhs: Box::new(left), + op: kind.into(), + rhs: Box::new(expression(state, rpred)?), + }, + }; } - TokenKind::LeftBracket => self.array_expression(state)?, - TokenKind::New => { + } + + continue; + } + + break; + } + + state.skip_comments(); + + Ok(left) +} + +fn postfix(state: &mut State, lhs: Expression, op: &TokenKind) -> Result { + Ok(match op { + TokenKind::Coalesce => { + state.next(); + + let rhs = expression(state, Precedence::NullCoalesce)?; + + Expression::Coalesce { + lhs: Box::new(lhs), + rhs: Box::new(rhs), + } + } + TokenKind::LeftParen => { + let args = parameters::args_list(state)?; + + Expression::Call { + target: Box::new(lhs), + args, + } + } + TokenKind::LeftBracket => { + utils::skip_left_bracket(state)?; + + if state.current.kind == TokenKind::RightBracket { + state.next(); + + Expression::ArrayIndex { + array: Box::new(lhs), + index: None, + } + } else { + let index = expression(state, Precedence::Lowest)?; + + utils::skip_right_bracket(state)?; + + Expression::ArrayIndex { + array: Box::new(lhs), + index: Some(Box::new(index)), + } + } + } + TokenKind::DoubleColon => { + utils::skip_double_colon(state)?; + + let mut must_be_method_call = false; + + let property = match state.current.kind.clone() { + TokenKind::Dollar => variables::dynamic_variable(state)?, + TokenKind::Variable(_) => Expression::Variable(identifiers::var(state)?), + TokenKind::Identifier(_) => Expression::Identifier(identifiers::ident(state)?), + TokenKind::LeftBrace => { + must_be_method_call = true; state.next(); - let mut args = vec![]; - let target = self.expression(state, Precedence::CloneOrNew)?; + let name = expression(state, Precedence::Lowest)?; - if state.current.kind == TokenKind::LeftParen { - args = self.args_list(state)?; + utils::skip_right_brace(state)?; + + Expression::DynamicVariable { + name: Box::new(name), } + } + TokenKind::Class => { + let start = state.current.span; + state.next(); + let end = state.current.span; - Expression::New { - target: Box::new(target), + Expression::Identifier(Identifier { + start, + name: "class".into(), + end, + }) + } + _ if identifiers::is_reserved_ident(&state.current.kind) => { + Expression::Identifier(identifiers::ident_maybe_reserved(state)?) + } + _ => { + return expected_token_err!(["`{`", "`$`", "an identifier"], state); + } + }; + + let lhs = Box::new(lhs); + + match property { + // 1. If we have an identifier and the current token is not a left paren, + // the resulting expression must be a constant fetch. + Expression::Identifier(identifier) + if state.current.kind != TokenKind::LeftParen => + { + Expression::ConstFetch { + target: lhs, + constant: identifier, + } + } + // 2. If the current token is a left paren, or if we know the property expression + // is only valid a method call context, we can assume we're parsing a static + // method call. + _ if state.current.kind == TokenKind::LeftParen || must_be_method_call => { + let args = parameters::args_list(state)?; + + Expression::StaticMethodCall { + target: lhs, + method: Box::new(property), args, } } - TokenKind::DirConstant => { - state.next(); - Expression::MagicConst { - constant: MagicConst::Dir, - } - } - TokenKind::Include - | TokenKind::IncludeOnce - | TokenKind::Require - | TokenKind::RequireOnce => { - let kind: IncludeKind = (&state.current.kind).into(); - state.next(); - - let path = self.expression(state, Precedence::Lowest)?; - - Expression::Include { - kind, - path: Box::new(path), - } - } - _ if is_prefix(&state.current.kind) => { - let op = state.current.kind.clone(); - - state.next(); - - let rpred = Precedence::prefix(&op); - let rhs = self.expression(state, rpred)?; - - prefix(&op, rhs) - } - TokenKind::Dollar => self.dynamic_variable(state)?, - _ => { - return Err(ParseError::UnexpectedToken( - state.current.kind.to_string(), - state.current.span, - )) - } + // 3. If we haven't met any of the previous conditions, we can assume + // that we're parsing a static property fetch. + _ => Expression::StaticPropertyFetch { + target: lhs, + property: Box::new(property), + }, } - }; - - if state.current.kind == TokenKind::SemiColon { - return Ok(left); } + TokenKind::Arrow | TokenKind::NullsafeArrow => { + state.next(); - state.skip_comments(); - - loop { - state.skip_comments(); - - if matches!(state.current.kind, TokenKind::SemiColon | TokenKind::Eof) { - break; - } - - let span = state.current.span; - let kind = state.current.kind.clone(); - - if is_postfix(&kind) { - let lpred = Precedence::postfix(&kind); - - if lpred < precedence { - break; + let property = match state.current.kind { + TokenKind::LeftBrace => { + utils::skip_left_brace(state)?; + let expr = expression(state, Precedence::Lowest)?; + utils::skip_right_brace(state)?; + expr } + TokenKind::Variable(_) => Expression::Variable(identifiers::var(state)?), + TokenKind::Dollar => variables::dynamic_variable(state)?, + _ => Expression::Identifier(identifiers::ident_maybe_reserved(state)?), + }; - left = self.postfix(state, left, &kind)?; - continue; - } + if state.current.kind == TokenKind::LeftParen { + let args = parameters::args_list(state)?; - if is_infix(&kind) { - let rpred = Precedence::infix(&kind); - - if rpred < precedence { - break; - } - - if rpred == precedence && matches!(rpred.associativity(), Some(Associativity::Left)) - { - break; - } - - if rpred == precedence && matches!(rpred.associativity(), Some(Associativity::Non)) - { - return Err(ParseError::UnexpectedToken(kind.to_string(), span)); - } - - state.next(); - - match kind { - TokenKind::Question => { - let then = self.expression(state, Precedence::Lowest)?; - utils::colon(state)?; - let otherwise = self.expression(state, rpred)?; - left = Expression::Ternary { - condition: Box::new(left), - then: Some(Box::new(then)), - r#else: Box::new(otherwise), - } - } - TokenKind::QuestionColon => { - let r#else = self.expression(state, Precedence::Lowest)?; - left = Expression::Ternary { - condition: Box::new(left), - then: None, - r#else: Box::new(r#else), - } - } - _ => { - // FIXME: Hacky, should probably be refactored. - let by_ref = - kind == TokenKind::Equals && state.current.kind == TokenKind::Ampersand; - if by_ref { - state.next(); - } - - let rhs = self.expression(state, rpred)?; - - left = infix(left, kind, rhs, by_ref); - } - } - - continue; - } - - break; - } - - state.skip_comments(); - - Ok(left) - } - - fn postfix( - &self, - state: &mut State, - lhs: Expression, - op: &TokenKind, - ) -> Result { - Ok(match op { - TokenKind::Coalesce => { - state.next(); - - let rhs = self.expression(state, Precedence::NullCoalesce)?; - - Expression::Coalesce { - lhs: Box::new(lhs), - rhs: Box::new(rhs), - } - } - TokenKind::LeftParen => { - let args = self.args_list(state)?; - - Expression::Call { - target: Box::new(lhs), - args, - } - } - TokenKind::LeftBracket => { - utils::skip_left_bracket(state)?; - - if state.current.kind == TokenKind::RightBracket { - state.next(); - - Expression::ArrayIndex { - array: Box::new(lhs), - index: None, + if op == &TokenKind::NullsafeArrow { + Expression::NullsafeMethodCall { + target: Box::new(lhs), + method: Box::new(property), + args, } } else { - let index = self.expression(state, Precedence::Lowest)?; + Expression::MethodCall { + target: Box::new(lhs), + method: Box::new(property), + args, + } + } + } else if op == &TokenKind::NullsafeArrow { + Expression::NullsafePropertyFetch { + target: Box::new(lhs), + property: Box::new(property), + } + } else { + Expression::PropertyFetch { + target: Box::new(lhs), + property: Box::new(property), + } + } + } + TokenKind::Increment => { + state.next(); + Expression::Increment { + value: Box::new(lhs), + } + } + TokenKind::Decrement => { + state.next(); + + Expression::Decrement { + value: Box::new(lhs), + } + } + _ => todo!("postfix: {:?}", op), + }) +} + +#[inline(always)] +fn interpolated_string(state: &mut State) -> ParseResult { + let mut parts = Vec::new(); + + while state.current.kind != TokenKind::DoubleQuote { + if let Some(part) = interpolated_string_part(state)? { + parts.push(part); + } + } + + state.next(); + + Ok(Expression::InterpolatedString { parts }) +} + +#[inline(always)] +fn shell_exec(state: &mut State) -> ParseResult { + state.next(); + + let mut parts = Vec::new(); + + while state.current.kind != TokenKind::Backtick { + if let Some(part) = interpolated_string_part(state)? { + parts.push(part); + } + } + + state.next(); + + Ok(Expression::ShellExec { parts }) +} + +#[inline(always)] +fn doc_string(state: &mut State, kind: DocStringKind) -> ParseResult { + state.next(); + + Ok(match kind { + DocStringKind::Heredoc => { + let mut parts = Vec::new(); + + while !matches!(state.current.kind, TokenKind::EndDocString(_, _, _)) { + if let Some(part) = interpolated_string_part(state)? { + parts.push(part); + } + } + + let (indentation_type, indentation_amount) = match state.current.kind { + TokenKind::EndDocString(_, indentation_type, indentation_amount) => { + (indentation_type, indentation_amount) + } + _ => unreachable!(), + }; + + state.next(); + + // FIXME: Can we move this logic above into the loop, by peeking ahead in + // the token stream for the EndHeredoc? Might be more performant. + if let Some(indentation_type) = indentation_type { + let search_char: u8 = indentation_type.into(); + + for part in parts.iter_mut() { + match part { + StringPart::Const(bytes) => { + for _ in 0..indentation_amount { + if bytes.starts_with(&[search_char]) { + bytes.remove(0); + } + } + } + _ => continue, + } + } + } + + Expression::Heredoc { parts } + } + DocStringKind::Nowdoc => { + // FIXME: This feels hacky. We should probably produce different tokens from the lexer + // but since I already had the logic in place for parsing heredocs, this was + // the fastest way to get nowdocs working too. + let mut s = expect_token!([ + TokenKind::StringPart(s) => s + ], state, "constant string"); + + let (indentation_type, indentation_amount) = expect_token!([ + TokenKind::EndDocString(_, indentation_type, indentation_amount) => (indentation_type, indentation_amount) + ], state, "label"); + + // FIXME: Hacky code, but it's late and I want to get this done. + if let Some(indentation_type) = indentation_type { + let search_char: u8 = indentation_type.into(); + let mut lines = s + .split(|b| *b == b'\n') + .map(|s| s.to_vec()) + .collect::>>(); + for line in lines.iter_mut() { + for _ in 0..indentation_amount { + if line.starts_with(&[search_char]) { + line.remove(0); + } + } + } + let mut bytes = Vec::new(); + for (i, line) in lines.iter().enumerate() { + bytes.extend(line); + if i < lines.len() - 1 { + bytes.push(b'\n'); + } + } + s = bytes.into(); + } + + Expression::Nowdoc { value: s } + } + }) +} + +fn interpolated_string_part(state: &mut State) -> ParseResult> { + Ok(match &state.current.kind { + TokenKind::StringPart(s) => { + let part = if s.len() > 0 { + Some(StringPart::Const(s.clone())) + } else { + None + }; + + state.next(); + part + } + TokenKind::DollarLeftBrace => { + state.next(); + let e = match (state.current.kind.clone(), state.peek.kind.clone()) { + (TokenKind::Identifier(name), TokenKind::RightBrace) => { + let start = state.current.span; + let end = state.peek.span; + + state.next(); + state.next(); + // "${var}" + // TODO: we should use a different node for this. + Expression::Variable(Variable { start, name, end }) + } + (TokenKind::Identifier(name), TokenKind::LeftBracket) => { + let start = state.current.span; + let end = state.peek.span; + state.next(); + state.next(); + let var = Expression::Variable(Variable { start, name, end }); + + let e = expression(state, Precedence::Lowest)?; + utils::skip_right_bracket(state)?; + utils::skip_right_brace(state)?; + + // TODO: we should use a different node for this. + Expression::ArrayIndex { + array: Box::new(var), + index: Some(Box::new(e)), + } + } + _ => { + // Arbitrary expressions are allowed, but are treated as variable variables. + let e = expression(state, Precedence::Lowest)?; + utils::skip_right_brace(state)?; + + Expression::DynamicVariable { name: Box::new(e) } + } + }; + Some(StringPart::Expr(Box::new(e))) + } + TokenKind::LeftBrace => { + // "{$expr}" + state.next(); + let e = expression(state, Precedence::Lowest)?; + utils::skip_right_brace(state)?; + Some(StringPart::Expr(Box::new(e))) + } + TokenKind::Variable(_) => { + // "$expr", "$expr[0]", "$expr[name]", "$expr->a" + let var = Expression::Variable(identifiers::var(state)?); + let e = match state.current.kind { + TokenKind::LeftBracket => { + state.next(); + // Full expression syntax is not allowed here, + // so we can't call expression. + let index = match &state.current.kind { + TokenKind::LiteralInteger(i) => { + let e = Expression::LiteralInteger { i: i.clone() }; + state.next(); + e + } + TokenKind::Minus => { + state.next(); + if let TokenKind::LiteralInteger(i) = &state.current.kind { + let e = Expression::Negate { + value: Box::new(Expression::LiteralInteger { i: i.clone() }), + }; + state.next(); + e + } else { + return expected_token_err!("an integer", state); + } + } + TokenKind::Identifier(ident) => { + let e = Expression::LiteralString { + value: ident.clone(), + }; + state.next(); + e + } + TokenKind::Variable(_) => { + let v = identifiers::var(state)?; + Expression::Variable(v) + } + _ => { + return expected_token_err!( + ["`-`", "an integer", "an identifier", "a variable"], + state + ); + } + }; utils::skip_right_bracket(state)?; Expression::ArrayIndex { - array: Box::new(lhs), + array: Box::new(var), index: Some(Box::new(index)), } } - } - TokenKind::DoubleColon => { - utils::skip_double_colon(state)?; - - let mut must_be_method_call = false; - - let property = match state.current.kind.clone() { - TokenKind::Dollar => self.dynamic_variable(state)?, - TokenKind::Variable(_) => Expression::Variable(identifiers::var(state)?), - TokenKind::Identifier(_) => Expression::Identifier(identifiers::ident(state)?), - TokenKind::LeftBrace => { - must_be_method_call = true; - state.next(); - - let name = self.expression(state, Precedence::Lowest)?; - - utils::skip_right_brace(state)?; - - Expression::DynamicVariable { - name: Box::new(name), - } - } - TokenKind::Class => { - let start = state.current.span; - state.next(); - let end = state.current.span; - - Expression::Identifier(Identifier { - start, - name: "class".into(), - end, - }) - } - _ if is_reserved_ident(&state.current.kind) => { - Expression::Identifier(identifiers::ident_maybe_reserved(state)?) - } - _ => { - return expected_token_err!(["`{`", "`$`", "an identifier"], state); - } - }; - - let lhs = Box::new(lhs); - - match property { - // 1. If we have an identifier and the current token is not a left paren, - // the resulting expression must be a constant fetch. - Expression::Identifier(identifier) - if state.current.kind != TokenKind::LeftParen => - { - Expression::ConstFetch { - target: lhs, - constant: identifier, - } - } - // 2. If the current token is a left paren, or if we know the property expression - // is only valid a method call context, we can assume we're parsing a static - // method call. - _ if state.current.kind == TokenKind::LeftParen || must_be_method_call => { - let args = self.args_list(state)?; - - Expression::StaticMethodCall { - target: lhs, - method: Box::new(property), - args, - } - } - // 3. If we haven't met any of the previous conditions, we can assume - // that we're parsing a static property fetch. - _ => Expression::StaticPropertyFetch { - target: lhs, - property: Box::new(property), - }, - } - } - TokenKind::Arrow | TokenKind::NullsafeArrow => { - state.next(); - - let property = match state.current.kind { - TokenKind::LeftBrace => { - utils::skip_left_brace(state)?; - let expr = self.expression(state, Precedence::Lowest)?; - utils::skip_right_brace(state)?; - expr - } - TokenKind::Variable(_) => Expression::Variable(identifiers::var(state)?), - TokenKind::Dollar => self.dynamic_variable(state)?, - _ => Expression::Identifier(identifiers::ident_maybe_reserved(state)?), - }; - - if state.current.kind == TokenKind::LeftParen { - let args = self.args_list(state)?; - - if op == &TokenKind::NullsafeArrow { - Expression::NullsafeMethodCall { - target: Box::new(lhs), - method: Box::new(property), - args, - } - } else { - Expression::MethodCall { - target: Box::new(lhs), - method: Box::new(property), - args, - } - } - } else if op == &TokenKind::NullsafeArrow { - Expression::NullsafePropertyFetch { - target: Box::new(lhs), - property: Box::new(property), - } - } else { + TokenKind::Arrow => { + state.next(); Expression::PropertyFetch { - target: Box::new(lhs), - property: Box::new(property), + target: Box::new(var), + property: Box::new(Expression::Identifier( + identifiers::ident_maybe_reserved(state)?, + )), } } - } - TokenKind::Increment => { - state.next(); - Expression::Increment { - value: Box::new(lhs), + TokenKind::NullsafeArrow => { + state.next(); + Expression::NullsafePropertyFetch { + target: Box::new(var), + property: Box::new(Expression::Identifier( + identifiers::ident_maybe_reserved(state)?, + )), + } } - } - TokenKind::Decrement => { - state.next(); - - Expression::Decrement { - value: Box::new(lhs), - } - } - _ => todo!("postfix: {:?}", op), - }) - } - - #[inline(always)] - fn interpolated_string(&self, state: &mut State) -> ParseResult { - let mut parts = Vec::new(); - - while state.current.kind != TokenKind::DoubleQuote { - if let Some(part) = self.interpolated_string_part(state)? { - parts.push(part); - } + _ => var, + }; + Some(StringPart::Expr(Box::new(e))) } - - state.next(); - - Ok(Expression::InterpolatedString { parts }) - } - - #[inline(always)] - fn shell_exec(&self, state: &mut State) -> ParseResult { - state.next(); - - let mut parts = Vec::new(); - - while state.current.kind != TokenKind::Backtick { - if let Some(part) = self.interpolated_string_part(state)? { - parts.push(part); - } + _ => { + return expected_token_err!(["`${`", "`{$", "`\"`", "a variable"], state); } - - state.next(); - - Ok(Expression::ShellExec { parts }) - } - - #[inline(always)] - fn doc_string(&self, state: &mut State, kind: DocStringKind) -> ParseResult { - state.next(); - - Ok(match kind { - DocStringKind::Heredoc => { - let mut parts = Vec::new(); - - while !matches!(state.current.kind, TokenKind::EndDocString(_, _, _)) { - if let Some(part) = self.interpolated_string_part(state)? { - parts.push(part); - } - } - - let (indentation_type, indentation_amount) = match state.current.kind { - TokenKind::EndDocString(_, indentation_type, indentation_amount) => { - (indentation_type, indentation_amount) - } - _ => unreachable!(), - }; - - state.next(); - - // FIXME: Can we move this logic above into the loop, by peeking ahead in - // the token stream for the EndHeredoc? Might be more performant. - if let Some(indentation_type) = indentation_type { - let search_char: u8 = indentation_type.into(); - - for part in parts.iter_mut() { - match part { - StringPart::Const(bytes) => { - for _ in 0..indentation_amount { - if bytes.starts_with(&[search_char]) { - bytes.remove(0); - } - } - } - _ => continue, - } - } - } - - Expression::Heredoc { parts } - } - DocStringKind::Nowdoc => { - // FIXME: This feels hacky. We should probably produce different tokens from the lexer - // but since I already had the logic in place for parsing heredocs, this was - // the fastest way to get nowdocs working too. - let mut s = expect_token!([ - TokenKind::StringPart(s) => s - ], state, "constant string"); - - let (indentation_type, indentation_amount) = expect_token!([ - TokenKind::EndDocString(_, indentation_type, indentation_amount) => (indentation_type, indentation_amount) - ], state, "label"); - - // FIXME: Hacky code, but it's late and I want to get this done. - if let Some(indentation_type) = indentation_type { - let search_char: u8 = indentation_type.into(); - let mut lines = s - .split(|b| *b == b'\n') - .map(|s| s.to_vec()) - .collect::>>(); - for line in lines.iter_mut() { - for _ in 0..indentation_amount { - if line.starts_with(&[search_char]) { - line.remove(0); - } - } - } - let mut bytes = Vec::new(); - for (i, line) in lines.iter().enumerate() { - bytes.extend(line); - if i < lines.len() - 1 { - bytes.push(b'\n'); - } - } - s = bytes.into(); - } - - Expression::Nowdoc { value: s } - } - }) - } - - fn interpolated_string_part(&self, state: &mut State) -> ParseResult> { - Ok(match &state.current.kind { - TokenKind::StringPart(s) => { - let part = if s.len() > 0 { - Some(StringPart::Const(s.clone())) - } else { - None - }; - - state.next(); - part - } - TokenKind::DollarLeftBrace => { - state.next(); - let e = match (state.current.kind.clone(), state.peek.kind.clone()) { - (TokenKind::Identifier(name), TokenKind::RightBrace) => { - let start = state.current.span; - let end = state.peek.span; - - state.next(); - state.next(); - // "${var}" - // TODO: we should use a different node for this. - Expression::Variable(Variable { start, name, end }) - } - (TokenKind::Identifier(name), TokenKind::LeftBracket) => { - let start = state.current.span; - let end = state.peek.span; - state.next(); - state.next(); - let var = Expression::Variable(Variable { start, name, end }); - - let e = self.expression(state, Precedence::Lowest)?; - utils::skip_right_bracket(state)?; - utils::skip_right_brace(state)?; - - // TODO: we should use a different node for this. - Expression::ArrayIndex { - array: Box::new(var), - index: Some(Box::new(e)), - } - } - _ => { - // Arbitrary expressions are allowed, but are treated as variable variables. - let e = self.expression(state, Precedence::Lowest)?; - utils::skip_right_brace(state)?; - - Expression::DynamicVariable { name: Box::new(e) } - } - }; - Some(StringPart::Expr(Box::new(e))) - } - TokenKind::LeftBrace => { - // "{$expr}" - state.next(); - let e = self.expression(state, Precedence::Lowest)?; - utils::skip_right_brace(state)?; - Some(StringPart::Expr(Box::new(e))) - } - TokenKind::Variable(_) => { - // "$expr", "$expr[0]", "$expr[name]", "$expr->a" - let var = Expression::Variable(identifiers::var(state)?); - let e = match state.current.kind { - TokenKind::LeftBracket => { - state.next(); - // Full expression syntax is not allowed here, - // so we can't call self.expression. - let index = match &state.current.kind { - TokenKind::LiteralInteger(i) => { - let e = Expression::LiteralInteger { i: i.clone() }; - state.next(); - e - } - TokenKind::Minus => { - state.next(); - if let TokenKind::LiteralInteger(i) = &state.current.kind { - let e = Expression::Negate { - value: Box::new(Expression::LiteralInteger { - i: i.clone(), - }), - }; - state.next(); - e - } else { - return expected_token_err!("an integer", state); - } - } - TokenKind::Identifier(ident) => { - let e = Expression::LiteralString { - value: ident.clone(), - }; - state.next(); - e - } - TokenKind::Variable(_) => { - let v = identifiers::var(state)?; - Expression::Variable(v) - } - _ => { - return expected_token_err!( - ["`-`", "an integer", "an identifier", "a variable"], - state - ); - } - }; - - utils::skip_right_bracket(state)?; - - Expression::ArrayIndex { - array: Box::new(var), - index: Some(Box::new(index)), - } - } - TokenKind::Arrow => { - state.next(); - Expression::PropertyFetch { - target: Box::new(var), - property: Box::new(Expression::Identifier( - identifiers::ident_maybe_reserved(state)?, - )), - } - } - TokenKind::NullsafeArrow => { - state.next(); - Expression::NullsafePropertyFetch { - target: Box::new(var), - property: Box::new(Expression::Identifier( - identifiers::ident_maybe_reserved(state)?, - )), - } - } - _ => var, - }; - Some(StringPart::Expr(Box::new(e))) - } - _ => { - return expected_token_err!(["`${`", "`{$", "`\"`", "a variable"], state); - } - }) - } + }) } #[inline(always)] @@ -1501,18 +1369,6 @@ fn prefix(op: &TokenKind, rhs: Expression) -> Expression { } } -#[inline(always)] -fn infix(lhs: Expression, op: TokenKind, rhs: Expression, by_ref: bool) -> Expression { - Expression::Infix { - lhs: Box::new(lhs), - op: match (&op, by_ref) { - (TokenKind::Equals, true) => ast::InfixOp::AssignRef, - _ => op.into(), - }, - rhs: Box::new(rhs), - } -} - fn is_infix(t: &TokenKind) -> bool { matches!( t, diff --git a/tests/fixtures/0067/ast.txt b/tests/fixtures/0067/ast.txt index 406f718..21be797 100644 --- a/tests/fixtures/0067/ast.txt +++ b/tests/fixtures/0067/ast.txt @@ -1,3 +1,8 @@ [ - Noop, + Noop( + ( + 1, + 7, + ), + ), ] diff --git a/tests/fixtures/0153/ast.txt b/tests/fixtures/0153/ast.txt index 4da6dac..c82927d 100644 --- a/tests/fixtures/0153/ast.txt +++ b/tests/fixtures/0153/ast.txt @@ -12,7 +12,12 @@ ), }, body: [ - Noop, + Noop( + ( + 3, + 18, + ), + ), Class { name: Identifier { start: ( diff --git a/tests/fixtures/0155/ast.txt b/tests/fixtures/0155/ast.txt index 1d1f854..da05ae8 100644 --- a/tests/fixtures/0155/ast.txt +++ b/tests/fixtures/0155/ast.txt @@ -12,7 +12,12 @@ ), }, body: [ - Noop, + Noop( + ( + 3, + 18, + ), + ), Class { name: Identifier { start: ( diff --git a/tests/fixtures/0158/ast.txt b/tests/fixtures/0158/ast.txt index 14b28c7..adcc6d1 100644 --- a/tests/fixtures/0158/ast.txt +++ b/tests/fixtures/0158/ast.txt @@ -12,7 +12,12 @@ ), }, body: [ - Noop, + Noop( + ( + 3, + 18, + ), + ), Function( Function { start: ( @@ -66,7 +71,12 @@ ), }, body: [ - Noop, + Noop( + ( + 7, + 18, + ), + ), Function( Function { start: ( diff --git a/tests/fixtures/0178/ast.txt b/tests/fixtures/0178/ast.txt index 9e60d88..3f1f95a 100644 --- a/tests/fixtures/0178/ast.txt +++ b/tests/fixtures/0178/ast.txt @@ -33,7 +33,12 @@ ), }, body: [ - Noop, + Noop( + ( + 13, + 23, + ), + ), Use { uses: [ Use { diff --git a/tests/fixtures/0220/ast.txt b/tests/fixtures/0220/ast.txt index 9f0f8e5..2b39c3a 100644 --- a/tests/fixtures/0220/ast.txt +++ b/tests/fixtures/0220/ast.txt @@ -19,7 +19,12 @@ modifiers: [], }, }, - Noop, + Noop( + ( + 3, + 13, + ), + ), Expression { expr: Infix { lhs: Variable( diff --git a/tests/fixtures/0274/code.php b/tests/fixtures/0274/code.php new file mode 100644 index 0000000..dda51d6 --- /dev/null +++ b/tests/fixtures/0274/code.php @@ -0,0 +1,9 @@ + Parse Error: Cannot find type `parent` in this scope on line 6 on column 20 diff --git a/tests/fixtures/0274/tokens.txt b/tests/fixtures/0274/tokens.txt new file mode 100644 index 0000000..e458925 --- /dev/null +++ b/tests/fixtures/0274/tokens.txt @@ -0,0 +1,201 @@ +[ + Token { + kind: OpenTag( + Full, + ), + span: ( + 1, + 1, + ), + }, + Token { + kind: Class, + span: ( + 3, + 1, + ), + }, + Token { + kind: Identifier( + "a", + ), + span: ( + 3, + 7, + ), + }, + Token { + kind: LeftBrace, + span: ( + 3, + 9, + ), + }, + Token { + kind: Public, + span: ( + 4, + 5, + ), + }, + Token { + kind: Function, + span: ( + 4, + 12, + ), + }, + Token { + kind: Identifier( + "foo", + ), + span: ( + 4, + 21, + ), + }, + Token { + kind: LeftParen, + span: ( + 4, + 24, + ), + }, + Token { + kind: RightParen, + span: ( + 4, + 25, + ), + }, + Token { + kind: LeftBrace, + span: ( + 4, + 27, + ), + }, + Token { + kind: Variable( + "q", + ), + span: ( + 5, + 9, + ), + }, + Token { + kind: Equals, + span: ( + 5, + 12, + ), + }, + Token { + kind: Function, + span: ( + 5, + 14, + ), + }, + Token { + kind: LeftParen, + span: ( + 5, + 22, + ), + }, + Token { + kind: RightParen, + span: ( + 5, + 23, + ), + }, + Token { + kind: LeftBrace, + span: ( + 5, + 25, + ), + }, + Token { + kind: Return, + span: ( + 6, + 13, + ), + }, + Token { + kind: Parent, + span: ( + 6, + 20, + ), + }, + Token { + kind: DoubleColon, + span: ( + 6, + 26, + ), + }, + Token { + kind: Identifier( + "bar", + ), + span: ( + 6, + 28, + ), + }, + Token { + kind: LeftParen, + span: ( + 6, + 31, + ), + }, + Token { + kind: RightParen, + span: ( + 6, + 32, + ), + }, + Token { + kind: SemiColon, + span: ( + 6, + 33, + ), + }, + Token { + kind: RightBrace, + span: ( + 7, + 9, + ), + }, + Token { + kind: SemiColon, + span: ( + 7, + 10, + ), + }, + Token { + kind: RightBrace, + span: ( + 8, + 5, + ), + }, + Token { + kind: RightBrace, + span: ( + 9, + 1, + ), + }, +] diff --git a/tests/test.rs b/tests/test.rs index 2a6887b..7793095 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -5,10 +5,8 @@ use std::path::PathBuf; use pretty_assertions::assert_str_eq; use php_parser_rs::lexer::Lexer; -use php_parser_rs::parser::Parser; static LEXER: Lexer = Lexer::new(); -static PARSER: Parser = Parser::new(); #[test] fn test_fixtures() { @@ -71,7 +69,7 @@ fn test_fixtures() { if ast_file.exists() { let expected_ast = std::fs::read_to_string(&ast_file).unwrap(); - let ast = PARSER.parse(tokens).unwrap(); + let ast = php_parser_rs::parse(tokens).unwrap(); assert_str_eq!( expected_ast.trim(), format!("{:#?}", ast), @@ -89,7 +87,7 @@ fn test_fixtures() { ); let expected_error = std::fs::read_to_string(&parse_err_file).unwrap(); - let error = PARSER.parse(tokens).err().unwrap(); + let error = php_parser_rs::parse(tokens).err().unwrap(); assert_str_eq!( expected_error.trim(), diff --git a/tests/third_party_tests.rs b/tests/third_party_tests.rs index 3784fca..8d4eb3d 100644 --- a/tests/third_party_tests.rs +++ b/tests/third_party_tests.rs @@ -4,7 +4,6 @@ use std::path::PathBuf; use std::process::Command; use php_parser_rs::lexer::Lexer; -use php_parser_rs::parser::Parser; #[test] fn third_party_1_php_standard_library() { @@ -148,8 +147,7 @@ fn test_file(name: &str, filename: PathBuf) { Lexer::new() .tokenize(&code) .map(|tokens| { - Parser::new() - .parse(tokens) + php_parser_rs::parse(tokens) .map(|_| { println!("✅ successfully parsed file: `\"{}\"`.", name); })