From 5b97f54ad48540c65ba819f172f645229cf0f63a Mon Sep 17 00:00:00 2001 From: Ryan Chandler Date: Tue, 19 Jul 2022 13:50:14 +0100 Subject: [PATCH] parser: refactor to use a borrow friendly structure --- trunk_lexer/src/token.rs | 9 +- trunk_parser/src/ast.rs | 15 +++ trunk_parser/src/parser.rs | 238 +++++++++++++++++++++---------------- 3 files changed, 159 insertions(+), 103 deletions(-) diff --git a/trunk_lexer/src/token.rs b/trunk_lexer/src/token.rs index 748a5e9..4fe2b3c 100644 --- a/trunk_lexer/src/token.rs +++ b/trunk_lexer/src/token.rs @@ -34,10 +34,17 @@ pub enum TokenKind { SemiColon, Comma, InlineHtml(String), + Eof, } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct Token { pub kind: TokenKind, pub span: Span, +} + +impl Default for Token { + fn default() -> Self { + Self { kind: TokenKind::Eof, span: (0, 0) } + } } \ No newline at end of file diff --git a/trunk_parser/src/ast.rs b/trunk_parser/src/ast.rs index efcabd8..e6fc555 100644 --- a/trunk_parser/src/ast.rs +++ b/trunk_parser/src/ast.rs @@ -14,6 +14,12 @@ impl From for Identifier { } } +impl From<&String> for Identifier { + fn from(name: &String) -> Self { + Self::from(name.to_string()) + } +} + #[derive(Debug, PartialEq, Clone)] pub struct Param { name: Expression, @@ -25,6 +31,12 @@ impl From for Param { } } +impl From<&String> for Param { + fn from(name: &String) -> Self { + Self::from(name.to_string()) + } +} + impl From<&str> for Param { fn from(name: &str) -> Self { Self::from(name.to_string()) @@ -67,6 +79,9 @@ pub enum Statement { Echo { values: Vec, }, + Expression { + expr: Expression, + } } #[derive(Debug, PartialEq, Clone)] diff --git a/trunk_parser/src/parser.rs b/trunk_parser/src/parser.rs index 49479e9..b23d481 100644 --- a/trunk_parser/src/parser.rs +++ b/trunk_parser/src/parser.rs @@ -1,83 +1,83 @@ use std::{vec::IntoIter}; -use std::iter::Peekable; use trunk_lexer::{Token, TokenKind}; use crate::{Program, Statement, Block, Expression, ast::MethodFlag}; macro_rules! expect { - ($actual:expr, $expected:pat, $out:expr, $message:literal) => { - match $actual { - Some(token) => match token.kind { - $expected => $out, - _ => return Err(ParseError::ExpectedToken($message.into())) + ($parser:expr, $expected:pat, $out:expr, $message:literal) => { + match $parser.current.kind.clone() { + $expected => { + $parser.next(); + $out }, - None => return Err(ParseError::ExpectedToken($message.into())) + _ => return Err(ParseError::ExpectedToken($message.into())), } }; - ($actual:expr, $expected:pat, $message:literal) => { - match $actual { - Some(token) => match token.kind { - $expected => (), - _ => return Err(ParseError::ExpectedToken($message.into())) - }, - None => return Err(ParseError::ExpectedToken($message.into())) + ($parser:expr, $expected:pat, $message:literal) => { + match $parser.current.kind.clone() { + $expected => { $parser.next(); }, + _ => return Err(ParseError::ExpectedToken($message.into())), } }; } -pub struct Parser; +pub struct Parser { + pub current: Token, + pub peek: Token, + iter: IntoIter, +} #[allow(dead_code)] impl Parser { - pub fn new() -> Self { - Self + pub fn new(tokens: Vec) -> Self { + let mut this = Self { + current: Token::default(), + peek: Token::default(), + iter: tokens.into_iter(), + }; + + this.next(); + this.next(); + this } - pub fn parse(&self, tokens: Vec) -> Result { - let mut program = Program::new(); - let mut iter = tokens.into_iter().peekable(); - - while let Some(t) = iter.next() { - if let TokenKind::OpenTag(_) = t.kind { - continue; - } - - program.push(self.statement(t, &mut iter)?); - } - - Ok(program) - } - - #[allow(dead_code)] - fn statement(&self, t: Token, tokens: &mut Peekable>) -> Result { - Ok(match t.kind { - TokenKind::InlineHtml(html) => Statement::InlineHtml(html), + fn statement(&mut self) -> Result { + Ok(match &self.current.kind { + TokenKind::InlineHtml(html) => { + let s = Statement::InlineHtml(html.to_string()); + self.next(); + s + }, TokenKind::If => { - expect!(tokens.next(), TokenKind::LeftParen, "expected ("); + self.next(); - let condition = self.expression(tokens, 0)?; + expect!(self, TokenKind::LeftParen, "expected ("); - expect!(tokens.next(), TokenKind::RightParen, "expected )"); + let condition = self.expression(0)?; + + expect!(self, TokenKind::RightParen, "expected )"); // TODO: Support one-liner if statements. - expect!(tokens.next(), TokenKind::LeftBrace, "expected {"); + expect!(self, TokenKind::LeftBrace, "expected {"); let mut then = Block::new(); - while let Some(t) = tokens.peek() && t.kind != TokenKind::RightBrace { - then.push(self.statement(tokens.next().unwrap(), tokens)?); + while ! self.is_eof() && self.current.kind != TokenKind::RightBrace { + then.push(self.statement()?); } // TODO: Support one-liner if statements. - expect!(tokens.next(), TokenKind::RightBrace, "expected }"); + expect!(self, TokenKind::RightBrace, "expected }"); Statement::If { condition, then } }, TokenKind::Class => { - let name = expect!(tokens.next(), TokenKind::Identifier(i), i, "expected class name"); - expect!(tokens.next(), TokenKind::LeftBrace, "expected left-brace"); + self.next(); + + let name = expect!(self, TokenKind::Identifier(i), i, "expected class name"); + expect!(self, TokenKind::LeftBrace, "expected left-brace"); let mut body = Vec::new(); - while let Some(t) = tokens.peek() && t.kind != TokenKind::RightBrace { - let statement = match self.statement(tokens.next().unwrap(), tokens)? { + while ! self.is_eof() && self.current.kind != TokenKind::RightBrace { + let statement = match self.statement()? { Statement::Function { name, params, body } => { Statement::Method { name, params, body, flags: vec![] } }, @@ -88,105 +88,115 @@ impl Parser { body.push(statement); } - expect!(tokens.next(), TokenKind::RightBrace, "expected right-brace"); + expect!(self, TokenKind::RightBrace, "expected right-brace"); Statement::Class { name: name.into(), body } }, TokenKind::Echo => { + self.next(); + let mut values = Vec::new(); - while let Some(t) = tokens.peek() && t.kind != TokenKind::SemiColon { - values.push(self.expression(tokens, 0)?); + while ! self.is_eof() && self.current.kind != TokenKind::SemiColon { + values.push(self.expression(0)?); // `echo` supports multiple expressions separated by a comma. // TODO: Disallow trailing commas when the next token is a semi-colon. - if let Some(t) = tokens.peek() && t.kind == TokenKind::Comma { - tokens.next(); + if ! self.is_eof() && self.current.kind == TokenKind::Comma { + self.next(); } } - expect!(tokens.next(), TokenKind::SemiColon, "expected semi-colon at the end of an echo statement"); + expect!(self, TokenKind::SemiColon, "expected semi-colon at the end of an echo statement"); Statement::Echo { values } }, TokenKind::Return => { - if let Some(Token { kind: TokenKind::SemiColon, .. }) = tokens.peek() { + self.next(); + + if let Token { kind: TokenKind::SemiColon, .. } = self.current { let ret = Statement::Return { value: None }; - expect!(tokens.next(), TokenKind::SemiColon, "expected semi-colon at the end of return statement."); + expect!(self, TokenKind::SemiColon, "expected semi-colon at the end of return statement."); ret } else { - let ret = Statement::Return { value: self.expression(tokens, 0).ok() }; - expect!(tokens.next(), TokenKind::SemiColon, "expected semi-colon at the end of return statement."); + let ret = Statement::Return { value: self.expression(0).ok() }; + expect!(self, TokenKind::SemiColon, "expected semi-colon at the end of return statement."); ret } }, TokenKind::Function => { - let name = expect!(tokens.next(), TokenKind::Identifier(i), i, "expected identifier"); + self.next(); - expect!(tokens.next(), TokenKind::LeftParen, "expected ("); + let name = expect!(self, TokenKind::Identifier(i), i, "expected identifier"); + + expect!(self, TokenKind::LeftParen, "expected ("); let mut params = Vec::new(); - while let Some(n) = tokens.peek() && n.kind != TokenKind::RightParen { + while ! self.is_eof() && self.current.kind != TokenKind::RightParen { // TODO: Support variable types and default values. - params.push(expect!(tokens.next(), TokenKind::Variable(v), v, "expected variable").into()); + params.push(expect!(self, TokenKind::Variable(v), v, "expected variable").into()); - if let Some(Token { kind: TokenKind::Comma, .. }) = tokens.peek() { - tokens.next(); + if let Token { kind: TokenKind::Comma, .. } = self.current { + self.next(); } } - expect!(tokens.next(), TokenKind::RightParen, "expected )"); + expect!(self, TokenKind::RightParen, "expected )"); // TODO: Support return types here. - expect!(tokens.next(), TokenKind::LeftBrace, "expected {"); + expect!(self, TokenKind::LeftBrace, "expected {"); let mut body = Block::new(); - while let Some(n) = tokens.peek() && n.kind != TokenKind::RightBrace { - body.push(self.statement(tokens.next().unwrap(), tokens)?); + while ! self.is_eof() && self.current.kind != TokenKind::RightBrace { + body.push(self.statement()?); } - expect!(tokens.next(), TokenKind::RightBrace, "expected }"); + expect!(self, TokenKind::RightBrace, "expected }"); Statement::Function { name: name.into(), params, body } }, - _ if is_method_visibility_modifier(&t.kind) => { - let mut flags = vec![visibility_token_to_flag(&t.kind)]; + _ if is_method_visibility_modifier(&self.current.kind) => { + let mut flags = vec![visibility_token_to_flag(&self.current.kind)]; + self.next(); - while let Some(t) = tokens.peek() && is_method_visibility_modifier(&t.kind) { - let next = tokens.next().unwrap(); - - flags.push(visibility_token_to_flag(&next.kind)); + while ! self.is_eof() && is_method_visibility_modifier(&self.current.kind) { + flags.push(visibility_token_to_flag(&self.current.kind)); + self.next(); } - match self.statement(tokens.next().unwrap(), tokens)? { + match self.statement()? { Statement::Function { name, params, body } => { Statement::Method { name, params, body, flags } }, _ => return Err(ParseError::InvalidClassStatement("Classes can only contain properties, constants and methods.".into())) } }, - _ => todo!("unhandled token: {:?}", t) + _ => { + let expr = self.expression(0)?; + + Statement::Expression { expr } + } }) } - fn expression(&self, tokens: &mut Peekable>, bp: u8) -> Result { - if tokens.peek().is_none() { + fn expression(&mut self, bp: u8) -> Result { + if self.is_eof() { return Err(ParseError::UnexpectedEndOfFile); } - let t = tokens.next().unwrap(); - - let mut lhs = match t.kind { - TokenKind::Variable(v) => Expression::Variable(v), - TokenKind::Int(i) => Expression::Int(i), - TokenKind::Identifier(i) => Expression::Identifier(i), - _ => todo!("lhs: {:?}", t.kind), + let mut lhs = match &self.current.kind { + TokenKind::Variable(v) => Expression::Variable(v.to_string()), + TokenKind::Int(i) => Expression::Int(*i), + TokenKind::Identifier(i) => Expression::Identifier(i.to_string()), + _ => todo!("expr lhs: {:?}", self.current.kind), }; + self.next(); + loop { - let kind = match tokens.peek() { - Some(Token { kind: TokenKind::SemiColon, .. }) | None => break, - Some(Token { kind, .. }) => kind.clone(), + let kind = match &self.current { + Token { kind: TokenKind::SemiColon | TokenKind::Eof, .. } => break, + Token { kind, .. } => kind.clone() }; if let Some(lbp) = postfix_binding_power(&kind) { @@ -194,10 +204,10 @@ impl Parser { break; } - tokens.next(); + self.next(); let op = kind.clone(); - lhs = self.postfix(tokens, lhs, &op)?; + lhs = self.postfix(lhs, &op)?; continue; } @@ -207,10 +217,10 @@ impl Parser { break; } - tokens.next(); + self.next(); let op = kind.clone(); - let rhs = self.expression(tokens, rbp)?; + let rhs = self.expression(rbp)?; lhs = infix(lhs, op, rhs); continue; @@ -222,25 +232,49 @@ impl Parser { Ok(lhs) } - fn postfix(&self, tokens: &mut Peekable>, lhs: Expression, op: &TokenKind) -> Result { + fn postfix(&mut self, lhs: Expression, op: &TokenKind) -> Result { Ok(match op { TokenKind::LeftParen => { let mut args = Vec::new(); - while let Some(t) = tokens.peek() && t.kind != TokenKind::RightParen { - args.push(self.expression(tokens, 0)?); + while ! self.is_eof() && self.current.kind != TokenKind::RightParen { + args.push(self.expression(0)?); - if let Some(Token { kind: TokenKind::Comma, .. }) = tokens.peek() { - tokens.next(); + if let Token { kind: TokenKind::Comma, .. } = self.current { + self.next(); } } - expect!(tokens.next(), TokenKind::RightParen, "expected )"); + expect!(self, TokenKind::RightParen, "expected )"); Expression::Call(Box::new(lhs), args) }, _ => todo!("postfix: {:?}", op), }) } + + fn is_eof(&self) -> bool { + self.current.kind == TokenKind::Eof + } + + pub fn next(&mut self) { + self.current = self.peek.clone(); + self.peek = self.iter.next().unwrap_or_default() + } + + pub fn parse(&mut self) -> Result { + let mut ast = Program::new(); + + while self.current.kind != TokenKind::Eof { + if let TokenKind::OpenTag(_) = self.current.kind { + self.next(); + continue; + } + + ast.push(self.statement()?); + } + + Ok(ast.to_vec()) + } } fn is_method_visibility_modifier(kind: &TokenKind) -> bool { @@ -253,7 +287,7 @@ fn visibility_token_to_flag(kind: &TokenKind) -> MethodFlag { TokenKind::Protected => MethodFlag::Protected, TokenKind::Private => MethodFlag::Private, TokenKind::Static => MethodFlag::Static, - _ => unreachable!() + _ => unreachable!("{:?}", kind) } } @@ -286,7 +320,7 @@ pub enum ParseError { #[cfg(test)] mod tests { use trunk_lexer::Lexer; - use crate::{Statement, Block, Param, Expression, ast::{InfixOp, MethodFlag}}; + use crate::{Statement, Param, Expression, ast::{InfixOp, MethodFlag}}; use super::Parser; macro_rules! function { @@ -467,8 +501,8 @@ mod tests { let mut lexer = Lexer::new(None); let tokens = lexer.tokenize(source).unwrap(); - let parser = Parser::new(); - let ast = parser.parse(tokens).unwrap(); + let mut parser = Parser::new(tokens); + let ast = parser.parse().unwrap(); assert_eq!(ast, expected); }