diff --git a/trunk_lexer/src/lexer.rs b/trunk_lexer/src/lexer.rs index fb95466..45485e5 100644 --- a/trunk_lexer/src/lexer.rs +++ b/trunk_lexer/src/lexer.rs @@ -1,4 +1,4 @@ -use crate::{Token, TokenKind, OpenTagKind}; +use crate::{OpenTagKind, Token, TokenKind}; #[derive(Debug)] pub enum LexerState { @@ -52,15 +52,15 @@ impl Lexer { // of some description. LexerState::Initial => { tokens.append(&mut self.initial()?); - }, + } // The scripting state is entered when an open tag is encountered in the source code. // This tells the lexer to start analysing characters at PHP tokens instead of inline HTML. LexerState::Scripting => { while let Some(c) = self.peek { - if ! c.is_whitespace() && ! ['\n', '\t', '\r'].contains(&c) { + if !c.is_whitespace() && !['\n', '\t', '\r'].contains(&c) { break; } - + if c == '\n' { self.line += 1; self.col = 0; @@ -77,7 +77,7 @@ impl Lexer { } tokens.push(self.scripting()?); - }, + } } } @@ -107,7 +107,7 @@ impl Lexer { self.enter_state(LexerState::Scripting); - let mut tokens = vec!(); + let mut tokens = vec![]; if !buffer.is_empty() { tokens.push(Token { @@ -115,10 +115,10 @@ impl Lexer { span: (self.line, self.col.saturating_sub(5)), }); } - + tokens.push(Token { kind: TokenKind::OpenTag(OpenTagKind::Full), - span: (self.line, self.col) + span: (self.line, self.col), }); return Ok(tokens); @@ -138,20 +138,18 @@ impl Lexer { buffer.push(char); } - }, + } _ => { self.next(); buffer.push(char); - }, + } } } - Ok(vec![ - Token { - kind: TokenKind::InlineHtml(buffer), - span: (self.line, self.col) - } - ]) + Ok(vec![Token { + kind: TokenKind::InlineHtml(buffer), + span: (self.line, self.col), + }]) } fn scripting(&mut self) -> Result { @@ -187,7 +185,7 @@ impl Lexer { } else { TokenKind::Bang } - }, + } '&' => { self.col += 1; @@ -200,7 +198,7 @@ impl Lexer { } else { TokenKind::Ampersand } - }, + } '?' => { // This is a close tag, we can enter "Initial" mode again. if let Some('>') = self.peek { @@ -236,7 +234,7 @@ impl Lexer { } else { TokenKind::Question } - }, + } '=' => { if let Some('=') = self.peek { self.next(); @@ -261,7 +259,7 @@ impl Lexer { TokenKind::Equals } - }, + } // Single quoted string. '\'' => { self.col += 1; @@ -270,7 +268,7 @@ impl Lexer { let mut escaping = false; while let Some(n) = self.peek { - if ! escaping && n == '\'' { + if !escaping && n == '\'' { self.next(); break; @@ -303,7 +301,7 @@ impl Lexer { } TokenKind::ConstantString(buffer) - }, + } '"' => { self.col += 1; @@ -311,7 +309,7 @@ impl Lexer { let mut escaping = false; while let Some(n) = self.peek { - if ! escaping && n == '"' { + if !escaping && n == '"' { self.next(); break; @@ -344,7 +342,7 @@ impl Lexer { } TokenKind::ConstantString(buffer) - }, + } '$' => { let mut buffer = String::new(); @@ -356,8 +354,8 @@ impl Lexer { self.col += 1; buffer.push(n); self.next(); - }, - 'a'..='z' | 'A'..='Z' | '\u{80}'..='\u{ff}' | '_' => { + } + 'a'..='z' | 'A'..='Z' | '\u{80}'..='\u{ff}' | '_' => { self.col += 1; buffer.push(n); @@ -368,7 +366,7 @@ impl Lexer { } TokenKind::Variable(buffer) - }, + } '.' => { self.col += 1; @@ -382,19 +380,19 @@ impl Lexer { underscore = false; buffer.push(n); self.next(); - + self.col += 1; - }, + } '_' => { if underscore { return Err(LexerError::UnexpectedCharacter(n)); } - + underscore = true; self.next(); - + self.col += 1; - }, + } _ => break, } } @@ -421,7 +419,7 @@ impl Lexer { } else { TokenKind::Dot } - }, + } '0'..='9' => { let mut buffer = String::from(char); let mut underscore = false; @@ -437,7 +435,7 @@ impl Lexer { self.next(); self.col += 1; - }, + } '.' => { if is_float { return Err(LexerError::UnexpectedCharacter(n)); @@ -447,7 +445,7 @@ impl Lexer { buffer.push(n); self.next(); self.col += 1; - }, + } '_' => { if underscore { return Err(LexerError::UnexpectedCharacter(n)); @@ -457,7 +455,7 @@ impl Lexer { self.next(); self.col += 1; - }, + } _ => break, } } @@ -467,7 +465,7 @@ impl Lexer { } else { TokenKind::Int(buffer.parse().unwrap()) } - }, + } '\\' => { self.col += 1; @@ -481,7 +479,7 @@ impl Lexer { } else { TokenKind::NamespaceSeparator } - }, + } _ if char.is_alphabetic() || char == '_' => { self.col += 1; @@ -498,7 +496,7 @@ impl Lexer { continue; } - if next == '\\' && ! last_was_slash { + if next == '\\' && !last_was_slash { qualified = true; last_was_slash = true; buffer.push(next); @@ -515,7 +513,7 @@ impl Lexer { } else { identifier_to_keyword(&buffer).unwrap_or(TokenKind::Identifier(buffer)) } - }, + } '/' | '#' => { self.col += 1; @@ -545,7 +543,7 @@ impl Lexer { let t = self.current.unwrap(); match t { - '*' => { + '*' => { if let Some('/') = self.peek { self.col += 2; buffer.push_str("*/"); @@ -589,7 +587,7 @@ impl Lexer { TokenKind::Comment(buffer) } - }, + } '*' => { self.col += 1; @@ -604,10 +602,10 @@ impl Lexer { } else { TokenKind::Asterisk } - }, + } '|' => { self.col += 1; - + if let Some('|') = self.peek { self.col += 1; @@ -617,23 +615,23 @@ impl Lexer { } else { TokenKind::Pipe } - }, + } '{' => { self.col += 1; TokenKind::LeftBrace - }, + } '}' => { self.col += 1; TokenKind::RightBrace - }, + } '(' => { self.col += 1; if self.try_read("string)") { self.col += 7; self.skip(8); - - TokenKind::StringCast + + TokenKind::StringCast } else if self.try_read("object)") { self.col += 7; self.skip(8); @@ -654,15 +652,15 @@ impl Lexer { } else { TokenKind::LeftParen } - }, + } ')' => { self.col += 1; TokenKind::RightParen - }, + } ';' => { self.col += 1; TokenKind::SemiColon - }, + } '+' => { self.col += 1; @@ -670,7 +668,7 @@ impl Lexer { self.col += 1; self.next(); - + TokenKind::PlusEquals } else if let Some('+') = self.peek { self.col += 1; @@ -681,10 +679,10 @@ impl Lexer { } else { TokenKind::Plus } - }, + } '-' => { self.col += 1; - + if let Some('>') = self.peek { self.col += 1; @@ -698,7 +696,7 @@ impl Lexer { } else { TokenKind::Minus } - }, + } '<' => { self.col += 1; @@ -718,11 +716,11 @@ impl Lexer { todo!("heredocs & nowdocs"); } else { TokenKind::LeftShift - } + } } else { TokenKind::LessThan } - }, + } '>' => { self.col += 1; @@ -735,37 +733,42 @@ impl Lexer { } else { TokenKind::GreaterThan } - }, + } ',' => { self.col += 1; TokenKind::Comma - }, + } '[' => { self.col += 1; TokenKind::LeftBracket - }, + } ']' => { self.col += 1; TokenKind::RightBracket - }, + } ':' => { self.col += 1; if let Some(':') = self.peek { self.col += 1; - + self.next(); TokenKind::DoubleColon } else { TokenKind::Colon } - }, - _ => unimplemented!(" char: {}, line: {}, col: {}", char, self.line, self.col), + } + _ => unimplemented!( + " char: {}, line: {}, col: {}", + char, + self.line, + self.col + ), }; Ok(Token { kind, - span: (self.line, self.col) + span: (self.line, self.col), }) } @@ -872,8 +875,8 @@ pub enum LexerError { #[cfg(test)] mod tests { - use crate::{TokenKind, OpenTagKind, Token}; use super::Lexer; + use crate::{OpenTagKind, Token, TokenKind}; macro_rules! open { () => { @@ -881,7 +884,7 @@ mod tests { }; ($kind:expr) => { TokenKind::OpenTag($kind) - } + }; } macro_rules! var { ($v:expr) => { @@ -896,18 +899,15 @@ mod tests { #[test] fn basic_tokens() { - assert_tokens("", &[ - open!(), - TokenKind::CloseTag, - ]); + assert_tokens("", &[open!(), TokenKind::CloseTag]); } #[test] fn inline_html() { - assert_tokens("Hello, world!\n", &[ - open!(), - TokenKind::Arrow, - ]); + assert_tokens("", &[open!(), TokenKind::Arrow]); } #[test] fn math() { - assert_tokens(" Vec<(usize, usize)> { let tokens = get_tokens(source); - let mut spans = vec!(); - + let mut spans = vec![]; + for token in tokens { spans.push(token.span); } @@ -1136,4 +1145,4 @@ function hello_world() { let mut lexer = Lexer::new(None); lexer.tokenize(source).unwrap() } -} \ No newline at end of file +}