From 452592b8fe1c656a29a4e9b98153ecf94acc7ddd Mon Sep 17 00:00:00 2001 From: Saif Eddin Gmati <29315886+azjezz@users.noreply.github.com> Date: Tue, 6 Dec 2022 12:47:27 +0100 Subject: [PATCH] chore: small perf improvements for the lexer (#165) Signed-off-by: azjezz --- src/lexer/mod.rs | 53 +++++++++++++++++++++++---------------------- src/lexer/state.rs | 20 ++++++++--------- src/parser/state.rs | 2 +- 3 files changed, 38 insertions(+), 37 deletions(-) diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index 0863369..52664fa 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs @@ -67,15 +67,15 @@ impl Lexer { } // The double quote state is entered when inside a double-quoted string that // contains variables. - StackFrame::DoubleQuote => tokens.extend(self.double_quote(&mut state)?), + StackFrame::DoubleQuote => self.double_quote(&mut state, &mut tokens)?, // The shell exec state is entered when inside of a execution string (`). - StackFrame::ShellExec => tokens.extend(self.shell_exec(&mut state)?), + StackFrame::ShellExec => self.shell_exec(&mut state, &mut tokens)?, // The doc string state is entered when tokenizing heredocs and nowdocs. StackFrame::DocString(kind, label) => { let kind = *kind; let label = label.clone(); - tokens.extend(self.docstring(&mut state, kind, label)?) + self.docstring(&mut state, &mut tokens, kind, label)?; } // LookingForProperty is entered inside double quotes, // backticks, or a heredoc, expecting a variable name. @@ -118,7 +118,7 @@ impl Lexer { let tag_span = state.span; state.skip(5); - state.set(StackFrame::Scripting)?; + state.replace(StackFrame::Scripting); let mut tokens = vec![]; @@ -152,7 +152,7 @@ impl Lexer { let kind = match state.peek_buf() { [b'`', ..] => { state.next(); - state.set(StackFrame::ShellExec)?; + state.replace(StackFrame::ShellExec); TokenKind::Backtick } [b'@', ..] => { @@ -188,7 +188,7 @@ impl Lexer { // This is a close tag, we can enter "Initial" mode again. state.skip(2); - state.set(StackFrame::Initial)?; + state.replace(StackFrame::Initial); TokenKind::CloseTag } @@ -322,7 +322,7 @@ impl Lexer { match state.peek_buf() { [b'(', b')', b';', ..] => { state.skip(3); - state.set(StackFrame::Halted)?; + state.replace(StackFrame::Halted); } _ => return Err(SyntaxError::InvalidHaltCompiler(state.span)), } @@ -335,7 +335,7 @@ impl Lexer { state.next(); let mut buffer = vec![b'/']; - while state.current.is_some() { + loop { match state.peek_buf() { [b'*', b'/', ..] => { state.skip(2); @@ -346,7 +346,9 @@ impl Lexer { state.next(); buffer.push(t); } - [] => unreachable!(), + _ => { + break; + } } } @@ -567,7 +569,7 @@ impl Lexer { } state.next(); - state.set(StackFrame::DocString(doc_string_kind, label.clone()))?; + state.replace(StackFrame::DocString(doc_string_kind, label.clone())); TokenKind::StartDocString(label, doc_string_kind) } @@ -650,7 +652,7 @@ impl Lexer { Ok(Token { kind, span }) } - fn double_quote(&self, state: &mut State) -> SyntaxResult> { + fn double_quote(&self, state: &mut State, tokens: &mut Vec) -> SyntaxResult<()> { let span = state.span; let mut buffer = Vec::new(); let kind = loop { @@ -668,7 +670,7 @@ impl Lexer { } [b'"', ..] => { state.next(); - state.set(StackFrame::Scripting)?; + state.replace(StackFrame::Scripting); break TokenKind::DoubleQuote; } &[b'\\', b @ (b'"' | b'\\' | b'$'), ..] => { @@ -781,7 +783,6 @@ impl Lexer { } }; - let mut tokens = Vec::new(); if !buffer.is_empty() { tokens.push(Token { kind: TokenKind::StringPart(buffer.into()), @@ -790,10 +791,10 @@ impl Lexer { } tokens.push(Token { kind, span }); - Ok(tokens) + Ok(()) } - fn shell_exec(&self, state: &mut State) -> SyntaxResult> { + fn shell_exec(&self, state: &mut State, tokens: &mut Vec) -> SyntaxResult<()> { let span = state.span; let mut buffer = Vec::new(); let kind = loop { @@ -811,7 +812,7 @@ impl Lexer { } [b'`', ..] => { state.next(); - state.set(StackFrame::Scripting)?; + state.replace(StackFrame::Scripting); break TokenKind::Backtick; } [b'$', ident_start!(), ..] => { @@ -837,7 +838,6 @@ impl Lexer { } }; - let mut tokens = Vec::new(); if !buffer.is_empty() { tokens.push(Token { kind: TokenKind::StringPart(buffer.into()), @@ -846,15 +846,17 @@ impl Lexer { } tokens.push(Token { kind, span }); - Ok(tokens) + + Ok(()) } fn docstring( &self, state: &mut State, + tokens: &mut Vec, kind: DocStringKind, label: ByteString, - ) -> SyntaxResult> { + ) -> SyntaxResult<()> { let span = state.span; let mut buffer = Vec::new(); let mut new_line = false; @@ -931,7 +933,7 @@ impl Lexer { // If we can see the label here, we can consume it and exit early. if state.try_read(&label) { state.skip(label.len()); - state.set(StackFrame::Scripting)?; + state.replace(StackFrame::Scripting); break TokenKind::EndDocString(label, None, 0); } @@ -1018,7 +1020,7 @@ impl Lexer { // and can consume the label, sending the indentation amount along to the parser // to normalize. state.skip(label.len()); - state.set(StackFrame::Scripting)?; + state.replace(StackFrame::Scripting); break TokenKind::EndDocString( label, indentation_type, @@ -1045,7 +1047,6 @@ impl Lexer { buffer.pop(); } - let mut tokens = Vec::new(); if !buffer.is_empty() { tokens.push(Token { kind: TokenKind::StringPart(buffer.into()), @@ -1054,7 +1055,7 @@ impl Lexer { } tokens.push(Token { kind, span }); - Ok(tokens) + Ok(()) } fn looking_for_varname(&self, state: &mut State) -> SyntaxResult> { @@ -1065,7 +1066,7 @@ impl Lexer { let ident = ident.to_vec(); let span = state.span; state.skip(ident.len()); - state.set(StackFrame::Scripting)?; + state.replace(StackFrame::Scripting); return Ok(Some(Token { kind: TokenKind::Identifier(ident.into()), span, @@ -1073,7 +1074,7 @@ impl Lexer { } } - state.set(StackFrame::Scripting)?; + state.replace(StackFrame::Scripting); Ok(None) } @@ -1270,7 +1271,7 @@ impl Lexer { Ok(if constant { TokenKind::LiteralString(buffer.into()) } else { - state.set(StackFrame::DoubleQuote)?; + state.replace(StackFrame::DoubleQuote); TokenKind::StringPart(buffer.into()) }) } diff --git a/src/lexer/state.rs b/src/lexer/state.rs index 6055267..092a809 100644 --- a/src/lexer/state.rs +++ b/src/lexer/state.rs @@ -38,8 +38,11 @@ impl State { let chars = input.as_ref().to_vec(); let current = chars.first().copied(); + let mut stack = VecDeque::with_capacity(32); + stack.push_back(StackFrame::Initial); + Self { - stack: VecDeque::from([StackFrame::Initial]), + stack, chars, current, cursor: 0, @@ -47,21 +50,18 @@ impl State { } } - pub fn set(&mut self, state: StackFrame) -> SyntaxResult<()> { - *self - .stack - .back_mut() - .ok_or(SyntaxError::UnpredictableState(self.span))? = state; - - Ok(()) - } - pub fn frame(&self) -> SyntaxResult<&StackFrame> { self.stack .back() .ok_or(SyntaxError::UnpredictableState(self.span)) } + pub fn replace(&mut self, state: StackFrame) { + let i = self.stack.len() - 1; + + self.stack[i] = state; + } + pub fn enter(&mut self, state: StackFrame) { self.stack.push_back(state); } diff --git a/src/parser/state.rs b/src/parser/state.rs index 8a3a2ef..702cea7 100644 --- a/src/parser/state.rs +++ b/src/parser/state.rs @@ -54,7 +54,7 @@ impl State { let mut iter = tokens.into_iter(); Self { - stack: VecDeque::with_capacity(3), + stack: VecDeque::with_capacity(32), current: iter.next().unwrap_or_default(), peek: iter.next().unwrap_or_default(), iter,