mirror of
https://github.com/danog/parser.git
synced 2024-11-26 20:04:57 +01:00
fix: better heredoc/nowdoc implementation (#183)
This commit is contained in:
parent
bd2023c3c4
commit
825577d18d
460
src/lexer/mod.rs
460
src/lexer/mod.rs
@ -68,7 +68,7 @@ impl Lexer {
|
||||
// The shell exec state is entered when inside of an execution string (`).
|
||||
StackFrame::ShellExec => self.shell_exec(&mut state, &mut tokens)?,
|
||||
// The doc string state is entered when tokenizing heredocs and nowdocs.
|
||||
StackFrame::DocString(kind, label) => {
|
||||
StackFrame::DocString(kind, label, ..) => {
|
||||
let kind = *kind;
|
||||
let label = label.clone();
|
||||
|
||||
@ -471,7 +471,12 @@ impl Lexer {
|
||||
}
|
||||
|
||||
state.source.next();
|
||||
state.replace(StackFrame::DocString(doc_string_kind, label.clone()));
|
||||
state.replace(StackFrame::DocString(
|
||||
doc_string_kind,
|
||||
label.clone(),
|
||||
DocStringIndentationKind::None,
|
||||
0,
|
||||
));
|
||||
|
||||
TokenKind::StartDocString(label, doc_string_kind)
|
||||
}
|
||||
@ -877,52 +882,128 @@ impl Lexer {
|
||||
kind: DocStringKind,
|
||||
label: ByteString,
|
||||
) -> SyntaxResult<()> {
|
||||
let span = state.source.span();
|
||||
let mut buffer = Vec::new();
|
||||
let mut new_line = false;
|
||||
|
||||
let mut indentation_amount: usize = 0;
|
||||
|
||||
// 1. Check if there's any whitespace here. It can either be a space or tab character.
|
||||
let indentation_type = match state.source.read(1) {
|
||||
[b' '] => Some(DocStringIndentationKind::Space),
|
||||
[b'\t'] => Some(DocStringIndentationKind::Tab),
|
||||
_ => None,
|
||||
match kind {
|
||||
DocStringKind::Heredoc => self.heredoc(state, tokens, label)?,
|
||||
DocStringKind::Nowdoc => self.nowdoc(state, tokens, label)?,
|
||||
};
|
||||
|
||||
// 2. Count how much whitespace there is on this line.
|
||||
if let Some(indentation_type) = indentation_type {
|
||||
loop {
|
||||
match (indentation_type, state.source.read(1)) {
|
||||
(DocStringIndentationKind::Space, [b' ']) => {
|
||||
indentation_amount += 1;
|
||||
state.source.next();
|
||||
buffer.push(b' ');
|
||||
}
|
||||
(DocStringIndentationKind::Tab, [b'\t']) => {
|
||||
indentation_amount += 1;
|
||||
state.source.next();
|
||||
buffer.push(b'\t');
|
||||
}
|
||||
_ => break,
|
||||
};
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn heredoc(
|
||||
&self,
|
||||
state: &mut State,
|
||||
tokens: &mut Vec<Token>,
|
||||
label: ByteString,
|
||||
) -> SyntaxResult<()> {
|
||||
let span = state.source.span();
|
||||
let mut buffer: Vec<u8> = Vec::new();
|
||||
|
||||
let kind = loop {
|
||||
match state.source.read(2) {
|
||||
[b'$', b'{'] if kind == DocStringKind::Heredoc => {
|
||||
match state.source.read(3) {
|
||||
[b'$', b'{', ..] => {
|
||||
state.source.skip(2);
|
||||
state.enter(StackFrame::LookingForVarname);
|
||||
break TokenKind::DollarLeftBrace;
|
||||
}
|
||||
[b'{', b'$'] if kind == DocStringKind::Heredoc => {
|
||||
[b'{', b'$', ..] => {
|
||||
// Intentionally only consume the left brace.
|
||||
state.source.next();
|
||||
state.enter(StackFrame::Scripting);
|
||||
break TokenKind::LeftBrace;
|
||||
}
|
||||
[b'$', ident_start!()] if kind == DocStringKind::Heredoc => {
|
||||
&[b'\\', b @ (b'"' | b'\\' | b'$'), ..] => {
|
||||
state.source.skip(2);
|
||||
buffer.push(b);
|
||||
}
|
||||
&[b'\\', b'n', ..] => {
|
||||
state.source.skip(2);
|
||||
buffer.push(b'\n');
|
||||
}
|
||||
&[b'\\', b'r', ..] => {
|
||||
state.source.skip(2);
|
||||
buffer.push(b'\r');
|
||||
}
|
||||
&[b'\\', b't', ..] => {
|
||||
state.source.skip(2);
|
||||
buffer.push(b'\t');
|
||||
}
|
||||
&[b'\\', b'v', ..] => {
|
||||
state.source.skip(2);
|
||||
buffer.push(b'\x0b');
|
||||
}
|
||||
&[b'\\', b'e', ..] => {
|
||||
state.source.skip(2);
|
||||
buffer.push(b'\x1b');
|
||||
}
|
||||
&[b'\\', b'f', ..] => {
|
||||
state.source.skip(2);
|
||||
buffer.push(b'\x0c');
|
||||
}
|
||||
&[b'\\', b'x', b @ (b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F')] => {
|
||||
state.source.skip(3);
|
||||
|
||||
let mut hex = String::from(b as char);
|
||||
if let Some(b @ (b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F')) =
|
||||
state.source.current()
|
||||
{
|
||||
state.source.next();
|
||||
hex.push(*b as char);
|
||||
}
|
||||
|
||||
let b = u8::from_str_radix(&hex, 16).unwrap();
|
||||
buffer.push(b);
|
||||
}
|
||||
&[b'\\', b'u', b'{'] => {
|
||||
state.source.skip(3);
|
||||
|
||||
let mut code_point = String::new();
|
||||
while let Some(b @ (b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F')) =
|
||||
state.source.current()
|
||||
{
|
||||
state.source.next();
|
||||
code_point.push(*b as char);
|
||||
}
|
||||
|
||||
if code_point.is_empty() || state.source.current() != Some(&b'}') {
|
||||
return Err(SyntaxError::InvalidUnicodeEscape(state.source.span()));
|
||||
}
|
||||
state.source.next();
|
||||
|
||||
let c = if let Ok(c) = u32::from_str_radix(&code_point, 16) {
|
||||
c
|
||||
} else {
|
||||
return Err(SyntaxError::InvalidUnicodeEscape(state.source.span()));
|
||||
};
|
||||
|
||||
if let Some(c) = char::from_u32(c) {
|
||||
let mut tmp = [0; 4];
|
||||
let bytes = c.encode_utf8(&mut tmp);
|
||||
buffer.extend(bytes.as_bytes());
|
||||
} else {
|
||||
return Err(SyntaxError::InvalidUnicodeEscape(state.source.span()));
|
||||
}
|
||||
}
|
||||
&[b'\\', b @ b'0'..=b'7', ..] => {
|
||||
state.source.skip(2);
|
||||
|
||||
let mut octal = String::from(b as char);
|
||||
if let Some(b @ b'0'..=b'7') = state.source.current() {
|
||||
state.source.next();
|
||||
octal.push(*b as char);
|
||||
}
|
||||
if let Some(b @ b'0'..=b'7') = state.source.current() {
|
||||
state.source.next();
|
||||
octal.push(*b as char);
|
||||
}
|
||||
|
||||
if let Ok(b) = u8::from_str_radix(&octal, 8) {
|
||||
buffer.push(b);
|
||||
} else {
|
||||
return Err(SyntaxError::InvalidOctalEscape(state.source.span()));
|
||||
}
|
||||
}
|
||||
[b'$', ident_start!(), ..] => {
|
||||
state.source.next();
|
||||
let ident = self.consume_identifier(state);
|
||||
|
||||
@ -936,139 +1017,88 @@ impl Lexer {
|
||||
|
||||
break TokenKind::Variable(ident.into());
|
||||
}
|
||||
&[b'\n', ..] => {
|
||||
new_line = true;
|
||||
state.source.next();
|
||||
// If we find a new-line, we can start to check if we can see the EndDocString token.
|
||||
[b'\n', ..] => {
|
||||
buffer.push(b'\n');
|
||||
state.source.next();
|
||||
|
||||
// Check if we can see the closing label right here.
|
||||
if state.source.at(&label, label.len()) {
|
||||
state.source.skip(label.len());
|
||||
state.replace(StackFrame::Scripting);
|
||||
break TokenKind::EndDocString(label, DocStringIndentationKind::None, 0);
|
||||
}
|
||||
|
||||
// Check if there's any whitespace first.
|
||||
let (whitespace_kind, whitespace_amount) = match state.source.read(1) {
|
||||
[b' '] => {
|
||||
let mut amount = 0;
|
||||
while state.source.read(1) == [b' '] {
|
||||
amount += 1;
|
||||
state.source.next();
|
||||
}
|
||||
(DocStringIndentationKind::Space, amount)
|
||||
}
|
||||
[b'\t'] => {
|
||||
let mut amount = 0;
|
||||
while state.source.read(1) == [b'\t'] {
|
||||
amount += 1;
|
||||
state.source.next();
|
||||
}
|
||||
(DocStringIndentationKind::Tab, amount)
|
||||
}
|
||||
_ => (DocStringIndentationKind::None, 0),
|
||||
};
|
||||
|
||||
// We've figured out what type of whitespace was being used
|
||||
// at the start of the line.
|
||||
// We should now check for any extra whitespace, of any kind.
|
||||
let mut extra_whitespace_buffer = Vec::new();
|
||||
while let [b @ b' ' | b @ b'\t'] = state.source.read(1) {
|
||||
extra_whitespace_buffer.push(b);
|
||||
state.source.next();
|
||||
}
|
||||
|
||||
// We've consumed all leading whitespace on this line now,
|
||||
// so let's try to read the label again.
|
||||
if state.source.at(&label, label.len()) {
|
||||
// We've found the label, finally! We need to do 1 last
|
||||
// check to make sure there wasn't a mixture of indentation types.
|
||||
if whitespace_kind != DocStringIndentationKind::None
|
||||
&& !extra_whitespace_buffer.is_empty()
|
||||
{
|
||||
return Err(SyntaxError::InvalidDocIndentation(state.source.span()));
|
||||
}
|
||||
|
||||
// If we get here, only 1 type of indentation was found. We can move
|
||||
// the process along by reading over the label and breaking out
|
||||
// with the EndDocString token, storing the kind and amount of whitespace.
|
||||
state.source.skip(label.len());
|
||||
state.replace(StackFrame::Scripting);
|
||||
break TokenKind::EndDocString(label, whitespace_kind, whitespace_amount);
|
||||
} else {
|
||||
// We didn't find the label. The buffer still needs to know about
|
||||
// the whitespace, so let's extend the buffer with the whitespace
|
||||
// and let the loop run again to handle the rest of the line.
|
||||
if whitespace_kind != DocStringIndentationKind::None {
|
||||
let whitespace_char: u8 = whitespace_kind.into();
|
||||
for _ in 0..whitespace_amount {
|
||||
buffer.push(whitespace_char);
|
||||
}
|
||||
}
|
||||
|
||||
buffer.extend(extra_whitespace_buffer);
|
||||
}
|
||||
}
|
||||
&[b, ..] => {
|
||||
// If we're not on a new line, just add to the buffer as usual.
|
||||
if !new_line {
|
||||
new_line = false;
|
||||
state.source.next();
|
||||
buffer.push(b);
|
||||
continue;
|
||||
}
|
||||
|
||||
// If we can see the label here, we can consume it and exit early.
|
||||
if state.source.at(&label, label.length) {
|
||||
state.source.skip(label.length);
|
||||
state.replace(StackFrame::Scripting);
|
||||
break TokenKind::EndDocString(label, None, 0);
|
||||
}
|
||||
|
||||
// We know the label isn't at the start of the line, so we can
|
||||
// check if the line starts with any whitespace.
|
||||
let line_starts_with_whitespace =
|
||||
matches!(state.source.read(1), [b' '] | [b'\t']);
|
||||
let mut current_indentation_amount = 0;
|
||||
|
||||
// If the line does start with whitespace, let's figure out what the current
|
||||
// indentation type is and how much whitespace there is.
|
||||
if line_starts_with_whitespace {
|
||||
let current_indentation_type = match state.source.read(1) {
|
||||
[b' '] => DocStringIndentationKind::Space,
|
||||
[b'\t'] => DocStringIndentationKind::Tab,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
// If there was indentation on a previous line, we need to check
|
||||
// if the current indentation type is the same or different.
|
||||
// If it's different, we need to produce an error.
|
||||
if let Some(indentation_type) = indentation_type {
|
||||
if indentation_type != current_indentation_type {
|
||||
return Err(SyntaxError::InvalidDocIndentation(
|
||||
state.source.span(),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
let mut leading_whitespace_buffer = Vec::new();
|
||||
|
||||
// If the type of whitespace is the same, we want to know
|
||||
// how much whitespace is on this line. We only care about
|
||||
// the smallest amount of whitespace in this case.
|
||||
loop {
|
||||
match (current_indentation_type, state.source.read(1)) {
|
||||
(DocStringIndentationKind::Space, [b' ']) => {
|
||||
leading_whitespace_buffer.push(b' ');
|
||||
current_indentation_amount += 1;
|
||||
state.source.next();
|
||||
}
|
||||
(DocStringIndentationKind::Tab, [b'\t']) => {
|
||||
leading_whitespace_buffer.push(b'\t');
|
||||
current_indentation_amount += 1;
|
||||
state.source.next();
|
||||
}
|
||||
_ => break,
|
||||
};
|
||||
}
|
||||
|
||||
// If we can read the label at this point, we then need to check if the amount
|
||||
// of indentation is the same or less than the smallest amount encountered thus far.
|
||||
if state.source.at(&label, label.length)
|
||||
&& current_indentation_amount > indentation_amount
|
||||
{
|
||||
return Err(SyntaxError::InvalidDocBodyIndentationLevel(
|
||||
current_indentation_amount,
|
||||
state.source.span(),
|
||||
));
|
||||
}
|
||||
|
||||
// If we've found less whitespace here, we should update the minimum.
|
||||
if current_indentation_amount < indentation_amount {
|
||||
indentation_amount = current_indentation_amount;
|
||||
}
|
||||
|
||||
let mut whitespace_buffer = Vec::new();
|
||||
|
||||
// We should now try to consume anymore whitespace, since the doc body
|
||||
// can include spaces or tabs. We should also push it to the buffer,
|
||||
// in case we don't encounter the label. In theory, the only whitespace
|
||||
// we'll encounter here is the character not found by the checks above.
|
||||
while let [b @ b' ' | b @ b'\t'] = state.source.read(1) {
|
||||
whitespace_buffer.push(*b);
|
||||
state.source.next();
|
||||
}
|
||||
|
||||
// Check if we can read the label again now.
|
||||
if state.source.at(&label, label.length) {
|
||||
// If there was extra whitespace after indentation, we need
|
||||
// to error out about mixed indentation types.
|
||||
if !whitespace_buffer.is_empty() {
|
||||
return Err(SyntaxError::InvalidDocIndentation(
|
||||
state.source.span(),
|
||||
));
|
||||
}
|
||||
|
||||
// If no extra whitespace was found, we've reached the end of the heredoc
|
||||
// and can consume the label, sending the indentation amount along to the parser
|
||||
// to normalize.
|
||||
state.source.skip(label.length);
|
||||
state.replace(StackFrame::Scripting);
|
||||
|
||||
break TokenKind::EndDocString(
|
||||
label,
|
||||
indentation_type,
|
||||
current_indentation_amount,
|
||||
);
|
||||
} else {
|
||||
buffer.extend(leading_whitespace_buffer);
|
||||
buffer.extend(whitespace_buffer);
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
new_line = false;
|
||||
state.source.next();
|
||||
buffer.push(b);
|
||||
}
|
||||
state.source.next();
|
||||
buffer.push(b);
|
||||
}
|
||||
[] => return Err(SyntaxError::UnexpectedEndOfFile(state.source.span())),
|
||||
}
|
||||
};
|
||||
|
||||
// Trailing line breaks in the last segment of a heredoc
|
||||
// shouldn't end up in the final string.
|
||||
// Any trailing line breaks should be removed from the final heredoc.
|
||||
if buffer.last() == Some(&b'\n') {
|
||||
buffer.pop();
|
||||
}
|
||||
@ -1081,6 +1111,116 @@ impl Lexer {
|
||||
}
|
||||
|
||||
tokens.push(Token { kind, span });
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn nowdoc(
|
||||
&self,
|
||||
state: &mut State,
|
||||
tokens: &mut Vec<Token>,
|
||||
label: ByteString,
|
||||
) -> SyntaxResult<()> {
|
||||
let span = state.source.span();
|
||||
let mut buffer: Vec<u8> = Vec::new();
|
||||
|
||||
let kind = loop {
|
||||
match state.source.read(3) {
|
||||
// If we find a new-line, we can start to check if we can see the EndDocString token.
|
||||
[b'\n', ..] => {
|
||||
buffer.push(b'\n');
|
||||
state.source.next();
|
||||
|
||||
// Check if we can see the closing label right here.
|
||||
if state.source.at(&label, label.len()) {
|
||||
state.source.skip(label.len());
|
||||
state.replace(StackFrame::Scripting);
|
||||
break TokenKind::EndDocString(label, DocStringIndentationKind::None, 0);
|
||||
}
|
||||
|
||||
// Check if there's any whitespace first.
|
||||
let (whitespace_kind, whitespace_amount) = match state.source.read(1) {
|
||||
[b' '] => {
|
||||
let mut amount = 0;
|
||||
while state.source.read(1) == [b' '] {
|
||||
amount += 1;
|
||||
state.source.next();
|
||||
}
|
||||
(DocStringIndentationKind::Space, amount)
|
||||
}
|
||||
[b'\t'] => {
|
||||
let mut amount = 0;
|
||||
while state.source.read(1) == [b'\t'] {
|
||||
amount += 1;
|
||||
state.source.next();
|
||||
}
|
||||
(DocStringIndentationKind::Tab, amount)
|
||||
}
|
||||
_ => (DocStringIndentationKind::None, 0),
|
||||
};
|
||||
|
||||
// We've figured out what type of whitespace was being used
|
||||
// at the start of the line.
|
||||
// We should now check for any extra whitespace, of any kind.
|
||||
let mut extra_whitespace_buffer = Vec::new();
|
||||
while let [b @ b' ' | b @ b'\t'] = state.source.read(1) {
|
||||
extra_whitespace_buffer.push(b);
|
||||
state.source.next();
|
||||
}
|
||||
|
||||
// We've consumed all leading whitespace on this line now,
|
||||
// so let's try to read the label again.
|
||||
if state.source.at(&label, label.len()) {
|
||||
// We've found the label, finally! We need to do 1 last
|
||||
// check to make sure there wasn't a mixture of indentation types.
|
||||
if whitespace_kind != DocStringIndentationKind::None
|
||||
&& !extra_whitespace_buffer.is_empty()
|
||||
{
|
||||
return Err(SyntaxError::InvalidDocIndentation(state.source.span()));
|
||||
}
|
||||
|
||||
// If we get here, only 1 type of indentation was found. We can move
|
||||
// the process along by reading over the label and breaking out
|
||||
// with the EndDocString token, storing the kind and amount of whitespace.
|
||||
state.source.skip(label.len());
|
||||
state.replace(StackFrame::Scripting);
|
||||
break TokenKind::EndDocString(label, whitespace_kind, whitespace_amount);
|
||||
} else {
|
||||
// We didn't find the label. The buffer still needs to know about
|
||||
// the whitespace, so let's extend the buffer with the whitespace
|
||||
// and let the loop run again to handle the rest of the line.
|
||||
if whitespace_kind != DocStringIndentationKind::None {
|
||||
let whitespace_char: u8 = whitespace_kind.into();
|
||||
for _ in 0..whitespace_amount {
|
||||
buffer.push(whitespace_char);
|
||||
}
|
||||
}
|
||||
|
||||
buffer.extend(extra_whitespace_buffer);
|
||||
}
|
||||
}
|
||||
&[b, ..] => {
|
||||
state.source.next();
|
||||
buffer.push(b);
|
||||
}
|
||||
[] => return Err(SyntaxError::UnexpectedEndOfFile(state.source.span())),
|
||||
}
|
||||
};
|
||||
|
||||
// Any trailing line breaks should be removed from the final nowdoc.
|
||||
if buffer.last() == Some(&b'\n') {
|
||||
buffer.pop();
|
||||
}
|
||||
|
||||
if !buffer.is_empty() {
|
||||
tokens.push(Token {
|
||||
kind: TokenKind::StringPart(buffer.into()),
|
||||
span,
|
||||
})
|
||||
}
|
||||
|
||||
tokens.push(Token { kind, span });
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,9 @@ use crate::lexer::error::SyntaxError;
|
||||
use crate::lexer::error::SyntaxResult;
|
||||
use crate::lexer::source::Source;
|
||||
|
||||
use super::token::DocStringIndentationAmount;
|
||||
use super::token::DocStringIndentationKind;
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Clone, Copy)]
|
||||
pub enum DocStringKind {
|
||||
Heredoc,
|
||||
@ -18,7 +21,12 @@ pub enum StackFrame {
|
||||
Halted,
|
||||
DoubleQuote,
|
||||
ShellExec,
|
||||
DocString(DocStringKind, ByteString),
|
||||
DocString(
|
||||
DocStringKind,
|
||||
ByteString,
|
||||
DocStringIndentationKind,
|
||||
DocStringIndentationAmount,
|
||||
),
|
||||
LookingForVarname,
|
||||
LookingForProperty,
|
||||
VarOffset,
|
||||
|
@ -11,10 +11,14 @@ pub enum OpenTagKind {
|
||||
Full,
|
||||
}
|
||||
|
||||
pub type DocStringIndentationAmount = usize;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum DocStringIndentationKind {
|
||||
Space,
|
||||
Tab,
|
||||
None,
|
||||
Both,
|
||||
}
|
||||
|
||||
impl From<u8> for DocStringIndentationKind {
|
||||
@ -32,6 +36,7 @@ impl From<DocStringIndentationKind> for u8 {
|
||||
match kind {
|
||||
DocStringIndentationKind::Space => b' ',
|
||||
DocStringIndentationKind::Tab => b'\t',
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -43,7 +48,7 @@ pub enum TokenKind {
|
||||
Parent,
|
||||
Backtick,
|
||||
StartDocString(ByteString, DocStringKind),
|
||||
EndDocString(ByteString, Option<DocStringIndentationKind>, usize),
|
||||
EndDocString(ByteString, DocStringIndentationKind, usize),
|
||||
From,
|
||||
Print,
|
||||
Dollar,
|
||||
|
@ -1,5 +1,7 @@
|
||||
use crate::expect_token;
|
||||
use crate::expected_token_err;
|
||||
use crate::lexer::error::SyntaxError;
|
||||
use crate::lexer::token::DocStringIndentationKind;
|
||||
use crate::lexer::token::TokenKind;
|
||||
use crate::lexer::DocStringKind;
|
||||
use crate::parser::ast;
|
||||
@ -971,6 +973,7 @@ fn shell_exec(state: &mut State) -> ParseResult<Expression> {
|
||||
|
||||
#[inline(always)]
|
||||
fn doc_string(state: &mut State, kind: DocStringKind) -> ParseResult<Expression> {
|
||||
let span = state.current.span;
|
||||
state.next();
|
||||
|
||||
Ok(match kind {
|
||||
@ -992,19 +995,65 @@ fn doc_string(state: &mut State, kind: DocStringKind) -> ParseResult<Expression>
|
||||
|
||||
state.next();
|
||||
|
||||
// FIXME: Can we move this logic above into the loop, by peeking ahead in
|
||||
// the token stream for the EndHeredoc? Might be more performant.
|
||||
if let Some(indentation_type) = indentation_type {
|
||||
let search_char: u8 = indentation_type.into();
|
||||
let mut new_line = true;
|
||||
if indentation_type != DocStringIndentationKind::None {
|
||||
let indentation_char: u8 = indentation_type.into();
|
||||
|
||||
for part in parts.iter_mut() {
|
||||
// We only need to strip and validate indentation
|
||||
// for individual lines, so we can skip checks if
|
||||
// we know we're not on a new line.
|
||||
if !new_line {
|
||||
continue;
|
||||
}
|
||||
|
||||
match part {
|
||||
StringPart::Const(bytes) => {
|
||||
for _ in 0..indentation_amount {
|
||||
if bytes.starts_with(&[search_char]) {
|
||||
bytes.remove(0);
|
||||
}
|
||||
// 1. If this line doesn't start with any whitespace,
|
||||
// we can return an error early because we know
|
||||
// the label was indented.
|
||||
if !bytes.starts_with(&[b' ']) && !bytes.starts_with(&[b'\t']) {
|
||||
return Err(ParseError::SyntaxError(
|
||||
SyntaxError::InvalidDocBodyIndentationLevel(
|
||||
indentation_amount,
|
||||
span,
|
||||
),
|
||||
));
|
||||
}
|
||||
|
||||
// 2. If this line doesn't start with the correct
|
||||
// type of whitespace, we can also return an error.
|
||||
if !bytes.starts_with(&[indentation_char]) {
|
||||
return Err(ParseError::SyntaxError(
|
||||
SyntaxError::InvalidDocIndentation(span),
|
||||
));
|
||||
}
|
||||
|
||||
// 3. We now know that the whitespace at the start of
|
||||
// this line is correct, so we need to check that the
|
||||
// amount of whitespace is correct too. In this case,
|
||||
// the amount of whitespace just needs to be at least
|
||||
// the same, so we can create a vector containing the
|
||||
// minimum and check using `starts_with()`.
|
||||
let expected_whitespace_buffer =
|
||||
vec![indentation_char; indentation_amount];
|
||||
if !bytes.starts_with(&expected_whitespace_buffer) {
|
||||
return Err(ParseError::SyntaxError(
|
||||
SyntaxError::InvalidDocBodyIndentationLevel(
|
||||
indentation_amount,
|
||||
span,
|
||||
),
|
||||
));
|
||||
}
|
||||
|
||||
// 4. All of the above checks have passed, so we know
|
||||
// there are no more possible errors. Let's now
|
||||
// strip the leading whitespace accordingly.
|
||||
*bytes = bytes
|
||||
.strip_prefix(&expected_whitespace_buffer[..])
|
||||
.unwrap()
|
||||
.into();
|
||||
new_line = bytes.ends_with(&[b'\n']);
|
||||
}
|
||||
_ => continue,
|
||||
}
|
||||
@ -1014,31 +1063,71 @@ fn doc_string(state: &mut State, kind: DocStringKind) -> ParseResult<Expression>
|
||||
Expression::Heredoc { parts }
|
||||
}
|
||||
DocStringKind::Nowdoc => {
|
||||
// FIXME: This feels hacky. We should probably produce different tokens from the lexer
|
||||
// but since I already had the logic in place for parsing heredocs, this was
|
||||
// the fastest way to get nowdocs working too.
|
||||
let mut s = expect_token!([
|
||||
TokenKind::StringPart(s) => s
|
||||
], state, "constant string");
|
||||
let mut string_part = expect_token!([
|
||||
TokenKind::StringPart(s) => s,
|
||||
], state, "constant string");
|
||||
|
||||
let (indentation_type, indentation_amount) = expect_token!([
|
||||
TokenKind::EndDocString(_, indentation_type, indentation_amount) => (indentation_type, indentation_amount)
|
||||
], state, "label");
|
||||
let (indentation_type, indentation_amount) = match state.current.kind {
|
||||
TokenKind::EndDocString(_, indentation_type, indentation_amount) => {
|
||||
(indentation_type, indentation_amount)
|
||||
}
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
// FIXME: Hacky code, but it's late and I want to get this done.
|
||||
if let Some(indentation_type) = indentation_type {
|
||||
let search_char: u8 = indentation_type.into();
|
||||
let mut lines = s
|
||||
state.next();
|
||||
|
||||
if indentation_type != DocStringIndentationKind::None {
|
||||
let indentation_char: u8 = indentation_type.into();
|
||||
|
||||
let mut lines = string_part
|
||||
.split(|b| *b == b'\n')
|
||||
.map(|s| s.to_vec())
|
||||
.collect::<Vec<Vec<u8>>>();
|
||||
|
||||
for line in lines.iter_mut() {
|
||||
for _ in 0..indentation_amount {
|
||||
if line.starts_with(&[search_char]) {
|
||||
line.remove(0);
|
||||
}
|
||||
if line.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// 1. If this line doesn't start with any whitespace,
|
||||
// we can return an error early because we know
|
||||
// the label was indented.
|
||||
if !line.starts_with(&[b' ']) && !line.starts_with(&[b'\t']) {
|
||||
return Err(ParseError::SyntaxError(
|
||||
SyntaxError::InvalidDocBodyIndentationLevel(indentation_amount, span),
|
||||
));
|
||||
}
|
||||
|
||||
// 2. If this line doesn't start with the correct
|
||||
// type of whitespace, we can also return an error.
|
||||
if !line.starts_with(&[indentation_char]) {
|
||||
return Err(ParseError::SyntaxError(SyntaxError::InvalidDocIndentation(
|
||||
span,
|
||||
)));
|
||||
}
|
||||
|
||||
// 3. We now know that the whitespace at the start of
|
||||
// this line is correct, so we need to check that the
|
||||
// amount of whitespace is correct too. In this case,
|
||||
// the amount of whitespace just needs to be at least
|
||||
// the same, so we can create a vector containing the
|
||||
// minimum and check using `starts_with()`.
|
||||
let expected_whitespace_buffer = vec![indentation_char; indentation_amount];
|
||||
if !line.starts_with(&expected_whitespace_buffer) {
|
||||
return Err(ParseError::SyntaxError(
|
||||
SyntaxError::InvalidDocBodyIndentationLevel(indentation_amount, span),
|
||||
));
|
||||
}
|
||||
|
||||
// 4. All of the above checks have passed, so we know
|
||||
// there are no more possible errors. Let's now
|
||||
// strip the leading whitespace accordingly.
|
||||
*line = line
|
||||
.strip_prefix(&expected_whitespace_buffer[..])
|
||||
.unwrap()
|
||||
.into();
|
||||
}
|
||||
|
||||
let mut bytes = Vec::new();
|
||||
for (i, line) in lines.iter().enumerate() {
|
||||
bytes.extend(line);
|
||||
@ -1046,10 +1135,10 @@ fn doc_string(state: &mut State, kind: DocStringKind) -> ParseResult<Expression>
|
||||
bytes.push(b'\n');
|
||||
}
|
||||
}
|
||||
s = bytes.into();
|
||||
string_part = bytes.into();
|
||||
}
|
||||
|
||||
Expression::Nowdoc { value: s }
|
||||
Expression::Nowdoc { value: string_part }
|
||||
}
|
||||
})
|
||||
}
|
||||
|
1
tests/fixtures/0226/lexer-error.txt
vendored
1
tests/fixtures/0226/lexer-error.txt
vendored
@ -1 +0,0 @@
|
||||
InvalidDocBodyIndentationLevel(4, (5, 5)) -> Syntax Error: Invalid body indentation level - expecting an indentation level of at least 4 on line 5
|
1
tests/fixtures/0226/parser-error.txt
vendored
Normal file
1
tests/fixtures/0226/parser-error.txt
vendored
Normal file
@ -0,0 +1 @@
|
||||
SyntaxError(InvalidDocBodyIndentationLevel(4, (3, 1))) -> Syntax Error: Invalid body indentation level - expecting an indentation level of at least 4 on line 3
|
1
tests/fixtures/0227/lexer-error.txt
vendored
1
tests/fixtures/0227/lexer-error.txt
vendored
@ -1 +0,0 @@
|
||||
InvalidDocIndentation((5, 1)) -> Syntax Error: Invalid indentation - cannot use tabs and spaces on line 5
|
1
tests/fixtures/0227/parser-error.txt
vendored
Normal file
1
tests/fixtures/0227/parser-error.txt
vendored
Normal file
@ -0,0 +1 @@
|
||||
SyntaxError(InvalidDocIndentation((3, 1))) -> Syntax Error: Invalid indentation - cannot use tabs and spaces on line 3
|
1
tests/fixtures/0233/lexer-error.txt
vendored
1
tests/fixtures/0233/lexer-error.txt
vendored
@ -1 +0,0 @@
|
||||
InvalidDocBodyIndentationLevel(4, (5, 5)) -> Syntax Error: Invalid body indentation level - expecting an indentation level of at least 4 on line 5
|
1
tests/fixtures/0233/parser-error.txt
vendored
Normal file
1
tests/fixtures/0233/parser-error.txt
vendored
Normal file
@ -0,0 +1 @@
|
||||
SyntaxError(InvalidDocBodyIndentationLevel(4, (3, 1))) -> Syntax Error: Invalid body indentation level - expecting an indentation level of at least 4 on line 3
|
1
tests/fixtures/0234/lexer-error.txt
vendored
1
tests/fixtures/0234/lexer-error.txt
vendored
@ -1 +0,0 @@
|
||||
InvalidDocBodyIndentationLevel(4, (5, 5)) -> Syntax Error: Invalid body indentation level - expecting an indentation level of at least 4 on line 5
|
1
tests/fixtures/0234/parser-error.txt
vendored
Normal file
1
tests/fixtures/0234/parser-error.txt
vendored
Normal file
@ -0,0 +1 @@
|
||||
SyntaxError(InvalidDocBodyIndentationLevel(4, (3, 1))) -> Syntax Error: Invalid body indentation level - expecting an indentation level of at least 4 on line 3
|
Loading…
Reference in New Issue
Block a user