lexer: recognise all tokens in laravel/framework

This commit is contained in:
Ryan Chandler 2022-07-22 13:55:29 +01:00
parent 8070a8dc71
commit 2e298b2cec
No known key found for this signature in database
GPG Key ID: F113BCADDB3B0CCA
4 changed files with 73 additions and 11 deletions

View File

@ -6,7 +6,7 @@ dir=$(realpath $1)
for file in $(find $dir -name "*.php") for file in $(find $dir -name "*.php")
do do
cargo run -- $file --lexer cargo run -q -- $file --lexer
if [ $? -ne 0 ] if [ $? -ne 0 ]
then then

View File

@ -8,29 +8,45 @@ use trunk_parser::Parser;
#[structopt(name = "phpast", about = "Generate an abstract syntax tree from a PHP file.")] #[structopt(name = "phpast", about = "Generate an abstract syntax tree from a PHP file.")]
struct Args { struct Args {
#[structopt(parse(from_os_str), help = "The input file to use.")] #[structopt(parse(from_os_str), help = "The input file to use.")]
file: PathBuf, file: Option<PathBuf>,
#[structopt(short, long, help = "Output the abstract syntax tree as JSON.")] #[structopt(short, long, help = "Output the abstract syntax tree as JSON.")]
json: bool, json: bool,
#[structopt(short, long, help = "Only execute the lexer on the source file.")] #[structopt(short, long, help = "Only execute the lexer on the source file.")]
lexer: bool, lexer: bool,
#[structopt(short, long, help = "Provide a string to execute.")]
run: Option<String>,
#[structopt(short, long, help = "Dump tokens.")]
dump_tokens: bool,
} }
fn main() { fn main() {
let args = Args::from_args(); let args = Args::from_args();
let input = match std::fs::read_to_string(args.file) { let input = if args.file.is_some() {
Ok(contents) => contents, match std::fs::read_to_string(args.file.unwrap()) {
Err(e) => { Ok(contents) => contents,
eprintln!("{}", e); Err(e) => {
exit(1); eprintln!("{}", e);
}, exit(1);
},
}
} else if args.run.is_some() {
args.run.unwrap()
} else {
panic!("boo!");
}; };
let mut lexer = Lexer::new(None); let mut lexer = Lexer::new(None);
let tokens = lexer.tokenize(&input[..]).unwrap(); let tokens = lexer.tokenize(&input[..]).unwrap();
if args.dump_tokens {
dbg!(&tokens);
}
if args.lexer { if args.lexer {
return; return;
} }

View File

@ -156,6 +156,23 @@ impl Lexer {
let char = it.next().unwrap(); let char = it.next().unwrap();
let kind = match char { let kind = match char {
'!' => {
self.col += 1;
TokenKind::Bang
},
'&' => {
self.col += 1;
if let Some('&') = it.peek() {
self.col += 1;
it.next();
TokenKind::BooleanAnd
} else {
TokenKind::BitAnd
}
},
'?' => { '?' => {
// This is a close tag, we can enter "Initial" mode again. // This is a close tag, we can enter "Initial" mode again.
if let Some('>') = it.peek() { if let Some('>') = it.peek() {
@ -167,7 +184,7 @@ impl Lexer {
TokenKind::CloseTag TokenKind::CloseTag
} else { } else {
todo!(); TokenKind::Question
} }
}, },
'=' => { '=' => {
@ -229,6 +246,8 @@ impl Lexer {
self.col += 1; self.col += 1;
} }
escaping = false;
buffer.push(*n); buffer.push(*n);
it.next(); it.next();
} }
@ -329,7 +348,7 @@ impl Lexer {
} }
} }
if (is_float) { if is_float {
TokenKind::Float(buffer.parse().unwrap()) TokenKind::Float(buffer.parse().unwrap())
} else { } else {
TokenKind::Int(buffer.parse().unwrap()) TokenKind::Int(buffer.parse().unwrap())
@ -465,7 +484,29 @@ impl Lexer {
}, },
'<' => { '<' => {
self.col += 1; self.col += 1;
TokenKind::LessThan
if let Some('=') = it.peek() {
it.next();
self.col += 1;
TokenKind::LessThanEquals
} else {
TokenKind::LessThan
}
},
'>' => {
self.col += 1;
if let Some('=') = it.peek() {
it.next();
self.col += 1;
TokenKind::GreaterThanEquals
} else {
TokenKind::GreaterThan
}
}, },
',' => { ',' => {
self.col += 1; self.col += 1;

View File

@ -102,6 +102,11 @@ pub enum TokenKind {
FullyQualifiedIdentifier(String), FullyQualifiedIdentifier(String),
QualifiedIdentifier(String), QualifiedIdentifier(String),
Colon, Colon,
Caret,
Question,
Bang,
And,
BitAnd,
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]