Added parser with most things supported

This commit is contained in:
Kasper 2019-12-15 20:21:14 +01:00
parent 670aa7e732
commit 3587cb94a9
3 changed files with 251 additions and 41 deletions

View File

@ -3,7 +3,8 @@ use decimal::d128;
use crate::{Token, TokenVector};
use crate::Operator::{Percent, Caret, Divide, Factorial, LeftParen, Minus, Modulo, Multiply, Plus, RightParen};
use crate::TextOperator::{Of, To};
use crate::Identifier::{Acos, Acosh, Asin, Asinh, Atan, Atanh, Cbrt, Ceil, Cos, Cosh, Exp, Fabs, Floor, Ln, Log, Pi, Round, Sin, Sinh, Sqrt, Tan, Tanh, E};
use crate::Constant::{E, Pi};
use crate::FunctionIdentifier::{Acos, Acosh, Asin, Asinh, Atan, Atanh, Cbrt, Ceil, Cos, Cosh, Exp, Fabs, Floor, Ln, Log, Round, Sin, Sinh, Sqrt, Tan, Tanh};
use crate::Unit::{Normal};
pub fn lex(input: &str) -> Result<TokenVector, String> {
@ -33,7 +34,7 @@ pub fn lex(input: &str) -> Result<TokenVector, String> {
right_paren_count += 1;
tokens.push(Token::Operator(RightParen));
},
'π' => tokens.push(Token::Identifier(Pi)),
'π' => tokens.push(Token::Constant(Pi)),
',' => {},
value if value.is_whitespace() => {},
value if value.is_alphabetic() => {
@ -62,37 +63,37 @@ pub fn lex(input: &str) -> Result<TokenVector, String> {
"to" => tokens.push(Token::TextOperator(To)),
"of" => tokens.push(Token::TextOperator(Of)),
"pi" => tokens.push(Token::Identifier(Pi)),
"e" => tokens.push(Token::Identifier(E)),
"pi" => tokens.push(Token::Constant(Pi)),
"e" => tokens.push(Token::Constant(E)),
"mod" => tokens.push(Token::Operator(Modulo)),
"sqrt" => tokens.push(Token::Identifier(Sqrt)),
"cbrt" => tokens.push(Token::Identifier(Cbrt)),
"sqrt" => tokens.push(Token::FunctionIdentifier(Sqrt)),
"cbrt" => tokens.push(Token::FunctionIdentifier(Cbrt)),
"log" => tokens.push(Token::Identifier(Log)),
"ln" => tokens.push(Token::Identifier(Ln)),
"exp" => tokens.push(Token::Identifier(Exp)),
"log" => tokens.push(Token::FunctionIdentifier(Log)),
"ln" => tokens.push(Token::FunctionIdentifier(Ln)),
"exp" => tokens.push(Token::FunctionIdentifier(Exp)),
"ceil" => tokens.push(Token::Identifier(Ceil)),
"floor" => tokens.push(Token::Identifier(Floor)),
"round" | "rint" => tokens.push(Token::Identifier(Round)),
"fabs" => tokens.push(Token::Identifier(Fabs)),
"ceil" => tokens.push(Token::FunctionIdentifier(Ceil)),
"floor" => tokens.push(Token::FunctionIdentifier(Floor)),
"round" | "rint" => tokens.push(Token::FunctionIdentifier(Round)),
"fabs" => tokens.push(Token::FunctionIdentifier(Fabs)),
"sin" => tokens.push(Token::Identifier(Sin)),
"cos" => tokens.push(Token::Identifier(Cos)),
"tan" => tokens.push(Token::Identifier(Tan)),
"asin" => tokens.push(Token::Identifier(Asin)),
"acos" => tokens.push(Token::Identifier(Acos)),
"atan" => tokens.push(Token::Identifier(Atan)),
"sinh" => tokens.push(Token::Identifier(Sinh)),
"cosh" => tokens.push(Token::Identifier(Cosh)),
"tanh" => tokens.push(Token::Identifier(Tanh)),
"asinh" => tokens.push(Token::Identifier(Asinh)),
"acosh" => tokens.push(Token::Identifier(Acosh)),
"atanh" => tokens.push(Token::Identifier(Atanh)),
"sin" => tokens.push(Token::FunctionIdentifier(Sin)),
"cos" => tokens.push(Token::FunctionIdentifier(Cos)),
"tan" => tokens.push(Token::FunctionIdentifier(Tan)),
"asin" => tokens.push(Token::FunctionIdentifier(Asin)),
"acos" => tokens.push(Token::FunctionIdentifier(Acos)),
"atan" => tokens.push(Token::FunctionIdentifier(Atan)),
"sinh" => tokens.push(Token::FunctionIdentifier(Sinh)),
"cosh" => tokens.push(Token::FunctionIdentifier(Cosh)),
"tanh" => tokens.push(Token::FunctionIdentifier(Tanh)),
"asinh" => tokens.push(Token::FunctionIdentifier(Asinh)),
"acosh" => tokens.push(Token::FunctionIdentifier(Acosh)),
"atanh" => tokens.push(Token::FunctionIdentifier(Atanh)),
_ => {
return Err(format!("Invalid string: {}", string))
return Err(format!("Invalid string: {}", string));
}
}
@ -139,14 +140,11 @@ pub fn lex(input: &str) -> Result<TokenVector, String> {
// auto insert missing parentheses in first and last position
if left_paren_count > right_paren_count {
println!("Added right_parens");
let missing_right_parens = left_paren_count - right_paren_count;
println!("{}", missing_right_parens);
for _ in 0..missing_right_parens {
tokens.push(Token::Operator(RightParen));
}
} else if left_paren_count < right_paren_count {
println!("Added left_parens");
let missing_left_parens = right_paren_count - left_paren_count;
for _ in 0..missing_left_parens {
tokens.insert(0, Token::Operator(LeftParen));
@ -184,5 +182,5 @@ pub fn lex(input: &str) -> Result<TokenVector, String> {
token_index += 1;
}
return Ok(tokens)
Ok(tokens)
}

View File

@ -1,7 +1,7 @@
use std::time::{Instant};
use decimal::d128;
#[derive(Debug)]
#[derive(Clone, Debug)]
pub enum Operator {
Plus,
Minus,
@ -15,17 +15,20 @@ pub enum Operator {
RightParen, // lexer only
}
#[derive(Debug)]
#[derive(Clone, Debug)]
pub enum TextOperator {
To,
Of,
}
#[derive(Debug)]
pub enum Identifier {
#[derive(Clone, Debug)]
pub enum Constant {
Pi,
E,
}
#[derive(Clone, Debug)]
pub enum FunctionIdentifier {
Sqrt,
Cbrt,
@ -52,16 +55,17 @@ pub enum Identifier {
Atanh,
}
#[derive(Debug)]
#[derive(Clone, Copy, Debug)]
pub enum Unit {
Normal,
}
#[derive(Debug)]
#[derive(Clone, Debug)]
pub enum Token {
Operator(Operator),
Number((d128, Unit)),
Identifier(Identifier),
FunctionIdentifier(FunctionIdentifier),
Constant(Constant),
Paren, // parser only
TextOperator(TextOperator),
}
@ -69,21 +73,32 @@ pub enum Token {
pub type TokenVector = Vec<Token>;
mod lexer;
mod parser;
fn main() {
let now = Instant::now();
let lex_start = Instant::now();
use std::env;
let args: Vec<String> = env::args().collect();
let s = if args.len() == 2 { &args[1] } else { "0.1" };
let s = if args.len() >= 2 { &args[1] } else { "0.1" };
match lexer::lex(s) {
Ok(tokens) => {
let lex_time = Instant::now().duration_since(lex_start).as_nanos() as f32;
println!("Lexed TokenVector: {:?}", tokens);
let parse_start = Instant::now();
match parser::parse(&tokens) {
Ok(ast) => {
let parse_time = Instant::now().duration_since(parse_start).as_nanos() as f32;
println!("Parsed AstNode: {:#?}", ast);
println!("\u{23f1} {:.3}ms lexing", lex_time/1000.0/1000.0);
println!("\u{23f1} {:.3}ms parsing", parse_time/1000.0/1000.0);
},
Err(e) => println!("parsing error: {}", e),
}
},
Err(e) => println!("lexing error: {}", e),
}
let duration = Instant::now().duration_since(now).as_nanos() as f32;
println!("\u{23f1} {:.3}ms lexing", duration/1000.0/1000.0);
}

197
src/parser.rs Normal file
View File

@ -0,0 +1,197 @@
use crate::{Token, TokenVector};
#[allow(unused_imports)]
use crate::Operator::{Percent, Caret, Divide, Factorial, LeftParen, Minus, Modulo, Multiply, Plus, RightParen};
use crate::TextOperator::{To, Of};
/// One node of the abstract syntax tree built by the parser.
///
/// A node carries the `Token` it was created from plus its operand
/// sub-trees; leaves (numbers, constants) simply have no children.
#[derive(Debug)]
pub struct AstNode {
  children: Vec<AstNode>,
  entry: Token,
}

impl AstNode {
  /// Creates a childless node wrapping `token`; callers push operand
  /// nodes onto `children` afterwards.
  pub fn new(token: Token) -> AstNode {
    AstNode { entry: token, children: Vec::new() }
  }
}
/// Parses a complete token stream into a single AST.
///
/// Delegates to the lowest-precedence level and succeeds only when that
/// parse consumed every token; a leftover token means malformed input.
pub fn parse(tokens: &TokenVector) -> Result<AstNode, String> {
  let (ast, next_pos) = parse_level_1(tokens, 0)?;
  if next_pos == tokens.len() {
    Ok(ast)
  } else {
    Err(format!("Expected end of input, found {:?} at {}", tokens[next_pos], next_pos))
  }
}
// level 1 precedence (lowest): to, of
fn parse_level_1(tokens: &TokenVector, pos: usize) -> Result<(AstNode, usize), String> {
// do higher precedences first, then come back down
let (mut node, mut pos) = parse_level_2(tokens, pos)?;
// now we loop through the next tokens
loop {
let token = tokens.get(pos);
match token {
// if there's a match, we once again do higher precedences, then come
// back down again and continue the loop
Some(&Token::TextOperator(To)) | Some(&Token::TextOperator(Of)) => {
let (right_node, next_pos) = parse_level_2(tokens, pos + 1)?;
let mut new_node = AstNode::new(token.unwrap().clone());
new_node.children.push(node);
new_node.children.push(right_node);
node = new_node;
pos = next_pos;
},
// if there's no match, we go down to a lower precedence (or, in this
// case, we're done)
_ => {
return Ok((node, pos));
},
}
}
}
// level 2 precedence: +, -
fn parse_level_2(tokens: &TokenVector, pos: usize) -> Result<(AstNode, usize), String> {
let (mut node, mut pos) = parse_level_3(tokens, pos)?;
loop {
let token = tokens.get(pos);
match token {
Some(&Token::Operator(Plus)) | Some(&Token::Operator(Minus)) => {
let (right_node, next_pos) = parse_level_3(tokens, pos + 1)?;
let mut new_node = AstNode::new(token.unwrap().clone());
new_node.children.push(node);
new_node.children.push(right_node);
node = new_node;
pos = next_pos;
},
_ => {
return Ok((node, pos));
},
}
}
}
// level 3 precedence: *, /, modulo
fn parse_level_3(tokens: &TokenVector, pos: usize) -> Result<(AstNode, usize), String> {
let (mut node, mut pos) = parse_level_4(tokens, pos)?;
loop {
let token = tokens.get(pos);
match token {
Some(&Token::Operator(Multiply)) | Some(&Token::Operator(Divide)) | Some(&Token::Operator(Modulo)) => {
let (right_node, next_pos) = parse_level_4(tokens, pos + 1)?;
let mut new_node = AstNode::new(token.unwrap().clone());
new_node.children.push(node);
new_node.children.push(right_node);
node = new_node;
pos = next_pos;
},
_ => {
return Ok((node, pos));
},
}
}
}
// level 4 precedence: ^
fn parse_level_4(tokens: &TokenVector, pos: usize) -> Result<(AstNode, usize), String> {
let (mut node, mut pos) = parse_level_5(tokens, pos)?;
loop {
let token = tokens.get(pos);
match token {
Some(&Token::Operator(Caret)) => {
let (right_node, next_pos) = parse_level_5(tokens, pos + 1)?;
let mut new_node = AstNode::new(token.unwrap().clone());
new_node.children.push(node);
new_node.children.push(right_node);
node = new_node;
pos = next_pos;
},
_ => {
return Ok((node, pos));
},
}
}
}
// level 5 precedence: !, percent
fn parse_level_5(tokens: &TokenVector, pos: usize) -> Result<(AstNode, usize), String> {
let (mut node, mut pos) = parse_level_6(tokens, pos)?;
loop {
let token = tokens.get(pos);
match token {
Some(&Token::Operator(Factorial)) | Some(&Token::Operator(Percent)) => {
// Here we are handling unary operators, aka stuff written as
// "Number Operator" (3!) instead of "Number Operator Number" (3+3).
// Therefore, if we find a match, we don't parse what comes after it.
let mut new_node = AstNode::new(token.unwrap().clone());
new_node.children.push(node);
node = new_node;
pos += 1;
},
_ => {
// let's say we parse 1+2. parse_level_6 then returns 1, and token
// is set to plus. Plus has lower precedence than level 4, so we
// don't do anything, and pass the number down to a lower precedence.
return Ok((node, pos));
},
}
}
}
// level 6 precedence: numbers, constants, function calls, parens
/// Parses the atoms of the grammar: numbers, constants, function calls of
/// the form `name(expr)`, and parenthesised sub-expressions.
///
/// Returns `Err` for unexpected end of input or an unexpected token.
/// (Previously this panicked via `.expect(...)` on end of input even
/// though the function returns `Result`; it now reports an `Err`.)
fn parse_level_6(tokens: &TokenVector, pos: usize) -> Result<(AstNode, usize), String> {
  // Bug fix: report end-of-input as an Err instead of panicking.
  let token = tokens.get(pos).ok_or_else(|| format!("Unexpected end of input at {}", pos))?;
  match token {
    &Token::Number((_number, _unit)) => Ok((AstNode::new(token.clone()), pos + 1)),
    Token::Constant(_constant) => Ok((AstNode::new(token.clone()), pos + 1)),
    Token::FunctionIdentifier(_function_identifier) => {
      let left_paren_pos = pos + 1;
      let left_paren_token = tokens.get(left_paren_pos);
      // A function identifier must be immediately followed by `(`, e.g. `log(`.
      match left_paren_token {
        Some(&Token::Operator(LeftParen)) => {
          // Parse the argument exactly like a normal parenthesised
          // expression, then wrap it in a node carrying the function token.
          let (node, next_pos) = parse_level_1(tokens, left_paren_pos + 1)?;
          if let Some(&Token::Operator(RightParen)) = tokens.get(next_pos) {
            let mut function_node = AstNode::new(token.clone());
            function_node.children.push(node);
            Ok((function_node, next_pos + 1))
          } else {
            Err(format!("Expected closing paren at {} but found {:?}", next_pos, tokens.get(next_pos)))
          }
        },
        _ => {
          // Bug fix: the format arguments were swapped (the position was
          // printed where the token belonged and vice versa).
          Err(format!("Expected ( after {:?} at {} but found {:?}", token, left_paren_pos, left_paren_token))
        },
      }
    },
    Token::Operator(LeftParen) => {
      let (node, next_pos) = parse_level_1(tokens, pos + 1)?;
      if let Some(&Token::Operator(RightParen)) = tokens.get(next_pos) {
        let mut paren = AstNode::new(Token::Paren);
        paren.children.push(node);
        Ok((paren, next_pos + 1))
      } else {
        Err(format!("Expected closing paren at {} but found {:?}", next_pos, tokens.get(next_pos)))
      }
    },
    _ => Err(format!("Unexpected token {:?}, expected paren or number", token)),
  }
}