diff --git a/src/lexer.rs b/src/lexer.rs
index c57b52a..e71fc29 100644
--- a/src/lexer.rs
+++ b/src/lexer.rs
@@ -3,7 +3,8 @@ use decimal::d128;
 use crate::{Token, TokenVector};
 use crate::Operator::{Percent, Caret, Divide, Factorial, LeftParen, Minus, Modulo, Multiply, Plus, RightParen};
 use crate::TextOperator::{Of, To};
-use crate::Identifier::{Acos, Acosh, Asin, Asinh, Atan, Atanh, Cbrt, Ceil, Cos, Cosh, Exp, Fabs, Floor, Ln, Log, Pi, Round, Sin, Sinh, Sqrt, Tan, Tanh, E};
+use crate::Constant::{E, Pi};
+use crate::FunctionIdentifier::{Acos, Acosh, Asin, Asinh, Atan, Atanh, Cbrt, Ceil, Cos, Cosh, Exp, Fabs, Floor, Ln, Log, Round, Sin, Sinh, Sqrt, Tan, Tanh};
 use crate::Unit::{Normal};
 
 pub fn lex(input: &str) -> Result<TokenVector, String> {
@@ -33,7 +34,7 @@ pub fn lex(input: &str) -> Result<TokenVector, String> {
         right_paren_count += 1;
         tokens.push(Token::Operator(RightParen));
       },
-      'π' => tokens.push(Token::Identifier(Pi)),
+      'π' => tokens.push(Token::Constant(Pi)),
       ',' => {},
       value if value.is_whitespace() => {},
       value if value.is_alphabetic() => {
@@ -62,37 +63,37 @@ pub fn lex(input: &str) -> Result<TokenVector, String> {
           "to" => tokens.push(Token::TextOperator(To)),
           "of" => tokens.push(Token::TextOperator(Of)),
 
-          "pi" => tokens.push(Token::Identifier(Pi)),
-          "e" => tokens.push(Token::Identifier(E)),
+          "pi" => tokens.push(Token::Constant(Pi)),
+          "e" => tokens.push(Token::Constant(E)),
 
           "mod" => tokens.push(Token::Operator(Modulo)),
 
-          "sqrt" => tokens.push(Token::Identifier(Sqrt)),
-          "cbrt" => tokens.push(Token::Identifier(Cbrt)),
+          "sqrt" => tokens.push(Token::FunctionIdentifier(Sqrt)),
+          "cbrt" => tokens.push(Token::FunctionIdentifier(Cbrt)),
 
-          "log" => tokens.push(Token::Identifier(Log)),
-          "ln" => tokens.push(Token::Identifier(Ln)),
-          "exp" => tokens.push(Token::Identifier(Exp)),
+          "log" => tokens.push(Token::FunctionIdentifier(Log)),
+          "ln" => tokens.push(Token::FunctionIdentifier(Ln)),
+          "exp" => tokens.push(Token::FunctionIdentifier(Exp)),
 
-          "ceil" => tokens.push(Token::Identifier(Ceil)),
-          "floor" => tokens.push(Token::Identifier(Floor)),
-          "round" | "rint" => tokens.push(Token::Identifier(Round)),
-          "fabs" => tokens.push(Token::Identifier(Fabs)),
+          "ceil" => tokens.push(Token::FunctionIdentifier(Ceil)),
+          "floor" => tokens.push(Token::FunctionIdentifier(Floor)),
+          "round" | "rint" => tokens.push(Token::FunctionIdentifier(Round)),
+          "fabs" => tokens.push(Token::FunctionIdentifier(Fabs)),
 
-          "sin" => tokens.push(Token::Identifier(Sin)),
-          "cos" => tokens.push(Token::Identifier(Cos)),
-          "tan" => tokens.push(Token::Identifier(Tan)),
-          "asin" => tokens.push(Token::Identifier(Asin)),
-          "acos" => tokens.push(Token::Identifier(Acos)),
-          "atan" => tokens.push(Token::Identifier(Atan)),
-          "sinh" => tokens.push(Token::Identifier(Sinh)),
-          "cosh" => tokens.push(Token::Identifier(Cosh)),
-          "tanh" => tokens.push(Token::Identifier(Tanh)),
-          "asinh" => tokens.push(Token::Identifier(Asinh)),
-          "acosh" => tokens.push(Token::Identifier(Acosh)),
-          "atanh" => tokens.push(Token::Identifier(Atanh)),
+          "sin" => tokens.push(Token::FunctionIdentifier(Sin)),
+          "cos" => tokens.push(Token::FunctionIdentifier(Cos)),
+          "tan" => tokens.push(Token::FunctionIdentifier(Tan)),
+          "asin" => tokens.push(Token::FunctionIdentifier(Asin)),
+          "acos" => tokens.push(Token::FunctionIdentifier(Acos)),
+          "atan" => tokens.push(Token::FunctionIdentifier(Atan)),
+          "sinh" => tokens.push(Token::FunctionIdentifier(Sinh)),
+          "cosh" => tokens.push(Token::FunctionIdentifier(Cosh)),
+          "tanh" => tokens.push(Token::FunctionIdentifier(Tanh)),
+          "asinh" => tokens.push(Token::FunctionIdentifier(Asinh)),
+          "acosh" => tokens.push(Token::FunctionIdentifier(Acosh)),
+          "atanh" => tokens.push(Token::FunctionIdentifier(Atanh)),
           _ => {
-            return Err(format!("Invalid string: {}", string))
+            return Err(format!("Invalid string: {}", string));
           }
         }
 
@@ -139,14 +140,11 @@ pub fn lex(input: &str) -> Result<TokenVector, String> {
 
   // auto insert missing parentheses in first and last position
   if left_paren_count > right_paren_count {
-    println!("Added right_parens");
     let missing_right_parens = left_paren_count - right_paren_count;
-    println!("{}", missing_right_parens);
     for _ in 0..missing_right_parens {
       tokens.push(Token::Operator(RightParen));
     }
   } else if left_paren_count < right_paren_count {
-    println!("Added left_parens");
     let missing_left_parens = right_paren_count - left_paren_count;
     for _ in 0..missing_left_parens {
       tokens.insert(0, Token::Operator(LeftParen));
@@ -184,5 +182,5 @@ pub fn lex(input: &str) -> Result<TokenVector, String> {
     token_index += 1;
   }
 
-  return Ok(tokens)
+  Ok(tokens)
 }
diff --git a/src/main.rs b/src/main.rs
index e6a0e63..42b986b 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,7 +1,7 @@
 use std::time::{Instant};
 use decimal::d128;
 
-#[derive(Debug)]
+#[derive(Clone, Debug)]
 pub enum Operator {
   Plus,
   Minus,
@@ -15,17 +15,20 @@ pub enum Operator {
   RightParen, // lexer only
 }
 
-#[derive(Debug)]
+#[derive(Clone, Debug)]
 pub enum TextOperator {
   To,
   Of,
 }
 
-#[derive(Debug)]
-pub enum Identifier {
+#[derive(Clone, Debug)]
+pub enum Constant {
   Pi,
   E,
+}
 
+#[derive(Clone, Debug)]
+pub enum FunctionIdentifier {
   Sqrt,
   Cbrt,
 
@@ -52,16 +55,17 @@ pub enum Identifier {
   Atanh,
 }
 
-#[derive(Debug)]
+#[derive(Clone, Copy, Debug)]
 pub enum Unit {
   Normal,
 }
 
-#[derive(Debug)]
+#[derive(Clone, Debug)]
 pub enum Token {
   Operator(Operator),
   Number((d128, Unit)),
-  Identifier(Identifier),
+  FunctionIdentifier(FunctionIdentifier),
+  Constant(Constant),
   Paren, // parser only
   TextOperator(TextOperator),
 }
@@ -69,21 +73,32 @@ pub enum Token {
 pub type TokenVector = Vec<Token>;
 
 mod lexer;
+mod parser;
 
 fn main() {
-  let now = Instant::now();
+  let lex_start = Instant::now();
 
   use std::env;
   let args: Vec<String> = env::args().collect();
-  let s = if args.len() == 2 { &args[1] } else { "0.1" };
+  let s = if args.len() >= 2 { &args[1] } else { "0.1" };
 
   match lexer::lex(s) {
     Ok(tokens) => {
+      let lex_time = lex_start.elapsed().as_nanos() as f32;
       println!("Lexed TokenVector: {:?}", tokens);
+
+      let parse_start = Instant::now();
+      match parser::parse(&tokens) {
+        Ok(ast) => {
+          let parse_time = parse_start.elapsed().as_nanos() as f32;
+          println!("Parsed AstNode: {:#?}", ast);
+          println!("\u{23f1} {:.3}ms lexing", lex_time/1000.0/1000.0);
+          println!("\u{23f1} {:.3}ms parsing", parse_time/1000.0/1000.0);
+        },
+        Err(e) => println!("parsing error: {}", e),
+      }
     },
     Err(e) => println!("lexing error: {}", e),
   }
 
-  let duration = Instant::now().duration_since(now).as_nanos() as f32;
-  println!("\u{23f1} {:.3}ms lexing", duration/1000.0/1000.0);
 }
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..e36f2bf
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,201 @@
+use crate::{Token, TokenVector};
+#[allow(unused_imports)]
+use crate::Operator::{Percent, Caret, Divide, Factorial, LeftParen, Minus, Modulo, Multiply, Plus, RightParen};
+use crate::TextOperator::{To, Of};
+
+/// A node of the abstract syntax tree: a token plus its operand subtrees.
+#[derive(Debug)]
+pub struct AstNode {
+  children: Vec<AstNode>,
+  entry: Token,
+}
+
+impl AstNode {
+  /// Creates a node holding `token` with no children yet.
+  pub fn new(token: Token) -> AstNode {
+    AstNode {
+      children: Vec::new(),
+      entry: token,
+    }
+  }
+}
+
+/// Parses the whole token vector into a single AST.
+///
+/// Returns `Err` when the tokens don't form a valid expression, or when
+/// tokens are left over after a complete expression has been parsed.
+pub fn parse(tokens: &TokenVector) -> Result<AstNode, String> {
+  let (ast, next_pos) = parse_level_1(tokens, 0)?;
+  if next_pos == tokens.len() {
+    Ok(ast)
+  } else {
+    Err(format!("Expected end of input, found {:?} at {}", tokens[next_pos], next_pos))
+  }
+}
+
+// level 1 precedence (lowest): to, of
+fn parse_level_1(tokens: &TokenVector, pos: usize) -> Result<(AstNode, usize), String> {
+  // do higher precedences first, then come back down
+  let (mut node, mut pos) = parse_level_2(tokens, pos)?;
+  // now we loop through the next tokens
+  loop {
+    let token = tokens.get(pos);
+    match token {
+      // if there's a match, we once again do higher precedences, then come
+      // back down again and continue the loop
+      Some(&Token::TextOperator(To)) | Some(&Token::TextOperator(Of)) => {
+        let (right_node, next_pos) = parse_level_2(tokens, pos + 1)?;
+        let mut new_node = AstNode::new(token.unwrap().clone());
+        new_node.children.push(node);
+        new_node.children.push(right_node);
+        node = new_node;
+        pos = next_pos;
+      },
+      // if there's no match, we go down to a lower precedence (or, in this
+      // case, we're done)
+      _ => {
+        return Ok((node, pos));
+      },
+    }
+  }
+}
+
+// level 2 precedence: +, -
+fn parse_level_2(tokens: &TokenVector, pos: usize) -> Result<(AstNode, usize), String> {
+  let (mut node, mut pos) = parse_level_3(tokens, pos)?;
+  loop {
+    let token = tokens.get(pos);
+    match token {
+      Some(&Token::Operator(Plus)) | Some(&Token::Operator(Minus)) => {
+        let (right_node, next_pos) = parse_level_3(tokens, pos + 1)?;
+        let mut new_node = AstNode::new(token.unwrap().clone());
+        new_node.children.push(node);
+        new_node.children.push(right_node);
+        node = new_node;
+        pos = next_pos;
+      },
+      _ => {
+        return Ok((node, pos));
+      },
+    }
+  }
+}
+
+// level 3 precedence: *, /, modulo
+fn parse_level_3(tokens: &TokenVector, pos: usize) -> Result<(AstNode, usize), String> {
+  let (mut node, mut pos) = parse_level_4(tokens, pos)?;
+  loop {
+    let token = tokens.get(pos);
+    match token {
+      Some(&Token::Operator(Multiply)) | Some(&Token::Operator(Divide)) | Some(&Token::Operator(Modulo)) => {
+        let (right_node, next_pos) = parse_level_4(tokens, pos + 1)?;
+        let mut new_node = AstNode::new(token.unwrap().clone());
+        new_node.children.push(node);
+        new_node.children.push(right_node);
+        node = new_node;
+        pos = next_pos;
+      },
+      _ => {
+        return Ok((node, pos));
+      },
+    }
+  }
+}
+
+// level 4 precedence: ^
+// NOTE(review): chained carets group to the left here (2^3^2 parses as
+// (2^3)^2) - confirm whether right-associativity is wanted instead.
+fn parse_level_4(tokens: &TokenVector, pos: usize) -> Result<(AstNode, usize), String> {
+  let (mut node, mut pos) = parse_level_5(tokens, pos)?;
+  loop {
+    let token = tokens.get(pos);
+    match token {
+      Some(&Token::Operator(Caret)) => {
+        let (right_node, next_pos) = parse_level_5(tokens, pos + 1)?;
+        let mut new_node = AstNode::new(token.unwrap().clone());
+        new_node.children.push(node);
+        new_node.children.push(right_node);
+        node = new_node;
+        pos = next_pos;
+      },
+      _ => {
+        return Ok((node, pos));
+      },
+    }
+  }
+}
+
+// level 5 precedence: !, percent
+fn parse_level_5(tokens: &TokenVector, pos: usize) -> Result<(AstNode, usize), String> {
+  let (mut node, mut pos) = parse_level_6(tokens, pos)?;
+  loop {
+    let token = tokens.get(pos);
+    match token {
+      Some(&Token::Operator(Factorial)) | Some(&Token::Operator(Percent)) => {
+        // Here we are handling unary operators, aka stuff written as
+        // "Number Operator" (3!) instead of "Number Operator Number" (3+3).
+        // Therefore, if we find a match, we don't parse what comes after it.
+        let mut new_node = AstNode::new(token.unwrap().clone());
+        new_node.children.push(node);
+        node = new_node;
+        pos += 1;
+      },
+      _ => {
+        // let's say we parse 1+2. parse_level_6 then returns 1, and token
+        // is set to plus. Plus has lower precedence than level 5, so we
+        // don't do anything, and pass the number down to a lower precedence.
+        return Ok((node, pos));
+      },
+    }
+  }
+}
+
+// level 6 precedence: numbers, parens
+fn parse_level_6(tokens: &TokenVector, pos: usize) -> Result<(AstNode, usize), String> {
+  // running out of tokens is a user input error (e.g. "1 +"), so report it
+  // as an Err instead of panicking
+  let token: &Token = tokens.get(pos).ok_or_else(|| format!("Unexpected end of input at {}", pos))?;
+  match token {
+    Token::Number(_) | Token::Constant(_) => {
+      let node = AstNode::new(token.clone());
+      Ok((node, pos + 1))
+    },
+    Token::FunctionIdentifier(_) => {
+      let left_paren_pos = pos + 1;
+      let left_paren_token = tokens.get(left_paren_pos);
+      // check if ( comes after function identifier, like log(
+      match left_paren_token {
+        Some(&Token::Operator(LeftParen)) => {
+          // parse everything inside as you would with normal parentheses,
+          // then put it inside an ast node.
+          parse_level_1(tokens, left_paren_pos + 1).and_then(|(node, next_pos)| {
+            if let Some(&Token::Operator(RightParen)) = tokens.get(next_pos) {
+              let mut function_node = AstNode::new(token.clone());
+              function_node.children.push(node);
+              Ok((function_node, next_pos + 1))
+            } else {
+              Err(format!("Expected closing paren at {} but found {:?}", next_pos, tokens.get(next_pos)))
+            }
+          })
+        },
+        _ => {
+          Err(format!("Expected ( after {:?} at {} but found {:?}", token, left_paren_pos, left_paren_token))
+        },
+      }
+    },
+    Token::Operator(LeftParen) => {
+      parse_level_1(tokens, pos + 1).and_then(|(node, next_pos)| {
+        if let Some(&Token::Operator(RightParen)) = tokens.get(next_pos) {
+          let mut paren = AstNode::new(Token::Paren);
+          paren.children.push(node);
+          Ok((paren, next_pos + 1))
+        } else {
+          Err(format!("Expected closing paren at {} but found {:?}", next_pos, tokens.get(next_pos)))
+        }
+      })
+    },
+    _ => {
+      Err(format!("Unexpected token {:?}, expected paren or number", token))
+    },
+  }
+}