Added parser with most things supported

This commit is contained in:
Kasper 2019-12-15 20:21:14 +01:00
parent 670aa7e732
commit 3587cb94a9
3 changed files with 251 additions and 41 deletions

View File

@ -3,7 +3,8 @@ use decimal::d128;
use crate::{Token, TokenVector};
use crate::Operator::{Percent, Caret, Divide, Factorial, LeftParen, Minus, Modulo, Multiply, Plus, RightParen};
use crate::TextOperator::{Of, To};
use crate::Identifier::{Acos, Acosh, Asin, Asinh, Atan, Atanh, Cbrt, Ceil, Cos, Cosh, Exp, Fabs, Floor, Ln, Log, Pi, Round, Sin, Sinh, Sqrt, Tan, Tanh, E};
use crate::Constant::{E, Pi};
use crate::FunctionIdentifier::{Acos, Acosh, Asin, Asinh, Atan, Atanh, Cbrt, Ceil, Cos, Cosh, Exp, Fabs, Floor, Ln, Log, Round, Sin, Sinh, Sqrt, Tan, Tanh};
use crate::Unit::{Normal};
pub fn lex(input: &str) -> Result<TokenVector, String> {
@ -33,7 +34,7 @@ pub fn lex(input: &str) -> Result<TokenVector, String> {
right_paren_count += 1;
tokens.push(Token::Operator(RightParen));
},
'π' => tokens.push(Token::Identifier(Pi)),
'π' => tokens.push(Token::Constant(Pi)),
',' => {},
value if value.is_whitespace() => {},
value if value.is_alphabetic() => {
@ -62,37 +63,37 @@ pub fn lex(input: &str) -> Result<TokenVector, String> {
"to" => tokens.push(Token::TextOperator(To)),
"of" => tokens.push(Token::TextOperator(Of)),
"pi" => tokens.push(Token::Identifier(Pi)),
"e" => tokens.push(Token::Identifier(E)),
"pi" => tokens.push(Token::Constant(Pi)),
"e" => tokens.push(Token::Constant(E)),
"mod" => tokens.push(Token::Operator(Modulo)),
"sqrt" => tokens.push(Token::Identifier(Sqrt)),
"cbrt" => tokens.push(Token::Identifier(Cbrt)),
"sqrt" => tokens.push(Token::FunctionIdentifier(Sqrt)),
"cbrt" => tokens.push(Token::FunctionIdentifier(Cbrt)),
"log" => tokens.push(Token::Identifier(Log)),
"ln" => tokens.push(Token::Identifier(Ln)),
"exp" => tokens.push(Token::Identifier(Exp)),
"log" => tokens.push(Token::FunctionIdentifier(Log)),
"ln" => tokens.push(Token::FunctionIdentifier(Ln)),
"exp" => tokens.push(Token::FunctionIdentifier(Exp)),
"ceil" => tokens.push(Token::Identifier(Ceil)),
"floor" => tokens.push(Token::Identifier(Floor)),
"round" | "rint" => tokens.push(Token::Identifier(Round)),
"fabs" => tokens.push(Token::Identifier(Fabs)),
"ceil" => tokens.push(Token::FunctionIdentifier(Ceil)),
"floor" => tokens.push(Token::FunctionIdentifier(Floor)),
"round" | "rint" => tokens.push(Token::FunctionIdentifier(Round)),
"fabs" => tokens.push(Token::FunctionIdentifier(Fabs)),
"sin" => tokens.push(Token::Identifier(Sin)),
"cos" => tokens.push(Token::Identifier(Cos)),
"tan" => tokens.push(Token::Identifier(Tan)),
"asin" => tokens.push(Token::Identifier(Asin)),
"acos" => tokens.push(Token::Identifier(Acos)),
"atan" => tokens.push(Token::Identifier(Atan)),
"sinh" => tokens.push(Token::Identifier(Sinh)),
"cosh" => tokens.push(Token::Identifier(Cosh)),
"tanh" => tokens.push(Token::Identifier(Tanh)),
"asinh" => tokens.push(Token::Identifier(Asinh)),
"acosh" => tokens.push(Token::Identifier(Acosh)),
"atanh" => tokens.push(Token::Identifier(Atanh)),
"sin" => tokens.push(Token::FunctionIdentifier(Sin)),
"cos" => tokens.push(Token::FunctionIdentifier(Cos)),
"tan" => tokens.push(Token::FunctionIdentifier(Tan)),
"asin" => tokens.push(Token::FunctionIdentifier(Asin)),
"acos" => tokens.push(Token::FunctionIdentifier(Acos)),
"atan" => tokens.push(Token::FunctionIdentifier(Atan)),
"sinh" => tokens.push(Token::FunctionIdentifier(Sinh)),
"cosh" => tokens.push(Token::FunctionIdentifier(Cosh)),
"tanh" => tokens.push(Token::FunctionIdentifier(Tanh)),
"asinh" => tokens.push(Token::FunctionIdentifier(Asinh)),
"acosh" => tokens.push(Token::FunctionIdentifier(Acosh)),
"atanh" => tokens.push(Token::FunctionIdentifier(Atanh)),
_ => {
return Err(format!("Invalid string: {}", string))
return Err(format!("Invalid string: {}", string));
}
}
@ -139,14 +140,11 @@ pub fn lex(input: &str) -> Result<TokenVector, String> {
// auto insert missing parentheses in first and last position
if left_paren_count > right_paren_count {
println!("Added right_parens");
let missing_right_parens = left_paren_count - right_paren_count;
println!("{}", missing_right_parens);
for _ in 0..missing_right_parens {
tokens.push(Token::Operator(RightParen));
}
} else if left_paren_count < right_paren_count {
println!("Added left_parens");
let missing_left_parens = right_paren_count - left_paren_count;
for _ in 0..missing_left_parens {
tokens.insert(0, Token::Operator(LeftParen));
@ -184,5 +182,5 @@ pub fn lex(input: &str) -> Result<TokenVector, String> {
token_index += 1;
}
return Ok(tokens)
Ok(tokens)
}

View File

@ -1,7 +1,7 @@
use std::time::{Instant};
use decimal::d128;
#[derive(Debug)]
#[derive(Clone, Debug)]
pub enum Operator {
Plus,
Minus,
@ -15,17 +15,20 @@ pub enum Operator {
RightParen, // lexer only
}
#[derive(Debug)]
#[derive(Clone, Debug)]
pub enum TextOperator {
To,
Of,
}
#[derive(Debug)]
pub enum Identifier {
#[derive(Clone, Debug)]
pub enum Constant {
Pi,
E,
}
#[derive(Clone, Debug)]
pub enum FunctionIdentifier {
Sqrt,
Cbrt,
@ -52,16 +55,17 @@ pub enum Identifier {
Atanh,
}
#[derive(Debug)]
#[derive(Clone, Copy, Debug)]
pub enum Unit {
Normal,
}
#[derive(Debug)]
#[derive(Clone, Debug)]
pub enum Token {
Operator(Operator),
Number((d128, Unit)),
Identifier(Identifier),
FunctionIdentifier(FunctionIdentifier),
Constant(Constant),
Paren, // parser only
TextOperator(TextOperator),
}
@ -69,21 +73,32 @@ pub enum Token {
pub type TokenVector = Vec<Token>;
mod lexer;
mod parser;
fn main() {
let now = Instant::now();
let lex_start = Instant::now();
use std::env;
let args: Vec<String> = env::args().collect();
let s = if args.len() == 2 { &args[1] } else { "0.1" };
let s = if args.len() >= 2 { &args[1] } else { "0.1" };
match lexer::lex(s) {
Ok(tokens) => {
let lex_time = Instant::now().duration_since(lex_start).as_nanos() as f32;
println!("Lexed TokenVector: {:?}", tokens);
let parse_start = Instant::now();
match parser::parse(&tokens) {
Ok(ast) => {
let parse_time = Instant::now().duration_since(parse_start).as_nanos() as f32;
println!("Parsed AstNode: {:#?}", ast);
println!("\u{23f1} {:.3}ms lexing", lex_time/1000.0/1000.0);
println!("\u{23f1} {:.3}ms parsing", parse_time/1000.0/1000.0);
},
Err(e) => println!("parsing error: {}", e),
}
},
Err(e) => println!("lexing error: {}", e),
}
let duration = Instant::now().duration_since(now).as_nanos() as f32;
println!("\u{23f1} {:.3}ms lexing", duration/1000.0/1000.0);
}

197
src/parser.rs Normal file
View File

@ -0,0 +1,197 @@
use crate::{Token, TokenVector};
#[allow(unused_imports)]
use crate::Operator::{Percent, Caret, Divide, Factorial, LeftParen, Minus, Modulo, Multiply, Plus, RightParen};
use crate::TextOperator::{To, Of};
/// One node of the abstract syntax tree built by the parser.
///
/// A node carries the `Token` it was created from plus its operand
/// sub-trees; leaves (numbers, constants) simply have no children.
#[derive(Debug)]
pub struct AstNode {
  children: Vec<AstNode>,
  entry: Token,
}

impl AstNode {
  /// Creates a childless node wrapping `token`; callers push operand
  /// nodes onto `children` afterwards.
  pub fn new(token: Token) -> AstNode {
    AstNode { entry: token, children: Vec::new() }
  }
}
/// Parses a complete token stream into a single AST.
///
/// Delegates to the lowest-precedence level and succeeds only when that
/// parse consumed every token; a leftover token means malformed input.
pub fn parse(tokens: &TokenVector) -> Result<AstNode, String> {
  let (ast, next_pos) = parse_level_1(tokens, 0)?;
  if next_pos == tokens.len() {
    Ok(ast)
  } else {
    Err(format!("Expected end of input, found {:?} at {}", tokens[next_pos], next_pos))
  }
}
// level 1 precedence (lowest): to, of
fn parse_level_1(tokens: &TokenVector, pos: usize) -> Result<(AstNode, usize), String> {
// do higher precedences first, then come back down
let (mut node, mut pos) = parse_level_2(tokens, pos)?;
// now we loop through the next tokens
loop {
let token = tokens.get(pos);
match token {
// if there's a match, we once again do higher precedences, then come
// back down again and continue the loop
Some(&Token::TextOperator(To)) | Some(&Token::TextOperator(Of)) => {
let (right_node, next_pos) = parse_level_2(tokens, pos + 1)?;
let mut new_node = AstNode::new(token.unwrap().clone());
new_node.children.push(node);
new_node.children.push(right_node);
node = new_node;
pos = next_pos;
},
// if there's no match, we go down to a lower precedence (or, in this
// case, we're done)
_ => {
return Ok((node, pos));
},
}
}
}
// level 2 precedence: +, -
fn parse_level_2(tokens: &TokenVector, pos: usize) -> Result<(AstNode, usize), String> {
let (mut node, mut pos) = parse_level_3(tokens, pos)?;
loop {
let token = tokens.get(pos);
match token {
Some(&Token::Operator(Plus)) | Some(&Token::Operator(Minus)) => {
let (right_node, next_pos) = parse_level_3(tokens, pos + 1)?;
let mut new_node = AstNode::new(token.unwrap().clone());
new_node.children.push(node);
new_node.children.push(right_node);
node = new_node;
pos = next_pos;
},
_ => {
return Ok((node, pos));
},
}
}
}
// level 3 precedence: *, /, modulo
fn parse_level_3(tokens: &TokenVector, pos: usize) -> Result<(AstNode, usize), String> {
let (mut node, mut pos) = parse_level_4(tokens, pos)?;
loop {
let token = tokens.get(pos);
match token {
Some(&Token::Operator(Multiply)) | Some(&Token::Operator(Divide)) | Some(&Token::Operator(Modulo)) => {
let (right_node, next_pos) = parse_level_4(tokens, pos + 1)?;
let mut new_node = AstNode::new(token.unwrap().clone());
new_node.children.push(node);
new_node.children.push(right_node);
node = new_node;
pos = next_pos;
},
_ => {
return Ok((node, pos));
},
}
}
}
// level 4 precedence: ^
fn parse_level_4(tokens: &TokenVector, pos: usize) -> Result<(AstNode, usize), String> {
let (mut node, mut pos) = parse_level_5(tokens, pos)?;
loop {
let token = tokens.get(pos);
match token {
Some(&Token::Operator(Caret)) => {
let (right_node, next_pos) = parse_level_5(tokens, pos + 1)?;
let mut new_node = AstNode::new(token.unwrap().clone());
new_node.children.push(node);
new_node.children.push(right_node);
node = new_node;
pos = next_pos;
},
_ => {
return Ok((node, pos));
},
}
}
}
// level 5 precedence: !, percent
fn parse_level_5(tokens: &TokenVector, pos: usize) -> Result<(AstNode, usize), String> {
let (mut node, mut pos) = parse_level_6(tokens, pos)?;
loop {
let token = tokens.get(pos);
match token {
Some(&Token::Operator(Factorial)) | Some(&Token::Operator(Percent)) => {
// Here we are handling unary operators, aka stuff written as
// "Number Operator" (3!) instead of "Number Operator Number" (3+3).
// Therefore, if we find a match, we don't parse what comes after it.
let mut new_node = AstNode::new(token.unwrap().clone());
new_node.children.push(node);
node = new_node;
pos += 1;
},
_ => {
// let's say we parse 1+2. parse_level_6 then returns 1, and token
// is set to plus. Plus has lower precedence than level 4, so we
// don't do anything, and pass the number down to a lower precedence.
return Ok((node, pos));
},
}
}
}
// level 6 precedence: numbers, constants, function calls, parens
/// Parses the atoms of the grammar: numbers, constants, function calls of
/// the form `name(expr)`, and parenthesised sub-expressions.
///
/// Returns `Err` for unexpected end of input or an unexpected token.
/// (Previously this panicked via `.expect(...)` on end of input even
/// though the function returns `Result`; it now reports an `Err`.)
fn parse_level_6(tokens: &TokenVector, pos: usize) -> Result<(AstNode, usize), String> {
  // Bug fix: report end-of-input as an Err instead of panicking.
  let token = tokens.get(pos).ok_or_else(|| format!("Unexpected end of input at {}", pos))?;
  match token {
    &Token::Number((_number, _unit)) => Ok((AstNode::new(token.clone()), pos + 1)),
    Token::Constant(_constant) => Ok((AstNode::new(token.clone()), pos + 1)),
    Token::FunctionIdentifier(_function_identifier) => {
      let left_paren_pos = pos + 1;
      let left_paren_token = tokens.get(left_paren_pos);
      // A function identifier must be immediately followed by `(`, e.g. `log(`.
      match left_paren_token {
        Some(&Token::Operator(LeftParen)) => {
          // Parse the argument exactly like a normal parenthesised
          // expression, then wrap it in a node carrying the function token.
          let (node, next_pos) = parse_level_1(tokens, left_paren_pos + 1)?;
          if let Some(&Token::Operator(RightParen)) = tokens.get(next_pos) {
            let mut function_node = AstNode::new(token.clone());
            function_node.children.push(node);
            Ok((function_node, next_pos + 1))
          } else {
            Err(format!("Expected closing paren at {} but found {:?}", next_pos, tokens.get(next_pos)))
          }
        },
        _ => {
          // Bug fix: the format arguments were swapped (the position was
          // printed where the token belonged and vice versa).
          Err(format!("Expected ( after {:?} at {} but found {:?}", token, left_paren_pos, left_paren_token))
        },
      }
    },
    Token::Operator(LeftParen) => {
      let (node, next_pos) = parse_level_1(tokens, pos + 1)?;
      if let Some(&Token::Operator(RightParen)) = tokens.get(next_pos) {
        let mut paren = AstNode::new(Token::Paren);
        paren.children.push(node);
        Ok((paren, next_pos + 1))
      } else {
        Err(format!("Expected closing paren at {} but found {:?}", next_pos, tokens.get(next_pos)))
      }
    },
    _ => Err(format!("Unexpected token {:?}, expected paren or number", token)),
  }
}