Added parser with most things supported
This commit is contained in:
parent
670aa7e732
commit
3587cb94a9
58
src/lexer.rs
58
src/lexer.rs
@ -3,7 +3,8 @@ use decimal::d128;
|
|||||||
use crate::{Token, TokenVector};
|
use crate::{Token, TokenVector};
|
||||||
use crate::Operator::{Percent, Caret, Divide, Factorial, LeftParen, Minus, Modulo, Multiply, Plus, RightParen};
|
use crate::Operator::{Percent, Caret, Divide, Factorial, LeftParen, Minus, Modulo, Multiply, Plus, RightParen};
|
||||||
use crate::TextOperator::{Of, To};
|
use crate::TextOperator::{Of, To};
|
||||||
use crate::Identifier::{Acos, Acosh, Asin, Asinh, Atan, Atanh, Cbrt, Ceil, Cos, Cosh, Exp, Fabs, Floor, Ln, Log, Pi, Round, Sin, Sinh, Sqrt, Tan, Tanh, E};
|
use crate::Constant::{E, Pi};
|
||||||
|
use crate::FunctionIdentifier::{Acos, Acosh, Asin, Asinh, Atan, Atanh, Cbrt, Ceil, Cos, Cosh, Exp, Fabs, Floor, Ln, Log, Round, Sin, Sinh, Sqrt, Tan, Tanh};
|
||||||
use crate::Unit::{Normal};
|
use crate::Unit::{Normal};
|
||||||
|
|
||||||
pub fn lex(input: &str) -> Result<TokenVector, String> {
|
pub fn lex(input: &str) -> Result<TokenVector, String> {
|
||||||
@ -33,7 +34,7 @@ pub fn lex(input: &str) -> Result<TokenVector, String> {
|
|||||||
right_paren_count += 1;
|
right_paren_count += 1;
|
||||||
tokens.push(Token::Operator(RightParen));
|
tokens.push(Token::Operator(RightParen));
|
||||||
},
|
},
|
||||||
'π' => tokens.push(Token::Identifier(Pi)),
|
'π' => tokens.push(Token::Constant(Pi)),
|
||||||
',' => {},
|
',' => {},
|
||||||
value if value.is_whitespace() => {},
|
value if value.is_whitespace() => {},
|
||||||
value if value.is_alphabetic() => {
|
value if value.is_alphabetic() => {
|
||||||
@ -62,37 +63,37 @@ pub fn lex(input: &str) -> Result<TokenVector, String> {
|
|||||||
"to" => tokens.push(Token::TextOperator(To)),
|
"to" => tokens.push(Token::TextOperator(To)),
|
||||||
"of" => tokens.push(Token::TextOperator(Of)),
|
"of" => tokens.push(Token::TextOperator(Of)),
|
||||||
|
|
||||||
"pi" => tokens.push(Token::Identifier(Pi)),
|
"pi" => tokens.push(Token::Constant(Pi)),
|
||||||
"e" => tokens.push(Token::Identifier(E)),
|
"e" => tokens.push(Token::Constant(E)),
|
||||||
|
|
||||||
"mod" => tokens.push(Token::Operator(Modulo)),
|
"mod" => tokens.push(Token::Operator(Modulo)),
|
||||||
|
|
||||||
"sqrt" => tokens.push(Token::Identifier(Sqrt)),
|
"sqrt" => tokens.push(Token::FunctionIdentifier(Sqrt)),
|
||||||
"cbrt" => tokens.push(Token::Identifier(Cbrt)),
|
"cbrt" => tokens.push(Token::FunctionIdentifier(Cbrt)),
|
||||||
|
|
||||||
"log" => tokens.push(Token::Identifier(Log)),
|
"log" => tokens.push(Token::FunctionIdentifier(Log)),
|
||||||
"ln" => tokens.push(Token::Identifier(Ln)),
|
"ln" => tokens.push(Token::FunctionIdentifier(Ln)),
|
||||||
"exp" => tokens.push(Token::Identifier(Exp)),
|
"exp" => tokens.push(Token::FunctionIdentifier(Exp)),
|
||||||
|
|
||||||
"ceil" => tokens.push(Token::Identifier(Ceil)),
|
"ceil" => tokens.push(Token::FunctionIdentifier(Ceil)),
|
||||||
"floor" => tokens.push(Token::Identifier(Floor)),
|
"floor" => tokens.push(Token::FunctionIdentifier(Floor)),
|
||||||
"round" | "rint" => tokens.push(Token::Identifier(Round)),
|
"round" | "rint" => tokens.push(Token::FunctionIdentifier(Round)),
|
||||||
"fabs" => tokens.push(Token::Identifier(Fabs)),
|
"fabs" => tokens.push(Token::FunctionIdentifier(Fabs)),
|
||||||
|
|
||||||
"sin" => tokens.push(Token::Identifier(Sin)),
|
"sin" => tokens.push(Token::FunctionIdentifier(Sin)),
|
||||||
"cos" => tokens.push(Token::Identifier(Cos)),
|
"cos" => tokens.push(Token::FunctionIdentifier(Cos)),
|
||||||
"tan" => tokens.push(Token::Identifier(Tan)),
|
"tan" => tokens.push(Token::FunctionIdentifier(Tan)),
|
||||||
"asin" => tokens.push(Token::Identifier(Asin)),
|
"asin" => tokens.push(Token::FunctionIdentifier(Asin)),
|
||||||
"acos" => tokens.push(Token::Identifier(Acos)),
|
"acos" => tokens.push(Token::FunctionIdentifier(Acos)),
|
||||||
"atan" => tokens.push(Token::Identifier(Atan)),
|
"atan" => tokens.push(Token::FunctionIdentifier(Atan)),
|
||||||
"sinh" => tokens.push(Token::Identifier(Sinh)),
|
"sinh" => tokens.push(Token::FunctionIdentifier(Sinh)),
|
||||||
"cosh" => tokens.push(Token::Identifier(Cosh)),
|
"cosh" => tokens.push(Token::FunctionIdentifier(Cosh)),
|
||||||
"tanh" => tokens.push(Token::Identifier(Tanh)),
|
"tanh" => tokens.push(Token::FunctionIdentifier(Tanh)),
|
||||||
"asinh" => tokens.push(Token::Identifier(Asinh)),
|
"asinh" => tokens.push(Token::FunctionIdentifier(Asinh)),
|
||||||
"acosh" => tokens.push(Token::Identifier(Acosh)),
|
"acosh" => tokens.push(Token::FunctionIdentifier(Acosh)),
|
||||||
"atanh" => tokens.push(Token::Identifier(Atanh)),
|
"atanh" => tokens.push(Token::FunctionIdentifier(Atanh)),
|
||||||
_ => {
|
_ => {
|
||||||
return Err(format!("Invalid string: {}", string))
|
return Err(format!("Invalid string: {}", string));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -139,14 +140,11 @@ pub fn lex(input: &str) -> Result<TokenVector, String> {
|
|||||||
|
|
||||||
// auto insert missing parentheses in first and last position
|
// auto insert missing parentheses in first and last position
|
||||||
if left_paren_count > right_paren_count {
|
if left_paren_count > right_paren_count {
|
||||||
println!("Added right_parens");
|
|
||||||
let missing_right_parens = left_paren_count - right_paren_count;
|
let missing_right_parens = left_paren_count - right_paren_count;
|
||||||
println!("{}", missing_right_parens);
|
|
||||||
for _ in 0..missing_right_parens {
|
for _ in 0..missing_right_parens {
|
||||||
tokens.push(Token::Operator(RightParen));
|
tokens.push(Token::Operator(RightParen));
|
||||||
}
|
}
|
||||||
} else if left_paren_count < right_paren_count {
|
} else if left_paren_count < right_paren_count {
|
||||||
println!("Added left_parens");
|
|
||||||
let missing_left_parens = right_paren_count - left_paren_count;
|
let missing_left_parens = right_paren_count - left_paren_count;
|
||||||
for _ in 0..missing_left_parens {
|
for _ in 0..missing_left_parens {
|
||||||
tokens.insert(0, Token::Operator(LeftParen));
|
tokens.insert(0, Token::Operator(LeftParen));
|
||||||
@ -184,5 +182,5 @@ pub fn lex(input: &str) -> Result<TokenVector, String> {
|
|||||||
token_index += 1;
|
token_index += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
return Ok(tokens)
|
Ok(tokens)
|
||||||
}
|
}
|
||||||
|
|||||||
37
src/main.rs
37
src/main.rs
@ -1,7 +1,7 @@
|
|||||||
use std::time::{Instant};
|
use std::time::{Instant};
|
||||||
use decimal::d128;
|
use decimal::d128;
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub enum Operator {
|
pub enum Operator {
|
||||||
Plus,
|
Plus,
|
||||||
Minus,
|
Minus,
|
||||||
@ -15,17 +15,20 @@ pub enum Operator {
|
|||||||
RightParen, // lexer only
|
RightParen, // lexer only
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub enum TextOperator {
|
pub enum TextOperator {
|
||||||
To,
|
To,
|
||||||
Of,
|
Of,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub enum Identifier {
|
pub enum Constant {
|
||||||
Pi,
|
Pi,
|
||||||
E,
|
E,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub enum FunctionIdentifier {
|
||||||
Sqrt,
|
Sqrt,
|
||||||
Cbrt,
|
Cbrt,
|
||||||
|
|
||||||
@ -52,16 +55,17 @@ pub enum Identifier {
|
|||||||
Atanh,
|
Atanh,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Clone, Copy, Debug)]
|
||||||
pub enum Unit {
|
pub enum Unit {
|
||||||
Normal,
|
Normal,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub enum Token {
|
pub enum Token {
|
||||||
Operator(Operator),
|
Operator(Operator),
|
||||||
Number((d128, Unit)),
|
Number((d128, Unit)),
|
||||||
Identifier(Identifier),
|
FunctionIdentifier(FunctionIdentifier),
|
||||||
|
Constant(Constant),
|
||||||
Paren, // parser only
|
Paren, // parser only
|
||||||
TextOperator(TextOperator),
|
TextOperator(TextOperator),
|
||||||
}
|
}
|
||||||
@ -69,21 +73,32 @@ pub enum Token {
|
|||||||
pub type TokenVector = Vec<Token>;
|
pub type TokenVector = Vec<Token>;
|
||||||
|
|
||||||
mod lexer;
|
mod lexer;
|
||||||
|
mod parser;
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let now = Instant::now();
|
let lex_start = Instant::now();
|
||||||
|
|
||||||
use std::env;
|
use std::env;
|
||||||
let args: Vec<String> = env::args().collect();
|
let args: Vec<String> = env::args().collect();
|
||||||
let s = if args.len() == 2 { &args[1] } else { "0.1" };
|
let s = if args.len() >= 2 { &args[1] } else { "0.1" };
|
||||||
|
|
||||||
match lexer::lex(s) {
|
match lexer::lex(s) {
|
||||||
Ok(tokens) => {
|
Ok(tokens) => {
|
||||||
|
let lex_time = Instant::now().duration_since(lex_start).as_nanos() as f32;
|
||||||
println!("Lexed TokenVector: {:?}", tokens);
|
println!("Lexed TokenVector: {:?}", tokens);
|
||||||
|
|
||||||
|
let parse_start = Instant::now();
|
||||||
|
match parser::parse(&tokens) {
|
||||||
|
Ok(ast) => {
|
||||||
|
let parse_time = Instant::now().duration_since(parse_start).as_nanos() as f32;
|
||||||
|
println!("Parsed AstNode: {:#?}", ast);
|
||||||
|
println!("\u{23f1} {:.3}ms lexing", lex_time/1000.0/1000.0);
|
||||||
|
println!("\u{23f1} {:.3}ms parsing", parse_time/1000.0/1000.0);
|
||||||
|
},
|
||||||
|
Err(e) => println!("parsing error: {}", e),
|
||||||
|
}
|
||||||
},
|
},
|
||||||
Err(e) => println!("lexing error: {}", e),
|
Err(e) => println!("lexing error: {}", e),
|
||||||
}
|
}
|
||||||
|
|
||||||
let duration = Instant::now().duration_since(now).as_nanos() as f32;
|
|
||||||
println!("\u{23f1} {:.3}ms lexing", duration/1000.0/1000.0);
|
|
||||||
}
|
}
|
||||||
|
|||||||
197
src/parser.rs
Normal file
197
src/parser.rs
Normal file
@ -0,0 +1,197 @@
|
|||||||
|
use crate::{Token, TokenVector};
|
||||||
|
#[allow(unused_imports)]
|
||||||
|
use crate::Operator::{Percent, Caret, Divide, Factorial, LeftParen, Minus, Modulo, Multiply, Plus, RightParen};
|
||||||
|
use crate::TextOperator::{To, Of};
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct AstNode {
|
||||||
|
children: Vec<AstNode>,
|
||||||
|
entry: Token,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AstNode {
|
||||||
|
pub fn new(token: Token) -> AstNode {
|
||||||
|
AstNode {
|
||||||
|
children: Vec::new(),
|
||||||
|
entry: token,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn parse(tokens: &TokenVector) -> Result<AstNode, String> {
|
||||||
|
parse_level_1(tokens, 0).and_then(|(ast, next_pos)| if next_pos == tokens.len() {
|
||||||
|
Ok(ast)
|
||||||
|
} else {
|
||||||
|
Err(format!("Expected end of input, found {:?} at {}", tokens[next_pos], next_pos))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// level 1 precedence (lowest): to, of
|
||||||
|
fn parse_level_1(tokens: &TokenVector, pos: usize) -> Result<(AstNode, usize), String> {
|
||||||
|
// do higher precedences first, then come back down
|
||||||
|
let (mut node, mut pos) = parse_level_2(tokens, pos)?;
|
||||||
|
// now we loop through the next tokens
|
||||||
|
loop {
|
||||||
|
let token = tokens.get(pos);
|
||||||
|
match token {
|
||||||
|
// if there's a match, we once again do higher precedences, then come
|
||||||
|
// back down again and continue the loop
|
||||||
|
Some(&Token::TextOperator(To)) | Some(&Token::TextOperator(Of)) => {
|
||||||
|
let (right_node, next_pos) = parse_level_2(tokens, pos + 1)?;
|
||||||
|
let mut new_node = AstNode::new(token.unwrap().clone());
|
||||||
|
new_node.children.push(node);
|
||||||
|
new_node.children.push(right_node);
|
||||||
|
node = new_node;
|
||||||
|
pos = next_pos;
|
||||||
|
},
|
||||||
|
// if there's no match, we go down to a lower precedence (or, in this
|
||||||
|
// case, we're done)
|
||||||
|
_ => {
|
||||||
|
return Ok((node, pos));
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// level 2 precedence: +, -
|
||||||
|
fn parse_level_2(tokens: &TokenVector, pos: usize) -> Result<(AstNode, usize), String> {
|
||||||
|
let (mut node, mut pos) = parse_level_3(tokens, pos)?;
|
||||||
|
loop {
|
||||||
|
let token = tokens.get(pos);
|
||||||
|
match token {
|
||||||
|
Some(&Token::Operator(Plus)) | Some(&Token::Operator(Minus)) => {
|
||||||
|
let (right_node, next_pos) = parse_level_3(tokens, pos + 1)?;
|
||||||
|
let mut new_node = AstNode::new(token.unwrap().clone());
|
||||||
|
new_node.children.push(node);
|
||||||
|
new_node.children.push(right_node);
|
||||||
|
node = new_node;
|
||||||
|
pos = next_pos;
|
||||||
|
},
|
||||||
|
_ => {
|
||||||
|
return Ok((node, pos));
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// level 3 precedence: *, /, modulo
|
||||||
|
fn parse_level_3(tokens: &TokenVector, pos: usize) -> Result<(AstNode, usize), String> {
|
||||||
|
let (mut node, mut pos) = parse_level_4(tokens, pos)?;
|
||||||
|
loop {
|
||||||
|
let token = tokens.get(pos);
|
||||||
|
match token {
|
||||||
|
Some(&Token::Operator(Multiply)) | Some(&Token::Operator(Divide)) | Some(&Token::Operator(Modulo)) => {
|
||||||
|
let (right_node, next_pos) = parse_level_4(tokens, pos + 1)?;
|
||||||
|
let mut new_node = AstNode::new(token.unwrap().clone());
|
||||||
|
new_node.children.push(node);
|
||||||
|
new_node.children.push(right_node);
|
||||||
|
node = new_node;
|
||||||
|
pos = next_pos;
|
||||||
|
},
|
||||||
|
_ => {
|
||||||
|
return Ok((node, pos));
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// level 4 precedence: ^
|
||||||
|
fn parse_level_4(tokens: &TokenVector, pos: usize) -> Result<(AstNode, usize), String> {
|
||||||
|
let (mut node, mut pos) = parse_level_5(tokens, pos)?;
|
||||||
|
loop {
|
||||||
|
let token = tokens.get(pos);
|
||||||
|
match token {
|
||||||
|
Some(&Token::Operator(Caret)) => {
|
||||||
|
let (right_node, next_pos) = parse_level_5(tokens, pos + 1)?;
|
||||||
|
let mut new_node = AstNode::new(token.unwrap().clone());
|
||||||
|
new_node.children.push(node);
|
||||||
|
new_node.children.push(right_node);
|
||||||
|
node = new_node;
|
||||||
|
pos = next_pos;
|
||||||
|
},
|
||||||
|
_ => {
|
||||||
|
return Ok((node, pos));
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// level 5 precedence: !, percent
|
||||||
|
fn parse_level_5(tokens: &TokenVector, pos: usize) -> Result<(AstNode, usize), String> {
|
||||||
|
let (mut node, mut pos) = parse_level_6(tokens, pos)?;
|
||||||
|
loop {
|
||||||
|
let token = tokens.get(pos);
|
||||||
|
match token {
|
||||||
|
Some(&Token::Operator(Factorial)) | Some(&Token::Operator(Percent)) => {
|
||||||
|
|
||||||
|
// Here we are handling unary operators, aka stuff written as
|
||||||
|
// "Number Operator" (3!) instead of "Number Operator Number" (3+3).
|
||||||
|
// Therefore, if we find a match, we don't parse what comes after it.
|
||||||
|
let mut new_node = AstNode::new(token.unwrap().clone());
|
||||||
|
new_node.children.push(node);
|
||||||
|
node = new_node;
|
||||||
|
pos += 1;
|
||||||
|
},
|
||||||
|
_ => {
|
||||||
|
// let's say we parse 1+2. parse_level_6 then returns 1, and token
|
||||||
|
// is set to plus. Plus has lower precedence than level 4, so we
|
||||||
|
// don't do anything, and pass the number down to a lower precedence.
|
||||||
|
return Ok((node, pos));
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// level 6 precedence: numbers, parens
|
||||||
|
fn parse_level_6(tokens: &TokenVector, pos: usize) -> Result<(AstNode, usize), String> {
|
||||||
|
let token: &Token = tokens.get(pos).expect(&format!("Unexpected end of input at {}", pos));
|
||||||
|
match token {
|
||||||
|
&Token::Number((_number, _unit)) => {
|
||||||
|
let node = AstNode::new(token.clone());
|
||||||
|
Ok((node, pos + 1))
|
||||||
|
},
|
||||||
|
Token::Constant(_constant) => {
|
||||||
|
let node = AstNode::new(token.clone());
|
||||||
|
Ok((node, pos + 1))
|
||||||
|
},
|
||||||
|
Token::FunctionIdentifier(_function_identifier) => {
|
||||||
|
let left_paren_pos = pos + 1;
|
||||||
|
let left_paren_token = tokens.get(left_paren_pos);
|
||||||
|
// check if ( comes after function identifier, like log(
|
||||||
|
match left_paren_token {
|
||||||
|
Some(&Token::Operator(LeftParen)) => {
|
||||||
|
// parse everything inside as you would with normal parentheses,
|
||||||
|
// then put it inside an ast node.
|
||||||
|
parse_level_1(tokens, left_paren_pos + 1).and_then(|(node, next_pos)| {
|
||||||
|
if let Some(&Token::Operator(RightParen)) = tokens.get(next_pos) {
|
||||||
|
let mut function_node = AstNode::new(token.clone());
|
||||||
|
function_node.children.push(node);
|
||||||
|
Ok((function_node, next_pos + 1))
|
||||||
|
} else {
|
||||||
|
Err(format!("Expected closing paren at {} but found {:?}", next_pos, tokens.get(next_pos)))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
},
|
||||||
|
_ => {
|
||||||
|
return Err(format!("Expected ( after {} at {:?} but found {:?}", left_paren_pos, token, left_paren_token));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ok((node, pos + 1))
|
||||||
|
},
|
||||||
|
Token::Operator(LeftParen) => {
|
||||||
|
parse_level_1(tokens, pos + 1).and_then(|(node, next_pos)| {
|
||||||
|
if let Some(&Token::Operator(RightParen)) = tokens.get(next_pos) {
|
||||||
|
let mut paren = AstNode::new(Token::Paren);
|
||||||
|
paren.children.push(node);
|
||||||
|
Ok((paren, next_pos + 1))
|
||||||
|
} else {
|
||||||
|
Err(format!("Expected closing paren at {} but found {:?}", next_pos, tokens.get(next_pos)))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
},
|
||||||
|
_ => {
|
||||||
|
Err(format!("Unexpected token {:?}, expected paren or number", token))
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user