Lexer determines if % is percent or modulo & wraps input in parens

This commit is contained in:
Kasper 2019-12-14 22:12:02 +01:00
parent f7a5481268
commit 8c156fcf15
2 changed files with 58 additions and 10 deletions

View File

@@ -1,8 +1,10 @@
 use std::str::FromStr;
 use decimal::d128;
 use crate::{Token, TokenVector};
-use crate::Operator::{Caret, Divide, Factorial, LeftParen, Minus, Modulo, ModuloOrPercent, Multiply, Of, Plus, RightParen, To};
+use crate::Operator::{Percent, Caret, Divide, Factorial, LeftParen, Minus, Modulo, Multiply, Plus, RightParen};
+use crate::TextOperator::{Of, To};
 use crate::Identifier::{Acos, Acosh, Asin, Asinh, Atan, Atanh, Cbrt, Ceil, Cos, Cosh, Exp, Fabs, Floor, Ln, Log, Pi, Round, Sin, Sinh, Sqrt, Tan, Tanh, E};
+use crate::Unit::{Normal};
 pub fn lex(input: &str) -> Result<TokenVector, String> {
@@ -20,7 +22,7 @@ pub fn lex(input: &str) -> Result<TokenVector, String> {
       '-' => tokens.push(Token::Operator(Minus)),
       '*' => tokens.push(Token::Operator(Multiply)),
       '/' => tokens.push(Token::Operator(Divide)),
-      '%' => tokens.push(Token::Operator(ModuloOrPercent)),
+      '%' => tokens.push(Token::Operator(Modulo)),
       '^' => tokens.push(Token::Operator(Caret)),
       '!' => tokens.push(Token::Operator(Factorial)),
       '(' => {
@@ -57,8 +59,8 @@ pub fn lex(input: &str) -> Result<TokenVector, String> {
           // MAKE SURE max_word_length IS EQUAL TO THE
           // LENGTH OF THE LONGEST STRING IN THIS MATCH STATEMENT.
-          "to" => tokens.push(Token::Operator(To)),
-          "of" => tokens.push(Token::Operator(Of)),
+          "to" => tokens.push(Token::TextOperator(To)),
+          "of" => tokens.push(Token::TextOperator(Of)),
           "pi" => tokens.push(Token::Identifier(Pi)),
           "e" => tokens.push(Token::Identifier(E)),
@@ -113,7 +115,7 @@ pub fn lex(input: &str) -> Result<TokenVector, String> {
       match d128::from_str(number_string) {
         Ok(number) => {
           if d128::get_status().is_empty() {
-            tokens.push(Token::Number(number));
+            tokens.push(Token::Number((number, Normal)));
           } else {
             return Err(format!("Error parsing d128 number: {}", number_string));
           }
@@ -135,6 +137,7 @@ pub fn lex(input: &str) -> Result<TokenVector, String> {
     byte_index += current_char.len_utf8();
   };
 
+  // auto insert missing parentheses in first and last position
   if left_paren_count > right_paren_count {
     println!("Added right_parens");
     let missing_right_parens = left_paren_count - right_paren_count;
@@ -149,5 +152,37 @@ pub fn lex(input: &str) -> Result<TokenVector, String> {
       tokens.insert(0, Token::Operator(LeftParen));
     }
   }
+
+  // wrap in parentheses acting as start and end for parsing.
+  tokens.push(Token::Operator(RightParen));
+  tokens.insert(0, Token::Operator(LeftParen));
+
+  // the lexer parses percentages as modulo, so here modulos become percentages
+  let mut token_index = 0;
+  for _i in 1..tokens.len() {
+    match tokens[token_index] {
+      Token::Operator(Modulo) => {
+        match &tokens[token_index + 1] {
+          Token::TextOperator(Of) => {
+            // for example "10% of 1km" should be a percentage, not modulo
+            tokens[token_index] = Token::Operator(Percent);
+          },
+          Token::Operator(operator) => {
+            match operator {
+              LeftParen => {},
+              _ => {
+                // for example "10%*2" should be a percentage, but "10%(2)" should be modulo
+                tokens[token_index] = Token::Operator(Percent);
+              }
+            }
+          },
+          _ => {},
+        }
+      }
+      _ => {},
+    }
+    token_index += 1;
+  }
+
   return Ok(tokens)
 }

View File

@@ -7,12 +7,16 @@ pub enum Operator {
   Minus,
   Multiply,
   Divide,
-  ModuloOrPercent,
   Modulo,
+  Percent,
   Caret,
   Factorial,
-  LeftParen,
-  RightParen,
+  LeftParen, // lexer only
+  RightParen, // lexer only
+}
+
+#[derive(Debug)]
+pub enum TextOperator {
   To,
   Of,
 }
@@ -48,11 +52,18 @@ pub enum Identifier {
   Atanh,
 }
 
+#[derive(Debug)]
+pub enum Unit {
+  Normal,
+}
+
 #[derive(Debug)]
 pub enum Token {
   Operator(Operator),
-  Number(d128),
+  Number((d128, Unit)),
   Identifier(Identifier),
+  Paren, // parser only
+  TextOperator(TextOperator),
 }
 pub type TokenVector = Vec<Token>;
@@ -67,7 +78,9 @@ fn main() {
   let s = if args.len() == 2 { &args[1] } else { "0.1" };
   match lexer::lex(s) {
-    Ok(vector) => println!("{:?}", vector),
+    Ok(tokens) => {
+      println!("Lexed TokenVector: {:?}", tokens);
+    },
     Err(e) => println!("lexing error: {}", e),
   }