126 lines
4.7 KiB
Rust
126 lines
4.7 KiB
Rust
use std::str::FromStr;
|
|
use decimal::d128;
|
|
use crate::{Token, TokenVector};
|
|
use crate::Operator::{Caret, Divide, Factorial, LeftParen, Minus, Modulo, Multiply, PercentOrModulo, Plus, RightParen};
|
|
use crate::Identifier::{Acos, Acosh, Asin, Asinh, Atan, Atanh, Cbrt, Ceil, Cos, Cosh, Exp, Fabs, Floor, Ln, Log, Pi, Round, Sin, Sinh, Sqrt, Tan, Tanh, E};
|
|
|
|
pub fn lex(input: &str) -> Result<TokenVector, String> {
|
|
|
|
let mut chars = input.chars().enumerate().peekable();
|
|
let mut tokens: TokenVector = vec![];
|
|
let max_word_length = 5;
|
|
|
|
let mut byte_index = 0;
|
|
while let Some((_index, current_char)) = chars.next() {
|
|
match current_char {
|
|
'+' => tokens.push(Token::Operator(Plus)),
|
|
'-' => tokens.push(Token::Operator(Minus)),
|
|
'*' => tokens.push(Token::Operator(Multiply)),
|
|
'/' => tokens.push(Token::Operator(Divide)),
|
|
'%' => tokens.push(Token::Operator(PercentOrModulo)),
|
|
'^' => tokens.push(Token::Operator(Caret)),
|
|
'!' => tokens.push(Token::Operator(Factorial)),
|
|
'(' => tokens.push(Token::Operator(LeftParen)),
|
|
')' => tokens.push(Token::Operator(RightParen)),
|
|
'π' => tokens.push(Token::Identifier(Pi)),
|
|
',' => continue,
|
|
value if value.is_whitespace() => continue,
|
|
value if value.is_alphabetic() => {
|
|
|
|
let start_index = byte_index;
|
|
let mut end_index = byte_index;
|
|
while let Some((_index, current_char)) = chars.peek() {
|
|
// don't loop more than max_word_length:
|
|
if end_index >= start_index + max_word_length - 1 { break; }
|
|
|
|
if current_char.is_alphabetic() {
|
|
chars.next();
|
|
end_index += 1;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
let string = &input[start_index..=end_index];
|
|
println!("{}", string);
|
|
match string {
|
|
|
|
// MAKE SURE max_word_length IS EQUAL TO THE
|
|
// LENGTH OF THE LONGEST STRING IN THIS MATCH STATEMENT.
|
|
|
|
"pi" => tokens.push(Token::Identifier(Pi)),
|
|
"e" => tokens.push(Token::Identifier(E)),
|
|
|
|
"mod" => tokens.push(Token::Operator(Modulo)),
|
|
|
|
"sqrt" => tokens.push(Token::Identifier(Sqrt)),
|
|
"cbrt" => tokens.push(Token::Identifier(Cbrt)),
|
|
|
|
"log" => tokens.push(Token::Identifier(Log)),
|
|
"ln" => tokens.push(Token::Identifier(Ln)),
|
|
"exp" => tokens.push(Token::Identifier(Exp)),
|
|
|
|
"ceil" => tokens.push(Token::Identifier(Ceil)),
|
|
"floor" => tokens.push(Token::Identifier(Floor)),
|
|
"round" | "rint" => tokens.push(Token::Identifier(Round)),
|
|
"fabs" => tokens.push(Token::Identifier(Fabs)),
|
|
|
|
"sin" => tokens.push(Token::Identifier(Sin)),
|
|
"cos" => tokens.push(Token::Identifier(Cos)),
|
|
"tan" => tokens.push(Token::Identifier(Tan)),
|
|
"asin" => tokens.push(Token::Identifier(Asin)),
|
|
"acos" => tokens.push(Token::Identifier(Acos)),
|
|
"atan" => tokens.push(Token::Identifier(Atan)),
|
|
"sinh" => tokens.push(Token::Identifier(Sinh)),
|
|
"cosh" => tokens.push(Token::Identifier(Cosh)),
|
|
"tanh" => tokens.push(Token::Identifier(Tanh)),
|
|
"asinh" => tokens.push(Token::Identifier(Asinh)),
|
|
"acosh" => tokens.push(Token::Identifier(Acosh)),
|
|
"atanh" => tokens.push(Token::Identifier(Atanh)),
|
|
_ => {
|
|
return Err(format!("Invalid string: {}", string))
|
|
}
|
|
}
|
|
|
|
},
|
|
'.' | '0'..='9' => {
|
|
|
|
let start_index = byte_index;
|
|
let mut end_index = byte_index;
|
|
while let Some((_index, current_char)) = chars.peek() {
|
|
if current_char == &'.' || current_char.is_digit(10) {
|
|
chars.next();
|
|
end_index += 1;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
let number_string = &input[start_index..=end_index];
|
|
match d128::from_str(number_string) {
|
|
Ok(number) => {
|
|
if d128::get_status().is_empty() {
|
|
tokens.push(Token::Number(number));
|
|
} else {
|
|
return Err(format!("Error parsing d128 number: {}", number_string));
|
|
}
|
|
},
|
|
Err(_e) => {
|
|
return Err(format!("Error parsing d128 number: {}", number_string));
|
|
}
|
|
};
|
|
|
|
},
|
|
_ => {
|
|
return Err(format!("Invalid character: {}", current_char));
|
|
},
|
|
}
|
|
// The π character, for example, is more than one byte, so in that case
|
|
// byte_index needs to be incremented by 2. This is because we're slicing
|
|
// strings to get digits/words, and Rust slices bytes, not utf8 graphemes
|
|
// (aka "user-perceived characters").
|
|
byte_index += current_char.len_utf8();
|
|
};
|
|
return Ok(tokens)
|
|
}
|