use std::str::FromStr; use decimal::d128; use crate::{Token, TokenVector}; use crate::Operator::{Caret, Divide, Factorial, LeftParen, Minus, Modulo, Multiply, PercentOrModulo, Plus, RightParen}; use crate::Identifier::{Acos, Acosh, Asin, Asinh, Atan, Atanh, Cbrt, Ceil, Cos, Cosh, Exp, Fabs, Floor, Ln, Log, Pi, Round, Sin, Sinh, Sqrt, Tan, Tanh, E}; pub fn lex(input: &str) -> Result { let mut chars = input.chars().enumerate().peekable(); let mut tokens: TokenVector = vec![]; let max_word_length = 5; let mut byte_index = 0; while let Some((_index, current_char)) = chars.next() { match current_char { '+' => tokens.push(Token::Operator(Plus)), '-' => tokens.push(Token::Operator(Minus)), '*' => tokens.push(Token::Operator(Multiply)), '/' => tokens.push(Token::Operator(Divide)), '%' => tokens.push(Token::Operator(PercentOrModulo)), '^' => tokens.push(Token::Operator(Caret)), '!' => tokens.push(Token::Operator(Factorial)), '(' => tokens.push(Token::Operator(LeftParen)), ')' => tokens.push(Token::Operator(RightParen)), 'π' => tokens.push(Token::Identifier(Pi)), ',' => continue, value if value.is_whitespace() => continue, value if value.is_alphabetic() => { let start_index = byte_index; let mut end_index = byte_index; while let Some((_index, current_char)) = chars.peek() { // don't loop more than max_word_length: if end_index >= start_index + max_word_length - 1 { break; } if current_char.is_alphabetic() { chars.next(); end_index += 1; } else { break; } } let string = &input[start_index..=end_index]; println!("{}", string); match string { // MAKE SURE max_word_length IS EQUAL TO THE // LENGTH OF THE LONGEST STRING IN THIS MATCH STATEMENT. "pi" => tokens.push(Token::Identifier(Pi)), "e" => tokens.push(Token::Identifier(E)), "mod" => tokens.push(Token::Operator(Modulo)), "sqrt" => tokens.push(Token::Identifier(Sqrt)), "cbrt" => tokens.push(Token::Identifier(Cbrt)), "log" => tokens.push(Token::Identifier(Log)), "ln" => tokens.push(Token::Identifier(Ln)), "exp" => tokens.push(Token::Identifier(Exp)), "ceil" => tokens.push(Token::Identifier(Ceil)), "floor" => tokens.push(Token::Identifier(Floor)), "round" | "rint" => tokens.push(Token::Identifier(Round)), "fabs" => tokens.push(Token::Identifier(Fabs)), "sin" => tokens.push(Token::Identifier(Sin)), "cos" => tokens.push(Token::Identifier(Cos)), "tan" => tokens.push(Token::Identifier(Tan)), "asin" => tokens.push(Token::Identifier(Asin)), "acos" => tokens.push(Token::Identifier(Acos)), "atan" => tokens.push(Token::Identifier(Atan)), "sinh" => tokens.push(Token::Identifier(Sinh)), "cosh" => tokens.push(Token::Identifier(Cosh)), "tanh" => tokens.push(Token::Identifier(Tanh)), "asinh" => tokens.push(Token::Identifier(Asinh)), "acosh" => tokens.push(Token::Identifier(Acosh)), "atanh" => tokens.push(Token::Identifier(Atanh)), _ => { return Err(format!("Invalid string: {}", string)) } } }, '.' | '0'..='9' => { let start_index = byte_index; let mut end_index = byte_index; while let Some((_index, current_char)) = chars.peek() { if current_char == &'.' || current_char.is_digit(10) { chars.next(); end_index += 1; } else { break; } } let number_string = &input[start_index..=end_index]; match d128::from_str(number_string) { Ok(number) => { if d128::get_status().is_empty() { tokens.push(Token::Number(number)); } else { return Err(format!("Error parsing d128 number: {}", number_string)); } }, Err(_e) => { return Err(format!("Error parsing d128 number: {}", number_string)); } }; }, _ => { return Err(format!("Invalid character: {}", current_char)); }, } // The π character, for example, is more than one byte, so in that case // byte_index needs to be incremented by 2. This is because we're slicing // strings to get digits/words, and Rust slices bytes, not utf8 graphemes // (aka "user-perceived characters"). byte_index += current_char.len_utf8(); }; return Ok(tokens) }