diff --git a/Cargo.lock b/Cargo.lock index a6230f5..aeb375a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -29,6 +29,7 @@ version = "0.1.0" dependencies = [ "decimal 2.0.4 (registry+https://github.com/rust-lang/crates.io-index)", "nom 5.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-segmentation 1.6.0 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -127,6 +128,11 @@ name = "static_assertions" version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "unicode-segmentation" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "version_check" version = "0.1.5" @@ -151,4 +157,5 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" "checksum serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)" = "1217f97ab8e8904b57dd22eb61cde455fa7446a9c1cf43966066da047c1f3702" "checksum static_assertions 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "7f3eb36b47e512f8f1c9e3d10c2c1965bc992bd9cdb024fa581e2194501c83d3" +"checksum unicode-segmentation 1.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e83e153d1053cbb5a118eeff7fd5be06ed99153f00dbcd8ae310c5fb2b22edc0" "checksum version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd" diff --git a/Cargo.toml b/Cargo.toml index a242591..c7a2ccd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,4 +6,5 @@ edition = "2018" [dependencies] nom = "5.0" +unicode-segmentation = "1.6.0" decimal = "2.0.4" diff --git a/src/lexer.rs b/src/lexer.rs index 909b15a..a412ba0 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -1,13 +1,16 @@ use std::str::FromStr; use decimal::d128; -use crate::{Token, TokenVector, Operator::*}; +use crate::{Token, TokenVector}; +use crate::Operator::{Caret, Divide, Factorial, LeftParen, Minus, Multiply, PercentOrModulo, Plus, RightParen}; +use crate::Constant::{Pi, EulersNumber}; pub fn lex(input: &str) -> Result { let mut chars = input.chars().enumerate().peekable(); let mut tokens: TokenVector = vec![]; - while let Some((index, current_char)) = chars.next() { + let mut byte_index = 0; + while let Some((_index, current_char)) = chars.next() { match current_char { '+' => tokens.push(Token::Operator(Plus)), '-' => tokens.push(Token::Operator(Minus)), @@ -18,16 +21,37 @@ pub fn lex(input: &str) -> Result { '!' => tokens.push(Token::Operator(Factorial)), '(' => tokens.push(Token::Operator(LeftParen)), ')' => tokens.push(Token::Operator(RightParen)), + 'π' => tokens.push(Token::Constant(Pi)), ',' => continue, value if value.is_whitespace() => continue, value if value.is_alphabetic() => { + let start_index = byte_index; + let mut end_index = byte_index; + while let Some((_index, current_char)) = chars.peek() { + if current_char.is_alphabetic() { + chars.next(); + end_index += 1; + } else { + break; + } + } + + let string = &input[start_index..=end_index]; + match string { + "pi" => tokens.push(Token::Constant(Pi)), + "e" => tokens.push(Token::Constant(EulersNumber)), + _ => { + return Err(format!("Invalid string: {}", string)) + } + } + }, '.' | '0'..='9' => { - let start_index = index; - let mut end_index = index+1; - while let Some((_idk, current_char)) = chars.peek() { + let start_index = byte_index; + let mut end_index = byte_index; + while let Some((_index, current_char)) = chars.peek() { if current_char == &'.' || current_char.is_digit(10) { chars.next(); end_index += 1; @@ -36,24 +60,30 @@ pub fn lex(input: &str) -> Result { } } - match d128::from_str(&input[start_index..end_index]) { + let number_string = &input[start_index..=end_index]; + match d128::from_str(number_string) { Ok(number) => { if d128::get_status().is_empty() { tokens.push(Token::Number(number)); } else { - return Err(format!("Error parsing d128 number: {}", &input[start_index..end_index])); + return Err(format!("Error parsing d128 number: {}", number_string)); } }, Err(_e) => { - return Err("Error parsing d128 number (This should not happen because d128 does not throw errors)".to_owned()); + return Err(format!("Error parsing d128 number: {}", number_string)); } }; }, _ => { - return Err(format!("Unknown character: {}", current_char)); + return Err(format!("Invalid character: {}", current_char)); }, } + // The π character, for example, is more than one byte, so in that case + // byte_index needs to be incremented by 2. This is because we're slicing + // strings to get digits/words, and Rust slices bytes, not utf8 graphemes + // (aka "user-perceived characters"). + byte_index += current_char.len_utf8(); }; return Ok(tokens) } diff --git a/src/main.rs b/src/main.rs index 70dae7b..e9fba78 100644 --- a/src/main.rs +++ b/src/main.rs @@ -14,10 +14,18 @@ pub enum Operator { RightParen, } +#[derive(Debug)] +pub enum Constant { + Pi, + EulersNumber, +} + #[derive(Debug)] pub enum Token { Operator(Operator), Number(d128), + Function(String), + Constant(Constant), } pub type TokenVector = Vec;