From c4d1cb137158a35679137f03cec43cfe3ab04fec Mon Sep 17 00:00:00 2001 From: Kasper Date: Tue, 6 Jul 2021 20:25:29 +0200 Subject: [PATCH] Match alphabetic characters directly, make stuff private --- README.md | 2 +- src/lexer.rs | 74 +++++++++++++++++++++++++++++++--------------------- src/lib.rs | 7 +++-- 3 files changed, 50 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index bdfea31..ee44958 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ Add `cpc` as a dependency in `Cargo.toml`. ## API Usage ```rust -use cpc::{eval}; +use cpc::eval; use cpc::units::Unit; match eval("3m + 1cm", true, Unit::Celsius, false) { diff --git a/src/lexer.rs b/src/lexer.rs index 8b34479..1439c06 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -13,17 +13,21 @@ use crate::units::Unit; use crate::units::Unit::*; use unicode_segmentation::{Graphemes, UnicodeSegmentation}; -pub fn is_alphabetic_extended_str(input: &str) -> bool { +fn is_word_char_str(input: &str) -> bool { let x = match input { - value if value.chars().all(|c| ('a'..='z').contains(&c)) => true, - value if value.chars().all(|c| ('A'..='Z').contains(&c)) => true, - "Ω" | "Ω" | "µ" | "μ" | "π" => true, + "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" + | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" + | "Y" | "Z" => true, + "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" + | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" + | "y" | "z" => true, + "Ω" | "Ω" | "µ" | "μ" => true, _ => false, }; return x; } -pub fn is_numeric_str(input: &str) -> bool { +fn is_numeric_str(input: &str) -> bool { match input { "." => true, "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" => true, @@ -33,10 +37,10 @@ pub fn is_numeric_str(input: &str) -> bool { /// Read next characters as a word, otherwise return empty string. /// Returns an empty string if there's leading whitespace. -pub fn read_word_plain(chars: &mut Peekable) -> String { +fn read_word_plain(chars: &mut Peekable) -> String { let mut word = String::new(); while let Some(next_char) = chars.peek() { - if is_alphabetic_extended_str(&next_char) { + if is_word_char_str(&next_char) { word += chars.next().unwrap(); } else { break; @@ -47,7 +51,7 @@ pub fn read_word_plain(chars: &mut Peekable) -> String { /// Read next as a word, otherwise return empty string. /// Leading whitespace is ignored. A trailing digit may be included. -pub fn read_word(first_c: &str, lexer: &mut Lexer) -> String { +fn read_word(first_c: &str, lexer: &mut Lexer) -> String { let chars = &mut lexer.chars; let mut word = first_c.trim().to_owned(); if word == "" { @@ -61,31 +65,33 @@ pub fn read_word(first_c: &str, lexer: &mut Lexer) -> String { } } while let Some(next_char) = chars.peek() { - if is_alphabetic_extended_str(&next_char) { + if is_word_char_str(&next_char) { word += chars.next().unwrap(); } else { break; } } - match *chars.peek().unwrap_or(&"") { - "2" | "²" => { - word += "2"; - chars.next(); - }, - "3" | "³" => { - word += "3"; - chars.next(); - }, - _ => {}, + if word != "" { + match *chars.peek().unwrap_or(&"") { + "2" | "²" => { + word += "2"; + chars.next(); + }, + "3" | "³" => { + word += "3"; + chars.next(); + }, + _ => {}, + } } return word; } -pub fn parse_token(c: &str, lexer: &mut Lexer) -> Result<(), String> { +fn parse_token(c: &str, lexer: &mut Lexer) -> Result<(), String> { let tokens = &mut lexer.tokens; match c { value if value.trim().is_empty() => {}, - value if is_alphabetic_extended_str(&value) => { + value if is_word_char_str(&value) => { parse_word(read_word(c, lexer).as_str(), lexer)?; }, value if is_numeric_str(value) => { @@ -138,7 +144,15 @@ pub fn parse_token(c: &str, lexer: &mut Lexer) -> Result<(), String> { Ok(()) } -pub fn parse_word(word: &str, lexer: &mut Lexer) -> Result<(), String> { +fn parse_word_if_non_empty(word: &str, lexer: &mut Lexer) -> Result<(), String> { + match word { + "" => Ok(()), + _ => parse_word(word, lexer) + } +} + +fn parse_word(word: &str, lexer: &mut Lexer) -> Result<(), String> { + println!("word {}", word); let token = match word { "to" => Token::TextOperator(To), "of" => Token::TextOperator(Of), @@ -337,7 +351,7 @@ pub fn parse_word(word: &str, lexer: &mut Lexer) -> Result<(), String> { other => { lexer.tokens.push(Token::Unit(Pound)); lexer.tokens.push(Token::Operator(Minus)); - parse_token(&other, lexer)?; + parse_word_if_non_empty(&other, lexer)?; return Ok(()); } } @@ -464,7 +478,7 @@ pub fn parse_word(word: &str, lexer: &mut Lexer) -> Result<(), String> { "hr" | "hrs" | "hour" | "hours" => Token::Unit(WattHour), other => { lexer.tokens.push(Token::Unit(Watt)); - parse_token(other, lexer)?; + parse_word_if_non_empty(other, lexer)?; return Ok(()); }, } @@ -474,7 +488,7 @@ pub fn parse_word(word: &str, lexer: &mut Lexer) -> Result<(), String> { "hr" | "hrs" | "hour" | "hours" => Token::Unit(KilowattHour), other => { lexer.tokens.push(Token::Unit(Kilowatt)); - parse_token(other, lexer)?; + parse_word_if_non_empty(other, lexer)?; return Ok(()); }, } @@ -484,7 +498,7 @@ pub fn parse_word(word: &str, lexer: &mut Lexer) -> Result<(), String> { "hr" | "hrs" | "hour" | "hours" => Token::Unit(MegawattHour), other => { lexer.tokens.push(Token::Unit(Megawatt)); - parse_token(other, lexer)?; + parse_word_if_non_empty(other, lexer)?; return Ok(()); }, } @@ -494,7 +508,7 @@ pub fn parse_word(word: &str, lexer: &mut Lexer) -> Result<(), String> { "hr" | "hrs" | "hour" | "hours" => Token::Unit(GigawattHour), other => { lexer.tokens.push(Token::Unit(Gigawatt)); - parse_token(other, lexer)?; + parse_word_if_non_empty(other, lexer)?; return Ok(()); }, } @@ -504,7 +518,7 @@ pub fn parse_word(word: &str, lexer: &mut Lexer) -> Result<(), String> { "hr" | "hrs" | "hour" | "hours" => Token::Unit(TerawattHour), other => { lexer.tokens.push(Token::Unit(Terawatt)); - parse_token(other, lexer)?; + parse_word_if_non_empty(other, lexer)?; return Ok(()); }, } @@ -514,7 +528,7 @@ pub fn parse_word(word: &str, lexer: &mut Lexer) -> Result<(), String> { "hr" | "hrs" | "hour" | "hours" => Token::Unit(PetawattHour), other => { lexer.tokens.push(Token::Unit(Petawatt)); - parse_token(other, lexer)?; + parse_word_if_non_empty(other, lexer)?; return Ok(()); }, } @@ -574,7 +588,7 @@ pub fn parse_word(word: &str, lexer: &mut Lexer) -> Result<(), String> { return Ok(()); } -pub struct Lexer<'a> { +struct Lexer<'a> { left_paren_count: u16, right_paren_count: u16, chars: Peekable>, diff --git a/src/lib.rs b/src/lib.rs index 1c2f4ee..ca2111b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,7 +8,7 @@ //! //! # Example usage //! ```rust -//! use cpc::{eval}; +//! use cpc::eval; //! use cpc::units::Unit; //! //! match eval("3m + 1cm", true, Unit::Celsius, false) { @@ -214,7 +214,7 @@ macro_rules! numtok { /// /// Example: /// ```rust -/// use cpc::{eval}; +/// use cpc::eval; /// use cpc::units::Unit; /// /// match eval("3m + 1cm", true, Unit::Celsius, false) { @@ -230,12 +230,14 @@ macro_rules! numtok { pub fn eval(input: &str, allow_trailing_operators: bool, default_degree: Unit, verbose: bool) -> Result { let lex_start = Instant::now(); + println!("lex"); match lexer::lex(input, allow_trailing_operators, default_degree) { Ok(tokens) => { let lex_time = Instant::now().duration_since(lex_start).as_nanos() as f32; if verbose == true { println!("Lexed TokenVector: {:?}", tokens); } + println!("parse"); let parse_start = Instant::now(); match parser::parse(&tokens) { Ok(ast) => { @@ -243,6 +245,7 @@ pub fn eval(input: &str, allow_trailing_operators: bool, default_degree: Unit, v if verbose == true { println!("Parsed AstNode: {:#?}", ast); } let eval_start = Instant::now(); + println!("eval"); match evaluator::evaluate(&ast) { Ok(answer) => { let eval_time = Instant::now().duration_since(eval_start).as_nanos() as f32;