Match alphabetic characters directly, make lexer internals private

2021-07-06 20:25:29 +02:00 · 2021-07-06 20:25:29 +02:00 · c4d1cb1371
commit c4d1cb1371
parent c3f7166d28
3 changed files with 50 additions and 33 deletions
--- a/README.md
+++ b/README.md
@@ -32,7 +32,7 @@ Add `cpc` as a dependency in `Cargo.toml`.
 ## API Usage

 ```rust
-use cpc::{eval};
+use cpc::eval;
 use cpc::units::Unit;

 match eval("3m + 1cm", true, Unit::Celsius, false) {
--- a/src/lexer.rs
+++ b/src/lexer.rs
@@ -13,17 +13,21 @@ use crate::units::Unit;
 use crate::units::Unit::*;
 use unicode_segmentation::{Graphemes, UnicodeSegmentation};

-pub fn is_alphabetic_extended_str(input: &str) -> bool {
+fn is_word_char_str(input: &str) -> bool {
  let x = match input {
-    value if value.chars().all(|c| ('a'..='z').contains(&c)) => true,
-    value if value.chars().all(|c| ('A'..='Z').contains(&c)) => true,
-    "Ω" | "Ω" | "µ" | "μ" | "π" => true,
+    "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L"
+    | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X"
+    | "Y" | "Z" => true,
+    "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l"
+    | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x"
+    | "y" | "z" => true,
+    "Ω" | "Ω" | "µ" | "μ" => true,
    _ => false,
  };
  return x;
 }

-pub fn is_numeric_str(input: &str) -> bool {
+fn is_numeric_str(input: &str) -> bool {
  match input {
    "." => true,
    "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" => true,
@@ -33,10 +37,10 @@ pub fn is_numeric_str(input: &str) -> bool {

 /// Read next characters as a word, otherwise return empty string.
 /// Returns an empty string if there's leading whitespace.
-pub fn read_word_plain(chars: &mut Peekable<Graphemes>) -> String {
+fn read_word_plain(chars: &mut Peekable<Graphemes>) -> String {
  let mut word = String::new();
  while let Some(next_char) = chars.peek() {
-    if is_alphabetic_extended_str(&next_char) {
+    if is_word_char_str(&next_char) {
      word += chars.next().unwrap();
    } else {
      break;
@@ -47,7 +51,7 @@ pub fn read_word_plain(chars: &mut Peekable<Graphemes>) -> String {

 /// Read next as a word, otherwise return empty string.
 /// Leading whitespace is ignored. A trailing digit may be included.
-pub fn read_word(first_c: &str, lexer: &mut Lexer) -> String {
+fn read_word(first_c: &str, lexer: &mut Lexer) -> String {
  let chars = &mut lexer.chars;
  let mut word = first_c.trim().to_owned();
  if word == "" {
@@ -61,31 +65,33 @@ pub fn read_word(first_c: &str, lexer: &mut Lexer) -> String {
    }
  }
  while let Some(next_char) = chars.peek() {
-    if is_alphabetic_extended_str(&next_char) {
+    if is_word_char_str(&next_char) {
      word += chars.next().unwrap();
    } else {
      break;
    }
  }
-  match *chars.peek().unwrap_or(&"") {
-    "2" | "²" => {
-      word += "2";
-      chars.next();
-    },
-    "3" | "³" => {
-      word += "3";
-      chars.next();
-    },
-    _ => {},
+  if word != "" {
+    match *chars.peek().unwrap_or(&"") {
+      "2" | "²" => {
+        word += "2";
+        chars.next();
+      },
+      "3" | "³" => {
+        word += "3";
+        chars.next();
+      },
+      _ => {},
+    }
  }
  return word;
 }

-pub fn parse_token(c: &str, lexer: &mut Lexer) -> Result<(), String> {
+fn parse_token(c: &str, lexer: &mut Lexer) -> Result<(), String> {
  let tokens = &mut lexer.tokens;
  match c {
    value if value.trim().is_empty() => {},
-    value if is_alphabetic_extended_str(&value) => {
+    value if is_word_char_str(&value) => {
      parse_word(read_word(c, lexer).as_str(), lexer)?;
    },
    value if is_numeric_str(value) => {
@@ -138,7 +144,15 @@ pub fn parse_token(c: &str, lexer: &mut Lexer) -> Result<(), String> {
  Ok(())
 }

-pub fn parse_word(word: &str, lexer: &mut Lexer) -> Result<(), String> {
+fn parse_word_if_non_empty(word: &str, lexer: &mut Lexer) -> Result<(), String> {
+  match word {
+    "" => Ok(()),
+    _ => parse_word(word, lexer)
+  }
+}
+
+fn parse_word(word: &str, lexer: &mut Lexer) -> Result<(), String> {
+  println!("word {}", word);
  let token = match word {
    "to" => Token::TextOperator(To),
    "of" => Token::TextOperator(Of),
@@ -337,7 +351,7 @@ pub fn parse_word(word: &str, lexer: &mut Lexer) -> Result<(), String> {
            other => {
              lexer.tokens.push(Token::Unit(Pound));
              lexer.tokens.push(Token::Operator(Minus));
-              parse_token(&other, lexer)?;
+              parse_word_if_non_empty(&other, lexer)?;
              return Ok(());
            }
          }
@@ -464,7 +478,7 @@ pub fn parse_word(word: &str, lexer: &mut Lexer) -> Result<(), String> {
        "hr" | "hrs" | "hour" | "hours" => Token::Unit(WattHour),
        other => {
          lexer.tokens.push(Token::Unit(Watt));
-          parse_token(other, lexer)?;
+          parse_word_if_non_empty(other, lexer)?;
          return Ok(());
        },
      }
@@ -474,7 +488,7 @@ pub fn parse_word(word: &str, lexer: &mut Lexer) -> Result<(), String> {
        "hr" | "hrs" | "hour" | "hours" => Token::Unit(KilowattHour),
        other => {
          lexer.tokens.push(Token::Unit(Kilowatt));
-          parse_token(other, lexer)?;
+          parse_word_if_non_empty(other, lexer)?;
          return Ok(());
        },
      }
@@ -484,7 +498,7 @@ pub fn parse_word(word: &str, lexer: &mut Lexer) -> Result<(), String> {
        "hr" | "hrs" | "hour" | "hours" => Token::Unit(MegawattHour),
        other => {
          lexer.tokens.push(Token::Unit(Megawatt));
-          parse_token(other, lexer)?;
+          parse_word_if_non_empty(other, lexer)?;
          return Ok(());
        },
      }
@@ -494,7 +508,7 @@ pub fn parse_word(word: &str, lexer: &mut Lexer) -> Result<(), String> {
        "hr" | "hrs" | "hour" | "hours" => Token::Unit(GigawattHour),
        other => {
          lexer.tokens.push(Token::Unit(Gigawatt));
-          parse_token(other, lexer)?;
+          parse_word_if_non_empty(other, lexer)?;
          return Ok(());
        },
      }
@@ -504,7 +518,7 @@ pub fn parse_word(word: &str, lexer: &mut Lexer) -> Result<(), String> {
        "hr" | "hrs" | "hour" | "hours" => Token::Unit(TerawattHour),
        other => {
          lexer.tokens.push(Token::Unit(Terawatt));
-          parse_token(other, lexer)?;
+          parse_word_if_non_empty(other, lexer)?;
          return Ok(());
        },
      }
@@ -514,7 +528,7 @@ pub fn parse_word(word: &str, lexer: &mut Lexer) -> Result<(), String> {
        "hr" | "hrs" | "hour" | "hours" => Token::Unit(PetawattHour),
        other => {
          lexer.tokens.push(Token::Unit(Petawatt));
-          parse_token(other, lexer)?;
+          parse_word_if_non_empty(other, lexer)?;
          return Ok(());
        },
      }
@@ -574,7 +588,7 @@ pub fn parse_word(word: &str, lexer: &mut Lexer) -> Result<(), String> {
  return Ok(());
 }

-pub struct Lexer<'a> {
+struct Lexer<'a> {
  left_paren_count: u16,
  right_paren_count: u16,
  chars: Peekable<Graphemes<'a>>,
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -8,7 +8,7 @@
 //! 
 //! # Example usage
 //! ```rust
-//! use cpc::{eval};
+//! use cpc::eval;
 //! use cpc::units::Unit;
 //! 
 //! match eval("3m + 1cm", true, Unit::Celsius, false) {
@@ -214,7 +214,7 @@ macro_rules! numtok {
 /// 
 /// Example:
 /// ```rust
-/// use cpc::{eval};
+/// use cpc::eval;
 /// use cpc::units::Unit;
 /// 
 /// match eval("3m + 1cm", true, Unit::Celsius, false) {
@@ -230,12 +230,14 @@ macro_rules! numtok {
 pub fn eval(input: &str, allow_trailing_operators: bool, default_degree: Unit, verbose: bool) -> Result<Number, String> {

  let lex_start = Instant::now();
+  println!("lex");

  match lexer::lex(input, allow_trailing_operators, default_degree) {
    Ok(tokens) => {
      let lex_time = Instant::now().duration_since(lex_start).as_nanos() as f32;
      if verbose == true { println!("Lexed TokenVector: {:?}", tokens); }

+      println!("parse");
      let parse_start = Instant::now();
      match parser::parse(&tokens) {
        Ok(ast) => {
@@ -243,6 +245,7 @@ pub fn eval(input: &str, allow_trailing_operators: bool, default_degree: Unit, v
          if verbose == true { println!("Parsed AstNode: {:#?}", ast); }

          let eval_start = Instant::now();
+          println!("eval");
          match evaluator::evaluate(&ast) {
            Ok(answer) => {
              let eval_time = Instant::now().duration_since(eval_start).as_nanos() as f32;