Add more tests by @djmattyg007

2021-07-06 19:06:50 +02:00 · 2021-07-06 19:06:50 +02:00 · 0c1d2b38c1
commit 0c1d2b38c1
parent 84f604a96e
3 changed files with 61 additions and 8 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1,5 +1,14 @@
 # This file is automatically @generated by Cargo.
 # It is not intended for manual editing.
+[[package]]
+name = "aho-corasick"
+version = "0.7.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f"
+dependencies = [
+ "memchr",
+]
+
 [[package]]
 name = "bitflags"
 version = "1.2.1"
@@ -17,6 +26,7 @@ name = "cpc"
 version = "1.6.0"
 dependencies = [
 "decimal",
+ "regex",
 "unicode-segmentation",
 ]

@@ -40,12 +50,35 @@ version = "0.2.93"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9385f66bf6105b241aa65a61cb923ef20efc665cb9f9bb50ac2f0c4b7f378d41"

+[[package]]
+name = "memchr"
+version = "2.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc"
+
 [[package]]
 name = "ord_subset"
 version = "3.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d7ce14664caf5b27f5656ff727defd68ae1eb75ef3c4d95259361df1eb376bef"

+[[package]]
+name = "regex"
+version = "1.5.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.6.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
+
 [[package]]
 name = "rustc-serialize"
 version = "0.3.24"
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -15,3 +15,6 @@ categories = ["mathematics", "science", "parsing", "text-processing", "value-for
 [dependencies]
 decimal = "2.1.0"
 unicode-segmentation = "1.8.0"
+
+[dev-dependencies]
+regex = "1.5.4"
--- a/src/lexer.rs
+++ b/src/lexer.rs
@@ -464,6 +464,7 @@ pub fn parse_word(word: &str, lexer: &mut Lexer) -> Result<(), String> {
        "hr" | "hrs" | "hour" | "hours" => Token::Unit(WattHour),
        other => {
          lexer.tokens.push(Token::Unit(Watt));
+          println!("parse_token({})", other);
          parse_token(other, lexer)?;
          return Ok(());
        },
@@ -584,9 +585,7 @@ pub struct Lexer<'a> {

 /// Lex an input string and returns [`Token`]s
 pub fn lex(input: &str, remove_trailing_operator: bool, default_degree: Unit) -> Result<Vec<Token>, String> {
-
-  let mut input = input.replace(",", ""); // ignore commas
-  input = input.to_lowercase();
+  let mut input = input.replace(",", "").to_lowercase();

  if remove_trailing_operator {
    match &input.chars().last().unwrap_or('x') {
@@ -767,20 +766,38 @@ pub fn lex(input: &str, remove_trailing_operator: bool, default_degree: Unit) ->
 mod tests {
  use super::*;
  use crate::numtok;
+  use regex::Regex;

  #[test]
  fn test_lex() {
-    pub fn run_lex(input: &str, expected_tokens: Vec<Token>) {
+    let strip_operator_spacing = Regex::new(r" ([+\-*/]) ").unwrap();
+    let strip_afterdigit_spacing = Regex::new(r"(\d) ").unwrap();
+
+    let run_lex = |input: &str, expected_tokens: Vec<Token>| {
      let tokens = match lex(input, false, Unit::Celsius) {
        Ok(tokens) => tokens,
        Err(e) => {
          panic!("lex error: {}\nrun_lex input: {}", e, input);
        }
      };
-      if tokens != expected_tokens {
-        panic!("tokens mismatch: run_lex input: {}\nexpected: {:?}\nreceived: {:?}", input, expected_tokens, tokens);
-      }
-    }
+      let info_msg = format!("run_lex input: {}\nexpected: {:?}\nreceived: {:?}", input, expected_tokens, tokens);
+      assert!(tokens == expected_tokens, "{}", info_msg);
+
+      // Prove we can handle multiple spaces wherever we handle a single space
+      let input_extra_spaces = input.replace(" ", "   ");
+      let tokens_extra_spaces = lex(&input_extra_spaces, false, Unit::Celsius).unwrap();
+      assert!(tokens_extra_spaces == expected_tokens, "{}", info_msg);
+
+      // Prove we don't need spaces around operators
+      let input_stripped_spaces = strip_operator_spacing.replace_all(input, "$1");
+      let tokens_stripped_spaces = lex(&input_stripped_spaces, false, Unit::Celsius).unwrap();
+      assert!(tokens_stripped_spaces == expected_tokens, "{}", info_msg);
+
+      // Prove we don't need a space after a digit
+      let input_afterdigit_stripped_spaces = strip_afterdigit_spacing.replace_all(input, "$1");
+      let tokens_afterdigit_stripped_spaces = lex(&input_afterdigit_stripped_spaces, false, Unit::Celsius).unwrap();
+      assert!(tokens_afterdigit_stripped_spaces == expected_tokens, "{}", info_msg);
+    };

    run_lex("88 kilometres * 2", vec![numtok!(88), Token::Unit(Kilometer), Token::Operator(Multiply), numtok!(2)]);
    run_lex("100 nmi", vec![numtok!(100), Token::Unit(NauticalMile)]);