From 0c1d2b38c1ce3ec56e877eb5e84c6cd935e725ff Mon Sep 17 00:00:00 2001 From: Kasper Date: Tue, 6 Jul 2021 19:06:50 +0200 Subject: [PATCH] Add more tests by @djmattyg007 --- Cargo.lock | 33 +++++++++++++++++++++++++++++++++ Cargo.toml | 3 +++ src/lexer.rs | 33 +++++++++++++++++++++++++-------- 3 files changed, 61 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f49875a..8cb4346 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,5 +1,14 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. +[[package]] +name = "aho-corasick" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" +dependencies = [ + "memchr", +] + [[package]] name = "bitflags" version = "1.2.1" @@ -17,6 +26,7 @@ name = "cpc" version = "1.6.0" dependencies = [ "decimal", + "regex", "unicode-segmentation", ] @@ -40,12 +50,35 @@ version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9385f66bf6105b241aa65a61cb923ef20efc665cb9f9bb50ac2f0c4b7f378d41" +[[package]] +name = "memchr" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc" + [[package]] name = "ord_subset" version = "3.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7ce14664caf5b27f5656ff727defd68ae1eb75ef3c4d95259361df1eb376bef" +[[package]] +name = "regex" +version = "1.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.6.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" + 
[[package]] name = "rustc-serialize" version = "0.3.24" diff --git a/Cargo.toml b/Cargo.toml index f2f086c..96a394b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,3 +15,6 @@ categories = ["mathematics", "science", "parsing", "text-processing", "value-for [dependencies] decimal = "2.1.0" unicode-segmentation = "1.8.0" + +[dev-dependencies] +regex = "1.5.4" diff --git a/src/lexer.rs b/src/lexer.rs index 5b32b8a..75d9b36 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -584,9 +584,7 @@ pub struct Lexer<'a> { /// Lex an input string and returns [`Token`]s pub fn lex(input: &str, remove_trailing_operator: bool, default_degree: Unit) -> Result<Vec<Token>, String> { - - let mut input = input.replace(",", ""); // ignore commas - input = input.to_lowercase(); + let mut input = input.replace(",", "").to_lowercase(); if remove_trailing_operator { match &input.chars().last().unwrap_or('x') { @@ -767,20 +765,38 @@ pub fn lex(input: &str, remove_trailing_operator: bool, default_degree: Unit) -> Result<Vec<Token>, String> { mod tests { use super::*; use crate::numtok; + use regex::Regex; #[test] fn test_lex() { - pub fn run_lex(input: &str, expected_tokens: Vec<Token>) { + let strip_operator_spacing = Regex::new(r" ([+\-*/]) ").unwrap(); + let strip_afterdigit_spacing = Regex::new(r"(\d) ").unwrap(); + + let run_lex = |input: &str, expected_tokens: Vec<Token>| { let tokens = match lex(input, false, Unit::Celsius) { Ok(tokens) => tokens, Err(e) => { panic!("lex error: {}\nrun_lex input: {}", e, input); } }; - if tokens != expected_tokens { - panic!("tokens mismatch: run_lex input: {}\nexpected: {:?}\nreceived: {:?}", input, expected_tokens, tokens); - } - } + let info_msg = format!("run_lex input: {}\nexpected: {:?}\nreceived: {:?}", input, 
expected_tokens, tokens); assert!(tokens == expected_tokens, "{}", info_msg); + + // Prove we can handle multiple spaces wherever we handle a single space + let input_extra_spaces = input.replace(" ", "  "); + let tokens_extra_spaces = lex(&input_extra_spaces, false, Unit::Celsius).unwrap(); + assert!(tokens_extra_spaces == expected_tokens, "{}", info_msg); + + // Prove we don't need spaces around operators + let input_stripped_spaces = strip_operator_spacing.replace_all(input, "$1"); + let tokens_stripped_spaces = lex(&input_stripped_spaces, false, Unit::Celsius).unwrap(); + assert!(tokens_stripped_spaces == expected_tokens, "{}", info_msg); + + // Prove we don't need a space after a digit + let input_afterdigit_stripped_spaces = strip_afterdigit_spacing.replace_all(input, "$1"); + let tokens_afterdigit_stripped_spaces = lex(&input_afterdigit_stripped_spaces, false, Unit::Celsius).unwrap(); + assert!(tokens_afterdigit_stripped_spaces == expected_tokens, "{}", info_msg); + }; run_lex("88 kilometres * 2", vec![numtok!(88), Token::Unit(Kilometer), Token::Operator(Multiply), numtok!(2)]); run_lex("100 nmi", vec![numtok!(100), Token::Unit(NauticalMile)]);