Add more tests by @djmattyg007
This commit is contained in:
parent
84f604a96e
commit
0c1d2b38c1
33
Cargo.lock
generated
33
Cargo.lock
generated
@ -1,5 +1,14 @@
|
|||||||
# This file is automatically @generated by Cargo.
|
# This file is automatically @generated by Cargo.
|
||||||
# It is not intended for manual editing.
|
# It is not intended for manual editing.
|
||||||
|
[[package]]
|
||||||
|
name = "aho-corasick"
|
||||||
|
version = "0.7.18"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bitflags"
|
name = "bitflags"
|
||||||
version = "1.2.1"
|
version = "1.2.1"
|
||||||
@ -17,6 +26,7 @@ name = "cpc"
|
|||||||
version = "1.6.0"
|
version = "1.6.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"decimal",
|
"decimal",
|
||||||
|
"regex",
|
||||||
"unicode-segmentation",
|
"unicode-segmentation",
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -40,12 +50,35 @@ version = "0.2.93"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "9385f66bf6105b241aa65a61cb923ef20efc665cb9f9bb50ac2f0c4b7f378d41"
|
checksum = "9385f66bf6105b241aa65a61cb923ef20efc665cb9f9bb50ac2f0c4b7f378d41"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "memchr"
|
||||||
|
version = "2.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ord_subset"
|
name = "ord_subset"
|
||||||
version = "3.1.1"
|
version = "3.1.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "d7ce14664caf5b27f5656ff727defd68ae1eb75ef3c4d95259361df1eb376bef"
|
checksum = "d7ce14664caf5b27f5656ff727defd68ae1eb75ef3c4d95259361df1eb376bef"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex"
|
||||||
|
version = "1.5.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461"
|
||||||
|
dependencies = [
|
||||||
|
"aho-corasick",
|
||||||
|
"memchr",
|
||||||
|
"regex-syntax",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex-syntax"
|
||||||
|
version = "0.6.25"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rustc-serialize"
|
name = "rustc-serialize"
|
||||||
version = "0.3.24"
|
version = "0.3.24"
|
||||||
|
|||||||
@ -15,3 +15,6 @@ categories = ["mathematics", "science", "parsing", "text-processing", "value-for
|
|||||||
[dependencies]
|
[dependencies]
|
||||||
decimal = "2.1.0"
|
decimal = "2.1.0"
|
||||||
unicode-segmentation = "1.8.0"
|
unicode-segmentation = "1.8.0"
|
||||||
|
|
||||||
|
[dev-dependencies]
|
||||||
|
regex = "1.5.4"
|
||||||
|
|||||||
33
src/lexer.rs
33
src/lexer.rs
@ -464,6 +464,7 @@ pub fn parse_word(word: &str, lexer: &mut Lexer) -> Result<(), String> {
|
|||||||
"hr" | "hrs" | "hour" | "hours" => Token::Unit(WattHour),
|
"hr" | "hrs" | "hour" | "hours" => Token::Unit(WattHour),
|
||||||
other => {
|
other => {
|
||||||
lexer.tokens.push(Token::Unit(Watt));
|
lexer.tokens.push(Token::Unit(Watt));
|
||||||
|
println!("parse_token({})", other);
|
||||||
parse_token(other, lexer)?;
|
parse_token(other, lexer)?;
|
||||||
return Ok(());
|
return Ok(());
|
||||||
},
|
},
|
||||||
@ -584,9 +585,7 @@ pub struct Lexer<'a> {
|
|||||||
|
|
||||||
/// Lex an input string and returns [`Token`]s
|
/// Lex an input string and returns [`Token`]s
|
||||||
pub fn lex(input: &str, remove_trailing_operator: bool, default_degree: Unit) -> Result<Vec<Token>, String> {
|
pub fn lex(input: &str, remove_trailing_operator: bool, default_degree: Unit) -> Result<Vec<Token>, String> {
|
||||||
|
let mut input = input.replace(",", "").to_lowercase();
|
||||||
let mut input = input.replace(",", ""); // ignore commas
|
|
||||||
input = input.to_lowercase();
|
|
||||||
|
|
||||||
if remove_trailing_operator {
|
if remove_trailing_operator {
|
||||||
match &input.chars().last().unwrap_or('x') {
|
match &input.chars().last().unwrap_or('x') {
|
||||||
@ -767,20 +766,38 @@ pub fn lex(input: &str, remove_trailing_operator: bool, default_degree: Unit) ->
|
|||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::numtok;
|
use crate::numtok;
|
||||||
|
use regex::Regex;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_lex() {
|
fn test_lex() {
|
||||||
pub fn run_lex(input: &str, expected_tokens: Vec<Token>) {
|
let strip_operator_spacing = Regex::new(r" ([+\-*/]) ").unwrap();
|
||||||
|
let strip_afterdigit_spacing = Regex::new(r"(\d) ").unwrap();
|
||||||
|
|
||||||
|
let run_lex = |input: &str, expected_tokens: Vec<Token>| {
|
||||||
let tokens = match lex(input, false, Unit::Celsius) {
|
let tokens = match lex(input, false, Unit::Celsius) {
|
||||||
Ok(tokens) => tokens,
|
Ok(tokens) => tokens,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
panic!("lex error: {}\nrun_lex input: {}", e, input);
|
panic!("lex error: {}\nrun_lex input: {}", e, input);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
if tokens != expected_tokens {
|
let info_msg = format!("run_lex input: {}\nexpected: {:?}\nreceived: {:?}", input, expected_tokens, tokens);
|
||||||
panic!("tokens mismatch: run_lex input: {}\nexpected: {:?}\nreceived: {:?}", input, expected_tokens, tokens);
|
assert!(tokens == expected_tokens, "{}", info_msg);
|
||||||
}
|
|
||||||
}
|
// Prove we can handle multiple spaces wherever we handle a single space
|
||||||
|
let input_extra_spaces = input.replace(" ", " ");
|
||||||
|
let tokens_extra_spaces = lex(&input_extra_spaces, false, Unit::Celsius).unwrap();
|
||||||
|
assert!(tokens_extra_spaces == expected_tokens, "{}", info_msg);
|
||||||
|
|
||||||
|
// Prove we don't need spaces around operators
|
||||||
|
let input_stripped_spaces = strip_operator_spacing.replace_all(input, "$1");
|
||||||
|
let tokens_stripped_spaces = lex(&input_stripped_spaces, false, Unit::Celsius).unwrap();
|
||||||
|
assert!(tokens_stripped_spaces == expected_tokens, "{}", info_msg);
|
||||||
|
|
||||||
|
// Prove we don't need a space after a digit
|
||||||
|
let input_afterdigit_stripped_spaces = strip_afterdigit_spacing.replace_all(input, "$1");
|
||||||
|
let tokens_afterdigit_stripped_spaces = lex(&input_afterdigit_stripped_spaces, false, Unit::Celsius).unwrap();
|
||||||
|
assert!(tokens_afterdigit_stripped_spaces == expected_tokens, "{}", info_msg);
|
||||||
|
};
|
||||||
|
|
||||||
run_lex("88 kilometres * 2", vec![numtok!(88), Token::Unit(Kilometer), Token::Operator(Multiply), numtok!(2)]);
|
run_lex("88 kilometres * 2", vec![numtok!(88), Token::Unit(Kilometer), Token::Operator(Multiply), numtok!(2)]);
|
||||||
run_lex("100 nmi", vec![numtok!(100), Token::Unit(NauticalMile)]);
|
run_lex("100 nmi", vec![numtok!(100), Token::Unit(NauticalMile)]);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user