Greatly improved parsing of multi-word units

This commit is contained in:
Kasper 2020-01-09 23:01:31 +01:00
parent 80aacc6493
commit fbbb895c64
2 changed files with 38 additions and 61 deletions

View File

@ -12,7 +12,7 @@ pub fn lex(input: &str) -> Result<TokenVector, String> {
let mut chars = input.chars().enumerate().peekable(); let mut chars = input.chars().enumerate().peekable();
let mut tokens: TokenVector = vec![]; let mut tokens: TokenVector = vec![];
let max_word_length = 12; let max_word_length = 30;
let mut left_paren_count = 0; let mut left_paren_count = 0;
let mut right_paren_count = 0; let mut right_paren_count = 0;
@ -44,14 +44,29 @@ pub fn lex(input: &str) -> Result<TokenVector, String> {
let mut end_index = byte_index; let mut end_index = byte_index;
while let Some((_index, current_char)) = chars.peek() { while let Some((_index, current_char)) = chars.peek() {
// don't loop more than max_word_length: // don't loop more than max_word_length:
if end_index >= start_index + max_word_length - 1 { break; } if end_index >= start_index + max_word_length - 1 {
let string = &input[start_index..=end_index];
return Err(format!("Invalid string starting with: {}", string));
}
if current_char.is_alphabetic() { if current_char.is_alphabetic() {
byte_index += current_char.len_utf8(); byte_index += current_char.len_utf8();
chars.next(); chars.next();
end_index += 1; end_index += 1;
} else { } else {
let string = &input[start_index..=end_index];
match string.trim_end() {
// allow for two-word units
"nautical" | "square" | "cubic" => {
println!("xx {}", string);
byte_index += current_char.len_utf8();
chars.next();
end_index += 1;
},
_ => {
break; break;
},
}
} }
} }
@ -109,52 +124,13 @@ pub fn lex(input: &str) -> Result<TokenVector, String> {
"ft" | "foot" | "feet" => tokens.push(Token::Unit(Foot)), "ft" | "foot" | "feet" => tokens.push(Token::Unit(Foot)),
"yd" | "yard" | "yards" => tokens.push(Token::Unit(Yard)), "yd" | "yard" | "yards" => tokens.push(Token::Unit(Yard)),
"mi" | "mile" | "miles" => tokens.push(Token::Unit(Mile)), "mi" | "mile" | "miles" => tokens.push(Token::Unit(Mile)),
"nmi" => tokens.push(Token::Unit(NauticalMile)), "nmi" | "nautical mile" => tokens.push(Token::Unit(NauticalMile)),
"lightyear" => tokens.push(Token::Unit(LightYear)), "lightyear" => tokens.push(Token::Unit(LightYear)),
// two word unit
"nautical" | "square" | "cubic" => {
// skip past whitespace
if let Some((_index, current_char)) = chars.peek() {
if current_char.is_whitespace() {
byte_index += current_char.len_utf8();
chars.next();
}
}
// prevent off-by-one error causing string to be " mile"
byte_index += current_char.len_utf8();
chars.next();
let start_index = byte_index;
let mut end_index = byte_index;
while let Some((_index, current_char)) = chars.peek() {
// don't loop more than max_word_length:
if end_index >= start_index + max_word_length - 1 { break; }
if current_char.is_alphabetic() {
byte_index += current_char.len_utf8();
end_index += 1;
chars.next();
} else {
break;
}
}
let second_string = &input[start_index..=end_index];
let full_string = format!("{} {}", string, second_string);
match full_string.as_str() {
"nautical mile" => tokens.push(Token::Unit(NauticalMile)),
"square meter" | "square meters" => tokens.push(Token::Unit(SquareMeter)), "square meter" | "square meters" => tokens.push(Token::Unit(SquareMeter)),
"cubic meter" | "cubic meters" => tokens.push(Token::Unit(CubicMeter)), "cubic meter" | "cubic meters" => tokens.push(Token::Unit(CubicMeter)),
_ => {
return Err(format!("Invalid string: {}", string));
}
}
}
_ => { _ => {
return Err(format!("Invalid string: {}", string)); return Err(format!("Invalid string: {}", string));
} }

View File

@ -20,13 +20,22 @@ macro_rules! create_units {
} }
use Unit::*; use Unit::*;
fn get_info(unit: &Unit) -> (UnitType, d128) { impl Unit {
match unit { pub fn category(&self) -> UnitType {
match self {
$( $(
Unit::$variant => $properties Unit::$variant => $properties.0
),* ),*
} }
} }
pub fn weight(&self) -> d128 {
match self {
$(
Unit::$variant => $properties.1
),*
}
}
}
} }
} }
@ -105,20 +114,12 @@ create_units!(
Pound: (Mass, d128!(453.59237)), Pound: (Mass, d128!(453.59237)),
ShortTon: (Mass, d128!(907184.74)), ShortTon: (Mass, d128!(907184.74)),
LongTon: (Mass, d128!(1016046.9088)), LongTon: (Mass, d128!(1016046.9088)),
Kelvin: (Temperature, d128!(0)), Kelvin: (Temperature, d128!(0)),
Celcius: (Temperature, d128!(0)), Celcius: (Temperature, d128!(0)),
Fahrenheit: (Temperature, d128!(0)), Fahrenheit: (Temperature, d128!(0)),
); );
impl Unit {
pub fn category(&self) -> UnitType {
return get_info(self).0
}
pub fn weight(&self) -> d128 {
return get_info(self).1
}
}
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct Number { pub struct Number {
pub value: d128, pub value: d128,