Greatly improved parsing of multi-word units

This commit is contained in:
Kasper 2020-01-09 23:01:31 +01:00
parent 80aacc6493
commit fbbb895c64
2 changed files with 38 additions and 61 deletions

View File

@ -12,7 +12,7 @@ pub fn lex(input: &str) -> Result<TokenVector, String> {
let mut chars = input.chars().enumerate().peekable();
let mut tokens: TokenVector = vec![];
let max_word_length = 12;
let max_word_length = 30;
let mut left_paren_count = 0;
let mut right_paren_count = 0;
@ -44,14 +44,29 @@ pub fn lex(input: &str) -> Result<TokenVector, String> {
let mut end_index = byte_index;
while let Some((_index, current_char)) = chars.peek() {
// don't loop more than max_word_length:
if end_index >= start_index + max_word_length - 1 { break; }
if end_index >= start_index + max_word_length - 1 {
let string = &input[start_index..=end_index];
return Err(format!("Invalid string starting with: {}", string));
}
if current_char.is_alphabetic() {
byte_index += current_char.len_utf8();
chars.next();
end_index += 1;
} else {
break;
let string = &input[start_index..=end_index];
match string.trim_end() {
// allow for two-word units
"nautical" | "square" | "cubic" => {
println!("xx {}", string);
byte_index += current_char.len_utf8();
chars.next();
end_index += 1;
},
_ => {
break;
},
}
}
}
@ -109,51 +124,12 @@ pub fn lex(input: &str) -> Result<TokenVector, String> {
"ft" | "foot" | "feet" => tokens.push(Token::Unit(Foot)),
"yd" | "yard" | "yards" => tokens.push(Token::Unit(Yard)),
"mi" | "mile" | "miles" => tokens.push(Token::Unit(Mile)),
"nmi" => tokens.push(Token::Unit(NauticalMile)),
"nmi" | "nautical mile" => tokens.push(Token::Unit(NauticalMile)),
"lightyear" => tokens.push(Token::Unit(LightYear)),
// two word unit
"nautical" | "square" | "cubic" => {
// skip past whitespace
if let Some((_index, current_char)) = chars.peek() {
if current_char.is_whitespace() {
byte_index += current_char.len_utf8();
chars.next();
}
}
// prevent off-by-one error causing string to be " mile"
byte_index += current_char.len_utf8();
chars.next();
"square meter" | "square meters" => tokens.push(Token::Unit(SquareMeter)),
let start_index = byte_index;
let mut end_index = byte_index;
while let Some((_index, current_char)) = chars.peek() {
// don't loop more than max_word_length:
if end_index >= start_index + max_word_length - 1 { break; }
if current_char.is_alphabetic() {
byte_index += current_char.len_utf8();
end_index += 1;
chars.next();
} else {
break;
}
}
let second_string = &input[start_index..=end_index];
let full_string = format!("{} {}", string, second_string);
match full_string.as_str() {
"nautical mile" => tokens.push(Token::Unit(NauticalMile)),
"square meter" | "square meters" => tokens.push(Token::Unit(SquareMeter)),
"cubic meter" | "cubic meters" => tokens.push(Token::Unit(CubicMeter)),
_ => {
return Err(format!("Invalid string: {}", string));
}
}
}
"cubic meter" | "cubic meters" => tokens.push(Token::Unit(CubicMeter)),
_ => {
return Err(format!("Invalid string: {}", string));

View File

@ -20,11 +20,20 @@ macro_rules! create_units {
}
use Unit::*;
fn get_info(unit: &Unit) -> (UnitType, d128) {
match unit {
$(
Unit::$variant => $properties
),*
impl Unit {
pub fn category(&self) -> UnitType {
match self {
$(
Unit::$variant => $properties.0
),*
}
}
pub fn weight(&self) -> d128 {
match self {
$(
Unit::$variant => $properties.1
),*
}
}
}
}
@ -105,20 +114,12 @@ create_units!(
Pound: (Mass, d128!(453.59237)),
ShortTon: (Mass, d128!(907184.74)),
LongTon: (Mass, d128!(1016046.9088)),
Kelvin: (Temperature, d128!(0)),
Celcius: (Temperature, d128!(0)),
Fahrenheit: (Temperature, d128!(0)),
);
impl Unit {
    /// Returns the `UnitType` half of the `(UnitType, d128)` pair that
    /// `get_info` reports for this unit.
    pub fn category(&self) -> UnitType {
        let (category, _weight) = get_info(self);
        category
    }

    /// Returns the `d128` half of the `(UnitType, d128)` pair that
    /// `get_info` reports for this unit.
    ///
    /// NOTE(review): presumably a conversion factor relative to the
    /// category's base unit — confirm against the `create_units!` table.
    pub fn weight(&self) -> d128 {
        let (_category, weight) = get_info(self);
        weight
    }
}
#[derive(Clone, Debug)]
pub struct Number {
pub value: d128,