Greatly improved parsing of multi-word units
This commit is contained in:
parent
80aacc6493
commit
fbbb895c64
60
src/lexer.rs
60
src/lexer.rs
@ -12,7 +12,7 @@ pub fn lex(input: &str) -> Result<TokenVector, String> {
|
|||||||
|
|
||||||
let mut chars = input.chars().enumerate().peekable();
|
let mut chars = input.chars().enumerate().peekable();
|
||||||
let mut tokens: TokenVector = vec![];
|
let mut tokens: TokenVector = vec![];
|
||||||
let max_word_length = 12;
|
let max_word_length = 30;
|
||||||
|
|
||||||
let mut left_paren_count = 0;
|
let mut left_paren_count = 0;
|
||||||
let mut right_paren_count = 0;
|
let mut right_paren_count = 0;
|
||||||
@ -44,14 +44,29 @@ pub fn lex(input: &str) -> Result<TokenVector, String> {
|
|||||||
let mut end_index = byte_index;
|
let mut end_index = byte_index;
|
||||||
while let Some((_index, current_char)) = chars.peek() {
|
while let Some((_index, current_char)) = chars.peek() {
|
||||||
// don't loop more than max_word_length:
|
// a word that reaches max_word_length is rejected with an error:
|
||||||
if end_index >= start_index + max_word_length - 1 { break; }
|
if end_index >= start_index + max_word_length - 1 {
|
||||||
|
let string = &input[start_index..=end_index];
|
||||||
|
return Err(format!("Invalid string starting with: {}", string));
|
||||||
|
}
|
||||||
|
|
||||||
if current_char.is_alphabetic() {
|
if current_char.is_alphabetic() {
|
||||||
byte_index += current_char.len_utf8();
|
byte_index += current_char.len_utf8();
|
||||||
chars.next();
|
chars.next();
|
||||||
end_index += 1;
|
end_index += 1;
|
||||||
} else {
|
} else {
|
||||||
|
let string = &input[start_index..=end_index];
|
||||||
|
match string.trim_end() {
|
||||||
|
// first word of a two-word unit: consume the separator and keep reading
|
||||||
|
"nautical" | "square" | "cubic" => {
|
||||||
|
println!("xx {}", string);
|
||||||
|
byte_index += current_char.len_utf8();
|
||||||
|
chars.next();
|
||||||
|
end_index += 1;
|
||||||
|
},
|
||||||
|
_ => {
|
||||||
break;
|
break;
|
||||||
|
},
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -109,52 +124,13 @@ pub fn lex(input: &str) -> Result<TokenVector, String> {
|
|||||||
"ft" | "foot" | "feet" => tokens.push(Token::Unit(Foot)),
|
"ft" | "foot" | "feet" => tokens.push(Token::Unit(Foot)),
|
||||||
"yd" | "yard" | "yards" => tokens.push(Token::Unit(Yard)),
|
"yd" | "yard" | "yards" => tokens.push(Token::Unit(Yard)),
|
||||||
"mi" | "mile" | "miles" => tokens.push(Token::Unit(Mile)),
|
"mi" | "mile" | "miles" => tokens.push(Token::Unit(Mile)),
|
||||||
"nmi" => tokens.push(Token::Unit(NauticalMile)),
|
"nmi" | "nautical mile" => tokens.push(Token::Unit(NauticalMile)),
|
||||||
"lightyear" => tokens.push(Token::Unit(LightYear)),
|
"lightyear" => tokens.push(Token::Unit(LightYear)),
|
||||||
|
|
||||||
// two word unit
|
|
||||||
"nautical" | "square" | "cubic" => {
|
|
||||||
// skip past whitespace
|
|
||||||
if let Some((_index, current_char)) = chars.peek() {
|
|
||||||
if current_char.is_whitespace() {
|
|
||||||
byte_index += current_char.len_utf8();
|
|
||||||
chars.next();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// prevent off-by-one error causing string to be " mile"
|
|
||||||
byte_index += current_char.len_utf8();
|
|
||||||
chars.next();
|
|
||||||
|
|
||||||
let start_index = byte_index;
|
|
||||||
let mut end_index = byte_index;
|
|
||||||
while let Some((_index, current_char)) = chars.peek() {
|
|
||||||
// don't loop more than max_word_length:
|
|
||||||
if end_index >= start_index + max_word_length - 1 { break; }
|
|
||||||
|
|
||||||
if current_char.is_alphabetic() {
|
|
||||||
byte_index += current_char.len_utf8();
|
|
||||||
end_index += 1;
|
|
||||||
chars.next();
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let second_string = &input[start_index..=end_index];
|
|
||||||
let full_string = format!("{} {}", string, second_string);
|
|
||||||
match full_string.as_str() {
|
|
||||||
"nautical mile" => tokens.push(Token::Unit(NauticalMile)),
|
|
||||||
|
|
||||||
"square meter" | "square meters" => tokens.push(Token::Unit(SquareMeter)),
|
"square meter" | "square meters" => tokens.push(Token::Unit(SquareMeter)),
|
||||||
|
|
||||||
"cubic meter" | "cubic meters" => tokens.push(Token::Unit(CubicMeter)),
|
"cubic meter" | "cubic meters" => tokens.push(Token::Unit(CubicMeter)),
|
||||||
|
|
||||||
_ => {
|
|
||||||
return Err(format!("Invalid string: {}", string));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
_ => {
|
_ => {
|
||||||
return Err(format!("Invalid string: {}", string));
|
return Err(format!("Invalid string: {}", string));
|
||||||
}
|
}
|
||||||
|
|||||||
25
src/units.rs
25
src/units.rs
@ -20,13 +20,22 @@ macro_rules! create_units {
|
|||||||
}
|
}
|
||||||
use Unit::*;
|
use Unit::*;
|
||||||
|
|
||||||
fn get_info(unit: &Unit) -> (UnitType, d128) {
|
impl Unit {
|
||||||
match unit {
|
pub fn category(&self) -> UnitType {
|
||||||
|
match self {
|
||||||
$(
|
$(
|
||||||
Unit::$variant => $properties
|
Unit::$variant => $properties.0
|
||||||
),*
|
),*
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
pub fn weight(&self) -> d128 {
|
||||||
|
match self {
|
||||||
|
$(
|
||||||
|
Unit::$variant => $properties.1
|
||||||
|
),*
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -105,20 +114,12 @@ create_units!(
|
|||||||
Pound: (Mass, d128!(453.59237)),
|
Pound: (Mass, d128!(453.59237)),
|
||||||
ShortTon: (Mass, d128!(907184.74)),
|
ShortTon: (Mass, d128!(907184.74)),
|
||||||
LongTon: (Mass, d128!(1016046.9088)),
|
LongTon: (Mass, d128!(1016046.9088)),
|
||||||
|
|
||||||
Kelvin: (Temperature, d128!(0)),
|
Kelvin: (Temperature, d128!(0)),
|
||||||
Celcius: (Temperature, d128!(0)),
|
Celcius: (Temperature, d128!(0)),
|
||||||
Fahrenheit: (Temperature, d128!(0)),
|
Fahrenheit: (Temperature, d128!(0)),
|
||||||
);
|
);
|
||||||
|
|
||||||
impl Unit {
|
|
||||||
pub fn category(&self) -> UnitType {
|
|
||||||
return get_info(self).0
|
|
||||||
}
|
|
||||||
pub fn weight(&self) -> d128 {
|
|
||||||
return get_info(self).1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct Number {
|
pub struct Number {
|
||||||
pub value: d128,
|
pub value: d128,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user