Attempt at improving multi-word handling (incomplete)
This commit is contained in:
parent
9485e3896b
commit
541e38dc65
7
Cargo.lock
generated
7
Cargo.lock
generated
@ -17,6 +17,7 @@ name = "cpc"
|
||||
version = "1.6.0"
|
||||
dependencies = [
|
||||
"decimal",
|
||||
"unicode-segmentation",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -56,3 +57,9 @@ name = "serde"
|
||||
version = "1.0.125"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "558dc50e1a5a5fa7112ca2ce4effcb321b0300c0d4ccf0776a9f60cd89031171"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-segmentation"
|
||||
version = "1.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8895849a949e7845e06bd6dc1aa51731a103c42707010a5b591c0038fb73385b"
|
||||
|
||||
@ -14,3 +14,4 @@ categories = ["mathematics", "science", "parsing", "text-processing", "value-for
|
||||
|
||||
[dependencies]
|
||||
decimal = "2.1.0"
|
||||
unicode-segmentation = "1.8.0"
|
||||
|
||||
888
src/lexer.rs
888
src/lexer.rs
@ -1,3 +1,4 @@
|
||||
use std::iter::Peekable;
|
||||
use std::str::FromStr;
|
||||
use decimal::d128;
|
||||
use crate::Token;
|
||||
@ -10,6 +11,7 @@ use crate::LexerKeyword::{In, PercentChar, Per, Mercury, Hg, PoundForce, Force,
|
||||
use crate::FunctionIdentifier::{Cbrt, Ceil, Cos, Exp, Abs, Floor, Ln, Log, Round, Sin, Sqrt, Tan};
|
||||
use crate::units::Unit;
|
||||
use crate::units::Unit::*;
|
||||
use unicode_segmentation::{Graphemes, UnicodeSegmentation};
|
||||
|
||||
pub const fn is_alphabetic_extended(input: &char) -> bool {
|
||||
match input {
|
||||
@ -18,6 +20,467 @@ pub const fn is_alphabetic_extended(input: &char) -> bool {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` when `input` (normally a single grapheme) counts as a
/// letter for the lexer.
///
/// Accepted inputs are:
/// - a non-empty string made up only of ASCII lowercase letters,
/// - a non-empty string made up only of ASCII uppercase letters,
/// - exactly one of the symbols Ω (Greek capital omega, U+03A9),
///   Ω (ohm sign, U+2126), µ (micro sign, U+00B5), μ (Greek small mu, U+03BC)
///   or π (U+03C0).
///
/// The empty string is never alphabetic.
pub fn is_alphabetic_extended_str(input: &str) -> bool {
    match input {
        // `all` is vacuously true on an empty iterator, so reject "" up front.
        "" => false,
        // The look-alike symbols are spelled as escapes so that the two omegas
        // and the two mus stay distinguishable in the source.
        "\u{3a9}" | "\u{2126}" | "\u{b5}" | "\u{3bc}" | "\u{3c0}" => true,
        _ => {
            input.chars().all(|c| c.is_ascii_lowercase())
                || input.chars().all(|c| c.is_ascii_uppercase())
        }
    }
}
|
||||
|
||||
/// Returns `true` when `input` is exactly one ASCII digit (`0`-`9`) or a
/// single decimal point (`.`); every other string, including the empty
/// string and multi-character strings, yields `false`.
pub fn is_numeric_str(input: &str) -> bool {
    matches!(input.as_bytes(), [b'.'] | [b'0'..=b'9'])
}
|
||||
|
||||
/// Read next characters as a word, otherwise return empty string.
|
||||
/// Returns an empty string if there's leading whitespace.
|
||||
pub fn read_word_plain(chars: &mut Peekable<Graphemes>) -> String {
|
||||
let mut word = "".to_string();
|
||||
while let Some(next_char) = chars.peek() {
|
||||
if is_alphabetic_extended_str(&next_char) {
|
||||
word += chars.next().unwrap();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return word;
|
||||
}
|
||||
|
||||
/// Read next as a word, otherwise return empty string.
|
||||
/// Leading whitespace is ignored. A trailing digit may be included.
|
||||
pub fn read_word(chars: &mut Peekable<Graphemes>) -> String {
|
||||
// skip whitespace
|
||||
while let Some(current_char) = chars.peek() {
|
||||
if current_char.trim().is_empty() {
|
||||
chars.next();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
let mut word = "".to_string();
|
||||
while let Some(next_char) = chars.peek() {
|
||||
if is_alphabetic_extended_str(&next_char) {
|
||||
word += chars.next().unwrap();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
match *chars.peek().unwrap_or(&"") {
|
||||
"2" | "²" => {
|
||||
word += "2";
|
||||
chars.next();
|
||||
},
|
||||
"3" | "³" => {
|
||||
word += "3";
|
||||
chars.next();
|
||||
},
|
||||
_ => {},
|
||||
}
|
||||
return word;
|
||||
}
|
||||
|
||||
pub fn parse_word(tokens: &mut Vec<Token>, chars: &mut Peekable<Graphemes>, default_degree: Unit) -> Result<(), String> {
|
||||
let token = match read_word(chars).as_str() {
|
||||
"to" => Token::TextOperator(To),
|
||||
"of" => Token::TextOperator(Of),
|
||||
|
||||
"hundred" => Token::NamedNumber(Hundred),
|
||||
"thousand" => Token::NamedNumber(Thousand),
|
||||
"mil" | "mill" | "million" => Token::NamedNumber(Million),
|
||||
"bil" | "bill" | "billion" => Token::NamedNumber(Billion),
|
||||
"tri" | "tril" | "trillion" => Token::NamedNumber(Trillion),
|
||||
"quadrillion" => Token::NamedNumber(Quadrillion),
|
||||
"quintillion" => Token::NamedNumber(Quintillion),
|
||||
"sextillion" => Token::NamedNumber(Sextillion),
|
||||
"septillion" => Token::NamedNumber(Septillion),
|
||||
"octillion" => Token::NamedNumber(Octillion),
|
||||
"nonillion" => Token::NamedNumber(Nonillion),
|
||||
"decillion" => Token::NamedNumber(Decillion),
|
||||
"undecillion" => Token::NamedNumber(Undecillion),
|
||||
"duodecillion" => Token::NamedNumber(Duodecillion),
|
||||
"tredecillion" => Token::NamedNumber(Tredecillion),
|
||||
"quattuordecillion" => Token::NamedNumber(Quattuordecillion),
|
||||
"quindecillion" => Token::NamedNumber(Quindecillion),
|
||||
"sexdecillion" => Token::NamedNumber(Sexdecillion),
|
||||
"septendecillion" => Token::NamedNumber(Septendecillion),
|
||||
"octodecillion" => Token::NamedNumber(Octodecillion),
|
||||
"novemdecillion" => Token::NamedNumber(Novemdecillion),
|
||||
"vigintillion" => Token::NamedNumber(Vigintillion),
|
||||
"centillion" => Token::NamedNumber(Centillion),
|
||||
"googol" => Token::NamedNumber(Googol),
|
||||
|
||||
"pi" => Token::Constant(Pi),
|
||||
"e" => Token::Constant(E),
|
||||
|
||||
"mod" => Token::Operator(Modulo),
|
||||
|
||||
"sqrt" => Token::FunctionIdentifier(Sqrt),
|
||||
"cbrt" => Token::FunctionIdentifier(Cbrt),
|
||||
|
||||
"log" => Token::FunctionIdentifier(Log),
|
||||
"ln" => Token::FunctionIdentifier(Ln),
|
||||
"exp" => Token::FunctionIdentifier(Exp),
|
||||
|
||||
"round" | "rint" => Token::FunctionIdentifier(Round),
|
||||
"ceil" => Token::FunctionIdentifier(Ceil),
|
||||
"floor" => Token::FunctionIdentifier(Floor),
|
||||
"abs" | "fabs" => Token::FunctionIdentifier(Abs),
|
||||
|
||||
"sin" => Token::FunctionIdentifier(Sin),
|
||||
"cos" => Token::FunctionIdentifier(Cos),
|
||||
"tan" => Token::FunctionIdentifier(Tan),
|
||||
|
||||
"per" => Token::LexerKeyword(Per),
|
||||
"hg" => Token::LexerKeyword(Hg), // can be hectogram or mercury
|
||||
|
||||
"ns" | "nanosec" | "nanosecs" | "nanosecond" | "nanoseconds" => Token::Unit(Nanosecond),
|
||||
// µ and μ are two different characters
|
||||
"µs" | "μs" | "microsec" | "microsecs" | "microsecond" | "microseconds" => Token::Unit(Microsecond),
|
||||
"ms" | "millisec" | "millisecs" | "millisecond" | "milliseconds" => Token::Unit(Millisecond),
|
||||
"s" | "sec" | "secs" | "second" | "seconds" => Token::Unit(Second),
|
||||
"min" | "mins" | "minute" | "minutes" => Token::Unit(Minute),
|
||||
"h" | "hr" | "hrs" | "hour" | "hours" => Token::Unit(Hour),
|
||||
"day" | "days" => Token::Unit(Day),
|
||||
"wk" | "wks" | "week" | "weeks" => Token::Unit(Week),
|
||||
"mo" | "mos" | "month" | "months" => Token::Unit(Month),
|
||||
"q" | "quarter" | "quarters" => Token::Unit(Quarter),
|
||||
"yr" | "yrs" | "year" | "years" => Token::Unit(Year),
|
||||
"decade" | "decades" => Token::Unit(Decade),
|
||||
"century" | "centuries" => Token::Unit(Century),
|
||||
"millenium" | "millenia" | "milleniums" => Token::Unit(Millenium),
|
||||
|
||||
"mm" | "millimeter" | "millimeters" | "millimetre" | "millimetres" => Token::Unit(Millimeter),
|
||||
"cm" | "centimeter" | "centimeters" | "centimetre" | "centimetres" => Token::Unit(Centimeter),
|
||||
"dm" | "decimeter" | "decimeters" | "decimetre" | "decimetres" => Token::Unit(Decimeter),
|
||||
"m" | "meter" | "meters" | "metre" | "metres" => Token::Unit(Meter),
|
||||
"km" | "kilometer" | "kilometers" | "kilometre" | "kilometres" => Token::Unit(Kilometer),
|
||||
"in" => Token::LexerKeyword(In),
|
||||
"inch" | "inches" => Token::Unit(Inch),
|
||||
"ft" | "foot" | "feet" => Token::Unit(Foot),
|
||||
"yd" | "yard" | "yards" => Token::Unit(Yard),
|
||||
"mi" | "mile" | "miles" => Token::Unit(Mile),
|
||||
"nmi" => Token::Unit(NauticalMile),
|
||||
"nautical" => {
|
||||
match read_word(chars).as_str() {
|
||||
"mile" | "miles" => Token::Unit(NauticalMile),
|
||||
string => return Err(format!("Invalid string: {}", string)),
|
||||
}
|
||||
},
|
||||
"ly" | "lightyear" | "lightyears" => Token::Unit(LightYear),
|
||||
"lightsec" | "lightsecs" | "lightsecond" | "lightseconds" => Token::Unit(LightSecond),
|
||||
"light" => {
|
||||
match read_word(chars).as_str() {
|
||||
"yr" | "yrs" | "year" | "years" => Token::Unit(LightYear),
|
||||
"sec" | "secs" | "second" | "seconds" => Token::Unit(LightYear),
|
||||
string => return Err(format!("Invalid string: {}", string)),
|
||||
}
|
||||
}
|
||||
|
||||
"sqmm" | "mm2" | "millimeter2" | "millimeters2" | "millimetre2" | "millimetres2" => Token::Unit(SquareMillimeter),
|
||||
"sqcm" | "cm2" | "centimeter2" | "centimeters2" | "centimetre2" | "centimetres2" => Token::Unit(SquareCentimeter),
|
||||
"sqdm" | "dm2" | "decimeter2" | "decimeters2" | "decimetre2" | "decimetres2" => Token::Unit(SquareDecimeter),
|
||||
"sqm" | "m2" | "meter2" | "meters2" | "metre2" | "metres2" => Token::Unit(SquareMeter),
|
||||
"sqkm" | "km2" | "kilometer2" | "kilometers2" | "kilometre2" | "kilometres2" => Token::Unit(SquareKilometer),
|
||||
"sqin" | "in2" | "inch2" | "inches2" => Token::Unit(SquareInch),
|
||||
"sqft" | "ft2" | "foot2" | "feet2" => Token::Unit(SquareFoot),
|
||||
"sqyd" | "yd2" | "yard2" | "yards2" => Token::Unit(SquareYard),
|
||||
"sqmi" | "mi2" | "mile2" | "miles2" => Token::Unit(SquareMile),
|
||||
"sq" | "square" => {
|
||||
match read_word(chars).as_str() {
|
||||
"mm" | "millimeter" | "millimeters" | "millimetre" | "millimetres" => Token::Unit(SquareMillimeter),
|
||||
"cm" | "centimeter" | "centimeters" | "centimetre" | "centimetres" => Token::Unit(SquareCentimeter),
|
||||
"dm" | "decimeter" | "decimeters" | "decimetre" | "decimetres" => Token::Unit(SquareDecimeter),
|
||||
"m" | "meter" | "meters" | "metre" | "metres" => Token::Unit(SquareMeter),
|
||||
"km" | "kilometer" | "kilometers" | "kilometre" | "kilometres" => Token::Unit(SquareKilometer),
|
||||
"in" | "inch" | "inches" => Token::Unit(SquareInch),
|
||||
"ft" | "foot" | "feet" => Token::Unit(SquareFoot),
|
||||
"yd" | "yard" | "yards" => Token::Unit(SquareYard),
|
||||
"mi" | "mile" | "miles" => Token::Unit(SquareMile),
|
||||
string => return Err(format!("Invalid string: {}", string)),
|
||||
}
|
||||
}
|
||||
"are" | "ares" => Token::Unit(Are),
|
||||
"decare" | "decares" => Token::Unit(Decare),
|
||||
"ha" | "hectare" | "hectares" => Token::Unit(Hectare),
|
||||
"acre" | "acres" => Token::Unit(Acre),
|
||||
|
||||
"mm3" | "millimeter3" | "millimeters3" | "millimetre3" | "millimetres3" => Token::Unit(CubicMillimeter),
|
||||
"cm3" | "centimeter3" | "centimeters3" | "centimetre3" | "centimetres3" => Token::Unit(CubicCentimeter),
|
||||
"dm3" | "decimeter3" | "decimeters3" | "decimetre3" | "decimetres3" => Token::Unit(CubicDecimeter),
|
||||
"m3" | "meter3" | "meters3" | "metre3" | "metres3" => Token::Unit(CubicMeter),
|
||||
"km3" | "kilometer3" | "kilometers3" | "kilometre3" | "kilometres3" => Token::Unit(CubicKilometer),
|
||||
"inc3" | "inch3" | "inches3" => Token::Unit(CubicInch),
|
||||
"ft3" | "foot3" | "feet3" => Token::Unit(CubicFoot),
|
||||
"yd3" | "yard3" | "yards3" => Token::Unit(CubicYard),
|
||||
"mi3" | "mile3" | "miles3" => Token::Unit(CubicMile),
|
||||
"cubic" => {
|
||||
match read_word(chars).as_str() {
|
||||
"mm" | "millimeter" | "millimeters" | "millimetre" | "millimetres" => Token::Unit(CubicMillimeter),
|
||||
"cm" | "centimeter" | "centimeters" | "centimetre" | "centimetres" => Token::Unit(CubicCentimeter),
|
||||
"dm" | "decimeter" | "decimeters" | "decimetre" | "decimetres" => Token::Unit(CubicDecimeter),
|
||||
"m" | "meter" | "meters" | "metre" | "metres" => Token::Unit(CubicMeter),
|
||||
"km" | "kilometer" | "kilometers" | "kilometre" | "kilometres" => Token::Unit(CubicKilometer),
|
||||
"in" | "inch" | "inches" => Token::Unit(CubicInch),
|
||||
"ft" | "foot" | "feet" => Token::Unit(CubicFoot),
|
||||
"yd" | "yard" | "yards" => Token::Unit(CubicYard),
|
||||
"mi" | "mile" | "miles" => Token::Unit(CubicMile),
|
||||
string => return Err(format!("Invalid string: {}", string)),
|
||||
}
|
||||
},
|
||||
"ml" | "milliliter" | "milliliters" | "millilitre" | "millilitres" => Token::Unit(Milliliter),
|
||||
"cl" | "centiliter" | "centiliters" | "centilitre" | "centilitres" => Token::Unit(Centiliter),
|
||||
"dl" | "deciliter" | "deciliters" | "decilitre" | "decilitres" => Token::Unit(Deciliter),
|
||||
"l" | "liter" | "liters" | "litre" | "litres" => Token::Unit(Liter),
|
||||
"ts" | "tsp" | "tspn" | "tspns" | "teaspoon" | "teaspoons" => Token::Unit(Teaspoon),
|
||||
"tbs" | "tbsp" | "tablespoon" | "tablespoons" => Token::Unit(Tablespoon),
|
||||
"floz" => Token::Unit(FluidOunce),
|
||||
"fl" | "fluid" => {
|
||||
match read_word(chars).as_str() {
|
||||
"oz" | "ounce" | "ounces" => Token::Unit(FluidOunce),
|
||||
string => return Err(format!("Invalid string: {}", string)),
|
||||
}
|
||||
},
|
||||
"cup" | "cups" => Token::Unit(Cup),
|
||||
"pt" | "pint" | "pints" => Token::Unit(Pint),
|
||||
"qt" | "quart" | "quarts" => Token::Unit(Quart),
|
||||
"gal" | "gallon" | "gallons" => Token::Unit(Gallon),
|
||||
"bbl" | "oil barrel" | "oil barrels" => Token::Unit(OilBarrel),
|
||||
"oil" => {
|
||||
match read_word(chars).as_str() {
|
||||
"barrel" | "barrels" => Token::Unit(OilBarrel),
|
||||
string => return Err(format!("Invalid string: {}", string)),
|
||||
}
|
||||
},
|
||||
|
||||
"metric" => {
|
||||
match read_word(chars).as_str() {
|
||||
"ton" | "tons" | "tonne" | "tonnes" => Token::Unit(MetricTon),
|
||||
"hp" | "hps" | "horsepower" | "horsepowers" => Token::Unit(MetricHorsepower),
|
||||
string => return Err(format!("Invalid string: {}", string)),
|
||||
}
|
||||
},
|
||||
|
||||
"mg" | "milligram" | "milligrams" => Token::Unit(Milligram),
|
||||
"g" | "gram" | "grams" => Token::Unit(Gram),
|
||||
"hectogram" | "hectograms" => Token::Unit(Hectogram),
|
||||
"kg" | "kilo" | "kilos" | "kilogram" | "kilograms" => Token::Unit(Kilogram),
|
||||
"t" | "tonne" | "tonnes" => Token::Unit(MetricTon),
|
||||
"oz" | "ounces" => Token::Unit(Ounce),
|
||||
"lb" | "lbs" => Token::Unit(Pound),
|
||||
"pound" | "pounds" => {
|
||||
todo!();
|
||||
// if chars.peek() == Some(&"-") {
|
||||
// let dash_chars_iter = chars.clone();
|
||||
// dash_chars_iter.next();
|
||||
// match read_word_plain(dash_chars_iter).as_str() {
|
||||
// "force" => {
|
||||
|
||||
// }
|
||||
// }
|
||||
// chars.next();
|
||||
// match read_word_plain(chars).as_str() {
|
||||
// "force" => Token::LexerKeyword(PoundForce),
|
||||
// string => return Err(format!("Invalid string: {}", string)),
|
||||
// }
|
||||
// match read_word(chars).as_str() {
|
||||
// "force" => Token::LexerKeyword(PoundForce),
|
||||
// string => return Err(format!("Invalid string: {}", string)),
|
||||
// }
|
||||
// } else {
|
||||
// Token::Unit(Pound)
|
||||
// }
|
||||
},
|
||||
"stone" | "stones" => Token::Unit(Stone),
|
||||
"st" | "ton" | "tons" => Token::Unit(ShortTon),
|
||||
"short" => {
|
||||
match read_word(chars).as_str() {
|
||||
"ton" | "tons" | "tonne" | "tonnes" => Token::Unit(ShortTon),
|
||||
string => return Err(format!("Invalid string: {}", string)),
|
||||
}
|
||||
},
|
||||
"lt" => Token::Unit(LongTon),
|
||||
"long" => {
|
||||
match read_word(chars).as_str() {
|
||||
"ton" | "tons" | "tonne" | "tonnes" => Token::Unit(LongTon),
|
||||
string => return Err(format!("Invalid string: {}", string)),
|
||||
}
|
||||
},
|
||||
|
||||
"bit" | "bits" => Token::Unit(Bit),
|
||||
"kbit" | "kilobit" | "kilobits" => Token::Unit(Kilobit),
|
||||
"mbit" | "megabit" | "megabits" => Token::Unit(Megabit),
|
||||
"gbit" | "gigabit" | "gigabits" => Token::Unit(Gigabit),
|
||||
"tbit" | "terabit" | "terabits" => Token::Unit(Terabit),
|
||||
"pbit" | "petabit" | "petabits" => Token::Unit(Petabit),
|
||||
"ebit" | "exabit" | "exabits" => Token::Unit(Exabit),
|
||||
"zbit" | "zettabit" | "zettabits" => Token::Unit(Zettabit),
|
||||
"ybit" | "yottabit" | "yottabits" => Token::Unit(Yottabit),
|
||||
"kibit" | "kibibit" | "kibibits" => Token::Unit(Kibibit),
|
||||
"mibit" | "mebibit" | "mebibits" => Token::Unit(Mebibit),
|
||||
"gibit" | "gibibit" | "gibibits" => Token::Unit(Gibibit),
|
||||
"tibit" | "tebibit" | "tebibits" => Token::Unit(Tebibit),
|
||||
"pibit" | "pebibit" | "pebibits" => Token::Unit(Pebibit),
|
||||
"eibit" | "exbibit" | "exbibits" => Token::Unit(Exbibit),
|
||||
"zibit" | "zebibit" | "zebibits" => Token::Unit(Zebibit),
|
||||
"yibit" | "yobibit" | "yobibits" => Token::Unit(Yobibit),
|
||||
"byte" | "bytes" => Token::Unit(Byte),
|
||||
"kb" | "kilobyte" | "kilobytes" => Token::Unit(Kilobyte),
|
||||
"mb" | "megabyte" | "megabytes" => Token::Unit(Megabyte),
|
||||
"gb" | "gigabyte" | "gigabytes" => Token::Unit(Gigabyte),
|
||||
"tb" | "terabyte" | "terabytes" => Token::Unit(Terabyte),
|
||||
"pb" | "petabyte" | "petabytes" => Token::Unit(Petabyte),
|
||||
"eb" | "exabyte" | "exabytes" => Token::Unit(Exabyte),
|
||||
"zb" | "zettabyte" | "zettabytes" => Token::Unit(Zettabyte),
|
||||
"yb" | "yottabyte" | "yottabytes" => Token::Unit(Yottabyte),
|
||||
"kib" | "kibibyte" | "kibibytes" => Token::Unit(Kibibyte),
|
||||
"mib" | "mebibyte" | "mebibytes" => Token::Unit(Mebibyte),
|
||||
"gib" | "gibibyte" | "gibibytes" => Token::Unit(Gibibyte),
|
||||
"tib" | "tebibyte" | "tebibytes" => Token::Unit(Tebibyte),
|
||||
"pib" | "pebibyte" | "pebibytes" => Token::Unit(Pebibyte),
|
||||
"eib" | "exbibyte" | "exbibytes" => Token::Unit(Exbibyte),
|
||||
"zib" | "zebibyte" | "zebibytes" => Token::Unit(Zebibyte),
|
||||
"yib" | "yobibyte" | "yobibytes" => Token::Unit(Yobibyte),
|
||||
|
||||
"millijoule" | "millijoules" => Token::Unit(Millijoule),
|
||||
"j"| "joule" | "joules" => Token::Unit(Joule),
|
||||
"nm" => Token::Unit(NewtonMeter),
|
||||
"newton" => {
|
||||
todo!();
|
||||
// "-meter" | "-meters" | "metre" | "metres" => Token::Unit(NewtonMeter),
|
||||
// "meter" | "meters" | "metre" | "metres" => Token::Unit(NewtonMeter),
|
||||
},
|
||||
"kj" | "kilojoule" | "kilojoules" => Token::Unit(Kilojoule),
|
||||
"mj" | "megajoule" | "megajoules" => Token::Unit(Megajoule),
|
||||
"gj" | "gigajoule" | "gigajoules" => Token::Unit(Gigajoule),
|
||||
"tj" | "terajoule" | "terajoules" => Token::Unit(Terajoule),
|
||||
"cal" | "calorie" | "calories" => Token::Unit(Calorie),
|
||||
"kcal" | "kilocalorie" | "kilocalories" => Token::Unit(KiloCalorie),
|
||||
"btu" => Token::Unit(BritishThermalUnit),
|
||||
"british" => {
|
||||
match read_word(chars).as_str() {
|
||||
"thermal" => {
|
||||
match read_word(chars).as_str() {
|
||||
"unit" | "units" => Token::Unit(BritishThermalUnit),
|
||||
string => return Err(format!("Invalid string: {}", string)),
|
||||
}
|
||||
},
|
||||
string => return Err(format!("Invalid string: {}", string)),
|
||||
}
|
||||
},
|
||||
"wh" => Token::Unit(WattHour),
|
||||
"kwh" => Token::Unit(KilowattHour),
|
||||
"mwh" => Token::Unit(MegawattHour),
|
||||
"gwh" => Token::Unit(GigawattHour),
|
||||
"twh" => Token::Unit(TerawattHour),
|
||||
"pwh" => Token::Unit(PetawattHour),
|
||||
|
||||
"milliwatt" | "milliwatts" => Token::Unit(Milliwatt),
|
||||
"w" | "watts" => Token::Unit(Watt),
|
||||
"kw" | "kilowatts" => Token::Unit(Kilowatt),
|
||||
"mw" | "megawatts" => Token::Unit(Megawatt),
|
||||
"gw" | "gigawatts" => Token::Unit(Gigawatt),
|
||||
"tw" | "terawatts" => Token::Unit(Terawatt),
|
||||
"pw" | "petawatts" => Token::Unit(Petawatt),
|
||||
"hp" | "hps" | "horsepower" | "horsepowers" => Token::Unit(Horsepower),
|
||||
"mhp" | "hpm" => Token::Unit(MetricHorsepower),
|
||||
|
||||
"watt" => {
|
||||
match read_word(chars).as_str() {
|
||||
"hr" | "hrs" | "hour" | "hours" => Token::Unit(WattHour),
|
||||
_ => Token::Unit(Watt)
|
||||
}
|
||||
}
|
||||
"kilowatt" => {
|
||||
match read_word(chars).as_str() {
|
||||
"hr" | "hrs" | "hour" | "hours" => Token::Unit(KilowattHour),
|
||||
_ => Token::Unit(Kilowatt),
|
||||
}
|
||||
}
|
||||
"megawatt" => {
|
||||
match read_word(chars).as_str() {
|
||||
"hr" | "hrs" | "hour" | "hours" => Token::Unit(MegawattHour),
|
||||
_ => Token::Unit(Megawatt),
|
||||
}
|
||||
}
|
||||
"gigawatt" => {
|
||||
match read_word(chars).as_str() {
|
||||
"hr" | "hrs" | "hour" | "hours" => Token::Unit(GigawattHour),
|
||||
_ => Token::Unit(Gigawatt),
|
||||
}
|
||||
}
|
||||
"terawatt" => {
|
||||
match read_word(chars).as_str() {
|
||||
"hr" | "hrs" | "hour" | "hours" => Token::Unit(TerawattHour),
|
||||
_ => Token::Unit(Terawatt),
|
||||
}
|
||||
}
|
||||
"petawatt" => {
|
||||
match read_word(chars).as_str() {
|
||||
"hr" | "hrs" | "hour" | "hours" => Token::Unit(PetawattHour),
|
||||
_ => Token::Unit(Petawatt),
|
||||
}
|
||||
}
|
||||
|
||||
"ma" | "milliamp" | "milliamps" | "milliampere" | "milliamperes" => Token::Unit(Milliampere),
|
||||
"a" | "amp" | "amps" | "ampere" | "amperes" => Token::Unit(Ampere),
|
||||
"ka" | "kiloamp" | "kiloamps" | "kiloampere" | "kiloamperes" => Token::Unit(Kiloampere),
|
||||
"bi" | "biot" | "biots" | "aba" | "abampere" | "abamperes" => Token::Unit(Abampere),
|
||||
|
||||
"mΩ" | "mΩ" | "milliohm" | "milliohms" => Token::Unit(Milliohm),
|
||||
"Ω" | "Ω" | "ohm" | "ohms" => Token::Unit(Ohm),
|
||||
"kΩ" | "kΩ" | "kiloohm" | "kiloohms" => Token::Unit(Kiloohm),
|
||||
|
||||
"mv" | "millivolt" | "millivolts" => Token::Unit(Millivolt),
|
||||
"v" | "volt" | "volts" => Token::Unit(Volt),
|
||||
"kv" | "kilovolt" | "kilovolts" => Token::Unit(Kilovolt),
|
||||
|
||||
// for pound-force per square inch
|
||||
"lbf" => Token::LexerKeyword(PoundForce),
|
||||
"force" => Token::LexerKeyword(Force),
|
||||
|
||||
"pa" | "pascal" | "pascals" => Token::Unit(Pascal),
|
||||
"kpa" | "kilopascal" | "kilopascals" => Token::Unit(Kilopascal),
|
||||
"atm" | "atms" | "atmosphere" | "atmospheres" => Token::Unit(Atmosphere),
|
||||
"mbar" | "mbars" | "millibar" | "millibars" => Token::Unit(Millibar),
|
||||
"bar" | "bars" => Token::Unit(Bar),
|
||||
"inhg" => Token::Unit(InchOfMercury),
|
||||
"mercury" => Token::LexerKeyword(Mercury),
|
||||
"psi" => Token::Unit(PoundsPerSquareInch),
|
||||
"torr" | "torrs" => Token::Unit(Torr),
|
||||
|
||||
"hz" | "hertz" => Token::Unit(Hertz),
|
||||
"khz" | "kilohertz" => Token::Unit(Kilohertz),
|
||||
"mhz" | "megahertz" => Token::Unit(Megahertz),
|
||||
"ghz" | "gigahertz" => Token::Unit(Gigahertz),
|
||||
"thz" | "terahertz" => Token::Unit(Terahertz),
|
||||
"phz" | "petahertz" => Token::Unit(Petahertz),
|
||||
"rpm" | "r/min" | "rev/min" => Token::Unit(RevolutionsPerMinute),
|
||||
|
||||
"kph" | "kmh" => Token::Unit(KilometersPerHour),
|
||||
"mps" => Token::Unit(MetersPerSecond),
|
||||
"mph" => Token::Unit(MilesPerHour),
|
||||
"fps" => Token::Unit(FeetPerSecond),
|
||||
"kn" | "kt" | "knot" | "knots" => Token::Unit(Knot),
|
||||
|
||||
"k" | "kelvin" | "kelvins" => Token::Unit(Kelvin),
|
||||
"c" | "celsius" => Token::Unit(Celsius),
|
||||
"f" | "fahrenheit" | "fahrenheits" => Token::Unit(Fahrenheit),
|
||||
"deg" | "degree" | "degrees" => Token::Unit(default_degree),
|
||||
|
||||
string => {
|
||||
return Err(format!("Invalid string: {}", string));
|
||||
}
|
||||
};
|
||||
tokens.push(token);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
/// Lex an input string and returns [`Token`]s
|
||||
pub fn lex(input: &str, allow_trailing_operators: bool, default_degree: Unit) -> Result<Vec<Token>, String> {
|
||||
|
||||
@ -34,387 +497,40 @@ pub fn lex(input: &str, allow_trailing_operators: bool, default_degree: Unit) ->
|
||||
}
|
||||
}
|
||||
|
||||
let mut chars = input.chars().peekable();
|
||||
let mut tokens: Vec<Token> = vec![];
|
||||
let max_word_length = 30;
|
||||
|
||||
let mut left_paren_count = 0;
|
||||
let mut right_paren_count = 0;
|
||||
|
||||
let mut byte_index = 0;
|
||||
while let Some(current_char) = chars.next() {
|
||||
match current_char {
|
||||
'+' => tokens.push(Token::Operator(Plus)),
|
||||
'-' => tokens.push(Token::Operator(Minus)),
|
||||
'*' => tokens.push(Token::Operator(Multiply)),
|
||||
'/' => tokens.push(Token::Operator(Divide)),
|
||||
'%' => tokens.push(Token::LexerKeyword(PercentChar)),
|
||||
'^' => tokens.push(Token::Operator(Caret)),
|
||||
'!' => tokens.push(Token::UnaryOperator(Factorial)),
|
||||
'(' => {
|
||||
left_paren_count += 1;
|
||||
tokens.push(Token::Operator(LeftParen));
|
||||
let mut chars = UnicodeSegmentation::graphemes(input.as_str(), true).peekable();
|
||||
let mut tokens: Vec<Token> = vec![];
|
||||
|
||||
while let Some(_) = chars.peek() {
|
||||
let current_char = chars.peek().unwrap();
|
||||
println!("1: {}", current_char);
|
||||
let token = match *current_char {
|
||||
value if value.trim().is_empty() => {
|
||||
chars.next();
|
||||
continue;
|
||||
},
|
||||
')' => {
|
||||
right_paren_count += 1;
|
||||
tokens.push(Token::Operator(RightParen));
|
||||
value if is_alphabetic_extended_str(&value) => {
|
||||
parse_word(&mut tokens, &mut chars, default_degree)?;
|
||||
continue;
|
||||
},
|
||||
'π' => tokens.push(Token::Constant(Pi)),
|
||||
'\'' => tokens.push(Token::Unit(Foot)),
|
||||
'"' | '“' | '”' | '″' => tokens.push(Token::LexerKeyword(DoubleQuotes)),
|
||||
value if value.is_whitespace() => {},
|
||||
'Ω' | 'Ω' => tokens.push(Token::Unit(Ohm)),
|
||||
value if is_alphabetic_extended(&value) => {
|
||||
let start_index = byte_index;
|
||||
// account for chars longer than one byte
|
||||
let mut end_index = byte_index + current_char.len_utf8() - 1;
|
||||
|
||||
while let Some(current_char) = chars.peek() {
|
||||
// don't loop more than max_word_length:
|
||||
if end_index >= start_index + max_word_length - 1 {
|
||||
let string = &input[start_index..=end_index];
|
||||
return Err(format!("Invalid string starting with: {}", string));
|
||||
}
|
||||
|
||||
if is_alphabetic_extended(¤t_char) {
|
||||
byte_index += current_char.len_utf8();
|
||||
end_index += current_char.len_utf8();
|
||||
value if is_numeric_str(value) => {
|
||||
let mut number_string = "".to_string();
|
||||
while let Some(number_char) = chars.peek() {
|
||||
if is_numeric_str(number_char) {
|
||||
number_string += number_char;
|
||||
chars.next();
|
||||
} else {
|
||||
let string = &input[start_index..=end_index];
|
||||
match string.trim_end() {
|
||||
// allow for two-word units
|
||||
"nautical" | "light" | "sq" | "square" | "cubic" | "metric" | "newton" => {
|
||||
byte_index += current_char.len_utf8();
|
||||
chars.next();
|
||||
end_index += 1;
|
||||
},
|
||||
_ => {
|
||||
break;
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// allow for syntax like "km2"
|
||||
let mut is_multidimensional = true;
|
||||
match chars.peek() {
|
||||
// ...if the string is succeeded by 2 or 3
|
||||
Some('2') | Some('3') => {
|
||||
byte_index += '2'.len_utf8();
|
||||
chars.next();
|
||||
// we dont validate what comes after because it will be caught
|
||||
// by the parser anyway (for example 3m35)
|
||||
},
|
||||
_ => is_multidimensional = false,
|
||||
}
|
||||
if is_multidimensional {
|
||||
let string_plus_one_character = &input[start_index..=end_index+1];
|
||||
match string_plus_one_character {
|
||||
"mm2" | "millimeter2" | "millimeters2" | "millimetre2" | "millimetres2" => tokens.push(Token::Unit(SquareMillimeter)),
|
||||
"cm2" | "centimeter2" | "centimeters2" | "centimetre2" | "centimetres2" => tokens.push(Token::Unit(SquareCentimeter)),
|
||||
"dm2" | "decimeter2" | "decimeters2" | "decimetre2" | "decimetres2" => tokens.push(Token::Unit(SquareDecimeter)),
|
||||
"m2" | "meter2" | "meters2" | "metre2" | "metres2" => tokens.push(Token::Unit(SquareMeter)),
|
||||
"km2" | "kilometer2" | "kilometers2" | "kilometre2" | "kilometres2" => tokens.push(Token::Unit(SquareKilometer)),
|
||||
"in2" | "inch2" | "inches2" => tokens.push(Token::Unit(SquareInch)),
|
||||
"ft2" | "foot2" | "feet2" => tokens.push(Token::Unit(SquareFoot)),
|
||||
"yd2" | "yard2" | "yards2" => tokens.push(Token::Unit(SquareYard)),
|
||||
"mi2" | "mile2" | "miles2" => tokens.push(Token::Unit(SquareMile)),
|
||||
"mm3" | "millimeter3" | "millimeters3" | "millimetre3" | "millimetres3" => tokens.push(Token::Unit(CubicMillimeter)),
|
||||
"cm3" | "centimeter3" | "centimeters3" | "centimetre3" | "centimetres3" => tokens.push(Token::Unit(CubicCentimeter)),
|
||||
"dm3" | "decimeter3" | "decimeters3" | "decimetre3" | "decimetres3" => tokens.push(Token::Unit(CubicDecimeter)),
|
||||
"m3" | "meter3" | "meters3" | "metre3" | "metres3" => tokens.push(Token::Unit(CubicMeter)),
|
||||
"km3" | "kilometer3" | "kilometers3" | "kilometre3" | "kilometres3" => tokens.push(Token::Unit(CubicKilometer)),
|
||||
"inc3" | "inch3" | "inches3" => tokens.push(Token::Unit(CubicInch)),
|
||||
"ft3" | "foot3" | "feet3" => tokens.push(Token::Unit(CubicFoot)),
|
||||
"yd3" | "yard3" | "yards3" => tokens.push(Token::Unit(CubicYard)),
|
||||
"mi3" | "mile3" | "miles3" => tokens.push(Token::Unit(CubicMile)),
|
||||
_ => {},
|
||||
}
|
||||
} else {
|
||||
let string = &input[start_index..=end_index];
|
||||
let string: &str = &string.replacen("square", "sq", 1);
|
||||
match string {
|
||||
// MAKE SURE max_word_length IS EQUAL TO THE
|
||||
// LENGTH OF THE LONGEST STRING IN THIS MATCH STATEMENT.
|
||||
|
||||
"to" => tokens.push(Token::TextOperator(To)),
|
||||
"of" => tokens.push(Token::TextOperator(Of)),
|
||||
|
||||
"hundred" => tokens.push(Token::NamedNumber(Hundred)),
|
||||
"thousand" => tokens.push(Token::NamedNumber(Thousand)),
|
||||
"mil" | "mill" | "million" => tokens.push(Token::NamedNumber(Million)),
|
||||
"bil" | "bill" | "billion" => tokens.push(Token::NamedNumber(Billion)),
|
||||
"tri" | "tril" | "trillion" => tokens.push(Token::NamedNumber(Trillion)),
|
||||
"quadrillion" => tokens.push(Token::NamedNumber(Quadrillion)),
|
||||
"quintillion" => tokens.push(Token::NamedNumber(Quintillion)),
|
||||
"sextillion" => tokens.push(Token::NamedNumber(Sextillion)),
|
||||
"septillion" => tokens.push(Token::NamedNumber(Septillion)),
|
||||
"octillion" => tokens.push(Token::NamedNumber(Octillion)),
|
||||
"nonillion" => tokens.push(Token::NamedNumber(Nonillion)),
|
||||
"decillion" => tokens.push(Token::NamedNumber(Decillion)),
|
||||
"undecillion" => tokens.push(Token::NamedNumber(Undecillion)),
|
||||
"duodecillion" => tokens.push(Token::NamedNumber(Duodecillion)),
|
||||
"tredecillion" => tokens.push(Token::NamedNumber(Tredecillion)),
|
||||
"quattuordecillion" => tokens.push(Token::NamedNumber(Quattuordecillion)),
|
||||
"quindecillion" => tokens.push(Token::NamedNumber(Quindecillion)),
|
||||
"sexdecillion" => tokens.push(Token::NamedNumber(Sexdecillion)),
|
||||
"septendecillion" => tokens.push(Token::NamedNumber(Septendecillion)),
|
||||
"octodecillion" => tokens.push(Token::NamedNumber(Octodecillion)),
|
||||
"novemdecillion" => tokens.push(Token::NamedNumber(Novemdecillion)),
|
||||
"vigintillion" => tokens.push(Token::NamedNumber(Vigintillion)),
|
||||
"centillion" => tokens.push(Token::NamedNumber(Centillion)),
|
||||
"googol" => tokens.push(Token::NamedNumber(Googol)),
|
||||
|
||||
"pi" => tokens.push(Token::Constant(Pi)),
|
||||
"e" => tokens.push(Token::Constant(E)),
|
||||
|
||||
"mod" => tokens.push(Token::Operator(Modulo)),
|
||||
|
||||
"sqrt" => tokens.push(Token::FunctionIdentifier(Sqrt)),
|
||||
"cbrt" => tokens.push(Token::FunctionIdentifier(Cbrt)),
|
||||
|
||||
"log" => tokens.push(Token::FunctionIdentifier(Log)),
|
||||
"ln" => tokens.push(Token::FunctionIdentifier(Ln)),
|
||||
"exp" => tokens.push(Token::FunctionIdentifier(Exp)),
|
||||
|
||||
"round" | "rint" => tokens.push(Token::FunctionIdentifier(Round)),
|
||||
"ceil" => tokens.push(Token::FunctionIdentifier(Ceil)),
|
||||
"floor" => tokens.push(Token::FunctionIdentifier(Floor)),
|
||||
"abs" | "fabs" => tokens.push(Token::FunctionIdentifier(Abs)),
|
||||
|
||||
"sin" => tokens.push(Token::FunctionIdentifier(Sin)),
|
||||
"cos" => tokens.push(Token::FunctionIdentifier(Cos)),
|
||||
"tan" => tokens.push(Token::FunctionIdentifier(Tan)),
|
||||
|
||||
"per" => tokens.push(Token::LexerKeyword(Per)),
|
||||
"hg" => tokens.push(Token::LexerKeyword(Hg)), // can be hectogram or mercury
|
||||
|
||||
"ns" | "nanosec" | "nanosecs" | "nanosecond" | "nanoseconds" => tokens.push(Token::Unit(Nanosecond)),
|
||||
// µ and μ are two different characters
|
||||
"µs" | "μs" | "microsec" | "microsecs" | "microsecond" | "microseconds" => tokens.push(Token::Unit(Microsecond)),
|
||||
"ms" | "millisec" | "millisecs" | "millisecond" | "milliseconds" => tokens.push(Token::Unit(Millisecond)),
|
||||
"s" | "sec" | "secs" | "second" | "seconds" => tokens.push(Token::Unit(Second)),
|
||||
"min" | "mins" | "minute" | "minutes" => tokens.push(Token::Unit(Minute)),
|
||||
"h" | "hr" | "hrs" | "hour" | "hours" => tokens.push(Token::Unit(Hour)),
|
||||
"day" | "days" => tokens.push(Token::Unit(Day)),
|
||||
"wk" | "wks" | "week" | "weeks" => tokens.push(Token::Unit(Week)),
|
||||
"mo" | "mos" | "month" | "months" => tokens.push(Token::Unit(Month)),
|
||||
"q" | "quarter" | "quarters" => tokens.push(Token::Unit(Quarter)),
|
||||
"yr" | "yrs" | "year" | "years" => tokens.push(Token::Unit(Year)),
|
||||
"decade" | "decades" => tokens.push(Token::Unit(Decade)),
|
||||
"century" | "centuries" => tokens.push(Token::Unit(Century)),
|
||||
"millenium" | "millenia" | "milleniums" => tokens.push(Token::Unit(Millenium)),
|
||||
|
||||
"mm" | "millimeter" | "millimeters" | "millimetre" | "millimetres" => tokens.push(Token::Unit(Millimeter)),
|
||||
"cm" | "centimeter" | "centimeters" | "centimetre" | "centimetres" => tokens.push(Token::Unit(Centimeter)),
|
||||
"dm" | "decimeter" | "decimeters" | "decimetre" | "decimetres" => tokens.push(Token::Unit(Decimeter)),
|
||||
"m" | "meter" | "meters" | "metre" | "metres" => tokens.push(Token::Unit(Meter)),
|
||||
"km" | "kilometer" | "kilometers" | "kilometre" | "kilometres" => tokens.push(Token::Unit(Kilometer)),
|
||||
"in" => tokens.push(Token::LexerKeyword(In)),
|
||||
"inch" | "inches" => tokens.push(Token::Unit(Inch)),
|
||||
"ft" | "foot" | "feet" => tokens.push(Token::Unit(Foot)),
|
||||
"yd" | "yard" | "yards" => tokens.push(Token::Unit(Yard)),
|
||||
"mi" | "mile" | "miles" => tokens.push(Token::Unit(Mile)),
|
||||
"nmi" | "nautical mile" | "nautical miles" => tokens.push(Token::Unit(NauticalMile)),
|
||||
"ly" | "lightyear" | "lightyears" | "light yr" | "light yrs" | "light year" | "light years" => tokens.push(Token::Unit(LightYear)),
|
||||
"lightsec" | "lightsecs" | "lightsecond" | "lightseconds" | "light sec" | "light secs" | "light second" | "light seconds" => tokens.push(Token::Unit(LightYear)),
|
||||
|
||||
"sqmm" | "sq mm" | "sq millimeter" | "sq millimeters" | "sq millimetre" | "sq millimetres" => tokens.push(Token::Unit(SquareMillimeter)),
|
||||
"sqcm" | "sq cm" | "sq centimeter" | "sq centimeters" | "sq centimetre" | "sq centimetres" => tokens.push(Token::Unit(SquareCentimeter)),
|
||||
"sqdm" | "sq dm" | "sq decimeter" | "sq decimeters" | "sq decimetre" | "sq decimetres" => tokens.push(Token::Unit(SquareDecimeter)),
|
||||
"sqm" | "sq m" | "sq meter" | "sq meters" | "sq metre" | "sq metres" => tokens.push(Token::Unit(SquareMeter)),
|
||||
"sqkm" | "sq km" | "sq kilometer" | "sq kilometers" | "sq kilometre" | "sq kilometres" => tokens.push(Token::Unit(SquareKilometer)),
|
||||
"sqin" | "sq in" | "sq inch" | "sq inches" => tokens.push(Token::Unit(SquareInch)),
|
||||
"sqft" | "sq ft" | "sq foot" | "sq feet" => tokens.push(Token::Unit(SquareFoot)),
|
||||
"sqyd" | "sq yd" | "sq yard" | "sq yards" => tokens.push(Token::Unit(SquareYard)),
|
||||
"sqmi" | "sq mi" | "sq mile" | "sq miles" => tokens.push(Token::Unit(SquareMile)),
|
||||
"are" | "ares" => tokens.push(Token::Unit(Are)),
|
||||
"decare" | "decares" => tokens.push(Token::Unit(Decare)),
|
||||
"ha" | "hectare" | "hectares" => tokens.push(Token::Unit(Hectare)),
|
||||
"acre" | "acres" => tokens.push(Token::Unit(Acre)),
|
||||
|
||||
"cubic millimeter" | "cubic millimeters" | "cubic millimetre" | "cubic millimetres" => tokens.push(Token::Unit(CubicMillimeter)),
|
||||
"cubic centimeter" | "cubic centimeters" | "cubic centimetre" | "cubic centimetres" => tokens.push(Token::Unit(CubicCentimeter)),
|
||||
"cubic decimeter" | "cubic decimeters" | "cubic decimetre" | "cubic decimetres" => tokens.push(Token::Unit(CubicDecimeter)),
|
||||
"cubic meter" | "cubic meters" | "cubic metre" | "cubic metres" => tokens.push(Token::Unit(CubicMeter)),
|
||||
"cubic kilometer" | "cubic kilometers" | "cubic kilometre" | "cubic kilometres" => tokens.push(Token::Unit(CubicKilometer)),
|
||||
"cubic inch" | "cubic inches" => tokens.push(Token::Unit(CubicInch)),
|
||||
"cubic foot" | "cubic feet" => tokens.push(Token::Unit(CubicFoot)),
|
||||
"cubic yard" | "cubic yards" => tokens.push(Token::Unit(CubicYard)),
|
||||
"cubic mile" | "cubic miles" => tokens.push(Token::Unit(CubicMile)),
|
||||
"ml" | "milliliter" | "milliliters" | "millilitre" | "millilitres" => tokens.push(Token::Unit(Milliliter)),
|
||||
"cl" | "centiliter" | "centiliters" | "centilitre" | "centilitres" => tokens.push(Token::Unit(Centiliter)),
|
||||
"dl" | "deciliter" | "deciliters" | "decilitre" | "decilitres" => tokens.push(Token::Unit(Deciliter)),
|
||||
"l" | "liter" | "liters" | "litre" | "litres" => tokens.push(Token::Unit(Liter)),
|
||||
"ts" | "tsp" | "tspn" | "tspns" | "teaspoon" | "teaspoons" => tokens.push(Token::Unit(Teaspoon)),
|
||||
"tbs" | "tbsp" | "tablespoon" | "tablespoons" => tokens.push(Token::Unit(Tablespoon)),
|
||||
"floz" | "fl oz" | "fl ounce" | "fl ounces" | "fluid oz" | "fluid ounce" | "fluid ounces" => tokens.push(Token::Unit(FluidOunce)),
|
||||
"cup" | "cups" => tokens.push(Token::Unit(Cup)),
|
||||
"pt" | "pint" | "pints" => tokens.push(Token::Unit(Pint)),
|
||||
"qt" | "quart" | "quarts" => tokens.push(Token::Unit(Quart)),
|
||||
"gal" | "gallon" | "gallons" => tokens.push(Token::Unit(Gallon)),
|
||||
"bbl" | "oil barrel" | "oil barrels" => tokens.push(Token::Unit(OilBarrel)),
|
||||
|
||||
"mg" | "milligram" | "milligrams" => tokens.push(Token::Unit(Milligram)),
|
||||
"g" | "gram" | "grams" => tokens.push(Token::Unit(Gram)),
|
||||
"hectogram" | "hectograms" => tokens.push(Token::Unit(Hectogram)),
|
||||
"kg" | "kilo" | "kilos" | "kilogram" | "kilograms" => tokens.push(Token::Unit(Kilogram)),
|
||||
"t" | "tonne" | "tonnes" | "metric ton" | "metric tons" | "metric tonne" | "metric tonnes" => tokens.push(Token::Unit(MetricTon)),
|
||||
"oz" | "ounces" => tokens.push(Token::Unit(Ounce)),
|
||||
"lb" | "lbs" => tokens.push(Token::Unit(Pound)),
|
||||
"pound" | "pounds" => {
|
||||
let str_len = "-force".len();
|
||||
match input.get(end_index+1..=end_index+str_len) {
|
||||
Some("-force") => {
|
||||
tokens.push(Token::LexerKeyword(PoundForce));
|
||||
for _i in 0..str_len {
|
||||
chars.next();
|
||||
}
|
||||
byte_index += str_len;
|
||||
},
|
||||
_ => {
|
||||
tokens.push(Token::Unit(Pound));
|
||||
}
|
||||
}
|
||||
},
|
||||
"stone" | "stones" => tokens.push(Token::Unit(Stone)),
|
||||
"st" | "ton" | "tons" | "short ton" | "short tons" | "short tonne" | "short tonnes" => tokens.push(Token::Unit(ShortTon)),
|
||||
"lt" | "long ton" | "long tons" | "long tonne" | "long tonnes" => tokens.push(Token::Unit(LongTon)),
|
||||
|
||||
"bit" | "bits" => tokens.push(Token::Unit(Bit)),
|
||||
"kbit" | "kilobit" | "kilobits" => tokens.push(Token::Unit(Kilobit)),
|
||||
"mbit" | "megabit" | "megabits" => tokens.push(Token::Unit(Megabit)),
|
||||
"gbit" | "gigabit" | "gigabits" => tokens.push(Token::Unit(Gigabit)),
|
||||
"tbit" | "terabit" | "terabits" => tokens.push(Token::Unit(Terabit)),
|
||||
"pbit" | "petabit" | "petabits" => tokens.push(Token::Unit(Petabit)),
|
||||
"ebit" | "exabit" | "exabits" => tokens.push(Token::Unit(Exabit)),
|
||||
"zbit" | "zettabit" | "zettabits" => tokens.push(Token::Unit(Zettabit)),
|
||||
"ybit" | "yottabit" | "yottabits" => tokens.push(Token::Unit(Yottabit)),
|
||||
"kibit" | "kibibit" | "kibibits" => tokens.push(Token::Unit(Kibibit)),
|
||||
"mibit" | "mebibit" | "mebibits" => tokens.push(Token::Unit(Mebibit)),
|
||||
"gibit" | "gibibit" | "gibibits" => tokens.push(Token::Unit(Gibibit)),
|
||||
"tibit" | "tebibit" | "tebibits" => tokens.push(Token::Unit(Tebibit)),
|
||||
"pibit" | "pebibit" | "pebibits" => tokens.push(Token::Unit(Pebibit)),
|
||||
"eibit" | "exbibit" | "exbibits" => tokens.push(Token::Unit(Exbibit)),
|
||||
"zibit" | "zebibit" | "zebibits" => tokens.push(Token::Unit(Zebibit)),
|
||||
"yibit" | "yobibit" | "yobibits" => tokens.push(Token::Unit(Yobibit)),
|
||||
"byte" | "bytes" => tokens.push(Token::Unit(Byte)),
|
||||
"kb" | "kilobyte" | "kilobytes" => tokens.push(Token::Unit(Kilobyte)),
|
||||
"mb" | "megabyte" | "megabytes" => tokens.push(Token::Unit(Megabyte)),
|
||||
"gb" | "gigabyte" | "gigabytes" => tokens.push(Token::Unit(Gigabyte)),
|
||||
"tb" | "terabyte" | "terabytes" => tokens.push(Token::Unit(Terabyte)),
|
||||
"pb" | "petabyte" | "petabytes" => tokens.push(Token::Unit(Petabyte)),
|
||||
"eb" | "exabyte" | "exabytes" => tokens.push(Token::Unit(Exabyte)),
|
||||
"zb" | "zettabyte" | "zettabytes" => tokens.push(Token::Unit(Zettabyte)),
|
||||
"yb" | "yottabyte" | "yottabytes" => tokens.push(Token::Unit(Yottabyte)),
|
||||
"kib" | "kibibyte" | "kibibytes" => tokens.push(Token::Unit(Kibibyte)),
|
||||
"mib" | "mebibyte" | "mebibytes" => tokens.push(Token::Unit(Mebibyte)),
|
||||
"gib" | "gibibyte" | "gibibytes" => tokens.push(Token::Unit(Gibibyte)),
|
||||
"tib" | "tebibyte" | "tebibytes" => tokens.push(Token::Unit(Tebibyte)),
|
||||
"pib" | "pebibyte" | "pebibytes" => tokens.push(Token::Unit(Pebibyte)),
|
||||
"eib" | "exbibyte" | "exbibytes" => tokens.push(Token::Unit(Exbibyte)),
|
||||
"zib" | "zebibyte" | "zebibytes" => tokens.push(Token::Unit(Zebibyte)),
|
||||
"yib" | "yobibyte" | "yobibytes" => tokens.push(Token::Unit(Yobibyte)),
|
||||
|
||||
"millijoule" | "millijoules" => tokens.push(Token::Unit(Millijoule)),
|
||||
"j"| "joule" | "joules" => tokens.push(Token::Unit(Joule)),
|
||||
"nm" | "newton meter" | "newton meters" | "newton-meter" | "newton-meters" | "newton metre" | "newton metres" | "newton-metre" | "newton-metres" => tokens.push(Token::Unit(NewtonMeter)),
|
||||
"kj" | "kilojoule" | "kilojoules" => tokens.push(Token::Unit(Kilojoule)),
|
||||
"mj" | "megajoule" | "megajoules" => tokens.push(Token::Unit(Megajoule)),
|
||||
"gj" | "gigajoule" | "gigajoules" => tokens.push(Token::Unit(Gigajoule)),
|
||||
"tj" | "terajoule" | "terajoules" => tokens.push(Token::Unit(Terajoule)),
|
||||
"cal" | "calorie" | "calories" => tokens.push(Token::Unit(Calorie)),
|
||||
"kcal" | "kilocalorie" | "kilocalories" => tokens.push(Token::Unit(KiloCalorie)),
|
||||
"btu" | "british thermal unit" | "british thermal units" => tokens.push(Token::Unit(BritishThermalUnit)),
|
||||
"wh" | "watt hour" | "watt hours" => tokens.push(Token::Unit(WattHour)),
|
||||
"kwh" | "kilowatt hour" | "kilowatt hours" => tokens.push(Token::Unit(KilowattHour)),
|
||||
"mwh" | "megawatt hour" | "megawatt hours" => tokens.push(Token::Unit(MegawattHour)),
|
||||
"gwh" | "gigawatt hour" | "gigawatt hours" => tokens.push(Token::Unit(GigawattHour)),
|
||||
"twh" | "terawatt hour" | "terawatt hours" => tokens.push(Token::Unit(TerawattHour)),
|
||||
"pwh" | "petawatt hour" | "petawatt hours" => tokens.push(Token::Unit(PetawattHour)),
|
||||
|
||||
"milliwatt" | "milliwatts" => tokens.push(Token::Unit(Milliwatt)),
|
||||
"w" | "watt" | "watts" => tokens.push(Token::Unit(Watt)),
|
||||
"kw" | "kilowatt" | "kilowatts" => tokens.push(Token::Unit(Kilowatt)),
|
||||
"mw" | "megawatt" | "megawatts" => tokens.push(Token::Unit(Megawatt)),
|
||||
"gw" | "gigawatt" | "gigawatts" => tokens.push(Token::Unit(Gigawatt)),
|
||||
"tw" | "terawatt" | "terawatts" => tokens.push(Token::Unit(Terawatt)),
|
||||
"pw" | "petawatt" | "petawatts" => tokens.push(Token::Unit(Petawatt)),
|
||||
"hp" | "hps" | "horsepower" | "horsepowers" => tokens.push(Token::Unit(Horsepower)),
|
||||
"mhp" | "hpm" | "metric hp" | "metric hps" | "metric horsepower" | "metric horsepowers" => tokens.push(Token::Unit(MetricHorsepower)),
|
||||
|
||||
"ma" | "milliamp" | "milliamps" | "milliampere" | "milliamperes" => tokens.push(Token::Unit(Milliampere)),
|
||||
"a" | "amp" | "amps" | "ampere" | "amperes" => tokens.push(Token::Unit(Ampere)),
|
||||
"ka" | "kiloamp" | "kiloamps" | "kiloampere" | "kiloamperes" => tokens.push(Token::Unit(Kiloampere)),
|
||||
"bi" | "biot" | "biots" | "aba" | "abampere" | "abamperes" => tokens.push(Token::Unit(Abampere)),
|
||||
|
||||
"mΩ" | "mΩ" | "milliohm" | "milliohms" => tokens.push(Token::Unit(Milliohm)),
|
||||
"Ω" | "Ω" | "ohm" | "ohms" => tokens.push(Token::Unit(Ohm)),
|
||||
"kΩ" | "kΩ" | "kiloohm" | "kiloohms" => tokens.push(Token::Unit(Kiloohm)),
|
||||
|
||||
"mv" | "millivolt" | "millivolts" => tokens.push(Token::Unit(Millivolt)),
|
||||
"v" | "volt" | "volts" => tokens.push(Token::Unit(Volt)),
|
||||
"kv" | "kilovolt" | "kilovolts" => tokens.push(Token::Unit(Kilovolt)),
|
||||
|
||||
// for pound-force per square inch
|
||||
"lbf" => tokens.push(Token::LexerKeyword(PoundForce)),
|
||||
"force" => tokens.push(Token::LexerKeyword(Force)),
|
||||
|
||||
"pa" | "pascal" | "pascals" => tokens.push(Token::Unit(Pascal)),
|
||||
"kpa" | "kilopascal" | "kilopascals" => tokens.push(Token::Unit(Kilopascal)),
|
||||
"atm" | "atms" | "atmosphere" | "atmospheres" => tokens.push(Token::Unit(Atmosphere)),
|
||||
"mbar" | "mbars" | "millibar" | "millibars" => tokens.push(Token::Unit(Millibar)),
|
||||
"bar" | "bars" => tokens.push(Token::Unit(Bar)),
|
||||
"inhg" => tokens.push(Token::Unit(InchOfMercury)),
|
||||
"mercury" => tokens.push(Token::LexerKeyword(Mercury)),
|
||||
"psi" => tokens.push(Token::Unit(PoundsPerSquareInch)),
|
||||
"torr" | "torrs" => tokens.push(Token::Unit(Torr)),
|
||||
|
||||
"hz" | "hertz" => tokens.push(Token::Unit(Hertz)),
|
||||
"khz" | "kilohertz" => tokens.push(Token::Unit(Kilohertz)),
|
||||
"mhz" | "megahertz" => tokens.push(Token::Unit(Megahertz)),
|
||||
"ghz" | "gigahertz" => tokens.push(Token::Unit(Gigahertz)),
|
||||
"thz" | "terahertz" => tokens.push(Token::Unit(Terahertz)),
|
||||
"phz" | "petahertz" => tokens.push(Token::Unit(Petahertz)),
|
||||
"rpm" | "r/min" | "rev/min" => tokens.push(Token::Unit(RevolutionsPerMinute)),
|
||||
|
||||
"kph" | "kmh" => tokens.push(Token::Unit(KilometersPerHour)),
|
||||
"mps" => tokens.push(Token::Unit(MetersPerSecond)),
|
||||
"mph" => tokens.push(Token::Unit(MilesPerHour)),
|
||||
"fps" => tokens.push(Token::Unit(FeetPerSecond)),
|
||||
"kn" | "kt" | "knot" | "knots" => tokens.push(Token::Unit(Knot)),
|
||||
|
||||
"k" | "kelvin" | "kelvins" => tokens.push(Token::Unit(Kelvin)),
|
||||
"c" | "celsius" => tokens.push(Token::Unit(Celsius)),
|
||||
"f" | "fahrenheit" | "fahrenheits" => tokens.push(Token::Unit(Fahrenheit)),
|
||||
"deg" | "degree" | "degrees" => tokens.push(Token::Unit(default_degree)),
|
||||
|
||||
_ => {
|
||||
return Err(format!("Invalid string: {}", string));
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
'.' | '0'..='9' => {
|
||||
let start_index = byte_index;
|
||||
let mut end_index = byte_index;
|
||||
while let Some(current_char) = chars.peek() {
|
||||
if current_char == &'.' || current_char.is_digit(10) {
|
||||
byte_index += current_char.len_utf8();
|
||||
chars.next();
|
||||
end_index += 1;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let number_string = &input[start_index..=end_index];
|
||||
d128::set_status(decimal::Status::empty());
|
||||
match d128::from_str(number_string) {
|
||||
let token;
|
||||
match d128::from_str(&number_string) {
|
||||
Ok(number) => {
|
||||
if d128::get_status().is_empty() {
|
||||
tokens.push(Token::Number(number));
|
||||
token = Token::Number(number);
|
||||
} else {
|
||||
return Err(format!("Error lexing d128 number: {}", number_string));
|
||||
}
|
||||
@ -423,18 +539,34 @@ pub fn lex(input: &str, allow_trailing_operators: bool, default_degree: Unit) ->
|
||||
return Err(format!("Error lexing d128 number: {}", number_string));
|
||||
}
|
||||
};
|
||||
|
||||
token
|
||||
},
|
||||
"+" => Token::Operator(Plus),
|
||||
"-" => Token::Operator(Minus),
|
||||
"*" => Token::Operator(Multiply),
|
||||
"/" => Token::Operator(Divide),
|
||||
"%" => Token::LexerKeyword(PercentChar),
|
||||
"^" => Token::Operator(Caret),
|
||||
"!" => Token::UnaryOperator(Factorial),
|
||||
"(" => {
|
||||
left_paren_count += 1;
|
||||
Token::Operator(LeftParen)
|
||||
},
|
||||
")" => {
|
||||
right_paren_count += 1;
|
||||
Token::Operator(RightParen)
|
||||
},
|
||||
"π" => Token::Constant(Pi),
|
||||
"'" => Token::Unit(Foot),
|
||||
"\"" | "“" | "”" | "″" => Token::LexerKeyword(DoubleQuotes),
|
||||
"Ω" | "Ω" => Token::Unit(Ohm),
|
||||
_ => {
|
||||
return Err(format!("Invalid character: {}", current_char));
|
||||
},
|
||||
}
|
||||
// The π character, for example, is more than one byte, so in that case
|
||||
// byte_index needs to be incremented by 2. This is because we're slicing
|
||||
// strings to get digits/words, and Rust slices bytes, not utf8 graphemes
|
||||
// (aka "user-perceived characters").
|
||||
byte_index += current_char.len_utf8();
|
||||
};
|
||||
};
|
||||
chars.next();
|
||||
tokens.push(token);
|
||||
}
|
||||
|
||||
// auto insert missing parentheses in first and last position
|
||||
if left_paren_count > right_paren_count {
|
||||
@ -475,7 +607,7 @@ pub fn lex(input: &str, allow_trailing_operators: bool, default_degree: Unit) ->
|
||||
}
|
||||
}
|
||||
},
|
||||
Some(Token::UnaryOperator(_operator)) => {
|
||||
Some(Token::UnaryOperator(_)) => {
|
||||
// "10%!" should be a percentage
|
||||
tokens[token_index] = Token::UnaryOperator(Percent);
|
||||
},
|
||||
@ -494,7 +626,7 @@ pub fn lex(input: &str, allow_trailing_operators: bool, default_degree: Unit) ->
|
||||
},
|
||||
}
|
||||
},
|
||||
// decide if " is inch of inch of mercury
|
||||
// decide if " is inch or inch of mercury
|
||||
Token::LexerKeyword(DoubleQuotes) => {
|
||||
match tokens.get(token_index + 1) {
|
||||
Some(Token::LexerKeyword(Hg)) => {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user