CPC/src/lexer.rs
2021-01-14 17:12:11 +01:00

588 lines
30 KiB
Rust

use std::str::FromStr;
use decimal_fixes_mirror::d128;
use crate::{Token, TokenVector};
use crate::Operator::{Caret, Divide, LeftParen, Minus, Modulo, Multiply, Plus, RightParen};
use crate::UnaryOperator::{Percent, Factorial};
use crate::TextOperator::{Of, To};
use crate::NamedNumber::*;
use crate::Constant::{E, Pi};
use crate::LexerKeyword::{In, PercentChar, Per, Mercury, Hg, PoundForce, Force, DoubleQuotes};
use crate::FunctionIdentifier::{Cbrt, Ceil, Cos, Exp, Abs, Floor, Ln, Log, Round, Sin, Sqrt, Tan};
use crate::units::Unit;
use crate::units::Unit::*;
/// Returns `true` if `input` may be part of a word (unit name, keyword or
/// function name) scanned by the lexer.
///
/// Besides the ASCII letters this accepts the symbols used in unit
/// abbreviations. Look-alike characters are spelled as escapes so that both
/// code points stay distinct even if the file is re-encoded or
/// Unicode-normalised:
///
/// - `Ω`: U+03A9 (Greek capital omega) and U+2126 (ohm sign)
/// - `µ`: U+00B5 (micro sign) and U+03BC (Greek small mu)
/// - `π`: U+03C0 (Greek small pi)
///
/// The parameter stays a `&char` to keep the signature callers already use.
pub const fn is_alphabetic_extended(input: &char) -> bool {
    matches!(
        *input,
        'A'..='Z'
            | 'a'..='z'
            | '\u{03A9}'
            | '\u{2126}'
            | '\u{00B5}'
            | '\u{03BC}'
            | '\u{03C0}'
    )
}
/// Lex an input string and return a [`TokenVector`]
pub fn lex(input: &str, allow_trailing_operators: bool, default_degree: Unit) -> Result<TokenVector, String> {
let mut input = input.replace(",", ""); // ignore commas
if allow_trailing_operators {
match &input.chars().last().unwrap_or('x') {
'+' | '-' | '*' | '/' | '^' | '(' => {
input.pop();
},
_ => {},
}
}
let mut chars = input.chars().peekable();
let mut tokens: TokenVector = vec![];
let max_word_length = 30;
let mut left_paren_count = 0;
let mut right_paren_count = 0;
let mut byte_index = 0;
while let Some(current_char) = chars.next() {
match current_char {
'+' => tokens.push(Token::Operator(Plus)),
'-' => tokens.push(Token::Operator(Minus)),
'*' => tokens.push(Token::Operator(Multiply)),
'/' => tokens.push(Token::Operator(Divide)),
'%' => tokens.push(Token::LexerKeyword(PercentChar)),
'^' => tokens.push(Token::Operator(Caret)),
'!' => tokens.push(Token::UnaryOperator(Factorial)),
'(' => {
left_paren_count += 1;
tokens.push(Token::Operator(LeftParen));
},
')' => {
right_paren_count += 1;
tokens.push(Token::Operator(RightParen));
},
'π' => tokens.push(Token::Constant(Pi)),
'\'' => tokens.push(Token::Unit(Foot)),
'"' | '“' | '”' | '″' => tokens.push(Token::LexerKeyword(DoubleQuotes)),
value if value.is_whitespace() => {},
'Ω' | 'Ω' => tokens.push(Token::Unit(Ohm)),
value if is_alphabetic_extended(&value) => {
let start_index = byte_index;
// account for chars longer than one byte
let mut end_index = byte_index + current_char.len_utf8() - 1;
while let Some(current_char) = chars.peek() {
// don't loop more than max_word_length:
if end_index >= start_index + max_word_length - 1 {
let string = &input[start_index..=end_index];
return Err(format!("Invalid string starting with: {}", string));
}
if is_alphabetic_extended(&current_char) {
byte_index += current_char.len_utf8();
end_index += current_char.len_utf8();
chars.next();
} else {
let string = &input[start_index..=end_index];
match string.trim_end() {
// allow for two-word units
"nautical" | "light" | "sq" | "square" | "cubic" | "metric" | "newton" => {
byte_index += current_char.len_utf8();
chars.next();
end_index += 1;
},
_ => {
break;
},
}
}
}
// allow for syntax like "km2"
let mut is_multidimensional = true;
match chars.peek() {
// ...if the string is succeeded by 2 or 3
Some('2') | Some('3') => {
byte_index += '2'.len_utf8();
chars.next();
// we dont validate what comes after because it will be caught
// by the parser anyway (for example 3m35)
},
_ => is_multidimensional = false,
}
if is_multidimensional {
let string_plus_one_character = &input[start_index..=end_index+1];
match string_plus_one_character {
"mm2" | "millimeter2" | "millimeters2" => tokens.push(Token::Unit(SquareMillimeter)),
"cm2" | "centimeter2" | "centimeters2" => tokens.push(Token::Unit(SquareCentimeter)),
"dm2" | "decimeter2" | "decimeters2" => tokens.push(Token::Unit(SquareCentimeter)),
"m2" | "meter2" | "meters2" => tokens.push(Token::Unit(SquareMeter)),
"km2" | "kilometer2" | "kilometers2" => tokens.push(Token::Unit(SquareKilometer)),
"in2" | "inch2" | "inches2" => tokens.push(Token::Unit(SquareInch)),
"ft2" | "foot2" | "feet2" => tokens.push(Token::Unit(SquareFoot)),
"yd2" | "yard2" | "yards2" => tokens.push(Token::Unit(SquareYard)),
"mi2" | "mile2" | "miles2" => tokens.push(Token::Unit(SquareMile)),
"mm3" | "millimeter3" | "millimeters3" => tokens.push(Token::Unit(CubicMillimeter)),
"cm3" | "centimeter3" | "centimeters3" => tokens.push(Token::Unit(CubicCentimeter)),
"dm3" | "decimeter3" | "decimeters3" => tokens.push(Token::Unit(CubicCentimeter)),
"m3" | "meter3" | "meters3" => tokens.push(Token::Unit(CubicMeter)),
"km3" | "kilometer3" | "kilometers3" => tokens.push(Token::Unit(CubicKilometer)),
"inc3" | "inch3" | "inches3" => tokens.push(Token::Unit(CubicInch)),
"ft3" | "foot3" | "feet3" => tokens.push(Token::Unit(CubicFoot)),
"yd3" | "yard3" | "yards3" => tokens.push(Token::Unit(CubicYard)),
"mi3" | "mile3" | "miles3" => tokens.push(Token::Unit(CubicMile)),
_ => {},
}
} else {
let string = &input[start_index..=end_index];
let string: &str = &string.replacen("square", "sq", 1);
match string {
// MAKE SURE max_word_length IS EQUAL TO THE
// LENGTH OF THE LONGEST STRING IN THIS MATCH STATEMENT.
"to" => tokens.push(Token::TextOperator(To)),
"of" => tokens.push(Token::TextOperator(Of)),
"hundred" => tokens.push(Token::NamedNumber(Hundred)),
"thousand" => tokens.push(Token::NamedNumber(Thousand)),
"mil" | "mill" | "million" => tokens.push(Token::NamedNumber(Million)),
"bil" | "bill" | "billion" => tokens.push(Token::NamedNumber(Billion)),
"tri" | "tril" | "trillion" => tokens.push(Token::NamedNumber(Trillion)),
"quadrillion" => tokens.push(Token::NamedNumber(Quadrillion)),
"quintillion" => tokens.push(Token::NamedNumber(Quintillion)),
"sextillion" => tokens.push(Token::NamedNumber(Sextillion)),
"septillion" => tokens.push(Token::NamedNumber(Septillion)),
"octillion" => tokens.push(Token::NamedNumber(Octillion)),
"nonillion" => tokens.push(Token::NamedNumber(Nonillion)),
"decillion" => tokens.push(Token::NamedNumber(Decillion)),
"undecillion" => tokens.push(Token::NamedNumber(Undecillion)),
"duodecillion" => tokens.push(Token::NamedNumber(Duodecillion)),
"tredecillion" => tokens.push(Token::NamedNumber(Tredecillion)),
"quattuordecillion" => tokens.push(Token::NamedNumber(Quattuordecillion)),
"quindecillion" => tokens.push(Token::NamedNumber(Quindecillion)),
"sexdecillion" => tokens.push(Token::NamedNumber(Sexdecillion)),
"septendecillion" => tokens.push(Token::NamedNumber(Septendecillion)),
"octodecillion" => tokens.push(Token::NamedNumber(Octodecillion)),
"novemdecillion" => tokens.push(Token::NamedNumber(Novemdecillion)),
"vigintillion" => tokens.push(Token::NamedNumber(Vigintillion)),
"centillion" => tokens.push(Token::NamedNumber(Centillion)),
"googol" => tokens.push(Token::NamedNumber(Googol)),
"pi" => tokens.push(Token::Constant(Pi)),
"e" => tokens.push(Token::Constant(E)),
"mod" => tokens.push(Token::Operator(Modulo)),
"sqrt" => tokens.push(Token::FunctionIdentifier(Sqrt)),
"cbrt" => tokens.push(Token::FunctionIdentifier(Cbrt)),
"log" => tokens.push(Token::FunctionIdentifier(Log)),
"ln" => tokens.push(Token::FunctionIdentifier(Ln)),
"exp" => tokens.push(Token::FunctionIdentifier(Exp)),
"round" | "rint" => tokens.push(Token::FunctionIdentifier(Round)),
"ceil" => tokens.push(Token::FunctionIdentifier(Ceil)),
"floor" => tokens.push(Token::FunctionIdentifier(Floor)),
"abs" | "fabs" => tokens.push(Token::FunctionIdentifier(Abs)),
"sin" => tokens.push(Token::FunctionIdentifier(Sin)),
"cos" => tokens.push(Token::FunctionIdentifier(Cos)),
"tan" => tokens.push(Token::FunctionIdentifier(Tan)),
"per" => tokens.push(Token::LexerKeyword(Per)),
"hg" => tokens.push(Token::LexerKeyword(Hg)), // can be hectogram or mercury
"ns" | "nanosec" | "nanosecs" | "nanosecond" | "nanoseconds" => tokens.push(Token::Unit(Nanosecond)),
// µ and μ are two different characters
"µs" | "μs" | "microsec" | "microsecs" | "microsecond" | "microseconds" => tokens.push(Token::Unit(Microsecond)),
"ms" | "millisec" | "millisecs" | "millisecond" | "milliseconds" => tokens.push(Token::Unit(Millisecond)),
"s" | "sec" | "secs" | "second" | "seconds" => tokens.push(Token::Unit(Second)),
"min" | "mins" | "minute" | "minutes" => tokens.push(Token::Unit(Minute)),
"h" | "hr" | "hrs" | "hour" | "hours" => tokens.push(Token::Unit(Hour)),
"day" | "days" => tokens.push(Token::Unit(Day)),
"wk" | "wks" | "week" | "weeks" => tokens.push(Token::Unit(Week)),
"mo" | "mos" | "month" | "months" => tokens.push(Token::Unit(Month)),
"q" | "quarter" | "quarters" => tokens.push(Token::Unit(Quarter)),
"yr" | "yrs" | "year" | "years" => tokens.push(Token::Unit(Year)),
"decade" | "decades" => tokens.push(Token::Unit(Decade)),
"century" | "centuries" => tokens.push(Token::Unit(Century)),
"millenium" | "millenia" | "milleniums" => tokens.push(Token::Unit(Millenium)),
"mm" | "millimeter" | "millimeters" => tokens.push(Token::Unit(Millimeter)),
"cm" | "centimeter" | "centimeters" => tokens.push(Token::Unit(Centimeter)),
"dm" | "decimeter" | "decimeters" => tokens.push(Token::Unit(Centimeter)),
"m" | "meter" | "meters" => tokens.push(Token::Unit(Meter)),
"km" | "kilometer" | "kilometers" => tokens.push(Token::Unit(Kilometer)),
"in" => tokens.push(Token::LexerKeyword(In)),
"inch" | "inches" => tokens.push(Token::Unit(Inch)),
"ft" | "foot" | "feet" => tokens.push(Token::Unit(Foot)),
"yd" | "yard" | "yards" => tokens.push(Token::Unit(Yard)),
"mi" | "mile" | "miles" => tokens.push(Token::Unit(Mile)),
"nmi" | "nautical mile" | "nautical miles" => tokens.push(Token::Unit(NauticalMile)),
"ly" | "lightyear" | "lightyears" | "light yr" | "light yrs" | "light year" | "light years" => tokens.push(Token::Unit(LightYear)),
"lightsec" | "lightsecs" | "lightsecond" | "lightseconds" | "light sec" | "light secs" | "light second" | "light seconds" => tokens.push(Token::Unit(LightYear)),
"sqmm" | "sq mm" | "sq millimeter" | "sq millimeters" => tokens.push(Token::Unit(SquareMillimeter)),
"sqcm" | "sq cm" | "sq centimeter" | "sq centimeters" => tokens.push(Token::Unit(SquareCentimeter)),
"sqdm" | "sq dm" | "sq decimeter" | "sq decimeters" => tokens.push(Token::Unit(SquareDecimeter)),
"sqm" | "sq m" | "sq meter" | "sq meters" => tokens.push(Token::Unit(SquareMeter)),
"sqkm" | "sq km" | "sq kilometer" | "sq kilometers" => tokens.push(Token::Unit(SquareKilometer)),
"sqin" | "sq in" | "sq inch" | "sq inches" => tokens.push(Token::Unit(SquareInch)),
"sqft" | "sq ft" | "sq foot" | "sq feet" => tokens.push(Token::Unit(SquareFoot)),
"sqyd" | "sq yd" | "sq yard" | "sq yards" => tokens.push(Token::Unit(SquareYard)),
"sqmi" | "sq mi" | "sq mile" | "sq miles" => tokens.push(Token::Unit(SquareMile)),
"are" | "ares" => tokens.push(Token::Unit(Are)),
"decare" | "decares" => tokens.push(Token::Unit(Decare)),
"ha" | "hectare" | "hectares" => tokens.push(Token::Unit(Hectare)),
"acre" | "acres" => tokens.push(Token::Unit(Acre)),
"cubic millimeter" | "cubic millimeters" => tokens.push(Token::Unit(CubicMillimeter)),
"cubic centimeter" | "cubic centimeters" => tokens.push(Token::Unit(CubicCentimeter)),
"cubic decimeter" | "cubic decimeters" => tokens.push(Token::Unit(CubicDecimeter)),
"cubic meter" | "cubic meters" => tokens.push(Token::Unit(CubicMeter)),
"cubic kilometer" | "cubic kilometers" => tokens.push(Token::Unit(CubicKilometer)),
"cubic inch" | "cubic inches" => tokens.push(Token::Unit(CubicInch)),
"cubic foot" | "cubic feet" => tokens.push(Token::Unit(CubicFoot)),
"cubic yard" | "cubic yards" => tokens.push(Token::Unit(CubicYard)),
"cubic mile" | "cubic miles" => tokens.push(Token::Unit(CubicMile)),
"ml" | "milliliter" | "milliliters" => tokens.push(Token::Unit(Milliliter)),
"cl" | "centiliter" | "centiliters" => tokens.push(Token::Unit(Centiliter)),
"dl" | "deciliter" | "deciliters" => tokens.push(Token::Unit(Deciliter)),
"l" | "liter" | "liters" => tokens.push(Token::Unit(Liter)),
"ts" | "tsp" | "tspn" | "tspns" | "teaspoon" | "teaspoons" => tokens.push(Token::Unit(Teaspoon)),
"tbs" | "tbsp" | "tablespoon" | "tablespoons" => tokens.push(Token::Unit(Tablespoon)),
"floz" | "fl oz" | "fl ounce" | "fl ounces" | "fluid oz" | "fluid ounce" | "fluid ounces" => tokens.push(Token::Unit(FluidOunce)),
"cup" | "cups" => tokens.push(Token::Unit(Cup)),
"pt" | "pint" | "pints" => tokens.push(Token::Unit(Pint)),
"qt" | "quart" | "quarts" => tokens.push(Token::Unit(Quart)),
"gal" | "gallon" | "gallons" => tokens.push(Token::Unit(Gallon)),
"bbl" | "oil barrel" | "oil barrels" => tokens.push(Token::Unit(OilBarrel)),
"mg" | "milligram" | "milligrams" => tokens.push(Token::Unit(Milligram)),
"g" | "gram" | "grams" => tokens.push(Token::Unit(Gram)),
"hectogram" | "hectograms" => tokens.push(Token::Unit(Hectogram)),
"kg" | "kilo" | "kilos" | "kilogram" | "kilograms" => tokens.push(Token::Unit(Kilogram)),
"t" | "tonne" | "tonnes" | "metric ton" | "metric tons" | "metric tonne" | "metric tonnes" => tokens.push(Token::Unit(MetricTon)),
"oz" | "ounces" => tokens.push(Token::Unit(Ounce)),
"lb" | "lbs" => tokens.push(Token::Unit(Pound)),
"pound" | "pounds" => {
let str_len = "-force".len();
match input.get(end_index+1..=end_index+str_len) {
Some("-force") => {
tokens.push(Token::LexerKeyword(PoundForce));
for _i in 0..str_len {
chars.next();
}
byte_index += str_len;
},
_ => {
tokens.push(Token::Unit(Pound));
}
}
},
"stone" | "stones" => tokens.push(Token::Unit(Stone)),
"st" | "ton" | "tons" | "short ton" | "short tons" | "short tonne" | "short tonnes" => tokens.push(Token::Unit(ShortTon)),
"lt" | "long ton" | "long tons" | "long tonne" | "long tonnes" => tokens.push(Token::Unit(LongTon)),
"bit" | "bits" => tokens.push(Token::Unit(Bit)),
"kbit" | "kilobit" | "kilobits" => tokens.push(Token::Unit(Kilobit)),
"mbit" | "megabit" | "megabits" => tokens.push(Token::Unit(Megabit)),
"gbit" | "gigabit" | "gigabits" => tokens.push(Token::Unit(Gigabit)),
"tbit" | "terabit" | "terabits" => tokens.push(Token::Unit(Terabit)),
"pbit" | "petabit" | "petabits" => tokens.push(Token::Unit(Petabit)),
"ebit" | "exabit" | "exabits" => tokens.push(Token::Unit(Exabit)),
"zbit" | "zettabit" | "zettabits" => tokens.push(Token::Unit(Zettabit)),
"ybit" | "yottabit" | "yottabits" => tokens.push(Token::Unit(Yottabit)),
"kibit" | "kibibit" | "kibibits" => tokens.push(Token::Unit(Kibibit)),
"mibit" | "mebibit" | "mebibits" => tokens.push(Token::Unit(Mebibit)),
"gibit" | "gibibit" | "gibibits" => tokens.push(Token::Unit(Gibibit)),
"tibit" | "tebibit" | "tebibits" => tokens.push(Token::Unit(Tebibit)),
"pibit" | "pebibit" | "pebibits" => tokens.push(Token::Unit(Pebibit)),
"eibit" | "exbibit" | "exbibits" => tokens.push(Token::Unit(Exbibit)),
"zibit" | "zebibit" | "zebibits" => tokens.push(Token::Unit(Zebibit)),
"yibit" | "yobibit" | "yobibits" => tokens.push(Token::Unit(Yobibit)),
"byte" | "bytes" => tokens.push(Token::Unit(Byte)),
"kb" | "kilobyte" | "kilobytes" => tokens.push(Token::Unit(Kilobyte)),
"mb" | "megabyte" | "megabytes" => tokens.push(Token::Unit(Megabyte)),
"gb" | "gigabyte" | "gigabytes" => tokens.push(Token::Unit(Gigabyte)),
"tb" | "terabyte" | "terabytes" => tokens.push(Token::Unit(Terabyte)),
"pb" | "petabyte" | "petabytes" => tokens.push(Token::Unit(Petabyte)),
"eb" | "exabyte" | "exabytes" => tokens.push(Token::Unit(Exabyte)),
"zb" | "zettabyte" | "zettabytes" => tokens.push(Token::Unit(Zettabyte)),
"yb" | "yottabyte" | "yottabytes" => tokens.push(Token::Unit(Yottabyte)),
"kib" | "kibibyte" | "kibibytes" => tokens.push(Token::Unit(Kibibyte)),
"mib" | "mebibyte" | "mebibytes" => tokens.push(Token::Unit(Mebibyte)),
"gib" | "gibibyte" | "gibibytes" => tokens.push(Token::Unit(Gibibyte)),
"tib" | "tebibyte" | "tebibytes" => tokens.push(Token::Unit(Tebibyte)),
"pib" | "pebibyte" | "pebibytes" => tokens.push(Token::Unit(Pebibyte)),
"eib" | "exbibyte" | "exbibytes" => tokens.push(Token::Unit(Exbibyte)),
"zib" | "zebibyte" | "zebibytes" => tokens.push(Token::Unit(Zebibyte)),
"yib" | "yobibyte" | "yobibytes" => tokens.push(Token::Unit(Yobibyte)),
"millijoule" | "millijoules" => tokens.push(Token::Unit(Millijoule)),
"j"| "joule" | "joules" => tokens.push(Token::Unit(Joule)),
"nm" | "newton meter" | "newton meters" | "newton-meter" | "newton-meters" => tokens.push(Token::Unit(NewtonMeter)),
"kj" | "kilojoule" | "kilojoules" => tokens.push(Token::Unit(Kilojoule)),
"mj" | "megajoule" | "megajoules" => tokens.push(Token::Unit(Megajoule)),
"gj" | "gigajoule" | "gigajoules" => tokens.push(Token::Unit(Gigajoule)),
"tj" | "terajoule" | "terajoules" => tokens.push(Token::Unit(Terajoule)),
"cal" | "calorie" | "calories" => tokens.push(Token::Unit(Calorie)),
"kcal" | "kilocalorie" | "kilocalories" => tokens.push(Token::Unit(KiloCalorie)),
"btu" | "british thermal unit" | "british thermal units" => tokens.push(Token::Unit(BritishThermalUnit)),
"wh" | "watt hour" | "watt hours" => tokens.push(Token::Unit(WattHour)),
"kwh" | "kilowatt hour" | "kilowatt hours" => tokens.push(Token::Unit(KilowattHour)),
"mwh" | "megawatt hour" | "megawatt hours" => tokens.push(Token::Unit(MegawattHour)),
"gwh" | "gigawatt hour" | "gigawatt hours" => tokens.push(Token::Unit(GigawattHour)),
"twh" | "terawatt hour" | "terawatt hours" => tokens.push(Token::Unit(TerawattHour)),
"pwh" | "petawatt hour" | "petawatt hours" => tokens.push(Token::Unit(PetawattHour)),
"milliwatt" | "milliwatts" => tokens.push(Token::Unit(Milliwatt)),
"w" | "watt" | "watts" => tokens.push(Token::Unit(Watt)),
"kw" | "kilowatt" | "kilowatts" => tokens.push(Token::Unit(Kilowatt)),
"mw" | "megawatt" | "megawatts" => tokens.push(Token::Unit(Megawatt)),
"gw" | "gigawatt" | "gigawatts" => tokens.push(Token::Unit(Gigawatt)),
"tw" | "terawatt" | "terawatts" => tokens.push(Token::Unit(Terawatt)),
"pw" | "petawatt" | "petawatts" => tokens.push(Token::Unit(Petawatt)),
"hp" | "hps" | "horsepower" | "horsepowers" => tokens.push(Token::Unit(Horsepower)),
"mhp" | "hpm" | "metric hp" | "metric hps" | "metric horsepower" | "metric horsepowers" => tokens.push(Token::Unit(MetricHorsepower)),
"ma" | "milliamp" | "milliamps" | "milliampere" | "milliamperes" => tokens.push(Token::Unit(Milliampere)),
"a" | "amp" | "amps" | "ampere" | "amperes" => tokens.push(Token::Unit(Ampere)),
"ka" | "kiloamp" | "kiloamps" | "kiloampere" | "kiloamperes" => tokens.push(Token::Unit(Kiloampere)),
"bi" | "biot" | "biots" | "aba" | "abampere" | "abamperes" => tokens.push(Token::Unit(Abampere)),
"" | "mΩ" | "milliohm" | "milliohms" => tokens.push(Token::Unit(Milliohm)),
"Ω" | "" | "ohm" | "ohms" => tokens.push(Token::Unit(Ohm)),
"" | "kΩ" | "kiloohm" | "kiloohms" => tokens.push(Token::Unit(Kiloohm)),
"mv" | "millivolt" | "millivolts" => tokens.push(Token::Unit(Millivolt)),
"v" | "volt" | "volts" => tokens.push(Token::Unit(Volt)),
"kv" | "kilovolt" | "kilovolts" => tokens.push(Token::Unit(Kilovolt)),
// for pound-force per square inch
"lbf" => tokens.push(Token::LexerKeyword(PoundForce)),
"force" => tokens.push(Token::LexerKeyword(Force)),
"pa" | "pascal" | "pascals" => tokens.push(Token::Unit(Pascal)),
"kpa" | "kilopascal" | "kilopascals" => tokens.push(Token::Unit(Kilopascal)),
"atm" | "atms" | "atmosphere" | "atmospheres" => tokens.push(Token::Unit(Atmosphere)),
"mbar" | "mbars" | "millibar" | "millibars" => tokens.push(Token::Unit(Millibar)),
"bar" | "bars" => tokens.push(Token::Unit(Bar)),
"inhg" => tokens.push(Token::Unit(InchOfMercury)),
"mercury" => tokens.push(Token::LexerKeyword(Mercury)),
"psi" => tokens.push(Token::Unit(PoundsPerSquareInch)),
"torr" | "torrs" => tokens.push(Token::Unit(Torr)),
"hz" | "hertz" => tokens.push(Token::Unit(Hertz)),
"khz" | "kilohertz" => tokens.push(Token::Unit(Kilohertz)),
"mhz" | "megahertz" => tokens.push(Token::Unit(Megahertz)),
"ghz" | "gigahertz" => tokens.push(Token::Unit(Gigahertz)),
"thz" | "terahertz" => tokens.push(Token::Unit(Terahertz)),
"phz" | "petahertz" => tokens.push(Token::Unit(Petahertz)),
"rpm" | "r/min" | "rev/min" => tokens.push(Token::Unit(RevolutionsPerMinute)),
"kph" | "kmh" => tokens.push(Token::Unit(KilometersPerHour)),
"mps" => tokens.push(Token::Unit(MetersPerSecond)),
"mph" => tokens.push(Token::Unit(MilesPerHour)),
"fps" => tokens.push(Token::Unit(FeetPerSecond)),
"kn" | "kt" | "knot" | "knots" => tokens.push(Token::Unit(Knot)),
"k" | "kelvin" | "kelvins" => tokens.push(Token::Unit(Kelvin)),
"c" | "celsius" => tokens.push(Token::Unit(Celsius)),
"f" | "fahrenheit" | "fahrenheits" => tokens.push(Token::Unit(Fahrenheit)),
"deg" | "degree" | "degrees" => tokens.push(Token::Unit(default_degree)),
_ => {
return Err(format!("Invalid string: {}", string));
}
}
}
},
'.' | '0'..='9' => {
let start_index = byte_index;
let mut end_index = byte_index;
while let Some(current_char) = chars.peek() {
if current_char == &'.' || current_char.is_digit(10) {
byte_index += current_char.len_utf8();
chars.next();
end_index += 1;
} else {
break;
}
}
let number_string = &input[start_index..=end_index];
match d128::from_str(number_string) {
Ok(number) => {
if d128::get_status().is_empty() {
tokens.push(Token::Number(number));
} else {
return Err(format!("Error lexing d128 number: {}", number_string));
}
},
Err(_e) => {
return Err(format!("Error lexing d128 number: {}", number_string));
}
};
},
_ => {
return Err(format!("Invalid character: {}", current_char));
},
}
// The π character, for example, is more than one byte, so in that case
// byte_index needs to be incremented by 2. This is because we're slicing
// strings to get digits/words, and Rust slices bytes, not utf8 graphemes
// (aka "user-perceived characters").
byte_index += current_char.len_utf8();
};
// auto insert missing parentheses in first and last position
if left_paren_count > right_paren_count {
let missing_right_parens = left_paren_count - right_paren_count;
for _ in 0..missing_right_parens {
tokens.push(Token::Operator(RightParen));
}
} else if left_paren_count < right_paren_count {
let missing_left_parens = right_paren_count - left_paren_count;
for _ in 0..missing_left_parens {
tokens.insert(0, Token::Operator(LeftParen));
}
}
let mut token_index = 0;
loop {
match tokens[token_index] {
// decide if % is percent or modulo
Token::LexerKeyword(PercentChar) => {
match tokens.get(token_index + 1) {
Some(Token::TextOperator(Of)) => {
// "10% of 1km" should be percentage
tokens[token_index] = Token::UnaryOperator(Percent);
},
Some(Token::Operator(operator)) => {
match operator {
LeftParen => {
// "10%(2)" should be modulo
tokens[token_index] = Token::Operator(Modulo);
},
_ => {
// "10%*2" should be a percentage
tokens[token_index] = Token::UnaryOperator(Percent);
}
}
},
Some(Token::UnaryOperator(_operator)) => {
// "10%!" should be a percentage
tokens[token_index] = Token::UnaryOperator(Percent);
},
Some(Token::LexerKeyword(PercentChar)) => {
// "10%%" should be a percentage
tokens[token_index] = Token::UnaryOperator(Percent);
},
None => {
// percent if there's no element afterwards
tokens[token_index] = Token::UnaryOperator(Percent);
},
_ => {
// everything else should be modulo, for example if the % is
// before a number, function or constants
tokens[token_index] = Token::Operator(Modulo);
},
}
},
// decide if " is inch of inch of mercury
Token::LexerKeyword(DoubleQuotes) => {
match tokens.get(token_index + 1) {
Some(Token::LexerKeyword(Hg)) => {
// "hg should be inch of mercury
tokens[token_index] = Token::Unit(InchOfMercury);
tokens.remove(token_index + 1);
},
_ => {
// otherwise, Inch
tokens[token_index] = Token::Unit(Inch);
},
}
},
// if hg wasn't already turned into inch of mercury, it's hectogram
Token::LexerKeyword(Hg) => {
tokens[token_index] = Token::Unit(Hectogram);
},
// decide if "in" is Inch or To
Token::LexerKeyword(In) => {
match tokens.get(token_index + 1) {
Some(Token::Unit(_)) => {
// "in" should be To
tokens[token_index] = Token::TextOperator(To);
},
_ => {
// otherwise, Inch
tokens[token_index] = Token::Unit(Inch);
},
}
},
_ => {},
}
// parse units like km/h, lbf per square inch
if token_index >= 2 {
let token1 = &tokens[token_index-2];
let token2 = match &tokens[token_index-1] {
// treat km/h the same as km per h
Token::Operator(Divide) => &Token::LexerKeyword(Per),
_ => &tokens[token_index-1],
};
let token3 = &tokens[token_index];
let mut replaced = true;
match (token1, token2, token3) {
// km/h
(Token::Unit(Kilometer), Token::LexerKeyword(Per), Token::Unit(Hour)) => {
tokens[token_index-2] = Token::Unit(KilometersPerHour);
},
// mi/h
(Token::Unit(Mile), Token::LexerKeyword(Per), Token::Unit(Hour)) => {
tokens[token_index-2] = Token::Unit(MilesPerHour);
},
// m/s
(Token::Unit(Meter), Token::LexerKeyword(Per), Token::Unit(Second)) => {
tokens[token_index-2] = Token::Unit(MetersPerSecond);
},
// ft/s
(Token::Unit(Foot), Token::LexerKeyword(Per), Token::Unit(Second)) => {
tokens[token_index-2] = Token::Unit(FeetPerSecond);
},
// btu/min
(Token::Unit(BritishThermalUnit), Token::LexerKeyword(Per), Token::Unit(Minute)) => {
tokens[token_index-2] = Token::Unit(BritishThermalUnitsPerMinute);
},
// btu/h
(Token::Unit(BritishThermalUnit), Token::LexerKeyword(Per), Token::Unit(Hour)) => {
tokens[token_index-2] = Token::Unit(BritishThermalUnitsPerHour);
},
// lbs/sqin
(Token::LexerKeyword(PoundForce), Token::LexerKeyword(Per), Token::Unit(SquareInch)) => {
tokens[token_index-2] = Token::Unit(PoundsPerSquareInch);
},
// inch of mercury
(Token::Unit(Inch), Token::TextOperator(Of), Token::LexerKeyword(Mercury)) => {
tokens[token_index-2] = Token::Unit(InchOfMercury);
},
_ => {
replaced = false;
},
}
if replaced {
tokens.remove(token_index);
tokens.remove(token_index-1);
token_index -= 2;
}
}
if token_index == tokens.len()-1 {
break;
} else {
token_index += 1;
}
}
Ok(tokens)
}