forked from blasthavers/blastmud
390 lines
11 KiB
Rust
390 lines
11 KiB
Rust
use once_cell::sync::OnceCell;
|
|
use rust_decimal::Decimal;
|
|
|
|
/// A single suffix-rewriting rule used by [`pluralise`].
///
/// A rule applies to any word ending in `match_suffix`. The plural is formed
/// by removing the last `drop` bytes of the word and then adding
/// `append_suffix`.
struct PluralRule<'l> {
    /// Word ending that selects this rule.
    match_suffix: &'l str,
    /// Number of trailing bytes removed before appending. Every rule in
    /// `PLURAL_RULES` keeps this no larger than `match_suffix.len()`, so the
    /// cut always falls inside the (ASCII) suffix that was just matched.
    drop: usize,
    /// Text added after the trailing bytes have been dropped.
    append_suffix: &'l str,
}

impl<'l> PluralRule<'l> {
    /// Builds a rule; `const` so the whole table can live in a `static`.
    const fn new(match_suffix: &'l str, drop: usize, append_suffix: &'l str) -> Self {
        PluralRule {
            match_suffix,
            drop,
            append_suffix,
        }
    }
}

/// Pluralisation rules, tried in order: the first rule whose suffix matches
/// wins, so specific endings must come before the general ones they overlap
/// with (e.g. "human" before "man", "fish" before "sh", "ay" before "y").
static PLURAL_RULES: &[PluralRule<'static>] = &[
    // Irregular nouns.
    PluralRule::new("foot", 3, "eet"),
    PluralRule::new("tooth", 4, "eeth"),
    // "human" must not be caught by the "man" -> "men" rule below.
    PluralRule::new("human", 0, "s"),
    PluralRule::new("man", 2, "en"),
    PluralRule::new("mouse", 4, "ice"),
    PluralRule::new("louse", 4, "ice"),
    // Nouns whose plural is the same as the singular.
    PluralRule::new("fish", 0, ""),
    PluralRule::new("sheep", 0, ""),
    PluralRule::new("deer", 0, ""),
    PluralRule::new("pox", 0, ""),
    // -is -> -es
    PluralRule::new("cis", 2, "es"),
    PluralRule::new("sis", 2, "es"),
    PluralRule::new("xis", 2, "es"),
    // Sibilant endings take -es.
    PluralRule::new("ss", 0, "es"),
    PluralRule::new("ch", 0, "es"),
    PluralRule::new("sh", 0, "es"),
    // -f / -fe -> -ves
    PluralRule::new("ife", 2, "ves"),
    PluralRule::new("lf", 1, "ves"),
    PluralRule::new("arf", 1, "ves"),
    // Vowel + y just takes -s; any other -y becomes -ies.
    PluralRule::new("ay", 0, "s"),
    PluralRule::new("ey", 0, "s"),
    PluralRule::new("iy", 0, "s"),
    PluralRule::new("oy", 0, "s"),
    PluralRule::new("uy", 0, "s"),
    PluralRule::new("y", 1, "ies"),
    // Vowel + o just takes -s.
    PluralRule::new("ao", 0, "s"),
    PluralRule::new("eo", 0, "s"),
    PluralRule::new("io", 0, "s"),
    PluralRule::new("oo", 0, "s"),
    PluralRule::new("uo", 0, "s"),
    // The o rule could be much larger... we'll add specific exceptions as
    // they come up.
    PluralRule::new("o", 0, "es"),
    // Lots of possible exceptions here.
    PluralRule::new("ex", 0, "es"),
    PluralRule::new("ox", 0, "es"),
];

/// Words that carry the plural themselves when more text follows them, as in
/// "pair of pants" -> "pairs of pants".
const HEAD_WORDS: &[&str] = &["pair"];

/// Splits `phrase` just after the first whole-word occurrence of a head word
/// that is followed by a space, returning `(part to pluralise, remainder)`.
///
/// When no such occurrence exists the whole phrase is returned with an empty
/// remainder.
fn split_head_phrase(phrase: &str) -> (&str, &str) {
    // Word boundaries are checked on bytes rather than by slicing the string:
    // slicing one byte either side of the match panics when the neighbouring
    // character is multi-byte.
    let bytes = phrase.as_bytes();
    for head_word in HEAD_WORDS {
        for (start, _) in phrase.match_indices(*head_word) {
            let end = start + head_word.len();
            if end == bytes.len() {
                // Nothing follows the word, so the normal rules handle it.
                continue;
            }
            let begins_word = start == 0 || bytes[start - 1] == b' ';
            let ends_word = bytes[end] == b' ';
            if begins_word && ends_word {
                // `end` directly follows an ASCII match, so it is always a
                // valid char boundary.
                return phrase.split_at(end);
            }
        }
    }
    (phrase, "")
}

/// Returns the English plural of the noun phrase `orig_input`.
///
/// Normally the last word of the phrase is pluralised using the first
/// matching entry of `PLURAL_RULES`, falling back to appending "s". If the
/// phrase contains a head word such as "pair" followed by more text, that
/// word is pluralised instead and the rest is kept as-is
/// ("brown pair of pants" -> "brown pairs of pants").
///
/// Matching is case-sensitive and all rules are written in lowercase.
pub fn pluralise(orig_input: &str) -> String {
    let (head, tail) = split_head_phrase(orig_input);
    let matching_rule = PLURAL_RULES
        .iter()
        .find(|rule| head.ends_with(rule.match_suffix));
    let plural_head = match matching_rule {
        // The dropped bytes lie inside the ASCII suffix that just matched, so
        // this slice cannot split a multi-byte character.
        Some(rule) => format!(
            "{}{}",
            &head[..head.len() - rule.drop],
            rule.append_suffix
        ),
        None => format!("{}s", head),
    };
    plural_head + tail
}
|
|
|
|
/// Returns true if `word` starts with any of `prefixes`.
fn has_any_prefix(word: &str, prefixes: &[&str]) -> bool {
    prefixes.iter().any(|prefix| word.starts_with(*prefix))
}

/// Returns true for a lowercase vowel letter, including the common Latin-1
/// accented forms so that loan words such as "émigré" are handled.
fn is_vowel_letter(c: char) -> bool {
    matches!(
        c,
        'a' | 'e' | 'i' | 'o' | 'u'
            | '\u{e0}'..='\u{e6}' // à á â ã ä å æ
            | '\u{e8}'..='\u{ef}' // è é ê ë ì í î ï
            | '\u{f2}'..='\u{f6}' // ò ó ô õ ö
            | '\u{f8}'..='\u{fc}' // ø ù ú û ü
    )
}

/// Chooses the article for a word starting with a plain "u", which may be
/// pronounced "uh" (an umbrella) or "yoo" (a unicorn).
fn article_for_u_word(word: &str) -> &'static str {
    // The second and third letters are inspected as bytes so that a
    // multi-byte character in either position cannot cause a slicing panic.
    let bytes = word.as_bytes();
    if bytes.len() < 3 {
        return "an";
    }
    if word.starts_with("uni") {
        // unidentified, unimaginable, uninhabited etc. are "un-" words;
        // everything else (unilateral, universe...) sounds like "yoo".
        return if has_any_prefix(word, &["unid", "unim", "unin"]) {
            "an"
        } else {
            "a"
        };
    }
    if has_any_prefix(word, &["unani", "ubiq", "uku", "ukr"]) {
        return "a";
    }
    if matches!(bytes[1], b'r' | b's' | b't') {
        // All u[rst][aeiou] words, e.g. usury, need "a".
        let third_is_vowel = matches!(bytes[2], b'a' | b'e' | b'i' | b'o' | b'u');
        return if third_is_vowel { "a" } else { "an" };
    }
    "an"
}

/// Returns the indefinite article ("a" or "an") to put before
/// `countable_word`, or "" when the input is empty.
///
/// The choice follows pronunciation heuristics rather than spelling alone:
/// silent-h words (hour, honest...) take "an", while vowel-initial words that
/// sound like they start with a consonant (European, once, ewe, unicorn,
/// user...) take "a".
///
/// All comparisons are against lowercase text, so callers should pass the
/// word in lowercase. Only the start of the string is inspected, so a whole
/// phrase may be passed.
pub fn indefinite_article(countable_word: &str) -> &'static str {
    // Look at the first *character*, not the first byte, so non-ASCII input
    // cannot trigger a char-boundary panic.
    let first = match countable_word.chars().next() {
        Some(c) => c,
        None => return "",
    };

    if !is_vowel_letter(first) {
        let silent_h = ["honor", "honour", "honest", "hour", "heir"];
        return if has_any_prefix(countable_word, &silent_h) {
            "an"
        } else {
            "a"
        };
    }

    // Vowel spellings pronounced with a leading "y" or "w" sound.
    if has_any_prefix(countable_word, &["eu", "one", "once"]) {
        return "a";
    }

    match first {
        'e' if countable_word.starts_with("ewe") => "a",
        'u' => article_for_u_word(countable_word),
        _ => "an",
    }
}
|
|
|
|
/// Returns `inp` with its first character converted to uppercase and the rest
/// left untouched. An empty input yields an empty string.
///
/// The first *character* is uppercased, not the first byte, so text starting
/// with a multi-byte character is handled instead of panicking on a
/// char-boundary slice.
pub fn caps_first(inp: &str) -> String {
    let mut chars = inp.chars();
    match chars.next() {
        None => String::new(),
        Some(first) => {
            let mut capitalised = String::with_capacity(inp.len());
            // Uppercasing can expand to more than one char, hence `extend`.
            capitalised.extend(first.to_uppercase());
            // `as_str` is whatever remains after the first character.
            capitalised.push_str(chars.as_str());
            capitalised
        }
    }
}
|
|
|
|
/// Joins `words` into an English list: items are separated by ", " except
/// the final pair, which is separated by " and " (no Oxford comma).
///
/// An empty slice gives an empty string and a single word is returned as-is.
pub fn join_words(words: &[&str]) -> String {
    match words {
        [] => String::new(),
        [only] => (*only).to_owned(),
        [leading @ .., last] => format!("{} and {}", leading.join(", "), last),
    }
}
|
|
|
|
pub fn weight(grams: u64) -> String {
|
|
if grams > 999 {
|
|
format!(
|
|
"{} kg",
|
|
Decimal::from_i128_with_scale(grams as i128, 3).normalize()
|
|
)
|
|
} else {
|
|
format!("{} g", grams)
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod test {
    use super::{caps_first, indefinite_article, join_words, pluralise, weight};

    /// (singular, expected plural) pairs.
    const PLURAL_CASES: &[(&str, &str)] = &[
        ("cat", "cats"),
        ("wolf", "wolves"),
        ("scarf", "scarves"),
        ("volcano", "volcanoes"),
        ("canoe", "canoes"),
        ("pistachio", "pistachios"),
        ("match", "matches"),
        ("the fairest sex", "the fairest sexes"),
        ("loud hiss", "loud hisses"),
        ("evil axis", "evil axes"),
        ("death ray", "death rays"),
        ("killer blowfly", "killer blowflies"),
        ("house mouse", "house mice"),
        ("zombie sheep", "zombie sheep"),
        ("brown pair of pants", "brown pairs of pants"),
        ("good pair", "good pairs"),
        ("repair kit", "repair kits"),
    ];

    /// (expected article, phrase) pairs; the phrase is lowercased before it
    /// is handed to `indefinite_article`.
    const ARTICLE_CASES: &[(&str, &str)] = &[
        ("a", "cat"),
        ("a", "human"),
        ("an", "apple"),
        ("an", "easter egg"),
        ("an", "indigo orb"),
        ("an", "orange"),
        ("an", "urchin"),
        ("an", "hour"),
        ("a", "once-in-a-lifetime opportunity"),
        ("a", "uranium-covered field"),
        ("a", "usurper to the throne"),
        ("a", "Ukrainian hero"),
        ("a", "universal truth"),
        ("an", "uninvited guest"),
        ("a", "unanimous decision"),
        ("a", "European getaway"),
        ("an", "utter disaster"),
        ("a", "uterus"),
        ("a", "user"),
        ("a", "ubiquitous hazard"),
        ("a", "unitary plan"),
    ];

    /// (input, expected output) pairs for `caps_first`.
    const CAPS_CASES: &[(&str, &str)] = &[
        ("", ""),
        ("cat", "Cat"),
        ("Cat", "Cat"),
        ("hello world", "Hello world"),
    ];

    #[test]
    fn pluralise_should_follow_english_rules() {
        for &(word, plural) in PLURAL_CASES {
            assert_eq!(pluralise(word), plural);
        }
    }

    #[test]
    fn indefinite_article_should_follow_english_rules() {
        for &(article, word) in ARTICLE_CASES {
            // Compare whole phrases so a failure shows which word was wrong.
            let actual = format!("{} {}", indefinite_article(&word.to_lowercase()), word);
            let expected = format!("{} {}", article, word);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn caps_first_works() {
        for &(inp, outp) in CAPS_CASES {
            assert_eq!(caps_first(inp), outp);
        }
    }

    #[test]
    fn join_words_works() {
        assert_eq!(join_words(&[]), "");
        assert_eq!(join_words(&["cat"]), "cat");
        assert_eq!(join_words(&["cat", "dog"]), "cat and dog");
        assert_eq!(join_words(&["cat", "dog", "fish"]), "cat, dog and fish");
        assert_eq!(
            join_words(&["wolf", "cat", "dog", "fish"]),
            "wolf, cat, dog and fish"
        );
    }

    #[test]
    fn weight_works() {
        let cases: &[(u64, &str)] = &[(100, "100 g"), (1000, "1 kg"), (1100, "1.1 kg")];
        for &(grams, shown) in cases {
            assert_eq!(weight(grams), shown);
        }
    }
}
|