//! Helpers for rendering English text: plurals, indefinite articles,
//! capitalisation, word lists and weights.
use once_cell::sync::OnceCell;
|
|
use rust_decimal::Decimal;
|
|
|
|
/// One suffix-rewriting rule used by [`pluralise`].
///
/// A rule applies to any input that ends with `match_suffix`. The plural is
/// built by removing the last `drop` bytes of the input and then appending
/// `append_suffix`.
struct PluralRule<'l> {
    /// Suffix the singular form must end with for the rule to apply.
    match_suffix: &'l str,
    /// Number of trailing bytes removed from the singular form.
    drop: usize,
    /// Text appended once the trailing bytes have been removed.
    append_suffix: &'l str,
}

/// Returns the English plural of `input`.
///
/// Only the end of `input` is inspected, so multi-word phrases such as
/// `"house mouse"` are pluralised on their final word. Matching is
/// case-sensitive and expects lower-case endings. The first rule in the table
/// whose suffix matches wins; when no rule matches, a plain `"s"` is appended.
pub fn pluralise(input: &str) -> String {
    // Order matters: specific endings must precede the general ones they would
    // otherwise be shadowed by ("human" before "man", "fish" before "sh",
    // "pox" before "x", "ay" before "y", "io" before "o", ...).
    static PLURAL_RULES: &[PluralRule<'static>] = &[
        // Irregular nouns.
        PluralRule { match_suffix: "foot", drop: 3, append_suffix: "eet" },
        PluralRule { match_suffix: "tooth", drop: 4, append_suffix: "eeth" },
        // "human" is not a compound of "man": humans, not humen.
        PluralRule { match_suffix: "human", drop: 0, append_suffix: "s" },
        PluralRule { match_suffix: "man", drop: 2, append_suffix: "en" },
        PluralRule { match_suffix: "mouse", drop: 4, append_suffix: "ice" },
        PluralRule { match_suffix: "louse", drop: 4, append_suffix: "ice" },
        // Nouns whose plural equals the singular.
        PluralRule { match_suffix: "fish", drop: 0, append_suffix: "" },
        PluralRule { match_suffix: "sheep", drop: 0, append_suffix: "" },
        PluralRule { match_suffix: "deer", drop: 0, append_suffix: "" },
        PluralRule { match_suffix: "pox", drop: 0, append_suffix: "" },
        // -is becomes -es (axis -> axes, basis -> bases).
        PluralRule { match_suffix: "cis", drop: 2, append_suffix: "es" },
        PluralRule { match_suffix: "sis", drop: 2, append_suffix: "es" },
        PluralRule { match_suffix: "xis", drop: 2, append_suffix: "es" },
        // Sibilant endings take "es" (hiss -> hisses, bus -> buses).
        PluralRule { match_suffix: "s", drop: 0, append_suffix: "es" },
        PluralRule { match_suffix: "z", drop: 0, append_suffix: "es" },
        PluralRule { match_suffix: "ch", drop: 0, append_suffix: "es" },
        PluralRule { match_suffix: "sh", drop: 0, append_suffix: "es" },
        // -f / -fe becomes -ves.
        PluralRule { match_suffix: "ife", drop: 2, append_suffix: "ves" },
        PluralRule { match_suffix: "lf", drop: 1, append_suffix: "ves" },
        PluralRule { match_suffix: "arf", drop: 1, append_suffix: "ves" },
        // Vowel + y just takes "s"; any other y becomes "ies".
        PluralRule { match_suffix: "ay", drop: 0, append_suffix: "s" },
        PluralRule { match_suffix: "ey", drop: 0, append_suffix: "s" },
        PluralRule { match_suffix: "iy", drop: 0, append_suffix: "s" },
        PluralRule { match_suffix: "oy", drop: 0, append_suffix: "s" },
        PluralRule { match_suffix: "uy", drop: 0, append_suffix: "s" },
        PluralRule { match_suffix: "y", drop: 1, append_suffix: "ies" },
        // Vowel + o just takes "s".
        PluralRule { match_suffix: "ao", drop: 0, append_suffix: "s" },
        PluralRule { match_suffix: "eo", drop: 0, append_suffix: "s" },
        PluralRule { match_suffix: "io", drop: 0, append_suffix: "s" },
        PluralRule { match_suffix: "oo", drop: 0, append_suffix: "s" },
        PluralRule { match_suffix: "uo", drop: 0, append_suffix: "s" },
        // The o rule could be much larger... we'll add specific exceptions as
        // they come up.
        PluralRule { match_suffix: "o", drop: 0, append_suffix: "es" },
        // Lots of possible exceptions here (index -> indices, ox -> oxen).
        PluralRule { match_suffix: "x", drop: 0, append_suffix: "es" },
    ];

    PLURAL_RULES
        .iter()
        .find(|rule| input.ends_with(rule.match_suffix))
        .map(|rule| {
            // Every suffix in the table is ASCII and `drop` never exceeds the
            // suffix length, so the cut always lands on a char boundary.
            let stem = &input[..input.len() - rule.drop];
            format!("{}{}", stem, rule.append_suffix)
        })
        .unwrap_or_else(|| format!("{}s", input))
}
|
|
|
|
/// Returns `true` when `word` starts with at least one of `prefixes`.
fn starts_with_any(word: &str, prefixes: &[&str]) -> bool {
    prefixes.iter().any(|prefix| word.starts_with(*prefix))
}

/// Picks the indefinite article (`"a"` or `"an"`) for `countable_word`.
///
/// The decision is made from the spelling at the start of the word, using a
/// small set of pronunciation heuristics (silent `h`, `eu`/`one`/`once`
/// sounding like a consonant, and the many readings of a leading `u`).
/// Matching is case-sensitive and expects lower-case input; callers holding
/// capitalised text should lower-case it first.
///
/// Returns `""` for an empty word. Words whose first character is not one of
/// the ASCII vowels `a e i o u` (including non-ASCII characters) get `"a"`
/// unless they start with a known silent-`h` prefix.
pub fn indefinite_article(countable_word: &str) -> &'static str {
    // Work on chars rather than byte ranges so that multi-byte characters
    // near the start of the word cannot cause a slicing panic.
    let mut chars = countable_word.chars();
    let first = match chars.next() {
        Some(c) => c,
        None => return "",
    };
    let is_vowel = |c: char| matches!(c, 'a' | 'e' | 'i' | 'o' | 'u');

    if !is_vowel(first) {
        // Consonant spelling, but a silent "h" still sounds like a vowel.
        let silent_h = starts_with_any(
            countable_word,
            &["honor", "honour", "honest", "hour", "heir"],
        );
        return if silent_h { "an" } else { "a" };
    }

    // Vowel spelling pronounced with a leading consonant sound ("yoo", "wun").
    if starts_with_any(countable_word, &["eu", "one", "once"]) {
        return "a";
    }

    if first != 'u' {
        // "ewe" is pronounced "yoo"; every other remaining vowel start is "an".
        return if countable_word.starts_with("ewe") { "a" } else { "an" };
    }

    // Leading "u": too short to classify further means "an".
    let (second, third) = match (chars.next(), chars.next()) {
        (Some(second), Some(third)) => (second, third),
        _ => return "an",
    };

    if countable_word.starts_with("uni") {
        // unidentified, unimaginable, uninhabited etc. are "un-" + word and
        // take "an"; words like unilateral start with a "yoo" sound.
        let negated = starts_with_any(countable_word, &["unid", "unim", "unin"]);
        return if negated { "an" } else { "a" };
    }

    if starts_with_any(countable_word, &["unani", "ubiq", "uku", "ukr"]) {
        return "a";
    }

    if matches!(second, 'r' | 's' | 't') {
        // All u[rst][aeiou] words, e.g. usury, need "a"; urchin, utter need "an".
        return if is_vowel(third) { "a" } else { "an" };
    }

    "an"
}
|
|
|
|
/// Returns `inp` with its first character upper-cased and the rest unchanged.
///
/// The first *character* (not the first byte) is converted, so text starting
/// with a multi-byte character such as `"élan"` is handled without panicking.
/// An empty input yields an empty string.
pub fn caps_first(inp: &str) -> String {
    let mut chars = inp.chars();
    match chars.next() {
        None => String::new(),
        Some(first) => {
            let mut capitalised = String::with_capacity(inp.len());
            // Upper-casing one char may produce several (e.g. 'ß' -> "SS").
            capitalised.extend(first.to_uppercase());
            // `as_str` is the remainder of `inp` after the first character.
            capitalised.push_str(chars.as_str());
            capitalised
        }
    }
}
|
|
|
|
/// Joins `words` into an English list: `"a"`, `"a and b"`, `"a, b and c"`.
///
/// All items but the last are separated by `", "`; the last one is attached
/// with `" and "` (no comma before it). An empty slice gives an empty string.
pub fn join_words(words: &[&str]) -> String {
    let (final_word, leading) = match words.split_last() {
        Some(parts) => parts,
        None => return String::new(),
    };
    if leading.is_empty() {
        return (*final_word).to_owned();
    }

    let mut sentence = leading.join(", ");
    sentence.push_str(" and ");
    sentence.push_str(final_word);
    sentence
}
|
|
|
|
/// Formats a weight given in grams for display.
///
/// Values below 1000 are shown in grams (`"100 g"`). Anything from 1000 up is
/// shown in kilograms with up to three decimal places and no trailing zeros
/// (`"1 kg"`, `"1.1 kg"`, `"1.234 kg"`).
pub fn weight(grams: u64) -> String {
    if grams < 1000 {
        return format!("{} g", grams);
    }

    let whole_kg = grams / 1000;
    let leftover_grams = grams % 1000;
    if leftover_grams == 0 {
        return format!("{} kg", whole_kg);
    }

    // Zero-pad to thousandths, then drop trailing zeros: 50 -> "050" -> "05".
    let thousandths = format!("{:03}", leftover_grams);
    format!("{}.{} kg", whole_kg, thousandths.trim_end_matches('0'))
}
|
|
|
|
#[cfg(test)]
mod test {
    use super::{caps_first, indefinite_article, join_words, pluralise, weight};

    /// Each singular phrase must pluralise to the paired plural.
    #[test]
    fn pluralise_should_follow_english_rules() {
        let cases = [
            ("cat", "cats"),
            ("wolf", "wolves"),
            ("scarf", "scarves"),
            ("volcano", "volcanoes"),
            ("canoe", "canoes"),
            ("pistachio", "pistachios"),
            ("match", "matches"),
            ("the fairest sex", "the fairest sexes"),
            ("loud hiss", "loud hisses"),
            ("evil axis", "evil axes"),
            ("death ray", "death rays"),
            ("killer blowfly", "killer blowflies"),
            ("house mouse", "house mice"),
            ("zombie sheep", "zombie sheep"),
        ];
        for &(word, plural) in &cases {
            assert_eq!(pluralise(word), plural);
        }
    }

    /// Each phrase, lower-cased, must receive the paired article.
    #[test]
    fn indefinite_article_should_follow_english_rules() {
        let cases = [
            ("a", "cat"),
            ("a", "human"),
            ("an", "apple"),
            ("an", "easter egg"),
            ("an", "indigo orb"),
            ("an", "orange"),
            ("an", "urchin"),
            ("an", "hour"),
            ("a", "once-in-a-lifetime opportunity"),
            ("a", "uranium-covered field"),
            ("a", "usurper to the throne"),
            ("a", "Ukrainian hero"),
            ("a", "universal truth"),
            ("an", "uninvited guest"),
            ("a", "unanimous decision"),
            ("a", "European getaway"),
            ("an", "utter disaster"),
            ("a", "uterus"),
            ("a", "user"),
            ("a", "ubiquitous hazard"),
            ("a", "unitary plan"),
        ];
        for &(article, word) in &cases {
            let result = indefinite_article(&word.to_lowercase());
            // Compare whole phrases so a failure message shows the word too.
            assert_eq!(format!("{} {}", result, word), format!("{} {}", article, word));
        }
    }

    /// Only the first character is upper-cased; empty input stays empty.
    #[test]
    fn caps_first_works() {
        let cases = [
            ("", ""),
            ("cat", "Cat"),
            ("Cat", "Cat"),
            ("hello world", "Hello world"),
        ];
        for &(inp, outp) in &cases {
            assert_eq!(caps_first(inp), outp);
        }
    }

    /// Lists of zero to four words are joined with commas and a final "and".
    #[test]
    fn join_words_works() {
        assert_eq!(join_words(&[]), "");
        assert_eq!(join_words(&["cat"]), "cat");
        assert_eq!(join_words(&["cat", "dog"]), "cat and dog");
        assert_eq!(join_words(&["cat", "dog", "fish"]), "cat, dog and fish");
        assert_eq!(
            join_words(&["wolf", "cat", "dog", "fish"]),
            "wolf, cat, dog and fish"
        );
    }

    /// Grams below 1000 stay in grams; larger values switch to kilograms.
    #[test]
    fn weight_works() {
        let cases = [(100, "100 g"), (1000, "1 kg"), (1100, "1.1 kg")];
        for &(grams, text) in &cases {
            assert_eq!(weight(grams), text);
        }
    }
}
|