// worldwideportal/src/parsing.rs

use std::collections::VecDeque;
use nom::{
branch::alt,
character::complete::{anychar, char, none_of},
combinator::{eof, map, recognize, value},
multi::{many0_count, separated_list0},
sequence::{preceded, separated_pair},
IResult,
};
/// Everything parsed out of one line of input.
#[derive(Debug, PartialEq, Eq)]
pub struct ParseResult<'l> {
    /// The individual commands found in the input, each holding slices that
    /// borrow from the input for the lifetime `'l`.
    pub commands: Vec<ParsedCommand<'l>>,
}
/// A single command, stored as the raw argument slices borrowed from the input.
#[derive(PartialEq, Eq, Debug)]
pub struct ParsedCommand<'l> {
    /// Argument slices in input order. Entries are kept verbatim and may be
    /// empty strings.
    pub arguments: VecDeque<&'l str>,
}

impl<'l> ParsedCommand<'l> {
    /// Splits the command into its name and the arguments that follow it.
    ///
    /// Leading arguments that are empty or consist only of whitespace are
    /// skipped. The first remaining argument, trimmed, is returned as the
    /// command name; every argument after it is returned untouched in a new
    /// `ParsedCommand`.
    ///
    /// Returns `None` when there is no non-blank argument at all.
    ///
    /// The returned name borrows from the original input (`'l`), not from
    /// `self`, so it can outlive this `ParsedCommand`.
    pub fn split_out_command(&self) -> Option<(&'l str, Self)> {
        let mut remaining = self.arguments.iter().copied();
        // `by_ref` lets `find` advance `remaining` past the command name while
        // leaving the iterator usable for collecting the trailing arguments.
        let command = remaining
            .by_ref()
            .map(str::trim)
            .find(|name| !name.is_empty())?;
        Some((
            command,
            Self {
                arguments: remaining.collect(),
            },
        ))
    }
}
/// Consumes the inside of a double-quoted string.
///
/// Parsing stops just before the first unescaped `"` or at the end of the
/// input, whichever comes first. A backslash followed by any character is
/// consumed as a pair, so `\"` does not end the string.
fn parse_string(input: &str) -> IResult<&str, ()> {
    // A backslash together with whatever character follows it.
    let escaped_char = value((), preceded(char('\\'), anychar));
    // Any single character other than the closing quote.
    let ordinary_char = value((), none_of("\""));
    value((), many0_count(alt((escaped_char, ordinary_char))))(input)
}
fn parse_parenthetical(input: &str) -> IResult<&str, ()> {
value(
(),
many0_count(alt((
value((), preceded(char('\\'), anychar)),
value(
(),
separated_pair(
char('{'),
parse_parenthetical,
alt((value((), eof), value((), char('}')))),
),
),
value(
(),
separated_pair(
char('"'),
parse_string,
alt((value((), eof), value((), char('"')))),
),
),
value((), none_of("}")),
))),
)(input)
}
fn parse_argument(input: &str) -> IResult<&str, ()> {
value(
(),
many0_count(alt((
value((), preceded(char('\\'), anychar)),
value(
(),
separated_pair(
char('{'),
parse_parenthetical,
alt((value((), eof), value((), char('}')))),
),
),
value(
(),
separated_pair(
char('"'),
parse_string,
alt((value((), eof), value((), char('"')))),
),
),
value((), none_of(" ;")),
))),
)(input)
}
fn parse_command(input: &str) -> IResult<&str, ParsedCommand> {
map(
separated_list0(char(' '), recognize(parse_argument)),
|arguments| ParsedCommand {
arguments: arguments.into(),
},
)(input)
}
/// Parses a whole input line into its `;`-separated commands.
///
/// Spaces directly following a `;` are skipped before the next command
/// begins. Beyond that the parser does no whitespace clean-up: repeated or
/// trailing spaces inside a command come back as empty arguments.
///
/// If the underlying parser reports an error, the result holds no commands.
pub fn parse_commands(input: &str) -> ParseResult {
    // A command separator is a `;` plus any run of spaces after it.
    let command_separator = preceded(char(';'), many0_count(char(' ')));
    match separated_list0(command_separator, parse_command)(input) {
        Ok((_, commands)) => ParseResult { commands },
        Err(_) => ParseResult { commands: vec![] },
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises `parse_commands` on empty input, plain words, braced groups,
    /// quoted strings, escapes, and unterminated groups.
    #[test]
    fn test_parse_commands() {
        // Empty input still yields one command with one empty argument.
        assert_eq!(
            parse_commands(""),
            ParseResult {
                commands: vec![ParsedCommand {
                    arguments: vec![""].into()
                }]
            }
        );
        assert_eq!(
            parse_commands("north"),
            ParseResult {
                commands: vec![ParsedCommand {
                    arguments: vec!["north"].into()
                }]
            }
        );
        assert_eq!(
            parse_commands("north "),
            ParseResult {
                commands: vec![ParsedCommand {
                    // This is deliberate, ensures we can reconstruct the input.
                    arguments: vec!["north", ""].into()
                }]
            }
        );
        // Spaces and semicolons inside braces do not split arguments or commands.
        assert_eq!(
            parse_commands("#blah {x = 1 + 2; y = 3}; #home"),
            ParseResult {
                commands: vec![
                    ParsedCommand {
                        arguments: vec!["#blah", "{x = 1 + 2; y = 3}"].into()
                    },
                    ParsedCommand {
                        arguments: vec!["#home"].into()
                    }
                ]
            }
        );
        // An unterminated brace group runs to the end of the input.
        assert_eq!(
            parse_commands("#blah {x = 1 + 2"),
            ParseResult {
                commands: vec![ParsedCommand {
                    arguments: vec!["#blah", "{x = 1 + 2"].into()
                },]
            }
        );
        assert_eq!(
            parse_commands("#blah {x = 1} {y = 1}"),
            ParseResult {
                commands: vec![ParsedCommand {
                    arguments: vec!["#blah", "{x = 1}", "{y = 1}"].into()
                }]
            }
        );
        assert_eq!(
            parse_commands("#blah \"hello\" \"world\""),
            ParseResult {
                commands: vec![ParsedCommand {
                    arguments: vec!["#blah", "\"hello\"", "\"world\""].into()
                }]
            }
        );
        // A closing brace inside a quoted string does not end the group.
        assert_eq!(
            parse_commands("#blah {x = \"}\"} {y = 1}"),
            ParseResult {
                commands: vec![ParsedCommand {
                    arguments: vec!["#blah", "{x = \"}\"}", "{y = 1}"].into()
                }]
            }
        );
        // A trailing `;` produces a final command with one empty argument.
        assert_eq!(
            parse_commands("#blah {x = \"}\"; a = \"{\"; y = {}; z = 1;} { q = 5 };"),
            ParseResult {
                commands: vec![
                    ParsedCommand {
                        arguments: vec![
                            "#blah",
                            "{x = \"}\"; a = \"{\"; y = {}; z = 1;}",
                            "{ q = 5 }"
                        ]
                        .into()
                    },
                    ParsedCommand {
                        arguments: vec![""].into()
                    }
                ]
            }
        );
        // Backslash-escaped braces do not close the group.
        assert_eq!(
            parse_commands("#blah {\\}\\}\\}} {y = 1}"),
            ParseResult {
                commands: vec![ParsedCommand {
                    arguments: vec!["#blah", "{\\}\\}\\}}", "{y = 1}"].into()
                }]
            }
        );
        // Backslash-escaped quotes do not close the string.
        assert_eq!(
            parse_commands("#blah \"This is a \\\"test\\\"\""),
            ParseResult {
                commands: vec![ParsedCommand {
                    arguments: vec!["#blah", "\"This is a \\\"test\\\"\""].into()
                }]
            }
        );
    }
}