// worldwideportal/src/parsing.rs (533 lines, 17 KiB, Rust)

use std::{collections::VecDeque, fmt::Display};
use nom::{
branch::alt,
character::complete::{anychar, char, none_of},
combinator::{eof, map, recognize, value},
multi::{many0_count, separated_list0},
sequence::{delimited, preceded, separated_pair},
IResult,
};
/// Result of parsing a whole input line with `parse_commands`.
#[derive(PartialEq, Eq, Debug)]
pub struct ParseResult {
/// The commands collected by the parser, one entry per `;`-separated command.
pub commands: Vec<ParsedCommand>,
}
/// The pair of delimiters an argument was wrapped in when it was written.
#[derive(PartialEq, Eq, Debug, Clone)]
pub enum ArgumentGuard {
/// The argument was enclosed in curly braces (despite the variant name).
Paren, // {}
/// The argument was enclosed in double quotes.
DoubleQuote, // ""
}
/// A single argument of a parsed command.
#[derive(PartialEq, Eq, Debug, Clone)]
pub struct ParsedArgument {
/// Delimiters the argument was wrapped in, or `None` if it was written bare.
pub guard: Option<ArgumentGuard>,
/// Argument content after `unquote_string` has resolved backslash escapes.
pub text: String,
/// Argument content exactly as matched in the input, without the guard
/// delimiters. This is the text the `Display` impl writes back out.
pub quoted_text: String,
}
impl ParsedArgument {
    /// Returns a copy of this argument with its guard removed.
    ///
    /// Both `text` and `quoted_text` are cloned unchanged; only `guard`
    /// becomes `None`. The receiver itself is not modified.
    pub fn forget_guard(&self) -> Self {
        Self {
            guard: None,
            text: self.text.clone(),
            quoted_text: self.quoted_text.clone(),
        }
    }
}
impl Display for ParsedArgument {
    /// Writes the argument as it appeared in the input: the opening guard
    /// delimiter (if any), the raw `quoted_text`, then the closing delimiter.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Resolve the delimiter pair once; `None` means a bare argument.
        let delimiters = self.guard.as_ref().map(|guard| match guard {
            ArgumentGuard::Paren => ("{", "}"),
            ArgumentGuard::DoubleQuote => ("\"", "\""),
        });

        if let Some((opening, _)) = delimiters {
            f.write_str(opening)?;
        }
        f.write_str(&self.quoted_text)?;
        if let Some((_, closing)) = delimiters {
            f.write_str(closing)?;
        }
        Ok(())
    }
}
/// One parsed command, i.e. the list of arguments `parse_command` split out
/// of the input on single spaces.
#[derive(PartialEq, Eq, Debug, Clone)]
pub struct ParsedCommand {
/// The arguments, in input order. Empty arguments are kept (see the tests),
/// which lets `Display` reproduce the original spacing.
pub arguments: VecDeque<ParsedArgument>,
}
impl ParsedCommand {
pub fn forget_guards(&self) -> Self {
Self {
arguments: self.arguments.iter().map(|v| v.forget_guard()).collect(),
..self.clone()
}
}
}
impl Display for ParsedCommand {
    /// Writes every argument in order, separated by a single space.
    ///
    /// An empty command writes nothing. Empty arguments still get their
    /// separator, so consecutive or trailing spaces are reproduced.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        for (position, argument) in self.arguments.iter().enumerate() {
            // The separator goes between arguments, never before the first.
            if position != 0 {
                f.write_str(" ")?;
            }
            Display::fmt(argument, f)?;
        }
        Ok(())
    }
}
impl ParsedCommand {
    /// Splits the command into its name and the arguments that follow it.
    ///
    /// The name is the whitespace-trimmed `text` of the first argument whose
    /// trimmed text is non-empty; any blank arguments before it are dropped.
    /// The returned command holds clones of the arguments after the name.
    /// Returns `None` when there is no non-blank argument at all.
    pub fn split_out_command(&self) -> Option<(String, Self)> {
        let mut remaining = self.arguments.iter();

        // Advance past blank arguments; the iterator is left positioned just
        // after the argument that supplied the name.
        let name = remaining
            .by_ref()
            .map(|argument| argument.text.trim())
            .find(|candidate| !candidate.is_empty())?;

        Some((
            name.to_owned(),
            Self {
                arguments: remaining.cloned().collect(),
            },
        ))
    }
}
/// Resolves backslash escapes in `input`.
///
/// A backslash followed by `\`, `{`, `}` or `"` is replaced by that character
/// alone. A backslash followed by any other character is kept together with
/// that character, and a lone trailing backslash is kept as is.
fn unquote_string(input: &str) -> String {
    let mut unquoted = String::with_capacity(input.len());
    let mut chars = input.chars();

    while let Some(current) = chars.next() {
        if current != '\\' {
            unquoted.push(current);
            continue;
        }
        // `current` is a backslash: decide based on the character after it.
        match chars.next() {
            Some(escaped) if matches!(escaped, '\\' | '{' | '}' | '"') => unquoted.push(escaped),
            Some(other) => {
                unquoted.push('\\');
                unquoted.push(other);
            }
            // Input ended right after the backslash.
            None => unquoted.push('\\'),
        }
    }

    unquoted
}
/// Escapes `input` by putting a backslash in front of every `\`, `{`, `}`
/// and `"`; all other characters are copied through unchanged.
pub fn quote_string(input: &str) -> String {
    let mut quoted = String::with_capacity(input.len());
    for current in input.chars() {
        if matches!(current, '\\' | '{' | '}' | '"') {
            quoted.push('\\');
        }
        quoted.push(current);
    }
    quoted
}
/// Matches the body of a double-quoted string, without the quotes.
///
/// Consumes zero or more items, each being either a backslash followed by
/// any character, or any single character other than `"`. The escape form is
/// tried first, so an escaped quote does not end the body.
fn parse_string(input: &str) -> IResult<&str, ()> {
    let escaped_char = value((), preceded(char('\\'), anychar));
    let ordinary_char = value((), none_of("\""));
    let body = many0_count(alt((escaped_char, ordinary_char)));
    value((), body)(input)
}
/// Matches the body of a brace-guarded argument, without the outer braces.
///
/// Consumes zero or more items, tried in this order:
/// 1. a backslash followed by any character;
/// 2. a nested `{`, a recursively parsed body, then `}` or end of input;
/// 3. a `"`, a string body (`parse_string`), then `"` or end of input;
/// 4. any single character other than `}`.
fn parse_parenthetical(input: &str) -> IResult<&str, ()> {
    let escaped_char = value((), preceded(char('\\'), anychar));

    // Nested groups may be left unterminated: end of input closes them too.
    let closing_brace = alt((value((), eof), value((), char('}'))));
    let nested_braces = value(
        (),
        separated_pair(char('{'), parse_parenthetical, closing_brace),
    );

    let closing_quote = alt((value((), eof), value((), char('"'))));
    let nested_string = value((), separated_pair(char('"'), parse_string, closing_quote));

    let ordinary_char = value((), none_of("}"));

    let body = many0_count(alt((
        escaped_char,
        nested_braces,
        nested_string,
        ordinary_char,
    )));
    value((), body)(input)
}
fn parse_argument(input: &str) -> IResult<&str, ParsedArgument> {
alt((
map(
delimited(char('{'), recognize(parse_parenthetical), char('}')),
|v| ParsedArgument {
text: unquote_string(v),
quoted_text: v.to_owned(),
guard: Some(ArgumentGuard::Paren),
},
),
map(
delimited(char('"'), recognize(parse_string), char('"')),
|v| ParsedArgument {
text: unquote_string(v),
quoted_text: v.to_owned(),
guard: Some(ArgumentGuard::DoubleQuote),
},
),
map(
recognize(value(
(),
many0_count(alt((
value((), preceded(char('\\'), anychar)),
value(
(),
separated_pair(
char('{'),
parse_parenthetical,
alt((value((), eof), value((), char('}')))),
),
),
value(
(),
separated_pair(
char('"'),
parse_string,
alt((value((), eof), value((), char('"')))),
),
),
value((), none_of(" ;")),
))),
)),
|v| ParsedArgument {
text: unquote_string(v),
quoted_text: v.to_owned(),
guard: None,
},
),
))(input)
}
fn parse_command(input: &str) -> IResult<&str, ParsedCommand> {
map(separated_list0(char(' '), parse_argument), |arguments| {
ParsedCommand {
arguments: arguments.into(),
}
})(input)
}
/// Parses `input` into a list of commands separated by `;`.
///
/// Any spaces directly after a `;` are skipped as part of the separator.
/// Apart from that the core parser does no whitespace normalisation: it does
/// not collapse repeated spaces between arguments.
///
/// Input left over after the last command that could be parsed is discarded.
/// If the parser reports an error, the result has no commands.
pub fn parse_commands(input: &str) -> ParseResult {
    let separator = preceded(char(';'), many0_count(char(' ')));
    let commands = match separated_list0(separator, parse_command)(input) {
        Ok((_unparsed, commands)) => commands,
        Err(_) => Vec::new(),
    };
    ParseResult { commands }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an argument with the given guard, unescaped text and raw text.
    fn guarded(guard: Option<ArgumentGuard>, text: &str, quoted_text: &str) -> ParsedArgument {
        ParsedArgument {
            text: text.to_owned(),
            quoted_text: quoted_text.to_owned(),
            guard,
        }
    }

    /// Unguarded argument whose raw and unescaped text are the same.
    fn bare(text: &str) -> ParsedArgument {
        guarded(None, text, text)
    }

    /// Brace-guarded argument whose raw and unescaped text are the same.
    fn braced(text: &str) -> ParsedArgument {
        guarded(Some(ArgumentGuard::Paren), text, text)
    }

    /// Double-quoted argument whose raw and unescaped text are the same.
    fn quoted(text: &str) -> ParsedArgument {
        guarded(Some(ArgumentGuard::DoubleQuote), text, text)
    }

    /// Wraps a list of arguments into a command.
    fn command(arguments: Vec<ParsedArgument>) -> ParsedCommand {
        ParsedCommand {
            arguments: arguments.into(),
        }
    }

    /// Asserts that `input` parses to exactly `commands`.
    fn assert_parses_to(input: &str, commands: Vec<ParsedCommand>) {
        assert_eq!(
            parse_commands(input),
            ParseResult { commands },
            "input: {:?}",
            input
        );
    }

    #[test]
    fn test_parse_commands() {
        assert_parses_to("", vec![command(vec![bare("")])]);
        assert_parses_to("north", vec![command(vec![bare("north")])]);
        // This is deliberate, ensures we can reconstruct the input.
        assert_parses_to("north ", vec![command(vec![bare("north"), bare("")])]);
        assert_parses_to(
            "#blah {x = 1 + 2; y = 3}; #home",
            vec![
                command(vec![bare("#blah"), braced("x = 1 + 2; y = 3")]),
                command(vec![bare("#home")]),
            ],
        );
        // An unterminated brace group degrades to a bare argument.
        assert_parses_to(
            "#blah {x = 1 + 2",
            vec![command(vec![bare("#blah"), bare("{x = 1 + 2")])],
        );
        assert_parses_to(
            "#blah {x = 1} {y = 1}",
            vec![command(vec![
                bare("#blah"),
                braced("x = 1"),
                braced("y = 1"),
            ])],
        );
        assert_parses_to(
            "#blah \"hello\" \"world\"",
            vec![command(vec![
                bare("#blah"),
                quoted("hello"),
                quoted("world"),
            ])],
        );
        assert_parses_to(
            "#blah {x = \"}\"} {y = 1}",
            vec![command(vec![
                bare("#blah"),
                braced("x = \"}\""),
                braced("y = 1"),
            ])],
        );
        assert_parses_to(
            "#blah {x = \"}\"; a = \"{\"; y = {}; z = 1;} { q = 5 };",
            vec![
                command(vec![
                    bare("#blah"),
                    braced("x = \"}\"; a = \"{\"; y = {}; z = 1;"),
                    braced(" q = 5 "),
                ]),
                command(vec![bare("")]),
            ],
        );
        assert_parses_to(
            "#blah {\\}\\}\\}} {y = 1}",
            vec![command(vec![
                bare("#blah"),
                guarded(Some(ArgumentGuard::Paren), "}}}", "\\}\\}\\}"),
                braced("y = 1"),
            ])],
        );
        assert_parses_to(
            "#blah \"This is a \\\"test\\\"\"",
            vec![command(vec![
                bare("#blah"),
                guarded(
                    Some(ArgumentGuard::DoubleQuote),
                    "This is a \"test\"",
                    "This is a \\\"test\\\"",
                ),
            ])],
        );
    }

    #[test]
    fn test_parse_roundtrip() {
        // Formatting a parsed command must reproduce the input exactly.
        let inputs = [
            "#hello {to the} \"world\" I say! ",
            "{{{\"Foo \\ Bar\"}}}",
        ];
        for input in inputs.iter().copied() {
            assert_eq!(
                parse_command(input).map(|(_, parsed)| parsed.to_string()),
                Ok(input.to_owned())
            );
        }
    }
}