worldwideportal/src/match_table.rs

536 lines
19 KiB
Rust
Raw Normal View History

use std::{
collections::{BTreeSet, VecDeque},
str::FromStr,
};
use anyhow::bail;
use gc_arena::{Collect, GcRefLock, Rootable};
use itertools::Itertools;
use piccolo::{Callback, Context, IntoValue, Table, UserData, Value};
use regex::Regex;
use crate::{
lua_engine::frames::try_unwrap_frame,
parsing::{parse_commands, quote_string, ArgumentGuard, ParsedArgument, ParsedCommand},
2024-09-27 21:57:36 +10:00
GlobalMemoCell,
};
/// An ordered collection of regex match -> command substitution records.
///
/// Records are stored in insertion order; `try_sub` scans them newest-first.
#[derive(Default, Debug, Collect)]
#[collect(require_static)]
pub struct MatchSubTable {
// Insertion-ordered records; `add_record` appends to the end.
contents: Vec<MatchRecord>,
}
impl MatchSubTable {
    /// Builds a Lua table mapping each record's match pattern text to its
    /// substitution text.
    ///
    /// # Errors
    /// Propagates any error returned by `Table::set`.
    pub fn to_value<'gc>(&self, ctx: Context<'gc>) -> anyhow::Result<Value<'gc>> {
        let table = Table::new(&ctx);
        for record in &self.contents {
            table.set(
                ctx,
                ctx.intern(record.match_text.as_bytes()),
                ctx.intern(record.sub_text.as_bytes()),
            )?;
        }
        Ok(table.into_value(ctx))
    }

    /// Attempts to expand `input` using the first record whose regex matches.
    ///
    /// ANSI escape sequences are stripped from `input` before matching.
    /// Records are tried newest-first (reverse insertion order). For the first
    /// match, every pre-parsed substitution command is instantiated by
    /// replacing its variables with the corresponding capture group: numeric
    /// variable names select a group by index, anything else selects a named
    /// group. A group that did not participate in the match expands to `""`.
    ///
    /// Returns `None` when no record matches.
    pub fn try_sub(&self, input: &str) -> Option<Vec<ParsedCommand>> {
        let cleaned_input = strip_ansi_escapes::strip_str(input);
        self.contents.iter().rev().find_map(|record| {
            let matched = record.match_regex.captures(&cleaned_input)?;
            let commands: Vec<ParsedCommand> = record
                .sub_commands
                .iter()
                .map(|subcmd| ParsedCommand {
                    arguments: subcmd
                        .arguments
                        .iter()
                        .map(|subarg| {
                            let unquoted_text = subarg
                                .text_parts
                                .iter()
                                .map(|tp| match tp {
                                    SubTextPart::Literal(t) => t.as_str(),
                                    SubTextPart::Variable(v) => match usize::from_str(v) {
                                        Ok(idx) => matched.get(idx).map_or("", |m| m.as_str()),
                                        Err(_) => matched.name(v).map_or("", |m| m.as_str()),
                                    },
                                })
                                .join("");
                            // Captured text is untrusted: an unguarded argument
                            // that now contains `;` gets a paren guard so it
                            // stays a single argument when serialised again.
                            let guard = if subarg.guard.is_none() && unquoted_text.contains(';')
                            {
                                Some(ArgumentGuard::Paren)
                            } else {
                                subarg.guard.clone()
                            };
                            let quoted_text = quote_string(&unquoted_text);
                            ParsedArgument {
                                guard,
                                text: unquoted_text,
                                quoted_text,
                            }
                        })
                        .collect(),
                })
                .collect();
            Some(commands)
        })
    }

    /// Adds a record mapping the regex `match_text` to the command template
    /// `sub_text`, replacing any existing record with the same `match_text`.
    ///
    /// The new record is fully validated before the table is touched, so a
    /// rejected update leaves a previously registered record for the same
    /// pattern in place.
    ///
    /// # Errors
    /// Returns an error if `match_text` is not a valid regex, if `sub_text`
    /// contains a malformed `$` substitution, or if it references a variable
    /// that is neither a capture-group index nor a named group of the regex.
    pub fn add_record(&mut self, match_text: &str, sub_text: &str) -> anyhow::Result<()> {
        let rex = Regex::new(match_text)?;
        let parse_result = parse_commands(sub_text);
        let sub_commands: Vec<SubCommand> = parse_result
            .commands
            .into_iter()
            .map(|cmd| {
                Ok(SubCommand {
                    arguments: cmd
                        .arguments
                        .into_iter()
                        .map(parsedarg_to_subarg)
                        .collect::<anyhow::Result<VecDeque<SubArgument>>>()?,
                })
            })
            .collect::<anyhow::Result<Vec<SubCommand>>>()?;

        // Every variable used by the template...
        let vars: BTreeSet<String> = sub_commands
            .iter()
            .flat_map(|sc| {
                sc.arguments.iter().flat_map(|arg| {
                    arg.text_parts.iter().filter_map(|tp| match tp {
                        SubTextPart::Variable(v) => Some(v.clone()),
                        SubTextPart::Literal(_) => None,
                    })
                })
            })
            .collect();
        // ...must be a group index or a group name known to the regex.
        let valid_vars: BTreeSet<String> = (0..rex.captures_len())
            .map(|n| n.to_string())
            .chain(rex.capture_names().flatten().map(|n| n.to_string()))
            .collect();
        let invalid_vars: Vec<String> = vars.difference(&valid_vars).cloned().collect();
        if !invalid_vars.is_empty() {
            bail!("Invalid variables in substitution: {:?}", invalid_vars);
        }

        // Only now that nothing can fail, drop the old record (if any).
        // "Not found" is expected for a brand-new pattern, so it is ignored.
        self.remove_record(match_text).unwrap_or(());
        self.contents.push(MatchRecord {
            match_text: match_text.to_owned(),
            match_regex: rex,
            sub_text: sub_text.to_owned(),
            sub_commands,
        });
        Ok(())
    }

    /// Removes the first record whose pattern text equals `match_text`.
    ///
    /// # Errors
    /// Returns an error if no record has that pattern text.
    pub fn remove_record(&mut self, match_text: &str) -> anyhow::Result<()> {
        match self
            .contents
            .iter()
            .position(|rec| rec.match_text == match_text)
        {
            Some(idx) => {
                self.contents.remove(idx);
                Ok(())
            }
            None => bail!("No matching record found."),
        }
    }
}
/// Splits the text of a parsed argument into literal and variable parts.
///
/// Recognised syntax inside the text:
/// * `$$` yields a literal `$`;
/// * `${name}` yields a variable whose name is everything up to the next `}`;
/// * `$name` yields a variable whose name is the longest run of ASCII
///   alphanumerics and underscores following the `$`.
///
/// The argument's guard is carried over unchanged.
///
/// # Errors
/// Fails on a trailing `$`, an unterminated `${`, or an empty variable name.
fn parsedarg_to_subarg(parsedarg: ParsedArgument) -> anyhow::Result<SubArgument> {
    let mut parts: Vec<SubTextPart> = Vec::new();
    let mut literal = String::new();
    let mut chars = parsedarg.text.chars().peekable();

    while let Some(ch) = chars.next() {
        if ch != '$' {
            literal.push(ch);
            continue;
        }
        match chars.peek().copied() {
            None => {
                bail!("substitution ends in $ which is invalid.")
            }
            Some('$') => {
                // Escaped dollar sign: stays part of the current literal.
                chars.next();
                literal.push('$');
            }
            Some('{') => {
                chars.next();
                if !literal.is_empty() {
                    parts.push(SubTextPart::Literal(std::mem::take(&mut literal)));
                }
                let mut name = String::new();
                let mut closed = false;
                for c in chars.by_ref() {
                    if c == '}' {
                        closed = true;
                        break;
                    }
                    name.push(c);
                }
                if !closed {
                    bail!("substitution opened with {{ is never closed.")
                }
                if name.is_empty() {
                    bail!("substitution of empty variable name.");
                }
                parts.push(SubTextPart::Variable(name));
            }
            Some(_) => {
                if !literal.is_empty() {
                    parts.push(SubTextPart::Literal(std::mem::take(&mut literal)));
                }
                let mut name = String::new();
                while let Some(c) = chars.next_if(|c| *c == '_' || c.is_ascii_alphanumeric()) {
                    name.push(c);
                }
                // Only reachable with an empty name when `$` is followed by
                // a character that cannot start an identifier.
                if name.is_empty() {
                    bail!("substitution of empty variable name.");
                }
                parts.push(SubTextPart::Variable(name));
            }
        }
    }

    if !literal.is_empty() {
        parts.push(SubTextPart::Literal(literal));
    }
    Ok(SubArgument {
        guard: parsedarg.guard,
        text_parts: parts,
    })
}
/// One entry of a `MatchSubTable`: a regex plus the substitution it triggers.
#[derive(Debug)]
pub struct MatchRecord {
/// The regex source text as supplied by the caller; used as the record's key.
pub match_text: String,
/// The compiled form of `match_text`.
pub match_regex: Regex,
/// The substitution template source text as supplied by the caller.
pub sub_text: String,
// We parse into commands and arguments upfront, before substitution.
// This reduces the risk of security problems.
/// The pre-parsed form of `sub_text`.
pub sub_commands: Vec<SubCommand>,
}
/// A single command of a substitution template, with variables not yet expanded.
#[derive(Debug)]
pub struct SubCommand {
/// The command's arguments, in order.
pub arguments: VecDeque<SubArgument>,
}
/// One argument of a `SubCommand`, split into literal and variable pieces.
#[derive(Debug, PartialEq, Eq)]
pub struct SubArgument {
/// The guard copied from the parsed argument this was built from, if any.
pub guard: Option<ArgumentGuard>,
/// The argument text as an ordered sequence of literals and variables.
pub text_parts: Vec<SubTextPart>,
}
/// A fragment of a substitution argument.
#[derive(Debug, PartialEq, Eq)]
pub enum SubTextPart {
/// Text copied verbatim into the output.
Literal(String),
/// The name (or numeric index, as text) of a regex capture group to insert.
Variable(String),
}
2024-09-27 21:57:36 +10:00
pub fn create_match_table<'gc, 'a>(ctx: Context<'gc>) -> Callback<'gc> {
Callback::from_fn(&ctx, move |ctx, _ex, mut stack| {
let _: () = stack.consume(ctx)?;
let user_data = UserData::<'gc>::new::<Rootable!['gcb => GcRefLock<'gcb, MatchSubTable>]>(
&ctx,
GcRefLock::new(&ctx, <MatchSubTable as Default>::default().into()),
);
let match_table_class: Table = ctx
.get_global::<Table>("classes")?
.get(ctx, "match_table")?;
user_data.set_metatable(&ctx, Some(match_table_class));
stack.push_back(user_data.into_value(ctx));
Ok(piccolo::CallbackReturn::Return)
})
}
/// Builds the Lua callback `(match_table, match_text, sub_text)` that adds a
/// record to a match table via `MatchSubTable::add_record`.
///
/// The callback returns no values; any failure is raised as a Lua error.
pub fn match_table_add<'gc, 'a>(ctx: Context<'gc>) -> Callback<'gc> {
    Callback::from_fn(&ctx, move |ctx, _ex, mut stack| {
        let (handle, pattern, replacement): (UserData, piccolo::String, piccolo::String) =
            stack.consume(ctx)?;
        let cell = handle.downcast::<Rootable!['gcb => GcRefLock<'gcb, MatchSubTable>]>()?;
        let mut table = cell.borrow_mut(&ctx);
        table.add_record(pattern.to_str()?, replacement.to_str()?)?;
        Ok(piccolo::CallbackReturn::Return)
    })
}
/// Builds the Lua callback `(match_table, match_text)` that removes a record
/// from a match table via `MatchSubTable::remove_record`.
///
/// The callback returns no values; a missing record is raised as a Lua error.
pub fn match_table_remove<'gc, 'a>(ctx: Context<'gc>) -> Callback<'gc> {
    Callback::from_fn(&ctx, move |ctx, _ex, mut stack| {
        let (handle, pattern): (UserData, piccolo::String) = stack.consume(ctx)?;
        let cell = handle.downcast::<Rootable!['gcb => GcRefLock<'gcb, MatchSubTable>]>()?;
        let mut table = cell.borrow_mut(&ctx);
        table.remove_record(pattern.to_str()?)?;
        Ok(piccolo::CallbackReturn::Return)
    })
}
/// Builds the Lua callback `(match_table)` that returns the table's contents
/// as a plain Lua table of `match_text -> sub_text` pairs.
///
/// The conversion only reads the match table, so it takes a shared borrow
/// rather than a mutable one.
pub fn match_table_lua_table<'gc, 'a>(ctx: Context<'gc>) -> Callback<'gc> {
    Callback::from_fn(&ctx, move |ctx, _ex, mut stack| {
        let match_table: UserData = stack.consume(ctx)?;
        let lua_view = match_table
            .downcast::<Rootable!['gcb => GcRefLock<'gcb, MatchSubTable>]>()?
            .borrow()
            .to_value(ctx)?;
        stack.push_back(lua_view);
        Ok(piccolo::CallbackReturn::Return)
    })
}
/// Builds the Lua callback `(match_table, text, frame)` that tries to expand
/// `text` through the match table and, on success, queues the result.
///
/// When a record matches, the expanded commands are inserted at the front of
/// the global command queue for `frame`, keeping their relative order, and the
/// callback returns `true`. When nothing matches it returns `false` and the
/// queue is left untouched.
pub fn match_table_try_run_sub<'gc, 'a>(
    ctx: Context<'gc>,
    global_memo: &'a GlobalMemoCell,
) -> Callback<'gc> {
    let global_memo = global_memo.clone();
    Callback::from_fn(&ctx, move |ctx, _ex, mut stack| {
        let (handle, line, frame_value): (UserData, piccolo::String, Value) =
            stack.consume(ctx)?;
        let frame = try_unwrap_frame(ctx, &frame_value)?;
        let expansion = handle
            .downcast::<Rootable!['gcb => GcRefLock<'gcb, MatchSubTable>]>()?
            .borrow()
            .try_sub(line.to_str()?);
        let matched = if let Some(cmds) = expansion {
            let mut queue = global_memo.command_queue.borrow_mut();
            // Pushing to the front in reverse keeps the commands in their
            // original order at the head of the queue.
            for cmd in cmds.into_iter().rev() {
                queue.push_front((frame.clone(), cmd));
            }
            true
        } else {
            false
        };
        stack.push_back(Value::Boolean(matched));
        Ok(piccolo::CallbackReturn::Return)
    })
}
// Unit tests for substitution-template parsing and match-table expansion.
#[cfg(test)]
mod tests {
use super::*;
// Checks that plain text, `$name` and `${name}` forms are split into the
// expected literal/variable parts, and that the guard is preserved.
#[test]
fn parsedarg_to_subarg_works() {
// No `$` at all: the whole text is a single literal.
assert_eq!(
parsedarg_to_subarg(ParsedArgument {
guard: None,
text: "hello world!".to_owned(),
quoted_text: "hello world!".to_owned()
})
.unwrap(),
SubArgument {
guard: None,
text_parts: vec![SubTextPart::Literal("hello world!".to_owned())]
}
);
// A bare `$name` ends at the space; `${...}` may contain spaces.
assert_eq!(
parsedarg_to_subarg(ParsedArgument {
guard: None,
text: "hello $adjective ${my world}".to_owned(),
quoted_text: "hello $adjective ${my world}".to_owned()
})
.unwrap(),
SubArgument {
guard: None,
text_parts: vec![
SubTextPart::Literal("hello ".to_owned()),
SubTextPart::Variable("adjective".to_owned()),
SubTextPart::Literal(" ".to_owned()),
SubTextPart::Variable("my world".to_owned()),
]
}
);
// Adjacent variables produce no empty literals between them.
assert_eq!(
parsedarg_to_subarg(ParsedArgument {
guard: Some(ArgumentGuard::DoubleQuote),
text: "hello $adjective${my world}${your world} end".to_owned(),
quoted_text: "hello $adjective$\\{my world\\}$\\{your world\\} end".to_owned()
})
.unwrap(),
SubArgument {
guard: Some(ArgumentGuard::DoubleQuote),
text_parts: vec![
SubTextPart::Literal("hello ".to_owned()),
SubTextPart::Variable("adjective".to_owned()),
SubTextPart::Variable("my world".to_owned()),
SubTextPart::Variable("your world".to_owned()),
SubTextPart::Literal(" end".to_owned()),
]
}
);
}
// Checks that malformed substitutions are rejected.
#[test]
fn parsedarg_rejects_invalid() {
// `${` without a closing `}`.
assert!(parsedarg_to_subarg(ParsedArgument {
guard: None,
text: "${untermin".to_owned(),
quoted_text: "${untermin".to_owned()
})
.is_err());
// Trailing `$` with nothing after it.
assert!(parsedarg_to_subarg(ParsedArgument {
guard: None,
text: "$foo$".to_owned(),
quoted_text: "$foo$".to_owned()
})
.is_err());
// `$` followed by a character that cannot start a variable name.
assert!(parsedarg_to_subarg(ParsedArgument {
guard: None,
text: "$ hello".to_owned(),
quoted_text: "$ hello".to_owned()
})
.is_err());
// Empty `${}` variable name.
assert!(parsedarg_to_subarg(ParsedArgument {
guard: None,
text: "My name is ${}".to_owned(),
quoted_text: "My name is ${}".to_owned()
})
.is_err());
}
// End-to-end check: a named capture is substituted into a two-command
// template, and non-matching input yields `None`.
#[test]
fn matchsubtable_works() {
let mut table: MatchSubTable = Default::default();
table
.add_record(
"^foo (?<bar>[a-z]+) baz",
"\\\"Someone is talking $bar about foo baz?;:flexes his ${bar}",
)
.expect("adding record failed");
assert_eq!(table.try_sub("unrelated babble"), None);
assert_eq!(
table.try_sub("foo woots baz\r\n"),
Some(vec![
ParsedCommand {
arguments: [
ParsedArgument {
guard: None,
text: "\"Someone".to_owned(),
quoted_text: "\\\"Someone".to_owned()
},
ParsedArgument {
guard: None,
text: "is".to_owned(),
quoted_text: "is".to_owned()
},
ParsedArgument {
guard: None,
text: "talking".to_owned(),
quoted_text: "talking".to_owned()
},
ParsedArgument {
guard: None,
text: "woots".to_owned(),
quoted_text: "woots".to_owned()
},
ParsedArgument {
guard: None,
text: "about".to_owned(),
quoted_text: "about".to_owned()
},
ParsedArgument {
guard: None,
text: "foo".to_owned(),
quoted_text: "foo".to_owned()
},
ParsedArgument {
guard: None,
text: "baz?".to_owned(),
quoted_text: "baz?".to_owned()
}
]
.into()
},
ParsedCommand {
arguments: [
ParsedArgument {
guard: None,
text: ":flexes".to_owned(),
quoted_text: ":flexes".to_owned()
},
ParsedArgument {
guard: None,
text: "his".to_owned(),
quoted_text: "his".to_owned()
},
ParsedArgument {
guard: None,
text: "woots".to_owned(),
quoted_text: "woots".to_owned()
}
]
.into()
}
])
);
}
// Captured text containing `;` and braces must stay a single guarded
// argument, and must survive a serialise/re-parse round trip unchanged.
#[test]
fn matchsubtable_resists_command_injection() {
let mut table: MatchSubTable = Default::default();
table
.add_record("^foo (.*)", "safe_command $1")
.expect("adding record failed");
let result = table
.try_sub("foo pwned!};dangerous_command {")
.expect("didn't match");
let expected = ParsedCommand {
arguments: [
ParsedArgument {
guard: None,
text: "safe_command".to_owned(),
quoted_text: "safe_command".to_owned(),
},
ParsedArgument {
guard: Some(ArgumentGuard::Paren),
text: "pwned!};dangerous_command {".to_owned(),
quoted_text: "pwned!\\};dangerous_command \\{".to_owned(),
},
]
.into(),
};
assert_eq!(result, vec![expected.clone()]);
let ser_result = result[0].to_string();
assert_eq!(
ser_result,
"safe_command {pwned!\\};dangerous_command \\{}".to_owned()
);
assert_eq!(parse_commands(&ser_result).commands, vec![expected]);
}
// A template variable that is not a capture group of the regex is rejected.
#[test]
fn matchsubtable_rejects_invalid() {
let mut table: MatchSubTable = Default::default();
assert!(table
.add_record("^foo (?<bar>[a-z]+) baz", "$wrong")
.is_err())
}
}