use std::{ collections::{BTreeSet, VecDeque}, str::FromStr, }; use anyhow::bail; use gc_arena::{Collect, GcRefLock, Rootable}; use itertools::Itertools; use piccolo::{Callback, Context, IntoValue, Table, UserData, Value}; use regex::Regex; use crate::{ lua_engine::frames::try_unwrap_frame, parsing::{parse_commands, quote_string, ArgumentGuard, ParsedArgument, ParsedCommand}, GlobalMemoCell, }; #[derive(Default, Debug, Collect)] #[collect(require_static)] pub struct MatchSubTable { contents: Vec, } impl MatchSubTable { pub fn to_value<'gc>(&self, ctx: Context<'gc>) -> anyhow::Result> { let table = Table::new(&ctx); for record in self.contents.iter() { table.set( ctx, ctx.intern(record.match_text.as_bytes()), ctx.intern(record.sub_text.as_bytes()), )?; } Ok(table.into_value(ctx)) } pub fn try_sub(&self, input: &str) -> Option> { let cleaned_input = strip_ansi_escapes::strip_str(input); for record in self.contents.iter().rev() { if let Some(matched) = record.match_regex.captures(&cleaned_input) { let vec = Some( record .sub_commands .iter() .map(|subcmd| ParsedCommand { arguments: subcmd .arguments .iter() .map(|subarg| { let unquoted_text = subarg .text_parts .iter() .map(|tp| match tp { SubTextPart::Literal(t) => t.as_str(), SubTextPart::Variable(v) => { if let Ok(v) = ::from_str(v) { matched.get(v).map_or("", |v| v.as_str()) } else { matched.name(v).map_or("", |v| v.as_str()) } } }) .join(""); ParsedArgument { guard: if subarg.guard.is_none() && unquoted_text.contains(';') { Some(ArgumentGuard::Paren) } else { subarg.guard.clone() }, text: unquoted_text.clone(), quoted_text: quote_string(&unquoted_text), } }) .collect(), }) .collect(), ); return vec; } } None } pub fn add_record(&mut self, match_text: &str, sub_text: &str) -> anyhow::Result<()> { self.remove_record(match_text).unwrap_or(()); let rex = Regex::new(match_text)?; let parse_result = parse_commands(sub_text); let sub_commands: Vec = parse_result .commands .into_iter() .map(|cmd| { Ok(SubCommand { arguments: cmd .arguments .into_iter() .map(parsedarg_to_subarg) .collect::>>()?, }) }) .collect::>>()?; let vars: BTreeSet = sub_commands .iter() .flat_map(|sc| { sc.arguments.iter().flat_map(|arg| { arg.text_parts.iter().filter_map(|tp| match tp { SubTextPart::Variable(v) => Some(v.clone()), _ => None, }) }) }) .collect(); let max_captures = rex.captures_len(); let valid_vars: BTreeSet = (0..max_captures) .map(|n| n.to_string()) .chain(rex.capture_names().filter_map(|o| o.map(|n| n.to_string()))) .collect(); let invalid_vars: Vec = vars.difference(&valid_vars).cloned().collect(); if !invalid_vars.is_empty() { bail!("Invalid variables in substitution: {:?}", invalid_vars); } self.contents.push(MatchRecord { match_text: match_text.to_owned(), match_regex: rex, sub_text: sub_text.to_owned(), sub_commands, }); Ok(()) } pub fn remove_record(&mut self, match_text: &str) -> anyhow::Result<()> { match self .contents .iter() .enumerate() .find(|(_idx, rec)| rec.match_text == match_text) { None => bail!("No matching record found."), Some((idx, _)) => self.contents.remove(idx), }; Ok(()) } } fn parsedarg_to_subarg(parsedarg: ParsedArgument) -> anyhow::Result { let mut text_parts: Vec = vec![]; let mut iter = parsedarg.text.chars().peekable(); let mut buf = String::new(); 'outer: loop { match iter.next() { None => break 'outer, Some('$') => match iter.peek() { None => { bail!("substitution ends in $ which is invalid.") } Some('$') => { iter.next(); buf.push('$') } Some('{') => { iter.next(); if !buf.is_empty() { text_parts.push(SubTextPart::Literal(buf)); } buf = String::new(); 'inner: loop { match iter.next() { None => { bail!("substitution opened with {{ is never closed.") } Some('}') => { if buf.is_empty() { bail!("substitution of empty variable name."); } text_parts.push(SubTextPart::Variable(buf)); buf = String::new(); break 'inner; } Some(c) => buf.push(c), } } } Some(_) => { if !buf.is_empty() { text_parts.push(SubTextPart::Literal(buf)); } buf = String::new(); 'inner: loop { match iter.peek() { None => { text_parts.push(SubTextPart::Variable(buf)); buf = String::new(); break 'outer; } Some(c) if *c == '_' || c.is_ascii_alphanumeric() => { buf.push(*c); iter.next(); } Some(_) => { if buf.is_empty() { bail!("substitution of empty variable name."); } text_parts.push(SubTextPart::Variable(buf)); buf = String::new(); break 'inner; } } } } }, Some(c) => buf.push(c), } } if !buf.is_empty() { text_parts.push(SubTextPart::Literal(buf)) } Ok(SubArgument { guard: parsedarg.guard, text_parts, }) } #[derive(Debug)] pub struct MatchRecord { pub match_text: String, pub match_regex: Regex, pub sub_text: String, // We parse into into commands and arguments upfront, before substitution. // This reduces the risk of security problems. pub sub_commands: Vec, } #[derive(Debug)] pub struct SubCommand { pub arguments: VecDeque, } #[derive(Debug, PartialEq, Eq)] pub struct SubArgument { pub guard: Option, pub text_parts: Vec, } #[derive(Debug, PartialEq, Eq)] pub enum SubTextPart { Literal(String), Variable(String), } pub fn create_match_table<'gc, 'a>(ctx: Context<'gc>) -> Callback<'gc> { Callback::from_fn(&ctx, move |ctx, _ex, mut stack| { let _: () = stack.consume(ctx)?; let user_data = UserData::<'gc>::new:: GcRefLock<'gcb, MatchSubTable>]>( &ctx, GcRefLock::new(&ctx, ::default().into()), ); let match_table_class: Table = ctx .get_global::("classes")? .get(ctx, "match_table")?; user_data.set_metatable(&ctx, Some(match_table_class)); stack.push_back(user_data.into_value(ctx)); Ok(piccolo::CallbackReturn::Return) }) } pub fn match_table_add<'gc, 'a>(ctx: Context<'gc>) -> Callback<'gc> { Callback::from_fn(&ctx, move |ctx, _ex, mut stack| { let (match_table, match_text, sub_text): (UserData, piccolo::String, piccolo::String) = stack.consume(ctx)?; match_table .downcast:: GcRefLock<'gcb, MatchSubTable>]>()? .borrow_mut(&ctx) .add_record(match_text.to_str()?, sub_text.to_str()?)?; Ok(piccolo::CallbackReturn::Return) }) } pub fn match_table_remove<'gc, 'a>(ctx: Context<'gc>) -> Callback<'gc> { Callback::from_fn(&ctx, move |ctx, _ex, mut stack| { let (match_table, match_text): (UserData, piccolo::String) = stack.consume(ctx)?; match_table .downcast:: GcRefLock<'gcb, MatchSubTable>]>()? .borrow_mut(&ctx) .remove_record(match_text.to_str()?)?; Ok(piccolo::CallbackReturn::Return) }) } pub fn match_table_lua_table<'gc, 'a>(ctx: Context<'gc>) -> Callback<'gc> { Callback::from_fn(&ctx, move |ctx, _ex, mut stack| { let match_table: UserData = stack.consume(ctx)?; stack.push_back( match_table .downcast:: GcRefLock<'gcb, MatchSubTable>]>()? .borrow_mut(&ctx) .to_value(ctx)?, ); Ok(piccolo::CallbackReturn::Return) }) } pub fn match_table_try_run_sub<'gc, 'a>( ctx: Context<'gc>, global_memo: &'a GlobalMemoCell, ) -> Callback<'gc> { let global_memo = global_memo.clone(); Callback::from_fn(&ctx, move |ctx, _ex, mut stack| { let (match_table, sub, frame): (UserData, piccolo::String, Value) = stack.consume(ctx)?; let frame = try_unwrap_frame(ctx, &frame)?; let cmds = match_table .downcast:: GcRefLock<'gcb, MatchSubTable>]>()? .borrow() .try_sub(sub.to_str()?); match cmds { None => stack.push_back(false.into_value(ctx)), Some(cmds) => { let mut cq = global_memo.command_queue.borrow_mut(); for cmd in cmds.into_iter().rev() { cq.push_front((frame.clone(), cmd)); } stack.push_back(Value::Boolean(true)) } } Ok(piccolo::CallbackReturn::Return) }) } #[cfg(test)] mod tests { use super::*; #[test] fn parsedarg_to_subarg_works() { assert_eq!( parsedarg_to_subarg(ParsedArgument { guard: None, text: "hello world!".to_owned(), quoted_text: "hello world!".to_owned() }) .unwrap(), SubArgument { guard: None, text_parts: vec![SubTextPart::Literal("hello world!".to_owned())] } ); assert_eq!( parsedarg_to_subarg(ParsedArgument { guard: None, text: "hello $adjective ${my world}".to_owned(), quoted_text: "hello $adjective ${my world}".to_owned() }) .unwrap(), SubArgument { guard: None, text_parts: vec![ SubTextPart::Literal("hello ".to_owned()), SubTextPart::Variable("adjective".to_owned()), SubTextPart::Literal(" ".to_owned()), SubTextPart::Variable("my world".to_owned()), ] } ); assert_eq!( parsedarg_to_subarg(ParsedArgument { guard: Some(ArgumentGuard::DoubleQuote), text: "hello $adjective${my world}${your world} end".to_owned(), quoted_text: "hello $adjective$\\{my world\\}$\\{your world\\} end".to_owned() }) .unwrap(), SubArgument { guard: Some(ArgumentGuard::DoubleQuote), text_parts: vec![ SubTextPart::Literal("hello ".to_owned()), SubTextPart::Variable("adjective".to_owned()), SubTextPart::Variable("my world".to_owned()), SubTextPart::Variable("your world".to_owned()), SubTextPart::Literal(" end".to_owned()), ] } ); } #[test] fn parsedarg_rejects_invalid() { assert!(parsedarg_to_subarg(ParsedArgument { guard: None, text: "${untermin".to_owned(), quoted_text: "${untermin".to_owned() }) .is_err()); assert!(parsedarg_to_subarg(ParsedArgument { guard: None, text: "$foo$".to_owned(), quoted_text: "$foo$".to_owned() }) .is_err()); assert!(parsedarg_to_subarg(ParsedArgument { guard: None, text: "$ hello".to_owned(), quoted_text: "$ hello".to_owned() }) .is_err()); assert!(parsedarg_to_subarg(ParsedArgument { guard: None, text: "My name is ${}".to_owned(), quoted_text: "My name is ${}".to_owned() }) .is_err()); } #[test] fn matchsubtable_works() { let mut table: MatchSubTable = Default::default(); table .add_record( "^foo (?[a-z]+) baz", "\\\"Someone is talking $bar about foo baz?;:flexes his ${bar}", ) .expect("adding record failed"); assert_eq!(table.try_sub("unrelated babble"), None); assert_eq!( table.try_sub("foo woots baz\r\n"), Some(vec![ ParsedCommand { arguments: [ ParsedArgument { guard: None, text: "\"Someone".to_owned(), quoted_text: "\\\"Someone".to_owned() }, ParsedArgument { guard: None, text: "is".to_owned(), quoted_text: "is".to_owned() }, ParsedArgument { guard: None, text: "talking".to_owned(), quoted_text: "talking".to_owned() }, ParsedArgument { guard: None, text: "woots".to_owned(), quoted_text: "woots".to_owned() }, ParsedArgument { guard: None, text: "about".to_owned(), quoted_text: "about".to_owned() }, ParsedArgument { guard: None, text: "foo".to_owned(), quoted_text: "foo".to_owned() }, ParsedArgument { guard: None, text: "baz?".to_owned(), quoted_text: "baz?".to_owned() } ] .into() }, ParsedCommand { arguments: [ ParsedArgument { guard: None, text: ":flexes".to_owned(), quoted_text: ":flexes".to_owned() }, ParsedArgument { guard: None, text: "his".to_owned(), quoted_text: "his".to_owned() }, ParsedArgument { guard: None, text: "woots".to_owned(), quoted_text: "woots".to_owned() } ] .into() } ]) ); } #[test] fn matchsubtable_resists_command_injection() { let mut table: MatchSubTable = Default::default(); table .add_record("^foo (.*)", "safe_command $1") .expect("adding record failed"); let result = table .try_sub("foo pwned!};dangerous_command {") .expect("didn't match"); let expected = ParsedCommand { arguments: [ ParsedArgument { guard: None, text: "safe_command".to_owned(), quoted_text: "safe_command".to_owned(), }, ParsedArgument { guard: Some(ArgumentGuard::Paren), text: "pwned!};dangerous_command {".to_owned(), quoted_text: "pwned!\\};dangerous_command \\{".to_owned(), }, ] .into(), }; assert_eq!(result, vec![expected.clone()]); let ser_result = result[0].to_string(); assert_eq!( ser_result, "safe_command {pwned!\\};dangerous_command \\{}".to_owned() ); assert_eq!(parse_commands(&ser_result).commands, vec![expected]); } #[test] fn matchsubtable_rejects_invalid() { let mut table: MatchSubTable = Default::default(); assert!(table .add_record("^foo (?[a-z]+) baz", "$wrong") .is_err()) } }