From b1bf0f317a6238dd98718f45d0531e81a8f5b553 Mon Sep 17 00:00:00 2001 From: Condorra Date: Mon, 23 Sep 2024 22:38:51 +1000 Subject: [PATCH] Implement a match table ready for aliases and triggers. --- Cargo.lock | 39 ++++ Cargo.toml | 1 + src/lua_engine/frames.rs | 48 ++++- src/main.rs | 1 + src/match_table.rs | 401 +++++++++++++++++++++++++++++++++++++++ src/parsing.rs | 14 ++ 6 files changed, 503 insertions(+), 1 deletion(-) create mode 100644 src/match_table.rs diff --git a/Cargo.lock b/Cargo.lock index 16d442c..466e860 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -30,6 +30,15 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + [[package]] name = "allocator-api2" version = "0.2.18" @@ -992,6 +1001,35 @@ dependencies = [ "rand_core", ] +[[package]] +name = "regex" +version = "1.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" + [[package]] name = "rustc-demangle" version = "0.1.24" @@ -1355,6 +1393,7 @@ dependencies = [ "minicrossterm", "nom", "piccolo", + "regex", "serde", "serde_json", "thiserror", diff --git a/Cargo.toml b/Cargo.toml index 35684d7..2b6cde8 100644 --- a/Cargo.toml +++ 
b/Cargo.toml @@ -22,3 +22,4 @@ anyhow = "1.0.86" serde = "1.0.209" serde_json = "1.0.127" gc-arena = { git = "https://github.com/kyren/gc-arena.git", rev = "5a7534b883b703f23cfb8c3cfdf033460aa77ea9" } +regex = "1.10.6" diff --git a/src/lua_engine/frames.rs b/src/lua_engine/frames.rs index be8f9e8..0d8c70d 100644 --- a/src/lua_engine/frames.rs +++ b/src/lua_engine/frames.rs @@ -7,6 +7,7 @@ use piccolo::{ self, async_sequence, Callback, CallbackReturn, Context, FromValue, Function, IntoValue, SequenceReturn, Table, UserData, Value, Variadic, }; +use regex::Regex; use std::{rc::Rc, str}; use yew::UseStateSetter; @@ -17,7 +18,35 @@ pub fn alias<'gc, 'a>( _global_memo: &'a GlobalMemoCell, _global_layout: &'a UseStateSetter, ) -> Callback<'gc> { - Callback::from_fn(&ctx, move |_ctx, _ex, _stack| { + Callback::from_fn(&ctx, move |ctx, _ex, mut stack| { + let info: Table = ctx.get_global("info")?; + let cur_frame: TermFrame = + try_unwrap_frame(ctx, &info.get(ctx, ctx.intern_static(b"current_frame"))?)?; + let frames: Table = ctx.get_global("frames")?; + let cur_frame: Table = frames.get(ctx, cur_frame.0 as i64)?; + + let alias_match: piccolo::String = piccolo::String::from_value( + ctx, + stack + .pop_front() + .ok_or_else(|| anyhow::Error::msg("Missing alias match"))?, + )?; + let sub_to: piccolo::String = piccolo::String::from_value( + ctx, + stack + .pop_front() + .ok_or_else(|| anyhow::Error::msg("Missing substitution match"))?, + )?; + if !stack.is_empty() { + Err(anyhow::Error::msg( + "Extra arguments to alias command. 
Try wrapping the action in {}", + ))?; + } + + let aliases: Table = cur_frame.get(ctx, "aliases")?; + + aliases.set(ctx, alias_match, sub_to)?; + Ok(piccolo::CallbackReturn::Return) }) } @@ -222,6 +251,9 @@ pub(super) fn new_frame<'gc>(ctx: Context<'gc>, _global_memo: &GlobalMemoCell) - frame_tab.set(ctx, ctx.intern_static(b"frame"), frame)?; + let aliases_tab: Table = Table::new(&ctx); + frame_tab.set(ctx, ctx.intern_static(b"aliases"), aliases_tab)?; + Ok(piccolo::CallbackReturn::Return) }) } @@ -257,6 +289,20 @@ pub(super) fn frame_input<'gc>(ctx: Context<'gc>, _global_memo: &GlobalMemoCell) .ok_or_else(|| anyhow::Error::msg("classes.frame:new missing line!"))?; stack.consume(ctx)?; + // Check for an alias match... + for (alias_match, alias_sub) in frame_tab.get::<&str, Table>(ctx, "aliases")?.iter() { + if let Some(alias_match) = piccolo::String::from_value(ctx, alias_match) + .ok() + .and_then(|am| am.to_str().ok()) + .and_then(|v| Regex::new(v).ok()) + { + if let Some(alias_sub) = piccolo::String::from_value(ctx, alias_sub) + .ok() + .and_then(|am| am.to_str().ok()) + {} + } + } + let linked_mud = frame_tab.get_value(ctx, ctx.intern_static(b"linked_mud")); if linked_mud.is_nil() { return Ok(piccolo::CallbackReturn::Return); diff --git a/src/main.rs b/src/main.rs index d158dbd..fe41211 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,6 +8,7 @@ pub mod command_handler; pub mod id_intern; pub mod lineengine; pub mod lua_engine; +pub mod match_table; pub mod parsing; pub mod split_panel; pub mod telnet; diff --git a/src/match_table.rs b/src/match_table.rs new file mode 100644 index 0000000..65db453 --- /dev/null +++ b/src/match_table.rs @@ -0,0 +1,401 @@ +use std::{collections::VecDeque, str::FromStr}; + +use anyhow::bail; +use itertools::Itertools; +use piccolo::{Context, IntoValue, Table, Value}; +use regex::Regex; + +use crate::parsing::{parse_commands, quote_string, ArgumentGuard, ParsedArgument, ParsedCommand}; + +#[derive(Default, Debug)] +pub struct 
MatchSubTable { + contents: Vec, +} + +impl MatchSubTable { + pub fn to_value<'gc>(&self, ctx: Context<'gc>) -> anyhow::Result> { + let table = Table::new(&ctx); + for record in self.contents.iter() { + table.set( + ctx, + ctx.intern(record.match_text.as_bytes()), + ctx.intern(record.sub_text.as_bytes()), + )?; + } + Ok(table.into_value(ctx)) + } + + pub fn try_sub(&self, input: &str) -> Option> { + for record in self.contents.iter() { + if let Some(matched) = record.match_regex.captures(input) { + let vec = Some( + record + .sub_commands + .iter() + .map(|subcmd| ParsedCommand { + arguments: subcmd + .arguments + .iter() + .map(|subarg| { + let unquoted_text = subarg + .text_parts + .iter() + .map(|tp| match tp { + SubTextPart::Literal(t) => t.as_str(), + SubTextPart::Variable(v) => { + if let Ok(v) = ::from_str(v) { + matched.get(v).map_or("", |v| v.as_str()) + } else { + matched.name(v).map_or("", |v| v.as_str()) + } + } + }) + .join(""); + ParsedArgument { + guard: if subarg.guard.is_none() + && unquoted_text.contains(';') + { + Some(ArgumentGuard::Paren) + } else { + subarg.guard.clone() + }, + text: unquoted_text.clone(), + quoted_text: quote_string(&unquoted_text), + } + }) + .collect(), + }) + .collect(), + ); + return vec; + } + } + None + } + + pub fn add_record(&mut self, match_text: &str, sub_text: &str) -> anyhow::Result<()> { + let rex = Regex::new(match_text)?; + + let parse_result = parse_commands(sub_text); + let sub_commands: Vec = parse_result + .commands + .into_iter() + .map(|cmd| { + Ok(SubCommand { + arguments: cmd + .arguments + .into_iter() + .map(parsedarg_to_subarg) + .collect::>>()?, + }) + }) + .collect::>>()?; + + self.contents.push(MatchRecord { + match_text: match_text.to_owned(), + match_regex: rex, + sub_text: sub_text.to_owned(), + sub_commands, + }); + Ok(()) + } +} + +fn parsedarg_to_subarg(parsedarg: ParsedArgument) -> anyhow::Result { + let mut text_parts: Vec = vec![]; + let mut iter = parsedarg.text.chars().peekable(); + let 
mut buf = String::new(); + 'outer: loop { + match iter.next() { + None => break 'outer, + Some('$') => match iter.peek() { + None => { + bail!("substitution ends in $ which is invalid.") + } + Some('$') => { + iter.next(); + buf.push('$') + } + Some('{') => { + iter.next(); + if !buf.is_empty() { + text_parts.push(SubTextPart::Literal(buf)); + } + buf = String::new(); + 'inner: loop { + match iter.next() { + None => { + bail!("substitution opened with {{ is never closed.") + } + Some('}') => { + if buf.is_empty() { + bail!("substitution of empty variable name."); + } + text_parts.push(SubTextPart::Variable(buf)); + buf = String::new(); + break 'inner; + } + Some(c) => buf.push(c), + } + } + } + Some(_) => { + if !buf.is_empty() { + text_parts.push(SubTextPart::Literal(buf)); + } + buf = String::new(); + 'inner: loop { + match iter.peek() { + None => { + text_parts.push(SubTextPart::Variable(buf)); + buf = String::new(); + break 'outer; + } + Some(c) if *c == '_' || c.is_ascii_alphanumeric() => { + buf.push(*c); + iter.next(); + } + Some(_) => { + if buf.is_empty() { + bail!("substitution of empty variable name."); + } + text_parts.push(SubTextPart::Variable(buf)); + buf = String::new(); + break 'inner; + } + } + } + } + }, + Some(c) => buf.push(c), + } + } + if !buf.is_empty() { + text_parts.push(SubTextPart::Literal(buf)) + } + Ok(SubArgument { + guard: parsedarg.guard, + text_parts, + }) +} + +#[derive(Debug)] +pub struct MatchRecord { + pub match_text: String, + pub match_regex: Regex, + pub sub_text: String, + // We parse into commands and arguments upfront, before substitution. + // This reduces the risk of security problems. 
+ pub sub_commands: Vec, +} + +#[derive(Debug)] +pub struct SubCommand { + pub arguments: VecDeque, +} + +#[derive(Debug, PartialEq, Eq)] +pub struct SubArgument { + pub guard: Option, + pub text_parts: Vec, +} + +#[derive(Debug, PartialEq, Eq)] +pub enum SubTextPart { + Literal(String), + Variable(String), +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parsedarg_to_subarg_works() { + assert_eq!( + parsedarg_to_subarg(ParsedArgument { + guard: None, + text: "hello world!".to_owned(), + quoted_text: "hello world!".to_owned() + }) + .unwrap(), + SubArgument { + guard: None, + text_parts: vec![SubTextPart::Literal("hello world!".to_owned())] + } + ); + assert_eq!( + parsedarg_to_subarg(ParsedArgument { + guard: None, + text: "hello $adjective ${my world}".to_owned(), + quoted_text: "hello $adjective ${my world}".to_owned() + }) + .unwrap(), + SubArgument { + guard: None, + text_parts: vec![ + SubTextPart::Literal("hello ".to_owned()), + SubTextPart::Variable("adjective".to_owned()), + SubTextPart::Literal(" ".to_owned()), + SubTextPart::Variable("my world".to_owned()), + ] + } + ); + assert_eq!( + parsedarg_to_subarg(ParsedArgument { + guard: Some(ArgumentGuard::DoubleQuote), + text: "hello $adjective${my world}${your world} end".to_owned(), + quoted_text: "hello $adjective$\\{my world\\}$\\{your world\\} end".to_owned() + }) + .unwrap(), + SubArgument { + guard: Some(ArgumentGuard::DoubleQuote), + text_parts: vec![ + SubTextPart::Literal("hello ".to_owned()), + SubTextPart::Variable("adjective".to_owned()), + SubTextPart::Variable("my world".to_owned()), + SubTextPart::Variable("your world".to_owned()), + SubTextPart::Literal(" end".to_owned()), + ] + } + ); + } + + #[test] + fn parsedarg_rejects_invalid() { + assert!(parsedarg_to_subarg(ParsedArgument { + guard: None, + text: "${untermin".to_owned(), + quoted_text: "${untermin".to_owned() + }) + .is_err()); + assert!(parsedarg_to_subarg(ParsedArgument { + guard: None, + text: "$foo$".to_owned(), + 
quoted_text: "$foo$".to_owned() + }) + .is_err()); + assert!(parsedarg_to_subarg(ParsedArgument { + guard: None, + text: "$ hello".to_owned(), + quoted_text: "$ hello".to_owned() + }) + .is_err()); + assert!(parsedarg_to_subarg(ParsedArgument { + guard: None, + text: "My name is ${}".to_owned(), + quoted_text: "My name is ${}".to_owned() + }) + .is_err()); + } + + #[test] + fn matchsubtable_works() { + let mut table: MatchSubTable = Default::default(); + table + .add_record( + "^foo (?[a-z]+) baz", + "\\\"Someone is talking $bar about foo baz?;:flexes his ${bar}", + ) + .expect("adding record failed"); + assert_eq!(table.try_sub("unrelated babble"), None); + assert_eq!( + table.try_sub("foo woots baz\r\n"), + Some(vec![ + ParsedCommand { + arguments: [ + ParsedArgument { + guard: None, + text: "\"Someone".to_owned(), + quoted_text: "\\\"Someone".to_owned() + }, + ParsedArgument { + guard: None, + text: "is".to_owned(), + quoted_text: "is".to_owned() + }, + ParsedArgument { + guard: None, + text: "talking".to_owned(), + quoted_text: "talking".to_owned() + }, + ParsedArgument { + guard: None, + text: "woots".to_owned(), + quoted_text: "woots".to_owned() + }, + ParsedArgument { + guard: None, + text: "about".to_owned(), + quoted_text: "about".to_owned() + }, + ParsedArgument { + guard: None, + text: "foo".to_owned(), + quoted_text: "foo".to_owned() + }, + ParsedArgument { + guard: None, + text: "baz?".to_owned(), + quoted_text: "baz?".to_owned() + } + ] + .into() + }, + ParsedCommand { + arguments: [ + ParsedArgument { + guard: None, + text: ":flexes".to_owned(), + quoted_text: ":flexes".to_owned() + }, + ParsedArgument { + guard: None, + text: "his".to_owned(), + quoted_text: "his".to_owned() + }, + ParsedArgument { + guard: None, + text: "woots".to_owned(), + quoted_text: "woots".to_owned() + } + ] + .into() + } + ]) + ); + } + + #[test] + fn matchsubtable_resists_command_injection() { + let mut table: MatchSubTable = Default::default(); + table + .add_record("^foo 
(.*)", "safe_command $1") + .expect("adding record failed"); + let result = table + .try_sub("foo pwned!};dangerous_command {") + .expect("didn't match"); + let expected = ParsedCommand { + arguments: [ + ParsedArgument { + guard: None, + text: "safe_command".to_owned(), + quoted_text: "safe_command".to_owned(), + }, + ParsedArgument { + guard: Some(ArgumentGuard::Paren), + text: "pwned!};dangerous_command {".to_owned(), + quoted_text: "pwned!\\};dangerous_command \\{".to_owned(), + }, + ] + .into(), + }; + assert_eq!(result, vec![expected.clone()]); + let ser_result = result[0].to_string(); + assert_eq!( + ser_result, + "safe_command {pwned!\\};dangerous_command \\{}".to_owned() + ); + assert_eq!(parse_commands(&ser_result).commands, vec![expected]); + } +} diff --git a/src/parsing.rs b/src/parsing.rs index a50b229..905d3f1 100644 --- a/src/parsing.rs +++ b/src/parsing.rs @@ -140,6 +140,20 @@ fn unquote_string(input: &str) -> String { } } +pub fn quote_string(input: &str) -> String { + let mut buf: String = String::new(); + for c in input.chars() { + match c { + '\\' => buf.push_str("\\\\"), + '{' => buf.push_str("\\{"), + '}' => buf.push_str("\\}"), + '"' => buf.push_str("\\\""), + c => buf.push(c), + } + } + buf +} + fn parse_string(input: &str) -> IResult<&str, ()> { value( (),