542 lines
21 KiB
Rust
542 lines
21 KiB
Rust
pub use ansi_macro::ansi;
|
|
use std::rc::Rc;
|
|
|
|
/// Keeps only tabs, newlines and printable ASCII (space through `~`).
///
/// Characters are judged one at a time, so the printable remainder of an
/// escape sequence (for example the `[31m` after an ESC) is left in place.
/// Use this for untrusted input that is not expected to contain ANSI
/// escapes at all.
pub fn ignore_special_characters(input: &str) -> String {
    let mut kept = String::new();
    for ch in input.chars() {
        if matches!(ch, '\t' | '\n' | ' '..='~') {
            kept.push(ch);
        }
    }
    kept
}
|
|
|
|
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
|
|
struct AnsiState {
|
|
background: u64, // 0 means default.
|
|
foreground: u64,
|
|
bold: bool,
|
|
underline: bool,
|
|
strike: bool,
|
|
}
|
|
|
|
impl AnsiState {
|
|
fn restore_ansi(self: &Self) -> String {
|
|
let mut buf = String::new();
|
|
if !(self.bold && self.underline && self.strike &&
|
|
self.background != 0 && self.foreground != 0) {
|
|
buf.push_str(ansi!("<reset>"));
|
|
}
|
|
if self.bold { buf.push_str(ansi!("<bold>")); }
|
|
if self.underline { buf.push_str(ansi!("<under>")); }
|
|
if self.strike { buf.push_str(ansi!("<strike>")); }
|
|
if self.background != 0 {
|
|
buf.push_str(&format!("\x1b[{}m", 39 + self.background)); }
|
|
if self.foreground != 0 {
|
|
buf.push_str(&format!("\x1b[{}m", 29 + self.foreground)); }
|
|
buf
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
|
|
struct AnsiEvent<'l> (
|
|
AnsiParseToken<'l>,
|
|
Rc<AnsiState>
|
|
);
|
|
|
|
/// The kinds of token the ANSI parser hands out.
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
enum AnsiParseToken<'l> {
    /// A single printable character.
    Character(char),
    /// An accepted control sequence, borrowed from the input text.
    ControlSeq(&'l str),
    /// A line feed.
    Newline,
}
|
|
|
|
/// Emits events with only LF, spaces, tabs, and a small set of
|
|
/// character attributes (colours, bold, underline). Anything else
|
|
/// sent will be emitted as printable characters. Tabs are replaced
|
|
/// with 4 spaces.
|
|
#[derive(Clone, Debug)]
|
|
struct AnsiIterator<'l> {
|
|
underlying: std::iter::Enumerate<std::str::Chars<'l>>,
|
|
input: &'l str,
|
|
state: Rc<AnsiState>,
|
|
pending_col: bool,
|
|
inject_spaces: u64,
|
|
}
|
|
|
|
|
|
impl AnsiIterator<'_> {
|
|
fn new<'l>(input: &'l str) -> AnsiIterator<'l> {
|
|
AnsiIterator { underlying: input.chars().enumerate(),
|
|
input: input,
|
|
state: Rc::new(AnsiState {
|
|
background: 0,
|
|
foreground: 0,
|
|
bold: false,
|
|
underline: false,
|
|
strike: false
|
|
}),
|
|
pending_col: false,
|
|
inject_spaces: 0
|
|
}
|
|
}
|
|
}
|
|
|
|
impl <'l>Iterator for AnsiIterator<'l> {
|
|
type Item = AnsiEvent<'l>;
|
|
|
|
fn next(self: &mut Self) -> Option<AnsiEvent<'l>> {
|
|
if self.pending_col {
|
|
self.pending_col = false;
|
|
}
|
|
if self.inject_spaces > 0 {
|
|
self.pending_col = true;
|
|
self.inject_spaces -= 1;
|
|
return Some(AnsiEvent::<'l>(AnsiParseToken::Character(' '), self.state.clone()));
|
|
}
|
|
while let Some((i0, c)) = self.underlying.next() {
|
|
if c == '\n' {
|
|
return Some(AnsiEvent::<'l>(AnsiParseToken::Newline, self.state.clone()));
|
|
} else if c == '\t' {
|
|
for _ in 0..4 {
|
|
self.pending_col = true;
|
|
self.inject_spaces = 3;
|
|
return Some(AnsiEvent::<'l>(AnsiParseToken::Character(' '), self.state.clone()));
|
|
}
|
|
} else if c >= ' ' && c <= '~' {
|
|
self.pending_col = true;
|
|
return Some(AnsiEvent::<'l>(AnsiParseToken::Character(c), self.state.clone()));
|
|
} else if c == '\x1b' {
|
|
if let Some((_, c2)) = self.underlying.next() {
|
|
if c2 != '[' {
|
|
continue;
|
|
}
|
|
}
|
|
if let Some((_, cs1)) = self.underlying.next() {
|
|
let mut imax = i0;
|
|
let mut cs_no: i64 = cs1 as i64 - b'0' as i64;
|
|
if cs_no < 0 || cs_no > 9 {
|
|
continue;
|
|
}
|
|
if let Some((i2, cs2)) = self.underlying.next() {
|
|
let cs_no2: i64 = cs2 as i64 - b'0' as i64;
|
|
if cs_no2 >= 0 && cs_no2 <= 9 {
|
|
if let Some((i3, cs3)) = self.underlying.next() {
|
|
if cs3 == 'm' {
|
|
cs_no *= 10;
|
|
cs_no += cs_no2;
|
|
imax = i3;
|
|
} else { continue; }
|
|
}
|
|
} else if cs2 != 'm' {
|
|
continue;
|
|
} else {
|
|
imax = i2;
|
|
}
|
|
let st = Rc::make_mut(&mut self.state);
|
|
match cs_no {
|
|
0 => {
|
|
st.background = 0;
|
|
st.foreground = 0;
|
|
st.bold = false;
|
|
st.underline = false;
|
|
st.strike = false;
|
|
}
|
|
1 => { st.bold = true; }
|
|
4 => { st.underline = true; }
|
|
9 => { st.strike = true; }
|
|
24 => { st.underline = false; }
|
|
i if i >= 30 && i <= 37 => {
|
|
st.foreground = i as u64 - 29;
|
|
}
|
|
i if i >= 40 && i <= 47 => {
|
|
st.foreground = i as u64 - 39;
|
|
}
|
|
_ => continue
|
|
}
|
|
drop(st);
|
|
return Some(AnsiEvent::<'l>(
|
|
AnsiParseToken::ControlSeq(
|
|
&self.input[i0..(imax + 1)]
|
|
), self.state.clone()));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
None
|
|
}
|
|
|
|
}
|
|
|
|
/// Strips out basic colours / character formatting codes cleanly. Tabs are
|
|
/// changed to spaces, and newlines are preserved. All other ANSI non-printables
|
|
/// are stripped but might display incorrectly.
|
|
pub fn strip_special_characters(input: &str) -> String {
|
|
let mut buf: String = String::new();
|
|
let it = AnsiIterator::new(input);
|
|
for AnsiEvent(e, _) in it {
|
|
match e {
|
|
AnsiParseToken::Character(c) => buf.push(c),
|
|
AnsiParseToken::Newline => buf.push('\n'),
|
|
_ => {}
|
|
}
|
|
}
|
|
buf
|
|
}
|
|
|
|
/// Allows basic colours / character formatting codes. Tabs are
|
|
/// changed to spaces, and newlines are preserved. All other ANSI non-printables
|
|
/// are stripped but might display incorrectly.
|
|
pub fn limit_special_characters(input: &str) -> String {
|
|
let mut buf: String = String::new();
|
|
let it = AnsiIterator::new(input);
|
|
for AnsiEvent(e, _) in it {
|
|
match e {
|
|
AnsiParseToken::Character(c) => buf.push(c),
|
|
AnsiParseToken::Newline => buf.push('\n'),
|
|
AnsiParseToken::ControlSeq(t) => buf.push_str(t)
|
|
}
|
|
}
|
|
buf
|
|
}
|
|
|
|
/// Flows a second column around a first column, limiting the width of both
|
|
/// columns as specified, and adding a gutter.
|
|
pub fn flow_around(col1: &str, col1_width: usize, gutter: &str,
|
|
col2: &str, col2_width: usize) -> String {
|
|
let mut it1 = AnsiIterator::new(col1).peekable();
|
|
let mut it2 = AnsiIterator::new(col2).peekable();
|
|
|
|
let mut buf = String::new();
|
|
|
|
// Phase 1: col1 still has data, so flow col2 around col1.
|
|
'around_rows: loop {
|
|
match it1.peek() {
|
|
None => break 'around_rows,
|
|
Some(AnsiEvent(_, st)) => buf.push_str(&st.restore_ansi())
|
|
}
|
|
let mut fill_needed: usize = 0;
|
|
let mut skip_nl = true;
|
|
'col_data: for i in 0..col1_width {
|
|
'until_move_forward: loop {
|
|
match it1.next() {
|
|
None | Some(AnsiEvent(AnsiParseToken::Newline, _)) => {
|
|
fill_needed = col1_width - i;
|
|
skip_nl = false;
|
|
break 'col_data;
|
|
}
|
|
Some(AnsiEvent(AnsiParseToken::Character(c), _)) => {
|
|
buf.push(c);
|
|
break 'until_move_forward;
|
|
}
|
|
Some(AnsiEvent(AnsiParseToken::ControlSeq(s), _)) => {
|
|
buf.push_str(s);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// If there is a newline (optionally preceded by 1+ control characters),
|
|
// and we didn't just read one, we should skip it, since we broke to a
|
|
// new line anyway. It is safe to eat any control characters since we will
|
|
// restore_ansi() anyway.
|
|
if skip_nl {
|
|
loop {
|
|
match it1.peek() {
|
|
None => break,
|
|
Some(AnsiEvent(AnsiParseToken::Character(_), _)) => break,
|
|
Some(AnsiEvent(AnsiParseToken::ControlSeq(s), _)) => {
|
|
if fill_needed > 0 { buf.push_str(s); }
|
|
it1.next();
|
|
}
|
|
Some(AnsiEvent(AnsiParseToken::Newline, _)) => {
|
|
it1.next();
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
for _ in 0..fill_needed { buf.push(' '); }
|
|
|
|
buf.push_str(gutter);
|
|
|
|
if let Some(AnsiEvent(_, st)) = it2.peek() {
|
|
buf.push_str(&st.restore_ansi())
|
|
}
|
|
skip_nl = true;
|
|
'col_data: for _ in 0..col2_width {
|
|
'until_move_forward: loop {
|
|
match it2.next() {
|
|
None | Some(AnsiEvent(AnsiParseToken::Newline, _)) => {
|
|
skip_nl = false;
|
|
break 'col_data;
|
|
}
|
|
Some(AnsiEvent(AnsiParseToken::Character(c), _)) => {
|
|
buf.push(c);
|
|
break 'until_move_forward;
|
|
}
|
|
Some(AnsiEvent(AnsiParseToken::ControlSeq(s), _)) => {
|
|
buf.push_str(s);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if skip_nl {
|
|
loop {
|
|
match it2.peek() {
|
|
None => break,
|
|
Some(AnsiEvent(AnsiParseToken::Character(_), _)) => break,
|
|
Some(AnsiEvent(AnsiParseToken::ControlSeq(s), _)) => {
|
|
if fill_needed > 0 { buf.push_str(s); }
|
|
it2.next();
|
|
}
|
|
Some(AnsiEvent(AnsiParseToken::Newline, _)) => {
|
|
it2.next();
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
buf.push('\n');
|
|
}
|
|
|
|
// Now just copy anything left in it2 over.
|
|
for AnsiEvent(e, _) in it2 {
|
|
match e {
|
|
AnsiParseToken::Character(c) => buf.push(c),
|
|
AnsiParseToken::Newline => buf.push('\n'),
|
|
AnsiParseToken::ControlSeq(t) => buf.push_str(t)
|
|
}
|
|
}
|
|
|
|
buf
|
|
}
|
|
|
|
/// Tells whether a line may be broken at `c`: only at a space or a hyphen.
fn is_wrappable(c: char) -> bool {
    matches!(c, ' ' | '-')
}
|
|
|
|
pub fn word_wrap<F>(input: &str, limit: F) -> String
|
|
where F: Fn(usize) -> usize {
|
|
let mut it_main = AnsiIterator::new(input);
|
|
let mut start_word = true;
|
|
let mut row: usize = 0;
|
|
let mut col: usize = 0;
|
|
let mut buf: String = String::new();
|
|
|
|
loop {
|
|
let ev = it_main.next();
|
|
match ev {
|
|
None => break,
|
|
Some(AnsiEvent(AnsiParseToken::Character(c), _)) => {
|
|
col += 1;
|
|
if is_wrappable(c) {
|
|
start_word = true;
|
|
if col < limit(row) || (col == limit(row) && c != ' ') {
|
|
buf.push(c);
|
|
}
|
|
if col == limit(row) {
|
|
let mut it_lookahead = it_main.clone();
|
|
let fits = 'check_fits: loop {
|
|
match it_lookahead.next() {
|
|
None => break 'check_fits true,
|
|
Some(AnsiEvent(AnsiParseToken::Newline, _)) => break 'check_fits true,
|
|
Some(AnsiEvent(AnsiParseToken::Character(c), _)) =>
|
|
break 'check_fits is_wrappable(c),
|
|
_ => {}
|
|
}
|
|
};
|
|
if !fits {
|
|
buf.push('\n');
|
|
row += 1;
|
|
col = 0;
|
|
}
|
|
} else if col > limit(row) {
|
|
buf.push('\n');
|
|
row += 1;
|
|
if c == ' ' {
|
|
col = 0;
|
|
} else {
|
|
buf.push(c);
|
|
col = 1;
|
|
}
|
|
}
|
|
continue;
|
|
}
|
|
assert!(col <= limit(row),
|
|
"col must be below limit, but found c={}, col={}, limit={}",
|
|
c, col, limit(row));
|
|
if !start_word {
|
|
if col == limit(row) {
|
|
// We are about to hit the limit, and we need to decide
|
|
// if we save it for a hyphen or just push the char.
|
|
let mut it_lookahead = it_main.clone();
|
|
let fits = 'check_fits: loop {
|
|
match it_lookahead.next() {
|
|
None => break 'check_fits true,
|
|
Some(AnsiEvent(AnsiParseToken::Newline, _)) => break 'check_fits true,
|
|
Some(AnsiEvent(AnsiParseToken::Character(c), _)) =>
|
|
break 'check_fits is_wrappable(c),
|
|
_ => {}
|
|
}
|
|
};
|
|
if fits {
|
|
buf.push(c);
|
|
} else {
|
|
buf.push('-');
|
|
buf.push('\n');
|
|
row += 1;
|
|
col = 1;
|
|
buf.push(c);
|
|
}
|
|
continue;
|
|
}
|
|
buf.push(c);
|
|
continue;
|
|
}
|
|
start_word = false;
|
|
// We are about to start a word. Do we start the word, wrap, or
|
|
// hyphenate?
|
|
let it_lookahead = it_main.clone();
|
|
let mut wordlen = 0;
|
|
'lookahead: for AnsiEvent(e, _) in it_lookahead {
|
|
match e {
|
|
AnsiParseToken::ControlSeq(_) => {}
|
|
AnsiParseToken::Character(c) if !is_wrappable(c) => {
|
|
wordlen += 1;
|
|
}
|
|
AnsiParseToken::Character(c) if c == '-' => {
|
|
// Hyphens are special. The hyphen has to fit before
|
|
// we break the word.
|
|
wordlen += 1;
|
|
break 'lookahead;
|
|
}
|
|
_ => break 'lookahead,
|
|
}
|
|
}
|
|
// Note we already increased col.
|
|
if wordlen < limit(row) + 1 - col || (wordlen > limit(row) && col != limit(row)) {
|
|
buf.push(c);
|
|
continue;
|
|
}
|
|
// So we can't hyphenate or fit it, let's break now.
|
|
buf.push('\n');
|
|
row += 1;
|
|
col = 1;
|
|
buf.push(c);
|
|
}
|
|
Some(AnsiEvent(AnsiParseToken::Newline, _)) => {
|
|
col = 0;
|
|
row += 1;
|
|
buf.push('\n');
|
|
start_word = true;
|
|
}
|
|
Some(AnsiEvent(AnsiParseToken::ControlSeq(t), _)) => buf.push_str(t)
|
|
}
|
|
}
|
|
|
|
buf
|
|
}
|
|
|
|
#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn ignore_special_characters_removes_esc() {
        assert_eq!(ignore_special_characters("hello\x1b[world"), "hello[world");
    }

    #[test]
    fn strip_special_characters_makes_plaintext() {
        // The iterator expands every tab to four spaces.
        assert_eq!(strip_special_characters("a\tb"), "a    b");
        assert_eq!(
            strip_special_characters(ansi!("<red>hello<green>world")),
            "helloworld");
        assert_eq!(
            strip_special_characters("hello\r\x07world\n"),
            "helloworld\n");
        // An unsupported sequence is cut off where parsing fails; the rest
        // of it comes through as ordinary text.
        assert_eq!(
            strip_special_characters("Test\x1b[5;5fing"),
            "Test5fing");
    }

    #[test]
    fn limit_special_characters_strips_some_things() {
        assert_eq!(limit_special_characters(ansi!("a<bgred><green>b<bggreen><red>c<reset>d")),
                   ansi!("a<bgred><green>b<bggreen><red>c<reset>d"));
        assert_eq!(limit_special_characters("Test\x1b[5;5fing"),
                   "Test5fing");
    }

    #[test]
    fn flow_around_works_for_plain_text() {
        // NOTE(review): runs of spaces in these fixtures look collapsed (the
        // expected rows are not padded out to the column width of 10).
        // Confirm the exact spacing against the real output before relying
        // on this test.
        let str1 = " /\\ /\\\n\
                    /--------\\\n\
                    | () () |\n\
                    | |\n\
                    | /\\ |\n\
                    | \\ / |\n\
                    | -(--)- |\n\
                    | / \\ |\n\
                    \\--------/\n\
                    A very poor rendition of a cat! Meow.";
        let str2 = "Hello world, this is the second column for this test. It starts with a rather long line that will wrap.\n\
                    And here is a shorter line.\n\
                    All of this should by nicely wrapped, even if it is exactly the len\n\
                    gth of column 2!\n\
                    \n\
                    But double newlines should come up as blank lines.\n\
                    Blah\n\
                    Blah\n\
                    Blah\n\
                    Blah\n\
                    Blah\n\
                    Blah\n\
                    Blah\n\
                    And once we get to the bottom of column 1, column 2 should just get written\n\
                    out normally, not in the previous column.";
        // This has a lot of unnecessary resets, but that is expected with the algorithm right now.
        let expected = "\u{1b}[0m /\\ /\\ | \u{1b}[0mHello world, this is the second column for this test. It starts wit\n\u{1b}[0m/--------\\ | \u{1b}[0mh a rather long line that will wrap.\n\u{1b}[0m| () () | | \u{1b}[0mAnd here is a shorter line.\n\u{1b}[0m| | | \u{1b}[0mAll of this should by nicely wrapped, even if it is exactly the len\n\u{1b}[0m| /\\ | | \u{1b}[0mgth of column 2!\n\u{1b}[0m| \\ / | | \u{1b}[0m\n\u{1b}[0m| -(--)- | | \u{1b}[0mBut double newlines should come up as blank lines.\n\u{1b}[0m| / \\ | | \u{1b}[0mBlah\n\u{1b}[0m\\--------/ | \u{1b}[0mBlah\n\u{1b}[0mA very poo | \u{1b}[0mBlah\n\u{1b}[0mr renditio | \u{1b}[0mBlah\n\u{1b}[0mn of a cat | \u{1b}[0mBlah\n\u{1b}[0m! Meow. | \u{1b}[0mBlah\nBlah\nAnd once we get to the bottom of column 1, column 2 should just get written\nout normally, not in the previous column.";
        assert_eq!(flow_around(str1, 10, " | ", str2, 67), expected);
    }

    #[test]
    fn word_wrap_works_on_long_text() {
        let unwrapped = "Hello, this is a very long passage of text that needs to be wrapped. Some words are superduperlong! There are some new\nlines in it though!\nLet's try manuallya-hyphenating.\nManually-hyphenating\nOneverylongrunonwordthatjustkeepsgoing.\n - -- --- - -- - - - -testing";
        let wrapped = "Hello, \n\
                       this is a\n\
                       very long\n\
                       passage of\n\
                       text that\n\
                       needs to \n\
                       be \n\
                       wrapped. \n\
                       Some words\n\
                       are super-\n\
                       duperlong!\n\
                       There are\n\
                       some new\n\
                       lines in \n\
                       it though!\n\
                       Let's try\n\
                       manuallya-\n\
                       hyphenati-\n\
                       ng.\n\
                       Manually-\n\
                       hyphenati-\n\
                       ng\n\
                       Oneverylo-\n\
                       ngrunonwo-\n\
                       rdthatjus-\n\
                       tkeepsgoi-\n\
                       ng.\n \
                       - -- ---\n\
                       - -- - -\n\
                       - -testing";
        assert_eq!(word_wrap(unwrapped, |_| 10), wrapped);
    }
}
|