use std::{
    fmt::{Display, Formatter},
    iter::Peekable,
    str::CharIndices,
};

/// Tracks the ANSI attributes that are active at the current output position.
///
/// Wrapper to avoid unnecessary branching when input doesn't have ANSI escape sequences:
/// the inner `Attributes` value is only created once the first sequence is seen.
pub struct AnsiStyle {
    attributes: Option<Attributes>,
}

impl AnsiStyle {
    /// Creates a style tracker with no recorded attributes.
    pub fn new() -> Self {
        AnsiStyle { attributes: None }
    }

    /// Feeds one parsed escape sequence into the tracker.
    ///
    /// Returns `false` if the sequence is unsupported (this includes plain text).
    pub fn update(&mut self, sequence: EscapeSequence) -> bool {
        self.attributes
            .get_or_insert_with(Attributes::new)
            .update(sequence)
    }

    /// Gets the escape sequences that undo the currently recorded attributes.
    ///
    /// Returns an empty string when nothing needs to be reset.
    pub fn to_reset_sequence(&self) -> String {
        self.attributes
            .as_ref()
            .map(Attributes::to_reset_sequence)
            .unwrap_or_default()
    }
}

impl Display for AnsiStyle {
    /// Writes the escape sequences that re-establish the recorded attributes.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        match self.attributes {
            Some(ref attributes) => Display::fmt(attributes, f),
            None => Ok(()),
        }
    }
}

/// The recorded state of every attribute we know how to track.
///
/// Each `String` field holds the escape sequence that turns the attribute on,
/// or is empty when the attribute is not set.
#[derive(Default)]
struct Attributes {
    /// Whether a non-reset SGR parameter has been seen since the last SGR reset.
    has_sgr_sequences: bool,

    /// The foreground color sequence (SGR 30-39 and 90-97).
    foreground: String,

    /// The background color sequence (SGR 40-49 and 100-107).
    background: String,

    /// The underline color sequence (SGR 58-59).
    underlined: String,

    /// The character set to use.
    /// REGEX: `\^[()][AB0-3]`
    charset: String,

    /// A buffer for unknown sequences.
    unknown_buffer: String,

    /// ON:  ^[1m
    /// OFF: ^[22m
    bold: String,

    /// ON:  ^[2m
    /// OFF: ^[22m
    dim: String,

    /// ON:  ^[4m
    /// OFF: ^[24m
    underline: String,

    /// ON:  ^[3m
    /// OFF: ^[23m
    italic: String,

    /// ON:  ^[9m
    /// OFF: ^[29m
    strike: String,

    /// The hyperlink sequence.
    /// FORMAT: \x1B]8;{ID};{URL}\e\\
    ///
    /// `\e\\` may be replaced with BEL `\x07`.
    /// Setting both {ID} and {URL} to an empty string represents no hyperlink.
    hyperlink: String,
}

impl Attributes {
    /// Creates an attribute set with nothing recorded.
    pub fn new() -> Self {
        Self::default()
    }

    /// Update the attributes with an escape sequence.
    /// Returns `false` if the sequence is unsupported.
    pub fn update(&mut self, sequence: EscapeSequence) -> bool {
        use EscapeSequence::*;
        match sequence {
            Text(_) => return false,
            Unknown(_) => { /* defer to update_with_unsupported */ }
            OSC {
                raw_sequence,
                command,
                ..
            } => {
                if command.starts_with("8;") {
                    return self.update_with_hyperlink(raw_sequence);
                }
                /* defer to update_with_unsupported */
            }
            CSI {
                final_byte,
                parameters,
                ..
            } => {
                if final_byte == "m" {
                    return self.update_with_sgr(parameters);
                }
                // NOTE(eth-p): We might want to ignore these, since they involve cursor or buffer manipulation.
                /* defer to update_with_unsupported */
            }
            NF { nf_sequence, .. } => {
                let mut iter = nf_sequence.chars();
                match iter.next() {
                    Some('(') => return self.update_with_charset('(', iter),
                    Some(')') => return self.update_with_charset(')', iter),
                    _ => { /* defer to update_with_unsupported */ }
                }
            }
        }

        self.update_with_unsupported(sequence.raw())
    }

    /// Clears every attribute that an SGR reset (`^[0m`) turns off.
    fn sgr_reset(&mut self) {
        self.has_sgr_sequences = false;

        self.foreground.clear();
        self.background.clear();
        self.underlined.clear();
        self.bold.clear();
        self.dim.clear();
        self.underline.clear();
        self.italic.clear();
        self.strike.clear();
    }

    /// Applies the `;`-separated parameters of an SGR (`CSI ... m`) sequence.
    ///
    /// Empty or unparsable parameters are treated as `0` (reset). Always returns `true`.
    fn update_with_sgr(&mut self, parameters: &str) -> bool {
        let mut iter = parameters
            .split(';')
            .map(|p| if p.is_empty() { "0" } else { p })
            .map(|p| p.parse::<u16>().unwrap_or(0)); // Treat errors as 0.

        // `while let` rather than `for`: `parse_color` pulls extra parameters from `iter`.
        while let Some(p) = iter.next() {
            // Set per parameter so that e.g. `0;1` ends up flagged; the reset arm clears it again.
            self.has_sgr_sequences = true;
            match p {
                0 => self.sgr_reset(),
                1 => self.bold = "\x1B[1m".to_owned(),
                2 => self.dim = "\x1B[2m".to_owned(),
                3 => self.italic = "\x1B[3m".to_owned(),
                4 => self.underline = "\x1B[4m".to_owned(),
                9 => self.strike = "\x1B[9m".to_owned(),
                22 => {
                    self.bold.clear();
                    self.dim.clear();
                }
                23 => self.italic.clear(),
                24 => self.underline.clear(),
                29 => self.strike.clear(),
                30..=39 => self.foreground = Self::parse_color(p, &mut iter),
                40..=49 => self.background = Self::parse_color(p, &mut iter),
                58..=59 => self.underlined = Self::parse_color(p, &mut iter),
                90..=97 => self.foreground = Self::parse_color(p, &mut iter),
                100..=107 => self.background = Self::parse_color(p, &mut iter),
                _ => {
                    // Unsupported SGR sequence.
                    // Be compatible and pretend one just wasn't provided.
                }
            }
        }

        true
    }

    /// Stashes a sequence we do not understand. Always returns `false`.
    fn update_with_unsupported(&mut self, sequence: &str) -> bool {
        self.unknown_buffer.push_str(sequence);
        false
    }

    /// Records an OSC 8 hyperlink sequence, or forgets the current one when
    /// `sequence` is the end-of-hyperlink marker. Always returns `true`.
    fn update_with_hyperlink(&mut self, sequence: &str) -> bool {
        self.hyperlink.clear();
        if !Self::is_hyperlink_end(sequence) {
            self.hyperlink.push_str(sequence);
        }

        true
    }

    /// Returns `true` if `sequence` is an OSC 8 sequence with an empty ID and HREF.
    ///
    /// Accepts both the bare command (`8;;`) and the raw sequence, i.e. with the
    /// leading `ESC ]` and a trailing `ESC \`, BEL or dangling `ESC`.
    fn is_hyperlink_end(sequence: &str) -> bool {
        let command = sequence.strip_prefix("\x1B]").unwrap_or(sequence);
        let command = command
            .strip_suffix("\x1B\\")
            .or_else(|| command.strip_suffix('\x07'))
            .or_else(|| command.strip_suffix('\x1B'))
            .unwrap_or(command);

        command == "8;;"
    }

    /// Records a character set designation; only the first char of `set` is kept.
    fn update_with_charset(&mut self, kind: char, set: impl Iterator<Item = char>) -> bool {
        self.charset = format!("\x1B{}{}", kind, set.take(1).collect::<String>());
        true
    }

    /// Builds the sequence for the color parameter `color`, consuming the
    /// follow-up parameters of extended (`x8`) colors from `parameters`.
    ///
    /// Returns an empty string for the "default color" parameters (`x9`).
    fn parse_color(color: u16, parameters: &mut dyn Iterator<Item = u16>) -> String {
        match color % 10 {
            8 => match parameters.next() {
                Some(5) /* 256-color */ => format!("\x1B[{};5;{}m", color, join(";", 1, parameters)),
                Some(2) /* 24-bit color */ => format!("\x1B[{};2;{}m", color, join(";", 3, parameters)),
                Some(c) => format!("\x1B[{color};{c}m"),
                _ => "".to_owned(),
            },
            9 => "".to_owned(),
            _ => format!("\x1B[{color}m"),
        }
    }

    /// Gets an ANSI escape sequence to reset all the known attributes.
    pub fn to_reset_sequence(&self) -> String {
        let mut buf = String::with_capacity(17);

        // TODO: Enable me in a later pull request.
        // if self.has_sgr_sequences {
        //     buf.push_str("\x1B[m");
        // }

        if !self.hyperlink.is_empty() {
            buf.push_str("\x1B]8;;\x1B\\"); // Disable hyperlink.
        }

        // TODO: Enable me in a later pull request.
        // if !self.charset.is_empty() {
        //     // https://espterm.github.io/docs/VT100%20escape%20codes.html
        //     buf.push_str("\x1B(B\x1B)B"); // setusg0 and setusg1
        // }

        buf
    }
}

impl Display for Attributes {
    /// Writes the recorded sequences back-to-back; unset attributes contribute nothing.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "{}{}{}{}{}{}{}{}{}{}",
            self.foreground,
            self.background,
            self.underlined,
            self.charset,
            self.bold,
            self.dim,
            self.underline,
            self.italic,
            self.strike,
            self.hyperlink,
        )
    }
}

/// Takes up to `limit` values from `iterator` and joins them with `delimiter`.
fn join(delimiter: &str, limit: usize, iterator: &mut dyn Iterator<Item = u16>) -> String {
    iterator
        .take(limit)
        .map(|i| i.to_string())
        .collect::<Vec<String>>()
        .join(delimiter)
}

/// A range of indices for a raw ANSI escape sequence.
#[derive(Debug, PartialEq)]
enum EscapeSequenceOffsets {
    Text {
        start: usize,
        end: usize,
    },
    Unknown {
        start: usize,
        end: usize,
    },
    #[allow(clippy::upper_case_acronyms)]
    NF {
        // https://en.wikipedia.org/wiki/ANSI_escape_code#nF_Escape_sequences
        start_sequence: usize,
        start: usize,
        end: usize,
    },
    #[allow(clippy::upper_case_acronyms)]
    OSC {
        // https://en.wikipedia.org/wiki/ANSI_escape_code#OSC_(Operating_System_Command)_sequences
        start_sequence: usize,
        start_command: usize,
        start_terminator: usize,
        end: usize,
    },
    #[allow(clippy::upper_case_acronyms)]
    CSI {
        // https://en.wikipedia.org/wiki/ANSI_escape_code#CSI_(Control_Sequence_Introducer)_sequences
        start_sequence: usize,
        start_parameters: usize,
        start_intermediates: usize,
        start_final_byte: usize,
        end: usize,
    },
}

/// An iterator over the offsets of ANSI/VT escape sequences within a string.
///
/// ## Example
///
/// ```ignore
/// let iter = EscapeSequenceOffsetsIterator::new("\x1B[33mThis is yellow text.\x1B[m");
/// ```
struct EscapeSequenceOffsetsIterator<'a> {
    text: &'a str,
    chars: Peekable<CharIndices<'a>>,
}

impl<'a> EscapeSequenceOffsetsIterator<'a> {
    /// Creates an iterator positioned at the start of `text`.
    pub fn new(text: &'a str) -> EscapeSequenceOffsetsIterator<'a> {
        EscapeSequenceOffsetsIterator {
            text,
            chars: text.char_indices().peekable(),
        }
    }

    /// Takes values from the iterator while the predicate returns true.
    /// If the predicate returns false, that value is left.
    ///
    /// Returns the `(start, end)` byte range that was consumed (possibly empty),
    /// or `None` if the input is already exhausted.
    fn chars_take_while(&mut self, pred: impl Fn(char) -> bool) -> Option<(usize, usize)> {
        let start = self.chars.peek()?.0;
        let mut end = start;

        while let Some(&(i, c)) = self.chars.peek() {
            if !pred(c) {
                break;
            }

            end = i + c.len_utf8();
            self.chars.next();
        }

        Some((start, end))
    }

    /// Consumes everything up to (not including) the next ESC.
    fn next_text(&mut self) -> Option<EscapeSequenceOffsets> {
        self.chars_take_while(|c| c != '\x1B')
            .map(|(start, end)| EscapeSequenceOffsets::Text { start, end })
    }

    /// Consumes one escape sequence. Must only be called when the next char is ESC.
    fn next_sequence(&mut self) -> Option<EscapeSequenceOffsets> {
        let (start_sequence, esc) = self.chars.next().expect("to not be finished");
        match self.chars.peek().copied() {
            // A lone ESC. When another ESC follows, it is left in place so that it
            // can introduce its own sequence on the next call.
            None | Some((_, '\x1B')) => Some(EscapeSequenceOffsets::Unknown {
                start: start_sequence,
                end: start_sequence + esc.len_utf8(),
            }),

            Some((_, ']')) => self.next_osc(start_sequence),
            Some((_, '[')) => self.next_csi(start_sequence),
            Some((_, '\x20'..='\x2F')) => self.next_nf(start_sequence),
            Some((i, c)) => {
                // The char belongs to the reported range, so it has to be consumed.
                // Otherwise the next item would start at it and overlap this one.
                self.chars.next();
                Some(EscapeSequenceOffsets::Unknown {
                    start: start_sequence,
                    end: i + c.len_utf8(),
                })
            }
        }
    }

    /// Consumes an OSC sequence, up to and including its BEL or `ESC \` terminator.
    ///
    /// An unterminated sequence runs until the end of the input.
    fn next_osc(&mut self, start_sequence: usize) -> Option<EscapeSequenceOffsets> {
        let (osc_open_index, osc_open_char) = self.chars.next().expect("to not be finished");
        debug_assert_eq!(osc_open_char, ']');

        let mut start_terminator: usize;
        let mut end_sequence: usize;

        loop {
            match self.chars_take_while(|c| !matches!(c, '\x07' | '\x1B')) {
                None => {
                    start_terminator = self.text.len();
                    end_sequence = start_terminator;
                    break;
                }

                Some((_, end)) => {
                    start_terminator = end;
                    end_sequence = end;
                }
            }

            match self.chars.next() {
                Some((ti, '\x07')) => {
                    end_sequence = ti + '\x07'.len_utf8();
                    break;
                }

                Some((ti, '\x1B')) => {
                    match self.chars.next() {
                        Some((i, '\\')) => {
                            end_sequence = i + '\\'.len_utf8();
                            break;
                        }

                        None => {
                            end_sequence = ti + '\x1B'.len_utf8();
                            break;
                        }

                        _ => {
                            // Repeat, since `\\`(anything) isn't a valid ST.
                        }
                    }
                }

                None => {
                    // Prematurely ends.
                    break;
                }

                Some((_, tc)) => {
                    // `chars_take_while` above only stops at BEL, ESC or the end of input.
                    unreachable!("this should not be reached: char {tc:?}")
                }
            }
        }

        Some(EscapeSequenceOffsets::OSC {
            start_sequence,
            start_command: osc_open_index + osc_open_char.len_utf8(),
            start_terminator,
            end: end_sequence,
        })
    }

    /// Consumes a CSI sequence: parameters, intermediates, then one final char.
    fn next_csi(&mut self, start_sequence: usize) -> Option<EscapeSequenceOffsets> {
        let (csi_open_index, csi_open_char) = self.chars.next().expect("to not be finished");
        debug_assert_eq!(csi_open_char, '[');

        let start_parameters: usize = csi_open_index + csi_open_char.len_utf8();

        // Keep iterating while within the range of `0x30-0x3F`.
        let mut start_intermediates: usize = start_parameters;
        if let Some((_, end)) = self.chars_take_while(|c| matches!(c, '\x30'..='\x3F')) {
            start_intermediates = end;
        }

        // Keep iterating while within the range of `0x20-0x2F`.
        let mut start_final_byte: usize = start_intermediates;
        if let Some((_, end)) = self.chars_take_while(|c| matches!(c, '\x20'..='\x2F')) {
            start_final_byte = end;
        }

        // Take the last char.
        let end_of_sequence = match self.chars.next() {
            None => start_final_byte,
            Some((i, c)) => i + c.len_utf8(),
        };

        Some(EscapeSequenceOffsets::CSI {
            start_sequence,
            start_parameters,
            start_intermediates,
            start_final_byte,
            end: end_of_sequence,
        })
    }

    /// Consumes an nF sequence: intermediate chars followed by one final char.
    fn next_nf(&mut self, start_sequence: usize) -> Option<EscapeSequenceOffsets> {
        let (nf_open_index, nf_open_char) = self.chars.next().expect("to not be finished");
        debug_assert!(matches!(nf_open_char, '\x20'..='\x2F'));

        let start: usize = nf_open_index;
        let mut end: usize = start;

        // Keep iterating while within the range of `0x20-0x2F`.
        match self.chars_take_while(|c| matches!(c, '\x20'..='\x2F')) {
            Some((_, i)) => end = i,
            None => {
                // The input stops right after the opening char; `end` is left at `start`.
                // NOTE(review): this excludes the opening char from the reported range;
                // the existing tests pin this, so it is kept as-is.
                return Some(EscapeSequenceOffsets::NF {
                    start_sequence,
                    start,
                    end,
                });
            }
        }

        // Get the final byte.
        if let Some((i, c)) = self.chars.next() {
            end = i + c.len_utf8()
        }

        Some(EscapeSequenceOffsets::NF {
            start_sequence,
            start,
            end,
        })
    }
}

impl<'a> Iterator for EscapeSequenceOffsetsIterator<'a> {
    type Item = EscapeSequenceOffsets;

    fn next(&mut self) -> Option<Self::Item> {
        match self.chars.peek() {
            Some((_, '\x1B')) => self.next_sequence(),
            Some((_, _)) => self.next_text(),
            None => None,
        }
    }
}

/// An iterator over ANSI/VT escape sequences within a string.
///
/// ## Example
///
/// ```ignore
/// let iter = EscapeSequenceIterator::new("\x1B[33mThis is yellow text.\x1B[m");
/// ```
pub struct EscapeSequenceIterator<'a> {
    text: &'a str,
    offset_iter: EscapeSequenceOffsetsIterator<'a>,
}

impl<'a> EscapeSequenceIterator<'a> {
    /// Creates an iterator positioned at the start of `text`.
    pub fn new(text: &'a str) -> EscapeSequenceIterator<'a> {
        EscapeSequenceIterator {
            text,
            offset_iter: EscapeSequenceOffsetsIterator::new(text),
        }
    }
}

impl<'a> Iterator for EscapeSequenceIterator<'a> {
    type Item = EscapeSequence<'a>;

    /// Turns the next set of offsets into slices of the original text.
    fn next(&mut self) -> Option<Self::Item> {
        use EscapeSequenceOffsets::*;
        self.offset_iter.next().map(|offsets| match offsets {
            Unknown { start, end } => EscapeSequence::Unknown(&self.text[start..end]),
            Text { start, end } => EscapeSequence::Text(&self.text[start..end]),
            NF {
                start_sequence,
                start,
                end,
            } => EscapeSequence::NF {
                raw_sequence: &self.text[start_sequence..end],
                nf_sequence: &self.text[start..end],
            },
            OSC {
                start_sequence,
                start_command,
                start_terminator,
                end,
            } => EscapeSequence::OSC {
                raw_sequence: &self.text[start_sequence..end],
                command: &self.text[start_command..start_terminator],
                terminator: &self.text[start_terminator..end],
            },
            CSI {
                start_sequence,
                start_parameters,
                start_intermediates,
                start_final_byte,
                end,
            } => EscapeSequence::CSI {
                raw_sequence: &self.text[start_sequence..end],
                parameters: &self.text[start_parameters..start_intermediates],
                intermediates: &self.text[start_intermediates..start_final_byte],
                final_byte: &self.text[start_final_byte..end],
            },
        })
    }
}

/// A parsed ANSI/VT100 escape sequence.
#[derive(Debug, PartialEq)]
pub enum EscapeSequence<'a> {
    Text(&'a str),
    Unknown(&'a str),
    #[allow(clippy::upper_case_acronyms)]
    NF {
        raw_sequence: &'a str,
        nf_sequence: &'a str,
    },
    #[allow(clippy::upper_case_acronyms)]
    OSC {
        raw_sequence: &'a str,
        command: &'a str,
        terminator: &'a str,
    },
    #[allow(clippy::upper_case_acronyms)]
    CSI {
        raw_sequence: &'a str,
        parameters: &'a str,
        intermediates: &'a str,
        final_byte: &'a str,
    },
}

impl<'a> EscapeSequence<'a> {
    /// Gets the slice of the original text that this item was parsed from.
    pub fn raw(&self) -> &'a str {
        use EscapeSequence::*;
        match *self {
            Text(raw) => raw,
            Unknown(raw) => raw,
            NF { raw_sequence, .. } => raw_sequence,
            OSC { raw_sequence, .. } => raw_sequence,
            CSI { raw_sequence, .. } => raw_sequence,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::{
        Attributes, EscapeSequence, EscapeSequenceIterator, EscapeSequenceOffsets,
        EscapeSequenceOffsetsIterator,
    };

    #[test]
    fn test_escape_sequence_offsets_iterator_parses_text() {
        let mut iter = EscapeSequenceOffsetsIterator::new("text");
        assert_eq!(
            iter.next(),
            Some(EscapeSequenceOffsets::Text { start: 0, end: 4 })
        );
    }

    #[test]
    fn test_escape_sequence_offsets_iterator_parses_text_stops_at_esc() {
        let mut iter = EscapeSequenceOffsetsIterator::new("text\x1B[ming");
        assert_eq!(
            iter.next(),
            Some(EscapeSequenceOffsets::Text { start: 0, end: 4 })
        );
    }

    #[test]
    fn test_escape_sequence_offsets_iterator_parses_osc_with_bel() {
        let mut iter = EscapeSequenceOffsetsIterator::new("\x1B]abc\x07");
        assert_eq!(
            iter.next(),
            Some(EscapeSequenceOffsets::OSC {
                start_sequence: 0,
                start_command: 2,
                start_terminator: 5,
                end: 6,
            })
        );
    }

    #[test]
    fn test_escape_sequence_offsets_iterator_parses_osc_with_st() {
        let mut iter = EscapeSequenceOffsetsIterator::new("\x1B]abc\x1B\\");
        assert_eq!(
            iter.next(),
            Some(EscapeSequenceOffsets::OSC {
                start_sequence: 0,
                start_command: 2,
                start_terminator: 5,
                end: 7,
            })
        );
    }

    #[test]
    fn test_escape_sequence_offsets_iterator_parses_osc_thats_broken() {
        let mut iter = EscapeSequenceOffsetsIterator::new("\x1B]ab");
        assert_eq!(
            iter.next(),
            Some(EscapeSequenceOffsets::OSC {
                start_sequence: 0,
                start_command: 2,
                start_terminator: 4,
                end: 4,
            })
        );
    }

    #[test]
    fn test_escape_sequence_offsets_iterator_parses_csi() {
        let mut iter = EscapeSequenceOffsetsIterator::new("\x1B[m");
        assert_eq!(
            iter.next(),
            Some(EscapeSequenceOffsets::CSI {
                start_sequence: 0,
                start_parameters: 2,
                start_intermediates: 2,
                start_final_byte: 2,
                end: 3
            })
        );
    }

    #[test]
    fn test_escape_sequence_offsets_iterator_parses_csi_with_parameters() {
        let mut iter = EscapeSequenceOffsetsIterator::new("\x1B[1;34m");
        assert_eq!(
            iter.next(),
            Some(EscapeSequenceOffsets::CSI {
                start_sequence: 0,
                start_parameters: 2,
                start_intermediates: 6,
                start_final_byte: 6,
                end: 7
            })
        );
    }

    #[test]
    fn test_escape_sequence_offsets_iterator_parses_csi_with_intermediates() {
        let mut iter = EscapeSequenceOffsetsIterator::new("\x1B[$m");
        assert_eq!(
            iter.next(),
            Some(EscapeSequenceOffsets::CSI {
                start_sequence: 0,
                start_parameters: 2,
                start_intermediates: 2,
                start_final_byte: 3,
                end: 4
            })
        );
    }

    #[test]
    fn test_escape_sequence_offsets_iterator_parses_csi_with_parameters_and_intermediates() {
        let mut iter = EscapeSequenceOffsetsIterator::new("\x1B[1$m");
        assert_eq!(
            iter.next(),
            Some(EscapeSequenceOffsets::CSI {
                start_sequence: 0,
                start_parameters: 2,
                start_intermediates: 3,
                start_final_byte: 4,
                end: 5
            })
        );
    }

    #[test]
    fn test_escape_sequence_offsets_iterator_parses_csi_thats_broken() {
        let mut iter = EscapeSequenceOffsetsIterator::new("\x1B[");
        assert_eq!(
            iter.next(),
            Some(EscapeSequenceOffsets::CSI {
                start_sequence: 0,
                start_parameters: 2,
                start_intermediates: 2,
                start_final_byte: 2,
                end: 2
            })
        );

        let mut iter = EscapeSequenceOffsetsIterator::new("\x1B[1");
        assert_eq!(
            iter.next(),
            Some(EscapeSequenceOffsets::CSI {
                start_sequence: 0,
                start_parameters: 2,
                start_intermediates: 3,
                start_final_byte: 3,
                end: 3
            })
        );

        let mut iter = EscapeSequenceOffsetsIterator::new("\x1B[1$");
        assert_eq!(
            iter.next(),
            Some(EscapeSequenceOffsets::CSI {
                start_sequence: 0,
                start_parameters: 2,
                start_intermediates: 3,
                start_final_byte: 4,
                end: 4
            })
        );
    }

    #[test]
    fn test_escape_sequence_offsets_iterator_parses_nf() {
        let mut iter = EscapeSequenceOffsetsIterator::new("\x1B($0");
        assert_eq!(
            iter.next(),
            Some(EscapeSequenceOffsets::NF {
                start_sequence: 0,
                start: 1,
                end: 4
            })
        );
    }

    #[test]
    fn test_escape_sequence_offsets_iterator_parses_nf_thats_broken() {
        let mut iter = EscapeSequenceOffsetsIterator::new("\x1B(");
        assert_eq!(
            iter.next(),
            Some(EscapeSequenceOffsets::NF {
                start_sequence: 0,
                start: 1,
                end: 1
            })
        );
    }

    #[test]
    fn test_escape_sequence_offsets_iterator_parses_unknown_without_overlap() {
        let mut iter = EscapeSequenceOffsetsIterator::new("\x1B7ab");
        assert_eq!(
            iter.next(),
            Some(EscapeSequenceOffsets::Unknown { start: 0, end: 2 })
        );
        assert_eq!(
            iter.next(),
            Some(EscapeSequenceOffsets::Text { start: 2, end: 4 })
        );
        assert_eq!(iter.next(), None);
    }

    #[test]
    fn test_escape_sequence_offsets_iterator_iterates() {
        let mut iter = EscapeSequenceOffsetsIterator::new("text\x1B[33m\x1B]OSC\x07\x1B(0");
        assert_eq!(
            iter.next(),
            Some(EscapeSequenceOffsets::Text { start: 0, end: 4 })
        );
        assert_eq!(
            iter.next(),
            Some(EscapeSequenceOffsets::CSI {
                start_sequence: 4,
                start_parameters: 6,
                start_intermediates: 8,
                start_final_byte: 8,
                end: 9
            })
        );
        assert_eq!(
            iter.next(),
            Some(EscapeSequenceOffsets::OSC {
                start_sequence: 9,
                start_command: 11,
                start_terminator: 14,
                end: 15
            })
        );
        assert_eq!(
            iter.next(),
            Some(EscapeSequenceOffsets::NF {
                start_sequence: 15,
                start: 16,
                end: 18
            })
        );
        assert_eq!(iter.next(), None);
    }

    #[test]
    fn test_escape_sequence_iterator_iterates() {
        let mut iter =
            EscapeSequenceIterator::new("text\x1B[33m\x1B]OSC\x07\x1B]OSC\x1B\\\x1B(0");
        assert_eq!(iter.next(), Some(EscapeSequence::Text("text")));
        assert_eq!(
            iter.next(),
            Some(EscapeSequence::CSI {
                raw_sequence: "\x1B[33m",
                parameters: "33",
                intermediates: "",
                final_byte: "m",
            })
        );
        assert_eq!(
            iter.next(),
            Some(EscapeSequence::OSC {
                raw_sequence: "\x1B]OSC\x07",
                command: "OSC",
                terminator: "\x07",
            })
        );
        assert_eq!(
            iter.next(),
            Some(EscapeSequence::OSC {
                raw_sequence: "\x1B]OSC\x1B\\",
                command: "OSC",
                terminator: "\x1B\\",
            })
        );
        assert_eq!(
            iter.next(),
            Some(EscapeSequence::NF {
                raw_sequence: "\x1B(0",
                nf_sequence: "(0",
            })
        );
        assert_eq!(iter.next(), None);
    }

    #[test]
    fn test_sgr_attributes_do_not_leak_into_wrong_field() {
        let mut attrs = Attributes::new();

        // Bold, Dim, Italic, Underline, Foreground, Background
        attrs.update(EscapeSequence::CSI {
            raw_sequence: "\x1B[1;2;3;4;31;41m",
            parameters: "1;2;3;4;31;41",
            intermediates: "",
            final_byte: "m",
        });

        assert_eq!(attrs.bold, "\x1B[1m");
        assert_eq!(attrs.dim, "\x1B[2m");
        assert_eq!(attrs.italic, "\x1B[3m");
        assert_eq!(attrs.underline, "\x1B[4m");
        assert_eq!(attrs.foreground, "\x1B[31m");
        assert_eq!(attrs.background, "\x1B[41m");

        // Bold, Bright Foreground, Bright Background
        attrs.sgr_reset();
        attrs.update(EscapeSequence::CSI {
            raw_sequence: "\x1B[1;94;103m",
            parameters: "1;94;103",
            intermediates: "",
            final_byte: "m",
        });

        assert_eq!(attrs.bold, "\x1B[1m");
        assert_eq!(attrs.foreground, "\x1B[94m");
        assert_eq!(attrs.background, "\x1B[103m");
    }

    #[test]
    fn test_sgr_strike_is_tracked() {
        let mut attrs = Attributes::new();

        attrs.update(EscapeSequence::CSI {
            raw_sequence: "\x1B[9m",
            parameters: "9",
            intermediates: "",
            final_byte: "m",
        });
        assert_eq!(attrs.strike, "\x1B[9m");

        attrs.update(EscapeSequence::CSI {
            raw_sequence: "\x1B[29m",
            parameters: "29",
            intermediates: "",
            final_byte: "m",
        });
        assert_eq!(attrs.strike, "");
    }

    #[test]
    fn test_hyperlink_end_clears_hyperlink() {
        let mut attrs = Attributes::new();

        attrs.update(EscapeSequence::OSC {
            raw_sequence: "\x1B]8;;http://example.com\x07",
            command: "8;;http://example.com",
            terminator: "\x07",
        });
        assert_eq!(attrs.hyperlink, "\x1B]8;;http://example.com\x07");

        attrs.update(EscapeSequence::OSC {
            raw_sequence: "\x1B]8;;\x07",
            command: "8;;",
            terminator: "\x07",
        });
        assert_eq!(attrs.hyperlink, "");
    }
}