2022-09-04 00:02:08 +02:00
|
|
|
use std::fmt::Write;
|
|
|
|
|
2024-06-11 05:07:54 +02:00
|
|
|
use crate::{
|
|
|
|
nonprintable_notation::NonprintableNotation,
|
|
|
|
vscreen::{EscapeSequenceOffsets, EscapeSequenceOffsetsIterator},
|
|
|
|
};
|
2023-03-14 22:21:30 +01:00
|
|
|
|
2018-09-11 03:11:59 +02:00
|
|
|
/// Expand tabs like an ANSI-enabled expand(1).
|
2022-09-09 20:21:22 +02:00
|
|
|
pub fn expand_tabs(line: &str, width: usize, cursor: &mut usize) -> String {
|
|
|
|
let mut buffer = String::with_capacity(line.len() * 2);
|
2022-08-29 18:49:29 +02:00
|
|
|
|
2024-06-11 05:07:54 +02:00
|
|
|
for seq in EscapeSequenceOffsetsIterator::new(line) {
|
|
|
|
match seq {
|
|
|
|
EscapeSequenceOffsets::Text { .. } => {
|
|
|
|
let mut text = &line[seq.index_of_start()..seq.index_past_end()];
|
2022-09-09 20:21:22 +02:00
|
|
|
while let Some(index) = text.find('\t') {
|
|
|
|
// Add previous text.
|
|
|
|
if index > 0 {
|
|
|
|
*cursor += index;
|
|
|
|
buffer.push_str(&text[0..index]);
|
|
|
|
}
|
2022-08-29 18:49:29 +02:00
|
|
|
|
2022-09-09 20:21:22 +02:00
|
|
|
// Add tab.
|
|
|
|
let spaces = width - (*cursor % width);
|
|
|
|
*cursor += spaces;
|
2023-03-14 21:19:14 +01:00
|
|
|
buffer.push_str(&" ".repeat(spaces));
|
2018-09-11 03:11:59 +02:00
|
|
|
|
2022-09-09 20:21:22 +02:00
|
|
|
// Next.
|
|
|
|
text = &text[index + 1..text.len()];
|
|
|
|
}
|
|
|
|
|
|
|
|
*cursor += text.len();
|
|
|
|
buffer.push_str(text);
|
|
|
|
}
|
2024-06-11 05:07:54 +02:00
|
|
|
_ => {
|
|
|
|
// Copy the ANSI escape sequence.
|
|
|
|
buffer.push_str(&line[seq.index_of_start()..seq.index_past_end()])
|
|
|
|
}
|
2022-09-09 20:21:22 +02:00
|
|
|
}
|
|
|
|
}
|
2022-08-29 18:49:29 +02:00
|
|
|
|
2018-09-11 03:11:59 +02:00
|
|
|
buffer
|
|
|
|
}
|
2018-11-01 13:02:29 +01:00
|
|
|
|
2019-08-31 19:30:24 +02:00
|
|
|
/// Tries to decode one UTF-8 encoded character from the start of `input`.
///
/// Returns the decoded character together with the number of bytes it
/// occupies, or `None` if none of the first one to four bytes of `input`
/// form valid UTF-8 (which includes the case of an empty `input`).
fn try_parse_utf8_char(input: &[u8]) -> Option<(char, usize)> {
    // Probe prefixes of growing length; the shortest prefix that is valid
    // UTF-8 holds the leading character.
    (1..=4).find_map(|width| {
        let prefix = input.get(..width)?;
        let text = std::str::from_utf8(prefix).ok()?;
        text.chars().next().map(|chr| (chr, width))
    })
}
|
|
|
|
|
2023-03-14 22:21:30 +01:00
|
|
|
pub fn replace_nonprintable(
|
|
|
|
input: &[u8],
|
|
|
|
tab_width: usize,
|
|
|
|
nonprintable_notation: NonprintableNotation,
|
|
|
|
) -> String {
|
2018-11-01 20:29:48 +01:00
|
|
|
let mut output = String::new();
|
2018-11-01 13:02:29 +01:00
|
|
|
|
2018-11-01 19:40:26 +01:00
|
|
|
let tab_width = if tab_width == 0 { 4 } else { tab_width };
|
2018-11-01 13:02:29 +01:00
|
|
|
|
2019-08-31 19:30:24 +02:00
|
|
|
let mut idx = 0;
|
2022-01-20 14:35:25 +01:00
|
|
|
let mut line_idx = 0;
|
2019-08-31 19:30:24 +02:00
|
|
|
let len = input.len();
|
|
|
|
while idx < len {
|
|
|
|
if let Some((chr, skip_ahead)) = try_parse_utf8_char(&input[idx..]) {
|
|
|
|
idx += skip_ahead;
|
2022-01-20 14:35:25 +01:00
|
|
|
line_idx += 1;
|
2019-08-31 19:30:24 +02:00
|
|
|
|
|
|
|
match chr {
|
|
|
|
// space
|
2020-07-06 19:46:01 +02:00
|
|
|
' ' => output.push('·'),
|
2019-08-31 19:30:24 +02:00
|
|
|
// tab
|
|
|
|
'\t' => {
|
2022-01-20 14:35:25 +01:00
|
|
|
let tab_stop = tab_width - ((line_idx - 1) % tab_width);
|
2022-01-24 12:40:17 +01:00
|
|
|
line_idx = 0;
|
2022-01-20 14:35:25 +01:00
|
|
|
if tab_stop == 1 {
|
2019-08-31 19:30:24 +02:00
|
|
|
output.push('↹');
|
|
|
|
} else {
|
|
|
|
output.push('├');
|
2022-01-20 14:35:25 +01:00
|
|
|
output.push_str(&"─".repeat(tab_stop - 2));
|
2019-08-31 19:30:24 +02:00
|
|
|
output.push('┤');
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// line feed
|
2022-01-20 14:35:25 +01:00
|
|
|
'\x0A' => {
|
2023-03-14 22:21:30 +01:00
|
|
|
output.push_str(match nonprintable_notation {
|
|
|
|
NonprintableNotation::Caret => "^J\x0A",
|
|
|
|
NonprintableNotation::Unicode => "␊\x0A",
|
|
|
|
});
|
2022-01-20 14:35:25 +01:00
|
|
|
line_idx = 0;
|
|
|
|
}
|
2023-10-20 19:41:48 +02:00
|
|
|
// ASCII control characters
|
|
|
|
'\x00'..='\x1F' => {
|
|
|
|
let c = u32::from(chr);
|
|
|
|
|
|
|
|
match nonprintable_notation {
|
|
|
|
NonprintableNotation::Caret => {
|
|
|
|
let caret_character = char::from_u32(0x40 + c).unwrap();
|
|
|
|
write!(output, "^{caret_character}").ok();
|
|
|
|
}
|
|
|
|
|
|
|
|
NonprintableNotation::Unicode => {
|
|
|
|
let replacement_symbol = char::from_u32(0x2400 + c).unwrap();
|
|
|
|
output.push(replacement_symbol)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// delete
|
|
|
|
'\x7F' => match nonprintable_notation {
|
|
|
|
NonprintableNotation::Caret => output.push_str("^?"),
|
|
|
|
NonprintableNotation::Unicode => output.push('\u{2421}'),
|
|
|
|
},
|
2019-08-31 19:30:24 +02:00
|
|
|
// printable ASCII
|
|
|
|
c if c.is_ascii_alphanumeric()
|
|
|
|
|| c.is_ascii_punctuation()
|
|
|
|
|| c.is_ascii_graphic() =>
|
|
|
|
{
|
|
|
|
output.push(c)
|
2018-11-01 19:40:26 +01:00
|
|
|
}
|
2019-08-31 19:30:24 +02:00
|
|
|
// everything else
|
|
|
|
c => output.push_str(&c.escape_unicode().collect::<String>()),
|
2018-11-01 13:02:29 +01:00
|
|
|
}
|
2019-08-31 19:30:24 +02:00
|
|
|
} else {
|
2022-09-04 00:02:08 +02:00
|
|
|
write!(output, "\\x{:02X}", input[idx]).ok();
|
2019-08-31 19:30:24 +02:00
|
|
|
idx += 1;
|
2018-11-01 13:02:29 +01:00
|
|
|
}
|
|
|
|
}
|
2018-11-01 20:29:48 +01:00
|
|
|
|
|
|
|
output
|
2018-11-01 13:02:29 +01:00
|
|
|
}
|
2019-08-31 19:30:24 +02:00
|
|
|
|
|
|
|
/// Checks `try_parse_utf8_char` against characters of every encoded length,
/// with and without trailing bytes, and against inputs that must be rejected.
#[test]
fn test_try_parse_utf8_char() {
    let cases: &[(&[u8], Option<(char, usize)>)] = &[
        // one-byte characters
        (&[0x20], Some((' ', 1))),
        (&[0x20, 0x20], Some((' ', 1))),
        (&[0x20, 0xef], Some((' ', 1))),
        (&[0x00], Some(('\x00', 1))),
        (&[0x1b], Some(('\x1b', 1))),
        // two-byte characters
        (&[0xc3, 0xa4], Some(('ä', 2))),
        (&[0xc3, 0xa4, 0xef], Some(('ä', 2))),
        (&[0xc3, 0xa4, 0x20], Some(('ä', 2))),
        // three-byte characters
        (&[0xe2, 0x82, 0xac], Some(('€', 3))),
        (&[0xe2, 0x82, 0xac, 0xef], Some(('€', 3))),
        (&[0xe2, 0x82, 0xac, 0x20], Some(('€', 3))),
        (&[0xe2, 0x88, 0xb0], Some(('∰', 3))),
        // four-byte characters
        (&[0xf0, 0x9f, 0x8c, 0x82], Some(('🌂', 4))),
        (&[0xf0, 0x9f, 0x8c, 0x82, 0xef], Some(('🌂', 4))),
        (&[0xf0, 0x9f, 0x8c, 0x82, 0x20], Some(('🌂', 4))),
        // empty or invalid input
        (&[], None),
        (&[0xef], None),
        (&[0xef, 0x20], None),
        (&[0xf0, 0xf0], None),
    ];

    for &(bytes, expected) in cases {
        assert_eq!(
            try_parse_utf8_char(bytes),
            expected,
            "input bytes: {bytes:?}"
        );
    }
}
|