diff --git a/Cargo.lock b/Cargo.lock index 65c57da8..f3808990 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -52,6 +52,7 @@ dependencies = [ "console 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", "content_inspector 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", "directories 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "encoding 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)", "error-chain 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)", "git2 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -166,6 +167,63 @@ dependencies = [ "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "encoding" +version = "0.2.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "encoding-index-japanese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)", + "encoding-index-korean 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)", + "encoding-index-simpchinese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)", + "encoding-index-singlebyte 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)", + "encoding-index-tradchinese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "encoding-index-japanese" +version = "1.20141219.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "encoding-index-korean" +version = "1.20141219.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "encoding-index-simpchinese" +version = "1.20141219.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "encoding-index-singlebyte" +version = "1.20141219.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "encoding-index-tradchinese" +version = "1.20141219.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "encoding_index_tests" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "error-chain" version = "0.12.0" @@ -841,6 +899,13 @@ dependencies = [ "checksum console 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "ecd48adf136733979b49e15bc3b4c43cc0d3c85ece7bd08e6daa414c6fcb13e6" "checksum content_inspector 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d902e17eb0038a23c88baa0d78c75fac7968132e73f4fdb9ea77b03d2641b669" "checksum directories 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b106a38a9bf6c763c6c2e2c3332ab7635da453a68a6babca776386b3b287d338" +"checksum encoding 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)" = "6b0d943856b990d12d3b55b359144ff341533e516d94098b1d3fc1ac666d36ec" +"checksum encoding-index-japanese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "04e8b2ff42e9a05335dbf8b5c6f7567e5591d0d916ccef4e0b1710d32a0d0c91" +"checksum encoding-index-korean 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "4dc33fb8e6bcba213fe2f14275f0963fd16f0a02c878e3095ecfdf5bee529d81" +"checksum encoding-index-simpchinese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "d87a7194909b9118fc707194baa434a4e3b0fb6a5a757c73c3adb07aa25031f7" +"checksum encoding-index-singlebyte 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "3351d5acffb224af9ca265f435b859c7c01537c0849754d3db3fdf2bfe2ae84a" +"checksum encoding-index-tradchinese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fd0e20d5688ce3cab59eb3ef3a2083a5c77bf496cb798dc6fcdb75f323890c18" +"checksum encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "a246d82be1c9d791c5dfde9a2bd045fc3cbba3fa2b11ad558f27d01712f00569" "checksum error-chain 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "07e791d3be96241c77c43846b665ef1384606da2cd2a48730abe606a12906e02" "checksum flate2 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "37847f133aae7acf82bb9577ccd8bda241df836787642654286e79679826a54b" "checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3" diff --git a/Cargo.toml b/Cargo.toml index 2444a95a..36651dfe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,7 @@ directories = "1.0" lazy_static = "1.0" wild = "2.0" content_inspector = "0.2.2" +encoding = "0.2" [dependencies.git2] version = "0.7" diff --git a/src/inputfile.rs b/src/inputfile.rs index 47b47f5e..cf544c4c 100644 --- a/src/inputfile.rs +++ b/src/inputfile.rs @@ -1,6 +1,8 @@ use std::fs::File; use std::io::{self, BufRead, BufReader}; +use content_inspector::{self, ContentType}; + use errors::*; const THEME_PREVIEW_FILE: &[u8] = include_bytes!("../assets/theme_preview.rs"); @@ -8,6 +10,7 @@ const THEME_PREVIEW_FILE: &[u8] = include_bytes!("../assets/theme_preview.rs"); pub struct InputFileReader<'a> { inner: Box, pub first_line: Vec, + pub content_type: ContentType, } impl<'a> InputFileReader<'a> { @@ -15,18 +18,31 @@ impl<'a> InputFileReader<'a> { let mut first_line = vec![]; reader.read_until(b'\n', &mut first_line).ok(); + let content_type = content_inspector::inspect(&first_line[..]); + + if content_type == ContentType::UTF_16LE { + reader.read_until(0x00, &mut first_line).ok(); + } + InputFileReader { inner: Box::new(reader), first_line, + content_type, } } pub fn read_line(&mut self, buf: &mut Vec) -> io::Result { if self.first_line.is_empty() { - self.inner.read_until(b'\n', buf).map(|size| size > 0) + let res = self.inner.read_until(b'\n', buf).map(|size| size > 0)?; + + if self.content_type == ContentType::UTF_16LE { + self.inner.read_until(0x00, buf).ok(); + } + + Ok(res) } else { buf.append(&mut self.first_line); - return Ok(true); + Ok(true) } } } @@ -84,3 +100,32 @@ fn basic() { assert_eq!(false, res.unwrap()); assert!(buffer.is_empty()); } + +#[test] +fn utf16le() { + let content = b"\xFF\xFE\x73\x00\x0A\x00\x64\x00"; + let mut reader = InputFileReader::new(&content[..]); + + assert_eq!(b"\xFF\xFE\x73\x00\x0A\x00", &reader.first_line[..]); + + let mut buffer = vec![]; + + let res = reader.read_line(&mut buffer); + assert!(res.is_ok()); + assert_eq!(true, res.unwrap()); + assert_eq!(b"\xFF\xFE\x73\x00\x0A\x00", &buffer[..]); + + buffer.clear(); + + let res = reader.read_line(&mut buffer); + assert!(res.is_ok()); + assert_eq!(true, res.unwrap()); + assert_eq!(b"\x64\x00", &buffer[..]); + + buffer.clear(); + + let res = reader.read_line(&mut buffer); + assert!(res.is_ok()); + assert_eq!(false, res.unwrap()); + assert!(buffer.is_empty()); +} diff --git a/src/main.rs b/src/main.rs index b0e88d15..00d7339e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -15,6 +15,7 @@ extern crate atty; extern crate console; extern crate content_inspector; extern crate directories; +extern crate encoding; extern crate git2; extern crate syntect; extern crate wild; diff --git a/src/printer.rs b/src/printer.rs index d4087c05..5b6d6377 100644 --- a/src/printer.rs +++ b/src/printer.rs @@ -9,7 +9,10 @@ use console::AnsiCodeIterator; use syntect::easy::HighlightLines; use syntect::highlighting::Theme; -use content_inspector::{self, ContentType}; +use content_inspector::ContentType; + +use encoding::all::{UTF_16BE, UTF_16LE}; +use encoding::{DecoderTrap, Encoding}; use app::Config; use assets::HighlightingAssets; @@ -121,12 +124,9 @@ impl<'a> InteractivePrinter<'a> { panel_width = 0; } - // Determine file content type - let content_type = content_inspector::inspect(&reader.first_line[..]); - let mut line_changes = None; - let highlighter = if content_type.is_binary() { + let highlighter = if reader.content_type.is_binary() { None } else { // Get the Git modifications @@ -149,8 +149,8 @@ impl<'a> InteractivePrinter<'a> { colors, config, decorations, + content_type: reader.content_type, ansi_prefix_sgr: String::new(), - content_type, line_changes, highlighter, } @@ -207,10 +207,11 @@ impl<'a> Printer for InteractivePrinter<'a> { _ => ("", "STDIN"), }; - let mode = if self.content_type.is_binary() { - " " - } else { - "" + let mode = match self.content_type { + ContentType::BINARY => " ", + ContentType::UTF_16LE => " ", + ContentType::UTF_16BE => " ", + _ => "" }; writeln!( @@ -247,7 +248,18 @@ impl<'a> Printer for InteractivePrinter<'a> { line_number: usize, line_buffer: &[u8], ) -> Result<()> { - let line = String::from_utf8_lossy(&line_buffer).to_string(); + let line = match self.content_type { + ContentType::BINARY => { + return Ok(()); + } + ContentType::UTF_16LE => UTF_16LE + .decode(&line_buffer, DecoderTrap::Strict) + .unwrap_or("Invalid UTF-16LE".into()), + ContentType::UTF_16BE => UTF_16BE + .decode(&line_buffer, DecoderTrap::Strict) + .unwrap_or("Invalid UTF-16BE".into()), + _ => String::from_utf8_lossy(&line_buffer).to_string(), + }; let regions = { let highlighter = match self.highlighter { Some(ref mut highlighter) => highlighter,