From 870a4b150e5309c522f3226450232f41c7cb216d Mon Sep 17 00:00:00 2001 From: Sunshine Date: Fri, 25 Dec 2020 21:23:29 -1000 Subject: [PATCH] make possible to use stdin as input method --- src/main.rs | 23 ++- src/tests/mod.rs | 1 + src/tests/url/get_url_fragment.rs | 22 +++ src/tests/url/parse_data_url.rs | 3 +- src/tests/url/resolve_url.rs | 229 ++++++++++++------------------ src/tests/url/url_has_protocol.rs | 11 +- src/url.rs | 7 +- 7 files changed, 146 insertions(+), 150 deletions(-) diff --git a/src/main.rs b/src/main.rs index 16174eb..6b7efae 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,7 +3,7 @@ use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT}; use std::collections::HashMap; use std::env; use std::fs; -use std::io::{self, Error, Write}; +use std::io::{self, prelude::*, Error, Write}; use std::path::Path; use std::process; use std::time::Duration; @@ -48,12 +48,22 @@ impl Output { } } +pub fn read_stdin() -> String { + let mut buffer = String::new(); + for line in io::stdin().lock().lines() { + buffer += line.unwrap_or_default().as_str(); + buffer += "\n"; + } + buffer +} + fn main() { let options = Options::from_args(); let original_target: &str = &options.target; let target_url: &str; let mut base_url: String; let mut dom; + let mut use_stdin: bool = false; // Pre-process the input let cwd_normalized: String = @@ -68,6 +78,11 @@ fn main() { eprintln!("No target specified"); } process::exit(1); + } else if target.clone() == "-" { + // Read from pipe (stdin) + use_stdin = true; + // Default target URL to empty data URL; the user can control it via --base-url + target_url = "data:text/html," } else if is_http_url(target.clone()) || is_data_url(target.clone()) { target_url = target.as_str(); } else if is_file_url(target.clone()) { @@ -119,7 +134,9 @@ fn main() { base_url = str!(target_url); // Retrieve target document - if is_file_url(target_url) || is_http_url(target_url) { + if use_stdin { + dom = html_to_dom(&read_stdin()); + } else if 
is_file_url(target_url) || is_http_url(target_url) { match retrieve_asset(&mut cache, &client, target_url, target_url, &options, 0) { Ok((data, final_url, _media_type)) => { if options.base_url.clone().unwrap_or(str!()).is_empty() { @@ -198,7 +215,7 @@ fn main() { // Add metadata tag if !options.no_metadata { - let metadata_comment: String = create_metadata_tag(&base_url); + let metadata_comment: String = create_metadata_tag(&target_url); result.insert_str(0, &metadata_comment); if metadata_comment.len() > 0 { result.insert_str(metadata_comment.len(), "\n"); diff --git a/src/tests/mod.rs b/src/tests/mod.rs index d276286..cc7d93b 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -4,4 +4,5 @@ mod html; mod js; mod macros; mod opts; +mod url; mod utils; diff --git a/src/tests/url/get_url_fragment.rs b/src/tests/url/get_url_fragment.rs index 9d65a21..b1b130b 100644 --- a/src/tests/url/get_url_fragment.rs +++ b/src/tests/url/get_url_fragment.rs @@ -18,9 +18,31 @@ mod passing { "test" ); } +} + +// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ +// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝ +// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗ +// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║ +// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝ +// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ + +#[cfg(test)] +mod failing { + use crate::url; #[test] fn https_empty() { assert_eq!(url::get_url_fragment("https://kernel.org#"), ""); } + + #[test] + fn no_fragment() { + assert_eq!(url::get_url_fragment("https://kernel.org"), ""); + } + + #[test] + fn dummy_data_url() { + assert_eq!(url::get_url_fragment("data:text/html,"), ""); + } } diff --git a/src/tests/url/parse_data_url.rs b/src/tests/url/parse_data_url.rs index 589fc5e..c383bb8 100644 --- a/src/tests/url/parse_data_url.rs +++ b/src/tests/url/parse_data_url.rs @@ -59,8 +59,7 @@ mod passing { #[test] fn parse_text_css_url_encoded() { - let (media_type, data) = - url::parse_data_url("data:text/css,div{background-color:%23000}"); + let 
(media_type, data) = url::parse_data_url("data:text/css,div{background-color:%23000}"); assert_eq!(media_type, "text/css"); assert_eq!(String::from_utf8_lossy(&data), "div{background-color:#000}"); diff --git a/src/tests/url/resolve_url.rs b/src/tests/url/resolve_url.rs index b4b814e..dbce125 100644 --- a/src/tests/url/resolve_url.rs +++ b/src/tests/url/resolve_url.rs @@ -7,208 +7,165 @@ #[cfg(test)] mod passing { - use url::ParseError; - use crate::url; #[test] - fn from_https_to_level_up_relative() -> Result<(), ParseError> { - let resolved_url = - url::resolve_url("https://www.kernel.org", "../category/signatures.html")?; - + fn from_https_to_level_up_relative() { assert_eq!( - resolved_url.as_str(), + url::resolve_url("https://www.kernel.org", "../category/signatures.html") + .unwrap_or_default(), "https://www.kernel.org/category/signatures.html" ); - - Ok(()) } #[test] - fn from_just_filename_to_full_https_url() -> Result<(), ParseError> { - let resolved_url = url::resolve_url( - "saved_page.htm", - "https://www.kernel.org/category/signatures.html", - )?; - + fn from_just_filename_to_full_https_url() { assert_eq!( - resolved_url.as_str(), + url::resolve_url( + "saved_page.htm", + "https://www.kernel.org/category/signatures.html", + ) + .unwrap_or_default(), "https://www.kernel.org/category/signatures.html" ); - - Ok(()) } #[test] - fn from_https_url_to_url_with_no_protocol() -> Result<(), ParseError> { - let resolved_url = url::resolve_url( - "https://www.kernel.org", - "//www.kernel.org/theme/images/logos/tux.png", - )?; - + fn from_https_url_to_url_with_no_protocol() { assert_eq!( - resolved_url.as_str(), + url::resolve_url( + "https://www.kernel.org", + "//www.kernel.org/theme/images/logos/tux.png", + ) + .unwrap_or_default(), "https://www.kernel.org/theme/images/logos/tux.png" ); - - Ok(()) } #[test] - fn from_https_url_to_url_with_no_protocol_and_on_different_hostname() -> Result<(), ParseError> - { - let resolved_url = url::resolve_url( - 
"https://www.kernel.org", - "//another-host.org/theme/images/logos/tux.png", - )?; - + fn from_https_url_to_url_with_no_protocol_and_on_different_hostname() { assert_eq!( - resolved_url.as_str(), + url::resolve_url( + "https://www.kernel.org", + "//another-host.org/theme/images/logos/tux.png", + ) + .unwrap_or_default(), "https://another-host.org/theme/images/logos/tux.png" ); - - Ok(()) } #[test] - fn from_https_url_to_relative_root_path() -> Result<(), ParseError> { - let resolved_url = url::resolve_url( - "https://www.kernel.org/category/signatures.html", - "/theme/images/logos/tux.png", - )?; - + fn from_https_url_to_relative_root_path() { assert_eq!( - resolved_url.as_str(), + url::resolve_url( + "https://www.kernel.org/category/signatures.html", + "/theme/images/logos/tux.png", + ) + .unwrap_or_default(), "https://www.kernel.org/theme/images/logos/tux.png" ); - - Ok(()) } #[test] - fn from_https_to_just_filename() -> Result<(), ParseError> { - let resolved_url = url::resolve_url( - "https://www.w3schools.com/html/html_iframe.asp", - "default.asp", - )?; - + fn from_https_to_just_filename() { assert_eq!( - resolved_url.as_str(), + url::resolve_url( + "https://www.w3schools.com/html/html_iframe.asp", + "default.asp", + ) + .unwrap_or_default(), "https://www.w3schools.com/html/default.asp" ); - - Ok(()) } #[test] - fn from_data_url_to_https() -> Result<(), ParseError> { - let resolved_url = url::resolve_url( - "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h", - "https://www.kernel.org/category/signatures.html", - )?; - + fn from_data_url_to_https() { assert_eq!( - resolved_url.as_str(), + url::resolve_url( + "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h", + "https://www.kernel.org/category/signatures.html", + ) + .unwrap_or_default(), "https://www.kernel.org/category/signatures.html" ); - - Ok(()) } #[test] - fn from_data_url_to_data_url() -> Result<(), ParseError> { - let resolved_url = url::resolve_url( - 
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h", - "data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K", - )?; - + fn from_data_url_to_data_url() { assert_eq!( - resolved_url.as_str(), + url::resolve_url( + "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h", + "data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K", + ) + .unwrap_or_default(), "data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K" ); - - Ok(()) } #[test] - fn from_file_url_to_relative_path() -> Result<(), ParseError> { - let resolved_url = url::resolve_url( - "file:///home/user/Websites/my-website/index.html", - "assets/images/logo.png", - ) - .unwrap_or(str!()); - + fn from_file_url_to_relative_path() { assert_eq!( - resolved_url.as_str(), + url::resolve_url( + "file:///home/user/Websites/my-website/index.html", + "assets/images/logo.png", + ) + .unwrap_or_default(), "file:///home/user/Websites/my-website/assets/images/logo.png" ); - - Ok(()) } #[test] - fn from_file_url_to_relative_path_with_backslashes() -> Result<(), ParseError> { - let resolved_url = url::resolve_url( - "file:\\\\\\home\\user\\Websites\\my-website\\index.html", - "assets\\images\\logo.png", - ) - .unwrap_or(str!()); - + fn from_file_url_to_relative_path_with_backslashes() { assert_eq!( - resolved_url.as_str(), + url::resolve_url( + "file:\\\\\\home\\user\\Websites\\my-website\\index.html", + "assets\\images\\logo.png", + ) + .unwrap_or_default(), "file:///home/user/Websites/my-website/assets/images/logo.png" ); - - Ok(()) } #[test] - fn from_data_url_to_file_url() -> Result<(), ParseError> { - let resolved_url = url::resolve_url( - "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h", - "file:///etc/passwd", - ) - .unwrap_or(str!()); - - assert_eq!(resolved_url.as_str(), "file:///etc/passwd"); - - Ok(()) - } - - #[test] - fn preserve_fragment() -> Result<(), ParseError> { - let resolved_url = url::resolve_url( - "http://doesnt-matter.local/", 
- "css/fonts/fontmarvelous.svg#fontmarvelous", - ) - .unwrap_or(str!()); - + fn from_data_url_to_file_url() { assert_eq!( - resolved_url.as_str(), + url::resolve_url( + "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h", + "file:///etc/passwd", + ) + .unwrap_or_default(), + "file:///etc/passwd" + ); + } + + #[test] + fn preserve_fragment() { + assert_eq!( + url::resolve_url( + "http://doesnt-matter.local/", + "css/fonts/fontmarvelous.svg#fontmarvelous", + ) + .unwrap_or_default(), "http://doesnt-matter.local/css/fonts/fontmarvelous.svg#fontmarvelous" ); - - Ok(()) } #[test] - fn resolve_from_file_url_to_file_url() -> Result<(), ParseError> { - let resolved_url = if cfg!(windows) { - url::resolve_url("file:///c:/index.html", "file:///c:/image.png").unwrap_or(str!()) - } else { - url::resolve_url("file:///tmp/index.html", "file:///tmp/image.png").unwrap_or(str!()) - }; - + fn resolve_from_file_url_to_file_url() { assert_eq!( - resolved_url.as_str(), + if cfg!(windows) { + url::resolve_url("file:///c:/index.html", "file:///c:/image.png") + .unwrap_or_default() + } else { + url::resolve_url("file:///tmp/index.html", "file:///tmp/image.png") + .unwrap_or_default() + }, if cfg!(windows) { "file:///c:/image.png" } else { "file:///tmp/image.png" } ); - - Ok(()) } } @@ -222,18 +179,16 @@ mod passing { #[cfg(test)] mod failing { use crate::url; - use url::ParseError; #[test] - fn from_data_url_to_url_with_no_protocol() -> Result<(), ParseError> { - let resolved_url = url::resolve_url( - "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h", - "//www.w3schools.com/html/html_iframe.asp", - ) - .unwrap_or(str!()); - - assert_eq!(resolved_url.as_str(), ""); - - Ok(()) + fn from_data_url_to_url_with_no_protocol() { + assert_eq!( + url::resolve_url( + "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h", + "//www.w3schools.com/html/html_iframe.asp", + ) + .unwrap_or_default(), + "" + ); } } diff --git 
a/src/tests/url/url_has_protocol.rs b/src/tests/url/url_has_protocol.rs index 1111b4a..b111a07 100644 --- a/src/tests/url/url_has_protocol.rs +++ b/src/tests/url/url_has_protocol.rs @@ -54,6 +54,11 @@ mod passing { "MAILTO:somebody@somewhere.com?subject=hello" )); } + + #[test] + fn empty_data_url() { + assert!(url::url_has_protocol("data:text/html,")); + } } // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ @@ -65,13 +70,11 @@ mod passing { #[cfg(test)] mod failing { - use crate::utils; + use crate::url; #[test] fn url_with_no_protocol() { - assert!(!url::url_has_protocol( - "//some-hostname.com/some-file.html" - )); + assert!(!url::url_has_protocol("//some-hostname.com/some-file.html")); } #[test] diff --git a/src/url.rs b/src/url.rs index 5792fb6..f90134d 100644 --- a/src/url.rs +++ b/src/url.rs @@ -74,10 +74,9 @@ pub fn file_url_to_fs_path(url: &str) -> String { } pub fn get_url_fragment<T: AsRef<str>>(url: T) -> String { - if Url::parse(url.as_ref()).unwrap().fragment() == None { - str!() - } else { - str!(Url::parse(url.as_ref()).unwrap().fragment().unwrap()) + match Url::parse(url.as_ref()) { + Ok(parsed_url) => parsed_url.fragment().unwrap_or("").to_string(), + Err(_err) => str!(), } }