Merge pull request #230 from snshn/stdin

Make it possible to use stdin as an input method
This commit is contained in:
Sunshine 2020-12-25 21:58:14 -10:00 committed by GitHub
commit 913051870a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 146 additions and 150 deletions

View file

@ -3,7 +3,7 @@ use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
use std::collections::HashMap;
use std::env;
use std::fs;
use std::io::{self, Error, Write};
use std::io::{self, prelude::*, Error, Write};
use std::path::Path;
use std::process;
use std::time::Duration;
@ -48,12 +48,22 @@ impl Output {
}
}
/// Reads everything available on standard input and returns it as a `String`.
///
/// The stream is consumed as raw bytes until end-of-file and then decoded
/// lossily: invalid UTF-8 sequences are replaced with U+FFFD instead of
/// discarding the whole line that contains them, and line endings are kept
/// exactly as they were received.
///
/// This is best-effort. If reading fails part-way through, the bytes received
/// before the failure are still returned.
pub fn read_stdin() -> String {
    let mut raw: Vec<u8> = Vec::new();

    // `read_to_end` returns on the first non-retryable error, so a stdin that
    // keeps failing (for example one redirected from a directory) ends the
    // read instead of yielding an endless stream of empty "lines".
    // The error is deliberately ignored: whatever arrived before it is kept.
    let _ = io::stdin().lock().read_to_end(&mut raw);

    String::from_utf8_lossy(&raw).into_owned()
}
fn main() {
let options = Options::from_args();
let original_target: &str = &options.target;
let target_url: &str;
let mut base_url: String;
let mut dom;
let mut use_stdin: bool = false;
// Pre-process the input
let cwd_normalized: String =
@ -68,6 +78,11 @@ fn main() {
eprintln!("No target specified");
}
process::exit(1);
} else if target.clone() == "-" {
// Read from pipe (stdin)
use_stdin = true;
// Default target URL to empty data URL; the user can control it via --base-url
target_url = "data:text/html,"
} else if is_http_url(target.clone()) || is_data_url(target.clone()) {
target_url = target.as_str();
} else if is_file_url(target.clone()) {
@ -119,7 +134,9 @@ fn main() {
base_url = str!(target_url);
// Retrieve target document
if is_file_url(target_url) || is_http_url(target_url) {
if use_stdin {
dom = html_to_dom(&read_stdin());
} else if is_file_url(target_url) || is_http_url(target_url) {
match retrieve_asset(&mut cache, &client, target_url, target_url, &options, 0) {
Ok((data, final_url, _media_type)) => {
if options.base_url.clone().unwrap_or(str!()).is_empty() {
@ -198,7 +215,7 @@ fn main() {
// Add metadata tag
if !options.no_metadata {
let metadata_comment: String = create_metadata_tag(&base_url);
let metadata_comment: String = create_metadata_tag(&target_url);
result.insert_str(0, &metadata_comment);
if metadata_comment.len() > 0 {
result.insert_str(metadata_comment.len(), "\n");

View file

@ -4,4 +4,5 @@ mod html;
mod js;
mod macros;
mod opts;
mod url;
mod utils;

View file

@ -18,9 +18,31 @@ mod passing {
"test"
);
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
    use crate::url;

    // A URL that ends in a bare `#` is expected to yield an empty fragment.
    #[test]
    fn https_empty() {
        let fragment = url::get_url_fragment("https://kernel.org#");
        assert_eq!(fragment, "");
    }

    // A URL without any `#` is expected to yield an empty fragment.
    #[test]
    fn no_fragment() {
        let fragment = url::get_url_fragment("https://kernel.org");
        assert_eq!(fragment, "");
    }

    // The placeholder data URL used for stdin input is expected to yield an
    // empty fragment as well.
    #[test]
    fn dummy_data_url() {
        let fragment = url::get_url_fragment("data:text/html,");
        assert_eq!(fragment, "");
    }
}

View file

@ -59,8 +59,7 @@ mod passing {
#[test]
fn parse_text_css_url_encoded() {
let (media_type, data) =
url::parse_data_url("data:text/css,div{background-color:%23000}");
let (media_type, data) = url::parse_data_url("data:text/css,div{background-color:%23000}");
assert_eq!(media_type, "text/css");
assert_eq!(String::from_utf8_lossy(&data), "div{background-color:#000}");

View file

@ -7,208 +7,165 @@
#[cfg(test)]
mod passing {
use url::ParseError;
use crate::url;
#[test]
fn from_https_to_level_up_relative() -> Result<(), ParseError> {
let resolved_url =
url::resolve_url("https://www.kernel.org", "../category/signatures.html")?;
fn from_https_to_level_up_relative() {
assert_eq!(
resolved_url.as_str(),
url::resolve_url("https://www.kernel.org", "../category/signatures.html")
.unwrap_or_default(),
"https://www.kernel.org/category/signatures.html"
);
Ok(())
}
#[test]
fn from_just_filename_to_full_https_url() -> Result<(), ParseError> {
let resolved_url = url::resolve_url(
fn from_just_filename_to_full_https_url() {
assert_eq!(
url::resolve_url(
"saved_page.htm",
"https://www.kernel.org/category/signatures.html",
)?;
assert_eq!(
resolved_url.as_str(),
)
.unwrap_or_default(),
"https://www.kernel.org/category/signatures.html"
);
Ok(())
}
#[test]
fn from_https_url_to_url_with_no_protocol() -> Result<(), ParseError> {
let resolved_url = url::resolve_url(
fn from_https_url_to_url_with_no_protocol() {
assert_eq!(
url::resolve_url(
"https://www.kernel.org",
"//www.kernel.org/theme/images/logos/tux.png",
)?;
assert_eq!(
resolved_url.as_str(),
)
.unwrap_or_default(),
"https://www.kernel.org/theme/images/logos/tux.png"
);
Ok(())
}
#[test]
fn from_https_url_to_url_with_no_protocol_and_on_different_hostname() -> Result<(), ParseError>
{
let resolved_url = url::resolve_url(
fn from_https_url_to_url_with_no_protocol_and_on_different_hostname() {
assert_eq!(
url::resolve_url(
"https://www.kernel.org",
"//another-host.org/theme/images/logos/tux.png",
)?;
assert_eq!(
resolved_url.as_str(),
)
.unwrap_or_default(),
"https://another-host.org/theme/images/logos/tux.png"
);
Ok(())
}
#[test]
fn from_https_url_to_relative_root_path() -> Result<(), ParseError> {
let resolved_url = url::resolve_url(
fn from_https_url_to_relative_root_path() {
assert_eq!(
url::resolve_url(
"https://www.kernel.org/category/signatures.html",
"/theme/images/logos/tux.png",
)?;
assert_eq!(
resolved_url.as_str(),
)
.unwrap_or_default(),
"https://www.kernel.org/theme/images/logos/tux.png"
);
Ok(())
}
#[test]
fn from_https_to_just_filename() -> Result<(), ParseError> {
let resolved_url = url::resolve_url(
fn from_https_to_just_filename() {
assert_eq!(
url::resolve_url(
"https://www.w3schools.com/html/html_iframe.asp",
"default.asp",
)?;
assert_eq!(
resolved_url.as_str(),
)
.unwrap_or_default(),
"https://www.w3schools.com/html/default.asp"
);
Ok(())
}
#[test]
fn from_data_url_to_https() -> Result<(), ParseError> {
let resolved_url = url::resolve_url(
fn from_data_url_to_https() {
assert_eq!(
url::resolve_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
"https://www.kernel.org/category/signatures.html",
)?;
assert_eq!(
resolved_url.as_str(),
)
.unwrap_or_default(),
"https://www.kernel.org/category/signatures.html"
);
Ok(())
}
#[test]
fn from_data_url_to_data_url() -> Result<(), ParseError> {
let resolved_url = url::resolve_url(
fn from_data_url_to_data_url() {
assert_eq!(
url::resolve_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
"data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K",
)?;
assert_eq!(
resolved_url.as_str(),
)
.unwrap_or_default(),
"data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K"
);
Ok(())
}
#[test]
fn from_file_url_to_relative_path() -> Result<(), ParseError> {
let resolved_url = url::resolve_url(
fn from_file_url_to_relative_path() {
assert_eq!(
url::resolve_url(
"file:///home/user/Websites/my-website/index.html",
"assets/images/logo.png",
)
.unwrap_or(str!());
assert_eq!(
resolved_url.as_str(),
.unwrap_or_default(),
"file:///home/user/Websites/my-website/assets/images/logo.png"
);
Ok(())
}
#[test]
fn from_file_url_to_relative_path_with_backslashes() -> Result<(), ParseError> {
let resolved_url = url::resolve_url(
fn from_file_url_to_relative_path_with_backslashes() {
assert_eq!(
url::resolve_url(
"file:\\\\\\home\\user\\Websites\\my-website\\index.html",
"assets\\images\\logo.png",
)
.unwrap_or(str!());
assert_eq!(
resolved_url.as_str(),
.unwrap_or_default(),
"file:///home/user/Websites/my-website/assets/images/logo.png"
);
Ok(())
}
#[test]
fn from_data_url_to_file_url() -> Result<(), ParseError> {
let resolved_url = url::resolve_url(
fn from_data_url_to_file_url() {
assert_eq!(
url::resolve_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
"file:///etc/passwd",
)
.unwrap_or(str!());
assert_eq!(resolved_url.as_str(), "file:///etc/passwd");
Ok(())
.unwrap_or_default(),
"file:///etc/passwd"
);
}
#[test]
fn preserve_fragment() -> Result<(), ParseError> {
let resolved_url = url::resolve_url(
fn preserve_fragment() {
assert_eq!(
url::resolve_url(
"http://doesnt-matter.local/",
"css/fonts/fontmarvelous.svg#fontmarvelous",
)
.unwrap_or(str!());
assert_eq!(
resolved_url.as_str(),
.unwrap_or_default(),
"http://doesnt-matter.local/css/fonts/fontmarvelous.svg#fontmarvelous"
);
Ok(())
}
#[test]
fn resolve_from_file_url_to_file_url() -> Result<(), ParseError> {
let resolved_url = if cfg!(windows) {
url::resolve_url("file:///c:/index.html", "file:///c:/image.png").unwrap_or(str!())
} else {
url::resolve_url("file:///tmp/index.html", "file:///tmp/image.png").unwrap_or(str!())
};
fn resolve_from_file_url_to_file_url() {
assert_eq!(
resolved_url.as_str(),
if cfg!(windows) {
url::resolve_url("file:///c:/index.html", "file:///c:/image.png")
.unwrap_or_default()
} else {
url::resolve_url("file:///tmp/index.html", "file:///tmp/image.png")
.unwrap_or_default()
},
if cfg!(windows) {
"file:///c:/image.png"
} else {
"file:///tmp/image.png"
}
);
Ok(())
}
}
@ -222,18 +179,16 @@ mod passing {
#[cfg(test)]
mod failing {
use crate::url;
use url::ParseError;
#[test]
fn from_data_url_to_url_with_no_protocol() -> Result<(), ParseError> {
let resolved_url = url::resolve_url(
fn from_data_url_to_url_with_no_protocol() {
assert_eq!(
url::resolve_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
"//www.w3schools.com/html/html_iframe.asp",
)
.unwrap_or(str!());
assert_eq!(resolved_url.as_str(), "");
Ok(())
.unwrap_or_default(),
""
);
}
}

View file

@ -54,6 +54,11 @@ mod passing {
"MAILTO:somebody@somewhere.com?subject=hello"
));
}
// An empty `data:` URL must still be recognised as having a protocol.
#[test]
fn empty_data_url() {
    let has_protocol = url::url_has_protocol("data:text/html,");
    assert!(has_protocol);
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
@ -65,13 +70,11 @@ mod passing {
#[cfg(test)]
mod failing {
use crate::utils;
use crate::url;
#[test]
fn url_with_no_protocol() {
assert!(!url::url_has_protocol(
"//some-hostname.com/some-file.html"
));
assert!(!url::url_has_protocol("//some-hostname.com/some-file.html"));
}
#[test]

View file

@ -74,10 +74,9 @@ pub fn file_url_to_fs_path(url: &str) -> String {
}
pub fn get_url_fragment<T: AsRef<str>>(url: T) -> String {
if Url::parse(url.as_ref()).unwrap().fragment() == None {
str!()
} else {
str!(Url::parse(url.as_ref()).unwrap().fragment().unwrap())
match Url::parse(url.as_ref()) {
Ok(parsed_url) => parsed_url.fragment().unwrap_or("").to_string(),
Err(_err) => str!(),
}
}