Merge branch 'context-comment' of github.com:Alch-Emi/monolith into context-comment

This commit is contained in:
Sunshine 2020-04-25 20:59:34 -04:00
commit c999359b9f
No known key found for this signature in database
GPG key ID: B80CA68703CD8AB1
6 changed files with 57 additions and 3 deletions

1
Cargo.lock generated
View file

@ -582,6 +582,7 @@ dependencies = [
"html5ever 0.24.1 (registry+https://github.com/rust-lang/crates.io-index)",
"reqwest 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)",
"tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)",
"url 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
]

View file

@ -18,6 +18,10 @@ cssparser = "0.27.2"
html5ever = "0.24.1"
url = "2.1.1"
# Used to render the metadata comment that records when the page was saved;
# also required by reqwest as of v0.10.0.
time = "0.1.42"
[dependencies.reqwest]
version = "0.10.*"
default-features = false

View file

@ -14,6 +14,7 @@ pub struct AppArgs {
pub silent: bool,
pub timeout: u64,
pub user_agent: String,
pub no_metadata: bool,
}
const DEFAULT_NETWORK_TIMEOUT: u64 = 120;
@ -41,6 +42,7 @@ impl AppArgs {
.args_from_usage("-I, --isolate 'Cuts off document from the Internet'")
.args_from_usage("-j, --no-js 'Removes JavaScript'")
.args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'")
.args_from_usage("-M, --no-metadata 'Excludes metadata information from the document'")
.args_from_usage("-o, --output=[document.html] 'Writes output to <file>'")
.args_from_usage("-s, --silent 'Suppresses verbosity'")
.args_from_usage("-t, --timeout=[60] 'Adjusts network request timeout'")
@ -59,6 +61,7 @@ impl AppArgs {
app_args.no_images = app.is_present("no-images");
app_args.no_js = app.is_present("no-js");
app_args.insecure = app.is_present("insecure");
app_args.no_metadata = app.is_present("no-metadata");
app_args.isolate = app.is_present("isolate");
app_args.silent = app.is_present("silent");
app_args.timeout = app

View file

@ -2,6 +2,7 @@ use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets};
use monolith::utils::{data_url_to_text, is_data_url, is_file_url, is_http_url, retrieve_asset};
use reqwest::blocking::Client;
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
use reqwest::Url;
use std::collections::HashMap;
use std::env;
use std::fs;
@ -133,6 +134,8 @@ fn main() {
process::exit(1);
}
let time_saved = time::now_utc();
walk_and_embed_assets(
&mut cache,
&client,
@ -146,7 +149,7 @@ fn main() {
app_args.silent,
);
let html: String = stringify_document(
let mut html: String = stringify_document(
&dom.document,
app_args.no_css,
app_args.no_frames,
@ -155,6 +158,32 @@ fn main() {
app_args.isolate,
);
if !app_args.no_metadata {
// Safe to unwrap (we just put this through an HTTP request)
let mut clean_url = Url::parse(&base_url).unwrap();
clean_url.set_fragment(None);
// Don't include credentials
clean_url.set_username("");
clean_url.set_password(None);
let metadata_comment = if is_http_url(&base_url) {
format!(
"<!--- Saved from {} on {} using {} v{} -->\n",
&clean_url,
time_saved.rfc3339(),
env!("CARGO_PKG_NAME"),
env!("CARGO_PKG_VERSION"),
)
} else {
format!(
"<!--- Saved from local source on {} using {} v{} -->\n",
time_saved.rfc3339(),
env!("CARGO_PKG_NAME"),
env!("CARGO_PKG_VERSION"),
)
};
html.insert_str(0, &metadata_comment);
}
output
.writeln_str(&html)
.expect("Could not write HTML output");

View file

@ -75,6 +75,7 @@ fn passing_bad_input_data_url() -> Result<(), Box<dyn std::error::Error>> {
fn passing_isolate_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd
.arg("-M")
.arg("-I")
.arg("data:text/html,Hello%2C%20World!")
.output()
@ -101,6 +102,7 @@ fn passing_isolate_data_url() -> Result<(), Box<dyn std::error::Error>> {
fn passing_remove_css_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd
.arg("-M")
.arg("-c")
.arg("data:text/html,<style>body{background-color:pink}</style>Hello")
.output()
@ -128,6 +130,7 @@ fn passing_remove_css_from_data_url() -> Result<(), Box<dyn std::error::Error>>
fn passing_remove_frames_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd
.arg("-M")
.arg("-f")
.arg("data:text/html,<iframe src=\"https://google.com\"></iframe>Hi")
.output()
@ -154,6 +157,7 @@ fn passing_remove_frames_from_data_url() -> Result<(), Box<dyn std::error::Error
fn passing_remove_images_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd
.arg("-M")
.arg("-i")
.arg("data:text/html,<img src=\"https://google.com\"/>Hi")
.output()
@ -189,6 +193,7 @@ Hi\
fn passing_remove_js_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd
.arg("-M")
.arg("-j")
.arg("data:text/html,<script>alert(2)</script>Hi")
.output()
@ -220,6 +225,7 @@ fn passing_local_file_target_input() -> Result<(), Box<dyn std::error::Error>> {
let cwd_normalized: String =
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
let out = cmd
.arg("-M")
.arg(if cfg!(windows) {
"src\\tests\\data\\local-file.html"
} else {
@ -274,6 +280,7 @@ fn passing_local_file_target_input_absolute_target_path() -> Result<(), Box<dyn
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd
.arg("-M")
.arg("-jciI")
.arg(if cfg!(windows) {
format!(
@ -334,6 +341,7 @@ fn passing_local_file_url_target_input() -> Result<(), Box<dyn std::error::Error
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
let out = cmd
.arg("-M")
.arg("-cji")
.arg(if cfg!(windows) {
format!(
@ -401,6 +409,7 @@ fn passing_security_disallow_local_assets_within_data_url_targets(
) -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd
.arg("-M")
.arg("data:text/html,%3Cscript%20src=\"src/tests/data/local-script.js\"%3E%3C/script%3E")
.output()
.unwrap();
@ -438,7 +447,7 @@ fn passing_embed_file_url_local_asset_within_style_attribute(
file = file_url_prefix,
path = str!(file_svg.path().to_str().unwrap()).replace("\\", "/"),
)?;
let out = cmd.arg(file_html.path()).output().unwrap();
let out = cmd.arg("-M").arg(file_html.path()).output().unwrap();
// STDOUT should contain HTML with a data URL for the background-image in it
assert_eq!(
@ -489,7 +498,7 @@ fn passing_css_import_string() -> Result<(), Box<dyn std::error::Error>> {
file = file_url_prefix,
css_path = str!(file_css.path().to_str().unwrap()).replace("\\", "/"),
)?;
let out = cmd.arg(file_html.path()).output().unwrap();
let out = cmd.arg("-M").arg(file_html.path()).output().unwrap();
// STDOUT should contain embedded CSS url()'s
assert_eq!(

View file

@ -38,3 +38,11 @@ fn passing_removes_empty_query_amp_and_empty_fragment() {
"https://somewhere.com/font.eot?a=b"
);
}
#[test]
fn passing_keeps_credentials() {
    // A URL carrying a username and password must come back from
    // `clean_url` unchanged: the credentials are not stripped.
    let url_with_credentials = "https://cookie:monster@gibson.internet/";
    let cleaned = utils::clean_url(url_with_credentials);

    assert_eq!(cleaned, "https://cookie:monster@gibson.internet/");
}