Merge branch 'context-comment' of github.com:Alch-Emi/monolith into context-comment
This commit is contained in:
commit
c999359b9f
6 changed files with 57 additions and 3 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -582,6 +582,7 @@ dependencies = [
|
||||||
"html5ever 0.24.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
"html5ever 0.24.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"reqwest 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
"reqwest 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
"tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"url 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
"url 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
@ -18,6 +18,10 @@ cssparser = "0.27.2"
|
||||||
html5ever = "0.24.1"
|
html5ever = "0.24.1"
|
||||||
url = "2.1.1"
|
url = "2.1.1"
|
||||||
|
|
||||||
|
# Used to render comments indicating the time the page was saved
|
||||||
|
# also required by reqwest as of v0.10.0
|
||||||
|
time = "0.1.42"
|
||||||
|
|
||||||
[dependencies.reqwest]
|
[dependencies.reqwest]
|
||||||
version = "0.10.*"
|
version = "0.10.*"
|
||||||
default-features = false
|
default-features = false
|
||||||
|
|
|
@ -14,6 +14,7 @@ pub struct AppArgs {
|
||||||
pub silent: bool,
|
pub silent: bool,
|
||||||
pub timeout: u64,
|
pub timeout: u64,
|
||||||
pub user_agent: String,
|
pub user_agent: String,
|
||||||
|
pub no_metadata: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
const DEFAULT_NETWORK_TIMEOUT: u64 = 120;
|
const DEFAULT_NETWORK_TIMEOUT: u64 = 120;
|
||||||
|
@ -41,6 +42,7 @@ impl AppArgs {
|
||||||
.args_from_usage("-I, --isolate 'Cuts off document from the Internet'")
|
.args_from_usage("-I, --isolate 'Cuts off document from the Internet'")
|
||||||
.args_from_usage("-j, --no-js 'Removes JavaScript'")
|
.args_from_usage("-j, --no-js 'Removes JavaScript'")
|
||||||
.args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'")
|
.args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'")
|
||||||
|
.args_from_usage("-M, --no-metadata 'Excludes metadata information from the document'")
|
||||||
.args_from_usage("-o, --output=[document.html] 'Writes output to <file>'")
|
.args_from_usage("-o, --output=[document.html] 'Writes output to <file>'")
|
||||||
.args_from_usage("-s, --silent 'Suppresses verbosity'")
|
.args_from_usage("-s, --silent 'Suppresses verbosity'")
|
||||||
.args_from_usage("-t, --timeout=[60] 'Adjusts network request timeout'")
|
.args_from_usage("-t, --timeout=[60] 'Adjusts network request timeout'")
|
||||||
|
@ -59,6 +61,7 @@ impl AppArgs {
|
||||||
app_args.no_images = app.is_present("no-images");
|
app_args.no_images = app.is_present("no-images");
|
||||||
app_args.no_js = app.is_present("no-js");
|
app_args.no_js = app.is_present("no-js");
|
||||||
app_args.insecure = app.is_present("insecure");
|
app_args.insecure = app.is_present("insecure");
|
||||||
|
app_args.no_metadata = app.is_present("no-metadata");
|
||||||
app_args.isolate = app.is_present("isolate");
|
app_args.isolate = app.is_present("isolate");
|
||||||
app_args.silent = app.is_present("silent");
|
app_args.silent = app.is_present("silent");
|
||||||
app_args.timeout = app
|
app_args.timeout = app
|
||||||
|
|
31
src/main.rs
31
src/main.rs
|
@ -2,6 +2,7 @@ use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets};
|
||||||
use monolith::utils::{data_url_to_text, is_data_url, is_file_url, is_http_url, retrieve_asset};
|
use monolith::utils::{data_url_to_text, is_data_url, is_file_url, is_http_url, retrieve_asset};
|
||||||
use reqwest::blocking::Client;
|
use reqwest::blocking::Client;
|
||||||
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
|
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
|
||||||
|
use reqwest::Url;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::env;
|
use std::env;
|
||||||
use std::fs;
|
use std::fs;
|
||||||
|
@ -133,6 +134,8 @@ fn main() {
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let time_saved = time::now_utc();
|
||||||
|
|
||||||
walk_and_embed_assets(
|
walk_and_embed_assets(
|
||||||
&mut cache,
|
&mut cache,
|
||||||
&client,
|
&client,
|
||||||
|
@ -146,7 +149,7 @@ fn main() {
|
||||||
app_args.silent,
|
app_args.silent,
|
||||||
);
|
);
|
||||||
|
|
||||||
let html: String = stringify_document(
|
let mut html: String = stringify_document(
|
||||||
&dom.document,
|
&dom.document,
|
||||||
app_args.no_css,
|
app_args.no_css,
|
||||||
app_args.no_frames,
|
app_args.no_frames,
|
||||||
|
@ -155,6 +158,32 @@ fn main() {
|
||||||
app_args.isolate,
|
app_args.isolate,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
if !app_args.no_metadata {
|
||||||
|
// Safe to unwrap (we just put this through an HTTP request)
|
||||||
|
let mut clean_url = Url::parse(&base_url).unwrap();
|
||||||
|
clean_url.set_fragment(None);
|
||||||
|
// Don't include credentials
|
||||||
|
clean_url.set_username("");
|
||||||
|
clean_url.set_password(None);
|
||||||
|
let metadata_comment = if is_http_url(&base_url) {
|
||||||
|
format!(
|
||||||
|
"<!--- Saved from {} on {} using {} v{} -->\n",
|
||||||
|
&clean_url,
|
||||||
|
time_saved.rfc3339(),
|
||||||
|
env!("CARGO_PKG_NAME"),
|
||||||
|
env!("CARGO_PKG_VERSION"),
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
format!(
|
||||||
|
"<!--- Saved from local source on {} using {} v{} -->\n",
|
||||||
|
time_saved.rfc3339(),
|
||||||
|
env!("CARGO_PKG_NAME"),
|
||||||
|
env!("CARGO_PKG_VERSION"),
|
||||||
|
)
|
||||||
|
};
|
||||||
|
html.insert_str(0, &metadata_comment);
|
||||||
|
}
|
||||||
|
|
||||||
output
|
output
|
||||||
.writeln_str(&html)
|
.writeln_str(&html)
|
||||||
.expect("Could not write HTML output");
|
.expect("Could not write HTML output");
|
||||||
|
|
|
@ -75,6 +75,7 @@ fn passing_bad_input_data_url() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
fn passing_isolate_data_url() -> Result<(), Box<dyn std::error::Error>> {
|
fn passing_isolate_data_url() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||||
let out = cmd
|
let out = cmd
|
||||||
|
.arg("-M")
|
||||||
.arg("-I")
|
.arg("-I")
|
||||||
.arg("data:text/html,Hello%2C%20World!")
|
.arg("data:text/html,Hello%2C%20World!")
|
||||||
.output()
|
.output()
|
||||||
|
@ -101,6 +102,7 @@ fn passing_isolate_data_url() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
fn passing_remove_css_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
|
fn passing_remove_css_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||||
let out = cmd
|
let out = cmd
|
||||||
|
.arg("-M")
|
||||||
.arg("-c")
|
.arg("-c")
|
||||||
.arg("data:text/html,<style>body{background-color:pink}</style>Hello")
|
.arg("data:text/html,<style>body{background-color:pink}</style>Hello")
|
||||||
.output()
|
.output()
|
||||||
|
@ -128,6 +130,7 @@ fn passing_remove_css_from_data_url() -> Result<(), Box<dyn std::error::Error>>
|
||||||
fn passing_remove_frames_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
|
fn passing_remove_frames_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||||
let out = cmd
|
let out = cmd
|
||||||
|
.arg("-M")
|
||||||
.arg("-f")
|
.arg("-f")
|
||||||
.arg("data:text/html,<iframe src=\"https://google.com\"></iframe>Hi")
|
.arg("data:text/html,<iframe src=\"https://google.com\"></iframe>Hi")
|
||||||
.output()
|
.output()
|
||||||
|
@ -154,6 +157,7 @@ fn passing_remove_frames_from_data_url() -> Result<(), Box<dyn std::error::Error
|
||||||
fn passing_remove_images_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
|
fn passing_remove_images_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||||
let out = cmd
|
let out = cmd
|
||||||
|
.arg("-M")
|
||||||
.arg("-i")
|
.arg("-i")
|
||||||
.arg("data:text/html,<img src=\"https://google.com\"/>Hi")
|
.arg("data:text/html,<img src=\"https://google.com\"/>Hi")
|
||||||
.output()
|
.output()
|
||||||
|
@ -189,6 +193,7 @@ Hi\
|
||||||
fn passing_remove_js_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
|
fn passing_remove_js_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||||
let out = cmd
|
let out = cmd
|
||||||
|
.arg("-M")
|
||||||
.arg("-j")
|
.arg("-j")
|
||||||
.arg("data:text/html,<script>alert(2)</script>Hi")
|
.arg("data:text/html,<script>alert(2)</script>Hi")
|
||||||
.output()
|
.output()
|
||||||
|
@ -220,6 +225,7 @@ fn passing_local_file_target_input() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
let cwd_normalized: String =
|
let cwd_normalized: String =
|
||||||
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
|
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
|
||||||
let out = cmd
|
let out = cmd
|
||||||
|
.arg("-M")
|
||||||
.arg(if cfg!(windows) {
|
.arg(if cfg!(windows) {
|
||||||
"src\\tests\\data\\local-file.html"
|
"src\\tests\\data\\local-file.html"
|
||||||
} else {
|
} else {
|
||||||
|
@ -274,6 +280,7 @@ fn passing_local_file_target_input_absolute_target_path() -> Result<(), Box<dyn
|
||||||
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
|
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
|
||||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||||
let out = cmd
|
let out = cmd
|
||||||
|
.arg("-M")
|
||||||
.arg("-jciI")
|
.arg("-jciI")
|
||||||
.arg(if cfg!(windows) {
|
.arg(if cfg!(windows) {
|
||||||
format!(
|
format!(
|
||||||
|
@ -334,6 +341,7 @@ fn passing_local_file_url_target_input() -> Result<(), Box<dyn std::error::Error
|
||||||
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
|
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
|
||||||
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
|
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
|
||||||
let out = cmd
|
let out = cmd
|
||||||
|
.arg("-M")
|
||||||
.arg("-cji")
|
.arg("-cji")
|
||||||
.arg(if cfg!(windows) {
|
.arg(if cfg!(windows) {
|
||||||
format!(
|
format!(
|
||||||
|
@ -401,6 +409,7 @@ fn passing_security_disallow_local_assets_within_data_url_targets(
|
||||||
) -> Result<(), Box<dyn std::error::Error>> {
|
) -> Result<(), Box<dyn std::error::Error>> {
|
||||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||||
let out = cmd
|
let out = cmd
|
||||||
|
.arg("-M")
|
||||||
.arg("data:text/html,%3Cscript%20src=\"src/tests/data/local-script.js\"%3E%3C/script%3E")
|
.arg("data:text/html,%3Cscript%20src=\"src/tests/data/local-script.js\"%3E%3C/script%3E")
|
||||||
.output()
|
.output()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
@ -438,7 +447,7 @@ fn passing_embed_file_url_local_asset_within_style_attribute(
|
||||||
file = file_url_prefix,
|
file = file_url_prefix,
|
||||||
path = str!(file_svg.path().to_str().unwrap()).replace("\\", "/"),
|
path = str!(file_svg.path().to_str().unwrap()).replace("\\", "/"),
|
||||||
)?;
|
)?;
|
||||||
let out = cmd.arg(file_html.path()).output().unwrap();
|
let out = cmd.arg("-M").arg(file_html.path()).output().unwrap();
|
||||||
|
|
||||||
// STDOUT should contain HTML with data URL for background-image in it
|
// STDOUT should contain HTML with data URL for background-image in it
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
|
@ -489,7 +498,7 @@ fn passing_css_import_string() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
file = file_url_prefix,
|
file = file_url_prefix,
|
||||||
css_path = str!(file_css.path().to_str().unwrap()).replace("\\", "/"),
|
css_path = str!(file_css.path().to_str().unwrap()).replace("\\", "/"),
|
||||||
)?;
|
)?;
|
||||||
let out = cmd.arg(file_html.path()).output().unwrap();
|
let out = cmd.arg("-M").arg(file_html.path()).output().unwrap();
|
||||||
|
|
||||||
// STDOUT should contain embedded CSS url()'s
|
// STDOUT should contain embedded CSS url()'s
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
|
|
|
@ -38,3 +38,11 @@ fn passing_removes_empty_query_amp_and_empty_fragment() {
|
||||||
"https://somewhere.com/font.eot?a=b"
|
"https://somewhere.com/font.eot?a=b"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn passing_keeps_credentials() {
|
||||||
|
assert_eq!(
|
||||||
|
utils::clean_url("https://cookie:monster@gibson.internet/"),
|
||||||
|
"https://cookie:monster@gibson.internet/"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue