From a891782ce4db2ed44aaaba51cb5c69fe457b359e Mon Sep 17 00:00:00 2001 From: Vincent Flyson Date: Fri, 23 Aug 2019 04:33:30 -0400 Subject: [PATCH] Add option to exclude images --- Cargo.toml | 2 +- src/html.rs | 33 ++++++++++++++++++++------------- src/main.rs | 14 +++++++------- 3 files changed, 28 insertions(+), 21 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 51a8d7e..279cdd2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "monolith" -version = "2.0.2" +version = "2.0.3" authors = ["Sunshine "] [dependencies] diff --git a/src/html.rs b/src/html.rs index d701d94..0ef386b 100644 --- a/src/html.rs +++ b/src/html.rs @@ -19,6 +19,8 @@ enum NodeMatch { Other, } +static PNG_PIXEL: &str = ""; + static JS_DOM_EVENT_ATTRS: [&str; 21] = [ // Input "onfocus", "onblur", "onselect", "onchange", "onsubmit", "onreset", "onkeydown", "onkeypress", "onkeyup", @@ -30,12 +32,12 @@ static JS_DOM_EVENT_ATTRS: [&str; 21] = [ "onload", "onunload", "onabort", "onerror", "onresize", ]; -pub fn walk_and_embed_assets(url: &str, node: &Handle, opt_no_js: bool) { +pub fn walk_and_embed_assets(url: &str, node: &Handle, opt_no_js: bool, opt_no_images: bool) { match node.data { NodeData::Document => { // Dig deeper for child in node.children.borrow().iter() { - walk_and_embed_assets(&url, child, opt_no_js); + walk_and_embed_assets(&url, child, opt_no_js, opt_no_images); } }, @@ -97,10 +99,15 @@ pub fn walk_and_embed_assets(url: &str, node: &Handle, opt_no_js: bool) { NodeMatch::Image => { for attr in attrs_mut.iter_mut() { if &attr.name.local == "src" { - let href_full_url = resolve_url(&url, &attr.value.to_string()); - let favicon_datauri = retrieve_asset(&href_full_url.unwrap(), true, ""); - attr.value.clear(); - attr.value.push_slice(favicon_datauri.unwrap().as_str()); + if opt_no_images { + attr.value.clear(); + attr.value.push_slice(PNG_PIXEL); + } else { + let src_full_url = resolve_url(&url, &attr.value.to_string()); + let img_datauri = retrieve_asset(&src_full_url.unwrap(), true, ""); + attr.value.clear(); + attr.value.push_slice(img_datauri.unwrap().as_str()); + } } } }, @@ -122,9 +129,9 @@ pub fn walk_and_embed_assets(url: &str, node: &Handle, opt_no_js: bool) { for attr in attrs_mut.iter_mut() { if &attr.name.local == "href" { let href_full_url = resolve_url(&url, &attr.value.to_string()); - let favicon_datauri = retrieve_asset(&href_full_url.unwrap(), true, "text/css"); + let css_datauri = retrieve_asset(&href_full_url.unwrap(), true, "text/css"); attr.value.clear(); - attr.value.push_slice(favicon_datauri.unwrap().as_str()); + attr.value.push_slice(css_datauri.unwrap().as_str()); } } }, @@ -140,10 +147,10 @@ pub fn walk_and_embed_assets(url: &str, node: &Handle, opt_no_js: bool) { } else { for attr in attrs_mut.iter_mut() { if &attr.name.local == "src" { - let href_full_url = resolve_url(&url, &attr.value.to_string()); - let favicon_datauri = retrieve_asset(&href_full_url.unwrap(), true, "application/javascript"); + let src_full_url = resolve_url(&url, &attr.value.to_string()); + let js_datauri = retrieve_asset(&src_full_url.unwrap(), true, "application/javascript"); attr.value.clear(); - attr.value.push_slice(favicon_datauri.unwrap().as_str()); + attr.value.push_slice(js_datauri.unwrap().as_str()); } } } @@ -176,7 +183,7 @@ pub fn walk_and_embed_assets(url: &str, node: &Handle, opt_no_js: bool) { // Dig deeper for child in node.children.borrow().iter() { - walk_and_embed_assets(&url, child, opt_no_js); + walk_and_embed_assets(&url, child, opt_no_js, opt_no_images); } }, @@ -191,7 +198,7 @@ pub fn html_to_dom(data: &str) -> html5ever::rcdom::RcDom { .unwrap() } -pub fn print_dom(handle: &Handle, _opt_isolate: bool) { +pub fn print_dom(handle: &Handle) { // TODO: append to the if opt_isolate serialize(&mut io::stdout(), handle, SerializeOpts::default()).unwrap(); } diff --git a/src/main.rs b/src/main.rs index f628495..983c5c2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,7 +7,7 @@ use monolith::html::{walk_and_embed_assets, html_to_dom, print_dom}; fn main() { let command = App::new("monolith") - .version("2.0.2") + .version("2.0.3") .author("Sunshine ") .about("CLI tool to save web pages as single HTML files") .arg(Arg::with_name("url") @@ -15,21 +15,21 @@ fn main() { .takes_value(true) .index(1) .help("URL to download")) - .args_from_usage("-j, --nojs 'Remove JavaScript'") - // .args_from_usage("-i, --isolate 'Isolate the document'") + .args_from_usage("-j, --no-js 'Excludes JavaScript'") + .args_from_usage("-i, --no-images 'Removes images'") .get_matches(); // Process the command let arg_target = command.value_of("url").unwrap(); - let opt_no_js = command.is_present("nojs"); - let opt_isolate = command.is_present("isolate"); + let opt_no_js = command.is_present("no-js"); + let opt_no_img = command.is_present("no-images"); if is_url(arg_target) { let data = retrieve_asset(&arg_target, false, ""); let dom = html_to_dom(&data.unwrap()); - walk_and_embed_assets(&arg_target, &dom.document, opt_no_js); + walk_and_embed_assets(&arg_target, &dom.document, opt_no_js, opt_no_img); - print_dom(&dom.document, opt_isolate); + print_dom(&dom.document); } }