Add option to exclude images
This commit is contained in:
parent
399f9a38d6
commit
a891782ce4
3 changed files with 28 additions and 21 deletions
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "monolith"
|
name = "monolith"
|
||||||
version = "2.0.2"
|
version = "2.0.3"
|
||||||
authors = ["Sunshine <sunshine@uberspace.net>"]
|
authors = ["Sunshine <sunshine@uberspace.net>"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
|
31
src/html.rs
31
src/html.rs
|
@ -19,6 +19,8 @@ enum NodeMatch {
|
||||||
Other,
|
Other,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static PNG_PIXEL: &str = "";
|
||||||
|
|
||||||
static JS_DOM_EVENT_ATTRS: [&str; 21] = [
|
static JS_DOM_EVENT_ATTRS: [&str; 21] = [
|
||||||
// Input
|
// Input
|
||||||
"onfocus", "onblur", "onselect", "onchange", "onsubmit", "onreset", "onkeydown", "onkeypress", "onkeyup",
|
"onfocus", "onblur", "onselect", "onchange", "onsubmit", "onreset", "onkeydown", "onkeypress", "onkeyup",
|
||||||
|
@ -30,12 +32,12 @@ static JS_DOM_EVENT_ATTRS: [&str; 21] = [
|
||||||
"onload", "onunload", "onabort", "onerror", "onresize",
|
"onload", "onunload", "onabort", "onerror", "onresize",
|
||||||
];
|
];
|
||||||
|
|
||||||
pub fn walk_and_embed_assets(url: &str, node: &Handle, opt_no_js: bool) {
|
pub fn walk_and_embed_assets(url: &str, node: &Handle, opt_no_js: bool, opt_no_images: bool) {
|
||||||
match node.data {
|
match node.data {
|
||||||
NodeData::Document => {
|
NodeData::Document => {
|
||||||
// Dig deeper
|
// Dig deeper
|
||||||
for child in node.children.borrow().iter() {
|
for child in node.children.borrow().iter() {
|
||||||
walk_and_embed_assets(&url, child, opt_no_js);
|
walk_and_embed_assets(&url, child, opt_no_js, opt_no_images);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
|
@ -97,10 +99,15 @@ pub fn walk_and_embed_assets(url: &str, node: &Handle, opt_no_js: bool) {
|
||||||
NodeMatch::Image => {
|
NodeMatch::Image => {
|
||||||
for attr in attrs_mut.iter_mut() {
|
for attr in attrs_mut.iter_mut() {
|
||||||
if &attr.name.local == "src" {
|
if &attr.name.local == "src" {
|
||||||
let href_full_url = resolve_url(&url, &attr.value.to_string());
|
if opt_no_images {
|
||||||
let favicon_datauri = retrieve_asset(&href_full_url.unwrap(), true, "");
|
|
||||||
attr.value.clear();
|
attr.value.clear();
|
||||||
attr.value.push_slice(favicon_datauri.unwrap().as_str());
|
attr.value.push_slice(PNG_PIXEL);
|
||||||
|
} else {
|
||||||
|
let src_full_url = resolve_url(&url, &attr.value.to_string());
|
||||||
|
let img_datauri = retrieve_asset(&src_full_url.unwrap(), true, "");
|
||||||
|
attr.value.clear();
|
||||||
|
attr.value.push_slice(img_datauri.unwrap().as_str());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
@ -122,9 +129,9 @@ pub fn walk_and_embed_assets(url: &str, node: &Handle, opt_no_js: bool) {
|
||||||
for attr in attrs_mut.iter_mut() {
|
for attr in attrs_mut.iter_mut() {
|
||||||
if &attr.name.local == "href" {
|
if &attr.name.local == "href" {
|
||||||
let href_full_url = resolve_url(&url, &attr.value.to_string());
|
let href_full_url = resolve_url(&url, &attr.value.to_string());
|
||||||
let favicon_datauri = retrieve_asset(&href_full_url.unwrap(), true, "text/css");
|
let css_datauri = retrieve_asset(&href_full_url.unwrap(), true, "text/css");
|
||||||
attr.value.clear();
|
attr.value.clear();
|
||||||
attr.value.push_slice(favicon_datauri.unwrap().as_str());
|
attr.value.push_slice(css_datauri.unwrap().as_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
@ -140,10 +147,10 @@ pub fn walk_and_embed_assets(url: &str, node: &Handle, opt_no_js: bool) {
|
||||||
} else {
|
} else {
|
||||||
for attr in attrs_mut.iter_mut() {
|
for attr in attrs_mut.iter_mut() {
|
||||||
if &attr.name.local == "src" {
|
if &attr.name.local == "src" {
|
||||||
let href_full_url = resolve_url(&url, &attr.value.to_string());
|
let src_full_url = resolve_url(&url, &attr.value.to_string());
|
||||||
let favicon_datauri = retrieve_asset(&href_full_url.unwrap(), true, "application/javascript");
|
let js_datauri = retrieve_asset(&src_full_url.unwrap(), true, "application/javascript");
|
||||||
attr.value.clear();
|
attr.value.clear();
|
||||||
attr.value.push_slice(favicon_datauri.unwrap().as_str());
|
attr.value.push_slice(js_datauri.unwrap().as_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -176,7 +183,7 @@ pub fn walk_and_embed_assets(url: &str, node: &Handle, opt_no_js: bool) {
|
||||||
|
|
||||||
// Dig deeper
|
// Dig deeper
|
||||||
for child in node.children.borrow().iter() {
|
for child in node.children.borrow().iter() {
|
||||||
walk_and_embed_assets(&url, child, opt_no_js);
|
walk_and_embed_assets(&url, child, opt_no_js, opt_no_images);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
|
@ -191,7 +198,7 @@ pub fn html_to_dom(data: &str) -> html5ever::rcdom::RcDom {
|
||||||
.unwrap()
|
.unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn print_dom(handle: &Handle, _opt_isolate: bool) {
|
pub fn print_dom(handle: &Handle) {
|
||||||
// TODO: append <meta http-equiv="Access-Control-Allow-Origin" content="'self'"/> to the <head> if opt_isolate
|
// TODO: append <meta http-equiv="Access-Control-Allow-Origin" content="'self'"/> to the <head> if opt_isolate
|
||||||
serialize(&mut io::stdout(), handle, SerializeOpts::default()).unwrap();
|
serialize(&mut io::stdout(), handle, SerializeOpts::default()).unwrap();
|
||||||
}
|
}
|
||||||
|
|
14
src/main.rs
14
src/main.rs
|
@ -7,7 +7,7 @@ use monolith::html::{walk_and_embed_assets, html_to_dom, print_dom};
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let command = App::new("monolith")
|
let command = App::new("monolith")
|
||||||
.version("2.0.2")
|
.version("2.0.3")
|
||||||
.author("Sunshine <sunshine@uberspace.net>")
|
.author("Sunshine <sunshine@uberspace.net>")
|
||||||
.about("CLI tool to save web pages as single HTML files")
|
.about("CLI tool to save web pages as single HTML files")
|
||||||
.arg(Arg::with_name("url")
|
.arg(Arg::with_name("url")
|
||||||
|
@ -15,21 +15,21 @@ fn main() {
|
||||||
.takes_value(true)
|
.takes_value(true)
|
||||||
.index(1)
|
.index(1)
|
||||||
.help("URL to download"))
|
.help("URL to download"))
|
||||||
.args_from_usage("-j, --nojs 'Remove JavaScript'")
|
.args_from_usage("-j, --no-js 'Excludes JavaScript'")
|
||||||
// .args_from_usage("-i, --isolate 'Isolate the document'")
|
.args_from_usage("-i, --no-images 'Removes images'")
|
||||||
.get_matches();
|
.get_matches();
|
||||||
|
|
||||||
// Process the command
|
// Process the command
|
||||||
let arg_target = command.value_of("url").unwrap();
|
let arg_target = command.value_of("url").unwrap();
|
||||||
let opt_no_js = command.is_present("nojs");
|
let opt_no_js = command.is_present("no-js");
|
||||||
let opt_isolate = command.is_present("isolate");
|
let opt_no_img = command.is_present("no-images");
|
||||||
|
|
||||||
if is_url(arg_target) {
|
if is_url(arg_target) {
|
||||||
let data = retrieve_asset(&arg_target, false, "");
|
let data = retrieve_asset(&arg_target, false, "");
|
||||||
let dom = html_to_dom(&data.unwrap());
|
let dom = html_to_dom(&data.unwrap());
|
||||||
|
|
||||||
walk_and_embed_assets(&arg_target, &dom.document, opt_no_js);
|
walk_and_embed_assets(&arg_target, &dom.document, opt_no_js, opt_no_img);
|
||||||
|
|
||||||
print_dom(&dom.document, opt_isolate);
|
print_dom(&dom.document);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue