Add flag to ignore errors related to TLS certificates

This commit is contained in:
Vincent Flyson 2019-08-26 23:17:36 -04:00
parent fada7884dc
commit 02b717ae54
5 changed files with 88 additions and 59 deletions

View file

@ -1,6 +1,6 @@
[package]
name = "monolith"
version = "2.0.14"
version = "2.0.16"
authors = [
"Sunshine <sunshine@uberspace.net>",
"Mahdi Robatipoor <mahdi.robatipoor@gmail.com>",

View file

@ -23,6 +23,7 @@ If compared to saving websites with `wget -mpk`, this tool embeds all assets as
### Options
- `-i`: Remove images
- `-j`: Exclude JavaScript
- `-k`: Accept invalid X.509 (TLS) certificates
- `-s`: Silent mode
- `-u`: Specify custom User-Agent

View file

@ -70,18 +70,20 @@ pub fn walk_and_embed_assets(
opt_no_images: bool,
opt_user_agent: &str,
opt_silent: bool,
opt_insecure: bool,
) {
match node.data {
NodeData::Document => {
// Dig deeper
for child in node.children.borrow().iter() {
walk_and_embed_assets(
&url, child,
opt_no_js,
opt_no_images,
opt_user_agent,
opt_silent,
);
&url, child,
opt_no_js,
opt_no_images,
opt_user_agent,
opt_silent,
opt_insecure,
);
}
}
NodeData::Doctype { .. } => {}
@ -128,12 +130,14 @@ pub fn walk_and_embed_assets(
)
.unwrap_or(EMPTY_STRING.clone());
let favicon_datauri = retrieve_asset(
&href_full_url,
true,
"",
opt_user_agent,
opt_silent,
).unwrap_or(EMPTY_STRING.clone());
&href_full_url,
true,
"",
opt_user_agent,
opt_silent,
opt_insecure,
)
.unwrap_or(EMPTY_STRING.clone());
attr.value.clear();
attr.value.push_slice(favicon_datauri.as_str());
}
@ -148,12 +152,14 @@ pub fn walk_and_embed_assets(
)
.unwrap_or(EMPTY_STRING.clone());
let css_datauri = retrieve_asset(
&href_full_url,
true,
"text/css",
opt_user_agent,
opt_silent,
).unwrap_or(EMPTY_STRING.clone());
&href_full_url,
true,
"text/css",
opt_user_agent,
opt_silent,
opt_insecure,
)
.unwrap_or(EMPTY_STRING.clone());
attr.value.clear();
attr.value.push_slice(css_datauri.as_str());
}
@ -185,12 +191,14 @@ pub fn walk_and_embed_assets(
)
.unwrap_or(EMPTY_STRING.clone());
let img_datauri = retrieve_asset(
&src_full_url,
true,
"",
opt_user_agent,
opt_silent,
).unwrap_or(EMPTY_STRING.clone());
&src_full_url,
true,
"",
opt_user_agent,
opt_silent,
opt_insecure,
)
.unwrap_or(EMPTY_STRING.clone());
attr.value.clear();
attr.value.push_slice(img_datauri.as_str());
}
@ -211,12 +219,14 @@ pub fn walk_and_embed_assets(
)
.unwrap_or(EMPTY_STRING.clone());
let source_datauri = retrieve_asset(
&srcset_full_url,
true,
"",
opt_user_agent,
opt_silent,
).unwrap_or(EMPTY_STRING.clone());
&srcset_full_url,
true,
"",
opt_user_agent,
opt_silent,
opt_insecure,
)
.unwrap_or(EMPTY_STRING.clone());
attr.value.clear();
attr.value.push_slice(source_datauri.as_str());
}
@ -257,12 +267,14 @@ pub fn walk_and_embed_assets(
)
.unwrap_or(EMPTY_STRING.clone());
let js_datauri = retrieve_asset(
&src_full_url,
true,
"application/javascript",
opt_user_agent,
opt_silent,
).unwrap_or(EMPTY_STRING.clone());
&src_full_url,
true,
"application/javascript",
opt_user_agent,
opt_silent,
opt_insecure,
)
.unwrap_or(EMPTY_STRING.clone());
attr.value.clear();
attr.value.push_slice(js_datauri.as_str());
}
@ -290,21 +302,24 @@ pub fn walk_and_embed_assets(
let src_full_url: String = resolve_url(&url, &attr.value.to_string())
.unwrap_or(EMPTY_STRING.clone());
let iframe_data = retrieve_asset(
&src_full_url,
false,
"text/html",
opt_user_agent,
opt_silent,
).unwrap_or(EMPTY_STRING.clone());
&src_full_url,
false,
"text/html",
opt_user_agent,
opt_silent,
opt_insecure,
)
.unwrap_or(EMPTY_STRING.clone());
let dom = html_to_dom(&iframe_data);
walk_and_embed_assets(
&src_full_url,
&dom.document,
opt_no_js,
opt_no_images,
opt_user_agent,
opt_silent,
);
&src_full_url,
&dom.document,
opt_no_js,
opt_no_images,
opt_user_agent,
opt_silent,
opt_insecure,
);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
let iframe_datauri = data_to_dataurl("text/html", &buf);
@ -328,13 +343,14 @@ pub fn walk_and_embed_assets(
// Dig deeper
for child in node.children.borrow().iter() {
walk_and_embed_assets(
&url,
child,
opt_no_js,
opt_no_images,
opt_user_agent,
opt_silent,
);
&url,
child,
opt_no_js,
opt_no_images,
opt_user_agent,
opt_silent,
opt_insecure,
);
}
}
NodeData::ProcessingInstruction { .. } => unreachable!()

View file

@ -36,13 +36,15 @@ pub fn retrieve_asset(
as_dataurl: bool,
as_mime: &str,
opt_user_agent: &str,
opt_silent: bool
opt_silent: bool,
opt_insecure: bool,
) -> Result<String, reqwest::Error> {
if is_data_url(&url).unwrap() {
Ok(url.to_string())
} else {
let client = Client::builder()
.timeout(Duration::from_secs(10))
.danger_accept_invalid_certs(opt_insecure)
.build()?;
let mut response = client
.get(url)

View file

@ -23,6 +23,7 @@ fn main() {
)
.args_from_usage("-i, --no-images 'Removes images'")
.args_from_usage("-j, --no-js 'Excludes JavaScript'")
.args_from_usage("-k, --insecure 'Accept invalid X.509 (TLS) certificates'")
.args_from_usage("-s, --silent 'Suppress verbosity'")
.args_from_usage("-u, --user-agent=[Iceweasel] 'Custom User-Agent string'")
.get_matches();
@ -31,11 +32,19 @@ fn main() {
let arg_target = command.value_of("url").unwrap();
let opt_no_images = command.is_present("no-images");
let opt_no_js = command.is_present("no-js");
let opt_insecure = command.is_present("insecure");
let opt_silent = command.is_present("silent");
let opt_user_agent = command.value_of("user-agent").unwrap_or(DEFAULT_USER_AGENT);
if is_valid_url(arg_target) {
let data = retrieve_asset(&arg_target, false, "", opt_user_agent, opt_silent).unwrap();
let data = retrieve_asset(
&arg_target,
false,
"",
opt_user_agent,
opt_silent,
opt_insecure,
).unwrap();
let dom = html_to_dom(&data);
walk_and_embed_assets(
@ -45,6 +54,7 @@ fn main() {
opt_no_images,
opt_user_agent,
opt_silent,
opt_insecure,
);
print_dom(&dom.document);