diff --git a/src/html.rs b/src/html.rs index f13b542..fa10175 100644 --- a/src/html.rs +++ b/src/html.rs @@ -7,7 +7,6 @@ use html5ever::tree_builder::{Attribute, TreeSink}; use html5ever::{local_name, namespace_url, ns}; use http::retrieve_asset; use js::attr_is_event_handler; -use regex::Regex; use std::default::Default; use utils::{data_to_dataurl, is_valid_url, resolve_url, url_has_protocol}; @@ -103,7 +102,7 @@ pub fn walk_and_embed_assets( let href_full_url: String = resolve_url(&url, &attr.value.to_string()) .unwrap_or(EMPTY_STRING.clone()); - let favicon_datauri = retrieve_asset( + let (favicon_dataurl, _) = retrieve_asset( &href_full_url, true, "", @@ -111,9 +110,9 @@ pub fn walk_and_embed_assets( opt_silent, opt_insecure, ) - .unwrap_or(EMPTY_STRING.clone()); + .unwrap_or((EMPTY_STRING.clone(), EMPTY_STRING.clone())); attr.value.clear(); - attr.value.push_slice(favicon_datauri.as_str()); + attr.value.push_slice(favicon_dataurl.as_str()); } } } @@ -126,7 +125,7 @@ pub fn walk_and_embed_assets( let href_full_url: String = resolve_url(&url, &attr.value.to_string()) .unwrap_or(EMPTY_STRING.clone()); - let css_datauri = retrieve_asset( + let (css_dataurl, _) = retrieve_asset( &href_full_url, true, "text/css", @@ -134,9 +133,9 @@ pub fn walk_and_embed_assets( opt_silent, opt_insecure, ) - .unwrap_or(EMPTY_STRING.clone()); + .unwrap_or((EMPTY_STRING.clone(), EMPTY_STRING.clone())); attr.value.clear(); - attr.value.push_slice(css_datauri.as_str()); + attr.value.push_slice(css_dataurl.as_str()); } } } @@ -168,7 +167,7 @@ pub fn walk_and_embed_assets( } else { let src_full_url: String = resolve_url(&url, &value).unwrap_or(EMPTY_STRING.clone()); - let img_datauri = retrieve_asset( + let (img_dataurl, _) = retrieve_asset( &src_full_url, true, "", @@ -176,9 +175,9 @@ pub fn walk_and_embed_assets( opt_silent, opt_insecure, ) - .unwrap_or(EMPTY_STRING.clone()); + .unwrap_or((EMPTY_STRING.clone(), EMPTY_STRING.clone())); attr.value.clear(); - attr.value.push_slice(img_datauri.as_str()); + attr.value.push_slice(img_dataurl.as_str()); } } } @@ -201,7 +200,7 @@ pub fn walk_and_embed_assets( let srcset_full_url: String = resolve_url(&url, &attr.value.to_string()) .unwrap_or(EMPTY_STRING.clone()); - let source_datauri = retrieve_asset( + let (source_dataurl, _) = retrieve_asset( &srcset_full_url, true, "", @@ -209,9 +208,9 @@ pub fn walk_and_embed_assets( opt_silent, opt_insecure, ) - .unwrap_or(EMPTY_STRING.clone()); + .unwrap_or((EMPTY_STRING.clone(), EMPTY_STRING.clone())); attr.value.clear(); - attr.value.push_slice(source_datauri.as_str()); + attr.value.push_slice(source_dataurl.as_str()); } } } @@ -247,7 +246,7 @@ pub fn walk_and_embed_assets( let src_full_url: String = resolve_url(&url, &attr.value.to_string()) .unwrap_or(EMPTY_STRING.clone()); - let js_datauri = retrieve_asset( + let (js_dataurl, _) = retrieve_asset( &src_full_url, true, "application/javascript", @@ -255,9 +254,9 @@ pub fn walk_and_embed_assets( opt_silent, opt_insecure, ) - .unwrap_or(EMPTY_STRING.clone()); + .unwrap_or((EMPTY_STRING.clone(), EMPTY_STRING.clone())); attr.value.clear(); - attr.value.push_slice(js_datauri.as_str()); + attr.value.push_slice(js_dataurl.as_str()); } } } @@ -300,7 +299,7 @@ pub fn walk_and_embed_assets( let src_full_url: String = resolve_url(&url, &iframe_src).unwrap_or(EMPTY_STRING.clone()); - let iframe_data = retrieve_asset( + let (iframe_data, iframe_final_url) = retrieve_asset( &src_full_url, false, "text/html", @@ -308,10 +307,10 @@ pub fn walk_and_embed_assets( opt_silent, opt_insecure, ) - .unwrap_or(EMPTY_STRING.clone()); + .unwrap_or((EMPTY_STRING.clone(), src_full_url)); let dom = html_to_dom(&iframe_data); walk_and_embed_assets( - &src_full_url, + &iframe_final_url, &dom.document, opt_no_css, opt_no_js, @@ -323,9 +322,9 @@ pub fn walk_and_embed_assets( ); let mut buf: Vec = Vec::new(); serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); - let iframe_datauri = data_to_dataurl("text/html", &buf); + let iframe_dataurl = data_to_dataurl("text/html", &buf); attr.value.clear(); - attr.value.push_slice(iframe_datauri.as_str()); + attr.value.push_slice(iframe_dataurl.as_str()); } } } @@ -344,7 +343,7 @@ pub fn walk_and_embed_assets( } else { let poster_full_url: String = resolve_url(&url, &video_poster) .unwrap_or(EMPTY_STRING.clone()); - let img_datauri = retrieve_asset( + let (poster_dataurl, _) = retrieve_asset( &poster_full_url, true, "", @@ -352,9 +351,9 @@ pub fn walk_and_embed_assets( opt_silent, opt_insecure, ) - .unwrap_or(poster_full_url); + .unwrap_or((poster_full_url, EMPTY_STRING.clone())); attr.value.clear(); - attr.value.push_slice(img_datauri.as_str()); + attr.value.push_slice(poster_dataurl.as_str()); } } } diff --git a/src/http.rs b/src/http.rs index fe5e3ae..19bf902 100644 --- a/src/http.rs +++ b/src/http.rs @@ -6,26 +6,25 @@ use utils::{data_to_dataurl, is_data_url}; pub fn retrieve_asset( url: &str, as_dataurl: bool, - as_mime: &str, + mime: &str, opt_user_agent: &str, opt_silent: bool, opt_insecure: bool, -) -> Result { +) -> Result<(String, String), reqwest::Error> { if is_data_url(&url).unwrap() { - Ok(url.to_string()) + Ok((url.to_string(), url.to_string())) } else { let client = Client::builder() .timeout(Duration::from_secs(10)) .danger_accept_invalid_certs(opt_insecure) .build()?; let mut response = client.get(url).header(USER_AGENT, opt_user_agent).send()?; - let final_url = response.url().as_str(); if !opt_silent { - if url == final_url { + if url == response.url().as_str() { eprintln!("[ {} ]", &url); } else { - eprintln!("[ {} -> {} ]", &url, &final_url); + eprintln!("[ {} -> {} ]", &url, &response.url().as_str()); } } @@ -35,19 +34,22 @@ pub fn retrieve_asset( response.copy_to(&mut data)?; // Attempt to obtain MIME type by reading the Content-Type header - let mimetype = if as_mime == "" { + let mimetype = if mime == "" { response .headers() .get(CONTENT_TYPE) .and_then(|header| header.to_str().ok()) - .unwrap_or(&as_mime) + .unwrap_or(&mime) } else { - as_mime + mime }; - Ok(data_to_dataurl(&mimetype, &data)) + Ok(( + data_to_dataurl(&mimetype, &data), + response.url().to_string(), + )) } else { - Ok(response.text().unwrap()) + Ok((response.text().unwrap(), response.url().to_string())) } } } diff --git a/src/main.rs b/src/main.rs index ff1d29c..42a74a7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -46,7 +46,7 @@ fn main() { let opt_user_agent: &str = command.value_of("user-agent").unwrap_or(DEFAULT_USER_AGENT); if is_valid_url(arg_target) { - let data = retrieve_asset( + let (data, final_url) = retrieve_asset( &arg_target, false, "", @@ -58,7 +58,7 @@ fn main() { let dom = html_to_dom(&data); walk_and_embed_assets( - &arg_target, + &final_url, &dom.document, opt_no_css, opt_no_js, diff --git a/src/tests/http.rs b/src/tests/http.rs new file mode 100644 index 0000000..003919b --- /dev/null +++ b/src/tests/http.rs @@ -0,0 +1,21 @@ +use crate::http::retrieve_asset; + +#[test] +fn test_retrieve_asset() { + let (data, final_url) = + retrieve_asset("data:text/html;base64,...", true, "", "", true, false).unwrap(); + assert_eq!(&data, "data:text/html;base64,..."); + assert_eq!(&final_url, "data:text/html;base64,..."); + + let (data, final_url) = retrieve_asset( + "data:text/html;base64,...", + true, + "image/png", + "", + true, + false, + ) + .unwrap(); + assert_eq!(&data, "data:text/html;base64,..."); + assert_eq!(&final_url, "data:text/html;base64,..."); +} diff --git a/src/tests/mod.rs b/src/tests/mod.rs index 2efe36b..a77b631 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -1,3 +1,4 @@ mod html; +mod http; mod js; mod utils;