diff --git a/src/html.rs b/src/html.rs
index f13b542..fa10175 100644
--- a/src/html.rs
+++ b/src/html.rs
@@ -7,7 +7,6 @@ use html5ever::tree_builder::{Attribute, TreeSink};
use html5ever::{local_name, namespace_url, ns};
use http::retrieve_asset;
use js::attr_is_event_handler;
-use regex::Regex;
use std::default::Default;
use utils::{data_to_dataurl, is_valid_url, resolve_url, url_has_protocol};
@@ -103,7 +102,7 @@ pub fn walk_and_embed_assets(
let href_full_url: String =
resolve_url(&url, &attr.value.to_string())
.unwrap_or(EMPTY_STRING.clone());
- let favicon_datauri = retrieve_asset(
+ let (favicon_dataurl, _) = retrieve_asset(
&href_full_url,
true,
"",
@@ -111,9 +110,9 @@ pub fn walk_and_embed_assets(
opt_silent,
opt_insecure,
)
- .unwrap_or(EMPTY_STRING.clone());
+ .unwrap_or((EMPTY_STRING.clone(), EMPTY_STRING.clone()));
attr.value.clear();
- attr.value.push_slice(favicon_datauri.as_str());
+ attr.value.push_slice(favicon_dataurl.as_str());
}
}
}
@@ -126,7 +125,7 @@ pub fn walk_and_embed_assets(
let href_full_url: String =
resolve_url(&url, &attr.value.to_string())
.unwrap_or(EMPTY_STRING.clone());
- let css_datauri = retrieve_asset(
+ let (css_dataurl, _) = retrieve_asset(
&href_full_url,
true,
"text/css",
@@ -134,9 +133,9 @@ pub fn walk_and_embed_assets(
opt_silent,
opt_insecure,
)
- .unwrap_or(EMPTY_STRING.clone());
+ .unwrap_or((EMPTY_STRING.clone(), EMPTY_STRING.clone()));
attr.value.clear();
- attr.value.push_slice(css_datauri.as_str());
+ attr.value.push_slice(css_dataurl.as_str());
}
}
}
@@ -168,7 +167,7 @@ pub fn walk_and_embed_assets(
} else {
let src_full_url: String =
resolve_url(&url, &value).unwrap_or(EMPTY_STRING.clone());
- let img_datauri = retrieve_asset(
+ let (img_dataurl, _) = retrieve_asset(
&src_full_url,
true,
"",
@@ -176,9 +175,9 @@ pub fn walk_and_embed_assets(
opt_silent,
opt_insecure,
)
- .unwrap_or(EMPTY_STRING.clone());
+ .unwrap_or((EMPTY_STRING.clone(), EMPTY_STRING.clone()));
attr.value.clear();
- attr.value.push_slice(img_datauri.as_str());
+ attr.value.push_slice(img_dataurl.as_str());
}
}
}
@@ -201,7 +200,7 @@ pub fn walk_and_embed_assets(
let srcset_full_url: String =
resolve_url(&url, &attr.value.to_string())
.unwrap_or(EMPTY_STRING.clone());
- let source_datauri = retrieve_asset(
+ let (source_dataurl, _) = retrieve_asset(
&srcset_full_url,
true,
"",
@@ -209,9 +208,9 @@ pub fn walk_and_embed_assets(
opt_silent,
opt_insecure,
)
- .unwrap_or(EMPTY_STRING.clone());
+ .unwrap_or((EMPTY_STRING.clone(), EMPTY_STRING.clone()));
attr.value.clear();
- attr.value.push_slice(source_datauri.as_str());
+ attr.value.push_slice(source_dataurl.as_str());
}
}
}
@@ -247,7 +246,7 @@ pub fn walk_and_embed_assets(
let src_full_url: String =
resolve_url(&url, &attr.value.to_string())
.unwrap_or(EMPTY_STRING.clone());
- let js_datauri = retrieve_asset(
+ let (js_dataurl, _) = retrieve_asset(
&src_full_url,
true,
"application/javascript",
@@ -255,9 +254,9 @@ pub fn walk_and_embed_assets(
opt_silent,
opt_insecure,
)
- .unwrap_or(EMPTY_STRING.clone());
+ .unwrap_or((EMPTY_STRING.clone(), EMPTY_STRING.clone()));
attr.value.clear();
- attr.value.push_slice(js_datauri.as_str());
+ attr.value.push_slice(js_dataurl.as_str());
}
}
}
@@ -300,7 +299,7 @@ pub fn walk_and_embed_assets(
let src_full_url: String =
resolve_url(&url, &iframe_src).unwrap_or(EMPTY_STRING.clone());
- let iframe_data = retrieve_asset(
+ let (iframe_data, iframe_final_url) = retrieve_asset(
&src_full_url,
false,
"text/html",
@@ -308,10 +307,10 @@ pub fn walk_and_embed_assets(
opt_silent,
opt_insecure,
)
- .unwrap_or(EMPTY_STRING.clone());
+ .unwrap_or((EMPTY_STRING.clone(), src_full_url));
let dom = html_to_dom(&iframe_data);
walk_and_embed_assets(
- &src_full_url,
+ &iframe_final_url,
&dom.document,
opt_no_css,
opt_no_js,
@@ -323,9 +322,9 @@ pub fn walk_and_embed_assets(
);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
- let iframe_datauri = data_to_dataurl("text/html", &buf);
+ let iframe_dataurl = data_to_dataurl("text/html", &buf);
attr.value.clear();
- attr.value.push_slice(iframe_datauri.as_str());
+ attr.value.push_slice(iframe_dataurl.as_str());
}
}
}
@@ -344,7 +343,7 @@ pub fn walk_and_embed_assets(
} else {
let poster_full_url: String = resolve_url(&url, &video_poster)
.unwrap_or(EMPTY_STRING.clone());
- let img_datauri = retrieve_asset(
+ let (poster_dataurl, _) = retrieve_asset(
&poster_full_url,
true,
"",
@@ -352,9 +351,9 @@ pub fn walk_and_embed_assets(
opt_silent,
opt_insecure,
)
- .unwrap_or(poster_full_url);
+ .unwrap_or((poster_full_url, EMPTY_STRING.clone()));
attr.value.clear();
- attr.value.push_slice(img_datauri.as_str());
+ attr.value.push_slice(poster_dataurl.as_str());
}
}
}
diff --git a/src/http.rs b/src/http.rs
index fe5e3ae..19bf902 100644
--- a/src/http.rs
+++ b/src/http.rs
@@ -6,26 +6,25 @@ use utils::{data_to_dataurl, is_data_url};
pub fn retrieve_asset(
url: &str,
as_dataurl: bool,
- as_mime: &str,
+ mime: &str,
opt_user_agent: &str,
opt_silent: bool,
opt_insecure: bool,
-) -> Result<String, reqwest::Error> {
+) -> Result<(String, String), reqwest::Error> {
if is_data_url(&url).unwrap() {
- Ok(url.to_string())
+ Ok((url.to_string(), url.to_string()))
} else {
let client = Client::builder()
.timeout(Duration::from_secs(10))
.danger_accept_invalid_certs(opt_insecure)
.build()?;
let mut response = client.get(url).header(USER_AGENT, opt_user_agent).send()?;
- let final_url = response.url().as_str();
if !opt_silent {
- if url == final_url {
+ if url == response.url().as_str() {
eprintln!("[ {} ]", &url);
} else {
- eprintln!("[ {} -> {} ]", &url, &final_url);
+ eprintln!("[ {} -> {} ]", &url, &response.url().as_str());
}
}
@@ -35,19 +34,22 @@ pub fn retrieve_asset(
response.copy_to(&mut data)?;
// Attempt to obtain MIME type by reading the Content-Type header
- let mimetype = if as_mime == "" {
+ let mimetype = if mime == "" {
response
.headers()
.get(CONTENT_TYPE)
.and_then(|header| header.to_str().ok())
- .unwrap_or(&as_mime)
+ .unwrap_or(&mime)
} else {
- as_mime
+ mime
};
- Ok(data_to_dataurl(&mimetype, &data))
+ Ok((
+ data_to_dataurl(&mimetype, &data),
+ response.url().to_string(),
+ ))
} else {
- Ok(response.text().unwrap())
+ Ok((response.text().unwrap(), response.url().to_string()))
}
}
}
diff --git a/src/main.rs b/src/main.rs
index ff1d29c..42a74a7 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -46,7 +46,7 @@ fn main() {
let opt_user_agent: &str = command.value_of("user-agent").unwrap_or(DEFAULT_USER_AGENT);
if is_valid_url(arg_target) {
- let data = retrieve_asset(
+ let (data, final_url) = retrieve_asset(
&arg_target,
false,
"",
@@ -58,7 +58,7 @@ fn main() {
let dom = html_to_dom(&data);
walk_and_embed_assets(
- &arg_target,
+ &final_url,
&dom.document,
opt_no_css,
opt_no_js,
diff --git a/src/tests/http.rs b/src/tests/http.rs
new file mode 100644
index 0000000..003919b
--- /dev/null
+++ b/src/tests/http.rs
@@ -0,0 +1,21 @@
+use crate::http::retrieve_asset;
+// Data URLs must come back from `retrieve_asset` unchanged, as both the data and the final URL.
+#[test]
+fn test_retrieve_asset() {
+    let (data, final_url) =
+        retrieve_asset("data:text/html;base64,...", true, "", "", true, false).unwrap();
+    assert_eq!(&data, "data:text/html;base64,...");
+    assert_eq!(&final_url, "data:text/html;base64,...");
+    // Same input with an explicit MIME type: the result is expected to be identical.
+    let (data, final_url) = retrieve_asset(
+        "data:text/html;base64,...",
+        true,
+        "image/png",
+        "",
+        true,
+        false,
+    )
+    .unwrap();
+    assert_eq!(&data, "data:text/html;base64,...");
+    assert_eq!(&final_url, "data:text/html;base64,...");
+}
diff --git a/src/tests/mod.rs b/src/tests/mod.rs
index 2efe36b..a77b631 100644
--- a/src/tests/mod.rs
+++ b/src/tests/mod.rs
@@ -1,3 +1,4 @@
mod html;
+mod http;
mod js;
mod utils;