commit
413dd66886
5 changed files with 51 additions and 50 deletions
|
@ -58,7 +58,7 @@ impl AppArgs {
|
|||
app_args.output = app.value_of("output").unwrap_or("").to_string();
|
||||
app_args.user_agent = app
|
||||
.value_of("user-agent")
|
||||
.unwrap_or_else(|| DEFAULT_USER_AGENT)
|
||||
.unwrap_or(DEFAULT_USER_AGENT)
|
||||
.to_string();
|
||||
app_args
|
||||
}
|
||||
|
|
80
src/html.rs
80
src/html.rs
|
@ -14,7 +14,7 @@ use reqwest::Client;
|
|||
use std::collections::HashMap;
|
||||
use std::default::Default;
|
||||
|
||||
const ICON_VALUES: [&str; 5] = [
|
||||
const ICON_VALUES: &[&str] = &[
|
||||
"icon",
|
||||
"shortcut icon",
|
||||
"mask-icon",
|
||||
|
@ -31,15 +31,18 @@ pub fn get_parent_node(node: &Handle) -> Handle {
|
|||
parent.and_then(|node| node.upgrade()).unwrap()
|
||||
}
|
||||
|
||||
pub fn get_node_name(node: &Handle) -> String {
|
||||
pub fn get_node_name(node: &Handle) -> &'_ str {
|
||||
match &node.data {
|
||||
NodeData::Element { ref name, .. } => name.local.as_ref().to_string(),
|
||||
_ => str!(),
|
||||
NodeData::Element { ref name, .. } => name.local.as_ref(),
|
||||
_ => "",
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_icon(attr_value: &str) -> bool {
|
||||
ICON_VALUES.contains(&&*attr_value.to_lowercase())
|
||||
ICON_VALUES
|
||||
.iter()
|
||||
.find(|a| attr_value.eq_ignore_ascii_case(a))
|
||||
.is_some()
|
||||
}
|
||||
|
||||
pub fn walk_and_embed_assets(
|
||||
|
@ -94,10 +97,10 @@ pub fn walk_and_embed_assets(
|
|||
|
||||
for attr in attrs_mut.iter_mut() {
|
||||
if &attr.name.local == "rel" {
|
||||
if is_icon(&attr.value.to_string()) {
|
||||
if is_icon(attr.value.as_ref()) {
|
||||
link_type = "icon";
|
||||
break;
|
||||
} else if attr.value.to_string() == "stylesheet" {
|
||||
} else if attr.value.as_ref() == "stylesheet" {
|
||||
link_type = "stylesheet";
|
||||
break;
|
||||
}
|
||||
|
@ -110,9 +113,8 @@ pub fn walk_and_embed_assets(
|
|||
if opt_no_images {
|
||||
attr.value.clear();
|
||||
} else {
|
||||
let href_full_url: String =
|
||||
resolve_url(&url, &attr.value.to_string())
|
||||
.unwrap_or(str!());
|
||||
let href_full_url =
|
||||
resolve_url(&url, attr.value.as_ref()).unwrap_or_default();
|
||||
let (favicon_dataurl, _) = retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
|
@ -121,7 +123,7 @@ pub fn walk_and_embed_assets(
|
|||
"",
|
||||
opt_silent,
|
||||
)
|
||||
.unwrap_or((str!(), str!()));
|
||||
.unwrap_or_default();
|
||||
attr.value.clear();
|
||||
attr.value.push_slice(favicon_dataurl.as_str());
|
||||
}
|
||||
|
@ -133,9 +135,8 @@ pub fn walk_and_embed_assets(
|
|||
if opt_no_css {
|
||||
attr.value.clear();
|
||||
} else {
|
||||
let href_full_url: String =
|
||||
resolve_url(&url, &attr.value.to_string())
|
||||
.unwrap_or(str!());
|
||||
let href_full_url =
|
||||
resolve_url(&url, &attr.value.as_ref()).unwrap_or_default();
|
||||
let replacement_text = match retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
|
@ -157,7 +158,7 @@ pub fn walk_and_embed_assets(
|
|||
|
||||
// If a network error occurred, warn
|
||||
Err(e) => {
|
||||
eprintln!("Warning: {}", e,);
|
||||
eprintln!("Warning: {}", e);
|
||||
|
||||
// If failed to resolve, replace with absolute URL
|
||||
href_full_url
|
||||
|
@ -172,8 +173,8 @@ pub fn walk_and_embed_assets(
|
|||
} else {
|
||||
for attr in attrs_mut.iter_mut() {
|
||||
if &attr.name.local == "href" {
|
||||
let href_full_url: String =
|
||||
resolve_url(&url, &attr.value.to_string()).unwrap_or(str!());
|
||||
let href_full_url =
|
||||
resolve_url(&url, attr.value.as_ref()).unwrap_or_default();
|
||||
attr.value.clear();
|
||||
attr.value.push_slice(&href_full_url.as_str());
|
||||
}
|
||||
|
@ -231,8 +232,8 @@ pub fn walk_and_embed_assets(
|
|||
let attr_name: &str = &attr.name.local;
|
||||
|
||||
if attr_name == "src" {
|
||||
let src_full_url: String = resolve_url(&url, &attr.value.to_string())
|
||||
.unwrap_or(attr.value.to_string());
|
||||
let src_full_url = resolve_url(&url, attr.value.as_ref())
|
||||
.unwrap_or_else(|_| attr.value.to_string());
|
||||
attr.value.clear();
|
||||
attr.value.push_slice(src_full_url.as_str());
|
||||
} else if attr_name == "srcset" {
|
||||
|
@ -241,9 +242,8 @@ pub fn walk_and_embed_assets(
|
|||
attr.value.clear();
|
||||
attr.value.push_slice(TRANSPARENT_PIXEL);
|
||||
} else {
|
||||
let srcset_full_url: String =
|
||||
resolve_url(&url, &attr.value.to_string())
|
||||
.unwrap_or(str!());
|
||||
let srcset_full_url =
|
||||
resolve_url(&url, attr.value.as_ref()).unwrap_or_default();
|
||||
let (source_dataurl, _) = retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
|
@ -268,8 +268,8 @@ pub fn walk_and_embed_assets(
|
|||
continue;
|
||||
}
|
||||
|
||||
let href_full_url: String =
|
||||
resolve_url(&url, &attr.value.to_string()).unwrap_or(str!());
|
||||
let href_full_url =
|
||||
resolve_url(&url, attr.value.as_ref()).unwrap_or_default();
|
||||
attr.value.clear();
|
||||
attr.value.push_slice(href_full_url.as_str());
|
||||
}
|
||||
|
@ -298,8 +298,8 @@ pub fn walk_and_embed_assets(
|
|||
} else {
|
||||
for attr in attrs_mut.iter_mut() {
|
||||
if &attr.name.local == "src" {
|
||||
let src_full_url: String =
|
||||
resolve_url(&url, &attr.value.to_string()).unwrap_or(str!());
|
||||
let src_full_url =
|
||||
resolve_url(&url, attr.value.as_ref()).unwrap_or_default();
|
||||
let (js_dataurl, _) = retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
|
@ -343,8 +343,8 @@ pub fn walk_and_embed_assets(
|
|||
if &attr.name.local == "action" {
|
||||
// Modify action to be a full URL
|
||||
if !is_valid_url(&attr.value) {
|
||||
let href_full_url: String =
|
||||
resolve_url(&url, &attr.value.to_string()).unwrap_or(str!());
|
||||
let href_full_url =
|
||||
resolve_url(&url, attr.value.as_ref()).unwrap_or_default();
|
||||
attr.value.clear();
|
||||
attr.value.push_slice(href_full_url.as_str());
|
||||
}
|
||||
|
@ -360,15 +360,14 @@ pub fn walk_and_embed_assets(
|
|||
continue;
|
||||
}
|
||||
|
||||
let iframe_src: String = attr.value.to_string();
|
||||
let iframe_src = attr.value.as_ref();
|
||||
|
||||
// Ignore iframes with empty source (they cause infinite loops)
|
||||
if iframe_src == str!() {
|
||||
if iframe_src.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let src_full_url: String =
|
||||
resolve_url(&url, &iframe_src).unwrap_or(str!());
|
||||
let src_full_url = resolve_url(&url, iframe_src).unwrap_or_default();
|
||||
let (iframe_data, iframe_final_url) = retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
|
@ -401,18 +400,18 @@ pub fn walk_and_embed_assets(
|
|||
"video" => {
|
||||
for attr in attrs_mut.iter_mut() {
|
||||
if &attr.name.local == "poster" {
|
||||
let video_poster = attr.value.to_string();
|
||||
let video_poster = attr.value.as_ref();
|
||||
|
||||
// Skip posters with empty source
|
||||
if video_poster == str!() {
|
||||
if video_poster.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
if opt_no_images {
|
||||
attr.value.clear();
|
||||
} else {
|
||||
let poster_full_url: String =
|
||||
resolve_url(&url, &video_poster).unwrap_or(str!());
|
||||
let poster_full_url =
|
||||
resolve_url(&url, video_poster).unwrap_or_default();
|
||||
let (poster_dataurl, _) = retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
|
@ -517,9 +516,7 @@ fn get_child_node_by_name(handle: &Handle, node_name: &str) -> Handle {
|
|||
});
|
||||
match matching_children {
|
||||
Some(node) => node.clone(),
|
||||
_ => {
|
||||
return handle.clone();
|
||||
}
|
||||
_ => handle.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -535,7 +532,7 @@ pub fn stringify_document(
|
|||
serialize(&mut buf, handle, SerializeOpts::default())
|
||||
.expect("unable to serialize DOM into buffer");
|
||||
|
||||
let mut result: String = String::from_utf8(buf).unwrap();
|
||||
let mut result = String::from_utf8(buf).unwrap();
|
||||
|
||||
if opt_isolate || opt_no_css || opt_no_frames || opt_no_js || opt_no_images {
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
|
@ -559,7 +556,6 @@ pub fn stringify_document(
|
|||
if opt_no_images {
|
||||
content_attr += " img-src data:;";
|
||||
}
|
||||
content_attr = content_attr.trim().to_string();
|
||||
|
||||
let meta = dom.create_element(
|
||||
QualName::new(None, ns!(), local_name!("meta")),
|
||||
|
@ -570,7 +566,7 @@ pub fn stringify_document(
|
|||
},
|
||||
Attribute {
|
||||
name: QualName::new(None, ns!(), local_name!("content")),
|
||||
value: format_tendril!("{}", content_attr),
|
||||
value: format_tendril!("{}", content_attr.trim()),
|
||||
},
|
||||
],
|
||||
Default::default(),
|
||||
|
|
|
@ -28,14 +28,15 @@ pub fn retrieve_asset(
|
|||
let mut response = client.get(url).send()?;
|
||||
|
||||
if !opt_silent {
|
||||
if url == response.url().as_str() {
|
||||
let res_url = response.url().as_str();
|
||||
if url == res_url {
|
||||
eprintln!("{}", &url);
|
||||
} else {
|
||||
eprintln!("{} -> {}", &url, &response.url().as_str());
|
||||
eprintln!("{} -> {}", &url, res_url);
|
||||
}
|
||||
}
|
||||
|
||||
let new_cache_key = clean_url(response.url().to_string());
|
||||
let new_cache_key = clean_url(response.url());
|
||||
|
||||
if as_dataurl {
|
||||
// Convert response into a byte array
|
||||
|
@ -54,7 +55,7 @@ pub fn retrieve_asset(
|
|||
};
|
||||
let dataurl = data_to_dataurl(&mimetype, &data);
|
||||
// insert in cache
|
||||
cache.insert(new_cache_key, dataurl.to_string());
|
||||
cache.insert(new_cache_key, dataurl.clone());
|
||||
Ok((dataurl, response.url().to_string()))
|
||||
} else {
|
||||
let content = response.text().unwrap();
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
const JS_DOM_EVENT_ATTRS: [&str; 21] = [
|
||||
const JS_DOM_EVENT_ATTRS: &[&str] = &[
|
||||
// Input
|
||||
"onfocus",
|
||||
"onblur",
|
||||
|
@ -28,5 +28,8 @@ const JS_DOM_EVENT_ATTRS: [&str; 21] = [
|
|||
|
||||
// Returns true if DOM attribute name matches a native JavaScript event handler
|
||||
pub fn attr_is_event_handler(attr_name: &str) -> bool {
|
||||
JS_DOM_EVENT_ATTRS.contains(&attr_name.to_lowercase().as_str())
|
||||
JS_DOM_EVENT_ATTRS
|
||||
.iter()
|
||||
.find(|a| attr_name.eq_ignore_ascii_case(a))
|
||||
.is_some()
|
||||
}
|
||||
|
|
|
@ -33,7 +33,8 @@ fn test_get_parent_node_name() {
|
|||
}
|
||||
NodeData::Element { ref name, .. } => {
|
||||
let node_name = name.local.as_ref().to_string();
|
||||
let parent_node_name = get_node_name(&get_parent_node(node));
|
||||
let parent = get_parent_node(node);
|
||||
let parent_node_name = get_node_name(&parent);
|
||||
if node_name == "head" || node_name == "body" {
|
||||
assert_eq!(parent_node_name, "html");
|
||||
} else if node_name == "div" {
|
||||
|
|
Loading…
Reference in a new issue