commit
413dd66886
@ -58,7 +58,7 @@ impl AppArgs {
|
|||||||
app_args.output = app.value_of("output").unwrap_or("").to_string();
|
app_args.output = app.value_of("output").unwrap_or("").to_string();
|
||||||
app_args.user_agent = app
|
app_args.user_agent = app
|
||||||
.value_of("user-agent")
|
.value_of("user-agent")
|
||||||
.unwrap_or_else(|| DEFAULT_USER_AGENT)
|
.unwrap_or(DEFAULT_USER_AGENT)
|
||||||
.to_string();
|
.to_string();
|
||||||
app_args
|
app_args
|
||||||
}
|
}
|
||||||
|
80
src/html.rs
80
src/html.rs
@ -14,7 +14,7 @@ use reqwest::Client;
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::default::Default;
|
use std::default::Default;
|
||||||
|
|
||||||
const ICON_VALUES: [&str; 5] = [
|
const ICON_VALUES: &[&str] = &[
|
||||||
"icon",
|
"icon",
|
||||||
"shortcut icon",
|
"shortcut icon",
|
||||||
"mask-icon",
|
"mask-icon",
|
||||||
@ -31,15 +31,18 @@ pub fn get_parent_node(node: &Handle) -> Handle {
|
|||||||
parent.and_then(|node| node.upgrade()).unwrap()
|
parent.and_then(|node| node.upgrade()).unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_node_name(node: &Handle) -> String {
|
pub fn get_node_name(node: &Handle) -> &'_ str {
|
||||||
match &node.data {
|
match &node.data {
|
||||||
NodeData::Element { ref name, .. } => name.local.as_ref().to_string(),
|
NodeData::Element { ref name, .. } => name.local.as_ref(),
|
||||||
_ => str!(),
|
_ => "",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_icon(attr_value: &str) -> bool {
|
pub fn is_icon(attr_value: &str) -> bool {
|
||||||
ICON_VALUES.contains(&&*attr_value.to_lowercase())
|
ICON_VALUES
|
||||||
|
.iter()
|
||||||
|
.find(|a| attr_value.eq_ignore_ascii_case(a))
|
||||||
|
.is_some()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn walk_and_embed_assets(
|
pub fn walk_and_embed_assets(
|
||||||
@ -94,10 +97,10 @@ pub fn walk_and_embed_assets(
|
|||||||
|
|
||||||
for attr in attrs_mut.iter_mut() {
|
for attr in attrs_mut.iter_mut() {
|
||||||
if &attr.name.local == "rel" {
|
if &attr.name.local == "rel" {
|
||||||
if is_icon(&attr.value.to_string()) {
|
if is_icon(attr.value.as_ref()) {
|
||||||
link_type = "icon";
|
link_type = "icon";
|
||||||
break;
|
break;
|
||||||
} else if attr.value.to_string() == "stylesheet" {
|
} else if attr.value.as_ref() == "stylesheet" {
|
||||||
link_type = "stylesheet";
|
link_type = "stylesheet";
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -110,9 +113,8 @@ pub fn walk_and_embed_assets(
|
|||||||
if opt_no_images {
|
if opt_no_images {
|
||||||
attr.value.clear();
|
attr.value.clear();
|
||||||
} else {
|
} else {
|
||||||
let href_full_url: String =
|
let href_full_url =
|
||||||
resolve_url(&url, &attr.value.to_string())
|
resolve_url(&url, attr.value.as_ref()).unwrap_or_default();
|
||||||
.unwrap_or(str!());
|
|
||||||
let (favicon_dataurl, _) = retrieve_asset(
|
let (favicon_dataurl, _) = retrieve_asset(
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
@ -121,7 +123,7 @@ pub fn walk_and_embed_assets(
|
|||||||
"",
|
"",
|
||||||
opt_silent,
|
opt_silent,
|
||||||
)
|
)
|
||||||
.unwrap_or((str!(), str!()));
|
.unwrap_or_default();
|
||||||
attr.value.clear();
|
attr.value.clear();
|
||||||
attr.value.push_slice(favicon_dataurl.as_str());
|
attr.value.push_slice(favicon_dataurl.as_str());
|
||||||
}
|
}
|
||||||
@ -133,9 +135,8 @@ pub fn walk_and_embed_assets(
|
|||||||
if opt_no_css {
|
if opt_no_css {
|
||||||
attr.value.clear();
|
attr.value.clear();
|
||||||
} else {
|
} else {
|
||||||
let href_full_url: String =
|
let href_full_url =
|
||||||
resolve_url(&url, &attr.value.to_string())
|
resolve_url(&url, &attr.value.as_ref()).unwrap_or_default();
|
||||||
.unwrap_or(str!());
|
|
||||||
let replacement_text = match retrieve_asset(
|
let replacement_text = match retrieve_asset(
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
@ -157,7 +158,7 @@ pub fn walk_and_embed_assets(
|
|||||||
|
|
||||||
// If a network error occurred, warn
|
// If a network error occurred, warn
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
eprintln!("Warning: {}", e,);
|
eprintln!("Warning: {}", e);
|
||||||
|
|
||||||
// If failed to resolve, replace with absolute URL
|
// If failed to resolve, replace with absolute URL
|
||||||
href_full_url
|
href_full_url
|
||||||
@ -172,8 +173,8 @@ pub fn walk_and_embed_assets(
|
|||||||
} else {
|
} else {
|
||||||
for attr in attrs_mut.iter_mut() {
|
for attr in attrs_mut.iter_mut() {
|
||||||
if &attr.name.local == "href" {
|
if &attr.name.local == "href" {
|
||||||
let href_full_url: String =
|
let href_full_url =
|
||||||
resolve_url(&url, &attr.value.to_string()).unwrap_or(str!());
|
resolve_url(&url, attr.value.as_ref()).unwrap_or_default();
|
||||||
attr.value.clear();
|
attr.value.clear();
|
||||||
attr.value.push_slice(&href_full_url.as_str());
|
attr.value.push_slice(&href_full_url.as_str());
|
||||||
}
|
}
|
||||||
@ -231,8 +232,8 @@ pub fn walk_and_embed_assets(
|
|||||||
let attr_name: &str = &attr.name.local;
|
let attr_name: &str = &attr.name.local;
|
||||||
|
|
||||||
if attr_name == "src" {
|
if attr_name == "src" {
|
||||||
let src_full_url: String = resolve_url(&url, &attr.value.to_string())
|
let src_full_url = resolve_url(&url, attr.value.as_ref())
|
||||||
.unwrap_or(attr.value.to_string());
|
.unwrap_or_else(|_| attr.value.to_string());
|
||||||
attr.value.clear();
|
attr.value.clear();
|
||||||
attr.value.push_slice(src_full_url.as_str());
|
attr.value.push_slice(src_full_url.as_str());
|
||||||
} else if attr_name == "srcset" {
|
} else if attr_name == "srcset" {
|
||||||
@ -241,9 +242,8 @@ pub fn walk_and_embed_assets(
|
|||||||
attr.value.clear();
|
attr.value.clear();
|
||||||
attr.value.push_slice(TRANSPARENT_PIXEL);
|
attr.value.push_slice(TRANSPARENT_PIXEL);
|
||||||
} else {
|
} else {
|
||||||
let srcset_full_url: String =
|
let srcset_full_url =
|
||||||
resolve_url(&url, &attr.value.to_string())
|
resolve_url(&url, attr.value.as_ref()).unwrap_or_default();
|
||||||
.unwrap_or(str!());
|
|
||||||
let (source_dataurl, _) = retrieve_asset(
|
let (source_dataurl, _) = retrieve_asset(
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
@ -268,8 +268,8 @@ pub fn walk_and_embed_assets(
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
let href_full_url: String =
|
let href_full_url =
|
||||||
resolve_url(&url, &attr.value.to_string()).unwrap_or(str!());
|
resolve_url(&url, attr.value.as_ref()).unwrap_or_default();
|
||||||
attr.value.clear();
|
attr.value.clear();
|
||||||
attr.value.push_slice(href_full_url.as_str());
|
attr.value.push_slice(href_full_url.as_str());
|
||||||
}
|
}
|
||||||
@ -298,8 +298,8 @@ pub fn walk_and_embed_assets(
|
|||||||
} else {
|
} else {
|
||||||
for attr in attrs_mut.iter_mut() {
|
for attr in attrs_mut.iter_mut() {
|
||||||
if &attr.name.local == "src" {
|
if &attr.name.local == "src" {
|
||||||
let src_full_url: String =
|
let src_full_url =
|
||||||
resolve_url(&url, &attr.value.to_string()).unwrap_or(str!());
|
resolve_url(&url, attr.value.as_ref()).unwrap_or_default();
|
||||||
let (js_dataurl, _) = retrieve_asset(
|
let (js_dataurl, _) = retrieve_asset(
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
@ -343,8 +343,8 @@ pub fn walk_and_embed_assets(
|
|||||||
if &attr.name.local == "action" {
|
if &attr.name.local == "action" {
|
||||||
// Modify action to be a full URL
|
// Modify action to be a full URL
|
||||||
if !is_valid_url(&attr.value) {
|
if !is_valid_url(&attr.value) {
|
||||||
let href_full_url: String =
|
let href_full_url =
|
||||||
resolve_url(&url, &attr.value.to_string()).unwrap_or(str!());
|
resolve_url(&url, attr.value.as_ref()).unwrap_or_default();
|
||||||
attr.value.clear();
|
attr.value.clear();
|
||||||
attr.value.push_slice(href_full_url.as_str());
|
attr.value.push_slice(href_full_url.as_str());
|
||||||
}
|
}
|
||||||
@ -360,15 +360,14 @@ pub fn walk_and_embed_assets(
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
let iframe_src: String = attr.value.to_string();
|
let iframe_src = attr.value.as_ref();
|
||||||
|
|
||||||
// Ignore iframes with empty source (they cause infinite loops)
|
// Ignore iframes with empty source (they cause infinite loops)
|
||||||
if iframe_src == str!() {
|
if iframe_src.is_empty() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
let src_full_url: String =
|
let src_full_url = resolve_url(&url, iframe_src).unwrap_or_default();
|
||||||
resolve_url(&url, &iframe_src).unwrap_or(str!());
|
|
||||||
let (iframe_data, iframe_final_url) = retrieve_asset(
|
let (iframe_data, iframe_final_url) = retrieve_asset(
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
@ -401,18 +400,18 @@ pub fn walk_and_embed_assets(
|
|||||||
"video" => {
|
"video" => {
|
||||||
for attr in attrs_mut.iter_mut() {
|
for attr in attrs_mut.iter_mut() {
|
||||||
if &attr.name.local == "poster" {
|
if &attr.name.local == "poster" {
|
||||||
let video_poster = attr.value.to_string();
|
let video_poster = attr.value.as_ref();
|
||||||
|
|
||||||
// Skip posters with empty source
|
// Skip posters with empty source
|
||||||
if video_poster == str!() {
|
if video_poster.is_empty() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if opt_no_images {
|
if opt_no_images {
|
||||||
attr.value.clear();
|
attr.value.clear();
|
||||||
} else {
|
} else {
|
||||||
let poster_full_url: String =
|
let poster_full_url =
|
||||||
resolve_url(&url, &video_poster).unwrap_or(str!());
|
resolve_url(&url, video_poster).unwrap_or_default();
|
||||||
let (poster_dataurl, _) = retrieve_asset(
|
let (poster_dataurl, _) = retrieve_asset(
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
@ -517,9 +516,7 @@ fn get_child_node_by_name(handle: &Handle, node_name: &str) -> Handle {
|
|||||||
});
|
});
|
||||||
match matching_children {
|
match matching_children {
|
||||||
Some(node) => node.clone(),
|
Some(node) => node.clone(),
|
||||||
_ => {
|
_ => handle.clone(),
|
||||||
return handle.clone();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -535,7 +532,7 @@ pub fn stringify_document(
|
|||||||
serialize(&mut buf, handle, SerializeOpts::default())
|
serialize(&mut buf, handle, SerializeOpts::default())
|
||||||
.expect("unable to serialize DOM into buffer");
|
.expect("unable to serialize DOM into buffer");
|
||||||
|
|
||||||
let mut result: String = String::from_utf8(buf).unwrap();
|
let mut result = String::from_utf8(buf).unwrap();
|
||||||
|
|
||||||
if opt_isolate || opt_no_css || opt_no_frames || opt_no_js || opt_no_images {
|
if opt_isolate || opt_no_css || opt_no_frames || opt_no_js || opt_no_images {
|
||||||
let mut buf: Vec<u8> = Vec::new();
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
@ -559,7 +556,6 @@ pub fn stringify_document(
|
|||||||
if opt_no_images {
|
if opt_no_images {
|
||||||
content_attr += " img-src data:;";
|
content_attr += " img-src data:;";
|
||||||
}
|
}
|
||||||
content_attr = content_attr.trim().to_string();
|
|
||||||
|
|
||||||
let meta = dom.create_element(
|
let meta = dom.create_element(
|
||||||
QualName::new(None, ns!(), local_name!("meta")),
|
QualName::new(None, ns!(), local_name!("meta")),
|
||||||
@ -570,7 +566,7 @@ pub fn stringify_document(
|
|||||||
},
|
},
|
||||||
Attribute {
|
Attribute {
|
||||||
name: QualName::new(None, ns!(), local_name!("content")),
|
name: QualName::new(None, ns!(), local_name!("content")),
|
||||||
value: format_tendril!("{}", content_attr),
|
value: format_tendril!("{}", content_attr.trim()),
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
Default::default(),
|
Default::default(),
|
||||||
|
@ -28,14 +28,15 @@ pub fn retrieve_asset(
|
|||||||
let mut response = client.get(url).send()?;
|
let mut response = client.get(url).send()?;
|
||||||
|
|
||||||
if !opt_silent {
|
if !opt_silent {
|
||||||
if url == response.url().as_str() {
|
let res_url = response.url().as_str();
|
||||||
|
if url == res_url {
|
||||||
eprintln!("{}", &url);
|
eprintln!("{}", &url);
|
||||||
} else {
|
} else {
|
||||||
eprintln!("{} -> {}", &url, &response.url().as_str());
|
eprintln!("{} -> {}", &url, res_url);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let new_cache_key = clean_url(response.url().to_string());
|
let new_cache_key = clean_url(response.url());
|
||||||
|
|
||||||
if as_dataurl {
|
if as_dataurl {
|
||||||
// Convert response into a byte array
|
// Convert response into a byte array
|
||||||
@ -54,7 +55,7 @@ pub fn retrieve_asset(
|
|||||||
};
|
};
|
||||||
let dataurl = data_to_dataurl(&mimetype, &data);
|
let dataurl = data_to_dataurl(&mimetype, &data);
|
||||||
// insert in cache
|
// insert in cache
|
||||||
cache.insert(new_cache_key, dataurl.to_string());
|
cache.insert(new_cache_key, dataurl.clone());
|
||||||
Ok((dataurl, response.url().to_string()))
|
Ok((dataurl, response.url().to_string()))
|
||||||
} else {
|
} else {
|
||||||
let content = response.text().unwrap();
|
let content = response.text().unwrap();
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
const JS_DOM_EVENT_ATTRS: [&str; 21] = [
|
const JS_DOM_EVENT_ATTRS: &[&str] = &[
|
||||||
// Input
|
// Input
|
||||||
"onfocus",
|
"onfocus",
|
||||||
"onblur",
|
"onblur",
|
||||||
@ -28,5 +28,8 @@ const JS_DOM_EVENT_ATTRS: [&str; 21] = [
|
|||||||
|
|
||||||
// Returns true if DOM attribute name matches a native JavaScript event handler
|
// Returns true if DOM attribute name matches a native JavaScript event handler
|
||||||
pub fn attr_is_event_handler(attr_name: &str) -> bool {
|
pub fn attr_is_event_handler(attr_name: &str) -> bool {
|
||||||
JS_DOM_EVENT_ATTRS.contains(&attr_name.to_lowercase().as_str())
|
JS_DOM_EVENT_ATTRS
|
||||||
|
.iter()
|
||||||
|
.find(|a| attr_name.eq_ignore_ascii_case(a))
|
||||||
|
.is_some()
|
||||||
}
|
}
|
||||||
|
@ -33,7 +33,8 @@ fn test_get_parent_node_name() {
|
|||||||
}
|
}
|
||||||
NodeData::Element { ref name, .. } => {
|
NodeData::Element { ref name, .. } => {
|
||||||
let node_name = name.local.as_ref().to_string();
|
let node_name = name.local.as_ref().to_string();
|
||||||
let parent_node_name = get_node_name(&get_parent_node(node));
|
let parent = get_parent_node(node);
|
||||||
|
let parent_node_name = get_node_name(&parent);
|
||||||
if node_name == "head" || node_name == "body" {
|
if node_name == "head" || node_name == "body" {
|
||||||
assert_eq!(parent_node_name, "html");
|
assert_eq!(parent_node_name, "html");
|
||||||
} else if node_name == "div" {
|
} else if node_name == "div" {
|
||||||
|
Loading…
Reference in New Issue
Block a user