Merge pull request #101 from rhysd/ignore-preload
Improve handling of preload links and whitespace in attribute values
This commit is contained in:
commit
c1dc798ded
1 changed file with 109 additions and 81 deletions
190
src/html.rs
190
src/html.rs
|
@ -82,8 +82,6 @@ pub fn walk_and_embed_assets(
|
||||||
|
|
||||||
match name.local.as_ref() {
|
match name.local.as_ref() {
|
||||||
"link" => {
|
"link" => {
|
||||||
let mut link_type: &str = "";
|
|
||||||
|
|
||||||
// Remove integrity attributes
|
// Remove integrity attributes
|
||||||
let mut i = 0;
|
let mut i = 0;
|
||||||
while i < attrs_mut.len() {
|
while i < attrs_mut.len() {
|
||||||
|
@ -95,88 +93,117 @@ pub fn walk_and_embed_assets(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
enum LinkType {
|
||||||
|
Icon,
|
||||||
|
Stylesheet,
|
||||||
|
Preload,
|
||||||
|
DnsPrefetch,
|
||||||
|
Unknown,
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut link_type = LinkType::Unknown;
|
||||||
for attr in attrs_mut.iter_mut() {
|
for attr in attrs_mut.iter_mut() {
|
||||||
if &attr.name.local == "rel" {
|
if &attr.name.local == "rel" {
|
||||||
if is_icon(attr.value.as_ref()) {
|
let value = attr.value.trim();
|
||||||
link_type = "icon";
|
if is_icon(value) {
|
||||||
|
link_type = LinkType::Icon;
|
||||||
break;
|
break;
|
||||||
} else if attr.value.as_ref() == "stylesheet" {
|
} else if value.eq_ignore_ascii_case("stylesheet") {
|
||||||
link_type = "stylesheet";
|
link_type = LinkType::Stylesheet;
|
||||||
|
break;
|
||||||
|
} else if value.eq_ignore_ascii_case("preload") {
|
||||||
|
link_type = LinkType::Preload;
|
||||||
|
break;
|
||||||
|
} else if value.eq_ignore_ascii_case("dns-prefetch") {
|
||||||
|
link_type = LinkType::DnsPrefetch;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
let link_type = link_type;
|
||||||
|
|
||||||
if link_type == "icon" {
|
match link_type {
|
||||||
for attr in attrs_mut.iter_mut() {
|
LinkType::Icon => {
|
||||||
if &attr.name.local == "href" {
|
for attr in attrs_mut.iter_mut() {
|
||||||
if opt_no_images {
|
if &attr.name.local == "href" {
|
||||||
attr.value.clear();
|
if opt_no_images {
|
||||||
} else {
|
attr.value.clear();
|
||||||
let href_full_url =
|
} else {
|
||||||
resolve_url(&url, attr.value.as_ref()).unwrap_or_default();
|
let href_full_url = resolve_url(&url, attr.value.as_ref())
|
||||||
let (favicon_dataurl, _) = retrieve_asset(
|
.unwrap_or_default();
|
||||||
cache,
|
let (favicon_dataurl, _) = retrieve_asset(
|
||||||
client,
|
|
||||||
&href_full_url,
|
|
||||||
true,
|
|
||||||
"",
|
|
||||||
opt_silent,
|
|
||||||
)
|
|
||||||
.unwrap_or_default();
|
|
||||||
attr.value.clear();
|
|
||||||
attr.value.push_slice(favicon_dataurl.as_str());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if link_type == "stylesheet" {
|
|
||||||
for attr in attrs_mut.iter_mut() {
|
|
||||||
if &attr.name.local == "href" {
|
|
||||||
if opt_no_css {
|
|
||||||
attr.value.clear();
|
|
||||||
} else {
|
|
||||||
let href_full_url =
|
|
||||||
resolve_url(&url, &attr.value.as_ref()).unwrap_or_default();
|
|
||||||
let replacement_text = match retrieve_asset(
|
|
||||||
cache,
|
|
||||||
client,
|
|
||||||
&href_full_url,
|
|
||||||
false,
|
|
||||||
"text/css",
|
|
||||||
opt_silent,
|
|
||||||
) {
|
|
||||||
// On successful retrieval, traverse CSS
|
|
||||||
Ok((css_data, _)) => resolve_css_imports(
|
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
&css_data,
|
|
||||||
true,
|
|
||||||
&href_full_url,
|
&href_full_url,
|
||||||
opt_no_images,
|
true,
|
||||||
|
"",
|
||||||
opt_silent,
|
opt_silent,
|
||||||
),
|
)
|
||||||
|
.unwrap_or_default();
|
||||||
// If a network error occurred, warn
|
attr.value.clear();
|
||||||
Err(e) => {
|
attr.value.push_slice(favicon_dataurl.as_str());
|
||||||
eprintln!("Warning: {}", e);
|
}
|
||||||
|
|
||||||
// If failed to resolve, replace with absolute URL
|
|
||||||
href_full_url
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
attr.value.clear();
|
|
||||||
attr.value.push_slice(&replacement_text);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
LinkType::Stylesheet => {
|
||||||
for attr in attrs_mut.iter_mut() {
|
for attr in attrs_mut.iter_mut() {
|
||||||
if &attr.name.local == "href" {
|
if &attr.name.local == "href" {
|
||||||
let href_full_url =
|
if opt_no_css {
|
||||||
resolve_url(&url, attr.value.as_ref()).unwrap_or_default();
|
attr.value.clear();
|
||||||
|
} else {
|
||||||
|
let href_full_url = resolve_url(&url, &attr.value.as_ref())
|
||||||
|
.unwrap_or_default();
|
||||||
|
let replacement_text = match retrieve_asset(
|
||||||
|
cache,
|
||||||
|
client,
|
||||||
|
&href_full_url,
|
||||||
|
false,
|
||||||
|
"text/css",
|
||||||
|
opt_silent,
|
||||||
|
) {
|
||||||
|
// On successful retrieval, traverse CSS
|
||||||
|
Ok((css_data, _)) => resolve_css_imports(
|
||||||
|
cache,
|
||||||
|
client,
|
||||||
|
&css_data,
|
||||||
|
true,
|
||||||
|
&href_full_url,
|
||||||
|
opt_no_images,
|
||||||
|
opt_silent,
|
||||||
|
),
|
||||||
|
|
||||||
|
// If a network error occurred, warn
|
||||||
|
Err(e) => {
|
||||||
|
eprintln!("Warning: {}", e);
|
||||||
|
|
||||||
|
// If failed to resolve, replace with absolute URL
|
||||||
|
href_full_url
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
attr.value.clear();
|
||||||
|
attr.value.push_slice(&replacement_text);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
LinkType::Preload | LinkType::DnsPrefetch => {
|
||||||
|
// Since all resources are embedded as data URLs, preloading and prefetching are unnecessary
|
||||||
|
if let Some(attr) =
|
||||||
|
attrs_mut.iter_mut().find(|a| &a.name.local == "href")
|
||||||
|
{
|
||||||
attr.value.clear();
|
attr.value.clear();
|
||||||
attr.value.push_slice(&href_full_url.as_str());
|
}
|
||||||
|
}
|
||||||
|
LinkType::Unknown => {
|
||||||
|
for attr in attrs_mut.iter_mut() {
|
||||||
|
if &attr.name.local == "href" {
|
||||||
|
let href_full_url =
|
||||||
|
resolve_url(&url, attr.value.as_ref()).unwrap_or_default();
|
||||||
|
attr.value.clear();
|
||||||
|
attr.value.push_slice(&href_full_url.as_str());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -203,10 +230,10 @@ pub fn walk_and_embed_assets(
|
||||||
name: QualName::new(None, ns!(), local_name!("src")),
|
name: QualName::new(None, ns!(), local_name!("src")),
|
||||||
value: Tendril::from_slice(TRANSPARENT_PIXEL),
|
value: Tendril::from_slice(TRANSPARENT_PIXEL),
|
||||||
});
|
});
|
||||||
} else if let Some((dataurl, _)) = (&found_datasrc)
|
} else if let Some((dataurl, _)) = found_datasrc
|
||||||
.into_iter()
|
.iter()
|
||||||
.chain(&found_src) // Give dataurl priority
|
.chain(&found_src) // Give dataurl priority
|
||||||
.map(|attr| &attr.value)
|
.map(|attr| attr.value.trim())
|
||||||
.filter(|src| !src.is_empty()) // Ignore empty srcs
|
.filter(|src| !src.is_empty()) // Ignore empty srcs
|
||||||
.next()
|
.next()
|
||||||
.and_then(|src| resolve_url(&url, src).ok()) // Make absolute
|
.and_then(|src| resolve_url(&url, src).ok()) // Make absolute
|
||||||
|
@ -232,7 +259,7 @@ pub fn walk_and_embed_assets(
|
||||||
let attr_name: &str = &attr.name.local;
|
let attr_name: &str = &attr.name.local;
|
||||||
|
|
||||||
if attr_name == "src" {
|
if attr_name == "src" {
|
||||||
let src_full_url = resolve_url(&url, attr.value.as_ref())
|
let src_full_url = resolve_url(&url, attr.value.trim())
|
||||||
.unwrap_or_else(|_| attr.value.to_string());
|
.unwrap_or_else(|_| attr.value.to_string());
|
||||||
attr.value.clear();
|
attr.value.clear();
|
||||||
attr.value.push_slice(src_full_url.as_str());
|
attr.value.push_slice(src_full_url.as_str());
|
||||||
|
@ -243,7 +270,7 @@ pub fn walk_and_embed_assets(
|
||||||
attr.value.push_slice(TRANSPARENT_PIXEL);
|
attr.value.push_slice(TRANSPARENT_PIXEL);
|
||||||
} else {
|
} else {
|
||||||
let srcset_full_url =
|
let srcset_full_url =
|
||||||
resolve_url(&url, attr.value.as_ref()).unwrap_or_default();
|
resolve_url(&url, attr.value.trim()).unwrap_or_default();
|
||||||
let (source_dataurl, _) = retrieve_asset(
|
let (source_dataurl, _) = retrieve_asset(
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
|
@ -263,13 +290,13 @@ pub fn walk_and_embed_assets(
|
||||||
"a" => {
|
"a" => {
|
||||||
for attr in attrs_mut.iter_mut() {
|
for attr in attrs_mut.iter_mut() {
|
||||||
if &attr.name.local == "href" {
|
if &attr.name.local == "href" {
|
||||||
|
let attr_value = attr.value.trim();
|
||||||
// Don't touch email links or hrefs which begin with a hash sign
|
// Don't touch email links or hrefs which begin with a hash sign
|
||||||
if attr.value.starts_with('#') || url_has_protocol(&attr.value) {
|
if attr_value.starts_with('#') || url_has_protocol(attr_value) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
let href_full_url =
|
let href_full_url = resolve_url(&url, attr_value).unwrap_or_default();
|
||||||
resolve_url(&url, attr.value.as_ref()).unwrap_or_default();
|
|
||||||
attr.value.clear();
|
attr.value.clear();
|
||||||
attr.value.push_slice(href_full_url.as_str());
|
attr.value.push_slice(href_full_url.as_str());
|
||||||
}
|
}
|
||||||
|
@ -299,7 +326,7 @@ pub fn walk_and_embed_assets(
|
||||||
for attr in attrs_mut.iter_mut() {
|
for attr in attrs_mut.iter_mut() {
|
||||||
if &attr.name.local == "src" {
|
if &attr.name.local == "src" {
|
||||||
let src_full_url =
|
let src_full_url =
|
||||||
resolve_url(&url, attr.value.as_ref()).unwrap_or_default();
|
resolve_url(&url, attr.value.trim()).unwrap_or_default();
|
||||||
let (js_dataurl, _) = retrieve_asset(
|
let (js_dataurl, _) = retrieve_asset(
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
|
@ -341,10 +368,11 @@ pub fn walk_and_embed_assets(
|
||||||
"form" => {
|
"form" => {
|
||||||
for attr in attrs_mut.iter_mut() {
|
for attr in attrs_mut.iter_mut() {
|
||||||
if &attr.name.local == "action" {
|
if &attr.name.local == "action" {
|
||||||
|
let attr_value = attr.value.trim();
|
||||||
// Modify action to be a full URL
|
// Modify action to be a full URL
|
||||||
if !is_valid_url(&attr.value) {
|
if !is_valid_url(attr_value) {
|
||||||
let href_full_url =
|
let href_full_url =
|
||||||
resolve_url(&url, attr.value.as_ref()).unwrap_or_default();
|
resolve_url(&url, attr_value).unwrap_or_default();
|
||||||
attr.value.clear();
|
attr.value.clear();
|
||||||
attr.value.push_slice(href_full_url.as_str());
|
attr.value.push_slice(href_full_url.as_str());
|
||||||
}
|
}
|
||||||
|
@ -360,7 +388,7 @@ pub fn walk_and_embed_assets(
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
let iframe_src = attr.value.as_ref();
|
let iframe_src = attr.value.trim();
|
||||||
|
|
||||||
// Ignore iframes with empty source (they cause infinite loops)
|
// Ignore iframes with empty source (they cause infinite loops)
|
||||||
if iframe_src.is_empty() {
|
if iframe_src.is_empty() {
|
||||||
|
@ -400,7 +428,7 @@ pub fn walk_and_embed_assets(
|
||||||
"video" => {
|
"video" => {
|
||||||
for attr in attrs_mut.iter_mut() {
|
for attr in attrs_mut.iter_mut() {
|
||||||
if &attr.name.local == "poster" {
|
if &attr.name.local == "poster" {
|
||||||
let video_poster = attr.value.as_ref();
|
let video_poster = attr.value.trim();
|
||||||
|
|
||||||
// Skip posters with empty source
|
// Skip posters with empty source
|
||||||
if video_poster.is_empty() {
|
if video_poster.is_empty() {
|
||||||
|
|
Loading…
Reference in a new issue