From 660511b8a07c28a0df142ed69bfc29a90ccb828d Mon Sep 17 00:00:00 2001 From: rhysd Date: Thu, 9 Jan 2020 15:34:30 +0900 Subject: [PATCH 1/6] define link type of element as enum and prefer match statement since match statement checks exhaustiveness --- src/html.rs | 146 ++++++++++++++++++++++++++++------------------------ 1 file changed, 79 insertions(+), 67 deletions(-) diff --git a/src/html.rs b/src/html.rs index f867da9..e3c29ab 100644 --- a/src/html.rs +++ b/src/html.rs @@ -82,8 +82,6 @@ pub fn walk_and_embed_assets( match name.local.as_ref() { "link" => { - let mut link_type: &str = ""; - // Remove integrity attributes let mut i = 0; while i < attrs_mut.len() { @@ -95,88 +93,102 @@ pub fn walk_and_embed_assets( } } + enum LinkType { + Icon, + Stylesheet, + Preload, + DnsPrefetch, + Unknown, + } + + let mut link_type = LinkType::Unknown; for attr in attrs_mut.iter_mut() { if &attr.name.local == "rel" { if is_icon(attr.value.as_ref()) { - link_type = "icon"; + link_type = LinkType::Icon; break; } else if attr.value.as_ref() == "stylesheet" { - link_type = "stylesheet"; + link_type = LinkType::Stylesheet; break; } } } + let link_type = link_type; - if link_type == "icon" { - for attr in attrs_mut.iter_mut() { - if &attr.name.local == "href" { - if opt_no_images { - attr.value.clear(); - } else { - let href_full_url = - resolve_url(&url, attr.value.as_ref()).unwrap_or_default(); - let (favicon_dataurl, _) = retrieve_asset( - cache, - client, - &href_full_url, - true, - "", - opt_silent, - ) - .unwrap_or_default(); - attr.value.clear(); - attr.value.push_slice(favicon_dataurl.as_str()); - } - } - } - } else if link_type == "stylesheet" { - for attr in attrs_mut.iter_mut() { - if &attr.name.local == "href" { - if opt_no_css { - attr.value.clear(); - } else { - let href_full_url = - resolve_url(&url, &attr.value.as_ref()).unwrap_or_default(); - let replacement_text = match retrieve_asset( - cache, - client, - &href_full_url, - false, - "text/css", - opt_silent, - ) { 
- // On successful retrieval, traverse CSS - Ok((css_data, _)) => resolve_css_imports( + match link_type { + LinkType::Icon => { + for attr in attrs_mut.iter_mut() { + if &attr.name.local == "href" { + if opt_no_images { + attr.value.clear(); + } else { + let href_full_url = resolve_url(&url, attr.value.as_ref()) + .unwrap_or_default(); + let (favicon_dataurl, _) = retrieve_asset( cache, client, - &css_data, - true, &href_full_url, - opt_no_images, + true, + "", opt_silent, - ), - - // If a network error occured, warn - Err(e) => { - eprintln!("Warning: {}", e); - - // If failed to resolve, replace with absolute URL - href_full_url - } - }; - - attr.value.clear(); - attr.value.push_slice(&replacement_text); + ) + .unwrap_or_default(); + attr.value.clear(); + attr.value.push_slice(favicon_dataurl.as_str()); + } } } } - } else { - for attr in attrs_mut.iter_mut() { - if &attr.name.local == "href" { - let href_full_url = - resolve_url(&url, attr.value.as_ref()).unwrap_or_default(); - attr.value.clear(); - attr.value.push_slice(&href_full_url.as_str()); + LinkType::Stylesheet => { + for attr in attrs_mut.iter_mut() { + if &attr.name.local == "href" { + if opt_no_css { + attr.value.clear(); + } else { + let href_full_url = resolve_url(&url, &attr.value.as_ref()) + .unwrap_or_default(); + let replacement_text = match retrieve_asset( + cache, + client, + &href_full_url, + false, + "text/css", + opt_silent, + ) { + // On successful retrieval, traverse CSS + Ok((css_data, _)) => resolve_css_imports( + cache, + client, + &css_data, + true, + &href_full_url, + opt_no_images, + opt_silent, + ), + + // If a network error occured, warn + Err(e) => { + eprintln!("Warning: {}", e); + + // If failed to resolve, replace with absolute URL + href_full_url + } + }; + + attr.value.clear(); + attr.value.push_slice(&replacement_text); + } + } + } + } + LinkType::Unknown => { + for attr in attrs_mut.iter_mut() { + if &attr.name.local == "href" { + let href_full_url = + resolve_url(&url, 
attr.value.as_ref()).unwrap_or_default(); + attr.value.clear(); + attr.value.push_slice(&href_full_url.as_str()); + } } } } From 8d7052b39ca21f853ed845980cd964c6ac0e3486 Mon Sep 17 00:00:00 2001 From: rhysd Date: Thu, 9 Jan 2020 18:18:21 +0900 Subject: [PATCH 2/6] ignore preload and prefetch sources since all resources are embedded as data URL. --- src/html.rs | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/html.rs b/src/html.rs index e3c29ab..cb25181 100644 --- a/src/html.rs +++ b/src/html.rs @@ -104,12 +104,19 @@ pub fn walk_and_embed_assets( let mut link_type = LinkType::Unknown; for attr in attrs_mut.iter_mut() { if &attr.name.local == "rel" { - if is_icon(attr.value.as_ref()) { + let value = attr.value.as_ref(); + if is_icon(value) { link_type = LinkType::Icon; break; - } else if attr.value.as_ref() == "stylesheet" { + } else if value == "stylesheet" { link_type = LinkType::Stylesheet; break; + } else if value == "preload" { + link_type = LinkType::Preload; + break; + } else if value == "dns-prefetch" { + link_type = LinkType::DnsPrefetch; + break; } } } @@ -181,6 +188,14 @@ pub fn walk_and_embed_assets( } } } + LinkType::Preload | LinkType::DnsPrefetch => { + // Since all resources are embedded as data URL, preloading and prefetching are unnecessary. 
+ if let Some(attr) = + attrs_mut.iter_mut().find(|a| &a.name.local == "href") + { + attr.value.clear(); + } + } LinkType::Unknown => { for attr in attrs_mut.iter_mut() { if &attr.name.local == "href" { From 6f158dc6db8ced589e07673a459f1fa7f9a9ad0b Mon Sep 17 00:00:00 2001 From: rhysd Date: Fri, 10 Jan 2020 13:52:31 +0900 Subject: [PATCH 3/6] compare value of 'rel' properties case-insensitively --- src/html.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/html.rs b/src/html.rs index cb25181..3ac5fef 100644 --- a/src/html.rs +++ b/src/html.rs @@ -108,13 +108,13 @@ pub fn walk_and_embed_assets( if is_icon(value) { link_type = LinkType::Icon; break; - } else if value == "stylesheet" { + } else if value.eq_ignore_ascii_case("stylesheet") { link_type = LinkType::Stylesheet; break; - } else if value == "preload" { + } else if value.eq_ignore_ascii_case("preload") { link_type = LinkType::Preload; break; - } else if value == "dns-prefetch" { + } else if value.eq_ignore_ascii_case("dns-prefetch") { link_type = LinkType::DnsPrefetch; break; } From b51f41fe347b32301e31445392c855df7c43ca62 Mon Sep 17 00:00:00 2001 From: rhysd Date: Fri, 10 Jan 2020 14:27:15 +0900 Subject: [PATCH 4/6] trim attribute values --- src/html.rs | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/html.rs b/src/html.rs index 3ac5fef..30e3355 100644 --- a/src/html.rs +++ b/src/html.rs @@ -104,7 +104,7 @@ pub fn walk_and_embed_assets( let mut link_type = LinkType::Unknown; for attr in attrs_mut.iter_mut() { if &attr.name.local == "rel" { - let value = attr.value.as_ref(); + let value = attr.value.trim(); if is_icon(value) { link_type = LinkType::Icon; break; @@ -233,7 +233,7 @@ pub fn walk_and_embed_assets( } else if let Some((dataurl, _)) = (&found_datasrc) .into_iter() .chain(&found_src) // Give dataurl priority - .map(|attr| &attr.value) + .map(|attr| attr.value.trim()) .filter(|src| !src.is_empty()) // Ignore empty srcs 
.next() .and_then(|src| resolve_url(&url, src).ok()) // Make absolute @@ -259,7 +259,7 @@ pub fn walk_and_embed_assets( let attr_name: &str = &attr.name.local; if attr_name == "src" { - let src_full_url = resolve_url(&url, attr.value.as_ref()) + let src_full_url = resolve_url(&url, attr.value.trim()) .unwrap_or_else(|_| attr.value.to_string()); attr.value.clear(); attr.value.push_slice(src_full_url.as_str()); @@ -270,7 +270,7 @@ pub fn walk_and_embed_assets( attr.value.push_slice(TRANSPARENT_PIXEL); } else { let srcset_full_url = - resolve_url(&url, attr.value.as_ref()).unwrap_or_default(); + resolve_url(&url, attr.value.trim()).unwrap_or_default(); let (source_dataurl, _) = retrieve_asset( cache, client, @@ -290,13 +290,13 @@ pub fn walk_and_embed_assets( "a" => { for attr in attrs_mut.iter_mut() { if &attr.name.local == "href" { + let attr_value = attr.value.trim(); // Don't touch email links or hrefs which begin with a hash sign - if attr.value.starts_with('#') || url_has_protocol(&attr.value) { + if attr_value.starts_with('#') || url_has_protocol(attr_value) { continue; } - let href_full_url = - resolve_url(&url, attr.value.as_ref()).unwrap_or_default(); + let href_full_url = resolve_url(&url, attr_value).unwrap_or_default(); attr.value.clear(); attr.value.push_slice(href_full_url.as_str()); } @@ -326,7 +326,7 @@ pub fn walk_and_embed_assets( for attr in attrs_mut.iter_mut() { if &attr.name.local == "src" { let src_full_url = - resolve_url(&url, attr.value.as_ref()).unwrap_or_default(); + resolve_url(&url, attr.value.trim()).unwrap_or_default(); let (js_dataurl, _) = retrieve_asset( cache, client, @@ -368,10 +368,11 @@ pub fn walk_and_embed_assets( "form" => { for attr in attrs_mut.iter_mut() { if &attr.name.local == "action" { + let attr_value = attr.value.trim(); // Modify action to be a full URL - if !is_valid_url(&attr.value) { + if !is_valid_url(attr_value) { let href_full_url = - resolve_url(&url, attr.value.as_ref()).unwrap_or_default(); + 
resolve_url(&url, attr_value).unwrap_or_default(); attr.value.clear(); attr.value.push_slice(href_full_url.as_str()); } @@ -387,7 +388,7 @@ pub fn walk_and_embed_assets( continue; } - let iframe_src = attr.value.as_ref(); + let iframe_src = attr.value.trim(); // Ignore iframes with empty source (they cause infinite loops) if iframe_src.is_empty() { @@ -427,7 +428,7 @@ pub fn walk_and_embed_assets( "video" => { for attr in attrs_mut.iter_mut() { if &attr.name.local == "poster" { - let video_poster = attr.value.as_ref(); + let video_poster = attr.value.trim(); // Skip posters with empty source if video_poster.is_empty() { From 67b79e92f9de9ae1a9ffd98d12d08aa2dd9a85f8 Mon Sep 17 00:00:00 2001 From: rhysd Date: Fri, 10 Jan 2020 14:45:02 +0900 Subject: [PATCH 5/6] simplify &x.into_iter() to x.iter() --- src/html.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/html.rs b/src/html.rs index 30e3355..62d67bc 100644 --- a/src/html.rs +++ b/src/html.rs @@ -230,8 +230,8 @@ pub fn walk_and_embed_assets( name: QualName::new(None, ns!(), local_name!("src")), value: Tendril::from_slice(TRANSPARENT_PIXEL), }); - } else if let Some((dataurl, _)) = (&found_datasrc) - .into_iter() + } else if let Some((dataurl, _)) = found_datasrc + .iter() .chain(&found_src) // Give dataurl priority .map(|attr| attr.value.trim()) .filter(|src| !src.is_empty()) // Ignore empty srcs From 69d99b69e8900ff281cb85c585349d7039168a0e Mon Sep 17 00:00:00 2001 From: rhysd Date: Mon, 13 Jan 2020 23:47:07 +0900 Subject: [PATCH 6/6] remove . in line comment --- src/html.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/html.rs b/src/html.rs index 62d67bc..4d34e99 100644 --- a/src/html.rs +++ b/src/html.rs @@ -189,7 +189,7 @@ pub fn walk_and_embed_assets( } } LinkType::Preload | LinkType::DnsPrefetch => { - // Since all resources are embedded as data URL, preloading and prefetching are unnecessary. 
+ // Since all resources are embedded as data URL, preloading and prefetching are unnecessary if let Some(attr) = attrs_mut.iter_mut().find(|a| &a.name.local == "href") {