From 63e19998d07ce669ea92e9954b00c907f8bf90aa Mon Sep 17 00:00:00 2001 From: rhysd Date: Sat, 4 Jan 2020 07:49:26 +0900 Subject: [PATCH 1/6] reduce clones and fix some code styles and redundant code --- src/html.rs | 6 ++---- src/http.rs | 9 +++++---- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/html.rs b/src/html.rs index c494cf9..b57ecaa 100644 --- a/src/html.rs +++ b/src/html.rs @@ -155,7 +155,7 @@ pub fn walk_and_embed_assets( // If a network error occured, warn Err(e) => { - eprintln!("Warning: {}", e,); + eprintln!("Warning: {}", e); // If failed to resolve, replace with absolute URL href_full_url @@ -515,9 +515,7 @@ fn get_child_node_by_name(handle: &Handle, node_name: &str) -> Handle { }); match matching_children { Some(node) => node.clone(), - _ => { - return handle.clone(); - } + _ => handle.clone(), } } diff --git a/src/http.rs b/src/http.rs index cd245cf..fdeb405 100644 --- a/src/http.rs +++ b/src/http.rs @@ -28,14 +28,15 @@ pub fn retrieve_asset( let mut response = client.get(url).send()?; if !opt_silent { - if url == response.url().as_str() { + let res_url = response.url().as_str(); + if url == res_url { eprintln!("{}", &url); } else { - eprintln!("{} -> {}", &url, &response.url().as_str()); + eprintln!("{} -> {}", &url, res_url); } } - let new_cache_key = clean_url(response.url().to_string()); + let new_cache_key = clean_url(response.url()); if as_dataurl { // Convert response into a byte array @@ -54,7 +55,7 @@ pub fn retrieve_asset( }; let dataurl = data_to_dataurl(&mimetype, &data); // insert in cache - cache.insert(new_cache_key, dataurl.to_string()); + cache.insert(new_cache_key, dataurl.clone()); Ok((dataurl, response.url().to_string())) } else { let content = response.text().unwrap(); From ce03e0e487496dde585e627f4c611a54e8689625 Mon Sep 17 00:00:00 2001 From: rhysd Date: Sat, 4 Jan 2020 07:49:39 +0900 Subject: [PATCH 2/6] reduce allocation on checking DOM attributes and do not hard-code number of elements of array constant `to_lower` allocates new string but the allocation is not necessary here. --- src/html.rs | 7 +++++-- src/js.rs | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/html.rs b/src/html.rs index b57ecaa..2212b6e 100644 --- a/src/html.rs +++ b/src/html.rs @@ -12,7 +12,7 @@ use std::collections::HashMap; use std::default::Default; use utils::{data_to_dataurl, is_valid_url, resolve_css_imports, resolve_url, url_has_protocol}; -const ICON_VALUES: [&str; 5] = [ +const ICON_VALUES: &[&str] = &[ "icon", "shortcut icon", "mask-icon", @@ -37,7 +37,10 @@ pub fn get_node_name(node: &Handle) -> String { } pub fn is_icon(attr_value: &str) -> bool { - ICON_VALUES.contains(&&*attr_value.to_lowercase()) + ICON_VALUES + .iter() + .find(|a| attr_value.eq_ignore_ascii_case(a)) + .is_some() } pub fn walk_and_embed_assets( diff --git a/src/js.rs b/src/js.rs index 32f946c..18ae0de 100644 --- a/src/js.rs +++ b/src/js.rs @@ -1,4 +1,4 @@ -const JS_DOM_EVENT_ATTRS: [&str; 21] = [ +const JS_DOM_EVENT_ATTRS: &[&str] = &[ // Input "onfocus", "onblur", @@ -28,5 +28,8 @@ const JS_DOM_EVENT_ATTRS: [&str; 21] = [ // Returns true if DOM attribute name matches a native JavaScript event handler pub fn attr_is_event_handler(attr_name: &str) -> bool { - JS_DOM_EVENT_ATTRS.contains(&attr_name.to_lowercase().as_str()) + JS_DOM_EVENT_ATTRS + .iter() + .find(|a| attr_name.eq_ignore_ascii_case(a)) + .is_some() } From 84c13f060533fa54d1c06bec9293bab275f3ccb3 Mon Sep 17 00:00:00 2001 From: rhysd Date: Sat, 4 Jan 2020 07:58:29 +0900 Subject: [PATCH 3/6] prefer unwrap_or_default to unwrap_or --- src/args.rs | 2 +- src/html.rs | 30 ++++++++++++++---------------- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/src/args.rs b/src/args.rs index 1a67bcd..9448d5f 100644 --- a/src/args.rs +++ b/src/args.rs @@ -58,7 +58,7 @@ impl AppArgs { app_args.output = app.value_of("output").unwrap_or("").to_string(); app_args.user_agent = app .value_of("user-agent") - .unwrap_or_else(|| DEFAULT_USER_AGENT) + .unwrap_or(DEFAULT_USER_AGENT) .to_string(); app_args } diff --git a/src/html.rs b/src/html.rs index 2212b6e..27dcfbc 100644 --- a/src/html.rs +++ b/src/html.rs @@ -111,9 +111,8 @@ pub fn walk_and_embed_assets( if opt_no_images { attr.value.clear(); } else { - let href_full_url: String = - resolve_url(&url, &attr.value.to_string()) - .unwrap_or(str!()); + let href_full_url = resolve_url(&url, &attr.value.to_string()) + .unwrap_or_default(); let (favicon_dataurl, _) = retrieve_asset( cache, client, @@ -122,7 +121,7 @@ pub fn walk_and_embed_assets( "", opt_silent, ) - .unwrap_or((str!(), str!())); + .unwrap_or_default(); attr.value.clear(); attr.value.push_slice(favicon_dataurl.as_str()); } @@ -134,9 +133,8 @@ pub fn walk_and_embed_assets( if opt_no_css { attr.value.clear(); } else { - let href_full_url: String = - resolve_url(&url, &attr.value.to_string()) - .unwrap_or(str!()); + let href_full_url = resolve_url(&url, &attr.value.to_string()) + .unwrap_or_default(); let replacement_text = match retrieve_asset( cache, client, @@ -173,8 +171,8 @@ pub fn walk_and_embed_assets( } else { for attr in attrs_mut.iter_mut() { if &attr.name.local == "href" { - let href_full_url: String = - resolve_url(&url, &attr.value.to_string()).unwrap_or(str!()); + let href_full_url = + resolve_url(&url, &attr.value.to_string()).unwrap_or_default(); attr.value.clear(); attr.value.push_slice(&href_full_url.as_str()); } @@ -244,7 +242,7 @@ pub fn walk_and_embed_assets( } else { let srcset_full_url: String = resolve_url(&url, &attr.value.to_string()) - .unwrap_or(str!()); + .unwrap_or_default(); let (source_dataurl, _) = retrieve_asset( cache, client, @@ -270,7 +268,7 @@ pub fn walk_and_embed_assets( } let href_full_url: String = - resolve_url(&url, &attr.value.to_string()).unwrap_or(str!()); + resolve_url(&url, &attr.value.to_string()).unwrap_or_default(); attr.value.clear(); attr.value.push_slice(href_full_url.as_str()); } @@ -300,7 +298,7 @@ pub fn walk_and_embed_assets( for attr in attrs_mut.iter_mut() { if &attr.name.local == "src" { let src_full_url: String = - resolve_url(&url, &attr.value.to_string()).unwrap_or(str!()); + resolve_url(&url, &attr.value.to_string()).unwrap_or_default(); let (js_dataurl, _) = retrieve_asset( cache, client, @@ -345,7 +343,7 @@ pub fn walk_and_embed_assets( // Modify action to be a full URL if !is_valid_url(&attr.value) { let href_full_url: String = - resolve_url(&url, &attr.value.to_string()).unwrap_or(str!()); + resolve_url(&url, &attr.value.to_string()).unwrap_or_default(); attr.value.clear(); attr.value.push_slice(href_full_url.as_str()); } @@ -369,7 +367,7 @@ pub fn walk_and_embed_assets( } let src_full_url: String = - resolve_url(&url, &iframe_src).unwrap_or(str!()); + resolve_url(&url, &iframe_src).unwrap_or_default(); let (iframe_data, iframe_final_url) = retrieve_asset( cache, client, @@ -412,8 +410,8 @@ pub fn walk_and_embed_assets( if opt_no_images { attr.value.clear(); } else { - let poster_full_url: String = - resolve_url(&url, &video_poster).unwrap_or(str!()); + let poster_full_url = + resolve_url(&url, &video_poster).unwrap_or_default(); let (poster_dataurl, _) = retrieve_asset( cache, client, From ddf4b8ac13eeab59b1eb9819632d5a9546980066 Mon Sep 17 00:00:00 2001 From: rhysd Date: Sat, 4 Jan 2020 08:05:02 +0900 Subject: [PATCH 4/6] prefer &str to String for reducing allocations --- src/html.rs | 53 +++++++++++++++++++++++++---------------------------- 1 file changed, 25 insertions(+), 28 deletions(-) diff --git a/src/html.rs b/src/html.rs index 27dcfbc..b03b656 100644 --- a/src/html.rs +++ b/src/html.rs @@ -29,10 +29,10 @@ pub fn get_parent_node(node: &Handle) -> Handle { parent.and_then(|node| node.upgrade()).unwrap() } -pub fn get_node_name(node: &Handle) -> String { +pub fn get_node_name(node: &Handle) -> &'_ str { match &node.data { - NodeData::Element { ref name, .. } => name.local.as_ref().to_string(), - _ => str!(), + NodeData::Element { ref name, .. } => name.local.as_ref(), + _ => "", } } @@ -95,10 +95,10 @@ pub fn walk_and_embed_assets( for attr in attrs_mut.iter_mut() { if &attr.name.local == "rel" { - if is_icon(&attr.value.to_string()) { + if is_icon(attr.value.as_ref()) { link_type = "icon"; break; - } else if attr.value.to_string() == "stylesheet" { + } else if attr.value.as_ref() == "stylesheet" { link_type = "stylesheet"; break; } @@ -111,8 +111,8 @@ pub fn walk_and_embed_assets( if opt_no_images { attr.value.clear(); } else { - let href_full_url = resolve_url(&url, &attr.value.to_string()) - .unwrap_or_default(); + let href_full_url = + resolve_url(&url, attr.value.as_ref()).unwrap_or_default(); let (favicon_dataurl, _) = retrieve_asset( cache, client, @@ -133,8 +133,8 @@ pub fn walk_and_embed_assets( if opt_no_css { attr.value.clear(); } else { - let href_full_url = resolve_url(&url, &attr.value.to_string()) - .unwrap_or_default(); + let href_full_url = + resolve_url(&url, &attr.value.as_ref()).unwrap_or_default(); let replacement_text = match retrieve_asset( cache, client, @@ -172,7 +172,7 @@ pub fn walk_and_embed_assets( for attr in attrs_mut.iter_mut() { if &attr.name.local == "href" { let href_full_url = - resolve_url(&url, &attr.value.to_string()).unwrap_or_default(); + resolve_url(&url, attr.value.as_ref()).unwrap_or_default(); attr.value.clear(); attr.value.push_slice(&href_full_url.as_str()); } @@ -230,8 +230,8 @@ pub fn walk_and_embed_assets( let attr_name: &str = &attr.name.local; if attr_name == "src" { - let src_full_url: String = resolve_url(&url, &attr.value.to_string()) - .unwrap_or(attr.value.to_string()); + let src_full_url: String = resolve_url(&url, attr.value.as_ref()) + .unwrap_or_else(|_| attr.value.to_string()); attr.value.clear(); attr.value.push_slice(src_full_url.as_str()); } else if attr_name == "srcset" { @@ -241,8 +241,7 @@ pub fn walk_and_embed_assets( attr.value.push_slice(TRANSPARENT_PIXEL); } else { let srcset_full_url: String = - resolve_url(&url, &attr.value.to_string()) - .unwrap_or_default(); + resolve_url(&url, attr.value.as_ref()).unwrap_or_default(); let (source_dataurl, _) = retrieve_asset( cache, client, @@ -268,7 +267,7 @@ pub fn walk_and_embed_assets( } let href_full_url: String = - resolve_url(&url, &attr.value.to_string()).unwrap_or_default(); + resolve_url(&url, attr.value.as_ref()).unwrap_or_default(); attr.value.clear(); attr.value.push_slice(href_full_url.as_str()); } @@ -297,8 +296,8 @@ pub fn walk_and_embed_assets( } else { for attr in attrs_mut.iter_mut() { if &attr.name.local == "src" { - let src_full_url: String = - resolve_url(&url, &attr.value.to_string()).unwrap_or_default(); + let src_full_url = + resolve_url(&url, attr.value.as_ref()).unwrap_or_default(); let (js_dataurl, _) = retrieve_asset( cache, client, @@ -342,8 +341,8 @@ pub fn walk_and_embed_assets( if &attr.name.local == "action" { // Modify action to be a full URL if !is_valid_url(&attr.value) { - let href_full_url: String = - resolve_url(&url, &attr.value.to_string()).unwrap_or_default(); + let href_full_url = + resolve_url(&url, attr.value.as_ref()).unwrap_or_default(); attr.value.clear(); attr.value.push_slice(href_full_url.as_str()); } @@ -359,15 +358,14 @@ pub fn walk_and_embed_assets( continue; } - let iframe_src: String = attr.value.to_string(); + let iframe_src = attr.value.as_ref(); // Ignore iframes with empty source (they cause infinite loops) - if iframe_src == str!() { + if iframe_src.is_empty() { continue; } - let src_full_url: String = - resolve_url(&url, &iframe_src).unwrap_or_default(); + let src_full_url = resolve_url(&url, iframe_src).unwrap_or_default(); let (iframe_data, iframe_final_url) = retrieve_asset( cache, client, @@ -400,10 +398,10 @@ pub fn walk_and_embed_assets( "video" => { for attr in attrs_mut.iter_mut() { if &attr.name.local == "poster" { - let video_poster = attr.value.to_string(); + let video_poster = attr.value.as_ref(); // Skip posters with empty source - if video_poster == str!() { + if video_poster.is_empty() { continue; } @@ -411,7 +409,7 @@ pub fn walk_and_embed_assets( attr.value.clear(); } else { let poster_full_url = - resolve_url(&url, &video_poster).unwrap_or_default(); + resolve_url(&url, video_poster).unwrap_or_default(); let (poster_dataurl, _) = retrieve_asset( cache, client, @@ -556,7 +554,6 @@ pub fn stringify_document( if opt_no_images { content_attr += " img-src data:;"; } - content_attr = content_attr.trim().to_string(); let meta = dom.create_element( QualName::new(None, ns!(), local_name!("meta")), @@ -567,7 +564,7 @@ pub fn stringify_document( }, Attribute { name: QualName::new(None, ns!(), local_name!("content")), - value: format_tendril!("{}", content_attr), + value: format_tendril!("{}", content_attr.trim()), }, ], Default::default(), From ed879231af3e3b4876e2483b6b3fffa76fddc1d1 Mon Sep 17 00:00:00 2001 From: rhysd Date: Sat, 4 Jan 2020 08:07:19 +0900 Subject: [PATCH 5/6] fix test code was broken by refactoring --- src/tests/html.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/tests/html.rs b/src/tests/html.rs index 70d4e2c..22097b6 100644 --- a/src/tests/html.rs +++ b/src/tests/html.rs @@ -33,7 +33,8 @@ fn test_get_parent_node_name() { } NodeData::Element { ref name, .. } => { let node_name = name.local.as_ref().to_string(); - let parent_node_name = get_node_name(&get_parent_node(node)); + let parent = get_parent_node(node); + let parent_node_name = get_node_name(&parent); if node_name == "head" || node_name == "body" { assert_eq!(parent_node_name, "html"); } else if node_name == "div" { From dc7ec6e7a8d1e59cdc6220773ff3e3bf0a7452f7 Mon Sep 17 00:00:00 2001 From: rhysd Date: Sat, 4 Jan 2020 16:33:11 +0900 Subject: [PATCH 6/6] remove more redundant type annotations --- src/html.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/html.rs b/src/html.rs index b03b656..5744792 100644 --- a/src/html.rs +++ b/src/html.rs @@ -230,7 +230,7 @@ pub fn walk_and_embed_assets( let attr_name: &str = &attr.name.local; if attr_name == "src" { - let src_full_url: String = resolve_url(&url, attr.value.as_ref()) + let src_full_url = resolve_url(&url, attr.value.as_ref()) .unwrap_or_else(|_| attr.value.to_string()); attr.value.clear(); attr.value.push_slice(src_full_url.as_str()); @@ -240,7 +240,7 @@ pub fn walk_and_embed_assets( attr.value.clear(); attr.value.push_slice(TRANSPARENT_PIXEL); } else { - let srcset_full_url: String = + let srcset_full_url = resolve_url(&url, attr.value.as_ref()).unwrap_or_default(); let (source_dataurl, _) = retrieve_asset( cache, @@ -266,7 +266,7 @@ pub fn walk_and_embed_assets( continue; } - let href_full_url: String = + let href_full_url = resolve_url(&url, attr.value.as_ref()).unwrap_or_default(); attr.value.clear(); attr.value.push_slice(href_full_url.as_str()); @@ -530,7 +530,7 @@ pub fn stringify_document( serialize(&mut buf, handle, SerializeOpts::default()) .expect("unable to serialize DOM into buffer"); - let mut result: String = String::from_utf8(buf).unwrap(); + let mut result = String::from_utf8(buf).unwrap(); if opt_isolate || opt_no_css || opt_no_frames || opt_no_js || opt_no_images { let mut buf: Vec = Vec::new();