From bc98aca2a2032f1f4ea400cbe959f104e5370b89 Mon Sep 17 00:00:00 2001 From: Sunshine Date: Sun, 28 Jun 2020 16:11:15 -0400 Subject: [PATCH] indent items in retrieval log to form a tree-like structure --- src/css.rs | 42 ++++++++++++-- src/html.rs | 76 ++++++++++++++++++++----- src/main.rs | 11 +++- src/tests/cli.rs | 12 ++-- src/tests/css/embed_css.rs | 15 +++-- src/tests/html/embed_srcset.rs | 2 +- src/tests/html/walk_and_embed_assets.rs | 22 +++---- src/tests/utils/retrieve_asset.rs | 4 ++ src/utils.rs | 21 +++++-- 9 files changed, 159 insertions(+), 46 deletions(-) diff --git a/src/css.rs b/src/css.rs index 4572e36..2a646fe 100644 --- a/src/css.rs +++ b/src/css.rs @@ -61,6 +61,7 @@ pub fn process_css<'a>( parent_url: &str, parser: &mut Parser, options: &Options, + depth: u32, rule_name: &str, prop_name: &str, func_name: &str, @@ -114,6 +115,7 @@ pub fn process_css<'a>( parent_url, parser, options, + depth, rule_name, curr_prop.as_str(), func_name, @@ -175,6 +177,7 @@ pub fn process_css<'a>( &parent_url, &import_full_url, options.silent, + depth + 1, ) { Ok((import_contents, import_final_url, _import_media_type)) => { let import_data_url = data_to_data_url( @@ -185,6 +188,7 @@ pub fn process_css<'a>( &import_final_url, &String::from_utf8_lossy(&import_contents), options, + depth + 1, ) .as_bytes(), &import_final_url, @@ -224,6 +228,7 @@ pub fn process_css<'a>( &parent_url, &resolved_url, options.silent, + depth + 1, ) { Ok((data, final_url, media_type)) => { let data_url = data_to_data_url(&media_type, &data, &final_url); @@ -310,7 +315,14 @@ pub fn process_css<'a>( if is_import { let full_url = resolve_url(&parent_url, value).unwrap_or_default(); let url_fragment = get_url_fragment(full_url.clone()); - match retrieve_asset(cache, client, &parent_url, &full_url, options.silent) { + match retrieve_asset( + cache, + client, + &parent_url, + &full_url, + options.silent, + depth + 1, + ) { Ok((css, final_url, _media_type)) => { let data_url = data_to_data_url( "text/css", @@ -320,6 +332,7 @@ pub fn process_css<'a>( &final_url, &String::from_utf8_lossy(&css), options, + depth + 1, ) .as_bytes(), &final_url, @@ -338,13 +351,19 @@ pub fn process_css<'a>( } } } else { - if options.no_images && is_image_url_prop(curr_prop.as_str()) { + if is_image_url_prop(curr_prop.as_str()) && options.no_images { result.push_str(enquote(str!(empty_image!()), false).as_str()); } else { let full_url = resolve_url(&parent_url, value).unwrap_or_default(); let url_fragment = get_url_fragment(full_url.clone()); - match retrieve_asset(cache, client, &parent_url, &full_url, options.silent) - { + match retrieve_asset( + cache, + client, + &parent_url, + &full_url, + options.silent, + depth + 1, + ) { Ok((data, final_url, media_type)) => { let data_url = data_to_data_url(&media_type, &data, &final_url); let assembled_url: String = @@ -378,6 +397,7 @@ pub fn process_css<'a>( parent_url, parser, options, + depth, curr_rule.as_str(), curr_prop.as_str(), function_name, @@ -406,9 +426,21 @@ pub fn embed_css( parent_url: &str, css: &str, options: &Options, + depth: u32, ) -> String { let mut input = ParserInput::new(&css); let mut parser = Parser::new(&mut input); - process_css(cache, client, parent_url, &mut parser, options, "", "", "").unwrap() + process_css( + cache, + client, + parent_url, + &mut parser, + options, + depth, + "", + "", + "", + ) + .unwrap() } diff --git a/src/html.rs b/src/html.rs index 034bc97..50285ae 100644 --- a/src/html.rs +++ b/src/html.rs @@ -75,6 +75,7 @@ pub fn embed_srcset( parent_url: &str, srcset: &str, options: &Options, + depth: u32, ) -> String { let mut array: Vec = vec![]; let srcset_items: Vec<&str> = srcset.split(',').collect(); @@ -94,7 +95,14 @@ pub fn embed_srcset( } else { let image_full_url = resolve_url(&parent_url, part.path).unwrap_or_default(); let image_url_fragment = get_url_fragment(image_full_url.clone()); - match retrieve_asset(cache, client, &parent_url, &image_full_url, options.silent) { + match retrieve_asset( + cache, + client, + &parent_url, + &image_full_url, + options.silent, + depth + 1, + ) { Ok((image_data, image_final_url, image_media_type)) => { let image_data_url = data_to_data_url(&image_media_type, &image_data, &image_final_url); @@ -138,12 +146,13 @@ pub fn walk_and_embed_assets( url: &str, node: &Handle, options: &Options, + depth: u32, ) { match node.data { NodeData::Document => { // Dig deeper for child in node.children.borrow().iter() { - walk_and_embed_assets(cache, client, &url, child, options); + walk_and_embed_assets(cache, client, &url, child, options, depth); } } NodeData::Element { @@ -240,6 +249,7 @@ pub fn walk_and_embed_assets( &url, &link_href_full_url, options.silent, + depth + 1, ) { Ok(( link_href_data, @@ -312,6 +322,7 @@ pub fn walk_and_embed_assets( &url, &link_href_full_url, options.silent, + depth + 1, ) { Ok(( link_href_data, @@ -328,6 +339,7 @@ pub fn walk_and_embed_assets( &link_href_final_url, &String::from_utf8_lossy(&link_href_data), options, + depth + 1, ); let link_href_data_url = data_to_data_url( "text/css", @@ -406,6 +418,7 @@ pub fn walk_and_embed_assets( &url, &background_full_url, options.silent, + depth + 1, ) { Ok((background_data, background_final_url, background_media_type)) => { let background_data_url = data_to_data_url( @@ -483,8 +496,14 @@ pub fn walk_and_embed_assets( ) .unwrap_or_default(); let img_url_fragment = get_url_fragment(img_full_url.clone()); - match retrieve_asset(cache, client, &url, &img_full_url, options.silent) - { + match retrieve_asset( + cache, + client, + &url, + &img_full_url, + options.silent, + depth + 1, + ) { Ok((img_data, img_final_url, img_media_type)) => { let img_data_url = data_to_data_url( &img_media_type, @@ -521,7 +540,8 @@ pub fn walk_and_embed_assets( attrs_mut.push(Attribute { name: QualName::new(None, ns!(), local_name!("srcset")), value: Tendril::from_slice( - embed_srcset(cache, client, &url, &img_srcset, options).as_ref(), + embed_srcset(cache, client, &url, &img_srcset, options, depth) + .as_ref(), ), }); } @@ -573,6 +593,7 @@ pub fn walk_and_embed_assets( &url, &input_image_full_url, options.silent, + depth + 1, ) { Ok(( input_image_data, @@ -629,7 +650,14 @@ pub fn walk_and_embed_assets( if !options.no_images && !image_href.is_empty() { let image_full_url = resolve_url(&url, image_href).unwrap_or_default(); let image_url_fragment = get_url_fragment(image_full_url.clone()); - match retrieve_asset(cache, client, &url, &image_full_url, options.silent) { + match retrieve_asset( + cache, + client, + &url, + &image_full_url, + options.silent, + depth + 1, + ) { Ok((image_data, image_final_url, image_media_type)) => { let image_data_url = data_to_data_url( &image_media_type, @@ -687,6 +715,7 @@ pub fn walk_and_embed_assets( &url, &srcset_full_url, options.silent, + depth + 1, ) { Ok((srcset_data, srcset_final_url, srcset_media_type)) => { let srcset_data_url = data_to_data_url( @@ -763,8 +792,14 @@ pub fn walk_and_embed_assets( node.children.borrow_mut().clear(); } else if !script_src.is_empty() { let script_full_url = resolve_url(&url, script_src).unwrap_or_default(); - match retrieve_asset(cache, client, &url, &script_full_url, options.silent) - { + match retrieve_asset( + cache, + client, + &url, + &script_full_url, + options.silent, + depth + 1, + ) { Ok((script_data, script_final_url, _script_media_type)) => { // Only embed if we're able to validate integrity if script_integrity.is_empty() @@ -802,8 +837,14 @@ pub fn walk_and_embed_assets( for node in node.children.borrow_mut().iter_mut() { if let NodeData::Text { ref contents } = node.data { let mut tendril = contents.borrow_mut(); - let replacement = - embed_css(cache, client, &url, tendril.as_ref(), options); + let replacement = embed_css( + cache, + client, + &url, + tendril.as_ref(), + options, + depth, + ); tendril.clear(); tendril.push_slice(&replacement); } @@ -850,6 +891,7 @@ pub fn walk_and_embed_assets( &url, &frame_full_url, options.silent, + depth + 1, ) { Ok((frame_data, frame_final_url, frame_media_type)) => { let frame_dom = @@ -860,6 +902,7 @@ pub fn walk_and_embed_assets( &frame_final_url, &frame_dom.document, &options, + depth + 1, ); let mut frame_data: Vec = Vec::new(); serialize( @@ -921,6 +964,7 @@ pub fn walk_and_embed_assets( &url, &video_poster_full_url, options.silent, + depth + 1, ) { Ok(( video_poster_data, @@ -975,8 +1019,14 @@ pub fn walk_and_embed_assets( .iter_mut() .filter(|a| a.name.local.as_ref().eq_ignore_ascii_case("style")) { - let replacement = - embed_css(cache, client, &url, attribute.value.as_ref(), options); + let replacement = embed_css( + cache, + client, + &url, + attribute.value.as_ref(), + options, + depth, + ); // let replacement = str!(); attribute.value.clear(); attribute.value.push_slice(&replacement); @@ -999,7 +1049,7 @@ pub fn walk_and_embed_assets( // Dig deeper for child in node.children.borrow().iter() { - walk_and_embed_assets(cache, client, &url, child, options); + walk_and_embed_assets(cache, client, &url, child, options, depth); } } _ => { diff --git a/src/main.rs b/src/main.rs index 2777d0a..bbb1034 100644 --- a/src/main.rs +++ b/src/main.rs @@ -108,7 +108,14 @@ fn main() { // Retrieve target document if is_file_url(target_url) || is_http_url(target_url) { - match retrieve_asset(&mut cache, &client, target_url, target_url, options.silent) { + match retrieve_asset( + &mut cache, + &client, + target_url, + target_url, + options.silent, + 0, + ) { Ok((data, final_url, _media_type)) => { base_url = final_url; dom = html_to_dom(&String::from_utf8_lossy(&data)); @@ -131,7 +138,7 @@ fn main() { } // Embed remote assets - walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options); + walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options, 0); // Serialize DOM tree let mut result: String = stringify_document(&dom.document, &options); diff --git a/src/tests/cli.rs b/src/tests/cli.rs index 98e7ff4..14c84c4 100644 --- a/src/tests/cli.rs +++ b/src/tests/cli.rs @@ -287,8 +287,8 @@ mod passing { std::str::from_utf8(&out.stderr).unwrap(), format!( "\ - {file}{cwd}/src/tests/data/basic/local-file.html\n\ - {file}{cwd}/src/tests/data/basic/local-style.css\n\ + {file}{cwd}/src/tests/data/basic/local-file.html\n \ + {file}{cwd}/src/tests/data/basic/local-style.css\n \ {file}{cwd}/src/tests/data/basic/local-script.js\n\ ", file = file_url_protocol, @@ -489,7 +489,7 @@ mod passing { std::str::from_utf8(&out.stderr).unwrap(), format!( "\ - {file}{html_path}\n\ + {file}{html_path}\n \ {file}{svg_path}\n\ ", file = file_url_prefix, @@ -540,9 +540,9 @@ mod passing { std::str::from_utf8(&out.stderr).unwrap(), format!( "\ - {file}{html_path}\n\ - {file}{css_path}\n\ - {file}{css_path}\n\ + {file}{html_path}\n \ + {file}{css_path}\n \ + {file}{css_path}\n \ {file}{css_path}\n\ ", file = file_url_prefix, diff --git a/src/tests/css/embed_css.rs b/src/tests/css/embed_css.rs index 9525718..3b001c7 100644 --- a/src/tests/css/embed_css.rs +++ b/src/tests/css/embed_css.rs @@ -19,7 +19,7 @@ mod passing { let client = Client::new(); let options = Options::default(); - assert_eq!(css::embed_css(cache, &client, "", "", &options), ""); + assert_eq!(css::embed_css(cache, &client, "", "", &options, 0), ""); } #[test] @@ -35,6 +35,7 @@ mod passing { "https://doesntmatter.local/", "\t \t ", &options, + 0, ), "" ); @@ -63,6 +64,7 @@ mod passing { "https://doesntmatter.local/", &STYLE, &options, + 0, ), format!( "/* border: none;*/\ @@ -94,7 +96,7 @@ mod passing { height: calc(100vh - 10pt)"; assert_eq!( - css::embed_css(cache, &client, "", &STYLE, &options), + css::embed_css(cache, &client, "", &STYLE, &options, 0), format!( "/* border: none;*/\ background-image: url('{empty_image}'); \ @@ -124,7 +126,7 @@ mod passing { html > body {}"; assert_eq!( - css::embed_css(cache, &client, "file:///", &CSS, &options), + css::embed_css(cache, &client, "file:///", &CSS, &options, 0), CSS ); } @@ -166,7 +168,7 @@ mod passing { } "; - assert_eq!(css::embed_css(cache, &client, "", &CSS, &options), CSS); + assert_eq!(css::embed_css(cache, &client, "", &CSS, &options, 0), CSS); } #[test] @@ -191,6 +193,7 @@ mod passing { "https://doesntmatter.local/", &CSS, &options, + 0, ), "\ @charset 'UTF-8';\n\ @@ -226,6 +229,7 @@ mod passing { "https://doesntmatter.local/", &CSS, &options, + 0, ), CSS ); @@ -253,6 +257,7 @@ mod passing { "https://doesntmatter.local/", &CSS, &options, + 0, ), CSS ); @@ -282,6 +287,7 @@ mod passing { "https://doesntmatter.local/", &CSS, &options, + 0, ), CSS ); @@ -336,6 +342,7 @@ mod passing { "https://doesntmatter.local/", &CSS, &options, + 0, ), CSS_OUT ); diff --git a/src/tests/html/embed_srcset.rs b/src/tests/html/embed_srcset.rs index bf340b1..5489c86 100644 --- a/src/tests/html/embed_srcset.rs +++ b/src/tests/html/embed_srcset.rs @@ -21,7 +21,7 @@ mod passing { let mut options = Options::default(); options.no_images = true; options.silent = true; - let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options); + let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0); assert_eq!( format!("{} 1x, {} 2x", empty_image!(), empty_image!()), diff --git a/src/tests/html/walk_and_embed_assets.rs b/src/tests/html/walk_and_embed_assets.rs index c03560b..894bad2 100644 --- a/src/tests/html/walk_and_embed_assets.rs +++ b/src/tests/html/walk_and_embed_assets.rs @@ -27,7 +27,7 @@ mod passing { let client = Client::new(); - html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options); + html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0); let mut buf: Vec = Vec::new(); serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); @@ -50,7 +50,7 @@ mod passing { let client = Client::new(); - html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options); + html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0); let mut buf: Vec = Vec::new(); serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); @@ -73,7 +73,7 @@ mod passing { let client = Client::new(); - html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options); + html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0); let mut buf: Vec = Vec::new(); serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); @@ -99,7 +99,7 @@ mod passing { let client = Client::new(); - html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options); + html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0); let mut buf: Vec = Vec::new(); serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); @@ -132,7 +132,7 @@ mod passing { let client = Client::new(); - html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options); + html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0); let mut buf: Vec = Vec::new(); serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); @@ -169,7 +169,7 @@ mod passing { let client = Client::new(); - html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options); + html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0); let mut buf: Vec = Vec::new(); serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); @@ -193,7 +193,7 @@ mod passing { let client = Client::new(); - html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options); + html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0); let mut buf: Vec = Vec::new(); serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); @@ -217,7 +217,7 @@ mod passing { let client = Client::new(); - html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options); + html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0); let mut buf: Vec = Vec::new(); serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); @@ -244,7 +244,7 @@ mod passing { let client = Client::new(); - html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options); + html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0); let mut buf: Vec = Vec::new(); serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); @@ -274,7 +274,7 @@ mod passing { let client = Client::new(); - html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options); + html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0); let mut buf: Vec = Vec::new(); serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); @@ -310,7 +310,7 @@ mod passing { let client = Client::new(); - html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options); + html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0); let mut buf: Vec = Vec::new(); serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); diff --git a/src/tests/utils/retrieve_asset.rs b/src/tests/utils/retrieve_asset.rs index f78d85e..023f2e3 100644 --- a/src/tests/utils/retrieve_asset.rs +++ b/src/tests/utils/retrieve_asset.rs @@ -27,6 +27,7 @@ mod passing { "data:text/html;base64,c291cmNl", "data:text/html;base64,dGFyZ2V0", false, + 0, ) .unwrap(); assert_eq!( @@ -63,6 +64,7 @@ mod passing { cwd = cwd.to_str().unwrap() ), false, + 0, ) .unwrap(); assert_eq!(url::data_to_data_url("application/javascript", &data, &final_url), "data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg=="); @@ -103,6 +105,7 @@ mod failing { "data:text/html;base64,SoUrCe", "file:///etc/passwd", false, + 0, ) { Ok((..)) => { assert!(false); @@ -125,6 +128,7 @@ mod failing { "https://kernel.org/", "file:///etc/passwd", false, + 0, ) { Ok((..)) => { assert!(false); diff --git a/src/utils.rs b/src/utils.rs index de0ca45..9016b94 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -6,6 +6,8 @@ use std::path::Path; use crate::url::{clean_url, data_url_to_data, file_url_to_fs_path, is_data_url, is_file_url}; +const INDENT: &str = " "; + const MAGIC: [[&[u8]; 2]; 18] = [ // Image [b"GIF87a", b"image/gif"], @@ -56,12 +58,23 @@ pub fn is_plaintext_media_type(media_type: &str) -> bool { PLAINTEXT_MEDIA_TYPES.contains(&media_type.to_lowercase().as_str()) } +pub fn indent(level: u32) -> String { + let mut result = str!(); + let mut l: u32 = level; + while l > 0 { + result += INDENT; + l -= 1; + } + result +} + pub fn retrieve_asset( cache: &mut HashMap>, client: &Client, parent_url: &str, url: &str, opt_silent: bool, + depth: u32, ) -> Result<(Vec, String, String), reqwest::Error> { if url.len() == 0 { // Provoke error @@ -83,7 +96,7 @@ pub fn retrieve_asset( let path = Path::new(&fs_file_path); if path.exists() { if !opt_silent { - eprintln!("{}", &url); + eprintln!("{}{}", indent(depth).as_str(), &url); } Ok((fs::read(&fs_file_path).expect(""), url.to_string(), str!())) @@ -97,7 +110,7 @@ pub fn retrieve_asset( if cache.contains_key(&cache_key) { // URL is in cache, we get and return it if !opt_silent { - eprintln!("{} (from cache)", &url); + eprintln!("{}{} (from cache)", indent(depth).as_str(), &url); } Ok(( @@ -112,9 +125,9 @@ pub fn retrieve_asset( if !opt_silent { if url == res_url { - eprintln!("{}", &url); + eprintln!("{}{}", indent(depth).as_str(), &url); } else { - eprintln!("{} -> {}", &url, &res_url); + eprintln!("{}{} -> {}", indent(depth).as_str(), &url, &res_url); } }