Merge pull request #194 from snshn/indented-tree

Indented tree
This commit is contained in:
Sunshine 2020-06-28 16:37:10 -04:00 committed by GitHub
commit 4263e42cd1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 191 additions and 46 deletions

View file

@ -61,6 +61,7 @@ pub fn process_css<'a>(
parent_url: &str,
parser: &mut Parser,
options: &Options,
depth: u32,
rule_name: &str,
prop_name: &str,
func_name: &str,
@ -114,6 +115,7 @@ pub fn process_css<'a>(
parent_url,
parser,
options,
depth,
rule_name,
curr_prop.as_str(),
func_name,
@ -175,6 +177,7 @@ pub fn process_css<'a>(
&parent_url,
&import_full_url,
options.silent,
depth + 1,
) {
Ok((import_contents, import_final_url, _import_media_type)) => {
let import_data_url = data_to_data_url(
@ -185,6 +188,7 @@ pub fn process_css<'a>(
&import_final_url,
&String::from_utf8_lossy(&import_contents),
options,
depth + 1,
)
.as_bytes(),
&import_final_url,
@ -224,6 +228,7 @@ pub fn process_css<'a>(
&parent_url,
&resolved_url,
options.silent,
depth + 1,
) {
Ok((data, final_url, media_type)) => {
let data_url = data_to_data_url(&media_type, &data, &final_url);
@ -310,7 +315,14 @@ pub fn process_css<'a>(
if is_import {
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
let url_fragment = get_url_fragment(full_url.clone());
match retrieve_asset(cache, client, &parent_url, &full_url, options.silent) {
match retrieve_asset(
cache,
client,
&parent_url,
&full_url,
options.silent,
depth + 1,
) {
Ok((css, final_url, _media_type)) => {
let data_url = data_to_data_url(
"text/css",
@ -320,6 +332,7 @@ pub fn process_css<'a>(
&final_url,
&String::from_utf8_lossy(&css),
options,
depth + 1,
)
.as_bytes(),
&final_url,
@ -338,13 +351,19 @@ pub fn process_css<'a>(
}
}
} else {
if options.no_images && is_image_url_prop(curr_prop.as_str()) {
if is_image_url_prop(curr_prop.as_str()) && options.no_images {
result.push_str(enquote(str!(empty_image!()), false).as_str());
} else {
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
let url_fragment = get_url_fragment(full_url.clone());
match retrieve_asset(cache, client, &parent_url, &full_url, options.silent)
{
match retrieve_asset(
cache,
client,
&parent_url,
&full_url,
options.silent,
depth + 1,
) {
Ok((data, final_url, media_type)) => {
let data_url = data_to_data_url(&media_type, &data, &final_url);
let assembled_url: String =
@ -378,6 +397,7 @@ pub fn process_css<'a>(
parent_url,
parser,
options,
depth,
curr_rule.as_str(),
curr_prop.as_str(),
function_name,
@ -406,9 +426,21 @@ pub fn embed_css(
parent_url: &str,
css: &str,
options: &Options,
depth: u32,
) -> String {
let mut input = ParserInput::new(&css);
let mut parser = Parser::new(&mut input);
process_css(cache, client, parent_url, &mut parser, options, "", "", "").unwrap()
process_css(
cache,
client,
parent_url,
&mut parser,
options,
depth,
"",
"",
"",
)
.unwrap()
}

View file

@ -75,6 +75,7 @@ pub fn embed_srcset(
parent_url: &str,
srcset: &str,
options: &Options,
depth: u32,
) -> String {
let mut array: Vec<SrcSetItem> = vec![];
let srcset_items: Vec<&str> = srcset.split(',').collect();
@ -94,7 +95,14 @@ pub fn embed_srcset(
} else {
let image_full_url = resolve_url(&parent_url, part.path).unwrap_or_default();
let image_url_fragment = get_url_fragment(image_full_url.clone());
match retrieve_asset(cache, client, &parent_url, &image_full_url, options.silent) {
match retrieve_asset(
cache,
client,
&parent_url,
&image_full_url,
options.silent,
depth + 1,
) {
Ok((image_data, image_final_url, image_media_type)) => {
let image_data_url =
data_to_data_url(&image_media_type, &image_data, &image_final_url);
@ -138,12 +146,13 @@ pub fn walk_and_embed_assets(
url: &str,
node: &Handle,
options: &Options,
depth: u32,
) {
match node.data {
NodeData::Document => {
// Dig deeper
for child in node.children.borrow().iter() {
walk_and_embed_assets(cache, client, &url, child, options);
walk_and_embed_assets(cache, client, &url, child, options, depth);
}
}
NodeData::Element {
@ -240,6 +249,7 @@ pub fn walk_and_embed_assets(
&url,
&link_href_full_url,
options.silent,
depth + 1,
) {
Ok((
link_href_data,
@ -312,6 +322,7 @@ pub fn walk_and_embed_assets(
&url,
&link_href_full_url,
options.silent,
depth + 1,
) {
Ok((
link_href_data,
@ -328,6 +339,7 @@ pub fn walk_and_embed_assets(
&link_href_final_url,
&String::from_utf8_lossy(&link_href_data),
options,
depth + 1,
);
let link_href_data_url = data_to_data_url(
"text/css",
@ -406,6 +418,7 @@ pub fn walk_and_embed_assets(
&url,
&background_full_url,
options.silent,
depth + 1,
) {
Ok((background_data, background_final_url, background_media_type)) => {
let background_data_url = data_to_data_url(
@ -483,8 +496,14 @@ pub fn walk_and_embed_assets(
)
.unwrap_or_default();
let img_url_fragment = get_url_fragment(img_full_url.clone());
match retrieve_asset(cache, client, &url, &img_full_url, options.silent)
{
match retrieve_asset(
cache,
client,
&url,
&img_full_url,
options.silent,
depth + 1,
) {
Ok((img_data, img_final_url, img_media_type)) => {
let img_data_url = data_to_data_url(
&img_media_type,
@ -521,7 +540,8 @@ pub fn walk_and_embed_assets(
attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("srcset")),
value: Tendril::from_slice(
embed_srcset(cache, client, &url, &img_srcset, options).as_ref(),
embed_srcset(cache, client, &url, &img_srcset, options, depth)
.as_ref(),
),
});
}
@ -573,6 +593,7 @@ pub fn walk_and_embed_assets(
&url,
&input_image_full_url,
options.silent,
depth + 1,
) {
Ok((
input_image_data,
@ -629,7 +650,14 @@ pub fn walk_and_embed_assets(
if !options.no_images && !image_href.is_empty() {
let image_full_url = resolve_url(&url, image_href).unwrap_or_default();
let image_url_fragment = get_url_fragment(image_full_url.clone());
match retrieve_asset(cache, client, &url, &image_full_url, options.silent) {
match retrieve_asset(
cache,
client,
&url,
&image_full_url,
options.silent,
depth + 1,
) {
Ok((image_data, image_final_url, image_media_type)) => {
let image_data_url = data_to_data_url(
&image_media_type,
@ -687,6 +715,7 @@ pub fn walk_and_embed_assets(
&url,
&srcset_full_url,
options.silent,
depth + 1,
) {
Ok((srcset_data, srcset_final_url, srcset_media_type)) => {
let srcset_data_url = data_to_data_url(
@ -763,8 +792,14 @@ pub fn walk_and_embed_assets(
node.children.borrow_mut().clear();
} else if !script_src.is_empty() {
let script_full_url = resolve_url(&url, script_src).unwrap_or_default();
match retrieve_asset(cache, client, &url, &script_full_url, options.silent)
{
match retrieve_asset(
cache,
client,
&url,
&script_full_url,
options.silent,
depth + 1,
) {
Ok((script_data, script_final_url, _script_media_type)) => {
// Only embed if we're able to validate integrity
if script_integrity.is_empty()
@ -802,8 +837,14 @@ pub fn walk_and_embed_assets(
for node in node.children.borrow_mut().iter_mut() {
if let NodeData::Text { ref contents } = node.data {
let mut tendril = contents.borrow_mut();
let replacement =
embed_css(cache, client, &url, tendril.as_ref(), options);
let replacement = embed_css(
cache,
client,
&url,
tendril.as_ref(),
options,
depth,
);
tendril.clear();
tendril.push_slice(&replacement);
}
@ -850,6 +891,7 @@ pub fn walk_and_embed_assets(
&url,
&frame_full_url,
options.silent,
depth + 1,
) {
Ok((frame_data, frame_final_url, frame_media_type)) => {
let frame_dom =
@ -860,6 +902,7 @@ pub fn walk_and_embed_assets(
&frame_final_url,
&frame_dom.document,
&options,
depth + 1,
);
let mut frame_data: Vec<u8> = Vec::new();
serialize(
@ -921,6 +964,7 @@ pub fn walk_and_embed_assets(
&url,
&video_poster_full_url,
options.silent,
depth + 1,
) {
Ok((
video_poster_data,
@ -975,8 +1019,14 @@ pub fn walk_and_embed_assets(
.iter_mut()
.filter(|a| a.name.local.as_ref().eq_ignore_ascii_case("style"))
{
let replacement =
embed_css(cache, client, &url, attribute.value.as_ref(), options);
let replacement = embed_css(
cache,
client,
&url,
attribute.value.as_ref(),
options,
depth,
);
// let replacement = str!();
attribute.value.clear();
attribute.value.push_slice(&replacement);
@ -999,7 +1049,7 @@ pub fn walk_and_embed_assets(
// Dig deeper
for child in node.children.borrow().iter() {
walk_and_embed_assets(cache, client, &url, child, options);
walk_and_embed_assets(cache, client, &url, child, options, depth);
}
}
_ => {

View file

@ -108,7 +108,14 @@ fn main() {
// Retrieve target document
if is_file_url(target_url) || is_http_url(target_url) {
match retrieve_asset(&mut cache, &client, target_url, target_url, options.silent) {
match retrieve_asset(
&mut cache,
&client,
target_url,
target_url,
options.silent,
0,
) {
Ok((data, final_url, _media_type)) => {
base_url = final_url;
dom = html_to_dom(&String::from_utf8_lossy(&data));
@ -131,7 +138,7 @@ fn main() {
}
// Embed remote assets
walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options);
walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options, 0);
// Serialize DOM tree
let mut result: String = stringify_document(&dom.document, &options);

View file

@ -287,8 +287,8 @@ mod passing {
std::str::from_utf8(&out.stderr).unwrap(),
format!(
"\
{file}{cwd}/src/tests/data/basic/local-file.html\n\
{file}{cwd}/src/tests/data/basic/local-style.css\n\
{file}{cwd}/src/tests/data/basic/local-file.html\n \
{file}{cwd}/src/tests/data/basic/local-style.css\n \
{file}{cwd}/src/tests/data/basic/local-script.js\n\
",
file = file_url_protocol,
@ -489,7 +489,7 @@ mod passing {
std::str::from_utf8(&out.stderr).unwrap(),
format!(
"\
{file}{html_path}\n\
{file}{html_path}\n \
{file}{svg_path}\n\
",
file = file_url_prefix,
@ -540,9 +540,9 @@ mod passing {
std::str::from_utf8(&out.stderr).unwrap(),
format!(
"\
{file}{html_path}\n\
{file}{css_path}\n\
{file}{css_path}\n\
{file}{html_path}\n \
{file}{css_path}\n \
{file}{css_path}\n \
{file}{css_path}\n\
",
file = file_url_prefix,

View file

@ -19,7 +19,7 @@ mod passing {
let client = Client::new();
let options = Options::default();
assert_eq!(css::embed_css(cache, &client, "", "", &options), "");
assert_eq!(css::embed_css(cache, &client, "", "", &options, 0), "");
}
#[test]
@ -35,6 +35,7 @@ mod passing {
"https://doesntmatter.local/",
"\t \t ",
&options,
0,
),
""
);
@ -63,6 +64,7 @@ mod passing {
"https://doesntmatter.local/",
&STYLE,
&options,
0,
),
format!(
"/* border: none;*/\
@ -94,7 +96,7 @@ mod passing {
height: calc(100vh - 10pt)";
assert_eq!(
css::embed_css(cache, &client, "", &STYLE, &options),
css::embed_css(cache, &client, "", &STYLE, &options, 0),
format!(
"/* border: none;*/\
background-image: url('{empty_image}'); \
@ -124,7 +126,7 @@ mod passing {
html > body {}";
assert_eq!(
css::embed_css(cache, &client, "file:///", &CSS, &options),
css::embed_css(cache, &client, "file:///", &CSS, &options, 0),
CSS
);
}
@ -166,7 +168,7 @@ mod passing {
}
";
assert_eq!(css::embed_css(cache, &client, "", &CSS, &options), CSS);
assert_eq!(css::embed_css(cache, &client, "", &CSS, &options, 0), CSS);
}
#[test]
@ -191,6 +193,7 @@ mod passing {
"https://doesntmatter.local/",
&CSS,
&options,
0,
),
"\
@charset 'UTF-8';\n\
@ -226,6 +229,7 @@ mod passing {
"https://doesntmatter.local/",
&CSS,
&options,
0,
),
CSS
);
@ -253,6 +257,7 @@ mod passing {
"https://doesntmatter.local/",
&CSS,
&options,
0,
),
CSS
);
@ -282,6 +287,7 @@ mod passing {
"https://doesntmatter.local/",
&CSS,
&options,
0,
),
CSS
);
@ -336,6 +342,7 @@ mod passing {
"https://doesntmatter.local/",
&CSS,
&options,
0,
),
CSS_OUT
);

View file

@ -21,7 +21,7 @@ mod passing {
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options);
let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0);
assert_eq!(
format!("{} 1x, {} 2x", empty_image!(), empty_image!()),

View file

@ -27,7 +27,7 @@ mod passing {
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
@ -50,7 +50,7 @@ mod passing {
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
@ -73,7 +73,7 @@ mod passing {
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
@ -99,7 +99,7 @@ mod passing {
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
@ -132,7 +132,7 @@ mod passing {
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
@ -169,7 +169,7 @@ mod passing {
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
@ -193,7 +193,7 @@ mod passing {
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
@ -217,7 +217,7 @@ mod passing {
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
@ -244,7 +244,7 @@ mod passing {
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
@ -274,7 +274,7 @@ mod passing {
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
@ -310,7 +310,7 @@ mod passing {
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

31
src/tests/utils/indent.rs Normal file
View file

@ -0,0 +1,31 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::utils;

    // `utils::indent` emits one space per nesting level, so the expected
    // string for level N is exactly N spaces. The multi-space expectations are
    // written with `repeat` so the intended width is explicit and cannot be
    // silently altered by whitespace normalisation.

    /// Depth 0 (the top-level document) gets no indentation at all.
    #[test]
    fn zero() {
        assert_eq!(utils::indent(0), "");
    }

    /// Depth 1 is a single indentation unit.
    #[test]
    fn one() {
        assert_eq!(utils::indent(1), " ");
    }

    /// Depth 2 is two indentation units.
    #[test]
    fn two() {
        assert_eq!(utils::indent(2), " ".repeat(2));
    }

    /// Depth 3 is three indentation units.
    #[test]
    fn three() {
        assert_eq!(utils::indent(3), " ".repeat(3));
    }
}

View file

@ -1,2 +1,3 @@
mod detect_media_type;
mod indent;
mod retrieve_asset;

View file

@ -27,6 +27,7 @@ mod passing {
"data:text/html;base64,c291cmNl",
"data:text/html;base64,dGFyZ2V0",
false,
0,
)
.unwrap();
assert_eq!(
@ -63,6 +64,7 @@ mod passing {
cwd = cwd.to_str().unwrap()
),
false,
0,
)
.unwrap();
assert_eq!(url::data_to_data_url("application/javascript", &data, &final_url), "data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==");
@ -103,6 +105,7 @@ mod failing {
"data:text/html;base64,SoUrCe",
"file:///etc/passwd",
false,
0,
) {
Ok((..)) => {
assert!(false);
@ -125,6 +128,7 @@ mod failing {
"https://kernel.org/",
"file:///etc/passwd",
false,
0,
) {
Ok((..)) => {
assert!(false);

View file

@ -6,6 +6,8 @@ use std::path::Path;
use crate::url::{clean_url, data_url_to_data, file_url_to_fs_path, is_data_url, is_file_url};
// Whitespace unit for a single nesting level; `indent` concatenates it once
// per level to build the prefix for progress messages.
const INDENT: &str = " ";
const MAGIC: [[&[u8]; 2]; 18] = [
// Image
[b"GIF87a", b"image/gif"],
@ -56,12 +58,23 @@ pub fn is_plaintext_media_type(media_type: &str) -> bool {
PLAINTEXT_MEDIA_TYPES.contains(&media_type.to_lowercase().as_str())
}
pub fn indent(level: u32) -> String {
let mut result = str!();
let mut l: u32 = level;
while l > 0 {
result += INDENT;
l -= 1;
}
result
}
pub fn retrieve_asset(
cache: &mut HashMap<String, Vec<u8>>,
client: &Client,
parent_url: &str,
url: &str,
opt_silent: bool,
depth: u32,
) -> Result<(Vec<u8>, String, String), reqwest::Error> {
if url.len() == 0 {
// Provoke error
@ -83,7 +96,7 @@ pub fn retrieve_asset(
let path = Path::new(&fs_file_path);
if path.exists() {
if !opt_silent {
eprintln!("{}", &url);
eprintln!("{}{}", indent(depth).as_str(), &url);
}
Ok((fs::read(&fs_file_path).expect(""), url.to_string(), str!()))
@ -97,7 +110,7 @@ pub fn retrieve_asset(
if cache.contains_key(&cache_key) {
// URL is in cache, we get and return it
if !opt_silent {
eprintln!("{} (from cache)", &url);
eprintln!("{}{} (from cache)", indent(depth).as_str(), &url);
}
Ok((
@ -112,9 +125,9 @@ pub fn retrieve_asset(
if !opt_silent {
if url == res_url {
eprintln!("{}", &url);
eprintln!("{}{}", indent(depth).as_str(), &url);
} else {
eprintln!("{} -> {}", &url, &res_url);
eprintln!("{}{} -> {}", indent(depth).as_str(), &url, &res_url);
}
}