Merge pull request #193 from snshn/options-struct

Pass options object instead of using separate parameters
This commit is contained in:
Sunshine 2020-06-28 01:51:05 -04:00 committed by GitHub
commit 438ebd520a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 320 additions and 632 deletions

View file

@ -2,6 +2,7 @@ use cssparser::{ParseError, Parser, ParserInput, SourcePosition, Token};
use reqwest::blocking::Client; use reqwest::blocking::Client;
use std::collections::HashMap; use std::collections::HashMap;
use crate::opts::Options;
use crate::url::{data_to_data_url, get_url_fragment, is_http_url, resolve_url, url_with_fragment}; use crate::url::{data_to_data_url, get_url_fragment, is_http_url, resolve_url, url_with_fragment};
use crate::utils::retrieve_asset; use crate::utils::retrieve_asset;
@ -59,12 +60,10 @@ pub fn process_css<'a>(
client: &Client, client: &Client,
parent_url: &str, parent_url: &str,
parser: &mut Parser, parser: &mut Parser,
options: &Options,
rule_name: &str, rule_name: &str,
prop_name: &str, prop_name: &str,
func_name: &str, func_name: &str,
opt_no_fonts: bool,
opt_no_images: bool,
opt_silent: bool,
) -> Result<String, ParseError<'a, String>> { ) -> Result<String, ParseError<'a, String>> {
let mut result: String = str!(); let mut result: String = str!();
@ -91,7 +90,7 @@ pub fn process_css<'a>(
Token::Colon => result.push_str(":"), Token::Colon => result.push_str(":"),
Token::Comma => result.push_str(","), Token::Comma => result.push_str(","),
Token::ParenthesisBlock | Token::SquareBracketBlock | Token::CurlyBracketBlock => { Token::ParenthesisBlock | Token::SquareBracketBlock | Token::CurlyBracketBlock => {
if opt_no_fonts && curr_rule == "font-face" { if options.no_fonts && curr_rule == "font-face" {
continue; continue;
} }
@ -114,12 +113,10 @@ pub fn process_css<'a>(
client, client,
parent_url, parent_url,
parser, parser,
options,
rule_name, rule_name,
curr_prop.as_str(), curr_prop.as_str(),
func_name, func_name,
opt_no_fonts,
opt_no_images,
opt_silent,
) )
}) })
.unwrap(); .unwrap();
@ -149,7 +146,7 @@ pub fn process_css<'a>(
// @import, @font-face, @charset, @media... // @import, @font-face, @charset, @media...
Token::AtKeyword(ref value) => { Token::AtKeyword(ref value) => {
curr_rule = str!(value); curr_rule = str!(value);
if opt_no_fonts && curr_rule == "font-face" { if options.no_fonts && curr_rule == "font-face" {
continue; continue;
} }
result.push_str("@"); result.push_str("@");
@ -172,7 +169,13 @@ pub fn process_css<'a>(
let import_full_url = resolve_url(&parent_url, value).unwrap_or_default(); let import_full_url = resolve_url(&parent_url, value).unwrap_or_default();
let import_url_fragment = get_url_fragment(import_full_url.clone()); let import_url_fragment = get_url_fragment(import_full_url.clone());
match retrieve_asset(cache, client, &parent_url, &import_full_url, opt_silent) { match retrieve_asset(
cache,
client,
&parent_url,
&import_full_url,
options.silent,
) {
Ok((import_contents, import_final_url, _import_media_type)) => { Ok((import_contents, import_final_url, _import_media_type)) => {
let import_data_url = data_to_data_url( let import_data_url = data_to_data_url(
"text/css", "text/css",
@ -181,9 +184,7 @@ pub fn process_css<'a>(
client, client,
&import_final_url, &import_final_url,
&String::from_utf8_lossy(&import_contents), &String::from_utf8_lossy(&import_contents),
opt_no_fonts, options,
opt_no_images,
opt_silent,
) )
.as_bytes(), .as_bytes(),
&import_final_url, &import_final_url,
@ -212,7 +213,7 @@ pub fn process_css<'a>(
continue; continue;
} }
if opt_no_images && is_image_url_prop(curr_prop.as_str()) { if options.no_images && is_image_url_prop(curr_prop.as_str()) {
result.push_str(enquote(str!(empty_image!()), false).as_str()); result.push_str(enquote(str!(empty_image!()), false).as_str());
} else { } else {
let resolved_url = resolve_url(&parent_url, value).unwrap_or_default(); let resolved_url = resolve_url(&parent_url, value).unwrap_or_default();
@ -222,7 +223,7 @@ pub fn process_css<'a>(
client, client,
&parent_url, &parent_url,
&resolved_url, &resolved_url,
opt_silent, options.silent,
) { ) {
Ok((data, final_url, media_type)) => { Ok((data, final_url, media_type)) => {
let data_url = data_to_data_url(&media_type, &data, &final_url); let data_url = data_to_data_url(&media_type, &data, &final_url);
@ -265,7 +266,7 @@ pub fn process_css<'a>(
if *has_sign && *unit_value >= 0. { if *has_sign && *unit_value >= 0. {
result.push_str("+"); result.push_str("+");
} }
result.push_str(str!(unit_value * 100.).as_str()); result.push_str(str!(unit_value * 100.0).as_str());
result.push_str("%"); result.push_str("%");
} }
Token::Dimension { Token::Dimension {
@ -309,7 +310,7 @@ pub fn process_css<'a>(
if is_import { if is_import {
let full_url = resolve_url(&parent_url, value).unwrap_or_default(); let full_url = resolve_url(&parent_url, value).unwrap_or_default();
let url_fragment = get_url_fragment(full_url.clone()); let url_fragment = get_url_fragment(full_url.clone());
match retrieve_asset(cache, client, &parent_url, &full_url, opt_silent) { match retrieve_asset(cache, client, &parent_url, &full_url, options.silent) {
Ok((css, final_url, _media_type)) => { Ok((css, final_url, _media_type)) => {
let data_url = data_to_data_url( let data_url = data_to_data_url(
"text/css", "text/css",
@ -318,9 +319,7 @@ pub fn process_css<'a>(
client, client,
&final_url, &final_url,
&String::from_utf8_lossy(&css), &String::from_utf8_lossy(&css),
opt_no_fonts, options,
opt_no_images,
opt_silent,
) )
.as_bytes(), .as_bytes(),
&final_url, &final_url,
@ -339,12 +338,13 @@ pub fn process_css<'a>(
} }
} }
} else { } else {
if opt_no_images && is_image_url_prop(curr_prop.as_str()) { if options.no_images && is_image_url_prop(curr_prop.as_str()) {
result.push_str(enquote(str!(empty_image!()), false).as_str()); result.push_str(enquote(str!(empty_image!()), false).as_str());
} else { } else {
let full_url = resolve_url(&parent_url, value).unwrap_or_default(); let full_url = resolve_url(&parent_url, value).unwrap_or_default();
let url_fragment = get_url_fragment(full_url.clone()); let url_fragment = get_url_fragment(full_url.clone());
match retrieve_asset(cache, client, &parent_url, &full_url, opt_silent) { match retrieve_asset(cache, client, &parent_url, &full_url, options.silent)
{
Ok((data, final_url, media_type)) => { Ok((data, final_url, media_type)) => {
let data_url = data_to_data_url(&media_type, &data, &final_url); let data_url = data_to_data_url(&media_type, &data, &final_url);
let assembled_url: String = let assembled_url: String =
@ -377,12 +377,10 @@ pub fn process_css<'a>(
client, client,
parent_url, parent_url,
parser, parser,
options,
curr_rule.as_str(), curr_rule.as_str(),
curr_prop.as_str(), curr_prop.as_str(),
function_name, function_name,
opt_no_fonts,
opt_no_images,
opt_silent,
) )
}) })
.unwrap(); .unwrap();
@ -407,24 +405,10 @@ pub fn embed_css(
client: &Client, client: &Client,
parent_url: &str, parent_url: &str,
css: &str, css: &str,
opt_no_fonts: bool, options: &Options,
opt_no_images: bool,
opt_silent: bool,
) -> String { ) -> String {
let mut input = ParserInput::new(&css); let mut input = ParserInput::new(&css);
let mut parser = Parser::new(&mut input); let mut parser = Parser::new(&mut input);
process_css( process_css(cache, client, parent_url, &mut parser, options, "", "", "").unwrap()
cache,
client,
parent_url,
&mut parser,
"",
"",
"",
opt_no_fonts,
opt_no_images,
opt_silent,
)
.unwrap()
} }

View file

@ -15,6 +15,7 @@ use std::default::Default;
use crate::css::embed_css; use crate::css::embed_css;
use crate::js::attr_is_event_handler; use crate::js::attr_is_event_handler;
use crate::opts::Options;
use crate::url::{ use crate::url::{
data_to_data_url, get_url_fragment, is_http_url, resolve_url, url_has_protocol, data_to_data_url, get_url_fragment, is_http_url, resolve_url, url_has_protocol,
url_with_fragment, url_with_fragment,
@ -73,8 +74,7 @@ pub fn embed_srcset(
client: &Client, client: &Client,
parent_url: &str, parent_url: &str,
srcset: &str, srcset: &str,
opt_no_images: bool, options: &Options,
opt_silent: bool,
) -> String { ) -> String {
let mut array: Vec<SrcSetItem> = vec![]; let mut array: Vec<SrcSetItem> = vec![];
let srcset_items: Vec<&str> = srcset.split(',').collect(); let srcset_items: Vec<&str> = srcset.split(',').collect();
@ -89,12 +89,12 @@ pub fn embed_srcset(
let mut result: String = str!(); let mut result: String = str!();
let mut i: usize = array.len(); let mut i: usize = array.len();
for part in array { for part in array {
if opt_no_images { if options.no_images {
result.push_str(empty_image!()); result.push_str(empty_image!());
} else { } else {
let image_full_url = resolve_url(&parent_url, part.path).unwrap_or_default(); let image_full_url = resolve_url(&parent_url, part.path).unwrap_or_default();
let image_url_fragment = get_url_fragment(image_full_url.clone()); let image_url_fragment = get_url_fragment(image_full_url.clone());
match retrieve_asset(cache, client, &parent_url, &image_full_url, opt_silent) { match retrieve_asset(cache, client, &parent_url, &image_full_url, options.silent) {
Ok((image_data, image_final_url, image_media_type)) => { Ok((image_data, image_final_url, image_media_type)) => {
let image_data_url = let image_data_url =
data_to_data_url(&image_media_type, &image_data, &image_final_url); data_to_data_url(&image_media_type, &image_data, &image_final_url);
@ -137,29 +137,13 @@ pub fn walk_and_embed_assets(
client: &Client, client: &Client,
url: &str, url: &str,
node: &Handle, node: &Handle,
opt_no_css: bool, options: &Options,
opt_no_fonts: bool,
opt_no_frames: bool,
opt_no_js: bool,
opt_no_images: bool,
opt_silent: bool,
) { ) {
match node.data { match node.data {
NodeData::Document => { NodeData::Document => {
// Dig deeper // Dig deeper
for child in node.children.borrow().iter() { for child in node.children.borrow().iter() {
walk_and_embed_assets( walk_and_embed_assets(cache, client, &url, child, options);
cache,
client,
&url,
child,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
);
} }
} }
NodeData::Element { NodeData::Element {
@ -245,7 +229,7 @@ pub fn walk_and_embed_assets(
} }
} }
if !opt_no_images && !link_href.is_empty() { if !options.no_images && !link_href.is_empty() {
let link_href_full_url = let link_href_full_url =
resolve_url(&url, link_href).unwrap_or_default(); resolve_url(&url, link_href).unwrap_or_default();
let link_href_url_fragment = let link_href_url_fragment =
@ -255,7 +239,7 @@ pub fn walk_and_embed_assets(
client, client,
&url, &url,
&link_href_full_url, &link_href_full_url,
opt_silent, options.silent,
) { ) {
Ok(( Ok((
link_href_data, link_href_data,
@ -319,7 +303,7 @@ pub fn walk_and_embed_assets(
} }
} }
if !opt_no_css && !link_href.is_empty() { if !options.no_css && !link_href.is_empty() {
let link_href_full_url = let link_href_full_url =
resolve_url(&url, link_href).unwrap_or_default(); resolve_url(&url, link_href).unwrap_or_default();
match retrieve_asset( match retrieve_asset(
@ -327,7 +311,7 @@ pub fn walk_and_embed_assets(
client, client,
&url, &url,
&link_href_full_url, &link_href_full_url,
opt_silent, options.silent,
) { ) {
Ok(( Ok((
link_href_data, link_href_data,
@ -343,9 +327,7 @@ pub fn walk_and_embed_assets(
client, client,
&link_href_final_url, &link_href_final_url,
&String::from_utf8_lossy(&link_href_data), &String::from_utf8_lossy(&link_href_data),
opt_no_fonts, options,
opt_no_images,
opt_silent,
); );
let link_href_data_url = data_to_data_url( let link_href_data_url = data_to_data_url(
"text/css", "text/css",
@ -415,11 +397,16 @@ pub fn walk_and_embed_assets(
} }
} }
if !opt_no_images && !background.is_empty() { if !options.no_images && !background.is_empty() {
let background_full_url = resolve_url(&url, background).unwrap_or_default(); let background_full_url = resolve_url(&url, background).unwrap_or_default();
let background_url_fragment = get_url_fragment(background_full_url.clone()); let background_url_fragment = get_url_fragment(background_full_url.clone());
match retrieve_asset(cache, client, &url, &background_full_url, opt_silent) match retrieve_asset(
{ cache,
client,
&url,
&background_full_url,
options.silent,
) {
Ok((background_data, background_final_url, background_media_type)) => { Ok((background_data, background_final_url, background_media_type)) => {
let background_data_url = data_to_data_url( let background_data_url = data_to_data_url(
&background_media_type, &background_media_type,
@ -471,7 +458,7 @@ pub fn walk_and_embed_assets(
} }
} }
if opt_no_images { if options.no_images {
// Add empty image src attribute // Add empty image src attribute
attrs_mut.push(Attribute { attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("src")), name: QualName::new(None, ns!(), local_name!("src")),
@ -496,7 +483,8 @@ pub fn walk_and_embed_assets(
) )
.unwrap_or_default(); .unwrap_or_default();
let img_url_fragment = get_url_fragment(img_full_url.clone()); let img_url_fragment = get_url_fragment(img_full_url.clone());
match retrieve_asset(cache, client, &url, &img_full_url, opt_silent) { match retrieve_asset(cache, client, &url, &img_full_url, options.silent)
{
Ok((img_data, img_final_url, img_media_type)) => { Ok((img_data, img_final_url, img_media_type)) => {
let img_data_url = data_to_data_url( let img_data_url = data_to_data_url(
&img_media_type, &img_media_type,
@ -533,21 +521,13 @@ pub fn walk_and_embed_assets(
attrs_mut.push(Attribute { attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("srcset")), name: QualName::new(None, ns!(), local_name!("srcset")),
value: Tendril::from_slice( value: Tendril::from_slice(
embed_srcset( embed_srcset(cache, client, &url, &img_srcset, options).as_ref(),
cache,
client,
&url,
&img_srcset,
opt_no_images,
opt_silent,
)
.as_ref(),
), ),
}); });
} }
} }
"svg" => { "svg" => {
if opt_no_images { if options.no_images {
node.children.borrow_mut().clear(); node.children.borrow_mut().clear();
} }
} }
@ -573,7 +553,7 @@ pub fn walk_and_embed_assets(
} }
} }
if opt_no_images || input_image_src.is_empty() { if options.no_images || input_image_src.is_empty() {
attrs_mut.push(Attribute { attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("src")), name: QualName::new(None, ns!(), local_name!("src")),
value: Tendril::from_slice(if input_image_src.is_empty() { value: Tendril::from_slice(if input_image_src.is_empty() {
@ -592,7 +572,7 @@ pub fn walk_and_embed_assets(
client, client,
&url, &url,
&input_image_full_url, &input_image_full_url,
opt_silent, options.silent,
) { ) {
Ok(( Ok((
input_image_data, input_image_data,
@ -646,10 +626,10 @@ pub fn walk_and_embed_assets(
} }
} }
if !opt_no_images && !image_href.is_empty() { if !options.no_images && !image_href.is_empty() {
let image_full_url = resolve_url(&url, image_href).unwrap_or_default(); let image_full_url = resolve_url(&url, image_href).unwrap_or_default();
let image_url_fragment = get_url_fragment(image_full_url.clone()); let image_url_fragment = get_url_fragment(image_full_url.clone());
match retrieve_asset(cache, client, &url, &image_full_url, opt_silent) { match retrieve_asset(cache, client, &url, &image_full_url, options.silent) {
Ok((image_data, image_final_url, image_media_type)) => { Ok((image_data, image_final_url, image_media_type)) => {
let image_data_url = data_to_data_url( let image_data_url = data_to_data_url(
&image_media_type, &image_media_type,
@ -693,7 +673,7 @@ pub fn walk_and_embed_assets(
attr.value.push_slice(src_full_url.as_str()); attr.value.push_slice(src_full_url.as_str());
} else if attr_name.eq_ignore_ascii_case("srcset") { } else if attr_name.eq_ignore_ascii_case("srcset") {
if get_node_name(&get_parent_node(&node)) == Some("picture") { if get_node_name(&get_parent_node(&node)) == Some("picture") {
if opt_no_images { if options.no_images {
attr.value.clear(); attr.value.clear();
attr.value.push_slice(empty_image!()); attr.value.push_slice(empty_image!());
} else { } else {
@ -706,7 +686,7 @@ pub fn walk_and_embed_assets(
client, client,
&url, &url,
&srcset_full_url, &srcset_full_url,
opt_silent, options.silent,
) { ) {
Ok((srcset_data, srcset_final_url, srcset_media_type)) => { Ok((srcset_data, srcset_final_url, srcset_media_type)) => {
let srcset_data_url = data_to_data_url( let srcset_data_url = data_to_data_url(
@ -744,7 +724,7 @@ pub fn walk_and_embed_assets(
if attr_name.eq_ignore_ascii_case("href") { if attr_name.eq_ignore_ascii_case("href") {
let attr_value = attr.value.trim(); let attr_value = attr.value.trim();
if opt_no_js && attr_value.starts_with("javascript:") { if options.no_js && attr_value.trim().starts_with("javascript:") {
attr.value.clear(); attr.value.clear();
// Replace with empty JS call to preserve original behavior // Replace with empty JS call to preserve original behavior
attr.value.push_slice("javascript:;"); attr.value.push_slice("javascript:;");
@ -778,12 +758,13 @@ pub fn walk_and_embed_assets(
} }
} }
if opt_no_js { if options.no_js {
// Empty inner content (src is already gone) // Empty inner content (src is already gone)
node.children.borrow_mut().clear(); node.children.borrow_mut().clear();
} else if !script_src.is_empty() { } else if !script_src.is_empty() {
let script_full_url = resolve_url(&url, script_src).unwrap_or_default(); let script_full_url = resolve_url(&url, script_src).unwrap_or_default();
match retrieve_asset(cache, client, &url, &script_full_url, opt_silent) { match retrieve_asset(cache, client, &url, &script_full_url, options.silent)
{
Ok((script_data, script_final_url, _script_media_type)) => { Ok((script_data, script_final_url, _script_media_type)) => {
// Only embed if we're able to validate integrity // Only embed if we're able to validate integrity
if script_integrity.is_empty() if script_integrity.is_empty()
@ -814,22 +795,15 @@ pub fn walk_and_embed_assets(
} }
} }
"style" => { "style" => {
if opt_no_css { if options.no_css {
// Empty inner content of STYLE tags // Empty inner content of STYLE tags
node.children.borrow_mut().clear(); node.children.borrow_mut().clear();
} else { } else {
for node in node.children.borrow_mut().iter_mut() { for node in node.children.borrow_mut().iter_mut() {
if let NodeData::Text { ref contents } = node.data { if let NodeData::Text { ref contents } = node.data {
let mut tendril = contents.borrow_mut(); let mut tendril = contents.borrow_mut();
let replacement = embed_css( let replacement =
cache, embed_css(cache, client, &url, tendril.as_ref(), options);
client,
&url,
tendril.as_ref(),
opt_no_fonts,
opt_no_images,
opt_silent,
);
tendril.clear(); tendril.clear();
tendril.push_slice(&replacement); tendril.push_slice(&replacement);
} }
@ -855,7 +829,7 @@ pub fn walk_and_embed_assets(
for attr in attrs_mut.iter_mut() { for attr in attrs_mut.iter_mut() {
let attr_name: &str = &attr.name.local; let attr_name: &str = &attr.name.local;
if attr_name.eq_ignore_ascii_case("src") { if attr_name.eq_ignore_ascii_case("src") {
if opt_no_frames { if options.no_frames {
// Empty the src attribute // Empty the src attribute
attr.value.clear(); attr.value.clear();
continue; continue;
@ -870,7 +844,13 @@ pub fn walk_and_embed_assets(
let frame_full_url = resolve_url(&url, frame_src).unwrap_or_default(); let frame_full_url = resolve_url(&url, frame_src).unwrap_or_default();
let frame_url_fragment = get_url_fragment(frame_full_url.clone()); let frame_url_fragment = get_url_fragment(frame_full_url.clone());
match retrieve_asset(cache, client, &url, &frame_full_url, opt_silent) { match retrieve_asset(
cache,
client,
&url,
&frame_full_url,
options.silent,
) {
Ok((frame_data, frame_final_url, frame_media_type)) => { Ok((frame_data, frame_final_url, frame_media_type)) => {
let frame_dom = let frame_dom =
html_to_dom(&String::from_utf8_lossy(&frame_data)); html_to_dom(&String::from_utf8_lossy(&frame_data));
@ -879,12 +859,7 @@ pub fn walk_and_embed_assets(
client, client,
&frame_final_url, &frame_final_url,
&frame_dom.document, &frame_dom.document,
opt_no_css, &options,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
); );
let mut frame_data: Vec<u8> = Vec::new(); let mut frame_data: Vec<u8> = Vec::new();
serialize( serialize(
@ -931,7 +906,7 @@ pub fn walk_and_embed_assets(
continue; continue;
} }
if opt_no_images { if options.no_images {
attr.value.clear(); attr.value.clear();
continue; continue;
} }
@ -945,7 +920,7 @@ pub fn walk_and_embed_assets(
client, client,
&url, &url,
&video_poster_full_url, &video_poster_full_url,
opt_silent, options.silent,
) { ) {
Ok(( Ok((
video_poster_data, video_poster_data,
@ -983,7 +958,7 @@ pub fn walk_and_embed_assets(
} }
// Process style attributes // Process style attributes
if opt_no_css { if options.no_css {
// Get rid of style attributes // Get rid of style attributes
let mut i = 0; let mut i = 0;
while i < attrs_mut.len() { while i < attrs_mut.len() {
@ -1000,22 +975,15 @@ pub fn walk_and_embed_assets(
.iter_mut() .iter_mut()
.filter(|a| a.name.local.as_ref().eq_ignore_ascii_case("style")) .filter(|a| a.name.local.as_ref().eq_ignore_ascii_case("style"))
{ {
let replacement = embed_css( let replacement =
cache, embed_css(cache, client, &url, attribute.value.as_ref(), options);
client,
&url,
attribute.value.as_ref(),
opt_no_fonts,
opt_no_images,
opt_silent,
);
// let replacement = str!(); // let replacement = str!();
attribute.value.clear(); attribute.value.clear();
attribute.value.push_slice(&replacement); attribute.value.push_slice(&replacement);
} }
} }
if opt_no_js { if options.no_js {
// Get rid of JS event attributes // Get rid of JS event attributes
let mut js_attr_indexes = Vec::new(); let mut js_attr_indexes = Vec::new();
for (i, attr) in attrs_mut.iter_mut().enumerate() { for (i, attr) in attrs_mut.iter_mut().enumerate() {
@ -1031,22 +999,11 @@ pub fn walk_and_embed_assets(
// Dig deeper // Dig deeper
for child in node.children.borrow().iter() { for child in node.children.borrow().iter() {
walk_and_embed_assets( walk_and_embed_assets(cache, client, &url, child, options);
cache,
client,
&url,
child,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
);
} }
} }
_ => { _ => {
// Note: in case of opt_no_js being set to true, there's no need to worry about // Note: in case of options.no_js being set to true, there's no need to worry about
// getting rid of comments that may contain scripts, e.g. <!--[if IE]><script>... // getting rid of comments that may contain scripts, e.g. <!--[if IE]><script>...
// since that's not part of W3C standard and therefore gets ignored // since that's not part of W3C standard and therefore gets ignored
// by browsers other than IE [5, 9] // by browsers other than IE [5, 9]
@ -1073,15 +1030,7 @@ fn get_child_node_by_name(handle: &Handle, node_name: &str) -> Handle {
} }
} }
pub fn stringify_document( pub fn stringify_document(handle: &Handle, options: &Options) -> String {
handle: &Handle,
opt_no_css: bool,
opt_no_fonts: bool,
opt_no_frames: bool,
opt_no_js: bool,
opt_no_images: bool,
opt_isolate: bool,
) -> String {
let mut buf: Vec<u8> = Vec::new(); let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, handle, SerializeOpts::default()) serialize(&mut buf, handle, SerializeOpts::default())
.expect("unable to serialize DOM into buffer"); .expect("unable to serialize DOM into buffer");
@ -1089,20 +1038,19 @@ pub fn stringify_document(
let mut result = String::from_utf8(buf).unwrap(); let mut result = String::from_utf8(buf).unwrap();
// Take care of CSP // Take care of CSP
if opt_isolate || opt_no_css || opt_no_fonts || opt_no_frames || opt_no_js || opt_no_images { if options.isolate
|| options.no_css
|| options.no_fonts
|| options.no_frames
|| options.no_js
|| options.no_images
{
let mut buf: Vec<u8> = Vec::new(); let mut buf: Vec<u8> = Vec::new();
let mut dom = html_to_dom(&result); let mut dom = html_to_dom(&result);
let doc = dom.get_document(); let doc = dom.get_document();
let html = get_child_node_by_name(&doc, "html"); let html = get_child_node_by_name(&doc, "html");
let head = get_child_node_by_name(&html, "head"); let head = get_child_node_by_name(&html, "head");
let csp_content: String = csp( let csp_content: String = csp(options);
opt_isolate,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
);
let meta = dom.create_element( let meta = dom.create_element(
QualName::new(None, ns!(), local_name!("meta")), QualName::new(None, ns!(), local_name!("meta")),
@ -1136,38 +1084,31 @@ pub fn stringify_document(
result result
} }
pub fn csp( pub fn csp(options: &Options) -> String {
opt_isolate: bool,
opt_no_css: bool,
opt_no_fonts: bool,
opt_no_frames: bool,
opt_no_js: bool,
opt_no_images: bool,
) -> String {
let mut string_list = vec![]; let mut string_list = vec![];
if opt_isolate { if options.isolate {
string_list.push("default-src 'unsafe-inline' data:;"); string_list.push("default-src 'unsafe-inline' data:;");
} }
if opt_no_css { if options.no_css {
string_list.push("style-src 'none';"); string_list.push("style-src 'none';");
} }
if opt_no_fonts { if options.no_fonts {
string_list.push("font-src 'none';"); string_list.push("font-src 'none';");
} }
if opt_no_frames { if options.no_frames {
string_list.push("frame-src 'none';"); string_list.push("frame-src 'none';");
string_list.push("child-src 'none';"); string_list.push("child-src 'none';");
} }
if opt_no_js { if options.no_js {
string_list.push("script-src 'none';"); string_list.push("script-src 'none';");
} }
if opt_no_images { if options.no_images {
// Note: data: is needed for transparent pixels // Note: data: is needed for transparent pixels
string_list.push("img-src data:;"); string_list.push("img-src data:;");
} }

View file

@ -1,9 +1,13 @@
#[macro_use]
extern crate clap;
#[macro_use] #[macro_use]
mod macros; mod macros;
pub mod css; pub mod css;
pub mod html; pub mod html;
pub mod js; pub mod js;
pub mod opts;
pub mod url; pub mod url;
pub mod utils; pub mod utils;

View file

@ -9,16 +9,12 @@ use std::process;
use std::time::Duration; use std::time::Duration;
use monolith::html::{html_to_dom, metadata_tag, stringify_document, walk_and_embed_assets}; use monolith::html::{html_to_dom, metadata_tag, stringify_document, walk_and_embed_assets};
use monolith::opts::Options;
use monolith::url::{data_url_to_data, is_data_url, is_file_url, is_http_url}; use monolith::url::{data_url_to_data, is_data_url, is_file_url, is_http_url};
use monolith::utils::retrieve_asset; use monolith::utils::retrieve_asset;
mod args;
mod macros; mod macros;
#[macro_use]
extern crate clap;
use crate::args::AppArgs;
enum Output { enum Output {
Stdout(io::Stdout), Stdout(io::Stdout),
File(fs::File), File(fs::File),
@ -48,8 +44,8 @@ impl Output {
} }
fn main() { fn main() {
let app_args = AppArgs::get(); let options = Options::from_args();
let original_target: &str = &app_args.target; let original_target: &str = &options.target;
let target_url: &str; let target_url: &str;
let base_url; let base_url;
let dom; let dom;
@ -89,30 +85,30 @@ fn main() {
} }
// Define output // Define output
let mut output = Output::new(&app_args.output).expect("Could not prepare output"); let mut output = Output::new(&options.output).expect("Could not prepare output");
// Initialize client // Initialize client
let mut cache = HashMap::new(); let mut cache = HashMap::new();
let mut header_map = HeaderMap::new(); let mut header_map = HeaderMap::new();
header_map.insert( header_map.insert(
USER_AGENT, USER_AGENT,
HeaderValue::from_str(&app_args.user_agent).expect("Invalid User-Agent header specified"), HeaderValue::from_str(&options.user_agent).expect("Invalid User-Agent header specified"),
); );
let timeout: u64 = if app_args.timeout > 0 { let timeout: u64 = if options.timeout > 0 {
app_args.timeout options.timeout
} else { } else {
std::u64::MAX / 4 std::u64::MAX / 4
}; };
let client = Client::builder() let client = Client::builder()
.timeout(Duration::from_secs(timeout)) .timeout(Duration::from_secs(timeout))
.danger_accept_invalid_certs(app_args.insecure) .danger_accept_invalid_certs(options.insecure)
.default_headers(header_map) .default_headers(header_map)
.build() .build()
.expect("Failed to initialize HTTP client"); .expect("Failed to initialize HTTP client");
// Retrieve target document // Retrieve target document
if is_file_url(target_url) || is_http_url(target_url) { if is_file_url(target_url) || is_http_url(target_url) {
match retrieve_asset(&mut cache, &client, target_url, target_url, app_args.silent) { match retrieve_asset(&mut cache, &client, target_url, target_url, options.silent) {
Ok((data, final_url, _media_type)) => { Ok((data, final_url, _media_type)) => {
base_url = final_url; base_url = final_url;
dom = html_to_dom(&String::from_utf8_lossy(&data)); dom = html_to_dom(&String::from_utf8_lossy(&data));
@ -135,32 +131,13 @@ fn main() {
} }
// Embed remote assets // Embed remote assets
walk_and_embed_assets( walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options);
&mut cache,
&client,
&base_url,
&dom.document,
app_args.no_css,
app_args.no_fonts,
app_args.no_frames,
app_args.no_js,
app_args.no_images,
app_args.silent,
);
// Serialize DOM tree // Serialize DOM tree
let mut result: String = stringify_document( let mut result: String = stringify_document(&dom.document, &options);
&dom.document,
app_args.no_css,
app_args.no_fonts,
app_args.no_frames,
app_args.no_js,
app_args.no_images,
app_args.isolate,
);
// Add metadata tag // Add metadata tag
if !app_args.no_metadata { if !options.no_metadata {
let metadata_comment = metadata_tag(&base_url); let metadata_comment = metadata_tag(&base_url);
result.insert_str(0, &metadata_comment); result.insert_str(0, &metadata_comment);
if metadata_comment.len() > 0 { if metadata_comment.len() > 0 {

View file

@ -1,7 +1,7 @@
use clap::{App, Arg}; use clap::{App, Arg};
#[derive(Default)] #[derive(Default)]
pub struct AppArgs { pub struct Options {
pub target: String, pub target: String,
pub no_css: bool, pub no_css: bool,
pub no_fonts: bool, pub no_fonts: bool,
@ -21,8 +21,8 @@ const DEFAULT_NETWORK_TIMEOUT: u64 = 120;
const DEFAULT_USER_AGENT: &str = const DEFAULT_USER_AGENT: &str =
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0"; "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0";
impl AppArgs { impl Options {
pub fn get() -> AppArgs { pub fn from_args() -> Options {
let app = App::new(env!("CARGO_PKG_NAME")) let app = App::new(env!("CARGO_PKG_NAME"))
.version(crate_version!()) .version(crate_version!())
.author(crate_authors!("\n")) .author(crate_authors!("\n"))
@ -34,7 +34,7 @@ impl AppArgs {
.index(1) .index(1)
.help("URL or file path"), .help("URL or file path"),
) )
// .args_from_usage("-a, --include-audio 'Removes audio sources'") // .args_from_usage("-a, --no-audio 'Removes audio sources'")
.args_from_usage("-c, --no-css 'Removes CSS'") .args_from_usage("-c, --no-css 'Removes CSS'")
.args_from_usage("-f, --no-frames 'Removes frames and iframes'") .args_from_usage("-f, --no-frames 'Removes frames and iframes'")
.args_from_usage("-F, --no-fonts 'Removes fonts'") .args_from_usage("-F, --no-fonts 'Removes fonts'")
@ -47,33 +47,35 @@ impl AppArgs {
.args_from_usage("-s, --silent 'Suppresses verbosity'") .args_from_usage("-s, --silent 'Suppresses verbosity'")
.args_from_usage("-t, --timeout=[60] 'Adjusts network request timeout'") .args_from_usage("-t, --timeout=[60] 'Adjusts network request timeout'")
.args_from_usage("-u, --user-agent=[Firefox] 'Sets custom User-Agent string'") .args_from_usage("-u, --user-agent=[Firefox] 'Sets custom User-Agent string'")
// .args_from_usage("-v, --include-video 'Removes video sources'") // .args_from_usage("-v, --no-video 'Removes video sources'")
.get_matches(); .get_matches();
let mut app_args = AppArgs::default(); let mut options: Options = Options::default();
// Process the command // Process the command
app_args.target = app options.target = app
.value_of("target") .value_of("target")
.expect("please set target") .expect("please set target")
.to_string(); .to_string();
app_args.no_css = app.is_present("no-css"); options.no_css = app.is_present("no-css");
app_args.no_fonts = app.is_present("no-fonts"); options.no_frames = app.is_present("no-frames");
app_args.no_frames = app.is_present("no-frames"); options.no_fonts = app.is_present("no-fonts");
app_args.no_images = app.is_present("no-images"); options.no_images = app.is_present("no-images");
app_args.no_js = app.is_present("no-js"); options.isolate = app.is_present("isolate");
app_args.insecure = app.is_present("insecure"); options.no_js = app.is_present("no-js");
app_args.no_metadata = app.is_present("no-metadata"); options.insecure = app.is_present("insecure");
app_args.isolate = app.is_present("isolate"); options.no_metadata = app.is_present("no-metadata");
app_args.silent = app.is_present("silent"); options.output = app.value_of("output").unwrap_or("").to_string();
app_args.timeout = app options.silent = app.is_present("silent");
options.timeout = app
.value_of("timeout") .value_of("timeout")
.unwrap_or(&DEFAULT_NETWORK_TIMEOUT.to_string()) .unwrap_or(&DEFAULT_NETWORK_TIMEOUT.to_string())
.parse::<u64>() .parse::<u64>()
.unwrap(); .unwrap();
app_args.output = app.value_of("output").unwrap_or("").to_string(); options.user_agent = app
app_args.user_agent = app
.value_of("user-agent") .value_of("user-agent")
.unwrap_or(DEFAULT_USER_AGENT) .unwrap_or(DEFAULT_USER_AGENT)
.to_string(); .to_string();
app_args
options
} }
} }

View file

@ -7,25 +7,26 @@
#[cfg(test)] #[cfg(test)]
mod passing { mod passing {
use crate::css;
use reqwest::blocking::Client; use reqwest::blocking::Client;
use std::collections::HashMap; use std::collections::HashMap;
use crate::css;
use crate::opts::Options;
#[test] #[test]
fn empty_input() { fn empty_input() {
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let client = Client::new(); let client = Client::new();
let options = Options::default();
assert_eq!( assert_eq!(css::embed_css(cache, &client, "", "", &options), "");
css::embed_css(cache, &client, "", "", false, false, false,),
""
);
} }
#[test] #[test]
fn trim_if_empty() { fn trim_if_empty() {
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let client = Client::new(); let client = Client::new();
let options = Options::default();
assert_eq!( assert_eq!(
css::embed_css( css::embed_css(
@ -33,9 +34,7 @@ mod passing {
&client, &client,
"https://doesntmatter.local/", "https://doesntmatter.local/",
"\t \t ", "\t \t ",
false, &options,
false,
false,
), ),
"" ""
); );
@ -45,6 +44,9 @@ mod passing {
fn style_exclude_unquoted_images() { fn style_exclude_unquoted_images() {
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let client = Client::new(); let client = Client::new();
let mut options = Options::default();
options.no_images = true;
options.silent = true;
const STYLE: &str = "/* border: none;*/\ const STYLE: &str = "/* border: none;*/\
background-image: url(https://somewhere.com/bg.png); \ background-image: url(https://somewhere.com/bg.png); \
@ -60,9 +62,7 @@ mod passing {
&client, &client,
"https://doesntmatter.local/", "https://doesntmatter.local/",
&STYLE, &STYLE,
false, &options,
true,
true,
), ),
format!( format!(
"/* border: none;*/\ "/* border: none;*/\
@ -81,6 +81,9 @@ mod passing {
fn style_exclude_single_quoted_images() { fn style_exclude_single_quoted_images() {
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let client = Client::new(); let client = Client::new();
let mut options = Options::default();
options.no_images = true;
options.silent = true;
const STYLE: &str = "/* border: none;*/\ const STYLE: &str = "/* border: none;*/\
background-image: url('https://somewhere.com/bg.png'); \ background-image: url('https://somewhere.com/bg.png'); \
@ -91,7 +94,7 @@ mod passing {
height: calc(100vh - 10pt)"; height: calc(100vh - 10pt)";
assert_eq!( assert_eq!(
css::embed_css(cache, &client, "", &STYLE, false, true, true,), css::embed_css(cache, &client, "", &STYLE, &options),
format!( format!(
"/* border: none;*/\ "/* border: none;*/\
background-image: url('{empty_image}'); \ background-image: url('{empty_image}'); \
@ -109,6 +112,8 @@ mod passing {
fn style_block() { fn style_block() {
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let client = Client::new(); let client = Client::new();
let mut options = Options::default();
options.silent = true;
const CSS: &str = "\ const CSS: &str = "\
#id.class-name:not(:nth-child(3n+0)) {\n \ #id.class-name:not(:nth-child(3n+0)) {\n \
@ -119,7 +124,7 @@ mod passing {
html > body {}"; html > body {}";
assert_eq!( assert_eq!(
css::embed_css(cache, &client, "file:///", &CSS, false, false, true,), css::embed_css(cache, &client, "file:///", &CSS, &options),
CSS CSS
); );
} }
@ -128,6 +133,8 @@ mod passing {
fn attribute_selectors() { fn attribute_selectors() {
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let client = Client::new(); let client = Client::new();
let mut options = Options::default();
options.silent = true;
const CSS: &str = "\ const CSS: &str = "\
[data-value] { [data-value] {
@ -159,16 +166,15 @@ mod passing {
} }
"; ";
assert_eq!( assert_eq!(css::embed_css(cache, &client, "", &CSS, &options), CSS);
css::embed_css(cache, &client, "", &CSS, false, false, false,),
CSS
);
} }
#[test] #[test]
fn import_string() { fn import_string() {
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let client = Client::new(); let client = Client::new();
let mut options = Options::default();
options.silent = true;
const CSS: &str = "\ const CSS: &str = "\
@charset 'UTF-8';\n\ @charset 'UTF-8';\n\
@ -184,9 +190,7 @@ mod passing {
&client, &client,
"https://doesntmatter.local/", "https://doesntmatter.local/",
&CSS, &CSS,
false, &options,
false,
true,
), ),
"\ "\
@charset 'UTF-8';\n\ @charset 'UTF-8';\n\
@ -202,6 +206,8 @@ mod passing {
fn hash_urls() { fn hash_urls() {
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let client = Client::new(); let client = Client::new();
let mut options = Options::default();
options.silent = true;
const CSS: &str = "\ const CSS: &str = "\
body {\n \ body {\n \
@ -219,9 +225,7 @@ mod passing {
&client, &client,
"https://doesntmatter.local/", "https://doesntmatter.local/",
&CSS, &CSS,
false, &options,
false,
true,
), ),
CSS CSS
); );
@ -231,6 +235,8 @@ mod passing {
fn transform_percentages_and_degrees() { fn transform_percentages_and_degrees() {
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let client = Client::new(); let client = Client::new();
let mut options = Options::default();
options.silent = true;
const CSS: &str = "\ const CSS: &str = "\
div {\n \ div {\n \
@ -246,9 +252,7 @@ mod passing {
&client, &client,
"https://doesntmatter.local/", "https://doesntmatter.local/",
&CSS, &CSS,
false, &options,
false,
true,
), ),
CSS CSS
); );
@ -258,6 +262,8 @@ mod passing {
fn unusual_indents() { fn unusual_indents() {
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let client = Client::new(); let client = Client::new();
let mut options = Options::default();
options.silent = true;
const CSS: &str = "\ const CSS: &str = "\
.is\\:good:hover {\n \ .is\\:good:hover {\n \
@ -275,9 +281,7 @@ mod passing {
&client, &client,
"https://doesntmatter.local/", "https://doesntmatter.local/",
&CSS, &CSS,
false, &options,
false,
true,
), ),
CSS CSS
); );
@ -287,6 +291,9 @@ mod passing {
fn exclude_fonts() { fn exclude_fonts() {
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let client = Client::new(); let client = Client::new();
let mut options = Options::default();
options.no_fonts = true;
options.silent = true;
const CSS: &str = "\ const CSS: &str = "\
@font-face {\n \ @font-face {\n \
@ -328,9 +335,7 @@ mod passing {
&client, &client,
"https://doesntmatter.local/", "https://doesntmatter.local/",
&CSS, &CSS,
true, &options,
false,
true,
), ),
CSS_OUT CSS_OUT
); );

View file

@ -8,143 +8,72 @@
#[cfg(test)] #[cfg(test)]
mod passing { mod passing {
use crate::html; use crate::html;
use crate::opts::Options;
#[test] #[test]
fn isolated() { fn isolated() {
let opt_isolate: bool = true; let mut options = Options::default();
let opt_no_css: bool = false; options.isolate = true;
let opt_no_fonts: bool = false; let csp_content = html::csp(&options);
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let csp_content = html::csp(
opt_isolate,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
);
assert_eq!(csp_content, "default-src 'unsafe-inline' data:;"); assert_eq!(csp_content, "default-src 'unsafe-inline' data:;");
} }
#[test] #[test]
fn no_css() { fn no_css() {
let opt_isolate: bool = false; let mut options = Options::default();
let opt_no_css: bool = true; options.no_css = true;
let opt_no_fonts: bool = false; let csp_content = html::csp(&options);
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let csp_content = html::csp(
opt_isolate,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
);
assert_eq!(csp_content, "style-src 'none';"); assert_eq!(csp_content, "style-src 'none';");
} }
#[test] #[test]
fn no_fonts() { fn no_fonts() {
let opt_isolate: bool = false; let mut options = Options::default();
let opt_no_css: bool = false; options.no_fonts = true;
let opt_no_fonts: bool = true; let csp_content = html::csp(&options);
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let csp_content = html::csp(
opt_isolate,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
);
assert_eq!(csp_content, "font-src 'none';"); assert_eq!(csp_content, "font-src 'none';");
} }
#[test] #[test]
fn no_frames() { fn no_frames() {
let opt_isolate: bool = false; let mut options = Options::default();
let opt_no_css: bool = false; options.no_frames = true;
let opt_no_fonts: bool = false; let csp_content = html::csp(&options);
let opt_no_frames: bool = true;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let csp_content = html::csp(
opt_isolate,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
);
assert_eq!(csp_content, "frame-src 'none'; child-src 'none';"); assert_eq!(csp_content, "frame-src 'none'; child-src 'none';");
} }
#[test] #[test]
fn no_js() { fn no_js() {
let opt_isolate: bool = false; let mut options = Options::default();
let opt_no_css: bool = false; options.no_js = true;
let opt_no_fonts: bool = false; let csp_content = html::csp(&options);
let opt_no_frames: bool = false;
let opt_no_js: bool = true;
let opt_no_images: bool = false;
let csp_content = html::csp(
opt_isolate,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
);
assert_eq!(csp_content, "script-src 'none';"); assert_eq!(csp_content, "script-src 'none';");
} }
#[test] #[test]
fn no_image() { fn no_images() {
let opt_isolate: bool = false; let mut options = Options::default();
let opt_no_css: bool = false; options.no_images = true;
let opt_no_fonts: bool = false; let csp_content = html::csp(&options);
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = true;
let csp_content = html::csp(
opt_isolate,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
);
assert_eq!(csp_content, "img-src data:;"); assert_eq!(csp_content, "img-src data:;");
} }
#[test] #[test]
fn all() { fn all() {
let opt_isolate: bool = true; let mut options = Options::default();
let opt_no_css: bool = true; options.isolate = true;
let opt_no_fonts: bool = true; options.no_css = true;
let opt_no_frames: bool = true; options.no_fonts = true;
let opt_no_js: bool = true; options.no_frames = true;
let opt_no_images: bool = true; options.no_js = true;
let csp_content = html::csp( options.no_images = true;
opt_isolate, let csp_content = html::csp(&options);
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
);
assert_eq!(csp_content, "default-src 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;"); assert_eq!(csp_content, "default-src 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;");
} }

View file

@ -7,16 +7,21 @@
#[cfg(test)] #[cfg(test)]
mod passing { mod passing {
use crate::html;
use reqwest::blocking::Client; use reqwest::blocking::Client;
use std::collections::HashMap; use std::collections::HashMap;
use crate::html;
use crate::opts::Options;
#[test] #[test]
fn replace_with_empty_images() { fn replace_with_empty_images() {
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let client = Client::new(); let client = Client::new();
let srcset_value = "small.png 1x, large.png 2x"; let srcset_value = "small.png 1x, large.png 2x";
let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, true, true); let mut options = Options::default();
options.no_images = true;
options.silent = true;
let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options);
assert_eq!( assert_eq!(
format!("{} 1x, {} 2x", empty_image!(), empty_image!()), format!("{} 1x, {} 2x", empty_image!(), empty_image!()),

View file

@ -7,9 +7,10 @@
#[cfg(test)] #[cfg(test)]
mod passing { mod passing {
use crate::html;
use html5ever::rcdom::{Handle, NodeData}; use html5ever::rcdom::{Handle, NodeData};
use crate::html;
#[test] #[test]
fn get_node_name() { fn get_node_name() {
let html = "<!doctype html><html><HEAD></HEAD><body><div><P></P></div></body></html>"; let html = "<!doctype html><html><HEAD></HEAD><body><div><P></P></div></body></html>";

View file

@ -8,29 +8,16 @@
#[cfg(test)] #[cfg(test)]
mod passing { mod passing {
use crate::html; use crate::html;
use crate::opts::Options;
#[test] #[test]
fn div_as_root_element() { fn div_as_root_element() {
let html = "<div><script src=\"some.js\"></script></div>"; let html = "<div><script src=\"some.js\"></script></div>";
let dom = html::html_to_dom(&html); let dom = html::html_to_dom(&html);
let options = Options::default();
let opt_no_css: bool = false;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_isolate: bool = false;
assert_eq!( assert_eq!(
html::stringify_document( html::stringify_document(&dom.document, &options),
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_isolate,
),
"<html><head></head><body><div><script src=\"some.js\"></script></div></body></html>" "<html><head></head><body><div><script src=\"some.js\"></script></div></body></html>"
); );
} }
@ -42,23 +29,13 @@ mod passing {
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\ <meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
<div><script src=\"some.js\"></script></div>"; <div><script src=\"some.js\"></script></div>";
let dom = html::html_to_dom(&html); let dom = html::html_to_dom(&html);
let mut options = Options::default();
let opt_no_css: bool = false; options.isolate = true;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_isolate: bool = true;
assert_eq!( assert_eq!(
html::stringify_document( html::stringify_document(
&dom.document, &dom.document,
opt_no_css, &options
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_isolate,
), ),
"<html>\ "<html>\
<head>\ <head>\
@ -83,24 +60,11 @@ mod passing {
<link rel=\"stylesheet\" href=\"main.css\"/>\ <link rel=\"stylesheet\" href=\"main.css\"/>\
<div style=\"display: none;\"></div>"; <div style=\"display: none;\"></div>";
let dom = html::html_to_dom(&html); let dom = html::html_to_dom(&html);
let mut options = Options::default();
let opt_no_css: bool = true; options.no_css = true;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_isolate: bool = false;
assert_eq!( assert_eq!(
html::stringify_document( html::stringify_document(&dom.document, &options),
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_isolate,
),
"<!DOCTYPE html>\ "<!DOCTYPE html>\
<html>\ <html>\
<head>\ <head>\
@ -120,23 +84,13 @@ mod passing {
<link rel=\"something\"/>\ <link rel=\"something\"/>\
<div><script src=\"some.js\"></script></div>"; <div><script src=\"some.js\"></script></div>";
let dom = html::html_to_dom(&html); let dom = html::html_to_dom(&html);
let mut options = Options::default();
let opt_no_css: bool = false; options.no_frames = true;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = true;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_isolate: bool = false;
assert_eq!( assert_eq!(
html::stringify_document( html::stringify_document(
&dom.document, &dom.document,
opt_no_css, &options
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_isolate,
), ),
"<!DOCTYPE html>\ "<!DOCTYPE html>\
<html>\ <html>\
@ -162,23 +116,18 @@ mod passing {
<iframe src=\"some.html\"></iframe>\ <iframe src=\"some.html\"></iframe>\
</div>"; </div>";
let dom = html::html_to_dom(&html); let dom = html::html_to_dom(&html);
let mut options = Options::default();
let opt_isolate: bool = true; options.isolate = true;
let opt_no_css: bool = true; options.no_css = true;
let opt_no_fonts: bool = true; options.no_fonts = true;
let opt_no_frames: bool = true; options.no_frames = true;
let opt_no_js: bool = true; options.no_js = true;
let opt_no_images: bool = true; options.no_images = true;
assert_eq!( assert_eq!(
html::stringify_document( html::stringify_document(
&dom.document, &dom.document,
opt_no_css, &options
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_isolate,
), ),
"<!DOCTYPE html>\ "<!DOCTYPE html>\
<html>\ <html>\

View file

@ -7,11 +7,13 @@
#[cfg(test)] #[cfg(test)]
mod passing { mod passing {
use crate::html;
use html5ever::serialize::{serialize, SerializeOpts}; use html5ever::serialize::{serialize, SerializeOpts};
use reqwest::blocking::Client; use reqwest::blocking::Client;
use std::collections::HashMap; use std::collections::HashMap;
use crate::html;
use crate::opts::Options;
#[test] #[test]
fn basic() { fn basic() {
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
@ -20,27 +22,12 @@ mod passing {
let dom = html::html_to_dom(&html); let dom = html::html_to_dom(&html);
let url = "http://localhost"; let url = "http://localhost";
let opt_no_css: bool = false; let mut options = Options::default();
let opt_no_fonts: bool = false; options.silent = true;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_silent = true;
let client = Client::new(); let client = Client::new();
html::walk_and_embed_assets( html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
);
let mut buf: Vec<u8> = Vec::new(); let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
@ -58,27 +45,12 @@ mod passing {
let url = "http://localhost"; let url = "http://localhost";
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let opt_no_css: bool = false; let mut options = Options::default();
let opt_no_fonts: bool = false; options.silent = true;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_silent = true;
let client = Client::new(); let client = Client::new();
html::walk_and_embed_assets( html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
);
let mut buf: Vec<u8> = Vec::new(); let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
@ -96,27 +68,12 @@ mod passing {
let url = "http://localhost"; let url = "http://localhost";
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let opt_no_css: bool = false; let mut options = Options::default();
let opt_no_fonts: bool = false; options.silent = true;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_silent = true;
let client = Client::new(); let client = Client::new();
html::walk_and_embed_assets( html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
);
let mut buf: Vec<u8> = Vec::new(); let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
@ -136,26 +93,13 @@ mod passing {
let url = "http://localhost"; let url = "http://localhost";
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let opt_no_css: bool = true; let mut options = Options::default();
let opt_no_fonts: bool = false; options.no_css = true;
let opt_no_frames: bool = false; options.silent = true;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_silent = true;
let client = Client::new(); let client = Client::new();
html::walk_and_embed_assets( html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
);
let mut buf: Vec<u8> = Vec::new(); let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
@ -182,27 +126,13 @@ mod passing {
let url = "http://localhost"; let url = "http://localhost";
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let opt_no_css: bool = false; let mut options = Options::default();
let opt_no_fonts: bool = false; options.no_images = true;
let opt_no_frames: bool = false; options.silent = true;
let opt_no_js: bool = false;
let opt_no_images: bool = true;
let opt_silent = true;
let client = Client::new(); let client = Client::new();
html::walk_and_embed_assets( html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
);
let mut buf: Vec<u8> = Vec::new(); let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
@ -233,27 +163,13 @@ mod passing {
let url = "http://localhost"; let url = "http://localhost";
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let opt_no_css: bool = false; let mut options = Options::default();
let opt_no_fonts: bool = false; options.no_images = true;
let opt_no_frames: bool = false; options.silent = true;
let opt_no_js: bool = false;
let opt_no_images: bool = true;
let opt_silent = true;
let client = Client::new(); let client = Client::new();
html::walk_and_embed_assets( html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
);
let mut buf: Vec<u8> = Vec::new(); let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
@ -271,26 +187,13 @@ mod passing {
let url = "http://localhost"; let url = "http://localhost";
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let opt_no_css: bool = false; let mut options = Options::default();
let opt_no_fonts: bool = false; options.no_frames = true;
let opt_no_frames: bool = true; options.silent = true;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_silent = true;
let client = Client::new(); let client = Client::new();
html::walk_and_embed_assets( html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
);
let mut buf: Vec<u8> = Vec::new(); let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
@ -308,26 +211,13 @@ mod passing {
let url = "http://localhost"; let url = "http://localhost";
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let opt_no_css: bool = false; let mut options = Options::default();
let opt_no_fonts: bool = false; options.no_frames = true;
let opt_no_frames: bool = true; options.silent = true;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_silent = true;
let client = Client::new(); let client = Client::new();
html::walk_and_embed_assets( html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
);
let mut buf: Vec<u8> = Vec::new(); let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
@ -348,27 +238,13 @@ mod passing {
let url = "http://localhost"; let url = "http://localhost";
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let opt_no_css: bool = false; let mut options = Options::default();
let opt_no_fonts: bool = false; options.no_js = true;
let opt_no_frames: bool = false; options.silent = true;
let opt_no_js: bool = true;
let opt_no_images: bool = false;
let opt_silent = true;
let client = Client::new(); let client = Client::new();
html::walk_and_embed_assets( html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
);
let mut buf: Vec<u8> = Vec::new(); let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
@ -388,26 +264,17 @@ mod passing {
let dom = html::html_to_dom(&html); let dom = html::html_to_dom(&html);
let url = "http://localhost"; let url = "http://localhost";
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let client = Client::new();
let opt_no_css: bool = true;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = true;
let opt_no_js: bool = true;
let opt_no_images: bool = true;
let opt_silent = true;
html::walk_and_embed_assets( let mut options = Options::default();
cache, options.no_css = true;
&client, options.no_frames = true;
&url, options.no_js = true;
&dom.document, options.no_images = true;
opt_no_css, options.silent = true;
opt_no_fonts,
opt_no_frames, let client = Client::new();
opt_no_js,
opt_no_images, html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
opt_silent,
);
let mut buf: Vec<u8> = Vec::new(); let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
@ -433,26 +300,17 @@ mod passing {
let dom = html::html_to_dom(&html); let dom = html::html_to_dom(&html);
let url = "http://localhost"; let url = "http://localhost";
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
let client = Client::new();
let opt_no_css: bool = true;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = true;
let opt_no_js: bool = true;
let opt_no_images: bool = true;
let opt_silent = true;
html::walk_and_embed_assets( let mut options = Options::default();
cache, options.no_css = true;
&client, options.no_frames = true;
&url, options.no_js = true;
&dom.document, options.no_images = true;
opt_no_css, options.silent = true;
opt_no_fonts,
opt_no_frames, let client = Client::new();
opt_no_js,
opt_no_images, html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
opt_silent,
);
let mut buf: Vec<u8> = Vec::new(); let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

View file

@ -3,4 +3,5 @@ mod css;
mod html; mod html;
mod js; mod js;
mod macros; mod macros;
mod opts;
mod utils; mod utils;

30
src/tests/opts.rs Normal file
View file

@ -0,0 +1,30 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::opts::Options;

    /// Pins the values produced by `Options::default()`.
    ///
    /// `Options` derives `Default`, so every field starts out as its type's
    /// zero value: empty strings, `false` flags and a `0` timeout. The
    /// command-line fallbacks (network timeout, User-Agent string) are filled
    /// in by `Options::from_args`, not by `Default`, which is why they do not
    /// show up here.
    #[test]
    fn defaults() {
        let options = Options::default();

        assert_eq!(options.target, str!());
        // Boolean switches are all off by default; assert on the flag directly
        // rather than comparing it against a `false` literal.
        assert!(!options.no_css);
        assert!(!options.no_frames);
        assert!(!options.no_fonts);
        assert!(!options.no_images);
        assert!(!options.isolate);
        assert!(!options.no_js);
        assert!(!options.insecure);
        assert!(!options.no_metadata);
        assert_eq!(options.output, str!());
        assert!(!options.silent);
        // Derived default, not the CLI fallback applied when parsing arguments.
        assert_eq!(options.timeout, 0);
        assert_eq!(options.user_agent, "");
    }
}

View file

@ -7,9 +7,10 @@
#[cfg(test)] #[cfg(test)]
mod passing { mod passing {
use crate::url;
use url::ParseError; use url::ParseError;
use crate::url;
#[test] #[test]
fn from_https_to_level_up_relative() -> Result<(), ParseError> { fn from_https_to_level_up_relative() -> Result<(), ParseError> {
let resolved_url = let resolved_url =

View file

@ -86,10 +86,11 @@ mod passing {
#[cfg(test)] #[cfg(test)]
mod failing { mod failing {
use crate::utils;
use reqwest::blocking::Client; use reqwest::blocking::Client;
use std::collections::HashMap; use std::collections::HashMap;
use crate::utils;
#[test] #[test]
fn read_local_file_with_data_url_parent() { fn read_local_file_with_data_url_parent() {
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();