Merge pull request #256 from snshn/more-tests-fixes-and-improvements

More tests, fixes, improvements
This commit is contained in:
Sunshine 2021-06-02 04:06:37 -10:00 committed by GitHub
commit 22a031af5d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
23 changed files with 1193 additions and 1165 deletions

1348
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
[package]
name = "monolith"
version = "2.4.1"
version = "2.5.0"
authors = [
"Sunshine <sunshine@uberspace.net>",
"Mahdi Robatipoor <mahdi.robatipoor@gmail.com>",
@ -22,20 +22,20 @@ include = [
license = "CC0-1.0"
[dependencies]
atty = "0.2" # Used for highlighting network errors
atty = "0.2.14" # Used for highlighting network errors
base64 = "0.13.0"
chrono = "0.4.19" # Used for formatting creation timestamp
clap = "2.33.3"
cssparser = "0.28.1"
html5ever = "0.24.1"
regex = "1.4.3" # Used for parsing srcset
sha2 = "0.9.2" # Used for calculating checksums during integrity checks
url = "2.2.0"
regex = "1.5.4" # Used for parsing srcset and NOSCRIPT
sha2 = "0.9.5" # Used for calculating checksums during integrity checks
url = "2.2.2"
[dependencies.reqwest]
version = "0.11.0"
version = "0.11.3"
default-features = false
features = ["default-tls", "blocking", "gzip"]
[dev-dependencies]
assert_cmd = "1.0.2"
assert_cmd = "1.0.4"

View File

@ -79,11 +79,11 @@ or
- `-j`: Exclude JavaScript
- `-k`: Accept invalid X.509 (TLS) certificates
- `-M`: Don't add timestamp and URL information
- `-n`: Extract contents of NOSCRIPT tags
- `-n`: Extract contents of NOSCRIPT elements
- `-o`: Write output to `file`
- `-s`: Be quiet
- `-t`: Adjust `network request timeout`
- `-u`: Provide `custom User-Agent`
- `-u`: Provide custom `User-Agent`
- `-v`: Exclude videos
---------------------------------------------------
@ -99,20 +99,16 @@ Please open an issue if something is wrong, that helps make this project better.
---------------------------------------------------
## Related projects
- `Monolith Chrome Extension`: https://github.com/rhysd/monolith-of-web
- `Pagesaver`: https://github.com/distributed-mind/pagesaver
- `Personal WayBack Machine`: https://github.com/popey/pwbm
- `Hako`: https://github.com/dmpop/hako
- `Monk`: https://gitlab.com/fisherdarling/monk
- Monolith Chrome Extension: https://github.com/rhysd/monolith-of-web
- Pagesaver: https://github.com/distributed-mind/pagesaver
- Personal WayBack Machine: https://github.com/popey/pwbm
- Hako: https://github.com/dmpop/hako
- Monk: https://gitlab.com/fisherdarling/monk
---------------------------------------------------
## License
<a href="https://creativecommons.org/publicdomain/zero/1.0/">
<img src="https://i.creativecommons.org/p/zero/1.0/88x31.png" alt="CC0-1.0" />
</a>
<br />
To the extent possible under law, the author(s) have dedicated all copyright related and neighboring rights to this software to the public domain worldwide.
This software is distributed without any warranty.

View File

@ -6,7 +6,7 @@ use std::collections::HashMap;
use url::Url;
use crate::opts::Options;
use crate::url::{data_to_data_url, resolve_url};
use crate::url::{create_data_url, resolve_url};
use crate::utils::retrieve_asset;
const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[
@ -55,14 +55,6 @@ pub fn embed_css(
.unwrap()
}
/// Wraps `input` in quotes so it can be emitted as a quoted string.
///
/// When `double` is `true` the result is wrapped in double quotes (`"`),
/// otherwise in single quotes (`'`). Every occurrence of the chosen quote
/// character inside `input` is prefixed with a backslash. Backslashes are
/// escaped too: otherwise a backslash already present in `input` would
/// escape the character after it (or the closing quote, when it is the last
/// character) and produce a malformed string.
///
/// The other kind of quote character is left untouched.
pub fn enquote(input: String, double: bool) -> String {
    let quote: char = if double { '"' } else { '\'' };

    // Two extra bytes for the surrounding quotes; escapes may grow it further
    let mut res: String = String::with_capacity(input.len() + 2);
    res.push(quote);
    for c in input.chars() {
        // Escape in a single pass so that backslashes added for quotes are
        // never escaped a second time
        if c == quote || c == '\\' {
            res.push('\\');
        }
        res.push(c);
    }
    res.push(quote);

    res
}
pub fn format_ident(ident: &str) -> String {
let mut res: String = String::new();
let _ = serialize_identifier(ident, &mut res);
@ -207,7 +199,7 @@ pub fn process_css<'a>(
depth + 1,
) {
Ok((import_contents, import_final_url, _import_media_type)) => {
let mut import_data_url = data_to_data_url(
let mut import_data_url = create_data_url(
"text/css",
embed_css(
cache,
@ -221,15 +213,18 @@ pub fn process_css<'a>(
&import_final_url,
);
import_data_url.set_fragment(import_full_url.fragment());
result.push_str(enquote(import_data_url.to_string(), false).as_str());
result.push_str(
format_quoted_string(&import_data_url.to_string()).as_str(),
);
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if import_full_url.scheme() == "http"
|| import_full_url.scheme() == "https"
{
result
.push_str(enquote(import_full_url.to_string(), false).as_str());
result.push_str(
format_quoted_string(&import_full_url.to_string()).as_str(),
);
}
}
}
@ -241,7 +236,7 @@ pub fn process_css<'a>(
}
if options.no_images && is_image_url_prop(curr_prop.as_str()) {
result.push_str(enquote(str!(empty_image!()), false).as_str());
result.push_str(format_quoted_string(empty_image!()).as_str());
} else {
let resolved_url: Url = resolve_url(&document_url, value);
match retrieve_asset(
@ -254,9 +249,11 @@ pub fn process_css<'a>(
) {
Ok((data, final_url, media_type)) => {
let mut data_url =
data_to_data_url(&media_type, &data, &final_url);
create_data_url(&media_type, &data, &final_url);
data_url.set_fragment(resolved_url.fragment());
result.push_str(enquote(data_url.to_string(), false).as_str());
result.push_str(
format_quoted_string(&data_url.to_string()).as_str(),
);
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
@ -264,7 +261,8 @@ pub fn process_css<'a>(
|| resolved_url.scheme() == "https"
{
result.push_str(
enquote(resolved_url.to_string(), false).as_str(),
format_quoted_string(&resolved_url.to_string())
.as_str(),
);
}
}
@ -346,7 +344,7 @@ pub fn process_css<'a>(
depth + 1,
) {
Ok((css, final_url, _media_type)) => {
let mut data_url = data_to_data_url(
let mut data_url = create_data_url(
"text/css",
embed_css(
cache,
@ -360,18 +358,19 @@ pub fn process_css<'a>(
&final_url,
);
data_url.set_fragment(full_url.fragment());
result.push_str(enquote(data_url.to_string(), false).as_str());
result.push_str(format_quoted_string(&data_url.to_string()).as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if full_url.scheme() == "http" || full_url.scheme() == "https" {
result.push_str(enquote(full_url.to_string(), false).as_str());
result
.push_str(format_quoted_string(&full_url.to_string()).as_str());
}
}
}
} else {
if is_image_url_prop(curr_prop.as_str()) && options.no_images {
result.push_str(enquote(str!(empty_image!()), false).as_str());
result.push_str(format_quoted_string(empty_image!()).as_str());
} else {
let full_url: Url = resolve_url(&document_url, value);
match retrieve_asset(
@ -383,14 +382,17 @@ pub fn process_css<'a>(
depth + 1,
) {
Ok((data, final_url, media_type)) => {
let mut data_url = data_to_data_url(&media_type, &data, &final_url);
let mut data_url = create_data_url(&media_type, &data, &final_url);
data_url.set_fragment(full_url.fragment());
result.push_str(enquote(data_url.to_string(), false).as_str());
result
.push_str(format_quoted_string(&data_url.to_string()).as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if full_url.scheme() == "http" || full_url.scheme() == "https" {
result.push_str(enquote(full_url.to_string(), false).as_str());
result.push_str(
format_quoted_string(&full_url.to_string()).as_str(),
);
}
}
}

View File

@ -17,7 +17,7 @@ use std::default::Default;
use crate::css::embed_css;
use crate::js::attr_is_event_handler;
use crate::opts::Options;
use crate::url::{clean_url, data_to_data_url, is_url_and_has_protocol, resolve_url};
use crate::url::{clean_url, create_data_url, is_url_and_has_protocol, resolve_url};
use crate::utils::retrieve_asset;
struct SrcSetItem<'a> {
@ -190,7 +190,7 @@ pub fn embed_srcset(
) {
Ok((image_data, image_final_url, image_media_type)) => {
let mut image_data_url =
data_to_data_url(&image_media_type, &image_data, &image_final_url);
create_data_url(&image_media_type, &image_data, &image_final_url);
// Append retrieved asset as a data URL
image_data_url.set_fragment(image_full_url.fragment());
result.push_str(image_data_url.as_ref());
@ -474,8 +474,9 @@ pub fn stringify_document(handle: &Handle, options: &Options) -> String {
result = String::from_utf8(buf).unwrap();
}
// Unwrap NOSCRIPT elements
if options.unwrap_noscript {
let noscript_re = Regex::new(r"<(?P<c>/?noscript)>").unwrap();
let noscript_re = Regex::new(r"<(?P<c>/?noscript[^>]*)>").unwrap();
result = noscript_re.replace_all(&result, "<!--$c-->").to_string();
}
@ -503,44 +504,39 @@ pub fn retrieve_and_embed_asset(
depth + 1,
) {
Ok((data, final_url, mut media_type)) => {
// Check integrity if it's a LINK or SCRIPT tag
let node_name: &str = get_node_name(&node).unwrap();
// Check integrity if it's a LINK or SCRIPT element
let mut ok_to_include: bool = true;
if node_name == "link" || node_name == "script" {
let node_integrity_attr_value: Option<String> = get_node_attr(node, "integrity");
// Check integrity
if let Some(node_integrity_attr_value) = node_integrity_attr_value {
if let Some(node_integrity_attr_value) = get_node_attr(node, "integrity") {
if !node_integrity_attr_value.is_empty() {
ok_to_include = check_integrity(&data, &node_integrity_attr_value);
}
}
// Wipe integrity attribute
set_node_attr(node, "integrity", None);
// Wipe the integrity attribute
set_node_attr(node, "integrity", None);
}
}
if ok_to_include {
if node_name == "link" {
let link_type: &str = determine_link_node_type(node);
// CSS LINK nodes requires special treatment
if link_type == "stylesheet" {
let css: String = embed_css(
cache,
client,
&final_url,
&String::from_utf8_lossy(&data),
options,
depth + 1,
);
let css_data_url = data_to_data_url("text/css", css.as_bytes(), &final_url);
if node_name == "link" && determine_link_node_type(node) == "stylesheet" {
// Stylesheet LINK elements require special treatment
let css: String = embed_css(
cache,
client,
&final_url,
&String::from_utf8_lossy(&data),
options,
depth + 1,
);
set_node_attr(&node, attr_name, Some(css_data_url.to_string()));
return; // Do not fall through
}
// Create and embed data URL
let css_data_url = create_data_url("text/css", css.as_bytes(), &final_url);
set_node_attr(&node, attr_name, Some(css_data_url.to_string()));
} else if node_name == "frame" || node_name == "iframe" {
// (I)FRAMEs are also quite different from conventional resources
let frame_dom = html_to_dom(&String::from_utf8_lossy(&data));
walk_and_embed_assets(
cache,
@ -559,30 +555,38 @@ pub fn retrieve_and_embed_asset(
)
.unwrap();
let mut frame_data_url = data_to_data_url(&media_type, &frame_data, &final_url);
// Create and embed data URL
let mut frame_data_url = create_data_url(&media_type, &frame_data, &final_url);
frame_data_url.set_fragment(resolved_url.fragment());
set_node_attr(node, attr_name, Some(frame_data_url.to_string()));
} else {
// Every other type of element gets processed here
return; // Do not fall through
}
// Parse media type for SCRIPT elements
if node_name == "script" {
if let Some(_) = get_node_attr(node, "src") {
if let Some(script_node_type_attr_value) = get_node_attr(node, "type") {
media_type = script_node_type_attr_value.to_string();
} else {
// Fallback to default one if it's not specified
media_type = "application/javascript".to_string();
}
}
}
// Everything else
if node_name == "script" {
media_type = "application/javascript".to_string();
// Create and embed data URL
let mut data_url = create_data_url(&media_type, &data, &final_url);
data_url.set_fragment(resolved_url.fragment());
set_node_attr(node, attr_name, Some(data_url.to_string()));
}
let mut data_url = data_to_data_url(&media_type, &data, &final_url);
data_url.set_fragment(resolved_url.fragment());
set_node_attr(node, attr_name, Some(data_url.to_string()));
}
}
Err(_) => {
if resolved_url.scheme() == "http" || resolved_url.scheme() == "https" {
// Keep remote reference if unable to retrieve the asset
// Keep remote references if unable to retrieve the asset
set_node_attr(node, attr_name, Some(resolved_url.to_string()));
} else {
// Exclude non-remote URLs
// Remove local references if they can't be successfully embedded as data URLs
set_node_attr(node, attr_name, None);
}
}
@ -645,7 +649,7 @@ pub fn walk_and_embed_assets(
let link_type: &str = determine_link_node_type(node);
if link_type == "icon" {
// Find and resolve this LINK node's href attribute
// Find and resolve LINK's href attribute
if let Some(link_attr_href_value) = get_node_attr(node, "href") {
if !options.no_images && !link_attr_href_value.is_empty() {
retrieve_and_embed_asset(
@ -663,10 +667,12 @@ pub fn walk_and_embed_assets(
}
}
} else if link_type == "stylesheet" {
// Find and resolve this LINK node's href attribute
// Resolve LINK's href attribute
if let Some(link_attr_href_value) = get_node_attr(node, "href") {
if options.no_css {
set_node_attr(node, "href", None);
// Wipe integrity attribute
set_node_attr(node, "integrity", None);
} else {
if !link_attr_href_value.is_empty() {
retrieve_and_embed_asset(
@ -826,6 +832,7 @@ pub fn walk_and_embed_assets(
set_node_attr(node, "href", None);
}
}
if let Some(image_attr_xlink_href_value) = get_node_attr(node, "xlink:href") {
image_href = image_attr_xlink_href_value;
if options.no_images {
@ -915,14 +922,15 @@ pub fn walk_and_embed_assets(
// Replace with empty JS call to preserve original behavior
set_node_attr(node, "href", Some(str!("javascript:;")));
}
} else if anchor_attr_href_value.clone().starts_with('#')
|| is_url_and_has_protocol(&anchor_attr_href_value.clone())
{
// Don't touch mailto: links or hrefs which begin with a hash sign
} else {
let href_full_url: Url =
resolve_url(document_url, &anchor_attr_href_value);
set_node_attr(node, "href", Some(href_full_url.to_string()));
// Don't touch mailto: links or hrefs which begin with a hash sign
if !anchor_attr_href_value.clone().starts_with('#')
&& !is_url_and_has_protocol(&anchor_attr_href_value.clone())
{
let href_full_url: Url =
resolve_url(document_url, &anchor_attr_href_value);
set_node_attr(node, "href", Some(href_full_url.to_string()));
}
}
}
}
@ -936,6 +944,8 @@ pub fn walk_and_embed_assets(
// Remove src attribute
if script_attr_src != None {
set_node_attr(node, "src", None);
// Wipe integrity attribute
set_node_attr(node, "integrity", None);
}
} else if !script_attr_src.clone().unwrap_or_default().is_empty() {
retrieve_and_embed_asset(
@ -1080,7 +1090,7 @@ pub fn walk_and_embed_assets(
);
// Get rid of original contents
noscript_contents.clear();
// Insert HTML containing embedded assets back into NOSCRIPT node
// Insert HTML containing embedded assets into NOSCRIPT node
if let Some(html) =
get_child_node_by_name(&noscript_contents_dom.document, "html")
{

View File

@ -13,7 +13,7 @@ use monolith::html::{
stringify_document, walk_and_embed_assets,
};
use monolith::opts::Options;
use monolith::url::{data_to_data_url, parse_data_url, resolve_url};
use monolith::url::{create_data_url, parse_data_url, resolve_url};
use monolith::utils::retrieve_asset;
mod macros;
@ -266,7 +266,7 @@ fn main() {
0,
) {
Ok((data, final_url, media_type)) => {
let favicon_data_url: Url = data_to_data_url(&media_type, &data, &final_url);
let favicon_data_url: Url = create_data_url(&media_type, &data, &final_url);
dom = add_favicon(&dom.document, favicon_data_url.to_string());
}
Err(_) => {

View File

@ -12,8 +12,8 @@ mod passing {
use std::process::Command;
#[test]
fn add_new_when_provided() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn add_new_when_provided() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-b")
@ -35,13 +35,11 @@ mod passing {
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn keep_existing_when_none_provided() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn keep_existing_when_none_provided() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("data:text/html,<base href=\"http://localhost:8000/\" />Hello%2C%20World!")
@ -61,13 +59,11 @@ mod passing {
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn override_existing_when_provided() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn override_existing_when_provided() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-b")
@ -89,13 +85,11 @@ mod passing {
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn remove_existing_when_empty_provided() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn set_existing_to_empty_when_empty_provided() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-b")
@ -117,7 +111,5 @@ mod passing {
// The exit code should be 0
out.assert().code(0);
Ok(())
}
}

View File

@ -15,8 +15,8 @@ mod passing {
use url::Url;
#[test]
fn print_version() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn print_version() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd.arg("-V").output().unwrap();
// STDOUT should contain program name and version
@ -30,12 +30,10 @@ mod passing {
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn stdin_target_input() -> Result<(), Box<dyn std::error::Error>> {
fn stdin_target_input() {
let mut echo = Command::new("echo")
.arg("Hello from STDIN")
.stdout(Stdio::piped())
@ -44,22 +42,20 @@ mod passing {
let echo_out = echo.stdout.take().unwrap();
echo.wait().unwrap();
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
cmd.stdin(echo_out);
let out = cmd.arg("-M").arg("-").output().unwrap();
// STDOUT should contain HTML from STDIN
// STDOUT should contain HTML created out of STDIN
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head></head><body>Hello from STDIN\n</body></html>\n"
);
Ok(())
}
#[test]
fn css_import_string() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn css_import_string() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let path_html: &Path = Path::new("src/tests/data/css/index.html");
let path_css: &Path = Path::new("src/tests/data/css/style.css");
@ -71,7 +67,7 @@ mod passing {
// STDOUT should contain embedded CSS url()'s
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head><style>\n\n @charset \"UTF-8\";\n\n @import \'data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K\';\n\n @import url(\'data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K\');\n\n @import url(\'data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K\');\n\n</style>\n</head><body></body></html>\n"
"<html><head><style>\n\n @charset \"UTF-8\";\n\n @import \"data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K\";\n\n @import url(\"data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K\");\n\n @import url(\"data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K\");\n\n</style>\n</head><body></body></html>\n"
);
// STDERR should list files that got retrieved
@ -84,19 +80,13 @@ mod passing {
{file_url_css}\n \
{file_url_css}\n\
",
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap())
.unwrap()
.into_string(),
file_url_css = Url::from_file_path(fs::canonicalize(&path_css).unwrap())
.unwrap()
.into_string(),
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
file_url_css = Url::from_file_path(fs::canonicalize(&path_css).unwrap()).unwrap(),
)
);
// The exit code should be 0
out.assert().code(0);
Ok(())
}
}
@ -114,8 +104,8 @@ mod failing {
use std::process::Command;
#[test]
fn bad_input_empty_target() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn bad_input_empty_target() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd.arg("").output().unwrap();
// STDOUT should be empty
@ -129,7 +119,5 @@ mod failing {
// The exit code should be 1
out.assert().code(1);
Ok(())
}
}

View File

@ -12,28 +12,8 @@ mod passing {
use std::process::Command;
#[test]
fn bad_input_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd.arg("data:,Hello%2C%20World!").output().unwrap();
// STDOUT should be empty
assert_eq!(std::str::from_utf8(&out.stdout).unwrap(), "");
// STDERR should contain error description
assert_eq!(
std::str::from_utf8(&out.stderr).unwrap(),
"Unsupported data URL media type\n"
);
// The exit code should be 1
out.assert().code(1);
Ok(())
}
#[test]
fn isolate_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn isolate_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-I")
@ -54,13 +34,11 @@ mod passing {
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn remove_css_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn remove_css_from_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-c")
@ -82,13 +60,11 @@ mod passing {
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn remove_fonts_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn remove_fonts_from_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-F")
@ -110,13 +86,11 @@ mod passing {
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn remove_frames_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn remove_frames_from_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-f")
@ -137,13 +111,11 @@ mod passing {
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn remove_images_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn remove_images_from_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-i")
@ -173,13 +145,11 @@ mod passing {
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn remove_js_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn remove_js_from_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-j")
@ -203,14 +173,43 @@ mod passing {
// The exit code should be 0
out.assert().code(0);
}
}
Ok(())
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
use assert_cmd::prelude::*;
use std::env;
use std::process::Command;
// Passes a data URL with no media type as the target and checks the
// output streams and exit code asserted below.
#[test]
fn bad_input_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd.arg("data:,Hello%2C%20World!").output().unwrap();
// STDOUT should be empty
assert_eq!(std::str::from_utf8(&out.stdout).unwrap(), "");
// STDERR should contain error description
assert_eq!(
std::str::from_utf8(&out.stderr).unwrap(),
"Unsupported data URL media type\n"
);
// The exit code should be 1
out.assert().code(1);
}
#[test]
fn security_disallow_local_assets_within_data_url_targets(
) -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn security_disallow_local_assets_within_data_url_targets() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("data:text/html,%3Cscript%20src=\"src/tests/data/basic/local-script.js\"%3E%3C/script%3E")
@ -228,7 +227,5 @@ mod passing {
// The exit code should be 0
out.assert().code(0);
Ok(())
}
}

View File

@ -15,8 +15,8 @@ mod passing {
use url::Url;
#[test]
fn local_file_target_input_relative_target_path() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn local_file_target_input_relative_target_path() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let cwd_normalized: String =
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
let out = cmd
@ -65,13 +65,11 @@ mod passing {
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn local_file_target_input_absolute_target_path() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn local_file_target_input_absolute_target_path() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let path_html: &Path = Path::new("src/tests/data/basic/local-file.html");
let out = cmd
@ -107,21 +105,17 @@ mod passing {
std::str::from_utf8(&out.stderr).unwrap(),
format!(
"{file_url_html}\n",
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap())
.unwrap()
.into_string(),
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
)
);
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn local_file_url_target_input() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn local_file_url_target_input() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let cwd_normalized: String =
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
@ -177,14 +171,11 @@ mod passing {
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn embed_file_url_local_asset_within_style_attribute() -> Result<(), Box<dyn std::error::Error>>
{
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn embed_file_url_local_asset_within_style_attribute() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let path_html: &Path = Path::new("src/tests/data/svg/index.html");
let path_svg: &Path = Path::new("src/tests/data/svg/image.svg");
@ -193,7 +184,7 @@ mod passing {
// STDOUT should contain HTML with data URL for background-image in it
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head></head><body><div style=\"background-image: url('data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=')\"></div>\n</body></html>\n"
"<html><head></head><body><div style=\"background-image: url(&quot;data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=&quot;)\"></div>\n</body></html>\n"
);
// STDERR should list files that got retrieved
@ -204,24 +195,18 @@ mod passing {
{file_url_html}\n \
{file_url_svg}\n\
",
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap())
.unwrap()
.into_string(),
file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap())
.unwrap()
.into_string(),
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap()).unwrap(),
)
);
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn discard_integrity_for_local_files() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn discard_integrity_for_local_files() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let cwd_normalized: String =
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
@ -280,7 +265,5 @@ mod passing {
// The exit code should be 0
out.assert().code(0);
Ok(())
}
}

View File

@ -15,8 +15,8 @@ mod passing {
use url::Url;
#[test]
fn parse_noscript_contents() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn parse_noscript_contents() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let path_html: &Path = Path::new("src/tests/data/noscript/index.html");
let path_svg: &Path = Path::new("src/tests/data/noscript/image.svg");
@ -36,24 +36,18 @@ mod passing {
{file_url_html}\n \
{file_url_svg}\n\
",
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap())
.unwrap()
.into_string(),
file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap())
.unwrap()
.into_string(),
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap()).unwrap(),
)
);
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn unwrap_noscript_contents() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn unwrap_noscript_contents() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let path_html: &Path = Path::new("src/tests/data/noscript/index.html");
let path_svg: &Path = Path::new("src/tests/data/noscript/image.svg");
@ -73,24 +67,18 @@ mod passing {
{file_url_html}\n \
{file_url_svg}\n\
",
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap())
.unwrap()
.into_string(),
file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap())
.unwrap()
.into_string(),
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap()).unwrap(),
)
);
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn unwrap_noscript_contents_nested() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn unwrap_noscript_contents_nested() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let path_html: &Path = Path::new("src/tests/data/noscript/nested.html");
let path_svg: &Path = Path::new("src/tests/data/noscript/image.svg");
@ -110,24 +98,18 @@ mod passing {
{file_url_html}\n \
{file_url_svg}\n\
",
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap())
.unwrap()
.into_string(),
file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap())
.unwrap()
.into_string(),
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap()).unwrap(),
)
);
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn unwrap_noscript_contents_with_script() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn unwrap_noscript_contents_with_script() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let path_html: &Path = Path::new("src/tests/data/noscript/script.html");
let path_svg: &Path = Path::new("src/tests/data/noscript/image.svg");
@ -136,7 +118,14 @@ mod passing {
// STDOUT should contain HTML with no CSS
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head></head><body><!--noscript--><img src=\"data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=\"><!--/noscript-->\n</body></html>\n"
"<html>\
<head></head>\
<body>\
<!--noscript-->\
<img src=\"data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=\">\
<!--/noscript-->\n\
</body>\
</html>\n"
);
// STDERR should contain target HTML and embedded SVG files
@ -147,18 +136,35 @@ mod passing {
{file_url_html}\n \
{file_url_svg}\n\
",
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap())
.unwrap()
.into_string(),
file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap())
.unwrap()
.into_string(),
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap()).unwrap(),
)
);
// The exit code should be 0
out.assert().code(0);
}
Ok(())
/// Runs the crate binary with `-M -n` on a `data:` URL whose NOSCRIPT element carries an
/// empty `class` attribute, then checks STDOUT, STDERR and the exit code.
#[test]
fn unwrap_noscript_contents_attr_data_url() {
    const TARGET: &str = "data:text/html,<noscript class=\"\">test</noscript>";
    const EXPECTED_STDOUT: &str =
        "<html><head><!--noscript class=\"\"-->test<!--/noscript--></head><body></body></html>\n";

    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))
        .unwrap()
        .args(&["-M", "-n", TARGET])
        .output()
        .unwrap();

    // STDOUT should contain unwrapped contents of NOSCRIPT element
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    assert_eq!(stdout, EXPECTED_STDOUT);

    // STDERR should be empty
    let stderr = std::str::from_utf8(&output.stderr).unwrap();
    assert_eq!(stderr, "");

    // The exit code should be 0
    output.assert().code(0);
}
}

View File

@ -12,10 +12,10 @@ mod passing {
use std::process::Command;
#[test]
fn change_encoding_to_utf_8() -> Result<(), Box<dyn std::error::Error>> {
fn change_encoding_to_utf_8() {
let cwd = env::current_dir().unwrap();
let cwd_normalized: String = str!(cwd.to_str().unwrap()).replace("\\", "/");
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg(if cfg!(windows) {
@ -30,7 +30,14 @@ mod passing {
// STDOUT should contain newly added base URL
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head>\n <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n </head>\n <body>\n © Some Company\n \n\n</body></html>\n"
"<html>\
<head>\n \
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
</head>\n \
<body>\n \
© Some Company\n \
\n\n</body>\
</html>\n"
);
// STDERR should contain only the target file
@ -45,7 +52,5 @@ mod passing {
// The exit code should be 0
out.assert().code(0);
Ok(())
}
}

View File

@ -61,8 +61,8 @@ mod passing {
css::embed_css(cache, &client, &document_url, &STYLE, &options, 0,),
format!(
"/* border: none;*/\
background-image: url('{empty_image}'); \
list-style: url('{empty_image}');\
background-image: url(\"{empty_image}\"); \
list-style: url(\"{empty_image}\");\
width:99.998%; \
margin-top: -20px; \
line-height: -1; \
@ -93,8 +93,8 @@ mod passing {
css::embed_css(cache, &client, &document_url, &STYLE, &options, 0),
format!(
"/* border: none;*/\
background-image: url('{empty_image}'); \
list-style: url('{empty_image}');\
background-image: url(\"{empty_image}\"); \
list-style: url(\"{empty_image}\");\
width:99.998%; \
margin-top: -20px; \
line-height: -1; \
@ -115,7 +115,7 @@ mod passing {
const CSS: &str = "\
#id.class-name:not(:nth-child(3n+0)) {\n \
// border: none;\n \
background-image: url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=');\n\
background-image: url(\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=\");\n\
}\n\
\n\
html > body {}";
@ -191,9 +191,9 @@ mod passing {
"\
@charset \"UTF-8\";\n\
\n\
@import 'data:text/css;base64,aHRtbHtiYWNrZ3JvdW5kLWNvbG9yOiMwMDB9';\n\
@import \"data:text/css;base64,aHRtbHtiYWNrZ3JvdW5kLWNvbG9yOiMwMDB9\";\n\
\n\
@import url('data:text/css;base64,aHRtbHtjb2xvcjojZmZmfQ==')\n\
@import url(\"data:text/css;base64,aHRtbHtjb2xvcjojZmZmfQ==\")\n\
"
);
}
@ -331,7 +331,7 @@ mod passing {
";
const CSS_OUT: &str = "\
#language a[href=\"#translations\"]:before {\n\
content: url('data:;base64,') \"\\a \";\n\
content: url(\"data:;base64,\") \"\\a \";\n\
white-space: pre }\n\
";

View File

@ -1,53 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::css;

    // Inputs shared by the single-quote and double-quote variants of each test.
    const APOSTROPHES: &str = "It's a lovely day, don't you think?";
    const FEET_AND_INCHES: &str = "5'2\", 6'5\"";

    /// An empty string wrapped in single quotes.
    #[test]
    fn empty_input_single_quotes() {
        let quoted = css::enquote(str!(""), false);
        assert_eq!(quoted, "''");
    }

    /// An empty string wrapped in double quotes.
    #[test]
    fn empty_input_double_quotes() {
        let quoted = css::enquote(str!(""), true);
        assert_eq!(quoted, "\"\"");
    }

    /// Apostrophes are backslash-escaped when the wrapping quote is a single quote.
    #[test]
    fn apostrophes_single_quotes() {
        let quoted = css::enquote(str!(APOSTROPHES), false);
        assert_eq!(quoted, "'It\\'s a lovely day, don\\'t you think?'");
    }

    /// Apostrophes are left untouched when the wrapping quote is a double quote.
    #[test]
    fn apostrophes_double_quotes() {
        let quoted = css::enquote(str!(APOSTROPHES), true);
        assert_eq!(quoted, "\"It's a lovely day, don't you think?\"");
    }

    /// With single quotes only the apostrophes are escaped; double quotes pass through.
    #[test]
    fn feet_and_inches_single_quotes() {
        let quoted = css::enquote(str!(FEET_AND_INCHES), false);
        assert_eq!(quoted, "'5\\'2\", 6\\'5\"'");
    }

    /// With double quotes only the double quotes are escaped; apostrophes pass through.
    #[test]
    fn feet_and_inches_double_quotes() {
        let quoted = css::enquote(str!(FEET_AND_INCHES), true);
        assert_eq!(quoted, "\"5'2\\\", 6'5\\\"\"");
    }
}

View File

@ -1,3 +1,2 @@
mod embed_css;
mod enquote;
mod is_image_url_prop;

View File

@ -87,10 +87,12 @@ mod passing {
#[test]
fn no_css() {
let html = "<link rel=\"stylesheet\" href=\"main.css\">\
<link rel=\"alternate stylesheet\" href=\"main.css\">\
<style>html{background-color: #000;}</style>\
<div style=\"display: none;\"></div>";
let html = "\
<link rel=\"stylesheet\" href=\"main.css\">\
<link rel=\"alternate stylesheet\" href=\"main.css\">\
<style>html{background-color: #000;}</style>\
<div style=\"display: none;\"></div>\
";
let dom = html::html_to_dom(&html);
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
@ -108,16 +110,18 @@ mod passing {
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html>\
<head>\
<link rel=\"stylesheet\">\
<link rel=\"alternate stylesheet\">\
<style></style>\
</head>\
<body>\
<div></div>\
</body>\
</html>"
"\
<html>\
<head>\
<link rel=\"stylesheet\">\
<link rel=\"alternate stylesheet\">\
<style></style>\
</head>\
<body>\
<div></div>\
</body>\
</html>\
"
);
}
@ -203,7 +207,15 @@ mod passing {
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><frameset><frame src=\"\"></frameset></html>"
"\
<html>\
<head>\
</head>\
<frameset>\
<frame src=\"\">\
</frameset>\
</html>\
"
);
}
@ -227,16 +239,25 @@ mod passing {
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><body><iframe src=\"\"></iframe></body></html>"
"\
<html>\
<head></head>\
<body>\
<iframe src=\"\"></iframe>\
</body>\
</html>\
"
);
}
#[test]
fn no_js() {
let html = "<div onClick=\"void(0)\">\
<script src=\"http://localhost/assets/some.js\"></script>\
<script>alert(1)</script>\
</div>";
let html = "\
<div onClick=\"void(0)\">\
<script src=\"http://localhost/assets/some.js\"></script>\
<script>alert(1)</script>\
</div>\
";
let dom = html::html_to_dom(&html);
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
@ -254,52 +275,141 @@ mod passing {
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><body><div><script></script>\
<script></script></div></body></html>"
"\
<html>\
<head></head>\
<body>\
<div>\
<script></script>\
<script></script>\
</div>\
</body>\
</html>\
"
);
}
// #[test]
// fn discards_integrity() {
// let html = "<title>No integrity</title>\
// <link integrity=\"sha384-...\" rel=\"something\"/>\
// <script integrity=\"sha384-...\" src=\"some.js\"></script>";
// let dom = html::html_to_dom(&html);
// let url: Url = Url::parse("http://localhost").unwrap();
// let cache = &mut HashMap::new();
#[test]
fn keeps_integrity_for_linked_assets() {
let html = "<title>Has integrity</title>\
<link integrity=\"sha384-12345\" rel=\"something\" href=\"https://some-site.com/some-file.ext\" />";
let dom = html::html_to_dom(&html);
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
// let mut options = Options::default();
// options.no_css = true;
// options.no_frames = true;
// options.no_js = true;
// options.no_images = true;
// options.silent = true;
let mut options = Options::default();
options.silent = true;
// let client = Client::new();
let client = Client::new();
// html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
// let mut buf: Vec<u8> = Vec::new();
// serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
// assert_eq!(
// buf.iter().map(|&c| c as char).collect::<String>(),
// "<html>\
// <head><title>No integrity</title><link rel=\"something\"><script></script></head>\
// <body></body>\
// </html>"
// );
// }
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"\
<html>\
<head>\
<title>Has integrity</title>\
<link integrity=\"sha384-12345\" rel=\"something\" href=\"https://some-site.com/some-file.ext\">\
</head>\
<body></body>\
</html>\
"
);
}
/// With both `no_css` and `no_js` enabled, the stylesheet LINK and the external SCRIPT are
/// expected to come out without their `integrity` attribute (and without `href` / `src`).
#[test]
fn discards_integrity_for_linked_assets_nojs_nocss() {
    let markup = "\
        <title>No integrity</title>\
        <link integrity=\"\" rel=\"stylesheet\" href=\"data:;\"/>\
        <script integrity=\"\" src=\"some.js\"></script>\
    ";
    let dom = html::html_to_dom(&markup);
    let url: Url = Url::parse("http://localhost").unwrap();
    let cache = &mut HashMap::new();

    let mut options = Options::default();
    options.no_css = true;
    options.no_js = true;
    options.silent = true;

    let client = Client::new();

    html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serialized bytes as UTF-8 rather than casting each byte to a `char`,
    // which would garble any multi-byte sequence in the output.
    let serialized = String::from_utf8(buf).unwrap();

    assert_eq!(
        serialized,
        "\
        <html>\
        <head>\
        <title>No integrity</title>\
        <link rel=\"stylesheet\">\
        <script></script>\
        </head>\
        <body></body>\
        </html>\
        "
    );
}
/// Processes a LINK (`rel="something"`, `data:` href) and an external SCRIPT, both carrying
/// `integrity`, with `no_css` and `no_js` enabled.
///
/// The expected markup keeps the LINK untouched, `integrity` included, while the SCRIPT
/// loses both `integrity` and `src`.
// NOTE(review): the test name speaks of discarding integrity for embedded assets, yet the
// LINK expectation retains it — confirm this is the intended contract.
#[test]
fn discards_integrity_for_embedded_assets() {
    let markup = "\
        <title>No integrity</title>\
        <link integrity=\"sha384-123\" rel=\"something\" href=\"data:;\"/>\
        <script integrity=\"sha384-456\" src=\"some.js\"></script>\
    ";
    let dom = html::html_to_dom(&markup);
    let url: Url = Url::parse("http://localhost").unwrap();
    let cache = &mut HashMap::new();

    let mut options = Options::default();
    options.no_css = true;
    options.no_js = true;
    options.silent = true;

    let client = Client::new();

    html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serialized bytes as UTF-8 rather than casting each byte to a `char`,
    // which would garble any multi-byte sequence in the output.
    let serialized = String::from_utf8(buf).unwrap();

    assert_eq!(
        serialized,
        "\
        <html>\
        <head>\
        <title>No integrity</title>\
        <link integrity=\"sha384-123\" rel=\"something\" href=\"data:;\">\
        <script></script>\
        </head>\
        <body>\
        </body>\
        </html>\
        "
    );
}
#[test]
fn removes_unwanted_meta_tags() {
let html = "<html>\
<head>\
<meta http-equiv=\"Refresh\" value=\"20\"/>\
<meta http-equiv=\"Location\" value=\"https://freebsd.org\"/>\
</head>\
<body></body>\
</html>";
let html = "\
<html>\
<head>\
<meta http-equiv=\"Refresh\" value=\"20\"/>\
<meta http-equiv=\"Location\" value=\"https://freebsd.org\"/>\
</head>\
<body>\
</body>\
</html>\
";
let dom = html::html_to_dom(&html);
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
@ -320,19 +430,22 @@ mod passing {
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html>\
"\
<html>\
<head>\
<meta http-equiv=\"disabled by monolith (Refresh)\" value=\"20\">\
<meta http-equiv=\"disabled by monolith (Location)\" value=\"https://freebsd.org\">\
</head>\
<body></body>\
<body>\
</body>\
</html>"
);
}
#[test]
fn processes_noscript_tags() {
let html = "<html>\
let html = "\
<html>\
<body>\
<noscript>\
<img src=\"image.png\" />\
@ -357,7 +470,8 @@ mod passing {
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
format!(
"<html>\
"\
<html>\
<head>\
</head>\
<body>\
@ -370,4 +484,34 @@ mod passing {
)
);
}
/// A SCRIPT element of type `application/json` is expected to survive processing with its
/// attributes and inline JSON payload unchanged.
#[test]
fn preserves_script_type_json() {
    let markup = "<script id=\"data\" type=\"application/json\">{\"mono\":\"lith\"}</script>";
    let dom = html::html_to_dom(&markup);
    let url: Url = Url::parse("http://localhost").unwrap();
    let cache = &mut HashMap::new();

    let mut options = Options::default();
    options.silent = true;

    let client = Client::new();

    html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serialized bytes as UTF-8 rather than casting each byte to a `char`,
    // which would garble any multi-byte sequence in the output.
    let serialized = String::from_utf8(buf).unwrap();

    assert_eq!(
        serialized,
        "\
        <html>\
        <head>\
        <script id=\"data\" type=\"application/json\">{\"mono\":\"lith\"}</script>\
        </head>\
        <body>\
        </body>\
        </html>"
    );
}
}

View File

@ -15,7 +15,7 @@ mod passing {
fn encode_string_with_specific_media_type() {
let mime = "application/javascript";
let data = "var word = 'hello';\nalert(word);\n";
let data_url = url::data_to_data_url(mime, data.as_bytes(), &Url::parse("data:,").unwrap());
let data_url = url::create_data_url(mime, data.as_bytes(), &Url::parse("data:,").unwrap());
assert_eq!(
data_url.as_str(),
@ -26,7 +26,7 @@ mod passing {
#[test]
fn encode_append_fragment() {
let data = "<svg></svg>\n";
let data_url = url::data_to_data_url(
let data_url = url::create_data_url(
"image/svg+xml",
data.as_bytes(),
&Url::parse("data:,").unwrap(),

View File

@ -1,69 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use reqwest::Url;

    use crate::url;

    /// A plain `http://` URL is accepted.
    #[test]
    fn http_url() {
        let parsed = Url::parse("http://kernel.org").unwrap();
        assert!(url::is_http_or_https_url(&parsed));
    }

    /// An `https://` URL is accepted.
    #[test]
    fn https_url() {
        let parsed = Url::parse("https://www.rust-lang.org/").unwrap();
        assert!(url::is_http_or_https_url(&parsed));
    }

    /// An `http:` URL written with backslashes instead of forward slashes is accepted.
    #[test]
    fn http_url_with_backslashes() {
        let parsed = Url::parse("http:\\\\freebsd.org\\").unwrap();
        assert!(url::is_http_or_https_url(&parsed));
    }
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
    use reqwest::Url;

    use crate::url;

    /// Returns `true` only when `input` parses as an absolute URL and that URL is reported
    /// as HTTP(S).
    ///
    /// `Url::parse` rejects input that has no scheme, so unwrapping its result for such
    /// input panics before the assertion is ever reached. A parse failure is therefore
    /// treated as "not HTTP(S)".
    fn parses_as_http_or_https(input: &str) -> bool {
        Url::parse(input).map_or(false, |parsed| url::is_http_or_https_url(&parsed))
    }

    /// Protocol-relative input has no scheme and must not count as HTTP(S).
    #[test]
    fn url_with_no_protocol() {
        assert!(!parses_as_http_or_https("//kernel.org"));
    }

    /// A `./`-relative path must not count as HTTP(S).
    #[test]
    fn dot_slash_filename() {
        assert!(!parses_as_http_or_https("./index.html"));
    }

    /// A bare file name must not count as HTTP(S).
    #[test]
    fn just_filename() {
        assert!(!parses_as_http_or_https("some-local-page.htm"));
    }

    /// An `ftp://` URL pointing at an IP address must not count as HTTP(S).
    // NOTE(review): the test name mentions "https" and "port", but the input is an FTP URL
    // without a port; the name is kept unchanged.
    #[test]
    fn https_ip_port_url() {
        let parsed = Url::parse("ftp://1.2.3.4/www/index.html").unwrap();
        assert!(!url::is_http_or_https_url(&parsed));
    }

    /// A `data:` URL must not count as HTTP(S).
    #[test]
    fn data_url() {
        let parsed =
            Url::parse("data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h")
                .unwrap();
        assert!(!url::is_http_or_https_url(&parsed));
    }
}

View File

@ -48,6 +48,11 @@ mod passing {
assert!(url::is_url_and_has_protocol("https://github.com"));
}
/// A `file://` URL is recognised as a URL that has a protocol.
#[test]
fn file() {
    let input = "file:///tmp/image.png";
    assert!(url::is_url_and_has_protocol(input));
}
#[test]
fn mailto_uppercase() {
assert!(url::is_url_and_has_protocol(
@ -59,6 +64,11 @@ mod passing {
fn empty_data_url() {
assert!(url::is_url_and_has_protocol("data:text/html,"));
}
/// An empty `data:` URL is still recognised when padded with a leading and trailing space.
#[test]
fn empty_data_url_surrounded_by_spaces() {
    let input = " data:text/html, ";
    assert!(url::is_url_and_has_protocol(input));
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
@ -74,25 +84,27 @@ mod failing {
#[test]
fn url_with_no_protocol() {
assert!(!url::is_url_and_has_protocol(
"//some-hostname.com/some-file.html"
));
assert_eq!(
url::is_url_and_has_protocol("//some-hostname.com/some-file.html"),
false
);
}
#[test]
fn relative_path() {
assert!(!url::is_url_and_has_protocol(
"some-hostname.com/some-file.html"
));
assert_eq!(
url::is_url_and_has_protocol("some-hostname.com/some-file.html"),
false
);
}
#[test]
fn relative_to_root_path() {
assert!(!url::is_url_and_has_protocol("/some-file.html"));
assert_eq!(url::is_url_and_has_protocol("/some-file.html"), false);
}
#[test]
fn empty_string() {
assert!(!url::is_url_and_has_protocol(""));
assert_eq!(url::is_url_and_has_protocol(""), false);
}
}

View File

@ -1,5 +1,5 @@
mod clean_url;
mod data_to_data_url;
mod create_data_url;
mod is_url_and_has_protocol;
mod parse_data_url;
mod percent_decode;

View File

@ -11,6 +11,34 @@ mod passing {
use crate::url;
/// A path without a leading slash, resolved against a bare HTTPS origin, lands directly
/// under that origin's root.
#[test]
fn basic_httsp_relative() {
    // NOTE(review): "httsp" in the test name looks like a typo for "https"; left as-is so
    // the test keeps its existing name.
    let base = Url::parse("https://www.kernel.org").unwrap();
    let expected = Url::parse("https://www.kernel.org/category/signatures.html").unwrap();

    let resolved = url::resolve_url(&base, "category/signatures.html");

    assert_eq!(resolved.as_str(), expected.as_str());
}
/// A root-relative path (leading slash), resolved against a bare HTTPS origin, lands
/// directly under that origin's root.
#[test]
fn basic_httsp_absolute() {
    // NOTE(review): "httsp" in the test name looks like a typo for "https"; left as-is so
    // the test keeps its existing name.
    let base = Url::parse("https://www.kernel.org").unwrap();
    let expected = Url::parse("https://www.kernel.org/category/signatures.html").unwrap();

    let resolved = url::resolve_url(&base, "/category/signatures.html");

    assert_eq!(resolved.as_str(), expected.as_str());
}
#[test]
fn from_https_to_level_up_relative() {
assert_eq!(
@ -50,7 +78,7 @@ mod passing {
}
#[test]
fn from_https_url_to_relative_root_path() {
fn from_https_url_to_absolute_path() {
assert_eq!(
url::resolve_url(
&Url::parse("https://www.kernel.org/category/signatures.html").unwrap(),
@ -148,22 +176,28 @@ mod passing {
);
}
// #[test]
// fn resolve_from_file_url_to_file_url() {
// assert_eq!(
// if cfg!(windows) {
// url::resolve_url(&Url::parse("file:///c:/index.html").unwrap(), "file:///c:/image.png").as_str()
// } else {
// url::resolve_url(&Url::parse("file:///tmp/index.html").unwrap(), "file:///tmp/image.png")
// .as_str()
// },
// if cfg!(windows) {
// "file:///c:/image.png"
// } else {
// "file:///tmp/image.png"
// }
// );
// }
/// Resolving an absolute `file://` URL against another `file://` URL yields the target
/// unchanged. The paths differ per platform: a drive-letter path on Windows, `/tmp` elsewhere.
#[test]
fn resolve_from_file_url_to_file_url() {
    let (base, target) = if cfg!(windows) {
        ("file:///c:/index.html", "file:///c:/image.png")
    } else {
        ("file:///tmp/index.html", "file:///tmp/image.png")
    };

    let resolved = url::resolve_url(&Url::parse(base).unwrap(), target);

    assert_eq!(resolved.as_str(), target);
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗

View File

@ -36,8 +36,8 @@ mod passing {
)
.unwrap();
assert_eq!(
url::data_to_data_url(&media_type, &data, &final_url),
url::data_to_data_url(
url::create_data_url(&media_type, &data, &final_url),
url::create_data_url(
"text/html",
"target".as_bytes(),
&Url::parse("data:text/html;base64,c291cmNl").unwrap()
@ -45,7 +45,7 @@ mod passing {
);
assert_eq!(
final_url,
url::data_to_data_url(
url::create_data_url(
"text/html",
"target".as_bytes(),
&Url::parse("data:text/html;base64,c291cmNl").unwrap()
@ -85,7 +85,7 @@ mod passing {
0,
)
.unwrap();
assert_eq!(url::data_to_data_url("application/javascript", &data, &final_url), Url::parse("data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==").unwrap());
assert_eq!(url::create_data_url("application/javascript", &data, &final_url), Url::parse("data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==").unwrap());
assert_eq!(
final_url,
Url::parse(&format!(

View File

@ -12,7 +12,7 @@ pub fn clean_url(url: Url) -> Url {
url
}
pub fn data_to_data_url(media_type: &str, data: &[u8], final_asset_url: &Url) -> Url {
pub fn create_data_url(media_type: &str, data: &[u8], final_asset_url: &Url) -> Url {
let media_type: String = if media_type.is_empty() {
detect_media_type(data, &final_asset_url)
} else {