add more tests
This commit is contained in:
parent
a308a20411
commit
a6e891b3c5
16
README.md
16
README.md
@ -79,11 +79,11 @@ or
|
|||||||
- `-j`: Exclude JavaScript
|
- `-j`: Exclude JavaScript
|
||||||
- `-k`: Accept invalid X.509 (TLS) certificates
|
- `-k`: Accept invalid X.509 (TLS) certificates
|
||||||
- `-M`: Don't add timestamp and URL information
|
- `-M`: Don't add timestamp and URL information
|
||||||
- `-n`: Extract contents of NOSCRIPT tags
|
- `-n`: Extract contents of NOSCRIPT elements
|
||||||
- `-o`: Write output to `file`
|
- `-o`: Write output to `file`
|
||||||
- `-s`: Be quiet
|
- `-s`: Be quiet
|
||||||
- `-t`: Adjust `network request timeout`
|
- `-t`: Adjust `network request timeout`
|
||||||
- `-u`: Provide `custom User-Agent`
|
- `-u`: Provide custom `User-Agent`
|
||||||
- `-v`: Exclude videos
|
- `-v`: Exclude videos
|
||||||
|
|
||||||
---------------------------------------------------
|
---------------------------------------------------
|
||||||
@ -99,19 +99,15 @@ Please open an issue if something is wrong, that helps make this project better.
|
|||||||
---------------------------------------------------
|
---------------------------------------------------
|
||||||
|
|
||||||
## Related projects
|
## Related projects
|
||||||
- `Monolith Chrome Extension`: https://github.com/rhysd/monolith-of-web
|
- Monolith Chrome Extension: https://github.com/rhysd/monolith-of-web
|
||||||
- `Pagesaver`: https://github.com/distributed-mind/pagesaver
|
- Pagesaver: https://github.com/distributed-mind/pagesaver
|
||||||
- `Personal WayBack Machine`: https://github.com/popey/pwbm
|
- Personal WayBack Machine: https://github.com/popey/pwbm
|
||||||
- `Hako`: https://github.com/dmpop/hako
|
- Hako: https://github.com/dmpop/hako
|
||||||
|
|
||||||
---------------------------------------------------
|
---------------------------------------------------
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
<a href="https://creativecommons.org/publicdomain/zero/1.0/">
|
|
||||||
<img src="https://i.creativecommons.org/p/zero/1.0/88x31.png" alt="CC0-1.0" />
|
|
||||||
</a>
|
|
||||||
<br />
|
|
||||||
To the extent possible under law, the author(s) have dedicated all copyright related and neighboring rights to this software to the public domain worldwide.
|
To the extent possible under law, the author(s) have dedicated all copyright related and neighboring rights to this software to the public domain worldwide.
|
||||||
This software is distributed without any warranty.
|
This software is distributed without any warranty.
|
||||||
|
|
||||||
|
71
src/html.rs
71
src/html.rs
@ -474,8 +474,9 @@ pub fn stringify_document(handle: &Handle, options: &Options) -> String {
|
|||||||
result = String::from_utf8(buf).unwrap();
|
result = String::from_utf8(buf).unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Unwrap NOSCRIPT elements
|
||||||
if options.unwrap_noscript {
|
if options.unwrap_noscript {
|
||||||
let noscript_re = Regex::new(r"<(?P<c>/?noscript)>").unwrap();
|
let noscript_re = Regex::new(r"<(?P<c>/?noscript[^>]*)>").unwrap();
|
||||||
result = noscript_re.replace_all(&result, "<!--$c-->").to_string();
|
result = noscript_re.replace_all(&result, "<!--$c-->").to_string();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -503,29 +504,25 @@ pub fn retrieve_and_embed_asset(
|
|||||||
depth + 1,
|
depth + 1,
|
||||||
) {
|
) {
|
||||||
Ok((data, final_url, mut media_type)) => {
|
Ok((data, final_url, mut media_type)) => {
|
||||||
// Check integrity if it's a LINK or SCRIPT tag
|
|
||||||
let node_name: &str = get_node_name(&node).unwrap();
|
let node_name: &str = get_node_name(&node).unwrap();
|
||||||
|
|
||||||
|
// Check integrity if it's a LINK or SCRIPT element
|
||||||
let mut ok_to_include: bool = true;
|
let mut ok_to_include: bool = true;
|
||||||
|
|
||||||
if node_name == "link" || node_name == "script" {
|
if node_name == "link" || node_name == "script" {
|
||||||
let node_integrity_attr_value: Option<String> = get_node_attr(node, "integrity");
|
|
||||||
|
|
||||||
// Check integrity
|
// Check integrity
|
||||||
if let Some(node_integrity_attr_value) = node_integrity_attr_value {
|
if let Some(node_integrity_attr_value) = get_node_attr(node, "integrity") {
|
||||||
if !node_integrity_attr_value.is_empty() {
|
if !node_integrity_attr_value.is_empty() {
|
||||||
ok_to_include = check_integrity(&data, &node_integrity_attr_value);
|
ok_to_include = check_integrity(&data, &node_integrity_attr_value);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// Wipe integrity attribute
|
// Wipe the integrity attribute
|
||||||
set_node_attr(node, "integrity", None);
|
set_node_attr(node, "integrity", None);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if ok_to_include {
|
if ok_to_include {
|
||||||
if node_name == "link" {
|
if node_name == "link" && determine_link_node_type(node) == "stylesheet" {
|
||||||
let link_type: &str = determine_link_node_type(node);
|
// Stylesheet LINK elements require special treatment
|
||||||
// CSS LINK nodes requires special treatment
|
|
||||||
if link_type == "stylesheet" {
|
|
||||||
let css: String = embed_css(
|
let css: String = embed_css(
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
@ -534,13 +531,12 @@ pub fn retrieve_and_embed_asset(
|
|||||||
options,
|
options,
|
||||||
depth + 1,
|
depth + 1,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Create and embed data URL
|
||||||
let css_data_url = create_data_url("text/css", css.as_bytes(), &final_url);
|
let css_data_url = create_data_url("text/css", css.as_bytes(), &final_url);
|
||||||
|
|
||||||
set_node_attr(&node, attr_name, Some(css_data_url.to_string()));
|
set_node_attr(&node, attr_name, Some(css_data_url.to_string()));
|
||||||
|
|
||||||
return; // Do not fall through
|
|
||||||
}
|
|
||||||
} else if node_name == "frame" || node_name == "iframe" {
|
} else if node_name == "frame" || node_name == "iframe" {
|
||||||
|
// (I)FRAMEs are also quite different from conventional resources
|
||||||
let frame_dom = html_to_dom(&String::from_utf8_lossy(&data));
|
let frame_dom = html_to_dom(&String::from_utf8_lossy(&data));
|
||||||
walk_and_embed_assets(
|
walk_and_embed_assets(
|
||||||
cache,
|
cache,
|
||||||
@ -559,30 +555,38 @@ pub fn retrieve_and_embed_asset(
|
|||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
// Create and embed data URL
|
||||||
let mut frame_data_url = create_data_url(&media_type, &frame_data, &final_url);
|
let mut frame_data_url = create_data_url(&media_type, &frame_data, &final_url);
|
||||||
|
|
||||||
frame_data_url.set_fragment(resolved_url.fragment());
|
frame_data_url.set_fragment(resolved_url.fragment());
|
||||||
|
|
||||||
set_node_attr(node, attr_name, Some(frame_data_url.to_string()));
|
set_node_attr(node, attr_name, Some(frame_data_url.to_string()));
|
||||||
|
} else {
|
||||||
|
// Every other type of element gets processed here
|
||||||
|
|
||||||
return; // Do not fall through
|
// Parse media type for SCRIPT elements
|
||||||
}
|
|
||||||
|
|
||||||
// Everything else
|
|
||||||
if node_name == "script" {
|
if node_name == "script" {
|
||||||
|
if let Some(_) = get_node_attr(node, "src") {
|
||||||
|
if let Some(script_node_type_attr_value) = get_node_attr(node, "type") {
|
||||||
|
media_type = script_node_type_attr_value.to_string();
|
||||||
|
} else {
|
||||||
|
// Fall back to the default media type if the type attribute is not specified
|
||||||
media_type = "application/javascript".to_string();
|
media_type = "application/javascript".to_string();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create and embed data URL
|
||||||
let mut data_url = create_data_url(&media_type, &data, &final_url);
|
let mut data_url = create_data_url(&media_type, &data, &final_url);
|
||||||
data_url.set_fragment(resolved_url.fragment());
|
data_url.set_fragment(resolved_url.fragment());
|
||||||
set_node_attr(node, attr_name, Some(data_url.to_string()));
|
set_node_attr(node, attr_name, Some(data_url.to_string()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
Err(_) => {
|
Err(_) => {
|
||||||
if resolved_url.scheme() == "http" || resolved_url.scheme() == "https" {
|
if resolved_url.scheme() == "http" || resolved_url.scheme() == "https" {
|
||||||
// Keep remote reference if unable to retrieve the asset
|
// Keep remote references if unable to retrieve the asset
|
||||||
set_node_attr(node, attr_name, Some(resolved_url.to_string()));
|
set_node_attr(node, attr_name, Some(resolved_url.to_string()));
|
||||||
} else {
|
} else {
|
||||||
// Exclude non-remote URLs
|
// Remove local references if they can't be successfully embedded as data URLs
|
||||||
set_node_attr(node, attr_name, None);
|
set_node_attr(node, attr_name, None);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -645,7 +649,7 @@ pub fn walk_and_embed_assets(
|
|||||||
let link_type: &str = determine_link_node_type(node);
|
let link_type: &str = determine_link_node_type(node);
|
||||||
|
|
||||||
if link_type == "icon" {
|
if link_type == "icon" {
|
||||||
// Find and resolve this LINK node's href attribute
|
// Find and resolve LINK's href attribute
|
||||||
if let Some(link_attr_href_value) = get_node_attr(node, "href") {
|
if let Some(link_attr_href_value) = get_node_attr(node, "href") {
|
||||||
if !options.no_images && !link_attr_href_value.is_empty() {
|
if !options.no_images && !link_attr_href_value.is_empty() {
|
||||||
retrieve_and_embed_asset(
|
retrieve_and_embed_asset(
|
||||||
@ -663,10 +667,12 @@ pub fn walk_and_embed_assets(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if link_type == "stylesheet" {
|
} else if link_type == "stylesheet" {
|
||||||
// Find and resolve this LINK node's href attribute
|
// Resolve LINK's href attribute
|
||||||
if let Some(link_attr_href_value) = get_node_attr(node, "href") {
|
if let Some(link_attr_href_value) = get_node_attr(node, "href") {
|
||||||
if options.no_css {
|
if options.no_css {
|
||||||
set_node_attr(node, "href", None);
|
set_node_attr(node, "href", None);
|
||||||
|
// Wipe integrity attribute
|
||||||
|
set_node_attr(node, "integrity", None);
|
||||||
} else {
|
} else {
|
||||||
if !link_attr_href_value.is_empty() {
|
if !link_attr_href_value.is_empty() {
|
||||||
retrieve_and_embed_asset(
|
retrieve_and_embed_asset(
|
||||||
@ -916,17 +922,18 @@ pub fn walk_and_embed_assets(
|
|||||||
// Replace with empty JS call to preserve original behavior
|
// Replace with empty JS call to preserve original behavior
|
||||||
set_node_attr(node, "href", Some(str!("javascript:;")));
|
set_node_attr(node, "href", Some(str!("javascript:;")));
|
||||||
}
|
}
|
||||||
} else if anchor_attr_href_value.clone().starts_with('#')
|
|
||||||
|| is_url_and_has_protocol(&anchor_attr_href_value.clone())
|
|
||||||
{
|
|
||||||
// Don't touch mailto: links or hrefs which begin with a hash sign
|
|
||||||
} else {
|
} else {
|
||||||
|
// Don't touch mailto: links or hrefs which begin with a hash sign
|
||||||
|
if !anchor_attr_href_value.clone().starts_with('#')
|
||||||
|
&& !is_url_and_has_protocol(&anchor_attr_href_value.clone())
|
||||||
|
{
|
||||||
let href_full_url: Url =
|
let href_full_url: Url =
|
||||||
resolve_url(document_url, &anchor_attr_href_value);
|
resolve_url(document_url, &anchor_attr_href_value);
|
||||||
set_node_attr(node, "href", Some(href_full_url.to_string()));
|
set_node_attr(node, "href", Some(href_full_url.to_string()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
"script" => {
|
"script" => {
|
||||||
// Read values of integrity and src attributes
|
// Read values of integrity and src attributes
|
||||||
let script_attr_src: Option<String> = get_node_attr(node, "src");
|
let script_attr_src: Option<String> = get_node_attr(node, "src");
|
||||||
@ -937,6 +944,8 @@ pub fn walk_and_embed_assets(
|
|||||||
// Remove src attribute
|
// Remove src attribute
|
||||||
if script_attr_src != None {
|
if script_attr_src != None {
|
||||||
set_node_attr(node, "src", None);
|
set_node_attr(node, "src", None);
|
||||||
|
// Wipe integrity attribute
|
||||||
|
set_node_attr(node, "integrity", None);
|
||||||
}
|
}
|
||||||
} else if !script_attr_src.clone().unwrap_or_default().is_empty() {
|
} else if !script_attr_src.clone().unwrap_or_default().is_empty() {
|
||||||
retrieve_and_embed_asset(
|
retrieve_and_embed_asset(
|
||||||
@ -1081,7 +1090,7 @@ pub fn walk_and_embed_assets(
|
|||||||
);
|
);
|
||||||
// Get rid of original contents
|
// Get rid of original contents
|
||||||
noscript_contents.clear();
|
noscript_contents.clear();
|
||||||
// Insert HTML containing embedded assets back into NOSCRIPT node
|
// Insert HTML containing embedded assets into NOSCRIPT node
|
||||||
if let Some(html) =
|
if let Some(html) =
|
||||||
get_child_node_by_name(&noscript_contents_dom.document, "html")
|
get_child_node_by_name(&noscript_contents_dom.document, "html")
|
||||||
{
|
{
|
||||||
|
@ -88,7 +88,7 @@ mod passing {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn remove_existing_when_empty_provided() {
|
fn set_existing_to_empty_when_empty_provided() {
|
||||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||||
let out = cmd
|
let out = cmd
|
||||||
.arg("-M")
|
.arg("-M")
|
||||||
|
@ -11,24 +11,6 @@ mod passing {
|
|||||||
use std::env;
|
use std::env;
|
||||||
use std::process::Command;
|
use std::process::Command;
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn bad_input_data_url() {
|
|
||||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
|
||||||
let out = cmd.arg("data:,Hello%2C%20World!").output().unwrap();
|
|
||||||
|
|
||||||
// STDOUT should contain HTML
|
|
||||||
assert_eq!(std::str::from_utf8(&out.stdout).unwrap(), "");
|
|
||||||
|
|
||||||
// STDERR should contain error description
|
|
||||||
assert_eq!(
|
|
||||||
std::str::from_utf8(&out.stderr).unwrap(),
|
|
||||||
"Unsupported data URL media type\n"
|
|
||||||
);
|
|
||||||
|
|
||||||
// The exit code should be 1
|
|
||||||
out.assert().code(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn isolate_data_url() {
|
fn isolate_data_url() {
|
||||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||||
@ -192,6 +174,38 @@ mod passing {
|
|||||||
// The exit code should be 0
|
// The exit code should be 0
|
||||||
out.assert().code(0);
|
out.assert().code(0);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||||
|
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||||
|
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||||
|
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||||
|
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||||
|
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod failing {
|
||||||
|
use assert_cmd::prelude::*;
|
||||||
|
use std::env;
|
||||||
|
use std::process::Command;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn bad_input_data_url() {
|
||||||
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||||
|
let out = cmd.arg("data:,Hello%2C%20World!").output().unwrap();
|
||||||
|
|
||||||
|
// STDOUT should be empty
|
||||||
|
assert_eq!(std::str::from_utf8(&out.stdout).unwrap(), "");
|
||||||
|
|
||||||
|
// STDERR should contain error description
|
||||||
|
assert_eq!(
|
||||||
|
std::str::from_utf8(&out.stderr).unwrap(),
|
||||||
|
"Unsupported data URL media type\n"
|
||||||
|
);
|
||||||
|
|
||||||
|
// The exit code should be 1
|
||||||
|
out.assert().code(1);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn security_disallow_local_assets_within_data_url_targets() {
|
fn security_disallow_local_assets_within_data_url_targets() {
|
||||||
|
@ -130,7 +130,14 @@ mod passing {
|
|||||||
// STDOUT should contain HTML with the NOSCRIPT tags turned into comments
|
// STDOUT should contain HTML with the NOSCRIPT tags turned into comments
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stdout).unwrap(),
|
std::str::from_utf8(&out.stdout).unwrap(),
|
||||||
"<html><head></head><body><!--noscript--><img src=\"data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=\"><!--/noscript-->\n</body></html>\n"
|
"<html>\
|
||||||
|
<head></head>\
|
||||||
|
<body>\
|
||||||
|
<!--noscript-->\
|
||||||
|
<img src=\"data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=\">\
|
||||||
|
<!--/noscript-->\n\
|
||||||
|
</body>\
|
||||||
|
</html>\n"
|
||||||
);
|
);
|
||||||
|
|
||||||
// STDERR should contain target HTML and embedded SVG files
|
// STDERR should contain target HTML and embedded SVG files
|
||||||
@ -153,4 +160,27 @@ mod passing {
|
|||||||
// The exit code should be 0
|
// The exit code should be 0
|
||||||
out.assert().code(0);
|
out.assert().code(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn unwrap_noscript_contents_attr_data_url() {
|
||||||
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||||
|
let out = cmd
|
||||||
|
.arg("-M")
|
||||||
|
.arg("-n")
|
||||||
|
.arg("data:text/html,<noscript class=\"\">test</noscript>")
|
||||||
|
.output()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// STDOUT should contain unwrapped contents of NOSCRIPT element
|
||||||
|
assert_eq!(
|
||||||
|
std::str::from_utf8(&out.stdout).unwrap(),
|
||||||
|
"<html><head><!--noscript class=\"\"-->test<!--/noscript--></head><body></body></html>\n"
|
||||||
|
);
|
||||||
|
|
||||||
|
// STDERR should be empty
|
||||||
|
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
||||||
|
|
||||||
|
// The exit code should be 0
|
||||||
|
out.assert().code(0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -30,7 +30,14 @@ mod passing {
|
|||||||
// STDOUT should contain newly added base URL
|
// STDOUT should contain newly added base URL
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stdout).unwrap(),
|
std::str::from_utf8(&out.stdout).unwrap(),
|
||||||
"<html><head>\n <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n </head>\n <body>\n © Some Company\n \n\n</body></html>\n"
|
"<html>\
|
||||||
|
<head>\n \
|
||||||
|
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
|
||||||
|
</head>\n \
|
||||||
|
<body>\n \
|
||||||
|
© Some Company\n \
|
||||||
|
\n\n</body>\
|
||||||
|
</html>\n"
|
||||||
);
|
);
|
||||||
|
|
||||||
// STDERR should contain only the target file
|
// STDERR should contain only the target file
|
||||||
|
@ -87,10 +87,12 @@ mod passing {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn no_css() {
|
fn no_css() {
|
||||||
let html = "<link rel=\"stylesheet\" href=\"main.css\">\
|
let html = "\
|
||||||
|
<link rel=\"stylesheet\" href=\"main.css\">\
|
||||||
<link rel=\"alternate stylesheet\" href=\"main.css\">\
|
<link rel=\"alternate stylesheet\" href=\"main.css\">\
|
||||||
<style>html{background-color: #000;}</style>\
|
<style>html{background-color: #000;}</style>\
|
||||||
<div style=\"display: none;\"></div>";
|
<div style=\"display: none;\"></div>\
|
||||||
|
";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html);
|
||||||
let url: Url = Url::parse("http://localhost").unwrap();
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
let cache = &mut HashMap::new();
|
let cache = &mut HashMap::new();
|
||||||
@ -108,7 +110,8 @@ mod passing {
|
|||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||||
"<html>\
|
"\
|
||||||
|
<html>\
|
||||||
<head>\
|
<head>\
|
||||||
<link rel=\"stylesheet\">\
|
<link rel=\"stylesheet\">\
|
||||||
<link rel=\"alternate stylesheet\">\
|
<link rel=\"alternate stylesheet\">\
|
||||||
@ -117,7 +120,8 @@ mod passing {
|
|||||||
<body>\
|
<body>\
|
||||||
<div></div>\
|
<div></div>\
|
||||||
</body>\
|
</body>\
|
||||||
</html>"
|
</html>\
|
||||||
|
"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -203,7 +207,15 @@ mod passing {
|
|||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||||
"<html><head></head><frameset><frame src=\"\"></frameset></html>"
|
"\
|
||||||
|
<html>\
|
||||||
|
<head>\
|
||||||
|
</head>\
|
||||||
|
<frameset>\
|
||||||
|
<frame src=\"\">\
|
||||||
|
</frameset>\
|
||||||
|
</html>\
|
||||||
|
"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -227,16 +239,25 @@ mod passing {
|
|||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||||
"<html><head></head><body><iframe src=\"\"></iframe></body></html>"
|
"\
|
||||||
|
<html>\
|
||||||
|
<head></head>\
|
||||||
|
<body>\
|
||||||
|
<iframe src=\"\"></iframe>\
|
||||||
|
</body>\
|
||||||
|
</html>\
|
||||||
|
"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn no_js() {
|
fn no_js() {
|
||||||
let html = "<div onClick=\"void(0)\">\
|
let html = "\
|
||||||
|
<div onClick=\"void(0)\">\
|
||||||
<script src=\"http://localhost/assets/some.js\"></script>\
|
<script src=\"http://localhost/assets/some.js\"></script>\
|
||||||
<script>alert(1)</script>\
|
<script>alert(1)</script>\
|
||||||
</div>";
|
</div>\
|
||||||
|
";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html);
|
||||||
let url: Url = Url::parse("http://localhost").unwrap();
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
let cache = &mut HashMap::new();
|
let cache = &mut HashMap::new();
|
||||||
@ -254,52 +275,141 @@ mod passing {
|
|||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||||
"<html><head></head><body><div><script></script>\
|
"\
|
||||||
<script></script></div></body></html>"
|
<html>\
|
||||||
|
<head></head>\
|
||||||
|
<body>\
|
||||||
|
<div>\
|
||||||
|
<script></script>\
|
||||||
|
<script></script>\
|
||||||
|
</div>\
|
||||||
|
</body>\
|
||||||
|
</html>\
|
||||||
|
"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// #[test]
|
#[test]
|
||||||
// fn discards_integrity() {
|
fn keeps_integrity_for_linked_assets() {
|
||||||
// let html = "<title>No integrity</title>\
|
let html = "<title>Has integrity</title>\
|
||||||
// <link integrity=\"sha384-...\" rel=\"something\"/>\
|
<link integrity=\"sha384-12345\" rel=\"something\" href=\"https://some-site.com/some-file.ext\" />";
|
||||||
// <script integrity=\"sha384-...\" src=\"some.js\"></script>";
|
let dom = html::html_to_dom(&html);
|
||||||
// let dom = html::html_to_dom(&html);
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
// let url: Url = Url::parse("http://localhost").unwrap();
|
let cache = &mut HashMap::new();
|
||||||
// let cache = &mut HashMap::new();
|
|
||||||
|
|
||||||
// let mut options = Options::default();
|
let mut options = Options::default();
|
||||||
// options.no_css = true;
|
options.silent = true;
|
||||||
// options.no_frames = true;
|
|
||||||
// options.no_js = true;
|
|
||||||
// options.no_images = true;
|
|
||||||
// options.silent = true;
|
|
||||||
|
|
||||||
// let client = Client::new();
|
let client = Client::new();
|
||||||
|
|
||||||
// html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||||
|
|
||||||
// let mut buf: Vec<u8> = Vec::new();
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
// serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||||
|
|
||||||
// assert_eq!(
|
assert_eq!(
|
||||||
// buf.iter().map(|&c| c as char).collect::<String>(),
|
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||||
// "<html>\
|
"\
|
||||||
// <head><title>No integrity</title><link rel=\"something\"><script></script></head>\
|
<html>\
|
||||||
// <body></body>\
|
<head>\
|
||||||
// </html>"
|
<title>Has integrity</title>\
|
||||||
// );
|
<link integrity=\"sha384-12345\" rel=\"something\" href=\"https://some-site.com/some-file.ext\">\
|
||||||
// }
|
</head>\
|
||||||
|
<body></body>\
|
||||||
|
</html>\
|
||||||
|
"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn discards_integrity_for_linked_assets_nojs_nocss() {
|
||||||
|
let html = "\
|
||||||
|
<title>No integrity</title>\
|
||||||
|
<link integrity=\"\" rel=\"stylesheet\" href=\"data:;\"/>\
|
||||||
|
<script integrity=\"\" src=\"some.js\"></script>\
|
||||||
|
";
|
||||||
|
let dom = html::html_to_dom(&html);
|
||||||
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
|
let cache = &mut HashMap::new();
|
||||||
|
|
||||||
|
let mut options = Options::default();
|
||||||
|
options.no_css = true;
|
||||||
|
options.no_js = true;
|
||||||
|
options.silent = true;
|
||||||
|
|
||||||
|
let client = Client::new();
|
||||||
|
|
||||||
|
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||||
|
|
||||||
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
|
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||||
|
"\
|
||||||
|
<html>\
|
||||||
|
<head>\
|
||||||
|
<title>No integrity</title>\
|
||||||
|
<link rel=\"stylesheet\">\
|
||||||
|
<script></script>\
|
||||||
|
</head>\
|
||||||
|
<body></body>\
|
||||||
|
</html>\
|
||||||
|
"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn discards_integrity_for_embedded_assets() {
|
||||||
|
let html = "\
|
||||||
|
<title>No integrity</title>\
|
||||||
|
<link integrity=\"sha384-123\" rel=\"something\" href=\"data:;\"/>\
|
||||||
|
<script integrity=\"sha384-456\" src=\"some.js\"></script>\
|
||||||
|
";
|
||||||
|
let dom = html::html_to_dom(&html);
|
||||||
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
|
let cache = &mut HashMap::new();
|
||||||
|
|
||||||
|
let mut options = Options::default();
|
||||||
|
options.no_css = true;
|
||||||
|
options.no_js = true;
|
||||||
|
options.silent = true;
|
||||||
|
|
||||||
|
let client = Client::new();
|
||||||
|
|
||||||
|
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||||
|
|
||||||
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
|
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||||
|
"\
|
||||||
|
<html>\
|
||||||
|
<head>\
|
||||||
|
<title>No integrity</title>\
|
||||||
|
<link integrity=\"sha384-123\" rel=\"something\" href=\"data:;\">\
|
||||||
|
<script></script>\
|
||||||
|
</head>\
|
||||||
|
<body>\
|
||||||
|
</body>\
|
||||||
|
</html>\
|
||||||
|
"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn removes_unwanted_meta_tags() {
|
fn removes_unwanted_meta_tags() {
|
||||||
let html = "<html>\
|
let html = "\
|
||||||
|
<html>\
|
||||||
<head>\
|
<head>\
|
||||||
<meta http-equiv=\"Refresh\" value=\"20\"/>\
|
<meta http-equiv=\"Refresh\" value=\"20\"/>\
|
||||||
<meta http-equiv=\"Location\" value=\"https://freebsd.org\"/>\
|
<meta http-equiv=\"Location\" value=\"https://freebsd.org\"/>\
|
||||||
</head>\
|
</head>\
|
||||||
<body></body>\
|
<body>\
|
||||||
</html>";
|
</body>\
|
||||||
|
</html>\
|
||||||
|
";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html);
|
||||||
let url: Url = Url::parse("http://localhost").unwrap();
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
let cache = &mut HashMap::new();
|
let cache = &mut HashMap::new();
|
||||||
@ -320,19 +430,22 @@ mod passing {
|
|||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||||
"<html>\
|
"\
|
||||||
|
<html>\
|
||||||
<head>\
|
<head>\
|
||||||
<meta http-equiv=\"disabled by monolith (Refresh)\" value=\"20\">\
|
<meta http-equiv=\"disabled by monolith (Refresh)\" value=\"20\">\
|
||||||
<meta http-equiv=\"disabled by monolith (Location)\" value=\"https://freebsd.org\">\
|
<meta http-equiv=\"disabled by monolith (Location)\" value=\"https://freebsd.org\">\
|
||||||
</head>\
|
</head>\
|
||||||
<body></body>\
|
<body>\
|
||||||
|
</body>\
|
||||||
</html>"
|
</html>"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn processes_noscript_tags() {
|
fn processes_noscript_tags() {
|
||||||
let html = "<html>\
|
let html = "\
|
||||||
|
<html>\
|
||||||
<body>\
|
<body>\
|
||||||
<noscript>\
|
<noscript>\
|
||||||
<img src=\"image.png\" />\
|
<img src=\"image.png\" />\
|
||||||
@ -357,7 +470,8 @@ mod passing {
|
|||||||
assert_eq!(
|
assert_eq!(
|
||||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||||
format!(
|
format!(
|
||||||
"<html>\
|
"\
|
||||||
|
<html>\
|
||||||
<head>\
|
<head>\
|
||||||
</head>\
|
</head>\
|
||||||
<body>\
|
<body>\
|
||||||
@ -370,4 +484,34 @@ mod passing {
|
|||||||
)
|
)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn preserves_script_type_json() {
|
||||||
|
let html = "<script id=\"data\" type=\"application/json\">{\"mono\":\"lith\"}</script>";
|
||||||
|
let dom = html::html_to_dom(&html);
|
||||||
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
|
let cache = &mut HashMap::new();
|
||||||
|
|
||||||
|
let mut options = Options::default();
|
||||||
|
options.silent = true;
|
||||||
|
|
||||||
|
let client = Client::new();
|
||||||
|
|
||||||
|
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||||
|
|
||||||
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
|
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||||
|
"\
|
||||||
|
<html>\
|
||||||
|
<head>\
|
||||||
|
<script id=\"data\" type=\"application/json\">{\"mono\":\"lith\"}</script>\
|
||||||
|
</head>\
|
||||||
|
<body>\
|
||||||
|
</body>\
|
||||||
|
</html>"
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user