2020-03-23 03:08:41 +01:00
|
|
|
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
|
|
|
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
|
|
|
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
|
|
|
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
|
|
|
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
|
|
|
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
2019-09-29 23:15:49 +02:00
|
|
|
|
2020-05-23 09:49:04 +02:00
|
|
|
#[cfg(test)]
|
|
|
|
mod passing {
|
|
|
|
use html5ever::serialize::{serialize, SerializeOpts};
|
|
|
|
use reqwest::blocking::Client;
|
|
|
|
use std::collections::HashMap;
|
2021-03-11 23:44:02 +01:00
|
|
|
use url::Url;
|
2020-05-23 09:49:04 +02:00
|
|
|
|
2020-06-28 07:36:41 +02:00
|
|
|
use crate::html;
|
|
|
|
use crate::opts::Options;
|
|
|
|
|
2020-05-23 09:49:04 +02:00
|
|
|
/// Runs a minimal fragment through the embedding pass with default options
/// (output silenced) and checks the serialized document: the fragment ends up
/// inside an `<html><head></head><body>` skeleton and the upper-case `<P>` tag
/// is emitted in lower case.
#[test]
fn basic() {
    let input: &str = "<div><P></P></div>";
    let dom = html::html_to_dom(input);
    let url: Url = Url::parse("http://localhost").unwrap();
    let mut cache = HashMap::new();
    let client = Client::new();

    let mut options = Options::default();
    options.silent = true;

    html::walk_and_embed_assets(&mut cache, &client, &url, &dom.document, &options, 0);

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the buffer as UTF-8 in one go; casting each byte to `char` would
    // turn any multi-byte sequence into garbage.
    let output = String::from_utf8(buf).expect("serialized HTML should be valid UTF-8");

    assert_eq!(
        output,
        "<html><head></head><body><div><p></p></div></body></html>"
    );
}
|
|
|
|
|
|
|
|
/// An `<iframe>` whose `src` is empty must come out of the embedding pass
/// unchanged (still `src=""`).
///
/// NOTE(review): going by the test name, this presumably guards against the
/// document being embedded into itself — confirm against `walk_and_embed_assets`.
#[test]
fn ensure_no_recursive_iframe() {
    let input = "<div><P></P><iframe src=\"\"></iframe></div>";
    let dom = html::html_to_dom(input);
    let url: Url = Url::parse("http://localhost").unwrap();
    let mut cache = HashMap::new();
    let client = Client::new();

    let mut options = Options::default();
    options.silent = true;

    html::walk_and_embed_assets(&mut cache, &client, &url, &dom.document, &options, 0);

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the buffer as UTF-8 in one go; casting each byte to `char` would
    // turn any multi-byte sequence into garbage.
    let output = String::from_utf8(buf).expect("serialized HTML should be valid UTF-8");

    assert_eq!(
        output,
        "<html><head></head><body><div><p></p><iframe src=\"\"></iframe></div></body></html>"
    );
}
|
|
|
|
|
|
|
|
/// A `<frame>` whose `src` is empty must come out of the embedding pass
/// unchanged (still `src=""`), with the `<frameset>` placed directly under
/// `<html>` after an empty `<head>`.
///
/// NOTE(review): going by the test name, this presumably guards against the
/// document being embedded into itself — confirm against `walk_and_embed_assets`.
#[test]
fn ensure_no_recursive_frame() {
    let input = "<frameset><frame src=\"\"></frameset>";
    let dom = html::html_to_dom(input);
    let url: Url = Url::parse("http://localhost").unwrap();
    let mut cache = HashMap::new();
    let client = Client::new();

    let mut options = Options::default();
    options.silent = true;

    html::walk_and_embed_assets(&mut cache, &client, &url, &dom.document, &options, 0);

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the buffer as UTF-8 in one go; casting each byte to `char` would
    // turn any multi-byte sequence into garbage.
    let output = String::from_utf8(buf).expect("serialized HTML should be valid UTF-8");

    assert_eq!(
        output,
        "<html><head></head><frameset><frame src=\"\"></frameset></html>"
    );
}
|
|
|
|
|
|
|
|
/// With `no_css` enabled, the expected output shows every CSS source being
/// neutralised: stylesheet `<link>`s (regular and alternate) lose their
/// `href`, the `<style>` element is emptied, and the inline `style` attribute
/// is dropped from the `<div>`.
#[test]
fn no_css() {
    let input = "<link rel=\"stylesheet\" href=\"main.css\">\
                 <link rel=\"alternate stylesheet\" href=\"main.css\">\
                 <style>html{background-color: #000;}</style>\
                 <div style=\"display: none;\"></div>";
    let dom = html::html_to_dom(input);
    let url: Url = Url::parse("http://localhost").unwrap();
    let mut cache = HashMap::new();
    let client = Client::new();

    let mut options = Options::default();
    options.no_css = true;
    options.silent = true;

    html::walk_and_embed_assets(&mut cache, &client, &url, &dom.document, &options, 0);

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the buffer as UTF-8 in one go; casting each byte to `char` would
    // turn any multi-byte sequence into garbage.
    let output = String::from_utf8(buf).expect("serialized HTML should be valid UTF-8");

    assert_eq!(
        output,
        "<html>\
            <head>\
                <link rel=\"stylesheet\">\
                <link rel=\"alternate stylesheet\">\
                <style></style>\
            </head>\
            <body>\
                <div></div>\
            </body>\
        </html>"
    );
}
|
|
|
|
|
|
|
|
/// With `no_images` enabled, the expected output shows the icon `<link>`
/// losing its `href` and the `<img>` source being replaced by the value of the
/// `empty_image!()` macro.
#[test]
fn no_images() {
    let input = "<link rel=\"icon\" href=\"favicon.ico\">\
                 <div><img src=\"http://localhost/assets/mono_lisa.png\" /></div>";
    let dom = html::html_to_dom(input);
    let url: Url = Url::parse("http://localhost").unwrap();
    let mut cache = HashMap::new();
    let client = Client::new();

    let mut options = Options::default();
    options.no_images = true;
    options.silent = true;

    html::walk_and_embed_assets(&mut cache, &client, &url, &dom.document, &options, 0);

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the buffer as UTF-8 in one go; casting each byte to `char` would
    // turn any multi-byte sequence into garbage.
    let output = String::from_utf8(buf).expect("serialized HTML should be valid UTF-8");

    let expected = format!(
        "<html>\
            <head>\
                <link rel=\"icon\">\
            </head>\
            <body>\
                <div>\
                    <img src=\"{empty_image}\">\
                </div>\
            </body>\
        </html>",
        empty_image = empty_image!()
    );

    assert_eq!(output, expected);
}
|
|
|
|
|
|
|
|
/// With `no_images` enabled, a `<body>` carrying a (duplicated) `background`
/// attribute is expected to serialize as a bare `<body></body>`, i.e. with no
/// `background` attribute left.
#[test]
fn no_body_background_images() {
    let input =
        "<body background=\"no/such/image.png\" background=\"no/such/image2.png\"></body>";
    let dom = html::html_to_dom(input);
    let url: Url = Url::parse("http://localhost").unwrap();
    let mut cache = HashMap::new();
    let client = Client::new();

    let mut options = Options::default();
    options.no_images = true;
    options.silent = true;

    html::walk_and_embed_assets(&mut cache, &client, &url, &dom.document, &options, 0);

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the buffer as UTF-8 in one go; casting each byte to `char` would
    // turn any multi-byte sequence into garbage.
    let output = String::from_utf8(buf).expect("serialized HTML should be valid UTF-8");

    assert_eq!(output, "<html><head></head><body></body></html>");
}
|
|
|
|
|
|
|
|
/// With `no_frames` enabled, a `<frame>` pointing at an external URL is
/// expected to keep its `src` attribute but with an empty value.
#[test]
fn no_frames() {
    let input = "<frameset><frame src=\"http://trackbook.com\"></frameset>";
    let dom = html::html_to_dom(input);
    let url: Url = Url::parse("http://localhost").unwrap();
    let mut cache = HashMap::new();
    let client = Client::new();

    let mut options = Options::default();
    options.no_frames = true;
    options.silent = true;

    html::walk_and_embed_assets(&mut cache, &client, &url, &dom.document, &options, 0);

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the buffer as UTF-8 in one go; casting each byte to `char` would
    // turn any multi-byte sequence into garbage.
    let output = String::from_utf8(buf).expect("serialized HTML should be valid UTF-8");

    assert_eq!(
        output,
        "<html><head></head><frameset><frame src=\"\"></frameset></html>"
    );
}
|
|
|
|
|
|
|
|
/// With `no_frames` enabled (the same option the `<frame>` test uses), an
/// `<iframe>` pointing at an external URL is expected to keep its `src`
/// attribute but with an empty value.
#[test]
fn no_iframes() {
    let input = "<iframe src=\"http://trackbook.com\"></iframe>";
    let dom = html::html_to_dom(input);
    let url: Url = Url::parse("http://localhost").unwrap();
    let mut cache = HashMap::new();
    let client = Client::new();

    let mut options = Options::default();
    options.no_frames = true;
    options.silent = true;

    html::walk_and_embed_assets(&mut cache, &client, &url, &dom.document, &options, 0);

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the buffer as UTF-8 in one go; casting each byte to `char` would
    // turn any multi-byte sequence into garbage.
    let output = String::from_utf8(buf).expect("serialized HTML should be valid UTF-8");

    assert_eq!(
        output,
        "<html><head></head><body><iframe src=\"\"></iframe></body></html>"
    );
}
|
|
|
|
|
|
|
|
/// With `no_js` enabled, the expected output shows all script sources being
/// stripped: the `onClick` handler attribute disappears from the `<div>`, the
/// external `<script>` loses its `src`, and the inline `<script>` is emptied.
#[test]
fn no_js() {
    let input = "<div onClick=\"void(0)\">\
                    <script src=\"http://localhost/assets/some.js\"></script>\
                    <script>alert(1)</script>\
                 </div>";
    let dom = html::html_to_dom(input);
    let url: Url = Url::parse("http://localhost").unwrap();
    let mut cache = HashMap::new();
    let client = Client::new();

    let mut options = Options::default();
    options.no_js = true;
    options.silent = true;

    html::walk_and_embed_assets(&mut cache, &client, &url, &dom.document, &options, 0);

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the buffer as UTF-8 in one go; casting each byte to `char` would
    // turn any multi-byte sequence into garbage.
    let output = String::from_utf8(buf).expect("serialized HTML should be valid UTF-8");

    assert_eq!(
        output,
        "<html><head></head><body><div><script></script>\
        <script></script></div></body></html>"
    );
}
|
|
|
|
|
2021-03-11 23:44:02 +01:00
|
|
|
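// NOTE(review): the `discards_integrity` test below is commented out and the
// reason is not recorded here — TODO: re-enable it or remove it.
// #[test]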
|
|
|
|
// fn discards_integrity() {
|
|
|
|
// let html = "<title>No integrity</title>\
|
|
|
|
// <link integrity=\"sha384-...\" rel=\"something\"/>\
|
|
|
|
// <script integrity=\"sha384-...\" src=\"some.js\"></script>";
|
|
|
|
// let dom = html::html_to_dom(&html);
|
|
|
|
// let url: Url = Url::parse("http://localhost").unwrap();
|
|
|
|
// let cache = &mut HashMap::new();
|
|
|
|
|
|
|
|
// let mut options = Options::default();
|
|
|
|
// options.no_css = true;
|
|
|
|
// options.no_frames = true;
|
|
|
|
// options.no_js = true;
|
|
|
|
// options.no_images = true;
|
|
|
|
// options.silent = true;
|
|
|
|
|
|
|
|
// let client = Client::new();
|
|
|
|
|
|
|
|
// html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
|
|
|
|
|
|
|
// let mut buf: Vec<u8> = Vec::new();
|
|
|
|
// serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
|
|
|
|
|
|
|
// assert_eq!(
|
|
|
|
// buf.iter().map(|&c| c as char).collect::<String>(),
|
|
|
|
// "<html>\
|
|
|
|
// <head><title>No integrity</title><link rel=\"something\"><script></script></head>\
|
|
|
|
// <body></body>\
|
|
|
|
// </html>"
|
|
|
|
// );
|
|
|
|
// }
|
2020-06-26 05:53:20 +02:00
|
|
|
|
|
|
|
/// Checks how `<meta http-equiv>` tags for `Refresh` and `Location` are
/// handled when CSS, frames, JS and images are all disabled.
///
/// Despite the test name, the expected output keeps both `<meta>` elements
/// (including their `value` attributes); only the `http-equiv` value is
/// rewritten to `disabled by monolith (<original>)`.
#[test]
fn removes_unwanted_meta_tags() {
    let input = "<html>\
                    <head>\
                        <meta http-equiv=\"Refresh\" value=\"20\"/>\
                        <meta http-equiv=\"Location\" value=\"https://freebsd.org\"/>\
                    </head>\
                    <body></body>\
                </html>";
    let dom = html::html_to_dom(input);
    let url: Url = Url::parse("http://localhost").unwrap();
    let mut cache = HashMap::new();
    let client = Client::new();

    let mut options = Options::default();
    options.no_css = true;
    options.no_frames = true;
    options.no_js = true;
    options.no_images = true;
    options.silent = true;

    html::walk_and_embed_assets(&mut cache, &client, &url, &dom.document, &options, 0);

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the buffer as UTF-8 in one go; casting each byte to `char` would
    // turn any multi-byte sequence into garbage.
    let output = String::from_utf8(buf).expect("serialized HTML should be valid UTF-8");

    assert_eq!(
        output,
        "<html>\
            <head>\
                <meta http-equiv=\"disabled by monolith (Refresh)\" value=\"20\">\
                <meta http-equiv=\"disabled by monolith (Location)\" value=\"https://freebsd.org\">\
            </head>\
            <body></body>\
        </html>"
    );
}
|
2021-02-23 06:42:39 +01:00
|
|
|
|
|
|
|
/// Verifies that content nested inside `<noscript>` is processed too: with
/// `no_images` enabled, the `<img>` inside the `<noscript>` element is
/// expected to have its source replaced by the value of `empty_image!()`.
#[test]
fn processes_noscript_tags() {
    let input = "<html>\
                    <body>\
                        <noscript>\
                            <img src=\"image.png\" />\
                        </noscript>\
                    </body>\
                </html>";
    let dom = html::html_to_dom(input);
    let url: Url = Url::parse("http://localhost").unwrap();
    let mut cache = HashMap::new();
    let client = Client::new();

    let mut options = Options::default();
    options.no_images = true;
    options.silent = true;

    html::walk_and_embed_assets(&mut cache, &client, &url, &dom.document, &options, 0);

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the buffer as UTF-8 in one go; casting each byte to `char` would
    // turn any multi-byte sequence into garbage.
    let output = String::from_utf8(buf).expect("serialized HTML should be valid UTF-8");

    let expected = format!(
        "<html>\
            <head>\
            </head>\
            <body>\
                <noscript>\
                    <img src=\"{}\">\
                </noscript>\
            </body>\
        </html>",
        empty_image!(),
    );

    assert_eq!(output, expected);
}
|
2019-12-26 15:44:01 +01:00
|
|
|
}
|