Merge pull request #262 from snshn/support-more-encodings
Add support for wider range of charsets
This commit is contained in:
commit
cbda57cfa8
31 changed files with 944 additions and 498 deletions
43
Cargo.lock
generated
43
Cargo.lock
generated
|
@ -26,9 +26,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "assert_cmd"
|
name = "assert_cmd"
|
||||||
version = "1.0.4"
|
version = "1.0.5"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8f57fec1ac7e4de72dcc69811795f1a7172ed06012f80a5d1ee651b62484f588"
|
checksum = "a88b6bd5df287567ffdf4ddf4d33060048e1068308e5f62d81c6f9824a045a48"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bstr",
|
"bstr",
|
||||||
"doc-comment",
|
"doc-comment",
|
||||||
|
@ -497,9 +497,9 @@ checksum = "6456b8a6c8f33fee7d958fcd1b60d55b11940a79e63ae87013e6d22e26034440"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "hyper"
|
name = "hyper"
|
||||||
version = "0.14.8"
|
version = "0.14.9"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "d3f71a7eea53a3f8257a7b4795373ff886397178cd634430ea94e12d7fe4fe34"
|
checksum = "07d6baa1b441335f3ce5098ac421fb6547c46dda735ca1bc6d0153c838f9dd83"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bytes",
|
"bytes",
|
||||||
"futures-channel",
|
"futures-channel",
|
||||||
|
@ -511,7 +511,7 @@ dependencies = [
|
||||||
"httparse",
|
"httparse",
|
||||||
"httpdate",
|
"httpdate",
|
||||||
"itoa",
|
"itoa",
|
||||||
"pin-project",
|
"pin-project-lite",
|
||||||
"socket2",
|
"socket2",
|
||||||
"tokio",
|
"tokio",
|
||||||
"tower-service",
|
"tower-service",
|
||||||
|
@ -678,6 +678,7 @@ dependencies = [
|
||||||
"chrono",
|
"chrono",
|
||||||
"clap",
|
"clap",
|
||||||
"cssparser",
|
"cssparser",
|
||||||
|
"encoding_rs",
|
||||||
"html5ever",
|
"html5ever",
|
||||||
"regex",
|
"regex",
|
||||||
"reqwest",
|
"reqwest",
|
||||||
|
@ -880,26 +881,6 @@ dependencies = [
|
||||||
"siphasher 0.3.5",
|
"siphasher 0.3.5",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "pin-project"
|
|
||||||
version = "1.0.7"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "c7509cc106041c40a4518d2af7a61530e1eed0e6285296a3d8c5472806ccc4a4"
|
|
||||||
dependencies = [
|
|
||||||
"pin-project-internal",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "pin-project-internal"
|
|
||||||
version = "1.0.7"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "48c950132583b500556b1efd71d45b319029f2b71518d979fcc208e16b42426f"
|
|
||||||
dependencies = [
|
|
||||||
"proc-macro2",
|
|
||||||
"quote",
|
|
||||||
"syn",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pin-project-lite"
|
name = "pin-project-lite"
|
||||||
version = "0.2.6"
|
version = "0.2.6"
|
||||||
|
@ -1281,9 +1262,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "security-framework"
|
name = "security-framework"
|
||||||
version = "2.2.0"
|
version = "2.3.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "3670b1d2fdf6084d192bc71ead7aabe6c06aa2ea3fbd9cc3ac111fa5c2b1bd84"
|
checksum = "23a2ac85147a3a11d77ecf1bc7166ec0b92febfa4461c37944e180f319ece467"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bitflags",
|
"bitflags",
|
||||||
"core-foundation",
|
"core-foundation",
|
||||||
|
@ -1294,9 +1275,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "security-framework-sys"
|
name = "security-framework-sys"
|
||||||
version = "2.2.0"
|
version = "2.3.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "3676258fd3cfe2c9a0ec99ce3038798d847ce3e4bb17746373eb9f0f1ac16339"
|
checksum = "7e4effb91b4b8b6fb7732e670b6cee160278ff8e6bf485c7805d9e319d76e284"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"core-foundation-sys",
|
"core-foundation-sys",
|
||||||
"libc",
|
"libc",
|
||||||
|
@ -1594,9 +1575,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unicode-normalization"
|
name = "unicode-normalization"
|
||||||
version = "0.1.18"
|
version = "0.1.19"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "33717dca7ac877f497014e10d73f3acf948c342bee31b5ca7892faf94ccc6b49"
|
checksum = "d54590932941a9e9266f0832deed84ebe1bf2e4c9e4a3554d393d18f5e854bf9"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"tinyvec",
|
"tinyvec",
|
||||||
]
|
]
|
||||||
|
|
|
@ -27,6 +27,7 @@ base64 = "0.13.0"
|
||||||
chrono = "0.4.19" # Used for formatting creation timestamp
|
chrono = "0.4.19" # Used for formatting creation timestamp
|
||||||
clap = "2.33.3"
|
clap = "2.33.3"
|
||||||
cssparser = "0.28.1"
|
cssparser = "0.28.1"
|
||||||
|
encoding_rs = "0.8.28"
|
||||||
html5ever = "0.24.1"
|
html5ever = "0.24.1"
|
||||||
regex = "1.5.4" # Used for parsing srcset and NOSCRIPT
|
regex = "1.5.4" # Used for parsing srcset and NOSCRIPT
|
||||||
sha2 = "0.9.5" # Used for calculating checksums during integrity checks
|
sha2 = "0.9.5" # Used for calculating checksums during integrity checks
|
||||||
|
|
17
src/css.rs
17
src/css.rs
|
@ -198,9 +198,14 @@ pub fn process_css<'a>(
|
||||||
options,
|
options,
|
||||||
depth + 1,
|
depth + 1,
|
||||||
) {
|
) {
|
||||||
Ok((import_contents, import_final_url, _import_media_type)) => {
|
Ok((
|
||||||
|
import_contents,
|
||||||
|
import_final_url,
|
||||||
|
import_media_type,
|
||||||
|
_import_charset,
|
||||||
|
)) => {
|
||||||
let mut import_data_url = create_data_url(
|
let mut import_data_url = create_data_url(
|
||||||
"text/css",
|
&import_media_type,
|
||||||
embed_css(
|
embed_css(
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
|
@ -247,7 +252,7 @@ pub fn process_css<'a>(
|
||||||
options,
|
options,
|
||||||
depth + 1,
|
depth + 1,
|
||||||
) {
|
) {
|
||||||
Ok((data, final_url, media_type)) => {
|
Ok((data, final_url, media_type, _charset)) => {
|
||||||
let mut data_url =
|
let mut data_url =
|
||||||
create_data_url(&media_type, &data, &final_url);
|
create_data_url(&media_type, &data, &final_url);
|
||||||
data_url.set_fragment(resolved_url.fragment());
|
data_url.set_fragment(resolved_url.fragment());
|
||||||
|
@ -343,9 +348,9 @@ pub fn process_css<'a>(
|
||||||
options,
|
options,
|
||||||
depth + 1,
|
depth + 1,
|
||||||
) {
|
) {
|
||||||
Ok((css, final_url, _media_type)) => {
|
Ok((css, final_url, media_type, _charset)) => {
|
||||||
let mut data_url = create_data_url(
|
let mut data_url = create_data_url(
|
||||||
"text/css",
|
&media_type,
|
||||||
embed_css(
|
embed_css(
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
|
@ -381,7 +386,7 @@ pub fn process_css<'a>(
|
||||||
options,
|
options,
|
||||||
depth + 1,
|
depth + 1,
|
||||||
) {
|
) {
|
||||||
Ok((data, final_url, media_type)) => {
|
Ok((data, final_url, media_type, _charset)) => {
|
||||||
let mut data_url = create_data_url(&media_type, &data, &final_url);
|
let mut data_url = create_data_url(&media_type, &data, &final_url);
|
||||||
data_url.set_fragment(full_url.fragment());
|
data_url.set_fragment(full_url.fragment());
|
||||||
result
|
result
|
||||||
|
|
188
src/html.rs
188
src/html.rs
|
@ -1,5 +1,6 @@
|
||||||
use base64;
|
use base64;
|
||||||
use chrono::prelude::*;
|
use chrono::prelude::*;
|
||||||
|
use encoding_rs::Encoding;
|
||||||
use html5ever::interface::QualName;
|
use html5ever::interface::QualName;
|
||||||
use html5ever::parse_document;
|
use html5ever::parse_document;
|
||||||
use html5ever::rcdom::{Handle, NodeData, RcDom};
|
use html5ever::rcdom::{Handle, NodeData, RcDom};
|
||||||
|
@ -18,7 +19,7 @@ use crate::css::embed_css;
|
||||||
use crate::js::attr_is_event_handler;
|
use crate::js::attr_is_event_handler;
|
||||||
use crate::opts::Options;
|
use crate::opts::Options;
|
||||||
use crate::url::{clean_url, create_data_url, is_url_and_has_protocol, resolve_url};
|
use crate::url::{clean_url, create_data_url, is_url_and_has_protocol, resolve_url};
|
||||||
use crate::utils::retrieve_asset;
|
use crate::utils::{parse_content_type, retrieve_asset};
|
||||||
|
|
||||||
struct SrcSetItem<'a> {
|
struct SrcSetItem<'a> {
|
||||||
path: &'a str,
|
path: &'a str,
|
||||||
|
@ -31,9 +32,8 @@ pub fn add_favicon(document: &Handle, favicon_data_url: String) -> RcDom {
|
||||||
let mut buf: Vec<u8> = Vec::new();
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
serialize(&mut buf, document, SerializeOpts::default())
|
serialize(&mut buf, document, SerializeOpts::default())
|
||||||
.expect("unable to serialize DOM into buffer");
|
.expect("unable to serialize DOM into buffer");
|
||||||
let result = String::from_utf8(buf).unwrap();
|
|
||||||
|
|
||||||
let mut dom = html_to_dom(&result);
|
let mut dom = html_to_dom(&buf, "utf-8".to_string());
|
||||||
let doc = dom.get_document();
|
let doc = dom.get_document();
|
||||||
if let Some(html) = get_child_node_by_name(&doc, "html") {
|
if let Some(html) = get_child_node_by_name(&doc, "html") {
|
||||||
if let Some(head) = get_child_node_by_name(&html, "head") {
|
if let Some(head) = get_child_node_by_name(&html, "head") {
|
||||||
|
@ -115,7 +115,7 @@ pub fn create_metadata_tag(url: &Url) -> String {
|
||||||
|
|
||||||
// Prevent credentials from getting into metadata
|
// Prevent credentials from getting into metadata
|
||||||
if clean_url.scheme() == "http" || clean_url.scheme() == "https" {
|
if clean_url.scheme() == "http" || clean_url.scheme() == "https" {
|
||||||
// Only HTTP(S) URLs may feature credentials
|
// Only HTTP(S) URLs can contain credentials
|
||||||
clean_url.set_username("").unwrap();
|
clean_url.set_username("").unwrap();
|
||||||
clean_url.set_password(None).unwrap();
|
clean_url.set_password(None).unwrap();
|
||||||
}
|
}
|
||||||
|
@ -188,7 +188,8 @@ pub fn embed_srcset(
|
||||||
options,
|
options,
|
||||||
depth + 1,
|
depth + 1,
|
||||||
) {
|
) {
|
||||||
Ok((image_data, image_final_url, image_media_type)) => {
|
Ok((image_data, image_final_url, image_media_type, _image_charset)) => {
|
||||||
|
// TODO: use image_charset
|
||||||
let mut image_data_url =
|
let mut image_data_url =
|
||||||
create_data_url(&image_media_type, &image_data, &image_final_url);
|
create_data_url(&image_media_type, &image_data, &image_final_url);
|
||||||
// Append retreved asset as a data URL
|
// Append retreved asset as a data URL
|
||||||
|
@ -253,6 +254,48 @@ pub fn find_base_node(node: &Handle) -> Option<Handle> {
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn find_meta_charset_or_content_type_node(node: &Handle) -> Option<Handle> {
|
||||||
|
match node.data {
|
||||||
|
NodeData::Document => {
|
||||||
|
// Dig deeper
|
||||||
|
for child in node.children.borrow().iter() {
|
||||||
|
if let Some(meta_charset_node) = find_meta_charset_or_content_type_node(child) {
|
||||||
|
return Some(meta_charset_node);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
NodeData::Element { ref name, .. } => {
|
||||||
|
match name.local.as_ref() {
|
||||||
|
"head" => {
|
||||||
|
if let Some(meta_node) = get_child_node_by_name(node, "meta") {
|
||||||
|
if let Some(_) = get_node_attr(&meta_node, "charset") {
|
||||||
|
return Some(meta_node);
|
||||||
|
} else if let Some(meta_node_http_equiv_attr_value) =
|
||||||
|
get_node_attr(&meta_node, "http-equiv")
|
||||||
|
{
|
||||||
|
if meta_node_http_equiv_attr_value.eq_ignore_ascii_case("content-type")
|
||||||
|
{
|
||||||
|
return Some(meta_node);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Dig deeper
|
||||||
|
for child in node.children.borrow().iter() {
|
||||||
|
if let Some(meta_charset_node) = find_meta_charset_or_content_type_node(child) {
|
||||||
|
return Some(meta_charset_node);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
pub fn get_base_url(handle: &Handle) -> Option<String> {
|
pub fn get_base_url(handle: &Handle) -> Option<String> {
|
||||||
if let Some(base_node) = find_base_node(handle) {
|
if let Some(base_node) = find_base_node(handle) {
|
||||||
get_node_attr(&base_node, "href")
|
get_node_attr(&base_node, "href")
|
||||||
|
@ -261,6 +304,24 @@ pub fn get_base_url(handle: &Handle) -> Option<String> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn get_charset(node: &Handle) -> Option<String> {
|
||||||
|
if let Some(meta_charset_node) = find_meta_charset_or_content_type_node(node) {
|
||||||
|
if let Some(meta_charset_node_attr_value) = get_node_attr(&meta_charset_node, "charset") {
|
||||||
|
// Processing <meta charset="..." />
|
||||||
|
return Some(meta_charset_node_attr_value);
|
||||||
|
} else if let Some(meta_content_type_node_attr_value) =
|
||||||
|
get_node_attr(&meta_charset_node, "content")
|
||||||
|
{
|
||||||
|
// Processing <meta http-equiv="content-type" content="text/html; charset=..." />
|
||||||
|
let (_media_type, charset, _is_base64) =
|
||||||
|
parse_content_type(&meta_content_type_node_attr_value);
|
||||||
|
return Some(charset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
pub fn get_child_node_by_name(parent: &Handle, node_name: &str) -> Option<Handle> {
|
pub fn get_child_node_by_name(parent: &Handle, node_name: &str) -> Option<Handle> {
|
||||||
let children = parent.children.borrow();
|
let children = parent.children.borrow();
|
||||||
let matching_children = children.iter().find(|child| match child.data {
|
let matching_children = children.iter().find(|child| match child.data {
|
||||||
|
@ -273,13 +334,6 @@ pub fn get_child_node_by_name(parent: &Handle, node_name: &str) -> Option<Handle
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_node_name(node: &Handle) -> Option<&'_ str> {
|
|
||||||
match &node.data {
|
|
||||||
NodeData::Element { ref name, .. } => Some(name.local.as_ref()),
|
|
||||||
_ => None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn get_node_attr(node: &Handle, attr_name: &str) -> Option<String> {
|
pub fn get_node_attr(node: &Handle, attr_name: &str) -> Option<String> {
|
||||||
match &node.data {
|
match &node.data {
|
||||||
NodeData::Element { ref attrs, .. } => {
|
NodeData::Element { ref attrs, .. } => {
|
||||||
|
@ -294,6 +348,13 @@ pub fn get_node_attr(node: &Handle, attr_name: &str) -> Option<String> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn get_node_name(node: &Handle) -> Option<&'_ str> {
|
||||||
|
match &node.data {
|
||||||
|
NodeData::Element { ref name, .. } => Some(name.local.as_ref()),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn get_parent_node(child: &Handle) -> Handle {
|
pub fn get_parent_node(child: &Handle) -> Handle {
|
||||||
let parent = child.parent.take().clone();
|
let parent = child.parent.take().clone();
|
||||||
parent.and_then(|node| node.upgrade()).unwrap()
|
parent.and_then(|node| node.upgrade()).unwrap()
|
||||||
|
@ -340,10 +401,19 @@ pub fn has_favicon(handle: &Handle) -> bool {
|
||||||
found_favicon
|
found_favicon
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn html_to_dom(data: &str) -> RcDom {
|
pub fn html_to_dom(data: &Vec<u8>, document_encoding: String) -> RcDom {
|
||||||
|
let s: String;
|
||||||
|
|
||||||
|
if let Some(encoding) = Encoding::for_label(document_encoding.as_bytes()) {
|
||||||
|
let (string, _, _) = encoding.decode(&data);
|
||||||
|
s = string.to_string();
|
||||||
|
} else {
|
||||||
|
s = String::from_utf8_lossy(&data).to_string();
|
||||||
|
}
|
||||||
|
|
||||||
parse_document(RcDom::default(), Default::default())
|
parse_document(RcDom::default(), Default::default())
|
||||||
.from_utf8()
|
.from_utf8()
|
||||||
.read_from(&mut data.as_bytes())
|
.read_from(&mut s.as_bytes())
|
||||||
.unwrap()
|
.unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -355,9 +425,8 @@ pub fn set_base_url(document: &Handle, desired_base_href: String) -> RcDom {
|
||||||
let mut buf: Vec<u8> = Vec::new();
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
serialize(&mut buf, document, SerializeOpts::default())
|
serialize(&mut buf, document, SerializeOpts::default())
|
||||||
.expect("unable to serialize DOM into buffer");
|
.expect("unable to serialize DOM into buffer");
|
||||||
let result = String::from_utf8(buf).unwrap();
|
|
||||||
|
|
||||||
let mut dom = html_to_dom(&result);
|
let mut dom = html_to_dom(&buf, "utf-8".to_string());
|
||||||
let doc = dom.get_document();
|
let doc = dom.get_document();
|
||||||
if let Some(html_node) = get_child_node_by_name(&doc, "html") {
|
if let Some(html_node) = get_child_node_by_name(&doc, "html") {
|
||||||
if let Some(head_node) = get_child_node_by_name(&html_node, "head") {
|
if let Some(head_node) = get_child_node_by_name(&html_node, "head") {
|
||||||
|
@ -383,6 +452,41 @@ pub fn set_base_url(document: &Handle, desired_base_href: String) -> RcDom {
|
||||||
dom
|
dom
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn set_charset(mut dom: RcDom, desired_charset: String) -> RcDom {
|
||||||
|
if let Some(meta_charset_node) = find_meta_charset_or_content_type_node(&dom.document) {
|
||||||
|
if let Some(_) = get_node_attr(&meta_charset_node, "charset") {
|
||||||
|
set_node_attr(&meta_charset_node, "charset", Some(desired_charset));
|
||||||
|
} else if let Some(_) = get_node_attr(&meta_charset_node, "content") {
|
||||||
|
set_node_attr(
|
||||||
|
&meta_charset_node,
|
||||||
|
"content",
|
||||||
|
Some(format!("text/html;charset={}", desired_charset)),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
let meta_charset_node = dom.create_element(
|
||||||
|
QualName::new(None, ns!(), local_name!("meta")),
|
||||||
|
vec![Attribute {
|
||||||
|
name: QualName::new(None, ns!(), local_name!("charset")),
|
||||||
|
value: format_tendril!("{}", desired_charset),
|
||||||
|
}],
|
||||||
|
Default::default(),
|
||||||
|
);
|
||||||
|
|
||||||
|
// Insert newly created META charset node into HEAD
|
||||||
|
if let Some(html_node) = get_child_node_by_name(&dom.document, "html") {
|
||||||
|
if let Some(head_node) = get_child_node_by_name(&html_node, "head") {
|
||||||
|
head_node
|
||||||
|
.children
|
||||||
|
.borrow_mut()
|
||||||
|
.push(meta_charset_node.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
dom
|
||||||
|
}
|
||||||
|
|
||||||
pub fn set_node_attr(node: &Handle, attr_name: &str, attr_value: Option<String>) {
|
pub fn set_node_attr(node: &Handle, attr_name: &str, attr_value: Option<String>) {
|
||||||
match &node.data {
|
match &node.data {
|
||||||
NodeData::Element { ref attrs, .. } => {
|
NodeData::Element { ref attrs, .. } => {
|
||||||
|
@ -423,16 +527,10 @@ pub fn set_node_attr(node: &Handle, attr_name: &str, attr_value: Option<String>)
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn stringify_document(handle: &Handle, options: &Options) -> String {
|
pub fn serialize_document(mut dom: RcDom, document_encoding: String, options: &Options) -> Vec<u8> {
|
||||||
let mut buf: Vec<u8> = Vec::new();
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
serialize(&mut buf, handle, SerializeOpts::default())
|
let doc = dom.get_document();
|
||||||
.expect("Unable to serialize DOM into buffer");
|
|
||||||
|
|
||||||
let mut result = String::from_utf8(buf).unwrap();
|
|
||||||
|
|
||||||
// We can't make it isolate the page right away since it may have no HEAD element,
|
|
||||||
// ergo we have to serialize, parse the DOM again, insert the CSP meta tag, and then
|
|
||||||
// finally serialize and return the resulting string
|
|
||||||
if options.isolate
|
if options.isolate
|
||||||
|| options.no_css
|
|| options.no_css
|
||||||
|| options.no_fonts
|
|| options.no_fonts
|
||||||
|
@ -441,9 +539,6 @@ pub fn stringify_document(handle: &Handle, options: &Options) -> String {
|
||||||
|| options.no_images
|
|| options.no_images
|
||||||
{
|
{
|
||||||
// Take care of CSP
|
// Take care of CSP
|
||||||
let mut buf: Vec<u8> = Vec::new();
|
|
||||||
let mut dom = html_to_dom(&result);
|
|
||||||
let doc = dom.get_document();
|
|
||||||
if let Some(html) = get_child_node_by_name(&doc, "html") {
|
if let Some(html) = get_child_node_by_name(&doc, "html") {
|
||||||
if let Some(head) = get_child_node_by_name(&html, "head") {
|
if let Some(head) = get_child_node_by_name(&html, "head") {
|
||||||
let meta = dom.create_element(
|
let meta = dom.create_element(
|
||||||
|
@ -468,19 +563,27 @@ pub fn stringify_document(handle: &Handle, options: &Options) -> String {
|
||||||
head.children.borrow_mut().reverse();
|
head.children.borrow_mut().reverse();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
serialize(&mut buf, &doc, SerializeOpts::default())
|
serialize(&mut buf, &doc, SerializeOpts::default())
|
||||||
.expect("Unable to serialize DOM into buffer");
|
.expect("Unable to serialize DOM into buffer");
|
||||||
result = String::from_utf8(buf).unwrap();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Unwrap NOSCRIPT elements
|
// Unwrap NOSCRIPT elements
|
||||||
if options.unwrap_noscript {
|
if options.unwrap_noscript {
|
||||||
|
let s: &str = &String::from_utf8_lossy(&buf);
|
||||||
let noscript_re = Regex::new(r"<(?P<c>/?noscript[^>]*)>").unwrap();
|
let noscript_re = Regex::new(r"<(?P<c>/?noscript[^>]*)>").unwrap();
|
||||||
result = noscript_re.replace_all(&result, "<!--$c-->").to_string();
|
buf = noscript_re.replace_all(&s, "<!--$c-->").as_bytes().to_vec();
|
||||||
}
|
}
|
||||||
|
|
||||||
result
|
if !document_encoding.is_empty() {
|
||||||
|
if let Some(encoding) = Encoding::for_label(document_encoding.as_bytes()) {
|
||||||
|
let s: &str = &String::from_utf8_lossy(&buf);
|
||||||
|
let (data, _, _) = encoding.encode(s);
|
||||||
|
buf = data.to_vec();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
buf
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn retrieve_and_embed_asset(
|
pub fn retrieve_and_embed_asset(
|
||||||
|
@ -503,7 +606,7 @@ pub fn retrieve_and_embed_asset(
|
||||||
options,
|
options,
|
||||||
depth + 1,
|
depth + 1,
|
||||||
) {
|
) {
|
||||||
Ok((data, final_url, mut media_type)) => {
|
Ok((data, final_url, mut media_type, _charset)) => {
|
||||||
let node_name: &str = get_node_name(&node).unwrap();
|
let node_name: &str = get_node_name(&node).unwrap();
|
||||||
|
|
||||||
// Check integrity if it's a LINK or SCRIPT element
|
// Check integrity if it's a LINK or SCRIPT element
|
||||||
|
@ -537,7 +640,7 @@ pub fn retrieve_and_embed_asset(
|
||||||
set_node_attr(&node, attr_name, Some(css_data_url.to_string()));
|
set_node_attr(&node, attr_name, Some(css_data_url.to_string()));
|
||||||
} else if node_name == "frame" || node_name == "iframe" {
|
} else if node_name == "frame" || node_name == "iframe" {
|
||||||
// (I)FRAMEs are also quite different from conventional resources
|
// (I)FRAMEs are also quite different from conventional resources
|
||||||
let frame_dom = html_to_dom(&String::from_utf8_lossy(&data));
|
let frame_dom = html_to_dom(&data, "utf-8".to_string());
|
||||||
walk_and_embed_assets(
|
walk_and_embed_assets(
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
|
@ -556,6 +659,7 @@ pub fn retrieve_and_embed_asset(
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
// Create and embed data URL
|
// Create and embed data URL
|
||||||
|
// TODO: use charset
|
||||||
let mut frame_data_url = create_data_url(&media_type, &frame_data, &final_url);
|
let mut frame_data_url = create_data_url(&media_type, &frame_data, &final_url);
|
||||||
frame_data_url.set_fragment(resolved_url.fragment());
|
frame_data_url.set_fragment(resolved_url.fragment());
|
||||||
set_node_attr(node, attr_name, Some(frame_data_url.to_string()));
|
set_node_attr(node, attr_name, Some(frame_data_url.to_string()));
|
||||||
|
@ -629,21 +733,8 @@ pub fn walk_and_embed_assets(
|
||||||
meta_attr_http_equiv_value
|
meta_attr_http_equiv_value
|
||||||
)),
|
)),
|
||||||
);
|
);
|
||||||
} else if meta_attr_http_equiv_value.eq_ignore_ascii_case("Content-Type") {
|
|
||||||
// Enforce charset to be set to UTF-8
|
|
||||||
if let Some(_attr_value) = get_node_attr(node, "content") {
|
|
||||||
set_node_attr(
|
|
||||||
&node,
|
|
||||||
"content",
|
|
||||||
Some(str!("text/html; charset=utf-8")),
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if let Some(_meta_attr_http_equiv_value) = get_node_attr(node, "charset")
|
|
||||||
{
|
|
||||||
// Enforce charset to be set to UTF-8
|
|
||||||
set_node_attr(&node, "charset", Some(str!("utf-8")));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
"link" => {
|
"link" => {
|
||||||
let link_type: &str = determine_link_node_type(node);
|
let link_type: &str = determine_link_node_type(node);
|
||||||
|
@ -1078,7 +1169,8 @@ pub fn walk_and_embed_assets(
|
||||||
// Get contents of NOSCRIPT node
|
// Get contents of NOSCRIPT node
|
||||||
let mut noscript_contents = contents.borrow_mut();
|
let mut noscript_contents = contents.borrow_mut();
|
||||||
// Parse contents of NOSCRIPT node as DOM
|
// Parse contents of NOSCRIPT node as DOM
|
||||||
let noscript_contents_dom: RcDom = html_to_dom(&noscript_contents);
|
let noscript_contents_dom: RcDom =
|
||||||
|
html_to_dom(&noscript_contents.as_bytes().to_vec(), str!());
|
||||||
// Embed assets of NOSCRIPT node contents
|
// Embed assets of NOSCRIPT node contents
|
||||||
walk_and_embed_assets(
|
walk_and_embed_assets(
|
||||||
cache,
|
cache,
|
||||||
|
@ -1098,7 +1190,7 @@ pub fn walk_and_embed_assets(
|
||||||
let mut buf: Vec<u8> = Vec::new();
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
serialize(&mut buf, &body, SerializeOpts::default())
|
serialize(&mut buf, &body, SerializeOpts::default())
|
||||||
.expect("Unable to serialize DOM into buffer");
|
.expect("Unable to serialize DOM into buffer");
|
||||||
let result = String::from_utf8(buf).unwrap();
|
let result = String::from_utf8_lossy(&buf);
|
||||||
noscript_contents.push_slice(&result);
|
noscript_contents.push_slice(&result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
135
src/main.rs
135
src/main.rs
|
@ -1,3 +1,4 @@
|
||||||
|
use html5ever::rcdom::RcDom;
|
||||||
use reqwest::blocking::Client;
|
use reqwest::blocking::Client;
|
||||||
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
|
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
@ -9,11 +10,11 @@ use std::time::Duration;
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
use monolith::html::{
|
use monolith::html::{
|
||||||
add_favicon, create_metadata_tag, get_base_url, has_favicon, html_to_dom, set_base_url,
|
add_favicon, create_metadata_tag, get_base_url, get_charset, has_favicon, html_to_dom,
|
||||||
stringify_document, walk_and_embed_assets,
|
serialize_document, set_base_url, set_charset, walk_and_embed_assets,
|
||||||
};
|
};
|
||||||
use monolith::opts::Options;
|
use monolith::opts::Options;
|
||||||
use monolith::url::{create_data_url, parse_data_url, resolve_url};
|
use monolith::url::{create_data_url, resolve_url};
|
||||||
use monolith::utils::retrieve_asset;
|
use monolith::utils::retrieve_asset;
|
||||||
|
|
||||||
mod macros;
|
mod macros;
|
||||||
|
@ -32,29 +33,35 @@ impl Output {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn writeln_str(&mut self, s: &str) -> Result<(), Error> {
|
fn write(&mut self, bytes: &Vec<u8>) -> Result<(), Error> {
|
||||||
match self {
|
match self {
|
||||||
Output::Stdout(stdout) => {
|
Output::Stdout(stdout) => {
|
||||||
writeln!(stdout, "{}", s)?;
|
stdout.write_all(bytes)?;
|
||||||
|
// Ensure newline at end of output
|
||||||
|
if bytes.last() != Some(&b"\n"[0]) {
|
||||||
|
stdout.write(b"\n")?;
|
||||||
|
}
|
||||||
stdout.flush()
|
stdout.flush()
|
||||||
}
|
}
|
||||||
Output::File(f) => {
|
Output::File(file) => {
|
||||||
writeln!(f, "{}", s)?;
|
file.write_all(bytes)?;
|
||||||
f.flush()
|
// Ensure newline at end of output
|
||||||
|
if bytes.last() != Some(&b"\n"[0]) {
|
||||||
|
file.write(b"\n")?;
|
||||||
|
}
|
||||||
|
file.flush()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn read_stdin() -> String {
|
pub fn read_stdin() -> Vec<u8> {
|
||||||
let mut buffer = String::new();
|
let mut buffer: Vec<u8> = vec![];
|
||||||
|
|
||||||
for line in io::stdin().lock().lines() {
|
match io::stdin().lock().read_to_end(&mut buffer) {
|
||||||
buffer += line.unwrap_or_default().as_str();
|
Ok(_) => buffer,
|
||||||
buffer += "\n";
|
Err(_) => buffer,
|
||||||
}
|
}
|
||||||
|
|
||||||
buffer
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
|
@ -77,7 +84,7 @@ fn main() {
|
||||||
if target.clone() == "-" {
|
if target.clone() == "-" {
|
||||||
// Read from pipe (stdin)
|
// Read from pipe (stdin)
|
||||||
use_stdin = true;
|
use_stdin = true;
|
||||||
// Set default target URL to an empty data URL; the user can control it via --base-url
|
// Set default target URL to an empty data URL; the user can set it via --base-url
|
||||||
target_url = Url::parse("data:text/html,").unwrap();
|
target_url = Url::parse("data:text/html,").unwrap();
|
||||||
} else {
|
} else {
|
||||||
match Url::parse(&target.clone()) {
|
match Url::parse(&target.clone()) {
|
||||||
|
@ -131,9 +138,6 @@ fn main() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Define output
|
|
||||||
let mut output = Output::new(&options.output).expect("Could not prepare output");
|
|
||||||
|
|
||||||
// Initialize client
|
// Initialize client
|
||||||
let mut cache = HashMap::new();
|
let mut cache = HashMap::new();
|
||||||
let mut header_map = HeaderMap::new();
|
let mut header_map = HeaderMap::new();
|
||||||
|
@ -158,20 +162,33 @@ fn main() {
|
||||||
// At this stage we assume that the base URL is the same as the target URL
|
// At this stage we assume that the base URL is the same as the target URL
|
||||||
base_url = target_url.clone();
|
base_url = target_url.clone();
|
||||||
|
|
||||||
let mut dom;
|
let data: Vec<u8>;
|
||||||
|
let mut document_encoding: String = str!();
|
||||||
|
let mut dom: RcDom;
|
||||||
|
|
||||||
// Retrieve target document
|
// Retrieve target document
|
||||||
if use_stdin {
|
if use_stdin {
|
||||||
dom = html_to_dom(&read_stdin());
|
data = read_stdin();
|
||||||
} else if target_url.scheme() == "file"
|
} else if target_url.scheme() == "file"
|
||||||
|| (target_url.scheme() == "http" || target_url.scheme() == "https")
|
|| (target_url.scheme() == "http" || target_url.scheme() == "https")
|
||||||
|
|| target_url.scheme() == "data"
|
||||||
{
|
{
|
||||||
match retrieve_asset(&mut cache, &client, &target_url, &target_url, &options, 0) {
|
match retrieve_asset(&mut cache, &client, &target_url, &target_url, &options, 0) {
|
||||||
Ok((data, final_url, _media_type)) => {
|
Ok((retrieved_data, final_url, media_type, charset)) => {
|
||||||
if options.base_url.clone().unwrap_or(str!()).is_empty() {
|
// Make sure the media type is text/html
|
||||||
base_url = final_url
|
if !media_type.eq_ignore_ascii_case("text/html") {
|
||||||
|
if !options.silent {
|
||||||
|
eprintln!("Unsupported document media type");
|
||||||
}
|
}
|
||||||
dom = html_to_dom(&String::from_utf8_lossy(&data));
|
process::exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if options.base_url.clone().unwrap_or(str!()).is_empty() {
|
||||||
|
base_url = final_url;
|
||||||
|
}
|
||||||
|
|
||||||
|
data = retrieved_data;
|
||||||
|
document_encoding = charset;
|
||||||
}
|
}
|
||||||
Err(_) => {
|
Err(_) => {
|
||||||
if !options.silent {
|
if !options.silent {
|
||||||
|
@ -180,24 +197,25 @@ fn main() {
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if target_url.scheme() == "data" {
|
|
||||||
let (media_type, data): (String, Vec<u8>) = parse_data_url(&target_url);
|
|
||||||
|
|
||||||
if !media_type.eq_ignore_ascii_case("text/html") {
|
|
||||||
if !options.silent {
|
|
||||||
eprintln!("Unsupported data URL media type");
|
|
||||||
}
|
|
||||||
process::exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
dom = html_to_dom(&String::from_utf8_lossy(&data));
|
|
||||||
} else {
|
} else {
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Initial parse to read document's charset from META tag
|
||||||
|
dom = html_to_dom(&data, document_encoding.clone());
|
||||||
|
|
||||||
|
// Attempt to determine document's charset
|
||||||
|
if let Some(charset) = get_charset(&dom.document) {
|
||||||
|
if !charset.is_empty() {
|
||||||
|
// TODO && label(charset) != UTF_8
|
||||||
|
document_encoding = charset;
|
||||||
|
dom = html_to_dom(&data, document_encoding.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Use custom base URL if specified, read and use what's in the DOM otherwise
|
// Use custom base URL if specified, read and use what's in the DOM otherwise
|
||||||
let b: String = options.base_url.clone().unwrap_or(str!());
|
let custom_base_url: String = options.base_url.clone().unwrap_or(str!());
|
||||||
if b.is_empty() {
|
if custom_base_url.is_empty() {
|
||||||
// No custom base URL is specified,
|
// No custom base URL is specified,
|
||||||
// try to see if the document has BASE tag
|
// try to see if the document has BASE tag
|
||||||
if let Some(existing_base_url) = get_base_url(&dom.document) {
|
if let Some(existing_base_url) = get_base_url(&dom.document) {
|
||||||
|
@ -205,7 +223,7 @@ fn main() {
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Custom base URL provided
|
// Custom base URL provided
|
||||||
match Url::parse(&b) {
|
match Url::parse(&custom_base_url) {
|
||||||
Ok(parsed_url) => {
|
Ok(parsed_url) => {
|
||||||
if parsed_url.scheme() == "file" {
|
if parsed_url.scheme() == "file" {
|
||||||
// File base URLs can only work with
|
// File base URLs can only work with
|
||||||
|
@ -222,7 +240,7 @@ fn main() {
|
||||||
// perhaps it's a filesystem path?
|
// perhaps it's a filesystem path?
|
||||||
if target_url.scheme() == "file" {
|
if target_url.scheme() == "file" {
|
||||||
// Relative paths could work for documents saved from filesystem
|
// Relative paths could work for documents saved from filesystem
|
||||||
let path: &Path = Path::new(&b);
|
let path: &Path = Path::new(&custom_base_url);
|
||||||
if path.exists() {
|
if path.exists() {
|
||||||
match Url::from_file_path(fs::canonicalize(&path).unwrap()) {
|
match Url::from_file_path(fs::canonicalize(&path).unwrap()) {
|
||||||
Ok(file_url) => {
|
Ok(file_url) => {
|
||||||
|
@ -230,7 +248,10 @@ fn main() {
|
||||||
}
|
}
|
||||||
Err(_) => {
|
Err(_) => {
|
||||||
if !options.silent {
|
if !options.silent {
|
||||||
eprintln!("Could not map given path to base URL: {}", b);
|
eprintln!(
|
||||||
|
"Could not map given path to base URL: {}",
|
||||||
|
custom_base_url
|
||||||
|
);
|
||||||
}
|
}
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
|
@ -241,11 +262,10 @@ fn main() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Embed remote assets
|
// Traverse through the document and embed remote assets
|
||||||
walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options, 0);
|
walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options, 0);
|
||||||
|
|
||||||
// Update or add new BASE tag to reroute network requests
|
// Update or add new BASE element to reroute network requests and hash-links
|
||||||
// and hash-links in the final document
|
|
||||||
if let Some(new_base_url) = options.base_url.clone() {
|
if let Some(new_base_url) = options.base_url.clone() {
|
||||||
dom = set_base_url(&dom.document, new_base_url);
|
dom = set_base_url(&dom.document, new_base_url);
|
||||||
}
|
}
|
||||||
|
@ -265,7 +285,8 @@ fn main() {
|
||||||
&options,
|
&options,
|
||||||
0,
|
0,
|
||||||
) {
|
) {
|
||||||
Ok((data, final_url, media_type)) => {
|
Ok((data, final_url, media_type, _charset)) => {
|
||||||
|
// TODO: use charset
|
||||||
let favicon_data_url: Url = create_data_url(&media_type, &data, &final_url);
|
let favicon_data_url: Url = create_data_url(&media_type, &data, &final_url);
|
||||||
dom = add_favicon(&dom.document, favicon_data_url.to_string());
|
dom = add_favicon(&dom.document, favicon_data_url.to_string());
|
||||||
}
|
}
|
||||||
|
@ -275,20 +296,26 @@ fn main() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Enforce UTF-8 encoding for documents that may end up having garbled html entities
|
||||||
|
// due to html5ever forcefully converting them into UTF-8 byte sequences.
|
||||||
|
if document_encoding.eq_ignore_ascii_case("iso-8859-1") {
|
||||||
|
document_encoding = str!("utf-8");
|
||||||
|
dom = set_charset(dom, document_encoding.clone());
|
||||||
|
}
|
||||||
|
|
||||||
// Serialize DOM tree
|
// Serialize DOM tree
|
||||||
let mut result: String = stringify_document(&dom.document, &options);
|
let mut result: Vec<u8> = serialize_document(dom, document_encoding, &options);
|
||||||
|
|
||||||
// Add metadata tag
|
// Add metadata tag
|
||||||
if !options.no_metadata {
|
if !options.no_metadata {
|
||||||
let metadata_comment: String = create_metadata_tag(&target_url);
|
let mut metadata_comment: String = create_metadata_tag(&target_url);
|
||||||
result.insert_str(0, &metadata_comment);
|
metadata_comment += "\n";
|
||||||
if metadata_comment.len() > 0 {
|
result.splice(0..0, metadata_comment.as_bytes().to_vec());
|
||||||
result.insert_str(metadata_comment.len(), "\n");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Define output
|
||||||
|
let mut output = Output::new(&options.output).expect("Could not prepare output");
|
||||||
|
|
||||||
// Write result into stdout or file
|
// Write result into stdout or file
|
||||||
output
|
output.write(&result).expect("Could not write HTML output");
|
||||||
.writeln_str(&result)
|
|
||||||
.expect("Could not write HTML output");
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,18 +22,18 @@ mod passing {
|
||||||
.output()
|
.output()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
// STDERR should be empty
|
||||||
|
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||||
|
|
||||||
// STDOUT should contain newly added base URL
|
// STDOUT should contain newly added base URL
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stdout).unwrap(),
|
String::from_utf8_lossy(&out.stdout),
|
||||||
"<html><head>\
|
"<html><head>\
|
||||||
<base href=\"http://localhost:8000/\"></base>\
|
<base href=\"http://localhost:8000/\"></base>\
|
||||||
</head><body>Hello, World!</body></html>\n"
|
</head><body>Hello, World!</body></html>\n"
|
||||||
);
|
);
|
||||||
|
|
||||||
// STDERR should be empty
|
// Exit code should be 0
|
||||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
|
||||||
|
|
||||||
// The exit code should be 0
|
|
||||||
out.assert().code(0);
|
out.assert().code(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -46,18 +46,18 @@ mod passing {
|
||||||
.output()
|
.output()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
// STDERR should be empty
|
||||||
|
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||||
|
|
||||||
// STDOUT should contain newly added base URL
|
// STDOUT should contain newly added base URL
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stdout).unwrap(),
|
String::from_utf8_lossy(&out.stdout),
|
||||||
"<html><head>\
|
"<html><head>\
|
||||||
<base href=\"http://localhost:8000/\">\
|
<base href=\"http://localhost:8000/\">\
|
||||||
</head><body>Hello, World!</body></html>\n"
|
</head><body>Hello, World!</body></html>\n"
|
||||||
);
|
);
|
||||||
|
|
||||||
// STDERR should be empty
|
// Exit code should be 0
|
||||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
|
||||||
|
|
||||||
// The exit code should be 0
|
|
||||||
out.assert().code(0);
|
out.assert().code(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -72,18 +72,18 @@ mod passing {
|
||||||
.output()
|
.output()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
// STDERR should be empty
|
||||||
|
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||||
|
|
||||||
// STDOUT should contain newly added base URL
|
// STDOUT should contain newly added base URL
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stdout).unwrap(),
|
String::from_utf8_lossy(&out.stdout),
|
||||||
"<html><head>\
|
"<html><head>\
|
||||||
<base href=\"http://localhost/\">\
|
<base href=\"http://localhost/\">\
|
||||||
</head><body>Hello, World!</body></html>\n"
|
</head><body>Hello, World!</body></html>\n"
|
||||||
);
|
);
|
||||||
|
|
||||||
// STDERR should be empty
|
// Exit code should be 0
|
||||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
|
||||||
|
|
||||||
// The exit code should be 0
|
|
||||||
out.assert().code(0);
|
out.assert().code(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -98,18 +98,18 @@ mod passing {
|
||||||
.output()
|
.output()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
// STDERR should be empty
|
||||||
|
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||||
|
|
||||||
// STDOUT should contain newly added base URL
|
// STDOUT should contain newly added base URL
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stdout).unwrap(),
|
String::from_utf8_lossy(&out.stdout),
|
||||||
"<html><head>\
|
"<html><head>\
|
||||||
<base href=\"\">\
|
<base href=\"\">\
|
||||||
</head><body>Hello, World!</body></html>\n"
|
</head><body>Hello, World!</body></html>\n"
|
||||||
);
|
);
|
||||||
|
|
||||||
// STDERR should be empty
|
// Exit code should be 0
|
||||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
|
||||||
|
|
||||||
// The exit code should be 0
|
|
||||||
out.assert().code(0);
|
out.assert().code(0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,16 +19,16 @@ mod passing {
|
||||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||||
let out = cmd.arg("-V").output().unwrap();
|
let out = cmd.arg("-V").output().unwrap();
|
||||||
|
|
||||||
|
// STDERR should be empty
|
||||||
|
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||||
|
|
||||||
// STDOUT should contain program name and version
|
// STDOUT should contain program name and version
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stdout).unwrap(),
|
String::from_utf8_lossy(&out.stdout),
|
||||||
format!("{} {}\n", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"))
|
format!("{} {}\n", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"))
|
||||||
);
|
);
|
||||||
|
|
||||||
// STDERR should be empty
|
// Exit code should be 0
|
||||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
|
||||||
|
|
||||||
// The exit code should be 0
|
|
||||||
out.assert().code(0);
|
out.assert().code(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -46,11 +46,17 @@ mod passing {
|
||||||
cmd.stdin(echo_out);
|
cmd.stdin(echo_out);
|
||||||
let out = cmd.arg("-M").arg("-").output().unwrap();
|
let out = cmd.arg("-M").arg("-").output().unwrap();
|
||||||
|
|
||||||
|
// STDERR should be empty
|
||||||
|
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||||
|
|
||||||
// STDOUT should contain HTML created out of STDIN
|
// STDOUT should contain HTML created out of STDIN
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stdout).unwrap(),
|
String::from_utf8_lossy(&out.stdout),
|
||||||
"<html><head></head><body>Hello from STDIN\n</body></html>\n"
|
"<html><head></head><body>Hello from STDIN\n</body></html>\n"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Exit code should be 0
|
||||||
|
out.assert().code(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -64,15 +70,9 @@ mod passing {
|
||||||
|
|
||||||
let out = cmd.arg("-M").arg(path_html.as_os_str()).output().unwrap();
|
let out = cmd.arg("-M").arg(path_html.as_os_str()).output().unwrap();
|
||||||
|
|
||||||
// STDOUT should contain embedded CSS url()'s
|
|
||||||
assert_eq!(
|
|
||||||
std::str::from_utf8(&out.stdout).unwrap(),
|
|
||||||
"<html><head><style>\n\n @charset \"UTF-8\";\n\n @import \"data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K\";\n\n @import url(\"data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K\");\n\n @import url(\"data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K\");\n\n</style>\n</head><body></body></html>\n"
|
|
||||||
);
|
|
||||||
|
|
||||||
// STDERR should list files that got retrieved
|
// STDERR should list files that got retrieved
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stderr).unwrap(),
|
String::from_utf8_lossy(&out.stderr),
|
||||||
format!(
|
format!(
|
||||||
"\
|
"\
|
||||||
{file_url_html}\n \
|
{file_url_html}\n \
|
||||||
|
@ -85,7 +85,13 @@ mod passing {
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
// The exit code should be 0
|
// STDOUT should contain embedded CSS url()'s
|
||||||
|
assert_eq!(
|
||||||
|
String::from_utf8_lossy(&out.stdout),
|
||||||
|
"<html><head><style>\n\n @charset \"UTF-8\";\n\n @import \"data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K\";\n\n @import url(\"data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K\");\n\n @import url(\"data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K\");\n\n</style>\n</head><body></body></html>\n"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Exit code should be 0
|
||||||
out.assert().code(0);
|
out.assert().code(0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -108,16 +114,16 @@ mod failing {
|
||||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||||
let out = cmd.arg("").output().unwrap();
|
let out = cmd.arg("").output().unwrap();
|
||||||
|
|
||||||
// STDOUT should be empty
|
|
||||||
assert_eq!(std::str::from_utf8(&out.stdout).unwrap(), "");
|
|
||||||
|
|
||||||
// STDERR should contain error description
|
// STDERR should contain error description
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stderr).unwrap(),
|
String::from_utf8_lossy(&out.stderr),
|
||||||
"No target specified\n"
|
"No target specified\n"
|
||||||
);
|
);
|
||||||
|
|
||||||
// The exit code should be 1
|
// STDOUT should be empty
|
||||||
|
assert_eq!(String::from_utf8_lossy(&out.stdout), "");
|
||||||
|
|
||||||
|
// Exit code should be 1
|
||||||
out.assert().code(1);
|
out.assert().code(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,18 +21,18 @@ mod passing {
|
||||||
.output()
|
.output()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
// STDERR should be empty
|
||||||
|
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||||
|
|
||||||
// STDOUT should contain isolated HTML
|
// STDOUT should contain isolated HTML
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stdout).unwrap(),
|
String::from_utf8_lossy(&out.stdout),
|
||||||
"<html><head>\
|
"<html><head>\
|
||||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:;\"></meta>\
|
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:;\"></meta>\
|
||||||
</head><body>Hello, World!</body></html>\n"
|
</head><body>Hello, World!</body></html>\n"
|
||||||
);
|
);
|
||||||
|
|
||||||
// STDERR should be empty
|
// Exit code should be 0
|
||||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
|
||||||
|
|
||||||
// The exit code should be 0
|
|
||||||
out.assert().code(0);
|
out.assert().code(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -46,19 +46,19 @@ mod passing {
|
||||||
.output()
|
.output()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
// STDERR should be empty
|
||||||
|
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||||
|
|
||||||
// STDOUT should contain HTML with no CSS
|
// STDOUT should contain HTML with no CSS
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stdout).unwrap(),
|
String::from_utf8_lossy(&out.stdout),
|
||||||
"<html><head>\
|
"<html><head>\
|
||||||
<meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none';\"></meta>\
|
<meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none';\"></meta>\
|
||||||
<style></style>\
|
<style></style>\
|
||||||
</head><body>Hello</body></html>\n"
|
</head><body>Hello</body></html>\n"
|
||||||
);
|
);
|
||||||
|
|
||||||
// STDERR should be empty
|
// Exit code should be 0
|
||||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
|
||||||
|
|
||||||
// The exit code should be 0
|
|
||||||
out.assert().code(0);
|
out.assert().code(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -72,19 +72,19 @@ mod passing {
|
||||||
.output()
|
.output()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
// STDERR should be empty
|
||||||
|
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||||
|
|
||||||
// STDOUT should contain HTML with no web fonts
|
// STDOUT should contain HTML with no web fonts
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stdout).unwrap(),
|
String::from_utf8_lossy(&out.stdout),
|
||||||
"<html><head>\
|
"<html><head>\
|
||||||
<meta http-equiv=\"Content-Security-Policy\" content=\"font-src 'none';\"></meta>\
|
<meta http-equiv=\"Content-Security-Policy\" content=\"font-src 'none';\"></meta>\
|
||||||
<style></style>\
|
<style></style>\
|
||||||
</head><body>Hi</body></html>\n"
|
</head><body>Hi</body></html>\n"
|
||||||
);
|
);
|
||||||
|
|
||||||
// STDERR should be empty
|
// Exit code should be 0
|
||||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
|
||||||
|
|
||||||
// The exit code should be 0
|
|
||||||
out.assert().code(0);
|
out.assert().code(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -98,18 +98,18 @@ mod passing {
|
||||||
.output()
|
.output()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
// STDERR should be empty
|
||||||
|
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||||
|
|
||||||
// STDOUT should contain HTML with no iframes
|
// STDOUT should contain HTML with no iframes
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stdout).unwrap(),
|
String::from_utf8_lossy(&out.stdout),
|
||||||
"<html><head>\
|
"<html><head>\
|
||||||
<meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none'; child-src 'none';\"></meta>\
|
<meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none'; child-src 'none';\"></meta>\
|
||||||
</head><body><iframe src=\"\"></iframe>Hi</body></html>\n"
|
</head><body><iframe src=\"\"></iframe>Hi</body></html>\n"
|
||||||
);
|
);
|
||||||
|
|
||||||
// STDERR should be empty
|
// Exit code should be 0
|
||||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
|
||||||
|
|
||||||
// The exit code should be 0
|
|
||||||
out.assert().code(0);
|
out.assert().code(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -123,9 +123,12 @@ mod passing {
|
||||||
.output()
|
.output()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
// STDERR should be empty
|
||||||
|
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||||
|
|
||||||
// STDOUT should contain HTML with no images
|
// STDOUT should contain HTML with no images
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stdout).unwrap(),
|
String::from_utf8_lossy(&out.stdout),
|
||||||
format!(
|
format!(
|
||||||
"<html>\
|
"<html>\
|
||||||
<head>\
|
<head>\
|
||||||
|
@ -140,10 +143,7 @@ mod passing {
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
// STDERR should be empty
|
// Exit code should be 0
|
||||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
|
||||||
|
|
||||||
// The exit code should be 0
|
|
||||||
out.assert().code(0);
|
out.assert().code(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -157,9 +157,12 @@ mod passing {
|
||||||
.output()
|
.output()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
// STDERR should be empty
|
||||||
|
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||||
|
|
||||||
// STDOUT should contain HTML with no JS
|
// STDOUT should contain HTML with no JS
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stdout).unwrap(),
|
String::from_utf8_lossy(&out.stdout),
|
||||||
"<html>\
|
"<html>\
|
||||||
<head>\
|
<head>\
|
||||||
<meta http-equiv=\"Content-Security-Policy\" content=\"script-src 'none';\"></meta>\
|
<meta http-equiv=\"Content-Security-Policy\" content=\"script-src 'none';\"></meta>\
|
||||||
|
@ -168,10 +171,7 @@ mod passing {
|
||||||
</html>\n"
|
</html>\n"
|
||||||
);
|
);
|
||||||
|
|
||||||
// STDERR should be empty
|
// Exit code should be 0
|
||||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
|
||||||
|
|
||||||
// The exit code should be 0
|
|
||||||
out.assert().code(0);
|
out.assert().code(0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -194,16 +194,16 @@ mod failing {
|
||||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||||
let out = cmd.arg("data:,Hello%2C%20World!").output().unwrap();
|
let out = cmd.arg("data:,Hello%2C%20World!").output().unwrap();
|
||||||
|
|
||||||
// STDOUT should be empty
|
|
||||||
assert_eq!(std::str::from_utf8(&out.stdout).unwrap(), "");
|
|
||||||
|
|
||||||
// STDERR should contain error description
|
// STDERR should contain error description
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stderr).unwrap(),
|
String::from_utf8_lossy(&out.stderr),
|
||||||
"Unsupported data URL media type\n"
|
"Unsupported document media type\n"
|
||||||
);
|
);
|
||||||
|
|
||||||
// The exit code should be 1
|
// STDOUT should be empty
|
||||||
|
assert_eq!(String::from_utf8_lossy(&out.stdout), "");
|
||||||
|
|
||||||
|
// Exit code should be 1
|
||||||
out.assert().code(1);
|
out.assert().code(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -216,16 +216,16 @@ mod failing {
|
||||||
.output()
|
.output()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
// STDERR should be empty
|
||||||
|
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||||
|
|
||||||
// STDOUT should contain HTML with no JS in it
|
// STDOUT should contain HTML with no JS in it
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stdout).unwrap(),
|
String::from_utf8_lossy(&out.stdout),
|
||||||
"<html><head><script src=\"data:application/javascript;base64,\"></script></head><body></body></html>\n"
|
"<html><head><script src=\"data:application/javascript;base64,\"></script></head><body></body></html>\n"
|
||||||
);
|
);
|
||||||
|
|
||||||
// STDERR should be empty
|
// Exit code should be 0
|
||||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
|
||||||
|
|
||||||
// The exit code should be 0
|
|
||||||
out.assert().code(0);
|
out.assert().code(0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,7 +10,7 @@ mod passing {
|
||||||
use assert_cmd::prelude::*;
|
use assert_cmd::prelude::*;
|
||||||
use std::env;
|
use std::env;
|
||||||
use std::fs;
|
use std::fs;
|
||||||
use std::path::Path;
|
use std::path::{Path, MAIN_SEPARATOR};
|
||||||
use std::process::Command;
|
use std::process::Command;
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
|
@ -21,18 +21,33 @@ mod passing {
|
||||||
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
|
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
|
||||||
let out = cmd
|
let out = cmd
|
||||||
.arg("-M")
|
.arg("-M")
|
||||||
.arg(if cfg!(windows) {
|
.arg(format!(
|
||||||
"src\\tests\\data\\basic\\local-file.html"
|
"src{s}tests{s}data{s}basic{s}local-file.html",
|
||||||
} else {
|
s = MAIN_SEPARATOR
|
||||||
"src/tests/data/basic/local-file.html"
|
))
|
||||||
})
|
|
||||||
.output()
|
.output()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
|
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
|
||||||
|
|
||||||
|
// STDERR should contain list of retrieved file URLs, two missing
|
||||||
|
assert_eq!(
|
||||||
|
String::from_utf8_lossy(&out.stderr),
|
||||||
|
format!(
|
||||||
|
"\
|
||||||
|
{file}{cwd}/src/tests/data/basic/local-file.html\n \
|
||||||
|
{file}{cwd}/src/tests/data/basic/local-style.css\n \
|
||||||
|
{file}{cwd}/src/tests/data/basic/local-style-does-not-exist.css (not found)\n \
|
||||||
|
{file}{cwd}/src/tests/data/basic/monolith.png (not found)\n \
|
||||||
|
{file}{cwd}/src/tests/data/basic/local-script.js\n\
|
||||||
|
",
|
||||||
|
file = file_url_protocol,
|
||||||
|
cwd = cwd_normalized
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
// STDOUT should contain HTML from the local file
|
// STDOUT should contain HTML from the local file
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stdout).unwrap(),
|
String::from_utf8_lossy(&out.stdout),
|
||||||
"\
|
"\
|
||||||
<!DOCTYPE html><html lang=\"en\"><head>\n \
|
<!DOCTYPE html><html lang=\"en\"><head>\n \
|
||||||
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
|
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
|
||||||
|
@ -47,23 +62,7 @@ mod passing {
|
||||||
"
|
"
|
||||||
);
|
);
|
||||||
|
|
||||||
// STDERR should contain list of retrieved file URLs, two missing
|
// Exit code should be 0
|
||||||
assert_eq!(
|
|
||||||
std::str::from_utf8(&out.stderr).unwrap(),
|
|
||||||
format!(
|
|
||||||
"\
|
|
||||||
{file}{cwd}/src/tests/data/basic/local-file.html\n \
|
|
||||||
{file}{cwd}/src/tests/data/basic/local-style.css\n \
|
|
||||||
{file}{cwd}/src/tests/data/basic/local-style-does-not-exist.css (not found)\n \
|
|
||||||
{file}{cwd}/src/tests/data/basic/monolith.png (not found)\n \
|
|
||||||
{file}{cwd}/src/tests/data/basic/local-script.js\n\
|
|
||||||
",
|
|
||||||
file = file_url_protocol,
|
|
||||||
cwd = cwd_normalized
|
|
||||||
)
|
|
||||||
);
|
|
||||||
|
|
||||||
// The exit code should be 0
|
|
||||||
out.assert().code(0);
|
out.assert().code(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -79,9 +78,18 @@ mod passing {
|
||||||
.output()
|
.output()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
// STDERR should contain only the target file
|
||||||
|
assert_eq!(
|
||||||
|
String::from_utf8_lossy(&out.stderr),
|
||||||
|
format!(
|
||||||
|
"{file_url_html}\n",
|
||||||
|
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
// STDOUT should contain HTML from the local file
|
// STDOUT should contain HTML from the local file
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stdout).unwrap(),
|
String::from_utf8_lossy(&out.stdout),
|
||||||
format!(
|
format!(
|
||||||
"\
|
"\
|
||||||
<!DOCTYPE html><html lang=\"en\"><head>\
|
<!DOCTYPE html><html lang=\"en\"><head>\
|
||||||
|
@ -100,16 +108,7 @@ mod passing {
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
// STDERR should contain only the target file
|
// Exit code should be 0
|
||||||
assert_eq!(
|
|
||||||
std::str::from_utf8(&out.stderr).unwrap(),
|
|
||||||
format!(
|
|
||||||
"{file_url_html}\n",
|
|
||||||
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
|
|
||||||
)
|
|
||||||
);
|
|
||||||
|
|
||||||
// The exit code should be 0
|
|
||||||
out.assert().code(0);
|
out.assert().code(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -122,25 +121,27 @@ mod passing {
|
||||||
let out = cmd
|
let out = cmd
|
||||||
.arg("-M")
|
.arg("-M")
|
||||||
.arg("-cji")
|
.arg("-cji")
|
||||||
.arg(if cfg!(windows) {
|
.arg(format!(
|
||||||
format!(
|
|
||||||
"{file}{cwd}/src/tests/data/basic/local-file.html",
|
"{file}{cwd}/src/tests/data/basic/local-file.html",
|
||||||
file = file_url_protocol,
|
file = file_url_protocol,
|
||||||
cwd = cwd_normalized,
|
cwd = cwd_normalized,
|
||||||
)
|
))
|
||||||
} else {
|
|
||||||
format!(
|
|
||||||
"{file}{cwd}/src/tests/data/basic/local-file.html",
|
|
||||||
file = file_url_protocol,
|
|
||||||
cwd = cwd_normalized,
|
|
||||||
)
|
|
||||||
})
|
|
||||||
.output()
|
.output()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
// STDERR should contain list of retrieved file URLs
|
||||||
|
assert_eq!(
|
||||||
|
String::from_utf8_lossy(&out.stderr),
|
||||||
|
format!(
|
||||||
|
"{file}{cwd}/src/tests/data/basic/local-file.html\n",
|
||||||
|
file = file_url_protocol,
|
||||||
|
cwd = cwd_normalized,
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
// STDOUT should contain HTML from the local file
|
// STDOUT should contain HTML from the local file
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stdout).unwrap(),
|
String::from_utf8_lossy(&out.stdout),
|
||||||
format!(
|
format!(
|
||||||
"\
|
"\
|
||||||
<!DOCTYPE html><html lang=\"en\"><head>\
|
<!DOCTYPE html><html lang=\"en\"><head>\
|
||||||
|
@ -159,17 +160,7 @@ mod passing {
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
// STDERR should contain list of retrieved file URLs
|
// Exit code should be 0
|
||||||
assert_eq!(
|
|
||||||
std::str::from_utf8(&out.stderr).unwrap(),
|
|
||||||
format!(
|
|
||||||
"{file}{cwd}/src/tests/data/basic/local-file.html\n",
|
|
||||||
file = file_url_protocol,
|
|
||||||
cwd = cwd_normalized,
|
|
||||||
)
|
|
||||||
);
|
|
||||||
|
|
||||||
// The exit code should be 0
|
|
||||||
out.assert().code(0);
|
out.assert().code(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -181,15 +172,9 @@ mod passing {
|
||||||
|
|
||||||
let out = cmd.arg("-M").arg(path_html.as_os_str()).output().unwrap();
|
let out = cmd.arg("-M").arg(path_html.as_os_str()).output().unwrap();
|
||||||
|
|
||||||
// STDOUT should contain HTML with data URL for background-image in it
|
|
||||||
assert_eq!(
|
|
||||||
std::str::from_utf8(&out.stdout).unwrap(),
|
|
||||||
"<html><head></head><body><div style=\"background-image: url("")\"></div>\n</body></html>\n"
|
|
||||||
);
|
|
||||||
|
|
||||||
// STDERR should list files that got retrieved
|
// STDERR should list files that got retrieved
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stderr).unwrap(),
|
String::from_utf8_lossy(&out.stderr),
|
||||||
format!(
|
format!(
|
||||||
"\
|
"\
|
||||||
{file_url_html}\n \
|
{file_url_html}\n \
|
||||||
|
@ -200,7 +185,13 @@ mod passing {
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
// The exit code should be 0
|
// STDOUT should contain HTML with data URL for background-image in it
|
||||||
|
assert_eq!(
|
||||||
|
String::from_utf8_lossy(&out.stdout),
|
||||||
|
"<html><head></head><body><div style=\"background-image: url("")\"></div>\n</body></html>\n"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Exit code should be 0
|
||||||
out.assert().code(0);
|
out.assert().code(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -229,9 +220,25 @@ mod passing {
|
||||||
.output()
|
.output()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
// STDERR should contain list of retrieved file URLs
|
||||||
|
assert_eq!(
|
||||||
|
String::from_utf8_lossy(&out.stderr),
|
||||||
|
format!(
|
||||||
|
"\
|
||||||
|
{file}{cwd}/src/tests/data/integrity/index.html\n \
|
||||||
|
{file}{cwd}/src/tests/data/integrity/style.css\n \
|
||||||
|
{file}{cwd}/src/tests/data/integrity/style.css\n \
|
||||||
|
{file}{cwd}/src/tests/data/integrity/script.js\n \
|
||||||
|
{file}{cwd}/src/tests/data/integrity/script.js\n\
|
||||||
|
",
|
||||||
|
file = file_url_protocol,
|
||||||
|
cwd = cwd_normalized,
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
// STDOUT should contain HTML from the local file; integrity attributes should be missing
|
// STDOUT should contain HTML from the local file; integrity attributes should be missing
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stdout).unwrap(),
|
String::from_utf8_lossy(&out.stdout),
|
||||||
format!(
|
format!(
|
||||||
"\
|
"\
|
||||||
<!DOCTYPE html><html lang=\"en\"><head>\
|
<!DOCTYPE html><html lang=\"en\"><head>\
|
||||||
|
@ -247,23 +254,7 @@ mod passing {
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
// STDERR should contain list of retrieved file URLs
|
// Exit code should be 0
|
||||||
assert_eq!(
|
|
||||||
std::str::from_utf8(&out.stderr).unwrap(),
|
|
||||||
format!(
|
|
||||||
"\
|
|
||||||
{file}{cwd}/src/tests/data/integrity/index.html\n \
|
|
||||||
{file}{cwd}/src/tests/data/integrity/style.css\n \
|
|
||||||
{file}{cwd}/src/tests/data/integrity/style.css\n \
|
|
||||||
{file}{cwd}/src/tests/data/integrity/script.js\n \
|
|
||||||
{file}{cwd}/src/tests/data/integrity/script.js\n\
|
|
||||||
",
|
|
||||||
file = file_url_protocol,
|
|
||||||
cwd = cwd_normalized,
|
|
||||||
)
|
|
||||||
);
|
|
||||||
|
|
||||||
// The exit code should be 0
|
|
||||||
out.assert().code(0);
|
out.assert().code(0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,15 +22,9 @@ mod passing {
|
||||||
|
|
||||||
let out = cmd.arg("-M").arg(path_html.as_os_str()).output().unwrap();
|
let out = cmd.arg("-M").arg(path_html.as_os_str()).output().unwrap();
|
||||||
|
|
||||||
// STDOUT should contain HTML with no CSS
|
|
||||||
assert_eq!(
|
|
||||||
std::str::from_utf8(&out.stdout).unwrap(),
|
|
||||||
"<html><head></head><body><noscript><img src=\"\"></noscript>\n</body></html>\n"
|
|
||||||
);
|
|
||||||
|
|
||||||
// STDERR should contain target HTML and embedded SVG files
|
// STDERR should contain target HTML and embedded SVG files
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stderr).unwrap(),
|
String::from_utf8_lossy(&out.stderr),
|
||||||
format!(
|
format!(
|
||||||
"\
|
"\
|
||||||
{file_url_html}\n \
|
{file_url_html}\n \
|
||||||
|
@ -41,7 +35,13 @@ mod passing {
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
// The exit code should be 0
|
// STDOUT should contain HTML with no CSS
|
||||||
|
assert_eq!(
|
||||||
|
String::from_utf8_lossy(&out.stdout),
|
||||||
|
"<html><head></head><body><noscript><img src=\"\"></noscript>\n</body></html>\n"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Exit code should be 0
|
||||||
out.assert().code(0);
|
out.assert().code(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -53,15 +53,9 @@ mod passing {
|
||||||
|
|
||||||
let out = cmd.arg("-Mn").arg(path_html.as_os_str()).output().unwrap();
|
let out = cmd.arg("-Mn").arg(path_html.as_os_str()).output().unwrap();
|
||||||
|
|
||||||
// STDOUT should contain HTML with no CSS
|
|
||||||
assert_eq!(
|
|
||||||
std::str::from_utf8(&out.stdout).unwrap(),
|
|
||||||
"<html><head></head><body><!--noscript--><img src=\"\"><!--/noscript-->\n</body></html>\n"
|
|
||||||
);
|
|
||||||
|
|
||||||
// STDERR should contain target HTML and embedded SVG files
|
// STDERR should contain target HTML and embedded SVG files
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stderr).unwrap(),
|
String::from_utf8_lossy(&out.stderr),
|
||||||
format!(
|
format!(
|
||||||
"\
|
"\
|
||||||
{file_url_html}\n \
|
{file_url_html}\n \
|
||||||
|
@ -72,7 +66,13 @@ mod passing {
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
// The exit code should be 0
|
// STDOUT should contain HTML with no CSS
|
||||||
|
assert_eq!(
|
||||||
|
String::from_utf8_lossy(&out.stdout),
|
||||||
|
"<html><head></head><body><!--noscript--><img src=\"\"><!--/noscript-->\n</body></html>\n"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Exit code should be 0
|
||||||
out.assert().code(0);
|
out.assert().code(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -84,15 +84,9 @@ mod passing {
|
||||||
|
|
||||||
let out = cmd.arg("-Mn").arg(path_html.as_os_str()).output().unwrap();
|
let out = cmd.arg("-Mn").arg(path_html.as_os_str()).output().unwrap();
|
||||||
|
|
||||||
// STDOUT should contain HTML with no CSS
|
|
||||||
assert_eq!(
|
|
||||||
std::str::from_utf8(&out.stdout).unwrap(),
|
|
||||||
"<html><head></head><body><!--noscript--><h1>JS is not active</h1><!--noscript--><img src=\"\"><!--/noscript--><!--/noscript-->\n</body></html>\n"
|
|
||||||
);
|
|
||||||
|
|
||||||
// STDERR should contain target HTML and embedded SVG files
|
// STDERR should contain target HTML and embedded SVG files
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stderr).unwrap(),
|
String::from_utf8_lossy(&out.stderr),
|
||||||
format!(
|
format!(
|
||||||
"\
|
"\
|
||||||
{file_url_html}\n \
|
{file_url_html}\n \
|
||||||
|
@ -103,7 +97,13 @@ mod passing {
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
// The exit code should be 0
|
// STDOUT should contain HTML with no CSS
|
||||||
|
assert_eq!(
|
||||||
|
String::from_utf8_lossy(&out.stdout),
|
||||||
|
"<html><head></head><body><!--noscript--><h1>JS is not active</h1><!--noscript--><img src=\"\"><!--/noscript--><!--/noscript-->\n</body></html>\n"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Exit code should be 0
|
||||||
out.assert().code(0);
|
out.assert().code(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -115,22 +115,9 @@ mod passing {
|
||||||
|
|
||||||
let out = cmd.arg("-Mn").arg(path_html.as_os_str()).output().unwrap();
|
let out = cmd.arg("-Mn").arg(path_html.as_os_str()).output().unwrap();
|
||||||
|
|
||||||
// STDOUT should contain HTML with no CSS
|
|
||||||
assert_eq!(
|
|
||||||
std::str::from_utf8(&out.stdout).unwrap(),
|
|
||||||
"<html>\
|
|
||||||
<head></head>\
|
|
||||||
<body>\
|
|
||||||
<!--noscript-->\
|
|
||||||
<img src=\"\">\
|
|
||||||
<!--/noscript-->\n\
|
|
||||||
</body>\
|
|
||||||
</html>\n"
|
|
||||||
);
|
|
||||||
|
|
||||||
// STDERR should contain target HTML and embedded SVG files
|
// STDERR should contain target HTML and embedded SVG files
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stderr).unwrap(),
|
String::from_utf8_lossy(&out.stderr),
|
||||||
format!(
|
format!(
|
||||||
"\
|
"\
|
||||||
{file_url_html}\n \
|
{file_url_html}\n \
|
||||||
|
@ -141,7 +128,20 @@ mod passing {
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
// The exit code should be 0
|
// STDOUT should contain HTML with no CSS
|
||||||
|
assert_eq!(
|
||||||
|
String::from_utf8_lossy(&out.stdout),
|
||||||
|
"<html>\
|
||||||
|
<head></head>\
|
||||||
|
<body>\
|
||||||
|
<!--noscript-->\
|
||||||
|
<img src=\"\">\
|
||||||
|
<!--/noscript-->\n\
|
||||||
|
</body>\
|
||||||
|
</html>\n"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Exit code should be 0
|
||||||
out.assert().code(0);
|
out.assert().code(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -155,16 +155,16 @@ mod passing {
|
||||||
.output()
|
.output()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
// STDERR should be empty
|
||||||
|
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||||
|
|
||||||
// STDOUT should contain unwrapped contents of NOSCRIPT element
|
// STDOUT should contain unwrapped contents of NOSCRIPT element
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stdout).unwrap(),
|
String::from_utf8_lossy(&out.stdout),
|
||||||
"<html><head><!--noscript class=\"\"-->test<!--/noscript--></head><body></body></html>\n"
|
"<html><head><!--noscript class=\"\"-->test<!--/noscript--></head><body></body></html>\n"
|
||||||
);
|
);
|
||||||
|
|
||||||
// STDERR should be empty
|
// Exit code should be 0
|
||||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
|
||||||
|
|
||||||
// The exit code should be 0
|
|
||||||
out.assert().code(0);
|
out.assert().code(0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,28 +8,39 @@
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod passing {
|
mod passing {
|
||||||
use assert_cmd::prelude::*;
|
use assert_cmd::prelude::*;
|
||||||
|
use encoding_rs::Encoding;
|
||||||
use std::env;
|
use std::env;
|
||||||
use std::process::Command;
|
use std::path::MAIN_SEPARATOR;
|
||||||
|
use std::process::{Command, Stdio};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn change_encoding_to_utf_8() {
|
fn change_iso88591_to_utf8_to_properly_display_html_entities() {
|
||||||
let cwd = env::current_dir().unwrap();
|
let cwd = env::current_dir().unwrap();
|
||||||
let cwd_normalized: String = str!(cwd.to_str().unwrap()).replace("\\", "/");
|
let cwd_normalized: String = str!(cwd.to_str().unwrap()).replace("\\", "/");
|
||||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||||
let out = cmd
|
let out = cmd
|
||||||
.arg("-M")
|
.arg("-M")
|
||||||
.arg(if cfg!(windows) {
|
.arg(format!(
|
||||||
"src\\tests\\data\\unusual_encodings\\iso-8859-1.html"
|
"src{s}tests{s}data{s}unusual_encodings{s}iso-8859-1.html",
|
||||||
} else {
|
s = MAIN_SEPARATOR
|
||||||
"src/tests/data/unusual_encodings/iso-8859-1.html"
|
))
|
||||||
})
|
|
||||||
.output()
|
.output()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
|
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
|
||||||
|
|
||||||
// STDOUT should contain newly added base URL
|
// STDERR should contain only the target file
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stdout).unwrap(),
|
String::from_utf8_lossy(&out.stderr),
|
||||||
|
format!(
|
||||||
|
"{file}{cwd}/src/tests/data/unusual_encodings/iso-8859-1.html\n",
|
||||||
|
file = file_url_protocol,
|
||||||
|
cwd = cwd_normalized,
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
// STDOUT should contain original document but with UTF-8 charset
|
||||||
|
assert_eq!(
|
||||||
|
String::from_utf8_lossy(&out.stdout),
|
||||||
"<html>\
|
"<html>\
|
||||||
<head>\n \
|
<head>\n \
|
||||||
<meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\">\n \
|
<meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\">\n \
|
||||||
|
@ -40,17 +51,102 @@ mod passing {
|
||||||
</html>\n"
|
</html>\n"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Exit code should be 0
|
||||||
|
out.assert().code(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn properly_save_document_with_gb2312() {
|
||||||
|
let cwd = env::current_dir().unwrap();
|
||||||
|
let cwd_normalized: String = str!(cwd.to_str().unwrap()).replace("\\", "/");
|
||||||
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||||
|
let out = cmd
|
||||||
|
.arg("-M")
|
||||||
|
.arg(format!(
|
||||||
|
"src{s}tests{s}data{s}unusual_encodings{s}gb2312.html",
|
||||||
|
s = MAIN_SEPARATOR
|
||||||
|
))
|
||||||
|
.output()
|
||||||
|
.unwrap();
|
||||||
|
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
|
||||||
|
|
||||||
// STDERR should contain only the target file
|
// STDERR should contain only the target file
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stderr).unwrap(),
|
String::from_utf8_lossy(&out.stderr),
|
||||||
format!(
|
format!(
|
||||||
"{file}{cwd}/src/tests/data/unusual_encodings/iso-8859-1.html\n",
|
"{file}{cwd}/src/tests/data/unusual_encodings/gb2312.html\n",
|
||||||
file = file_url_protocol,
|
file = file_url_protocol,
|
||||||
cwd = cwd_normalized,
|
cwd = cwd_normalized,
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
// The exit code should be 0
|
// STDOUT should contain original document without any modifications
|
||||||
|
let s: String;
|
||||||
|
if let Some(encoding) = Encoding::for_label(b"gb2312") {
|
||||||
|
let (string, _, _) = encoding.decode(&out.stdout);
|
||||||
|
s = string.to_string();
|
||||||
|
} else {
|
||||||
|
s = String::from_utf8_lossy(&out.stdout).to_string();
|
||||||
|
}
|
||||||
|
assert_eq!(
|
||||||
|
s,
|
||||||
|
"<html>\
|
||||||
|
<head>\n \
|
||||||
|
<meta http-equiv=\"content-type\" content=\"text/html;charset=GB2312\">\n \
|
||||||
|
<title>近七成人减少线下需求\u{3000}银行数字化转型提速--经济·科技--人民网 </title>\n\
|
||||||
|
</head>\n\
|
||||||
|
<body>\n \
|
||||||
|
<h1>近七成人减少线下需求\u{3000}银行数字化转型提速</h1>\n\n\n\
|
||||||
|
</body>\
|
||||||
|
</html>\n"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Exit code should be 0
|
||||||
|
out.assert().code(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn properly_save_document_with_gb2312_from_stdin() {
|
||||||
|
let mut echo = Command::new("cat")
|
||||||
|
.arg(format!(
|
||||||
|
"src{s}tests{s}data{s}unusual_encodings{s}gb2312.html",
|
||||||
|
s = MAIN_SEPARATOR
|
||||||
|
))
|
||||||
|
.stdout(Stdio::piped())
|
||||||
|
.spawn()
|
||||||
|
.unwrap();
|
||||||
|
let echo_out = echo.stdout.take().unwrap();
|
||||||
|
echo.wait().unwrap();
|
||||||
|
|
||||||
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||||
|
cmd.stdin(echo_out);
|
||||||
|
let out = cmd.arg("-M").arg("-").output().unwrap();
|
||||||
|
|
||||||
|
// STDERR should be empty
|
||||||
|
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||||
|
|
||||||
|
// STDOUT should contain HTML created out of STDIN
|
||||||
|
let s: String;
|
||||||
|
if let Some(encoding) = Encoding::for_label(b"gb2312") {
|
||||||
|
let (string, _, _) = encoding.decode(&out.stdout);
|
||||||
|
s = string.to_string();
|
||||||
|
} else {
|
||||||
|
s = String::from_utf8_lossy(&out.stdout).to_string();
|
||||||
|
}
|
||||||
|
assert_eq!(
|
||||||
|
s,
|
||||||
|
"<html>\
|
||||||
|
<head>\n \
|
||||||
|
<meta http-equiv=\"content-type\" content=\"text/html;charset=GB2312\">\n \
|
||||||
|
<title>近七成人减少线下需求\u{3000}银行数字化转型提速--经济·科技--人民网 </title>\n\
|
||||||
|
</head>\n\
|
||||||
|
<body>\n \
|
||||||
|
<h1>近七成人减少线下需求\u{3000}银行数字化转型提速</h1>\n\n\n\
|
||||||
|
</body>\
|
||||||
|
</html>\n"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Exit code should be 0
|
||||||
out.assert().code(0);
|
out.assert().code(0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -331,7 +331,7 @@ mod passing {
|
||||||
";
|
";
|
||||||
const CSS_OUT: &str = "\
|
const CSS_OUT: &str = "\
|
||||||
#language a[href=\"#translations\"]:before {\n\
|
#language a[href=\"#translations\"]:before {\n\
|
||||||
content: url(\"data:;base64,\") \"\\a \";\n\
|
content: url(\"data:text/plain;base64,\") \"\\a \";\n\
|
||||||
white-space: pre }\n\
|
white-space: pre }\n\
|
||||||
";
|
";
|
||||||
|
|
||||||
|
|
9
src/tests/data/unusual_encodings/gb2312.html
Normal file
9
src/tests/data/unusual_encodings/gb2312.html
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta http-equiv="content-type" content="text/html;charset=GB2312"/>
|
||||||
|
<title>近七成人减少线下需求 银行数字化转型提速--经济·科技--人民网 </title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<h1>近七成人减少线下需求 银行数字化转型提速</h1>
|
||||||
|
</body>
|
||||||
|
</html>
|
|
@ -14,7 +14,7 @@ mod passing {
|
||||||
#[test]
|
#[test]
|
||||||
fn basic() {
|
fn basic() {
|
||||||
let html = "<div>text</div>";
|
let html = "<div>text</div>";
|
||||||
let mut dom = html::html_to_dom(&html);
|
let mut dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
|
|
||||||
dom = html::add_favicon(&dom.document, "I_AM_A_FAVICON_DATA_URL".to_string());
|
dom = html::add_favicon(&dom.document, "I_AM_A_FAVICON_DATA_URL".to_string());
|
||||||
|
|
||||||
|
|
|
@ -19,7 +19,7 @@ mod passing {
|
||||||
<body>
|
<body>
|
||||||
</body>
|
</body>
|
||||||
</html>";
|
</html>";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
html::get_base_url(&dom.document),
|
html::get_base_url(&dom.document),
|
||||||
|
@ -38,7 +38,7 @@ mod passing {
|
||||||
<body>
|
<body>
|
||||||
</body>
|
</body>
|
||||||
</html>";
|
</html>";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
html::get_base_url(&dom.document),
|
html::get_base_url(&dom.document),
|
||||||
|
@ -67,7 +67,7 @@ mod failing {
|
||||||
<body>
|
<body>
|
||||||
</body>
|
</body>
|
||||||
</html>";
|
</html>";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
|
|
||||||
assert_eq!(html::get_base_url(&dom.document), None);
|
assert_eq!(html::get_base_url(&dom.document), None);
|
||||||
}
|
}
|
||||||
|
@ -82,7 +82,7 @@ mod failing {
|
||||||
<body>
|
<body>
|
||||||
</body>
|
</body>
|
||||||
</html>";
|
</html>";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
|
|
||||||
assert_eq!(html::get_base_url(&dom.document), None);
|
assert_eq!(html::get_base_url(&dom.document), None);
|
||||||
}
|
}
|
||||||
|
@ -97,7 +97,7 @@ mod failing {
|
||||||
<body>
|
<body>
|
||||||
</body>
|
</body>
|
||||||
</html>";
|
</html>";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
|
|
||||||
assert_eq!(html::get_base_url(&dom.document), Some(str!()));
|
assert_eq!(html::get_base_url(&dom.document), Some(str!()));
|
||||||
}
|
}
|
||||||
|
|
72
src/tests/html/get_charset.rs
Normal file
72
src/tests/html/get_charset.rs
Normal file
|
@ -0,0 +1,72 @@
|
||||||
|
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||||
|
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||||
|
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||||
|
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||||
|
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||||
|
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod passing {
|
||||||
|
use crate::html;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn meta_content_type() {
|
||||||
|
let html = "<!doctype html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta http-equiv=\"content-type\" content=\"text/html;charset=GB2312\" />
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
</body>
|
||||||
|
</html>";
|
||||||
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
|
|
||||||
|
assert_eq!(html::get_charset(&dom.document), Some(str!("GB2312")));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn meta_charset() {
|
||||||
|
let html = "<!doctype html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset=\"GB2312\" />
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
</body>
|
||||||
|
</html>";
|
||||||
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
|
|
||||||
|
assert_eq!(html::get_charset(&dom.document), Some(str!("GB2312")));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn multiple_conflicting_meta_charset_first() {
|
||||||
|
let html = "<!doctype html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset=\"utf-8\" />
|
||||||
|
<meta http-equiv=\"content-type\" content=\"text/html;charset=GB2312\" />
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
</body>
|
||||||
|
</html>";
|
||||||
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
|
|
||||||
|
assert_eq!(html::get_charset(&dom.document), Some(str!("utf-8")));
|
||||||
|
}
|
||||||
|
#[test]
|
||||||
|
fn multiple_conflicting_meta_content_type_first() {
|
||||||
|
let html = "<!doctype html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta http-equiv=\"content-type\" content=\"text/html;charset=GB2312\" />
|
||||||
|
<meta charset=\"utf-8\" />
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
</body>
|
||||||
|
</html>";
|
||||||
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
|
|
||||||
|
assert_eq!(html::get_charset(&dom.document), Some(str!("GB2312")));
|
||||||
|
}
|
||||||
|
}
|
|
@ -14,7 +14,7 @@ mod passing {
|
||||||
#[test]
|
#[test]
|
||||||
fn div_two_style_attributes() {
|
fn div_two_style_attributes() {
|
||||||
let html = "<!doctype html><html><head></head><body><DIV STYLE=\"color: blue;\" style=\"display: none;\"></div></body></html>";
|
let html = "<!doctype html><html><head></head><body><DIV STYLE=\"color: blue;\" style=\"display: none;\"></div></body></html>";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
let mut count = 0;
|
let mut count = 0;
|
||||||
|
|
||||||
fn test_walk(node: &Handle, i: &mut i8) {
|
fn test_walk(node: &Handle, i: &mut i8) {
|
||||||
|
|
|
@ -14,7 +14,7 @@ mod passing {
|
||||||
#[test]
|
#[test]
|
||||||
fn parent_node_names() {
|
fn parent_node_names() {
|
||||||
let html = "<!doctype html><html><HEAD></HEAD><body><div><P></P></div></body></html>";
|
let html = "<!doctype html><html><HEAD></HEAD><body><div><P></P></div></body></html>";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
let mut count = 0;
|
let mut count = 0;
|
||||||
|
|
||||||
fn test_walk(node: &Handle, i: &mut i8) {
|
fn test_walk(node: &Handle, i: &mut i8) {
|
||||||
|
|
|
@ -12,7 +12,7 @@ mod passing {
|
||||||
#[test]
|
#[test]
|
||||||
fn icon() {
|
fn icon() {
|
||||||
let html = "<link rel=\"icon\" href=\"\" /><div>text</div>";
|
let html = "<link rel=\"icon\" href=\"\" /><div>text</div>";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
let res: bool = html::has_favicon(&dom.document);
|
let res: bool = html::has_favicon(&dom.document);
|
||||||
|
|
||||||
assert!(res);
|
assert!(res);
|
||||||
|
@ -21,7 +21,7 @@ mod passing {
|
||||||
#[test]
|
#[test]
|
||||||
fn shortcut_icon() {
|
fn shortcut_icon() {
|
||||||
let html = "<link rel=\"shortcut icon\" href=\"\" /><div>text</div>";
|
let html = "<link rel=\"shortcut icon\" href=\"\" /><div>text</div>";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
let res: bool = html::has_favicon(&dom.document);
|
let res: bool = html::has_favicon(&dom.document);
|
||||||
|
|
||||||
assert!(res);
|
assert!(res);
|
||||||
|
@ -42,7 +42,7 @@ mod failing {
|
||||||
#[test]
|
#[test]
|
||||||
fn absent() {
|
fn absent() {
|
||||||
let html = "<div>text</div>";
|
let html = "<div>text</div>";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
let res: bool = html::has_favicon(&dom.document);
|
let res: bool = html::has_favicon(&dom.document);
|
||||||
|
|
||||||
assert!(!res);
|
assert!(!res);
|
||||||
|
|
|
@ -4,10 +4,11 @@ mod compose_csp;
|
||||||
mod create_metadata_tag;
|
mod create_metadata_tag;
|
||||||
mod embed_srcset;
|
mod embed_srcset;
|
||||||
mod get_base_url;
|
mod get_base_url;
|
||||||
|
mod get_charset;
|
||||||
mod get_node_attr;
|
mod get_node_attr;
|
||||||
mod get_node_name;
|
mod get_node_name;
|
||||||
mod has_favicon;
|
mod has_favicon;
|
||||||
mod is_icon;
|
mod is_icon;
|
||||||
|
mod serialize_document;
|
||||||
mod set_node_attr;
|
mod set_node_attr;
|
||||||
mod stringify_document;
|
|
||||||
mod walk_and_embed_assets;
|
mod walk_and_embed_assets;
|
||||||
|
|
|
@ -13,11 +13,11 @@ mod passing {
|
||||||
#[test]
|
#[test]
|
||||||
fn div_as_root_element() {
|
fn div_as_root_element() {
|
||||||
let html = "<div><script src=\"some.js\"></script></div>";
|
let html = "<div><script src=\"some.js\"></script></div>";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
let options = Options::default();
|
let options = Options::default();
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
html::stringify_document(&dom.document, &options),
|
String::from_utf8_lossy(&html::serialize_document(dom, str!(), &options)),
|
||||||
"<html><head></head><body><div><script src=\"some.js\"></script></div></body></html>"
|
"<html><head></head><body><div><script src=\"some.js\"></script></div></body></html>"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -28,15 +28,16 @@ mod passing {
|
||||||
<link rel=\"something\" href=\"some.css\" />\
|
<link rel=\"something\" href=\"some.css\" />\
|
||||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
|
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
|
||||||
<div><script src=\"some.js\"></script></div>";
|
<div><script src=\"some.js\"></script></div>";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
let mut options = Options::default();
|
let mut options = Options::default();
|
||||||
options.isolate = true;
|
options.isolate = true;
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
html::stringify_document(
|
String::from_utf8_lossy(&html::serialize_document(
|
||||||
&dom.document,
|
dom,
|
||||||
|
str!(),
|
||||||
&options
|
&options
|
||||||
),
|
)),
|
||||||
"<html>\
|
"<html>\
|
||||||
<head>\
|
<head>\
|
||||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:;\"></meta>\
|
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:;\"></meta>\
|
||||||
|
@ -59,12 +60,12 @@ mod passing {
|
||||||
<title>Unstyled document</title>\
|
<title>Unstyled document</title>\
|
||||||
<link rel=\"stylesheet\" href=\"main.css\"/>\
|
<link rel=\"stylesheet\" href=\"main.css\"/>\
|
||||||
<div style=\"display: none;\"></div>";
|
<div style=\"display: none;\"></div>";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
let mut options = Options::default();
|
let mut options = Options::default();
|
||||||
options.no_css = true;
|
options.no_css = true;
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
html::stringify_document(&dom.document, &options),
|
String::from_utf8_lossy(&html::serialize_document(dom, str!(), &options)),
|
||||||
"<!DOCTYPE html>\
|
"<!DOCTYPE html>\
|
||||||
<html>\
|
<html>\
|
||||||
<head>\
|
<head>\
|
||||||
|
@ -83,15 +84,16 @@ mod passing {
|
||||||
<title>Frameless document</title>\
|
<title>Frameless document</title>\
|
||||||
<link rel=\"something\"/>\
|
<link rel=\"something\"/>\
|
||||||
<div><script src=\"some.js\"></script></div>";
|
<div><script src=\"some.js\"></script></div>";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
let mut options = Options::default();
|
let mut options = Options::default();
|
||||||
options.no_frames = true;
|
options.no_frames = true;
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
html::stringify_document(
|
String::from_utf8_lossy(&html::serialize_document(
|
||||||
&dom.document,
|
dom,
|
||||||
|
str!(),
|
||||||
&options
|
&options
|
||||||
),
|
)),
|
||||||
"<!DOCTYPE html>\
|
"<!DOCTYPE html>\
|
||||||
<html>\
|
<html>\
|
||||||
<head>\
|
<head>\
|
||||||
|
@ -115,7 +117,7 @@ mod passing {
|
||||||
<img style=\"width: 100%;\" src=\"some.png\" />\
|
<img style=\"width: 100%;\" src=\"some.png\" />\
|
||||||
<iframe src=\"some.html\"></iframe>\
|
<iframe src=\"some.html\"></iframe>\
|
||||||
</div>";
|
</div>";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
let mut options = Options::default();
|
let mut options = Options::default();
|
||||||
options.isolate = true;
|
options.isolate = true;
|
||||||
options.no_css = true;
|
options.no_css = true;
|
||||||
|
@ -125,10 +127,11 @@ mod passing {
|
||||||
options.no_images = true;
|
options.no_images = true;
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
html::stringify_document(
|
String::from_utf8_lossy(&html::serialize_document(
|
||||||
&dom.document,
|
dom,
|
||||||
|
str!(),
|
||||||
&options
|
&options
|
||||||
),
|
)),
|
||||||
"<!DOCTYPE html>\
|
"<!DOCTYPE html>\
|
||||||
<html>\
|
<html>\
|
||||||
<head>\
|
<head>\
|
|
@ -14,7 +14,7 @@ mod passing {
|
||||||
#[test]
|
#[test]
|
||||||
fn html_lang_and_body_style() {
|
fn html_lang_and_body_style() {
|
||||||
let html = "<!doctype html><html lang=\"en\"><head></head><body></body></html>";
|
let html = "<!doctype html><html lang=\"en\"><head></head><body></body></html>";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
let mut count = 0;
|
let mut count = 0;
|
||||||
|
|
||||||
fn test_walk(node: &Handle, i: &mut i8) {
|
fn test_walk(node: &Handle, i: &mut i8) {
|
||||||
|
@ -67,7 +67,7 @@ mod passing {
|
||||||
#[test]
|
#[test]
|
||||||
fn body_background() {
|
fn body_background() {
|
||||||
let html = "<!doctype html><html lang=\"en\"><head></head><body background=\"1\" background=\"2\"></body></html>";
|
let html = "<!doctype html><html lang=\"en\"><head></head><body background=\"1\" background=\"2\"></body></html>";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
let mut count = 0;
|
let mut count = 0;
|
||||||
|
|
||||||
fn test_walk(node: &Handle, i: &mut i8) {
|
fn test_walk(node: &Handle, i: &mut i8) {
|
||||||
|
|
|
@ -20,7 +20,7 @@ mod passing {
|
||||||
let cache = &mut HashMap::new();
|
let cache = &mut HashMap::new();
|
||||||
|
|
||||||
let html: &str = "<div><P></P></div>";
|
let html: &str = "<div><P></P></div>";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
let url: Url = Url::parse("http://localhost").unwrap();
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
|
|
||||||
let mut options = Options::default();
|
let mut options = Options::default();
|
||||||
|
@ -42,7 +42,7 @@ mod passing {
|
||||||
#[test]
|
#[test]
|
||||||
fn ensure_no_recursive_iframe() {
|
fn ensure_no_recursive_iframe() {
|
||||||
let html = "<div><P></P><iframe src=\"\"></iframe></div>";
|
let html = "<div><P></P><iframe src=\"\"></iframe></div>";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
let url: Url = Url::parse("http://localhost").unwrap();
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
let cache = &mut HashMap::new();
|
let cache = &mut HashMap::new();
|
||||||
|
|
||||||
|
@ -65,7 +65,7 @@ mod passing {
|
||||||
#[test]
|
#[test]
|
||||||
fn ensure_no_recursive_frame() {
|
fn ensure_no_recursive_frame() {
|
||||||
let html = "<frameset><frame src=\"\"></frameset>";
|
let html = "<frameset><frame src=\"\"></frameset>";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
let url: Url = Url::parse("http://localhost").unwrap();
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
let cache = &mut HashMap::new();
|
let cache = &mut HashMap::new();
|
||||||
|
|
||||||
|
@ -93,7 +93,7 @@ mod passing {
|
||||||
<style>html{background-color: #000;}</style>\
|
<style>html{background-color: #000;}</style>\
|
||||||
<div style=\"display: none;\"></div>\
|
<div style=\"display: none;\"></div>\
|
||||||
";
|
";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
let url: Url = Url::parse("http://localhost").unwrap();
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
let cache = &mut HashMap::new();
|
let cache = &mut HashMap::new();
|
||||||
|
|
||||||
|
@ -129,7 +129,7 @@ mod passing {
|
||||||
fn no_images() {
|
fn no_images() {
|
||||||
let html = "<link rel=\"icon\" href=\"favicon.ico\">\
|
let html = "<link rel=\"icon\" href=\"favicon.ico\">\
|
||||||
<div><img src=\"http://localhost/assets/mono_lisa.png\" /></div>";
|
<div><img src=\"http://localhost/assets/mono_lisa.png\" /></div>";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
let url: Url = Url::parse("http://localhost").unwrap();
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
let cache = &mut HashMap::new();
|
let cache = &mut HashMap::new();
|
||||||
|
|
||||||
|
@ -166,7 +166,7 @@ mod passing {
|
||||||
fn no_body_background_images() {
|
fn no_body_background_images() {
|
||||||
let html =
|
let html =
|
||||||
"<body background=\"no/such/image.png\" background=\"no/such/image2.png\"></body>";
|
"<body background=\"no/such/image.png\" background=\"no/such/image2.png\"></body>";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
let url: Url = Url::parse("http://localhost").unwrap();
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
let cache = &mut HashMap::new();
|
let cache = &mut HashMap::new();
|
||||||
|
|
||||||
|
@ -190,7 +190,7 @@ mod passing {
|
||||||
#[test]
|
#[test]
|
||||||
fn no_frames() {
|
fn no_frames() {
|
||||||
let html = "<frameset><frame src=\"http://trackbook.com\"></frameset>";
|
let html = "<frameset><frame src=\"http://trackbook.com\"></frameset>";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
let url: Url = Url::parse("http://localhost").unwrap();
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
let cache = &mut HashMap::new();
|
let cache = &mut HashMap::new();
|
||||||
|
|
||||||
|
@ -222,7 +222,7 @@ mod passing {
|
||||||
#[test]
|
#[test]
|
||||||
fn no_iframes() {
|
fn no_iframes() {
|
||||||
let html = "<iframe src=\"http://trackbook.com\"></iframe>";
|
let html = "<iframe src=\"http://trackbook.com\"></iframe>";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
let url: Url = Url::parse("http://localhost").unwrap();
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
let cache = &mut HashMap::new();
|
let cache = &mut HashMap::new();
|
||||||
|
|
||||||
|
@ -258,7 +258,7 @@ mod passing {
|
||||||
<script>alert(1)</script>\
|
<script>alert(1)</script>\
|
||||||
</div>\
|
</div>\
|
||||||
";
|
";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
let url: Url = Url::parse("http://localhost").unwrap();
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
let cache = &mut HashMap::new();
|
let cache = &mut HashMap::new();
|
||||||
|
|
||||||
|
@ -293,7 +293,7 @@ mod passing {
|
||||||
fn keeps_integrity_for_linked_assets() {
|
fn keeps_integrity_for_linked_assets() {
|
||||||
let html = "<title>Has integrity</title>\
|
let html = "<title>Has integrity</title>\
|
||||||
<link integrity=\"sha384-12345\" rel=\"something\" href=\"https://some-site.com/some-file.ext\" />";
|
<link integrity=\"sha384-12345\" rel=\"something\" href=\"https://some-site.com/some-file.ext\" />";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
let url: Url = Url::parse("http://localhost").unwrap();
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
let cache = &mut HashMap::new();
|
let cache = &mut HashMap::new();
|
||||||
|
|
||||||
|
@ -328,7 +328,7 @@ mod passing {
|
||||||
<link integrity=\"\" rel=\"stylesheet\" href=\"data:;\"/>\
|
<link integrity=\"\" rel=\"stylesheet\" href=\"data:;\"/>\
|
||||||
<script integrity=\"\" src=\"some.js\"></script>\
|
<script integrity=\"\" src=\"some.js\"></script>\
|
||||||
";
|
";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
let url: Url = Url::parse("http://localhost").unwrap();
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
let cache = &mut HashMap::new();
|
let cache = &mut HashMap::new();
|
||||||
|
|
||||||
|
@ -366,7 +366,7 @@ mod passing {
|
||||||
<link integrity=\"sha384-123\" rel=\"something\" href=\"data:;\"/>\
|
<link integrity=\"sha384-123\" rel=\"something\" href=\"data:;\"/>\
|
||||||
<script integrity=\"sha384-456\" src=\"some.js\"></script>\
|
<script integrity=\"sha384-456\" src=\"some.js\"></script>\
|
||||||
";
|
";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
let url: Url = Url::parse("http://localhost").unwrap();
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
let cache = &mut HashMap::new();
|
let cache = &mut HashMap::new();
|
||||||
|
|
||||||
|
@ -410,7 +410,7 @@ mod passing {
|
||||||
</body>\
|
</body>\
|
||||||
</html>\
|
</html>\
|
||||||
";
|
";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
let url: Url = Url::parse("http://localhost").unwrap();
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
let cache = &mut HashMap::new();
|
let cache = &mut HashMap::new();
|
||||||
|
|
||||||
|
@ -452,7 +452,7 @@ mod passing {
|
||||||
</noscript>\
|
</noscript>\
|
||||||
</body>\
|
</body>\
|
||||||
</html>";
|
</html>";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
let url: Url = Url::parse("http://localhost").unwrap();
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
let cache = &mut HashMap::new();
|
let cache = &mut HashMap::new();
|
||||||
|
|
||||||
|
@ -488,7 +488,7 @@ mod passing {
|
||||||
#[test]
|
#[test]
|
||||||
fn preserves_script_type_json() {
|
fn preserves_script_type_json() {
|
||||||
let html = "<script id=\"data\" type=\"application/json\">{\"mono\":\"lith\"}</script>";
|
let html = "<script id=\"data\" type=\"application/json\">{\"mono\":\"lith\"}</script>";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
let url: Url = Url::parse("http://localhost").unwrap();
|
let url: Url = Url::parse("http://localhost").unwrap();
|
||||||
let cache = &mut HashMap::new();
|
let cache = &mut HashMap::new();
|
||||||
|
|
||||||
|
|
|
@ -13,9 +13,10 @@ mod passing {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_text_html_base64() {
|
fn parse_text_html_base64() {
|
||||||
let (media_type, data) = url::parse_data_url(&Url::parse("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg==").unwrap());
|
let (media_type, charset, data) = url::parse_data_url(&Url::parse("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg==").unwrap());
|
||||||
|
|
||||||
assert_eq!(media_type, "text/html");
|
assert_eq!(media_type, "text/html");
|
||||||
|
assert_eq!(charset, "US-ASCII");
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
String::from_utf8_lossy(&data),
|
String::from_utf8_lossy(&data),
|
||||||
"Work expands so as to fill the time available for its completion"
|
"Work expands so as to fill the time available for its completion"
|
||||||
|
@ -24,11 +25,12 @@ mod passing {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_text_html_utf8() {
|
fn parse_text_html_utf8() {
|
||||||
let (media_type, data) = url::parse_data_url(
|
let (media_type, charset, data) = url::parse_data_url(
|
||||||
&Url::parse("data:text/html;utf8,Work expands so as to fill the time available for its completion").unwrap(),
|
&Url::parse("data:text/html;charset=utf8,Work expands so as to fill the time available for its completion").unwrap(),
|
||||||
);
|
);
|
||||||
|
|
||||||
assert_eq!(media_type, "text/html");
|
assert_eq!(media_type, "text/html");
|
||||||
|
assert_eq!(charset, "utf8");
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
String::from_utf8_lossy(&data),
|
String::from_utf8_lossy(&data),
|
||||||
"Work expands so as to fill the time available for its completion"
|
"Work expands so as to fill the time available for its completion"
|
||||||
|
@ -37,7 +39,7 @@ mod passing {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_text_html_plaintext() {
|
fn parse_text_html_plaintext() {
|
||||||
let (media_type, data) = url::parse_data_url(
|
let (media_type, charset, data) = url::parse_data_url(
|
||||||
&Url::parse(
|
&Url::parse(
|
||||||
"data:text/html,Work expands so as to fill the time available for its completion",
|
"data:text/html,Work expands so as to fill the time available for its completion",
|
||||||
)
|
)
|
||||||
|
@ -45,6 +47,7 @@ mod passing {
|
||||||
);
|
);
|
||||||
|
|
||||||
assert_eq!(media_type, "text/html");
|
assert_eq!(media_type, "text/html");
|
||||||
|
assert_eq!(charset, "US-ASCII");
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
String::from_utf8_lossy(&data),
|
String::from_utf8_lossy(&data),
|
||||||
"Work expands so as to fill the time available for its completion"
|
"Work expands so as to fill the time available for its completion"
|
||||||
|
@ -53,26 +56,31 @@ mod passing {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_text_css_url_encoded() {
|
fn parse_text_css_url_encoded() {
|
||||||
let (media_type, data) =
|
let (media_type, charset, data) =
|
||||||
url::parse_data_url(&Url::parse("data:text/css,div{background-color:%23000}").unwrap());
|
url::parse_data_url(&Url::parse("data:text/css,div{background-color:%23000}").unwrap());
|
||||||
|
|
||||||
assert_eq!(media_type, "text/css");
|
assert_eq!(media_type, "text/css");
|
||||||
|
assert_eq!(charset, "US-ASCII");
|
||||||
assert_eq!(String::from_utf8_lossy(&data), "div{background-color:#000}");
|
assert_eq!(String::from_utf8_lossy(&data), "div{background-color:#000}");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_no_media_type_base64() {
|
fn parse_no_media_type_base64() {
|
||||||
let (media_type, data) = url::parse_data_url(&Url::parse("data:;base64,dGVzdA==").unwrap());
|
let (media_type, charset, data) =
|
||||||
|
url::parse_data_url(&Url::parse("data:;base64,dGVzdA==").unwrap());
|
||||||
|
|
||||||
assert_eq!(media_type, "");
|
assert_eq!(media_type, "text/plain");
|
||||||
|
assert_eq!(charset, "US-ASCII");
|
||||||
assert_eq!(String::from_utf8_lossy(&data), "test");
|
assert_eq!(String::from_utf8_lossy(&data), "test");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_no_media_type_no_encoding() {
|
fn parse_no_media_type_no_encoding() {
|
||||||
let (media_type, data) = url::parse_data_url(&Url::parse("data:;,test%20test").unwrap());
|
let (media_type, charset, data) =
|
||||||
|
url::parse_data_url(&Url::parse("data:;,test%20test").unwrap());
|
||||||
|
|
||||||
assert_eq!(media_type, "");
|
assert_eq!(media_type, "text/plain");
|
||||||
|
assert_eq!(charset, "US-ASCII");
|
||||||
assert_eq!(String::from_utf8_lossy(&data), "test test");
|
assert_eq!(String::from_utf8_lossy(&data), "test test");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -92,9 +100,10 @@ mod failing {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn empty_data_url() {
|
fn empty_data_url() {
|
||||||
let (media_type, data) = url::parse_data_url(&Url::parse("data:,").unwrap());
|
let (media_type, charset, data) = url::parse_data_url(&Url::parse("data:,").unwrap());
|
||||||
|
|
||||||
assert_eq!(media_type, "");
|
assert_eq!(media_type, "text/plain");
|
||||||
|
assert_eq!(charset, "US-ASCII");
|
||||||
assert_eq!(String::from_utf8_lossy(&data), "");
|
assert_eq!(String::from_utf8_lossy(&data), "");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -195,7 +195,7 @@ mod failing {
|
||||||
let dummy_url: Url = Url::parse("data:,").unwrap();
|
let dummy_url: Url = Url::parse("data:,").unwrap();
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
utils::detect_media_type(b"abcdef0123456789", &dummy_url),
|
utils::detect_media_type(b"abcdef0123456789", &dummy_url),
|
||||||
""
|
"application/octet-stream"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,4 +28,9 @@ mod passing {
|
||||||
fn three() {
|
fn three() {
|
||||||
assert_eq!(utils::indent(3), " ");
|
assert_eq!(utils::indent(3), " ");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn four() {
|
||||||
|
assert_eq!(utils::indent(4), " ");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
mod detect_media_type;
|
mod detect_media_type;
|
||||||
mod indent;
|
mod indent;
|
||||||
|
mod parse_content_type;
|
||||||
mod retrieve_asset;
|
mod retrieve_asset;
|
||||||
|
|
86
src/tests/utils/parse_content_type.rs
Normal file
86
src/tests/utils/parse_content_type.rs
Normal file
|
@ -0,0 +1,86 @@
|
||||||
|
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||||
|
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||||
|
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||||
|
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||||
|
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||||
|
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod passing {
|
||||||
|
use crate::utils;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn text_plain_utf8() {
|
||||||
|
let (media_type, charset, is_base64) = utils::parse_content_type("text/plain;charset=utf8");
|
||||||
|
assert_eq!(media_type, "text/plain");
|
||||||
|
assert_eq!(charset, "utf8");
|
||||||
|
assert!(!is_base64);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn text_plain_utf8_spaces() {
|
||||||
|
let (media_type, charset, is_base64) =
|
||||||
|
utils::parse_content_type(" text/plain ; charset=utf8 ");
|
||||||
|
assert_eq!(media_type, "text/plain");
|
||||||
|
assert_eq!(charset, "utf8");
|
||||||
|
assert!(!is_base64);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn empty() {
|
||||||
|
let (media_type, charset, is_base64) = utils::parse_content_type("");
|
||||||
|
assert_eq!(media_type, "text/plain");
|
||||||
|
assert_eq!(charset, "US-ASCII");
|
||||||
|
assert!(!is_base64);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn base64() {
|
||||||
|
let (media_type, charset, is_base64) = utils::parse_content_type(";base64");
|
||||||
|
assert_eq!(media_type, "text/plain");
|
||||||
|
assert_eq!(charset, "US-ASCII");
|
||||||
|
assert!(is_base64);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn text_html_base64() {
|
||||||
|
let (media_type, charset, is_base64) = utils::parse_content_type("text/html;base64");
|
||||||
|
assert_eq!(media_type, "text/html");
|
||||||
|
assert_eq!(charset, "US-ASCII");
|
||||||
|
assert!(is_base64);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn only_media_type() {
|
||||||
|
let (media_type, charset, is_base64) = utils::parse_content_type("text/html");
|
||||||
|
assert_eq!(media_type, "text/html");
|
||||||
|
assert_eq!(charset, "US-ASCII");
|
||||||
|
assert!(!is_base64);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn only_media_type_colon() {
|
||||||
|
let (media_type, charset, is_base64) = utils::parse_content_type("text/html;");
|
||||||
|
assert_eq!(media_type, "text/html");
|
||||||
|
assert_eq!(charset, "US-ASCII");
|
||||||
|
assert!(!is_base64);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn media_type_gb2312_filename() {
|
||||||
|
let (media_type, charset, is_base64) =
|
||||||
|
utils::parse_content_type("text/html;charset=GB2312;filename=index.html");
|
||||||
|
assert_eq!(media_type, "text/html");
|
||||||
|
assert_eq!(charset, "GB2312");
|
||||||
|
assert!(!is_base64);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn media_type_filename_gb2312() {
|
||||||
|
let (media_type, charset, is_base64) =
|
||||||
|
utils::parse_content_type("text/html;filename=index.html;charset=GB2312");
|
||||||
|
assert_eq!(media_type, "text/html");
|
||||||
|
assert_eq!(charset, "GB2312");
|
||||||
|
assert!(!is_base64);
|
||||||
|
}
|
||||||
|
}
|
|
@ -26,7 +26,7 @@ mod passing {
|
||||||
|
|
||||||
// If both source and target are data URLs,
|
// If both source and target are data URLs,
|
||||||
// ensure the result contains target data URL
|
// ensure the result contains target data URL
|
||||||
let (data, final_url, media_type) = utils::retrieve_asset(
|
let (data, final_url, media_type, charset) = utils::retrieve_asset(
|
||||||
cache,
|
cache,
|
||||||
&client,
|
&client,
|
||||||
&Url::parse("data:text/html;base64,c291cmNl").unwrap(),
|
&Url::parse("data:text/html;base64,c291cmNl").unwrap(),
|
||||||
|
@ -35,23 +35,16 @@ mod passing {
|
||||||
0,
|
0,
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
assert_eq!(&media_type, "text/html");
|
||||||
|
assert_eq!(&charset, "US-ASCII");
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
url::create_data_url(&media_type, &data, &final_url),
|
url::create_data_url(&media_type, &data, &final_url),
|
||||||
url::create_data_url(
|
Url::parse("data:text/html;base64,dGFyZ2V0").unwrap(),
|
||||||
"text/html",
|
|
||||||
"target".as_bytes(),
|
|
||||||
&Url::parse("data:text/html;base64,c291cmNl").unwrap()
|
|
||||||
)
|
|
||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
final_url,
|
final_url,
|
||||||
url::create_data_url(
|
Url::parse("data:text/html;base64,dGFyZ2V0").unwrap(),
|
||||||
"text/html",
|
|
||||||
"target".as_bytes(),
|
|
||||||
&Url::parse("data:text/html;base64,c291cmNl").unwrap()
|
|
||||||
)
|
|
||||||
);
|
);
|
||||||
assert_eq!(&media_type, "text/html");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -66,7 +59,7 @@ mod passing {
|
||||||
|
|
||||||
// Inclusion of local assets from local sources should be allowed
|
// Inclusion of local assets from local sources should be allowed
|
||||||
let cwd = env::current_dir().unwrap();
|
let cwd = env::current_dir().unwrap();
|
||||||
let (data, final_url, _media_type) = utils::retrieve_asset(
|
let (data, final_url, media_type, charset) = utils::retrieve_asset(
|
||||||
cache,
|
cache,
|
||||||
&client,
|
&client,
|
||||||
&Url::parse(&format!(
|
&Url::parse(&format!(
|
||||||
|
@ -85,7 +78,9 @@ mod passing {
|
||||||
0,
|
0,
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
assert_eq!(url::create_data_url("application/javascript", &data, &final_url), Url::parse("data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==").unwrap());
|
assert_eq!(&media_type, "application/javascript");
|
||||||
|
assert_eq!(&charset, "");
|
||||||
|
assert_eq!(url::create_data_url(&media_type, &data, &final_url), Url::parse("data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==").unwrap());
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
final_url,
|
final_url,
|
||||||
Url::parse(&format!(
|
Url::parse(&format!(
|
||||||
|
|
38
src/url.rs
38
src/url.rs
|
@ -1,7 +1,7 @@
|
||||||
use base64;
|
use base64;
|
||||||
use url::{form_urlencoded, Url};
|
use url::{form_urlencoded, Url};
|
||||||
|
|
||||||
use crate::utils::detect_media_type;
|
use crate::utils::{detect_media_type, parse_content_type};
|
||||||
|
|
||||||
pub fn clean_url(url: Url) -> Url {
|
pub fn clean_url(url: Url) -> Url {
|
||||||
let mut url = url.clone();
|
let mut url = url.clone();
|
||||||
|
@ -37,42 +37,26 @@ pub fn is_url_and_has_protocol(input: &str) -> bool {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn parse_data_url(url: &Url) -> (String, Vec<u8>) {
|
pub fn parse_data_url(url: &Url) -> (String, String, Vec<u8>) {
|
||||||
let path: String = url.path().to_string();
|
let path: String = url.path().to_string();
|
||||||
let comma_loc: usize = path.find(',').unwrap_or(path.len());
|
let comma_loc: usize = path.find(',').unwrap_or(path.len());
|
||||||
|
|
||||||
let meta_data: String = path.chars().take(comma_loc).collect();
|
// Split data URL into meta data and raw data
|
||||||
let raw_data: String = path.chars().skip(comma_loc + 1).collect();
|
let content_type: String = path.chars().take(comma_loc).collect();
|
||||||
|
let data: String = path.chars().skip(comma_loc + 1).collect();
|
||||||
|
|
||||||
let text: String = percent_decode(raw_data);
|
// Parse meta data
|
||||||
|
let (media_type, charset, is_base64) = parse_content_type(&content_type);
|
||||||
|
|
||||||
let meta_data_items: Vec<&str> = meta_data.split(';').collect();
|
// Parse raw data into vector of bytes
|
||||||
let mut media_type: String = str!();
|
let text: String = percent_decode(data);
|
||||||
let mut encoding: &str = "";
|
let blob: Vec<u8> = if is_base64 {
|
||||||
|
|
||||||
let mut i: i8 = 0;
|
|
||||||
for item in &meta_data_items {
|
|
||||||
if i == 0 {
|
|
||||||
media_type = str!(item);
|
|
||||||
} else {
|
|
||||||
if item.eq_ignore_ascii_case("base64")
|
|
||||||
|| item.eq_ignore_ascii_case("utf8")
|
|
||||||
|| item.eq_ignore_ascii_case("charset=UTF-8")
|
|
||||||
{
|
|
||||||
encoding = item;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
i = i + 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
let data: Vec<u8> = if encoding.eq_ignore_ascii_case("base64") {
|
|
||||||
base64::decode(&text).unwrap_or(vec![])
|
base64::decode(&text).unwrap_or(vec![])
|
||||||
} else {
|
} else {
|
||||||
text.as_bytes().to_vec()
|
text.as_bytes().to_vec()
|
||||||
};
|
};
|
||||||
|
|
||||||
(media_type, data)
|
(media_type, charset, blob)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn percent_decode(input: String) -> String {
|
pub fn percent_decode(input: String) -> String {
|
||||||
|
|
122
src/utils.rs
122
src/utils.rs
|
@ -33,25 +33,63 @@ const MAGIC: [[&[u8]; 2]; 18] = [
|
||||||
[b"....moov", b"video/quicktime"],
|
[b"....moov", b"video/quicktime"],
|
||||||
[b"\x1A\x45\xDF\xA3", b"video/webm"],
|
[b"\x1A\x45\xDF\xA3", b"video/webm"],
|
||||||
];
|
];
|
||||||
const PLAINTEXT_MEDIA_TYPES: &[&str] = &["application/javascript", "image/svg+xml"];
|
const PLAINTEXT_MEDIA_TYPES: &[&str] = &[
|
||||||
|
"application/javascript",
|
||||||
|
"application/json",
|
||||||
|
"image/svg+xml",
|
||||||
|
];
|
||||||
|
|
||||||
pub fn detect_media_type(data: &[u8], url: &Url) -> String {
|
pub fn detect_media_type(data: &[u8], url: &Url) -> String {
|
||||||
|
// At first attempt to read file's header
|
||||||
for magic_item in MAGIC.iter() {
|
for magic_item in MAGIC.iter() {
|
||||||
if data.starts_with(magic_item[0]) {
|
if data.starts_with(magic_item[0]) {
|
||||||
return String::from_utf8(magic_item[1].to_vec()).unwrap();
|
return String::from_utf8(magic_item[1].to_vec()).unwrap();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if url.path().to_lowercase().ends_with(".svg") {
|
// If header didn't match any known magic signatures,
|
||||||
return str!("image/svg+xml");
|
// try to guess media type from file name
|
||||||
|
let parts: Vec<&str> = url.path().split('/').collect();
|
||||||
|
detect_media_type_by_file_name(parts.last().unwrap())
|
||||||
}
|
}
|
||||||
|
|
||||||
str!()
|
pub fn detect_media_type_by_file_name(filename: &str) -> String {
|
||||||
}
|
let filename_lowercased: &str = &filename.to_lowercase();
|
||||||
|
let parts: Vec<&str> = filename_lowercased.split('.').collect();
|
||||||
|
|
||||||
pub fn is_plaintext_media_type(media_type: &str) -> bool {
|
let mime: &str = match parts.last() {
|
||||||
media_type.to_lowercase().as_str().starts_with("text/")
|
Some(v) => match *v {
|
||||||
|| PLAINTEXT_MEDIA_TYPES.contains(&media_type.to_lowercase().as_str())
|
"avi" => "video/avi",
|
||||||
|
"bmp" => "image/bmp",
|
||||||
|
"css" => "text/css",
|
||||||
|
"flac" => "audio/flac",
|
||||||
|
"gif" => "image/gif",
|
||||||
|
"htm" | "html" => "text/html",
|
||||||
|
"ico" => "image/x-icon",
|
||||||
|
"jpeg" | "jpg" => "image/jpeg",
|
||||||
|
"js" => "application/javascript",
|
||||||
|
"json" => "application/json",
|
||||||
|
"mp3" => "audio/mpeg",
|
||||||
|
"mp4" | "m4v" => "video/mp4",
|
||||||
|
"ogg" => "audio/ogg",
|
||||||
|
"ogv" => "video/ogg",
|
||||||
|
"pdf" => "application/pdf",
|
||||||
|
"png" => "image/png",
|
||||||
|
"svg" => "image/svg+xml",
|
||||||
|
"swf" => "application/x-shockwave-flash",
|
||||||
|
"tif" | "tiff" => "image/tiff",
|
||||||
|
"txt" => "text/plain",
|
||||||
|
"wav" => "audio/wav",
|
||||||
|
"webp" => "image/webp",
|
||||||
|
"woff" => "font/woff",
|
||||||
|
"woff2" => "font/woff2",
|
||||||
|
"xml" => "text/xml",
|
||||||
|
&_ => "application/octet-stream",
|
||||||
|
},
|
||||||
|
None => "application/octet-stream",
|
||||||
|
};
|
||||||
|
|
||||||
|
mime.to_string()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn indent(level: u32) -> String {
|
pub fn indent(level: u32) -> String {
|
||||||
|
@ -66,6 +104,38 @@ pub fn indent(level: u32) -> String {
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn is_plaintext_media_type(media_type: &str) -> bool {
|
||||||
|
media_type.to_lowercase().as_str().starts_with("text/")
|
||||||
|
|| PLAINTEXT_MEDIA_TYPES.contains(&media_type.to_lowercase().as_str())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn parse_content_type(content_type: &str) -> (String, String, bool) {
|
||||||
|
let mut media_type: String = str!("text/plain");
|
||||||
|
let mut charset: String = str!("US-ASCII");
|
||||||
|
let mut is_base64: bool = false;
|
||||||
|
|
||||||
|
// Parse meta data
|
||||||
|
let content_type_items: Vec<&str> = content_type.split(';').collect();
|
||||||
|
let mut i: i8 = 0;
|
||||||
|
for item in &content_type_items {
|
||||||
|
if i == 0 {
|
||||||
|
if item.trim().len() > 0 {
|
||||||
|
media_type = str!(item.trim());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if item.trim().eq_ignore_ascii_case("base64") {
|
||||||
|
is_base64 = true;
|
||||||
|
} else if item.trim().starts_with("charset=") {
|
||||||
|
charset = item.trim().chars().skip(8).collect();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
(media_type, charset, is_base64)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn retrieve_asset(
|
pub fn retrieve_asset(
|
||||||
cache: &mut HashMap<String, Vec<u8>>,
|
cache: &mut HashMap<String, Vec<u8>>,
|
||||||
client: &Client,
|
client: &Client,
|
||||||
|
@ -73,10 +143,10 @@ pub fn retrieve_asset(
|
||||||
url: &Url,
|
url: &Url,
|
||||||
options: &Options,
|
options: &Options,
|
||||||
depth: u32,
|
depth: u32,
|
||||||
) -> Result<(Vec<u8>, Url, String), reqwest::Error> {
|
) -> Result<(Vec<u8>, Url, String, String), reqwest::Error> {
|
||||||
if url.scheme() == "data" {
|
if url.scheme() == "data" {
|
||||||
let (media_type, data) = parse_data_url(url);
|
let (media_type, charset, data) = parse_data_url(url);
|
||||||
Ok((data, url.clone(), media_type))
|
Ok((data, url.clone(), media_type, charset))
|
||||||
} else if url.scheme() == "file" {
|
} else if url.scheme() == "file" {
|
||||||
// Check if parent_url is also file:/// (if not, then we don't embed the asset)
|
// Check if parent_url is also file:/// (if not, then we don't embed the asset)
|
||||||
if parent_url.scheme() != "file" {
|
if parent_url.scheme() != "file" {
|
||||||
|
@ -123,7 +193,14 @@ pub fn retrieve_asset(
|
||||||
eprintln!("{}{}", indent(depth).as_str(), &url);
|
eprintln!("{}{}", indent(depth).as_str(), &url);
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok((fs::read(&path).expect(""), url.clone(), str!()))
|
let file_blob: Vec<u8> = fs::read(&path).expect("Unable to read file");
|
||||||
|
|
||||||
|
Ok((
|
||||||
|
file_blob.clone(),
|
||||||
|
url.clone(),
|
||||||
|
detect_media_type(&file_blob, url),
|
||||||
|
str!(),
|
||||||
|
))
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if !options.silent {
|
if !options.silent {
|
||||||
|
@ -147,16 +224,19 @@ pub fn retrieve_asset(
|
||||||
let cache_key: String = clean_url(url.clone()).as_str().to_string();
|
let cache_key: String = clean_url(url.clone()).as_str().to_string();
|
||||||
|
|
||||||
if cache.contains_key(&cache_key) {
|
if cache.contains_key(&cache_key) {
|
||||||
// URL is in cache,
|
// URL is in cache, we get and return it
|
||||||
// we get and return it
|
|
||||||
if !options.silent {
|
if !options.silent {
|
||||||
eprintln!("{}{} (from cache)", indent(depth).as_str(), &url);
|
eprintln!("{}{} (from cache)", indent(depth).as_str(), &url);
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok((cache.get(&cache_key).unwrap().to_vec(), url.clone(), str!()))
|
Ok((
|
||||||
|
cache.get(&cache_key).unwrap().to_vec(),
|
||||||
|
url.clone(),
|
||||||
|
str!(),
|
||||||
|
str!(),
|
||||||
|
))
|
||||||
} else {
|
} else {
|
||||||
// URL not in cache,
|
// URL not in cache, we retrieve the file
|
||||||
// we retrieve the file
|
|
||||||
match client.get(url.as_str()).send() {
|
match client.get(url.as_str()).send() {
|
||||||
Ok(mut response) => {
|
Ok(mut response) => {
|
||||||
if !options.ignore_errors && response.status() != 200 {
|
if !options.ignore_errors && response.status() != 200 {
|
||||||
|
@ -192,18 +272,20 @@ pub fn retrieve_asset(
|
||||||
let mut data: Vec<u8> = vec![];
|
let mut data: Vec<u8> = vec![];
|
||||||
response.copy_to(&mut data).unwrap();
|
response.copy_to(&mut data).unwrap();
|
||||||
|
|
||||||
// Attempt to obtain media type by reading Content-Type header
|
// Attempt to obtain media type and charset by reading Content-Type header
|
||||||
let media_type: &str = response
|
let content_type: &str = response
|
||||||
.headers()
|
.headers()
|
||||||
.get(CONTENT_TYPE)
|
.get(CONTENT_TYPE)
|
||||||
.and_then(|header| header.to_str().ok())
|
.and_then(|header| header.to_str().ok())
|
||||||
.unwrap_or("");
|
.unwrap_or("");
|
||||||
|
|
||||||
|
let (media_type, charset, _is_base64) = parse_content_type(&content_type);
|
||||||
|
|
||||||
// Add retrieved resource to cache
|
// Add retrieved resource to cache
|
||||||
cache.insert(new_cache_key, data.clone());
|
cache.insert(new_cache_key, data.clone());
|
||||||
|
|
||||||
// Return
|
// Return
|
||||||
Ok((data, response.url().clone(), media_type.to_string()))
|
Ok((data, response.url().clone(), media_type, charset))
|
||||||
}
|
}
|
||||||
Err(error) => {
|
Err(error) => {
|
||||||
if !options.silent {
|
if !options.silent {
|
||||||
|
|
Loading…
Reference in a new issue