don't modify base url by default, add option for setting it
This commit is contained in:
parent
5ac520b4da
commit
15d98a7269
@ -54,6 +54,7 @@ The guide can be found [here](docs/containers.md)
|
|||||||
---------------------------------------------------
|
---------------------------------------------------
|
||||||
|
|
||||||
## Options
|
## Options
|
||||||
|
- `-b`: Use custom base URL
|
||||||
- `-c`: Exclude CSS
|
- `-c`: Exclude CSS
|
||||||
- `-e`: Ignore network errors
|
- `-e`: Ignore network errors
|
||||||
- `-f`: Omit frames
|
- `-f`: Omit frames
|
||||||
@ -62,6 +63,7 @@ The guide can be found [here](docs/containers.md)
|
|||||||
- `-I`: Isolate the document
|
- `-I`: Isolate the document
|
||||||
- `-j`: Exclude JavaScript
|
- `-j`: Exclude JavaScript
|
||||||
- `-k`: Accept invalid X.509 (TLS) certificates
|
- `-k`: Accept invalid X.509 (TLS) certificates
|
||||||
|
- `-M`: Don’t add timestamp and source information
|
||||||
- `-o`: Write output to file
|
- `-o`: Write output to file
|
||||||
- `-s`: Be quiet
|
- `-s`: Be quiet
|
||||||
- `-t`: Adjust network request timeout
|
- `-t`: Adjust network request timeout
|
||||||
|
27
docs/arch/0008-base-tag.md
Normal file
27
docs/arch/0008-base-tag.md
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
# 8. Base Tag
|
||||||
|
|
||||||
|
Date: 2020-11-22
|
||||||
|
|
||||||
|
## Status
|
||||||
|
|
||||||
|
Accepted
|
||||||
|
|
||||||
|
## Context
|
||||||
|
|
||||||
|
HTML documents may contain a `base` tag within `head`, which influences the URL resolution prefix for anchor and relative links as well as dynamically loaded resources. Sometimes, to make certain saved pages function closer to how they originally operated, a `base` tag specifying the source page's URL may need to be added to the document.
|
||||||
|
|
||||||
|
## Decision
|
||||||
|
|
||||||
|
Adding the `base` tag should be optional. Saved documents should not contain the `base` tag unless it was requested by the user, or unless the document originally had the `base` tag in it. Only documents downloaded from remote resources should be able to obtain a new `base` tag; existing `base` tags within documents saved from data URLs and local resources should be kept intact.
|
||||||
|
The `href` attribute value of the original `base` tag should be used for resolving the document's relative links instead of the document's own URL.
|
||||||
|
There can be only one such tag. If multiple `base` tags are provided, only the first encountered tag will end up being used.
|
||||||
|
|
||||||
|
## Consequences
|
||||||
|
|
||||||
|
In case the remote document had the `base` tag in it:
|
||||||
|
- By default: the `href` attribute should be resolved to a full URL if it's relative, kept empty in case it was empty or non-existent, all other attributes of that tag should be kept intact.
|
||||||
|
- If `base` tag was requested to be added: the existing `base` tag's `href` attribute should be set to the page's full URL, all other attributes should be kept intact.
|
||||||
|
|
||||||
|
In case the remote document didn't have the `base` tag in it:
|
||||||
|
- By default: no `base` tag is added to the document, it gets saved to disk without having one.
|
||||||
|
- If `base` tag was requested to be added: the added `base` tag should contain only one attribute `href`, equal to the remote URL of that HTML document.
|
555
src/html.rs
555
src/html.rs
@ -6,7 +6,7 @@ use html5ever::rcdom::{Handle, NodeData, RcDom};
|
|||||||
use html5ever::serialize::{serialize, SerializeOpts};
|
use html5ever::serialize::{serialize, SerializeOpts};
|
||||||
use html5ever::tendril::{format_tendril, Tendril, TendrilSink};
|
use html5ever::tendril::{format_tendril, Tendril, TendrilSink};
|
||||||
use html5ever::tree_builder::{Attribute, TreeSink};
|
use html5ever::tree_builder::{Attribute, TreeSink};
|
||||||
use html5ever::{local_name, namespace_url, ns};
|
use html5ever::{local_name, namespace_url, ns, LocalName};
|
||||||
use reqwest::blocking::Client;
|
use reqwest::blocking::Client;
|
||||||
use reqwest::Url;
|
use reqwest::Url;
|
||||||
use sha2::{Digest, Sha256, Sha384, Sha512};
|
use sha2::{Digest, Sha256, Sha384, Sha512};
|
||||||
@ -29,31 +29,6 @@ struct SrcSetItem<'a> {
|
|||||||
|
|
||||||
const ICON_VALUES: &[&str] = &["icon", "shortcut icon"];
|
const ICON_VALUES: &[&str] = &["icon", "shortcut icon"];
|
||||||
|
|
||||||
pub fn add_base_tag(document: &Handle, url: String) -> RcDom {
|
|
||||||
let mut buf: Vec<u8> = Vec::new();
|
|
||||||
serialize(&mut buf, document, SerializeOpts::default())
|
|
||||||
.expect("unable to serialize DOM into buffer");
|
|
||||||
let result = String::from_utf8(buf).unwrap();
|
|
||||||
|
|
||||||
let mut dom = html_to_dom(&result);
|
|
||||||
let doc = dom.get_document();
|
|
||||||
let html = get_child_node_by_name(&doc, "html");
|
|
||||||
let head = get_child_node_by_name(&html, "head");
|
|
||||||
let favicon_node = dom.create_element(
|
|
||||||
QualName::new(None, ns!(), local_name!("base")),
|
|
||||||
vec![Attribute {
|
|
||||||
name: QualName::new(None, ns!(), local_name!("href")),
|
|
||||||
value: format_tendril!("{}", url),
|
|
||||||
}],
|
|
||||||
Default::default(),
|
|
||||||
);
|
|
||||||
|
|
||||||
// Insert BASE tag into HEAD
|
|
||||||
head.children.borrow_mut().push(favicon_node.clone());
|
|
||||||
|
|
||||||
dom
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn add_favicon(document: &Handle, favicon_data_url: String) -> RcDom {
|
pub fn add_favicon(document: &Handle, favicon_data_url: String) -> RcDom {
|
||||||
let mut buf: Vec<u8> = Vec::new();
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
serialize(&mut buf, document, SerializeOpts::default())
|
serialize(&mut buf, document, SerializeOpts::default())
|
||||||
@ -62,30 +37,49 @@ pub fn add_favicon(document: &Handle, favicon_data_url: String) -> RcDom {
|
|||||||
|
|
||||||
let mut dom = html_to_dom(&result);
|
let mut dom = html_to_dom(&result);
|
||||||
let doc = dom.get_document();
|
let doc = dom.get_document();
|
||||||
let html = get_child_node_by_name(&doc, "html");
|
if let Some(html) = get_child_node_by_name(&doc, "html") {
|
||||||
let head = get_child_node_by_name(&html, "head");
|
if let Some(head) = get_child_node_by_name(&html, "head") {
|
||||||
let favicon_node = dom.create_element(
|
let favicon_node = dom.create_element(
|
||||||
QualName::new(None, ns!(), local_name!("link")),
|
QualName::new(None, ns!(), local_name!("link")),
|
||||||
vec![
|
vec![
|
||||||
Attribute {
|
Attribute {
|
||||||
name: QualName::new(None, ns!(), local_name!("rel")),
|
name: QualName::new(None, ns!(), local_name!("rel")),
|
||||||
value: format_tendril!("icon"),
|
value: format_tendril!("icon"),
|
||||||
},
|
},
|
||||||
Attribute {
|
Attribute {
|
||||||
name: QualName::new(None, ns!(), local_name!("href")),
|
name: QualName::new(None, ns!(), local_name!("href")),
|
||||||
value: format_tendril!("{}", favicon_data_url),
|
value: format_tendril!("{}", favicon_data_url),
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
Default::default(),
|
Default::default(),
|
||||||
);
|
);
|
||||||
|
// Insert favicon LINK tag into HEAD
|
||||||
// Insert favicon LINK tag into HEAD
|
head.children.borrow_mut().push(favicon_node.clone());
|
||||||
head.children.borrow_mut().push(favicon_node.clone());
|
}
|
||||||
|
}
|
||||||
|
|
||||||
dom
|
dom
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn csp(options: &Options) -> String {
|
pub fn check_integrity(data: &[u8], integrity: &str) -> bool {
|
||||||
|
if integrity.starts_with("sha256-") {
|
||||||
|
let mut hasher = Sha256::new();
|
||||||
|
hasher.update(data);
|
||||||
|
base64::encode(hasher.finalize()) == integrity[7..]
|
||||||
|
} else if integrity.starts_with("sha384-") {
|
||||||
|
let mut hasher = Sha384::new();
|
||||||
|
hasher.update(data);
|
||||||
|
base64::encode(hasher.finalize()) == integrity[7..]
|
||||||
|
} else if integrity.starts_with("sha512-") {
|
||||||
|
let mut hasher = Sha512::new();
|
||||||
|
hasher.update(data);
|
||||||
|
base64::encode(hasher.finalize()) == integrity[7..]
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn compose_csp(options: &Options) -> String {
|
||||||
let mut string_list = vec![];
|
let mut string_list = vec![];
|
||||||
|
|
||||||
if options.isolate {
|
if options.isolate {
|
||||||
@ -117,6 +111,42 @@ pub fn csp(options: &Options) -> String {
|
|||||||
string_list.join(" ")
|
string_list.join(" ")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn create_metadata_tag(url: &str) -> String {
|
||||||
|
let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
|
||||||
|
|
||||||
|
// Safe to unwrap (we just put this through an HTTP request)
|
||||||
|
match Url::parse(url) {
|
||||||
|
Ok(mut clean_url) => {
|
||||||
|
clean_url.set_fragment(None);
|
||||||
|
|
||||||
|
// Prevent credentials from getting into metadata
|
||||||
|
if is_http_url(url) {
|
||||||
|
// Only HTTP(S) URLs may feature credentials
|
||||||
|
clean_url.set_username("").unwrap();
|
||||||
|
clean_url.set_password(None).unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
if is_http_url(url) {
|
||||||
|
format!(
|
||||||
|
"<!-- Saved from {} at {} using {} v{} -->",
|
||||||
|
&clean_url,
|
||||||
|
timestamp,
|
||||||
|
env!("CARGO_PKG_NAME"),
|
||||||
|
env!("CARGO_PKG_VERSION"),
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
format!(
|
||||||
|
"<!-- Saved from local source at {} using {} v{} -->",
|
||||||
|
timestamp,
|
||||||
|
env!("CARGO_PKG_NAME"),
|
||||||
|
env!("CARGO_PKG_VERSION"),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(_) => str!(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn embed_srcset(
|
pub fn embed_srcset(
|
||||||
cache: &mut HashMap<String, Vec<u8>>,
|
cache: &mut HashMap<String, Vec<u8>>,
|
||||||
client: &Client,
|
client: &Client,
|
||||||
@ -188,15 +218,54 @@ pub fn embed_srcset(
|
|||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_child_node_by_name(handle: &Handle, node_name: &str) -> Handle {
|
pub fn find_base_node(node: &Handle) -> Option<Handle> {
|
||||||
let children = handle.children.borrow();
|
match node.data {
|
||||||
|
NodeData::Document => {
|
||||||
|
// Dig deeper
|
||||||
|
for child in node.children.borrow().iter() {
|
||||||
|
if let Some(base_node) = find_base_node(child) {
|
||||||
|
return Some(base_node);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
NodeData::Element { ref name, .. } => {
|
||||||
|
match name.local.as_ref() {
|
||||||
|
"head" => {
|
||||||
|
return get_child_node_by_name(node, "base");
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Dig deeper
|
||||||
|
for child in node.children.borrow().iter() {
|
||||||
|
if let Some(base_node) = find_base_node(child) {
|
||||||
|
return Some(base_node);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_base_url(handle: &Handle) -> Option<String> {
|
||||||
|
if let Some(base_node) = find_base_node(handle) {
|
||||||
|
get_node_attr(&base_node, "href")
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_child_node_by_name(parent: &Handle, node_name: &str) -> Option<Handle> {
|
||||||
|
let children = parent.children.borrow();
|
||||||
let matching_children = children.iter().find(|child| match child.data {
|
let matching_children = children.iter().find(|child| match child.data {
|
||||||
NodeData::Element { ref name, .. } => &*name.local == node_name,
|
NodeData::Element { ref name, .. } => &*name.local == node_name,
|
||||||
_ => false,
|
_ => false,
|
||||||
});
|
});
|
||||||
match matching_children {
|
match matching_children {
|
||||||
Some(node) => node.clone(),
|
Some(node) => Some(node.clone()),
|
||||||
_ => handle.clone(),
|
_ => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -207,79 +276,25 @@ pub fn get_node_name(node: &Handle) -> Option<&'_ str> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_parent_node(node: &Handle) -> Handle {
|
pub fn get_node_attr(node: &Handle, attr_name: &str) -> Option<String> {
|
||||||
let parent = node.parent.take().clone();
|
match &node.data {
|
||||||
|
NodeData::Element { ref attrs, .. } => {
|
||||||
|
for attr in attrs.borrow().iter() {
|
||||||
|
if &*attr.name.local == attr_name {
|
||||||
|
return Some(str!(&*attr.value));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_parent_node(child: &Handle) -> Handle {
|
||||||
|
let parent = child.parent.take().clone();
|
||||||
parent.and_then(|node| node.upgrade()).unwrap()
|
parent.and_then(|node| node.upgrade()).unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn has_proper_integrity(data: &[u8], integrity: &str) -> bool {
|
|
||||||
if integrity.starts_with("sha256-") {
|
|
||||||
let mut hasher = Sha256::new();
|
|
||||||
hasher.update(data);
|
|
||||||
base64::encode(hasher.finalize()) == integrity[7..]
|
|
||||||
} else if integrity.starts_with("sha384-") {
|
|
||||||
let mut hasher = Sha384::new();
|
|
||||||
hasher.update(data);
|
|
||||||
base64::encode(hasher.finalize()) == integrity[7..]
|
|
||||||
} else if integrity.starts_with("sha512-") {
|
|
||||||
let mut hasher = Sha512::new();
|
|
||||||
hasher.update(data);
|
|
||||||
base64::encode(hasher.finalize()) == integrity[7..]
|
|
||||||
} else {
|
|
||||||
false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn has_base_tag(handle: &Handle) -> bool {
|
|
||||||
let mut found_base_tag: bool = false;
|
|
||||||
|
|
||||||
match handle.data {
|
|
||||||
NodeData::Document => {
|
|
||||||
// Dig deeper
|
|
||||||
for child in handle.children.borrow().iter() {
|
|
||||||
if has_base_tag(child) {
|
|
||||||
found_base_tag = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
NodeData::Element {
|
|
||||||
ref name,
|
|
||||||
ref attrs,
|
|
||||||
..
|
|
||||||
} => {
|
|
||||||
match name.local.as_ref() {
|
|
||||||
"base" => {
|
|
||||||
let attrs_mut = &mut attrs.borrow_mut();
|
|
||||||
|
|
||||||
for attr in attrs_mut.iter_mut() {
|
|
||||||
if &attr.name.local == "href" {
|
|
||||||
if !attr.value.trim().is_empty() {
|
|
||||||
found_base_tag = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ => {}
|
|
||||||
}
|
|
||||||
|
|
||||||
if !found_base_tag {
|
|
||||||
// Dig deeper
|
|
||||||
for child in handle.children.borrow().iter() {
|
|
||||||
if has_base_tag(child) {
|
|
||||||
found_base_tag = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ => {}
|
|
||||||
}
|
|
||||||
|
|
||||||
found_base_tag
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn has_favicon(handle: &Handle) -> bool {
|
pub fn has_favicon(handle: &Handle) -> bool {
|
||||||
let mut found_favicon: bool = false;
|
let mut found_favicon: bool = false;
|
||||||
|
|
||||||
@ -293,21 +308,12 @@ pub fn has_favicon(handle: &Handle) -> bool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
NodeData::Element {
|
NodeData::Element { ref name, .. } => {
|
||||||
ref name,
|
|
||||||
ref attrs,
|
|
||||||
..
|
|
||||||
} => {
|
|
||||||
match name.local.as_ref() {
|
match name.local.as_ref() {
|
||||||
"link" => {
|
"link" => {
|
||||||
let attrs_mut = &mut attrs.borrow_mut();
|
if let Some(attr_value) = get_node_attr(handle, "rel") {
|
||||||
|
if is_icon(attr_value.trim()) {
|
||||||
for attr in attrs_mut.iter_mut() {
|
found_favicon = true;
|
||||||
if &attr.name.local == "rel" {
|
|
||||||
if is_icon(attr.value.trim()) {
|
|
||||||
found_favicon = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -341,46 +347,82 @@ pub fn is_icon(attr_value: &str) -> bool {
|
|||||||
ICON_VALUES.contains(&attr_value.to_lowercase().as_str())
|
ICON_VALUES.contains(&attr_value.to_lowercase().as_str())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn metadata_tag(url: &str) -> String {
|
pub fn set_base_url(document: &Handle, desired_base_href: String) -> RcDom {
|
||||||
let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
|
serialize(&mut buf, document, SerializeOpts::default())
|
||||||
|
.expect("unable to serialize DOM into buffer");
|
||||||
|
let result = String::from_utf8(buf).unwrap();
|
||||||
|
|
||||||
// Safe to unwrap (we just put this through an HTTP request)
|
let mut dom = html_to_dom(&result);
|
||||||
match Url::parse(url) {
|
let doc = dom.get_document();
|
||||||
Ok(mut clean_url) => {
|
if let Some(html_node) = get_child_node_by_name(&doc, "html") {
|
||||||
clean_url.set_fragment(None);
|
if let Some(head_node) = get_child_node_by_name(&html_node, "head") {
|
||||||
|
// Check if BASE node already exists in the DOM tree
|
||||||
// Prevent credentials from getting into metadata
|
if let Some(base_node) = get_child_node_by_name(&head_node, "base") {
|
||||||
if is_http_url(url) {
|
set_node_attr(&base_node, "href", Some(desired_base_href));
|
||||||
// Only HTTP(S) URLs may feature credentials
|
|
||||||
clean_url.set_username("").unwrap();
|
|
||||||
clean_url.set_password(None).unwrap();
|
|
||||||
}
|
|
||||||
|
|
||||||
if is_http_url(url) {
|
|
||||||
format!(
|
|
||||||
"<!-- Saved from {} at {} using {} v{} -->",
|
|
||||||
&clean_url,
|
|
||||||
timestamp,
|
|
||||||
env!("CARGO_PKG_NAME"),
|
|
||||||
env!("CARGO_PKG_VERSION"),
|
|
||||||
)
|
|
||||||
} else {
|
} else {
|
||||||
format!(
|
let base_node = dom.create_element(
|
||||||
"<!-- Saved from local source at {} using {} v{} -->",
|
QualName::new(None, ns!(), local_name!("base")),
|
||||||
timestamp,
|
vec![Attribute {
|
||||||
env!("CARGO_PKG_NAME"),
|
name: QualName::new(None, ns!(), local_name!("href")),
|
||||||
env!("CARGO_PKG_VERSION"),
|
value: format_tendril!("{}", desired_base_href),
|
||||||
)
|
}],
|
||||||
|
Default::default(),
|
||||||
|
);
|
||||||
|
|
||||||
|
// Insert newly created BASE node into HEAD
|
||||||
|
head_node.children.borrow_mut().push(base_node.clone());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Err(_) => str!(),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
dom
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn set_node_attr(node: &Handle, attr_name: &str, attr_value: Option<String>) {
|
||||||
|
match &node.data {
|
||||||
|
NodeData::Element { ref attrs, .. } => {
|
||||||
|
let attrs_mut = &mut attrs.borrow_mut();
|
||||||
|
let mut i = 0;
|
||||||
|
let mut found_existing_attr: bool = false;
|
||||||
|
|
||||||
|
while i < attrs_mut.len() {
|
||||||
|
if &attrs_mut[i].name.local == attr_name {
|
||||||
|
found_existing_attr = true;
|
||||||
|
|
||||||
|
if let Some(attr_value) = attr_value.clone() {
|
||||||
|
&attrs_mut[i].value.clear();
|
||||||
|
&attrs_mut[i].value.push_slice(&attr_value.as_str());
|
||||||
|
} else {
|
||||||
|
// Remove attr completely if attr_value is not defined
|
||||||
|
attrs_mut.remove(i);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if !found_existing_attr {
|
||||||
|
// Add new attribute (since originally the target node didn't have it)
|
||||||
|
if let Some(attr_value) = attr_value.clone() {
|
||||||
|
let name = LocalName::from(attr_name);
|
||||||
|
|
||||||
|
attrs_mut.push(Attribute {
|
||||||
|
name: QualName::new(None, ns!(), name),
|
||||||
|
value: format_tendril!("{}", attr_value),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn stringify_document(handle: &Handle, options: &Options) -> String {
|
pub fn stringify_document(handle: &Handle, options: &Options) -> String {
|
||||||
let mut buf: Vec<u8> = Vec::new();
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
serialize(&mut buf, handle, SerializeOpts::default())
|
serialize(&mut buf, handle, SerializeOpts::default())
|
||||||
.expect("unable to serialize DOM into buffer");
|
.expect("Unable to serialize DOM into buffer");
|
||||||
|
|
||||||
let mut result = String::from_utf8(buf).unwrap();
|
let mut result = String::from_utf8(buf).unwrap();
|
||||||
|
|
||||||
@ -398,33 +440,33 @@ pub fn stringify_document(handle: &Handle, options: &Options) -> String {
|
|||||||
let mut buf: Vec<u8> = Vec::new();
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
let mut dom = html_to_dom(&result);
|
let mut dom = html_to_dom(&result);
|
||||||
let doc = dom.get_document();
|
let doc = dom.get_document();
|
||||||
let html = get_child_node_by_name(&doc, "html");
|
if let Some(html) = get_child_node_by_name(&doc, "html") {
|
||||||
let head = get_child_node_by_name(&html, "head");
|
if let Some(head) = get_child_node_by_name(&html, "head") {
|
||||||
let csp_content: String = csp(options);
|
let meta = dom.create_element(
|
||||||
|
QualName::new(None, ns!(), local_name!("meta")),
|
||||||
let meta = dom.create_element(
|
vec![
|
||||||
QualName::new(None, ns!(), local_name!("meta")),
|
Attribute {
|
||||||
vec![
|
name: QualName::new(None, ns!(), local_name!("http-equiv")),
|
||||||
Attribute {
|
value: format_tendril!("Content-Security-Policy"),
|
||||||
name: QualName::new(None, ns!(), local_name!("http-equiv")),
|
},
|
||||||
value: format_tendril!("Content-Security-Policy"),
|
Attribute {
|
||||||
},
|
name: QualName::new(None, ns!(), local_name!("content")),
|
||||||
Attribute {
|
value: format_tendril!("{}", compose_csp(options)),
|
||||||
name: QualName::new(None, ns!(), local_name!("content")),
|
},
|
||||||
value: format_tendril!("{}", csp_content),
|
],
|
||||||
},
|
Default::default(),
|
||||||
],
|
);
|
||||||
Default::default(),
|
// Note: the CSP meta-tag has to be prepended, never appended,
|
||||||
);
|
// since there already may be one defined in the original document,
|
||||||
// Note: the CSP meta-tag has to be prepended, never appended,
|
// and browsers don't allow re-defining them (for obvious reasons)
|
||||||
// since there already may be one defined in the document,
|
head.children.borrow_mut().reverse();
|
||||||
// and browsers don't allow re-defining them (for obvious reasons)
|
head.children.borrow_mut().push(meta.clone());
|
||||||
head.children.borrow_mut().reverse();
|
head.children.borrow_mut().reverse();
|
||||||
head.children.borrow_mut().push(meta.clone());
|
}
|
||||||
head.children.borrow_mut().reverse();
|
}
|
||||||
|
|
||||||
serialize(&mut buf, &doc, SerializeOpts::default())
|
serialize(&mut buf, &doc, SerializeOpts::default())
|
||||||
.expect("unable to serialize DOM into buffer");
|
.expect("Unable to serialize DOM into buffer");
|
||||||
result = String::from_utf8(buf).unwrap();
|
result = String::from_utf8(buf).unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -549,7 +591,7 @@ pub fn walk_and_embed_assets(
|
|||||||
)) => {
|
)) => {
|
||||||
// Check integrity
|
// Check integrity
|
||||||
if integrity.is_empty()
|
if integrity.is_empty()
|
||||||
|| has_proper_integrity(&link_href_data, &integrity)
|
|| check_integrity(&link_href_data, &integrity)
|
||||||
{
|
{
|
||||||
let link_href_data_url = data_to_data_url(
|
let link_href_data_url = data_to_data_url(
|
||||||
&link_href_media_type,
|
&link_href_media_type,
|
||||||
@ -622,7 +664,7 @@ pub fn walk_and_embed_assets(
|
|||||||
)) => {
|
)) => {
|
||||||
// Check integrity
|
// Check integrity
|
||||||
if integrity.is_empty()
|
if integrity.is_empty()
|
||||||
|| has_proper_integrity(&link_href_data, &integrity)
|
|| check_integrity(&link_href_data, &integrity)
|
||||||
{
|
{
|
||||||
let css: String = embed_css(
|
let css: String = embed_css(
|
||||||
cache,
|
cache,
|
||||||
@ -690,7 +732,7 @@ pub fn walk_and_embed_assets(
|
|||||||
}
|
}
|
||||||
"base" => {
|
"base" => {
|
||||||
if is_http_url(url) {
|
if is_http_url(url) {
|
||||||
// Ensure BASE href is a full URL, not a relative one
|
// Ensure the BASE node doesn't have a relative URL
|
||||||
for attr in attrs_mut.iter_mut() {
|
for attr in attrs_mut.iter_mut() {
|
||||||
let attr_name: &str = &attr.name.local;
|
let attr_name: &str = &attr.name.local;
|
||||||
if attr_name.eq_ignore_ascii_case("href") {
|
if attr_name.eq_ignore_ascii_case("href") {
|
||||||
@ -858,74 +900,54 @@ pub fn walk_and_embed_assets(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
"input" => {
|
"input" => {
|
||||||
// Determine input type
|
if let Some(attr_value) = get_node_attr(node, "type") {
|
||||||
let mut is_image_input: bool = false;
|
if attr_value.to_string().eq_ignore_ascii_case("image") {
|
||||||
for attr in attrs_mut.iter_mut() {
|
let mut input_image_src: String = str!();
|
||||||
let attr_name: &str = &attr.name.local;
|
let mut i = 0;
|
||||||
if attr_name.eq_ignore_ascii_case("type") {
|
while i < attrs_mut.len() {
|
||||||
is_image_input = attr.value.to_string().eq_ignore_ascii_case("image");
|
let attr_name: &str = &attrs_mut[i].name.local;
|
||||||
}
|
if attr_name.eq_ignore_ascii_case("src") {
|
||||||
}
|
input_image_src = str!(attrs_mut.remove(i).value.trim());
|
||||||
|
|
||||||
if is_image_input {
|
|
||||||
let mut input_image_src: String = str!();
|
|
||||||
let mut i = 0;
|
|
||||||
while i < attrs_mut.len() {
|
|
||||||
let attr_name: &str = &attrs_mut[i].name.local;
|
|
||||||
if attr_name.eq_ignore_ascii_case("src") {
|
|
||||||
input_image_src = str!(attrs_mut.remove(i).value.trim());
|
|
||||||
} else {
|
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if options.no_images || input_image_src.is_empty() {
|
|
||||||
attrs_mut.push(Attribute {
|
|
||||||
name: QualName::new(None, ns!(), local_name!("src")),
|
|
||||||
value: Tendril::from_slice(if input_image_src.is_empty() {
|
|
||||||
""
|
|
||||||
} else {
|
} else {
|
||||||
empty_image!()
|
i += 1;
|
||||||
}),
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
let input_image_full_url =
|
|
||||||
resolve_url(&url, input_image_src).unwrap_or_default();
|
|
||||||
let input_image_url_fragment =
|
|
||||||
get_url_fragment(input_image_full_url.clone());
|
|
||||||
match retrieve_asset(
|
|
||||||
cache,
|
|
||||||
client,
|
|
||||||
&url,
|
|
||||||
&input_image_full_url,
|
|
||||||
options,
|
|
||||||
depth + 1,
|
|
||||||
) {
|
|
||||||
Ok((
|
|
||||||
input_image_data,
|
|
||||||
input_image_final_url,
|
|
||||||
input_image_media_type,
|
|
||||||
)) => {
|
|
||||||
let input_image_data_url = data_to_data_url(
|
|
||||||
&input_image_media_type,
|
|
||||||
&input_image_data,
|
|
||||||
&input_image_final_url,
|
|
||||||
);
|
|
||||||
// Add data URL src attribute
|
|
||||||
let assembled_url: String = url_with_fragment(
|
|
||||||
input_image_data_url.as_str(),
|
|
||||||
input_image_url_fragment.as_str(),
|
|
||||||
);
|
|
||||||
attrs_mut.push(Attribute {
|
|
||||||
name: QualName::new(None, ns!(), local_name!("src")),
|
|
||||||
value: Tendril::from_slice(assembled_url.as_ref()),
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
Err(_) => {
|
}
|
||||||
// Keep remote reference if unable to retrieve the asset
|
|
||||||
if is_http_url(input_image_full_url.clone()) {
|
if options.no_images || input_image_src.is_empty() {
|
||||||
|
attrs_mut.push(Attribute {
|
||||||
|
name: QualName::new(None, ns!(), local_name!("src")),
|
||||||
|
value: Tendril::from_slice(if input_image_src.is_empty() {
|
||||||
|
""
|
||||||
|
} else {
|
||||||
|
empty_image!()
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
let input_image_full_url =
|
||||||
|
resolve_url(&url, input_image_src).unwrap_or_default();
|
||||||
|
let input_image_url_fragment =
|
||||||
|
get_url_fragment(input_image_full_url.clone());
|
||||||
|
match retrieve_asset(
|
||||||
|
cache,
|
||||||
|
client,
|
||||||
|
&url,
|
||||||
|
&input_image_full_url,
|
||||||
|
options,
|
||||||
|
depth + 1,
|
||||||
|
) {
|
||||||
|
Ok((
|
||||||
|
input_image_data,
|
||||||
|
input_image_final_url,
|
||||||
|
input_image_media_type,
|
||||||
|
)) => {
|
||||||
|
let input_image_data_url = data_to_data_url(
|
||||||
|
&input_image_media_type,
|
||||||
|
&input_image_data,
|
||||||
|
&input_image_final_url,
|
||||||
|
);
|
||||||
|
// Add data URL src attribute
|
||||||
let assembled_url: String = url_with_fragment(
|
let assembled_url: String = url_with_fragment(
|
||||||
input_image_full_url.as_str(),
|
input_image_data_url.as_str(),
|
||||||
input_image_url_fragment.as_str(),
|
input_image_url_fragment.as_str(),
|
||||||
);
|
);
|
||||||
attrs_mut.push(Attribute {
|
attrs_mut.push(Attribute {
|
||||||
@ -933,6 +955,23 @@ pub fn walk_and_embed_assets(
|
|||||||
value: Tendril::from_slice(assembled_url.as_ref()),
|
value: Tendril::from_slice(assembled_url.as_ref()),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
Err(_) => {
|
||||||
|
// Keep remote reference if unable to retrieve the asset
|
||||||
|
if is_http_url(input_image_full_url.clone()) {
|
||||||
|
let assembled_url: String = url_with_fragment(
|
||||||
|
input_image_full_url.as_str(),
|
||||||
|
input_image_url_fragment.as_str(),
|
||||||
|
);
|
||||||
|
attrs_mut.push(Attribute {
|
||||||
|
name: QualName::new(
|
||||||
|
None,
|
||||||
|
ns!(),
|
||||||
|
local_name!("src"),
|
||||||
|
),
|
||||||
|
value: Tendril::from_slice(assembled_url.as_ref()),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1066,7 +1105,7 @@ pub fn walk_and_embed_assets(
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Don't touch email links or hrefs which begin with a hash sign
|
// Don't touch email links or hrefs which begin with a hash
|
||||||
if attr_value.starts_with('#') || url_has_protocol(attr_value) {
|
if attr_value.starts_with('#') || url_has_protocol(attr_value) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -1109,7 +1148,7 @@ pub fn walk_and_embed_assets(
|
|||||||
Ok((script_data, script_final_url, _script_media_type)) => {
|
Ok((script_data, script_final_url, _script_media_type)) => {
|
||||||
// Only embed if we're able to validate integrity
|
// Only embed if we're able to validate integrity
|
||||||
if script_integrity.is_empty()
|
if script_integrity.is_empty()
|
||||||
|| has_proper_integrity(&script_data, &script_integrity)
|
|| check_integrity(&script_data, &script_integrity)
|
||||||
{
|
{
|
||||||
let script_data_url = data_to_data_url(
|
let script_data_url = data_to_data_url(
|
||||||
"application/javascript",
|
"application/javascript",
|
||||||
|
52
src/main.rs
52
src/main.rs
@ -9,12 +9,12 @@ use std::process;
|
|||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use monolith::html::{
|
use monolith::html::{
|
||||||
add_base_tag, add_favicon, has_base_tag, has_favicon, html_to_dom, metadata_tag,
|
add_favicon, create_metadata_tag, get_base_url, has_favicon, html_to_dom, set_base_url,
|
||||||
stringify_document, walk_and_embed_assets,
|
stringify_document, walk_and_embed_assets,
|
||||||
};
|
};
|
||||||
use monolith::opts::Options;
|
use monolith::opts::Options;
|
||||||
use monolith::url::{
|
use monolith::url::{
|
||||||
data_to_data_url, data_url_to_data, is_data_url, is_file_url, is_http_url, resolve_url,
|
data_to_data_url, is_data_url, is_file_url, is_http_url, parse_data_url, resolve_url,
|
||||||
};
|
};
|
||||||
use monolith::utils::retrieve_asset;
|
use monolith::utils::retrieve_asset;
|
||||||
|
|
||||||
@ -52,7 +52,7 @@ fn main() {
|
|||||||
let options = Options::from_args();
|
let options = Options::from_args();
|
||||||
let original_target: &str = &options.target;
|
let original_target: &str = &options.target;
|
||||||
let target_url: &str;
|
let target_url: &str;
|
||||||
let base_url;
|
let mut base_url: String;
|
||||||
let mut dom;
|
let mut dom;
|
||||||
|
|
||||||
// Pre-process the input
|
// Pre-process the input
|
||||||
@ -64,7 +64,9 @@ fn main() {
|
|||||||
|
|
||||||
// Determine exact target URL
|
// Determine exact target URL
|
||||||
if target.clone().len() == 0 {
|
if target.clone().len() == 0 {
|
||||||
eprintln!("No target specified");
|
if !options.silent {
|
||||||
|
eprintln!("No target specified");
|
||||||
|
}
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
} else if is_http_url(target.clone()) || is_data_url(target.clone()) {
|
} else if is_http_url(target.clone()) || is_data_url(target.clone()) {
|
||||||
target_url = target.as_str();
|
target_url = target.as_str();
|
||||||
@ -72,7 +74,9 @@ fn main() {
|
|||||||
target_url = target.as_str();
|
target_url = target.as_str();
|
||||||
} else if path.exists() {
|
} else if path.exists() {
|
||||||
if !path.is_file() {
|
if !path.is_file() {
|
||||||
eprintln!("Local target is not a file: {}", original_target);
|
if !options.silent {
|
||||||
|
eprintln!("Local target is not a file: {}", original_target);
|
||||||
|
}
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
target.insert_str(0, if cfg!(windows) { "file:///" } else { "file://" });
|
target.insert_str(0, if cfg!(windows) { "file:///" } else { "file://" });
|
||||||
@ -111,11 +115,16 @@ fn main() {
|
|||||||
.build()
|
.build()
|
||||||
.expect("Failed to initialize HTTP client");
|
.expect("Failed to initialize HTTP client");
|
||||||
|
|
||||||
|
// At this stage we assume that the base URL is the same as the target URL
|
||||||
|
base_url = str!(target_url);
|
||||||
|
|
||||||
// Retrieve target document
|
// Retrieve target document
|
||||||
if is_file_url(target_url) || is_http_url(target_url) {
|
if is_file_url(target_url) || is_http_url(target_url) {
|
||||||
match retrieve_asset(&mut cache, &client, target_url, target_url, &options, 0) {
|
match retrieve_asset(&mut cache, &client, target_url, target_url, &options, 0) {
|
||||||
Ok((data, final_url, _media_type)) => {
|
Ok((data, final_url, _media_type)) => {
|
||||||
base_url = final_url;
|
if options.base_url.clone().unwrap_or(str!()).is_empty() {
|
||||||
|
base_url = final_url
|
||||||
|
}
|
||||||
dom = html_to_dom(&String::from_utf8_lossy(&data));
|
dom = html_to_dom(&String::from_utf8_lossy(&data));
|
||||||
}
|
}
|
||||||
Err(_) => {
|
Err(_) => {
|
||||||
@ -126,23 +135,40 @@ fn main() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if is_data_url(target_url) {
|
} else if is_data_url(target_url) {
|
||||||
let (media_type, data): (String, Vec<u8>) = data_url_to_data(target_url);
|
let (media_type, data): (String, Vec<u8>) = parse_data_url(target_url);
|
||||||
if !media_type.eq_ignore_ascii_case("text/html") {
|
if !media_type.eq_ignore_ascii_case("text/html") {
|
||||||
eprintln!("Unsupported data URL media type");
|
if !options.silent {
|
||||||
|
eprintln!("Unsupported data URL media type");
|
||||||
|
}
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
base_url = str!(target_url);
|
|
||||||
dom = html_to_dom(&String::from_utf8_lossy(&data));
|
dom = html_to_dom(&String::from_utf8_lossy(&data));
|
||||||
} else {
|
} else {
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Use custom base URL if specified, read and use what's in the DOM otherwise
|
||||||
|
if !options.base_url.clone().unwrap_or(str!()).is_empty() {
|
||||||
|
if is_data_url(options.base_url.clone().unwrap()) {
|
||||||
|
if !options.silent {
|
||||||
|
eprintln!("Data URLs cannot be used as base URL");
|
||||||
|
}
|
||||||
|
process::exit(1);
|
||||||
|
} else {
|
||||||
|
base_url = options.base_url.clone().unwrap();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if let Some(existing_base_url) = get_base_url(&dom.document) {
|
||||||
|
base_url = resolve_url(target_url, existing_base_url).unwrap();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Embed remote assets
|
// Embed remote assets
|
||||||
walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options, 0);
|
walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options, 0);
|
||||||
|
|
||||||
// Take care of BASE tag
|
// Update or add new BASE tag to reroute network requests and hash-links in the final document
|
||||||
if is_http_url(base_url.clone()) && !has_base_tag(&dom.document) {
|
if let Some(new_base_url) = options.base_url.clone() {
|
||||||
dom = add_base_tag(&dom.document, base_url.clone());
|
dom = set_base_url(&dom.document, new_base_url);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Request and embed /favicon.ico (unless it's already linked in the document)
|
// Request and embed /favicon.ico (unless it's already linked in the document)
|
||||||
@ -172,7 +198,7 @@ fn main() {
|
|||||||
|
|
||||||
// Add metadata tag
|
// Add metadata tag
|
||||||
if !options.no_metadata {
|
if !options.no_metadata {
|
||||||
let metadata_comment: String = metadata_tag(&base_url);
|
let metadata_comment: String = create_metadata_tag(&base_url);
|
||||||
result.insert_str(0, &metadata_comment);
|
result.insert_str(0, &metadata_comment);
|
||||||
if metadata_comment.len() > 0 {
|
if metadata_comment.len() > 0 {
|
||||||
result.insert_str(metadata_comment.len(), "\n");
|
result.insert_str(metadata_comment.len(), "\n");
|
||||||
|
29
src/opts.rs
29
src/opts.rs
@ -2,20 +2,21 @@ use clap::{App, Arg};
|
|||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub struct Options {
|
pub struct Options {
|
||||||
pub target: String,
|
pub base_url: Option<String>,
|
||||||
pub no_css: bool,
|
pub no_css: bool,
|
||||||
pub ignore_errors: bool,
|
pub ignore_errors: bool,
|
||||||
pub no_fonts: bool,
|
|
||||||
pub no_frames: bool,
|
pub no_frames: bool,
|
||||||
|
pub no_fonts: bool,
|
||||||
pub no_images: bool,
|
pub no_images: bool,
|
||||||
|
pub isolate: bool,
|
||||||
pub no_js: bool,
|
pub no_js: bool,
|
||||||
pub insecure: bool,
|
pub insecure: bool,
|
||||||
pub isolate: bool,
|
pub no_metadata: bool,
|
||||||
pub output: String,
|
pub output: String,
|
||||||
pub silent: bool,
|
pub silent: bool,
|
||||||
pub timeout: u64,
|
pub timeout: u64,
|
||||||
pub user_agent: String,
|
pub user_agent: String,
|
||||||
pub no_metadata: bool,
|
pub target: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
const ASCII: &str = " \
|
const ASCII: &str = " \
|
||||||
@ -37,14 +38,8 @@ impl Options {
|
|||||||
.version(crate_version!())
|
.version(crate_version!())
|
||||||
.author(crate_authors!("\n"))
|
.author(crate_authors!("\n"))
|
||||||
.about(format!("{}\n{}", ASCII, crate_description!()).as_str())
|
.about(format!("{}\n{}", ASCII, crate_description!()).as_str())
|
||||||
.arg(
|
|
||||||
Arg::with_name("target")
|
|
||||||
.required(true)
|
|
||||||
.takes_value(true)
|
|
||||||
.index(1)
|
|
||||||
.help("URL or file path"),
|
|
||||||
)
|
|
||||||
// .args_from_usage("-a, --no-audio 'Removes audio sources'")
|
// .args_from_usage("-a, --no-audio 'Removes audio sources'")
|
||||||
|
.args_from_usage("-b, --base-url=[http://localhost/] 'Use custom base URL'")
|
||||||
.args_from_usage("-c, --no-css 'Removes CSS'")
|
.args_from_usage("-c, --no-css 'Removes CSS'")
|
||||||
.args_from_usage("-e, --ignore-errors 'Ignore network errors'")
|
.args_from_usage("-e, --ignore-errors 'Ignore network errors'")
|
||||||
.args_from_usage("-f, --no-frames 'Removes frames and iframes'")
|
.args_from_usage("-f, --no-frames 'Removes frames and iframes'")
|
||||||
@ -53,12 +48,19 @@ impl Options {
|
|||||||
.args_from_usage("-I, --isolate 'Cuts off document from the Internet'")
|
.args_from_usage("-I, --isolate 'Cuts off document from the Internet'")
|
||||||
.args_from_usage("-j, --no-js 'Removes JavaScript'")
|
.args_from_usage("-j, --no-js 'Removes JavaScript'")
|
||||||
.args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'")
|
.args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'")
|
||||||
.args_from_usage("-M, --no-metadata 'Excludes metadata information from the document'")
|
.args_from_usage("-M, --no-metadata 'Excludes timestamp and source information'")
|
||||||
.args_from_usage("-o, --output=[document.html] 'Write output to <file>'")
|
.args_from_usage("-o, --output=[document.html] 'Write output to <file>'")
|
||||||
.args_from_usage("-s, --silent 'Suppresses verbosity'")
|
.args_from_usage("-s, --silent 'Suppresses verbosity'")
|
||||||
.args_from_usage("-t, --timeout=[60] 'Adjust network request timeout'")
|
.args_from_usage("-t, --timeout=[60] 'Adjust network request timeout'")
|
||||||
.args_from_usage("-u, --user-agent=[Firefox] 'Set custom User-Agent string'")
|
.args_from_usage("-u, --user-agent=[Firefox] 'Set custom User-Agent string'")
|
||||||
// .args_from_usage("-v, --no-video 'Removes video sources'")
|
// .args_from_usage("-v, --no-video 'Removes video sources'")
|
||||||
|
.arg(
|
||||||
|
Arg::with_name("target")
|
||||||
|
.required(true)
|
||||||
|
.takes_value(true)
|
||||||
|
.index(1)
|
||||||
|
.help("URL or file path"),
|
||||||
|
)
|
||||||
.get_matches();
|
.get_matches();
|
||||||
let mut options: Options = Options::default();
|
let mut options: Options = Options::default();
|
||||||
|
|
||||||
@ -67,6 +69,9 @@ impl Options {
|
|||||||
.value_of("target")
|
.value_of("target")
|
||||||
.expect("please set target")
|
.expect("please set target")
|
||||||
.to_string();
|
.to_string();
|
||||||
|
if let Some(base_url) = app.value_of("base-url") {
|
||||||
|
options.base_url = Some(str!(base_url));
|
||||||
|
}
|
||||||
options.no_css = app.is_present("no-css");
|
options.no_css = app.is_present("no-css");
|
||||||
options.ignore_errors = app.is_present("ignore-errors");
|
options.ignore_errors = app.is_present("ignore-errors");
|
||||||
options.no_frames = app.is_present("no-frames");
|
options.no_frames = app.is_present("no-frames");
|
||||||
|
123
src/tests/cli/base_url.rs
Normal file
123
src/tests/cli/base_url.rs
Normal file
@ -0,0 +1,123 @@
|
|||||||
|
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||||
|
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||||
|
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||||
|
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||||
|
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||||
|
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod passing {
|
||||||
|
use assert_cmd::prelude::*;
|
||||||
|
use std::env;
|
||||||
|
use std::process::Command;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn add_new_when_provided() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||||
|
let out = cmd
|
||||||
|
.arg("-M")
|
||||||
|
.arg("-b")
|
||||||
|
.arg("http://localhost:8000/")
|
||||||
|
.arg("data:text/html,Hello%2C%20World!")
|
||||||
|
.output()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// STDOUT should contain newly added base URL
|
||||||
|
assert_eq!(
|
||||||
|
std::str::from_utf8(&out.stdout).unwrap(),
|
||||||
|
"<html><head>\
|
||||||
|
<base href=\"http://localhost:8000/\"></base>\
|
||||||
|
</head><body>Hello, World!</body></html>\n"
|
||||||
|
);
|
||||||
|
|
||||||
|
// STDERR should be empty
|
||||||
|
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
||||||
|
|
||||||
|
// The exit code should be 0
|
||||||
|
out.assert().code(0);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn keep_existing_when_none_provided() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||||
|
let out = cmd
|
||||||
|
.arg("-M")
|
||||||
|
.arg("data:text/html,<base href=\"http://localhost:8000/\" />Hello%2C%20World!")
|
||||||
|
.output()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// STDOUT should contain the document's existing base URL, left unchanged
|
||||||
|
assert_eq!(
|
||||||
|
std::str::from_utf8(&out.stdout).unwrap(),
|
||||||
|
"<html><head>\
|
||||||
|
<base href=\"http://localhost:8000/\">\
|
||||||
|
</head><body>Hello, World!</body></html>\n"
|
||||||
|
);
|
||||||
|
|
||||||
|
// STDERR should be empty
|
||||||
|
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
||||||
|
|
||||||
|
// The exit code should be 0
|
||||||
|
out.assert().code(0);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn override_existing_when_provided() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||||
|
let out = cmd
|
||||||
|
.arg("-M")
|
||||||
|
.arg("-b")
|
||||||
|
.arg("http://localhost/")
|
||||||
|
.arg("data:text/html,<base href=\"http://localhost:8000/\" />Hello%2C%20World!")
|
||||||
|
.output()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// STDOUT should contain the provided base URL in place of the existing one
|
||||||
|
assert_eq!(
|
||||||
|
std::str::from_utf8(&out.stdout).unwrap(),
|
||||||
|
"<html><head>\
|
||||||
|
<base href=\"http://localhost/\">\
|
||||||
|
</head><body>Hello, World!</body></html>\n"
|
||||||
|
);
|
||||||
|
|
||||||
|
// STDERR should be empty
|
||||||
|
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
||||||
|
|
||||||
|
// The exit code should be 0
|
||||||
|
out.assert().code(0);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn remove_existing_when_empty_provided() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||||
|
let out = cmd
|
||||||
|
.arg("-M")
|
||||||
|
.arg("-b")
|
||||||
|
.arg("")
|
||||||
|
.arg("data:text/html,<base href=\"http://localhost:8000/\" />Hello%2C%20World!")
|
||||||
|
.output()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// STDOUT should contain the existing base tag with its href set to an empty string
|
||||||
|
assert_eq!(
|
||||||
|
std::str::from_utf8(&out.stdout).unwrap(),
|
||||||
|
"<html><head>\
|
||||||
|
<base href=\"\">\
|
||||||
|
</head><body>Hello, World!</body></html>\n"
|
||||||
|
);
|
||||||
|
|
||||||
|
// STDERR should be empty
|
||||||
|
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
||||||
|
|
||||||
|
// The exit code should be 0
|
||||||
|
out.assert().code(0);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
2
src/tests/cli/mod.rs
Normal file
2
src/tests/cli/mod.rs
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
mod base_url;
|
||||||
|
mod basic;
|
@ -11,7 +11,7 @@ mod passing {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn empty_input_sha256() {
|
fn empty_input_sha256() {
|
||||||
assert!(html::has_proper_integrity(
|
assert!(html::check_integrity(
|
||||||
"".as_bytes(),
|
"".as_bytes(),
|
||||||
"sha256-47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU="
|
"sha256-47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU="
|
||||||
));
|
));
|
||||||
@ -19,7 +19,7 @@ mod passing {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn sha256() {
|
fn sha256() {
|
||||||
assert!(html::has_proper_integrity(
|
assert!(html::check_integrity(
|
||||||
"abcdef0123456789".as_bytes(),
|
"abcdef0123456789".as_bytes(),
|
||||||
"sha256-9EWAHgy4mSYsm54hmDaIDXPKLRsLnBX7lZyQ6xISNOM="
|
"sha256-9EWAHgy4mSYsm54hmDaIDXPKLRsLnBX7lZyQ6xISNOM="
|
||||||
));
|
));
|
||||||
@ -27,7 +27,7 @@ mod passing {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn sha384() {
|
fn sha384() {
|
||||||
assert!(html::has_proper_integrity(
|
assert!(html::check_integrity(
|
||||||
"abcdef0123456789".as_bytes(),
|
"abcdef0123456789".as_bytes(),
|
||||||
"sha384-gc9l7omltke8C33bedgh15E12M7RrAQa5t63Yb8APlpe7ZhiqV23+oqiulSJl3Kw"
|
"sha384-gc9l7omltke8C33bedgh15E12M7RrAQa5t63Yb8APlpe7ZhiqV23+oqiulSJl3Kw"
|
||||||
));
|
));
|
||||||
@ -35,7 +35,7 @@ mod passing {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn sha512() {
|
fn sha512() {
|
||||||
assert!(html::has_proper_integrity(
|
assert!(html::check_integrity(
|
||||||
"abcdef0123456789".as_bytes(),
|
"abcdef0123456789".as_bytes(),
|
||||||
"sha512-zG5B88cYMqcdiMi9gz0XkOFYw2BpjeYdn5V6+oFrMgSNjRpqL7EF8JEwl17ztZbK3N7I/tTwp3kxQbN1RgFBww=="
|
"sha512-zG5B88cYMqcdiMi9gz0XkOFYw2BpjeYdn5V6+oFrMgSNjRpqL7EF8JEwl17ztZbK3N7I/tTwp3kxQbN1RgFBww=="
|
||||||
));
|
));
|
||||||
@ -55,20 +55,17 @@ mod failing {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn empty_hash() {
|
fn empty_hash() {
|
||||||
assert!(!html::has_proper_integrity(
|
assert!(!html::check_integrity("abcdef0123456789".as_bytes(), ""));
|
||||||
"abcdef0123456789".as_bytes(),
|
|
||||||
""
|
|
||||||
));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn empty_input_empty_hash() {
|
fn empty_input_empty_hash() {
|
||||||
assert!(!html::has_proper_integrity("".as_bytes(), ""));
|
assert!(!html::check_integrity("".as_bytes(), ""));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn sha256() {
|
fn sha256() {
|
||||||
assert!(!html::has_proper_integrity(
|
assert!(!html::check_integrity(
|
||||||
"abcdef0123456789".as_bytes(),
|
"abcdef0123456789".as_bytes(),
|
||||||
"sha256-badhash"
|
"sha256-badhash"
|
||||||
));
|
));
|
||||||
@ -76,7 +73,7 @@ mod failing {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn sha384() {
|
fn sha384() {
|
||||||
assert!(!html::has_proper_integrity(
|
assert!(!html::check_integrity(
|
||||||
"abcdef0123456789".as_bytes(),
|
"abcdef0123456789".as_bytes(),
|
||||||
"sha384-badhash"
|
"sha384-badhash"
|
||||||
));
|
));
|
||||||
@ -84,7 +81,7 @@ mod failing {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn sha512() {
|
fn sha512() {
|
||||||
assert!(!html::has_proper_integrity(
|
assert!(!html::check_integrity(
|
||||||
"abcdef0123456789".as_bytes(),
|
"abcdef0123456789".as_bytes(),
|
||||||
"sha512-badhash"
|
"sha512-badhash"
|
||||||
));
|
));
|
@ -14,7 +14,7 @@ mod passing {
|
|||||||
fn isolated() {
|
fn isolated() {
|
||||||
let mut options = Options::default();
|
let mut options = Options::default();
|
||||||
options.isolate = true;
|
options.isolate = true;
|
||||||
let csp_content = html::csp(&options);
|
let csp_content = html::compose_csp(&options);
|
||||||
|
|
||||||
assert_eq!(csp_content, "default-src 'unsafe-inline' data:;");
|
assert_eq!(csp_content, "default-src 'unsafe-inline' data:;");
|
||||||
}
|
}
|
||||||
@ -23,7 +23,7 @@ mod passing {
|
|||||||
fn no_css() {
|
fn no_css() {
|
||||||
let mut options = Options::default();
|
let mut options = Options::default();
|
||||||
options.no_css = true;
|
options.no_css = true;
|
||||||
let csp_content = html::csp(&options);
|
let csp_content = html::compose_csp(&options);
|
||||||
|
|
||||||
assert_eq!(csp_content, "style-src 'none';");
|
assert_eq!(csp_content, "style-src 'none';");
|
||||||
}
|
}
|
||||||
@ -32,7 +32,7 @@ mod passing {
|
|||||||
fn no_fonts() {
|
fn no_fonts() {
|
||||||
let mut options = Options::default();
|
let mut options = Options::default();
|
||||||
options.no_fonts = true;
|
options.no_fonts = true;
|
||||||
let csp_content = html::csp(&options);
|
let csp_content = html::compose_csp(&options);
|
||||||
|
|
||||||
assert_eq!(csp_content, "font-src 'none';");
|
assert_eq!(csp_content, "font-src 'none';");
|
||||||
}
|
}
|
||||||
@ -41,7 +41,7 @@ mod passing {
|
|||||||
fn no_frames() {
|
fn no_frames() {
|
||||||
let mut options = Options::default();
|
let mut options = Options::default();
|
||||||
options.no_frames = true;
|
options.no_frames = true;
|
||||||
let csp_content = html::csp(&options);
|
let csp_content = html::compose_csp(&options);
|
||||||
|
|
||||||
assert_eq!(csp_content, "frame-src 'none'; child-src 'none';");
|
assert_eq!(csp_content, "frame-src 'none'; child-src 'none';");
|
||||||
}
|
}
|
||||||
@ -50,7 +50,7 @@ mod passing {
|
|||||||
fn no_js() {
|
fn no_js() {
|
||||||
let mut options = Options::default();
|
let mut options = Options::default();
|
||||||
options.no_js = true;
|
options.no_js = true;
|
||||||
let csp_content = html::csp(&options);
|
let csp_content = html::compose_csp(&options);
|
||||||
|
|
||||||
assert_eq!(csp_content, "script-src 'none';");
|
assert_eq!(csp_content, "script-src 'none';");
|
||||||
}
|
}
|
||||||
@ -59,7 +59,7 @@ mod passing {
|
|||||||
fn no_images() {
|
fn no_images() {
|
||||||
let mut options = Options::default();
|
let mut options = Options::default();
|
||||||
options.no_images = true;
|
options.no_images = true;
|
||||||
let csp_content = html::csp(&options);
|
let csp_content = html::compose_csp(&options);
|
||||||
|
|
||||||
assert_eq!(csp_content, "img-src data:;");
|
assert_eq!(csp_content, "img-src data:;");
|
||||||
}
|
}
|
||||||
@ -73,7 +73,7 @@ mod passing {
|
|||||||
options.no_frames = true;
|
options.no_frames = true;
|
||||||
options.no_js = true;
|
options.no_js = true;
|
||||||
options.no_images = true;
|
options.no_images = true;
|
||||||
let csp_content = html::csp(&options);
|
let csp_content = html::compose_csp(&options);
|
||||||
|
|
||||||
assert_eq!(csp_content, "default-src 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;");
|
assert_eq!(csp_content, "default-src 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;");
|
||||||
}
|
}
|
@ -15,7 +15,7 @@ mod passing {
|
|||||||
fn http_url() {
|
fn http_url() {
|
||||||
let url = "http://192.168.1.1/";
|
let url = "http://192.168.1.1/";
|
||||||
let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
|
let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
|
||||||
let metadata_comment: String = html::metadata_tag(url);
|
let metadata_comment: String = html::create_metadata_tag(url);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
metadata_comment,
|
metadata_comment,
|
||||||
@ -33,7 +33,7 @@ mod passing {
|
|||||||
fn file_url() {
|
fn file_url() {
|
||||||
let url = "file:///home/monolith/index.html";
|
let url = "file:///home/monolith/index.html";
|
||||||
let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
|
let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
|
||||||
let metadata_comment: String = html::metadata_tag(url);
|
let metadata_comment: String = html::create_metadata_tag(url);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
metadata_comment,
|
metadata_comment,
|
||||||
@ -50,7 +50,7 @@ mod passing {
|
|||||||
fn data_url() {
|
fn data_url() {
|
||||||
let url = "data:text/html,Hello%2C%20World!";
|
let url = "data:text/html,Hello%2C%20World!";
|
||||||
let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
|
let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
|
||||||
let metadata_comment: String = html::metadata_tag(url);
|
let metadata_comment: String = html::create_metadata_tag(url);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
metadata_comment,
|
metadata_comment,
|
||||||
@ -77,6 +77,6 @@ mod failing {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn empty_string() {
|
fn empty_string() {
|
||||||
assert_eq!(html::metadata_tag(""), "");
|
assert_eq!(html::create_metadata_tag(""), "");
|
||||||
}
|
}
|
||||||
}
|
}
|
104
src/tests/html/get_base_url.rs
Normal file
104
src/tests/html/get_base_url.rs
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||||
|
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||||
|
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||||
|
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||||
|
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||||
|
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod passing {
|
||||||
|
use crate::html;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn present() {
|
||||||
|
let html = "<!doctype html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<base href=\"https://musicbrainz.org\" />
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
</body>
|
||||||
|
</html>";
|
||||||
|
let dom = html::html_to_dom(&html);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
html::get_base_url(&dom.document),
|
||||||
|
Some(str!("https://musicbrainz.org"))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn multiple_tags() {
|
||||||
|
let html = "<!doctype html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<base href=\"https://www.discogs.com/\" />
|
||||||
|
<base href=\"https://musicbrainz.org\" />
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
</body>
|
||||||
|
</html>";
|
||||||
|
let dom = html::html_to_dom(&html);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
html::get_base_url(&dom.document),
|
||||||
|
Some(str!("https://www.discogs.com/"))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||||
|
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||||
|
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||||
|
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||||
|
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||||
|
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod failing {
|
||||||
|
use crate::html;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn absent() {
|
||||||
|
let html = "<!doctype html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
</body>
|
||||||
|
</html>";
|
||||||
|
let dom = html::html_to_dom(&html);
|
||||||
|
|
||||||
|
assert_eq!(html::get_base_url(&dom.document), None);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn no_href() {
|
||||||
|
let html = "<!doctype html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<base />
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
</body>
|
||||||
|
</html>";
|
||||||
|
let dom = html::html_to_dom(&html);
|
||||||
|
|
||||||
|
assert_eq!(html::get_base_url(&dom.document), None);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn empty_href() {
|
||||||
|
let html = "<!doctype html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<base href=\"\" />
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
</body>
|
||||||
|
</html>";
|
||||||
|
let dom = html::html_to_dom(&html);
|
||||||
|
|
||||||
|
assert_eq!(html::get_base_url(&dom.document), Some(str!()));
|
||||||
|
}
|
||||||
|
}
|
54
src/tests/html/get_node_attr.rs
Normal file
54
src/tests/html/get_node_attr.rs
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||||
|
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||||
|
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||||
|
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||||
|
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||||
|
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod passing {
|
||||||
|
use html5ever::rcdom::{Handle, NodeData};
|
||||||
|
|
||||||
|
use crate::html;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn div_two_style_attributes() {
|
||||||
|
let html = "<!doctype html><html><head></head><body><DIV STYLE=\"color: blue;\" style=\"display: none;\"></div></body></html>";
|
||||||
|
let dom = html::html_to_dom(&html);
|
||||||
|
let mut count = 0;
|
||||||
|
|
||||||
|
fn test_walk(node: &Handle, i: &mut i8) {
|
||||||
|
*i += 1;
|
||||||
|
|
||||||
|
match &node.data {
|
||||||
|
NodeData::Document => {
|
||||||
|
// Dig deeper
|
||||||
|
for child in node.children.borrow().iter() {
|
||||||
|
test_walk(child, &mut *i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
NodeData::Element { ref name, .. } => {
|
||||||
|
let node_name = name.local.as_ref().to_string();
|
||||||
|
|
||||||
|
if node_name == "body" {
|
||||||
|
assert_eq!(html::get_node_attr(node, "class"), None);
|
||||||
|
} else if node_name == "div" {
|
||||||
|
assert_eq!(
|
||||||
|
html::get_node_attr(node, "style"),
|
||||||
|
Some(str!("color: blue;"))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
for child in node.children.borrow().iter() {
|
||||||
|
test_walk(child, &mut *i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => (),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
test_walk(&dom.document, &mut count);
|
||||||
|
|
||||||
|
assert_eq!(count, 6);
|
||||||
|
}
|
||||||
|
}
|
@ -12,7 +12,7 @@ mod passing {
|
|||||||
use crate::html;
|
use crate::html;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn get_node_name() {
|
fn parent_node_names() {
|
||||||
let html = "<!doctype html><html><HEAD></HEAD><body><div><P></P></div></body></html>";
|
let html = "<!doctype html><html><HEAD></HEAD><body><div><P></P></div></body></html>";
|
||||||
let dom = html::html_to_dom(&html);
|
let dom = html::html_to_dom(&html);
|
||||||
let mut count = 0;
|
let mut count = 0;
|
||||||
|
@ -1,10 +1,13 @@
|
|||||||
mod add_favicon;
|
mod add_favicon;
|
||||||
mod csp;
|
mod check_integrity;
|
||||||
|
mod compose_csp;
|
||||||
|
mod create_metadata_tag;
|
||||||
mod embed_srcset;
|
mod embed_srcset;
|
||||||
|
mod get_base_url;
|
||||||
|
mod get_node_attr;
|
||||||
mod get_node_name;
|
mod get_node_name;
|
||||||
mod has_favicon;
|
mod has_favicon;
|
||||||
mod has_proper_integrity;
|
|
||||||
mod is_icon;
|
mod is_icon;
|
||||||
mod metadata_tag;
|
mod set_node_attr;
|
||||||
mod stringify_document;
|
mod stringify_document;
|
||||||
mod walk_and_embed_assets;
|
mod walk_and_embed_assets;
|
||||||
|
66
src/tests/html/set_node_attr.rs
Normal file
66
src/tests/html/set_node_attr.rs
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||||
|
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||||
|
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||||
|
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||||
|
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||||
|
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod passing {
|
||||||
|
use html5ever::rcdom::{Handle, NodeData};
|
||||||
|
|
||||||
|
use crate::html;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn html_lang_and_body_style() {
|
||||||
|
let html = "<!doctype html><html lang=\"en\"><head></head><body></body></html>";
|
||||||
|
let dom = html::html_to_dom(&html);
|
||||||
|
let mut count = 0;
|
||||||
|
|
||||||
|
fn test_walk(node: &Handle, i: &mut i8) {
|
||||||
|
*i += 1;
|
||||||
|
|
||||||
|
match &node.data {
|
||||||
|
NodeData::Document => {
|
||||||
|
// Dig deeper
|
||||||
|
for child in node.children.borrow().iter() {
|
||||||
|
test_walk(child, &mut *i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
NodeData::Element { ref name, .. } => {
|
||||||
|
let node_name = name.local.as_ref().to_string();
|
||||||
|
|
||||||
|
if node_name == "html" {
|
||||||
|
assert_eq!(html::get_node_attr(node, "lang"), Some(str!("en")));
|
||||||
|
|
||||||
|
html::set_node_attr(node, "lang", Some(str!("de")));
|
||||||
|
assert_eq!(html::get_node_attr(node, "lang"), Some(str!("de")));
|
||||||
|
|
||||||
|
html::set_node_attr(node, "lang", None);
|
||||||
|
assert_eq!(html::get_node_attr(node, "lang"), None);
|
||||||
|
|
||||||
|
html::set_node_attr(node, "lang", Some(str!("")));
|
||||||
|
assert_eq!(html::get_node_attr(node, "lang"), Some(str!("")));
|
||||||
|
} else if node_name == "body" {
|
||||||
|
assert_eq!(html::get_node_attr(node, "style"), None);
|
||||||
|
|
||||||
|
html::set_node_attr(node, "style", Some(str!("display: none;")));
|
||||||
|
assert_eq!(
|
||||||
|
html::get_node_attr(node, "style"),
|
||||||
|
Some(str!("display: none;"))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
for child in node.children.borrow().iter() {
|
||||||
|
test_walk(child, &mut *i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => (),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
test_walk(&dom.document, &mut count);
|
||||||
|
|
||||||
|
assert_eq!(count, 5);
|
||||||
|
}
|
||||||
|
}
|
@ -1,12 +1,12 @@
|
|||||||
mod clean_url;
|
mod clean_url;
|
||||||
mod data_to_data_url;
|
mod data_to_data_url;
|
||||||
mod data_url_to_data;
|
|
||||||
mod decode_url;
|
mod decode_url;
|
||||||
mod file_url_to_fs_path;
|
mod file_url_to_fs_path;
|
||||||
mod get_url_fragment;
|
mod get_url_fragment;
|
||||||
mod is_data_url;
|
mod is_data_url;
|
||||||
mod is_file_url;
|
mod is_file_url;
|
||||||
mod is_http_url;
|
mod is_http_url;
|
||||||
|
mod parse_data_url;
|
||||||
mod resolve_url;
|
mod resolve_url;
|
||||||
mod url_has_protocol;
|
mod url_has_protocol;
|
||||||
mod url_with_fragment;
|
mod url_with_fragment;
|
||||||
|
@ -11,7 +11,7 @@ mod passing {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_text_html_base64() {
|
fn parse_text_html_base64() {
|
||||||
let (media_type, data) = url::data_url_to_data("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg==");
|
let (media_type, data) = url::parse_data_url("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg==");
|
||||||
|
|
||||||
assert_eq!(media_type, "text/html");
|
assert_eq!(media_type, "text/html");
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
@ -22,7 +22,7 @@ mod passing {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_text_html_utf8() {
|
fn parse_text_html_utf8() {
|
||||||
let (media_type, data) = url::data_url_to_data(
|
let (media_type, data) = url::parse_data_url(
|
||||||
"data:text/html;utf8,Work expands so as to fill the time available for its completion",
|
"data:text/html;utf8,Work expands so as to fill the time available for its completion",
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -35,7 +35,7 @@ mod passing {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_text_html_plaintext() {
|
fn parse_text_html_plaintext() {
|
||||||
let (media_type, data) = url::data_url_to_data(
|
let (media_type, data) = url::parse_data_url(
|
||||||
"data:text/html,Work expands so as to fill the time available for its completion",
|
"data:text/html,Work expands so as to fill the time available for its completion",
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -48,7 +48,7 @@ mod passing {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_text_html_charset_utf_8_between_two_whitespaces() {
|
fn parse_text_html_charset_utf_8_between_two_whitespaces() {
|
||||||
let (media_type, data) = url::data_url_to_data(" data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion ");
|
let (media_type, data) = url::parse_data_url(" data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion ");
|
||||||
|
|
||||||
assert_eq!(media_type, "text/html");
|
assert_eq!(media_type, "text/html");
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
@ -60,7 +60,7 @@ mod passing {
|
|||||||
#[test]
|
#[test]
|
||||||
fn parse_text_css_url_encoded() {
|
fn parse_text_css_url_encoded() {
|
||||||
let (media_type, data) =
|
let (media_type, data) =
|
||||||
url::data_url_to_data("data:text/css,div{background-color:%23000}");
|
url::parse_data_url("data:text/css,div{background-color:%23000}");
|
||||||
|
|
||||||
assert_eq!(media_type, "text/css");
|
assert_eq!(media_type, "text/css");
|
||||||
assert_eq!(String::from_utf8_lossy(&data), "div{background-color:#000}");
|
assert_eq!(String::from_utf8_lossy(&data), "div{background-color:#000}");
|
||||||
@ -68,7 +68,7 @@ mod passing {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_no_media_type_base64() {
|
fn parse_no_media_type_base64() {
|
||||||
let (media_type, data) = url::data_url_to_data("data:;base64,dGVzdA==");
|
let (media_type, data) = url::parse_data_url("data:;base64,dGVzdA==");
|
||||||
|
|
||||||
assert_eq!(media_type, "");
|
assert_eq!(media_type, "");
|
||||||
assert_eq!(String::from_utf8_lossy(&data), "test");
|
assert_eq!(String::from_utf8_lossy(&data), "test");
|
||||||
@ -76,7 +76,7 @@ mod passing {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_no_media_type_no_encoding() {
|
fn parse_no_media_type_no_encoding() {
|
||||||
let (media_type, data) = url::data_url_to_data("data:;,test%20test");
|
let (media_type, data) = url::parse_data_url("data:;,test%20test");
|
||||||
|
|
||||||
assert_eq!(media_type, "");
|
assert_eq!(media_type, "");
|
||||||
assert_eq!(String::from_utf8_lossy(&data), "test test");
|
assert_eq!(String::from_utf8_lossy(&data), "test test");
|
||||||
@ -96,7 +96,7 @@ mod failing {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn just_word_data() {
|
fn just_word_data() {
|
||||||
let (media_type, data) = url::data_url_to_data("data");
|
let (media_type, data) = url::parse_data_url("data");
|
||||||
|
|
||||||
assert_eq!(media_type, "");
|
assert_eq!(media_type, "");
|
||||||
assert_eq!(String::from_utf8_lossy(&data), "");
|
assert_eq!(String::from_utf8_lossy(&data), "");
|
78
src/url.rs
78
src/url.rs
@ -33,45 +33,6 @@ pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str) -> String {
|
|||||||
format!("data:{};base64,{}", media_type, base64::encode(data))
|
format!("data:{};base64,{}", media_type, base64::encode(data))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn data_url_to_data<T: AsRef<str>>(url: T) -> (String, Vec<u8>) {
|
|
||||||
let parsed_url: Url = Url::parse(url.as_ref()).unwrap_or(Url::parse("data:,").unwrap());
|
|
||||||
let path: String = parsed_url.path().to_string();
|
|
||||||
let comma_loc: usize = path.find(',').unwrap_or(path.len());
|
|
||||||
|
|
||||||
let meta_data: String = path.chars().take(comma_loc).collect();
|
|
||||||
let raw_data: String = path.chars().skip(comma_loc + 1).collect();
|
|
||||||
|
|
||||||
let text: String = decode_url(raw_data);
|
|
||||||
|
|
||||||
let meta_data_items: Vec<&str> = meta_data.split(';').collect();
|
|
||||||
let mut media_type: String = str!();
|
|
||||||
let mut encoding: &str = "";
|
|
||||||
|
|
||||||
let mut i: i8 = 0;
|
|
||||||
for item in &meta_data_items {
|
|
||||||
if i == 0 {
|
|
||||||
media_type = str!(item);
|
|
||||||
} else {
|
|
||||||
if item.eq_ignore_ascii_case("base64")
|
|
||||||
|| item.eq_ignore_ascii_case("utf8")
|
|
||||||
|| item.eq_ignore_ascii_case("charset=UTF-8")
|
|
||||||
{
|
|
||||||
encoding = item;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
i = i + 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
let data: Vec<u8> = if encoding.eq_ignore_ascii_case("base64") {
|
|
||||||
base64::decode(&text).unwrap_or(vec![])
|
|
||||||
} else {
|
|
||||||
text.as_bytes().to_vec()
|
|
||||||
};
|
|
||||||
|
|
||||||
(media_type, data)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn decode_url(input: String) -> String {
|
pub fn decode_url(input: String) -> String {
|
||||||
let input: String = input.replace("+", "%2B");
|
let input: String = input.replace("+", "%2B");
|
||||||
|
|
||||||
@ -138,6 +99,45 @@ pub fn is_http_url<T: AsRef<str>>(url: T) -> bool {
|
|||||||
.unwrap_or(false)
|
.unwrap_or(false)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn parse_data_url<T: AsRef<str>>(url: T) -> (String, Vec<u8>) {
|
||||||
|
let parsed_url: Url = Url::parse(url.as_ref()).unwrap_or(Url::parse("data:,").unwrap());
|
||||||
|
let path: String = parsed_url.path().to_string();
|
||||||
|
let comma_loc: usize = path.find(',').unwrap_or(path.len());
|
||||||
|
|
||||||
|
let meta_data: String = path.chars().take(comma_loc).collect();
|
||||||
|
let raw_data: String = path.chars().skip(comma_loc + 1).collect();
|
||||||
|
|
||||||
|
let text: String = decode_url(raw_data);
|
||||||
|
|
||||||
|
let meta_data_items: Vec<&str> = meta_data.split(';').collect();
|
||||||
|
let mut media_type: String = str!();
|
||||||
|
let mut encoding: &str = "";
|
||||||
|
|
||||||
|
let mut i: i8 = 0;
|
||||||
|
for item in &meta_data_items {
|
||||||
|
if i == 0 {
|
||||||
|
media_type = str!(item);
|
||||||
|
} else {
|
||||||
|
if item.eq_ignore_ascii_case("base64")
|
||||||
|
|| item.eq_ignore_ascii_case("utf8")
|
||||||
|
|| item.eq_ignore_ascii_case("charset=UTF-8")
|
||||||
|
{
|
||||||
|
encoding = item;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
i = i + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
let data: Vec<u8> = if encoding.eq_ignore_ascii_case("base64") {
|
||||||
|
base64::decode(&text).unwrap_or(vec![])
|
||||||
|
} else {
|
||||||
|
text.as_bytes().to_vec()
|
||||||
|
};
|
||||||
|
|
||||||
|
(media_type, data)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn resolve_url<T: AsRef<str>, U: AsRef<str>>(from: T, to: U) -> Result<String, ParseError> {
|
pub fn resolve_url<T: AsRef<str>, U: AsRef<str>>(from: T, to: U) -> Result<String, ParseError> {
|
||||||
let result = if is_http_url(to.as_ref()) {
|
let result = if is_http_url(to.as_ref()) {
|
||||||
to.as_ref().to_string()
|
to.as_ref().to_string()
|
||||||
|
@ -5,7 +5,7 @@ use std::fs;
|
|||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
use crate::opts::Options;
|
use crate::opts::Options;
|
||||||
use crate::url::{clean_url, data_url_to_data, file_url_to_fs_path, is_data_url, is_file_url};
|
use crate::url::{clean_url, file_url_to_fs_path, is_data_url, is_file_url, parse_data_url};
|
||||||
|
|
||||||
const INDENT: &str = " ";
|
const INDENT: &str = " ";
|
||||||
|
|
||||||
@ -83,7 +83,7 @@ pub fn retrieve_asset(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if is_data_url(&url) {
|
if is_data_url(&url) {
|
||||||
let (media_type, data) = data_url_to_data(url);
|
let (media_type, data) = parse_data_url(url);
|
||||||
Ok((data, url.to_string(), media_type))
|
Ok((data, url.to_string(), media_type))
|
||||||
} else if is_file_url(&url) {
|
} else if is_file_url(&url) {
|
||||||
// Check if parent_url is also file:///
|
// Check if parent_url is also file:///
|
||||||
|
Loading…
Reference in New Issue
Block a user