diff --git a/src/html.rs b/src/html.rs
index 7857e59..3739d3d 100644
--- a/src/html.rs
+++ b/src/html.rs
@@ -60,38 +60,36 @@ pub fn add_favicon(document: &Handle, favicon_data_url: String) -> RcDom {
dom
}
-pub fn get_parent_node(node: &Handle) -> Handle {
- let parent = node.parent.take().clone();
- parent.and_then(|node| node.upgrade()).unwrap()
-}
+pub fn csp(options: &Options) -> String {
+ let mut string_list = vec![];
-pub fn get_node_name(node: &Handle) -> Option<&'_ str> {
- match &node.data {
- NodeData::Element { ref name, .. } => Some(name.local.as_ref()),
- _ => None,
+ if options.isolate {
+ string_list.push("default-src 'unsafe-inline' data:;");
}
-}
-pub fn is_icon(attr_value: &str) -> bool {
- ICON_VALUES.contains(&attr_value.to_lowercase().as_str())
-}
-
-pub fn has_proper_integrity(data: &[u8], integrity: &str) -> bool {
- if integrity.starts_with("sha256-") {
- let mut hasher = Sha256::new();
- hasher.update(data);
- base64::encode(hasher.finalize()) == integrity[7..]
- } else if integrity.starts_with("sha384-") {
- let mut hasher = Sha384::new();
- hasher.update(data);
- base64::encode(hasher.finalize()) == integrity[7..]
- } else if integrity.starts_with("sha512-") {
- let mut hasher = Sha512::new();
- hasher.update(data);
- base64::encode(hasher.finalize()) == integrity[7..]
- } else {
- false
+ if options.no_css {
+ string_list.push("style-src 'none';");
}
+
+ if options.no_fonts {
+ string_list.push("font-src 'none';");
+ }
+
+ if options.no_frames {
+ string_list.push("frame-src 'none';");
+ string_list.push("child-src 'none';");
+ }
+
+ if options.no_js {
+ string_list.push("script-src 'none';");
+ }
+
+ if options.no_images {
+ // Note: data: is needed for transparent pixels
+ string_list.push("img-src data:;");
+ }
+
+ string_list.join(" ")
}
pub fn embed_srcset(
@@ -165,6 +163,48 @@ pub fn embed_srcset(
result
}
+fn get_child_node_by_name(handle: &Handle, node_name: &str) -> Handle {
+ let children = handle.children.borrow();
+ let matching_children = children.iter().find(|child| match child.data {
+ NodeData::Element { ref name, .. } => &*name.local == node_name,
+ _ => false,
+ });
+ match matching_children {
+ Some(node) => node.clone(),
+ _ => handle.clone(),
+ }
+}
+
+pub fn get_node_name(node: &Handle) -> Option<&'_ str> {
+ match &node.data {
+ NodeData::Element { ref name, .. } => Some(name.local.as_ref()),
+ _ => None,
+ }
+}
+
+pub fn get_parent_node(node: &Handle) -> Handle {
+ let parent = node.parent.take().clone();
+ parent.and_then(|node| node.upgrade()).unwrap()
+}
+
+pub fn has_proper_integrity(data: &[u8], integrity: &str) -> bool {
+ if integrity.starts_with("sha256-") {
+ let mut hasher = Sha256::new();
+ hasher.update(data);
+ base64::encode(hasher.finalize()) == integrity[7..]
+ } else if integrity.starts_with("sha384-") {
+ let mut hasher = Sha384::new();
+ hasher.update(data);
+ base64::encode(hasher.finalize()) == integrity[7..]
+ } else if integrity.starts_with("sha512-") {
+ let mut hasher = Sha512::new();
+ hasher.update(data);
+ base64::encode(hasher.finalize()) == integrity[7..]
+ } else {
+ false
+ }
+}
+
pub fn has_favicon(handle: &Handle) -> bool {
let mut found_favicon: bool = false;
@@ -215,6 +255,107 @@ pub fn has_favicon(handle: &Handle) -> bool {
found_favicon
}
+pub fn html_to_dom(data: &str) -> RcDom {
+ parse_document(RcDom::default(), Default::default())
+ .from_utf8()
+ .read_from(&mut data.as_bytes())
+ .unwrap()
+}
+
+pub fn is_icon(attr_value: &str) -> bool {
+ ICON_VALUES.contains(&attr_value.to_lowercase().as_str())
+}
+
+pub fn metadata_tag(url: &str) -> String {
+ let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
+
+ // Safe to unwrap (we just put this through an HTTP request)
+ match Url::parse(url) {
+ Ok(mut clean_url) => {
+ clean_url.set_fragment(None);
+
+ // Prevent credentials from getting into metadata
+ if is_http_url(url) {
+ // Only HTTP(S) URLs may feature credentials
+ clean_url.set_username("").unwrap();
+ clean_url.set_password(None).unwrap();
+ }
+
+ if is_http_url(url) {
+ format!(
+ "",
+ &clean_url,
+ timestamp,
+ env!("CARGO_PKG_NAME"),
+ env!("CARGO_PKG_VERSION"),
+ )
+ } else {
+ format!(
+ "",
+ timestamp,
+ env!("CARGO_PKG_NAME"),
+ env!("CARGO_PKG_VERSION"),
+ )
+ }
+ }
+ Err(_) => str!(),
+ }
+}
+
+pub fn stringify_document(handle: &Handle, options: &Options) -> String {
+ let mut buf: Vec = Vec::new();
+ serialize(&mut buf, handle, SerializeOpts::default())
+ .expect("unable to serialize DOM into buffer");
+
+ let mut result = String::from_utf8(buf).unwrap();
+
+ // We can't make it isolate the page right away since it may have no HEAD element,
+ // ergo we have to serialize, parse the DOM again, insert the CSP meta tag, and then
+ // finally serialize and return the resulting string
+ if options.isolate
+ || options.no_css
+ || options.no_fonts
+ || options.no_frames
+ || options.no_js
+ || options.no_images
+ {
+ // Take care of CSP
+ let mut buf: Vec = Vec::new();
+ let mut dom = html_to_dom(&result);
+ let doc = dom.get_document();
+ let html = get_child_node_by_name(&doc, "html");
+ let head = get_child_node_by_name(&html, "head");
+ let csp_content: String = csp(options);
+
+ let meta = dom.create_element(
+ QualName::new(None, ns!(), local_name!("meta")),
+ vec![
+ Attribute {
+ name: QualName::new(None, ns!(), local_name!("http-equiv")),
+ value: format_tendril!("Content-Security-Policy"),
+ },
+ Attribute {
+ name: QualName::new(None, ns!(), local_name!("content")),
+ value: format_tendril!("{}", csp_content),
+ },
+ ],
+ Default::default(),
+ );
+ // Note: the CSP meta-tag has to be prepended, never appended,
+ // since there already may be one defined in the document,
+ // and browsers don't allow re-defining them (for obvious reasons)
+ head.children.borrow_mut().reverse();
+ head.children.borrow_mut().push(meta.clone());
+ head.children.borrow_mut().reverse();
+
+ serialize(&mut buf, &doc, SerializeOpts::default())
+ .expect("unable to serialize DOM into buffer");
+ result = String::from_utf8(buf).unwrap();
+ }
+
+ result
+}
+
pub fn walk_and_embed_assets(
cache: &mut HashMap>,
client: &Client,
@@ -1135,144 +1276,3 @@ pub fn walk_and_embed_assets(
}
}
}
-
-pub fn html_to_dom(data: &str) -> RcDom {
- parse_document(RcDom::default(), Default::default())
- .from_utf8()
- .read_from(&mut data.as_bytes())
- .unwrap()
-}
-
-fn get_child_node_by_name(handle: &Handle, node_name: &str) -> Handle {
- let children = handle.children.borrow();
- let matching_children = children.iter().find(|child| match child.data {
- NodeData::Element { ref name, .. } => &*name.local == node_name,
- _ => false,
- });
- match matching_children {
- Some(node) => node.clone(),
- _ => handle.clone(),
- }
-}
-
-pub fn stringify_document(handle: &Handle, options: &Options) -> String {
- let mut buf: Vec = Vec::new();
- serialize(&mut buf, handle, SerializeOpts::default())
- .expect("unable to serialize DOM into buffer");
-
- let mut result = String::from_utf8(buf).unwrap();
-
- // We can't make it isolate the page right away since it may have no HEAD element,
- // ergo we have to serialize, parse the DOM again, insert the CSP meta tag, and then
- // finally serialize and return the resulting string
- if options.isolate
- || options.no_css
- || options.no_fonts
- || options.no_frames
- || options.no_js
- || options.no_images
- {
- // Take care of CSP
- let mut buf: Vec = Vec::new();
- let mut dom = html_to_dom(&result);
- let doc = dom.get_document();
- let html = get_child_node_by_name(&doc, "html");
- let head = get_child_node_by_name(&html, "head");
- let csp_content: String = csp(options);
-
- let meta = dom.create_element(
- QualName::new(None, ns!(), local_name!("meta")),
- vec![
- Attribute {
- name: QualName::new(None, ns!(), local_name!("http-equiv")),
- value: format_tendril!("Content-Security-Policy"),
- },
- Attribute {
- name: QualName::new(None, ns!(), local_name!("content")),
- value: format_tendril!("{}", csp_content),
- },
- ],
- Default::default(),
- );
- // Note: the CSP meta-tag has to be prepended, never appended,
- // since there already may be one defined in the document,
- // and browsers don't allow re-defining them (for obvious reasons)
- head.children.borrow_mut().reverse();
- head.children.borrow_mut().push(meta.clone());
- head.children.borrow_mut().reverse();
-
- serialize(&mut buf, &doc, SerializeOpts::default())
- .expect("unable to serialize DOM into buffer");
- result = String::from_utf8(buf).unwrap();
- }
-
- result
-}
-
-pub fn csp(options: &Options) -> String {
- let mut string_list = vec![];
-
- if options.isolate {
- string_list.push("default-src 'unsafe-inline' data:;");
- }
-
- if options.no_css {
- string_list.push("style-src 'none';");
- }
-
- if options.no_fonts {
- string_list.push("font-src 'none';");
- }
-
- if options.no_frames {
- string_list.push("frame-src 'none';");
- string_list.push("child-src 'none';");
- }
-
- if options.no_js {
- string_list.push("script-src 'none';");
- }
-
- if options.no_images {
- // Note: data: is needed for transparent pixels
- string_list.push("img-src data:;");
- }
-
- string_list.join(" ")
-}
-
-pub fn metadata_tag(url: &str) -> String {
- let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
-
- // Safe to unwrap (we just put this through an HTTP request)
- match Url::parse(url) {
- Ok(mut clean_url) => {
- clean_url.set_fragment(None);
-
- // Prevent credentials from getting into metadata
- if is_http_url(url) {
- // Only HTTP(S) URLs may feature credentials
- clean_url.set_username("").unwrap();
- clean_url.set_password(None).unwrap();
- }
-
- if is_http_url(url) {
- format!(
- "",
- &clean_url,
- timestamp,
- env!("CARGO_PKG_NAME"),
- env!("CARGO_PKG_VERSION"),
- )
- } else {
- format!(
- "",
- timestamp,
- env!("CARGO_PKG_NAME"),
- env!("CARGO_PKG_VERSION"),
- )
- }
- }
- Err(_) => str!(),
- }
-}