use html5ever::parse_document;
use html5ever::rcdom::{Handle, NodeData, RcDom};
use html5ever::serialize::{serialize, SerializeOpts};
use html5ever::tendril::TendrilSink;
use http::{is_valid_url, resolve_url, retrieve_asset};
use regex::Regex;
use std::default::Default;
use std::io;
use utils::data_to_dataurl;
lazy_static! {
    // Shared empty string, cloned wherever an owned "no value" `String` is needed.
    static ref EMPTY_STRING: String = String::new();

    // Matches text that starts with one or more of `a-z` / `0-9` followed by `:`.
    static ref HAS_PROTOCOL: Regex = Regex::new(r"^[a-z0-9]+:").unwrap();

    // Matches a value that is exactly one of the listed icon keywords.
    // The alternation is grouped so that `^` and `$` apply to every alternative;
    // without the group only `icon` is anchored at the start and only `fluid-icon`
    // at the end, letting the remaining keywords match anywhere inside a longer value.
    static ref ICON_VALUES: Regex = Regex::new(
        r"^(?:icon|shortcut icon|mask-icon|apple-touch-icon|fluid-icon)$"
    ).unwrap();
}
/// Base64-encoded PNG wrapped in a `data:` URL (`image/png` media type).
const TRANSPARENT_PIXEL: &str = concat!(
    "data:image/png;base64,",
    "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=",
);
/// Names of inline DOM event-handler attributes (`on*`), grouped by event family.
const JS_DOM_EVENT_ATTRS: [&str; 21] = [
    // Form input and keyboard events
    "onfocus", "onblur", "onselect", "onchange", "onsubmit", "onreset",
    "onkeydown", "onkeypress", "onkeyup",
    // Mouse pointer events
    "onmouseover", "onmouseout", "onmousedown", "onmouseup", "onmousemove",
    // Click events
    "onclick", "ondblclick",
    // Loading and window events
    "onload", "onunload", "onabort", "onerror", "onresize",
];
/// Returns the tag name of the element that is the parent of `node`.
///
/// An empty string is returned when the parent is not an element (for example
/// the document root) or when `node` has no live parent.
fn get_parent_node_name(node: &Handle) -> String {
    // `parent` sits behind a `Cell`, so the weak handle has to be moved out to be
    // inspected. It is put straight back afterwards: leaving `None` behind would
    // detach `node` from its parent and break every later parent lookup.
    let weak_parent = node.parent.take();
    let parent_node = weak_parent.as_ref().and_then(|weak| weak.upgrade());
    node.parent.set(weak_parent);

    match parent_node {
        Some(parent) => match &parent.data {
            NodeData::Element { name, .. } => name.local.as_ref().to_string(),
            // Non-element parents carry no tag name.
            NodeData::Document
            | NodeData::Doctype { .. }
            | NodeData::Text { .. }
            | NodeData::Comment { .. }
            | NodeData::ProcessingInstruction { .. } => EMPTY_STRING.clone(),
        },
        // Detached node, or the parent has already been dropped.
        None => EMPTY_STRING.clone(),
    }
}
pub fn walk_and_embed_assets(
url: &str,
node: &Handle,
opt_no_js: bool,
opt_no_images: bool,
opt_user_agent: &str,
opt_silent: bool,
opt_insecure: bool,
) {
match node.data {
NodeData::Document => {
// Dig deeper
for child in node.children.borrow().iter() {
walk_and_embed_assets(
&url, child,
opt_no_js,
opt_no_images,
opt_user_agent,
opt_silent,
opt_insecure,
);
}
}
NodeData::Doctype { .. } => {}
NodeData::Text { .. } => {}
NodeData::Comment { .. } => {
// Note: in case of opt_no_js being set to true, there's no need to worry about
// getting rid of comments that may contain scripts, e.g.