use html5ever::parse_document;
use html5ever::rcdom::{Handle, NodeData, RcDom};
use html5ever::serialize::{serialize, SerializeOpts};
use html5ever::tendril::TendrilSink;
use http::{is_valid_url, resolve_url, retrieve_asset};
use regex::Regex;
use std::default::Default;
use std::io;
use utils::data_to_dataurl;
// Lazily-initialised, process-wide compiled regex (built on first use by `lazy_static!`).
lazy_static! {
// Matches text that *starts* with one or more lowercase ASCII letters or digits
// immediately followed by a colon, e.g. `http:`, `data:`, `mailto:`.
// NOTE(review): the pattern is case-sensitive and does not allow `+`, `-` or `.`,
// so inputs such as `HTTP:` or `svn+ssh:` do not match — confirm that is intended.
// The `unwrap()` can only fail if the pattern literal itself is invalid.
static ref HAS_PROTOCOL: Regex = Regex::new(r"^[a-z0-9]+:").unwrap();
}
/// Coarse classification of a DOM node by the kind of element it represents.
///
/// The variants carry no data, so the type is cheap to copy and compare; the
/// derived traits make it usable in `assert_eq!`, `{:?}` formatting and `==`
/// checks without affecting existing `match`-based callers.
///
/// NOTE(review): the code that assigns and consumes these values lies outside
/// this block; the per-variant notes below are inferred from the variant names
/// and should be confirmed against that code.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum NodeMatch {
    /// Presumably a `<link>` that references a favicon.
    Icon,
    /// Presumably an `<img>` element.
    Image,
    /// Presumably a `<source>` element.
    Source,
    /// Presumably a `<link>` that references a stylesheet.
    StyleSheet,
    /// Presumably an `<a>` element.
    Anchor,
    /// Presumably a `<script>` element.
    Script,
    /// Presumably a `<form>` element.
    Form,
    /// Presumably an `<iframe>` element.
    IFrame,
    /// Anything that is not one of the kinds above.
    Other,
}
/// A complete `data:` URL holding a base64-encoded PNG image.
///
/// Assembled from the media-type prefix and the payload so each part can be
/// read on its own; the resulting string contains no whitespace.
/// NOTE(review): going by the name this is a single transparent pixel,
/// presumably used as a stand-in for real images — confirm against callers.
const TRANSPARENT_PIXEL: &str = concat!(
    "data:image/png;base64,",
    "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII="
);
/// Lowercase names of inline DOM event-handler attributes (`on…`).
///
/// The order and the fixed length of 21 are part of the constant's type and
/// are kept stable.
/// NOTE(review): presumably consulted to drop inline JavaScript from elements
/// when scripts are excluded — the consumer is outside this block.
const JS_DOM_EVENT_ATTRS: [&str; 21] = [
    // Focus, form and keyboard input
    "onfocus", "onblur", "onselect", "onchange", "onsubmit", "onreset",
    "onkeydown", "onkeypress", "onkeyup",
    // Mouse movement and buttons
    "onmouseover", "onmouseout", "onmousedown", "onmouseup", "onmousemove",
    // Clicks
    "onclick", "ondblclick",
    // Loading and window lifecycle
    "onload", "onunload", "onabort", "onerror", "onresize",
];
/// Returns the local tag name of `node`'s parent element.
///
/// Yields an empty string when there is no element parent to name: the node
/// has no parent, the parent has already been dropped, or the parent is not
/// an element (document, doctype, text, comment, processing instruction).
///
/// The node's parent link is left intact, so the function can be called any
/// number of times on the same node.
fn get_parent_node_name(node: &Handle) -> String {
    // The parent is stored as a weak pointer inside a `Cell`, and a `Weak` is
    // not `Copy`, so the only way to inspect it is to move it out. Upgrade it
    // to a strong handle, then put the weak pointer straight back — otherwise
    // this lookup would silently detach the node from its parent.
    let weak_parent = node.parent.take();
    let parent_node = weak_parent.as_ref().and_then(|weak| weak.upgrade());
    node.parent.set(weak_parent);

    match parent_node {
        Some(parent) => match parent.data {
            NodeData::Element { ref name, .. } => name.local.as_ref().to_string(),
            // Any non-element parent has no tag name.
            _ => String::new(),
        },
        // Orphan node, or the parent no longer exists.
        None => String::new(),
    }
}
pub fn walk_and_embed_assets(
url: &str,
node: &Handle,
opt_no_js: bool,
opt_no_images: bool,
opt_user_agent: &str,
) {
match node.data {
NodeData::Document => {
// Dig deeper
for child in node.children.borrow().iter() {
walk_and_embed_assets(
&url, child,
opt_no_js,
opt_no_images,
opt_user_agent,
);
}
}
NodeData::Doctype { .. } => {}
NodeData::Text { .. } => {}
NodeData::Comment { .. } => {
// Note: in case of opt_no_js being set to true, there's no need to worry about
// getting rid of comments that may contain scripts, e.g.