Merge pull request #37 from Y2Z/ignore-other-protocols

Avoid modifying non-HTTP anchor hrefs
This commit is contained in:
Vincent Flyson 2019-08-24 20:07:25 -04:00 committed by GitHub
commit 2c0037fd51
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -1,12 +1,16 @@
use http::{is_valid_url, resolve_url, retrieve_asset};
use std::default::Default;
use std::io;
use utils::data_to_dataurl;
use html5ever::parse_document;
use html5ever::rcdom::{Handle, NodeData, RcDom};
use html5ever::serialize::{serialize, SerializeOpts};
use html5ever::tendril::TendrilSink;
use http::{is_valid_url, resolve_url, retrieve_asset};
use regex::Regex;
use std::default::Default;
use std::io;
use utils::data_to_dataurl;
lazy_static! {
    /// Lazily compiled pattern matching a leading run of lowercase ASCII
    /// letters and/or digits that is immediately followed by a colon.
    /// The pattern itself is case-sensitive.
    static ref HAS_PROTOCOL: Regex = {
        let pattern = r"^[a-z0-9]+:";
        Regex::new(pattern).unwrap()
    };
}
enum NodeMatch {
Icon,
@ -200,7 +204,7 @@ pub fn walk_and_embed_assets(
for attr in attrs_mut.iter_mut() {
if &attr.name.local == "href" {
// Don't touch hrefs which begin with a hash sign or which specify a protocol (e.g. mailto:)
if attr.value.starts_with('#') || attr.value.starts_with("mailto:") {
if attr.value.starts_with('#') || has_protocol(&attr.value) {
continue;
}
@ -318,6 +322,10 @@ pub fn walk_and_embed_assets(
}
}
/// Reports whether `url` starts with a URL scheme ("protocol") followed by `:`.
///
/// A scheme is recognised as described in RFC 3986 section 3.1: one ASCII
/// letter followed by any number of ASCII letters, digits, `+`, `-` or `.`.
/// The check is case-insensitive, so `MAILTO:` counts just like `mailto:`.
///
/// Returns `false` for the empty string, for protocol-relative URLs
/// (`//host/path`), for plain paths, and for anything whose text before the
/// first `:` is not a well-formed scheme (e.g. `/a?b=c:d`).
fn has_protocol(url: &str) -> bool {
    // Everything before the first colon is the scheme candidate.
    let candidate = match url.find(':') {
        Some(colon_at) => &url[..colon_at],
        None => return false,
    };

    let mut chars = candidate.chars();
    match chars.next() {
        // A scheme must begin with a letter; the remaining characters may
        // additionally be digits or one of `+`, `-`, `.` (as in `svn+ssh`).
        Some(first) if first.is_ascii_alphabetic() => {
            chars.all(|c| c.is_ascii_alphanumeric() || c == '+' || c == '-' || c == '.')
        }
        // Empty candidate (`":foo"`) or a non-letter first character.
        _ => false,
    }
}
pub fn html_to_dom(data: &str) -> html5ever::rcdom::RcDom {
parse_document(RcDom::default(), Default::default())
.from_utf8()
@ -345,4 +353,19 @@ mod tests {
assert_eq!(is_icon("icon"), true);
assert_eq!(is_icon("stylesheet"), false);
}
#[test]
fn test_has_protocol() {
    // Inputs that begin with an explicit protocol, including an upper-case one.
    let with_protocol = [
        "mailto:somebody@somewhere.com?subject=hello",
        "tel:5551234567",
        "ftp:user:password@some-ftp-server.com",
        "javascript:void(0)",
        "http://news.ycombinator.com",
        "https://github.com",
        "MAILTO:somebody@somewhere.com?subject=hello",
    ];
    for &url in with_protocol.iter() {
        assert!(has_protocol(url), "expected a protocol in {:?}", url);
    }

    // Protocol-relative, host-relative, path-only and empty inputs.
    let without_protocol = [
        "//some-hostname.com/some-file.html",
        "some-hostname.com/some-file.html",
        "/some-file.html",
        "",
    ];
    for &url in without_protocol.iter() {
        assert!(!has_protocol(url), "expected no protocol in {:?}", url);
    }
}
}