automatically remove "Refresh" and "Location" META tags
This commit is contained in:
parent
c3ca2ad1d5
commit
44cac65a83
3 changed files with 99 additions and 13 deletions
19
docs/arch/0006-reload-and-location-meta-nodes.md
Normal file
19
docs/arch/0006-reload-and-location-meta-nodes.md
Normal file
|
@ -0,0 +1,19 @@
|
|||
# 6. Reload and location `meta` nodes
|
||||
|
||||
Date: 2020-06-25
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
HTML documents may contain `meta` tags capable of automatically refreshing the page or redirecting to another location.
|
||||
|
||||
## Decision
|
||||
|
||||
Since the resulting document is saved to disk and generally not intended to be served over the network, it only makes sense to remove `meta` nodes that have an `http-equiv` attribute equal to "Refresh" or "Location", in order to prevent them from forcing the page to reload or redirect to another location.
|
||||
|
||||
## Consequences
|
||||
|
||||
Monolith will ensure that saved documents do not contain `meta` tags capable of changing location or reloading the page.
|
19
src/html.rs
19
src/html.rs
|
@ -170,6 +170,25 @@ pub fn walk_and_embed_assets(
|
|||
let attrs_mut = &mut attrs.borrow_mut();
|
||||
|
||||
match name.local.as_ref() {
|
||||
"meta" => {
|
||||
// Determine type
|
||||
let mut is_unwanted_meta: bool = false;
|
||||
for attr in attrs_mut.iter_mut() {
|
||||
let attr_name: &str = &attr.name.local;
|
||||
if attr_name.eq_ignore_ascii_case("http-equiv") {
|
||||
let value: String = attr.value.to_string();
|
||||
is_unwanted_meta = value.eq_ignore_ascii_case("refresh")
|
||||
|| value.eq_ignore_ascii_case("location");
|
||||
}
|
||||
}
|
||||
|
||||
if is_unwanted_meta {
|
||||
// Strip this node of all its attributes
|
||||
while attrs_mut.len() > 0 {
|
||||
attrs_mut.remove(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
"link" => {
|
||||
// Remove integrity attributes, keep value of the last one
|
||||
let mut integrity: String = str!();
|
||||
|
|
|
@ -211,15 +211,15 @@ mod passing {
|
|||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
format!(
|
||||
"<html>\
|
||||
<head>\
|
||||
<link rel=\"icon\">\
|
||||
</head>\
|
||||
<body>\
|
||||
<div>\
|
||||
<img src=\"{empty_image}\">\
|
||||
</div>\
|
||||
</body>\
|
||||
</html>",
|
||||
<head>\
|
||||
<link rel=\"icon\">\
|
||||
</head>\
|
||||
<body>\
|
||||
<div>\
|
||||
<img src=\"{empty_image}\">\
|
||||
</div>\
|
||||
</body>\
|
||||
</html>",
|
||||
empty_image = empty_image!()
|
||||
)
|
||||
);
|
||||
|
@ -341,8 +341,8 @@ mod passing {
|
|||
#[test]
|
||||
fn no_js() {
|
||||
let html = "<div onClick=\"void(0)\">\
|
||||
<script src=\"http://localhost/assets/some.js\"></script>\
|
||||
<script>alert(1)</script>\
|
||||
<script src=\"http://localhost/assets/some.js\"></script>\
|
||||
<script>alert(1)</script>\
|
||||
</div>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let url = "http://localhost";
|
||||
|
@ -381,7 +381,7 @@ mod passing {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn with_no_integrity() {
|
||||
fn discards_integrity() {
|
||||
let html = "<title>No integrity</title>\
|
||||
<link integrity=\"sha384-...\" rel=\"something\"/>\
|
||||
<script integrity=\"sha384-...\" src=\"some.js\"></script>";
|
||||
|
@ -415,8 +415,56 @@ mod passing {
|
|||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
"<html>\
|
||||
<head><title>No integrity</title><link rel=\"something\"><script></script></head>\
|
||||
<head><title>No integrity</title><link rel=\"something\"><script></script></head>\
|
||||
<body></body>\
|
||||
</html>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn removes_unwanted_meta_tags() {
    // Input: a document whose head holds two `meta` tags, one with
    // http-equiv="Refresh" and one with http-equiv="Location".
    let html = "<html>\
                    <head>\
                        <meta http-equiv=\"Refresh\" value=\"20\"/>\
                        <meta http-equiv=\"Location\" value=\"https://freebsd.org\"/>\
                    </head>\
                    <body></body>\
                </html>";

    // Expected serialization: both `meta` elements remain, but without any attributes.
    let expected = "<html>\
                        <head>\
                            <meta>\
                            <meta>\
                        </head>\
                        <body></body>\
                    </html>";

    let dom = html::html_to_dom(&html);
    let url = "http://localhost";
    let cache = &mut HashMap::new();
    let client = Client::new();

    // Flags handed to the walker, in the order its signature expects them.
    let (opt_no_css, opt_no_fonts, opt_no_frames): (bool, bool, bool) = (true, false, true);
    let (opt_no_js, opt_no_images): (bool, bool) = (true, true);
    let opt_silent = true;

    html::walk_and_embed_assets(
        cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_fonts,
        opt_no_frames,
        opt_no_js,
        opt_no_images,
        opt_silent,
    );

    // Serialize the processed DOM and compare it, byte by byte as chars, with the expectation.
    let mut serialized: Vec<u8> = Vec::new();
    serialize(&mut serialized, &dom.document, SerializeOpts::default()).unwrap();
    let actual: String = serialized.iter().map(|&byte| char::from(byte)).collect();

    assert_eq!(actual, expected);
}
|
||||
|
|
Loading…
Reference in a new issue