automatically remove "Refresh" and "Location" META tags
This commit is contained in:
parent
c3ca2ad1d5
commit
44cac65a83
3 changed files with 99 additions and 13 deletions
19
docs/arch/0006-reload-and-location-meta-nodes.md
Normal file
19
docs/arch/0006-reload-and-location-meta-nodes.md
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
# 6. Reload and location `meta` nodes
|
||||||
|
|
||||||
|
Date: 2020-06-25
|
||||||
|
|
||||||
|
## Status
|
||||||
|
|
||||||
|
Accepted
|
||||||
|
|
||||||
|
## Context
|
||||||
|
|
||||||
|
HTML documents may contain `meta` tags capable of automatically refreshing the page or redirecting to another location.
|
||||||
|
|
||||||
|
## Decision
|
||||||
|
|
||||||
|
Since the resulting document is saved to disk and generally not intended to be served over the network, it only makes sense to remove `meta` nodes that have an `http-equiv` attribute equal to "Refresh" or "Location" in order to prevent them from forcing the page to reload or redirect to another location.
|
||||||
|
|
||||||
|
## Consequences
|
||||||
|
|
||||||
|
Monolith will ensure that saved documents do not contain `meta` tags capable of changing location or reloading the page.
|
19
src/html.rs
19
src/html.rs
|
@ -170,6 +170,25 @@ pub fn walk_and_embed_assets(
|
||||||
let attrs_mut = &mut attrs.borrow_mut();
|
let attrs_mut = &mut attrs.borrow_mut();
|
||||||
|
|
||||||
match name.local.as_ref() {
|
match name.local.as_ref() {
|
||||||
|
"meta" => {
|
||||||
|
// Determine type
|
||||||
|
let mut is_unwanted_meta: bool = false;
|
||||||
|
for attr in attrs_mut.iter_mut() {
|
||||||
|
let attr_name: &str = &attr.name.local;
|
||||||
|
if attr_name.eq_ignore_ascii_case("http-equiv") {
|
||||||
|
let value: String = attr.value.to_string();
|
||||||
|
is_unwanted_meta = value.eq_ignore_ascii_case("refresh")
|
||||||
|
|| value.eq_ignore_ascii_case("location");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if is_unwanted_meta {
|
||||||
|
// Strip this node of all its attributes
|
||||||
|
while attrs_mut.len() > 0 {
|
||||||
|
attrs_mut.remove(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
"link" => {
|
"link" => {
|
||||||
// Remove integrity attributes, keep value of the last one
|
// Remove integrity attributes, keep value of the last one
|
||||||
let mut integrity: String = str!();
|
let mut integrity: String = str!();
|
||||||
|
|
|
@ -381,7 +381,7 @@ mod passing {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn with_no_integrity() {
|
fn discards_integrity() {
|
||||||
let html = "<title>No integrity</title>\
|
let html = "<title>No integrity</title>\
|
||||||
<link integrity=\"sha384-...\" rel=\"something\"/>\
|
<link integrity=\"sha384-...\" rel=\"something\"/>\
|
||||||
<script integrity=\"sha384-...\" src=\"some.js\"></script>";
|
<script integrity=\"sha384-...\" src=\"some.js\"></script>";
|
||||||
|
@ -420,4 +420,52 @@ mod passing {
|
||||||
</html>"
|
</html>"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn removes_unwanted_meta_tags() {
|
||||||
|
let html = "<html>\
|
||||||
|
<head>\
|
||||||
|
<meta http-equiv=\"Refresh\" value=\"20\"/>\
|
||||||
|
<meta http-equiv=\"Location\" value=\"https://freebsd.org\"/>\
|
||||||
|
</head>\
|
||||||
|
<body></body>\
|
||||||
|
</html>";
|
||||||
|
let dom = html::html_to_dom(&html);
|
||||||
|
let url = "http://localhost";
|
||||||
|
let cache = &mut HashMap::new();
|
||||||
|
let client = Client::new();
|
||||||
|
let opt_no_css: bool = true;
|
||||||
|
let opt_no_fonts: bool = false;
|
||||||
|
let opt_no_frames: bool = true;
|
||||||
|
let opt_no_js: bool = true;
|
||||||
|
let opt_no_images: bool = true;
|
||||||
|
let opt_silent = true;
|
||||||
|
|
||||||
|
html::walk_and_embed_assets(
|
||||||
|
cache,
|
||||||
|
&client,
|
||||||
|
&url,
|
||||||
|
&dom.document,
|
||||||
|
opt_no_css,
|
||||||
|
opt_no_fonts,
|
||||||
|
opt_no_frames,
|
||||||
|
opt_no_js,
|
||||||
|
opt_no_images,
|
||||||
|
opt_silent,
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
|
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||||
|
"<html>\
|
||||||
|
<head>\
|
||||||
|
<meta>\
|
||||||
|
<meta>\
|
||||||
|
</head>\
|
||||||
|
<body></body>\
|
||||||
|
</html>"
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue