automatically remove "Refresh" and "Location" META tags

This commit is contained in:
Sunshine 2020-06-25 23:53:20 -04:00
parent c3ca2ad1d5
commit 44cac65a83
No known key found for this signature in database
GPG key ID: B80CA68703CD8AB1
3 changed files with 99 additions and 13 deletions

View file

@ -0,0 +1,19 @@
# 4. Reload and location `meta` nodes
Date: 2020-06-25
## Status
Accepted
## Context
HTML documents may contain `meta` tags capable of automatically refreshing the page or redirecting to another location.
## Decision
Since the resulting document is saved to disk and generally not intended to be served over the network, it only makes sense to remove `meta` nodes that have an `http-equiv` attribute equal to "Refresh" or "Location" in order to prevent them from forcing the page to reload or redirect to another location.
## Consequences
Monolith will ensure that saved documents do not contain `meta` tags capable of changing location or reloading the page.

View file

@ -170,6 +170,25 @@ pub fn walk_and_embed_assets(
let attrs_mut = &mut attrs.borrow_mut(); let attrs_mut = &mut attrs.borrow_mut();
match name.local.as_ref() { match name.local.as_ref() {
"meta" => {
    // A `meta` node is unwanted when it has an `http-equiv` attribute whose
    // value is "refresh" or "location" (both compared case-insensitively).
    let is_unwanted_meta: bool = attrs_mut.iter().any(|attr| {
        let attr_name: &str = &attr.name.local;
        let attr_value: &str = &attr.value;
        attr_name.eq_ignore_ascii_case("http-equiv")
            && (attr_value.eq_ignore_ascii_case("refresh")
                || attr_value.eq_ignore_ascii_case("location"))
    });

    if is_unwanted_meta {
        // Strip this node of all its attributes; the element itself stays
        // in the tree, only its attribute list is emptied.
        attrs_mut.clear();
    }
}
"link" => { "link" => {
// Remove integrity attributes, keep value of the last one // Remove integrity attributes, keep value of the last one
let mut integrity: String = str!(); let mut integrity: String = str!();

View file

@ -381,7 +381,7 @@ mod passing {
} }
#[test] #[test]
fn with_no_integrity() { fn discards_integrity() {
let html = "<title>No integrity</title>\ let html = "<title>No integrity</title>\
<link integrity=\"sha384-...\" rel=\"something\"/>\ <link integrity=\"sha384-...\" rel=\"something\"/>\
<script integrity=\"sha384-...\" src=\"some.js\"></script>"; <script integrity=\"sha384-...\" src=\"some.js\"></script>";
@ -420,4 +420,52 @@ mod passing {
</html>" </html>"
); );
} }
#[test]
fn removes_unwanted_meta_tags() {
    // Input document: two `meta` nodes, one with http-equiv "Refresh" and
    // one with http-equiv "Location".
    let input = "<html>\
                     <head>\
                         <meta http-equiv=\"Refresh\" value=\"20\"/>\
                         <meta http-equiv=\"Location\" value=\"https://freebsd.org\"/>\
                     </head>\
                     <body></body>\
                 </html>";
    let dom = html::html_to_dom(&input);

    // Arguments for the walk, declared in the order they are passed.
    let cache = &mut HashMap::new();
    let client = Client::new();
    let url = "http://localhost";
    let no_css: bool = true;
    let no_fonts: bool = false;
    let no_frames: bool = true;
    let no_js: bool = true;
    let no_images: bool = true;
    let silent = true;

    html::walk_and_embed_assets(
        cache,
        &client,
        &url,
        &dom.document,
        no_css,
        no_fonts,
        no_frames,
        no_js,
        no_images,
        silent,
    );

    // Serialize the processed DOM and turn the bytes into a String.
    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
    let output: String = buf.iter().map(|&byte| byte as char).collect();

    // Both `meta` elements are still present, but with no attributes left.
    assert_eq!(
        output,
        "<html>\
             <head>\
                 <meta>\
                 <meta>\
             </head>\
             <body></body>\
         </html>"
    );
}
} }