use proper charset detection for linked assets
This commit is contained in:
parent
125aeeec3b
commit
5effa38392
2 changed files with 21 additions and 25 deletions
src
34
src/html.rs
34
src/html.rs
|
@ -606,7 +606,7 @@ pub fn retrieve_and_embed_asset(
|
||||||
options,
|
options,
|
||||||
depth + 1,
|
depth + 1,
|
||||||
) {
|
) {
|
||||||
Ok((data, final_url, mut media_type, _charset)) => {
|
Ok((data, final_url, mut media_type, charset)) => {
|
||||||
let node_name: &str = get_node_name(&node).unwrap();
|
let node_name: &str = get_node_name(&node).unwrap();
|
||||||
|
|
||||||
// Check integrity if it's a LINK or SCRIPT element
|
// Check integrity if it's a LINK or SCRIPT element
|
||||||
|
@ -624,23 +624,25 @@ pub fn retrieve_and_embed_asset(
|
||||||
}
|
}
|
||||||
|
|
||||||
if ok_to_include {
|
if ok_to_include {
|
||||||
|
let s: String;
|
||||||
|
if let Some(encoding) = Encoding::for_label(charset.as_bytes()) {
|
||||||
|
let (string, _, _) = encoding.decode(&data);
|
||||||
|
s = string.to_string();
|
||||||
|
} else {
|
||||||
|
s = String::from_utf8_lossy(&data).to_string();
|
||||||
|
}
|
||||||
|
|
||||||
if node_name == "link" && determine_link_node_type(node) == "stylesheet" {
|
if node_name == "link" && determine_link_node_type(node) == "stylesheet" {
|
||||||
// Stylesheet LINK elements require special treatment
|
// Stylesheet LINK elements require special treatment
|
||||||
let css: String = embed_css(
|
let css: String = embed_css(cache, client, &final_url, &s, options, depth + 1);
|
||||||
cache,
|
|
||||||
client,
|
|
||||||
&final_url,
|
|
||||||
&String::from_utf8_lossy(&data),
|
|
||||||
options,
|
|
||||||
depth + 1,
|
|
||||||
);
|
|
||||||
|
|
||||||
// Create and embed data URL
|
// Create and embed data URL
|
||||||
let css_data_url = create_data_url("text/css", css.as_bytes(), &final_url);
|
// TODO: use charset
|
||||||
|
let css_data_url = create_data_url(&media_type, css.as_bytes(), &final_url);
|
||||||
set_node_attr(&node, attr_name, Some(css_data_url.to_string()));
|
set_node_attr(&node, attr_name, Some(css_data_url.to_string()));
|
||||||
} else if node_name == "frame" || node_name == "iframe" {
|
} else if node_name == "frame" || node_name == "iframe" {
|
||||||
// (I)FRAMEs are also quite different from conventional resources
|
// (I)FRAMEs are also quite different from conventional resources
|
||||||
let frame_dom = html_to_dom(&data, "utf-8".to_string());
|
let frame_dom = html_to_dom(&data, charset);
|
||||||
walk_and_embed_assets(
|
walk_and_embed_assets(
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
|
@ -679,6 +681,7 @@ pub fn retrieve_and_embed_asset(
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create and embed data URL
|
// Create and embed data URL
|
||||||
|
// TODO: use charset
|
||||||
let mut data_url = create_data_url(&media_type, &data, &final_url);
|
let mut data_url = create_data_url(&media_type, &data, &final_url);
|
||||||
data_url.set_fragment(resolved_url.fragment());
|
data_url.set_fragment(resolved_url.fragment());
|
||||||
set_node_attr(node, attr_name, Some(data_url.to_string()));
|
set_node_attr(node, attr_name, Some(data_url.to_string()));
|
||||||
|
@ -725,14 +728,7 @@ pub fn walk_and_embed_assets(
|
||||||
|| meta_attr_http_equiv_value.eq_ignore_ascii_case("location")
|
|| meta_attr_http_equiv_value.eq_ignore_ascii_case("location")
|
||||||
{
|
{
|
||||||
// Remove http-equiv attributes from META nodes if they're able to control the page
|
// Remove http-equiv attributes from META nodes if they're able to control the page
|
||||||
set_node_attr(
|
set_node_attr(&node, "http-equiv", None);
|
||||||
&node,
|
|
||||||
"http-equiv",
|
|
||||||
Some(format!(
|
|
||||||
"disabled by monolith ({})",
|
|
||||||
meta_attr_http_equiv_value
|
|
||||||
)),
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -290,7 +290,7 @@ mod passing {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn keeps_integrity_for_linked_assets() {
|
fn keeps_integrity_for_unfamiliar_links() {
|
||||||
let html = "<title>Has integrity</title>\
|
let html = "<title>Has integrity</title>\
|
||||||
<link integrity=\"sha384-12345\" rel=\"something\" href=\"https://some-site.com/some-file.ext\" />";
|
<link integrity=\"sha384-12345\" rel=\"something\" href=\"https://some-site.com/some-file.ext\" />";
|
||||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||||
|
@ -322,7 +322,7 @@ mod passing {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn discards_integrity_for_linked_assets_nojs_nocss() {
|
fn discards_integrity_for_known_links_nojs_nocss() {
|
||||||
let html = "\
|
let html = "\
|
||||||
<title>No integrity</title>\
|
<title>No integrity</title>\
|
||||||
<link integrity=\"\" rel=\"stylesheet\" href=\"data:;\"/>\
|
<link integrity=\"\" rel=\"stylesheet\" href=\"data:;\"/>\
|
||||||
|
@ -403,8 +403,8 @@ mod passing {
|
||||||
let html = "\
|
let html = "\
|
||||||
<html>\
|
<html>\
|
||||||
<head>\
|
<head>\
|
||||||
<meta http-equiv=\"Refresh\" value=\"20\"/>\
|
<meta http-equiv=\"Refresh\" content=\"2\"/>\
|
||||||
<meta http-equiv=\"Location\" value=\"https://freebsd.org\"/>\
|
<meta http-equiv=\"Location\" content=\"https://freebsd.org\"/>\
|
||||||
</head>\
|
</head>\
|
||||||
<body>\
|
<body>\
|
||||||
</body>\
|
</body>\
|
||||||
|
@ -433,8 +433,8 @@ mod passing {
|
||||||
"\
|
"\
|
||||||
<html>\
|
<html>\
|
||||||
<head>\
|
<head>\
|
||||||
<meta http-equiv=\"disabled by monolith (Refresh)\" value=\"20\">\
|
<meta content=\"2\">\
|
||||||
<meta http-equiv=\"disabled by monolith (Location)\" value=\"https://freebsd.org\">\
|
<meta content=\"https://freebsd.org\">\
|
||||||
</head>\
|
</head>\
|
||||||
<body>\
|
<body>\
|
||||||
</body>\
|
</body>\
|
||||||
|
|
Loading…
Add table
Reference in a new issue