use proper charset detection for linked assets
This commit is contained in:
parent
125aeeec3b
commit
5effa38392
2 changed files with 21 additions and 25 deletions
src
34
src/html.rs
34
src/html.rs
|
@ -606,7 +606,7 @@ pub fn retrieve_and_embed_asset(
|
|||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((data, final_url, mut media_type, _charset)) => {
|
||||
Ok((data, final_url, mut media_type, charset)) => {
|
||||
let node_name: &str = get_node_name(&node).unwrap();
|
||||
|
||||
// Check integrity if it's a LINK or SCRIPT element
|
||||
|
@ -624,23 +624,25 @@ pub fn retrieve_and_embed_asset(
|
|||
}
|
||||
|
||||
if ok_to_include {
|
||||
let s: String;
|
||||
if let Some(encoding) = Encoding::for_label(charset.as_bytes()) {
|
||||
let (string, _, _) = encoding.decode(&data);
|
||||
s = string.to_string();
|
||||
} else {
|
||||
s = String::from_utf8_lossy(&data).to_string();
|
||||
}
|
||||
|
||||
if node_name == "link" && determine_link_node_type(node) == "stylesheet" {
|
||||
// Stylesheet LINK elements require special treatment
|
||||
let css: String = embed_css(
|
||||
cache,
|
||||
client,
|
||||
&final_url,
|
||||
&String::from_utf8_lossy(&data),
|
||||
options,
|
||||
depth + 1,
|
||||
);
|
||||
let css: String = embed_css(cache, client, &final_url, &s, options, depth + 1);
|
||||
|
||||
// Create and embed data URL
|
||||
let css_data_url = create_data_url("text/css", css.as_bytes(), &final_url);
|
||||
// TODO: use charset
|
||||
let css_data_url = create_data_url(&media_type, css.as_bytes(), &final_url);
|
||||
set_node_attr(&node, attr_name, Some(css_data_url.to_string()));
|
||||
} else if node_name == "frame" || node_name == "iframe" {
|
||||
// (I)FRAMEs are also quite different from conventional resources
|
||||
let frame_dom = html_to_dom(&data, "utf-8".to_string());
|
||||
let frame_dom = html_to_dom(&data, charset);
|
||||
walk_and_embed_assets(
|
||||
cache,
|
||||
client,
|
||||
|
@ -679,6 +681,7 @@ pub fn retrieve_and_embed_asset(
|
|||
}
|
||||
|
||||
// Create and embed data URL
|
||||
// TODO: use charset
|
||||
let mut data_url = create_data_url(&media_type, &data, &final_url);
|
||||
data_url.set_fragment(resolved_url.fragment());
|
||||
set_node_attr(node, attr_name, Some(data_url.to_string()));
|
||||
|
@ -725,14 +728,7 @@ pub fn walk_and_embed_assets(
|
|||
|| meta_attr_http_equiv_value.eq_ignore_ascii_case("location")
|
||||
{
|
||||
// Remove http-equiv attributes from META nodes if they're able to control the page
|
||||
set_node_attr(
|
||||
&node,
|
||||
"http-equiv",
|
||||
Some(format!(
|
||||
"disabled by monolith ({})",
|
||||
meta_attr_http_equiv_value
|
||||
)),
|
||||
);
|
||||
set_node_attr(&node, "http-equiv", None);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -290,7 +290,7 @@ mod passing {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn keeps_integrity_for_linked_assets() {
|
||||
fn keeps_integrity_for_unfamiliar_links() {
|
||||
let html = "<title>Has integrity</title>\
|
||||
<link integrity=\"sha384-12345\" rel=\"something\" href=\"https://some-site.com/some-file.ext\" />";
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), str!());
|
||||
|
@ -322,7 +322,7 @@ mod passing {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn discards_integrity_for_linked_assets_nojs_nocss() {
|
||||
fn discards_integrity_for_known_links_nojs_nocss() {
|
||||
let html = "\
|
||||
<title>No integrity</title>\
|
||||
<link integrity=\"\" rel=\"stylesheet\" href=\"data:;\"/>\
|
||||
|
@ -403,8 +403,8 @@ mod passing {
|
|||
let html = "\
|
||||
<html>\
|
||||
<head>\
|
||||
<meta http-equiv=\"Refresh\" value=\"20\"/>\
|
||||
<meta http-equiv=\"Location\" value=\"https://freebsd.org\"/>\
|
||||
<meta http-equiv=\"Refresh\" content=\"2\"/>\
|
||||
<meta http-equiv=\"Location\" content=\"https://freebsd.org\"/>\
|
||||
</head>\
|
||||
<body>\
|
||||
</body>\
|
||||
|
@ -433,8 +433,8 @@ mod passing {
|
|||
"\
|
||||
<html>\
|
||||
<head>\
|
||||
<meta http-equiv=\"disabled by monolith (Refresh)\" value=\"20\">\
|
||||
<meta http-equiv=\"disabled by monolith (Location)\" value=\"https://freebsd.org\">\
|
||||
<meta content=\"2\">\
|
||||
<meta content=\"https://freebsd.org\">\
|
||||
</head>\
|
||||
<body>\
|
||||
</body>\
|
||||
|
|
Loading…
Add table
Reference in a new issue