improve parsing of data URLs

This commit is contained in:
Sunshine 2020-04-09 20:27:07 -04:00
parent 67d4b7dafc
commit c097733ae7
No known key found for this signature in database
GPG key ID: B80CA68703CD8AB1
6 changed files with 62 additions and 11 deletions

View file

@ -2,7 +2,7 @@ use cssparser::{ParseError, Parser, ParserInput, SourcePosition, Token};
use reqwest::blocking::Client;
use std::collections::HashMap;
use crate::utils::{data_to_data_url, decode_url, get_url_fragment, resolve_url, retrieve_asset};
use crate::utils::{data_to_data_url, get_url_fragment, resolve_url, retrieve_asset};
const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[
"background",
@ -142,12 +142,11 @@ pub fn process_css<'a>(
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
let url_fragment = get_url_fragment(full_url.clone());
let full_url_decoded = decode_url(full_url);
let (css, final_url) = retrieve_asset(
cache,
client,
&parent_url,
&full_url_decoded,
&full_url,
false,
"",
opt_silent,
@ -261,12 +260,11 @@ pub fn process_css<'a>(
if is_import {
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
let url_fragment = get_url_fragment(full_url.clone());
let full_url_decoded = decode_url(full_url);
let (css, final_url) = retrieve_asset(
cache,
client,
&parent_url,
&full_url_decoded,
&full_url,
false,
"",
opt_silent,

View file

@ -163,9 +163,9 @@ fn passing_import_string() {
"\
@charset 'UTF-8';\n\
\n\
@import 'data:text/css;base64,ZGF0YTp0ZXh0L2NzcyxodG1se2JhY2tncm91bmQtY29sb3I6IzAwMH0=';\n\
@import 'data:text/css;base64,aHRtbHtiYWNrZ3JvdW5kLWNvbG9yOiMwMDB9';\n\
\n\
@import url('data:text/css;base64,ZGF0YTp0ZXh0L2NzcyxodG1se2NvbG9yOiNmZmZ9')\n\
@import url('data:text/css;base64,aHRtbHtjb2xvcjojZmZmfQ==')\n\
"
);
}

View file

@ -45,6 +45,14 @@ fn passing_parse_text_html_charset_utf_8_between_two_whitespaces() {
);
}
#[test]
fn passing_parse_text_css_url_encoded() {
    // A non-base64 text/css data URL: the %23 escape is expected to come back as '#'.
    let css = utils::data_url_to_text("data:text/css,div{background-color:%23000}");

    assert_eq!(css, "div{background-color:#000}");
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗

View file

@ -24,3 +24,13 @@ fn passing_decode_file_url() {
"file:///tmp/space here/test#1.html"
);
}
#[test]
fn passing_plus_sign() {
    // The '+' in the query string is expected to come back from decode_url unchanged.
    let decoded = utils::decode_url(str!(
        "fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"
    ));
    let expected = "fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic";

    assert_eq!(decoded, expected);
}

View file

@ -21,3 +21,18 @@ fn passing_remove_protocl_and_fragment() {
);
}
}
#[test]
fn passing_decodes_urls() {
    // Pick the platform-specific file URL and the filesystem path it should map to;
    // in both cases the %20 escapes are expected to become plain spaces.
    let (file_url, expected_path) = if cfg!(windows) {
        (
            "file:///C:/Documents%20and%20Settings/some-file.html",
            "C:\\Documents and Settings\\some-file.html",
        )
    } else {
        ("file:///home/user/My%20Documents", "/home/user/My Documents")
    };

    assert_eq!(utils::file_url_to_fs_path(file_url), expected_path);
}

View file

@ -30,6 +30,14 @@ const MAGIC: [[&[u8]; 2]; 18] = [
[b"\x1A\x45\xDF\xA3", b"video/webm"],
];
/// Media types that `is_plaintext_media_type` recognizes as plaintext.
///
/// Entries are written in lowercase because the lookup lowercases its input
/// before comparing it against this list.
const PLAINTEXT_MEDIA_TYPES: &[&str] = &[
"image/svg+xml",
"text/css",
"text/html",
"text/javascript",
"text/plain",
];
pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str, fragment: &str) -> String {
let media_type: String = if media_type.is_empty() {
detect_media_type(data, &url)
@ -88,6 +96,10 @@ pub fn is_http_url<T: AsRef<str>>(url: T) -> bool {
.unwrap_or(false)
}
/// Reports whether `media_type`, once lowercased, matches one of the entries
/// in `PLAINTEXT_MEDIA_TYPES`.
pub fn is_plaintext_media_type(media_type: &str) -> bool {
    let lowered = media_type.to_lowercase();

    PLAINTEXT_MEDIA_TYPES
        .iter()
        .any(|&known| known == lowered.as_str())
}
pub fn resolve_url<T: AsRef<str>, U: AsRef<str>>(from: T, to: U) -> Result<String, ParseError> {
let result = if is_http_url(to.as_ref()) {
to.as_ref().to_string()
@ -139,10 +151,11 @@ pub fn data_url_to_text<T: AsRef<str>>(url: T) -> String {
let mut media_type: &str = "";
let mut encoding: &str = "";
// Detect media type and encoding
let mut i: i8 = 0;
for item in &meta_data_items {
if i == 0 {
if item.eq_ignore_ascii_case("text/html") {
if is_plaintext_media_type(item) {
media_type = item;
continue;
}
@ -155,7 +168,7 @@ pub fn data_url_to_text<T: AsRef<str>>(url: T) -> String {
i = i + 1;
}
if media_type.eq_ignore_ascii_case("text/html") {
if is_plaintext_media_type(media_type) {
if encoding.eq_ignore_ascii_case("base64") {
String::from_utf8(base64::decode(&data).unwrap_or(vec![])).unwrap_or(str!())
} else {
@ -167,6 +180,8 @@ pub fn data_url_to_text<T: AsRef<str>>(url: T) -> String {
}
pub fn decode_url(input: String) -> String {
let input: String = input.replace("+", "%2B");
form_urlencoded::parse(input.as_bytes())
.map(|(key, val)| {
[
@ -200,7 +215,8 @@ pub fn file_url_to_fs_path(url: &str) -> String {
fs_file_path = fs_file_path.replace("/", "\\");
}
fs_file_path
// File paths should not be %-encoded
decode_url(fs_file_path)
}
pub fn retrieve_asset(
@ -219,7 +235,11 @@ pub fn retrieve_asset(
let cache_key = clean_url(&url);
if is_data_url(&url) {
Ok((url.to_string(), url.to_string()))
if as_data_url {
Ok((url.to_string(), url.to_string()))
} else {
Ok((data_url_to_text(url), url.to_string()))
}
} else if is_file_url(&url) {
// Check if parent_url is also file:///
// (if not, then we don't embed the asset)