improve parsing of data URLs
This commit is contained in:
parent
67d4b7dafc
commit
c097733ae7
6 changed files with 62 additions and 11 deletions
|
@ -2,7 +2,7 @@ use cssparser::{ParseError, Parser, ParserInput, SourcePosition, Token};
|
|||
use reqwest::blocking::Client;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::utils::{data_to_data_url, decode_url, get_url_fragment, resolve_url, retrieve_asset};
|
||||
use crate::utils::{data_to_data_url, get_url_fragment, resolve_url, retrieve_asset};
|
||||
|
||||
const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[
|
||||
"background",
|
||||
|
@ -142,12 +142,11 @@ pub fn process_css<'a>(
|
|||
|
||||
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
|
||||
let url_fragment = get_url_fragment(full_url.clone());
|
||||
let full_url_decoded = decode_url(full_url);
|
||||
let (css, final_url) = retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&parent_url,
|
||||
&full_url_decoded,
|
||||
&full_url,
|
||||
false,
|
||||
"",
|
||||
opt_silent,
|
||||
|
@ -261,12 +260,11 @@ pub fn process_css<'a>(
|
|||
if is_import {
|
||||
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
|
||||
let url_fragment = get_url_fragment(full_url.clone());
|
||||
let full_url_decoded = decode_url(full_url);
|
||||
let (css, final_url) = retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&parent_url,
|
||||
&full_url_decoded,
|
||||
&full_url,
|
||||
false,
|
||||
"",
|
||||
opt_silent,
|
||||
|
|
|
@ -163,9 +163,9 @@ fn passing_import_string() {
|
|||
"\
|
||||
@charset 'UTF-8';\n\
|
||||
\n\
|
||||
@import 'data:text/css;base64,ZGF0YTp0ZXh0L2NzcyxodG1se2JhY2tncm91bmQtY29sb3I6IzAwMH0=';\n\
|
||||
@import 'data:text/css;base64,aHRtbHtiYWNrZ3JvdW5kLWNvbG9yOiMwMDB9';\n\
|
||||
\n\
|
||||
@import url('data:text/css;base64,ZGF0YTp0ZXh0L2NzcyxodG1se2NvbG9yOiNmZmZ9')\n\
|
||||
@import url('data:text/css;base64,aHRtbHtjb2xvcjojZmZmfQ==')\n\
|
||||
"
|
||||
);
|
||||
}
|
||||
|
|
|
@ -45,6 +45,14 @@ fn passing_parse_text_html_charset_utf_8_between_two_whitespaces() {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn passing_parse_text_css_url_encoded() {
|
||||
assert_eq!(
|
||||
utils::data_url_to_text("data:text/css,div{background-color:%23000}"),
|
||||
"div{background-color:#000}"
|
||||
);
|
||||
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
|
|
|
@ -24,3 +24,13 @@ fn passing_decode_file_url() {
|
|||
"file:///tmp/space here/test#1.html"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn passing_plus_sign() {
|
||||
assert_eq!(
|
||||
utils::decode_url(str!(
|
||||
"fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"
|
||||
)),
|
||||
"fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"
|
||||
);
|
||||
}
|
||||
|
|
|
@ -21,3 +21,18 @@ fn passing_remove_protocl_and_fragment() {
|
|||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn passing_decodes_urls() {
|
||||
if cfg!(windows) {
|
||||
assert_eq!(
|
||||
utils::file_url_to_fs_path("file:///C:/Documents%20and%20Settings/some-file.html"),
|
||||
"C:\\Documents and Settings\\some-file.html"
|
||||
);
|
||||
} else {
|
||||
assert_eq!(
|
||||
utils::file_url_to_fs_path("file:///home/user/My%20Documents"),
|
||||
"/home/user/My Documents"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
28
src/utils.rs
28
src/utils.rs
|
@ -30,6 +30,14 @@ const MAGIC: [[&[u8]; 2]; 18] = [
|
|||
[b"\x1A\x45\xDF\xA3", b"video/webm"],
|
||||
];
|
||||
|
||||
const PLAINTEXT_MEDIA_TYPES: &[&str] = &[
|
||||
"image/svg+xml",
|
||||
"text/css",
|
||||
"text/html",
|
||||
"text/javascript",
|
||||
"text/plain",
|
||||
];
|
||||
|
||||
pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str, fragment: &str) -> String {
|
||||
let media_type: String = if media_type.is_empty() {
|
||||
detect_media_type(data, &url)
|
||||
|
@ -88,6 +96,10 @@ pub fn is_http_url<T: AsRef<str>>(url: T) -> bool {
|
|||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
pub fn is_plaintext_media_type(media_type: &str) -> bool {
|
||||
PLAINTEXT_MEDIA_TYPES.contains(&media_type.to_lowercase().as_str())
|
||||
}
|
||||
|
||||
pub fn resolve_url<T: AsRef<str>, U: AsRef<str>>(from: T, to: U) -> Result<String, ParseError> {
|
||||
let result = if is_http_url(to.as_ref()) {
|
||||
to.as_ref().to_string()
|
||||
|
@ -139,10 +151,11 @@ pub fn data_url_to_text<T: AsRef<str>>(url: T) -> String {
|
|||
let mut media_type: &str = "";
|
||||
let mut encoding: &str = "";
|
||||
|
||||
// Detect media type and encoding
|
||||
let mut i: i8 = 0;
|
||||
for item in &meta_data_items {
|
||||
if i == 0 {
|
||||
if item.eq_ignore_ascii_case("text/html") {
|
||||
if is_plaintext_media_type(item) {
|
||||
media_type = item;
|
||||
continue;
|
||||
}
|
||||
|
@ -155,7 +168,7 @@ pub fn data_url_to_text<T: AsRef<str>>(url: T) -> String {
|
|||
i = i + 1;
|
||||
}
|
||||
|
||||
if media_type.eq_ignore_ascii_case("text/html") {
|
||||
if is_plaintext_media_type(media_type) {
|
||||
if encoding.eq_ignore_ascii_case("base64") {
|
||||
String::from_utf8(base64::decode(&data).unwrap_or(vec![])).unwrap_or(str!())
|
||||
} else {
|
||||
|
@ -167,6 +180,8 @@ pub fn data_url_to_text<T: AsRef<str>>(url: T) -> String {
|
|||
}
|
||||
|
||||
pub fn decode_url(input: String) -> String {
|
||||
let input: String = input.replace("+", "%2B");
|
||||
|
||||
form_urlencoded::parse(input.as_bytes())
|
||||
.map(|(key, val)| {
|
||||
[
|
||||
|
@ -200,7 +215,8 @@ pub fn file_url_to_fs_path(url: &str) -> String {
|
|||
fs_file_path = fs_file_path.replace("/", "\\");
|
||||
}
|
||||
|
||||
fs_file_path
|
||||
// File paths should not be %-encoded
|
||||
decode_url(fs_file_path)
|
||||
}
|
||||
|
||||
pub fn retrieve_asset(
|
||||
|
@ -219,7 +235,11 @@ pub fn retrieve_asset(
|
|||
let cache_key = clean_url(&url);
|
||||
|
||||
if is_data_url(&url) {
|
||||
Ok((url.to_string(), url.to_string()))
|
||||
if as_data_url {
|
||||
Ok((url.to_string(), url.to_string()))
|
||||
} else {
|
||||
Ok((data_url_to_text(url), url.to_string()))
|
||||
}
|
||||
} else if is_file_url(&url) {
|
||||
// Check if parent_url is also file:///
|
||||
// (if not, then we don't embed the asset)
|
||||
|
|
Loading…
Reference in a new issue