improve parsing of data URLs
This commit is contained in:
parent
67d4b7dafc
commit
c097733ae7
@ -2,7 +2,7 @@ use cssparser::{ParseError, Parser, ParserInput, SourcePosition, Token};
|
|||||||
use reqwest::blocking::Client;
|
use reqwest::blocking::Client;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use crate::utils::{data_to_data_url, decode_url, get_url_fragment, resolve_url, retrieve_asset};
|
use crate::utils::{data_to_data_url, get_url_fragment, resolve_url, retrieve_asset};
|
||||||
|
|
||||||
const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[
|
const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[
|
||||||
"background",
|
"background",
|
||||||
@ -142,12 +142,11 @@ pub fn process_css<'a>(
|
|||||||
|
|
||||||
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
|
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
|
||||||
let url_fragment = get_url_fragment(full_url.clone());
|
let url_fragment = get_url_fragment(full_url.clone());
|
||||||
let full_url_decoded = decode_url(full_url);
|
|
||||||
let (css, final_url) = retrieve_asset(
|
let (css, final_url) = retrieve_asset(
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
&parent_url,
|
&parent_url,
|
||||||
&full_url_decoded,
|
&full_url,
|
||||||
false,
|
false,
|
||||||
"",
|
"",
|
||||||
opt_silent,
|
opt_silent,
|
||||||
@ -261,12 +260,11 @@ pub fn process_css<'a>(
|
|||||||
if is_import {
|
if is_import {
|
||||||
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
|
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
|
||||||
let url_fragment = get_url_fragment(full_url.clone());
|
let url_fragment = get_url_fragment(full_url.clone());
|
||||||
let full_url_decoded = decode_url(full_url);
|
|
||||||
let (css, final_url) = retrieve_asset(
|
let (css, final_url) = retrieve_asset(
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
&parent_url,
|
&parent_url,
|
||||||
&full_url_decoded,
|
&full_url,
|
||||||
false,
|
false,
|
||||||
"",
|
"",
|
||||||
opt_silent,
|
opt_silent,
|
||||||
|
@ -163,9 +163,9 @@ fn passing_import_string() {
|
|||||||
"\
|
"\
|
||||||
@charset 'UTF-8';\n\
|
@charset 'UTF-8';\n\
|
||||||
\n\
|
\n\
|
||||||
@import 'data:text/css;base64,ZGF0YTp0ZXh0L2NzcyxodG1se2JhY2tncm91bmQtY29sb3I6IzAwMH0=';\n\
|
@import 'data:text/css;base64,aHRtbHtiYWNrZ3JvdW5kLWNvbG9yOiMwMDB9';\n\
|
||||||
\n\
|
\n\
|
||||||
@import url('data:text/css;base64,ZGF0YTp0ZXh0L2NzcyxodG1se2NvbG9yOiNmZmZ9')\n\
|
@import url('data:text/css;base64,aHRtbHtjb2xvcjojZmZmfQ==')\n\
|
||||||
"
|
"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -45,6 +45,14 @@ fn passing_parse_text_html_charset_utf_8_between_two_whitespaces() {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn passing_parse_text_css_url_encoded() {
|
||||||
|
assert_eq!(
|
||||||
|
utils::data_url_to_text("data:text/css,div{background-color:%23000}"),
|
||||||
|
"div{background-color:#000}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||||
|
@ -24,3 +24,13 @@ fn passing_decode_file_url() {
|
|||||||
"file:///tmp/space here/test#1.html"
|
"file:///tmp/space here/test#1.html"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn passing_plus_sign() {
|
||||||
|
assert_eq!(
|
||||||
|
utils::decode_url(str!(
|
||||||
|
"fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"
|
||||||
|
)),
|
||||||
|
"fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
@ -21,3 +21,18 @@ fn passing_remove_protocl_and_fragment() {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn passing_decodes_urls() {
|
||||||
|
if cfg!(windows) {
|
||||||
|
assert_eq!(
|
||||||
|
utils::file_url_to_fs_path("file:///C:/Documents%20and%20Settings/some-file.html"),
|
||||||
|
"C:\\Documents and Settings\\some-file.html"
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
assert_eq!(
|
||||||
|
utils::file_url_to_fs_path("file:///home/user/My%20Documents"),
|
||||||
|
"/home/user/My Documents"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
28
src/utils.rs
28
src/utils.rs
@ -30,6 +30,14 @@ const MAGIC: [[&[u8]; 2]; 18] = [
|
|||||||
[b"\x1A\x45\xDF\xA3", b"video/webm"],
|
[b"\x1A\x45\xDF\xA3", b"video/webm"],
|
||||||
];
|
];
|
||||||
|
|
||||||
|
const PLAINTEXT_MEDIA_TYPES: &[&str] = &[
|
||||||
|
"image/svg+xml",
|
||||||
|
"text/css",
|
||||||
|
"text/html",
|
||||||
|
"text/javascript",
|
||||||
|
"text/plain",
|
||||||
|
];
|
||||||
|
|
||||||
pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str, fragment: &str) -> String {
|
pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str, fragment: &str) -> String {
|
||||||
let media_type: String = if media_type.is_empty() {
|
let media_type: String = if media_type.is_empty() {
|
||||||
detect_media_type(data, &url)
|
detect_media_type(data, &url)
|
||||||
@ -88,6 +96,10 @@ pub fn is_http_url<T: AsRef<str>>(url: T) -> bool {
|
|||||||
.unwrap_or(false)
|
.unwrap_or(false)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn is_plaintext_media_type(media_type: &str) -> bool {
|
||||||
|
PLAINTEXT_MEDIA_TYPES.contains(&media_type.to_lowercase().as_str())
|
||||||
|
}
|
||||||
|
|
||||||
pub fn resolve_url<T: AsRef<str>, U: AsRef<str>>(from: T, to: U) -> Result<String, ParseError> {
|
pub fn resolve_url<T: AsRef<str>, U: AsRef<str>>(from: T, to: U) -> Result<String, ParseError> {
|
||||||
let result = if is_http_url(to.as_ref()) {
|
let result = if is_http_url(to.as_ref()) {
|
||||||
to.as_ref().to_string()
|
to.as_ref().to_string()
|
||||||
@ -139,10 +151,11 @@ pub fn data_url_to_text<T: AsRef<str>>(url: T) -> String {
|
|||||||
let mut media_type: &str = "";
|
let mut media_type: &str = "";
|
||||||
let mut encoding: &str = "";
|
let mut encoding: &str = "";
|
||||||
|
|
||||||
|
// Detect media type and encoding
|
||||||
let mut i: i8 = 0;
|
let mut i: i8 = 0;
|
||||||
for item in &meta_data_items {
|
for item in &meta_data_items {
|
||||||
if i == 0 {
|
if i == 0 {
|
||||||
if item.eq_ignore_ascii_case("text/html") {
|
if is_plaintext_media_type(item) {
|
||||||
media_type = item;
|
media_type = item;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -155,7 +168,7 @@ pub fn data_url_to_text<T: AsRef<str>>(url: T) -> String {
|
|||||||
i = i + 1;
|
i = i + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if media_type.eq_ignore_ascii_case("text/html") {
|
if is_plaintext_media_type(media_type) {
|
||||||
if encoding.eq_ignore_ascii_case("base64") {
|
if encoding.eq_ignore_ascii_case("base64") {
|
||||||
String::from_utf8(base64::decode(&data).unwrap_or(vec![])).unwrap_or(str!())
|
String::from_utf8(base64::decode(&data).unwrap_or(vec![])).unwrap_or(str!())
|
||||||
} else {
|
} else {
|
||||||
@ -167,6 +180,8 @@ pub fn data_url_to_text<T: AsRef<str>>(url: T) -> String {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn decode_url(input: String) -> String {
|
pub fn decode_url(input: String) -> String {
|
||||||
|
let input: String = input.replace("+", "%2B");
|
||||||
|
|
||||||
form_urlencoded::parse(input.as_bytes())
|
form_urlencoded::parse(input.as_bytes())
|
||||||
.map(|(key, val)| {
|
.map(|(key, val)| {
|
||||||
[
|
[
|
||||||
@ -200,7 +215,8 @@ pub fn file_url_to_fs_path(url: &str) -> String {
|
|||||||
fs_file_path = fs_file_path.replace("/", "\\");
|
fs_file_path = fs_file_path.replace("/", "\\");
|
||||||
}
|
}
|
||||||
|
|
||||||
fs_file_path
|
// File paths should not be %-encoded
|
||||||
|
decode_url(fs_file_path)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn retrieve_asset(
|
pub fn retrieve_asset(
|
||||||
@ -219,7 +235,11 @@ pub fn retrieve_asset(
|
|||||||
let cache_key = clean_url(&url);
|
let cache_key = clean_url(&url);
|
||||||
|
|
||||||
if is_data_url(&url) {
|
if is_data_url(&url) {
|
||||||
Ok((url.to_string(), url.to_string()))
|
if as_data_url {
|
||||||
|
Ok((url.to_string(), url.to_string()))
|
||||||
|
} else {
|
||||||
|
Ok((data_url_to_text(url), url.to_string()))
|
||||||
|
}
|
||||||
} else if is_file_url(&url) {
|
} else if is_file_url(&url) {
|
||||||
// Check if parent_url is also file:///
|
// Check if parent_url is also file:///
|
||||||
// (if not, then we don't embed the asset)
|
// (if not, then we don't embed the asset)
|
||||||
|
Loading…
Reference in New Issue
Block a user