improve SVG media type detection
This commit is contained in:
parent
f27d5fa23e
commit
be25784297
|
@ -473,7 +473,7 @@ pub fn walk_and_embed_assets(
|
||||||
);
|
);
|
||||||
let mut buf: Vec<u8> = Vec::new();
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||||
let iframe_data_url = data_to_data_url("text/html", &buf);
|
let iframe_data_url = data_to_data_url("text/html", &buf, "");
|
||||||
attr.value.clear();
|
attr.value.clear();
|
||||||
attr.value.push_slice(iframe_data_url.as_str());
|
attr.value.push_slice(iframe_data_url.as_str());
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,7 +11,7 @@ use crate::utils;
|
||||||
fn passing_encode_string_with_specific_media_type() {
|
fn passing_encode_string_with_specific_media_type() {
|
||||||
let mime = "application/javascript";
|
let mime = "application/javascript";
|
||||||
let data = "var word = 'hello';\nalert(word);\n";
|
let data = "var word = 'hello';\nalert(word);\n";
|
||||||
let data_url = utils::data_to_data_url(mime, data.as_bytes());
|
let data_url = utils::data_to_data_url(mime, data.as_bytes(), "");
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
&data_url,
|
&data_url,
|
||||||
|
|
|
@ -8,40 +8,130 @@ use crate::utils;
|
||||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn passing_image_media_types() {
|
fn passing_image_gif87() {
|
||||||
assert_eq!(utils::detect_media_type(b"GIF87a"), "image/gif");
|
assert_eq!(utils::detect_media_type(b"GIF87a", ""), "image/gif");
|
||||||
assert_eq!(utils::detect_media_type(b"GIF89a"), "image/gif");
|
}
|
||||||
assert_eq!(utils::detect_media_type(b"\xFF\xD8\xFF"), "image/jpeg");
|
|
||||||
|
#[test]
|
||||||
|
fn passing_image_gif89() {
|
||||||
|
assert_eq!(utils::detect_media_type(b"GIF89a", ""), "image/gif");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn passing_image_jpeg() {
|
||||||
|
assert_eq!(utils::detect_media_type(b"\xFF\xD8\xFF", ""), "image/jpeg");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn passing_image_png() {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
utils::detect_media_type(b"\x89PNG\x0D\x0A\x1A\x0A"),
|
utils::detect_media_type(b"\x89PNG\x0D\x0A\x1A\x0A", ""),
|
||||||
"image/png"
|
"image/png"
|
||||||
);
|
);
|
||||||
assert_eq!(utils::detect_media_type(b"<?xml "), "image/svg+xml");
|
}
|
||||||
assert_eq!(utils::detect_media_type(b"<svg "), "image/svg+xml");
|
|
||||||
assert_eq!(utils::detect_media_type(b"RIFF....WEBPVP8 "), "image/webp");
|
#[test]
|
||||||
|
fn passing_image_svg() {
|
||||||
|
assert_eq!(utils::detect_media_type(b"<svg ", ""), "image/svg+xml");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn passing_image_webp() {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
utils::detect_media_type(b"\x00\x00\x01\x00"),
|
utils::detect_media_type(b"RIFF....WEBPVP8 ", ""),
|
||||||
|
"image/webp"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn passing_image_icon() {
|
||||||
|
assert_eq!(
|
||||||
|
utils::detect_media_type(b"\x00\x00\x01\x00", ""),
|
||||||
"image/x-icon"
|
"image/x-icon"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn passing_audio_media_types() {
|
fn passing_image_svg_filename() {
|
||||||
assert_eq!(utils::detect_media_type(b"ID3"), "audio/mpeg");
|
assert_eq!(
|
||||||
assert_eq!(utils::detect_media_type(b"\xFF\x0E"), "audio/mpeg");
|
utils::detect_media_type(b"<?xml ", "local-file.svg"),
|
||||||
assert_eq!(utils::detect_media_type(b"\xFF\x0F"), "audio/mpeg");
|
"image/svg+xml"
|
||||||
assert_eq!(utils::detect_media_type(b"OggS"), "audio/ogg");
|
);
|
||||||
assert_eq!(utils::detect_media_type(b"RIFF....WAVEfmt "), "audio/wav");
|
|
||||||
assert_eq!(utils::detect_media_type(b"fLaC"), "audio/x-flac");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn passing_video_media_types() {
|
fn passing_image_svg_url_uppercase() {
|
||||||
assert_eq!(utils::detect_media_type(b"RIFF....AVI LIST"), "video/avi");
|
assert_eq!(
|
||||||
assert_eq!(utils::detect_media_type(b"....ftyp"), "video/mp4");
|
utils::detect_media_type(b"", "https://some-site.com/images/local-file.SVG"),
|
||||||
assert_eq!(utils::detect_media_type(b"\x00\x00\x01\x0B"), "video/mpeg");
|
"image/svg+xml"
|
||||||
assert_eq!(utils::detect_media_type(b"....moov"), "video/quicktime");
|
);
|
||||||
assert_eq!(utils::detect_media_type(b"\x1A\x45\xDF\xA3"), "video/webm");
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn passing_audio_mpeg() {
|
||||||
|
assert_eq!(utils::detect_media_type(b"ID3", ""), "audio/mpeg");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn passing_audio_mpeg_2() {
|
||||||
|
assert_eq!(utils::detect_media_type(b"\xFF\x0E", ""), "audio/mpeg");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn passing_audio_mpeg_3() {
|
||||||
|
assert_eq!(utils::detect_media_type(b"\xFF\x0F", ""), "audio/mpeg");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn passing_audio_ogg() {
|
||||||
|
assert_eq!(utils::detect_media_type(b"OggS", ""), "audio/ogg");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn passing_audio_wav() {
|
||||||
|
assert_eq!(
|
||||||
|
utils::detect_media_type(b"RIFF....WAVEfmt ", ""),
|
||||||
|
"audio/wav"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn passing_audio_flac() {
|
||||||
|
assert_eq!(utils::detect_media_type(b"fLaC", ""), "audio/x-flac");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn passing_video_avi() {
|
||||||
|
assert_eq!(
|
||||||
|
utils::detect_media_type(b"RIFF....AVI LIST", ""),
|
||||||
|
"video/avi"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn passing_video_mp4() {
|
||||||
|
assert_eq!(utils::detect_media_type(b"....ftyp", ""), "video/mp4");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn passing_video_mpeg() {
|
||||||
|
assert_eq!(
|
||||||
|
utils::detect_media_type(b"\x00\x00\x01\x0B", ""),
|
||||||
|
"video/mpeg"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn passing_video_quicktime() {
|
||||||
|
assert_eq!(utils::detect_media_type(b"....moov", ""), "video/quicktime");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn passing_video_webm() {
|
||||||
|
assert_eq!(
|
||||||
|
utils::detect_media_type(b"\x1A\x45\xDF\xA3", ""),
|
||||||
|
"video/webm"
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||||
|
@ -53,5 +143,5 @@ fn passing_video_media_types() {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn failing_unknown_media_type() {
|
fn failing_unknown_media_type() {
|
||||||
assert_eq!(utils::detect_media_type(b"abcdef0123456789"), "");
|
assert_eq!(utils::detect_media_type(b"abcdef0123456789", ""), "");
|
||||||
}
|
}
|
||||||
|
|
|
@ -111,6 +111,21 @@ fn passing_from_data_url_to_https() -> Result<(), ParseError> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn passing_from_data_url_to_data_url() -> Result<(), ParseError> {
|
||||||
|
let resolved_url = utils::resolve_url(
|
||||||
|
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
|
||||||
|
"data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K",
|
||||||
|
)?;
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
resolved_url.as_str(),
|
||||||
|
"data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K"
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn passing_from_file_url_to_relative_path() -> Result<(), ParseError> {
|
fn passing_from_file_url_to_relative_path() -> Result<(), ParseError> {
|
||||||
let resolved_url = utils::resolve_url(
|
let resolved_url = utils::resolve_url(
|
||||||
|
|
48
src/utils.rs
48
src/utils.rs
|
@ -42,13 +42,12 @@ lazy_static! {
|
||||||
static ref REGEX_CSS_URL: Regex = Regex::new(CSS_URL_REGEX_STR).unwrap();
|
static ref REGEX_CSS_URL: Regex = Regex::new(CSS_URL_REGEX_STR).unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
const MAGIC: [[&[u8]; 2]; 19] = [
|
const MAGIC: [[&[u8]; 2]; 18] = [
|
||||||
// Image
|
// Image
|
||||||
[b"GIF87a", b"image/gif"],
|
[b"GIF87a", b"image/gif"],
|
||||||
[b"GIF89a", b"image/gif"],
|
[b"GIF89a", b"image/gif"],
|
||||||
[b"\xFF\xD8\xFF", b"image/jpeg"],
|
[b"\xFF\xD8\xFF", b"image/jpeg"],
|
||||||
[b"\x89PNG\x0D\x0A\x1A\x0A", b"image/png"],
|
[b"\x89PNG\x0D\x0A\x1A\x0A", b"image/png"],
|
||||||
[b"<?xml ", b"image/svg+xml"],
|
|
||||||
[b"<svg ", b"image/svg+xml"],
|
[b"<svg ", b"image/svg+xml"],
|
||||||
[b"RIFF....WEBPVP8 ", b"image/webp"],
|
[b"RIFF....WEBPVP8 ", b"image/webp"],
|
||||||
[b"\x00\x00\x01\x00", b"image/x-icon"],
|
[b"\x00\x00\x01\x00", b"image/x-icon"],
|
||||||
|
@ -67,21 +66,26 @@ const MAGIC: [[&[u8]; 2]; 19] = [
|
||||||
[b"\x1A\x45\xDF\xA3", b"video/webm"],
|
[b"\x1A\x45\xDF\xA3", b"video/webm"],
|
||||||
];
|
];
|
||||||
|
|
||||||
pub fn data_to_data_url(mime: &str, data: &[u8]) -> String {
|
pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str) -> String {
|
||||||
let media_type = if mime.is_empty() {
|
let media_type = if media_type.is_empty() {
|
||||||
detect_media_type(data)
|
detect_media_type(data, &url)
|
||||||
} else {
|
} else {
|
||||||
mime.to_string()
|
media_type.to_string()
|
||||||
};
|
};
|
||||||
format!("data:{};base64,{}", media_type, base64::encode(data))
|
format!("data:{};base64,{}", media_type, base64::encode(data))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn detect_media_type(data: &[u8]) -> String {
|
pub fn detect_media_type(data: &[u8], url: &str) -> String {
|
||||||
for item in MAGIC.iter() {
|
for item in MAGIC.iter() {
|
||||||
if data.starts_with(item[0]) {
|
if data.starts_with(item[0]) {
|
||||||
return String::from_utf8(item[1].to_vec()).unwrap();
|
return String::from_utf8(item[1].to_vec()).unwrap();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if url.to_lowercase().ends_with(".svg") {
|
||||||
|
return str!("image/svg+xml");
|
||||||
|
}
|
||||||
|
|
||||||
str!()
|
str!()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -179,7 +183,7 @@ pub fn resolve_css_imports(
|
||||||
&parent_url,
|
&parent_url,
|
||||||
&embedded_url,
|
&embedded_url,
|
||||||
true, // Format as data URL
|
true, // Format as data URL
|
||||||
"", // Unknown MIME type
|
"", // Unknown media type
|
||||||
opt_silent,
|
opt_silent,
|
||||||
)
|
)
|
||||||
.map(|(a, _)| a)
|
.map(|(a, _)| a)
|
||||||
|
@ -206,7 +210,7 @@ pub fn resolve_css_imports(
|
||||||
}
|
}
|
||||||
|
|
||||||
if as_data_url {
|
if as_data_url {
|
||||||
data_to_data_url("text/css", resolved_css.as_bytes())
|
data_to_data_url("text/css", resolved_css.as_bytes(), "")
|
||||||
} else {
|
} else {
|
||||||
resolved_css
|
resolved_css
|
||||||
}
|
}
|
||||||
|
@ -214,8 +218,10 @@ pub fn resolve_css_imports(
|
||||||
|
|
||||||
pub fn clean_url<T: AsRef<str>>(url: T) -> String {
|
pub fn clean_url<T: AsRef<str>>(url: T) -> String {
|
||||||
let mut result = Url::parse(url.as_ref()).unwrap();
|
let mut result = Url::parse(url.as_ref()).unwrap();
|
||||||
|
|
||||||
// Clear fragment
|
// Clear fragment
|
||||||
result.set_fragment(None);
|
result.set_fragment(None);
|
||||||
|
|
||||||
// Get rid of stray question mark
|
// Get rid of stray question mark
|
||||||
if result.query() == Some("") {
|
if result.query() == Some("") {
|
||||||
result.set_query(None);
|
result.set_query(None);
|
||||||
|
@ -238,14 +244,14 @@ pub fn data_url_to_text<T: AsRef<str>>(url: T) -> String {
|
||||||
let data: String = decode_url(raw_data);
|
let data: String = decode_url(raw_data);
|
||||||
|
|
||||||
let meta_data_items: Vec<&str> = meta_data.split(';').collect();
|
let meta_data_items: Vec<&str> = meta_data.split(';').collect();
|
||||||
let mut mime_type: &str = "";
|
let mut media_type: &str = "";
|
||||||
let mut encoding: &str = "";
|
let mut encoding: &str = "";
|
||||||
|
|
||||||
let mut i: i8 = 0;
|
let mut i: i8 = 0;
|
||||||
for item in &meta_data_items {
|
for item in &meta_data_items {
|
||||||
if i == 0 {
|
if i == 0 {
|
||||||
if item.eq_ignore_ascii_case("text/html") {
|
if item.eq_ignore_ascii_case("text/html") {
|
||||||
mime_type = item;
|
media_type = item;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -257,7 +263,7 @@ pub fn data_url_to_text<T: AsRef<str>>(url: T) -> String {
|
||||||
i = i + 1;
|
i = i + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if mime_type.eq_ignore_ascii_case("text/html") {
|
if media_type.eq_ignore_ascii_case("text/html") {
|
||||||
if encoding.eq_ignore_ascii_case("base64") {
|
if encoding.eq_ignore_ascii_case("base64") {
|
||||||
String::from_utf8(base64::decode(&data).unwrap_or(vec![])).unwrap_or(str!())
|
String::from_utf8(base64::decode(&data).unwrap_or(vec![])).unwrap_or(str!())
|
||||||
} else {
|
} else {
|
||||||
|
@ -291,7 +297,7 @@ pub fn retrieve_asset(
|
||||||
parent_url: &str,
|
parent_url: &str,
|
||||||
url: &str,
|
url: &str,
|
||||||
as_data_url: bool,
|
as_data_url: bool,
|
||||||
mime: &str,
|
media_type: &str,
|
||||||
opt_silent: bool,
|
opt_silent: bool,
|
||||||
) -> Result<(String, String), reqwest::Error> {
|
) -> Result<(String, String), reqwest::Error> {
|
||||||
if url.len() == 0 {
|
if url.len() == 0 {
|
||||||
|
@ -318,7 +324,11 @@ pub fn retrieve_asset(
|
||||||
}
|
}
|
||||||
|
|
||||||
if as_data_url {
|
if as_data_url {
|
||||||
let data_url: String = data_to_data_url(&mime, &fs::read(&fs_file_path).unwrap());
|
let data_url: String = data_to_data_url(
|
||||||
|
&media_type,
|
||||||
|
&fs::read(&fs_file_path).unwrap(),
|
||||||
|
&fs_file_path,
|
||||||
|
);
|
||||||
Ok((data_url, url.to_string()))
|
Ok((data_url, url.to_string()))
|
||||||
} else {
|
} else {
|
||||||
let data: String = fs::read_to_string(&fs_file_path).expect(url);
|
let data: String = fs::read_to_string(&fs_file_path).expect(url);
|
||||||
|
@ -355,17 +365,17 @@ pub fn retrieve_asset(
|
||||||
let mut data: Vec<u8> = vec![];
|
let mut data: Vec<u8> = vec![];
|
||||||
response.copy_to(&mut data)?;
|
response.copy_to(&mut data)?;
|
||||||
|
|
||||||
// Attempt to obtain MIME type by reading the Content-Type header
|
// Attempt to obtain media type by reading the Content-Type header
|
||||||
let media_type = if mime == "" {
|
let media_type = if media_type == "" {
|
||||||
response
|
response
|
||||||
.headers()
|
.headers()
|
||||||
.get(CONTENT_TYPE)
|
.get(CONTENT_TYPE)
|
||||||
.and_then(|header| header.to_str().ok())
|
.and_then(|header| header.to_str().ok())
|
||||||
.unwrap_or(&mime)
|
.unwrap_or(&media_type)
|
||||||
} else {
|
} else {
|
||||||
mime
|
media_type
|
||||||
};
|
};
|
||||||
let data_url = data_to_data_url(&media_type, &data);
|
let data_url = data_to_data_url(&media_type, &data, url);
|
||||||
// Add to cache
|
// Add to cache
|
||||||
cache.insert(new_cache_key, data_url.clone());
|
cache.insert(new_cache_key, data_url.clone());
|
||||||
Ok((data_url, res_url))
|
Ok((data_url, res_url))
|
||||||
|
|
Loading…
Reference in New Issue