improve data URL media type detection
This commit is contained in:
parent
d2615f51dc
commit
594ad55bd8
4 changed files with 68 additions and 40 deletions
|
@ -122,9 +122,9 @@ fn main() {
|
|||
base_url = final_url;
|
||||
dom = html_to_dom(&data);
|
||||
} else if is_data_url(target_url) {
|
||||
let text: String = data_url_to_text(target_url);
|
||||
if text.len() == 0 {
|
||||
eprintln!("Unsupported data URL input");
|
||||
let (media_type, text): (String, String) = data_url_to_text(target_url);
|
||||
if !media_type.eq_ignore_ascii_case("text/html") {
|
||||
eprintln!("Unsupported data URL media type");
|
||||
process::exit(1);
|
||||
}
|
||||
base_url = str!(target_url);
|
||||
|
|
|
@ -62,7 +62,7 @@ fn passing_bad_input_data_url() -> Result<(), Box<dyn std::error::Error>> {
|
|||
// STDERR should contain error description
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stderr).unwrap(),
|
||||
"Unsupported data URL input\n"
|
||||
"Unsupported data URL media type\n"
|
||||
);
|
||||
|
||||
// The exit code should be 1
|
||||
|
|
|
@ -9,48 +9,74 @@ use crate::utils;
|
|||
|
||||
#[test]
|
||||
fn passing_parse_text_html_base64() {
|
||||
let (media_type, text) = utils::data_url_to_text("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg==");
|
||||
|
||||
assert_eq!(media_type, "text/html");
|
||||
assert_eq!(
|
||||
utils::data_url_to_text("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg=="),
|
||||
text,
|
||||
"Work expands so as to fill the time available for its completion"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn passing_parse_text_html_utf8() {
|
||||
let (media_type, text) = utils::data_url_to_text(
|
||||
"data:text/html;utf8,Work expands so as to fill the time available for its completion",
|
||||
);
|
||||
|
||||
assert_eq!(media_type, "text/html");
|
||||
assert_eq!(
|
||||
utils::data_url_to_text(
|
||||
"data:text/html;utf8,Work expands so as to fill the time available for its completion"
|
||||
),
|
||||
text,
|
||||
"Work expands so as to fill the time available for its completion"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn passing_parse_text_html_plaintext() {
|
||||
let (media_type, text) = utils::data_url_to_text(
|
||||
"data:text/html,Work expands so as to fill the time available for its completion",
|
||||
);
|
||||
|
||||
assert_eq!(media_type, "text/html");
|
||||
assert_eq!(
|
||||
utils::data_url_to_text(
|
||||
"data:text/html,Work expands so as to fill the time available for its completion"
|
||||
),
|
||||
text,
|
||||
"Work expands so as to fill the time available for its completion"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn passing_parse_text_html_charset_utf_8_between_two_whitespaces() {
|
||||
let (media_type, text) = utils::data_url_to_text(" data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion ");
|
||||
|
||||
assert_eq!(media_type, "text/html");
|
||||
assert_eq!(
|
||||
utils::data_url_to_text(
|
||||
" data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion "
|
||||
),
|
||||
text,
|
||||
"Work expands so as to fill the time available for its completion"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn passing_parse_text_css_url_encoded() {
|
||||
assert_eq!(
|
||||
utils::data_url_to_text("data:text/css,div{background-color:%23000}"),
|
||||
"div{background-color:#000}"
|
||||
);
|
||||
let (media_type, text) = utils::data_url_to_text("data:text/css,div{background-color:%23000}");
|
||||
|
||||
assert_eq!(media_type, "text/css");
|
||||
assert_eq!(text, "div{background-color:#000}");
|
||||
}
|
||||
|
||||
// A base64 data URL whose media type is omitted ("data:;base64,...") is
// expected to yield an empty media type together with the decoded payload.
#[test]
|
||||
fn passing_parse_no_media_type_base64() {
|
||||
// "dGVzdA==" is the base64 encoding of "test".
let (media_type, text) = utils::data_url_to_text("data:;base64,dGVzdA==");
|
||||
|
||||
// No media type was supplied, so the first tuple element is empty.
assert_eq!(media_type, "");
|
||||
assert_eq!(text, "test");
|
||||
}
|
||||
|
||||
// A data URL with neither media type nor encoding ("data:;,...") is expected
// to yield an empty media type and the percent-decoded payload.
#[test]
|
||||
fn passing_parse_no_media_type_no_encoding() {
|
||||
// "%20" is a percent-encoded space; the assertion below expects it decoded.
let (media_type, text) = utils::data_url_to_text("data:;,test%20test");
|
||||
|
||||
// No media type was supplied, so the first tuple element is empty.
assert_eq!(media_type, "");
|
||||
assert_eq!(text, "test test");
|
||||
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
|
@ -62,5 +88,8 @@ fn passing_parse_text_css_url_encoded() {
|
|||
|
||||
#[test]
|
||||
fn failing_just_word_data() {
|
||||
assert_eq!(utils::data_url_to_text("data"), "");
|
||||
let (media_type, text) = utils::data_url_to_text("data");
|
||||
|
||||
assert_eq!(media_type, "");
|
||||
assert_eq!(text, "");
|
||||
}
|
||||
|
|
41
src/utils.rs
41
src/utils.rs
|
@ -133,50 +133,47 @@ pub fn clean_url<T: AsRef<str>>(url: T) -> String {
|
|||
result.to_string()
|
||||
}
|
||||
|
||||
pub fn data_url_to_text<T: AsRef<str>>(url: T) -> String {
|
||||
let parsed_url = Url::parse(url.as_ref()).unwrap_or(Url::parse("http://[::1]").unwrap());
|
||||
pub fn data_url_to_text<T: AsRef<str>>(url: T) -> (String, String) {
|
||||
let parsed_url = Url::parse(url.as_ref()).unwrap_or(Url::parse("data:,").unwrap());
|
||||
let path: String = parsed_url.path().to_string();
|
||||
let comma_loc: usize = path.find(',').unwrap_or(path.len());
|
||||
|
||||
if comma_loc == path.len() {
|
||||
return str!();
|
||||
}
|
||||
|
||||
let meta_data: String = path.chars().take(comma_loc).collect();
|
||||
let raw_data: String = path.chars().skip(comma_loc + 1).collect();
|
||||
|
||||
let data: String = decode_url(raw_data);
|
||||
|
||||
let meta_data_items: Vec<&str> = meta_data.split(';').collect();
|
||||
let mut media_type: &str = "";
|
||||
let mut encoding: &str = "";
|
||||
|
||||
// Detect media type and encoding
|
||||
let mut media_type: String = str!();
|
||||
let mut text: String = str!();
|
||||
|
||||
let mut i: i8 = 0;
|
||||
for item in &meta_data_items {
|
||||
if i == 0 {
|
||||
if is_plaintext_media_type(item) {
|
||||
media_type = item;
|
||||
continue;
|
||||
media_type = str!(item);
|
||||
} else {
|
||||
if item.eq_ignore_ascii_case("base64")
|
||||
|| item.eq_ignore_ascii_case("utf8")
|
||||
|| item.eq_ignore_ascii_case("charset=UTF-8")
|
||||
{
|
||||
encoding = item;
|
||||
}
|
||||
}
|
||||
|
||||
if item.eq_ignore_ascii_case("base64") || item.eq_ignore_ascii_case("utf8") {
|
||||
encoding = item;
|
||||
}
|
||||
|
||||
i = i + 1;
|
||||
}
|
||||
|
||||
if is_plaintext_media_type(media_type) {
|
||||
if is_plaintext_media_type(&media_type) || media_type.is_empty() {
|
||||
if encoding.eq_ignore_ascii_case("base64") {
|
||||
String::from_utf8(base64::decode(&data).unwrap_or(vec![])).unwrap_or(str!())
|
||||
text = String::from_utf8(base64::decode(&data).unwrap_or(vec![])).unwrap_or(str!())
|
||||
} else {
|
||||
data
|
||||
text = data
|
||||
}
|
||||
} else {
|
||||
str!()
|
||||
}
|
||||
|
||||
(media_type, text)
|
||||
}
|
||||
|
||||
pub fn decode_url(input: String) -> String {
|
||||
|
@ -238,7 +235,9 @@ pub fn retrieve_asset(
|
|||
if as_data_url {
|
||||
Ok((url.to_string(), url.to_string()))
|
||||
} else {
|
||||
Ok((data_url_to_text(url), url.to_string()))
|
||||
let (_media_type, text) = data_url_to_text(url);
|
||||
|
||||
Ok((text, url.to_string()))
|
||||
}
|
||||
} else if is_file_url(&url) {
|
||||
// Check if parent_url is also file:///
|
||||
|
|
Loading…
Reference in a new issue