move functions related to URL manipulation into url.rs

This commit is contained in:
Sunshine 2020-06-24 03:16:40 -04:00
parent f6ea16b3ad
commit 5c229c51da
No known key found for this signature in database
GPG key ID: B80CA68703CD8AB1
21 changed files with 298 additions and 288 deletions

View file

@ -2,9 +2,8 @@ use cssparser::{ParseError, Parser, ParserInput, SourcePosition, Token};
use reqwest::blocking::Client;
use std::collections::HashMap;
use crate::utils::{
data_to_data_url, get_url_fragment, is_http_url, resolve_url, retrieve_asset, url_with_fragment,
};
use crate::url::{data_to_data_url, get_url_fragment, is_http_url, resolve_url, url_with_fragment};
use crate::utils::retrieve_asset;
const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[
// Universal

View file

@ -1,9 +1,3 @@
use crate::css::embed_css;
use crate::js::attr_is_event_handler;
use crate::utils::{
data_to_data_url, get_url_fragment, is_http_url, resolve_url, retrieve_asset, url_has_protocol,
url_with_fragment,
};
use base64;
use html5ever::interface::QualName;
use html5ever::parse_document;
@ -17,6 +11,14 @@ use sha2::{Digest, Sha256, Sha384, Sha512};
use std::collections::HashMap;
use std::default::Default;
use crate::css::embed_css;
use crate::js::attr_is_event_handler;
use crate::url::{
data_to_data_url, get_url_fragment, is_http_url, resolve_url, url_has_protocol,
url_with_fragment,
};
use crate::utils::retrieve_asset;
struct SrcSetItem<'a> {
path: &'a str,
descriptor: &'a str,

View file

@ -4,6 +4,7 @@ mod macros;
pub mod css;
pub mod html;
pub mod js;
pub mod url;
pub mod utils;
#[cfg(test)]

View file

@ -1,6 +1,4 @@
use chrono::prelude::*;
use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets};
use monolith::utils::{data_url_to_data, is_data_url, is_file_url, is_http_url, retrieve_asset};
use reqwest::blocking::Client;
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
use reqwest::Url;
@ -12,6 +10,10 @@ use std::path::Path;
use std::process;
use std::time::Duration;
use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets};
use monolith::url::{data_url_to_data, is_data_url, is_file_url, is_http_url};
use monolith::utils::retrieve_asset;
mod args;
mod macros;

View file

@ -7,12 +7,12 @@
#[cfg(test)]
mod passing {
use crate::utils;
use crate::url;
#[test]
fn removes_fragment() {
assert_eq!(
utils::clean_url("https://somewhere.com/font.eot#iefix"),
url::clean_url("https://somewhere.com/font.eot#iefix"),
"https://somewhere.com/font.eot"
);
}
@ -20,7 +20,7 @@ mod passing {
#[test]
fn removes_empty_fragment() {
assert_eq!(
utils::clean_url("https://somewhere.com/font.eot#"),
url::clean_url("https://somewhere.com/font.eot#"),
"https://somewhere.com/font.eot"
);
}
@ -28,7 +28,7 @@ mod passing {
#[test]
fn removes_empty_query_and_empty_fragment() {
assert_eq!(
utils::clean_url("https://somewhere.com/font.eot?#"),
url::clean_url("https://somewhere.com/font.eot?#"),
"https://somewhere.com/font.eot"
);
}
@ -36,7 +36,7 @@ mod passing {
#[test]
fn removes_empty_query_amp_and_empty_fragment() {
assert_eq!(
utils::clean_url("https://somewhere.com/font.eot?a=b&#"),
url::clean_url("https://somewhere.com/font.eot?a=b&#"),
"https://somewhere.com/font.eot?a=b"
);
}
@ -44,7 +44,7 @@ mod passing {
#[test]
fn keeps_credentials() {
assert_eq!(
utils::clean_url("https://cookie:monster@gibson.internet/"),
url::clean_url("https://cookie:monster@gibson.internet/"),
"https://cookie:monster@gibson.internet/"
);
}

View file

@ -7,13 +7,13 @@
#[cfg(test)]
mod passing {
use crate::utils;
use crate::url;
#[test]
fn encode_string_with_specific_media_type() {
let mime = "application/javascript";
let data = "var word = 'hello';\nalert(word);\n";
let data_url = utils::data_to_data_url(mime, data.as_bytes(), "");
let data_url = url::data_to_data_url(mime, data.as_bytes(), "");
assert_eq!(
&data_url,
@ -24,7 +24,7 @@ mod passing {
#[test]
fn encode_append_fragment() {
let data = "<svg></svg>\n";
let data_url = utils::data_to_data_url("image/svg+xml", data.as_bytes(), "");
let data_url = url::data_to_data_url("image/svg+xml", data.as_bytes(), "");
assert_eq!(&data_url, "");
}

View file

@ -7,11 +7,11 @@
#[cfg(test)]
mod passing {
use crate::utils;
use crate::url;
#[test]
fn parse_text_html_base64() {
let (media_type, data) = utils::data_url_to_data("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg==");
let (media_type, data) = url::data_url_to_data("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg==");
assert_eq!(media_type, "text/html");
assert_eq!(
@ -22,7 +22,7 @@ mod passing {
#[test]
fn parse_text_html_utf8() {
let (media_type, data) = utils::data_url_to_data(
let (media_type, data) = url::data_url_to_data(
"data:text/html;utf8,Work expands so as to fill the time available for its completion",
);
@ -35,7 +35,7 @@ mod passing {
#[test]
fn parse_text_html_plaintext() {
let (media_type, data) = utils::data_url_to_data(
let (media_type, data) = url::data_url_to_data(
"data:text/html,Work expands so as to fill the time available for its completion",
);
@ -48,7 +48,7 @@ mod passing {
#[test]
fn parse_text_html_charset_utf_8_between_two_whitespaces() {
let (media_type, data) = utils::data_url_to_data(" data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion ");
let (media_type, data) = url::data_url_to_data(" data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion ");
assert_eq!(media_type, "text/html");
assert_eq!(
@ -60,7 +60,7 @@ mod passing {
#[test]
fn parse_text_css_url_encoded() {
let (media_type, data) =
utils::data_url_to_data("data:text/css,div{background-color:%23000}");
url::data_url_to_data("data:text/css,div{background-color:%23000}");
assert_eq!(media_type, "text/css");
assert_eq!(String::from_utf8_lossy(&data), "div{background-color:#000}");
@ -68,7 +68,7 @@ mod passing {
#[test]
fn parse_no_media_type_base64() {
let (media_type, data) = utils::data_url_to_data("data:;base64,dGVzdA==");
let (media_type, data) = url::data_url_to_data("data:;base64,dGVzdA==");
assert_eq!(media_type, "");
assert_eq!(String::from_utf8_lossy(&data), "test");
@ -76,7 +76,7 @@ mod passing {
#[test]
fn parse_no_media_type_no_encoding() {
let (media_type, data) = utils::data_url_to_data("data:;,test%20test");
let (media_type, data) = url::data_url_to_data("data:;,test%20test");
assert_eq!(media_type, "");
assert_eq!(String::from_utf8_lossy(&data), "test test");
@ -92,11 +92,11 @@ mod passing {
#[cfg(test)]
mod failing {
use crate::utils;
use crate::url;
#[test]
fn just_word_data() {
let (media_type, data) = utils::data_url_to_data("data");
let (media_type, data) = url::data_url_to_data("data");
assert_eq!(media_type, "");
assert_eq!(String::from_utf8_lossy(&data), "");

View file

@ -7,12 +7,12 @@
#[cfg(test)]
mod passing {
use crate::utils;
use crate::url;
#[test]
fn decode_unicode_characters() {
assert_eq!(
utils::decode_url(str!(
url::decode_url(str!(
"%E6%A4%9C%E3%83%92%E3%83%A0%E8%A7%A3%E5%A1%97%E3%82%83%E3%83%83%20%3D%20%E3%82%B5"
)),
"検ヒム解塗ゃッ = サ"
@ -22,7 +22,7 @@ mod passing {
#[test]
fn decode_file_url() {
assert_eq!(
utils::decode_url(str!("file:///tmp/space%20here/test%231.html")),
url::decode_url(str!("file:///tmp/space%20here/test%231.html")),
"file:///tmp/space here/test#1.html"
);
}
@ -30,7 +30,7 @@ mod passing {
#[test]
fn plus_sign() {
assert_eq!(
utils::decode_url(str!(
url::decode_url(str!(
"fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"
)),
"fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"

View file

@ -7,18 +7,18 @@
#[cfg(test)]
mod passing {
use crate::utils;
use crate::url;
#[test]
fn remove_protocl_and_fragment() {
if cfg!(windows) {
assert_eq!(
utils::file_url_to_fs_path("file:///C:/documents/some-path/some-file.svg#fragment"),
url::file_url_to_fs_path("file:///C:/documents/some-path/some-file.svg#fragment"),
"C:\\documents\\some-path\\some-file.svg"
);
} else {
assert_eq!(
utils::file_url_to_fs_path("file:///tmp/some-path/some-file.svg#fragment"),
url::file_url_to_fs_path("file:///tmp/some-path/some-file.svg#fragment"),
"/tmp/some-path/some-file.svg"
);
}
@ -28,12 +28,12 @@ mod passing {
fn decodes_urls() {
if cfg!(windows) {
assert_eq!(
utils::file_url_to_fs_path("file:///C:/Documents%20and%20Settings/some-file.html"),
url::file_url_to_fs_path("file:///C:/Documents%20and%20Settings/some-file.html"),
"C:\\Documents and Settings\\some-file.html"
);
} else {
assert_eq!(
utils::file_url_to_fs_path("file:///home/user/My%20Documents"),
url::file_url_to_fs_path("file:///home/user/My%20Documents"),
"/home/user/My Documents"
);
}

View file

@ -7,12 +7,12 @@
#[cfg(test)]
mod passing {
use crate::utils;
use crate::url;
#[test]
fn data_url() {
assert_eq!(
utils::get_url_fragment(
url::get_url_fragment(
"#test"
),
"test"
@ -21,6 +21,6 @@ mod passing {
#[test]
fn https_empty() {
assert_eq!(utils::get_url_fragment("https://kernel.org#"), "");
assert_eq!(url::get_url_fragment("https://kernel.org#"), "");
}
}

View file

@ -7,18 +7,18 @@
#[cfg(test)]
mod passing {
use crate::utils;
use crate::url;
#[test]
fn data_url_text_html() {
assert!(utils::is_data_url(
assert!(url::is_data_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
));
}
#[test]
fn data_url_no_media_type() {
assert!(utils::is_data_url(
assert!(url::is_data_url(
"data:;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
));
}
@ -33,20 +33,20 @@ mod passing {
#[cfg(test)]
mod failing {
use crate::utils;
use crate::url;
#[test]
fn https_url() {
assert!(!utils::is_data_url("https://kernel.org"));
assert!(!url::is_data_url("https://kernel.org"));
}
#[test]
fn no_protocol_url() {
assert!(!utils::is_data_url("//kernel.org"));
assert!(!url::is_data_url("//kernel.org"));
}
#[test]
fn empty_string() {
assert!(!utils::is_data_url(""));
assert!(!url::is_data_url(""));
}
}

View file

@ -7,32 +7,32 @@
#[cfg(test)]
mod passing {
use crate::utils;
use crate::url;
#[test]
fn unix_file_url() {
assert!(utils::is_file_url(
assert!(url::is_file_url(
"file:///home/user/Websites/my-website/index.html"
));
}
#[test]
fn windows_file_url() {
assert!(utils::is_file_url(
assert!(url::is_file_url(
"file:///C:/Documents%20and%20Settings/user/Websites/my-website/assets/images/logo.png"
));
}
#[test]
fn unix_url_with_backslashes() {
assert!(utils::is_file_url(
assert!(url::is_file_url(
"file:\\\\\\home\\user\\Websites\\my-website\\index.html"
));
}
#[test]
fn windows_file_url_with_backslashes() {
assert!(utils::is_file_url(
assert!(url::is_file_url(
"file:\\\\\\C:\\Documents%20and%20Settings\\user\\Websites\\my-website\\assets\\images\\logo.png"
));
}
@ -47,37 +47,37 @@ mod passing {
#[cfg(test)]
mod failing {
use crate::utils;
use crate::url;
#[test]
fn url_with_no_protocl() {
assert!(!utils::is_file_url("//kernel.org"));
assert!(!url::is_file_url("//kernel.org"));
}
#[test]
fn dot_slash_filename() {
assert!(!utils::is_file_url("./index.html"));
assert!(!url::is_file_url("./index.html"));
}
#[test]
fn just_filename() {
assert!(!utils::is_file_url("some-local-page.htm"));
assert!(!url::is_file_url("some-local-page.htm"));
}
#[test]
fn https_ip_port_url() {
assert!(!utils::is_file_url("https://1.2.3.4:80/www/index.html"));
assert!(!url::is_file_url("https://1.2.3.4:80/www/index.html"));
}
#[test]
fn data_url() {
assert!(!utils::is_file_url(
assert!(!url::is_file_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
));
}
#[test]
fn just_word_file() {
assert!(!utils::is_file_url("file"));
assert!(!url::is_file_url("file"));
}
}

View file

@ -7,21 +7,21 @@
#[cfg(test)]
mod passing {
use crate::utils;
use crate::url;
#[test]
fn http_url() {
assert!(utils::is_http_url("http://kernel.org"));
assert!(url::is_http_url("http://kernel.org"));
}
#[test]
fn https_url() {
assert!(utils::is_http_url("https://www.rust-lang.org/"));
assert!(url::is_http_url("https://www.rust-lang.org/"));
}
#[test]
fn http_url_with_backslashes() {
assert!(utils::is_http_url("http:\\\\freebsd.org\\"));
assert!(url::is_http_url("http:\\\\freebsd.org\\"));
}
}
@ -34,31 +34,31 @@ mod passing {
#[cfg(test)]
mod failing {
use crate::utils;
use crate::url;
#[test]
fn url_with_no_protocol() {
assert!(!utils::is_http_url("//kernel.org"));
assert!(!url::is_http_url("//kernel.org"));
}
#[test]
fn dot_slash_filename() {
assert!(!utils::is_http_url("./index.html"));
assert!(!url::is_http_url("./index.html"));
}
#[test]
fn just_filename() {
assert!(!utils::is_http_url("some-local-page.htm"));
assert!(!url::is_http_url("some-local-page.htm"));
}
#[test]
fn https_ip_port_url() {
assert!(!utils::is_http_url("ftp://1.2.3.4/www/index.html"));
assert!(!url::is_http_url("ftp://1.2.3.4/www/index.html"));
}
#[test]
fn data_url() {
assert!(!utils::is_http_url(
assert!(!url::is_http_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
));
}

12
src/tests/url/mod.rs Normal file
View file

@ -0,0 +1,12 @@
mod clean_url;
mod data_to_data_url;
mod data_url_to_data;
mod decode_url;
mod file_url_to_fs_path;
mod get_url_fragment;
mod is_data_url;
mod is_file_url;
mod is_http_url;
mod resolve_url;
mod url_has_protocol;
mod url_with_fragment;

View file

@ -7,13 +7,13 @@
#[cfg(test)]
mod passing {
use crate::utils;
use crate::url;
use url::ParseError;
#[test]
fn from_https_to_level_up_relative() -> Result<(), ParseError> {
let resolved_url =
utils::resolve_url("https://www.kernel.org", "../category/signatures.html")?;
url::resolve_url("https://www.kernel.org", "../category/signatures.html")?;
assert_eq!(
resolved_url.as_str(),
@ -25,7 +25,7 @@ mod passing {
#[test]
fn from_just_filename_to_full_https_url() -> Result<(), ParseError> {
let resolved_url = utils::resolve_url(
let resolved_url = url::resolve_url(
"saved_page.htm",
"https://www.kernel.org/category/signatures.html",
)?;
@ -40,7 +40,7 @@ mod passing {
#[test]
fn from_https_url_to_url_with_no_protocol() -> Result<(), ParseError> {
let resolved_url = utils::resolve_url(
let resolved_url = url::resolve_url(
"https://www.kernel.org",
"//www.kernel.org/theme/images/logos/tux.png",
)?;
@ -56,7 +56,7 @@ mod passing {
#[test]
fn from_https_url_to_url_with_no_protocol_and_on_different_hostname() -> Result<(), ParseError>
{
let resolved_url = utils::resolve_url(
let resolved_url = url::resolve_url(
"https://www.kernel.org",
"//another-host.org/theme/images/logos/tux.png",
)?;
@ -71,7 +71,7 @@ mod passing {
#[test]
fn from_https_url_to_relative_root_path() -> Result<(), ParseError> {
let resolved_url = utils::resolve_url(
let resolved_url = url::resolve_url(
"https://www.kernel.org/category/signatures.html",
"/theme/images/logos/tux.png",
)?;
@ -86,7 +86,7 @@ mod passing {
#[test]
fn from_https_to_just_filename() -> Result<(), ParseError> {
let resolved_url = utils::resolve_url(
let resolved_url = url::resolve_url(
"https://www.w3schools.com/html/html_iframe.asp",
"default.asp",
)?;
@ -101,7 +101,7 @@ mod passing {
#[test]
fn from_data_url_to_https() -> Result<(), ParseError> {
let resolved_url = utils::resolve_url(
let resolved_url = url::resolve_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
"https://www.kernel.org/category/signatures.html",
)?;
@ -116,7 +116,7 @@ mod passing {
#[test]
fn from_data_url_to_data_url() -> Result<(), ParseError> {
let resolved_url = utils::resolve_url(
let resolved_url = url::resolve_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
"data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K",
)?;
@ -131,7 +131,7 @@ mod passing {
#[test]
fn from_file_url_to_relative_path() -> Result<(), ParseError> {
let resolved_url = utils::resolve_url(
let resolved_url = url::resolve_url(
"file:///home/user/Websites/my-website/index.html",
"assets/images/logo.png",
)
@ -147,7 +147,7 @@ mod passing {
#[test]
fn from_file_url_to_relative_path_with_backslashes() -> Result<(), ParseError> {
let resolved_url = utils::resolve_url(
let resolved_url = url::resolve_url(
"file:\\\\\\home\\user\\Websites\\my-website\\index.html",
"assets\\images\\logo.png",
)
@ -163,7 +163,7 @@ mod passing {
#[test]
fn from_data_url_to_file_url() -> Result<(), ParseError> {
let resolved_url = utils::resolve_url(
let resolved_url = url::resolve_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
"file:///etc/passwd",
)
@ -176,7 +176,7 @@ mod passing {
#[test]
fn preserve_fragment() -> Result<(), ParseError> {
let resolved_url = utils::resolve_url(
let resolved_url = url::resolve_url(
"http://doesnt-matter.local/",
"css/fonts/fontmarvelous.svg#fontmarvelous",
)
@ -193,9 +193,9 @@ mod passing {
#[test]
fn resolve_from_file_url_to_file_url() -> Result<(), ParseError> {
let resolved_url = if cfg!(windows) {
utils::resolve_url("file:///c:/index.html", "file:///c:/image.png").unwrap_or(str!())
url::resolve_url("file:///c:/index.html", "file:///c:/image.png").unwrap_or(str!())
} else {
utils::resolve_url("file:///tmp/index.html", "file:///tmp/image.png").unwrap_or(str!())
url::resolve_url("file:///tmp/index.html", "file:///tmp/image.png").unwrap_or(str!())
};
assert_eq!(
@ -220,12 +220,12 @@ mod passing {
#[cfg(test)]
mod failing {
use crate::utils;
use crate::url;
use url::ParseError;
#[test]
fn from_data_url_to_url_with_no_protocol() -> Result<(), ParseError> {
let resolved_url = utils::resolve_url(
let resolved_url = url::resolve_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
"//www.w3schools.com/html/html_iframe.asp",
)

View file

@ -7,50 +7,50 @@
#[cfg(test)]
mod passing {
use crate::utils;
use crate::url;
#[test]
fn mailto() {
assert!(utils::url_has_protocol(
assert!(url::url_has_protocol(
"mailto:somebody@somewhere.com?subject=hello"
));
}
#[test]
fn tel() {
assert!(utils::url_has_protocol("tel:5551234567"));
assert!(url::url_has_protocol("tel:5551234567"));
}
#[test]
fn ftp_no_slashes() {
assert!(utils::url_has_protocol("ftp:some-ftp-server.com"));
assert!(url::url_has_protocol("ftp:some-ftp-server.com"));
}
#[test]
fn ftp_with_credentials() {
assert!(utils::url_has_protocol(
assert!(url::url_has_protocol(
"ftp://user:password@some-ftp-server.com"
));
}
#[test]
fn javascript() {
assert!(utils::url_has_protocol("javascript:void(0)"));
assert!(url::url_has_protocol("javascript:void(0)"));
}
#[test]
fn http() {
assert!(utils::url_has_protocol("http://news.ycombinator.com"));
assert!(url::url_has_protocol("http://news.ycombinator.com"));
}
#[test]
fn https() {
assert!(utils::url_has_protocol("https://github.com"));
assert!(url::url_has_protocol("https://github.com"));
}
#[test]
fn mailto_uppercase() {
assert!(utils::url_has_protocol(
assert!(url::url_has_protocol(
"MAILTO:somebody@somewhere.com?subject=hello"
));
}
@ -69,23 +69,23 @@ mod failing {
#[test]
fn url_with_no_protocol() {
assert!(!utils::url_has_protocol(
assert!(!url::url_has_protocol(
"//some-hostname.com/some-file.html"
));
}
#[test]
fn relative_path() {
assert!(!utils::url_has_protocol("some-hostname.com/some-file.html"));
assert!(!url::url_has_protocol("some-hostname.com/some-file.html"));
}
#[test]
fn relative_to_root_path() {
assert!(!utils::url_has_protocol("/some-file.html"));
assert!(!url::url_has_protocol("/some-file.html"));
}
#[test]
fn empty_string() {
assert!(!utils::url_has_protocol(""));
assert!(!url::url_has_protocol(""));
}
}

View file

@ -7,13 +7,13 @@
#[cfg(test)]
mod passing {
use crate::utils;
use crate::url;
#[test]
fn url_with_fragment_url() {
let url = "https://localhost.localdomain/path/";
let fragment = "test";
let assembled_url = utils::url_with_fragment(url, fragment);
let assembled_url = url::url_with_fragment(url, fragment);
assert_eq!(&assembled_url, "https://localhost.localdomain/path/#test");
}
@ -21,7 +21,7 @@ mod passing {
fn url_with_fragment_empty_url() {
let url = "https://localhost.localdomain/path/";
let fragment = "";
let assembled_url = utils::url_with_fragment(url, fragment);
let assembled_url = url::url_with_fragment(url, fragment);
assert_eq!(&assembled_url, "https://localhost.localdomain/path/");
}
@ -30,7 +30,7 @@ mod passing {
fn url_with_fragment_data_url() {
let url = "";
let fragment = "fragment";
let assembled_url = utils::url_with_fragment(url, fragment);
let assembled_url = url::url_with_fragment(url, fragment);
assert_eq!(
&assembled_url,

View file

@ -1,14 +1,2 @@
mod clean_url;
mod data_to_data_url;
mod data_url_to_data;
mod decode_url;
mod detect_media_type;
mod file_url_to_fs_path;
mod get_url_fragment;
mod is_data_url;
mod is_file_url;
mod is_http_url;
mod resolve_url;
mod retrieve_asset;
mod url_has_protocol;
mod url_with_fragment;

View file

@ -7,11 +7,13 @@
#[cfg(test)]
mod passing {
use crate::utils;
use reqwest::blocking::Client;
use std::collections::HashMap;
use std::env;
use crate::url;
use crate::utils;
#[test]
fn read_data_url() {
let cache = &mut HashMap::new();
@ -28,12 +30,12 @@ mod passing {
)
.unwrap();
assert_eq!(
utils::data_to_data_url(&media_type, &data, &final_url),
utils::data_to_data_url("text/html", "target".as_bytes(), "")
url::data_to_data_url(&media_type, &data, &final_url),
url::data_to_data_url("text/html", "target".as_bytes(), "")
);
assert_eq!(
final_url,
utils::data_to_data_url("text/html", "target".as_bytes(), "")
url::data_to_data_url("text/html", "target".as_bytes(), "")
);
assert_eq!(&media_type, "text/html");
}
@ -63,7 +65,7 @@ mod passing {
false,
)
.unwrap();
assert_eq!(utils::data_to_data_url("application/javascript", &data, &final_url), "data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==");
assert_eq!(url::data_to_data_url("application/javascript", &data, &final_url), "data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==");
assert_eq!(
&final_url,
&format!(

168
src/url.rs Normal file
View file

@ -0,0 +1,168 @@
use base64;
use url::{form_urlencoded, ParseError, Url};
use crate::utils::detect_media_type;
/// Returns `input` with its fragment removed, an empty query (a stray `?`)
/// dropped, and any trailing `&` characters trimmed off the serialized URL.
///
/// Input that cannot be parsed as an absolute URL is returned unchanged
/// instead of panicking.
pub fn clean_url<T: AsRef<str>>(input: T) -> String {
    let mut url = match Url::parse(input.as_ref()) {
        Ok(parsed) => parsed,
        // Nothing can be normalized reliably; hand the input back untouched
        Err(_) => return input.as_ref().to_string(),
    };

    // Clear fragment
    url.set_fragment(None);

    // Get rid of stray question mark
    if url.query() == Some("") {
        url.set_query(None);
    }

    // Remove empty trailing ampersand(s)
    url.as_str().trim_end_matches('&').to_string()
}
/// Builds a base64-encoded `data:` URL out of raw bytes.
///
/// An empty `media_type` is replaced by whatever `detect_media_type(data, url)`
/// returns; a non-empty one is used verbatim.
pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str) -> String {
    let payload = base64::encode(data);

    if media_type.is_empty() {
        format!("data:{};base64,{}", detect_media_type(data, url), payload)
    } else {
        format!("data:{};base64,{}", media_type, payload)
    }
}
/// Splits a `data:` URL into its media type and decoded payload.
///
/// The part of the URL path before the first `,` is treated as `;`-separated
/// metadata: the first item is the media type, and the last item equal
/// (ignoring ASCII case) to `base64`, `utf8` or `charset=UTF-8` is the encoding.
/// The part after the `,` is percent-decoded and, if the encoding is `base64`,
/// base64-decoded (yielding an empty vector when that fails).
///
/// Input that cannot be parsed as a URL is treated like `data:,`, which
/// produces an empty media type and empty data.
pub fn data_url_to_data<T: AsRef<str>>(url: T) -> (String, Vec<u8>) {
    // Build the fallback lazily; it is only needed when parsing fails
    let parsed_url: Url = Url::parse(url.as_ref())
        .unwrap_or_else(|_| Url::parse("data:,").expect("\"data:,\" is a valid URL"));
    let path: &str = parsed_url.path();

    // Slice by the byte offset `find` reports, so index and slicing always agree
    let (meta_data, raw_data) = match path.find(',') {
        Some(comma_loc) => (&path[..comma_loc], &path[comma_loc + 1..]),
        None => (path, ""),
    };
    let text: String = decode_url(raw_data.to_string());

    // `split` always yields at least one item, which is the media type
    let mut meta_data_items = meta_data.split(';');
    let media_type: String = meta_data_items.next().unwrap_or("").to_string();

    // Of the remaining items, the last recognized one wins; no counter is
    // involved, so any number of items is handled without overflow
    let encoding: &str = meta_data_items
        .filter(|item| {
            item.eq_ignore_ascii_case("base64")
                || item.eq_ignore_ascii_case("utf8")
                || item.eq_ignore_ascii_case("charset=UTF-8")
        })
        .last()
        .unwrap_or("");

    let data: Vec<u8> = if encoding.eq_ignore_ascii_case("base64") {
        base64::decode(&text).unwrap_or_default()
    } else {
        text.into_bytes()
    };

    (media_type, data)
}
/// Percent-decodes `input`.
///
/// Every `%XX` sequence with two hexadecimal digits is replaced by the byte it
/// encodes; all other characters, including `+`, `&` and `=`, are passed
/// through untouched. A `%` that is not followed by two hex digits is kept
/// literally. Decoded bytes that are not valid UTF-8 are replaced with U+FFFD.
pub fn decode_url(input: String) -> String {
    /// Numeric value of a single ASCII hexadecimal digit, if it is one.
    fn hex_value(byte: u8) -> Option<u8> {
        match byte {
            b'0'..=b'9' => Some(byte - b'0'),
            b'a'..=b'f' => Some(byte - b'a' + 10),
            b'A'..=b'F' => Some(byte - b'A' + 10),
            _ => None,
        }
    }

    let bytes = input.as_bytes();
    let mut decoded: Vec<u8> = Vec::with_capacity(bytes.len());
    let mut pos = 0;

    while pos < bytes.len() {
        if bytes[pos] == b'%' {
            // Only consume the escape when both following bytes are hex digits
            let high = bytes.get(pos + 1).and_then(|b| hex_value(*b));
            let low = bytes.get(pos + 2).and_then(|b| hex_value(*b));
            if let (Some(high), Some(low)) = (high, low) {
                decoded.push((high << 4) | low);
                pos += 3;
                continue;
            }
        }

        decoded.push(bytes[pos]);
        pos += 1;
    }

    String::from_utf8_lossy(&decoded).into_owned()
}
/// Converts a `file:` URL into a filesystem path string.
///
/// Returns an empty string when `url` is not a `file:` URL. Otherwise the
/// leading `file://` prefix (plus one more character on Windows, so that
/// `file:///C:/...` starts at the drive letter) is cut off, the fragment is
/// discarded, and the remainder is percent-decoded exactly once. On Windows,
/// forward slashes are then replaced with backslashes.
pub fn file_url_to_fs_path(url: &str) -> String {
    if !is_file_url(url) {
        return String::new();
    }

    // Drop surrounding spaces/control characters so the prefix offset below
    // lines up with the scheme.
    // NOTE(review): assumes the URL parser ignores the same characters — confirm
    let trimmed = url.trim_matches(|c: char| c <= ' ');

    // Checked slicing: inputs shorter than the prefix yield an empty path
    // instead of panicking
    let cutoff_l = if cfg!(windows) { 8 } else { 7 };
    let after_prefix = trimmed.get(cutoff_l..).unwrap_or("");

    // Cut the fragment off while the text is still encoded: a literal `#`
    // can only be the fragment delimiter here, whereas an encoded `%23`
    // belongs to the path
    let raw_path = after_prefix.split('#').next().unwrap_or("");

    // File paths should not be %-encoded
    let fs_file_path = decode_url(raw_path.to_string());

    if cfg!(windows) {
        fs_file_path.replace("/", "\\")
    } else {
        fs_file_path
    }
}
pub fn get_url_fragment<T: AsRef<str>>(url: T) -> String {
if Url::parse(url.as_ref()).unwrap().fragment() == None {
str!()
} else {
str!(Url::parse(url.as_ref()).unwrap().fragment().unwrap())
}
}
/// Tells whether `url` parses as a URL whose scheme is `data`.
/// Anything that fails to parse yields `false`.
pub fn is_data_url<T: AsRef<str>>(url: T) -> bool {
    match Url::parse(url.as_ref()) {
        Ok(parsed) => parsed.scheme() == "data",
        Err(_) => false,
    }
}
/// Tells whether `url` parses as a URL whose scheme is `file`.
/// Anything that fails to parse yields `false`.
pub fn is_file_url<T: AsRef<str>>(url: T) -> bool {
    match Url::parse(url.as_ref()) {
        Ok(parsed) => parsed.scheme() == "file",
        Err(_) => false,
    }
}
/// Tells whether `url` parses as a URL whose scheme is `http` or `https`.
/// Anything that fails to parse yields `false`.
pub fn is_http_url<T: AsRef<str>>(url: T) -> bool {
    match Url::parse(url.as_ref()) {
        Ok(parsed) => {
            let scheme = parsed.scheme();
            scheme == "http" || scheme == "https"
        }
        Err(_) => false,
    }
}
/// Resolves `to` against the base URL `from`.
///
/// When `to` is already an HTTP(S) URL it is returned as given and `from` is
/// not looked at. Otherwise `from` is parsed and `to` is joined onto it.
///
/// # Errors
///
/// Returns a `ParseError` when `from` cannot be parsed or the join fails.
pub fn resolve_url<T: AsRef<str>, U: AsRef<str>>(from: T, to: U) -> Result<String, ParseError> {
    let target: &str = to.as_ref();

    if is_http_url(target) {
        return Ok(target.to_string());
    }

    let base = Url::parse(from.as_ref())?;
    let joined = base.join(target)?;

    Ok(joined.as_str().to_string())
}
/// Tells whether `url` parses as a URL with a non-empty scheme.
/// Anything that fails to parse yields `false`.
pub fn url_has_protocol<T: AsRef<str>>(url: T) -> bool {
    match Url::parse(url.as_ref()) {
        Ok(parsed) => !parsed.scheme().is_empty(),
        Err(_) => false,
    }
}
/// Appends `#` followed by `fragment` to `url`.
/// An empty `fragment` leaves `url` as it is (no `#` is added).
pub fn url_with_fragment(url: &str, fragment: &str) -> String {
    if fragment.is_empty() {
        return url.to_string();
    }

    format!("{}#{}", url, fragment)
}

View file

@ -1,10 +1,10 @@
use base64;
use reqwest::blocking::Client;
use reqwest::header::CONTENT_TYPE;
use std::collections::HashMap;
use std::fs;
use std::path::Path;
use url::{form_urlencoded, ParseError, Url};
use crate::url::{clean_url, data_url_to_data, file_url_to_fs_path, is_data_url, is_file_url};
const MAGIC: [[&[u8]; 2]; 18] = [
// Image
@ -38,16 +38,6 @@ const PLAINTEXT_MEDIA_TYPES: &[&str] = &[
"text/plain",
];
pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str) -> String {
let media_type: String = if media_type.is_empty() {
detect_media_type(data, &url)
} else {
media_type.to_string()
};
format!("data:{};base64,{}", media_type, base64::encode(data))
}
pub fn detect_media_type(data: &[u8], url: &str) -> String {
for item in MAGIC.iter() {
if data.starts_with(item[0]) {
@ -62,153 +52,10 @@ pub fn detect_media_type(data: &[u8], url: &str) -> String {
str!()
}
pub fn url_has_protocol<T: AsRef<str>>(url: T) -> bool {
Url::parse(url.as_ref())
.and_then(|u| Ok(u.scheme().len() > 0))
.unwrap_or(false)
}
pub fn is_data_url<T: AsRef<str>>(url: T) -> bool {
Url::parse(url.as_ref())
.and_then(|u| Ok(u.scheme() == "data"))
.unwrap_or(false)
}
pub fn is_file_url<T: AsRef<str>>(url: T) -> bool {
Url::parse(url.as_ref())
.and_then(|u| Ok(u.scheme() == "file"))
.unwrap_or(false)
}
pub fn is_http_url<T: AsRef<str>>(url: T) -> bool {
Url::parse(url.as_ref())
.and_then(|u| Ok(u.scheme() == "http" || u.scheme() == "https"))
.unwrap_or(false)
}
/// Tells whether `media_type`, compared case-insensitively, is one of the
/// entries of `PLAINTEXT_MEDIA_TYPES`.
pub fn is_plaintext_media_type(media_type: &str) -> bool {
    let lowered = media_type.to_lowercase();
    PLAINTEXT_MEDIA_TYPES
        .iter()
        .any(|known| *known == lowered.as_str())
}
pub fn resolve_url<T: AsRef<str>, U: AsRef<str>>(from: T, to: U) -> Result<String, ParseError> {
let result = if is_http_url(to.as_ref()) {
to.as_ref().to_string()
} else {
Url::parse(from.as_ref())?
.join(to.as_ref())?
.as_ref()
.to_string()
};
Ok(result)
}
pub fn get_url_fragment<T: AsRef<str>>(url: T) -> String {
if Url::parse(url.as_ref()).unwrap().fragment() == None {
str!()
} else {
str!(Url::parse(url.as_ref()).unwrap().fragment().unwrap())
}
}
pub fn clean_url<T: AsRef<str>>(input: T) -> String {
let mut url = Url::parse(input.as_ref()).unwrap();
// Clear fragment
url.set_fragment(None);
// Get rid of stray question mark
if url.query() == Some("") {
url.set_query(None);
}
// Remove empty trailing ampersand(s)
let mut result: String = url.to_string();
while result.ends_with("&") {
result.pop();
}
result
}
pub fn data_url_to_data<T: AsRef<str>>(url: T) -> (String, Vec<u8>) {
let parsed_url: Url = Url::parse(url.as_ref()).unwrap_or(Url::parse("data:,").unwrap());
let path: String = parsed_url.path().to_string();
let comma_loc: usize = path.find(',').unwrap_or(path.len());
let meta_data: String = path.chars().take(comma_loc).collect();
let raw_data: String = path.chars().skip(comma_loc + 1).collect();
let text: String = decode_url(raw_data);
let meta_data_items: Vec<&str> = meta_data.split(';').collect();
let mut media_type: String = str!();
let mut encoding: &str = "";
let mut i: i8 = 0;
for item in &meta_data_items {
if i == 0 {
media_type = str!(item);
} else {
if item.eq_ignore_ascii_case("base64")
|| item.eq_ignore_ascii_case("utf8")
|| item.eq_ignore_ascii_case("charset=UTF-8")
{
encoding = item;
}
}
i = i + 1;
}
let data: Vec<u8> = if encoding.eq_ignore_ascii_case("base64") {
base64::decode(&text).unwrap_or(vec![])
} else {
text.as_bytes().to_vec()
};
(media_type, data)
}
pub fn decode_url(input: String) -> String {
let input: String = input.replace("+", "%2B");
form_urlencoded::parse(input.as_bytes())
.map(|(key, val)| {
[
key.to_string(),
if val.to_string().len() == 0 {
str!()
} else {
str!('=')
},
val.to_string(),
]
.concat()
})
.collect()
}
pub fn file_url_to_fs_path(url: &str) -> String {
if !is_file_url(url) {
return str!();
}
let cutoff_l = if cfg!(windows) { 8 } else { 7 };
let mut fs_file_path: String = decode_url(url.to_string()[cutoff_l..].to_string());
let url_fragment = get_url_fragment(url);
if url_fragment != "" {
let max_len = fs_file_path.len() - 1 - url_fragment.len();
fs_file_path = fs_file_path[0..max_len].to_string();
}
if cfg!(windows) {
fs_file_path = fs_file_path.replace("/", "\\");
}
// File paths should not be %-encoded
decode_url(fs_file_path)
}
pub fn retrieve_asset(
cache: &mut HashMap<String, Vec<u8>>,
client: &Client,
@ -291,14 +138,3 @@ pub fn retrieve_asset(
}
}
}
pub fn url_with_fragment(url: &str, fragment: &str) -> String {
let mut result = str!(&url);
if !fragment.is_empty() {
result += "#";
result += fragment;
}
result
}