monolith/src/utils.rs

159 lines
4.5 KiB
Rust
Raw Normal View History

2019-08-23 05:17:15 +02:00
extern crate base64;
use self::base64::encode;
use http::retrieve_asset;
2019-10-12 13:05:07 +02:00
use regex::Regex;
2019-09-29 23:15:49 +02:00
use url::{ParseError, Url};
use std::io::{stderr, Write};
2019-09-29 23:15:49 +02:00
// Lazily-initialised globals shared by the URL helpers in this file.
lazy_static! {
    // Matches a leading `scheme:` made only of lowercase ASCII letters and digits.
    static ref HAS_PROTOCOL: Regex = Regex::new(r"^[a-z0-9]+:").unwrap();
    // Matches strings starting with `http://` or `https://` (case-sensitive).
    static ref REGEX_URL: Regex = Regex::new(r"^https?://").unwrap();
    // A single shared empty `String`.
    static ref EMPTY_STRING: String = String::new();
}
2019-08-24 00:48:08 +02:00
/// Table of file signatures: each entry is `[leading bytes, MIME type]`.
///
/// Entries are ordered, so a consumer that stops at the first hit gives earlier rows
/// priority over later ones.
///
/// NOTE(review): the `....` runs appear to stand for four arbitrary bytes (e.g. a
/// length field between the container tag and the format tag); a plain byte-for-byte
/// prefix comparison will only ever match literal dots there.
static MAGIC: [[&[u8]; 2]; 19] = [
    // Image
    [b"GIF87a", b"image/gif"],
    [b"GIF89a", b"image/gif"],
    [b"\xFF\xD8\xFF", b"image/jpeg"],
    [b"\x89PNG\x0D\x0A\x1A\x0A", b"image/png"],
    [b"<?xml ", b"image/svg+xml"],
    [b"<svg ", b"image/svg+xml"],
    [b"RIFF....WEBPVP8 ", b"image/webp"],
    [b"\x00\x00\x01\x00", b"image/x-icon"],
    // Audio
    [b"ID3", b"audio/mpeg"],
    // NOTE(review): MPEG audio frames normally start with 0xFF followed by a byte whose
    // top bits are set (0xE0 and above) -- confirm that 0x0E / 0x0F are intended here.
    [b"\xFF\x0E", b"audio/mpeg"],
    [b"\xFF\x0F", b"audio/mpeg"],
    [b"OggS", b"audio/ogg"],
    [b"RIFF....WAVEfmt ", b"audio/wav"],
    [b"fLaC", b"audio/x-flac"],
    // Video
    [b"RIFF....AVI LIST", b"video/avi"],
    [b"....ftyp", b"video/mp4"],
    [b"\x00\x00\x01\x0B", b"video/mpeg"],
    [b"....moov", b"video/quicktime"],
    [b"\x1A\x45\xDF\xA3", b"video/webm"],
];
2019-08-23 05:17:15 +02:00
pub fn data_to_dataurl(mime: &str, data: &[u8]) -> String {
2019-08-23 20:24:45 +02:00
let mimetype = if mime == "" {
detect_mimetype(data)
2019-08-23 05:17:15 +02:00
} else {
2019-08-23 20:24:45 +02:00
mime.to_string()
};
2019-08-23 05:17:15 +02:00
format!("data:{};base64,{}", mimetype, encode(data))
}
2019-09-29 23:15:49 +02:00
pub fn detect_mimetype(data: &[u8]) -> String {
2019-08-24 00:48:08 +02:00
let mut re = String::new();
for item in MAGIC.iter() {
2019-08-24 00:48:08 +02:00
if data.starts_with(item[0]) {
re = String::from_utf8(item[1].to_vec()).unwrap();
break;
}
}
re
2019-08-23 05:17:15 +02:00
}
2019-09-29 23:15:49 +02:00
/// Reports whether `url` begins with a `scheme:` prefix.
///
/// The input is lowercased first and then tested against `HAS_PROTOCOL`, so the
/// check is case-insensitive.
pub fn url_has_protocol(url: &str) -> bool {
    let lowered = url.to_lowercase();
    HAS_PROTOCOL.is_match(lowered.as_str())
}
2019-08-23 05:17:15 +02:00
2019-09-29 23:15:49 +02:00
/// Parses `url` and reports whether its scheme is `data`.
///
/// # Errors
///
/// Returns the `ParseError` from `Url::parse` when `url` cannot be parsed.
pub fn is_data_url(url: &str) -> Result<bool, ParseError> {
    Url::parse(url).map(|parsed_url| parsed_url.scheme() == "data")
}
2019-08-24 00:48:08 +02:00
2019-09-29 23:15:49 +02:00
/// Reports whether `path` matches `REGEX_URL`, i.e. starts with `http://` or
/// `https://`. No further validation of the URL is performed.
pub fn is_valid_url(path: &str) -> bool {
    let has_http_prefix = REGEX_URL.is_match(path);
    has_http_prefix
}
/// Resolves `to` against the base URL `from` and returns the result as a string.
///
/// When `to` already passes `is_valid_url` it is returned unchanged and `from` is not
/// parsed at all; otherwise `from` is parsed and `to` is joined onto it.
///
/// # Errors
///
/// Returns a `ParseError` when `from` cannot be parsed or `to` cannot be joined to it.
pub fn resolve_url(from: &str, to: &str) -> Result<String, ParseError> {
    if is_valid_url(to) {
        return Ok(to.to_string());
    }

    let joined = Url::parse(from)?.join(to)?;
    Ok(joined.to_string())
}
2019-10-12 13:05:07 +02:00
pub fn resolve_css_imports(
css_string: &str,
href: &str,
opt_user_agent: &str,
opt_silent: bool,
opt_insecure: bool,
) -> String {
let mut resolved_css = String::from(css_string);
let re = Regex::new(r###"(?P<import>@import )?url\((?P<to_repl>"?(?P<url>[^"]+)"?)\)"###).unwrap();
2019-10-12 13:05:07 +02:00
for link in re.captures_iter(&css_string) {
let target_link = link.name("url").unwrap().as_str();
// Generate absolute URL for content
let embedded_url = match resolve_url(href, target_link) {
Ok(url) => url,
Err(_) => continue, // Malformed URL
2019-10-12 13:05:07 +02:00
};
// Download the asset. If it's more CSS, resolve that too
let content = match link.name("import") {
// The link is an @import link
Some(_) => retrieve_asset(
&embedded_url,
false, // Formating as data URL will be done later
"text/css", // Expect CSS
opt_user_agent,
opt_silent,
opt_insecure,
)
.map(|(content, _)| resolve_css_imports(
&content,
&embedded_url,
opt_user_agent,
opt_silent,
opt_insecure,
)),
// The link is some other, non-@import link
None => retrieve_asset(
&embedded_url,
true, // Format as data URL
"", // Unknown MIME type
opt_user_agent,
opt_silent,
opt_insecure,
).map(|(a, _)| a),
}.unwrap_or_else(|e| {
writeln!(
stderr(),
"Warning: {}",
e,
).unwrap();
//If failed to resolve, replace with absolute URL
embedded_url
});
let replacement = format!("\"{}\"", &content);
2019-10-12 13:05:07 +02:00
let t = resolved_css
.replace(link.name("to_repl").unwrap().as_str(), &replacement)
2019-10-12 13:05:07 +02:00
.to_string();
resolved_css = t.clone();
}
let encoded_css = data_to_dataurl("text/css", resolved_css.as_bytes());
encoded_css.to_string()
2019-10-12 13:05:07 +02:00
}