2019-08-23 20:24:45 +02:00
|
|
|
use regex::Regex;
|
2019-08-25 02:23:53 +02:00
|
|
|
use reqwest::Client;
|
2019-08-23 20:33:18 +02:00
|
|
|
use reqwest::header::{CONTENT_TYPE, USER_AGENT};
|
2019-08-23 11:08:38 +02:00
|
|
|
use std::time::Duration;
|
2019-08-23 20:24:45 +02:00
|
|
|
use url::{ParseError, Url};
|
2019-08-23 05:17:15 +02:00
|
|
|
use utils::data_to_dataurl;
|
|
|
|
|
2019-08-23 20:24:45 +02:00
|
|
|
lazy_static! {
    /// Matches strings that start with an `http://` or `https://` scheme.
    ///
    /// The `(?i)` flag makes the match case-insensitive: URL schemes are
    /// case-insensitive (RFC 3986, section 3.1), so `HTTP://host/` must be
    /// recognised just like `http://host/`.
    static ref REGEX_URL: Regex =
        Regex::new(r"(?i)^https?://").expect("hard-coded URL scheme pattern is a valid regex");
}
|
|
|
|
|
2019-08-25 05:06:40 +02:00
|
|
|
pub fn is_data_url(url: &str) -> Result<bool, ParseError> {
|
2019-08-24 05:06:06 +02:00
|
|
|
match Url::parse(url) {
|
|
|
|
Ok(parsed_url) => Ok(parsed_url.scheme() == "data"),
|
2019-08-25 05:06:40 +02:00
|
|
|
Err(err) => Err(err),
|
2019-08-24 05:06:06 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-08-23 20:24:45 +02:00
|
|
|
pub fn is_valid_url(path: &str) -> bool {
|
|
|
|
REGEX_URL.is_match(path)
|
|
|
|
}
|
2019-08-23 05:17:15 +02:00
|
|
|
|
2019-08-23 20:24:45 +02:00
|
|
|
pub fn resolve_url(from: &str, to: &str) -> Result<String, ParseError> {
|
|
|
|
let result = if is_valid_url(to) {
|
|
|
|
// (anything, http://site.com/css/main.css)
|
|
|
|
to.to_string()
|
2019-08-23 05:17:15 +02:00
|
|
|
} else {
|
2019-08-24 05:06:06 +02:00
|
|
|
Url::parse(from)?.join(to)?.to_string()
|
2019-08-23 20:24:45 +02:00
|
|
|
};
|
2019-08-23 05:17:15 +02:00
|
|
|
|
|
|
|
Ok(result)
|
|
|
|
}
|
|
|
|
|
2019-08-23 20:24:45 +02:00
|
|
|
pub fn retrieve_asset(
|
|
|
|
url: &str,
|
|
|
|
as_dataurl: bool,
|
|
|
|
as_mime: &str,
|
2019-08-23 20:33:18 +02:00
|
|
|
opt_user_agent: &str,
|
2019-08-23 20:24:45 +02:00
|
|
|
) -> Result<String, reqwest::Error> {
|
2019-08-24 05:06:06 +02:00
|
|
|
if is_data_url(&url).unwrap() {
|
2019-08-23 05:17:15 +02:00
|
|
|
Ok(url.to_string())
|
|
|
|
} else {
|
2019-08-23 11:08:38 +02:00
|
|
|
let client = Client::builder()
|
|
|
|
.timeout(Duration::from_secs(10))
|
2019-08-25 05:06:40 +02:00
|
|
|
.build()?;
|
2019-08-23 20:33:18 +02:00
|
|
|
let mut response = client
|
|
|
|
.get(url)
|
|
|
|
.header(USER_AGENT, opt_user_agent)
|
2019-08-25 05:06:40 +02:00
|
|
|
.send()?;
|
2019-08-24 19:33:24 +02:00
|
|
|
let final_url = response.url().as_str();
|
|
|
|
|
|
|
|
if url == final_url {
|
|
|
|
eprintln!("[ {} ]", &url);
|
|
|
|
} else {
|
|
|
|
eprintln!("[ {} -> {} ]", &url, &final_url);
|
|
|
|
}
|
2019-08-23 05:17:15 +02:00
|
|
|
|
|
|
|
if as_dataurl {
|
|
|
|
// Convert response into a byte array
|
|
|
|
let mut data: Vec<u8> = vec![];
|
|
|
|
response.copy_to(&mut data)?;
|
|
|
|
|
|
|
|
// Attempt to obtain MIME type by reading the Content-Type header
|
2019-08-23 20:24:45 +02:00
|
|
|
let mimetype = if as_mime == "" {
|
|
|
|
response
|
|
|
|
.headers()
|
2019-08-23 05:17:15 +02:00
|
|
|
.get(CONTENT_TYPE)
|
|
|
|
.and_then(|header| header.to_str().ok())
|
2019-08-23 20:24:45 +02:00
|
|
|
.unwrap_or(&as_mime)
|
|
|
|
} else {
|
|
|
|
as_mime
|
|
|
|
};
|
2019-08-23 05:17:15 +02:00
|
|
|
|
|
|
|
Ok(data_to_dataurl(&mimetype, &data))
|
|
|
|
} else {
|
|
|
|
Ok(response.text().unwrap())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Only strings starting with an HTTP(S) scheme count as valid URLs.
    #[test]
    fn test_is_valid_url() {
        let accepted = ["https://www.rust-lang.org/", "http://kernel.org"];
        let rejected = [
            "./index.html",
            "some-local-page.htm",
            "ftp://1.2.3.4/www/index.html",
            "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
        ];

        for candidate in accepted.iter() {
            assert!(is_valid_url(candidate), "expected valid: {}", candidate);
        }
        for candidate in rejected.iter() {
            assert!(!is_valid_url(candidate), "expected invalid: {}", candidate);
        }
    }

    /// Relative, absolute, protocol-relative and root-relative targets are
    /// all resolved against the base URL.
    #[test]
    fn test_resolve_url() -> Result<(), ParseError> {
        // (from, to, expected)
        let cases = [
            (
                "https://www.kernel.org",
                "../category/signatures.html",
                "https://www.kernel.org/category/signatures.html",
            ),
            (
                "https://www.kernel.org",
                "category/signatures.html",
                "https://www.kernel.org/category/signatures.html",
            ),
            (
                "saved_page.htm",
                "https://www.kernel.org/category/signatures.html",
                "https://www.kernel.org/category/signatures.html",
            ),
            (
                "https://www.kernel.org",
                "//www.kernel.org/theme/images/logos/tux.png",
                "https://www.kernel.org/theme/images/logos/tux.png",
            ),
            (
                "https://www.kernel.org",
                "//another-host.org/theme/images/logos/tux.png",
                "https://another-host.org/theme/images/logos/tux.png",
            ),
            (
                "https://www.kernel.org/category/signatures.html",
                "/theme/images/logos/tux.png",
                "https://www.kernel.org/theme/images/logos/tux.png",
            ),
            (
                "https://www.w3schools.com/html/html_iframe.asp",
                "default.asp",
                "https://www.w3schools.com/html/default.asp",
            ),
        ];

        for (from, to, expected) in cases.iter() {
            let resolved_url = resolve_url(from, to)?;
            assert_eq!(resolved_url.as_str(), *expected);
        }

        Ok(())
    }

    /// Only `data:` URLs are reported as data URLs; unparseable input counts
    /// as "not a data URL" here.
    #[test]
    fn test_is_data_url() {
        let data_url = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
        assert!(is_data_url(data_url).unwrap_or(false));

        for other in ["https://kernel.org", "//kernel.org"].iter() {
            assert!(
                !is_data_url(other).unwrap_or(false),
                "expected non-data URL: {}",
                other
            );
        }
    }
}
|