monolith/src/http.rs

180 lines
5.4 KiB
Rust
Raw Normal View History

2019-08-23 20:24:45 +02:00
use regex::Regex;
2019-08-23 20:33:18 +02:00
use reqwest::header::{CONTENT_TYPE, USER_AGENT};
2019-08-23 20:24:45 +02:00
use reqwest::Client;
2019-08-23 11:08:38 +02:00
use std::time::Duration;
2019-08-23 20:24:45 +02:00
use url::{ParseError, Url};
2019-08-23 05:17:15 +02:00
use utils::data_to_dataurl;
2019-08-23 20:24:45 +02:00
lazy_static! {
    /// Matches strings that begin with an `http://` or `https://` scheme prefix.
    ///
    /// The `(?i)` flag makes the match case-insensitive: URL schemes are
    /// case-insensitive, so `HTTP://example.com` must be recognised as a remote
    /// URL rather than being mistaken for a local path.
    static ref REGEX_URL: Regex =
        Regex::new(r"(?i)^https?://").expect("hard-coded URL scheme pattern is valid");
}
2019-08-23 20:24:45 +02:00
pub fn is_valid_url(path: &str) -> bool {
REGEX_URL.is_match(path)
}
2019-08-23 05:17:15 +02:00
2019-08-23 20:24:45 +02:00
pub fn resolve_url(from: &str, to: &str) -> Result<String, ParseError> {
let result = if is_valid_url(to) {
// (anything, http://site.com/css/main.css)
to.to_string()
2019-08-23 05:17:15 +02:00
} else {
2019-08-23 20:24:45 +02:00
let mut re = String::new();
if is_valid_url(from) {
// It's a remote resource (HTTP)
if to.chars().nth(0) == Some('/') {
// (http://site.com/article/1, /...?)
2019-08-23 05:17:15 +02:00
let from_url = Url::parse(from)?;
2019-08-23 20:24:45 +02:00
if to.chars().nth(1) == Some('/') {
// (http://site.com/article/1, //images/1.png)
re.push_str(from_url.scheme());
re.push_str(":");
re.push_str(to);
} else {
// (http://site.com/article/1, /css/main.css)
re.push_str(from_url.scheme());
re.push_str("://");
re.push_str(from_url.host_str().unwrap());
re.push_str(to);
2019-08-23 05:17:15 +02:00
}
2019-08-23 20:24:45 +02:00
} else {
// (http://site.com, css/main.css)
2019-08-23 05:17:15 +02:00
// TODO improve to ensure no // or /// ever happen
2019-08-23 20:24:45 +02:00
re.push_str(from);
re.push_str("/");
re.push_str(to);
2019-08-23 05:17:15 +02:00
}
2019-08-23 20:24:45 +02:00
} else {
// It's a local resource (fs)
2019-08-23 05:17:15 +02:00
// TODO improve to ensure no // or /// ever happen
// TODO for fs use basepath instead of $from
2019-08-23 20:24:45 +02:00
re.push_str(from);
re.push_str("/");
re.push_str(to);
2019-08-23 05:17:15 +02:00
}
2019-08-23 20:24:45 +02:00
re
};
2019-08-23 05:17:15 +02:00
Ok(result)
}
pub fn url_is_data(url: &str) -> Result<bool, String> {
2019-08-23 10:49:29 +02:00
match Url::parse(url) {
2019-08-23 05:17:15 +02:00
Ok(parsed_url) => Ok(parsed_url.scheme() == "data"),
2019-08-23 20:24:45 +02:00
Err(err) => Err(format!("{}", err)),
2019-08-23 05:17:15 +02:00
}
}
2019-08-23 20:24:45 +02:00
pub fn retrieve_asset(
url: &str,
as_dataurl: bool,
as_mime: &str,
2019-08-23 20:33:18 +02:00
opt_user_agent: &str,
2019-08-23 20:24:45 +02:00
) -> Result<String, reqwest::Error> {
2019-08-23 05:17:15 +02:00
if url_is_data(&url).unwrap() {
Ok(url.to_string())
} else {
2019-08-23 11:08:38 +02:00
let client = Client::builder()
.timeout(Duration::from_secs(10))
.build()
.unwrap();
2019-08-23 20:33:18 +02:00
let mut response = client
.get(url)
.header(USER_AGENT, opt_user_agent)
.send()
.unwrap();
2019-08-23 05:17:15 +02:00
if as_dataurl {
// Convert response into a byte array
let mut data: Vec<u8> = vec![];
response.copy_to(&mut data)?;
// Attempt to obtain MIME type by reading the Content-Type header
2019-08-23 20:24:45 +02:00
let mimetype = if as_mime == "" {
response
.headers()
2019-08-23 05:17:15 +02:00
.get(CONTENT_TYPE)
.and_then(|header| header.to_str().ok())
2019-08-23 20:24:45 +02:00
.unwrap_or(&as_mime)
} else {
as_mime
};
2019-08-23 05:17:15 +02:00
Ok(data_to_dataurl(&mimetype, &data))
} else {
Ok(response.text().unwrap())
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Only strings with an http/https prefix count as URLs.
    #[test]
    fn test_is_url() {
        let accepted = ["https://www.rust-lang.org/", "http://kernel.org"];
        for candidate in accepted.iter() {
            assert!(is_valid_url(candidate));
        }

        let rejected = [
            "./index.html",
            "some-local-page.htm",
            "ftp://1.2.3.4/www/index.html",
            "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
        ];
        for candidate in rejected.iter() {
            assert!(!is_valid_url(candidate));
        }
    }

    /// Each row is `(from, to, expected result of resolve_url(from, to))`.
    #[test]
    fn test_resolve_url() -> Result<(), ParseError> {
        let cases = [
            (
                "https://www.kernel.org",
                "../category/signatures.html",
                "https://www.kernel.org/../category/signatures.html",
            ),
            (
                "https://www.kernel.org",
                "category/signatures.html",
                "https://www.kernel.org/category/signatures.html",
            ),
            (
                "saved_page.htm",
                "https://www.kernel.org/category/signatures.html",
                "https://www.kernel.org/category/signatures.html",
            ),
            (
                "https://www.kernel.org",
                "//www.kernel.org/theme/images/logos/tux.png",
                "https://www.kernel.org/theme/images/logos/tux.png",
            ),
            (
                "https://www.kernel.org/category/signatures.html",
                "/theme/images/logos/tux.png",
                "https://www.kernel.org/theme/images/logos/tux.png",
            ),
        ];

        for &(from, to, expected) in cases.iter() {
            let resolved_url = resolve_url(from, to)?;
            assert_eq!(resolved_url.as_str(), expected);
        }

        Ok(())
    }

    /// Unparsable input is treated as "not a data URL" via `unwrap_or(false)`.
    #[test]
    fn test_url_is_data() {
        let data_url = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
        assert!(url_is_data(data_url).unwrap_or(false));

        for other in ["https://kernel.org", "//kernel.org"].iter() {
            assert!(!url_is_data(other).unwrap_or(false));
        }
    }
}