2020-01-07 05:22:28 +01:00
|
|
|
use reqwest::blocking::Client;
|
2020-03-08 20:31:42 +01:00
|
|
|
use reqwest::header::CONTENT_TYPE;
|
2019-12-06 02:20:09 +01:00
|
|
|
use std::collections::HashMap;
|
2020-03-08 20:31:42 +01:00
|
|
|
use std::fs;
|
|
|
|
use std::path::Path;
|
2020-06-24 09:16:40 +02:00
|
|
|
|
|
|
|
use crate::url::{clean_url, data_url_to_data, file_url_to_fs_path, is_data_url, is_file_url};
|
2019-09-29 23:15:49 +02:00
|
|
|
|
2020-03-24 13:22:14 +01:00
|
|
|
/// Byte that stands for "any single byte" inside a `MAGIC` signature.
///
/// Container formats such as RIFF and ISO-BMFF store a variable size field
/// before their identifying tag, so the signatures below mark those bytes
/// with `.` instead of a fixed value.
const MAGIC_WILDCARD: u8 = b'.';

/// Table of `[signature, media type]` pairs used by `detect_media_type`.
///
/// A signature is compared against the leading bytes of the data; entries are
/// tried in order and the first match wins.
const MAGIC: [[&[u8]; 2]; 18] = [
    // Image
    [b"GIF87a", b"image/gif"],
    [b"GIF89a", b"image/gif"],
    [b"\xFF\xD8\xFF", b"image/jpeg"],
    [b"\x89PNG\x0D\x0A\x1A\x0A", b"image/png"],
    [b"<svg ", b"image/svg+xml"],
    [b"RIFF....WEBPVP8 ", b"image/webp"],
    [b"\x00\x00\x01\x00", b"image/x-icon"],
    // Audio
    [b"ID3", b"audio/mpeg"],
    [b"\xFF\x0E", b"audio/mpeg"],
    [b"\xFF\x0F", b"audio/mpeg"],
    [b"OggS", b"audio/ogg"],
    [b"RIFF....WAVEfmt ", b"audio/wav"],
    [b"fLaC", b"audio/x-flac"],
    // Video
    [b"RIFF....AVI LIST", b"video/avi"],
    [b"....ftyp", b"video/mp4"],
    [b"\x00\x00\x01\x0B", b"video/mpeg"],
    [b"....moov", b"video/quicktime"],
    [b"\x1A\x45\xDF\xA3", b"video/webm"],
];

/// Lower-case media types that `is_plaintext_media_type` reports as plaintext.
const PLAINTEXT_MEDIA_TYPES: &[&str] = &[
    "image/svg+xml",
    "text/css",
    "text/html",
    "text/javascript",
    "text/plain",
];

/// Returns `true` when `data` begins with `signature`, where every
/// `MAGIC_WILDCARD` byte in `signature` matches any single byte of `data`.
fn matches_signature(data: &[u8], signature: &[u8]) -> bool {
    data.len() >= signature.len()
        && signature
            .iter()
            .zip(data.iter())
            .all(|(&expected, &actual)| expected == MAGIC_WILDCARD || expected == actual)
}

/// Guesses the media type of `data` from its leading bytes.
///
/// The `MAGIC` table is scanned in order and the media type of the first
/// matching signature is returned. If nothing matches and `url` ends with
/// `.svg` (case-insensitively), `image/svg+xml` is returned. Otherwise the
/// result is an empty string.
pub fn detect_media_type(data: &[u8], url: &str) -> String {
    if let Some(entry) = MAGIC
        .iter()
        .find(|entry| matches_signature(data, entry[0]))
    {
        // Table entries are ASCII literals; the lossy conversion merely avoids
        // a panic path should a non-UTF-8 entry ever be added.
        return String::from_utf8_lossy(entry[1]).into_owned();
    }

    if url.to_lowercase().ends_with(".svg") {
        return String::from("image/svg+xml");
    }

    String::new()
}
|
|
|
|
|
2020-04-10 02:27:07 +02:00
|
|
|
pub fn is_plaintext_media_type(media_type: &str) -> bool {
|
|
|
|
PLAINTEXT_MEDIA_TYPES.contains(&media_type.to_lowercase().as_str())
|
|
|
|
}
|
|
|
|
|
2020-03-08 20:31:42 +01:00
|
|
|
pub fn retrieve_asset(
|
2020-04-11 02:43:29 +02:00
|
|
|
cache: &mut HashMap<String, Vec<u8>>,
|
2020-03-08 20:31:42 +01:00
|
|
|
client: &Client,
|
|
|
|
parent_url: &str,
|
|
|
|
url: &str,
|
|
|
|
opt_silent: bool,
|
2020-05-02 12:13:28 +02:00
|
|
|
) -> Result<(Vec<u8>, String, String), reqwest::Error> {
|
2020-03-08 20:31:42 +01:00
|
|
|
if url.len() == 0 {
|
2020-05-02 12:13:28 +02:00
|
|
|
// Provoke error
|
|
|
|
client.get("").send()?;
|
2020-03-08 20:31:42 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if is_data_url(&url) {
|
2020-05-02 12:13:28 +02:00
|
|
|
let (media_type, data) = data_url_to_data(url);
|
|
|
|
Ok((data, url.to_string(), media_type))
|
2020-03-08 20:31:42 +01:00
|
|
|
} else if is_file_url(&url) {
|
|
|
|
// Check if parent_url is also file:///
|
2020-03-29 09:54:20 +02:00
|
|
|
// (if not, then we don't embed the asset)
|
2020-03-08 20:31:42 +01:00
|
|
|
if !is_file_url(&parent_url) {
|
2020-05-02 12:13:28 +02:00
|
|
|
// Provoke error
|
|
|
|
client.get("").send()?;
|
2020-03-08 20:31:42 +01:00
|
|
|
}
|
|
|
|
|
2020-03-29 09:54:20 +02:00
|
|
|
let fs_file_path: String = file_url_to_fs_path(url);
|
2020-03-08 20:31:42 +01:00
|
|
|
let path = Path::new(&fs_file_path);
|
|
|
|
if path.exists() {
|
|
|
|
if !opt_silent {
|
|
|
|
eprintln!("{}", &url);
|
|
|
|
}
|
|
|
|
|
2020-05-02 12:13:28 +02:00
|
|
|
Ok((fs::read(&fs_file_path).expect(""), url.to_string(), str!()))
|
2020-03-08 20:31:42 +01:00
|
|
|
} else {
|
2020-05-02 12:13:28 +02:00
|
|
|
// Provoke error
|
|
|
|
Err(client.get("").send().unwrap_err())
|
2020-03-08 20:31:42 +01:00
|
|
|
}
|
|
|
|
} else {
|
2020-04-11 02:43:29 +02:00
|
|
|
let cache_key: String = clean_url(&url);
|
|
|
|
|
2020-03-08 20:31:42 +01:00
|
|
|
if cache.contains_key(&cache_key) {
|
2020-05-02 12:13:28 +02:00
|
|
|
// URL is in cache, we get and return it
|
2020-03-08 20:31:42 +01:00
|
|
|
if !opt_silent {
|
|
|
|
eprintln!("{} (from cache)", &url);
|
|
|
|
}
|
2020-04-11 02:43:29 +02:00
|
|
|
|
2020-05-02 12:13:28 +02:00
|
|
|
Ok((
|
|
|
|
cache.get(&cache_key).unwrap().to_vec(),
|
|
|
|
url.to_string(),
|
|
|
|
str!(),
|
|
|
|
))
|
2020-03-08 20:31:42 +01:00
|
|
|
} else {
|
2020-05-02 12:13:28 +02:00
|
|
|
// URL not in cache, we retrieve the file
|
2020-03-08 20:31:42 +01:00
|
|
|
let mut response = client.get(url).send()?;
|
|
|
|
let res_url = response.url().to_string();
|
|
|
|
|
|
|
|
if !opt_silent {
|
|
|
|
if url == res_url {
|
|
|
|
eprintln!("{}", &url);
|
|
|
|
} else {
|
|
|
|
eprintln!("{} -> {}", &url, &res_url);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-04-11 02:43:29 +02:00
|
|
|
let new_cache_key: String = clean_url(&res_url);
|
2020-03-08 20:31:42 +01:00
|
|
|
|
2020-05-02 12:13:28 +02:00
|
|
|
// Convert response into a byte array
|
|
|
|
let mut data: Vec<u8> = vec![];
|
|
|
|
response.copy_to(&mut data)?;
|
2020-04-11 02:43:29 +02:00
|
|
|
|
2020-05-02 12:13:28 +02:00
|
|
|
// Attempt to obtain media type by reading the Content-Type header
|
|
|
|
let media_type = response
|
|
|
|
.headers()
|
|
|
|
.get(CONTENT_TYPE)
|
|
|
|
.and_then(|header| header.to_str().ok())
|
|
|
|
.unwrap_or("");
|
2020-04-11 02:43:29 +02:00
|
|
|
|
2020-05-02 12:13:28 +02:00
|
|
|
// Add to cache
|
|
|
|
cache.insert(new_cache_key, data.clone());
|
2020-04-11 02:43:29 +02:00
|
|
|
|
2020-05-02 12:13:28 +02:00
|
|
|
Ok((data, res_url, media_type.to_string()))
|
2020-03-08 20:31:42 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|