2020-01-07 05:22:28 +01:00
|
|
|
use reqwest::blocking::Client;
|
2020-03-08 20:31:42 +01:00
|
|
|
use reqwest::header::CONTENT_TYPE;
|
2019-12-06 02:20:09 +01:00
|
|
|
use std::collections::HashMap;
|
2020-03-08 20:31:42 +01:00
|
|
|
use std::fs;
|
2021-03-11 23:44:02 +01:00
|
|
|
use std::path::{Path, PathBuf};
|
|
|
|
use url::Url;
|
2020-06-24 09:16:40 +02:00
|
|
|
|
2020-11-23 03:49:26 +01:00
|
|
|
use crate::opts::Options;
|
2021-03-11 23:44:02 +01:00
|
|
|
use crate::url::{clean_url, parse_data_url};
|
2019-09-29 23:15:49 +02:00
|
|
|
|
2021-02-24 10:38:03 +01:00
|
|
|
/// ANSI escape sequence emitted before error messages to color them red.
const ANSI_COLOR_RED: &str = "\x1b[31m";
/// ANSI escape sequence emitted after colored output to restore default styling.
const ANSI_COLOR_RESET: &str = "\x1b[0m";
|
2020-03-24 13:22:14 +01:00
|
|
|
/// Table of known file signatures, one `[signature, media type]` pair per entry.
///
/// Index `0` of each pair holds the leading bytes of a file, index `1` holds the
/// media type (as ASCII bytes) reported for files carrying that signature.
///
/// NOTE(review): the `.` bytes inside some signatures (e.g. after `RIFF`) look like
/// placeholders for variable bytes such as a size field; verify that the code
/// matching against this table treats them that way.
const MAGIC: [[&[u8]; 2]; 18] = [
    // Image formats
    [b"GIF87a", b"image/gif"],
    [b"GIF89a", b"image/gif"],
    [b"\xFF\xD8\xFF", b"image/jpeg"],
    [b"\x89PNG\x0D\x0A\x1A\x0A", b"image/png"],
    [b"<svg ", b"image/svg+xml"],
    [b"RIFF....WEBPVP8 ", b"image/webp"],
    [b"\x00\x00\x01\x00", b"image/x-icon"],
    // Audio formats
    [b"ID3", b"audio/mpeg"],
    [b"\xFF\x0E", b"audio/mpeg"],
    [b"\xFF\x0F", b"audio/mpeg"],
    [b"OggS", b"audio/ogg"],
    [b"RIFF....WAVEfmt ", b"audio/wav"],
    [b"fLaC", b"audio/x-flac"],
    // Video formats
    [b"RIFF....AVI LIST", b"video/avi"],
    [b"....ftyp", b"video/mp4"],
    [b"\x00\x00\x01\x0B", b"video/mpeg"],
    [b"....moov", b"video/quicktime"],
    [b"\x1A\x45\xDF\xA3", b"video/webm"],
];
|
2021-06-08 14:30:15 +02:00
|
|
|
/// Media types outside the `text/` family that are nevertheless treated as plaintext.
const PLAINTEXT_MEDIA_TYPES: &[&str] = &[
    "application/javascript",
    "application/json",
    "image/svg+xml",
];
|
2020-04-10 02:27:07 +02:00
|
|
|
|
2021-03-11 23:44:02 +01:00
|
|
|
pub fn detect_media_type(data: &[u8], url: &Url) -> String {
|
2021-06-08 14:30:15 +02:00
|
|
|
// At first attempt to read file's header
|
2021-03-11 23:44:02 +01:00
|
|
|
for magic_item in MAGIC.iter() {
|
|
|
|
if data.starts_with(magic_item[0]) {
|
|
|
|
return String::from_utf8(magic_item[1].to_vec()).unwrap();
|
2019-08-24 00:48:08 +02:00
|
|
|
}
|
|
|
|
}
|
2020-03-24 13:22:14 +01:00
|
|
|
|
2021-06-08 14:30:15 +02:00
|
|
|
// If header didn't match any known magic signatures,
|
|
|
|
// try to guess media type from file name
|
|
|
|
let parts: Vec<&str> = url.path().split('/').collect();
|
|
|
|
detect_media_type_by_file_name(parts.last().unwrap())
|
2019-08-23 05:17:15 +02:00
|
|
|
}
|
|
|
|
|
2021-06-08 14:30:15 +02:00
|
|
|
/// Guesses a media type from the extension of `filename`.
///
/// The name is lower-cased and everything after its last `.` is looked up in a
/// fixed table. A name without any `.` is looked up as a whole, so `"png"` is
/// treated like `"x.png"`. Unknown extensions yield `application/octet-stream`.
pub fn detect_media_type_by_file_name(filename: &str) -> String {
    let lowercased: String = filename.to_lowercase();
    // `rsplit` always yields at least one item, so the fallback is never taken
    let extension: &str = lowercased.rsplit('.').next().unwrap_or("");

    let media_type: &str = match extension {
        "avi" => "video/avi",
        "bmp" => "image/bmp",
        "css" => "text/css",
        "flac" => "audio/flac",
        "gif" => "image/gif",
        "htm" | "html" => "text/html",
        "ico" => "image/x-icon",
        "jpeg" | "jpg" => "image/jpeg",
        "js" => "application/javascript",
        "json" => "application/json",
        "mp3" => "audio/mpeg",
        "mp4" | "m4v" => "video/mp4",
        "ogg" => "audio/ogg",
        "ogv" => "video/ogg",
        "pdf" => "application/pdf",
        "png" => "image/png",
        "svg" => "image/svg+xml",
        "swf" => "application/x-shockwave-flash",
        "tif" | "tiff" => "image/tiff",
        "txt" => "text/plain",
        "wav" => "audio/wav",
        "webp" => "image/webp",
        "woff" => "font/woff",
        "woff2" => "font/woff2",
        "xml" => "text/xml",
        _ => "application/octet-stream",
    };

    String::from(media_type)
}
|
|
|
|
|
2020-06-28 22:11:15 +02:00
|
|
|
/// Builds an indentation prefix consisting of `level` space characters.
pub fn indent(level: u32) -> String {
    (0..level).map(|_| ' ').collect()
}
|
|
|
|
|
2021-06-08 14:30:15 +02:00
|
|
|
pub fn is_plaintext_media_type(media_type: &str) -> bool {
|
|
|
|
media_type.to_lowercase().as_str().starts_with("text/")
|
|
|
|
|| PLAINTEXT_MEDIA_TYPES.contains(&media_type.to_lowercase().as_str())
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Splits a `Content-Type`-style value into `(media_type, charset, is_base64)`.
///
/// The value is split on `;`. The first item, when non-empty after trimming,
/// becomes the media type (default `text/plain`). Every following item is
/// inspected as a parameter:
///
/// * `base64` (any ASCII case) sets the base64 flag;
/// * `charset=<value>` (parameter name in any ASCII case) sets the charset
///   (default `US-ASCII`); the value is taken verbatim.
///
/// Any other parameter is ignored, no matter how many there are.
pub fn parse_content_type(content_type: &str) -> (String, String, bool) {
    const CHARSET_PREFIX: &str = "charset=";

    let mut media_type: String = String::from("text/plain");
    let mut charset: String = String::from("US-ASCII");
    let mut is_base64: bool = false;

    let mut items = content_type.split(';').map(str::trim);

    // Only the very first item can be the media type; taking it off the
    // iterator avoids a position counter that could overflow on long inputs
    if let Some(first) = items.next() {
        if !first.is_empty() {
            media_type = first.to_string();
        }
    }

    for item in items {
        if item.eq_ignore_ascii_case("base64") {
            is_base64 = true;
        } else if item
            .get(..CHARSET_PREFIX.len())
            .map_or(false, |name| name.eq_ignore_ascii_case(CHARSET_PREFIX))
        {
            // `get` succeeded, so the prefix length is a valid char boundary
            charset = item[CHARSET_PREFIX.len()..].to_string();
        }
    }

    (media_type, charset, is_base64)
}
|
|
|
|
|
2020-03-08 20:31:42 +01:00
|
|
|
pub fn retrieve_asset(
|
2020-04-11 02:43:29 +02:00
|
|
|
cache: &mut HashMap<String, Vec<u8>>,
|
2020-03-08 20:31:42 +01:00
|
|
|
client: &Client,
|
2021-03-11 23:44:02 +01:00
|
|
|
parent_url: &Url,
|
|
|
|
url: &Url,
|
2020-11-23 03:49:26 +01:00
|
|
|
options: &Options,
|
2020-06-28 22:11:15 +02:00
|
|
|
depth: u32,
|
2021-06-08 14:30:15 +02:00
|
|
|
) -> Result<(Vec<u8>, Url, String, String), reqwest::Error> {
|
2021-03-11 23:44:02 +01:00
|
|
|
if url.scheme() == "data" {
|
2021-06-08 14:30:15 +02:00
|
|
|
let (media_type, charset, data) = parse_data_url(url);
|
|
|
|
Ok((data, url.clone(), media_type, charset))
|
2021-03-11 23:44:02 +01:00
|
|
|
} else if url.scheme() == "file" {
|
|
|
|
// Check if parent_url is also file:/// (if not, then we don't embed the asset)
|
|
|
|
if parent_url.scheme() != "file" {
|
|
|
|
if !options.silent {
|
|
|
|
eprintln!(
|
|
|
|
"{}{}{} ({}){}",
|
|
|
|
indent(depth).as_str(),
|
|
|
|
if options.no_color { "" } else { ANSI_COLOR_RED },
|
|
|
|
&url,
|
|
|
|
"Security Error",
|
|
|
|
if options.no_color {
|
|
|
|
""
|
|
|
|
} else {
|
|
|
|
ANSI_COLOR_RESET
|
|
|
|
},
|
|
|
|
);
|
|
|
|
}
|
2020-05-02 12:13:28 +02:00
|
|
|
// Provoke error
|
|
|
|
client.get("").send()?;
|
2020-03-08 20:31:42 +01:00
|
|
|
}
|
|
|
|
|
2021-03-11 23:44:02 +01:00
|
|
|
let path_buf: PathBuf = url.to_file_path().unwrap().clone();
|
|
|
|
let path: &Path = path_buf.as_path();
|
2020-03-08 20:31:42 +01:00
|
|
|
if path.exists() {
|
2021-03-11 23:44:02 +01:00
|
|
|
if path.is_dir() {
|
|
|
|
if !options.silent {
|
|
|
|
eprintln!(
|
|
|
|
"{}{}{} (is a directory){}",
|
|
|
|
indent(depth).as_str(),
|
|
|
|
if options.no_color { "" } else { ANSI_COLOR_RED },
|
|
|
|
&url,
|
|
|
|
if options.no_color {
|
|
|
|
""
|
|
|
|
} else {
|
|
|
|
ANSI_COLOR_RESET
|
|
|
|
},
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Provoke error
|
|
|
|
Err(client.get("").send().unwrap_err())
|
|
|
|
} else {
|
|
|
|
if !options.silent {
|
|
|
|
eprintln!("{}{}", indent(depth).as_str(), &url);
|
|
|
|
}
|
|
|
|
|
2021-06-08 14:30:15 +02:00
|
|
|
let file_blob: Vec<u8> = fs::read(&path).expect("Unable to read file");
|
|
|
|
|
|
|
|
Ok((
|
|
|
|
file_blob.clone(),
|
|
|
|
url.clone(),
|
|
|
|
detect_media_type(&file_blob, url),
|
|
|
|
str!(),
|
|
|
|
))
|
2021-03-11 23:44:02 +01:00
|
|
|
}
|
|
|
|
} else {
|
2020-11-23 03:49:26 +01:00
|
|
|
if !options.silent {
|
2021-03-11 23:44:02 +01:00
|
|
|
eprintln!(
|
|
|
|
"{}{}{} (not found){}",
|
|
|
|
indent(depth).as_str(),
|
|
|
|
if options.no_color { "" } else { ANSI_COLOR_RED },
|
|
|
|
&url,
|
|
|
|
if options.no_color {
|
|
|
|
""
|
|
|
|
} else {
|
|
|
|
ANSI_COLOR_RESET
|
|
|
|
},
|
|
|
|
);
|
2020-03-08 20:31:42 +01:00
|
|
|
}
|
|
|
|
|
2020-05-02 12:13:28 +02:00
|
|
|
// Provoke error
|
|
|
|
Err(client.get("").send().unwrap_err())
|
2020-03-08 20:31:42 +01:00
|
|
|
}
|
|
|
|
} else {
|
2021-03-11 23:44:02 +01:00
|
|
|
let cache_key: String = clean_url(url.clone()).as_str().to_string();
|
2020-04-11 02:43:29 +02:00
|
|
|
|
2020-03-08 20:31:42 +01:00
|
|
|
if cache.contains_key(&cache_key) {
|
2021-06-08 14:30:15 +02:00
|
|
|
// URL is in cache, we get and return it
|
2020-11-23 03:49:26 +01:00
|
|
|
if !options.silent {
|
2020-06-28 22:11:15 +02:00
|
|
|
eprintln!("{}{} (from cache)", indent(depth).as_str(), &url);
|
2020-03-08 20:31:42 +01:00
|
|
|
}
|
2020-04-11 02:43:29 +02:00
|
|
|
|
2021-06-08 14:30:15 +02:00
|
|
|
Ok((
|
|
|
|
cache.get(&cache_key).unwrap().to_vec(),
|
|
|
|
url.clone(),
|
|
|
|
str!(),
|
|
|
|
str!(),
|
|
|
|
))
|
2020-03-08 20:31:42 +01:00
|
|
|
} else {
|
2021-06-08 14:30:15 +02:00
|
|
|
// URL not in cache, we retrieve the file
|
2021-03-11 23:44:02 +01:00
|
|
|
match client.get(url.as_str()).send() {
|
2020-11-23 03:49:26 +01:00
|
|
|
Ok(mut response) => {
|
|
|
|
if !options.ignore_errors && response.status() != 200 {
|
|
|
|
if !options.silent {
|
2021-01-30 07:24:35 +01:00
|
|
|
eprintln!(
|
|
|
|
"{}{}{} ({}){}",
|
|
|
|
indent(depth).as_str(),
|
|
|
|
if options.no_color { "" } else { ANSI_COLOR_RED },
|
|
|
|
&url,
|
|
|
|
response.status(),
|
|
|
|
if options.no_color {
|
|
|
|
""
|
|
|
|
} else {
|
|
|
|
ANSI_COLOR_RESET
|
|
|
|
},
|
|
|
|
);
|
2020-11-23 03:49:26 +01:00
|
|
|
}
|
|
|
|
// Provoke error
|
|
|
|
return Err(client.get("").send().unwrap_err());
|
|
|
|
}
|
|
|
|
|
|
|
|
if !options.silent {
|
2021-03-11 23:44:02 +01:00
|
|
|
if url.as_str() == response.url().as_str() {
|
2020-11-23 03:49:26 +01:00
|
|
|
eprintln!("{}{}", indent(depth).as_str(), &url);
|
|
|
|
} else {
|
2021-03-11 23:44:02 +01:00
|
|
|
eprintln!("{}{} -> {}", indent(depth).as_str(), &url, &response.url());
|
2020-11-23 03:49:26 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-11 23:44:02 +01:00
|
|
|
let new_cache_key: String = clean_url(response.url().clone()).to_string();
|
2020-11-23 03:49:26 +01:00
|
|
|
|
|
|
|
// Convert response into a byte array
|
|
|
|
let mut data: Vec<u8> = vec![];
|
2021-03-11 23:44:02 +01:00
|
|
|
response.copy_to(&mut data).unwrap();
|
2020-11-23 03:49:26 +01:00
|
|
|
|
2021-06-08 14:30:15 +02:00
|
|
|
// Attempt to obtain media type and charset by reading Content-Type header
|
|
|
|
let content_type: &str = response
|
2020-11-23 03:49:26 +01:00
|
|
|
.headers()
|
|
|
|
.get(CONTENT_TYPE)
|
|
|
|
.and_then(|header| header.to_str().ok())
|
|
|
|
.unwrap_or("");
|
|
|
|
|
2021-06-08 14:30:15 +02:00
|
|
|
let (media_type, charset, _is_base64) = parse_content_type(&content_type);
|
|
|
|
|
2020-11-23 03:49:26 +01:00
|
|
|
// Add retrieved resource to cache
|
|
|
|
cache.insert(new_cache_key, data.clone());
|
|
|
|
|
2021-03-11 23:44:02 +01:00
|
|
|
// Return
|
2021-06-08 14:30:15 +02:00
|
|
|
Ok((data, response.url().clone(), media_type, charset))
|
2021-03-11 23:44:02 +01:00
|
|
|
}
|
|
|
|
Err(error) => {
|
|
|
|
if !options.silent {
|
|
|
|
eprintln!(
|
|
|
|
"{}{}{} ({}){}",
|
|
|
|
indent(depth).as_str(),
|
|
|
|
if options.no_color { "" } else { ANSI_COLOR_RED },
|
|
|
|
&url,
|
|
|
|
error,
|
|
|
|
if options.no_color {
|
|
|
|
""
|
|
|
|
} else {
|
|
|
|
ANSI_COLOR_RESET
|
|
|
|
},
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
|
|
|
Err(client.get("").send().unwrap_err())
|
2020-03-08 20:31:42 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|