Merge pull request #135 from snshn/local-file-support
Add support for working with local assets
This commit is contained in:
commit
061386ccc2
3
.gitignore
vendored
3
.gitignore
vendored
@ -4,6 +4,3 @@
|
|||||||
|
|
||||||
# These are backup files generated by rustfmt
|
# These are backup files generated by rustfmt
|
||||||
**/*.rs.bk
|
**/*.rs.bk
|
||||||
|
|
||||||
# Exclude accidental HTML files
|
|
||||||
*.html
|
|
||||||
|
@ -11,8 +11,8 @@ rust:
|
|||||||
- beta
|
- beta
|
||||||
- nightly
|
- nightly
|
||||||
|
|
||||||
services:
|
git:
|
||||||
- docker
|
autocrlf: false # don't mangle LF into CRLF on windows
|
||||||
|
|
||||||
before_script:
|
before_script:
|
||||||
- rustup component add rustfmt
|
- rustup component add rustfmt
|
||||||
|
15
src/html.rs
15
src/html.rs
@ -1,7 +1,7 @@
|
|||||||
use crate::http::retrieve_asset;
|
|
||||||
use crate::js::attr_is_event_handler;
|
use crate::js::attr_is_event_handler;
|
||||||
use crate::utils::{
|
use crate::utils::{
|
||||||
data_to_data_url, is_http_url, resolve_css_imports, resolve_url, url_has_protocol,
|
data_to_data_url, is_http_url, resolve_css_imports, resolve_url, retrieve_asset,
|
||||||
|
url_has_protocol,
|
||||||
};
|
};
|
||||||
use html5ever::interface::QualName;
|
use html5ever::interface::QualName;
|
||||||
use html5ever::parse_document;
|
use html5ever::parse_document;
|
||||||
@ -133,6 +133,7 @@ pub fn walk_and_embed_assets(
|
|||||||
let (favicon_data_url, _) = retrieve_asset(
|
let (favicon_data_url, _) = retrieve_asset(
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
|
&url,
|
||||||
&href_full_url,
|
&href_full_url,
|
||||||
true,
|
true,
|
||||||
"",
|
"",
|
||||||
@ -156,6 +157,7 @@ pub fn walk_and_embed_assets(
|
|||||||
let replacement_text = match retrieve_asset(
|
let replacement_text = match retrieve_asset(
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
|
&url,
|
||||||
&href_full_url,
|
&href_full_url,
|
||||||
false,
|
false,
|
||||||
"text/css",
|
"text/css",
|
||||||
@ -167,6 +169,7 @@ pub fn walk_and_embed_assets(
|
|||||||
client,
|
client,
|
||||||
&css_data,
|
&css_data,
|
||||||
true,
|
true,
|
||||||
|
&url,
|
||||||
&href_full_url,
|
&href_full_url,
|
||||||
opt_no_images,
|
opt_no_images,
|
||||||
opt_silent,
|
opt_silent,
|
||||||
@ -231,6 +234,7 @@ pub fn walk_and_embed_assets(
|
|||||||
retrieve_asset(
|
retrieve_asset(
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
|
&url,
|
||||||
&abs_src,
|
&abs_src,
|
||||||
true,
|
true,
|
||||||
"",
|
"",
|
||||||
@ -278,6 +282,7 @@ pub fn walk_and_embed_assets(
|
|||||||
retrieve_asset(
|
retrieve_asset(
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
|
&url,
|
||||||
&abs_src,
|
&abs_src,
|
||||||
true,
|
true,
|
||||||
"",
|
"",
|
||||||
@ -311,6 +316,7 @@ pub fn walk_and_embed_assets(
|
|||||||
let (source_data_url, _) = retrieve_asset(
|
let (source_data_url, _) = retrieve_asset(
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
|
&url,
|
||||||
&srcset_full_url,
|
&srcset_full_url,
|
||||||
true,
|
true,
|
||||||
"",
|
"",
|
||||||
@ -375,6 +381,7 @@ pub fn walk_and_embed_assets(
|
|||||||
let (js_data_url, _) = retrieve_asset(
|
let (js_data_url, _) = retrieve_asset(
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
|
&url,
|
||||||
&src_full_url,
|
&src_full_url,
|
||||||
true,
|
true,
|
||||||
"application/javascript",
|
"application/javascript",
|
||||||
@ -401,6 +408,7 @@ pub fn walk_and_embed_assets(
|
|||||||
tendril.as_ref(),
|
tendril.as_ref(),
|
||||||
false,
|
false,
|
||||||
&url,
|
&url,
|
||||||
|
&url,
|
||||||
opt_no_images,
|
opt_no_images,
|
||||||
opt_silent,
|
opt_silent,
|
||||||
);
|
);
|
||||||
@ -444,6 +452,7 @@ pub fn walk_and_embed_assets(
|
|||||||
let (frame_data, frame_final_url) = retrieve_asset(
|
let (frame_data, frame_final_url) = retrieve_asset(
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
|
&url,
|
||||||
&src_full_url,
|
&src_full_url,
|
||||||
false,
|
false,
|
||||||
"text/html",
|
"text/html",
|
||||||
@ -488,6 +497,7 @@ pub fn walk_and_embed_assets(
|
|||||||
let (poster_data_url, _) = retrieve_asset(
|
let (poster_data_url, _) = retrieve_asset(
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
|
&url,
|
||||||
&poster_full_url,
|
&poster_full_url,
|
||||||
true,
|
true,
|
||||||
"",
|
"",
|
||||||
@ -528,6 +538,7 @@ pub fn walk_and_embed_assets(
|
|||||||
attribute.value.as_ref(),
|
attribute.value.as_ref(),
|
||||||
false,
|
false,
|
||||||
&url,
|
&url,
|
||||||
|
&url,
|
||||||
opt_no_images,
|
opt_no_images,
|
||||||
opt_silent,
|
opt_silent,
|
||||||
);
|
);
|
||||||
|
68
src/http.rs
68
src/http.rs
@ -1,68 +0,0 @@
|
|||||||
use crate::utils::{clean_url, data_to_data_url, is_data_url};
|
|
||||||
use reqwest::blocking::Client;
|
|
||||||
use reqwest::header::CONTENT_TYPE;
|
|
||||||
use std::collections::HashMap;
|
|
||||||
|
|
||||||
pub fn retrieve_asset(
|
|
||||||
cache: &mut HashMap<String, String>,
|
|
||||||
client: &Client,
|
|
||||||
url: &str,
|
|
||||||
as_data_url: bool,
|
|
||||||
mime: &str,
|
|
||||||
opt_silent: bool,
|
|
||||||
) -> Result<(String, String), reqwest::Error> {
|
|
||||||
let cache_key = clean_url(&url);
|
|
||||||
|
|
||||||
if is_data_url(&url) {
|
|
||||||
Ok((url.to_string(), url.to_string()))
|
|
||||||
} else {
|
|
||||||
if cache.contains_key(&cache_key) {
|
|
||||||
// url is in cache
|
|
||||||
if !opt_silent {
|
|
||||||
eprintln!("{} (from cache)", &url);
|
|
||||||
}
|
|
||||||
let data = cache.get(&cache_key).unwrap();
|
|
||||||
Ok((data.to_string(), url.to_string()))
|
|
||||||
} else {
|
|
||||||
// url not in cache, we request it
|
|
||||||
let mut response = client.get(url).send()?;
|
|
||||||
let res_url = response.url().to_string();
|
|
||||||
|
|
||||||
if !opt_silent {
|
|
||||||
if url == res_url {
|
|
||||||
eprintln!("{}", &url);
|
|
||||||
} else {
|
|
||||||
eprintln!("{} -> {}", &url, &res_url);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let new_cache_key = clean_url(&res_url);
|
|
||||||
|
|
||||||
if as_data_url {
|
|
||||||
// Convert response into a byte array
|
|
||||||
let mut data: Vec<u8> = vec![];
|
|
||||||
response.copy_to(&mut data)?;
|
|
||||||
|
|
||||||
// Attempt to obtain MIME type by reading the Content-Type header
|
|
||||||
let mimetype = if mime == "" {
|
|
||||||
response
|
|
||||||
.headers()
|
|
||||||
.get(CONTENT_TYPE)
|
|
||||||
.and_then(|header| header.to_str().ok())
|
|
||||||
.unwrap_or(&mime)
|
|
||||||
} else {
|
|
||||||
mime
|
|
||||||
};
|
|
||||||
let data_url = data_to_data_url(&mimetype, &data);
|
|
||||||
// insert in cache
|
|
||||||
cache.insert(new_cache_key, data_url.clone());
|
|
||||||
Ok((data_url, res_url))
|
|
||||||
} else {
|
|
||||||
let content = response.text().unwrap();
|
|
||||||
// insert in cache
|
|
||||||
cache.insert(new_cache_key, content.clone());
|
|
||||||
Ok((content, res_url))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,7 +1,7 @@
|
|||||||
const JS_DOM_EVENT_ATTRS: &[&str] = &[
|
const JS_DOM_EVENT_ATTRS: &[&str] = &[
|
||||||
// From WHATWG HTML spec 8.1.5.2 'Event handlers on elements, Document objects, and Window objects':
|
// From WHATWG HTML spec 8.1.5.2 "Event handlers on elements, Document objects, and Window objects":
|
||||||
// https://html.spec.whatwg.org/#event-handlers-on-elements,-document-objects,-and-window-objects
|
// https://html.spec.whatwg.org/#event-handlers-on-elements,-document-objects,-and-window-objects
|
||||||
// https://html.spec.whatwg.org/#attributes-3 (table 'List of event handler content attributes')
|
// https://html.spec.whatwg.org/#attributes-3 (table "List of event handler content attributes")
|
||||||
|
|
||||||
// Global event handlers
|
// Global event handlers
|
||||||
"onabort",
|
"onabort",
|
||||||
|
@ -5,7 +5,6 @@ extern crate lazy_static;
|
|||||||
mod macros;
|
mod macros;
|
||||||
|
|
||||||
pub mod html;
|
pub mod html;
|
||||||
pub mod http;
|
|
||||||
pub mod js;
|
pub mod js;
|
||||||
pub mod utils;
|
pub mod utils;
|
||||||
|
|
||||||
|
67
src/main.rs
67
src/main.rs
@ -6,19 +6,20 @@ mod macros;
|
|||||||
|
|
||||||
use crate::args::AppArgs;
|
use crate::args::AppArgs;
|
||||||
use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets};
|
use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets};
|
||||||
use monolith::http::retrieve_asset;
|
use monolith::utils::{data_url_to_text, is_data_url, is_file_url, is_http_url, retrieve_asset};
|
||||||
use monolith::utils::{data_url_to_text, is_data_url, is_http_url};
|
|
||||||
use reqwest::blocking::Client;
|
use reqwest::blocking::Client;
|
||||||
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
|
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::fs::File;
|
use std::env;
|
||||||
|
use std::fs;
|
||||||
use std::io::{self, Error, Write};
|
use std::io::{self, Error, Write};
|
||||||
|
use std::path::Path;
|
||||||
use std::process;
|
use std::process;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
enum Output {
|
enum Output {
|
||||||
Stdout(io::Stdout),
|
Stdout(io::Stdout),
|
||||||
File(File),
|
File(fs::File),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Output {
|
impl Output {
|
||||||
@ -26,7 +27,7 @@ impl Output {
|
|||||||
if file_path.is_empty() {
|
if file_path.is_empty() {
|
||||||
Ok(Output::Stdout(io::stdout()))
|
Ok(Output::Stdout(io::stdout()))
|
||||||
} else {
|
} else {
|
||||||
Ok(Output::File(File::create(file_path)?))
|
Ok(Output::File(fs::File::create(file_path)?))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -46,16 +47,41 @@ impl Output {
|
|||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let app_args = AppArgs::get();
|
let app_args = AppArgs::get();
|
||||||
let target_url: &str = app_args.url_target.as_str();
|
let mut original_target: String = app_args.url_target.clone();
|
||||||
|
let target_url: &str;
|
||||||
let base_url;
|
let base_url;
|
||||||
let dom;
|
let dom;
|
||||||
|
|
||||||
if !is_http_url(target_url) && !is_data_url(target_url) {
|
// Pre-process the input
|
||||||
eprintln!(
|
let cwd_normalized: String =
|
||||||
"Only HTTP(S) or data URLs are supported but got: {}",
|
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
|
||||||
&target_url
|
let path = Path::new(original_target.as_str());
|
||||||
);
|
let path_is_relative: bool = path.is_relative();
|
||||||
|
if original_target.clone().len() == 0 {
|
||||||
|
eprintln!("No target specified");
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
|
} else if is_http_url(original_target.clone()) || is_data_url(original_target.clone()) {
|
||||||
|
target_url = original_target.as_str();
|
||||||
|
} else if is_file_url(original_target.clone()) {
|
||||||
|
target_url = original_target.as_str();
|
||||||
|
} else if path.exists() {
|
||||||
|
if !path.is_file() {
|
||||||
|
eprintln!("Local target is not a file: {}", original_target);
|
||||||
|
process::exit(1);
|
||||||
|
}
|
||||||
|
original_target.insert_str(0, if cfg!(windows) { "file:///" } else { "file://" });
|
||||||
|
original_target = original_target.replace("\\", "/");
|
||||||
|
if path_is_relative {
|
||||||
|
original_target.insert_str(if cfg!(windows) { 8 } else { 7 }, &cwd_normalized);
|
||||||
|
original_target.insert_str(
|
||||||
|
if cfg!(windows) { 8 } else { 7 } + &cwd_normalized.len(),
|
||||||
|
"/",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
target_url = original_target.as_str();
|
||||||
|
} else {
|
||||||
|
original_target.insert_str(0, "http://");
|
||||||
|
target_url = original_target.as_str();
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut output = Output::new(&app_args.output).expect("Could not prepare output");
|
let mut output = Output::new(&app_args.output).expect("Could not prepare output");
|
||||||
@ -81,21 +107,26 @@ fn main() {
|
|||||||
.expect("Failed to initialize HTTP client");
|
.expect("Failed to initialize HTTP client");
|
||||||
|
|
||||||
// Retrieve root document
|
// Retrieve root document
|
||||||
if is_http_url(target_url) {
|
if is_file_url(target_url) || is_http_url(target_url) {
|
||||||
let (data, final_url) =
|
let (data, final_url) = retrieve_asset(
|
||||||
retrieve_asset(&mut cache, &client, target_url, false, "", app_args.silent)
|
&mut cache,
|
||||||
.expect("Could not retrieve assets in HTML");
|
&client,
|
||||||
|
target_url,
|
||||||
|
target_url,
|
||||||
|
false,
|
||||||
|
"",
|
||||||
|
app_args.silent,
|
||||||
|
)
|
||||||
|
.expect("Could not retrieve target document");
|
||||||
base_url = final_url;
|
base_url = final_url;
|
||||||
dom = html_to_dom(&data);
|
dom = html_to_dom(&data);
|
||||||
} else if is_data_url(target_url) {
|
} else if is_data_url(target_url) {
|
||||||
let text: String = data_url_to_text(target_url);
|
let text: String = data_url_to_text(target_url);
|
||||||
|
|
||||||
if text.len() == 0 {
|
if text.len() == 0 {
|
||||||
eprintln!("Unsupported data URL input");
|
eprintln!("Unsupported data URL input");
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
|
base_url = str!(target_url);
|
||||||
base_url = str!();
|
|
||||||
dom = html_to_dom(&text);
|
dom = html_to_dom(&text);
|
||||||
} else {
|
} else {
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
|
228
src/tests/cli.rs
228
src/tests/cli.rs
@ -1,4 +1,5 @@
|
|||||||
use assert_cmd::prelude::*;
|
use assert_cmd::prelude::*;
|
||||||
|
use std::env;
|
||||||
use std::process::Command;
|
use std::process::Command;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -22,9 +23,9 @@ fn print_version() -> Result<(), Box<dyn std::error::Error>> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn bad_input() -> Result<(), Box<dyn std::error::Error>> {
|
fn bad_input_empty_target() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||||
let out = cmd.arg("kernel.org").output().unwrap();
|
let out = cmd.arg("").output().unwrap();
|
||||||
|
|
||||||
// STDOUT should be empty
|
// STDOUT should be empty
|
||||||
assert_eq!(std::str::from_utf8(&out.stdout).unwrap(), "");
|
assert_eq!(std::str::from_utf8(&out.stdout).unwrap(), "");
|
||||||
@ -32,7 +33,7 @@ fn bad_input() -> Result<(), Box<dyn std::error::Error>> {
|
|||||||
// STDERR should contain error description
|
// STDERR should contain error description
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stderr).unwrap(),
|
std::str::from_utf8(&out.stderr).unwrap(),
|
||||||
"Only HTTP(S) or data URLs are supported but got: kernel.org\n"
|
"No target specified\n"
|
||||||
);
|
);
|
||||||
|
|
||||||
// The exit code should be 1
|
// The exit code should be 1
|
||||||
@ -73,7 +74,9 @@ fn isolate_data_url() -> Result<(), Box<dyn std::error::Error>> {
|
|||||||
// STDOUT should contain isolated HTML
|
// STDOUT should contain isolated HTML
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stdout).unwrap(),
|
std::str::from_utf8(&out.stdout).unwrap(),
|
||||||
"<html><head><meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:;\"></meta></head><body>Hello, World!</body></html>\n"
|
"<html><head>\
|
||||||
|
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:;\"></meta>\
|
||||||
|
</head><body>Hello, World!</body></html>\n"
|
||||||
);
|
);
|
||||||
|
|
||||||
// STDERR should be empty
|
// STDERR should be empty
|
||||||
@ -97,7 +100,10 @@ fn remove_css_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
|
|||||||
// STDOUT should contain HTML with no CSS
|
// STDOUT should contain HTML with no CSS
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stdout).unwrap(),
|
std::str::from_utf8(&out.stdout).unwrap(),
|
||||||
"<html><head><meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none';\"></meta><style></style></head><body>Hello</body></html>\n"
|
"<html><head>\
|
||||||
|
<meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none';\"></meta>\
|
||||||
|
<style></style>\
|
||||||
|
</head><body>Hello</body></html>\n"
|
||||||
);
|
);
|
||||||
|
|
||||||
// STDERR should be empty
|
// STDERR should be empty
|
||||||
@ -121,7 +127,9 @@ fn remove_frames_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
|
|||||||
// STDOUT should contain HTML with no iframes
|
// STDOUT should contain HTML with no iframes
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stdout).unwrap(),
|
std::str::from_utf8(&out.stdout).unwrap(),
|
||||||
"<html><head><meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none';child-src 'none';\"></meta></head><body><iframe src=\"\"></iframe>Hi</body></html>\n"
|
"<html><head>\
|
||||||
|
<meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none';child-src 'none';\"></meta>\
|
||||||
|
</head><body><iframe src=\"\"></iframe>Hi</body></html>\n"
|
||||||
);
|
);
|
||||||
|
|
||||||
// STDERR should be empty
|
// STDERR should be empty
|
||||||
@ -145,7 +153,15 @@ fn remove_images_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
|
|||||||
// STDOUT should contain HTML with no images
|
// STDOUT should contain HTML with no images
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stdout).unwrap(),
|
std::str::from_utf8(&out.stdout).unwrap(),
|
||||||
"<html><head><meta http-equiv=\"Content-Security-Policy\" content=\"img-src data:;\"></meta></head><body><img src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=\">Hi</body></html>\n"
|
"<html>\
|
||||||
|
<head>\
|
||||||
|
<meta http-equiv=\"Content-Security-Policy\" content=\"img-src data:;\"></meta>\
|
||||||
|
</head>\
|
||||||
|
<body>\
|
||||||
|
<img src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=\">\
|
||||||
|
Hi\
|
||||||
|
</body>\
|
||||||
|
</html>\n"
|
||||||
);
|
);
|
||||||
|
|
||||||
// STDERR should be empty
|
// STDERR should be empty
|
||||||
@ -169,7 +185,203 @@ fn remove_js_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
|
|||||||
// STDOUT should contain HTML with no JS
|
// STDOUT should contain HTML with no JS
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
std::str::from_utf8(&out.stdout).unwrap(),
|
std::str::from_utf8(&out.stdout).unwrap(),
|
||||||
"<html><head><meta http-equiv=\"Content-Security-Policy\" content=\"script-src 'none';\"></meta><script></script></head><body>Hi</body></html>\n"
|
"<html>\
|
||||||
|
<head>\
|
||||||
|
<meta http-equiv=\"Content-Security-Policy\" content=\"script-src 'none';\"></meta>\
|
||||||
|
<script></script></head>\
|
||||||
|
<body>Hi</body>\
|
||||||
|
</html>\n"
|
||||||
|
);
|
||||||
|
|
||||||
|
// STDERR should be empty
|
||||||
|
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
||||||
|
|
||||||
|
// The exit code should be 0
|
||||||
|
out.assert().code(0);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn local_file_target_input() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||||
|
let cwd_normalized: String =
|
||||||
|
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
|
||||||
|
let out = cmd
|
||||||
|
.arg(if cfg!(windows) {
|
||||||
|
"src\\tests\\data\\local-file.html"
|
||||||
|
} else {
|
||||||
|
"src/tests/data/local-file.html"
|
||||||
|
})
|
||||||
|
.output()
|
||||||
|
.unwrap();
|
||||||
|
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
|
||||||
|
|
||||||
|
// STDOUT should contain HTML from the local file
|
||||||
|
assert_eq!(
|
||||||
|
std::str::from_utf8(&out.stdout).unwrap(),
|
||||||
|
"<!DOCTYPE html><html lang=\"en\"><head>\n \
|
||||||
|
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
|
||||||
|
<title>Local HTML file</title>\n \
|
||||||
|
<link href=\"data:text/css;base64,Ym9keSB7CiAgICBiYWNrZ3JvdW5kLWNvbG9yOiAjMDAwOwogICAgY29sb3I6ICNmZmY7Cn0K\" rel=\"stylesheet\" type=\"text/css\">\n \
|
||||||
|
<link href=\"data:text/css;base64,\" rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
|
||||||
|
<img alt=\"\" src=\"\">\n \
|
||||||
|
<a href=\"file://local-file.html/\">Tricky href</a>\n \
|
||||||
|
<a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
|
||||||
|
<script src=\"data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==\"></script>\n\n\n\n\
|
||||||
|
</body></html>\n"
|
||||||
|
);
|
||||||
|
|
||||||
|
// STDERR should contain list of retrieved file URLs
|
||||||
|
assert_eq!(
|
||||||
|
std::str::from_utf8(&out.stderr).unwrap(),
|
||||||
|
format!(
|
||||||
|
"{file}{cwd}/src/tests/data/local-file.html\n\
|
||||||
|
{file}{cwd}/src/tests/data/local-style.css\n\
|
||||||
|
{file}{cwd}/src/tests/data/local-script.js\n",
|
||||||
|
file = file_url_protocol,
|
||||||
|
cwd = cwd_normalized
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
// The exit code should be 0
|
||||||
|
out.assert().code(0);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn local_file_target_input_absolute_target_path() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
|
let cwd = env::current_dir().unwrap();
|
||||||
|
let cwd_normalized: String =
|
||||||
|
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
|
||||||
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||||
|
let out = cmd
|
||||||
|
.arg("-jciI")
|
||||||
|
.arg(if cfg!(windows) {
|
||||||
|
format!(
|
||||||
|
"{cwd}\\src\\tests\\data\\local-file.html",
|
||||||
|
cwd = cwd.to_str().unwrap()
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
format!(
|
||||||
|
"{cwd}/src/tests/data/local-file.html",
|
||||||
|
cwd = cwd.to_str().unwrap()
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.output()
|
||||||
|
.unwrap();
|
||||||
|
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
|
||||||
|
|
||||||
|
// STDOUT should contain HTML from the local file
|
||||||
|
assert_eq!(
|
||||||
|
std::str::from_utf8(&out.stdout).unwrap(),
|
||||||
|
"<!DOCTYPE html><html lang=\"en\"><head>\
|
||||||
|
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:; style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
|
||||||
|
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
|
||||||
|
<title>Local HTML file</title>\n \
|
||||||
|
<link href=\"\" rel=\"stylesheet\" type=\"text/css\">\n \
|
||||||
|
<link href=\"\" rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
|
||||||
|
<img alt=\"\" src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=\">\n \
|
||||||
|
<a href=\"file://local-file.html/\">Tricky href</a>\n \
|
||||||
|
<a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
|
||||||
|
<script src=\"\"></script>\n\n\n\n\
|
||||||
|
</body></html>\n"
|
||||||
|
);
|
||||||
|
|
||||||
|
// STDERR should contain only the target file
|
||||||
|
let cwd = env::current_dir().unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
std::str::from_utf8(&out.stderr).unwrap(),
|
||||||
|
format!(
|
||||||
|
"{file}{cwd}/src/tests/data/local-file.html\n",
|
||||||
|
file = file_url_protocol,
|
||||||
|
cwd = cwd_normalized,
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
// The exit code should be 0
|
||||||
|
out.assert().code(0);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn local_file_url_target_input() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||||
|
let cwd = env::current_dir().unwrap();
|
||||||
|
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
|
||||||
|
let out = cmd
|
||||||
|
.arg("-cji")
|
||||||
|
.arg(if cfg!(windows) {
|
||||||
|
format!(
|
||||||
|
"{file}{cwd}\\src\\tests\\data\\local-file.html",
|
||||||
|
file = file_url_protocol,
|
||||||
|
cwd = cwd.to_str().unwrap(),
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
format!(
|
||||||
|
"{file}{cwd}/src/tests/data/local-file.html",
|
||||||
|
file = file_url_protocol,
|
||||||
|
cwd = cwd.to_str().unwrap(),
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.output()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// STDOUT should contain HTML from the local file
|
||||||
|
assert_eq!(
|
||||||
|
std::str::from_utf8(&out.stdout).unwrap(),
|
||||||
|
"<!DOCTYPE html><html lang=\"en\"><head>\
|
||||||
|
<meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
|
||||||
|
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
|
||||||
|
<title>Local HTML file</title>\n \
|
||||||
|
<link href=\"\" rel=\"stylesheet\" type=\"text/css\">\n \
|
||||||
|
<link href=\"\" rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
|
||||||
|
<img alt=\"\" src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=\">\n \
|
||||||
|
<a href=\"file://local-file.html/\">Tricky href</a>\n \
|
||||||
|
<a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
|
||||||
|
<script src=\"\"></script>\n\n\n\n\
|
||||||
|
</body></html>\n"
|
||||||
|
);
|
||||||
|
|
||||||
|
// STDERR should contain list of retrieved file URLs
|
||||||
|
assert_eq!(
|
||||||
|
std::str::from_utf8(&out.stderr).unwrap(),
|
||||||
|
if cfg!(windows) {
|
||||||
|
format!(
|
||||||
|
"{file}{cwd}\\src\\tests\\data\\local-file.html\n",
|
||||||
|
file = file_url_protocol,
|
||||||
|
cwd = cwd.to_str().unwrap(),
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
format!(
|
||||||
|
"{file}{cwd}/src/tests/data/local-file.html\n",
|
||||||
|
file = file_url_protocol,
|
||||||
|
cwd = cwd.to_str().unwrap(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
// The exit code should be 0
|
||||||
|
out.assert().code(0);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn security_disallow_local_assets_within_data_url_targets() -> Result<(), Box<dyn std::error::Error>>
|
||||||
|
{
|
||||||
|
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||||
|
let out = cmd
|
||||||
|
.arg("data:text/html,%3Cscript%20src=\"src/tests/data/local-script.js\"%3E%3C/script%3E")
|
||||||
|
.output()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// STDOUT should contain HTML with no JS in it
|
||||||
|
assert_eq!(
|
||||||
|
std::str::from_utf8(&out.stdout).unwrap(),
|
||||||
|
"<html><head><script src=\"\"></script></head><body></body></html>\n"
|
||||||
);
|
);
|
||||||
|
|
||||||
// STDERR should be empty
|
// STDERR should be empty
|
||||||
|
19
src/tests/data/local-file.html
Normal file
19
src/tests/data/local-file.html
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
<!doctype html>
|
||||||
|
|
||||||
|
<html lang="en">
|
||||||
|
|
||||||
|
<head>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||||
|
<title>Local HTML file</title>
|
||||||
|
<link href="local-style.css" rel="stylesheet" type="text/css" />
|
||||||
|
<link href="local-style-does-not-exist.css" rel="stylesheet" type="text/css" />
|
||||||
|
</head>
|
||||||
|
|
||||||
|
<body>
|
||||||
|
<img src="monolith.png" alt="" />
|
||||||
|
<a href="//local-file.html">Tricky href</a>
|
||||||
|
<a href="https://github.com/Y2Z/monolith">Remote URL</a>
|
||||||
|
<script src="local-script.js"></script>
|
||||||
|
</body>
|
||||||
|
|
||||||
|
</html>
|
2
src/tests/data/local-script.js
Normal file
2
src/tests/data/local-script.js
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
document.body.style.backgroundColor = "green";
|
||||||
|
document.body.style.color = "red";
|
4
src/tests/data/local-style.css
Normal file
4
src/tests/data/local-style.css
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
body {
|
||||||
|
background-color: #000;
|
||||||
|
color: #fff;
|
||||||
|
}
|
@ -1,25 +0,0 @@
|
|||||||
use crate::http::retrieve_asset;
|
|
||||||
use reqwest::blocking::Client;
|
|
||||||
use std::collections::HashMap;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_retrieve_asset() {
|
|
||||||
let cache = &mut HashMap::new();
|
|
||||||
let client = Client::new();
|
|
||||||
let (data, final_url) =
|
|
||||||
retrieve_asset(cache, &client, "data:text/html;base64,...", true, "", false).unwrap();
|
|
||||||
assert_eq!(&data, "data:text/html;base64,...");
|
|
||||||
assert_eq!(&final_url, "data:text/html;base64,...");
|
|
||||||
|
|
||||||
let (data, final_url) = retrieve_asset(
|
|
||||||
cache,
|
|
||||||
&client,
|
|
||||||
"data:text/html;base64,...",
|
|
||||||
true,
|
|
||||||
"image/png",
|
|
||||||
false,
|
|
||||||
)
|
|
||||||
.unwrap();
|
|
||||||
assert_eq!(&data, "data:text/html;base64,...");
|
|
||||||
assert_eq!(&final_url, "data:text/html;base64,...");
|
|
||||||
}
|
|
@ -1,5 +1,4 @@
|
|||||||
mod cli;
|
mod cli;
|
||||||
mod html;
|
mod html;
|
||||||
mod http;
|
|
||||||
mod js;
|
mod js;
|
||||||
mod utils;
|
mod utils;
|
||||||
|
@ -1,14 +1,14 @@
|
|||||||
use crate::utils::{
|
use crate::utils;
|
||||||
clean_url, data_to_data_url, data_url_to_text, detect_mimetype, is_data_url, is_http_url,
|
use reqwest::blocking::Client;
|
||||||
resolve_url, url_has_protocol,
|
use std::collections::HashMap;
|
||||||
};
|
use std::env;
|
||||||
use url::ParseError;
|
use url::ParseError;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_data_to_data_url() {
|
fn data_to_data_url() {
|
||||||
let mime = "application/javascript";
|
let mime = "application/javascript";
|
||||||
let data = "var word = 'hello';\nalert(word);\n";
|
let data = "var word = 'hello';\nalert(word);\n";
|
||||||
let datauri = data_to_data_url(mime, data.as_bytes());
|
let datauri = utils::data_to_data_url(mime, data.as_bytes());
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
&datauri,
|
&datauri,
|
||||||
"data:application/javascript;base64,dmFyIHdvcmQgPSAnaGVsbG8nOwphbGVydCh3b3JkKTsK"
|
"data:application/javascript;base64,dmFyIHdvcmQgPSAnaGVsbG8nOwphbGVydCh3b3JkKTsK"
|
||||||
@ -16,90 +16,124 @@ fn test_data_to_data_url() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_detect_mimetype() {
|
fn detect_mimetype() {
|
||||||
// image
|
// Image
|
||||||
assert_eq!(detect_mimetype(b"GIF87a"), "image/gif");
|
assert_eq!(utils::detect_mimetype(b"GIF87a"), "image/gif");
|
||||||
assert_eq!(detect_mimetype(b"GIF89a"), "image/gif");
|
assert_eq!(utils::detect_mimetype(b"GIF89a"), "image/gif");
|
||||||
assert_eq!(detect_mimetype(b"\xFF\xD8\xFF"), "image/jpeg");
|
assert_eq!(utils::detect_mimetype(b"\xFF\xD8\xFF"), "image/jpeg");
|
||||||
assert_eq!(detect_mimetype(b"\x89PNG\x0D\x0A\x1A\x0A"), "image/png");
|
assert_eq!(
|
||||||
assert_eq!(detect_mimetype(b"<?xml "), "image/svg+xml");
|
utils::detect_mimetype(b"\x89PNG\x0D\x0A\x1A\x0A"),
|
||||||
assert_eq!(detect_mimetype(b"<svg "), "image/svg+xml");
|
"image/png"
|
||||||
assert_eq!(detect_mimetype(b"RIFF....WEBPVP8 "), "image/webp");
|
);
|
||||||
assert_eq!(detect_mimetype(b"\x00\x00\x01\x00"), "image/x-icon");
|
assert_eq!(utils::detect_mimetype(b"<?xml "), "image/svg+xml");
|
||||||
// audio
|
assert_eq!(utils::detect_mimetype(b"<svg "), "image/svg+xml");
|
||||||
assert_eq!(detect_mimetype(b"ID3"), "audio/mpeg");
|
assert_eq!(utils::detect_mimetype(b"RIFF....WEBPVP8 "), "image/webp");
|
||||||
assert_eq!(detect_mimetype(b"\xFF\x0E"), "audio/mpeg");
|
assert_eq!(utils::detect_mimetype(b"\x00\x00\x01\x00"), "image/x-icon");
|
||||||
assert_eq!(detect_mimetype(b"\xFF\x0F"), "audio/mpeg");
|
|
||||||
assert_eq!(detect_mimetype(b"OggS"), "audio/ogg");
|
// Audio
|
||||||
assert_eq!(detect_mimetype(b"RIFF....WAVEfmt "), "audio/wav");
|
assert_eq!(utils::detect_mimetype(b"ID3"), "audio/mpeg");
|
||||||
assert_eq!(detect_mimetype(b"fLaC"), "audio/x-flac");
|
assert_eq!(utils::detect_mimetype(b"\xFF\x0E"), "audio/mpeg");
|
||||||
// video
|
assert_eq!(utils::detect_mimetype(b"\xFF\x0F"), "audio/mpeg");
|
||||||
assert_eq!(detect_mimetype(b"RIFF....AVI LIST"), "video/avi");
|
assert_eq!(utils::detect_mimetype(b"OggS"), "audio/ogg");
|
||||||
assert_eq!(detect_mimetype(b"....ftyp"), "video/mp4");
|
assert_eq!(utils::detect_mimetype(b"RIFF....WAVEfmt "), "audio/wav");
|
||||||
assert_eq!(detect_mimetype(b"\x00\x00\x01\x0B"), "video/mpeg");
|
assert_eq!(utils::detect_mimetype(b"fLaC"), "audio/x-flac");
|
||||||
assert_eq!(detect_mimetype(b"....moov"), "video/quicktime");
|
|
||||||
assert_eq!(detect_mimetype(b"\x1A\x45\xDF\xA3"), "video/webm");
|
// Video
|
||||||
|
assert_eq!(utils::detect_mimetype(b"RIFF....AVI LIST"), "video/avi");
|
||||||
|
assert_eq!(utils::detect_mimetype(b"....ftyp"), "video/mp4");
|
||||||
|
assert_eq!(utils::detect_mimetype(b"\x00\x00\x01\x0B"), "video/mpeg");
|
||||||
|
assert_eq!(utils::detect_mimetype(b"....moov"), "video/quicktime");
|
||||||
|
assert_eq!(utils::detect_mimetype(b"\x1A\x45\xDF\xA3"), "video/webm");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_url_has_protocol() {
|
fn url_has_protocol() {
|
||||||
// passing
|
// Passing
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
url_has_protocol("mailto:somebody@somewhere.com?subject=hello"),
|
utils::url_has_protocol("mailto:somebody@somewhere.com?subject=hello"),
|
||||||
true
|
true
|
||||||
);
|
);
|
||||||
assert_eq!(url_has_protocol("tel:5551234567"), true);
|
assert_eq!(utils::url_has_protocol("tel:5551234567"), true);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
url_has_protocol("ftp:user:password@some-ftp-server.com"),
|
utils::url_has_protocol("ftp:user:password@some-ftp-server.com"),
|
||||||
true
|
true
|
||||||
);
|
);
|
||||||
assert_eq!(url_has_protocol("javascript:void(0)"), true);
|
assert_eq!(utils::url_has_protocol("javascript:void(0)"), true);
|
||||||
assert_eq!(url_has_protocol("http://news.ycombinator.com"), true);
|
assert_eq!(utils::url_has_protocol("http://news.ycombinator.com"), true);
|
||||||
assert_eq!(url_has_protocol("https://github.com"), true);
|
assert_eq!(utils::url_has_protocol("https://github.com"), true);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
url_has_protocol("MAILTO:somebody@somewhere.com?subject=hello"),
|
utils::url_has_protocol("MAILTO:somebody@somewhere.com?subject=hello"),
|
||||||
true
|
true
|
||||||
);
|
);
|
||||||
// failing
|
|
||||||
|
// Failing
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
url_has_protocol("//some-hostname.com/some-file.html"),
|
utils::url_has_protocol("//some-hostname.com/some-file.html"),
|
||||||
false
|
false
|
||||||
);
|
);
|
||||||
assert_eq!(url_has_protocol("some-hostname.com/some-file.html"), false);
|
assert_eq!(
|
||||||
assert_eq!(url_has_protocol("/some-file.html"), false);
|
utils::url_has_protocol("some-hostname.com/some-file.html"),
|
||||||
assert_eq!(url_has_protocol(""), false);
|
false
|
||||||
|
);
|
||||||
|
assert_eq!(utils::url_has_protocol("/some-file.html"), false);
|
||||||
|
assert_eq!(utils::url_has_protocol(""), false);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_is_http_url() {
|
fn is_file_url() {
|
||||||
// passing
|
// Passing
|
||||||
assert!(is_http_url("https://www.rust-lang.org/"));
|
assert!(utils::is_file_url(
|
||||||
assert!(is_http_url("http://kernel.org"));
|
"file:///home/user/Websites/my-website/index.html"
|
||||||
// failing
|
));
|
||||||
assert!(!is_http_url("//kernel.org"));
|
assert!(utils::is_file_url(
|
||||||
assert!(!is_http_url("./index.html"));
|
"file:///C:/Documents%20and%20Settings/user/Websites/my-website/assets/images/logo.png"
|
||||||
assert!(!is_http_url("some-local-page.htm"));
|
));
|
||||||
assert!(!is_http_url("ftp://1.2.3.4/www/index.html"));
|
assert!(utils::is_file_url(
|
||||||
assert!(!is_http_url(
|
"file:\\\\\\home\\user\\Websites\\my-website\\index.html"
|
||||||
|
));
|
||||||
|
|
||||||
|
// Failing
|
||||||
|
assert!(!utils::is_file_url("//kernel.org"));
|
||||||
|
assert!(!utils::is_file_url("./index.html"));
|
||||||
|
assert!(!utils::is_file_url("some-local-page.htm"));
|
||||||
|
assert!(!utils::is_file_url("https://1.2.3.4:80/www/index.html"));
|
||||||
|
assert!(!utils::is_file_url(
|
||||||
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
|
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_resolve_url() -> Result<(), ParseError> {
|
fn is_http_url() {
|
||||||
let resolved_url = resolve_url("https://www.kernel.org", "../category/signatures.html")?;
|
// Passing
|
||||||
|
assert!(utils::is_http_url("https://www.rust-lang.org/"));
|
||||||
|
assert!(utils::is_http_url("http://kernel.org"));
|
||||||
|
assert!(utils::is_http_url("http:\\\\freebsd.org\\"));
|
||||||
|
|
||||||
|
// Failing
|
||||||
|
assert!(!utils::is_http_url("//kernel.org"));
|
||||||
|
assert!(!utils::is_http_url("./index.html"));
|
||||||
|
assert!(!utils::is_http_url("some-local-page.htm"));
|
||||||
|
assert!(!utils::is_http_url("ftp://1.2.3.4/www/index.html"));
|
||||||
|
assert!(!utils::is_http_url(
|
||||||
|
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn resolve_url() -> Result<(), ParseError> {
|
||||||
|
let resolved_url = utils::resolve_url("https://www.kernel.org", "../category/signatures.html")?;
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
resolved_url.as_str(),
|
resolved_url.as_str(),
|
||||||
"https://www.kernel.org/category/signatures.html"
|
"https://www.kernel.org/category/signatures.html"
|
||||||
);
|
);
|
||||||
|
|
||||||
let resolved_url = resolve_url("https://www.kernel.org", "category/signatures.html")?;
|
let resolved_url = utils::resolve_url("https://www.kernel.org", "category/signatures.html")?;
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
resolved_url.as_str(),
|
resolved_url.as_str(),
|
||||||
"https://www.kernel.org/category/signatures.html"
|
"https://www.kernel.org/category/signatures.html"
|
||||||
);
|
);
|
||||||
|
|
||||||
let resolved_url = resolve_url(
|
let resolved_url = utils::resolve_url(
|
||||||
"saved_page.htm",
|
"saved_page.htm",
|
||||||
"https://www.kernel.org/category/signatures.html",
|
"https://www.kernel.org/category/signatures.html",
|
||||||
)?;
|
)?;
|
||||||
@ -108,7 +142,7 @@ fn test_resolve_url() -> Result<(), ParseError> {
|
|||||||
"https://www.kernel.org/category/signatures.html"
|
"https://www.kernel.org/category/signatures.html"
|
||||||
);
|
);
|
||||||
|
|
||||||
let resolved_url = resolve_url(
|
let resolved_url = utils::resolve_url(
|
||||||
"https://www.kernel.org",
|
"https://www.kernel.org",
|
||||||
"//www.kernel.org/theme/images/logos/tux.png",
|
"//www.kernel.org/theme/images/logos/tux.png",
|
||||||
)?;
|
)?;
|
||||||
@ -117,7 +151,7 @@ fn test_resolve_url() -> Result<(), ParseError> {
|
|||||||
"https://www.kernel.org/theme/images/logos/tux.png"
|
"https://www.kernel.org/theme/images/logos/tux.png"
|
||||||
);
|
);
|
||||||
|
|
||||||
let resolved_url = resolve_url(
|
let resolved_url = utils::resolve_url(
|
||||||
"https://www.kernel.org",
|
"https://www.kernel.org",
|
||||||
"//another-host.org/theme/images/logos/tux.png",
|
"//another-host.org/theme/images/logos/tux.png",
|
||||||
)?;
|
)?;
|
||||||
@ -126,7 +160,7 @@ fn test_resolve_url() -> Result<(), ParseError> {
|
|||||||
"https://another-host.org/theme/images/logos/tux.png"
|
"https://another-host.org/theme/images/logos/tux.png"
|
||||||
);
|
);
|
||||||
|
|
||||||
let resolved_url = resolve_url(
|
let resolved_url = utils::resolve_url(
|
||||||
"https://www.kernel.org/category/signatures.html",
|
"https://www.kernel.org/category/signatures.html",
|
||||||
"/theme/images/logos/tux.png",
|
"/theme/images/logos/tux.png",
|
||||||
)?;
|
)?;
|
||||||
@ -135,7 +169,7 @@ fn test_resolve_url() -> Result<(), ParseError> {
|
|||||||
"https://www.kernel.org/theme/images/logos/tux.png"
|
"https://www.kernel.org/theme/images/logos/tux.png"
|
||||||
);
|
);
|
||||||
|
|
||||||
let resolved_url = resolve_url(
|
let resolved_url = utils::resolve_url(
|
||||||
"https://www.w3schools.com/html/html_iframe.asp",
|
"https://www.w3schools.com/html/html_iframe.asp",
|
||||||
"default.asp",
|
"default.asp",
|
||||||
)?;
|
)?;
|
||||||
@ -144,7 +178,7 @@ fn test_resolve_url() -> Result<(), ParseError> {
|
|||||||
"https://www.w3schools.com/html/default.asp"
|
"https://www.w3schools.com/html/default.asp"
|
||||||
);
|
);
|
||||||
|
|
||||||
let resolved_url = resolve_url(
|
let resolved_url = utils::resolve_url(
|
||||||
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
|
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
|
||||||
"https://www.kernel.org/category/signatures.html",
|
"https://www.kernel.org/category/signatures.html",
|
||||||
)?;
|
)?;
|
||||||
@ -153,62 +187,197 @@ fn test_resolve_url() -> Result<(), ParseError> {
|
|||||||
"https://www.kernel.org/category/signatures.html"
|
"https://www.kernel.org/category/signatures.html"
|
||||||
);
|
);
|
||||||
|
|
||||||
let resolved_url = resolve_url(
|
let resolved_url = utils::resolve_url(
|
||||||
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
|
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
|
||||||
"//www.w3schools.com/html/html_iframe.asp",
|
"//www.w3schools.com/html/html_iframe.asp",
|
||||||
)
|
)
|
||||||
.unwrap_or(str!());
|
.unwrap_or(str!());
|
||||||
assert_eq!(resolved_url.as_str(), "");
|
assert_eq!(resolved_url.as_str(), "");
|
||||||
|
|
||||||
|
let resolved_url = utils::resolve_url(
|
||||||
|
"file:///home/user/Websites/my-website/index.html",
|
||||||
|
"assets/images/logo.png",
|
||||||
|
)
|
||||||
|
.unwrap_or(str!());
|
||||||
|
assert_eq!(
|
||||||
|
resolved_url.as_str(),
|
||||||
|
"file:///home/user/Websites/my-website/assets/images/logo.png"
|
||||||
|
);
|
||||||
|
|
||||||
|
let resolved_url = utils::resolve_url(
|
||||||
|
"file:\\\\\\home\\user\\Websites\\my-website\\index.html",
|
||||||
|
"assets\\images\\logo.png",
|
||||||
|
)
|
||||||
|
.unwrap_or(str!());
|
||||||
|
assert_eq!(
|
||||||
|
resolved_url.as_str(),
|
||||||
|
"file:///home/user/Websites/my-website/assets/images/logo.png"
|
||||||
|
);
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_is_data_url() {
|
fn is_data_url() {
|
||||||
// passing
|
// Passing
|
||||||
assert!(is_data_url(
|
assert!(utils::is_data_url(
|
||||||
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
|
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
|
||||||
));
|
));
|
||||||
// failing
|
|
||||||
assert!(!is_data_url("https://kernel.org"));
|
// Failing
|
||||||
assert!(!is_data_url("//kernel.org"));
|
assert!(!utils::is_data_url("https://kernel.org"));
|
||||||
assert!(!is_data_url(""));
|
assert!(!utils::is_data_url("//kernel.org"));
|
||||||
|
assert!(!utils::is_data_url(""));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_clean_url() {
|
fn clean_url() {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
clean_url("https://somewhere.com/font.eot#iefix"),
|
utils::clean_url("https://somewhere.com/font.eot#iefix"),
|
||||||
"https://somewhere.com/font.eot"
|
"https://somewhere.com/font.eot"
|
||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
clean_url("https://somewhere.com/font.eot#"),
|
utils::clean_url("https://somewhere.com/font.eot#"),
|
||||||
"https://somewhere.com/font.eot"
|
"https://somewhere.com/font.eot"
|
||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
clean_url("https://somewhere.com/font.eot?#"),
|
utils::clean_url("https://somewhere.com/font.eot?#"),
|
||||||
"https://somewhere.com/font.eot"
|
"https://somewhere.com/font.eot"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_data_url_to_text() {
|
fn data_url_to_text() {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
data_url_to_text("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg=="),
|
utils::data_url_to_text("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg=="),
|
||||||
"Work expands so as to fill the time available for its completion"
|
"Work expands so as to fill the time available for its completion"
|
||||||
);
|
);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
data_url_to_text(
|
utils::data_url_to_text(
|
||||||
"data:text/html;utf8,Work expands so as to fill the time available for its completion"
|
"data:text/html;utf8,Work expands so as to fill the time available for its completion"
|
||||||
),
|
),
|
||||||
"Work expands so as to fill the time available for its completion"
|
"Work expands so as to fill the time available for its completion"
|
||||||
);
|
);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
data_url_to_text(
|
utils::data_url_to_text(
|
||||||
"data:text/html,Work expands so as to fill the time available for its completion"
|
"data:text/html,Work expands so as to fill the time available for its completion"
|
||||||
),
|
),
|
||||||
"Work expands so as to fill the time available for its completion"
|
"Work expands so as to fill the time available for its completion"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
utils::data_url_to_text(
|
||||||
|
" data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion "
|
||||||
|
),
|
||||||
|
"Work expands so as to fill the time available for its completion"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn decode_url() {
|
||||||
|
assert_eq!(
|
||||||
|
utils::decode_url(str!(
|
||||||
|
"%E6%A4%9C%E3%83%92%E3%83%A0%E8%A7%A3%E5%A1%97%E3%82%83%E3%83%83%20%3D%20%E3%82%B5"
|
||||||
|
)),
|
||||||
|
"検ヒム解塗ゃッ = サ"
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(utils::decode_url(str!("%20 %20")), " ");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn retrieve_asset() {
|
||||||
|
let cache = &mut HashMap::new();
|
||||||
|
let client = Client::new();
|
||||||
|
|
||||||
|
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
|
||||||
|
|
||||||
|
// If both source and target are data URLs,
|
||||||
|
// ensure the result contains target data URL
|
||||||
|
let (data, final_url) = utils::retrieve_asset(
|
||||||
|
cache,
|
||||||
|
&client,
|
||||||
|
"data:text/html;base64,SoUrCe",
|
||||||
|
"data:text/html;base64,TaRgEt",
|
||||||
|
true,
|
||||||
|
"",
|
||||||
|
false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(&data, "data:text/html;base64,TaRgEt");
|
||||||
|
assert_eq!(&final_url, "data:text/html;base64,TaRgEt");
|
||||||
|
|
||||||
|
// Media type parameter should not influence data URLs
|
||||||
|
let (data, final_url) = utils::retrieve_asset(
|
||||||
|
cache,
|
||||||
|
&client,
|
||||||
|
"data:text/html;base64,SoUrCe",
|
||||||
|
"data:text/html;base64,TaRgEt",
|
||||||
|
true,
|
||||||
|
"image/png",
|
||||||
|
false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(&data, "data:text/html;base64,TaRgEt");
|
||||||
|
assert_eq!(&final_url, "data:text/html;base64,TaRgEt");
|
||||||
|
|
||||||
|
// Inclusion of local assets from data URL sources should not be allowed
|
||||||
|
let (data, final_url) = utils::retrieve_asset(
|
||||||
|
cache,
|
||||||
|
&client,
|
||||||
|
"data:text/html;base64,SoUrCe",
|
||||||
|
"file:///etc/passwd",
|
||||||
|
true,
|
||||||
|
"",
|
||||||
|
false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(&data, "");
|
||||||
|
assert_eq!(&final_url, "");
|
||||||
|
|
||||||
|
// Inclusion of local assets from remote sources should not be allowed
|
||||||
|
let (data, final_url) = utils::retrieve_asset(
|
||||||
|
cache,
|
||||||
|
&client,
|
||||||
|
"https://kernel.org/",
|
||||||
|
"file:///etc/passwd",
|
||||||
|
true,
|
||||||
|
"",
|
||||||
|
false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(&data, "");
|
||||||
|
assert_eq!(&final_url, "");
|
||||||
|
|
||||||
|
// Inclusion of local assets from local sources should be allowed
|
||||||
|
let cwd = env::current_dir().unwrap();
|
||||||
|
let (data, final_url) = utils::retrieve_asset(
|
||||||
|
cache,
|
||||||
|
&client,
|
||||||
|
&format!(
|
||||||
|
"{file}{cwd}/src/tests/data/local-file.html",
|
||||||
|
file = file_url_protocol,
|
||||||
|
cwd = cwd.to_str().unwrap()
|
||||||
|
),
|
||||||
|
&format!(
|
||||||
|
"{file}{cwd}/src/tests/data/local-script.js",
|
||||||
|
file = file_url_protocol,
|
||||||
|
cwd = cwd.to_str().unwrap()
|
||||||
|
),
|
||||||
|
true,
|
||||||
|
"application/javascript",
|
||||||
|
false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(&data, "data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==");
|
||||||
|
assert_eq!(
|
||||||
|
&final_url,
|
||||||
|
&format!(
|
||||||
|
"{file}{cwd}/src/tests/data/local-script.js",
|
||||||
|
file = file_url_protocol,
|
||||||
|
cwd = cwd.to_str().unwrap()
|
||||||
|
)
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
161
src/utils.rs
161
src/utils.rs
@ -1,8 +1,10 @@
|
|||||||
use crate::http::retrieve_asset;
|
use base64;
|
||||||
use base64::{decode, encode};
|
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use reqwest::blocking::Client;
|
use reqwest::blocking::Client;
|
||||||
|
use reqwest::header::CONTENT_TYPE;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
use std::fs;
|
||||||
|
use std::path::Path;
|
||||||
use url::{form_urlencoded, ParseError, Url};
|
use url::{form_urlencoded, ParseError, Url};
|
||||||
|
|
||||||
/// This monster of a regex is used to match any kind of URL found in CSS.
|
/// This monster of a regex is used to match any kind of URL found in CSS.
|
||||||
@ -71,7 +73,7 @@ pub fn data_to_data_url(mime: &str, data: &[u8]) -> String {
|
|||||||
} else {
|
} else {
|
||||||
mime.to_string()
|
mime.to_string()
|
||||||
};
|
};
|
||||||
format!("data:{};base64,{}", mimetype, encode(data))
|
format!("data:{};base64,{}", mimetype, base64::encode(data))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn detect_mimetype(data: &[u8]) -> String {
|
pub fn detect_mimetype(data: &[u8]) -> String {
|
||||||
@ -95,6 +97,12 @@ pub fn is_data_url<T: AsRef<str>>(url: T) -> bool {
|
|||||||
.unwrap_or(false)
|
.unwrap_or(false)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn is_file_url<T: AsRef<str>>(url: T) -> bool {
|
||||||
|
Url::parse(url.as_ref())
|
||||||
|
.and_then(|u| Ok(u.scheme() == "file"))
|
||||||
|
.unwrap_or(false)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn is_http_url<T: AsRef<str>>(url: T) -> bool {
|
pub fn is_http_url<T: AsRef<str>>(url: T) -> bool {
|
||||||
Url::parse(url.as_ref())
|
Url::parse(url.as_ref())
|
||||||
.and_then(|u| Ok(u.scheme() == "http" || u.scheme() == "https"))
|
.and_then(|u| Ok(u.scheme() == "http" || u.scheme() == "https"))
|
||||||
@ -118,6 +126,7 @@ pub fn resolve_css_imports(
|
|||||||
client: &Client,
|
client: &Client,
|
||||||
css_string: &str,
|
css_string: &str,
|
||||||
as_data_url: bool,
|
as_data_url: bool,
|
||||||
|
parent_url: &str,
|
||||||
href: &str,
|
href: &str,
|
||||||
opt_no_images: bool,
|
opt_no_images: bool,
|
||||||
opt_silent: bool,
|
opt_silent: bool,
|
||||||
@ -127,12 +136,12 @@ pub fn resolve_css_imports(
|
|||||||
for link in REGEX_CSS_URL.captures_iter(&css_string) {
|
for link in REGEX_CSS_URL.captures_iter(&css_string) {
|
||||||
let target_link = link.name("url").unwrap().as_str();
|
let target_link = link.name("url").unwrap().as_str();
|
||||||
|
|
||||||
// Determine the type of link
|
// Determine linked asset type
|
||||||
let is_stylesheet = link.name("stylesheet").is_some();
|
let is_stylesheet = link.name("stylesheet").is_some();
|
||||||
let is_font = link.name("font").is_some();
|
let is_font = link.name("font").is_some();
|
||||||
let is_image = !is_stylesheet && !is_font;
|
let is_image = !is_stylesheet && !is_font;
|
||||||
|
|
||||||
// Generate absolute URL for content
|
// Generate absolute URL for the content
|
||||||
let embedded_url = match resolve_url(href, target_link) {
|
let embedded_url = match resolve_url(href, target_link) {
|
||||||
Ok(url) => url,
|
Ok(url) => url,
|
||||||
Err(_) => continue, // Malformed URL
|
Err(_) => continue, // Malformed URL
|
||||||
@ -144,8 +153,9 @@ pub fn resolve_css_imports(
|
|||||||
retrieve_asset(
|
retrieve_asset(
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
|
&parent_url,
|
||||||
&embedded_url,
|
&embedded_url,
|
||||||
false, // Formating as data URL will be done later
|
false, // Formatting as data URL will be done later
|
||||||
"text/css", // Expect CSS
|
"text/css", // Expect CSS
|
||||||
opt_silent,
|
opt_silent,
|
||||||
)
|
)
|
||||||
@ -155,6 +165,7 @@ pub fn resolve_css_imports(
|
|||||||
client,
|
client,
|
||||||
&content,
|
&content,
|
||||||
true, // Finally, convert to a data URL
|
true, // Finally, convert to a data URL
|
||||||
|
&parent_url,
|
||||||
&embedded_url,
|
&embedded_url,
|
||||||
opt_no_images,
|
opt_no_images,
|
||||||
opt_silent,
|
opt_silent,
|
||||||
@ -165,6 +176,7 @@ pub fn resolve_css_imports(
|
|||||||
retrieve_asset(
|
retrieve_asset(
|
||||||
cache,
|
cache,
|
||||||
client,
|
client,
|
||||||
|
&parent_url,
|
||||||
&embedded_url,
|
&embedded_url,
|
||||||
true, // Format as data URL
|
true, // Format as data URL
|
||||||
"", // Unknown MIME type
|
"", // Unknown MIME type
|
||||||
@ -186,10 +198,11 @@ pub fn resolve_css_imports(
|
|||||||
|
|
||||||
let replacement = format!("\"{}\"", &content);
|
let replacement = format!("\"{}\"", &content);
|
||||||
let dest = link.name("to_repl").unwrap();
|
let dest = link.name("to_repl").unwrap();
|
||||||
let offset = resolved_css.len() - css_string.len();
|
if resolved_css.len() > css_string.len() {
|
||||||
let target_range = (dest.start() + offset)..(dest.end() + offset);
|
let offset = resolved_css.len() - css_string.len();
|
||||||
|
let target_range = (dest.start() + offset)..(dest.end() + offset);
|
||||||
resolved_css.replace_range(target_range, &replacement);
|
resolved_css.replace_range(target_range, &replacement);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if as_data_url {
|
if as_data_url {
|
||||||
@ -222,20 +235,7 @@ pub fn data_url_to_text<T: AsRef<str>>(url: T) -> String {
|
|||||||
let meta_data: String = path.chars().take(comma_loc).collect();
|
let meta_data: String = path.chars().take(comma_loc).collect();
|
||||||
let raw_data: String = path.chars().skip(comma_loc + 1).collect();
|
let raw_data: String = path.chars().skip(comma_loc + 1).collect();
|
||||||
|
|
||||||
let data: String = form_urlencoded::parse(raw_data.as_bytes())
|
let data: String = decode_url(raw_data);
|
||||||
.map(|(key, val)| {
|
|
||||||
[
|
|
||||||
key.to_string(),
|
|
||||||
if val.to_string().len() == 0 {
|
|
||||||
str!()
|
|
||||||
} else {
|
|
||||||
str!('=')
|
|
||||||
},
|
|
||||||
val.to_string(),
|
|
||||||
]
|
|
||||||
.concat()
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
let meta_data_items: Vec<&str> = meta_data.split(';').collect();
|
let meta_data_items: Vec<&str> = meta_data.split(';').collect();
|
||||||
let mut mime_type: &str = "";
|
let mut mime_type: &str = "";
|
||||||
@ -259,7 +259,7 @@ pub fn data_url_to_text<T: AsRef<str>>(url: T) -> String {
|
|||||||
|
|
||||||
if mime_type.eq_ignore_ascii_case("text/html") {
|
if mime_type.eq_ignore_ascii_case("text/html") {
|
||||||
if encoding.eq_ignore_ascii_case("base64") {
|
if encoding.eq_ignore_ascii_case("base64") {
|
||||||
String::from_utf8(decode(&data).unwrap_or(vec![])).unwrap_or(str!())
|
String::from_utf8(base64::decode(&data).unwrap_or(vec![])).unwrap_or(str!())
|
||||||
} else {
|
} else {
|
||||||
data
|
data
|
||||||
}
|
}
|
||||||
@ -267,3 +267,114 @@ pub fn data_url_to_text<T: AsRef<str>>(url: T) -> String {
|
|||||||
str!()
|
str!()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn decode_url(input: String) -> String {
|
||||||
|
form_urlencoded::parse(input.as_bytes())
|
||||||
|
.map(|(key, val)| {
|
||||||
|
[
|
||||||
|
key.to_string(),
|
||||||
|
if val.to_string().len() == 0 {
|
||||||
|
str!()
|
||||||
|
} else {
|
||||||
|
str!('=')
|
||||||
|
},
|
||||||
|
val.to_string(),
|
||||||
|
]
|
||||||
|
.concat()
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn retrieve_asset(
|
||||||
|
cache: &mut HashMap<String, String>,
|
||||||
|
client: &Client,
|
||||||
|
parent_url: &str,
|
||||||
|
url: &str,
|
||||||
|
as_data_url: bool,
|
||||||
|
mime: &str,
|
||||||
|
opt_silent: bool,
|
||||||
|
) -> Result<(String, String), reqwest::Error> {
|
||||||
|
if url.len() == 0 {
|
||||||
|
return Ok((str!(), str!()));
|
||||||
|
}
|
||||||
|
|
||||||
|
let cache_key = clean_url(&url);
|
||||||
|
|
||||||
|
if is_data_url(&url) {
|
||||||
|
Ok((url.to_string(), url.to_string()))
|
||||||
|
} else if is_file_url(&url) {
|
||||||
|
// Check if parent_url is also file:///
|
||||||
|
// (if not then we don't download/embed the asset)
|
||||||
|
if !is_file_url(&parent_url) {
|
||||||
|
return Ok((str!(), str!()));
|
||||||
|
}
|
||||||
|
|
||||||
|
let cutoff = if cfg!(windows) { 8 } else { 7 };
|
||||||
|
let fs_file_path: String = decode_url(url.to_string()[cutoff..].to_string());
|
||||||
|
let path = Path::new(&fs_file_path);
|
||||||
|
if path.exists() {
|
||||||
|
if !opt_silent {
|
||||||
|
eprintln!("{}", &url);
|
||||||
|
}
|
||||||
|
|
||||||
|
if as_data_url {
|
||||||
|
let data_url: String = data_to_data_url(&mime, &fs::read(&fs_file_path).unwrap());
|
||||||
|
Ok((data_url, url.to_string()))
|
||||||
|
} else {
|
||||||
|
let data: String = fs::read_to_string(&fs_file_path).expect(url);
|
||||||
|
Ok((data, url.to_string()))
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Ok((str!(), url.to_string()))
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if cache.contains_key(&cache_key) {
|
||||||
|
// URL is in cache
|
||||||
|
if !opt_silent {
|
||||||
|
eprintln!("{} (from cache)", &url);
|
||||||
|
}
|
||||||
|
let data = cache.get(&cache_key).unwrap();
|
||||||
|
Ok((data.to_string(), url.to_string()))
|
||||||
|
} else {
|
||||||
|
// URL not in cache, we request it
|
||||||
|
let mut response = client.get(url).send()?;
|
||||||
|
let res_url = response.url().to_string();
|
||||||
|
|
||||||
|
if !opt_silent {
|
||||||
|
if url == res_url {
|
||||||
|
eprintln!("{}", &url);
|
||||||
|
} else {
|
||||||
|
eprintln!("{} -> {}", &url, &res_url);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let new_cache_key = clean_url(&res_url);
|
||||||
|
|
||||||
|
if as_data_url {
|
||||||
|
// Convert response into a byte array
|
||||||
|
let mut data: Vec<u8> = vec![];
|
||||||
|
response.copy_to(&mut data)?;
|
||||||
|
|
||||||
|
// Attempt to obtain MIME type by reading the Content-Type header
|
||||||
|
let mimetype = if mime == "" {
|
||||||
|
response
|
||||||
|
.headers()
|
||||||
|
.get(CONTENT_TYPE)
|
||||||
|
.and_then(|header| header.to_str().ok())
|
||||||
|
.unwrap_or(&mime)
|
||||||
|
} else {
|
||||||
|
mime
|
||||||
|
};
|
||||||
|
let data_url = data_to_data_url(&mimetype, &data);
|
||||||
|
// Add to cache
|
||||||
|
cache.insert(new_cache_key, data_url.clone());
|
||||||
|
Ok((data_url, res_url))
|
||||||
|
} else {
|
||||||
|
let content = response.text().unwrap();
|
||||||
|
// Add to cache
|
||||||
|
cache.insert(new_cache_key, content.clone());
|
||||||
|
Ok((content, res_url))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user