Merge pull request #135 from snshn/local-file-support
Add support for working with local assets
This commit is contained in:
commit
061386ccc2
15 changed files with 694 additions and 233 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -4,6 +4,3 @@
|
|||
|
||||
# These are backup files generated by rustfmt
|
||||
**/*.rs.bk
|
||||
|
||||
# Exclude accidental HTML files
|
||||
*.html
|
||||
|
|
|
@ -11,8 +11,8 @@ rust:
|
|||
- beta
|
||||
- nightly
|
||||
|
||||
services:
|
||||
- docker
|
||||
git:
|
||||
autocrlf: false # don't mangle LF into CRLF on windows
|
||||
|
||||
before_script:
|
||||
- rustup component add rustfmt
|
||||
|
|
15
src/html.rs
15
src/html.rs
|
@ -1,7 +1,7 @@
|
|||
use crate::http::retrieve_asset;
|
||||
use crate::js::attr_is_event_handler;
|
||||
use crate::utils::{
|
||||
data_to_data_url, is_http_url, resolve_css_imports, resolve_url, url_has_protocol,
|
||||
data_to_data_url, is_http_url, resolve_css_imports, resolve_url, retrieve_asset,
|
||||
url_has_protocol,
|
||||
};
|
||||
use html5ever::interface::QualName;
|
||||
use html5ever::parse_document;
|
||||
|
@ -133,6 +133,7 @@ pub fn walk_and_embed_assets(
|
|||
let (favicon_data_url, _) = retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&url,
|
||||
&href_full_url,
|
||||
true,
|
||||
"",
|
||||
|
@ -156,6 +157,7 @@ pub fn walk_and_embed_assets(
|
|||
let replacement_text = match retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&url,
|
||||
&href_full_url,
|
||||
false,
|
||||
"text/css",
|
||||
|
@ -167,6 +169,7 @@ pub fn walk_and_embed_assets(
|
|||
client,
|
||||
&css_data,
|
||||
true,
|
||||
&url,
|
||||
&href_full_url,
|
||||
opt_no_images,
|
||||
opt_silent,
|
||||
|
@ -231,6 +234,7 @@ pub fn walk_and_embed_assets(
|
|||
retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&url,
|
||||
&abs_src,
|
||||
true,
|
||||
"",
|
||||
|
@ -278,6 +282,7 @@ pub fn walk_and_embed_assets(
|
|||
retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&url,
|
||||
&abs_src,
|
||||
true,
|
||||
"",
|
||||
|
@ -311,6 +316,7 @@ pub fn walk_and_embed_assets(
|
|||
let (source_data_url, _) = retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&url,
|
||||
&srcset_full_url,
|
||||
true,
|
||||
"",
|
||||
|
@ -375,6 +381,7 @@ pub fn walk_and_embed_assets(
|
|||
let (js_data_url, _) = retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&url,
|
||||
&src_full_url,
|
||||
true,
|
||||
"application/javascript",
|
||||
|
@ -401,6 +408,7 @@ pub fn walk_and_embed_assets(
|
|||
tendril.as_ref(),
|
||||
false,
|
||||
&url,
|
||||
&url,
|
||||
opt_no_images,
|
||||
opt_silent,
|
||||
);
|
||||
|
@ -444,6 +452,7 @@ pub fn walk_and_embed_assets(
|
|||
let (frame_data, frame_final_url) = retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&url,
|
||||
&src_full_url,
|
||||
false,
|
||||
"text/html",
|
||||
|
@ -488,6 +497,7 @@ pub fn walk_and_embed_assets(
|
|||
let (poster_data_url, _) = retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&url,
|
||||
&poster_full_url,
|
||||
true,
|
||||
"",
|
||||
|
@ -528,6 +538,7 @@ pub fn walk_and_embed_assets(
|
|||
attribute.value.as_ref(),
|
||||
false,
|
||||
&url,
|
||||
&url,
|
||||
opt_no_images,
|
||||
opt_silent,
|
||||
);
|
||||
|
|
68
src/http.rs
68
src/http.rs
|
@ -1,68 +0,0 @@
|
|||
use crate::utils::{clean_url, data_to_data_url, is_data_url};
|
||||
use reqwest::blocking::Client;
|
||||
use reqwest::header::CONTENT_TYPE;
|
||||
use std::collections::HashMap;
|
||||
|
||||
pub fn retrieve_asset(
|
||||
cache: &mut HashMap<String, String>,
|
||||
client: &Client,
|
||||
url: &str,
|
||||
as_data_url: bool,
|
||||
mime: &str,
|
||||
opt_silent: bool,
|
||||
) -> Result<(String, String), reqwest::Error> {
|
||||
let cache_key = clean_url(&url);
|
||||
|
||||
if is_data_url(&url) {
|
||||
Ok((url.to_string(), url.to_string()))
|
||||
} else {
|
||||
if cache.contains_key(&cache_key) {
|
||||
// url is in cache
|
||||
if !opt_silent {
|
||||
eprintln!("{} (from cache)", &url);
|
||||
}
|
||||
let data = cache.get(&cache_key).unwrap();
|
||||
Ok((data.to_string(), url.to_string()))
|
||||
} else {
|
||||
// url not in cache, we request it
|
||||
let mut response = client.get(url).send()?;
|
||||
let res_url = response.url().to_string();
|
||||
|
||||
if !opt_silent {
|
||||
if url == res_url {
|
||||
eprintln!("{}", &url);
|
||||
} else {
|
||||
eprintln!("{} -> {}", &url, &res_url);
|
||||
}
|
||||
}
|
||||
|
||||
let new_cache_key = clean_url(&res_url);
|
||||
|
||||
if as_data_url {
|
||||
// Convert response into a byte array
|
||||
let mut data: Vec<u8> = vec![];
|
||||
response.copy_to(&mut data)?;
|
||||
|
||||
// Attempt to obtain MIME type by reading the Content-Type header
|
||||
let mimetype = if mime == "" {
|
||||
response
|
||||
.headers()
|
||||
.get(CONTENT_TYPE)
|
||||
.and_then(|header| header.to_str().ok())
|
||||
.unwrap_or(&mime)
|
||||
} else {
|
||||
mime
|
||||
};
|
||||
let data_url = data_to_data_url(&mimetype, &data);
|
||||
// insert in cache
|
||||
cache.insert(new_cache_key, data_url.clone());
|
||||
Ok((data_url, res_url))
|
||||
} else {
|
||||
let content = response.text().unwrap();
|
||||
// insert in cache
|
||||
cache.insert(new_cache_key, content.clone());
|
||||
Ok((content, res_url))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,7 +1,7 @@
|
|||
const JS_DOM_EVENT_ATTRS: &[&str] = &[
|
||||
// From WHATWG HTML spec 8.1.5.2 'Event handlers on elements, Document objects, and Window objects':
|
||||
// From WHATWG HTML spec 8.1.5.2 "Event handlers on elements, Document objects, and Window objects":
|
||||
// https://html.spec.whatwg.org/#event-handlers-on-elements,-document-objects,-and-window-objects
|
||||
// https://html.spec.whatwg.org/#attributes-3 (table 'List of event handler content attributes')
|
||||
// https://html.spec.whatwg.org/#attributes-3 (table "List of event handler content attributes")
|
||||
|
||||
// Global event handlers
|
||||
"onabort",
|
||||
|
|
|
@ -5,7 +5,6 @@ extern crate lazy_static;
|
|||
mod macros;
|
||||
|
||||
pub mod html;
|
||||
pub mod http;
|
||||
pub mod js;
|
||||
pub mod utils;
|
||||
|
||||
|
|
67
src/main.rs
67
src/main.rs
|
@ -6,19 +6,20 @@ mod macros;
|
|||
|
||||
use crate::args::AppArgs;
|
||||
use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets};
|
||||
use monolith::http::retrieve_asset;
|
||||
use monolith::utils::{data_url_to_text, is_data_url, is_http_url};
|
||||
use monolith::utils::{data_url_to_text, is_data_url, is_file_url, is_http_url, retrieve_asset};
|
||||
use reqwest::blocking::Client;
|
||||
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
|
||||
use std::collections::HashMap;
|
||||
use std::fs::File;
|
||||
use std::env;
|
||||
use std::fs;
|
||||
use std::io::{self, Error, Write};
|
||||
use std::path::Path;
|
||||
use std::process;
|
||||
use std::time::Duration;
|
||||
|
||||
enum Output {
|
||||
Stdout(io::Stdout),
|
||||
File(File),
|
||||
File(fs::File),
|
||||
}
|
||||
|
||||
impl Output {
|
||||
|
@ -26,7 +27,7 @@ impl Output {
|
|||
if file_path.is_empty() {
|
||||
Ok(Output::Stdout(io::stdout()))
|
||||
} else {
|
||||
Ok(Output::File(File::create(file_path)?))
|
||||
Ok(Output::File(fs::File::create(file_path)?))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -46,16 +47,41 @@ impl Output {
|
|||
|
||||
fn main() {
|
||||
let app_args = AppArgs::get();
|
||||
let target_url: &str = app_args.url_target.as_str();
|
||||
let mut original_target: String = app_args.url_target.clone();
|
||||
let target_url: &str;
|
||||
let base_url;
|
||||
let dom;
|
||||
|
||||
if !is_http_url(target_url) && !is_data_url(target_url) {
|
||||
eprintln!(
|
||||
"Only HTTP(S) or data URLs are supported but got: {}",
|
||||
&target_url
|
||||
);
|
||||
// Pre-process the input
|
||||
let cwd_normalized: String =
|
||||
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
|
||||
let path = Path::new(original_target.as_str());
|
||||
let path_is_relative: bool = path.is_relative();
|
||||
if original_target.clone().len() == 0 {
|
||||
eprintln!("No target specified");
|
||||
process::exit(1);
|
||||
} else if is_http_url(original_target.clone()) || is_data_url(original_target.clone()) {
|
||||
target_url = original_target.as_str();
|
||||
} else if is_file_url(original_target.clone()) {
|
||||
target_url = original_target.as_str();
|
||||
} else if path.exists() {
|
||||
if !path.is_file() {
|
||||
eprintln!("Local target is not a file: {}", original_target);
|
||||
process::exit(1);
|
||||
}
|
||||
original_target.insert_str(0, if cfg!(windows) { "file:///" } else { "file://" });
|
||||
original_target = original_target.replace("\\", "/");
|
||||
if path_is_relative {
|
||||
original_target.insert_str(if cfg!(windows) { 8 } else { 7 }, &cwd_normalized);
|
||||
original_target.insert_str(
|
||||
if cfg!(windows) { 8 } else { 7 } + &cwd_normalized.len(),
|
||||
"/",
|
||||
);
|
||||
}
|
||||
target_url = original_target.as_str();
|
||||
} else {
|
||||
original_target.insert_str(0, "http://");
|
||||
target_url = original_target.as_str();
|
||||
}
|
||||
|
||||
let mut output = Output::new(&app_args.output).expect("Could not prepare output");
|
||||
|
@ -81,21 +107,26 @@ fn main() {
|
|||
.expect("Failed to initialize HTTP client");
|
||||
|
||||
// Retrieve root document
|
||||
if is_http_url(target_url) {
|
||||
let (data, final_url) =
|
||||
retrieve_asset(&mut cache, &client, target_url, false, "", app_args.silent)
|
||||
.expect("Could not retrieve assets in HTML");
|
||||
if is_file_url(target_url) || is_http_url(target_url) {
|
||||
let (data, final_url) = retrieve_asset(
|
||||
&mut cache,
|
||||
&client,
|
||||
target_url,
|
||||
target_url,
|
||||
false,
|
||||
"",
|
||||
app_args.silent,
|
||||
)
|
||||
.expect("Could not retrieve target document");
|
||||
base_url = final_url;
|
||||
dom = html_to_dom(&data);
|
||||
} else if is_data_url(target_url) {
|
||||
let text: String = data_url_to_text(target_url);
|
||||
|
||||
if text.len() == 0 {
|
||||
eprintln!("Unsupported data URL input");
|
||||
process::exit(1);
|
||||
}
|
||||
|
||||
base_url = str!();
|
||||
base_url = str!(target_url);
|
||||
dom = html_to_dom(&text);
|
||||
} else {
|
||||
process::exit(1);
|
||||
|
|
228
src/tests/cli.rs
228
src/tests/cli.rs
|
@ -1,4 +1,5 @@
|
|||
use assert_cmd::prelude::*;
|
||||
use std::env;
|
||||
use std::process::Command;
|
||||
|
||||
#[test]
|
||||
|
@ -22,9 +23,9 @@ fn print_version() -> Result<(), Box<dyn std::error::Error>> {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn bad_input() -> Result<(), Box<dyn std::error::Error>> {
|
||||
fn bad_input_empty_target() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||
let out = cmd.arg("kernel.org").output().unwrap();
|
||||
let out = cmd.arg("").output().unwrap();
|
||||
|
||||
// STDOUT should be empty
|
||||
assert_eq!(std::str::from_utf8(&out.stdout).unwrap(), "");
|
||||
|
@ -32,7 +33,7 @@ fn bad_input() -> Result<(), Box<dyn std::error::Error>> {
|
|||
// STDERR should contain error description
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stderr).unwrap(),
|
||||
"Only HTTP(S) or data URLs are supported but got: kernel.org\n"
|
||||
"No target specified\n"
|
||||
);
|
||||
|
||||
// The exit code should be 1
|
||||
|
@ -73,7 +74,9 @@ fn isolate_data_url() -> Result<(), Box<dyn std::error::Error>> {
|
|||
// STDOUT should contain isolated HTML
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
"<html><head><meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:;\"></meta></head><body>Hello, World!</body></html>\n"
|
||||
"<html><head>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:;\"></meta>\
|
||||
</head><body>Hello, World!</body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should be empty
|
||||
|
@ -97,7 +100,10 @@ fn remove_css_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
|
|||
// STDOUT should contain HTML with no CSS
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
"<html><head><meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none';\"></meta><style></style></head><body>Hello</body></html>\n"
|
||||
"<html><head>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none';\"></meta>\
|
||||
<style></style>\
|
||||
</head><body>Hello</body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should be empty
|
||||
|
@ -121,7 +127,9 @@ fn remove_frames_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
|
|||
// STDOUT should contain HTML with no iframes
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
"<html><head><meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none';child-src 'none';\"></meta></head><body><iframe src=\"\"></iframe>Hi</body></html>\n"
|
||||
"<html><head>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none';child-src 'none';\"></meta>\
|
||||
</head><body><iframe src=\"\"></iframe>Hi</body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should be empty
|
||||
|
@ -145,7 +153,15 @@ fn remove_images_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
|
|||
// STDOUT should contain HTML with no images
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
"<html><head><meta http-equiv=\"Content-Security-Policy\" content=\"img-src data:;\"></meta></head><body><img src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=\">Hi</body></html>\n"
|
||||
"<html>\
|
||||
<head>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"img-src data:;\"></meta>\
|
||||
</head>\
|
||||
<body>\
|
||||
<img src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=\">\
|
||||
Hi\
|
||||
</body>\
|
||||
</html>\n"
|
||||
);
|
||||
|
||||
// STDERR should be empty
|
||||
|
@ -169,7 +185,203 @@ fn remove_js_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
|
|||
// STDOUT should contain HTML with no JS
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
"<html><head><meta http-equiv=\"Content-Security-Policy\" content=\"script-src 'none';\"></meta><script></script></head><body>Hi</body></html>\n"
|
||||
"<html>\
|
||||
<head>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"script-src 'none';\"></meta>\
|
||||
<script></script></head>\
|
||||
<body>Hi</body>\
|
||||
</html>\n"
|
||||
);
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
||||
|
||||
// The exit code should be 0
|
||||
out.assert().code(0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
// Runs the binary against a relative path to a local HTML file and verifies that
// the local assets it references are embedded and reported on STDERR.
fn local_file_target_input() -> Result<(), Box<dyn std::error::Error>> {
    let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let cwd_normalized: String =
        str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");

    // Relative path to the fixture, spelled with the platform's separator
    let target_path: &str = if cfg!(windows) {
        "src\\tests\\data\\local-file.html"
    } else {
        "src/tests/data/local-file.html"
    };
    let out = cmd.arg(target_path).output().unwrap();
    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };

    // STDOUT should contain HTML from the local file
    let stdout_text = std::str::from_utf8(&out.stdout).unwrap();
    assert_eq!(
        stdout_text,
        "<!DOCTYPE html><html lang=\"en\"><head>\n \
        <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
        <title>Local HTML file</title>\n \
        <link href=\"data:text/css;base64,Ym9keSB7CiAgICBiYWNrZ3JvdW5kLWNvbG9yOiAjMDAwOwogICAgY29sb3I6ICNmZmY7Cn0K\" rel=\"stylesheet\" type=\"text/css\">\n \
        <link href=\"data:text/css;base64,\" rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
        <img alt=\"\" src=\"\">\n \
        <a href=\"file://local-file.html/\">Tricky href</a>\n \
        <a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
        <script src=\"data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==\"></script>\n\n\n\n\
        </body></html>\n"
    );

    // STDERR should contain list of retrieved file URLs
    let expected_stderr = format!(
        "{file}{cwd}/src/tests/data/local-file.html\n\
        {file}{cwd}/src/tests/data/local-style.css\n\
        {file}{cwd}/src/tests/data/local-script.js\n",
        file = file_url_protocol,
        cwd = cwd_normalized
    );
    let stderr_text = std::str::from_utf8(&out.stderr).unwrap();
    assert_eq!(stderr_text, expected_stderr);

    // The exit code should be 0
    out.assert().code(0);

    Ok(())
}
|
||||
|
||||
#[test]
// Runs the binary with the -jciI flags against an absolute path to a local HTML
// file and verifies the produced document and the STDERR listing.
fn local_file_target_input_absolute_target_path() -> Result<(), Box<dyn std::error::Error>> {
    // Query the working directory once; both the raw and the normalized
    // (forward-slash) forms are derived from this single value
    let cwd = env::current_dir().unwrap();
    let cwd_normalized: String = cwd.to_str().unwrap().replace("\\", "/");
    let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let out = cmd
        .arg("-jciI")
        .arg(if cfg!(windows) {
            format!(
                "{cwd}\\src\\tests\\data\\local-file.html",
                cwd = cwd.to_str().unwrap()
            )
        } else {
            format!(
                "{cwd}/src/tests/data/local-file.html",
                cwd = cwd.to_str().unwrap()
            )
        })
        .output()
        .unwrap();
    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };

    // STDOUT should contain HTML from the local file
    assert_eq!(
        std::str::from_utf8(&out.stdout).unwrap(),
        "<!DOCTYPE html><html lang=\"en\"><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:; style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
        <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
        <title>Local HTML file</title>\n \
        <link href=\"\" rel=\"stylesheet\" type=\"text/css\">\n \
        <link href=\"\" rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
        <img alt=\"\" src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=\">\n \
        <a href=\"file://local-file.html/\">Tricky href</a>\n \
        <a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
        <script src=\"\"></script>\n\n\n\n\
        </body></html>\n"
    );

    // STDERR should contain only the target file
    assert_eq!(
        std::str::from_utf8(&out.stderr).unwrap(),
        format!(
            "{file}{cwd}/src/tests/data/local-file.html\n",
            file = file_url_protocol,
            cwd = cwd_normalized,
        )
    );

    // The exit code should be 0
    out.assert().code(0);

    Ok(())
}
|
||||
|
||||
#[test]
// Runs the binary with the -cji flags against a file:// URL pointing at the local
// HTML fixture and verifies the produced document and the STDERR listing.
fn local_file_url_target_input() -> Result<(), Box<dyn std::error::Error>> {
    let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let cwd = env::current_dir().unwrap();
    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };

    // file:// URL of the fixture, keeping the platform's native path separators
    let target_file_url: String = if cfg!(windows) {
        format!(
            "{file}{cwd}\\src\\tests\\data\\local-file.html",
            file = file_url_protocol,
            cwd = cwd.to_str().unwrap(),
        )
    } else {
        format!(
            "{file}{cwd}/src/tests/data/local-file.html",
            file = file_url_protocol,
            cwd = cwd.to_str().unwrap(),
        )
    };
    let out = cmd.arg("-cji").arg(target_file_url).output().unwrap();

    // STDOUT should contain HTML from the local file
    let stdout_text = std::str::from_utf8(&out.stdout).unwrap();
    assert_eq!(
        stdout_text,
        "<!DOCTYPE html><html lang=\"en\"><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
        <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
        <title>Local HTML file</title>\n \
        <link href=\"\" rel=\"stylesheet\" type=\"text/css\">\n \
        <link href=\"\" rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
        <img alt=\"\" src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=\">\n \
        <a href=\"file://local-file.html/\">Tricky href</a>\n \
        <a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
        <script src=\"\"></script>\n\n\n\n\
        </body></html>\n"
    );

    // STDERR should contain list of retrieved file URLs
    let expected_stderr: String = if cfg!(windows) {
        format!(
            "{file}{cwd}\\src\\tests\\data\\local-file.html\n",
            file = file_url_protocol,
            cwd = cwd.to_str().unwrap(),
        )
    } else {
        format!(
            "{file}{cwd}/src/tests/data/local-file.html\n",
            file = file_url_protocol,
            cwd = cwd.to_str().unwrap(),
        )
    };
    let stderr_text = std::str::from_utf8(&out.stderr).unwrap();
    assert_eq!(stderr_text, expected_stderr);

    // The exit code should be 0
    out.assert().code(0);

    Ok(())
}
|
||||
|
||||
#[test]
|
||||
fn security_disallow_local_assets_within_data_url_targets() -> Result<(), Box<dyn std::error::Error>>
|
||||
{
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||
let out = cmd
|
||||
.arg("data:text/html,%3Cscript%20src=\"src/tests/data/local-script.js\"%3E%3C/script%3E")
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// STDOUT should contain HTML with no JS in it
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
"<html><head><script src=\"\"></script></head><body></body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should be empty
|
||||
|
|
19
src/tests/data/local-file.html
Normal file
19
src/tests/data/local-file.html
Normal file
|
@ -0,0 +1,19 @@
|
|||
<!doctype html>
|
||||
|
||||
<html lang="en">
|
||||
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||
<title>Local HTML file</title>
|
||||
<link href="local-style.css" rel="stylesheet" type="text/css" />
|
||||
<link href="local-style-does-not-exist.css" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<img src="monolith.png" alt="" />
|
||||
<a href="//local-file.html">Tricky href</a>
|
||||
<a href="https://github.com/Y2Z/monolith">Remote URL</a>
|
||||
<script src="local-script.js"></script>
|
||||
</body>
|
||||
|
||||
</html>
|
2
src/tests/data/local-script.js
Normal file
2
src/tests/data/local-script.js
Normal file
|
@ -0,0 +1,2 @@
|
|||
document.body.style.backgroundColor = "green";
|
||||
document.body.style.color = "red";
|
4
src/tests/data/local-style.css
Normal file
4
src/tests/data/local-style.css
Normal file
|
@ -0,0 +1,4 @@
|
|||
body {
|
||||
background-color: #000;
|
||||
color: #fff;
|
||||
}
|
|
@ -1,25 +0,0 @@
|
|||
use crate::http::retrieve_asset;
|
||||
use reqwest::blocking::Client;
|
||||
use std::collections::HashMap;
|
||||
|
||||
#[test]
// Data URLs must be handed back unchanged, both as the content and as the final
// URL, regardless of the MIME type that was requested.
fn test_retrieve_asset() {
    let mut cache = HashMap::new();
    let client = Client::new();

    // First without a MIME type, then with an explicit one
    for &mime in ["", "image/png"].iter() {
        let (data, final_url) = retrieve_asset(
            &mut cache,
            &client,
            "data:text/html;base64,...",
            true,
            mime,
            false,
        )
        .unwrap();
        assert_eq!(&data, "data:text/html;base64,...");
        assert_eq!(&final_url, "data:text/html;base64,...");
    }
}
|
|
@ -1,5 +1,4 @@
|
|||
mod cli;
|
||||
mod html;
|
||||
mod http;
|
||||
mod js;
|
||||
mod utils;
|
||||
|
|
|
@ -1,14 +1,14 @@
|
|||
use crate::utils::{
|
||||
clean_url, data_to_data_url, data_url_to_text, detect_mimetype, is_data_url, is_http_url,
|
||||
resolve_url, url_has_protocol,
|
||||
};
|
||||
use crate::utils;
|
||||
use reqwest::blocking::Client;
|
||||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
use url::ParseError;
|
||||
|
||||
#[test]
|
||||
fn test_data_to_data_url() {
|
||||
fn data_to_data_url() {
|
||||
let mime = "application/javascript";
|
||||
let data = "var word = 'hello';\nalert(word);\n";
|
||||
let datauri = data_to_data_url(mime, data.as_bytes());
|
||||
let datauri = utils::data_to_data_url(mime, data.as_bytes());
|
||||
assert_eq!(
|
||||
&datauri,
|
||||
"data:application/javascript;base64,dmFyIHdvcmQgPSAnaGVsbG8nOwphbGVydCh3b3JkKTsK"
|
||||
|
@ -16,90 +16,124 @@ fn test_data_to_data_url() {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn test_detect_mimetype() {
|
||||
// image
|
||||
assert_eq!(detect_mimetype(b"GIF87a"), "image/gif");
|
||||
assert_eq!(detect_mimetype(b"GIF89a"), "image/gif");
|
||||
assert_eq!(detect_mimetype(b"\xFF\xD8\xFF"), "image/jpeg");
|
||||
assert_eq!(detect_mimetype(b"\x89PNG\x0D\x0A\x1A\x0A"), "image/png");
|
||||
assert_eq!(detect_mimetype(b"<?xml "), "image/svg+xml");
|
||||
assert_eq!(detect_mimetype(b"<svg "), "image/svg+xml");
|
||||
assert_eq!(detect_mimetype(b"RIFF....WEBPVP8 "), "image/webp");
|
||||
assert_eq!(detect_mimetype(b"\x00\x00\x01\x00"), "image/x-icon");
|
||||
// audio
|
||||
assert_eq!(detect_mimetype(b"ID3"), "audio/mpeg");
|
||||
assert_eq!(detect_mimetype(b"\xFF\x0E"), "audio/mpeg");
|
||||
assert_eq!(detect_mimetype(b"\xFF\x0F"), "audio/mpeg");
|
||||
assert_eq!(detect_mimetype(b"OggS"), "audio/ogg");
|
||||
assert_eq!(detect_mimetype(b"RIFF....WAVEfmt "), "audio/wav");
|
||||
assert_eq!(detect_mimetype(b"fLaC"), "audio/x-flac");
|
||||
// video
|
||||
assert_eq!(detect_mimetype(b"RIFF....AVI LIST"), "video/avi");
|
||||
assert_eq!(detect_mimetype(b"....ftyp"), "video/mp4");
|
||||
assert_eq!(detect_mimetype(b"\x00\x00\x01\x0B"), "video/mpeg");
|
||||
assert_eq!(detect_mimetype(b"....moov"), "video/quicktime");
|
||||
assert_eq!(detect_mimetype(b"\x1A\x45\xDF\xA3"), "video/webm");
|
||||
fn detect_mimetype() {
|
||||
// Image
|
||||
assert_eq!(utils::detect_mimetype(b"GIF87a"), "image/gif");
|
||||
assert_eq!(utils::detect_mimetype(b"GIF89a"), "image/gif");
|
||||
assert_eq!(utils::detect_mimetype(b"\xFF\xD8\xFF"), "image/jpeg");
|
||||
assert_eq!(
|
||||
utils::detect_mimetype(b"\x89PNG\x0D\x0A\x1A\x0A"),
|
||||
"image/png"
|
||||
);
|
||||
assert_eq!(utils::detect_mimetype(b"<?xml "), "image/svg+xml");
|
||||
assert_eq!(utils::detect_mimetype(b"<svg "), "image/svg+xml");
|
||||
assert_eq!(utils::detect_mimetype(b"RIFF....WEBPVP8 "), "image/webp");
|
||||
assert_eq!(utils::detect_mimetype(b"\x00\x00\x01\x00"), "image/x-icon");
|
||||
|
||||
// Audio
|
||||
assert_eq!(utils::detect_mimetype(b"ID3"), "audio/mpeg");
|
||||
assert_eq!(utils::detect_mimetype(b"\xFF\x0E"), "audio/mpeg");
|
||||
assert_eq!(utils::detect_mimetype(b"\xFF\x0F"), "audio/mpeg");
|
||||
assert_eq!(utils::detect_mimetype(b"OggS"), "audio/ogg");
|
||||
assert_eq!(utils::detect_mimetype(b"RIFF....WAVEfmt "), "audio/wav");
|
||||
assert_eq!(utils::detect_mimetype(b"fLaC"), "audio/x-flac");
|
||||
|
||||
// Video
|
||||
assert_eq!(utils::detect_mimetype(b"RIFF....AVI LIST"), "video/avi");
|
||||
assert_eq!(utils::detect_mimetype(b"....ftyp"), "video/mp4");
|
||||
assert_eq!(utils::detect_mimetype(b"\x00\x00\x01\x0B"), "video/mpeg");
|
||||
assert_eq!(utils::detect_mimetype(b"....moov"), "video/quicktime");
|
||||
assert_eq!(utils::detect_mimetype(b"\x1A\x45\xDF\xA3"), "video/webm");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_url_has_protocol() {
|
||||
// passing
|
||||
fn url_has_protocol() {
|
||||
// Passing
|
||||
assert_eq!(
|
||||
url_has_protocol("mailto:somebody@somewhere.com?subject=hello"),
|
||||
utils::url_has_protocol("mailto:somebody@somewhere.com?subject=hello"),
|
||||
true
|
||||
);
|
||||
assert_eq!(url_has_protocol("tel:5551234567"), true);
|
||||
assert_eq!(utils::url_has_protocol("tel:5551234567"), true);
|
||||
assert_eq!(
|
||||
url_has_protocol("ftp:user:password@some-ftp-server.com"),
|
||||
utils::url_has_protocol("ftp:user:password@some-ftp-server.com"),
|
||||
true
|
||||
);
|
||||
assert_eq!(url_has_protocol("javascript:void(0)"), true);
|
||||
assert_eq!(url_has_protocol("http://news.ycombinator.com"), true);
|
||||
assert_eq!(url_has_protocol("https://github.com"), true);
|
||||
assert_eq!(utils::url_has_protocol("javascript:void(0)"), true);
|
||||
assert_eq!(utils::url_has_protocol("http://news.ycombinator.com"), true);
|
||||
assert_eq!(utils::url_has_protocol("https://github.com"), true);
|
||||
assert_eq!(
|
||||
url_has_protocol("MAILTO:somebody@somewhere.com?subject=hello"),
|
||||
utils::url_has_protocol("MAILTO:somebody@somewhere.com?subject=hello"),
|
||||
true
|
||||
);
|
||||
// failing
|
||||
|
||||
// Failing
|
||||
assert_eq!(
|
||||
url_has_protocol("//some-hostname.com/some-file.html"),
|
||||
utils::url_has_protocol("//some-hostname.com/some-file.html"),
|
||||
false
|
||||
);
|
||||
assert_eq!(url_has_protocol("some-hostname.com/some-file.html"), false);
|
||||
assert_eq!(url_has_protocol("/some-file.html"), false);
|
||||
assert_eq!(url_has_protocol(""), false);
|
||||
assert_eq!(
|
||||
utils::url_has_protocol("some-hostname.com/some-file.html"),
|
||||
false
|
||||
);
|
||||
assert_eq!(utils::url_has_protocol("/some-file.html"), false);
|
||||
assert_eq!(utils::url_has_protocol(""), false);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_http_url() {
|
||||
// passing
|
||||
assert!(is_http_url("https://www.rust-lang.org/"));
|
||||
assert!(is_http_url("http://kernel.org"));
|
||||
// failing
|
||||
assert!(!is_http_url("//kernel.org"));
|
||||
assert!(!is_http_url("./index.html"));
|
||||
assert!(!is_http_url("some-local-page.htm"));
|
||||
assert!(!is_http_url("ftp://1.2.3.4/www/index.html"));
|
||||
assert!(!is_http_url(
|
||||
fn is_file_url() {
|
||||
// Passing
|
||||
assert!(utils::is_file_url(
|
||||
"file:///home/user/Websites/my-website/index.html"
|
||||
));
|
||||
assert!(utils::is_file_url(
|
||||
"file:///C:/Documents%20and%20Settings/user/Websites/my-website/assets/images/logo.png"
|
||||
));
|
||||
assert!(utils::is_file_url(
|
||||
"file:\\\\\\home\\user\\Websites\\my-website\\index.html"
|
||||
));
|
||||
|
||||
// Failing
|
||||
assert!(!utils::is_file_url("//kernel.org"));
|
||||
assert!(!utils::is_file_url("./index.html"));
|
||||
assert!(!utils::is_file_url("some-local-page.htm"));
|
||||
assert!(!utils::is_file_url("https://1.2.3.4:80/www/index.html"));
|
||||
assert!(!utils::is_file_url(
|
||||
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_resolve_url() -> Result<(), ParseError> {
|
||||
let resolved_url = resolve_url("https://www.kernel.org", "../category/signatures.html")?;
|
||||
fn is_http_url() {
|
||||
// Passing
|
||||
assert!(utils::is_http_url("https://www.rust-lang.org/"));
|
||||
assert!(utils::is_http_url("http://kernel.org"));
|
||||
assert!(utils::is_http_url("http:\\\\freebsd.org\\"));
|
||||
|
||||
// Failing
|
||||
assert!(!utils::is_http_url("//kernel.org"));
|
||||
assert!(!utils::is_http_url("./index.html"));
|
||||
assert!(!utils::is_http_url("some-local-page.htm"));
|
||||
assert!(!utils::is_http_url("ftp://1.2.3.4/www/index.html"));
|
||||
assert!(!utils::is_http_url(
|
||||
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn resolve_url() -> Result<(), ParseError> {
|
||||
let resolved_url = utils::resolve_url("https://www.kernel.org", "../category/signatures.html")?;
|
||||
assert_eq!(
|
||||
resolved_url.as_str(),
|
||||
"https://www.kernel.org/category/signatures.html"
|
||||
);
|
||||
|
||||
let resolved_url = resolve_url("https://www.kernel.org", "category/signatures.html")?;
|
||||
let resolved_url = utils::resolve_url("https://www.kernel.org", "category/signatures.html")?;
|
||||
assert_eq!(
|
||||
resolved_url.as_str(),
|
||||
"https://www.kernel.org/category/signatures.html"
|
||||
);
|
||||
|
||||
let resolved_url = resolve_url(
|
||||
let resolved_url = utils::resolve_url(
|
||||
"saved_page.htm",
|
||||
"https://www.kernel.org/category/signatures.html",
|
||||
)?;
|
||||
|
@ -108,7 +142,7 @@ fn test_resolve_url() -> Result<(), ParseError> {
|
|||
"https://www.kernel.org/category/signatures.html"
|
||||
);
|
||||
|
||||
let resolved_url = resolve_url(
|
||||
let resolved_url = utils::resolve_url(
|
||||
"https://www.kernel.org",
|
||||
"//www.kernel.org/theme/images/logos/tux.png",
|
||||
)?;
|
||||
|
@ -117,7 +151,7 @@ fn test_resolve_url() -> Result<(), ParseError> {
|
|||
"https://www.kernel.org/theme/images/logos/tux.png"
|
||||
);
|
||||
|
||||
let resolved_url = resolve_url(
|
||||
let resolved_url = utils::resolve_url(
|
||||
"https://www.kernel.org",
|
||||
"//another-host.org/theme/images/logos/tux.png",
|
||||
)?;
|
||||
|
@ -126,7 +160,7 @@ fn test_resolve_url() -> Result<(), ParseError> {
|
|||
"https://another-host.org/theme/images/logos/tux.png"
|
||||
);
|
||||
|
||||
let resolved_url = resolve_url(
|
||||
let resolved_url = utils::resolve_url(
|
||||
"https://www.kernel.org/category/signatures.html",
|
||||
"/theme/images/logos/tux.png",
|
||||
)?;
|
||||
|
@ -135,7 +169,7 @@ fn test_resolve_url() -> Result<(), ParseError> {
|
|||
"https://www.kernel.org/theme/images/logos/tux.png"
|
||||
);
|
||||
|
||||
let resolved_url = resolve_url(
|
||||
let resolved_url = utils::resolve_url(
|
||||
"https://www.w3schools.com/html/html_iframe.asp",
|
||||
"default.asp",
|
||||
)?;
|
||||
|
@ -144,7 +178,7 @@ fn test_resolve_url() -> Result<(), ParseError> {
|
|||
"https://www.w3schools.com/html/default.asp"
|
||||
);
|
||||
|
||||
let resolved_url = resolve_url(
|
||||
let resolved_url = utils::resolve_url(
|
||||
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
|
||||
"https://www.kernel.org/category/signatures.html",
|
||||
)?;
|
||||
|
@ -153,62 +187,197 @@ fn test_resolve_url() -> Result<(), ParseError> {
|
|||
"https://www.kernel.org/category/signatures.html"
|
||||
);
|
||||
|
||||
let resolved_url = resolve_url(
|
||||
let resolved_url = utils::resolve_url(
|
||||
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
|
||||
"//www.w3schools.com/html/html_iframe.asp",
|
||||
)
|
||||
.unwrap_or(str!());
|
||||
assert_eq!(resolved_url.as_str(), "");
|
||||
|
||||
let resolved_url = utils::resolve_url(
|
||||
"file:///home/user/Websites/my-website/index.html",
|
||||
"assets/images/logo.png",
|
||||
)
|
||||
.unwrap_or(str!());
|
||||
assert_eq!(
|
||||
resolved_url.as_str(),
|
||||
"file:///home/user/Websites/my-website/assets/images/logo.png"
|
||||
);
|
||||
|
||||
let resolved_url = utils::resolve_url(
|
||||
"file:\\\\\\home\\user\\Websites\\my-website\\index.html",
|
||||
"assets\\images\\logo.png",
|
||||
)
|
||||
.unwrap_or(str!());
|
||||
assert_eq!(
|
||||
resolved_url.as_str(),
|
||||
"file:///home/user/Websites/my-website/assets/images/logo.png"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_data_url() {
|
||||
// passing
|
||||
assert!(is_data_url(
|
||||
fn is_data_url() {
|
||||
// Passing
|
||||
assert!(utils::is_data_url(
|
||||
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
|
||||
));
|
||||
// failing
|
||||
assert!(!is_data_url("https://kernel.org"));
|
||||
assert!(!is_data_url("//kernel.org"));
|
||||
assert!(!is_data_url(""));
|
||||
|
||||
// Failing
|
||||
assert!(!utils::is_data_url("https://kernel.org"));
|
||||
assert!(!utils::is_data_url("//kernel.org"));
|
||||
assert!(!utils::is_data_url(""));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_clean_url() {
|
||||
fn clean_url() {
|
||||
assert_eq!(
|
||||
clean_url("https://somewhere.com/font.eot#iefix"),
|
||||
utils::clean_url("https://somewhere.com/font.eot#iefix"),
|
||||
"https://somewhere.com/font.eot"
|
||||
);
|
||||
assert_eq!(
|
||||
clean_url("https://somewhere.com/font.eot#"),
|
||||
utils::clean_url("https://somewhere.com/font.eot#"),
|
||||
"https://somewhere.com/font.eot"
|
||||
);
|
||||
assert_eq!(
|
||||
clean_url("https://somewhere.com/font.eot?#"),
|
||||
utils::clean_url("https://somewhere.com/font.eot?#"),
|
||||
"https://somewhere.com/font.eot"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_data_url_to_text() {
|
||||
fn data_url_to_text() {
|
||||
assert_eq!(
|
||||
data_url_to_text("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg=="),
|
||||
utils::data_url_to_text("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg=="),
|
||||
"Work expands so as to fill the time available for its completion"
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
data_url_to_text(
|
||||
utils::data_url_to_text(
|
||||
"data:text/html;utf8,Work expands so as to fill the time available for its completion"
|
||||
),
|
||||
"Work expands so as to fill the time available for its completion"
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
data_url_to_text(
|
||||
utils::data_url_to_text(
|
||||
"data:text/html,Work expands so as to fill the time available for its completion"
|
||||
),
|
||||
"Work expands so as to fill the time available for its completion"
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
utils::data_url_to_text(
|
||||
" data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion "
|
||||
),
|
||||
"Work expands so as to fill the time available for its completion"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn decode_url() {
    // Percent-encoded multi-byte UTF-8 sequences (plus an encoded space and
    // an encoded `=`) decode back to the original text.
    assert_eq!(
        utils::decode_url(str!(
            "%E6%A4%9C%E3%83%92%E3%83%A0%E8%A7%A3%E5%A1%97%E3%82%83%E3%83%83%20%3D%20%E3%82%B5"
        )),
        "検ヒム解塗ゃッ = サ"
    );

    // Two encoded spaces around one literal space must yield exactly three
    // spaces: the literal space is preserved, not collapsed or dropped.
    assert_eq!(utils::decode_url(str!("%20 %20")), "   ");
}
|
||||
|
||||
#[test]
fn retrieve_asset() {
    let cache = &mut HashMap::new();
    let client = Client::new();

    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };

    // Fixtures shared by the data-URL scenarios below
    let data_source = "data:text/html;base64,SoUrCe";
    let data_target = "data:text/html;base64,TaRgEt";

    // If both source and target are data URLs, the result must be the target
    // data URL; the media type parameter ("" first, then "image/png") must
    // not influence that outcome.
    for media_type in ["", "image/png"].iter() {
        let (data, final_url) =
            utils::retrieve_asset(cache, &client, data_source, data_target, true, *media_type, false)
                .unwrap();
        assert_eq!(data, "data:text/html;base64,TaRgEt");
        assert_eq!(final_url, "data:text/html;base64,TaRgEt");
    }

    // Inclusion of local assets must be refused when the source is a data URL
    // and, likewise, when the source is a remote document.
    for non_local_source in ["data:text/html;base64,SoUrCe", "https://kernel.org/"].iter() {
        let (data, final_url) = utils::retrieve_asset(
            cache,
            &client,
            *non_local_source,
            "file:///etc/passwd",
            true,
            "",
            false,
        )
        .unwrap();
        assert_eq!(data, "");
        assert_eq!(final_url, "");
    }

    // Inclusion of local assets from local sources should be allowed
    let cwd = env::current_dir().unwrap();
    let cwd_str = cwd.to_str().unwrap();
    let local_page_url = format!(
        "{file}{cwd}/src/tests/data/local-file.html",
        file = file_url_protocol,
        cwd = cwd_str
    );
    let local_script_url = format!(
        "{file}{cwd}/src/tests/data/local-script.js",
        file = file_url_protocol,
        cwd = cwd_str
    );
    let (data, final_url) = utils::retrieve_asset(
        cache,
        &client,
        &local_page_url,
        &local_script_url,
        true,
        "application/javascript",
        false,
    )
    .unwrap();
    assert_eq!(data, "data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==");
    assert_eq!(final_url, local_script_url);
}
|
||||
|
|
161
src/utils.rs
161
src/utils.rs
|
@ -1,8 +1,10 @@
|
|||
use crate::http::retrieve_asset;
|
||||
use base64::{decode, encode};
|
||||
use base64;
|
||||
use regex::Regex;
|
||||
use reqwest::blocking::Client;
|
||||
use reqwest::header::CONTENT_TYPE;
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use url::{form_urlencoded, ParseError, Url};
|
||||
|
||||
/// This monster of a regex is used to match any kind of URL found in CSS.
|
||||
|
@ -71,7 +73,7 @@ pub fn data_to_data_url(mime: &str, data: &[u8]) -> String {
|
|||
} else {
|
||||
mime.to_string()
|
||||
};
|
||||
format!("data:{};base64,{}", mimetype, encode(data))
|
||||
format!("data:{};base64,{}", mimetype, base64::encode(data))
|
||||
}
|
||||
|
||||
pub fn detect_mimetype(data: &[u8]) -> String {
|
||||
|
@ -95,6 +97,12 @@ pub fn is_data_url<T: AsRef<str>>(url: T) -> bool {
|
|||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
pub fn is_file_url<T: AsRef<str>>(url: T) -> bool {
|
||||
Url::parse(url.as_ref())
|
||||
.and_then(|u| Ok(u.scheme() == "file"))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
pub fn is_http_url<T: AsRef<str>>(url: T) -> bool {
|
||||
Url::parse(url.as_ref())
|
||||
.and_then(|u| Ok(u.scheme() == "http" || u.scheme() == "https"))
|
||||
|
@ -118,6 +126,7 @@ pub fn resolve_css_imports(
|
|||
client: &Client,
|
||||
css_string: &str,
|
||||
as_data_url: bool,
|
||||
parent_url: &str,
|
||||
href: &str,
|
||||
opt_no_images: bool,
|
||||
opt_silent: bool,
|
||||
|
@ -127,12 +136,12 @@ pub fn resolve_css_imports(
|
|||
for link in REGEX_CSS_URL.captures_iter(&css_string) {
|
||||
let target_link = link.name("url").unwrap().as_str();
|
||||
|
||||
// Determine the type of link
|
||||
// Determine linked asset type
|
||||
let is_stylesheet = link.name("stylesheet").is_some();
|
||||
let is_font = link.name("font").is_some();
|
||||
let is_image = !is_stylesheet && !is_font;
|
||||
|
||||
// Generate absolute URL for content
|
||||
// Generate absolute URL for the content
|
||||
let embedded_url = match resolve_url(href, target_link) {
|
||||
Ok(url) => url,
|
||||
Err(_) => continue, // Malformed URL
|
||||
|
@ -144,8 +153,9 @@ pub fn resolve_css_imports(
|
|||
retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&parent_url,
|
||||
&embedded_url,
|
||||
false, // Formating as data URL will be done later
|
||||
false, // Formatting as data URL will be done later
|
||||
"text/css", // Expect CSS
|
||||
opt_silent,
|
||||
)
|
||||
|
@ -155,6 +165,7 @@ pub fn resolve_css_imports(
|
|||
client,
|
||||
&content,
|
||||
true, // Finally, convert to a data URL
|
||||
&parent_url,
|
||||
&embedded_url,
|
||||
opt_no_images,
|
||||
opt_silent,
|
||||
|
@ -165,6 +176,7 @@ pub fn resolve_css_imports(
|
|||
retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&parent_url,
|
||||
&embedded_url,
|
||||
true, // Format as data URL
|
||||
"", // Unknown MIME type
|
||||
|
@ -186,10 +198,11 @@ pub fn resolve_css_imports(
|
|||
|
||||
let replacement = format!("\"{}\"", &content);
|
||||
let dest = link.name("to_repl").unwrap();
|
||||
let offset = resolved_css.len() - css_string.len();
|
||||
let target_range = (dest.start() + offset)..(dest.end() + offset);
|
||||
|
||||
resolved_css.replace_range(target_range, &replacement);
|
||||
if resolved_css.len() > css_string.len() {
|
||||
let offset = resolved_css.len() - css_string.len();
|
||||
let target_range = (dest.start() + offset)..(dest.end() + offset);
|
||||
resolved_css.replace_range(target_range, &replacement);
|
||||
}
|
||||
}
|
||||
|
||||
if as_data_url {
|
||||
|
@ -222,20 +235,7 @@ pub fn data_url_to_text<T: AsRef<str>>(url: T) -> String {
|
|||
let meta_data: String = path.chars().take(comma_loc).collect();
|
||||
let raw_data: String = path.chars().skip(comma_loc + 1).collect();
|
||||
|
||||
let data: String = form_urlencoded::parse(raw_data.as_bytes())
|
||||
.map(|(key, val)| {
|
||||
[
|
||||
key.to_string(),
|
||||
if val.to_string().len() == 0 {
|
||||
str!()
|
||||
} else {
|
||||
str!('=')
|
||||
},
|
||||
val.to_string(),
|
||||
]
|
||||
.concat()
|
||||
})
|
||||
.collect();
|
||||
let data: String = decode_url(raw_data);
|
||||
|
||||
let meta_data_items: Vec<&str> = meta_data.split(';').collect();
|
||||
let mut mime_type: &str = "";
|
||||
|
@ -259,7 +259,7 @@ pub fn data_url_to_text<T: AsRef<str>>(url: T) -> String {
|
|||
|
||||
if mime_type.eq_ignore_ascii_case("text/html") {
|
||||
if encoding.eq_ignore_ascii_case("base64") {
|
||||
String::from_utf8(decode(&data).unwrap_or(vec![])).unwrap_or(str!())
|
||||
String::from_utf8(base64::decode(&data).unwrap_or(vec![])).unwrap_or(str!())
|
||||
} else {
|
||||
data
|
||||
}
|
||||
|
@ -267,3 +267,114 @@ pub fn data_url_to_text<T: AsRef<str>>(url: T) -> String {
|
|||
str!()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn decode_url(input: String) -> String {
|
||||
form_urlencoded::parse(input.as_bytes())
|
||||
.map(|(key, val)| {
|
||||
[
|
||||
key.to_string(),
|
||||
if val.to_string().len() == 0 {
|
||||
str!()
|
||||
} else {
|
||||
str!('=')
|
||||
},
|
||||
val.to_string(),
|
||||
]
|
||||
.concat()
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn retrieve_asset(
|
||||
cache: &mut HashMap<String, String>,
|
||||
client: &Client,
|
||||
parent_url: &str,
|
||||
url: &str,
|
||||
as_data_url: bool,
|
||||
mime: &str,
|
||||
opt_silent: bool,
|
||||
) -> Result<(String, String), reqwest::Error> {
|
||||
if url.len() == 0 {
|
||||
return Ok((str!(), str!()));
|
||||
}
|
||||
|
||||
let cache_key = clean_url(&url);
|
||||
|
||||
if is_data_url(&url) {
|
||||
Ok((url.to_string(), url.to_string()))
|
||||
} else if is_file_url(&url) {
|
||||
// Check if parent_url is also file:///
|
||||
// (if not then we don't download/embed the asset)
|
||||
if !is_file_url(&parent_url) {
|
||||
return Ok((str!(), str!()));
|
||||
}
|
||||
|
||||
let cutoff = if cfg!(windows) { 8 } else { 7 };
|
||||
let fs_file_path: String = decode_url(url.to_string()[cutoff..].to_string());
|
||||
let path = Path::new(&fs_file_path);
|
||||
if path.exists() {
|
||||
if !opt_silent {
|
||||
eprintln!("{}", &url);
|
||||
}
|
||||
|
||||
if as_data_url {
|
||||
let data_url: String = data_to_data_url(&mime, &fs::read(&fs_file_path).unwrap());
|
||||
Ok((data_url, url.to_string()))
|
||||
} else {
|
||||
let data: String = fs::read_to_string(&fs_file_path).expect(url);
|
||||
Ok((data, url.to_string()))
|
||||
}
|
||||
} else {
|
||||
Ok((str!(), url.to_string()))
|
||||
}
|
||||
} else {
|
||||
if cache.contains_key(&cache_key) {
|
||||
// URL is in cache
|
||||
if !opt_silent {
|
||||
eprintln!("{} (from cache)", &url);
|
||||
}
|
||||
let data = cache.get(&cache_key).unwrap();
|
||||
Ok((data.to_string(), url.to_string()))
|
||||
} else {
|
||||
// URL not in cache, we request it
|
||||
let mut response = client.get(url).send()?;
|
||||
let res_url = response.url().to_string();
|
||||
|
||||
if !opt_silent {
|
||||
if url == res_url {
|
||||
eprintln!("{}", &url);
|
||||
} else {
|
||||
eprintln!("{} -> {}", &url, &res_url);
|
||||
}
|
||||
}
|
||||
|
||||
let new_cache_key = clean_url(&res_url);
|
||||
|
||||
if as_data_url {
|
||||
// Convert response into a byte array
|
||||
let mut data: Vec<u8> = vec![];
|
||||
response.copy_to(&mut data)?;
|
||||
|
||||
// Attempt to obtain MIME type by reading the Content-Type header
|
||||
let mimetype = if mime == "" {
|
||||
response
|
||||
.headers()
|
||||
.get(CONTENT_TYPE)
|
||||
.and_then(|header| header.to_str().ok())
|
||||
.unwrap_or(&mime)
|
||||
} else {
|
||||
mime
|
||||
};
|
||||
let data_url = data_to_data_url(&mimetype, &data);
|
||||
// Add to cache
|
||||
cache.insert(new_cache_key, data_url.clone());
|
||||
Ok((data_url, res_url))
|
||||
} else {
|
||||
let content = response.text().unwrap();
|
||||
// Add to cache
|
||||
cache.insert(new_cache_key, content.clone());
|
||||
Ok((content, res_url))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue